def test(self, item):
        """Score the current revision of ``item`` against the stored counters.

        Derives a user ratio (guest/registered average, or a per-country
        ratio for guests from a country with at least 10 recorded vandal
        edits), an hour-of-day ratio and a weekday ratio from the counters
        held on ``self``. None of these ratios is used yet: the method
        currently always returns ``1``.

        Raises:
            Exception: when ``item`` has no current revision.
        """
        current = Feature.revs(item)["current"]
        if current is None:
            raise Exception("Empty revision!")

        # Revisions without a user id are measured against the guest counters.
        is_guest = current["user"]["id"] is None
        pool = self.guests if is_guest else self.users
        avg_score = pool["vandal"] / pool["total"]

        # Default to the average; only guests may get a per-country ratio.
        user_score = avg_score
        if is_guest:
            country_code = self.get_country(current["user"]["name"])
            if country_code in self.countries:
                country = self.countries[country_code]
                if country["vandal"] >= 10:
                    user_score = country["vandal"] / country["total"]

        stamp = current["timestamp"]

        hour_stats = self.hours[stamp.hour]
        hour_score = hour_stats["vandal"] / hour_stats["total"]

        day_stats = self.days[stamp.weekday()]
        day_score = day_stats["vandal"] / day_stats["total"]

        return 1  # hour_score * day_score
Ejemplo n.º 2
0
 def create_feature_instance(self) -> Feature:
     """Build a Feature from this object's ``feature_*`` descriptors.

     The feature is bound to ``self.recording`` and to the id of
     ``self.current_channel``.
     """
     # Query the descriptors in a fixed order: shape, name, units, dtype,
     # annotations.
     datapoint_shape = self.feature_shape()
     feature_name = self.feature_name()
     options = {
         "units": self.feature_units(),
         "datapoint_shape": datapoint_shape,
         "data_type": self.feature_dtype(),
         "annotations": self.feature_annotations(),
     }
     return Feature(
         feature_name,
         self.recording,
         self.current_channel.id,
         **options,
     )
Ejemplo n.º 3
0
def split_features_by_distance(features, distance):
    """ Break every feature's geometry into pieces and wrap each piece in a
    new Feature carrying the data of the feature it came from.

    Arguments:
        features {list} -- List of Feature objects
        distance {int|float} -- Approx distance in metres between splits

    Returns:
        list -- One Feature per piece, in input order
    """
    return [
        Feature(piece, source.data)
        for source in features
        for piece in split_line_by_distance(source.geom, distance)
    ]
Ejemplo n.º 4
0
def snap_features(r, features):
    """ Connect ('snap') the geometries of features that lie within radius
    `r` of one another, keeping each feature's data unchanged.

    Arguments:
        r {int|float} -- Tolerance radius in metres within which to snap
                   lines together
        features {list} -- list of Features

    Returns:
        list -- list of snapped Features, in the same order as the input
    """
    geoms = []
    for feature in features:
        geoms.append(feature.geom)

    snapped = snap_linestrings(r, geoms)

    # The snapped geometry at position i replaces the geometry of feature i.
    result = []
    for position, feature in enumerate(features):
        result.append(Feature(snapped[position], feature.data))
    return result
Ejemplo n.º 5
0
def raster_to_features(path):
    """ Convert each positive-valued pixel in a raster to a Shapely Point
    located at that pixel's centroid, and give it a value attribute equal to
    the pixel's value. Return these as a list of Features.

    Pixels whose value is not greater than zero are skipped.

    Arguments:
        path {str} -- Path to raster file

    Returns:
        list -- One Feature per positive pixel; its data holds 'value' (the
            pixel value as a Python scalar) and 'pixel_size' (pixel width *
            pixel height * 1e-6)

    Note: TODO potential way to vectorise:

    # All rows and columns into numpy mesh grid
    # cols, rows = np.meshgrid(np.arange(A.shape[2]), np.arange(A.shape[1]))

    # All eastings and northings
    # lats, lons = np.vectorize(rc2en, otypes=[float, float])(rows, cols)
    """

    # Read raster
    with rasterio.open(path) as r:
        T0 = r.transform  # upper-left pixel corner affine transform
        # NOTE(review): p1 is never used below; kept so this call still runs
        # (and can still raise) exactly as before -- confirm whether it can go.
        p1 = pyproj.Proj(r.crs)
        A = r.read()  # pixel values, indexed (band, row, column)
        pixelSizeX, pixelSizeY = r.res

    # Get affine transform for pixel centres
    T1 = T0 * Affine.translation(0.5, 0.5)

    # Same for every pixel, so compute once (assumes projected CRS)
    pixel_area = (pixelSizeX * pixelSizeY) * 1e-6

    # Function to convert pixel row/column index (from 0) to the x/y
    # coordinates of the pixel centre. The transform goes on the left:
    # tuple * Affine relies on affine's deprecated right-multiplication.
    def rc2en(row, col):
        return T1 * (col, row)

    features = []
    it = np.nditer(A, flags=['multi_index'])

    for cell in it:
        # ndarray.item() replaces np.asscalar, which newer NumPy no longer has
        value = cell.item()
        if value > 0:
            row, col = it.multi_index[1], it.multi_index[2]
            features.append(
                Feature(
                    Point(rc2en(row, col)),
                    {
                        'value': value,
                        'pixel_size': pixel_area
                    }))

    return features
Ejemplo n.º 6
0
def merge_features(features):
    """ Merge feature geometries together where possible and return one
    Feature per resulting contiguous line. Applies data of first feature to
    all.

    Arguments:
        features {list} -- list of Features

    Returns:
        list -- merged Features; empty when `features` is empty
    """
    # Nothing to merge, and no first feature to take the data from.
    if not features:
        return []

    merged = linemerge([f.geom for f in features])

    # linemerge yields a single geometry when everything joins up, or a
    # MultiLineString whose parts are the separate merged lines.
    if merged.geom_type == 'MultiLineString':
        parts = merged.geoms
    else:
        parts = [merged]

    # Every merged feature refers to the first input feature's data object.
    shared_data = features[0].data
    return [Feature(part, shared_data) for part in parts]
    def train_one(self, raw):
        """Fold one labelled sample into the running counters.

        Does nothing when ``raw`` has no current revision. Otherwise bumps
        the guest or registered-user counters (recording the guest's name
        via ``_append_ip``), then the weekday and hour counters for the
        revision's timestamp. ``raw["vandal"]`` decides whether the
        "vandal" counts are increased.
        """
        current = Feature.revs(raw)["current"]
        if current is None:
            return

        vandal_int = int(bool(raw["vandal"]))

        author = current["user"]
        if author["id"] is None:
            # No user id: counted as a guest edit.
            self._append_ip(author["name"], raw["vandal"])
            bucket = self.guests
        else:
            bucket = self.users
        bucket["total"] += 1
        bucket["vandal"] += vandal_int

        stamp = current["timestamp"]

        day_stats = self.days[stamp.weekday()]
        day_stats["vandal"] += vandal_int
        day_stats["total"] += 1

        hour_stats = self.hours[stamp.hour]
        hour_stats["vandal"] += vandal_int
        hour_stats["total"] += 1
Ejemplo n.º 8
0
def load_features(path, data=False):
    """ Read a shapefile into a list of Features. Each feature's data is the
    record's properties, unless a truthy `data` is given, in which case that
    value is used for every feature instead.

    Records without a geometry are reported on stdout and skipped.

    Arguments:
        path {str} -- Path to shapefile to load
        data {boolean|dict} -- False, or value to fill each feature's data with

    Returns:
        list -- the loaded Features, in file order
    """
    loaded = []
    with fiona.open(path) as records:
        for record in records:
            geometry = record['geometry']
            if not geometry:
                print('Ignoring feature with no geometry...')
                continue

            geom_class = get_shapely_class_from_geom_type(geometry['type'])
            geom = geom_class(shape(geometry))

            # A falsy `data` (the default False) means "use the record's own
            # properties".
            payload = data if data else dict(record['properties'])
            loaded.append(Feature(geom, payload))
    return loaded
    # NOTE(review): the enclosing loop header is not visible in this excerpt;
    # the comments below describe only the statements shown here.
    # Skip records that are missing any feature named in OK_FEATURES.
    if any(f not in raw["f"] for f in OK_FEATURES):
        continue

    #raw["f"].pop('t_biscore', None)
    # Fixed weight picked by the "lr_guest" feature: 0.1216 when truthy,
    # 0.10 otherwise.
    vandal_score = 0.1216 if raw["f"]["lr_guest"] else 0.10
    # weekday() <= 4 is Monday-Friday (assuming a datetime timestamp); the
    # last revision falling on any other day gets the 0.97 factor.
    day_score = 1 if raw["revs"][-1]["timestamp"].weekday() <= 4 else 0.97
    # One-element rows: t_biscore scaled by both weights, and t_charscore.
    raw_sec.append([raw["f"]["t_biscore"]*day_score*vandal_score])
    raw_chr.append([raw["f"]["t_charscore"]])
    # Collect this record's feature values in OK_F_LIST order.
    tmp = []
    for f in OK_F_LIST:
        tmp.append(raw["f"][f])
 #   raw_list.append([x for n, x in raw["f"].items() if n in OK_FEATURES])
    raw_list.append(tmp)

    # Store a diff URL built from the ids of the "current" and "prev_user"
    # revisions.
    revs = Feature.revs(raw)
    raw["url"] = "https://ru.wikipedia.org/w/index.php?type=revision&diff={}&oldid={}".format(
            revs["current"]["id"], revs["prev_user"]["id"]
        );

    # Print the URL for records whose "sb_added" feature is positive.
    if raw["f"]["sb_added"] > 0:
        print(raw["url"])

    # Drop the "revs" and "rwords" entries before keeping the record.
    del raw["revs"]
    del raw["rwords"]
    raw_orig.append(raw)
    # Label: 1 when raw["vandal"] is truthy, else 0.
    raw_res.append(1 if raw["vandal"] else 0)

# Report how many feature rows were collected.
print(len(raw_list))
# NOTE(review): the script exits here, so statements after this line never
# run -- confirm this is an intentional debugging stop.
sys.exit(0)
from sklearn.metrics import confusion_matrix
Ejemplo n.º 10
0
cnt = Counter(50)

# Walk every stored record and keep only those that offer both a current
# text and a previous-user text.
for raw in raw_collection.find({}):
    # Records with fewer than two revisions should not occur; skip them.
    if raw["revs"] is None or len(raw["revs"]) < 2:
        continue

    texts = Feature.revs(raw)

    # Skip when there is no previous-user revision, or either text is missing.
    if (texts['prev_user'] is None
            or texts['current']['text'] is None
            or texts['prev_user']['text'] is None):
        continue

    cnt.tick()

    # Both texts go through strip_accents before further use.
    prev_text = strip_accents(texts['prev_user']['text'])
    curr_text = strip_accents(texts['current']['text'])
Ejemplo n.º 11
0
flags2 = DepRepo.flags()

TRUSTED_GROUPS = [
    'editor',
    'autoeditor',
    'rollbacker',
    'reviewer',
    'sysop',
    'bureaucrat',
]
users = total = 0

# Print a URL for every stored item marked "vandal" whose "f.link_avg_new"
# equals 0, provided it has at least two revisions.
for item in collection.find(
        {"f.link_avg_new": 0, "vandal": True},
        no_cursor_timeout=True,
):
    if len(item["revs"]) >= 2:
        revs = Feature.revs(item)
        print(get_url(revs))

import pymorphy2

# Create the pymorphy2 analyzer used by this script.
morph = pymorphy2.MorphAnalyzer()

# Parse an empty string with the analyzer and keep the result in `test`.
test = morph.parse('')

print("http://google.com")
# NOTE(review): the script exits here, so the assignments below never run
# when this file is executed top to bottom -- confirm this is intentional.
sys.exit(0)

# MongoDB client for a server on localhost, port 27017.
client = MongoClient('localhost', 27017)

# Empty accumulators.
raw_list = []
raw_res = []