def set_features(collection_name): raw_list = [] raw_ids = [] counter = Counter(100) collection = client.wiki[collection_name] # type: collection.Collection for raw in collection.find({}): if len(raw["revs"]) <= 1: continue if "rwords" not in raw: continue distr = calculate(raw) collection.update_one( {"_id": raw["_id"]}, {"$set": { "f.t_charscore": BHdist(distr, good_distr) }}) raw_list.append({key: value for key, value in distr.items()}) raw_ids.append(raw["_id"]) counter.tick() '''
client = MongoClient('localhost', 27017) items_collection = client.wiki.any_items # type: collection.Collection total_count = 0 for collection_name in TASK: collection = client.wiki[collection_name] collection.delete_many({}) collection.drop_indexes() collection.create_index([("r", pymongo.ASCENDING)]) total_count += sum(TASK[collection_name].values()) # set random field seed() random_counter = Counter(100) print("Randomizing") for item in items_collection.find({}, no_cursor_timeout=True): items_collection.update_one({'_id': item['_id']}, {'$set': { 'r': random() }}) random_counter.tick() print("Random elements created") # setting indices indices = items_collection.index_information() if 'random' not in indices: print("Creating index") items_collection.create_index([("vandal", pymongo.ASCENDING),
# Connect to the local MongoDB server and select the collection to process.
client = MongoClient('localhost', 27017)
raw_collection = client.wiki[COLLECTION_NAME]  # type: collection.Collection

# Instantiate the requested feature extractors into a fresh dict.
# A single '*' entry in REQUIRED_FEATURES selects every registered feature.
# Building a new mapping (instead of aliasing FEATURES_LIST and overwriting
# its values) keeps the shared registry of factories untouched.
use_all = len(REQUIRED_FEATURES) == 1 and REQUIRED_FEATURES[0] == '*'
features_list = {
    name: factory()
    for name, factory in FEATURES_LIST.items()
    if use_all or name in REQUIRED_FEATURES
}

# NOTE(review): Collection.count() is deprecated since PyMongo 3.7 and removed
# in 4.0; switch to count_documents({}) once the driver version is confirmed.
# Counter is presumably a project-local progress counter — TODO confirm.
cnt = Counter(100, raw_collection.count())
# NOTE(review): a no_cursor_timeout cursor is not reaped by the server; confirm
# it is exhausted or closed after this loop.
for raw in raw_collection.find({}, no_cursor_timeout=True):
    revs = raw["revs"]
    # Skip documents without at least two revisions.
    if revs is None or len(revs) <= 1:
        continue
    # Skip documents whose last revision has empty text.
    if revs[-1]["text"] == "":
        continue

    # Start from the already stored features, if any.
    f = raw.get("f") or {}
    for key, extractor in features_list.items():
        res = extractor.extract(raw)
        # An extractor may return several named features at once (any dict,
        # including subclasses) or a single value stored under its own key.
        if isinstance(res, dict):
            f.update(res)
        else:
            f[key] = res