async def register(cls, player: discord.User, characters: Optional[Iterable[Character]] = tuple()):
    exists = await bot.parties.find_one({"player": bson.Int64(player.id)})
    if exists:
        raise AlreadyRegistered
    await bot.parties.insert_one({
        "_id": (identifier := uuid()),
        "player": bson.Int64(player.id),
        "characters": [c.identifier for c in characters],
    })
def migrate(self):
    # Get existing termination groups
    tg_data = self.db.execute(
        "SELECT id, name, description, remote_system, remote_id, tags FROM sa_terminationgroup"
    )
    if not tg_data:
        return  # Nothing to convert
    bulk = []
    # Create root node for migrated termination groups
    root_id = bson.ObjectId()
    bulk += [
        InsertOne({
            "_id": root_id,
            "name": "Converted T.G.",
            "parent": None,
            "description": "Created from former termination groups",
            "technology": bson.ObjectId("5b6d6819d706360001a0b716"),  # Group
            "bi_id": bson.Int64(bi_hash(root_id)),
        })
    ]
    # Attach termination groups
    for id, name, description, remote_system, remote_id, tags in tg_data:
        new_id = bson.ObjectId()
        bulk += [
            InsertOne({
                "_id": new_id,
                "name": name,
                "parent": root_id,
                "description": description,
                # May be changed by phone migration
                "technology": bson.ObjectId("5b6d6be1d706360001f5c04e"),  # Network | IPoE Termination
                "remote_system": bson.ObjectId(remote_system) if remote_system else None,
                "remote_id": remote_id,
                "bi_id": bson.Int64(bi_hash(new_id)),
                "_legacy_id": id,  # To be removed in future migrations
            })
        ]
    # Apply groups
    self.mongo_db.resourcegroups.bulk_write(bulk)
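# The bi_hash helper used above (and throughout the migrations below) is not shown
# in these snippets. A purely hypothetical stand-in, sketching the contract the
# callers rely on: a stable, positive 63-bit integer derived from a document id,
# suitable for wrapping in bson.Int64.
import hashlib


def bi_hash(value):
    # Hash the id's string form and keep the low 63 bits so the result is a
    # positive signed 64-bit integer. Illustrative only; the real implementation
    # is defined elsewhere in the codebase these migrations come from.
    digest = hashlib.sha1(str(value).encode()).digest()
    return int.from_bytes(digest[:8], "big") & 0x7FFFFFFFFFFFFFFF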
def forwards(self):
    # Update mongodb collections
    mdb = get_db()
    for coll_name in [
        "noc.firmwares",
        "noc.interface_profiles",
        "noc.networksegments",
        "noc.networksegmentprofiles",
        "noc.objects",
        "noc.platforms",
        "noc.vendors",
    ]:
        coll = mdb[coll_name]
        updates = []
        for d in coll.find({"bi_id": {"$exists": False}}, {"_id": 1}):
            updates += [
                UpdateOne(
                    {"_id": d["_id"]},
                    {"$set": {"bi_id": bson.Int64(bi_hash(d["_id"]))}},
                )
            ]
            if len(updates) >= MONGO_CHUNK:
                coll.bulk_write(updates)
                updates = []
        if updates:
            coll.bulk_write(updates)
def create_room(username, room_name, num_member):
    # insert_one returns an InsertOneResult, not an id; take .inserted_id
    room_id = room_collection.insert_one({
        '_id': room_name,
        'created_by': username,
        'n_members': bson.Int64(num_member),
        'created_at': datetime.now()
    }).inserted_id
    add_member(username, room_name)
def new_data(sensor_id, date, value, hs):
    result = {
        "sensor_id": sensor_id,
        "date": datetime.datetime.combine(date, datetime.time.min),
        "value": bson.Int64(value),
        "hash": hs
    }
    return result
def fix_document(model):
    coll = model._get_collection()
    bulk = []
    for d in coll.find({"bi_id": {"$exists": False}}, {"_id": 1}):
        bi_id = bi_hash(d["_id"])
        bulk += [UpdateOne({"_id": d["_id"]}, {"$set": {"bi_id": bson.Int64(bi_id)}})]
    if bulk:
        print("    Update %d items" % len(bulk))
        coll.bulk_write(bulk)
def on_data(self, data):
    try:
        tweet = json.loads(data)
    except json.JSONDecodeError:
        # json.loads can only raise JSONDecodeError; the original caught
        # requests.exceptions.ReadTimeout here, which can never fire
        print("Failed to decode tweet payload")
        return
    if "text" in tweet:
        try:
            # Send the content type as a header, not as a form field as before
            r = requests.post(
                self.url,
                headers={'content-type': 'application/json'},
                json={'sentence': tweet['text'], 'id': tweet['id']}
            )
            sentiment = r.json()['sentiment']
            # Normalize Indonesian labels to English
            if sentiment in ('positive', 'positif'):
                sentiment = 'positive'
            elif sentiment in ('neutral', 'netral'):
                sentiment = 'neutral'
            elif sentiment in ('negative', 'negatif'):
                sentiment = 'negative'
            tweet['sentiment'] = sentiment
            tweet['timestamp_ms'] = bson.Int64(tweet['timestamp_ms'])
            self.col.insert_one(tweet)
            # Clean the data
            wordClean = []
            dSentiment = []
            datasentence = tweet['text']
            tokens = self.clean_doc(datasentence)
            teks_sentence = " ".join(tokens)
            wordClean.append(teks_sentence)
            dSen = tweet['sentiment']
            dSentiment.append(dSen)
            dataUp = {'sentence': wordClean, 'sentiment': dSentiment}
            dataFrame = pd.DataFrame(data=dataUp)
            # drop_duplicates returns a new frame; keep the result
            dataFrame = dataFrame.drop_duplicates(subset=['sentence'], keep='first')
            data = CleanData(sentence=dataFrame.sentence, sentiment=dataFrame.sentiment)
            data.save()
        except pymongo.errors.DuplicateKeyError:
            pass
        except Exception as ex:
            template = "An exception of type {0} occurred. Arguments:\n{1!r}"
            message = template.format(type(ex).__name__, ex.args)
            print(message)
def upload_face():
    payload = request.get_json()  # renamed from `json` to avoid shadowing the json module
    if payload:
        imageBytes = base64.b64decode(payload['data'])
        # FIXME skip write image file
        with open('data/decode.jpg', 'wb') as image:
            image.write(imageBytes)
        faceImage = face_recognition.load_image_file('data/decode.jpg')
        face_encodings = face_recognition.face_encodings(faceImage)
        if len(face_encodings) > 0:
            face_encoding = face_encodings[0]
            # Get distances between face_encoding and all known_faces
            face_distances = face_recognition.face_distance(known_faces, face_encoding)
            # Get minimum distance
            distance = min(face_distances)
            min_index = np.argmin(face_distances)
            # Get user ID by face encodings
            userid = name_index[min_index]
            # Get user data by ID
            obj = collection.find_one({'_id': bson.Int64(userid)})
            user = obj['user']
            response = {
                'id': obj['_id'],
                'contribution': obj['contribution'],
                'followers': user['followers'],
                'publicgists': user['publicgists'],
                'publicrepos': user['publicrepos'],
                'distance': distance
            }
            print(response)
            return jsonify(response)
        else:
            print("Failed to get encoding from image")
    else:
        print("Get image from request error")
    return jsonify({})
async def register(cls, player: Player, name: str):
    exists = await bot.clans.find_one({"members": bson.Int64(player.id)})
    duplicate_name = await bot.clans.find_one({"name": name})
    if exists or duplicate_name:
        raise AlreadyRegistered
    if isinstance(player, (discord.User, discord.Member)):
        player = await Party.from_user(player)
    await bot.clans.insert_one({
        "_id": (identifier := uuid()),
        "leader": player.identifier,
        "name": name,
        "members": [player.identifier],
    })
def visit_struct(self, array):
    fields = collections.OrderedDict()
    for i, field in enumerate(array.type):
        assert field.name is not None
        assert len(field.name) > 0
        field_data = array.field(i)
        field_doc = collections.OrderedDict()
        _DataWriter(field_doc, self.compression_level).accept(field_data)
        fields[field.name] = field_doc
    self.doc[DATA] = {LENGTH: bson.Int64(len(array)), FIELDS: fields}
    self._make_mask(array)
    write_type(self.doc, array.type)
def migrate(self):
    MODELS = [
        "sa_administrativedomain",
        "sa_authprofile",
        "sa_managedobject",
        "sa_managedobjectprofile",
        "sa_terminationgroup",
    ]
    # Update postgresql tables
    for table in MODELS:
        rows = self.db.execute("SELECT id FROM %s WHERE bi_id IS NULL" % table)
        values = ["(%d, %d)" % (r[0], bi_hash(r[0])) for r in rows]
        while values:
            chunk, values = values[:PG_CHUNK], values[PG_CHUNK:]
            self.db.execute(
                """
                UPDATE %s AS t
                SET bi_id = c.bi_id
                FROM (VALUES %s) AS c(id, bi_id)
                WHERE c.id = t.id
                """ % (table, ",\n".join(chunk))
            )
    # Update mongodb collections
    mdb = self.mongo_db
    for coll_name in ["noc.profiles", "noc.services", "noc.serviceprofiles"]:
        coll = mdb[coll_name]
        updates = []
        for d in coll.find({"bi_id": {"$exists": False}}, {"_id": 1}):
            updates += [
                UpdateOne(
                    {"_id": d["_id"]},
                    {"$set": {"bi_id": bson.Int64(bi_hash(d["_id"]))}},
                )
            ]
            if len(updates) >= MONGO_CHUNK:
                coll.bulk_write(updates)
                updates = []
        if updates:
            coll.bulk_write(updates)
    # Alter bi_id fields and create indexes
    for table in MODELS:
        self.db.execute("ALTER TABLE %s ALTER bi_id SET NOT NULL" % table)
        self.db.create_index(table, ["bi_id"], unique=True)
def migrate(self):
    # Update mongodb collections
    mdb = self.mongo_db
    for coll_name in ["noc.alarmclasses"]:
        coll = mdb[coll_name]
        updates = []
        for d in coll.find({"bi_id": {"$exists": False}}, {"_id": 1}):
            updates += [
                UpdateOne(
                    {"_id": d["_id"]},
                    {"$set": {"bi_id": bson.Int64(bi_hash(d["_id"]))}},
                )
            ]
            if len(updates) >= MONGO_CHUNK:
                coll.bulk_write(updates)
                updates = []
        if updates:
            coll.bulk_write(updates)
def face_detection():
    # Get encoding from request
    print(request.get_json())
    encoding = np.asarray(request.get_json()['encoding'])
    # Get user ID by face encodings
    face_distances = face_recognition.face_distance(known_faces, encoding)
    distance = min(face_distances)
    min_index = np.argmin(face_distances)
    userid = name_index[min_index]
    # Get user data by ID
    user = collection.find_one({'_id': bson.Int64(userid)})
    return jsonify({'user': user, 'distance': distance})
def get_userid(start, nums):
    import uuid, time, bson
    from pymongo import MongoClient
    client = MongoClient(host='47.92.72.108', port=28010)
    db = client.get_database('iTROdb')
    coll = db.get_collection('iTRO_User')
    result = {}
    for i in range(start, start + nums):
        username = str(i)
        tmp = coll.find_one({'UserName': username}, {'NickName': 1})
        if not tmp:
            nickname = '测试%s' % username  # "Test<username>"
            userinfo = {
                'NId': str(uuid.uuid3(uuid.NAMESPACE_DNS, username)),  # unique UUID
                'RoleId': '',  # role ID
                'ParentId': '',  # parent ID
                'UserName': username,
                'NickName': nickname,
                'UserPsw': 'ETWOlU8T9f2SmhAlXY1JCA==',  # password: te123456
                'Sex': int(1),
                'OpenId': '',
                'Unionid': '',
                'HeadimgUrl': '',
                'Webchat': '',
                'Qq': '',
                'Mobile': '',
                'Email': '',
                'IdCard': '',
                'Money': int(0),
                'IsLogin': int(1),
                'IsChat': int(1),
                'IsOnline': int(0),
                'LogitudeAndLat': '',
                'CreateDt': bson.Int64(int(time.time())),
                'UseMomey': int(0),
                'ExtendMan': '',
                'desc': '测试账号',  # "test account"
                'labletag': '无',  # "none"
                'qrcode': '',
                'truename': '',
            }
            coll.insert_one(userinfo)
        else:
            nickname = tmp['NickName']
        result[username] = nickname
    return result
def forwards(self):
    # Update mongodb collections
    mdb = get_db()
    for coll_name in ["noc.metrictypes"]:
        coll = mdb[coll_name]
        updates = []
        for d in coll.find({"bi_id": {"$exists": False}}, {"_id": 1}):
            updates += [
                UpdateOne(
                    {"_id": d["_id"]},
                    {"$set": {"bi_id": bson.Int64(bi_hash(d["_id"]))}},
                )
            ]
            if len(updates) >= MONGO_CHUNK:
                coll.bulk_write(updates)
                updates = []
        if updates:
            coll.bulk_write(updates)
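# Several of the migrations above repeat the same chunked bi_id backfill verbatim.
# A minimal generic sketch of that shared pattern, not code from the original
# project: bi_hash is passed in, and the chunk size is an assumed value standing in
# for the MONGO_CHUNK constant the originals define elsewhere.
import bson
from pymongo import UpdateOne

MONGO_CHUNK = 500  # assumed; the original project defines its own constant


def backfill_bi_id(coll, bi_hash):
    """Set bi_id = Int64(bi_hash(_id)) on every document lacking it, in chunks."""
    updates = []
    for d in coll.find({"bi_id": {"$exists": False}}, {"_id": 1}):
        updates.append(
            UpdateOne({"_id": d["_id"]}, {"$set": {"bi_id": bson.Int64(bi_hash(d["_id"]))}})
        )
        if len(updates) >= MONGO_CHUNK:
            coll.bulk_write(updates)
            updates = []
    if updates:
        coll.bulk_write(updates)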
def set_priority(self, user: str, project: str, priority: int):
    """Change the priority of a project for the given user."""
    assert self.exists(user), f'User {user} does not exist'
    return self._mongo.read('users').find_one_and_update(
        {
            '$and': [
                {'$or': [{'telegram': user}, {'email': user}]},
                {'projects.url': project},
            ]
        },
        {'$set': {'projects.$.priority': bson.Int64(priority)}},
    )
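# The positional "$" operator in "projects.$.priority" updates only the array
# element that matched "projects.url" in the filter. A hypothetical document shape
# this update assumes, with field names inferred from the query above:
doc = {
    "telegram": "@alice",
    "email": "alice@example.com",
    "projects": [
        {"url": "https://github.com/org/repo", "priority": 2},   # <- only the matched
        {"url": "https://github.com/org/other", "priority": 1},  #    element changes
    ],
}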
def _get_next_sequence_id(self, sequence):
    """
    Manage monotonically incrementing sequences.
    Returns the next ID in the sequence, or zero if the sequence is
    created for the first time.
    """
    seq = self.db['sequences'].find_one_and_update(
        {'_id': sequence},
        {'$inc': {'seq': bson.Int64(1)}},
        projection={'seq': True, '_id': False},
        upsert=True)
    if seq is None:
        return 0
    else:
        return seq['seq']
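# By default find_one_and_update returns the document as it was *before* the
# update, which is why the first upserted call above yields None (mapped to 0).
# If a caller wanted the post-increment value directly, a minimal sketch of the
# same counter using PyMongo's ReturnDocument.AFTER (standalone, db passed in):
import bson
from pymongo import ReturnDocument


def next_sequence_id(db, sequence):
    seq = db['sequences'].find_one_and_update(
        {'_id': sequence},
        {'$inc': {'seq': bson.Int64(1)}},
        projection={'seq': True, '_id': False},
        upsert=True,
        return_document=ReturnDocument.AFTER)
    return seq['seq']  # 1 on the first call, never None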
async def register(
    cls, player: discord.User, name: str, job: str, race: Optional[str] = "human"
):
    if job.upper() not in [c.name for c in list(Job)] or race.upper() not in [
        r.name for r in list(Race)
    ]:
        raise ValueError
    jobs = [0] * 16
    jobs[Job[job.upper()].value] = 1
    # TODO: use `gen_stats` here
    await bot.characters.insert_one(
        {
            "_id": (identifier := uuid()),
            "player": bson.Int64(player.id),
            "name": name,
            "jobs": jobs,
            "race": Race[race.upper()].value,
            "stats": [0] * 30,
        }
    )
def save_session(self, app, session, response):
    session_id = session.get_id()
    unset_query = dict()
    set_query = dict()
    doc = dict()
    for key in session.modified_keys():
        value = session.get(key)
        if value is None:
            unset_query[key] = True
        else:
            set_query[key] = value
    if len(unset_query.keys()):
        doc['$unset'] = unset_query
    if len(set_query.keys()):
        doc['$set'] = set_query
    if len(unset_query.keys()) + len(set_query.keys()):
        set_query = doc.get('$set') or dict()
        # Timestamps stored as 64-bit milliseconds since the epoch
        now_date = bson.Int64(time.mktime(datetime.now().timetuple()) * 1000)
        if not set_query.get('_createDate'):
            set_query['_createDate'] = now_date
        set_query['_lastUpdateDate'] = now_date
        doc['$set'] = set_query
        # Collection.update() was removed from PyMongo; use update_one
        self.__collection.update_one(dict(_id=session_id), doc, upsert=True)
        session.reset_modified_keys()
    response.set_cookie(app.session_cookie_name, session_id)
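# A note on the timestamp above: mktime(...timetuple()) truncates to whole seconds
# before the * 1000, so _lastUpdateDate only ever has second resolution. If true
# millisecond resolution were wanted, an equivalent one-liner (Int64() truncates
# the float to an integer):
import time
import bson

now_date = bson.Int64(time.time() * 1000)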
exceptions = ['...', 'J ...']

authors_in_institute = []
for document in authors_collection.find():
    authors_in_institute.append(document['name'])

for document in authors_collection.find():
    cooperating_authors = []
    for article in document['articles']:
        for author in article['authors']:
            if author not in exceptions:
                cooperating_authors.append(author.lstrip())
    unique, counts = np.unique(cooperating_authors, return_counts=True)
    cooperating_authors = dict(zip(unique, counts))
    cooperating_internal_authors = dict()
    cooperating_external_authors = dict()
    for key, value in cooperating_authors.items():
        if key in authors_in_institute:
            cooperating_internal_authors[key] = value
        else:
            cooperating_external_authors[key] = value
    # Wrap the numpy counts so they are stored as BSON int64
    internal_collaborators = {k: bson.Int64(v) for k, v in cooperating_internal_authors.items()}
    external_collaborators = {k: bson.Int64(v) for k, v in cooperating_external_authors.items()}
    document.update({'internal_collaborators': internal_collaborators})
    document.update({'external_collaborators': external_collaborators})
    # Collection.save() was removed from PyMongo; replace the document by _id
    authors_collection.replace_one({'_id': document['_id']}, document)
def to_python(self, value):
    try:
        return bson.Int64(value)
    except (TypeError, ValueError):
        abort(404)
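# `to_python` together with abort(404) suggests this method belongs to a
# Flask/Werkzeug URL converter. A minimal sketch of how such a converter might be
# wired up; the class name, app, and route below are hypothetical, not from the
# original source:
import bson
from flask import Flask, abort
from werkzeug.routing import BaseConverter


class Int64Converter(BaseConverter):
    def to_python(self, value):
        try:
            return bson.Int64(value)
        except (TypeError, ValueError):
            abort(404)

    def to_url(self, value):
        return str(value)


app = Flask(__name__)
app.url_map.converters['int64'] = Int64Converter


@app.route('/users/<int64:user_id>')
def get_user(user_id):  # user_id arrives as a bson.Int64
    return str(user_id)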
if ef["name"] in nameIdDict: # print ef["value"] print "calling for efid :" + str( nameIdDict[ef["name"]]) + " value id :" + str( ef["value"]) id = getNewEfValueId(ef["value"], nameIdDict[ef["name"]]) print "id is " + str(id) if (len(id) > 0): ## update the doc here update = db[mongoCollection].update( { "_id": ObjectId(emp["_id"]), "extendedFields.name": ef["name"] }, { "$set": { "extendedFields.$.value": bson.Int64(id[0]) } }) print(update) i = i + 1 f.write("updated doc %d : %s new %s old %s\r\n" % ((i), emp["_id"], str(id), ef["value"])) # print ameIdDict[ef["name"]] except Exception, e: print str(e) f.write("Exception is %s\r\n" % str(e)) cursor.close() cnx.close()
def build_obs_multi_ids(self, data, i, species):
    """Build observation object from data with more than one uniprot_id per row.
    Goes into the observations collection.

    Args:
        data (:obj:`Obj`): source object.
        i (:obj:`int`): index (row label) of object in dataframe.
        species (:obj:`str`): species of yeast.

    Return:
        obj (:obj:`Obj`)
        {
            "entity": {
                "type": "protein",
                "name": "Cytoplasmic protein",
                "identifiers": [{} ... {}]
            },
            "genotype": {
                "taxon": {}
            },
            "values": [],
            "source": {},
            ...
        }
    """
    uniprot_ids = data.iloc[i, 0].split(";")
    for uniprot in uniprot_ids:
        query = {"identifiers.value": uniprot}
        projection = {"_id": 0, "name": 1}
        doc = self.client["datanator-demo"]["entity"].find_one(
            filter=query, projection=projection)
        if doc is not None:
            entity = {}
            entity["type"] = "protein"
            entity["name"] = doc["name"]
            entity["identifiers"] = []
            entity["identifiers"].append({
                "namespace": "uniprot_id",
                "value": uniprot
            })
            values_p = []
            if data.iloc[i, 4] != "n.d.":
                values_p.append({
                    "type": "Half-life",
                    "value": str(float(data.iloc[i, 4]) * 60),
                    "units": "s"
                })
            else:
                if ">=" in data.iloc[i, 5]:
                    values_p.append({
                        "type": "Half-life",
                        "value": "greater than or equal to " + str(float(data.iloc[i, 5][3:]) * 3600),
                        "units": "s"
                    })
            if data.iloc[i, 2] != "n.d.":
                values_p.append({
                    "type": "Degradation rates",
                    "value": str(float(data.iloc[i, 2]) / 60),
                    "units": "s^(-1)"
                })
            values_p.append({
                "type": "R^2 (quality of curve fitting)",
                "value": data.iloc[i, 3]
            })
            values_p.append({
                "type": "Cross validation of slope",
                "value": data.iloc[i, 6]
            })
            environment = {}
            query = {}
            if species == "Saccharomyces cerevisiae BY4742":
                genotype = {
                    "taxon": {
                        "ncbi_taxonomy_id": 559292,
                        "name": species
                    }
                }
                query = {"tax_id": 559292}
                environment["media"] = (
                    "5 ml synthetic medium, 30 mg/l heavy [13C6/15N2] L-lysine, "
                    "6.7 g/l yeast nitrogen base, 2 g/l dropout mix, all amino acids "
                    "except lysine and 2% glucose")
                environment["temperature"] = bson.Int64(30)
            elif species == "Schizosaccharomyces pombe MKSP201":
                genotype = {
                    "taxon": {
                        "ncbi_taxonomy_id": 4896,
                        "name": species
                    }
                }
                query = {"tax_id": 4896}
                environment["media"] = (
                    "Edinburgh minimal medium supplemented with 75 mg/l leucine, "
                    "histidine, uracil, and adenine, heavy [13C6/15N2] L-lysine")
            genotype["taxon"]["canon_ancestors"] = []
            projection = {
                "_id": 0,
                "canon_anc_ids": 1,
                "canon_anc_names": 1
            }
            taxon_doc = self.client["datanator-test"]["taxon_tree"].find_one(
                filter=query, projection=projection)
            if taxon_doc is not None:
                for j in range(len(taxon_doc["canon_anc_names"])):
                    d = {}
                    d["ncbi_taxonomy_id"] = taxon_doc["canon_anc_ids"][j]
                    d["name"] = taxon_doc["canon_anc_names"][j]
                    genotype["taxon"]["canon_ancestors"].append(d)
            source = [{
                "namespace": "doi",
                "value": "10.1016/j.celrep.2014.10.065"
            }]
            ob_p = {
                "entity": entity,
                "genotype": genotype,
                "environment": environment,
                "values": values_p,
                "source": source,
                "schema_version": "2.0"
            }
            # Upsert the identifier record
            query = {
                "$and": [{
                    "namespace": "uniprot_id"
                }, {
                    "value": entity["identifiers"][0]["value"]
                }]
            }
            self.identifier_col.update_one(
                query,
                {
                    "$set": {
                        "namespace": "uniprot_id",
                        "value": entity["identifiers"][0]["value"]
                    }
                },
                upsert=True)
            # Update observation collection
            con_1 = {
                "source": {
                    "$elemMatch": {
                        "namespace": "doi",
                        "value": "10.1016/j.celrep.2014.10.065"
                    }
                }
            }
            con_2 = {
                "identifier": {
                    "namespace": "uniprot_id",
                    "value": uniprot
                }
            }
            query = {"$and": [con_1, con_2]}
            self.obs_col.update_one(
                query,
                {
                    "$set": {
                        "entity": ob_p["entity"],
                        "genotype": ob_p["genotype"],
                        "environment": ob_p["environment"],
                        "schema_version": "2.0",
                        "identifier": {
                            "namespace": "uniprot_id",
                            "value": uniprot
                        }
                    },
                    "$addToSet": {
                        "values": {"$each": ob_p["values"]},
                        "source": {"$each": ob_p["source"]}
                    }
                },
                upsert=True)
def split_qa_documents_into_questions(self):
    logging.info("Splitting documents into questions....")
    qa_documents_coll = self.bankdomain_db.qa_documents
    qa_questions_coll = self.bankdomain_db.qa_questions
    # Collection.remove() was removed from PyMongo; use delete_many
    qa_questions_coll.delete_many({})
    qa_documents_in_db = qa_documents_coll.find()
    answer, question = "", ""
    index = 0
    for el in qa_documents_in_db:
        content = el["content"]
        el["processed"] = True
        state = 0
        for line_l in content:
            line = line_l.strip()
            if '###' in line:
                state = 0
                if len(question) > 0 and len(answer) > 0:
                    qa_questions_coll.insert_one({
                        "question": question,
                        "answer": answer,
                        "full_file": el["full_file"],
                        "index": bson.Int64(index)
                    })
                    index += 1
                answer, question = "", ""
            elif len(line) == 0:
                continue
            elif state == 0:
                question = line
                state = 1
            else:
                if len(answer) > 0:
                    answer = answer + "\n"
                answer = answer + line
        if len(question) > 0 and len(answer) > 0:
            qa_questions_coll.insert_one({
                "question": question,
                "answer": answer,
                "full_file": el["full_file"],
                "index": bson.Int64(index)
            })
            index += 1
            answer, question = "", ""
        # Collection.save() was removed from PyMongo; replace by _id instead
        qa_documents_coll.replace_one({"_id": el["_id"]}, el)
    logging.info("Finished splitting documents into questions....")
def unfavorite():
    global FAVORITE_ARCHIVER
    toot_id = bson.Int64(request.form["toot_id"])
    FAVORITE_ARCHIVER.remove(toot_id)
    return Response(status=200)
def favorite():
    global FAVORITE_ARCHIVER
    toot_id = bson.Int64(request.form["toot_id"])
    FAVORITE_ARCHIVER.save(toot_id)
    print("saved", toot_id)
    return Response(status=200)
def Int64(value):
    return bson.Int64(value)
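# What the wrapper above buys you: bson.Int64 subclasses int and pins the BSON
# encoding to a 64-bit integer even when the value would fit in 32 bits, which
# keeps a field's type stable across documents. A small self-contained check
# using only the bson package (byte 4 of an encoded document is the type byte of
# its first element; 0x12 = int64, 0x10 = int32 in the BSON spec):
import bson

n = bson.Int64(5)
assert isinstance(n, int) and n == 5   # behaves like a plain int in Python
assert bson.encode({"n": n})[4] == 0x12   # forced to int64
assert bson.encode({"n": 5})[4] == 0x10   # a small plain int encodes as int32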
def build_obs(self, data, i, species):
    """Build observation objects from obj. Goes into the observations collection.

    Args:
        data (:obj:`Obj`): source object.
        i (:obj:`int`): index (row label) of object in dataframe.
        species (:obj:`str`): species of yeast.

    Return:
        obj (:obj:`Obj`)
        {
            "entity": {
                "type": "protein",
                "name": "Type 2A phosphatase-associated protein 42",
                "identifiers": [{"namespace": "uniprot_id", "value": "Q04372"}]
            },
            "genotype": {
                "taxon": {}
            },
            "environment": {
                "media":
            },
            "values": [],
            "source": {},
            ...
        }
    """
    entity = {}
    entity["type"] = "protein"
    names = data.iloc[i, 8].split(";")
    entity["name"] = names[0]
    entity["identifiers"] = []
    entity["identifiers"].append({
        "namespace": "uniprot_id",
        "value": data.iloc[i, 0]
    })
    values_p = []
    if data.iloc[i, 4] != "n.d.":
        values_p.append({
            "type": "Half-life",
            "value": str(float(data.iloc[i, 4]) * 60),
            "units": "s"
        })
    else:
        if ">=" in data.iloc[i, 5]:
            values_p.append({
                "type": "Half-life",
                "value": "greater than or equal to " + str(float(data.iloc[i, 5][3:]) * 3600),
                "units": "s"
            })
    if data.iloc[i, 2] != "n.d.":
        values_p.append({
            "type": "Degradation rates",
            "value": str(float(data.iloc[i, 2]) / 60),
            "units": "s^(-1)"
        })
    values_p.append({
        "type": "R^2 (quality of curve fitting)",
        "value": data.iloc[i, 3]
    })
    values_p.append({
        "type": "Cross validation of slope",
        "value": data.iloc[i, 6]
    })
    environment = {}
    query = {}
    if species == "Saccharomyces cerevisiae BY4742":
        genotype = {"taxon": {"ncbi_taxonomy_id": 559292, "name": species}}
        query = {"tax_id": 559292}
        environment["media"] = (
            "5 ml synthetic medium, 30 mg/l heavy [13C6/15N2] L-lysine, "
            "6.7 g/l yeast nitrogen base, 2 g/l dropout mix, all amino acids "
            "except lysine and 2% glucose")
        environment["temperature"] = bson.Int64(30)
    elif species == "Schizosaccharomyces pombe MKSP201":
        genotype = {"taxon": {"ncbi_taxonomy_id": 4896, "name": species}}
        query = {"tax_id": 4896}
        environment["media"] = (
            "Edinburgh minimal medium supplemented with 75 mg/l leucine, "
            "histidine, uracil, and adenine, heavy [13C6/15N2] L-lysine")
    genotype["taxon"]["canon_ancestors"] = []
    projection = {"_id": 0, "canon_anc_ids": 1, "canon_anc_names": 1}
    doc = self.client["datanator-test"]["taxon_tree"].find_one(
        filter=query, projection=projection)
    if doc is not None:
        for j in range(len(doc["canon_anc_names"])):
            d = {}
            d["ncbi_taxonomy_id"] = doc["canon_anc_ids"][j]
            d["name"] = doc["canon_anc_names"][j]
            genotype["taxon"]["canon_ancestors"].append(d)
    source = [{
        "namespace": "doi",
        "value": "10.1016/j.celrep.2014.10.065"
    }]
    ob_p = {
        "entity": entity,
        "genotype": genotype,
        "environment": environment,
        "values": values_p,
        "source": source,
        "schema_version": "2.0"
    }
    return ob_p
import sys
import os
import re

import bson
from pymongo import MongoClient

import utility

# Raw strings so the backslash escapes reach the regex engine intact
ValidIpAddressRegex = re.compile(
    r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
)
ValidHostnameRegex = re.compile(
    r"^(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.)*([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9\-]*[A-Za-z0-9])$"
)

seqing_group_id = bson.Int64(1)
dubo_group_id = bson.Int64(2)
xiaoshuo_group_id = bson.Int64(3)
changwei_group_id = bson.Int64(4)
zhejiang_changwei_group_id = bson.Int64(88)


def write_mongo(host, port, db_name, collection_name, file_name):
    client = MongoClient(host, port)
    print(client.server_info())
    # database_names()/collection_names() were removed from PyMongo;
    # use the list_* variants
    dbs = client.list_database_names()
    print('\t'.join(dbs))
    db = client.get_database(db_name)
    collections = db.list_collection_names()
    print('\t'.join(collections))
    collection = db.get_collection(collection_name)
def store_data_in_db(data_to_store):
    """
    Stores the data in a Mongo DB.

    :param data_to_store: data we want to store (dictionary of numpy arrays)
    :return:
    """
    try:
        client = MongoClient()
    except pymongo.errors.ServerSelectionTimeoutError:
        print('Error: Please make sure an instance of MongoDB is running on this computer.')
        raise

    print("Setting up database")
    # get the database
    db = client['generated_data']
    # create the collections
    users_collection = db['users']
    steps_collection = db['steps']
    days_collection = db['days']
    # reset collections
    db['users'].delete_many({})
    db['steps'].delete_many({})
    db['days'].delete_many({})
    # index steps collection
    steps_collection.create_index("startDateInUTC")
    # number of users we want to store
    n_users_to_store = len(data_to_store["user_ids"])

    # Insert the users in the database
    start = timeit.default_timer()
    # get the user info data (email, name, id, profile picture, etc.)
    user_info_data = data_to_store["user_info"]
    # insert the users
    print("Preparing users..")
    users_to_insert = [user_info_data[i] for i in range(n_users_to_store)]
    print("Inserting users..")
    users_collection.insert_many(users_to_insert)
    stop = timeit.default_timer()
    print("Done in " + str(stop - start) + "s")

    # Insert the steps data in the database
    # TODO: increase speed
    # https://stackoverflow.com/questions/5292370/fast-or-bulk-upsert-in-pymongo
    steps_data_to_insert = []
    print("Preparing steps data..")
    start = timeit.default_timer()
    # iterate over the users
    for user_i in range(n_users_to_store):
        # get the user id
        user_id = data_to_store["user_ids"][user_i]
        # iterate over the dates
        for i, current_date in enumerate(data_to_store["dates_in_utc"]):
            # add them to the list of documents we want to insert
            steps_data_to_insert.append({
                "user": user_id,
                "startDateInUTC": bson.Int64(current_date),
                "steps": data_to_store["user_data"][user_i][i]
            })
    stop = timeit.default_timer()
    print("Done in " + str(stop - start) + "s")
    print("Inserting steps data..")
    start = timeit.default_timer()
    # insert them
    steps_collection.insert_many(steps_data_to_insert)
    stop = timeit.default_timer()
    print("Done in " + str(stop - start) + "s")

    # Insert days data
    print("\ntesting find")
    start = timeit.default_timer()
    for i, current_date in enumerate(data_to_store["dates_in_utc"]):
        steps_collection.find({"startDateInUTC": bson.Int64(current_date)})
    stop = timeit.default_timer()
    print("Done in " + str(stop - start) + "s\n")

    print("Preparing days data..")
    start = timeit.default_timer()
    days_data_to_insert = []
    for i, current_date in enumerate(data_to_store["dates_in_utc"]):
        steps_data_for_current_date = [
            doc[u"steps"] for doc in steps_collection.find(
                {"startDateInUTC": bson.Int64(current_date)})
        ]
        mean, std_error_of_mean, sums_for_mean_and_sem = calculate_mean_and_sem(
            steps_data_for_current_date)
        days_data_to_insert.append({
            "dateInUTC": bson.Int64(current_date),
            "mean": mean,
            "stdErrorOfMean": std_error_of_mean,
            "sumsForMeanAndSEM": sums_for_mean_and_sem
        })
    stop = timeit.default_timer()
    print("Done in " + str(stop - start) + "s")
    print("Inserting days data..")
    start = timeit.default_timer()
    days_collection.insert_many(days_data_to_insert)
    stop = timeit.default_timer()
    print("Done in " + str(stop - start) + "s")
    return