Example #1
 async def register(cls,
                    player: discord.User,
                    characters: Optional[Iterable[Character]] = tuple()):
     exists = await bot.parties.find_one({"player": bson.Int64(player.id)})
     if exists:
         raise AlreadyRegistered
     await bot.parties.insert_one({
         "_id": (identifier := uuid()),
         "player": bson.Int64(player.id),
         "characters": [c.identifier for c in characters],
     })
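
A note on why bson.Int64 wraps player.id above: PyMongo encodes a plain Python int as BSON int32 whenever the value fits in 32 bits, and Discord snowflake IDs do not. Wrapping makes the 64-bit BSON "long" type explicit. A minimal illustration (the values are made up):

import bson

small = bson.Int64(42)  # stored as BSON int64 even though it would fit in int32
snowflake = bson.Int64(236837839120105473)  # Discord-style 64-bit ID

assert isinstance(small, int)  # Int64 subclasses int
print(type(snowflake).__name__)  # "Int64"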
Example #2
 def migrate(self):
     # Get existing termination groups
     tg_data = self.db.execute(
         "SELECT id, name, description, remote_system, remote_id, tags FROM sa_terminationgroup"
     )
     if not tg_data:
         return  # Nothing to convert
     bulk = []
     # Create root node for migrated termination groups
     root_id = bson.ObjectId()
     bulk += [
         InsertOne({
             "_id": root_id,
             "name": "Converted T.G.",
             "parent": None,
             "description": "Created from former termination groups",
             "technology": bson.ObjectId("5b6d6819d706360001a0b716"),  # Group
             "bi_id": bson.Int64(bi_hash(root_id)),
         })
     ]
     # Attach termination groups
     for id, name, description, remote_system, remote_id, tags in tg_data:
         new_id = bson.ObjectId()
         bulk += [
             InsertOne({
                 "_id": new_id,
                 "name": name,
                 "parent": root_id,
                 "description": description,
                 # May be changed by phone migration
                 "technology": bson.ObjectId("5b6d6be1d706360001f5c04e"),  # Network | IPoE Termination
                 "remote_system": bson.ObjectId(remote_system) if remote_system else None,
                 "remote_id": remote_id,
                 "bi_id": bson.Int64(bi_hash(new_id)),
                 "_legacy_id": id,  # To be removed in future migrations
             })
         ]
     # Apply groups
     self.mongo_db.resourcegroups.bulk_write(bulk)
Example #3
 def forwards(self):
     # Update mongodb collections
     mdb = get_db()
     for coll_name in ["noc.firmwares",
                       "noc.interface_profiles",
                       "noc.networksegments",
                       "noc.networksegmentprofiles",
                       "noc.objects",
                       "noc.platforms",
                       "noc.vendors"]:
         coll = mdb[coll_name]
         updates = []
         for d in coll.find({"bi_id": {"$exists": False}},
                            {"_id": 1}):
             updates += [
                 UpdateOne({
                     "_id": d["_id"]
                 }, {
                     "$set": {
                         "bi_id": bson.Int64(bi_hash(d["_id"]))
                     }
                 })
             ]
             if len(updates) >= MONGO_CHUNK:
                 coll.bulk_write(updates)
                 updates = []
         if updates:
             coll.bulk_write(updates)
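
The pattern in Example #3 (buffer UpdateOne operations, flush every MONGO_CHUNK, then flush the remainder once more) generalizes to any backfill. A self-contained sketch; compute_value stands in for the example's bi_hash, and the chunk size is an assumption:

import bson
from pymongo import UpdateOne

MONGO_CHUNK = 500  # flush threshold; the real constant is defined elsewhere

def backfill_bi_id(coll, compute_value):
    # Give a 64-bit bi_id to every document that still lacks one
    updates = []
    for d in coll.find({"bi_id": {"$exists": False}}, {"_id": 1}):
        updates.append(UpdateOne(
            {"_id": d["_id"]},
            {"$set": {"bi_id": bson.Int64(compute_value(d["_id"]))}}))
        if len(updates) >= MONGO_CHUNK:
            coll.bulk_write(updates)
            updates = []
    if updates:
        coll.bulk_write(updates)  # flush the remainder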
Example #4
def create_room(username, room_name, num_member):
    room_id = room_collection.insert_one({
        '_id': room_name,
        'created_by': username,
        'n_members': bson.Int64(num_member),
        'created_at': datetime.now()
    }).inserted_id  # insert_one returns an InsertOneResult, not the new id
    add_member(username, room_name)
Example #5
def new_data(sensor_id, date, value, hs):
    result = {
        "sensor_id": sensor_id,
        "date": datetime.datetime.combine(date, datetime.time.min),
        "value": bson.Int64(value),
        "hash": hs
    }
    return result
Example #6
def fix_document(model):
    coll = model._get_collection()
    bulk = []
    for d in coll.find({"bi_id": {"$exists": False}}, {"_id": 1}):
        bi_id = bi_hash(d["_id"])
        bulk += [UpdateOne({"_id": d["_id"]}, {"$set": {"bi_id": bson.Int64(bi_id)}})]
    if bulk:
        print("    Update %d items" % len(bulk))
        coll.bulk_write(bulk)
Example #7
    def on_data(self, data):
        try:
            tweet = json.loads(data)
        except ValueError:  # json.loads raises ValueError, not requests' ReadTimeout
            print("JSON decode error occurred")
            return

        # print(tweet)
        if "text" in tweet:
            try:
                # NOTE: 'content-type' is sent as a form field here; it was
                # most likely intended to be a request header instead
                r = requests.post(self.url,
                                  data={
                                      'content-type': 'application/json',
                                      'sentence': tweet['text'],
                                      'id': tweet['id']
                                  })
                sentiment = r.json()['sentiment']

                # Normalize Indonesian labels to their English equivalents
                if sentiment in ('positive', 'positif'):
                    sentiment = 'positive'
                elif sentiment in ('neutral', 'netral'):
                    sentiment = 'neutral'
                elif sentiment in ('negative', 'negatif'):
                    sentiment = 'negative'

                tweet['sentiment'] = sentiment
                tweet['timestamp_ms'] = bson.Int64(tweet['timestamp_ms'])

                self.col.insert_one(tweet)

                # clean the tweet text
                wordClean = []
                dSentiment = []
                datasentence = tweet['text']
                tokens = self.clean_doc(datasentence)
                teks_sentence = " ".join(tokens)
                wordClean.append(teks_sentence)
                dSen = tweet['sentiment']
                dSentiment.append(dSen)

                dataUp = {'sentence': wordClean, 'sentiment': dSentiment}
                dataFrame = pd.DataFrame(data=dataUp)
                dataFrame = dataFrame.drop_duplicates(subset=['sentence'], keep='first')
                data = CleanData(sentence=dataFrame.sentence,
                                 sentiment=dataFrame.sentiment)
                data.save()

            except pymongo.errors.DuplicateKeyError:
                pass
            except Exception as ex:
                template = "An exception of type {0} occurred. Arguments:\n{1!r}"
                message = template.format(type(ex).__name__, ex.args)
                print(message)
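
The if/elif ladder above normalizes Indonesian sentiment labels to English. An equivalent lookup-table sketch, shown only as an alternative:

CANONICAL = {'positif': 'positive', 'netral': 'neutral', 'negatif': 'negative'}
sentiment = CANONICAL.get(sentiment, sentiment)  # map Indonesian labels, pass English through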
Example #8
def upload_face():

    payload = request.get_json()  # renamed so it does not shadow the json module
    #print(payload)

    if payload:
        imageBytes = base64.b64decode(payload['data'])

        # FIXME skip write image file
        with open('data/decode.jpg', 'wb') as image:
            image.write(imageBytes)

        faceImage = face_recognition.load_image_file('data/decode.jpg')
        face_encodings = face_recognition.face_encodings(faceImage)

        if len(face_encodings) > 0:
            face_encoding = face_encodings[0]
            #print(face_encoding)

            # Get distances between face_encoding to all known_faces
            face_distances = face_recognition.face_distance(
                known_faces, face_encoding)
            # Get minimum distance
            distance = min(face_distances)
            min_index = np.argmin(face_distances)
            # Get user ID by face encodings
            userid = name_index[min_index]

            # Get user data by ID
            obj = collection.find_one({'_id': bson.Int64(userid)})
            user = obj['user']
            response = {
                'id': obj['_id'],
                'contribution': obj['contribution'],
                'followers': user['followers'],
                'publicgists': user['publicgists'],
                'publicrepos': user['publicrepos'],
                'distance': float(distance)  # numpy floats aren't JSON-serializable
            }
            print(response)

            return jsonify(response)

        else:
            print("Failed to get encoding from image")

    else:
        print("Get image from request error")

    return jsonify({})
Example #9
 async def register(cls, player: Player, name: str):
     exists = await bot.clans.find_one({"members": bson.Int64(player.id)})
     duplicate_name = await bot.clans.find_one({"name": name})
     if exists or duplicate_name:
         raise AlreadyRegistered
     if isinstance(player, (discord.User, discord.Member)):
         player = await Party.from_user(player)
     await bot.clans.insert_one({
         "_id": (identifier := uuid()),
         "leader": player.identifier,
         "name": name,
         "members": [player.identifier],
     })
Example #10
    def visit_struct(self, array):
        fields = collections.OrderedDict()
        for i, field in enumerate(array.type):
            assert field.name is not None
            assert len(field.name) > 0

            field_data = array.field(i)
            field_doc = collections.OrderedDict()
            _DataWriter(field_doc, self.compression_level).accept(field_data)
            fields[field.name] = field_doc

        self.doc[DATA] = {LENGTH: bson.Int64(len(array)), FIELDS: fields}
        self._make_mask(array)
        write_type(self.doc, array.type)
Example #11
 def migrate(self):
     MODELS = [
         "sa_administrativedomain",
         "sa_authprofile",
         "sa_managedobject",
         "sa_managedobjectprofile",
         "sa_terminationgroup",
     ]
     # Update postgresql tables
     for table in MODELS:
         rows = self.db.execute("SELECT id FROM %s WHERE bi_id IS NULL" %
                                table)
         values = ["(%d, %d)" % (r[0], bi_hash(r[0])) for r in rows]
         while values:
             chunk, values = values[:PG_CHUNK], values[PG_CHUNK:]
             self.db.execute("""
                 UPDATE %s AS t
                 SET
                   bi_id = c.bi_id
                 FROM (
                   VALUES
                   %s
                 ) AS c(id, bi_id)
                 WHERE c.id = t.id
                 """ % (table, ",\n".join(chunk)))
     # Update mongodb collections
     mdb = self.mongo_db
     for coll_name in ["noc.profiles", "noc.services", "noc.serviceprofiles"]:
         coll = mdb[coll_name]
         updates = []
         for d in coll.find({"bi_id": {"$exists": False}}, {"_id": 1}):
             updates += [
                 UpdateOne({"_id": d["_id"]},
                           {"$set": {"bi_id": bson.Int64(bi_hash(d["_id"]))}})
             ]
             if len(updates) >= MONGO_CHUNK:
                 coll.bulk_write(updates)
                 updates = []
         if updates:
             coll.bulk_write(updates)
     # Alter bi_id fields and create indexes
     for table in MODELS:
         self.db.execute("ALTER TABLE %s ALTER bi_id SET NOT NULL" % table)
         self.db.create_index(table, ["bi_id"], unique=True)
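
The UPDATE ... FROM (VALUES ...) trick in Example #11 pushes a whole chunk of precomputed bi_ids into a single statement. A standalone sketch of the same pattern, assuming a DB-API cursor; compute_value stands in for bi_hash and the chunk size is an assumption:

PG_CHUNK = 500  # rows per UPDATE statement; assumed value

def backfill_table(cursor, table, compute_value):
    cursor.execute("SELECT id FROM %s WHERE bi_id IS NULL" % table)
    values = ["(%d, %d)" % (r[0], compute_value(r[0])) for r in cursor.fetchall()]
    while values:
        chunk, values = values[:PG_CHUNK], values[PG_CHUNK:]
        cursor.execute(
            "UPDATE %s AS t SET bi_id = c.bi_id "
            "FROM (VALUES %s) AS c(id, bi_id) "
            "WHERE c.id = t.id" % (table, ", ".join(chunk)))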
Example #12
 def migrate(self):
     # Update mongodb collections
     mdb = self.mongo_db
     for coll_name in ["noc.alarmclasses"]:
         coll = mdb[coll_name]
         updates = []
         for d in coll.find({"bi_id": {"$exists": False}}, {"_id": 1}):
             updates += [
                 UpdateOne({"_id": d["_id"]}, {"$set": {"bi_id": bson.Int64(bi_hash(d["_id"]))}})
             ]
             if len(updates) >= MONGO_CHUNK:
                 coll.bulk_write(updates)
                 updates = []
         if updates:
             coll.bulk_write(updates)
Example #13
def face_detection():

    # get encoding from request
    print(request.get_json())
    encoding = np.asarray(request.get_json()['encoding'])

    # Get user ID by face encodings
    face_distances = face_recognition.face_distance(known_faces, encoding)
    distance = min(face_distances)
    min_index = np.argmin(face_distances)
    userid = name_index[min_index]

    # Get user data by ID
    user = collection.find_one({'_id': bson.Int64(userid)})

    return jsonify({'user': user, 'distance': float(distance)})  # cast numpy float for JSON
Example #14
def get_userid(start, nums):
    import uuid, time, bson
    from pymongo import MongoClient
    conn = MongoClient(host='47.92.72.108', port=28010)
    conn = conn.get_database('iTROdb')
    conn = conn.get_collection('iTRO_User')
    result = {}
    for i in range(start, start + nums):
        username = str(i)
        tmp = conn.find_one({'UserName': username}, {'NickName': 1})
        if not tmp:
            nickname = '测试%s' % username  # '测试' = 'test'
            userinfo = {
                'NId': str(uuid.uuid3(uuid.NAMESPACE_DNS, username)),  # unique UUID
                'RoleId': '',  # role ID
                'ParentId': '',  # parent ID
                'UserName': username,
                'NickName': nickname,
                'UserPsw': 'ETWOlU8T9f2SmhAlXY1JCA==',  # password: te123456
                'Sex': int(1),
                'OpenId': '',
                'Unionid': '',
                'HeadimgUrl': '',
                'Webchat': '',
                'Qq': '',
                'Mobile': '',
                'Email': '',
                'IdCard': '',
                'Money': int(0),
                'IsLogin': int(1),
                'IsChat': int(1),
                'IsOnline': int(0),
                'LogitudeAndLat': '',
                'CreateDt': bson.Int64(int(time.time())),
                'UseMomey': int(0),
                'ExtendMan': '',
                'desc': '测试账号',  # 'test account'
                'labletag': '无',  # 'none'
                'qrcode': '',
                'truename': '',
            }
            conn.insert_one(userinfo)
        else:
            nickname = tmp['NickName']
        result[username] = nickname
    return result
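
CreateDt above is stored as epoch seconds wrapped in Int64. Reading it back as a datetime is straightforward; a sketch against the same collection (the username is made up):

import datetime

doc = conn.find_one({'UserName': '1'}, {'CreateDt': 1})
if doc:
    print(datetime.datetime.fromtimestamp(doc['CreateDt']).isoformat())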
Example #15
 def forwards(self):
     # Update mongodb collections
     mdb = get_db()
     for coll_name in ["noc.metrictypes"]:
         coll = mdb[coll_name]
         updates = []
         for d in coll.find({"bi_id": {"$exists": False}}, {"_id": 1}):
             updates += [
                 UpdateOne({"_id": d["_id"]},
                           {"$set": {"bi_id": bson.Int64(bi_hash(d["_id"]))}})
             ]
             if len(updates) >= MONGO_CHUNK:
                 coll.bulk_write(updates)
                 updates = []
         if updates:
             coll.bulk_write(updates)
Example #16
 def set_priority(self, user: str, project: str, priority: int):
     """Modifica la priorità di un progetto per l'utente specificato.
     """
     assert self.exists(user), f'User {user} inesistente'
     return self._mongo.read('users').find_one_and_update(
         {
             '$and': [
                 {'$or': [{'telegram': user}, {'email': user}]},
                 {'projects.url': project},
             ]
         },
         {'$set': {'projects.$.priority': bson.Int64(priority)}})
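
The positional $ operator in projects.$.priority writes to whichever element of the projects array matched projects.url in the filter. A hypothetical call (identifiers invented for illustration):

service.set_priority('alice@example.com', 'https://github.com/org/repo', 3)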
Example #17
    def _get_next_sequence_id(self, sequence):
        """
        Manage monotonically incrementing sequences.

        Returns the next ID in the sequence, or zero if the sequence is created
        for the first time.
        """

        seq = self.db['sequences'].find_one_and_update(
            {'_id': sequence},
            {'$inc': {'seq': bson.Int64(1)}},
            projection={'seq': True, '_id': False},
            upsert=True)

        if seq is None:
            return 0
        else:
            return seq['seq']
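
Because find_one_and_update returns the pre-update document by default, and None on the very first upsert, the helper above yields 0, 1, 2, ... across calls. Hypothetical usage, where store is an instance of the class above:

first = store._get_next_sequence_id('invoices')   # 0: sequence just created
second = store._get_next_sequence_id('invoices')  # 1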
Example #18
    async def register(
        cls, player: discord.User, name: str, job: str, race: Optional[str] = "human"
    ):
        if job.upper() not in [c.name for c in list(Job)] or race.upper() not in [
            r.name for r in list(Race)
        ]:
            raise ValueError
        jobs = [0] * 16
        jobs[Job[job.upper()].value] = 1

        # TODO: use `gen_stats` here

        await bot.characters.insert_one(
            {
                "_id": (identifier := uuid()),
                "player": bson.Int64(player.id),
                "name": name,
                "jobs": jobs,
                "race": Race[race.upper()].value,
                "stats": [0] * 30,
            }
        )
Example #19
    def save_session(self, app, session, response):
        session_id = session.get_id()
        unset_query = dict()
        set_query = dict()
        doc = dict()

        for key in session.modified_keys():

            value = session.get(key)

            if value is None:
                unset_query[key] = True

            else:
                set_query[key] = value

        if unset_query:
            doc['$unset'] = unset_query

        if set_query:
            doc['$set'] = set_query

        if unset_query or set_query:
            set_query = doc.get('$set') or dict()
            now_date = bson.Int64(time.mktime(datetime.now().timetuple()) * 1000)  # epoch millis, second resolution

            if not set_query.get('_createDate'):
                set_query['_createDate'] = now_date

            set_query['_lastUpdateDate'] = now_date
            doc['$set'] = set_query

            # Collection.update() is deprecated; update_one() has the same effect here
            self.__collection.update_one({'_id': session_id}, doc, upsert=True)

        session.reset_modified_keys() 

        response.set_cookie(app.session_cookie_name, session_id)
Example #20
exceptions = ['...', 'J ...']
authors_in_institute = []

for document in authors_collection.find():
    authors_in_institute.append(document['name'])

for document in authors_collection.find():
    cooperating_authors = []
    for article in document['articles']:
        for author in article['authors']:
            if author not in exceptions:
                cooperating_authors.append(author.lstrip())

    unique, counts = np.unique(cooperating_authors, return_counts=True)
    cooperating_authors = dict(zip(unique, counts))

    cooperating_internal_authors = dict()
    cooperating_external_authors = dict()
    for key, value in cooperating_authors.items():
        if key in authors_in_institute:
            cooperating_internal_authors[key] = value
        else:
            cooperating_external_authors[key] = value

    internal_collaborators = {k: bson.Int64(v) for k, v in cooperating_internal_authors.items()}
    external_collaborators = {k: bson.Int64(v) for k, v in cooperating_external_authors.items()}

    document.update({'internal_collaborators': internal_collaborators})
    document.update({'external_collaborators': external_collaborators})
    # save() was removed in PyMongo 4; replace_one keeps the same semantics here
    authors_collection.replace_one({'_id': document['_id']}, document)
Example #21
 def to_python(self, value):
     try:
         return bson.Int64(value)
     except (TypeError, ValueError):
         abort(404)
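
The to_python/abort(404) pair above matches Werkzeug's custom URL-converter protocol. A sketch of how such a converter could be wired into Flask; the class, converter key, and route are invented for illustration:

import bson
from flask import Flask, abort
from werkzeug.routing import BaseConverter

class Int64Converter(BaseConverter):
    def to_python(self, value):
        try:
            return bson.Int64(value)
        except (TypeError, ValueError):
            abort(404)  # non-numeric path segment: not found

app = Flask(__name__)
app.url_map.converters['int64'] = Int64Converter

@app.route('/item/<int64:item_id>')
def show_item(item_id):
    return str(item_id)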
                if ef["name"] in nameIdDict:
                    # print  ef["value"]
                    print "calling for  efid :" + str(
                        nameIdDict[ef["name"]]) + " value id :" + str(
                            ef["value"])
                    id = getNewEfValueId(ef["value"], nameIdDict[ef["name"]])
                    print "id is " + str(id)
                    if (len(id) > 0):
                        ## update the doc here
                        update = db[mongoCollection].update(
                            {
                                "_id": ObjectId(emp["_id"]),
                                "extendedFields.name": ef["name"]
                            }, {
                                "$set": {
                                    "extendedFields.$.value": bson.Int64(id[0])
                                }
                            })
                        print(update)
                        i = i + 1
                        f.write("updated doc %d : %s  new %s old %s\r\n" %
                                ((i), emp["_id"], str(id), ef["value"]))

                        # print nameIdDict[ef["name"]]

    except Exception, e:
        print str(e)
        f.write("Exception is %s\r\n" % str(e))

    cursor.close()
    cnx.close()
Example #23
    def build_obs_multi_ids(self, data, i, species):
        """Build observation object from data with more than one uniprot_id per row
        Go into observations collection

        Args:
            data (:obj:`Obj`): source object.
            i (:obj: `int`): index (row labels) of object in dataframe.
            species (:obj:`str`): species of yeast.
        Return:
            obj(:obj:`Obj`)
            {
                "entity": {
                    "type": "protein",
                    "name": "Cytoplasmic protein",
                    "identifiers": [{}... {}]
                },
                "genotype":{
                    "taxon": {}
                },
                "values": [],
                "source": {}, ...
            } 
        """
        uniprot_ids = data.iloc[i, 0].split(";")
        for uniprot in uniprot_ids:
            query = {"identifiers.value": uniprot}
            projection = {"_id": 0, "name": 1}
            doc = self.client["datanator-demo"]["entity"].find_one(
                filter=query, projection=projection)
            if doc is not None:
                entity = {}
                entity["type"] = "protein"
                entity["name"] = doc["name"]
                entity["identifiers"] = []
                entity["identifiers"].append({
                    "namespace": "uniprot_id",
                    "value": uniprot
                })
                values_p = []
                if data.iloc[i, 4] != "n.d.":
                    values_p.append({
                        "type": "Half-life",
                        "value": str(float(data.iloc[i, 4]) * 60),
                        "units": "s"
                    })
                else:
                    if ">=" in data.iloc[i, 5]:
                        values_p.append({
                            "type":
                            "Half-life",
                            "value":
                            "greater than or equal to " +
                            str(float(data.iloc[i, 5][3:]) * 3600),
                            "units":
                            "s"
                        })
                if data.iloc[i, 2] != "n.d.":
                    values_p.append({
                        "type": "Degradation rates",
                        "value": str(float(data.iloc[i, 2]) / 60),
                        "units": "s^(-1)"
                    })
                values_p.append({
                    "type": "R^2 (quality of curve fitting)",
                    "value": data.iloc[i, 3]
                })
                values_p.append({
                    "type": "Cross validation of slope",
                    "value": data.iloc[i, 6]
                })
                environment = {}
                query = {}
                if species == "Saccharomyces cerevisiae BY4742":
                    genotype = {
                        "taxon": {
                            "ncbi_taxonomy_id": 559292,
                            "name": species
                        }
                    }
                    query = {"tax_id": 559292}
                    environment["media"] = (
                        "5 ml synthetic medium, 30 mg/l heavy [13C6/15N2] L-lysine, "
                        "6.7 g/l yeast nitrogen base, 2 g/l dropout mix, "
                        "all amino acids except lysine and 2% glucose")
                    environment["temperature"] = bson.Int64(30)
                elif species == "Schizosaccharomyces pombe MKSP201":
                    genotype = {
                        "taxon": {
                            "ncbi_taxonomy_id": 4896,
                            "name": species
                        }
                    }
                    query = {"tax_id": 4896}
                    environment["media"] = (
                        "Edinburgh minimal medium supplemented with 75 mg/l leucine, "
                        "histidine, uracil, and adenine, heavy [13C6/15N2] L-lysine")
                genotype["taxon"]["canon_ancestors"] = []
                projection = {"_id": 0, "canon_anc_ids": 1, "canon_anc_names": 1}
                taxon_doc = self.client["datanator-test"]["taxon_tree"].find_one(
                    filter=query, projection=projection)
                if taxon_doc is not None:
                    for j in range(len(taxon_doc["canon_anc_names"])):
                        d = {}
                        d["ncbi_taxonomy_id"] = taxon_doc["canon_anc_ids"][j]
                        d["name"] = taxon_doc["canon_anc_names"][j]
                        genotype["taxon"]["canon_ancestors"].append(d)
                source = [{
                    "namespace": "doi",
                    "value": "10.1016/j.celrep.2014.10.065"
                }]
                ob_p = {
                    "entity": entity,
                    "genotype": genotype,
                    "environment": environment,
                    "values": values_p,
                    "source": source,
                    "schema_version": "2.0"
                }
                query = {
                    "$and": [{"namespace": "uniprot_id"},
                             {"value": entity["identifiers"][0]["value"]}]
                }
                self.identifier_col.update_one(
                    query,
                    {"$set": {
                        "namespace": "uniprot_id",
                        "value": entity["identifiers"][0]["value"]
                    }},
                    upsert=True)
                #update observation collection
                con_1 = {
                    "source": {
                        "$elemMatch": {
                            "namespace": "doi",
                            "value": "10.1016/j.celrep.2014.10.065"
                        }
                    }
                }
                con_2 = {
                    "identifier": {
                        "namespace": "uniprot_id",
                        "value": uniprot
                    }
                }
                query = {"$and": [con_1, con_2]}
                self.obs_col.update_one(
                    query,
                    {
                        "$set": {
                            "entity": ob_p["entity"],
                            "genotype": ob_p["genotype"],
                            "environment": ob_p["environment"],
                            "schema_version": "2.0",
                            "identifier": {"namespace": "uniprot_id", "value": uniprot}
                        },
                        "$addToSet": {
                            "values": {"$each": ob_p["values"]},
                            "source": {"$each": ob_p["source"]}
                        }
                    },
                    upsert=True)
Example #24
    def split_qa_documents_into_questions(self):
        logging.info("Splitting documents into questions....")
        qa_documents_coll = self.bankdomain_db.qa_documents
        qa_questions_coll = self.bankdomain_db.qa_questions
        qa_questions_coll.delete_many({})  # remove() is deprecated in PyMongo 3+
        qa_documents_in_db = qa_documents_coll.find()
        answer, question = "", ""
        index = 0
        for el in qa_documents_in_db:
            content = el["content"]
            el["processed"] = True
            state = 0

            for line_l in content:
                line = line_l.strip()
                if '###' in line:
                    state = 0
                    if len(question) > 0 and len(answer) > 0:
                        qa_questions_coll.insert_one({"question": question, "answer" : answer, "full_file": el["full_file"], "index" :  bson.Int64(index)})
                        index += 1
                        answer, question = "", ""
                elif len(line) == 0:
                    continue
                elif state == 0:
                    question = line
                    state = 1
                else:
                    if len(answer) > 0:
                        answer = answer + "\n"
                    answer = answer + line
            if len(question) > 0 and len(answer) > 0:
                qa_questions_coll.insert_one({"question": question, "answer": answer, "full_file": el["full_file"],  "index" :bson.Int64(index)})
                index += 1
                answer, question = "", ""
            qa_documents_coll.replace_one({"_id": el["_id"]}, el)  # save() is deprecated
        logging.info("Finished splitting documents into questions....")
Example #25
def unfavorite():
    global FAVORITE_ARCHIVER
    toot_id = bson.Int64(request.form["toot_id"])
    FAVORITE_ARCHIVER.remove(toot_id)
    return Response(status=200)
Example #26
def favorite():
    global FAVORITE_ARCHIVER
    toot_id = bson.Int64(request.form["toot_id"])
    FAVORITE_ARCHIVER.save(toot_id)
    print("saved", toot_id)
    return Response(status=200)
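
Examples #25 and #26 trust request.form["toot_id"] to be present and numeric; a non-numeric value makes bson.Int64 raise ValueError, which surfaces as a 500. A defensive variant (the route and handler name are assumptions; FAVORITE_ARCHIVER is the global from the examples):

import bson
from flask import Response, request

@app.route("/favorite", methods=["POST"])  # `app` assumed defined as in the originals
def favorite_safe():
    try:
        toot_id = bson.Int64(request.form["toot_id"])
    except (KeyError, ValueError):
        return Response(status=400)  # missing or non-numeric toot_id
    FAVORITE_ARCHIVER.save(toot_id)
    return Response(status=200)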
Example #27
def Int64(value):
    return bson.Int64(value)
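
The wrapper above is a pass-through, which works because bson.Int64 subclasses int: the result behaves like a normal integer in Python while keeping the 64-bit BSON type on write. A quick check:

import bson

x = bson.Int64("7")  # accepts anything int() accepts
assert x == 7 and isinstance(x, int)
assert not isinstance(x + 1, bson.Int64)  # arithmetic decays to plain int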
Example #28
 def build_obs(self, data, i, species):
     """Build observation objects from obj.
     Go into observations collection.
     Args:
         data (:obj:`Obj`): source object.
         i (:obj: `int`): index (row labels) of object in dataframe.
         species (:obj:`str`): species of yeast.
     Return:
         obj(:obj:`Obj`)
         {
             "entity": {
                 "type": "protein",
                 "name": "Type 2A phosphatase-associated protein 42",
                 "identifiers": [{"namespace": "uniprot_id",
                                  "value": "Q04372"}]
             },
             "genotype":{
                 "taxon": {}
             },
             "environment:{
                 "media":
             },
             "values": [],
             "source": {}, ...
         }
     """
     entity = {}
     entity["type"] = "protein"
     names = data.iloc[i, 8].split(";")
     entity["name"] = names[0]
     entity["identifiers"] = []
     entity["identifiers"].append({
         "namespace": "uniprot_id",
         "value": data.iloc[i, 0]
     })
     values_p = []
     if data.iloc[i, 4] != "n.d.":
         values_p.append({
             "type": "Half-life",
             "value": str(float(data.iloc[i, 4]) * 60),
             "units": "s"
         })
     else:
         if ">=" in data.iloc[i, 5]:
             values_p.append({
                 "type":
                 "Half-life",
                 "value":
                 "greater than or equal to " +
                 str(float(data.iloc[i, 5][3:]) * 3600),
                 "units":
                 "s"
             })
     if data.iloc[i, 2] != "n.d.":
         values_p.append({
             "type": "Degradation rates",
             "value": str(float(data.iloc[i, 2]) / 60),
             "units": "s^(-1)"
         })
     values_p.append({
         "type": "R^2 (quality of curve fitting)",
         "value": data.iloc[i, 3]
     })
     values_p.append({
         "type": "Cross validation of slope",
         "value": data.iloc[i, 6]
     })
     environment = {}
     query = {}
     if species == "Saccharomyces cerevisiae BY4742":
         genotype = {"taxon": {"ncbi_taxonomy_id": 559292, "name": species}}
         query = {"tax_id": 559292}
          environment["media"] = (
              "5 ml synthetic medium, 30 mg/l heavy [13C6/15N2] L-lysine, "
              "6.7 g/l yeast nitrogen base, 2 g/l dropout mix, "
              "all amino acids except lysine and 2% glucose")
          environment["temperature"] = bson.Int64(30)
     elif species == "Schizosaccharomyces pombe MKSP201":
         genotype = {"taxon": {"ncbi_taxonomy_id": 4896, "name": species}}
         query = {"tax_id": 4896}
          environment["media"] = (
              "Edinburgh minimal medium supplemented with 75 mg/l leucine, "
              "histidine, uracil, and adenine, heavy [13C6/15N2] L-lysine")
     genotype["taxon"]["canon_ancestors"] = []
     projection = {"_id": 0, "canon_anc_ids": 1, "canon_anc_names": 1}
     doc = self.client["datanator-test"]["taxon_tree"].find_one(
         filter=query, projection=projection)
      if doc is not None:
         for j in range(len(doc["canon_anc_names"])):
             d = {}
             d["ncbi_taxonomy_id"] = doc["canon_anc_ids"][j]
             d["name"] = doc["canon_anc_names"][j]
             genotype["taxon"]["canon_ancestors"].append(d)
     source = [{
         "namespace": "doi",
         "value": "10.1016/j.celrep.2014.10.065"
     }]
     ob_p = {
         "entity": entity,
         "genotype": genotype,
         "environment": environment,
         "values": values_p,
         "source": source,
         "schema_version": "2.0"
     }
     return ob_p
Example #29
import sys
import os
import re
from pymongo import MongoClient
import utility
import bson

ValidIpAddressRegex = re.compile(
    r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}"
    r"([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
)

ValidHostnameRegex = re.compile(
    r"^(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.)*"
    r"([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9\-]*[A-Za-z0-9])$"
)

seqing_group_id = bson.Int64(1)
dubo_group_id = bson.Int64(2)
xiaoshuo_group_id = bson.Int64(3)
changwei_group_id = bson.Int64(4)
zhejiang_changwei_group_id = bson.Int64(88)


def write_mongo(host, port, db_name, collection_name, file_name):
    client = MongoClient(host, port)
    print client.server_info()
    dbs = client.database_names()
    print '\t'.join(dbs)
    db = client.get_database(db_name)
    collections = db.collection_names(include_system_collections=False)
    print '\t'.join(collections)
    collection = db.get_collection(collection_name)
Example #30
def store_data_in_db(data_to_store):
    """
    stores the data in a mongo DB
    :param data_to_store: data we want to store (dictionary of numpy arrays)
    :return:
    """

    try:
        client = MongoClient()
        # MongoClient connects lazily; force a round trip so connection
        # problems surface here rather than at the first insert
        client.server_info()
    except pymongo.errors.ServerSelectionTimeoutError:
        print(
            'Error: Please assure that there is an instance of MongoDB running on the computer.'
        )
        raise

    print "Setting up database"

    # get the database
    db = client['generated_data']

    # create the collections
    users_collection = db['users']
    steps_collection = db['steps']
    days_collection = db['days']

    # reset collections
    db['users'].delete_many({})
    db['steps'].delete_many({})
    db['days'].delete_many({})

    # index steps collection
    steps_collection.create_index("startDateInUTC")

    # number of users we want to store
    n_users_to_store = len(data_to_store["user_ids"])
    """
    Insert the users in the database
    """

    start = timeit.default_timer()

    # get the user info data (email, name, id, profile picture, etc)
    user_info_data = data_to_store["user_info"]

    # insert the users
    print "Preparing users.."
    users_to_insert = [user_info_data[i] for i in range(n_users_to_store)]
    print "Inserting users.."
    users_collection.insert_many(users_to_insert)
    stop = timeit.default_timer()
    print "Done in " + str(stop - start) + "s"
    """
    Insert the steps data in the database
    """

    # TODO: increase speed
    # https://stackoverflow.com/questions/5292370/fast-or-bulk-upsert-in-pymongo

    steps_data_to_insert = []

    print "Preparing steps data.."
    start = timeit.default_timer()

    # iterate over the users
    for user_i in range(n_users_to_store):
        # get the user id
        user_id = data_to_store["user_ids"][user_i]
        # iterate over the dates
        for i, current_date in enumerate(data_to_store["dates_in_utc"]):
            # add them to the list of documents we want to insert
            steps_data_to_insert.append({
                "user": user_id,
                "startDateInUTC": bson.Int64(current_date),
                "steps": data_to_store["user_data"][user_i][i]
            })

    stop = timeit.default_timer()
    print "Done in " + str(stop - start) + "s"

    print "Inserting steps data.."
    start = timeit.default_timer()

    # insert them
    steps_collection.insert_many(steps_data_to_insert)
    stop = timeit.default_timer()
    print "Done in " + str(stop - start) + "s"
    """
    Insert days data
    """

    print "\n testing find"
    start = timeit.default_timer()
    for i, current_date in enumerate(data_to_store["dates_in_utc"]):
        steps_collection.find({"startDateInUTC": bson.Int64(current_date)})
    stop = timeit.default_timer()
    print "Done in " + str(stop - start) + "s \n"

    print "Preparing days data.."
    start = timeit.default_timer()

    days_data_to_insert = []
    for i, current_date in enumerate(data_to_store["dates_in_utc"]):
        steps_data_for_current_date = [
            doc[u"steps"] for doc in steps_collection.find(
                {"startDateInUTC": bson.Int64(current_date)})
        ]
        mean, std_error_of_mean, sums_for_mean_and_sem = calculate_mean_and_sem(
            steps_data_for_current_date)
        days_data_to_insert.append({
            "dateInUTC": bson.Int64(current_date),
            "mean": mean,
            "stdErrorOfMean": std_error_of_mean,
            "sumsForMeanAndSEM": sums_for_mean_and_sem
        })
    stop = timeit.default_timer()
    print "Done in " + str(stop - start) + "s"

    print "Inserting days data.."
    start = timeit.default_timer()
    days_collection.insert_many(days_data_to_insert)
    stop = timeit.default_timer()
    print "Done in " + str(stop - start) + "s"

    return
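
The day-statistics loop above issues one find() per date; a single aggregation pass could compute per-date counts and means instead (though not the custom calculate_mean_and_sem output). A sketch in the same Python 2 style:

pipeline = [
    {"$group": {
        "_id": "$startDateInUTC",  # one bucket per UTC date
        "mean": {"$avg": "$steps"},
        "n": {"$sum": 1},
    }}
]
for row in steps_collection.aggregate(pipeline):
    print "date %s: mean %s over %d docs" % (row["_id"], row["mean"], row["n"])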