Esempio n. 1
0
 def protein_manager(self, database="datanator"):
     ''' Build a ``QueryProtein`` object from this instance's connection settings.

         Args:
             database (:obj:`str`): database name handed to ``QueryProtein``

         Returns:
             the newly constructed ``query_protein.QueryProtein`` instance
     '''
     # Collect the settings first so the constructor call stays on one line.
     connection_settings = {
         'username': self.username,
         'password': self.password,
         'server': self.server,
         'authSource': self.authDB,
         'readPreference': self.read_preference,
         'replicaSet': self.repl,
         'database': database,
     }
     return query_protein.QueryProtein(**connection_settings)
Esempio n. 2
0
    def data_from_mongo_protein(self,
                                server,
                                db,
                                username,
                                password,
                                verbose=False,
                                readPreference='nearest',
                                authSource='admin',
                                projection={'_id': 0},
                                query={}):
        ''' Acquire documents from the collection exposed by a ``QueryProtein`` connection

            Args:
                server (:obj:`str`): mongodb ip address
                db (:obj:`str`): database name
                username (:obj:`str`): username for mongodb login
                password (:obj:`str`): password for mongodb login
                verbose (:obj:`bool`): display verbose messages
                readPreference (:obj:`str`): mongodb readpreference
                authSource (:obj:`str`): database login info is authenticating against
                projection (:obj:`dict`): mongodb query projection, passed to ``find`` unchanged
                query (:obj:`dict`): mongodb query filter; ``None`` is treated as an empty filter

            Returns:
                (:obj:`tuple`): tuple containing, in this order:

                    count (:obj:`int`): number of documents matching ``query``;
                    docs (:obj:`pymongo.Cursor`): pymongo cursor object over the documents matching ``query``
        '''
        # The dict defaults in the signature are shared between calls; this
        # method only reads them and never mutates them.
        # count_documents expects a filter document (an empty one counts
        # everything), so normalise a ``None`` filter once for both calls.
        if query is None:
            query = {}
        protein_manager = query_protein.QueryProtein(
            server=server,
            database=db,
            verbose=verbose,
            username=username,
            authSource=authSource,
            password=password,
            readPreference=readPreference)
        collection = protein_manager.collection
        docs = collection.find(filter=query, projection=projection)
        count = collection.count_documents(query)
        # NOTE: the count comes first; existing callers unpack in this order.
        return (count, docs)
Esempio n. 3
0
 def __init__(self, server, src_db='datanator', des_db='datanator', collection_str='uniprot',
              username=None, password=None, readPreference='nearest', authSource='admin', verbose=True,
              max_entries=float('inf')):
     ''' Connect to the destination collection and create the query managers.

         Args:
             server (:obj:`str`): mongodb server, forwarded to the parent class and to every manager
             src_db (:obj:`str`): database name given to the query managers
             des_db (:obj:`str`): database name given to the parent class and to ``UniprotNoSQL``
             collection_str (:obj:`str`): collection name passed to ``con_db``
             username (:obj:`str`): username for mongodb login
             password (:obj:`str`): password for mongodb login
             readPreference (:obj:`str`): mongodb readpreference
             authSource (:obj:`str`): database login info is authenticating against
             verbose (:obj:`bool`): display verbose messages
             max_entries (:obj:`float`): entry limit forwarded to the parent class and managers
     '''
     super().__init__(
         MongoDB=server,
         db=des_db,
         verbose=verbose,
         max_entries=max_entries,
         username=username,
         password=password,
         authSource=authSource,
         readPreference=readPreference,
     )
     self.collection_str = collection_str
     self.max_entries = max_entries
     self.verbose = verbose
     # From here on ``self.des_db`` holds the second value returned by
     # ``con_db``, not the ``des_db`` argument.
     self.des_client, self.des_db, self.des_collection = self.con_db(collection_str)

     # Keyword arguments common to every manager that reads from ``src_db``.
     source_connection = dict(
         username=username,
         password=password,
         server=server,
         authSource=authSource,
         database=src_db,
         readPreference=readPreference,
     )
     # Extra settings that only the KO and protein managers receive.
     limits = dict(max_entries=max_entries, verbose=verbose)

     self.koc_manager = query_kegg_organism_code.QueryKOC(
         collection_str='kegg_organism_code', **source_connection)
     self.uniprot_manager = query_uniprot.QueryUniprot(
         collection_str='uniprot', **source_connection)
     self.kegg_manager = query_kegg_orthology.QueryKO(**source_connection, **limits)
     self.protein_manager = query_protein.QueryProtein(**source_connection, **limits)
     self.uniprot_nosql_manager = uniprot_nosql.UniprotNoSQL(
         MongoDB=server,
         db=des_db,
         max_entries=max_entries,
         verbose=verbose,
         username=username,
         password=password,
         authSource=authSource,
     )
     self.endpoint = 'https://www.kegg.jp/ssdb-bin/ssdb_best?org_gene='
Esempio n. 4
0
    def __init__(self, db=None, MongoDB=None, cache_directory=None,
                 verbose=False, max_entries=float('inf'), replicaSet=None,
                 username=None, password=None, authSource='admin'):
        ''' Store the settings, connect to the 'sabio_rk_old' collection and create helpers.

                Args:
                        db: mongodb database name
                        MongoDB: MongoDB server address and login e.g. 'mongodb://mongo:27017/'
                        cache_directory: JSON file (converted from sqlite) directory
                        verbose: display verbose messages
                        max_entries: entry limit forwarded to the parent class and managers
                        replicaSet: replica set name forwarded to the parent class
                        username: username for mongodb login
                        password: password for mongodb login
                        authSource: database login info is authenticating against
        '''
        self.db = db
        self.MongoDB = MongoDB
        self.cache_directory = cache_directory
        self.verbose = verbose
        self.max_entries = max_entries
        self.collection_str = 'sabio_rk_old'

        # Keyword groups reused by several constructors below.
        login = dict(username=username, password=password, authSource=authSource)
        limits = dict(verbose=verbose, max_entries=max_entries)

        super(SabioRkNoSQL, self).__init__(
            cache_dirname=cache_directory,
            MongoDB=MongoDB,
            replicaSet=replicaSet,
            db=db,
            **limits,
            **login)

        connection = self.con_db(self.collection_str)
        self.client, self.db_obj, self.collection = connection
        self.sabio_reaction_entries = self.db_obj['sabio_reaction_entries']
        self.kegg_collection = self.db_obj['kegg_orthology']
        self.sabiork_manager = query_sabiork.QuerySabio(
            MongoDB=MongoDB, db=db, collection_str='sabio_rk', **limits, **login)
        self.protein_manager = query_protein.QueryProtein(
            server=MongoDB,
            database=db,
            collection_str='uniprot',
            readPreference='nearest',
            **limits,
            **login)
        self.ec = self.db_obj['ec']
        self.chem_manager = chem_util.ChemUtil()
        # The taxon manager is given only the server and login name/password.
        self.tax_manager = query_taxon_tree.QueryTaxonTree(
            username=username, MongoDB=MongoDB, password=password)
        self.file_manager = file_util.FileUtil()
Esempio n. 5
0
    def __init__(self,
                 username=None,
                 password=None,
                 server=None,
                 authSource='admin',
                 src_database='datanator',
                 max_entries=float('inf'),
                 verbose=True,
                 collection='protein',
                 destination_database='datanator',
                 cache_dir=None):
        ''' Create the source-database managers and connect to the destination collection.

                Args:
                        username (:obj: `str`): username for mongodb login
                        password (:obj: `str`): password for mongodb login
                        server (:obj: `str`): mongodb server forwarded to every manager
                        authSource (:obj: `str`): database login info is authenticating against
                        src_database (:obj: `str`): name of database in which source collections reside
                        max_entries (:obj: `float`): entry limit forwarded to the query managers
                        verbose (:obj: `bool`): display verbose messages
                        collection (:obj: `str`): name of the collection opened in the destination database
                        destination_database (:obj: `str`): name of database to put the aggregated collection
                        cache_dir (:obj: `str`): stored on the instance as ``cache_dir``
        '''

        self.max_entries = max_entries
        self.verbose = verbose
        self.cache_dir = cache_dir
        self.mongo_manager = mongo_util.MongoUtil(MongoDB=server,
                                                  username=username,
                                                  password=password,
                                                  authSource=authSource,
                                                  db=src_database)
        self.pax_manager = query_pax.QueryPax(MongoDB=server,
                                              db=src_database,
                                              collection_str='pax',
                                              verbose=verbose,
                                              max_entries=max_entries,
                                              username=username,
                                              password=password,
                                              authSource=authSource)
        self.kegg_manager = query_kegg_orthology.QueryKO(
            server=server,
            database=src_database,
            verbose=verbose,
            max_entries=max_entries,
            username=username,
            password=password,
            authSource=authSource)
        self.taxon_manager = query_taxon_tree.QueryTaxonTree(
            collection_str='taxon_tree',
            verbose=verbose,
            max_entries=max_entries,
            username=username,
            MongoDB=server,
            password=password,
            db=src_database,
            authSource=authSource)
        # Forward authSource like every other manager here, so a non-default
        # authentication database is not silently ignored for this connection.
        self.protein_manager = query_protein.QueryProtein(
            username=username,
            password=password,
            server=server,
            authSource=authSource,
            collection_str='protein',
            max_entries=max_entries,
            database=src_database)
        # Separate connection for the destination database / collection.
        self.client, self.db, self.col = mongo_util.MongoUtil(
            MongoDB=server,
            username=username,
            password=password,
            authSource=authSource,
            db=destination_database).con_db(collection)
        self.bad_kinlawid = [
            24416, 24417, 24418, 24419, 24420, 24421, 24422, 24423
        ]
        self.collation = Collation(locale='en',
                                   strength=CollationStrength.SECONDARY)
Esempio n. 6
0
    def setUpClass(cls):
        ''' Open the test connections and load the fixture documents.

            Drops and repopulates the 'test_query_protein' collection of the
            'test' database with two batches of mock documents.
        '''
        cls.db = 'test'
        settings = config.TestConfig()
        cls.username = settings.USERNAME
        cls.password = settings.PASSWORD
        cls.MongoDB = settings.SERVER

        # Keyword arguments common to all three QueryProtein connections.
        shared = dict(server=cls.MongoDB, verbose=True,
                      username=cls.username, password=cls.password)
        cls.src = query_protein.QueryProtein(
            database=cls.db, max_entries=20,
            collection_str='test_query_protein', readPreference='primary',
            **shared)
        cls.src_1 = query_protein.QueryProtein(
            database='datanator', readPreference='nearest', **shared)
        cls.src_2 = query_protein.QueryProtein(
            database='datanator-test', readPreference='nearest', **shared)
        # Start from an empty collection on every run.
        cls.src.db_obj.drop_collection('test_query_protein')

        # First batch: MOCK_0 .. MOCK_6, in insertion order.
        mock_docs = [
            {'uniprot_id': 'MOCK_0',
             'ancestor_taxon_id': [105, 104, 103, 102, 101],
             'ancestor_name': ['name_5', 'name_4', 'name_3', 'name_2', 'name_1'],
             'ko_number': 'MOCK_0', 'ncbi_taxonomy_id': 100, 'abundances': 0},
            # missing ancestor_taxon_id
            {'uniprot_id': 'MOCK_1', 'ko_number': 'MOCK_0'},
            {'uniprot_id': 'MOCK_2',
             'ancestor_taxon_id': [105, 104, 103],
             'ancestor_name': ['name_5', 'name_4', 'name_3'],
             'ko_number': 'MOCK_0', 'ncbi_taxonomy_id': 102, 'abundances': 2},
            # different ko_number
            {'uniprot_id': 'MOCK_3',
             'ancestor_taxon_id': [105, 104],
             'ancestor_name': ['name_5', 'name_4'],
             'ko_number': 'MOCK_1', 'ncbi_taxonomy_id': 103, 'abundances': 3},
            {'uniprot_id': 'MOCK_4',
             'ancestor_taxon_id': [105],
             'ancestor_name': ['name_5'],
             'ko_number': 'MOCK_0', 'ncbi_taxonomy_id': 104, 'abundances': 4},
            # no ko_number
            {'uniprot_id': 'MOCK_5',
             'ancestor_taxon_id': [105],
             'ancestor_name': ['name_5'],
             'ncbi_taxonomy_id': 104, 'abundances': 5},
            {'uniprot_id': 'MOCK_6',
             'ancestor_taxon_id': [105],
             'ancestor_name': ['name_5'],
             'ko_number': 'MOCK_0', 'ncbi_taxonomy_id': 104, 'abundances': 6},
        ]

        # Second batch, listed in insertion order.
        taxon_docs = [
            {'ncbi_taxonomy_id': 0, 'species_name': 's0',
             'ancestor_taxon_id': [5, 4, 3, 2, 1],
             'ancestor_name': ['s5', 's4', 's3', 's2', 's1'],
             'ko_number': 'KO0', 'uniprot_id': 'uniprot0',
             'protein_name': 'special name one',
             'kinetics': [{'ncbi_taxonomy_id': 100, 'kinlaw_id': 1},
                          {'ncbi_taxonomy_id': 101, 'kinlaw_id': 2}],
             'abundances': [], 'ko_name': ['KO0 name']},
            {'ncbi_taxonomy_id': 1, 'species_name': 's1',
             'ancestor_taxon_id': [5, 4, 3, 2],
             'ancestor_name': ['s5', 's4', 's3', 's2'],
             'ko_number': 'KO0', 'uniprot_id': 'uniprot1',
             'protein_name': 'nonspeciali name one'},
            {'ncbi_taxonomy_id': 2, 'species_name': 's2',
             'ancestor_taxon_id': [5, 4, 3],
             'ancestor_name': ['s5', 's4', 's3'],
             'ko_number': 'KO0', 'uniprot_id': 'uniprot2',
             'protein_name': 'nonspeciali name two'},
            {'ncbi_taxonomy_id': 3, 'species_name': 's3',
             'ancestor_taxon_id': [5, 4],
             'ancestor_name': ['s5', 's4'],
             'ko_number': 'ko3', 'uniprot_id': 'uniprot3',
             'protein_name': 'your name one'},
            {'ncbi_taxonomy_id': 4, 'species_name': 's4',
             'ancestor_taxon_id': [5],
             'ancestor_name': ['s5'],
             'ko_number': 'KO0', 'uniprot_id': 'uniprot4'},
            {'ncbi_taxonomy_id': 5, 'species_name': 's5',
             'ancestor_taxon_id': [],
             'ancestor_name': [],
             'ko_number': 'KO0', 'uniprot_id': 'uniprot5'},
            {'ncbi_taxonomy_id': 6, 'species_name': 's6',
             'ancestor_taxon_id': [5, 4, 3, 2],
             'ancestor_name': ['s5', 's4', 's3', 's2'],
             'ko_number': 'KO0', 'uniprot_id': 'uniprot6',
             'protein_name': 'your name two',
             'ko_name': 'ko name 0', 'abundances': []},
            {'ncbi_taxonomy_id': 7, 'species_name': 's7',
             'ancestor_taxon_id': [5, 4, 3, 2, 6],
             'ancestor_name': ['s5', 's4', 's3', 's2', 's6'],
             'ko_number': 'KO0', 'uniprot_id': 'uniprot7',
             'protein_name': 'special name two'},
            {'ncbi_taxonomy_id': 8, 'species_name': 's8',
             'ancestor_taxon_id': [5, 4, 3, 2, 6, 7],
             'ancestor_name': ['s5', 's4', 's3', 's2', 's6', 's7'],
             'ko_number': 'KO0', 'uniprot_id': 'uniprot8'},
            {'ncbi_taxonomy_id': 9, 'species_name': 's9',
             'ancestor_taxon_id': [5, 4, 3],
             'ancestor_name': ['s5', 's4', 's3'],
             'ko_number': 'KO0', 'uniprot_id': 'uniprot9'},
            {'ncbi_taxonomy_id': 10, 'species_name': 's10',
             'ancestor_taxon_id': [5, 4, 3, 9],
             'ancestor_name': ['s5', 's4', 's3', 's9'],
             'ko_number': 'KO0', 'uniprot_id': 'uniprot10'},
            {'ncbi_taxonomy_id': 11, 'species_name': 's11',
             'ancestor_taxon_id': [5, 4, 3, 2, 1, 0],
             'ancestor_name': ['s5', 's4', 's3', 's2', 's1', 's0'],
             'ko_number': 'KO0', 'uniprot_id': 'uniprot11'},
            {'ncbi_taxonomy_id': 12, 'species_name': 's12',
             'ancestor_taxon_id': [5, 4, 3, 2, 1, 0],
             'ancestor_name': ['s5', 's4', 's3', 's2', 's1', 's0'],
             'ko_number': 'KO0', 'uniprot_id': 'uniprot12'},
            {'ncbi_taxonomy_id': 13, 'species_name': 's13',
             'ancestor_taxon_id': [5, 4, 3, 2, 1],
             'ancestor_name': ['s5', 's4', 's3', 's2', 's1'],
             'ko_number': 'KO0', 'uniprot_id': 'uniprot13',
             'kinetics': [{'ncbi_taxonomy_id': 100, 'kinlaw_id': 1},
                          {'ncbi_taxonomy_id': 101, 'kinlaw_id': 2}]},
            # Same uniprot_id as the ncbi_taxonomy_id 6 / KO0 document above.
            {'ncbi_taxonomy_id': 14, 'species_name': 's6 something',
             'ancestor_taxon_id': [5, 4, 3, 2],
             'ancestor_name': ['s5', 's4', 's3', 's2'],
             'ko_number': 'KO0', 'uniprot_id': 'uniprot6',
             'protein_name': 'your name three'},
            {'ncbi_taxonomy_id': 6, 'species_name': 's6',
             'ancestor_taxon_id': [5, 4, 3, 2],
             'ancestor_name': ['s5', 's4', 's3', 's2'],
             'ko_number': 'KO1', 'uniprot_id': 'uniprot15',
             'protein_name': 'your name fifteen',
             'ko_name': ['ko name 1']},
            {'ncbi_taxonomy_id': 6, 'species_name': 's6',
             'ancestor_taxon_id': [5, 4, 3, 2],
             'ancestor_name': ['s5', 's4', 's3', 's2'],
             'ko_number': 'KO1', 'uniprot_id': 'uniprot16',
             'protein_name': 'your name fifteen'},
        ]

        fixture_collection = cls.src.collection
        fixture_collection.insert_many(mock_docs)
        fixture_collection.insert_many(taxon_docs)