def test_get_pubmed_id_for_pubchem_id(self):
    # PubChem id '3500' is expected to map to exactly these two PubMed uids;
    # an id with no match ('35000') is expected to yield None.
    expected_uids = ['10860942[uid]', '11961255[uid]']

    found_uids = QueryPubChem.get_pubmed_id_for_pubchem_id('3500')
    self.assertIsNotNone(found_uids)
    self.assertEqual(found_uids, expected_uids)

    # wrong id
    self.assertIsNone(QueryPubChem.get_pubmed_id_for_pubchem_id('35000'))
def test_get_pubchem_id_for_chembl_id(self):
    # A well-formed ChEMBL id is expected to resolve to its PubChem id.
    resolved = QueryPubChem.get_pubchem_id_for_chembl_id('CHEMBL521')
    self.assertIsNotNone(resolved)
    self.assertEqual(resolved, '3672')

    # Inputs the lookup is expected to answer with None, checked in order:
    #   'chembl521' -> empty result (lower-cased prefix)
    #   '3400'      -> wrong id (not a ChEMBL identifier)
    for unresolvable in ('chembl521', '3400'):
        self.assertIsNone(
            QueryPubChem.get_pubchem_id_for_chembl_id(unresolvable))
def test_update_metabolite_desc(self):
    # Spot-check that the description stored on metabolite nodes in Neo4j
    # matches the description obtained through the
    # QueryKEGG -> QueryPubChem -> QueryHMDB lookup chain.
    conn = Neo4jConnection(self.rtxConfig.neo4j_bolt,
                           self.rtxConfig.neo4j_username,
                           self.rtxConfig.neo4j_password)
    # try/finally: a failing assertion (or a raising lookup) must not leave
    # the Neo4j connection open.
    try:
        nodes = conn.get_metabolite_nodes()

        # generate random number array
        # NOTE(review): random_int_list is defined elsewhere; presumably it
        # returns 100 indexes within [0, len(nodes) - 1] -- confirm.
        random_indexes = random_int_list(0, len(nodes) - 1, 100)

        for i in random_indexes:
            # expected description, via KEGG -> PubChem -> HMDB
            node_id = nodes[i]
            pubchem_id = QueryKEGG.map_kegg_compound_to_pub_chem_id(node_id)
            hmdb_url = QueryPubChem.get_description_url(pubchem_id)
            desc = QueryHMDB.get_compound_desc(hmdb_url)

            # retrieve data from Neo4j
            node = conn.get_node(node_id)
            self.assertIsNotNone(node)
            self.assertIsNotNone(node['n']['id'])
            self.assertIsNotNone(node['n']['description'])
            self.assertEqual(node_id, node['n']['id'])
            # Nodes whose stored description is the literal string "None"
            # are not compared against the looked-up description.
            if node['n']['description'] != "None":
                self.assertEqual(desc, node['n']['description'])
    finally:
        conn.close()
def test_get_description_url(self):
    # A PubChem id with an HMDB description is expected to give the HMDB URL.
    hmdb_url = QueryPubChem.get_description_url("3324")
    self.assertIsNotNone(hmdb_url)
    self.assertEqual(hmdb_url, "http://www.hmdb.ca/metabolites/HMDB0000243")

    # Arguments for which the lookup is expected to return None, in order:
    #   "3500"        -> empty result
    #   "GO:2342343"  -> wrong arg format
    #   3500          -> wrong arg type (int instead of str)
    for rejected_arg in ("3500", "GO:2342343", 3500):
        self.assertIsNone(QueryPubChem.get_description_url(rejected_arg))
def read_interactions():
    """Load the DGIdb interactions table and expand it into drug-protein records.

    Reads the TSV at ``QueryDGIdb.INTERACTIONS_TSV_URL`` and, for every row
    that resolves to at least one UniProt ID and at least one ChEMBL ID,
    emits one record per (UniProt ID, ChEMBL ID, interaction type)
    combination.

    A row is skipped when:
      * both ``gene_name`` and ``gene_claim_name`` are empty;
      * the gene symbol maps to no UniProt ID;
      * both ``drug_chembl_id`` and ``drug_name`` are empty;
      * ``drug_chembl_id`` is empty and the drug name resolves to no ChEMBL
        ID through either ``QueryPubChem`` or ``QueryChEMBL``.

    Rows without an interaction type get the single predicate ``'affects'``.

    Returns:
        list[dict]: one dict per record, with the keys ``drug_chembl_id``,
        ``drug_name``, ``predicate``, ``predicate_extended``,
        ``protein_uniprot_id``, ``protein_gene_symbol``, ``sourcedb`` and
        ``pmids``.

    Raises:
        AssertionError: if a gene symbol, ChEMBL ID or drug name contains a
            comma, or a non-empty interaction-types cell is not a string.
    """
    int_data = pandas.read_csv(QueryDGIdb.INTERACTIONS_TSV_URL, sep='\t')
    # Empty cells become '' so the emptiness checks below are string tests.
    int_data.fillna('', inplace=True)
    res_list = []
    for _, row in int_data.iterrows():
        pmids = row['PMIDs']

        # Prefer gene_name; fall back to gene_claim_name; skip if neither.
        gene_symbol = row['gene_name']
        if gene_symbol == '':
            gene_symbol = row['gene_claim_name']
        if gene_symbol == '':
            continue
        assert ',' not in gene_symbol

        uniprot_ids_set = QueryDGIdb.mygene.convert_gene_symbol_to_uniprot_id(
            gene_symbol)
        if not uniprot_ids_set:
            continue

        drug_chembl_id = row['drug_chembl_id']
        drug_name = row['drug_name']
        if drug_chembl_id != '':
            if isinstance(drug_chembl_id, float):
                # Debug aid: dump the offending row before the comma check
                # below fails on a non-string value.
                print(row)
            assert ',' not in drug_chembl_id
            drug_chembl_id_set = {drug_chembl_id}
            if drug_name == '':
                print("warning; ChEMBL compound has no drug name",
                      file=sys.stderr)
        elif drug_name != '':
            # No ChEMBL ID in the row: resolve one from the drug name, trying
            # PubChem first and ChEMBL second.
            assert ',' not in drug_name
            drug_chembl_id_set = QueryPubChem.get_chembl_ids_for_drug(
                drug_name)
            if not drug_chembl_id_set:
                drug_chembl_id_set = QueryChEMBL.get_chembl_ids_for_drug(
                    drug_name)
            if not drug_chembl_id_set:
                continue
        else:
            continue

        interaction_claim_source_field = row['interaction_claim_source']
        interaction_types_field = row['interaction_types']
        if interaction_types_field != '':
            assert isinstance(interaction_types_field, str)
            predicate_list = interaction_types_field.split(',')
        else:
            predicate_list = ['affects']

        for uniprot_id in uniprot_ids_set:
            # Emit every resolved ChEMBL ID. Using the raw row value here
            # would record '' for rows that were resolved by drug name.
            for resolved_chembl_id in drug_chembl_id_set:
                for predicate_str in predicate_list:
                    res_list.append({
                        'drug_chembl_id': resolved_chembl_id,
                        'drug_name': drug_name,
                        # NOTE(review): presumably a plain dict, so an
                        # unmapped interaction type raises KeyError -- confirm.
                        'predicate': QueryDGIdb.predicate_map[predicate_str],
                        'predicate_extended': predicate_str,
                        'protein_uniprot_id': uniprot_id,
                        'protein_gene_symbol': gene_symbol,
                        'sourcedb': interaction_claim_source_field,
                        'pmids': pmids,
                    })
    return res_list
def test_get_chembl_ids_for_drug(self):
    # The drug name 'gne-493' is expected to resolve to a set holding a
    # single ChEMBL id.
    expected_ids = {'CHEMBL1084926'}

    chembl_ids = QueryPubChem.get_chembl_ids_for_drug('gne-493')
    self.assertIsNotNone(chembl_ids)
    self.assertEqual(chembl_ids, expected_ids)