def fillcounts(dataset):
    """Run the counters (rankers) for the metabolites of a dataset and save the results.

    The dataset id is looked up by name in the ``datasets`` table, then
    ``PubChemPathwayCounter`` and ``PubChemAssayCounter`` are each run against
    that id and their results stored through their own ``save()``.

    :param dataset: Name of the dataset, matched against ``datasets.name``.
    :raises TypeError: If no dataset with that name can be found.
    """
    query = "SELECT id FROM datasets WHERE name = ?"
    try:
        # The counters receive the id as a string.
        dataset_id = str(query_db(query, [dataset])[0]['id'])
    except (TypeError, IndexError):
        # A missing dataset can surface either as a non-subscriptable result
        # (TypeError) or as an empty result list (IndexError). Both are
        # reported identically; TypeError is kept so existing callers that
        # catch it continue to work.
        raise TypeError("No dataset with name '%s'" % dataset)

    PubChemPathwayCounter().count(dataset_id).save()
    PubChemAssayCounter().count(dataset_id).save()
    print("Saved!")
def fillcids(dataset):
    """Gather the CIDs from PubChem for the metabolites and save to pubchem_compounds table.

    The dataset id is looked up by name in the ``datasets`` table, a
    ``CIDGatherer`` harvests the data for that id, and the harvested data is
    handed back to the gatherer's ``save()``.

    :param dataset: Name of the dataset, matched against ``datasets.name``.
    :raises TypeError: If no dataset with that name can be found.
    """
    query = "SELECT id FROM datasets WHERE name = ?"
    try:
        # The gatherer receives the id as a string.
        dataset_id = str(query_db(query, [dataset])[0]['id'])
    except (TypeError, IndexError):
        # A missing dataset can surface either as a non-subscriptable result
        # (TypeError) or as an empty result list (IndexError). Both are
        # reported identically; TypeError is kept so existing callers that
        # catch it continue to work.
        raise TypeError("No dataset with name '%s'" % dataset)

    gatherer = CIDGatherer()
    data = gatherer.harvest(dataset_id)
    gatherer.save(data)
    print("Saved!")
def harvest(self, dataset_id):
    """
    Harvest all of the CIDs from PubChem

    Selects the metabolites of the given dataset that have no matching row in
    ``pubchem_compounds``, resolves each one's ``cas`` value through
    ``self.get_cids`` and pairs every returned CID with the metabolite id.
    Progress is written to stdout as the loop runs.

    :param dataset_id: id of the dataset whose metabolites are processed
    :return: List of tuples [(cid, metab_id),]
    """
    # Query only returns the metabolites that don't already have CIDs associated
    query = (
        "SELECT t1.id, t1.cas from metabolites t1 "
        "LEFT JOIN pubchem_compounds t2 ON t2.metab_ID = t1.id "
        "WHERE t2.metab_ID is NULL AND t1.dataset_id is ?"
    )
    # Bind the id as a one-element parameter list. A bare string would be
    # treated as a sequence of characters, so any id longer than one
    # character would not match the single placeholder.
    results = query_db(query, [dataset_id])
    count = len(results)

    since_wait = 0
    since_report = 0
    cid_metab_id_map = []  # List of tuples

    for i, result in enumerate(results):
        since_wait += 1
        since_report += 1

        if since_wait > 2:
            # NOTE(review): only the message is written here; nothing in this
            # method actually sleeps. Confirm whether throttling is expected
            # to happen elsewhere (e.g. inside get_cids).
            sys.stdout.write("Waiting 1 second \n")
            sys.stdout.flush()
            since_wait = 0

        if since_report > 49:
            # Periodically dump everything collected so far.
            sys.stdout.write(str(cid_metab_id_map))
            sys.stdout.write("\n")
            sys.stdout.flush()
            since_report = 0

        cids = self.get_cids(result["cas"])
        metab_id = result["id"]
        if cids:
            # get_cids may return nothing usable; only truthy results are mapped.
            cid_metab_id_map.extend((cid, metab_id) for cid in cids)

        # Progress
        perc = ((i + 1) / count) * 100
        sys.stdout.write("%s%% \n" % perc)
        sys.stdout.flush()

    return cid_metab_id_map
def harvest(self, dataset_id):
    """
    Harvest all of the CIDs from PubChem

    Selects the metabolites of the given dataset that have no matching row in
    ``pubchem_compounds``, resolves each one's ``cas`` value through
    ``self.get_cids`` and pairs every returned CID with the metabolite id.
    Progress is written to stdout as the loop runs.

    :param dataset_id: id of the dataset whose metabolites are processed
    :return: List of tuples [(cid, metab_id),]
    """
    # Query only returns the metabolites that don't already have CIDs associated
    query = "SELECT t1.id, t1.cas from metabolites t1 " \
            "LEFT JOIN pubchem_compounds t2 ON t2.metab_ID = t1.id " \
            "WHERE t2.metab_ID is NULL AND t1.dataset_id is ?"
    # Bind the id as a one-element parameter list. A bare string would be
    # treated as a sequence of characters, so any id longer than one
    # character would not match the single placeholder.
    results = query_db(query, [dataset_id])
    count = len(results)

    since_wait = 0
    since_report = 0
    cid_metab_id_map = []  # List of tuples

    for i, result in enumerate(results):
        since_wait += 1
        since_report += 1

        if since_wait > 2:
            # NOTE(review): only the message is written here; nothing in this
            # method actually sleeps. Confirm whether throttling is expected
            # to happen elsewhere (e.g. inside get_cids).
            sys.stdout.write("Waiting 1 second \n")
            sys.stdout.flush()
            since_wait = 0

        if since_report > 49:
            # Periodically dump everything collected so far.
            sys.stdout.write(str(cid_metab_id_map))
            sys.stdout.write("\n")
            sys.stdout.flush()
            since_report = 0

        cids = self.get_cids(result['cas'])
        metab_id = result['id']
        if cids:
            # get_cids may return nothing usable; only truthy results are mapped.
            cid_metab_id_map.extend((cid, metab_id) for cid in cids)

        # Progress
        perc = ((i + 1) / count) * 100
        sys.stdout.write("%s%% \n" % perc)
        sys.stdout.flush()

    return cid_metab_id_map
def save(self, cid_metab_id_map):
    """Insert the given (CID, metab_ID) pairs into the pubchem_compounds table.

    :param cid_metab_id_map: Collection of (cid, metab_id) tuples, one per
        row to insert; handed to ``query_db`` with ``many=True``.
    :return: Whatever ``query_db`` returns for the insert.
    """
    sql = "INSERT INTO pubchem_compounds(CID, metab_ID) VALUES (?, ?)"
    outcome = query_db(sql, cid_metab_id_map, many=True)
    return outcome