Example #1
0
def fillcounts(dataset):
    """Run the counter (ranker) for the metabolites and save to database.

    Looks up the id of the dataset named ``dataset``, runs
    ``PubChemPathwayCounter`` and ``PubChemAssayCounter`` against that id,
    saves each result, and prints ``Saved!`` once both are done.

    :param dataset: Name of the dataset, matched against ``datasets.name``.
    :raises TypeError: If no dataset with that name is found.
    """
    query = "SELECT id FROM datasets WHERE name = ?"
    rows = query_db(query, [dataset])
    # "No such dataset" may surface as None or as an empty result set,
    # depending on how query_db reports zero rows. Handle both so the caller
    # always gets the descriptive error rather than a bare IndexError.
    if not rows:
        raise TypeError("No dataset with name '%s'" % dataset)
    # The counters are handed the id as a string.
    dataset_id = str(rows[0]['id'])

    PubChemPathwayCounter().count(dataset_id).save()
    PubChemAssayCounter().count(dataset_id).save()
    print("Saved!")
Example #2
0
def fillcids(dataset):
    """Gather the CIDs from PubChem for the metabolites and save to pubchem_compounds table.

    Looks up the id of the dataset named ``dataset``, asks a ``CIDGatherer``
    to harvest the CIDs for that dataset id, saves the harvested data through
    the same gatherer, and prints ``Saved!``.

    :param dataset: Name of the dataset, matched against ``datasets.name``.
    :raises TypeError: If no dataset with that name is found.
    """
    query = "SELECT id FROM datasets WHERE name = ?"
    rows = query_db(query, [dataset])
    # "No such dataset" may surface as None or as an empty result set,
    # depending on how query_db reports zero rows. Handle both so the caller
    # always gets the descriptive error rather than a bare IndexError.
    if not rows:
        raise TypeError("No dataset with name '%s'" % dataset)
    # The gatherer is handed the id as a string.
    dataset_id = str(rows[0]['id'])

    gatherer = CIDGatherer()
    data = gatherer.harvest(dataset_id)
    gatherer.save(data)
    print("Saved!")
    def harvest(self, dataset_id):
        """
        Harvest all of the CIDs from PubChem

        Selects the metabolites of the given dataset that have no row in
        pubchem_compounds yet, calls ``self.get_cids`` with each metabolite's
        CAS value, and collects one (cid, metab_id) tuple per CID returned.
        Progress messages are written to stdout while the loop runs.

        :param dataset_id: Id of the dataset to harvest. A single value is
            wrapped into a one-element parameter list; a list or tuple is
            passed to the query unchanged.
        :return: List of tuples [(cid, metab_id),]
        """
        # Query only returns the metabolites that don't already have CIDs associated
        query = (
            "SELECT t1.id, t1.cas from metabolites t1 "
            "LEFT JOIN pubchem_compounds t2 ON t2.metab_ID = t1.id "
            "WHERE t2.metab_ID is NULL AND t1.dataset_id is ?"
        )
        # The query has exactly one placeholder, so it needs a one-element
        # parameter sequence. A bare string id would otherwise be treated as a
        # sequence of characters (presumably by the DB driver behind query_db),
        # which only lines up with the placeholder for single-character ids.
        if isinstance(dataset_id, (list, tuple)):
            params = dataset_id
        else:
            params = [dataset_id]
        results = query_db(query, params)
        count = len(results)

        since_wait = 0
        since_report = 0

        cid_metab_id_map = []  # List of tuples
        for i, result in enumerate(results):
            since_wait += 1
            since_report += 1

            # Every third metabolite: announce a pause.
            # NOTE(review): only the message is emitted here, no delay is
            # performed in this method -- confirm whether get_cids throttles.
            if since_wait > 2:
                sys.stdout.write("Waiting 1 second \n")
                sys.stdout.flush()
                since_wait = 0

            # Every fiftieth metabolite: dump everything collected so far.
            if since_report > 49:
                sys.stdout.write(str(cid_metab_id_map))
                sys.stdout.write("\n")
                sys.stdout.flush()
                since_report = 0

            cids = self.get_cids(result["cas"])
            metab_id = result["id"]
            # A falsy result (nothing found) contributes no rows.
            if cids:
                cid_metab_id_map.extend((cid, metab_id) for cid in cids)

            # Progress
            perc = ((i + 1) / count) * 100
            sys.stdout.write("%s%% \n" % perc)
            sys.stdout.flush()

        return cid_metab_id_map
Example #4
0
    def harvest(self, dataset_id):
        """
        Harvest all of the CIDs from PubChem

        Selects the metabolites of the given dataset that have no row in
        pubchem_compounds yet, calls ``self.get_cids`` with each metabolite's
        CAS value, and collects one (cid, metab_id) tuple per CID returned.
        Progress messages are written to stdout while the loop runs.

        :param dataset_id: Id of the dataset to harvest. A single value is
            wrapped into a one-element parameter list; a list or tuple is
            passed to the query unchanged.
        :return: List of tuples [(cid, metab_id),]
        """
        # Query only returns the metabolites that don't already have CIDs associated
        query = (
            "SELECT t1.id, t1.cas from metabolites t1 "
            "LEFT JOIN pubchem_compounds t2 ON t2.metab_ID = t1.id "
            "WHERE t2.metab_ID is NULL AND t1.dataset_id is ?"
        )
        # The query has exactly one placeholder, so it needs a one-element
        # parameter sequence. A bare string id would otherwise be treated as a
        # sequence of characters (presumably by the DB driver behind query_db),
        # which only lines up with the placeholder for single-character ids.
        if isinstance(dataset_id, (list, tuple)):
            params = dataset_id
        else:
            params = [dataset_id]
        results = query_db(query, params)
        count = len(results)

        since_wait = 0
        since_report = 0

        cid_metab_id_map = []  # List of tuples
        for i, result in enumerate(results):
            since_wait += 1
            since_report += 1

            # Every third metabolite: announce a pause.
            # NOTE(review): only the message is emitted here, no delay is
            # performed in this method -- confirm whether get_cids throttles.
            if since_wait > 2:
                sys.stdout.write("Waiting 1 second \n")
                sys.stdout.flush()
                since_wait = 0

            # Every fiftieth metabolite: dump everything collected so far.
            if since_report > 49:
                sys.stdout.write(str(cid_metab_id_map))
                sys.stdout.write("\n")
                sys.stdout.flush()
                since_report = 0

            cids = self.get_cids(result['cas'])
            metab_id = result['id']
            # A falsy result (nothing found) contributes no rows.
            if cids:
                cid_metab_id_map.extend((cid, metab_id) for cid in cids)

            # Progress
            perc = ((i + 1) / count) * 100
            sys.stdout.write("%s%% \n" % perc)
            sys.stdout.flush()

        return cid_metab_id_map
Example #5
0
 def save(self, cid_metab_id_map):
     """
     Insert the given rows into the pubchem_compounds table.

     :param cid_metab_id_map: Rows to insert; each row supplies the values
         for the CID and metab_ID columns, in that order
     :return: Whatever query_db returns for the many-row insert
     """
     return query_db(
         "INSERT INTO pubchem_compounds(CID, metab_ID) VALUES (?, ?)",
         cid_metab_id_map,
         many=True,
     )
 def save(self, cid_metab_id_map):
     """
     Write CID / metab_ID rows to pubchem_compounds in one many-row insert.

     :param cid_metab_id_map: Rows to insert; each row supplies the values
         for the CID and metab_ID columns, in that order
     :return: The result of the underlying query_db call
     """
     sql = "INSERT INTO pubchem_compounds(CID, metab_ID) VALUES (?, ?)"
     outcome = query_db(sql, cid_metab_id_map, many=True)
     return outcome