Esempi in Python per ChEMBL, esempi in Python per bioservices.ChEMBL

Esempio n. 1

0

Mostra file

    def __init__(self, drug_decode):
        """Load the drug table and create the web-service clients.

        :param drug_decode: passed unchanged to ``DrugDecode``, once for the
            reference table (``self.dd``) and once for the table that is
            meant to be completed (``self.dd_filled``).
        """
        print("ChemSpiderSearch is still in progress, please do not use")
        self.dd = DrugDecode(drug_decode)
        self.dd_filled = DrugDecode(drug_decode)

        from bioservices.chemspider import ChemSpider
        from bioservices import ChEMBL
        from bioservices import UniChem

        try:
            print('Loading PubChem')
            from bioservices.pubchem import PubChem
            # NOTE: the attribute is spelled "puchem"; the name is kept as is
            # because code outside this method may already rely on it.
            self.puchem = PubChem()
        except Exception:
            # Pubchem was introduced only in dec 2015, so it is optional.
            # Catching Exception (rather than a bare except) keeps this
            # best-effort while letting Ctrl-C / SystemExit through.
            pass

        print('Loading ChEMBL service')
        self.chembl = ChEMBL(cache=True)

        print('Loading ChemSpider service')
        self.chemspider = ChemSpider(cache=True)

        print('Loading UniChem service')
        # in unichem db number is 22 and chembl is 1
        self.unichem = UniChem()

        print('Settings some data aliases')
        self._cs_find = self.chemspider.find
        self._cs_get = self.chemspider.GetExtendedCompoundInfo

        # sorted() already returns a new list; no intermediate list() needed
        self.drug_ids = sorted(self.dd.df.index.values)
        self.drug_names = sorted(self.dd.df.DRUG_NAME.values)

Esempio n. 2

0

Mostra file

File: chembl.py Progetto: MehtapIsik/TargetExplorer

    def main(self):
        """Record the ChEMBL target id of each UniProt entry of the current crawl.

        Reads the current crawl number from ``models.CrawlData``, looks up
        every UniProt accession stored for that crawl in ChEMBL, adds one
        ``models.ChemblTarget`` row per accession to the session, stamps
        ``chembl_datestamp`` on the crawl's ``models.DateStamps`` row and
        commits.
        """
        # get current crawl number
        crawldata_row = models.CrawlData.query.first()
        current_crawl_number = crawldata_row.current_crawl_number
        # single-argument print(...) behaves the same on Python 2 and 3
        print('Current crawl number: %d' % current_crawl_number)

        # Get list of uniprot accession numbers that are in the rows of
        # current crawl number
        uniprot_rows = models.UniProt.query.filter_by(
            crawl_number=current_crawl_number).values(models.UniProt.ac)
        uniprot_acs = [str(row[0]) for row in uniprot_rows]

        # One client serves every lookup; it does not depend on the accession,
        # so it is created once instead of once per loop iteration.
        chembl = ChEMBL(verbose=False)

        # Iterate through Uniprot ACs
        for uniprot_ac in uniprot_acs:

            # get ChEMBL target id using bioservices
            target_info = chembl.get_target_by_uniprotId(uniprot_ac)
            target_chembl_id = target_info['chemblId']
            print("ChEMBL ID of target protein: %s" % target_chembl_id)

            target = models.ChemblTarget(
                crawl_number=current_crawl_number,
                target_chembl_id=target_chembl_id)
            db.session.add(target)

        # Update datestamp for Chembl
        now = datetime.datetime.utcnow()
        current_crawl_datestamp_row = models.DateStamps.query.filter_by(
            crawl_number=current_crawl_number).first()
        current_crawl_datestamp_row.chembl_datestamp = now

        db.session.commit()
        print('Done.')

Esempio n. 3

0

Mostra file

File: synpred_process_drugcomb.py Progetto: MoreiraLAB/synpred

 def chembl_fetch(input_id):
     """
     Search ChEMBL for ``input_id`` and return the canonical SMILES of the
     first molecule in the search result.
     """
     from bioservices import ChEMBL
     service = ChEMBL()
     search_result = service.search_molecule(input_id)
     # only the first hit of the search is used
     first_hit = search_result["molecules"][0]
     structures = first_hit["molecule_structures"]
     return structures["canonical_smiles"]

Esempio n. 4

0

Mostra file

File: models.py Progetto: UPDDI/mps-database-server

def chembl_assay(chemblid):
    """Fetch an assay record from ChEMBL and rename its keys via ``FIELDS``.

    The module-level ``CHEMBL`` client is created on first use. Only keys of
    the returned ``'assay'`` mapping that appear in ``FIELDS`` are kept; each
    is stored under the name ``FIELDS`` maps it to.
    """
    global CHEMBL
    if CHEMBL is None:
        from bioservices import ChEMBL as ChEMBLdb

        CHEMBL = ChEMBLdb()

    response = CHEMBL.get_assays_by_chemblId(str(chemblid))
    assay = response['assay']

    renamed = {}
    for source_key, value in assay.items():
        if source_key in FIELDS:
            renamed[FIELDS[source_key]] = value
    return renamed

Esempio n. 5

0

Mostra file

File: drugs.py Progetto: CancerRxGene/gdsctools

    def __init__(self, drug_decode):
        """Load the drug table and create the web-service clients.

        :param drug_decode: passed unchanged to ``DrugDecode``, once for the
            reference table (``self.dd``) and once for the table that is
            meant to be completed (``self.dd_filled``).
        """
        print("ChemSpiderSearch is still in progress, please do not use")
        self.dd = DrugDecode(drug_decode)
        self.dd_filled = DrugDecode(drug_decode)

        from bioservices.chemspider import ChemSpider
        from bioservices import ChEMBL
        from bioservices import UniChem

        try:
            print('Loading PubChem')
            from bioservices.pubchem import PubChem
            # NOTE: the attribute is spelled "puchem"; the name is kept as is
            # because code outside this method may already rely on it.
            self.puchem = PubChem()
        except Exception:
            # Pubchem was introduced only in dec 2015, so it is optional.
            # Catching Exception (rather than a bare except) keeps this
            # best-effort while letting Ctrl-C / SystemExit through.
            pass

        print('Loading ChEMBL service')
        self.chembl = ChEMBL(cache=True)

        print('Loading ChemSpider service')
        self.chemspider = ChemSpider(cache=True)

        print('Loading UniChem service')
        # in unichem db number is 22 and chembl is 1
        self.unichem = UniChem()

        print('Settings some data aliases')
        self._cs_find = self.chemspider.find
        self._cs_get = self.chemspider.GetExtendedCompoundInfo

        # sorted() already returns a new list; no intermediate list() needed
        self.drug_ids = sorted(self.dd.df.index.values)
        self.drug_names = sorted(self.dd.df.DRUG_NAME.values)

Esempio n. 6

0

Mostra file

import os
import sys

from bioservices import ChEMBL, QuickGO, Reactome, KEGG
from py2neo import Graph

from model.core import *
from ncbi import fetch_publication_list
from quickgo import fetch_quick_go_data
from uniprot import *

# Graph database connection; host and password come from the environment
# variables DB and NEO4J_PASSWORD, with "localhost" / "" as fallbacks.
graph = Graph(
    host=os.environ.get("DB", "localhost"),
    bolt=True,
    password=os.environ.get("NEO4J_PASSWORD", ""),
)

# Quiet (verbose=False) bioservices clients shared by the module.
chembl = ChEMBL(verbose=False)
quick_go = QuickGO(verbose=False)
reactome = Reactome(verbose=False)
kegg = KEGG(verbose=False)

# watch("neo4j.bolt")

# Module-level lookup tables; all of them start out empty.
gene_dict = {}
transcript_dict = {}
pseudogene_dict = {}
cds_dict = {}
exon_dict = {}
rrna_dict = {}
trna_dict = {}
ncrna_dict = {}
location_dict = {}

Esempio n. 7

0

Mostra file

File: chembl.py Progetto: MehtapIsik/pythonsandbox

from bioservices import ChEMBL

# Quiet client for the ChEMBL web services.
chembl = ChEMBL(verbose=False)

# UniProt accession used for this demonstration.
acc = 'P00519'

# Look the protein up as a ChEMBL target.
target_info = chembl.get_target_by_uniprotId(acc)
# print(x) with a single argument behaves the same on Python 2 and 3,
# whereas the print statement is a SyntaxError on Python 3.
print(target_info)

target_chembl_id = target_info['chemblId']

# All bioactivities recorded against that target.
bioactivities = chembl.get_target_bioactivities(str(target_chembl_id))

compound_chemblids = [entry['ingredient_cmpd_chemblid']
                      for entry in bioactivities]
print("# of compound chemblids: %d" % len(compound_chemblids))

# Fetch one compound, using the example id shipped with the client.
resjson = chembl.get_compounds_by_chemblId(chembl._chemblId_example)
# The value is wrapped in a 1-tuple so that %s formats it as one item
# whatever its type.
print("Example compound retrieved by compound chemblid: \n%s" % (resjson,))

Esempio n. 8

0

Mostra file

class ChemSpiderSearch(object):
    """Use ChemSpider and ChEMBL to identify drugs from their names.

    .. warning:: this is a draft version in dev mode

    ::

        c = ChemSpiderSearch(drug_decode)
        c.search_in_chemspider()
        c.search_from_smile_inchembl()
        df = c.find_chembl_ids()

    It happens that most of public names can be found
    and almost none of non-public are found. As expected...

    If chemspider, chembl and pubchem are empty, search for the drug name in
    chemspider.

        CHEMSPIDER search:
            if no identifier found, the search is DROPPED
            if 1 identifier found, we keep going using the SMILE identifier
            If more than 1 identifier found, this is AMBIGUOUS.


    If chembl and pubchem, check with unichem
    If chembl, check smiles
    If chembl and chemspider, check smiles ?

    SMILES are not unique

    """
    def __init__(self, drug_decode):
        """Load the drug table and create the web-service clients.

        :param drug_decode: passed unchanged to ``DrugDecode``, once for the
            reference table (``self.dd``) and once for the table that is
            meant to be completed (``self.dd_filled``).
        """
        print("ChemSpiderSearch is still in progress, please do not use")
        self.dd = DrugDecode(drug_decode)
        self.dd_filled = DrugDecode(drug_decode)

        from bioservices.chemspider import ChemSpider
        from bioservices import ChEMBL
        from bioservices import UniChem

        try:
            print('Loading PubChem')
            from bioservices.pubchem import PubChem
            # NOTE: the attribute is spelled "puchem"; the name is kept as is
            # because code outside this class may already rely on it.
            self.puchem = PubChem()
        except Exception:
            # Pubchem was introduced only in dec 2015, so it is optional.
            # Catching Exception (rather than a bare except) keeps this
            # best-effort while letting Ctrl-C / SystemExit through.
            pass

        print('Loading ChEMBL service')
        self.chembl = ChEMBL(cache=True)

        print('Loading ChemSpider service')
        self.chemspider = ChemSpider(cache=True)

        print('Loading UniChem service')
        # in unichem db number is 22 and chembl is 1
        self.unichem = UniChem()

        print('Settings some data aliases')
        self._cs_find = self.chemspider.find
        self._cs_get = self.chemspider.GetExtendedCompoundInfo

        self.drug_ids = sorted(self.dd.df.index.values)
        self.drug_names = sorted(self.dd.df.DRUG_NAME.values)

    def _store_chemspider(self, drug_id, value):
        """Write *value* into the CHEMSPIDER cell of *drug_id* in ``dd_filled``.

        The cell is addressed in a single indexing step: a chained
        ``df.loc[row].loc[col] = value`` would only modify a temporary row.
        """
        df = self.dd_filled.df
        if isinstance(value, list) and df['CHEMSPIDER'].dtype != object:
            # a list can only live in a cell of an object-typed column
            df['CHEMSPIDER'] = df['CHEMSPIDER'].astype(object)
        df.at[drug_id, 'CHEMSPIDER'] = value

    def filling_chembl_pubchem_using_unichem(self):
        """Fill missing CHEMSPIDER identifiers by searching the drug names.

        For each drug that has none of the CHEMSPIDER, CHEMBL and PUBCHEM
        identifiers, the drug name is searched in ChemSpider. A single hit
        is stored as is in ``self.dd_filled.df``; several hits (ambiguous
        name) are stored as a list; no hit leaves the cell untouched.

        Despite its name, this method does not query UniChem yet.
        """
        id_columns = ['CHEMSPIDER', 'CHEMBL', 'PUBCHEM']
        N = len(self.drug_ids)
        pb = Progress(N)
        for i, this in enumerate(self.drug_ids):
            entry = self.dd.df.loc[this]
            # if no information is provided, we will need to get it
            # from chemspider

            # From the database, when chembl is provided, it is unique
            # same for chemspider and pubchem and CAS
            if entry[id_columns].count() == 0:
                results = self._cs_find(entry.DRUG_NAME)
                if len(results) == 1:
                    self._store_chemspider(this, results[0])
                elif len(results) > 1:
                    # non unique
                    self._store_chemspider(this, list(results))
            pb.animate(i + 1)

        # Search in chemspider systematically
        for i, this in enumerate(self.drug_ids):
            entry = self.dd.df.loc[this]
            # the selection is recomputed for the current drug (it used to be
            # whatever the last iteration of the first loop had left behind)
            if entry[id_columns].count() == 1:
                # TODO: the hits are not used yet
                self._cs_find(entry.DRUG_NAME)

            pb.animate(i + 1)

    def find_chembl_ids(self):
        """Summarise the search results in one table, one row per drug.

        ``search_in_chemspider`` and ``search_from_smile_inchembl`` are
        expected to have been called before; a drug for which a result is
        missing gets an empty string in the corresponding column.

        :return: DataFrame with columns DRUG_NAME, CHEMBL_ID, CHEMSPIDER_ID,
            SMILE_CHEMBL and SMILE_CHEMSPIDER. Multiple identifiers or
            SMILES are joined with commas.
        """
        # don't know how to search for a chembl id given the drug name...
        # so we use chemspider
        #self.search_in_chemspider()

        # but chemspider returns molecular information (not chembl id)
        # so given the smile string, we look back in chembl for valid entries
        #self.search_from_smile_inchembl()

        # errors that mean "no usable result stored for this drug"
        missing = (AttributeError, KeyError, TypeError)

        # finally, get the chembl identifiers
        drugs = []
        chembl_ids = []
        chemspider_ids = []
        smiles_c = []
        smiles_cs = []

        for drug in self.drug_ids:
            chembl = chemspider = ''
            try:
                chembl = ",".join(
                    x['chemblId'] for x in self.results_chembl[drug])
                chemspider = ",".join(str(x) for x in self.results[drug])
            except missing:
                # str() so that non-string drug identifiers can be reported
                print('skipping' + str(drug))

            try:
                smile_c = ",".join(
                    x['smiles'] for x in self.results_chembl[drug])
            except missing:
                smile_c = ''
            try:
                smile_cs = self.results_chemspider[drug]['smiles']
            except missing:
                smile_cs = ''

            # every list grows by exactly one item per drug, which keeps the
            # columns of the final table aligned
            drugs.append(drug)
            chembl_ids.append(chembl)
            chemspider_ids.append(chemspider)
            smiles_c.append(smile_c)
            smiles_cs.append(smile_cs)

        df = pd.DataFrame(
            [drugs, chembl_ids, chemspider_ids, smiles_c, smiles_cs],
            index=[
                'DRUG_NAME', 'CHEMBL_ID', 'CHEMSPIDER_ID', 'SMILE_CHEMBL',
                'SMILE_CHEMSPIDER'
            ])
        df = df.T
        return df

    def get_chemspider_ids(self, drug_name):
        """Return what the ChemSpider search gives for *drug_name*."""
        return self._cs_find(drug_name)

    def search_in_chemspider(self):
        """Search every drug name of the table in ChemSpider.

        Fills ``self.results``, a dictionary whose keys are the drug ids and
        whose values are the search results (an empty list when the search
        raised), and stores the same results in the CHEMSPIDER_SEARCHED
        column of ``self.dd_filled.df``.
        """
        # SB52334 --> SB-52334
        N = len(self.dd)

        pb = Progress(N)
        self.results = {}
        results = []
        for i, drug in enumerate(self.dd.df.index):
            drug_name = self.dd.df.loc[drug].DRUG_NAME
            try:
                res = self._cs_find(drug_name)
            except Exception:
                # best effort: a failed search counts as "nothing found"
                print("This drug index (%s) / drug name (%s) was not found" %
                      (drug, drug_name))
                res = []
            self.results[drug] = res
            pb.animate(i + 1)
            results.append(res)
        self.dd_filled.df['CHEMSPIDER_SEARCHED'] = results

    def search_from_smile_inchembl(self):
        """Look up in ChEMBL the SMILES of every ChemSpider hit.

        Requires ``self.results`` (see ``search_in_chemspider``). Fills
        ``self.results_chembl`` (drug id -> list of ChEMBL compounds) and
        ``self.results_chemspider`` (drug id -> ChemSpider record of the
        last hit processed for that drug).
        """
        N = len(self.drug_ids)

        pb = Progress(N)
        self.results_chembl = {}
        self.results_chemspider = {}

        for i, drug in enumerate(self.drug_ids):
            self.results_chembl[drug] = []

            hits = self.results[drug]
            if hits:
                for chemspider_id in hits:
                    chemspider_entry = self._cs_get(chemspider_id)
                    self.results_chemspider[drug] = chemspider_entry
                    smile = chemspider_entry['smiles']
                    # now search in chembl
                    res_chembl = self.chembl.get_compounds_by_SMILES(smile)
                    try:
                        self.results_chembl[drug].extend(
                            res_chembl['compounds'])
                    except (KeyError, TypeError):
                        # the answer holds no usable 'compounds' entry
                        pass

            pb.animate(i + 1)

Esempio n. 9

0

Mostra file

File: fetchIO.py Progetto: xianqiangsun/bio_io

from bioservices import UniProt
from bioservices import PDB
from bioservices.apps.fasta import FASTA
from bioservices import ChEMBL
import os

# Leos from uniprot id to the protein name
# NOTE(review): the wording of the comment above is unclear ("Leos" looks
# like a typo) -- confirm the intended meaning with the author.
# Module-level service clients, created once when the module is imported.
f = FASTA()
u = UniProt(cache=True, verbose=False)
s = PDB()
c = ChEMBL(verbose=False)
"""
pdb chain to uniprot id: ftp.ebi.ac.uk/pub/databases/msd/sifts/csv/pdb_chain_uniprot.csv
import requests
from xml.etree.ElementTree import fromstring

pdb_id = '4hhb.A'
pdb_mapping_url = 'http://www.rcsb.org/pdb/rest/das/pdb_uniprot_mapping/alignment'
uniprot_url = 'http://www.uniprot.org/uniprot/{}.xml'

def get_uniprot_accession_id(response_xml):
    root = fromstring(response_xml)
    return next(
        el for el in root.getchildren()[0].getchildren()
        if el.attrib['dbSource'] == 'UniProt'
    ).attrib['dbAccessionId']

def get_uniprot_protein_name(uniport_id):
    uinprot_response = requests.get(
        uniprot_url.format(uniport_id)
    ).text

Esempio n. 10

0

Mostra file

File: update_activities.py Progetto: UPDDI/mps-database-server

def run(days=180):
    """Update ChEMBL bioactivities, then standardize, normalize and flag them.

    Run as::

      $ python manage.py runscript update_activities

    By default, updates bioactivities of compounds that were never updated
    or were last updated at least 180 days ago.

    To force updating bioactivities for all compounds, give 0 days argument
    as follows::

        $ python manage.py runscript update_activities --script-args=0

    The function works in four stages:

    1. For every compound due for an update, fetch its activities from
       ChEMBL and create any missing ``Compound``, ``Target``, ``Assay``
       and ``Bioactivity`` rows.
    2. Recompute the standardized name/units/value columns with raw SQL.
    3. For each (standard name, target) group, store every value divided
       by the group's largest absolute value as ``normalized_value``.
    4. Reset ``data_validity`` and flag entries as ``'R'`` (out of range)
       or ``'T'`` (possible thousand-fold transcription error).

    :param days: minimum age, in days, of a compound's ``last_update`` for
        it to be refreshed; converted with ``int()`` and replaced by 180
        when that conversion raises ``ValueError``.
    """

    # Script arguments arrive as strings; fall back to the default when the
    # value is not a valid integer.
    try:
        days = int(days)
    except ValueError:
        days = 180

    chembl = ChEMBLdb()

    #for table, func in [
    #    (Compound, chembl.get_compounds_activities),
    #    (Target, chembl.get_target_bioactivities),
    #    (Assay, chembl.get_assay_bioactivities), ]:
    # count: bioactivities created, skip: already in the database,
    # error: creations that failed with ValueError, ncomp: compounds refreshed
    count = skip = error = ncomp = 0
    # will iterate over all compounds one-by-one
    for compound in Compound.objects.all():

        # if no updates were made, last_update is None
        if (compound.last_update is None or
                (datetime.date.today() - compound.last_update) >= datetime.timedelta(days)):
            ncomp += 1

            # A failed fetch skips the compound entirely, so its last_update
            # is left unchanged and it is retried on the next run.
            try:
                acts = chembl.get_compounds_activities(str(compound.chemblid))
            except BioServicesError:
                continue

            try:
                for act in acts['bioactivities']:
                    # keep only the known fields, renamed through FIELDS
                    act = {FIELDS[key]: value for key, value in list(act.items()) if key in FIELDS}

                    # ChEMBL ids of the target, assay, compound and parent
                    # compound of this activity
                    tid, aid, cid, pid = (
                        act['target'], act['assay'], act['compound'], act['parent_compound']
                    )
                    # Get the parent compound, creating it when unknown
                    try:
                        parent = Compound.objects.get(chemblid=pid)
                    except Compound.DoesNotExist:
                        try:
                            # Uses implemented methods in lieu of Bioservices
                            parent_compound_data = get_chembl_compound_data(pid)
                            parent_compound_data.update(get_drugbank_data_from_chembl_id(pid))

                            # targets are stored as separate rows, so they
                            # are taken out of the compound's own fields
                            parent_compound_targets = parent_compound_data.get('targets', [])
                            # NOTE(review): raises KeyError when 'targets' is
                            # absent, although the .get() above tolerates
                            # that; the bare except below would swallow it.
                            del parent_compound_data['targets']

                            parent = Compound.objects.create(locked=True, **parent_compound_data)

                            for target in parent_compound_targets:
                                CompoundTarget.objects.create(compound=parent, **target)
                            # NOTE(review): prints a tuple, not a formatted
                            # message (same for the other double-parenthesised
                            # print calls in this function).
                            print(("Added Compound:", parent.name))
                        except ValueError:
                            error += 1
                            continue

                    # Get the target, creating it when unknown
                    try:
                        target = Target.objects.get(chemblid=tid)
                    except Target.DoesNotExist:
                        try:
                            target = Target.objects.create(locked=True, **chembl_target(tid))
                        except ValueError:
                            error += 1
                            continue

                    # Get the assay, creating it when unknown
                    try:
                        assay = Assay.objects.get(chemblid=aid)
                    except Assay.DoesNotExist:
                        try:
                            assay = Assay.objects.create(locked=True, **chembl_assay(aid))
                        except ValueError:
                            error += 1
                            continue

                    # Create the bioactivity unless one already exists for
                    # this (target, assay, compound) combination
                    try:
                        activity = Bioactivity.objects.get(
                            target=target, assay=assay, compound=compound)
                    except Bioactivity.DoesNotExist:

                        # replace the ChEMBL ids by the model instances
                        (act['target'], act['assay'], act['compound'], act['parent_compound']) = (
                            target, assay, compound, parent
                        )

                        try:
                            ba = Bioactivity.objects.create(locked=True, **act)
                        except ValueError:
                            error += 1
                        except Exception as err:
                            # dump the string fields (with their lengths) to
                            # help diagnose the failure before re-raising
                            for key, val in list(act.items()):
                                if isinstance(val, str):
                                    print(('{}: ({}) {}'.format(key, len(val), val)))
                            raise err
                        else:
                            count += 1
                    else:
                        skip += 1
            # NOTE(review): this bare except also catches the `raise err`
            # above (and KeyboardInterrupt), so the failure is only printed
            # and the remaining activities of the compound are dropped.
            except:
                print(("An error occured:", compound.name, acts))

            # Reached even when the block above failed, so a compound whose
            # import errored is still marked as updated today.
            compound.last_update = datetime.date.today()
            compound.save()

    print(('{} bioactivities were added, {} were found in the database, and '
          '{} failed due to value errors.'.format(count, skip, error)))

    print('Updating bioactivity units...')
    cursor = connection.cursor()

    # Three statements in one call: reset the standardized columns, then
    # fill them from bioactivities_bioactivitytype, first for conversions
    # that do not need the molecular weight (mass_flag 'N'), then for those
    # that divide by it (mass_flag 'Y').
    cursor.execute(
        '''
        UPDATE bioactivities_bioactivity
        SET standardized_units = '',
            standard_name = '',
            standardized_value = NULL;

        --no mass is needed in data conversion
        UPDATE public.bioactivities_bioactivity as v
        SET standard_name = s.standard_name,
        standardized_units=s.standard_unit,
            standardized_value=value*scale_factor
        FROM public.bioactivities_bioactivitytype as s
        WHERE v.bioactivity_type = s.chembl_bioactivity and
        v.units=s.chembl_unit and s.mass_flag='N';

        --mass is needed in data conversion
        UPDATE public.bioactivities_bioactivity as v
        SET standard_name = s.standard_name,
        standardized_units=s.standard_unit,
            standardized_value=value*scale_factor/c.molecular_weight
        FROM public.bioactivities_bioactivitytype as s,
        public.compounds_compound as c
        WHERE v.bioactivity_type = s.chembl_bioactivity and
        v.units=s.chembl_unit and v.compound_id =c.id and s.mass_flag='Y';
        '''
    )

    print('Units updated')

    print('Normalizing values')

    # dictionaries are used as sets of distinct standard names / targets
    bio_types = {bio.standard_name: True for bio in Bioactivity.objects.all()}

    for bio_type in bio_types:
        targets = {
            bio.target: True for bio in Bioactivity.objects.filter(
                standard_name=bio_type
            ).prefetch_related('target')
        }
        for target in targets:
            # NOTE(review): target.id fails if a bioactivity has no target
            # (the flagging code below allows for that) -- confirm whether
            # the field is nullable.
            current_bio = Bioactivity.objects.filter(
                standard_name=bio_type,
                target_id=target.id,
                standardized_value__isnull=False
            ).prefetch_related('target')

            bio_pk = [bio.id for bio in current_bio]
            bio_value = np.array([bio.standardized_value for bio in current_bio])
            if len(bio_pk) > 0 and len(bio_value) > 0:
                # scale by the largest absolute value of the group
                # NOTE(review): divides by zero when every value of the
                # group is 0 -- confirm whether that can happen.
                bio_value /= np.max(np.abs(bio_value), axis=0)
                for index, pk in enumerate(bio_pk):
                    try:
                        Bioactivity.objects.filter(pk=pk).update(
                            normalized_value=bio_value[index]
                        )
                    except:
                        print(('Update of bioactivity {} failed'.format(pk)))

    # Flag questionable entries
    print('Flagging questionable entries...')

    # Remove old flags in case they have become outdated (medians change and so on)
    Bioactivity.objects.all().update(data_validity='')

    # number of entries flagged
    total = 0

    all_chembl = Bioactivity.objects.all().prefetch_related('compound', 'target').filter(
        standardized_value__isnull=False
    )

    # again dictionaries used as sets of distinct values
    bio_types = {bio.standard_name: True for bio in all_chembl}
    bio_compounds = {bio.compound: True for bio in all_chembl}
    bio_targets = {bio.target: True for bio in all_chembl}

    # entries grouped under the key "standard_name|compound id|target id"
    chembl_entries = {}

    for entry in all_chembl:
        if entry.target:
            key = '|'.join([entry.standard_name, str(entry.compound.id), str(entry.target.id)])
        else:
            key = '|'.join([entry.standard_name, str(entry.compound.id), 'None'])

        chembl_entries.setdefault(key, []).append(entry)

    # ChEMBL contains negative values!
    # TODO Needs revision
    # NOTE(review): this visits every (type, target, compound) combination,
    # most of which have no entries; iterating over chembl_entries directly
    # would visit only the groups that exist.
    for bio_type in bio_types:
        for target in bio_targets:
            for compound in bio_compounds:
                if bio_type and target and compound:
                    # NOTE(review): the else branch is unreachable because
                    # the enclosing condition already requires a truthy
                    # target, so groups stored under '...|None' are never
                    # examined.
                    if target:
                        current_bio = chembl_entries.get('|'.join([bio_type, str(compound.id), str(target.id)]), [])
                    else:
                        current_bio = chembl_entries.get('|'.join([bio_type, str(compound.id), 'None']), [])

                    bio_pk = [bio.id for bio in current_bio]
                    bio_value = np.array([bio.standardized_value for bio in current_bio])

                    if len(bio_value) > 0:
                        # Shift values by the minimum to avoid problems with negative values
                        bio_value = np.array(bio_value) + np.abs(np.min(bio_value)) + 1

                    if len(bio_pk) > 0 and len(bio_value) > 0:
                        bio_median = np.median(bio_value)
                        # values above 100 times the group median are flagged
                        flag_threshold = bio_median * 100

                        for index, pk in enumerate(bio_pk):
                            if bio_value[index] > flag_threshold:
                                this_bio = Bioactivity.objects.get(pk=bio_pk[index])
                                #this_bio.notes = 'Flagged'
                                # Flag data validity for "Out of Range"
                                this_bio.data_validity = 'R'
                                this_bio.save()
                                print((bio_pk[index], bio_value[index], 'vs', bio_median))
                                total += 1

                        # Check for possible transcription errors (1000-fold error mistaking uM for nM)
                        # NOTE(review): the comparison runs on the shifted
                        # values computed above, not on the stored ones, and
                        # uses exact floating-point equality -- confirm this
                        # is intended.
                        for index, pk in enumerate(bio_pk):
                            thousand_fold = np.where(bio_value == bio_value[index] * 1000)[0]
                            if len(thousand_fold) > 0:
                                for error_index in thousand_fold:
                                    this_bio = Bioactivity.objects.get(pk=bio_pk[error_index])
                                    # count the entry only if it was not
                                    # already flagged as out of range
                                    if not this_bio.data_validity:
                                        total += 1
                                    this_bio.data_validity = 'T'
                                    this_bio.save()
                                    print((bio_pk[error_index], bio_value[error_index], 'thousand fold'))

    print(total)

Esempio n. 11

0

Mostra file

File: drugs.py Progetto: CancerRxGene/gdsctools

class ChemSpiderSearch(object):
    """Use ChemSpider and ChEMBL to identify drugs from their names.

    .. warning:: this is a draft version in dev mode

    ::

        c = ChemSpiderSearch(drug_decode)
        c.search_in_chemspider()
        c.search_from_smile_inchembl()
        df = c.find_chembl_ids()

    It happens that most of public names can be found
    and almost none of non-public are found. As expected...

    If chemspider, chembl and pubchem are empty, search for the drug name in
    chemspider.

        CHEMSPIDER search:
            if no identifier found, the search is DROPPED
            if 1 identifier found, we keep going using the SMILE identifier
            If more than 1 identifier found, this is AMBIGUOUS.


    If chembl and pubchem, check with unichem
    If chembl, check smiles
    If chembl and chemspider, check smiles ?

    SMILES are not unique

    """
    def __init__(self, drug_decode):
        """Load the drug table and create the web-service clients.

        :param drug_decode: passed unchanged to ``DrugDecode``, once for the
            reference table (``self.dd``) and once for the table that is
            meant to be completed (``self.dd_filled``).
        """
        print("ChemSpiderSearch is still in progress, please do not use")
        self.dd = DrugDecode(drug_decode)
        self.dd_filled = DrugDecode(drug_decode)

        from bioservices.chemspider import ChemSpider
        from bioservices import ChEMBL
        from bioservices import UniChem

        try:
            print('Loading PubChem')
            from bioservices.pubchem import PubChem
            # NOTE: the attribute is spelled "puchem"; the name is kept as is
            # because code outside this class may already rely on it.
            self.puchem = PubChem()
        except Exception:
            # Pubchem was introduced only in dec 2015, so it is optional.
            # Catching Exception (rather than a bare except) keeps this
            # best-effort while letting Ctrl-C / SystemExit through.
            pass

        print('Loading ChEMBL service')
        self.chembl = ChEMBL(cache=True)

        print('Loading ChemSpider service')
        self.chemspider = ChemSpider(cache=True)

        print('Loading UniChem service')
        # in unichem db number is 22 and chembl is 1
        self.unichem = UniChem()

        print('Settings some data aliases')
        self._cs_find = self.chemspider.find
        self._cs_get = self.chemspider.GetExtendedCompoundInfo

        self.drug_ids = sorted(self.dd.df.index.values)
        self.drug_names = sorted(self.dd.df.DRUG_NAME.values)

    def _store_chemspider(self, drug_id, value):
        """Write *value* into the CHEMSPIDER cell of *drug_id* in ``dd_filled``.

        The cell is addressed in a single indexing step: a chained
        ``df.ix[row].loc[col] = value`` would only modify a temporary row.
        """
        df = self.dd_filled.df
        if isinstance(value, list) and df['CHEMSPIDER'].dtype != object:
            # a list can only live in a cell of an object-typed column
            df['CHEMSPIDER'] = df['CHEMSPIDER'].astype(object)
        df.at[drug_id, 'CHEMSPIDER'] = value

    def filling_chembl_pubchem_using_unichem(self):
        """Fill missing CHEMSPIDER identifiers by searching the drug names.

        For each drug that has none of the CHEMSPIDER, CHEMBL and PUBCHEM
        identifiers, the drug name is searched in ChemSpider. A single hit
        is stored as is in ``self.dd_filled.df``; several hits (ambiguous
        name) are stored as a list; no hit leaves the cell untouched.

        Despite its name, this method does not query UniChem yet.
        """
        id_columns = ['CHEMSPIDER', 'CHEMBL', 'PUBCHEM']
        N = len(self.drug_ids)
        pb = Progress(N)
        for i, this in enumerate(self.drug_ids):
            # label-based .loc replaces .ix, which pandas no longer provides
            entry = self.dd.df.loc[this]
            # if no information is provided, we will need to get it
            # from chemspider

            # From the database, when chembl is provided, it is unique
            # same for chemspider and pubchem and CAS
            if entry[id_columns].count() == 0:
                results = self._cs_find(entry.DRUG_NAME)
                if len(results) == 1:
                    self._store_chemspider(this, results[0])
                elif len(results) > 1:
                    # non unique
                    self._store_chemspider(this, list(results))
            pb.animate(i + 1)

        # Search in chemspider systematically
        for i, this in enumerate(self.drug_ids):
            entry = self.dd.df.loc[this]
            # the selection is recomputed for the current drug (it used to be
            # whatever the last iteration of the first loop had left behind)
            if entry[id_columns].count() == 1:
                # TODO: the hits are not used yet
                self._cs_find(entry.DRUG_NAME)

            pb.animate(i + 1)

    def find_chembl_ids(self):
        """Summarise the search results in one table, one row per drug.

        ``search_in_chemspider`` and ``search_from_smile_inchembl`` are
        expected to have been called before; a drug for which a result is
        missing gets an empty string in the corresponding column.

        :return: DataFrame with columns DRUG_NAME, CHEMBL_ID, CHEMSPIDER_ID,
            SMILE_CHEMBL and SMILE_CHEMSPIDER. Multiple identifiers or
            SMILES are joined with commas.
        """
        # don't know how to search for a chembl id given the drug name...
        # so we use chemspider
        #self.search_in_chemspider()

        # but chemspider returns molecular information (not chembl id)
        # so given the smile string, we look back in chembl for valid entries
        #self.search_from_smile_inchembl()

        # errors that mean "no usable result stored for this drug"
        missing = (AttributeError, KeyError, TypeError)

        # finally, get the chembl identifiers
        drugs = []
        chembl_ids = []
        chemspider_ids = []
        smiles_c = []
        smiles_cs = []

        for drug in self.drug_ids:
            chembl = chemspider = ''
            try:
                chembl = ",".join(
                    x['chemblId'] for x in self.results_chembl[drug])
                chemspider = ",".join(str(x) for x in self.results[drug])
            except missing:
                # str() so that non-string drug identifiers can be reported
                print('skipping' + str(drug))

            try:
                smile_c = ",".join(
                    x['smiles'] for x in self.results_chembl[drug])
            except missing:
                smile_c = ''
            try:
                smile_cs = self.results_chemspider[drug]['smiles']
            except missing:
                smile_cs = ''

            # every list grows by exactly one item per drug, which keeps the
            # columns of the final table aligned
            drugs.append(drug)
            chembl_ids.append(chembl)
            chemspider_ids.append(chemspider)
            smiles_c.append(smile_c)
            smiles_cs.append(smile_cs)

        df = pd.DataFrame(
            [drugs, chembl_ids, chemspider_ids, smiles_c, smiles_cs],
            index=[
                'DRUG_NAME', 'CHEMBL_ID', 'CHEMSPIDER_ID', 'SMILE_CHEMBL',
                'SMILE_CHEMSPIDER'
            ])
        df = df.T
        return df

    def get_chemspider_ids(self, drug_name):
        """Return what the ChemSpider search gives for *drug_name*."""
        return self._cs_find(drug_name)

    def search_in_chemspider(self):
        """Search every drug name of the table in ChemSpider.

        Fills ``self.results``, a dictionary whose keys are the drug ids and
        whose values are the search results (an empty list when the search
        raised), and stores the same results in the CHEMSPIDER_SEARCHED
        column of ``self.dd_filled.df``.
        """
        # SB52334 --> SB-52334
        N = len(self.dd)

        pb = Progress(N)
        self.results = {}
        results = []
        for i, drug in enumerate(self.dd.df.index):
            drug_name = self.dd.df.loc[drug].DRUG_NAME
            try:
                res = self._cs_find(drug_name)
            except Exception:
                # best effort: a failed search counts as "nothing found"
                print("This drug index (%s) / drug name (%s) was not found" %
                        (drug, drug_name))
                res = []
            self.results[drug] = res
            pb.animate(i + 1)
            results.append(res)
        self.dd_filled.df['CHEMSPIDER_SEARCHED'] = results

    def search_from_smile_inchembl(self):
        """Look up in ChEMBL the SMILES of every ChemSpider hit.

        Requires ``self.results`` (see ``search_in_chemspider``). Fills
        ``self.results_chembl`` (drug id -> list of ChEMBL compounds) and
        ``self.results_chemspider`` (drug id -> ChemSpider record of the
        last hit processed for that drug).
        """
        N = len(self.drug_ids)

        pb = Progress(N)
        self.results_chembl = {}
        self.results_chemspider = {}

        for i, drug in enumerate(self.drug_ids):
            self.results_chembl[drug] = []

            hits = self.results[drug]
            if hits:
                for chemspider_id in hits:
                    chemspider_entry = self._cs_get(chemspider_id)
                    self.results_chemspider[drug] = chemspider_entry
                    smile = chemspider_entry['smiles']
                    # now search in chembl
                    res_chembl = self.chembl.get_compounds_by_SMILES(smile)
                    try:
                        self.results_chembl[drug].extend(
                            res_chembl['compounds'])
                    except (KeyError, TypeError):
                        # the answer holds no usable 'compounds' entry
                        pass

            pb.animate(i + 1)