Beispiel #1
0
def name2id(name, outformat):
  """Resolve a chemical name to ``outformat``, trying several lookups in turn.

  The lookups are attempted in this order until one returns something:

  1. ``cirpy.resolve(name, outformat)``                 -> source ``'NCI'``
  2. ``chemspipy.find_one(name)``, whose ``smiles`` attribute is passed to
     ``cirpy.resolve``                                  -> source ``'ChemSpi'``
  3. ``cirpy.resolve`` limited to the ``'name_pattern'`` resolver
                                                        -> source ``'NCI-pattern-match'``

  Returns an ``(identifier, source)`` tuple. When every lookup fails the
  identifier is the string ``'None'`` (``str(None)``) and the source is
  ``None``. When a lookup returns something without ``rstrip`` (handled as
  an indexable collection of candidates), the first entry is used and a
  message is printed.
  """
  import cirpy
  import chemspipy
  import queryDevice

  source = 'NCI'
  resolved = cirpy.resolve(name, outformat)

  if resolved is None:
    source = 'ChemSpi'
    hit = chemspipy.find_one(name)
    try:
      # An AttributeError means ``hit`` exposes no ``smiles``
      # (presumably ``find_one`` found nothing -- confirm against chemspipy).
      smiles = hit.smiles
      resolved = cirpy.resolve(smiles, outformat)
    except AttributeError:
      resolved = None

  if resolved is None:
    source = 'NCI-pattern-match'
    resolved = cirpy.resolve(name, outformat, ['name_pattern'])

  if resolved is None:
    source = None
    resolved = str(resolved)

  try:
    idstring = (resolved.rstrip(), source)
  except AttributeError:
    idstring = (resolved[0].rstrip(), source)
    # Single pre-formatted argument: valid on Python 2 and 3, and spaced
    # exactly like the former comma-separated print statement.
    print('There were multiple results for:  %s  using:  %s \n%s'
          % (name, idstring[0], idstring))

  return idstring
Beispiel #2
0
    def search(self, query):
        """Search ChemSpider for ``query`` and describe the first hits.

        At most six results of ``self.cs.search(query)`` are processed. For
        each one the CAS number and IUPAC name are resolved from its InChI
        via ``cirpy``; when several CAS numbers come back as a list, the one
        closest to ``query`` (per ``difflib.get_close_matches``) is kept.

        Returns a list of dicts with the keys ``csid``, ``name``,
        ``iupac_name``, ``cas``, ``inchi``, ``formula`` and ``image``.
        Progress is printed along the way.
        """
        print('Connected to ChemSpider API')
        print("Searching started")
        print("Searching for: " + query)
        hits = []
        for position, compound in enumerate(self.cs.search(query)):
            if position > 5:
                break
            print("Compound " + str(position))
            formula = str(compound.molecular_formula)
            csid = str(compound.csid)
            inchi = compound.inchi
            name = compound.common_name
            cas = cirpy.resolve(inchi, 'cas')
            iupac_name = cirpy.resolve(inchi, 'iupac_name')

            if type(cas) is not list:
                best_cas = cas
            else:
                # Cutoff 0 keeps every candidate; take the best-ranked one.
                ranked = difflib.get_close_matches(str(query), cas, 3, 0)
                print(ranked)
                best_cas = ranked[0]
            image = compound.image_url
            print(image)
            hits.append({
                'csid': csid,
                'name': name,
                'iupac_name': iupac_name,
                'cas': best_cas,
                'inchi': inchi,
                'formula': formula,
                'image': image,
            })

        print("Searching finished")
        print(hits)

        return hits
Beispiel #3
0
 def test_tnt_smiles_custom_resolvers(self):
     """Check that TNT resolves to the same SMILES for either resolver order."""
     tnt_smiles = 'Cc1c(cc(cc1[N+]([O-])=O)[N+]([O-])=O)[N+]([O-])=O'

     opsin_first = resolve('2,4,6-trinitrotoluene', 'smiles',
                           ['name_by_opsin', 'name_by_cir'])
     self.assertEqual(opsin_first, tnt_smiles)

     cir_first = resolve('2,4,6-trinitrotoluene', 'smiles',
                         ['name_by_cir', 'name_by_opsin'])
     self.assertEqual(cir_first, tnt_smiles)
Beispiel #4
0
 def test_tnt_smiles_custom_resolvers(self):
     """Check that both custom resolver orderings give the expected TNT SMILES."""
     expected = 'Cc1c(cc(cc1[N+]([O-])=O)[N+]([O-])=O)[N+]([O-])=O'
     resolver_orders = (
         ['name_by_opsin', 'name_by_cir'],
         ['name_by_cir', 'name_by_opsin'],
     )
     for resolvers in resolver_orders:
         self.assertEqual(
             resolve('2,4,6-trinitrotoluene', 'smiles', resolvers),
             expected)
Beispiel #5
0
def name2molecule(oname, smiles_code):
    """Write PDB and MOL renderings of ``smiles_code`` to disk.

    All whitespace is removed from ``oname`` to form the file stem. The
    structure is resolved through ``cirpy`` first as ``"pdb"`` and written
    to ``PDB/<stem>.pdb``, then as ``"mol"`` and written to
    ``MOL/<stem>.mol``. Both directories are used as given relative paths.

    Always returns ``None``.
    """
    oname = "".join(oname.split())

    pdbfile = cirpy.resolve(smiles_code, "pdb")
    # ``with`` guarantees the handle is closed even if the write raises
    # (e.g. when the resolver returned nothing writable).
    with open("PDB/" + oname + ".pdb", "w+") as pdb_output:
        pdb_output.write(pdbfile)

    molfile = cirpy.resolve(smiles_code, "mol")
    with open("MOL/" + oname + ".mol", "w+") as mol_output:
        mol_output.write(molfile)

    return None
Beispiel #6
0
 def parse_names(self):
     """Build a ``Species`` from the first entry of ``self.names`` that resolves.

     Every name is tried with the ``name_by_opsin`` resolver first; only if
     none of them resolves is the whole list retried with ``name_by_cir``.

     Raises ``ConversionError`` when no name resolves with either resolver.
     """
     for resolver in ('name_by_opsin', 'name_by_cir'):
         for candidate in self.names:
             smiles = cirpy.resolve(candidate, 'smiles', [resolver])
             if smiles is not None:
                 return Species().fromSMILES(smiles)
     raise ConversionError(
         'Could not resolve name for species {}.'.format(self.prime_id))
Beispiel #7
0
def process_bioactive_identifier(request):
    """Look up a compound by CAS number or InChIKey and describe it as JSON.

    Reads ``cas_number`` and ``inchikey`` from ``request.GET``. If a
    ``Bioactive`` row already matches, the response carries its URL and its
    string form. Otherwise the compound is looked up through ``cirpy`` and
    ``pcp``, and the response carries the scraped name, IUPAC name,
    InChIKey, structure image URL, CID and SMILES. A response with an
    ``error`` entry is returned when the lookup raises ``IndexError`` or
    ``pcp.BadRequestError``, or when the compound has no CID.
    """
    params = request.GET
    cas_no = params.get('cas_number')
    inchikey = params.get('inchikey', '').strip()

    # The CAS number takes precedence over the InChIKey when both are given.
    if cas_no:
        existing = Bioactive.objects.filter(
            chemical_properties__synonyms__icontains=cas_no).first()
    elif inchikey:
        existing = Bioactive.objects.filter(inchikey__exact=inchikey).first()
    else:
        existing = None

    if existing:
        return JsonResponse({
            'object_exists': existing.get_absolute_url(),
            'object_exists_name': str(existing),
        })

    try:
        if cas_no:
            smiles = cirpy.query(cas_no, 'smiles')[0].value
            if '.' in smiles:
                # Dot-separated SMILES: keep the first fragment longer than
                # five characters (IndexError if there is none).
                smiles = [part for part in smiles.split('.')
                          if len(part) > 5][0]
            lookup, namespace, resolvers = smiles, 'smiles', ['smiles']
        else:
            lookup, namespace, resolvers = (inchikey, 'inchikey',
                                            ['stdinchikey'])

        pcp_query = pcp.get_compounds(lookup, namespace)[0]
        iupac_name = None
        if not pcp_query.iupac_name:
            # PubChem has no IUPAC name; ask CIR instead.
            iupac_name = cirpy.resolve(lookup, 'iupac_name', resolvers)
        if not pcp_query.cid:
            raise IndexError
    except (IndexError, pcp.BadRequestError):
        return JsonResponse({'error': 'No compound found for this CAS number'})

    image_template = (
        'https://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?cid={}&t=l')
    return JsonResponse({
        'chemical_name': Bioactive.scrape_compound_name(pcp_query.cid),
        'iupac_name': pcp_query.iupac_name or iupac_name or 'n/a',
        'inchikey': pcp_query.inchikey,
        'structure_url': image_template.format(pcp_query.cid),
        'hidden_cid': pcp_query.cid,
        'smiles': (pcp_query.isomeric_smiles or pcp_query.canonical_smiles
                   or ''),
    })
Beispiel #8
0
def main():
    """Run main procedure.

    Each command-line identifier is resolved to XYZ text through ``cirpy``;
    if reading that raises ``AttributeError`` the identifier itself is
    handed to ``read_xyz``. The last entry of each result is kept and its
    coordinates are centred on their mean. Every structure after the first
    is then shifted along the axis on which the first structure is thinnest
    and printed with ``write_xyz``.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("identifiers", nargs="+")
    options = cli.parse_args()

    atomnos, atomcoords = [], []
    for identifier in options.identifiers:
        try:
            nos, _, coords = read_xyz(cirpy.resolve(identifier, "xyz"))
        except AttributeError:
            nos, _, coords = read_xyz(identifier)
        last_frame = coords[-1]
        atomnos.append(nos[-1])
        atomcoords.append(last_frame - np.mean(last_frame, axis=0))

    # NOTE(review): the reference geometry is never extended inside the
    # loop, so every later structure is offset relative to the first one
    # only -- confirm that this is intended.
    reference = atomcoords[0]
    for nos, coords in zip(atomnos[1:], atomcoords[1:]):
        ref_extent = reference.max(axis=0) - reference.min(axis=0)
        extent = coords.max(axis=0) - coords.min(axis=0)
        axis = ref_extent.argmin()

        shift = np.zeros(3)
        shift[axis] = (ref_extent[axis] + extent[axis]) / 2 + 2.83
        print(write_xyz(nos, coords + shift))
Beispiel #9
0
def LoadMutagenicity():
    """Resolve SMILES for new compounds and append-ready rows to a CSV file.

    Reads the second column of ``constant.DATA + 'data.csv'`` as the set of
    already known entries, then walks ``./smiles_cas_N7090.smi``. For every
    compound the ``_Name`` property is resolved to SMILES through ``cirpy``;
    compounds whose resolved SMILES is already known are skipped, the rest
    are written to ``new_data copy.csv`` as ``name;smiles;label`` rows,
    where ``label`` is the compound's ``'0'`` property.

    Compounds that raise ``AttributeError`` are reported and skipped.
    """
    print('Reading existing database')
    with open(constant.DATA + 'data.csv', newline='') as files:
        data = csv.reader(files, delimiter=';', quotechar=';')
        # A set gives a correct membership test whatever the row order;
        # a binary search would only be valid on sorted data.
        known = {comp[1] for comp in data}
    print('Loading new data')
    suppl = Chem.SmilesMolSupplier('./smiles_cas_N7090.smi')
    with open('new_data copy.csv', 'w', newline='') as files:
        f = csv.writer(files, delimiter=';')
        for compound in suppl:
            try:
                name = compound.GetProp('_Name')
                smile = str(cirpy.resolve(name, 'smiles'))
                label = str(compound.GetProp('0'))
                if smile in known:
                    print('Skipped')
                    continue
                # writerow() takes a sequence of fields; handing it one
                # pre-joined string would emit one character per field.
                f.writerow([name, smile, label])
                print(name + ';' + smile + ';' + label)
            except AttributeError as e:
                print(e)
                continue
Beispiel #10
0
def cas_to_smile(cas_id):
    """Return the SMILES that ``cirpy`` resolves for ``cas_id``, or ``''``.

    ``cas_id`` is converted to ``str`` and stripped of surrounding
    whitespace before the lookup.
    """
    resolved = cirpy.resolve(str(cas_id).strip(), "smiles")
    return '' if resolved is None else resolved
Beispiel #11
0
def parse_page(soup):
    """Extract CAS number, density, temperature and molecular weight.

    Every ``td`` of ``soup`` whose ``class`` contains ``"term2TD"`` is
    inspected: the element following it is tested for a known label and
    the text of the element after that is taken as the value. The CAS value
    drops its first character; a density of ``"NA"`` becomes ``None``. If a
    label occurs more than once the last occurrence wins.

    Returns ``(cas, density, temperature, weight, smiles)`` where ``smiles``
    is resolved from the CAS number through ``cirpy``. Fields that are not
    present on the page are ``None`` (and no SMILES lookup is attempted
    without a CAS number) instead of raising ``UnboundLocalError``.
    """
    cas = density = temperature = weight = None
    for cell in soup.find_all("td"):
        classes = cell.get("class")
        if classes is None or "term2TD" not in classes:
            continue
        label = cell.next
        if "CAS No." in label:
            cas = label.next.text[1:]
        if "Density" in label:
            density = label.next.text
        if "TDENL" in label:
            temperature = label.next.text
        if "Molecular Wt." in label:
            weight = label.next.text
    if density == "NA":
        density = None
    smiles = cirpy.resolve(cas, "smiles") if cas is not None else None
    return (cas, density, temperature, weight, smiles)
Beispiel #12
0
def parse_page(soup):
    """Pull the CAS number, density, temperature and weight out of a page.

    Looks at each ``td`` in ``soup`` whose ``class`` contains
    ``"term2TD"``. The element right after the cell is checked against the
    labels ``"CAS No."``, ``"Density"``, ``"TDENL"`` and
    ``"Molecular Wt."``; on a match the text of the following element is
    stored (minus its first character for the CAS number). Later matches
    overwrite earlier ones, and a density of ``"NA"`` is turned into
    ``None``.

    Returns ``(cas, density, temperature, weight, smiles)``; ``smiles``
    comes from ``cirpy.resolve(cas, "smiles")``. A field missing from the
    page is reported as ``None`` rather than raising
    ``UnboundLocalError``, and the SMILES lookup is skipped when there is
    no CAS number.
    """
    # (label searched for, field it fills)
    label_to_field = (
        ("CAS No.", "cas"),
        ("Density", "density"),
        ("TDENL", "temperature"),
        ("Molecular Wt.", "weight"),
    )
    found = {"cas": None, "density": None, "temperature": None,
             "weight": None}

    for cell in soup.find_all("td"):
        classes = cell.get("class")
        if classes is None or "term2TD" not in classes:
            continue
        label_node = cell.next
        for label, field in label_to_field:
            if label in label_node:
                text = label_node.next.text
                found[field] = text[1:] if field == "cas" else text

    if found["density"] == "NA":
        found["density"] = None
    cas = found["cas"]
    smiles = cirpy.resolve(cas, "smiles") if cas is not None else None
    return (cas, found["density"], found["temperature"], found["weight"],
            smiles)
Beispiel #13
0
    def set_chem_data(self):
        """Add PubChem-derived fields to every entry of ``self.drugs_data``.

        For each entry the compound is fetched by its ``cid_number`` and the
        entry gains ``smiles``, ``inchikey``, ``iupac_name`` (falling back
        to ``cirpy`` when PubChem has none) and ``chemical_properties``.
        When the SMILES has several dot-separated parts, ``cid_number_2``
        is set to the CID found for the first part.

        Entries whose lookup raises ``IndexError``, ``TypeError`` or
        ``pcp.BadRequestError`` are removed from ``self.drugs_data``.

        Returns ``self.drugs_data`` (the same list object, modified in
        place).
        """
        # Iterate over a snapshot: removing from the list that is being
        # iterated would make the loop skip the entry after each removal.
        for d in list(self.drugs_data):
            try:
                pcp_query = pcp.get_compounds(d['cid_number'], 'cid')[0]
                smiles = pcp_query.canonical_smiles

                d.update({
                    'smiles':
                    smiles,
                    'inchikey':
                    pcp_query.inchikey,
                    'iupac_name':
                    pcp_query.iupac_name
                    or cirpy.resolve(smiles, 'iupac_name', ['smiles']),
                    'chemical_properties':
                    dict_from_query_object(smiles, pcp_query, additional=True),
                })
                fragments = smiles.split('.')
                if len(fragments) > 1:
                    d.update({
                        'cid_number_2':
                        pcp.get_compounds(fragments[0], 'smiles')[0].cid
                    })
            except (IndexError, TypeError, pcp.BadRequestError):
                self.drugs_data.remove(d)
        return self.drugs_data
Beispiel #14
0
def LoadMutagenicity():
    """Write the not-yet-known compounds, with resolved SMILES, to a CSV file.

    The second column of ``constant.DATA + 'data.csv'`` is loaded as the
    set of known entries. Each compound from ``./smiles_cas_N7090.smi`` has
    its ``_Name`` property resolved to SMILES through ``cirpy``; if that
    SMILES is already known the compound is skipped, otherwise a
    ``name;smiles;label`` row is written to ``new_data copy.csv`` (``label``
    being the compound's ``'0'`` property).

    A compound that raises ``AttributeError`` is reported and skipped.
    """
    print('Reading existing database')
    with open(constant.DATA + 'data.csv', newline='') as files:
        data = csv.reader(files, delimiter=';', quotechar=';')
        # Membership is tested against a set: the former binary search was
        # only correct if the file happened to be sorted on this column.
        known_smiles = {comp[1] for comp in data}
    print('Loading new data')
    suppl = Chem.SmilesMolSupplier('./smiles_cas_N7090.smi')
    with open('new_data copy.csv', 'w', newline='') as files:
        f = csv.writer(files, delimiter=';')
        for compound in suppl:
            try:
                name = compound.GetProp('_Name')
                smile = str(cirpy.resolve(name, 'smiles'))
                label = str(compound.GetProp('0'))
                if smile in known_smiles:
                    print('Skipped')
                    continue
                # Pass the fields as a list; writerow() on a plain string
                # splits it into single-character fields.
                f.writerow([name, smile, label])
                print(';'.join((name, smile, label)))
            except AttributeError as e:
                print(e)
                continue
Beispiel #15
0
 def parse_cas(self):
     """Build a ``Species`` from ``self.cas`` via the ``cas_number`` resolver.

     Raises ``ConversionError`` when the CAS number does not resolve.
     """
     smiles = cirpy.resolve(self.cas, 'smiles', ['cas_number'])
     if smiles is not None:
         return Species().fromSMILES(smiles)
     raise ConversionError(
         'Could not resolve CAS number for species {}.'.format(self.prime_id))
Beispiel #16
0
def query_inchi(chem):
    """Return the standard InChI of ``chem`` as resolved by cirpy.

    On ``urllib2.URLError`` the lookup is retried after a one-second pause;
    the result of the retry is returned to the caller (it used to be
    dropped, so any call that needed a retry returned ``None``).
    """
    print('Query for inchi')
    try:
        return cirpy.resolve(chem, 'stdinchi')
    except urllib2.URLError:
        print('Sleeping for inchikey')
        time.sleep(1)
        return query_inchi(chem)
Beispiel #17
0
def smiles2stdinchikey(smiles):
  """Return what ``cirpy`` resolves as ``'stdinchikey'`` for ``smiles``.

  ``None`` is passed straight through without a lookup.
  """
  import cirpy
  import queryDevice

  if smiles is not None:
    return cirpy.resolve(smiles, 'stdinchikey')
  return None
Beispiel #18
0
def fetch_name(s):
    """
    Look up the IUPAC name of the structure described by `s`.

    The format reported by get_format(s) selects the lookup: 'smi' and
    'inchi' strings are sent to cirpy as they are, 'xyz' input is first
    converted to InChI through get_mol(s). Any other format gives None.
    Requires cirpy module and internet connection
    >>> print(fetch_name('C=O'))
    FORMALDEHYDE
    """
    import cirpy
    frm = get_format(s)
    if frm in ('smi', 'inchi'):
        resolver = 'smiles' if frm == 'smi' else 'inchi'
        return cirpy.resolve(s, 'iupac_name', resolvers=[resolver])
    if frm == 'xyz':
        inchi = get_mol(s).write('inchi').strip()
        return cirpy.resolve(inchi, 'iupac_name', resolvers=['inchi'])
    return None
Beispiel #19
0
def cas_odor_url(url_chemical):
    """Scrape the CAS number and odor descriptors from a chemical's page.

    The page is fetched with ``request_func``. One hard-coded URL is
    treated as having no page and yields a dict whose values are all
    ``'No Page'``. For every other URL the result has:

    * ``cas_number``   -- text of the ``td.radw11`` cell of the first
      ``cheminfo`` table, or ``'No Cas'`` when there is no such cell;
    * ``descriptors``  -- de-duplicated items produced by
      ``descriptor_formatter`` from every ``'Odor Description'`` cell;
    * ``smile_string`` -- SMILES resolved from the CAS number through
      ``cirpy``, or ``'No Smiles'``;
    * ``page``         -- ``url_chemical``.
    """
    page = request_func(url_chemical)
    if url_chemical == 'http://www.thegoodscentscompany.com/data/rw1109421.html':
        print('No page')
        return {'cas_number': 'No Page', 'descriptors': 'No Page',
                'smile_string': 'No Page', 'page': 'No Page'}

    cas_cell = page.find('table', 'cheminfo').find('tbody').find('td', 'radw11')
    cas_n = cas_cell.text if cas_cell is not None else 'No Cas'

    descriptor_list = []
    for table in page.find_all('table', class_='cheminfo'):
        for tag in table.find_all('td'):
            if tag.has_attr('class') and tag.attrs['class'][0] == 'radw5':
                text = tag.get_text()
                if 'Odor Description' in text:
                    cleaned = text.replace('Odor Description:', '').lower()
                    descriptor_list.append(descriptor_formatter(cleaned))
    # Flatten and de-duplicate once, after all cells have been collected.
    descriptor_list_final = list(set(itertools.chain(*descriptor_list)))

    # Resolve once (each lookup is a remote call) and never for the
    # 'No Cas' placeholder, which is not an identifier.
    smiles_str = 'No Smiles'
    if cas_n != 'No Cas':
        smiles_str = cirpy.resolve(cas_n, 'smiles') or 'No Smiles'

    output_dict = {'cas_number': cas_n, 'descriptors': descriptor_list_final,
                   'smile_string': smiles_str, 'page': url_chemical}
    return output_dict
Beispiel #20
0
def find_smiles(cas):
    '''
    Look up a SMILES representation with CirPy.

    Inputs:
        - cas (str): CAS number, passed to CirPy unchanged
    Outputs:
        - whatever ``cirpy.resolve(cas, 'smiles')`` returns
    '''
    smiles = cirpy.resolve(cas, 'smiles')
    return smiles
Beispiel #21
0
def resolve_structure(compound):
    """Resolve ``compound`` to SMILES with CIRPY.

    If the lookup raises ``URLError`` a warning is logged and ``compound``
    is returned unchanged.
    """
    try:
        return cirpy.resolve(compound, 'smiles')
    except URLError:
        log.warning(
            'Cannot connect to Chemical Identify Resolver - chemical names may not be resolved.'
        )
    return compound
Beispiel #22
0
 def test_cml(self):
     """Check that Aspirin resolves to a CML list holding one molecule."""
     cml_ns = '{http://www.xml-cml.org/schema/cml2/core}'
     root = etree.fromstring(resolve('Aspirin', 'cml'))
     self.assertEqual(root.tag, cml_ns + 'list')
     molecules = root.findall('.//' + cml_ns + 'molecule')
     self.assertEqual(len(molecules), 1)
Beispiel #23
0
def query_smiles(chem):
    """Return the SMILES of ``chem`` as resolved by cirpy.

    On ``urllib2.URLError`` the lookup is retried after a one-second pause
    and the result of the retry is returned (it used to be dropped, so any
    call that needed a retry returned ``None``). Any other exception is
    printed and ``None`` is returned.
    """
    print('Query for smiles')
    try:
        return cirpy.resolve(chem, 'smiles')
    except urllib2.URLError:
        print('Sleeping for smiles')
        time.sleep(1)
        return query_smiles(chem)
    except Exception as e:
        print(e)
        return None
Beispiel #24
0
def canonicalize_smiles(result):
    """Replace ``result.smiles`` with the SMILES that NCI CIR returns for it.

    Nothing is looked up when ``result.smiles`` is empty, and the value is
    left alone when the lookup returns nothing. The SMILES is printed
    before and after. Returns ``result`` itself.
    """
    print('SMILES before cirpy: %s' % result.smiles)
    canon_smiles = (cirpy.resolve(result.smiles, 'smiles')
                    if result.smiles else None)
    if canon_smiles:
        result.smiles = canon_smiles

    print('SMILES after cirpy: %s' % result.smiles)
    return result
Beispiel #25
0
def fetch_smiles(s):
    """
    Resolve the chemical name `s` to a smiles string through cirpy.
    Requires cirpy module and internet connection
    >>> fetch_smiles('methane')
    'C'
    """
    import cirpy
    return cirpy.resolve(s, 'smiles') if cirpy else None
def resolve_via_cirpy(identifier, target, source):
    """Convert ``identifier`` from ``source`` to ``target`` through cirpy.

    ``caches[source][target]`` is consulted first; on a miss (``None``) the
    identifier is resolved and the outcome is stored in that cache. The
    resolver hints are ``name_by_opsin``/``name_by_cir`` for ``'name'``,
    ``cas_number`` for ``'cas'``, and ``source`` itself otherwise.

    Raises ``CirpyError(504, ...)`` for HTTP 504/408 and
    ``CirpyError(500, ...)`` for any other ``HTTPError``.
    """
    try:
        cache = caches[source][target]
        converted = cache.get(identifier)
        if converted is None:
            if source == 'name':
                sourcehints = ['name_by_opsin', 'name_by_cir']
            elif source == 'cas':
                sourcehints = ['cas_number']
            else:
                sourcehints = [source]
            converted = cirpy.resolve(identifier, target, sourcehints)
            cache.set(identifier, converted)
        return converted
    except HTTPError as err:
        if err.code in (504, 408):
            raise CirpyError(504, "Timeout while waiting for identifier resolution service")
        # format() separates the reason from the message and also copes
        # with a reason that is not a plain string.
        raise CirpyError(
            500,
            "HTTPError while communicating with identifier resolution service: {}".format(err.reason))
def cactus_search(comp_name, type):
    """Resolve ``comp_name`` to the representation ``type`` through cirpy.

    An empty result is returned as it is. For ``'stdinchikey'`` the
    ``'InChIKey='`` text is removed; for ``'names'`` the synonyms accepted
    by ``get_relevant_synonym`` are joined into one string in which every
    synonym is preceded by ``';'``. Other types are returned unchanged.
    """
    result = cirpy.resolve(comp_name, type)
    if not result:
        return result
    if type == 'stdinchikey':
        return result.replace('InChIKey=', '')
    if type == 'names':
        return ''.join(';' + synonym for synonym in result
                       if get_relevant_synonym(synonym))
    return result
Beispiel #28
0
def fetch_IUPAC_name(s):
    """
    Return IUPAC name for a given smiles or inchi string.

    The format reported by get_format(s) selects the lookup: 'smi' and
    'inchi' strings are sent to cirpy as they are, 'xyz' input is first
    converted to InChI through get_mol(s). Any other format gives None.
    Returns None as well when the cirpy module is not installed,
    see http://cirpy.readthedocs.io/
    Requires cirpy module and internet connection
    >>> print(fetch_IUPAC_name('C=O'))
    FORMALDEHYDE
    """
    try:
        import cirpy
    except ImportError:
        # Only a missing module is tolerated; other failures propagate.
        return None
    frm = get_format(s)
    if frm == 'smi':
        name = cirpy.resolve(s, 'iupac_name', resolvers=['smiles'])
    elif frm == 'inchi':
        name = cirpy.resolve(s, 'iupac_name', resolvers=['inchi'])
    elif frm == 'xyz':
        mol = get_mol(s)
        name = cirpy.resolve(mol.write('inchi').strip(), 'iupac_name',
                             resolvers=['inchi'])
    else:
        name = None
    return name
Beispiel #29
0
def fetch_inchi(s):
    """
    Returns the InChI string for a given chemical name.
    Returns the string 'requires_cirpy' when the cirpy module cannot be
    imported.
    Requires cirpy module and internet connection
    >>> fetch_inchi('methane')
    'InChI=1/CH4/h1H4'
    """
    try:
        import cirpy
    except ImportError:
        # Return the marker right away: falling through to a test on
        # ``cirpy`` would raise UnboundLocalError, since the failed import
        # never bound the local name.
        return 'requires_cirpy'
    return cirpy.resolve(s, 'inchi')
Beispiel #30
0
def get_smiles(com, user=None):
    """Collect SMILES strings for the compound ``com``.

    Identifiers of ``com`` already typed as SMILES are returned at once.
    Otherwise its ChemSpider, ChEBI and CAS identifiers are converted, and
    each newly found SMILES is recorded on ``com`` through
    ``Compound.add_identifier``. If that yields nothing, the first KEGG
    identifier triggers ``Compound.chebi_id(com)`` followed by
    ``Compound.smiles(com)``.

    ``user`` is handed to ``Compound.chemspider_compound``; when it is
    ``None`` the ChemSpider identifiers are skipped and a message is
    printed instead.

    Returns a list of SMILES strings, or the result of
    ``Compound.smiles(com)`` on the KEGG path, or ``None`` (implicitly)
    when nothing could be converted.
    """
    smiles_array = []
    
    # Pass 1: SMILES identifiers that ``com`` already carries.
    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(com.identifiers, ["smiles"]):
        if iden["identifier"] not in smiles_array:
            smiles_array.append(iden["identifier"])

    if smiles_array:
        return smiles_array
    
    # Identifiers handled so far, shared by all conversion passes below.
    ids_completed = []
    
    # Pass 2: ChemSpider (needs ``user``).
    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(com.identifiers, ["chemspider", "chemspider id", "chemspider identifier", "cs id", "csid"]):
        if iden["identifier"] not in ids_completed and user is not None:
            ids_completed.append(iden["identifier"])
            
            for cs_com in gnomics.objects.compound.Compound.chemspider_compound(com, user):
                if cs_com.smiles not in smiles_array:
                    gnomics.objects.compound.Compound.add_identifier(com, identifier = cs_com.smiles, language = None, identifier_type = "SMILES", source = "ChemSpider")
                    smiles_array.append(cs_com.smiles)
        
        elif iden["identifier"] not in ids_completed and user is None:
            ids_completed.append(iden["identifier"])
            print("Cannot use ChemSpider conversion when user is None. Please create and pass a valid user with a ChemSpider security token to this method.")
    
    # Pass 3: ChEBI.
    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(com.identifiers, ["chebi", "chebi id", "chebi identifier"]):
        if iden["identifier"] not in ids_completed:
            ids_completed.append(iden["identifier"])
            
            for sub_com in gnomics.objects.compound.Compound.chebi_entity(com):
                if sub_com.get_smiles() not in smiles_array:
                    gnomics.objects.compound.Compound.add_identifier(com, identifier = sub_com.get_smiles(), language = None, identifier_type = "SMILES", source = "ChEBI")
                    smiles_array.append(sub_com.get_smiles())
    
    # Pass 4: CAS numbers, resolved through CIR.
    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(com.identifiers, ["cas", "cas registry", "cas registry number", "cas rn"]):
        if iden["identifier"] not in ids_completed:
            ids_completed.append(iden["identifier"])
            
            smiles = cirpy.resolve(iden["identifier"], "smiles")
            # Unresolved lookups (None or the string "None") are not stored.
            if smiles not in smiles_array and smiles is not None and smiles != "None":
                gnomics.objects.compound.Compound.add_identifier(com, identifier = smiles, language = None, identifier_type = "SMILES", source = "CIR")
                smiles_array.append(smiles)
                
    if smiles_array:
        return smiles_array
    
    # Last resort: the loop body returns on its first iteration, so this
    # runs at most once, and only if ``com`` has a KEGG identifier.
    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(com.identifiers, ["kegg compound", "kegg compound id", "kegg compound identifier", "kegg", "kegg compound accession", "kegg id", "kegg identifier", "kegg accession"]):
        gnomics.objects.compound.Compound.chebi_id(com)
        return gnomics.objects.compound.Compound.smiles(com)
Beispiel #31
0
def fetch_inchi(s):
    """
    Returns the InChI string for a given chemical name.
    Returns None when the cirpy module is not installed,
    see http://cirpy.readthedocs.io/
    Requires cirpy module and internet connection
    >>> fetch_inchi('methane')
    'InChI=1/CH4/h1H4'
    """
    try:
        import cirpy
    except ImportError:
        # Only a missing module is tolerated; other failures propagate.
        return None
    return cirpy.resolve(s, 'inchi')
Beispiel #32
0
def fetch_smiles(s):
    """
    Returns the smiles string for a given chemical name.
    Returns None when the cirpy module is not installed,
    see http://cirpy.readthedocs.io/
    Requires cirpy module and internet connection
    >>> fetch_smiles('methane')
    'C'
    """
    try:
        import cirpy
    except ImportError:
        # Only a missing module is tolerated; other failures propagate.
        return None
    return cirpy.resolve(s, 'smiles')
Beispiel #33
0
def get_standard_inchi_key(com, user=None):
    """Collect standard InChI keys for the compound ``com``.

    Identifiers of ``com`` already typed as standard InChI keys are
    returned at once. Otherwise its ChemSpider, CAS and ChEMBL identifiers
    are converted, and each newly found key is recorded on ``com`` through
    ``Compound.add_identifier``.

    ``user`` is handed to ``Compound.chemspider_compound``; when it is
    ``None`` a message is printed for each ChemSpider identifier instead.

    Returns the list of keys found (possibly empty).
    """
    
    inchi_key_array = []
    
    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(com.identifiers, ["standard inchi key", "standard iupac international chemical id key", "standard iupac international chemical identifier key", "stdinchikey"]):
        if iden["identifier"] not in inchi_key_array:
            inchi_key_array.append(iden["identifier"])
            
    if inchi_key_array:
        return inchi_key_array
            
    # Identifiers handled so far, shared by all conversion passes below.
    ids_completed = []
    
    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(com.identifiers, ["chemspider", "chemspider id", "chemspider identifier", "cs id", "csid"]):
        if iden["identifier"] not in ids_completed and user is not None:
            ids_completed.append(iden["identifier"])
    
            for sub_com in gnomics.objects.compound.Compound.chemspider_compound(com, user):
                temp_inchi_key = sub_com.stdinchikey
                if temp_inchi_key not in inchi_key_array:
                    gnomics.objects.compound.Compound.add_identifier(com, identifier = temp_inchi_key, identifier_type = "Standard InChI Key", language = None, source = "ChemSpider")
                    inchi_key_array.append(temp_inchi_key)
                
        elif user is None:
            
            print("Cannot use ChemSpider conversion when user is None. Please create and pass a valid user with a ChemSpider security token to this method.")
    
    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(com.identifiers, ["cas", "cas registry", "cas registry number", "cas rn"]):
        if iden["identifier"] not in ids_completed:
            ids_completed.append(iden["identifier"])
            temp_inchi_key = cirpy.resolve(iden["identifier"], "stdinchikey")
            # Skip unresolved lookups so that None is never stored as a key.
            if temp_inchi_key not in inchi_key_array and temp_inchi_key is not None and temp_inchi_key != "None":
                gnomics.objects.compound.Compound.add_identifier(com, identifier = temp_inchi_key, identifier_type = "Standard InChI Key", language = None, source = "CIR")
                inchi_key_array.append(temp_inchi_key)
                
    for iden in gnomics.objects.auxiliary_files.identifier.filter_identifiers(com.identifiers, ["chembl", "chembl compound", "chembl compound id", "chembl compound identifier", "chembl id", "chembl identifier"]):
        if iden["identifier"] not in ids_completed:
            ids_completed.append(iden["identifier"])
            
            temp_inchi_key = gnomics.objects.compound.Compound.chembl_molecule(com)[0]["molecule_structures"]["standard_inchi_key"]
            
            # The check used to read an undefined name (``temp_inchi``),
            # which raised NameError for every new ChEMBL key.
            if temp_inchi_key not in inchi_key_array and temp_inchi_key is not None and temp_inchi_key != "None":
                gnomics.objects.compound.Compound.add_identifier(com, identifier = temp_inchi_key, identifier_type = "Standard InChI Key", language = None, source = "ChEMBL")
                inchi_key_array.append(temp_inchi_key)
                
    return inchi_key_array
def resolve_via_cirpy(identifier, target, source):
    """Convert ``identifier`` from ``source`` to ``target`` through cirpy.

    The cache at ``caches[source][target]`` is checked first; when it has
    no entry (``None``) the identifier is resolved and the outcome is
    written back to the cache. Resolver hints: ``name_by_opsin`` and
    ``name_by_cir`` for ``'name'``, ``cas_number`` for ``'cas'``, otherwise
    ``source`` itself.

    Raises ``CirpyError(504, ...)`` for HTTP 504/408 and
    ``CirpyError(500, ...)`` for any other ``HTTPError``.
    """
    try:
        cache = caches[source][target]
        converted = cache.get(identifier)
        if converted is None:
            sourcehint = 'cas_number' if source == 'cas' else source
            if sourcehint == 'name':
                sourcehints = ['name_by_opsin', 'name_by_cir']
            else:
                sourcehints = [sourcehint]
            converted = cirpy.resolve(identifier, target, sourcehints)
            cache.set(identifier, converted)
        return converted
    except HTTPError as err:
        if err.code in (504, 408):
            raise CirpyError(
                504, "Timeout while waiting for identifier resolution service")
        # format() puts a separator before the reason and does not fail
        # when the reason is not a plain string.
        raise CirpyError(
            500,
            "HTTPError while communicating with identifier resolution service"
            ": {}".format(err.reason))
Beispiel #35
0
def resolveID(file, column):  # TODO Consider incorporation of this function in load_csv()
    """Resolve chemical identifiers to SMILES using the cirpy package.

    Parameters:
        file: a pandas DataFrame, or the path (string) of a csv file to read.
        column: header of the column that holds the identifiers.

    Returns:
        The dataframe with an added ``smiles`` column, after ``dropna()``.
        A DataFrame passed in is modified in place (it gains the column).

    Raises:
        TypeError: if ``file`` is neither a DataFrame nor a string.
    """
    if isinstance(file, pd.DataFrame):
        df = file
    elif isinstance(file, str):
        df = pd.read_csv(file)  # read csv file
    else:
        raise TypeError(
            'file must be a pandas DataFrame or a csv path string, got {}'
            .format(type(file).__name__))

    # look up every identifier and store the SMILES next to it
    df['smiles'] = df[column].map(
        lambda identifier: cirpy.resolve(identifier, 'smiles'))

    # NOTE(review): dropna() drops rows with a missing value in ANY column,
    # not only rows whose SMILES was not found -- confirm this is intended.
    return df.dropna()
Beispiel #36
0
    def __init__(self, cas_or_aa, min_atoms=6):
        """
        Set up the molecule group from cas numbers OR amino acid names
        Requires openmoltools.openeye and cirpy

        Arguments
            cas_or_aa (list of strings) either cas number or name of amino acid

        Optional Arguments
            min_atoms (int) - a minimum number of atoms for substructure match (default: 6)

        Attributes set here:
            self.cas_or_aa (list of strings)
              the input, as given
            self.smiles_strings (list of strings)
              what cirpy resolved for each input entry
            self.ligands (list of OEMol)
              openeye molecules built from the smiles, with charges assigned
            self.title (string)
              first input entry followed by "_and_analogs"
            self.min_atoms (int)
              minimum number of common atoms for a substructure match
            self.common_substructure, self.dual_topology,
            self.pdb_filename, self.ffxml_filename
              start as None
            self.each_molecule_N, self.mapping_dictionaries
              start as empty lists

        """

        self.cas_or_aa = cas_or_aa
        self.smiles_strings = []
        self.ligands = []
        for identifier in cas_or_aa:
            smiles = cirpy.resolve(identifier, 'smiles')
            self.smiles_strings.append(smiles)
            charged = openeye.get_charges(
                openeye.smiles_to_oemol(smiles), strictStereo=False)
            self.ligands.append(charged)
        self.title = self.cas_or_aa[0] + "_and_analogs"
        self.min_atoms = min_atoms

        # Filled in later; nothing is computed at construction time.
        self.common_substructure = None
        self.dual_topology = None
        self.each_molecule_N = []
        self.mapping_dictionaries = []
        self.pdb_filename = None
        self.ffxml_filename = None
Beispiel #37
0
    def __init__(self, cas_or_aa, min_atoms=6):
        """
        Create the molecule group from cas numbers OR amino acid names
        Requires openmoltools.openeye and cirpy

        Arguments
            cas_or_aa (list of strings) either cas number or name of amino acid

        Optional Arguments
            min_atoms (int) - a minimum number of atoms for substructure match (default: 6)

        Instance variables created:
            self.cas_or_aa (list of strings)
              the input, stored unchanged
            self.smiles_strings (list of strings)
              the cirpy result for each input entry
            self.ligands (list of OEMol)
              openeye molecules made from the smiles, after get_charges
            self.title (string)
              first input entry plus "_and_analogs"
            self.min_atoms (int)
              minimum number of common atoms for a substructure match
            self.common_substructure, self.dual_topology,
            self.pdb_filename, self.ffxml_filename
              initialised to None
            self.each_molecule_N, self.mapping_dictionaries
              initialised to empty lists

        """

        self.cas_or_aa = cas_or_aa
        self.smiles_strings = []
        self.ligands = []
        for entry in cas_or_aa:
            entry_smiles = cirpy.resolve(entry, 'smiles')
            self.smiles_strings.append(entry_smiles)
            molecule = openeye.smiles_to_oemol(entry_smiles)
            self.ligands.append(
                openeye.get_charges(molecule, strictStereo=False))
        first_entry = self.cas_or_aa[0]
        self.title = first_entry + "_and_analogs"
        self.min_atoms = min_atoms

        # Placeholders; no values are derived here.
        self.common_substructure = None
        self.dual_topology = None
        self.pdb_filename = None
        self.ffxml_filename = None
        self.each_molecule_N = []
        self.mapping_dictionaries = []
Beispiel #38
0
def find_iupac_names():
    """Look up an IUPAC name through CIR for every chemical in the data frame.

    Chemicals are taken from the "chemical_" column of read_df(), ordered from
    the most to the least frequent. Falsy chemical names are skipped and
    contribute nothing to the result.

    Returns:
        list: one cirpy.resolve(..., "iupac_name") result per non-empty
        chemical, in lookup order (None where CIR found nothing).
    """
    df_ = read_df()
    chems = df_.groupby("chemical_").size().sort_values()[::-1].index.values
    iupac_names = []
    num_found = 0
    for i, chem in enumerate(chems):
        if not chem:
            continue
        # Single-argument print(...) calls behave identically under the
        # Python 2 print statement and the Python 3 print function.
        print(chem)
        iupac = cirpy.resolve(chem, "iupac_name")
        if iupac:
            print("Found: %s" % (iupac,))
            num_found += 1
        else:
            print("Not found.")
        iupac_names.append(iupac)
        # Progress: names found so far / chemicals visited (skipped ones included).
        print("{} / {}".format(num_found, i + 1))
        print("")
        # Pause between requests (presumably to be gentle on the web service).
        time.sleep(1)
    return iupac_names
Beispiel #39
0
def add_structures(result):
    """Attach a 'smiles' entry to every record whose names can be resolved.

    Resolution is attempted in this order:
      1. OPSIN, run once as a subprocess over all names of all records.
      2. The local ChemDict table (a cache of earlier CIR lookups).
      3. NCI CIR via cirpy; the outcome (including a failed lookup) is added
         to the database session so it is not requested again.

    Args:
        result: dict with a 'records' list; each record may carry a 'names'
            list. Records are modified in place.

    Returns:
        The same ``result`` object.
    """
    # Run OPSIN
    tf = tempfile.NamedTemporaryFile(delete=False)
    result_path = '%s.result' % tf.name
    try:
        with tf:
            for record in result['records']:
                for name in record.get('names', []):
                    tf.write(('%s\n' % name).encode('utf-8'))
        # NOTE(review): the OPSIN exit status is not checked; a failed run
        # surfaces as an error when the result file is opened below.
        subprocess.call([
            app.config['OPSIN_PATH'], '--allowRadicals', '--wildcardRadicals',
            '--allowAcidsWithoutAcid', '--allowUninterpretableStereo', tf.name,
            result_path
        ])
        with open(result_path) as res:
            structures = [line.strip() for line in res]
        # OPSIN writes one output line per input name, so walk the names in
        # the same order they were written and consume the lines in step.
        i = 0
        for record in result['records']:
            for name in record.get('names', []):
                if 'smiles' not in record and structures[i]:
                    log.debug('Resolved with OPSIN: %s = %s', name,
                              structures[i])
                    record['smiles'] = structures[i]
                i += 1
    finally:
        # Always remove the scratch files, even when OPSIN or parsing failed.
        for path in (tf.name, result_path):
            if os.path.exists(path):
                os.remove(path)
    # For failures, use NCI CIR (with local cache of results)
    for record in result['records']:
        for name in record.get('names', []):
            if 'smiles' not in record:
                local_entry = ChemDict.query.filter_by(name=name).first()
                if local_entry:
                    log.debug('Resolved with local dict: %s = %s', name,
                              local_entry.smiles)
                    if local_entry.smiles:
                        record['smiles'] = local_entry.smiles
                else:
                    smiles = cirpy.resolve(
                        chem_normalize(name).encode('utf-8'), 'smiles')
                    log.debug('Resolved with CIR: %s = %s', name, smiles)
                    # Negative results are stored too, so a name is only
                    # ever sent to CIR once.
                    db.session.add(ChemDict(name=name, smiles=smiles))
                    if smiles:
                        record['smiles'] = smiles
    return result
Beispiel #40
0
def LoadAMES():
	"""Append AMES compounds that are not yet in the existing database.

	Reads the second column of ``constant.DATA + 'data.csv'`` as the set of
	known SMILES, then walks ``./AMESdata.sdf``, resolves each record's
	IDNUMBER to SMILES through CIR and writes unseen compounds to
	``new_data.csv`` as ``IDNUMBER;SMILES;AMES_Activity`` rows.
	"""
	known_smiles = set()
	print('Reading existing database')
	# NOTE(review): quotechar is the same character as the delimiter; left
	# unchanged -- confirm this parses data.csv as intended.
	with open(constant.DATA + 'data.csv', newline='') as files:
		data = csv.reader(files, delimiter=';', quotechar=';')
		for comp in data:
			# Blank or single-column rows carry no SMILES column.
			if len(comp) > 1:
				known_smiles.add(comp[1])
	print('Loading new data')
	suppl = Chem.SDMolSupplier('./AMESdata.sdf')
	with open('new_data.csv', 'w', newline='') as files:
		f = csv.writer(files, delimiter=';')
		for compound in suppl:
			# SDMolSupplier yields None for records it cannot parse.
			if compound is None:
				continue
			idnumber = compound.GetProp('IDNUMBER')
			# str() keeps a failed lookup as the literal 'None'.
			smile = str(cirpy.resolve(idnumber, 'smiles'))
			row = [idnumber, smile, compound.GetProp('AMES_Activity')]
			# Set membership works regardless of the order of data.csv; a
			# binary search would silently miss entries in an unsorted file.
			if smile in known_smiles:
				print('Skipped')
				continue
			# writerow needs a sequence of fields; passing a pre-joined string
			# would emit every character as its own column.
			f.writerow(row)
			print(';'.join(row))
Beispiel #41
0
    def resolve(self):
        """Fill atoms, coordinates and masses from the CIR 'xyz' record of self.name.

        Sets self.natoms from the first line of the record, resets self.frag
        to a list of natoms zeros, and appends one entry per atom line to
        self.coord, self.atom and self.mass.
        """
        xyz_text = cirpy.resolve(self.name, 'xyz')
        print("Resolved chemical identification of %s" % self.name)
        rows = xyz_text.split("\n")
        self.natoms = int(rows[0])
        self.frag = [0] * self.natoms

        for index, row in enumerate(rows):
            # Rows 0 and 1 are skipped; the next natoms rows hold
            # "symbol x y z". Anything after that is ignored.
            if not 2 <= index < self.natoms + 2:
                continue
            fields = row.split()
            position = [float(fields[1]), float(fields[2]), float(fields[3])]
            self.coord.append(position)
            self.atom.append(fields[0])
            self.mass.append(element.ELEMENTS[self.atom[-1]].mass)
        return
Beispiel #42
0
def canonicalize_smiles(smiles, sanitize=True, iso=False, SLN=False):
    """Canonicalize given SMILES string
    The function is a wrapper around RDKIT function

    :arguments:
      smiles -- (string) a compound in SMILES format
      sanitize -- (bool) sanitize the molecule
      iso -- (bool) include isomeric data in SMILES
      SLN -- (bool) is the molecule given in SLN format

    :return:
      canonicalized SMILES
    """
    # Non-SMILES input is first translated to SMILES through cirpy.
    source = cirpy.resolve(smiles, "smiles") if SLN else smiles

    # Round-trip once through RDKit, then re-parse the canonical string.
    first_pass = Chem.MolToSmiles(
        Chem.MolFromSmiles(source), canonical=True, isomericSmiles=iso
    )
    mol = Chem.MolFromSmiles(first_pass)

    if sanitize:
        mol.UpdatePropertyCache(strict=False)
        mol = Chem.RemoveHs(
            mol, implicitOnly=False, updateExplicitCount=True, sanitize=True
        )
        Chem.SanitizeMol(
            mol, Chem.rdmolops.SanitizeFlags.SANITIZE_ALL, catchErrors=False
        )
        AllChem.AssignStereochemistry(
            mol, cleanIt=True, force=True, flagPossibleStereoCenters=True
        )

    return Chem.MolToSmiles(mol, canonical=True, isomericSmiles=iso)
Beispiel #43
0
def download_cas_to_mol(molecule_cas, sanitize=True):
    """
    Download molecule via cas, add hydrogens, clean up

    The 'sdf3000' representation is requested from CIR with get_3d=True,
    parsed with sdbs_util.sdfstr_to_mol, given explicit hydrogens, sanitized
    and embedded with ETKDG. The resulting mol is returned.

    NOTE: the ``sanitize`` argument is accepted but never consulted;
    sanitization always runs.
    """
    sdf_block = cirpy.resolve(molecule_cas, 'sdf3000', get_3d=True)
    hydrogenated = Chem.AddHs(sdbs_util.sdfstr_to_mol(sdf_block))

    # this is not a good place to do this
    # # FOR INSANE REASONS I DONT UNDERSTAND we get
    # #  INITROT  --  Rotation about     1     4 occurs more than once in Z-matrix
    # # and supposedly reordering helps

    # np.random.seed(0)
    # mol = Chem.RenumberAtoms(mol, np.random.permutation(mol.GetNumAtoms()).astype(int).tolist())

    #mol.SetProp("_Name", molecule_cas)
    # rough geometry
    Chem.SanitizeMol(hydrogenated, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL)
    AllChem.EmbedMolecule(hydrogenated, AllChem.ETKDG())

    return hydrogenated
def resolve_cached(x, rtype):
   """Resolve identifier ``x`` to representation ``rtype`` through cirpy.

   NOTE(review): no caching happens in this body; any caching implied by the
   name would have to come from a decorator that is not visible here.
   """
   resolved = cirpy.resolve(x, rtype)
   return resolved
Beispiel #45
0
def MOL(SMILES):
    """Return the 'mol' representation of a SMILES string resolved through CIR.

    An HTTPError raised by the lookup is reported on stdout and not
    re-raised.

    Returns:
        The value from cirpy.resolve(SMILES, "mol"), or None when the lookup
        found nothing or failed with an HTTPError.
    """
    molfile = None
    try:
        molfile = cirpy.resolve(SMILES, "mol")
    except HTTPError as e:
        # "except ... as" and a single-argument print(...) run on both
        # Python 2.6+ and Python 3.
        print("HTTPError: %s " % e.code)
    # The looked-up value was previously computed and then discarded.
    return molfile
Beispiel #46
0
import cirpy
import sys
import pandas as pd
#molecule = sys.argv[1]
# For the first 24 names in SAMPL4.csv: resolve the name to SMILES, resolve
# the SMILES to a PDB record, and save it as <name-without-whitespace>.pdb.
df=pd.read_csv('SAMPL4.csv')
for molecule in df.NAME[:24]:
	smiles_code = cirpy.resolve(molecule, 'smiles')
	# Single-argument print(...) gives the same output on Python 2 and 3.
	print("%s %s" % (smiles_code, molecule))
	if smiles_code is None:
		# Nothing was found for this name; there is no structure to export.
		continue
	pdbfile = cirpy.resolve(smiles_code, 'pdb')
	if pdbfile is None:
		# write(None) would raise; skip molecules without a PDB record.
		continue
	oname = ''.join(molecule.split())
	# The context manager closes the file even if the write fails.
	with open(oname + '.pdb', "w") as file_output:
		file_output.write(pdbfile)
Beispiel #47
0
def iupac(SMILES):
    """Return the IUPAC name of a SMILES string resolved through CIR.

    An HTTPError raised by the lookup is reported on stdout and not
    re-raised.

    Returns:
        The value from cirpy.resolve(SMILES, "iupac_name"), or None when the
        lookup found nothing or failed with an HTTPError.
    """
    name = None
    try:
        name = cirpy.resolve(SMILES, "iupac_name")
    except HTTPError as e:
        # "except ... as" and a single-argument print(...) run on both
        # Python 2.6+ and Python 3.
        print("HTTPError: %s" % e.code)
    # The looked-up value was previously computed and then discarded.
    return name
Beispiel #48
0
 def test_invalid_representation_resolve(self):
     """Requesting an unknown representation must raise HTTPError."""
     self.assertRaises(HTTPError, resolve, 'Morphine', 'ogiuewrgpw')
Beispiel #49
0
 def test_tnt_smiles(self):
     """Check the SMILES string returned for 2,4,6-trinitrotoluene."""
     expected = 'Cc1c(cc(cc1[N+]([O-])=O)[N+]([O-])=O)[N+]([O-])=O'
     actual = resolve('2,4,6-trinitrotoluene', 'smiles')
     self.assertEqual(actual, expected)
Beispiel #50
0
 def test_cml(self):
     """Check that the CML output for Aspirin parses and holds one molecule."""
     namespace = '{http://www.xml-cml.org/schema/cml2/core}'
     root = etree.fromstring(resolve('Aspirin', 'cml'))
     self.assertEqual(root.tag, namespace + 'list')
     molecules = root.findall('.//' + namespace + 'molecule')
     self.assertEqual(len(molecules), 1)
Beispiel #51
0
 def test_pdb(self):
     """Check that the PDB output for Aspirin contains the expected record types."""
     pdb_text = resolve('Aspirin', 'pdb')
     for marker in ('HEADER', 'ATOM', 'CONECT'):
         self.assertIn(marker, pdb_text)
Beispiel #52
0
 def test_no_results_resolve(self):
     """An identifier with no matches must resolve to None."""
     outcome = resolve('aruighaelirugaerg', 'inchi')
     self.assertEqual(outcome, None)
import cirpy
from simtk.openmm import app
import builder
import mdtraj as md
import pymbar
import scipy.interpolate
import os
import pandas as pd
import glob

# Collect every equilibrated PDB file; each one identifies a simulation to analyse.
filenames = glob.glob("./data/equil/*.pdb")
# Strip directory and extension from a path and split the base name on "_".
filename_munger = lambda filename: os.path.splitext(os.path.split(filename)[1])[0].split("_")
# NOTE(review): the return value is discarded; presumably a connectivity
# check against the CIR service -- confirm.
cirpy.resolve("71-23-8", "formula")
# NOTE(review): `data` is initialised here but not filled in the lines below.
data = []
for pdb_filename in filenames:
    # The base name must split into exactly four "_"-separated fields.
    cas, n_molecules, temperature, stage = filename_munger(pdb_filename)
    print(cas, temperature)
    dcd_filename = "./data/production/%s_%s_%s_production.dcd" % (cas, n_molecules, temperature)
    # Skip systems whose production trajectory cannot be loaded.
    try:
        traj = md.load(dcd_filename, top=pdb_filename)
    except IOError:
        continue
    # Skip trajectories that carry no unit cell information.
    if traj.unitcell_lengths is None: continue
    rho = md.geometry.density(traj)
    # t0 is used as the first frame to keep; Neff scales the error estimate below.
    [t0, g, Neff] = pymbar.timeseries.detectEquilibration(rho)
    mu = rho[t0:].mean()
    # Standard deviation of the retained samples divided by sqrt(Neff).
    sigma = rho[t0:].std() * Neff ** -0.5
    forcefield = app.ForceField("./data/ffxml/%s.xml" % cas)
    system, charges = builder.build_simulation(traj, forcefield)
    # The temperature parsed from the filename is a string; convert before use.
    temperature = float(temperature)
    dielectric = md.geometry.static_dielectric(traj, charges, temperature)
Beispiel #54
0
 def test_alanine_smiles(self):
     """Check the SMILES string returned for Alanine."""
     expected = 'C[C@H](N)C(O)=O'
     actual = resolve('Alanine', 'smiles')
     self.assertEqual(actual, expected)
def _cir_text(value):
    """Render a CIR result (None, a string, or a list of strings) as reply text."""
    if value is None:
        return "unknown"
    if isinstance(value, (list, tuple)):
        return ', '.join(value)
    return value


def _reply_not_found(word):
    """Tell the user that `word` looked like a chemical but has no CAS number."""
    mention.reply('I\'m pretty sure '+word+' is either a chemical compound or a portion of what you ment but I wasn\'t able to find a CAS number for it.\n\nIf this is a problem, please '+footerGen(mention.permalink))


def handleword(word):
    """Reply to the current mention with chemical information about `word`.

    The word is only processed when its lower-cased form is in ``chemlist``.
    CAS number(s), SMILES and formula come from CIR; structure and property
    images come from Wolfram|Alpha. Uses the module-level ``mention``,
    ``failedlist``, ``already_done``, ``appid`` and ``server``.

    Returns:
        1 when an informational reply was posted, 0 otherwise (word unknown,
        previously failed, or no CAS number found).
    """
    #compare the word (to lower) to the chem list
    if word.lower() not in chemlist:
        #The word is not in our chemlist
        print(word+' not in my list')
        return 0

    #if we've already failed the word
    if word in failedlist:
        print(word+" is in my failed list... not trying it again")
        _reply_not_found(word)
        return 0

    #This is a new or unfailed word
    print("Resolving "+word)
    #Look up the CAS number, smiles and formula of the compound from CIR
    smiles = None
    formula = None
    try:
        cas_num = cirpy.resolve(word,'cas')
        smiles = cirpy.resolve(word,'smiles')
        formula = cirpy.resolve(word,'formula')
    except Exception:
        # Any lookup failure counts as "not found"; KeyboardInterrupt and
        # SystemExit are deliberately allowed to propagate.
        cas_num = False

    #The chemical is defined if we have at least one CAS number
    if not cas_num:
        #If a chemical was in our list but didn't resolve in the CIR we record it in the failed file
        print("Failure on "+word)
        with open("failedat.txt","a") as failedat:
            failedat.write(word+'\n')
        _reply_not_found(word)
        return 0

    #Some compounds may have multiple CAS numbers so we want to handle the grammar.
    #CIR returns a list for several results and a plain string for one, so the
    #type (not the string length) decides between singular and plural.
    if isinstance(cas_num, (list, tuple)):
        isare = "s are" if len(cas_num) > 1 else " is"
        #Build the WebBoook links and the list for multiple CAS numbers
        formattedcas = ', '.join(cas_num)
        link = ''.join("["+cas+"](http://webbook.nist.gov/cgi/cbook.cgi?ID="+cas+"&Units=SI) " for cas in cas_num)
    else:
        isare = " is"
        #Build the WebBoook link and the list for one CAS number
        formattedcas = cas_num
        link = "["+cas_num+"](http://webbook.nist.gov/cgi/cbook.cgi?ID="+cas_num+"&Units=SI)"

    #wolfram portion of the query
    waeo = wap.WolframAlphaEngine(appid, server)
    query = waeo.CreateQuery(word)
    result = waeo.PerformQuery(query)
    waeqr = wap.WolframAlphaQueryResult(result)
    pods = waeqr.Pods()
    structureimage =""
    propertiesimage=""
    propertiestext=""
    #See Wolfram|Alpha API docs for descriptions of pods
    for pod in pods:
        if str(pod[1][1]) == "Structure diagram":
            structureimage = pod[6][3][1][1]
        if str(pod[1][1]) == "Basic properties":
            propertiesimage = pod[6][3][1][1]
            propertiestext = pod[6][3][5][1]

    #Build the reply; SMILES/formula may be missing or multi-valued even when
    #a CAS number exists, so they are normalised to text first.
    mention.reply('How about some more info on '+word+':\n\nThe CAS number'+isare+' '+formattedcas+'\n\nThe chemical structure is '+_cir_text(smiles)+'\n\nChemical formula: '+_cir_text(formula)+'\n\nNIST WebBook '+link+'.\n\n\nThe following is from Wolfram|Alpha:\n\n[structure image]('+structureimage+')\n\n[basic properties image]('+propertiesimage+')\n\nBasic properties: '+propertiestext+'\n\n\n\nProvided by your friendly neighborhood Chemistry_Bot\n\n'+footerGen(mention.permalink))
    #Add the mention ot the already done set
    already_done.add(mention.id)
    print("Success on "+word)
    #Add the mention to the commented on file
    with open("commentedonchem.txt", "a") as commentedfile:
        commentedfile.write(mention.id+'\n')
    return 1
# Keep only rows flagged as good.
X["is_good"] = X_is_good
X = X[X.is_good]

# Keep single-component systems; components are joined with "__".
X["n_components"] = X.components.apply(lambda x: len(x.split("__")))
X = X[X.n_components == 1]
X.dropna(axis=1, how='all', inplace=True)

X["n_heavy_atoms"] = X.components.apply(lambda x: thermoml_lib.count_atoms(name_to_formula[x]))
X = X[X.n_heavy_atoms <= 10]
X.dropna(axis=1, how='all', inplace=True)

X["n_atoms"] = X.components.apply(lambda x: thermoml_lib.count_atoms(name_to_formula[x], which_atoms=which_atoms))
X = X[X.n_atoms <= 100]
X.dropna(axis=1, how='all', inplace=True)

X["smiles"] = X.components.apply(lambda x: cirpy.resolve(x, "smiles"))  # This should be cached via sklearn.
# Drop rows CIR could not resolve. A notnull() mask replaces the former
# "!= None" comparison plus ".ix" lookup (.ix no longer exists in pandas).
X = X[X.smiles.notnull()]


X["cas"] = X.components.apply(lambda x: thermoml_lib.get_first_entry(cirpy.resolve(x, "cas")))  # This should be cached via sklearn.
X = X[X.cas.notnull()]

X["Pressure, kPa"] = 101.325  # Assume everything within range is comparable.  

# Mean and standard deviation of each experiment per compound and state point.
mu = X.groupby(["components", "smiles", "cas", "Temperature, K", "Pressure, kPa"])[experiments].mean()
sigma = X.groupby(["components", "smiles", "cas", "Temperature, K", "Pressure, kPa"])[experiments].std().dropna()

mu = mu.dropna()