Beispiel #1
0
 def compute_one_inchi(mol):
     """Compute an MD5 hash, InChIKey and InChI for a SMILES string.

     :param mol: SMILES string handed to ``Chem.MolFromSmiles``.
     :return: tuple ``(md5_hex_of_inchi, inchikey, inchi)``, or
         ``(None, None, None)`` when no molecule could be built from *mol*.
     """
     molfrom = Chem.MolFromSmiles(mol)
     # Identity check: ``== None`` would go through the object's __eq__.
     if molfrom is None:
         return (None, None, None)
     ini = inchi.MolToInchi(molfrom)
     inikey = inchi.InchiToInchiKey(ini)
     # The hash is taken over the UTF-8 encoded InChI string.
     hsh = hashlib.md5(ini.encode('utf-8')).hexdigest()
     return (hsh, inikey, ini)
Beispiel #2
0
 def set_computable(self):
     """Derive identifiers and descriptors from the stored mol string.

     Parses ``self._mol`` with ``tool_chemical.read_string("mol", ...)`` and
     stores the results in ``_smiles`` (non-isomeric), ``_inchi``,
     ``_inchikey``, ``_molecular_formula`` and ``_molecular_weight``.
     """
     parsed = tool_chemical.read_string("mol", self._mol)

     # Structure identifiers
     self._smiles = Chem.MolToSmiles(parsed, isomericSmiles=False)
     self._inchi = inchi.MolToInchi(parsed)
     self._inchikey = inchi.MolToInchiKey(parsed)

     # Formula and exact molecular weight
     self._molecular_formula = Chem.CalcMolFormula(parsed)
     self._molecular_weight = Chem.CalcExactMolWt(parsed)
Beispiel #3
0
def convert(input, input_mod='smi'):
    """
    Derive molecular formula, InChI and InChIKey from a molecule string.

    :param input: molecule string (SMILES for the default ``input_mod``)
    :param input_mod: format tag forwarded to ``read_string``, default 'smi'
    :return: tuple of (molecular formula, inchi, inchikey)
    """
    parsed = read_string(input_mod, input)
    # Tuple elements are evaluated left to right: formula, InChI, InChIKey.
    return (
        Descriptors.rdMolDescriptors.CalcMolFormula(parsed),
        inchi.MolToInchi(parsed),
        inchi.MolToInchiKey(parsed),
    )
Beispiel #4
0
    def set_computables_from_mol(self, mol):
        """Compute and store the derived chemical properties of *mol*.

        Sets ``molecular_formula``, ``molecular_weight`` (exact weight),
        ``inchi``, ``inchikey`` and ``smiles`` (non-isomeric) on the instance.

        :param mol: molecule object passed to the RDKit calls below
        :raises SpectrumError: if any of the property computations raises
        :raises AssertionError: if any computed property is ``None``
        """
        try:  # warning comes up in pycharm (bug of pycharm)
            self.molecular_formula = Descriptors.rdMolDescriptors.CalcMolFormula(
                mol)
            self.molecular_weight = Descriptors.ExactMolWt(mol)
            self.inchi = inchi.MolToInchi(mol)
            self.inchikey = inchi.MolToInchiKey(mol)
            self.smiles = Chem.MolToSmiles(mol, isomericSmiles=False)
        except Exception as e:
            # ``e.args`` is a tuple; concatenating it to a str raised a
            # TypeError inside this handler and hid the real failure, so the
            # exception is formatted into the message instead.
            raise SpectrumError(
                "Error occurred while computing properties: {}".format(e)
            ) from e

        # Sanity checks kept as asserts so callers see the same exception type.
        assert self.molecular_formula is not None, "molecular-formula can't be None"
        assert self.molecular_weight is not None, "molecular-weight can't be None"
        assert self.inchi is not None, "inchi can't be None"
        assert self.inchikey is not None, "inchikey can't be None"
        assert self.smiles is not None, "smiles can't be None"
Beispiel #5
0
def fill_base_test(cursor):
    """Fill the test database from the toxicity CSV plus random filler rows.

    Reads ``toxicity_85832.csv`` and issues inserts into the tables
    ``molecules``, ``tasks``, ``tasks_running``, ``descriptors``,
    ``descriptors_values``, ``endpoints`` and ``experimental_data``.

    :param cursor: DB-API cursor accepting ``?`` placeholders
    :return: the same *cursor*; nothing is committed here
    """
    df = pd.read_csv("toxicity_85832.csv")
    names_of_columns = list(df.columns)
    smiles = list(df["SMILES"])
    df = df.drop("SMILES", axis=1)
    toxic_vals = np.array(df.values)
    n_molecules = len(smiles)

    # molecules: parse every canonical SMILES once and reuse the molecule for
    # both the InChI and the InChIKey
    canonize_smiles = [_canonize_mixture(smile) for smile in smiles]
    mols = [Chem.MolFromSmiles(smile) for smile in canonize_smiles]
    inchi_smiles = [inchi.MolToInchi(mol) for mol in mols]
    inchikey = [inchi.MolToInchiKey(mol) for mol in mols]
    cursor.executemany(
        """insert into 'molecules' (inchi_key,inchi,canonical_smiles) values (?,?,?)""",
        zip(inchikey, inchi_smiles, canonize_smiles))

    # tasks: 20 random descriptions
    descr_tasks = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for _ in range(20)
    ]
    cursor.executemany("""insert into 'tasks' (descr) values (?)""",
                       zip(descr_tasks))

    # tasks_running: 1000 random (task, molecule, completed) rows
    completed = [random.randint(0, 1) for _ in range(1000)]
    id_tasks = [random.randint(1, len(descr_tasks)) for _ in range(1000)]
    id_molecules = [random.randint(1, n_molecules) for _ in range(1000)]
    cursor.executemany(
        """insert into 'tasks_running' (id_task, id_molecule, completed) values (?,?,?)""",
        zip(id_tasks, id_molecules, completed))

    # descriptors: 10 random (name, version) rows followed by the mordred row
    name_of_descr = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for _ in range(10)
    ]
    name_of_version = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for _ in range(10)
    ]
    cursor.executemany(
        """insert into 'descriptors' (descriptor, version) values (?,?)""",
        zip(name_of_descr, name_of_version))
    cursor.execute(
        """insert into 'descriptors' (descriptor, version) values (?,?)""",
        ("mordred", "0.315"))

    # descriptor_values
    # NOTE(review): 11 presumably is the id of the "mordred" row inserted above
    # (10 random rows + 1) and molecule ids are assumed to run 1..N - both
    # only hold for empty tables with ids starting at 1; confirm.
    id_descriptor = [11] * n_molecules
    id_molecule = list(range(1, n_molecules + 1))
    id_tasks = [
        random.randint(1, len(descr_tasks)) for _ in range(n_molecules)
    ]
    valid = [random.randint(0, 1) for _ in range(n_molecules)]
    value = func(canonize_smiles)
    cursor.executemany(
        """insert into 'descriptors_values' (id_molecule, id_descriptor, id_task, valid, value) values (?,?,?,?,?)""",
        zip(id_molecule, id_descriptor, id_tasks, valid, value))

    # endpoints: column names are split on '_'; field 1 is the description,
    # the remaining fields (re-joined) are the type
    # NOTE(review): skipping the first column assumes it is not an endpoint
    # column - verify against the CSV layout.
    features = names_of_columns[1:]
    descriptions = [feature.split('_')[1] for feature in features]
    types = ['_'.join(feature.split('_')[2:]) for feature in features]
    cursor.executemany("""insert into 'endpoints' (desc, type) values (?,?)""",
                       zip(descriptions, types))

    # experimental data: one row per non-NaN cell, using 1-based ids
    ids_molecules = []
    ids_endpoints = []
    values_endpoints = []
    for i, row in enumerate(toxic_vals):
        for j, cell in enumerate(row):
            if not np.isnan(cell):
                ids_molecules.append(i + 1)
                ids_endpoints.append(j + 1)
                values_endpoints.append(cell)

    cursor.executemany(
        """insert into 'experimental_data' (id_molecule, id_endpoint, value) values (?,?,?)""",
        zip(ids_molecules, ids_endpoints, values_endpoints))

    return cursor
Beispiel #6
0
def calculate_inchi(smile):
    """Return the InChI string of the molecule parsed from a SMILES string."""
    molecule = Chem.MolFromSmiles(smile)
    return inchi.MolToInchi(molecule)
Beispiel #7
0
def loadSDF(sdfPath):
    """Read an SDF file, collect per-molecule data, render images and feed the database.

    For every readable molecule a ``data`` dict is built: each field is taken
    from the matching SDF property when ``GetProp`` succeeds, otherwise from a
    computed or placeholder fallback. Molecules without a ``DATABASE_NAME``
    property are skipped. Two PNG files (300x300 and 150x150) are written under
    ``settings.FILES_DIR`` and the data is passed to ``feedDatabase``.

    :param sdfPath: path handed to ``Chem.SDMolSupplier``
    """
    # Create images
    #generateImages(sdfPath)
     
    # Create a molecule supplier
    suppl = Chem.SDMolSupplier(sdfPath)
    
    # Filter empty entries
    sdf = [x for x in suppl if x is not None]
    
    # For each molecule in supplier
    for mol in sdf:
        data = {}
        
        # NOTE(review): every fallback below uses a bare ``except:``, so any
        # error from GetProp (not only a missing property) selects the fallback.
        try:
            data['fCharge'] = mol.GetProp('Charge')
        except:
            data['fCharge'] = Chem.GetFormalCharge(mol)
            
        try:
            data['name'] = mol.GetProp('DATABASE_ID')
        except:
            data['name'] = 'unkown'
            
        try:
            data['molMass'] = mol.GetProp('Total Molweight')
        except:
            data['molMass'] = Descriptors.ExactMolWt(mol) 
            
        try:
            data['cLogP'] = mol.GetProp('cLogP')
        except:
            data['cLogP'] = Crippen.MolLogP(mol) # not sure this is correct
            
        try:
            data['cLogS'] = mol.GetProp('cLogS')
        except:
            data['cLogS'] = 0.0
            
        try:
            data['tpsa'] = mol.GetProp('Polar Surface Area')
        except:
            data['tpsa'] = rdMolDescriptors.CalcTPSA(mol)
            
        try:
            data['totalSurfaceArea'] = mol.GetProp('Total Surface Area')
        except:
            # NOTE(review): same CalcTPSA fallback as 'tpsa' above - confirm
            # this is the intended value for the total surface area.
            data['totalSurfaceArea'] = rdMolDescriptors.CalcTPSA(mol)
        
        try:
            data['hbondAcceptors'] = mol.GetProp('H-Acceptors')
        except:
            data['hbondAcceptors'] = rdMolDescriptors.CalcNumHBA(mol)
            
        try:
            data['hbondDonnors'] = mol.GetProp('H-Donors')
        except:
            data['hbondDonnors'] = rdMolDescriptors.CalcNumHBD(mol)
            
        try:
            data['rotable'] = mol.GetProp('Rotatable Bonds')
        except:
            data['rotable'] = rdMolDescriptors.CalcNumRotatableBonds(mol)
            
        try:
            data['mutagenic'] = mol.GetProp('Mutagenic')
        except:
            data['mutagenic'] = 'Unknown'
            
        try:
            data['tumorigenic'] = mol.GetProp('Tumorigenic')
        except:
            data['tumorigenic'] = 'Unknown'
            
        try:
            data['irritant'] = mol.GetProp('Irritant')
        except:
            data['irritant'] = 'Unkown'
            
        try:
            data['smiles'] = mol.GetProp('SMILES')
        except:
            data['smiles'] = Chem.MolToSmiles(mol)
            
        try:
            data['InChI'] = mol.GetProp('INCHI_IDENTIFIER')
        except:
            data['InChI'] = inchi.MolToInchi(mol)
            
        try:
            data['inchiKey'] = mol.GetProp('INCHI_KEY')
        except:
            data['inchiKey'] = inchi.MolToInchiKey(mol)
            
        try:
            data['nonHAtoms'] = mol.GetProp('Non-H Atoms')
        except:
            data['nonHAtoms'] = -1 # don't know how to compute this
            
            
        try:
            data['numAtoms'] = mol.GetProp('numAtoms')
        except:
            data['numAtoms'] = mol.GetNumAtoms()
        
        try:
            data['stereoCenters'] = mol.GetProp('Stereo Centers')
        except:
            # NOTE(review): falls back to the atom count, same as 'numAtoms'
            # above - looks like a placeholder; confirm.
            data['stereoCenters'] = mol.GetNumAtoms()
            
        # The provider is mandatory: without it the molecule is skipped.
        try:
            data['provider'] = mol.GetProp('DATABASE_NAME')
        except:
            print("Nenhum fornecedor encontrado, o campo é obrigatório!")
            continue
        
        # ``tmp`` is not used afterwards in the code shown here.
        tmp = AllChem.Compute2DCoords(mol) # Compute its coordinates
        
        # Full-size image, file name derived from the InChIKey
        Draw.MolToFile(mol, 
            os.path.join(settings.FILES_DIR, f'molImages/' + data["inchiKey"] + '.png'),
            size=(300,300),
            kekulize=True, 
            wedgeBonds=True,
            fitImage=True) # Save it
        
        # Thumbnail, same naming scheme
        Draw.MolToFile(mol, 
            os.path.join(settings.FILES_DIR, f'molThumbs/' + data["inchiKey"] + '.png'),
            size=(150,150),
            kekulize=True,
            wedgeBonds=True,
            fitImage=True)
        
        # NOTE(review): feedDatabase is called here unconditionally and again
        # in the branches below - confirm the double call is intended.
        feedDatabase(data)

        if Compounds.objects.filter(inChIKey=data['inchiKey']).exists():
            # NOTE(review): this filters on the literal list ['provider'], not
            # on data['provider'] - presumably a typo; verify.
            if not Compounds.objects.filter(provider=['provider']).exists():
                feedDatabase(data)
                print("feed1")
                # append to the database's SDF
                a = 1
            else:
                print("continue123")
                continue
                
        else:
            a = 1
            feedDatabase(data)
            print("feed2")
        '''except:
Beispiel #8
0
def parse_f(f):
    """Build and print one tab-separated record for the mol file *f*.

    The record holds cid, CAS, serialized formula, molecular weight, SMILES,
    InChI (with the ``InChI=1S/`` prefix removed), InChIKey, IUPAC name and
    then the collected names. Data comes from the mol file when it can be
    read and converted; otherwise from the custom ``syn_data`` entry for the
    CAS number (optionally via PubChem).

    :param f: path of the mol file; the CAS number is the second
        '/'-separated component (or *f* itself when it has no '/'), cut at
        the first '.'
    :return: always ``None``; the record is printed, and nothing is printed
        when the CAS is ignored or no data source is available
    """
    # Defaults used when no PubChem information is found
    names = ['']
    cid = -1
    CAS = f.split('/')[1] if '/' in f else f
    CAS = CAS.split('.')[0]
    if CAS in ignored_CASs:
        return None
    failed_mol = False
    try:
        # Custom data naming a pubchem id or a formula takes precedence over
        # the mol file: raise on purpose to reach the fallback branch below.
        if CAS in syn_data:
            d = syn_data[CAS]
            if 'pubchem' in d:
                raise Exception(
                    'Pubchem specified, not trying to use the mol file')
            elif 'formula' in d:
                raise Exception(
                    'Formula specified, not trying to use the mol file')
        try:
            mol = Chem.MolFromMolFile(f)
            assert mol is not None
        except:
            print('Cannot read %s' % f)
            # Deliberate ZeroDivisionError to jump to the outer ``except``
            1 / 0
        try:
            inchi_val = inchi.MolToInchi(mol)
        except:
            print('BAILING ON %s' % f)
            1 / 0
        mol = inchi.MolFromInchi(inchi_val)  # Works better for ions
        if mol is None:
            print('BAILING ON reconversion to mol %s' % f)
            1 / 0
    except:
        # Fallback: the mol file was skipped or unusable; use custom data
        failed_mol = True
        if CAS in syn_data:
            d = syn_data[CAS]
            if 'pubchem' in d:
                # Cache entries keyed by pubchem id hold 8 fields
                if str(d['pubchem']) in mycache:
                    cid, iupac_name, names, mw, smi, inchi_val, inchikey, formula = mycache[
                        str(d['pubchem'])]
                else:
                    pc = Compound.from_cid(d['pubchem'])
                    cid = pc.cid
                    iupac_name = pc.iupac_name
                    names = pc.synonyms
                    mw = pc.molecular_weight
                    smi = pc.canonical_smiles
                    inchi_val = pc.inchi
                    inchikey = pc.inchikey
                    formula = pc.molecular_formula

                    mycache[str(d['pubchem'])] = (cid, iupac_name, names, mw,
                                                  smi, inchi_val, inchikey,
                                                  formula)
            else:
                # No pubchem id: take everything from the custom entry;
                # 'MW' is required, the other keys default to empty values
                cid = -1
                names = d['synonyms'] if 'synonyms' in d else ['']
                mw = float(d['MW'])
                smi = d['smiles'] if 'smiles' in d else ''
                formula = d['formula'] if 'formula' in d else ''
                inchi_val = d['inchi'] if 'inchi' in d else ''
                inchikey = d['inchikey'] if 'inchikey' in d else ''
                iupac_name = ''
        else:
            print('FAILED on %s and no custom data was available either' % CAS)
            return None

    if not failed_mol:
        # Derive the identifiers from the (InChI round-tripped) molecule
        smi = Chem.MolToSmiles(mol, True)
        inchi_val = inchi.MolToInchi(mol)
        inchikey = inchi.InchiToInchiKey(inchi_val)
        mw = Descriptors.MolWt(mol)
        #        for i in mol.GetAtoms():
        #            if i.GetIsotope():
        #                mw = Descriptors.ExactMolWt(mol)
        #                break

        formula = CalcMolFormula(mol, True, True)
        iupac_name = ''
    try:
        if not failed_mol:
            # Cache entries keyed by InChIKey hold 3 fields (cid, iupac, names)
            if str(inchikey) in mycache:
                cid, iupac_name, names = mycache[str(inchikey)]
            else:
                try:
                    pc = get_compounds(inchikey, 'inchikey')[0]
                    cid = pc.cid
                    iupac_name = pc.iupac_name
                    names = pc.synonyms
                    mycache[str(inchikey)] = (cid, iupac_name, names)
                except:
                    # Lookup failed: remember the miss so it is not retried
                    mycache[str(inchikey)] = (-1, '', [''])
    except:
        cid = -1
        iupac_name = ''
        names = ['']

    other_CAS = []
    if CAS in pdf_data:
        d = pdf_data[CAS]
        name = d['Name']
        if 'Other Names' in d:
            syns = d['Other Names']
        else:
            syns = []
        # The pdf name becomes the IUPAC name when none is known yet,
        # otherwise it is put first among the synonyms.
        if not iupac_name:
            iupac_name = name
        else:
            # NOTE(review): when 'Other Names' exists this insert mutates the
            # list stored in pdf_data itself - confirm that is acceptable.
            syns.insert(0, name)
        if 'Deleted CAS' in d:
            other_CAS.extend(d['Deleted CAS'])
        if 'Alternate CAS' in d:
            other_CAS.extend(d['Alternate CAS'])

        syns = [i for i in syns if i not in dup_names]
        names = syns + [i for i in names if i not in all_names] + other_CAS
    actual_names = []
    for name in names:
        if name in all_user_names:
            # If the name is in the user db, only add it if it corresponds to this CAS number
            if CAS in syn_data and 'synonyms' in syn_data[
                    CAS] and name in syn_data[CAS]['synonyms']:
                actual_names.append(name)
            else:
                # Discard it otherwise
                pass
        else:
            # If the name is not in the user db we're all good
            actual_names.append(name)
    if CAS in syn_data and 'synonyms' in syn_data[CAS]:
        # If the user has any syns for this cas number, add those names if the name hasn't already been added
        for n in syn_data[CAS]['synonyms']:
            if n not in actual_names:
                actual_names.append(n)

    # Drop empty names (e.g. the '' placeholder)
    actual_names = [i for i in actual_names if i]

    if inchi_val is not None:
        inchi_val = inchi_val.replace('InChI=1S/', '')

    formula = serialize_formula(formula)
    # Eight fixed tab-separated fields, followed by the tab-joined names
    s = '%d\t%s\t%s\t%g\t%s\t%s\t%s\t%s\t' % (cid, CAS, formula, mw, smi,
                                              inchi_val, inchikey, iupac_name)

    s += '\t'.join(actual_names)
    print(s)
    return None
Beispiel #9
0
    except Exception:
        print("Not able to remove stereochemistry. Chembl.")
    try:
        mol = standardise.run(mol)
    except standardise.StandardiseException as e:
        logging.warn(e.message)
    try:
        mol = s.standardize(mol)
    except Exception:
        print("Not able to standardize. Chembl.")
    try:
        mol = s.tautomer_parent(mol, skip_standardize=True)
    except Exception:
        print("Not able to make tautomer parent. Chembl.")
    mol = s.stereo_parent(mol, skip_standardize=True)
    chembl_help[lig][0] = inchi.MolToInchi(mol)

# BDB preparing
# Helper lists; they are not filled by the statements shown here.
bdb_help = []
list_help = []
# Connection parameters are hard-coded in the connection string.
conn = psycopg2.connect('dbname=bdb user=data host=/tmp/')
curs = conn.cursor()
# Select the ligand InChI for every row whose target matches one of the listed
# SwissProt entry names or primary ids (bdb_base full-outer-joined to
# bdb_extend on the reactant set id).
# NOTE(review): bdb_2_ligand_inchi is selected twice - confirm whether a
# different second column was intended.
curs.execute(
    "select bdb_2_ligand_inchi, bdb_2_ligand_inchi from bdb_base B FULL OUTER JOIN bdb_extend E ON B.bdb_0_bindingdb_reactant_set_id = E.bdb_0_bindingdb_reactant_set_id where bdb_40_uniprot__swissprot__entry_name_of_target_chain IN ('THA_HUMAN', 'THB_HUMAN', 'RARA_HUMAN', 'RARB_HUMAN', 'RARG_HUMAN', 'PPARA_HUMAN', 'PPARD_HUMAN', 'PPARG_HUMAN', 'NR1D1_HUMAN', 'NR1D2_HUMAN', 'RORA_HUMAN', 'RORB_HUMAN', 'RORG_HUMAN', 'NR1H2_HUMAN', 'NR1H3_HUMAN', 'NR1H4_HUMAN', 'VDR_HUMAN', 'NR1I2_HUMAN', 'NR1I3_HUMAN', 'HNF4A_HUMAN', 'HNF4G_HUMAN', 'RXRA_HUMAN', 'RXRB_HUMAN', 'RXRG_HUMAN', 'NR2C1_HUMAN', 'NR2C2_HUMAN', 'NR2E1_HUMAN', 'NR2E3_HUMAN', 'COT1_HUMAN', 'COT2_HUMAN', 'NR2F6_HUMAN', 'ESR1_HUMAN', 'ESR2_HUMAN', 'ERR1_HUMAN', 'ERR2_HUMAN', 'ERR3_HUMAN', 'GCR_HUMAN', 'MCR_HUMAN', 'PRGR_HUMAN', 'ANDR_HUMAN', 'NR4A1_HUMAN', 'NR4A2_HUMAN', 'NR4A3_HUMAN', 'STF1_HUMAN', 'NR5A2_HUMAN', 'NR6A1_HUMAN', 'NR0B1_HUMAN', 'NR0B2_HUMAN') OR bdb_41_uniprot__swissprot__primary_id_of_target_chain IN ('P10827', 'P10828', 'P10276', 'P10826', 'P13631', 'Q07869', 'Q03181', 'P37231', 'P20393', 'Q14995', 'P35398', 'Q92753', 'P51449', 'P55055', 'Q13133', 'Q96RI1', 'P11473', 'O75469', 'Q14994', 'P41235', 'Q14541', 'P19793', 'P28702', 'P48443', 'P13056', 'P49116', 'Q9Y466', 'Q9Y5X4', 'P10589', 'P24468', 'P10588', 'P03372', 'Q92731', 'P11474', 'O95718', 'P62508', 'P04150', 'P08235', 'P06401', 'P10275', 'P22736', 'P43354', 'Q92570', 'Q13285', 'O00482', 'Q15406', 'P51843', 'Q15466');"
)
# All matching rows are loaded into memory at once.
bdb = curs.fetchall()

#BDB duplicates
i = 0
while i < len(bdb):
    delete = False