Python MolToInchiKey Exemples, rdkit.Chem.inchi.MolToInchiKey Python Exemples

Exemple #1

0

Afficher le fichier

 def set_computable(self):
     """Populate the derived identifier/descriptor attributes from ``self._mol``.

     The stored molecule text is parsed with ``tool_chemical.read_string``
     using the ``"mol"`` format tag, then the non-isomeric SMILES, InChI,
     InChIKey, molecular formula and exact molecular weight are written to
     the matching private attributes, in that order.
     """
     parsed = tool_chemical.read_string("mol", self._mol)
     self._smiles = Chem.MolToSmiles(parsed, isomericSmiles=False)
     self._inchi = inchi.MolToInchi(parsed)
     self._inchikey = inchi.MolToInchiKey(parsed)
     self._molecular_formula = Chem.CalcMolFormula(parsed)
     self._molecular_weight = Chem.CalcExactMolWt(parsed)

Exemple #2

0

Afficher le fichier

def add_to_summary(summary_dic, conf_dic, smiles, save_dir):
    """Record one species in the summary and work out its pickle location.

    :param summary_dic: mapping updated in place, keyed by SMILES
    :param conf_dic: per-species results; every entry except ``"conformers"``
        is copied into the summary
    :param smiles: SMILES string used as the summary key
    :param save_dir: directory in which the ``<inchikey>.pickle`` file lives
    :return: tuple ``(summary_dic, pickle_path)``
    """
    key = inchi.MolToInchiKey(get_mol(smiles))
    pickle_path = os.path.join(os.path.abspath(save_dir), f"{key}.pickle")

    entry = {}
    for name, value in conf_dic.items():
        if name == "conformers":
            continue
        entry[name] = value
    summary_dic[smiles] = entry
    summary_dic[smiles]["pickle_path"] = pickle_path

    return summary_dic, pickle_path

Exemple #3

0

Afficher le fichier

Fichier : fileio.py Projet : arcaulfield/Chemical_Structure_Reconstruction

def sdf_to_inchikey():
    """Write ``"<inchikey> <name>"`` lines for every molecule of an SDF file.

    The SDF path is taken from ``sys.argv[1]``; the output is written next to
    it as ``<sdf path>.inchikey``. Entries the supplier yields as falsy
    (unparsable records) are skipped.
    """
    sdf_path = sys.argv[1]
    molecules = Chem.SDMolSupplier(sdf_path)

    # The context manager closes the file even if a molecule fails to convert
    # or lacks the "_Name" property part-way through the loop.
    with open(sdf_path + ".inchikey", "w") as out_file:
        for mol in molecules:
            if not mol:
                continue
            out_file.write(
                inchi.MolToInchiKey(mol) + " " + mol.GetProp("_Name") + "\n")

Exemple #4

0

Afficher le fichier

def convert(input, input_mod='smi'):
    """
    Derive the molecular formula, InChI and InChIKey of a molecule string.

    :param input: molecule string (SMILES with the default ``input_mod``)
    :param input_mod: format tag forwarded to ``read_string``; defaults to 'smi'
    :return: tuple of (molecular formula, inchi, inchikey)
    """
    parsed = read_string(input_mod, input)
    return (
        Descriptors.rdMolDescriptors.CalcMolFormula(parsed),
        inchi.MolToInchi(parsed),
        inchi.MolToInchiKey(parsed),
    )

Exemple #5

0

Afficher le fichier

 def _append_inchi_keys_dictionary_by_reference(self, inchi_dict: dict,
                                                smile: str):
     """Count one SMILES string in ``inchi_dict``, keyed by its InChIKey.

     ``inchi_dict`` is mutated in place; each value is a two-item list
     ``[count, mol]`` holding the number of occurrences and the first molecule
     seen for that key. SMILES strings that ``Chem.MolFromSmiles`` cannot
     parse (returns ``None``) are ignored silently; a failure while computing
     the InChIKey is reported through ``self.log_message``.
     """
     mol = Chem.MolFromSmiles(smile)
     if mol is None:
         return

     # Only the conversion is guarded; ``except Exception`` (rather than a
     # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
     try:
         inchi_key = inchi.MolToInchiKey(mol)
     except Exception:
         self.log_message(f"Failed to transform SMILES string: {smile}")
         return

     # A missing key is the only expected failure when incrementing.
     try:
         inchi_dict[inchi_key][0] += 1
     except KeyError:
         inchi_dict[inchi_key] = [1, mol]

Exemple #6

0

Afficher le fichier

Fichier : public_spectrum.py Projet : chalbori/bmdms-np

    def set_computables_from_mol(self, mol):
        """Fill the computed chemistry attributes of this object from ``mol``.

        Sets ``molecular_formula``, ``molecular_weight``, ``inchi``,
        ``inchikey`` and ``smiles`` (non-isomeric SMILES).

        :param mol: molecule object handed to the RDKit descriptor functions
        :raises SpectrumError: if any of the computations raises; the original
            exception is chained as the cause
        :raises AssertionError: if any computed attribute ends up ``None``
        """
        try:  # warning comes up in pycharm (bug of pycharm)
            self.molecular_formula = Descriptors.rdMolDescriptors.CalcMolFormula(
                mol)
            self.molecular_weight = Descriptors.ExactMolWt(mol)
            self.inchi = inchi.MolToInchi(mol)
            self.inchikey = inchi.MolToInchiKey(mol)
            self.smiles = Chem.MolToSmiles(mol, isomericSmiles=False)
        except Exception as e:
            # ``e.args`` is a tuple: concatenating it to a str raised a
            # TypeError here and hid the real failure, so format it instead.
            raise SpectrumError(
                f"Error occurred while computing properties: {e}") from e

        assert self.molecular_formula is not None, "molecular-formula can't be None"
        assert self.molecular_weight is not None, "molecular-weight can't be None"
        assert self.inchi is not None, "inchi can't be None"
        assert self.inchikey is not None, "inchikey can't be None"
        assert self.smiles is not None, "smiles can't be None"

Exemple #7

0

Afficher le fichier

Fichier : base_sampling_logger.py Projet : sirimullalab/Reinvent

 def _count_unique_inchi_keys(self, smiles):
     """Rank the distinct molecules in ``smiles`` by how often they occur.

     Molecules are de-duplicated by InChIKey; SMILES strings that fail to
     parse are skipped. The result is limited to ``self._sample_size``
     entries, most frequent first (ties keep first-seen order).

     :param smiles: iterable of SMILES strings
     :return: tuple ``(list_of_labels, sorted_mols)`` where each label reads
         ``"Times sampled: <count>"`` and lines up with the molecule at the
         same index
     """
     # value layout: [count, first mol seen for this key]
     inchi_dict = {}
     for smile in smiles:
         mol = Chem.MolFromSmiles(smile)
         if mol is None:
             continue
         inchi_key = inchi.MolToInchiKey(mol)
         try:
             inchi_dict[inchi_key][0] += 1
         except KeyError:  # first occurrence of this key
             inchi_dict[inchi_key] = [1, mol]

     # sorted() is stable, so equal counts stay in insertion order.
     ranked = sorted(inchi_dict.values(), key=lambda entry: -entry[0])
     ranked = ranked[:self._sample_size]
     list_of_labels = [f"Times sampled: {entry[0]}" for entry in ranked]
     sorted_mols = [entry[1] for entry in ranked]
     return list_of_labels, sorted_mols

Exemple #8

0

Afficher le fichier

def main():
    """Add an InChIKey ``name`` column to a tab-separated table of SMILES.

    ``sys.argv[1]`` is the input TSV (must have a ``smiles`` column) and
    ``sys.argv[2]`` the output TSV. The augmented table is also printed.
    """
    missingfile, outputfile = sys.argv[1], sys.argv[2]

    missing = pd.read_csv(missingfile, sep="\t", header=0)

    # One InChIKey per row, in row order.
    missing['name'] = [
        ri.MolToInchiKey(Chem.MolFromSmiles(record.smiles))
        for _, record in missing.iterrows()
    ]
    print(missing)

    missing.to_csv(outputfile, sep="\t", index=False)

Exemple #9

0

Afficher le fichier

Fichier : process_database_sdf.py Projet : ntcockroft/STarFish

def main():
    """Convert an SDF file to CSV, adding SMILES and InChIKey columns.

    Command-line options:
      -S/--sdf               name of the input .sdf file (required)
      -n/--name              name of the output csv file (required)
      -i/--input_directory   directory holding the input file (default ./)
      -o/--output_directory  directory for the output file (default ./)
    """
    # Local import keeps this block self-contained even if the module does
    # not already import ``os``.
    import os

    parser = argparse.ArgumentParser(
        description='Convert compounds and associated information from an '
                    'sdf file into a csv file and generate smiles')
    # -S and -n are indexed unconditionally below, so make argparse report a
    # clean usage error instead of failing later with a TypeError on None.
    parser.add_argument('-S', '--sdf', action='store', nargs=1,
                        dest='sdf', required=True,
                        help='File containing compounds (.sdf format)')
    parser.add_argument('-n', '--name', action='store', nargs=1,
                        dest='name', required=True,
                        help='Name of output csv file to write')
    parser.add_argument('-i', '--input_directory', action='store', nargs=1,
                        dest='input', default=['./'],
                        help='Directory where input files are stored')
    parser.add_argument('-o', '--output_directory', action='store', nargs=1,
                        dest='output', default=['./'],
                        help='Directory where output files should be written')
    args = vars(parser.parse_args())

    # os.path.join inserts the separator when the directory argument has no
    # trailing slash (plain concatenation produced e.g. "datafile.sdf").
    sdf_path = os.path.join(args['input'][0], args['sdf'][0])
    csv_path = os.path.join(args['output'][0], args['name'][0])

    sdf_df = PandasTools.LoadSDF(sdf_path, smilesName='smiles')
    sdf_df['inchikey'] = [inchi.MolToInchiKey(mol) for mol in sdf_df['ROMol']]
    sdf_df.to_csv(csv_path, index=False)

Exemple #10

0

Afficher le fichier

Fichier : fill_base.py Projet : FCreate/DBChem

def fill_base_test(cursor):
    """Populate the database behind ``cursor`` with test data.

    Molecules and experimental values come from ``toxicity_85832.csv``;
    tasks, running tasks, descriptor names/versions and validity flags are
    filled with random values. Rows are inserted, in order, into the tables
    'molecules', 'tasks', 'tasks_running', 'descriptors',
    'descriptors_values', 'endpoints' and 'experimental_data'.

    :param cursor: DB cursor providing ``execute``/``executemany`` with
        ``?`` placeholders
    :return: the same ``cursor`` (nothing is committed here)
    """
    df = pd.read_csv("toxicity_85832.csv")
    #df = df.drop("Unnamed: 0", axis=1)
    # Column names are captured BEFORE the SMILES column is dropped.
    names_of_columns = list(df.columns)
    smiles = list(df["SMILES"])
    df = df.drop("SMILES", axis=1)
    toxic_vals = np.array(df.values)

    # --- molecules: canonical SMILES plus the InChI / InChIKey derived from it
    canonize_smiles = [_canonize_mixture(smile) for smile in smiles]
    inchi_smiles = [
        inchi.MolToInchi(Chem.MolFromSmiles(smile))
        for smile in canonize_smiles
    ]
    inchikey = [
        inchi.MolToInchiKey(Chem.MolFromSmiles(smile))
        for smile in canonize_smiles
    ]
    # NOTE(review): `ids` is never used after this line.
    ids = [x for x in range(len(canonize_smiles))]
    ziped_vals = zip(inchikey, inchi_smiles, canonize_smiles)
    cursor.executemany(
        """insert into 'molecules' (inchi_key,inchi,canonical_smiles) values (?,?,?)""",
        ziped_vals)

    # --- tasks: 20 random descriptions (randint(1, 30) is presumably the
    # string length -- confirm against randomStringwithDigitsAndSymbols)
    descr_tasks = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for i in range(20)
    ]
    # zip() over a single list yields the 1-tuples executemany expects.
    cursor.executemany("""insert into 'tasks' (descr) values (?)""",
                       zip(descr_tasks))

    # --- tasks_running: 1000 random (task, molecule, completed) rows;
    # ids are drawn 1-based
    completed = [random.randint(0, 1) for i in range(1000)]
    id_tasks = [random.randint(1, len(descr_tasks)) for i in range(1000)]
    id_molecules = [random.randint(1, len(smiles)) for i in range(1000)]
    zip_tasks_running = zip(id_tasks, id_molecules, completed)
    cursor.executemany(
        """insert into 'tasks_running' (id_task, id_molecule, completed) values (?,?,?)""",
        zip_tasks_running)

    # --- descriptors: 10 random (descriptor, version) pairs, then one fixed
    # ("mordred", "0.315") row
    name_of_descr = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for i in range(10)
    ]
    name_of_version = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for i in range(10)
    ]
    ziped_versions = zip(name_of_descr, name_of_version)
    cursor.executemany(
        """insert into 'descriptors' (descriptor, version) values (?,?)""",
        ziped_versions)
    cursor.execute(
        """insert into 'descriptors' (descriptor, version) values (?,?)""",
        ("mordred", "0.315"))

    # --- descriptor_values: one row per molecule, all tied to descriptor 11.
    # NOTE(review): 11 presumably is the id of the "mordred" row (10 random
    # rows + 1), which only holds if the table was empty and ids auto-increment
    # from 1 -- confirm against the schema.
    id_descriptor = [11 for i in range(len(smiles))]
    id_molecule = [x + 1 for x in range(len(smiles))]
    id_tasks = [
        random.randint(1, len(descr_tasks)) for i in range(len(smiles))
    ]
    valid = [random.randint(0, 1) for i in range(len(smiles))]
    # `func` is defined elsewhere; it is expected to return one value per
    # canonical SMILES -- TODO confirm.
    value = func(canonize_smiles)
    ziped_descr_vals = zip(id_molecule, id_descriptor, id_tasks, valid, value)
    cursor.executemany(
        """insert into 'descriptors_values' (id_molecule, id_descriptor, id_task, valid, value) values (?,?,?,?,?)""",
        ziped_descr_vals)

    # --- endpoints: derived from the column names, skipping the first column
    # (presumably SMILES -- verify the CSV layout). Each remaining name is
    # split on '_': field 1 is the description, fields 2+ joined are the type.
    features = names_of_columns[1:]
    descriptions = [feature.split('_')[1] for feature in features]
    types = ['_'.join(feature.split('_')[2:]) for feature in features]
    ziped_endpoints = zip(descriptions, types)
    cursor.executemany("""insert into 'endpoints' (desc, type) values (?,?)""",
                       ziped_endpoints)

    # --- experimental data: one row per non-NaN cell of the value matrix,
    # using 1-based (row -> molecule, column -> endpoint) ids
    ids_molecules = []
    ids_endpoints = []
    values_endpoints = []
    for i in range(len(toxic_vals[:, 0])):
        for j in range(len(toxic_vals[0, :])):
            if (~np.isnan(toxic_vals[i, j])):
                ids_molecules.append(i + 1)
                ids_endpoints.append(j + 1)
                values_endpoints.append(toxic_vals[i, j])

    ziped_experimental_data = zip(ids_molecules, ids_endpoints,
                                  values_endpoints)
    cursor.executemany(
        """insert into 'experimental_data' (id_molecule, id_endpoint, value) values (?,?,?)""",
        ziped_experimental_data)

    return cursor

Exemple #11

0

Afficher le fichier

Fichier : fill_base.py Projet : FCreate/DBChem

def calculate_inchi_key(smile):
    """Return the InChIKey of the molecule described by the SMILES ``smile``."""
    molecule = Chem.MolFromSmiles(smile)
    return inchi.MolToInchiKey(molecule)

Exemple #12

0

Afficher le fichier

Fichier : views.py Projet : Arturossi/quimioteca

def loadSDF(sdfPath):
    """Load every molecule of an SDF file, render its images and store it.

    For each readable molecule a ``data`` dict is built: every field is taken
    from the matching SDF property when present, otherwise from a computed or
    placeholder fallback. Molecules without a 'DATABASE_NAME' property are
    skipped. Two PNG renderings (300x300 and 150x150) are written under
    ``settings.FILES_DIR`` and the record is handed to ``feedDatabase``.

    NOTE(review): the function is cut off in this listing (a dangling
    triple-quoted string follows the last visible statement), so the
    description covers only the visible part.

    :param sdfPath: path of the SDF file to read
    """
    # Create images
    #generateImages(sdfPath)

    # Create a molecule supplier
    suppl = Chem.SDMolSupplier(sdfPath)

    # Filter empty entries
    sdf = [x for x in suppl if x is not None]

    # For each molecule in supplier
    for mol in sdf:
        data = {}

        # Each try/except below follows the same pattern: prefer the value
        # stored in the SDF record, fall back when GetProp raises.
        # NOTE(review): the bare ``except:`` clauses catch every exception,
        # not just a missing property.
        try:
            data['fCharge'] = mol.GetProp('Charge')
        except:
            data['fCharge'] = Chem.GetFormalCharge(mol)

        try:
            data['name'] = mol.GetProp('DATABASE_ID')
        except:
            data['name'] = 'unkown'

        try:
            data['molMass'] = mol.GetProp('Total Molweight')
        except:
            data['molMass'] = Descriptors.ExactMolWt(mol) 

        try:
            data['cLogP'] = mol.GetProp('cLogP')
        except:
            data['cLogP'] = Crippen.MolLogP(mol) # (original author) not sure whether this is correct

        try:
            data['cLogS'] = mol.GetProp('cLogS')
        except:
            data['cLogS'] = 0.0

        try:
            data['tpsa'] = mol.GetProp('Polar Surface Area')
        except:
            data['tpsa'] = rdMolDescriptors.CalcTPSA(mol)

        try:
            data['totalSurfaceArea'] = mol.GetProp('Total Surface Area')
        except:
            # NOTE(review): same fallback as 'tpsa' above (CalcTPSA) --
            # confirm this is intended for the total surface area.
            data['totalSurfaceArea'] = rdMolDescriptors.CalcTPSA(mol)

        try:
            data['hbondAcceptors'] = mol.GetProp('H-Acceptors')
        except:
            data['hbondAcceptors'] = rdMolDescriptors.CalcNumHBA(mol)

        try:
            data['hbondDonnors'] = mol.GetProp('H-Donors')
        except:
            data['hbondDonnors'] = rdMolDescriptors.CalcNumHBD(mol)

        try:
            data['rotable'] = mol.GetProp('Rotatable Bonds')
        except:
            data['rotable'] = rdMolDescriptors.CalcNumRotatableBonds(mol)

        try:
            data['mutagenic'] = mol.GetProp('Mutagenic')
        except:
            data['mutagenic'] = 'Unknown'

        try:
            data['tumorigenic'] = mol.GetProp('Tumorigenic')
        except:
            data['tumorigenic'] = 'Unknown'

        try:
            data['irritant'] = mol.GetProp('Irritant')
        except:
            # NOTE(review): spelled 'Unkown' here but 'Unknown' for the two
            # fields above -- confirm which value consumers expect.
            data['irritant'] = 'Unkown'

        try:
            data['smiles'] = mol.GetProp('SMILES')
        except:
            data['smiles'] = Chem.MolToSmiles(mol)

        try:
            data['InChI'] = mol.GetProp('INCHI_IDENTIFIER')
        except:
            data['InChI'] = inchi.MolToInchi(mol)

        try:
            data['inchiKey'] = mol.GetProp('INCHI_KEY')
        except:
            data['inchiKey'] = inchi.MolToInchiKey(mol)

        try:
            data['nonHAtoms'] = mol.GetProp('Non-H Atoms')
        except:
            data['nonHAtoms'] = -1 # (original author) don't know how to compute this


        try:
            data['numAtoms'] = mol.GetProp('numAtoms')
        except:
            data['numAtoms'] = mol.GetNumAtoms()

        try:
            data['stereoCenters'] = mol.GetProp('Stereo Centers')
        except:
            # NOTE(review): falls back to the atom count, same as 'numAtoms'
            # -- confirm this is the intended stereo-center fallback.
            data['stereoCenters'] = mol.GetNumAtoms()

        # The provider is mandatory: without it the molecule is skipped.
        try:
            data['provider'] = mol.GetProp('DATABASE_NAME')
        except:
            print("Nenhum fornecedor encontrado, o campo é obrigatório!")
            continue

        # `tmp` is not used again in the visible part of the function.
        tmp = AllChem.Compute2DCoords(mol) # Compute its coordinates

        # Full-size image, named after the InChIKey.
        Draw.MolToFile(mol, 
            os.path.join(settings.FILES_DIR, f'molImages/' + data["inchiKey"] + '.png'),
            size=(300,300),
            kekulize=True, 
            wedgeBonds=True,
            fitImage=True) # Save it

        # Thumbnail, same naming scheme.
        Draw.MolToFile(mol, 
            os.path.join(settings.FILES_DIR, f'molThumbs/' + data["inchiKey"] + '.png'),
            size=(150,150),
            kekulize=True,
            wedgeBonds=True,
            fitImage=True)

        # NOTE(review): the record is stored here unconditionally, and the
        # branches below may call feedDatabase again for the same record --
        # confirm whether this first call is intended.
        feedDatabase(data)

        if Compounds.objects.filter(inChIKey=data['inchiKey']).exists():
            # NOTE(review): the filter value is the literal list ['provider'],
            # not data['provider'] -- looks like a typo; confirm.
            if not Compounds.objects.filter(provider=['provider']).exists():
                feedDatabase(data)
                print("feed1")
                # append to the database's SDF
                a = 1
            else:
                print("continue123")
                continue

        else:
            a = 1
            feedDatabase(data)
            print("feed2")
        '''except:

Exemple #13

0

Afficher le fichier

def one_species_confs(molecule, log, other_props, max_confs, forcefield,
                      nconf_gen, e_window, rms_tol, prun_tol, job_dir,
                      log_file, rep_e_window, fallback_to_align, temp,
                      clean_up, start_time):
    """Generate, cluster and summarize conformers for a single species.

    Progress is written to the file ``log``. The steps are: minimize,
    cluster, write each cluster, convert with ``run_obabel``, recluster,
    rename the xyz files, summarize timings, and finally parse the results
    in ``job_dir`` into the returned dictionary.

    The force field is switched to ``'uff'`` when ``molecule`` contains any
    entry of ``UFF_ELEMENTS``.

    :return: the dictionary produced by ``parse_results``
    """
    smiles = copy.deepcopy(molecule)
    with open(log, "w") as log_handle:
        log_handle.write("The smiles strings that will be run are:\n")
        log_handle.write("\n".join([molecule]) + "\n")

        if any(element in molecule for element in UFF_ELEMENTS):
            log_handle.write(("Switching to UFF, since MMFF94 does "
                              "not have boron and/or aluminum\n"))
            forcefield = 'uff'

        generator, gen_time, min_time = minimize(output=log_handle,
                                                 molecule=molecule,
                                                 forcefield=forcefield,
                                                 nconf_gen=nconf_gen,
                                                 prun_tol=prun_tol,
                                                 e_window=e_window,
                                                 rms_tol=rms_tol,
                                                 rep_e_window=rep_e_window)

        # Ranking settings shared by the clustering and reclustering calls.
        ranking = dict(rms_tolerance=float(rms_tol),
                       max_ranked_conformers=int(max_confs),
                       energy_window=float(e_window))
        clusters = generator.cluster(Report_e_tol=float(rep_e_window),
                                     output=log_handle,
                                     **ranking)

        cluster_time = time.time()
        inchikey = inchi.MolToInchiKey(get_mol(molecule))

        for conf_idx, conformer in enumerate(clusters):
            write_clusters(output=log_handle,
                           idx=conf_idx,
                           conformer=conformer,
                           inchikey=inchikey,
                           path=job_dir)

        # conf_idx is the index of the last cluster written above; it is
        # unbound if the clustering produced no conformers.
        molecule = run_obabel(inchikey=inchikey, idx=conf_idx)
        generator.recluster(path=job_dir,
                            output=log_handle,
                            clustered_confs=clusters,
                            molecule=molecule,
                            key=inchikey,
                            fallback_to_align=fallback_to_align,
                            **ranking)
        rename_xyz_files(path=job_dir)
        summarize(output=log_handle,
                  gen_time=gen_time,
                  start_time=start_time,
                  min_time=min_time,
                  cluster_time=cluster_time)

    return parse_results(job_dir=job_dir,
                         log_file=log_file,
                         inchikey=inchikey,
                         max_confs=max_confs,
                         other_props=other_props,
                         temp=temp,
                         smiles=smiles,
                         clean_up=clean_up)

Exemple #14

0

Afficher le fichier

Fichier : remove_dupl_rdkit.py Projet : meddwl/rdkit-scripts

def get_inchi_key(mol, stereo):
    """Return the InChIKey of ``mol``, optionally without its middle block.

    :param mol: molecule passed to ``inchi.MolToInchiKey``
    :param stereo: when truthy the full key is returned; otherwise the key is
        split on '-' and only the first and third blocks are kept
    :return: the (possibly shortened) InChIKey string
    """
    full_key = inchi.MolToInchiKey(mol)
    if stereo:
        return full_key
    blocks = full_key.split('-')
    # drop the middle block, which is responsible for stereo and isotopes
    return '-'.join((blocks[0], blocks[2]))

Exemple #15

0

Afficher le fichier

Fichier : valid_unique_np.py Projet : chalbori/coconut_retrieve

# Script fragment: round-trips each stored InChI through RDKit and checks that
# the regenerated InChIKey matches the stored one. `repo`, `i`, `converted`,
# `not_converted` and `converted_list_file` are defined outside this excerpt.

# InChIs whose regenerated key differs from the stored key go here.
# NOTE(review): no close() for this handle is visible in this excerpt.
converted_not_match_file = open(
    "out/inchi_valid_check/converted_not_match.txt",
    mode="w",
    encoding="utf-8")
# Molecules whose key matches are written to this SDF file.
w = SDWriter("out/inchi_valid_check/converted.sdf")
# Annotation only; `np` is the loop variable below.
# NOTE(review): the name would shadow a numpy alias if this module has one.
np: Unique_NP
for np in repo.get_unique_stream():
    mol = Mol()
    try:
        # treatWarningAsError=True is passed so that InChI warnings are
        # treated as failures as well.
        mol = Chem.MolFromInchi(inchi=np.inchi, treatWarningAsError=True)
        # If MolFromInchi returned None this raises and lands in the except.
        mol.SetProp("coconut_id", np.coconut_id)
    except:
        not_converted += 1

    # NOTE(review): if MolFromInchi itself raised, `mol` is still the Mol()
    # placeholder; should that object be truthy, the entry would be counted
    # in not_converted a second time below -- confirm.
    if mol:
        mol_inchikey = inchi.MolToInchiKey(mol)

        if np.inchikey == mol_inchikey:
            converted_list_file.write(np.inchi + "\n")
            w.write(mol)
            converted += 1
        else:
            converted_not_match_file.write(np.inchi + "\n")
            not_converted += 1
    i += 1
    del np

    # Progress report every 1000 entries.
    if i % 1000 == 0:
        print("{}th checked".format(i))

    # if i > 5: