Python MolToInchiKey 예제들, rdkit.Chem.inchi.MolToInchiKey Python 예제들

예제 #1

0

파일 보기

 def set_computable(self):
     """Fill in the derived identifier/descriptor fields from ``self._mol``.

     The stored molecule text is parsed with ``tool_chemical.read_string``
     in "mol" mode; the parsed molecule is then used to set ``_smiles``
     (generated with ``isomericSmiles=False``), ``_inchi``, ``_inchikey``,
     ``_molecular_formula`` and ``_molecular_weight``.
     """
     parsed = tool_chemical.read_string("mol", self._mol)

     # Line notations and identifiers.
     self._smiles = Chem.MolToSmiles(parsed, isomericSmiles=False)
     self._inchi = inchi.MolToInchi(parsed)
     self._inchikey = inchi.MolToInchiKey(parsed)

     # Formula and exact molecular weight.
     self._molecular_formula = Chem.CalcMolFormula(parsed)
     self._molecular_weight = Chem.CalcExactMolWt(parsed)

예제 #2

0

파일 보기

def add_to_summary(summary_dic, conf_dic, smiles, save_dir):
    """Store a molecule's non-conformer properties and its pickle location.

    :param summary_dic: mapping updated in place, keyed by SMILES
    :param conf_dic: property mapping; its "conformers" entry is left out
    :param smiles: SMILES string used both as key and to derive the InChIKey
    :param save_dir: directory in which the ``<inchikey>.pickle`` file lives
    :return: tuple of (summary_dic, absolute pickle path)
    """
    file_stem = inchi.MolToInchiKey(get_mol(smiles))
    pickle_path = os.path.join(os.path.abspath(save_dir),
                               f"{file_stem}.pickle")

    # Copy everything except the (bulky) conformer list, then attach the path.
    entry = {}
    for name, value in conf_dic.items():
        if name == "conformers":
            continue
        entry[name] = value
    entry["pickle_path"] = pickle_path
    summary_dic[smiles] = entry

    return summary_dic, pickle_path

예제 #3

0

파일 보기

파일: fileio.py 프로젝트: arcaulfield/Chemical_Structure_Reconstruction

def sdf_to_inchikey():
    """Write an InChIKey listing for the SDF file named in ``sys.argv[1]``.

    Each readable molecule of the SDF file contributes one line of the form
    ``<InChIKey> <_Name property>`` to a sibling file whose name is the input
    path with ``.inchikey`` appended. Entries the supplier yields as falsy
    (unparseable records) are skipped.
    """
    sdf_path = sys.argv[1]
    molecules = Chem.SDMolSupplier(sdf_path)

    # A context manager guarantees the output file is closed even when
    # InChIKey generation or the "_Name" lookup raises part-way through.
    with open(sdf_path + ".inchikey", "w") as out_file:
        for mol in molecules:
            if mol:
                out_file.write(
                    inchi.MolToInchiKey(mol) + " " + mol.GetProp("_Name") + "\n")

예제 #4

0

파일 보기

def convert(input, input_mod='smi'):
    """
    Derive the formula, InChI and InChIKey of a molecule given as text.

    :param input: molecule string (SMILES for the default ``input_mod``)
    :param input_mod: format tag forwarded to ``read_string``; default 'smi'
    :return: tuple of (molecular formula, InChI, InChIKey)
    """
    parsed = read_string(input_mod, input)
    return (
        Descriptors.rdMolDescriptors.CalcMolFormula(parsed),
        inchi.MolToInchi(parsed),
        inchi.MolToInchiKey(parsed),
    )

예제 #5

0

파일 보기

 def _append_inchi_keys_dictionary_by_reference(self, inchi_dict: dict,
                                                smile: str):
     """Count one SMILES string in ``inchi_dict``, keyed by its InChIKey.

     ``inchi_dict`` is modified in place: each value is a two-item list
     ``[count, mol]`` where ``mol`` is the first molecule seen for that key.
     SMILES that cannot be parsed are ignored; a failure while generating
     the InChIKey is reported through ``self.log_message`` and otherwise
     ignored.
     """
     mol = Chem.MolFromSmiles(smile)
     if mol is None:
         return

     # Only the key generation is guarded; a bare ``except`` here used to
     # swallow KeyboardInterrupt/SystemExit and hide unrelated errors.
     try:
         inchi_key = inchi.MolToInchiKey(mol)
     except Exception:
         self.log_message(f"Failed to transform SMILES string: {smile}")
         return

     entry = inchi_dict.get(inchi_key)
     if entry is None:
         inchi_dict[inchi_key] = [1, mol]
     else:
         entry[0] += 1

예제 #6

0

파일 보기

파일: public_spectrum.py 프로젝트: chalbori/bmdms-np

    def set_computables_from_mol(self, mol):
        """Populate the computed descriptor attributes from ``mol``.

        Sets ``molecular_formula``, ``molecular_weight``, ``inchi``,
        ``inchikey`` and ``smiles`` (generated with ``isomericSmiles=False``).

        :param mol: molecule object handed to the descriptor/InChI/SMILES calls
        :raises SpectrumError: if any of the computations raises; the original
            exception is chained as the cause
        """
        try:  # warning comes up in pycharm (bug of pycharm)
            self.molecular_formula = Descriptors.rdMolDescriptors.CalcMolFormula(
                mol)
            self.molecular_weight = Descriptors.ExactMolWt(mol)
            self.inchi = inchi.MolToInchi(mol)
            self.inchikey = inchi.MolToInchiKey(mol)
            self.smiles = Chem.MolToSmiles(mol, isomericSmiles=False)
        except Exception as e:
            # ``e.args`` is a tuple: concatenating it to a str raised a
            # TypeError inside this handler and masked the SpectrumError.
            raise SpectrumError(
                f"Error occurred while computing properties: {e}") from e

        # Sanity checks on the computed values (skipped when running with -O).
        assert self.molecular_formula is not None, "molecular-formula can't be None"
        assert self.molecular_weight is not None, "molecular-weight can't be None"
        assert self.inchi is not None, "inchi can't be None"
        assert self.inchikey is not None, "inchikey can't be None"
        assert self.smiles is not None, "smiles can't be None"

예제 #7

0

파일 보기

파일: base_sampling_logger.py 프로젝트: sirimullalab/Reinvent

 def _count_unique_inchi_keys(self, smiles):
     """Rank the sampled structures by how often they occur.

     SMILES strings are grouped by InChIKey (unparseable SMILES are
     skipped); for each group the occurrence count and the first molecule
     seen are kept. The groups are ordered by descending count (ties keep
     first-seen order) and truncated to ``self._sample_size``.

     :param smiles: iterable of SMILES strings
     :return: tuple ``(list_of_labels, sorted_mols)`` where each label reads
         ``"Times sampled: <count>"`` and lines up with the molecule at the
         same index
     """
     inchi_dict = {}
     for smile in smiles:
         mol = Chem.MolFromSmiles(smile)
         if mol is None:
             continue
         inchi_key = inchi.MolToInchiKey(mol)
         # Explicit lookup instead of a bare ``except`` so that unrelated
         # errors are no longer silently treated as "first occurrence".
         entry = inchi_dict.get(inchi_key)
         if entry is None:
             inchi_dict[inchi_key] = [1, mol]
         else:
             entry[0] += 1

     # sorted() is stable, so equal counts stay in insertion order.
     ranked = sorted(inchi_dict.values(), key=lambda pair: -pair[0])
     ranked = ranked[:self._sample_size]
     list_of_labels = [f"Times sampled: {count}" for count, _ in ranked]
     sorted_mols = [kept for _, kept in ranked]
     return list_of_labels, sorted_mols

예제 #8

0

파일 보기

def main():
    """Add an InChIKey ``name`` column to a tab-separated table of SMILES.

    Reads the table at ``sys.argv[1]`` (header row expected, ``smiles``
    column used), prints the augmented table, and writes it tab-separated
    without the index to ``sys.argv[2]``.
    """
    missingfile = sys.argv[1]
    outputfile = sys.argv[2]

    missing = pd.read_csv(missingfile, sep="\t", header=0)

    # One InChIKey per row, in row order.
    missing['name'] = [
        ri.MolToInchiKey(Chem.MolFromSmiles(record.smiles))
        for _, record in missing.iterrows()
    ]
    print(missing)

    missing.to_csv(outputfile, sep="\t", index=False)

예제 #9

0

파일 보기

파일: process_database_sdf.py 프로젝트: ntcockroft/STarFish

def main():
    """Convert an SDF file into a CSV file with SMILES and InChIKey columns.

    Command-line options:
      -S/--sdf               name of the input .sdf file (required)
      -n/--name              name of the output csv file (required)
      -i/--input_directory   directory holding the input file (default './')
      -o/--output_directory  directory for the output file (default './')

    The SDF is loaded with ``PandasTools.LoadSDF`` (SMILES stored in a
    ``smiles`` column), an ``inchikey`` column is computed from the ``ROMol``
    column, and the frame is written without its index.
    """
    # Local import: only this function needs it, and the rest of the file's
    # import block is not visible from here.
    import os

    parser = argparse.ArgumentParser(description='Convert compounds and \
                                     associated information from an sdf file \
                                     into a csv file and generate smiles')
    # Both file names are indexed unconditionally below, so make argparse
    # report a missing one instead of failing later with a TypeError.
    parser.add_argument('-S', '--sdf', action='store', nargs=1,
                        required=True,
                        dest='sdf', help='File containing compounds \
                        (.sdf format)')
    parser.add_argument('-n', '--name', action='store', nargs=1,
                        required=True,
                        dest='name', help='Name of output csv file to write')
    parser.add_argument('-i', '--input_directory', action='store', nargs=1,
                        dest='input', default=['./'],
                        help='Directory where input files are stored')
    parser.add_argument('-o', '--output_directory', action='store', nargs=1,
                        dest='output', default=['./'],
                        help='Directory where output files should be written')
    args = vars(parser.parse_args())

    # os.path.join inserts the separator when the directory argument lacks a
    # trailing one; plain concatenation produced paths like "dirfile.sdf".
    sdf_path = os.path.join(args['input'][0], args['sdf'][0])
    csv_path = os.path.join(args['output'][0], args['name'][0])

    sdf_df = PandasTools.LoadSDF(sdf_path, smilesName='smiles')
    sdf_df['inchikey'] = [inchi.MolToInchiKey(mol) for mol in sdf_df['ROMol']]
    sdf_df.to_csv(csv_path, index=False)

예제 #10

0

파일 보기

파일: fill_base.py 프로젝트: FCreate/DBChem

def fill_base_test(cursor):
    """Fill every table of the test database from a CSV plus random filler.

    Reads ``toxicity_85832.csv``, inserts its molecules (InChIKey, InChI,
    canonical SMILES), then inserts randomly generated tasks, task runs and
    descriptors, descriptor values produced by ``func``, endpoints derived
    from the column names, and every non-NaN measurement as experimental
    data.

    :param cursor: DB-API cursor used for all inserts
    :return: the same cursor
    """
    frame = pd.read_csv("toxicity_85832.csv")
    names_of_columns = list(frame.columns)
    smiles = list(frame["SMILES"])
    toxic_vals = np.array(frame.drop("SMILES", axis=1).values)
    n_molecules = len(smiles)

    # --- molecules ---
    canonize_smiles = [_canonize_mixture(raw) for raw in smiles]
    inchi_smiles = [
        inchi.MolToInchi(Chem.MolFromSmiles(canonical))
        for canonical in canonize_smiles
    ]
    inchikey = [
        inchi.MolToInchiKey(Chem.MolFromSmiles(canonical))
        for canonical in canonize_smiles
    ]
    cursor.executemany(
        """insert into 'molecules' (inchi_key,inchi,canonical_smiles) values (?,?,?)""",
        zip(inchikey, inchi_smiles, canonize_smiles))

    # --- tasks ---
    descr_tasks = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for _ in range(20)
    ]
    n_tasks = len(descr_tasks)
    cursor.executemany("""insert into 'tasks' (descr) values (?)""",
                       ((descr,) for descr in descr_tasks))

    # --- tasks_running ---
    # The three random columns are drawn one full column at a time.
    completed = [random.randint(0, 1) for _ in range(1000)]
    running_task_ids = [random.randint(1, n_tasks) for _ in range(1000)]
    running_molecule_ids = [
        random.randint(1, n_molecules) for _ in range(1000)
    ]
    cursor.executemany(
        """insert into 'tasks_running' (id_task, id_molecule, completed) values (?,?,?)""",
        zip(running_task_ids, running_molecule_ids, completed))

    # --- descriptors ---
    name_of_descr = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for _ in range(10)
    ]
    name_of_version = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for _ in range(10)
    ]
    cursor.executemany(
        """insert into 'descriptors' (descriptor, version) values (?,?)""",
        zip(name_of_descr, name_of_version))
    cursor.execute(
        """insert into 'descriptors' (descriptor, version) values (?,?)""",
        ("mordred", "0.315"))

    # --- descriptor_values ---
    # Every row uses descriptor id 11 (ten random descriptors were inserted
    # above, followed by the "mordred" row).
    id_descriptor = [11] * n_molecules
    id_molecule = list(range(1, n_molecules + 1))
    value_task_ids = [random.randint(1, n_tasks) for _ in range(n_molecules)]
    valid = [random.randint(0, 1) for _ in range(n_molecules)]
    value = func(canonize_smiles)
    cursor.executemany(
        """insert into 'descriptors_values' (id_molecule, id_descriptor, id_task, valid, value) values (?,?,?,?,?)""",
        zip(id_molecule, id_descriptor, value_task_ids, valid, value))

    # --- endpoints ---
    # The first column name is skipped; the rest are split on '_' into a
    # description (second piece) and a type (remaining pieces).
    features = names_of_columns[1:]
    descriptions = [feature.split('_')[1] for feature in features]
    types = ['_'.join(feature.split('_')[2:]) for feature in features]
    cursor.executemany("""insert into 'endpoints' (desc, type) values (?,?)""",
                       zip(descriptions, types))

    # --- experimental data ---
    ids_molecules = []
    ids_endpoints = []
    values_endpoints = []
    for i in range(len(toxic_vals[:, 0])):
        for j in range(len(toxic_vals[0, :])):
            cell = toxic_vals[i, j]
            if np.isnan(cell):
                continue
            # Ids written to the database are 1-based.
            ids_molecules.append(i + 1)
            ids_endpoints.append(j + 1)
            values_endpoints.append(cell)

    cursor.executemany(
        """insert into 'experimental_data' (id_molecule, id_endpoint, value) values (?,?,?)""",
        zip(ids_molecules, ids_endpoints, values_endpoints))

    return cursor

예제 #11

0

파일 보기

파일: fill_base.py 프로젝트: FCreate/DBChem

def calculate_inchi_key(smile):
    """Return the InChIKey generated for the molecule parsed from ``smile``."""
    parsed = Chem.MolFromSmiles(smile)
    return inchi.MolToInchiKey(parsed)

예제 #12

0

파일 보기

파일: views.py 프로젝트: Arturossi/quimioteca

def loadSDF(sdfPath):
    """Load the molecules of an SDF file, render images and feed the database.

    For every readable molecule a ``data`` dict is built: each field is taken
    from the matching SD property when ``GetProp`` succeeds, otherwise from a
    computed or constant fallback. Molecules without a ``DATABASE_NAME``
    property are skipped. For the remaining ones 2D coordinates are computed,
    a 300x300 image and a 150x150 thumbnail are written under
    ``settings.FILES_DIR``, and ``feedDatabase(data)`` is called.

    NOTE(review): the visible snippet is cut off right after this body, so
    any trailing logic or return value cannot be seen from here.

    :param sdfPath: path handed to ``Chem.SDMolSupplier``
    """
    # Create images
    #generateImages(sdfPath)
     
    # Create a molecule supplier
    suppl = Chem.SDMolSupplier(sdfPath)
    
    # Filter empty entries
    sdf = [x for x in suppl if x is not None]
    
    # For each molecule in supplier
    for mol in sdf:
        data = {}
        
        # NOTE(review): values read through GetProp and the computed
        # fallbacks may differ in type (text vs. number) - confirm that
        # feedDatabase accepts both.
        try:
            data['fCharge'] = mol.GetProp('Charge')
        except:
            data['fCharge'] = Chem.GetFormalCharge(mol)
            
        try:
            data['name'] = mol.GetProp('DATABASE_ID')
        except:
            data['name'] = 'unkown'
            
        try:
            data['molMass'] = mol.GetProp('Total Molweight')
        except:
            data['molMass'] = Descriptors.ExactMolWt(mol) 
            
        try:
            data['cLogP'] = mol.GetProp('cLogP')
        except:
            data['cLogP'] = Crippen.MolLogP(mol) # not sure whether this is correct
            
        try:
            data['cLogS'] = mol.GetProp('cLogS')
        except:
            data['cLogS'] = 0.0
            
        try:
            data['tpsa'] = mol.GetProp('Polar Surface Area')
        except:
            data['tpsa'] = rdMolDescriptors.CalcTPSA(mol)
            
        # NOTE(review): the fallback reuses CalcTPSA, the same call used for
        # 'tpsa' above - confirm this is intended for the total surface area.
        try:
            data['totalSurfaceArea'] = mol.GetProp('Total Surface Area')
        except:
            data['totalSurfaceArea'] = rdMolDescriptors.CalcTPSA(mol)
        
        try:
            data['hbondAcceptors'] = mol.GetProp('H-Acceptors')
        except:
            data['hbondAcceptors'] = rdMolDescriptors.CalcNumHBA(mol)
            
        try:
            data['hbondDonnors'] = mol.GetProp('H-Donors')
        except:
            data['hbondDonnors'] = rdMolDescriptors.CalcNumHBD(mol)
            
        try:
            data['rotable'] = mol.GetProp('Rotatable Bonds')
        except:
            data['rotable'] = rdMolDescriptors.CalcNumRotatableBonds(mol)
            
        try:
            data['mutagenic'] = mol.GetProp('Mutagenic')
        except:
            data['mutagenic'] = 'Unknown'
            
        try:
            data['tumorigenic'] = mol.GetProp('Tumorigenic')
        except:
            data['tumorigenic'] = 'Unknown'
            
        try:
            data['irritant'] = mol.GetProp('Irritant')
        except:
            data['irritant'] = 'Unkown'
            
        try:
            data['smiles'] = mol.GetProp('SMILES')
        except:
            data['smiles'] = Chem.MolToSmiles(mol)
            
        try:
            data['InChI'] = mol.GetProp('INCHI_IDENTIFIER')
        except:
            data['InChI'] = inchi.MolToInchi(mol)
            
        try:
            data['inchiKey'] = mol.GetProp('INCHI_KEY')
        except:
            data['inchiKey'] = inchi.MolToInchiKey(mol)
            
        try:
            data['nonHAtoms'] = mol.GetProp('Non-H Atoms')
        except:
            data['nonHAtoms'] = -1 # don't know how to compute this
            
            
        try:
            data['numAtoms'] = mol.GetProp('numAtoms')
        except:
            data['numAtoms'] = mol.GetNumAtoms()
        
        # NOTE(review): the fallback stores the atom count, not a count of
        # stereo centers - looks like a placeholder; confirm.
        try:
            data['stereoCenters'] = mol.GetProp('Stereo Centers')
        except:
            data['stereoCenters'] = mol.GetNumAtoms()
            
        # The provider is mandatory: molecules without it are skipped after
        # printing a (Portuguese) message saying so.
        try:
            data['provider'] = mol.GetProp('DATABASE_NAME')
        except:
            print("Nenhum fornecedor encontrado, o campo é obrigatório!")
            continue
        
        tmp = AllChem.Compute2DCoords(mol) # Compute its coordinates
        
        # Full-size image, named after the InChIKey.
        Draw.MolToFile(mol, 
            os.path.join(settings.FILES_DIR, f'molImages/' + data["inchiKey"] + '.png'),
            size=(300,300),
            kekulize=True, 
            wedgeBonds=True,
            fitImage=True) # Save it
        
        # Thumbnail, same naming scheme.
        Draw.MolToFile(mol, 
            os.path.join(settings.FILES_DIR, f'molThumbs/' + data["inchiKey"] + '.png'),
            size=(150,150),
            kekulize=True,
            wedgeBonds=True,
            fitImage=True)
        
        # NOTE(review): feedDatabase is called here unconditionally and then
        # again in two of the branches below - confirm the double insert is
        # intended.
        feedDatabase(data)

        if Compounds.objects.filter(inChIKey=data['inchiKey']).exists():
            # NOTE(review): the filter value is the literal list
            # ['provider']; presumably data['provider'] was meant - verify.
            if not Compounds.objects.filter(provider=['provider']).exists():
                feedDatabase(data)
                print("feed1")
                # append to the database's SDF
                a = 1
            else:
                print("continue123")
                continue
                
        else:
            a = 1
            feedDatabase(data)
            print("feed2")
        '''except:

예제 #13

0

파일 보기

def one_species_confs(molecule, log, other_props, max_confs, forcefield,
                      nconf_gen, e_window, rms_tol, prun_tol, job_dir,
                      log_file, rep_e_window, fallback_to_align, temp,
                      clean_up, start_time):
    """Generate, cluster and summarise conformers for a single species.

    While the log file at ``log`` is open, the molecule is minimised,
    clustered, each cluster is written out, the clusters are re-clustered
    using the molecule returned by ``run_obabel``, the xyz files are renamed
    and timings are summarised. After the log is closed the results are
    parsed from ``job_dir``.

    The force field is switched to 'uff' when any entry of ``UFF_ELEMENTS``
    occurs in ``molecule``.

    :return: the dictionary produced by ``parse_results``
    """
    # Keep the input string for parse_results before anything else runs.
    smiles = copy.deepcopy(molecule)

    with open(log, "w") as output:
        output.write("The smiles strings that will be run are:\n")
        output.write("\n".join([molecule]) + "\n")

        needs_uff = any(element in molecule for element in UFF_ELEMENTS)
        if needs_uff:
            output.write("Switching to UFF, since MMFF94 does "
                         "not have boron and/or aluminum\n")
            forcefield = 'uff'

        generator, gen_time, min_time = minimize(
            output=output,
            molecule=molecule,
            forcefield=forcefield,
            nconf_gen=nconf_gen,
            prun_tol=prun_tol,
            e_window=e_window,
            rms_tol=rms_tol,
            rep_e_window=rep_e_window,
        )
        clusters = generator.cluster(
            rms_tolerance=float(rms_tol),
            max_ranked_conformers=int(max_confs),
            energy_window=float(e_window),
            Report_e_tol=float(rep_e_window),
            output=output,
        )

        cluster_time = time.time()
        inchikey = inchi.MolToInchiKey(get_mol(molecule))

        for conf_idx, conformer in enumerate(clusters):
            write_clusters(output=output,
                           idx=conf_idx,
                           conformer=conformer,
                           inchikey=inchikey,
                           path=job_dir)

        # run_obabel receives the index of the last cluster written above.
        converted = run_obabel(inchikey=inchikey, idx=conf_idx)
        generator.recluster(
            path=job_dir,
            rms_tolerance=float(rms_tol),
            max_ranked_conformers=int(max_confs),
            energy_window=float(e_window),
            output=output,
            clustered_confs=clusters,
            molecule=converted,
            key=inchikey,
            fallback_to_align=fallback_to_align,
        )
        rename_xyz_files(path=job_dir)
        summarize(output=output,
                  gen_time=gen_time,
                  start_time=start_time,
                  min_time=min_time,
                  cluster_time=cluster_time)

    return parse_results(job_dir=job_dir,
                         log_file=log_file,
                         inchikey=inchikey,
                         max_confs=max_confs,
                         other_props=other_props,
                         temp=temp,
                         smiles=smiles,
                         clean_up=clean_up)

예제 #14

0

파일 보기

파일: remove_dupl_rdkit.py 프로젝트: meddwl/rdkit-scripts

def get_inchi_key(mol, stereo):
    """Return the InChIKey of ``mol``, optionally without its middle block.

    :param mol: molecule passed to ``inchi.MolToInchiKey``
    :param stereo: when truthy the full key is returned; otherwise the key
        is split on '-' and only the first and third pieces are kept
    :return: the (possibly shortened) InChIKey string
    """
    full_key = inchi.MolToInchiKey(mol)
    if stereo:
        return full_key

    pieces = full_key.split('-')
    # Drop the middle part, which is responsible for stereo and isotopes.
    return pieces[0] + '-' + pieces[2]

예제 #15

0

파일 보기

파일: valid_unique_np.py 프로젝트: chalbori/coconut_retrieve

converted_not_match_file = open(
    "out/inchi_valid_check/converted_not_match.txt",
    mode="w",
    encoding="utf-8")
w = SDWriter("out/inchi_valid_check/converted.sdf")
np: Unique_NP
for np in repo.get_unique_stream():
    mol = Mol()
    try:
        mol = Chem.MolFromInchi(inchi=np.inchi, treatWarningAsError=True)
        mol.SetProp("coconut_id", np.coconut_id)
    except:
        not_converted += 1

    if mol:
        mol_inchikey = inchi.MolToInchiKey(mol)

        if np.inchikey == mol_inchikey:
            converted_list_file.write(np.inchi + "\n")
            w.write(mol)
            converted += 1
        else:
            converted_not_match_file.write(np.inchi + "\n")
            not_converted += 1
    i += 1
    del np

    if i % 1000 == 0:
        print("{}th checked".format(i))

    # if i > 5: