コード例 #1
0
 def set_computable(self):
     """Fill in the identifiers that are computed from ``self._mol``.

     ``self._mol`` is parsed with ``tool_chemical.read_string("mol", ...)``
     and the result is used to set ``_smiles`` (non-isomeric), ``_inchi``,
     ``_inchikey``, ``_molecular_formula`` and ``_molecular_weight``.
     """
     parsed = tool_chemical.read_string("mol", self._mol)

     # Structure identifiers.
     self._smiles = Chem.MolToSmiles(parsed, isomericSmiles=False)
     self._inchi = inchi.MolToInchi(parsed)
     self._inchikey = inchi.MolToInchiKey(parsed)

     # Composition descriptors.
     self._molecular_formula = Chem.CalcMolFormula(parsed)
     self._molecular_weight = Chem.CalcExactMolWt(parsed)
コード例 #2
0
def add_to_summary(summary_dic, conf_dic, smiles, save_dir):
    """Store a summary entry for ``smiles`` and return its pickle location.

    The entry is a copy of ``conf_dic`` without its ``"conformers"`` key, plus
    a ``"pickle_path"`` key pointing at ``<abs save_dir>/<inchikey>.pickle``.

    :param summary_dic: dict that is updated in place, keyed by SMILES
    :param conf_dic: per-species results; ``"conformers"`` is left out
    :param smiles: SMILES string, also used to compute the InChIKey
    :param save_dir: directory the pickle path is built under
    :return: ``(summary_dic, pickle_path)``
    """
    key = inchi.MolToInchiKey(get_mol(smiles))
    pickle_path = os.path.join(os.path.abspath(save_dir), f"{key}.pickle")

    entry = {}
    for name, value in conf_dic.items():
        if name == "conformers":
            continue
        entry[name] = value
    entry["pickle_path"] = pickle_path
    summary_dic[smiles] = entry

    return summary_dic, pickle_path
コード例 #3
0
def sdf_to_inchikey():
    """Write ``<InChIKey> <name>`` lines for every molecule of an SD file.

    The SD file path is taken from ``sys.argv[1]``; the output is written next
    to it as ``<path>.inchikey``. Entries the supplier yields as ``None`` are
    skipped.
    """
    sdf_path = sys.argv[1]
    molecules = Chem.SDMolSupplier(sdf_path)

    # The context manager guarantees the file is closed even when a molecule
    # makes GetProp / MolToInchiKey raise half-way through the loop.
    with open(sdf_path + ".inchikey", "w") as out:
        for mol in molecules:
            if mol is None:
                continue
            out.write(
                inchi.MolToInchiKey(mol) + " " + mol.GetProp("_Name") + "\n")
コード例 #4
0
def convert(input, input_mod='smi'):
    """
    Derive the molecular formula, InChI and InChIKey of one structure.

    :param input: structure string handed to ``read_string`` (SMILES by default)
    :param input_mod: format tag handed to ``read_string``; defaults to 'smi'
    :return: tuple of (molecular formula, inchi, inchikey)
    """
    parsed = read_string(input_mod, input)
    return (
        Descriptors.rdMolDescriptors.CalcMolFormula(parsed),
        inchi.MolToInchi(parsed),
        inchi.MolToInchiKey(parsed),
    )
コード例 #5
0
 def _append_inchi_keys_dictionary_by_reference(self, inchi_dict: dict,
                                                smile: str):
     """Count one occurrence of ``smile`` in ``inchi_dict`` (updated in place).

     ``inchi_dict`` maps InChIKey -> ``[count, mol]``. SMILES that RDKit cannot
     parse are ignored; a failure while computing the InChIKey is reported
     through ``self.log_message`` and the dictionary is left untouched.

     :param inchi_dict: accumulator dictionary, mutated by this call
     :param smile: SMILES string to register
     """
     mol = Chem.MolFromSmiles(smile)
     if mol is None:
         return

     # Only the InChIKey conversion is guarded; the previous bare ``except``
     # also swallowed KeyboardInterrupt/SystemExit.
     try:
         inchi_key = inchi.MolToInchiKey(mol)
     except Exception:
         self.log_message(f"Failed to transform SMILES string: {smile}")
         return

     # Explicit membership test instead of using an exception as control flow.
     if inchi_key in inchi_dict:
         inchi_dict[inchi_key][0] += 1
     else:
         inchi_dict[inchi_key] = [1, mol]
コード例 #6
0
ファイル: public_spectrum.py プロジェクト: chalbori/bmdms-np
    def set_computables_from_mol(self, mol):
        """Compute and store the derived properties of ``mol`` on this object.

        Sets ``molecular_formula``, ``molecular_weight`` (exact weight),
        ``inchi``, ``inchikey`` and ``smiles`` (non-isomeric).

        :param mol: molecule object passed to the RDKit descriptor functions
        :raises SpectrumError: if any of the computations raises; the original
            exception is chained as the cause
        :raises AssertionError: if a computed property is ``None``
        """
        try:  # warning comes up in pycharm (bug of pycharm)
            self.molecular_formula = Descriptors.rdMolDescriptors.CalcMolFormula(
                mol)
            self.molecular_weight = Descriptors.ExactMolWt(mol)
            self.inchi = inchi.MolToInchi(mol)
            self.inchikey = inchi.MolToInchiKey(mol)
            self.smiles = Chem.MolToSmiles(mol, isomericSmiles=False)
        except Exception as e:
            # Format the cause into the message. Concatenating ``e.args`` (a
            # tuple) to a str raised TypeError here and hid the real error.
            raise SpectrumError(
                f"Error occurred while computing properties: {e}") from e

        assert self.molecular_formula is not None, "molecular-formula can't be None"
        assert self.molecular_weight is not None, "molecular-weight can't be None"
        assert self.inchi is not None, "inchi can't be None"
        assert self.inchikey is not None, "inchikey can't be None"
        assert self.smiles is not None, "smiles can't be None"
コード例 #7
0
 def _count_unique_inchi_keys(self, smiles):
     """Rank the distinct molecules in ``smiles`` by how often they occur.

     Molecules are identified by InChIKey; SMILES that RDKit cannot parse are
     skipped. For each key the first molecule seen is kept as representative.

     :param smiles: iterable of SMILES strings
     :return: ``(labels, mols)`` for at most ``self._sample_size`` molecules,
         most frequent first; labels read ``"Times sampled: <count>"``
     """
     inchi_dict = {}  # InChIKey -> [count, first mol seen with that key]
     for smile in smiles:
         mol = Chem.MolFromSmiles(smile)
         if mol is None:
             continue
         inchi_key = inchi.MolToInchiKey(mol)
         # Explicit membership test replaces the former bare ``except``.
         if inchi_key in inchi_dict:
             inchi_dict[inchi_key][0] += 1
         else:
             inchi_dict[inchi_key] = [1, mol]

     # sorted() is stable, so ties keep first-seen order.
     ranked = sorted(inchi_dict.values(), key=lambda entry: -entry[0])
     ranked = ranked[:self._sample_size]
     list_of_labels = [f"Times sampled: {count}" for count, _ in ranked]
     sorted_mols = [kept for _, kept in ranked]
     return list_of_labels, sorted_mols
コード例 #8
0
def main():
    """Add an InChIKey ``name`` column to a tab-separated table of SMILES.

    ``sys.argv[1]`` is the input TSV (with a header row and a ``smiles``
    column); ``sys.argv[2]`` is where the extended table is written. The
    table is also printed.
    """
    missingfile, outputfile = sys.argv[1], sys.argv[2]

    missing = pd.read_csv(missingfile, sep="\t", header=0)

    # One InChIKey per row, in row order.
    missing['name'] = [
        ri.MolToInchiKey(Chem.MolFromSmiles(record.smiles))
        for _, record in missing.iterrows()
    ]
    print(missing)

    missing.to_csv(outputfile, sep="\t", index=False)
コード例 #9
0
def main():
    """Convert an SD file to CSV, adding SMILES and InChIKey columns.

    Command-line options: ``-S/--sdf`` (input file name, required),
    ``-n/--name`` (output CSV name, required), ``-i/--input_directory`` and
    ``-o/--output_directory`` (both default to ``./``).
    """
    # Local import so this block does not depend on the file's import list.
    import os

    parser = argparse.ArgumentParser(description='Convert compounds and \
                                     associated information from an sdf file \
                                     into a csv file and generate smiles')
    # -S and -n are dereferenced unconditionally below, so argparse must
    # reject a missing value instead of letting ``None[0]`` raise TypeError.
    parser.add_argument('-S', '--sdf', action='store', nargs=1,
                        dest='sdf', required=True,
                        help='File containing compounds \
                        (.sdf format)')
    parser.add_argument('-n', '--name', action='store', nargs=1,
                        dest='name', required=True,
                        help='Name of output csv file to write')
    parser.add_argument('-i', '--input_directory', action='store', nargs=1,
                        dest='input', default=['./'],
                        help='Directory where input files are stored')
    parser.add_argument('-o', '--output_directory', action='store', nargs=1,
                        dest='output', default=['./'],
                        help='Directory where output files should be written')
    args = vars(parser.parse_args())

    # os.path.join also works when the directory has no trailing separator;
    # plain concatenation turned "dir" + "x.sdf" into "dirx.sdf".
    sdf_path = os.path.join(args['input'][0], args['sdf'][0])
    csv_path = os.path.join(args['output'][0], args['name'][0])

    sdf_df = PandasTools.LoadSDF(sdf_path, smilesName='smiles')
    sdf_df['inchikey'] = [inchi.MolToInchiKey(mol) for mol in sdf_df['ROMol']]
    sdf_df.to_csv(csv_path, index=False)
コード例 #10
0
ファイル: fill_base.py プロジェクト: FCreate/DBChem
def fill_base_test(cursor):
    """Populate the database tables with test data derived from a CSV file.

    Reads ``toxicity_85832.csv`` (relative to the current working directory)
    and issues ``insert`` statements through ``cursor`` for the tables
    ``molecules``, ``tasks``, ``tasks_running``, ``descriptors``,
    ``descriptors_values``, ``endpoints`` and ``experimental_data``. Task and
    descriptor rows are filled with random strings/numbers.

    :param cursor: DB-API cursor using ``?`` placeholders
        (presumably sqlite3 -- TODO confirm)
    :return: the same ``cursor`` that was passed in
    """
    df = pd.read_csv("toxicity_85832.csv")
    #df = df.drop("Unnamed: 0", axis=1)
    # Column names are captured BEFORE the SMILES column is dropped.
    names_of_columns = list(df.columns)
    smiles = list(df["SMILES"])
    df = df.drop("SMILES", axis=1)
    # Remaining columns form the value matrix (rows = molecules).
    toxic_vals = np.array(df.values)

    # --- molecules: canonical SMILES plus InChI / InChIKey of each ---
    canonize_smiles = [_canonize_mixture(smile) for smile in smiles]
    inchi_smiles = [
        inchi.MolToInchi(Chem.MolFromSmiles(smile))
        for smile in canonize_smiles
    ]
    inchikey = [
        inchi.MolToInchiKey(Chem.MolFromSmiles(smile))
        for smile in canonize_smiles
    ]
    # NOTE(review): ``ids`` is not used anywhere below in this function.
    ids = [x for x in range(len(canonize_smiles))]
    ziped_vals = zip(inchikey, inchi_smiles, canonize_smiles)
    cursor.executemany(
        """insert into 'molecules' (inchi_key,inchi,canonical_smiles) values (?,?,?)""",
        ziped_vals)

    # --- tasks: 20 rows with random descriptions of length 1..30 ---
    descr_tasks = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for i in range(20)
    ]
    # zip() over a single list yields 1-tuples, the row shape executemany needs.
    cursor.executemany("""insert into 'tasks' (descr) values (?)""",
                       zip(descr_tasks))

    # --- tasks_running: 1000 random (task, molecule, completed) rows ---
    # Ids are drawn from 1..N, i.e. they presume 1-based row ids in the
    # referenced tables -- TODO confirm against the schema.
    completed = [random.randint(0, 1) for i in range(1000)]
    id_tasks = [random.randint(1, len(descr_tasks)) for i in range(1000)]
    id_molecules = [random.randint(1, len(smiles)) for i in range(1000)]
    zip_tasks_running = zip(id_tasks, id_molecules, completed)
    cursor.executemany(
        """insert into 'tasks_running' (id_task, id_molecule, completed) values (?,?,?)""",
        zip_tasks_running)

    # --- descriptors: 10 random (descriptor, version) rows + one "mordred" ---
    name_of_descr = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for i in range(10)
    ]
    name_of_version = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for i in range(10)
    ]
    ziped_versions = zip(name_of_descr, name_of_version)
    cursor.executemany(
        """insert into 'descriptors' (descriptor, version) values (?,?)""",
        ziped_versions)
    cursor.execute(
        """insert into 'descriptors' (descriptor, version) values (?,?)""",
        ("mordred", "0.315"))

    # --- descriptors_values: one row per molecule ---
    # NOTE(review): 11 is presumably the row id of the "mordred" descriptor
    # inserted above (10 random rows + 1); that only holds if the table was
    # empty beforehand -- confirm.
    id_descriptor = [11 for i in range(len(smiles))]
    id_molecule = [x + 1 for x in range(len(smiles))]
    id_tasks = [
        random.randint(1, len(descr_tasks)) for i in range(len(smiles))
    ]
    valid = [random.randint(0, 1) for i in range(len(smiles))]
    # ``func`` is defined elsewhere; presumably it returns one descriptor
    # value per canonical SMILES -- TODO confirm.
    value = func(canonize_smiles)
    ziped_descr_vals = zip(id_molecule, id_descriptor, id_tasks, valid, value)
    cursor.executemany(
        """insert into 'descriptors_values' (id_molecule, id_descriptor, id_task, valid, value) values (?,?,?,?,?)""",
        ziped_descr_vals)

    # --- endpoints: parsed from the column names ---
    # The first column is skipped (assumes it is the SMILES column -- TODO
    # confirm). Each remaining name is split on '_': field 1 becomes the
    # description, fields 2.. (re-joined with '_') become the type.
    features = names_of_columns[1:]
    descriptions = [feature.split('_')[1] for feature in features]
    types = ['_'.join(feature.split('_')[2:]) for feature in features]
    ziped_endpoints = zip(descriptions, types)
    cursor.executemany("""insert into 'endpoints' (desc, type) values (?,?)""",
                       ziped_endpoints)

    # --- experimental data: one row per non-NaN cell of the value matrix ---
    ids_molecules = []
    ids_endpoints = []
    values_endpoints = []
    for i in range(len(toxic_vals[:, 0])):
        for j in range(len(toxic_vals[0, :])):
            if (~np.isnan(toxic_vals[i, j])):
                # Row/column indices are shifted to 1-based ids.
                ids_molecules.append(i + 1)
                ids_endpoints.append(j + 1)
                values_endpoints.append(toxic_vals[i, j])

    ziped_experimental_data = zip(ids_molecules, ids_endpoints,
                                  values_endpoints)
    cursor.executemany(
        """insert into 'experimental_data' (id_molecule, id_endpoint, value) values (?,?,?)""",
        ziped_experimental_data)

    return cursor
コード例 #11
0
ファイル: fill_base.py プロジェクト: FCreate/DBChem
def calculate_inchi_key(smile):
    """Return the InChIKey of the molecule parsed from the SMILES ``smile``."""
    mol = Chem.MolFromSmiles(smile)
    return inchi.MolToInchiKey(mol)
コード例 #12
0
ファイル: views.py プロジェクト: Arturossi/quimioteca
def _sd_prop(mol, prop_name, fallback):
    """Return SD property ``prop_name`` of ``mol``, or ``fallback()`` if it is missing."""
    try:
        return mol.GetProp(prop_name)
    except KeyError:
        # RDKit raises KeyError for a property that is not set.
        return fallback()


def loadSDF(sdfPath):
    """Import every molecule of an SD file into the compounds database.

    For each parsable molecule a ``data`` dict is built from the SD
    properties, with an RDKit-computed value or a placeholder wherever a
    property is missing. Molecules without a ``DATABASE_NAME`` property are
    reported and skipped. Two PNG depictions (300x300 and 150x150) are written
    under ``settings.FILES_DIR`` and the record is passed to ``feedDatabase``.

    :param sdfPath: path of the SD file to load
    """
    # Create a molecule supplier and filter out entries RDKit could not parse
    suppl = Chem.SDMolSupplier(sdfPath)
    sdf = [x for x in suppl if x is not None]

    # For each molecule in supplier
    for mol in sdf:
        data = {
            'fCharge': _sd_prop(mol, 'Charge',
                                lambda: Chem.GetFormalCharge(mol)),
            'name': _sd_prop(mol, 'DATABASE_ID', lambda: 'unkown'),
            'molMass': _sd_prop(mol, 'Total Molweight',
                                lambda: Descriptors.ExactMolWt(mol)),
            # original author's note: not sure this fallback is correct
            'cLogP': _sd_prop(mol, 'cLogP', lambda: Crippen.MolLogP(mol)),
            'cLogS': _sd_prop(mol, 'cLogS', lambda: 0.0),
            'tpsa': _sd_prop(mol, 'Polar Surface Area',
                             lambda: rdMolDescriptors.CalcTPSA(mol)),
            # NOTE(review): falls back to the *polar* surface area.
            'totalSurfaceArea': _sd_prop(
                mol, 'Total Surface Area',
                lambda: rdMolDescriptors.CalcTPSA(mol)),
            'hbondAcceptors': _sd_prop(
                mol, 'H-Acceptors',
                lambda: rdMolDescriptors.CalcNumHBA(mol)),
            'hbondDonnors': _sd_prop(
                mol, 'H-Donors',
                lambda: rdMolDescriptors.CalcNumHBD(mol)),
            'rotable': _sd_prop(
                mol, 'Rotatable Bonds',
                lambda: rdMolDescriptors.CalcNumRotatableBonds(mol)),
            'mutagenic': _sd_prop(mol, 'Mutagenic', lambda: 'Unknown'),
            'tumorigenic': _sd_prop(mol, 'Tumorigenic', lambda: 'Unknown'),
            'irritant': _sd_prop(mol, 'Irritant', lambda: 'Unkown'),
            'smiles': _sd_prop(mol, 'SMILES',
                               lambda: Chem.MolToSmiles(mol)),
            'InChI': _sd_prop(mol, 'INCHI_IDENTIFIER',
                              lambda: inchi.MolToInchi(mol)),
            'inchiKey': _sd_prop(mol, 'INCHI_KEY',
                                 lambda: inchi.MolToInchiKey(mol)),
            # original author's note: don't know how to compute this one
            'nonHAtoms': _sd_prop(mol, 'Non-H Atoms', lambda: -1),
            'numAtoms': _sd_prop(mol, 'numAtoms', lambda: mol.GetNumAtoms()),
            # NOTE(review): the fallback is the atom count, not a
            # stereo-centre count; kept as in the original.
            'stereoCenters': _sd_prop(mol, 'Stereo Centers',
                                      lambda: mol.GetNumAtoms()),
        }

        # The provider is mandatory: report and skip molecules without one.
        try:
            data['provider'] = mol.GetProp('DATABASE_NAME')
        except KeyError:
            print("Nenhum fornecedor encontrado, o campo é obrigatório!")
            continue

        AllChem.Compute2DCoords(mol)  # Compute its coordinates

        # Save the full-size image and the thumbnail
        for folder, size in (('molImages/', (300, 300)),
                             ('molThumbs/', (150, 150))):
            Draw.MolToFile(mol,
                os.path.join(settings.FILES_DIR,
                             folder + data["inchiKey"] + '.png'),
                size=size,
                kekulize=True,
                wedgeBonds=True,
                fitImage=True)

        # NOTE(review): this unconditional call precedes the existence checks
        # below, so a molecule may be fed twice; kept as in the original
        # because feedDatabase's semantics are not visible here.
        feedDatabase(data)

        if Compounds.objects.filter(inChIKey=data['inchiKey']).exists():
            # Compare against this molecule's provider; the original passed
            # the literal list ['provider'] instead of data['provider'].
            if not Compounds.objects.filter(provider=data['provider']).exists():
                feedDatabase(data)
                print("feed1")
                # TODO (original note): append to the database's SDF
            else:
                print("continue123")
                continue

        else:
            feedDatabase(data)
            print("feed2")
        '''except:
コード例 #13
0
def one_species_confs(molecule, log, other_props, max_confs, forcefield,
                      nconf_gen, e_window, rms_tol, prun_tol, job_dir,
                      log_file, rep_e_window, fallback_to_align, temp,
                      clean_up, start_time):
    """Generate, cluster and summarise conformers for a single species.

    Progress is written to the file ``log``. The steps are: ``minimize``,
    ``cluster``, one ``write_clusters`` call per clustered conformer,
    ``run_obabel``, ``recluster``, ``rename_xyz_files`` and ``summarize``.
    The results are then collected with ``parse_results``.

    The force field is switched to ``'uff'`` when any entry of
    ``UFF_ELEMENTS`` occurs in ``molecule``.

    :return: the dictionary produced by ``parse_results``
    """
    smiles = copy.deepcopy(molecule)
    with open(log, "w") as output:
        output.write("The smiles strings that will be run are:\n")
        output.write("\n".join([molecule]) + "\n")

        needs_uff = any([element in molecule for element in UFF_ELEMENTS])
        if needs_uff:
            output.write("Switching to UFF, since MMFF94 does "
                         "not have boron and/or aluminum\n")
            forcefield = 'uff'

        minimized = minimize(output=output,
                             molecule=molecule,
                             forcefield=forcefield,
                             nconf_gen=nconf_gen,
                             prun_tol=prun_tol,
                             e_window=e_window,
                             rms_tol=rms_tol,
                             rep_e_window=rep_e_window)
        confgen, gen_time, min_time = minimized

        # Settings shared by the clustering and the re-clustering step.
        cluster_settings = {
            "rms_tolerance": float(rms_tol),
            "max_ranked_conformers": int(max_confs),
            "energy_window": float(e_window),
        }
        clustered_confs = confgen.cluster(Report_e_tol=float(rep_e_window),
                                          output=output,
                                          **cluster_settings)

        cluster_time = time.time()
        inchikey = inchi.MolToInchiKey(get_mol(molecule))

        # ``last_idx`` is deliberately read after the loop: run_obabel gets
        # the index of the final conformer written.
        for last_idx, conformer in enumerate(clustered_confs):
            write_clusters(output=output,
                           idx=last_idx,
                           conformer=conformer,
                           inchikey=inchikey,
                           path=job_dir)

        molecule = run_obabel(inchikey=inchikey, idx=last_idx)
        confgen.recluster(path=job_dir,
                          output=output,
                          clustered_confs=clustered_confs,
                          molecule=molecule,
                          key=inchikey,
                          fallback_to_align=fallback_to_align,
                          **cluster_settings)
        rename_xyz_files(path=job_dir)
        summarize(output=output,
                  gen_time=gen_time,
                  start_time=start_time,
                  min_time=min_time,
                  cluster_time=cluster_time)

    return parse_results(job_dir=job_dir,
                         log_file=log_file,
                         inchikey=inchikey,
                         max_confs=max_confs,
                         other_props=other_props,
                         temp=temp,
                         smiles=smiles,
                         clean_up=clean_up)
コード例 #14
0
def get_inchi_key(mol, stereo):
    """Return the InChIKey of ``mol``, optionally without its middle block.

    :param mol: molecule passed to ``inchi.MolToInchiKey``
    :param stereo: when falsy, the second of the three '-'-separated blocks
        (responsible for stereo and isotopes) is dropped from the key
    :return: the full key, or ``"<first block>-<third block>"``
    """
    full_key = inchi.MolToInchiKey(mol)
    if stereo:
        return full_key
    blocks = full_key.split('-')
    return '-'.join((blocks[0], blocks[2]))
コード例 #15
0
# Entries whose InChI converts to a molecule but whose regenerated InChIKey
# differs from the stored one are listed in this file.
converted_not_match_file = open(
    "out/inchi_valid_check/converted_not_match.txt",
    mode="w",
    encoding="utf-8")
w = SDWriter("out/inchi_valid_check/converted.sdf")
np: Unique_NP
for np in repo.get_unique_stream():
    # Start from None rather than an empty placeholder molecule: a placeholder
    # is still truthy after a failed conversion, which made the entry fall
    # through to the key comparison and be counted in ``not_converted`` a
    # second time (and be written to the "not match" file).
    mol = None
    try:
        mol = Chem.MolFromInchi(inchi=np.inchi, treatWarningAsError=True)
        mol.SetProp("coconut_id", np.coconut_id)
    except Exception:
        # Conversion raised, returned None, or the id could not be attached:
        # count the entry once and skip the comparison.
        mol = None
        not_converted += 1

    if mol is not None:
        mol_inchikey = inchi.MolToInchiKey(mol)

        if np.inchikey == mol_inchikey:
            converted_list_file.write(np.inchi + "\n")
            w.write(mol)
            converted += 1
        else:
            converted_not_match_file.write(np.inchi + "\n")
            not_converted += 1
    i += 1
    del np

    # Progress report every 1000 entries.
    if i % 1000 == 0:
        print("{}th checked".format(i))

    # if i > 5: