Exemple #1
0
 def set_computable(self):
     """Fill in the derived identifiers from the structure stored in ``self._mol``.

     The stored text is parsed with ``tool_chemical.read_string`` in "mol"
     mode, then the non-isomeric SMILES, InChI, InChIKey, molecular formula
     and exact molecular weight are stored on the instance.
     """
     parsed = tool_chemical.read_string("mol", self._mol)

     # Text identifiers first, then the composition-based values.
     self._smiles = Chem.MolToSmiles(parsed, isomericSmiles=False)
     self._inchi = inchi.MolToInchi(parsed)
     self._inchikey = inchi.MolToInchiKey(parsed)

     self._molecular_formula = Chem.CalcMolFormula(parsed)
     self._molecular_weight = Chem.CalcExactMolWt(parsed)
Exemple #2
0
def add_to_summary(summary_dic, conf_dic, smiles, save_dir):
    """Record the non-conformer results for ``smiles`` in ``summary_dic``.

    Every entry of ``conf_dic`` except "conformers" is copied into a new
    dict, together with the absolute path of the pickle file (named after
    the molecule's InChIKey, inside ``save_dir``).

    Returns the updated ``summary_dic`` and the pickle path.
    """
    key = inchi.MolToInchiKey(get_mol(smiles))
    target = os.path.join(os.path.abspath(save_dir), f"{key}.pickle")

    entry = {}
    for name, value in conf_dic.items():
        if name == "conformers":
            continue
        entry[name] = value
    entry["pickle_path"] = target

    summary_dic[smiles] = entry
    return summary_dic, target
def sdf_to_inchikey():
    """Write one "<InChIKey> <name>" line per readable molecule of an SD file.

    The input path is read from ``sys.argv[1]``; the output is written to the
    same path with ".inchikey" appended. Entries the supplier yields as falsy
    (unparseable records) are skipped.
    """
    source = sys.argv[1]
    molecules = Chem.SDMolSupplier(source)

    # The context manager guarantees the file is flushed and closed even if
    # a molecule fails (e.g. a record without the "_Name" property).
    with open(source + ".inchikey", "w") as out:
        for mol in molecules:
            if mol:
                out.write(
                    inchi.MolToInchiKey(mol) + " " + mol.GetProp("_Name") + "\n")
Exemple #4
0
def convert(input, input_mod='smi'):
    """
    Derive other molecular identifiers from a structure string.

    :param input: structure string (SMILES by default)
    :param input_mod: format tag handed to ``read_string``; defaults to 'smi'
    :return: tuple of (molecular formula, InChI, InChIKey)
    """
    parsed = read_string(input_mod, input)
    # Tuple elements are evaluated left to right: formula, InChI, InChIKey.
    return (
        Descriptors.rdMolDescriptors.CalcMolFormula(parsed),
        inchi.MolToInchi(parsed),
        inchi.MolToInchiKey(parsed),
    )
Exemple #5
0
 def _append_inchi_keys_dictionary_by_reference(self, inchi_dict: dict,
                                                smile: str):
     """Count ``smile`` in ``inchi_dict``, keyed by its InChIKey (in place).

     ``inchi_dict`` maps an InChIKey to ``[count, mol]``. A new key is
     inserted with a count of 1 and the parsed molecule; an existing key has
     its count incremented. SMILES that do not parse are ignored. Any failure
     while producing or recording the key is reported through
     ``self.log_message`` instead of being raised.
     """
     mol = Chem.MolFromSmiles(smile)
     if mol is None:
         return

     try:
         inchi_key = inchi.MolToInchiKey(mol)
         try:
             inchi_dict[inchi_key][0] += 1
         except KeyError:
             # First time this structure is seen.
             inchi_dict[inchi_key] = [1, mol]
     except Exception:
         # `Exception` rather than a bare `except:` so that
         # KeyboardInterrupt / SystemExit still propagate.
         self.log_message(f"Failed to transform SMILES string: {smile}")
Exemple #6
0
    def set_computables_from_mol(self, mol):
        """Compute and store the identifiers derived from ``mol``.

        Sets ``molecular_formula``, ``molecular_weight`` (exact weight),
        ``inchi``, ``inchikey`` and ``smiles`` (non-isomeric) on the instance.

        :raises SpectrumError: if any of the computations fails; the original
            exception is chained as the cause.
        """
        try:  # warning comes up in pycharm (bug of pycharm)
            self.molecular_formula = Descriptors.rdMolDescriptors.CalcMolFormula(
                mol)
            self.molecular_weight = Descriptors.ExactMolWt(mol)
            self.inchi = inchi.MolToInchi(mol)
            self.inchikey = inchi.MolToInchiKey(mol)
            self.smiles = Chem.MolToSmiles(mol, isomericSmiles=False)
        except Exception as e:
            # Format the cause into the message: concatenating the string
            # with the `e.args` tuple raised TypeError and masked the error.
            raise SpectrumError(
                f"Error occurred while computing properties: {e}") from e

        assert self.molecular_formula is not None, "molecular-formula can't be None"
        assert self.molecular_weight is not None, "molecular-weight can't be None"
        assert self.inchi is not None, "inchi can't be None"
        assert self.inchikey is not None, "inchikey can't be None"
        assert self.smiles is not None, "smiles can't be None"
 def _count_unique_inchi_keys(self, smiles):
     """Rank the distinct structures in ``smiles`` by how often they occur.

     SMILES are grouped by InChIKey; strings that do not parse are skipped.
     The groups are ordered by descending count (ties keep first-seen order)
     and truncated to ``self._sample_size`` entries.

     Returns a pair ``(labels, mols)``: a list of "Times sampled: N" strings
     and the list of the first molecule parsed for each group, in the same
     order.
     """
     inchi_dict = {}  # InChIKey -> [count, first mol seen]
     for smile in smiles:
         mol = Chem.MolFromSmiles(smile)
         if mol is None:
             continue
         inchi_key = inchi.MolToInchiKey(mol)
         try:
             inchi_dict[inchi_key][0] += 1
         except KeyError:
             inchi_dict[inchi_key] = [1, mol]

     # sorted() is stable, so equal counts stay in insertion order.
     ranked = sorted(inchi_dict.values(), key=lambda entry: -entry[0])
     ranked = ranked[:self._sample_size]

     list_of_labels = [f"Times sampled: {count}" for count, _ in ranked]
     sorted_mols = [kept for _, kept in ranked]
     return list_of_labels, sorted_mols
Exemple #8
0
def main():
    """Add an InChIKey-based ``name`` column to a tab-separated SMILES table.

    ``sys.argv[1]`` is the input file (with a header row and a ``smiles``
    column); ``sys.argv[2]`` is where the augmented table is written. The
    table is also printed.
    """
    missingfile, outputfile = sys.argv[1], sys.argv[2]

    missing = pd.read_csv(missingfile, sep="\t", header=0)

    names = []
    for _, record in missing.iterrows():
        parsed = Chem.MolFromSmiles(record.smiles)
        names.append(ri.MolToInchiKey(parsed))

    missing['name'] = names
    print(missing)

    missing.to_csv(outputfile, sep="\t", index=False)
def main():
    """Convert an SD file into a CSV with SMILES and InChIKey columns.

    Command-line options:
      -S/--sdf               name of the input .sdf file (required)
      -n/--name              name of the output csv file (required)
      -i/--input_directory   directory holding the input file (default ./)
      -o/--output_directory  directory for the output file (default ./)
    """
    # Local import so the block is self-contained for path handling.
    import os

    parser = argparse.ArgumentParser(description='Convert compounds and \
                                     associated information from an sdf file \
                                     into a csv file and generate smiles')
    # Both file names are mandatory: without them the script used to fail
    # with an opaque "NoneType is not subscriptable" error.
    parser.add_argument('-S', '--sdf', action='store', nargs=1,
                        dest='sdf', required=True,
                        help='File containing compounds \
                        (.sdf format)')
    parser.add_argument('-n', '--name', action='store', nargs=1,
                        dest='name', required=True,
                        help='Name of output csv file to write')
    parser.add_argument('-i', '--input_directory', action='store', nargs=1,
                        dest='input', default=['./'],
                        help='Directory where input files are stored')
    parser.add_argument('-o', '--output_directory', action='store', nargs=1,
                        dest='output', default=['./'],
                        help='Directory where output files should be written')
    args = vars(parser.parse_args())

    # os.path.join inserts the separator when the directory argument has no
    # trailing slash; plain concatenation produced e.g. "datafile.sdf".
    sdf_path = os.path.join(args['input'][0], args['sdf'][0])
    csv_path = os.path.join(args['output'][0], args['name'][0])

    sdf_df = PandasTools.LoadSDF(sdf_path, smilesName='smiles')
    sdf_df['inchikey'] = [inchi.MolToInchiKey(mol) for mol in sdf_df['ROMol']]
    sdf_df.to_csv(csv_path, index=False)
Exemple #10
0
def fill_base_test(cursor):
    """Fill the test database through ``cursor`` and return the cursor.

    Reads "toxicity_85832.csv" from the working directory and inserts, in
    this order: molecules, 20 random tasks, 1000 random task runs, 10 random
    descriptors plus the "mordred" descriptor, one descriptor value per
    molecule, the endpoints derived from the column names, and every
    non-NaN measurement as experimental data.

    The order of the ``random`` calls and of the inserts is significant for
    reproducibility under a fixed seed and for the auto-generated row ids,
    so it must not be rearranged.
    """
    df = pd.read_csv("toxicity_85832.csv")
    names_of_columns = list(df.columns)
    smiles = list(df["SMILES"])
    df = df.drop("SMILES", axis=1)
    toxic_vals = np.array(df.values)

    # molecules -- each canonical SMILES is parsed once and the molecule is
    # reused for both the InChI and the InChIKey.
    canonize_smiles = [_canonize_mixture(smile) for smile in smiles]
    mols = [Chem.MolFromSmiles(smile) for smile in canonize_smiles]
    inchi_smiles = [inchi.MolToInchi(mol) for mol in mols]
    inchikey = [inchi.MolToInchiKey(mol) for mol in mols]
    cursor.executemany(
        """insert into 'molecules' (inchi_key,inchi,canonical_smiles) values (?,?,?)""",
        zip(inchikey, inchi_smiles, canonize_smiles))

    # tasks
    descr_tasks = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for _ in range(20)
    ]
    cursor.executemany("""insert into 'tasks' (descr) values (?)""",
                       zip(descr_tasks))

    # tasks_running
    completed = [random.randint(0, 1) for _ in range(1000)]
    id_tasks = [random.randint(1, len(descr_tasks)) for _ in range(1000)]
    id_molecules = [random.randint(1, len(smiles)) for _ in range(1000)]
    cursor.executemany(
        """insert into 'tasks_running' (id_task, id_molecule, completed) values (?,?,?)""",
        zip(id_tasks, id_molecules, completed))

    # descriptors
    name_of_descr = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for _ in range(10)
    ]
    name_of_version = [
        randomStringwithDigitsAndSymbols(random.randint(1, 30))
        for _ in range(10)
    ]
    cursor.executemany(
        """insert into 'descriptors' (descriptor, version) values (?,?)""",
        zip(name_of_descr, name_of_version))
    cursor.execute(
        """insert into 'descriptors' (descriptor, version) values (?,?)""",
        ("mordred", "0.315"))

    # descriptor_values
    # NOTE(review): 11 presumably is the row id of the "mordred" descriptor
    # (10 random rows + 1) when the table starts out empty -- confirm.
    id_descriptor = [11] * len(smiles)
    id_molecule = list(range(1, len(smiles) + 1))
    id_tasks = [
        random.randint(1, len(descr_tasks)) for _ in range(len(smiles))
    ]
    valid = [random.randint(0, 1) for _ in range(len(smiles))]
    value = func(canonize_smiles)
    cursor.executemany(
        """insert into 'descriptors_values' (id_molecule, id_descriptor, id_task, valid, value) values (?,?,?,?,?)""",
        zip(id_molecule, id_descriptor, id_tasks, valid, value))

    # endpoints
    # NOTE(review): skipping the first column assumes it is "SMILES", so the
    # remaining names line up with the columns of `toxic_vals` -- confirm.
    features = names_of_columns[1:]
    descriptions = [feature.split('_')[1] for feature in features]
    types = ['_'.join(feature.split('_')[2:]) for feature in features]
    cursor.executemany("""insert into 'endpoints' (desc, type) values (?,?)""",
                       zip(descriptions, types))

    # experimental data: one row per non-NaN cell, ids are 1-based and kept
    # as plain Python ints.
    ids_molecules = []
    ids_endpoints = []
    values_endpoints = []
    for molecule_id, row in enumerate(toxic_vals, start=1):
        for endpoint_id, measurement in enumerate(row, start=1):
            if not np.isnan(measurement):
                ids_molecules.append(molecule_id)
                ids_endpoints.append(endpoint_id)
                values_endpoints.append(measurement)

    cursor.executemany(
        """insert into 'experimental_data' (id_molecule, id_endpoint, value) values (?,?,?)""",
        zip(ids_molecules, ids_endpoints, values_endpoints))

    return cursor
Exemple #11
0
def calculate_inchi_key(smile):
    """Return the InChIKey of the molecule described by the SMILES ``smile``."""
    parsed = Chem.MolFromSmiles(smile)
    return inchi.MolToInchiKey(parsed)
Exemple #12
0
def loadSDF(sdfPath):
    """Load the molecules of the SD file at ``sdfPath`` into the database.

    For every readable molecule a ``data`` dict is built: each field is taken
    from the matching SD property when ``GetProp`` succeeds, otherwise from a
    computed or placeholder fallback. Molecules without a 'DATABASE_NAME'
    property are skipped. Two PNG renderings (300x300 and 150x150) are
    written under ``settings.FILES_DIR`` and the record is handed to
    ``feedDatabase``.

    NOTE(review): the visible source is cut off after this body (an
    unterminated triple-quoted block follows), so the end of the function may
    be missing from this view.
    """
    # Create images
    #generateImages(sdfPath)
     
    # Create a molecule supplier
    suppl = Chem.SDMolSupplier(sdfPath)
    
    # Filter empty entries
    sdf = [x for x in suppl if x is not None]
    
    # For each molecule in supplier
    for mol in sdf:
        data = {}
        
        # Each field below: prefer the SD property, fall back on failure.
        try:
            data['fCharge'] = mol.GetProp('Charge')
        except:
            data['fCharge'] = Chem.GetFormalCharge(mol)
            
        try:
            data['name'] = mol.GetProp('DATABASE_ID')
        except:
            data['name'] = 'unkown'
            
        try:
            data['molMass'] = mol.GetProp('Total Molweight')
        except:
            data['molMass'] = Descriptors.ExactMolWt(mol) 
            
        try:
            data['cLogP'] = mol.GetProp('cLogP')
        except:
            data['cLogP'] = Crippen.MolLogP(mol) # not sure whether this is correct
            
        try:
            data['cLogS'] = mol.GetProp('cLogS')
        except:
            data['cLogS'] = 0.0
            
        try:
            data['tpsa'] = mol.GetProp('Polar Surface Area')
        except:
            data['tpsa'] = rdMolDescriptors.CalcTPSA(mol)
            
        try:
            data['totalSurfaceArea'] = mol.GetProp('Total Surface Area')
        except:
            # NOTE(review): same fallback as 'tpsa' above (CalcTPSA) --
            # confirm this is intended for the total surface area.
            data['totalSurfaceArea'] = rdMolDescriptors.CalcTPSA(mol)
        
        try:
            data['hbondAcceptors'] = mol.GetProp('H-Acceptors')
        except:
            data['hbondAcceptors'] = rdMolDescriptors.CalcNumHBA(mol)
            
        try:
            data['hbondDonnors'] = mol.GetProp('H-Donors')
        except:
            data['hbondDonnors'] = rdMolDescriptors.CalcNumHBD(mol)
            
        try:
            data['rotable'] = mol.GetProp('Rotatable Bonds')
        except:
            data['rotable'] = rdMolDescriptors.CalcNumRotatableBonds(mol)
            
        try:
            data['mutagenic'] = mol.GetProp('Mutagenic')
        except:
            data['mutagenic'] = 'Unknown'
            
        try:
            data['tumorigenic'] = mol.GetProp('Tumorigenic')
        except:
            data['tumorigenic'] = 'Unknown'
            
        try:
            data['irritant'] = mol.GetProp('Irritant')
        except:
            data['irritant'] = 'Unkown'
            
        try:
            data['smiles'] = mol.GetProp('SMILES')
        except:
            data['smiles'] = Chem.MolToSmiles(mol)
            
        try:
            data['InChI'] = mol.GetProp('INCHI_IDENTIFIER')
        except:
            data['InChI'] = inchi.MolToInchi(mol)
            
        try:
            data['inchiKey'] = mol.GetProp('INCHI_KEY')
        except:
            data['inchiKey'] = inchi.MolToInchiKey(mol)
            
        try:
            data['nonHAtoms'] = mol.GetProp('Non-H Atoms')
        except:
            data['nonHAtoms'] = -1 # placeholder: no computation implemented
            
            
        try:
            data['numAtoms'] = mol.GetProp('numAtoms')
        except:
            data['numAtoms'] = mol.GetNumAtoms()
        
        try:
            data['stereoCenters'] = mol.GetProp('Stereo Centers')
        except:
            # NOTE(review): falls back to the atom count, identical to
            # 'numAtoms' above -- confirm this is intended.
            data['stereoCenters'] = mol.GetNumAtoms()
            
        # The provider is the only mandatory field: without it the molecule
        # is reported and skipped.
        try:
            data['provider'] = mol.GetProp('DATABASE_NAME')
        except:
            print("Nenhum fornecedor encontrado, o campo é obrigatório!")
            continue
        
        # `tmp` is not used afterwards in the visible code.
        tmp = AllChem.Compute2DCoords(mol) # Compute its coordinates
        
        # Full-size image, named after the InChIKey.
        Draw.MolToFile(mol, 
            os.path.join(settings.FILES_DIR, f'molImages/' + data["inchiKey"] + '.png'),
            size=(300,300),
            kekulize=True, 
            wedgeBonds=True,
            fitImage=True) # Save it
        
        # Thumbnail, named after the InChIKey.
        Draw.MolToFile(mol, 
            os.path.join(settings.FILES_DIR, f'molThumbs/' + data["inchiKey"] + '.png'),
            size=(150,150),
            kekulize=True,
            wedgeBonds=True,
            fitImage=True)
        
        # NOTE(review): this call runs unconditionally, and both "feed"
        # branches below call feedDatabase again -- confirm the record is
        # not meant to be inserted only once.
        feedDatabase(data)

        if Compounds.objects.filter(inChIKey=data['inchiKey']).exists():
            # NOTE(review): the filter receives the literal list
            # ['provider'], not data['provider'] -- confirm.
            if not Compounds.objects.filter(provider=['provider']).exists():
                feedDatabase(data)
                print("feed1")
                # append to the database's SDF
                a = 1
            else:
                print("continue123")
                continue
                
        else:
            a = 1
            feedDatabase(data)
            print("feed2")
        '''except:
Exemple #13
0
def one_species_confs(molecule, log, other_props, max_confs, forcefield,
                      nconf_gen, e_window, rms_tol, prun_tol, job_dir,
                      log_file, rep_e_window, fallback_to_align, temp,
                      clean_up, start_time):
    """Generate, cluster and summarise conformers for a single species.

    Progress is written to the file ``log``. The species is minimised,
    its conformers are clustered and written out under ``job_dir`` (named
    by InChIKey), reclustered, renamed and summarised; the parsed results
    are returned as a dict.

    The force field is switched to 'uff' when any entry of ``UFF_ELEMENTS``
    occurs in ``molecule``.
    """
    smiles = copy.deepcopy(molecule)

    with open(log, "w") as output:
        output.write("The smiles strings that will be run are:\n")
        output.write("\n".join([molecule]) + "\n")

        needs_uff = any([element in molecule for element in UFF_ELEMENTS])
        if needs_uff:
            output.write("Switching to UFF, since MMFF94 does "
                         "not have boron and/or aluminum\n")
            forcefield = 'uff'

        generator, gen_time, min_time = minimize(
            output=output,
            molecule=molecule,
            forcefield=forcefield,
            nconf_gen=nconf_gen,
            prun_tol=prun_tol,
            e_window=e_window,
            rms_tol=rms_tol,
            rep_e_window=rep_e_window,
        )
        clusters = generator.cluster(
            rms_tolerance=float(rms_tol),
            max_ranked_conformers=int(max_confs),
            energy_window=float(e_window),
            Report_e_tol=float(rep_e_window),
            output=output,
        )

        cluster_time = time.time()
        inchikey = inchi.MolToInchiKey(get_mol(molecule))

        for idx, member in enumerate(clusters):
            write_clusters(output=output,
                           idx=idx,
                           conformer=member,
                           inchikey=inchikey,
                           path=job_dir)

        # `idx` is the index of the last cluster written; it is unbound if
        # there were no clusters at all.
        molecule = run_obabel(inchikey=inchikey, idx=idx)
        generator.recluster(
            path=job_dir,
            rms_tolerance=float(rms_tol),
            max_ranked_conformers=int(max_confs),
            energy_window=float(e_window),
            output=output,
            clustered_confs=clusters,
            molecule=molecule,
            key=inchikey,
            fallback_to_align=fallback_to_align,
        )
        rename_xyz_files(path=job_dir)
        summarize(output=output,
                  gen_time=gen_time,
                  start_time=start_time,
                  min_time=min_time,
                  cluster_time=cluster_time)

    # The log is closed at this point; results are parsed from disk.
    return parse_results(job_dir=job_dir,
                         log_file=log_file,
                         inchikey=inchikey,
                         max_confs=max_confs,
                         other_props=other_props,
                         temp=temp,
                         smiles=smiles,
                         clean_up=clean_up)
def get_inchi_key(mol, stereo):
    """Return the InChIKey of ``mol``; drop its middle block unless ``stereo``.

    When ``stereo`` is falsy the key is split on '-' and only the first and
    third parts are kept (the middle part carries stereo and isotope data).
    """
    full_key = inchi.MolToInchiKey(mol)
    if stereo:
        return full_key
    parts = full_key.split('-')
    return parts[0] + '-' + parts[2]
# Script fragment: re-parse every stored InChI, regenerate its InChIKey and
# compare it with the stored key. Matching molecules are written to the SDF
# and their InChI logged; mismatches are logged to a separate file.
# `repo`, `converted_list_file`, `converted`, `not_converted` and `i` are
# defined outside the visible part of the file.
converted_not_match_file = open(
    "out/inchi_valid_check/converted_not_match.txt",
    mode="w",
    encoding="utf-8")
w = SDWriter("out/inchi_valid_check/converted.sdf")
# NOTE(review): the loop variable is called `np`; if numpy is imported under
# that alias in this file it is shadowed (and deleted by `del np` below).
np: Unique_NP
for np in repo.get_unique_stream():
    mol = Mol()
    try:
        mol = Chem.MolFromInchi(inchi=np.inchi, treatWarningAsError=True)
        mol.SetProp("coconut_id", np.coconut_id)
    except:
        # NOTE(review): if MolFromInchi itself raises, `mol` is still the
        # placeholder Mol() created above; presumably that object is truthy,
        # in which case this record is also counted in the mismatch branch
        # below -- confirm.
        not_converted += 1

    if mol:
        mol_inchikey = inchi.MolToInchiKey(mol)

        if np.inchikey == mol_inchikey:
            converted_list_file.write(np.inchi + "\n")
            w.write(mol)
            converted += 1
        else:
            converted_not_match_file.write(np.inchi + "\n")
            not_converted += 1
    i += 1
    del np

    # Progress report every 1000 records.
    if i % 1000 == 0:
        print("{}th checked".format(i))

    # if i > 5: