예제 #1
0
def _parseMolData(data):
    """Parse SD data into molecules, regenerating 2D coordinates when needed.

    Every record that parses is sanitized unless it carries the
    ``_drawingBondsWedged`` property. Its mol block is then scanned: when
    every line containing ``0.0000`` contains it at least three times (which
    also holds when no line contains it at all), the molecule is treated as
    having no real coordinates, so it is sanitized and RDKit computes its own
    2D layout. Otherwise the molecule is used as drawn.

    Returns the list of molecules that could be parsed.
    """
    supplier = SDMolSupplier()
    supplier.SetData(str(data), sanitize=False)

    data = [mol for mol in supplier if mol]
    for mol in data:
        if not mol.HasProp("_drawingBondsWedged"):
            SanitizeMol(mol)

        zero_rows = [
            row for row in MolToMolBlock(mol).split("\n") if "0.0000" in row
        ]
        # Three or more occurrences of "0.0000" is the same condition as
        # "splitting the row on it yields more than three pieces".
        if all(row.count("0.0000") >= 3 for row in zero_rows):
            SanitizeMol(mol)
            Compute2DCoords(mol)
    return data
예제 #2
0
 def parse_molblock(self, mb):
     """Parse a mol block and return its first molecule.

     The text is loaded into an ``SDMolSupplier`` and the first record is
     taken with ``next``. ``None`` is returned when that record is falsy.
     """
     supplier = SDMolSupplier()
     supplier.SetData(mb)
     first = next(supplier)
     return first if first else None
예제 #3
0
def rd_kit(dir_sdf="../data/sdf/"):
    """Plot histograms of fingerprint similarity to a baseline molecule.

    The directory is listed with ``ls``. The first molecule of the first
    listed file is the baseline; the first molecule of every listed file
    (the baseline file included) is compared against it with Tanimoto
    similarity for four fingerprint types (Morgan bit vector, RDKit, Avalon,
    layered). The Morgan and RDKit histograms are shown, and the mean RDKit
    similarity and the number of files are printed.

    Args:
        dir_sdf: Directory containing the SDF files.
    """
    file_names = os.popen("ls " + dir_sdf).read().split()

    bit_length = 1024
    # The baseline and the compared molecules must be fingerprinted with the
    # same parameters; mixing radius/path length 2 and 3 (as was done before)
    # makes the similarities meaningless -- the baseline would not even
    # score 1.0 against itself.
    morgan_radius = 2
    rdk_max_path = 2
    avalon_bits = 128
    tanimoto = DataStructs.TanimotoSimilarity

    def fingerprints(mol):
        """Return the (morgan, rdk, avalon, layered) fingerprints of ``mol``."""
        return (
            AllChem.GetMorganFingerprintAsBitVect(
                mol, morgan_radius, nBits=bit_length),
            AllChem.RDKFingerprint(mol, maxPath=rdk_max_path),
            pyAvalonTools.GetAvalonFP(mol, avalon_bits),
            AllChem.LayeredFingerprint(mol),
        )

    baseline_mol = SDMolSupplier(os.path.join(dir_sdf, file_names[0]))[0]
    baseline_fps = fingerprints(baseline_mol)

    # One similarity list per fingerprint type, in the order returned by
    # fingerprints().
    similarities = ([], [], [], [])
    count = 0
    for item in file_names:
        mol = SDMolSupplier(os.path.join(dir_sdf, item))[0]
        count += 1
        for sims, base_fp, mol_fp in zip(similarities, baseline_fps,
                                         fingerprints(mol)):
            sims.append(
                DataStructs.FingerprintSimilarity(base_fp, mol_fp,
                                                  metric=tanimoto))

    sim_matrix_morgan = np.array(similarities[0])
    sim_matrix_rdk = np.array(similarities[1])
    # Avalon and layered similarities are computed but currently not plotted.
    sim_matrix_aval = np.array(similarities[2])
    sim_matrix_layer = np.array(similarities[3])

    label_morgan = "morgan" + str(bit_length)
    plt.hist(sim_matrix_morgan, label=label_morgan)
    plt.hist(sim_matrix_rdk, label="rdk2")
    print(np.mean(sim_matrix_rdk))
    print(count)
    plt.xlabel("Similarity to Baseline")
    plt.ylabel("Counts")
    plt.title("Different Fingerprinting Methods, Similarity to Baseline")
    plt.legend()
    plt.show()
예제 #4
0
def MCS_NN_search(sdf_file):
    """Render a molecule and its nearest neighbour with MCS atoms highlighted.

    The first molecule of ``sdf_file`` is loaded and its ``NN`` property is
    evaluated as a Python literal holding candidate ids. Each candidate is
    loaded from ``<id>.sdf`` in the same directory and everything is handed
    to ``get_MCSs``.

    Returns:
        ``(mol_img, nn_img)`` as produced by ``draw_base64``.
    """
    sess_dir = split(sdf_file)[0]
    mol = SDMolSupplier(sdf_file, removeHs=True)[0]

    nn_literal = mol.GetPropsAsDict()['NN']
    candidate_mols = []
    for candidate_id in literal_eval(nn_literal):
        candidate_path = join(sess_dir, '%d.sdf' % candidate_id)
        candidate_mols.append(SDMolSupplier(candidate_path, removeHs=True)[0])

    _mcss, query_matches, nn_mols, nn_matches = get_MCSs([mol], candidate_mols)

    mol_img = draw_base64(mol, highlightAtoms=query_matches[0])
    nn_img = draw_base64(nn_mols[0], highlightAtoms=nn_matches[0])
    return mol_img, nn_img
예제 #5
0
def parse_sdf(contents, filename):
    """Parse an uploaded, base64-encoded SDF payload into RDKit molecules.

    Args:
        contents: Upload payload of the form ``"<content type>,<base64>"``.
        filename: Name of the uploaded file; it must end in ``.sdf``
            (case-insensitive).

    Returns:
        A ``(mols, message, session_id)`` tuple. ``mols`` is the list of
        molecules that parsed and ``message`` reports how many of the
        records were loaded. On any failure, including a malformed payload,
        ``mols`` is empty and ``message`` describes the problem.
    """
    session_id = str(uuid4())

    try:
        if filename[-4:].lower() != '.sdf':
            return [], 'The file has a wrong format.', session_id

        # Decoding happens inside the try block so that a malformed payload
        # yields the documented error tuple instead of an exception.
        content_type, content_string = contents.split(',')
        decoded = b64decode(content_string)

        # The supplier reads from disk, so the upload is written to a file
        # named after the session id.
        unique_fname = join('uploads', '%s.sdf' % session_id)
        try:
            with open(unique_fname, 'w', encoding='utf-8') as fh:
                fh.write(decoded.decode('utf-8'))

            supplier = SDMolSupplier(unique_fname, removeHs=False)
            n_mols = len(supplier)
            mols = [m for m in supplier if m]
        finally:
            # Always clean up the temporary file, even when parsing fails.
            try:
                remove(unique_fname)
            except Exception as e:
                # This is not critical
                print(e)

        # Report "<parsed>/<total>"; the two counts used to be swapped.
        n_success = len(mols)
        return mols, 'Loaded %d/%d mols' % (n_success, n_mols), session_id

    except Exception as e:
        print(e)
        return [], 'Error occured during processing of a file.', session_id
예제 #6
0
def file_to_mols(filepath):
    """Load molecules from a SMILES (.smi) or SD (.sd / .sdf) file.

    SMILES files are read line by line and converted with ``smi_to_mol``
    through ``process_map``, using ``a.worker`` workers. SD files are read
    with ``SDMolSupplier``. Entries that come back falsy are dropped.

    Raises:
        Exception: If the file extension is not .smi, .sd or .sdf.
    """
    if filepath.endswith('.smi'):
        print('Converting SMILES to list of Mols')
        sys.stdout.flush()
        with open(filepath) as infile:
            smiles_list = [line.rstrip() for line in infile]

        converted = process_map(smi_to_mol,
                                smiles_list,
                                chunksize=100,
                                max_workers=a.worker)
        return [m for m in converted if m]

    if filepath.endswith(('.sd', '.sdf')):
        return [mol for mol in SDMolSupplier(filepath) if mol]

    raise Exception('Invalid file: {}\n'.format(filepath) +
                    '.smi, .sd, or .sdf extension is expected')
예제 #7
0
def get_random_mol() -> Mol:
    """Return a randomly chosen molecule from the SDF at ``PROCESSED_SDF_PATH``.

    The index is drawn with ``randint(0, len(supplier) - 1)`` and the chosen
    record is asserted to be truthy before it is returned.
    """
    supplier = SDMolSupplier(PROCESSED_SDF_PATH)
    last_index = len(supplier) - 1
    chosen = randint(0, last_index)

    assert supplier[chosen]
    return supplier[chosen]
예제 #8
0
def rd_kit_morgan(dir_sdf="../data/sdf/"):
    """Compute Morgan bit-vector fingerprints for every SDF file in a directory.

    The directory is listed with ``ls`` and the first molecule of each listed
    file is fingerprinted (radius 2, 256 bits).

    Args:
        dir_sdf: Directory containing the SDF files.

    Returns:
        A numpy array built from the list of fingerprints, in listing order.
    """
    file_names = os.popen("ls " + dir_sdf).read().split()
    bit_length = 256

    fingerprints = []
    for file_name in file_names:
        # Load from dir_sdf itself: the path used to be hard-coded to
        # "../data/sdf/", so any other directory was listed but never read.
        mol = SDMolSupplier(os.path.join(dir_sdf, file_name))[0]
        fingerprints.append(
            AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=bit_length))

    return np.array(fingerprints)
예제 #9
0
def rd_kit_rd(dir_sdf="../data/sdf/"):
    """Compute RDKit-fingerprint Tanimoto similarities to a baseline molecule.

    The directory is listed with ``ls``. The first molecule of the first
    listed file is the baseline, and the first molecule of every listed file
    is compared against it (``maxPath=2``).

    Args:
        dir_sdf: Directory containing the SDF files.

    Returns:
        A numpy array with one similarity per listed file, in listing order.
    """
    file_names = os.popen("ls " + dir_sdf).read().split()
    tanimoto = DataStructs.TanimotoSimilarity

    # Load from dir_sdf itself: the path used to be hard-coded to
    # "../data/sdf/", so any other directory was listed but never read.
    baseline_mol = SDMolSupplier(os.path.join(dir_sdf, file_names[0]))[0]
    baseline_rdk = AllChem.RDKFingerprint(baseline_mol, maxPath=2)

    similarities = []
    for file_name in file_names:
        mol = SDMolSupplier(os.path.join(dir_sdf, file_name))[0]
        fp_rdk = AllChem.RDKFingerprint(mol, maxPath=2)
        similarities.append(
            DataStructs.FingerprintSimilarity(baseline_rdk, fp_rdk,
                                              metric=tanimoto))

    return np.array(similarities)
예제 #10
0
def rd_kit_aval(dir_sdf="../data/sdf/"):
    """Compute Avalon fingerprints for every SDF file in a directory.

    The directory is listed with ``ls`` and the first molecule of each listed
    file is fingerprinted with 128 bits.

    Args:
        dir_sdf: Directory containing the SDF files.

    Returns:
        A numpy array built from the list of fingerprints, in listing order.
    """
    file_names = os.popen("ls " + dir_sdf).read().split()

    fingerprints = []
    for file_name in file_names:
        # Load from dir_sdf itself: the path used to be hard-coded to
        # "../data/sdf/", so any other directory was listed but never read.
        mol = SDMolSupplier(os.path.join(dir_sdf, file_name))[0]
        fingerprints.append(pyAvalonTools.GetAvalonFP(mol, 128))

    return np.array(fingerprints)
예제 #11
0
def test_MCS(sdf_file):
    """Exercise the nearest-neighbour search and MCS computation on an SDF file.

    Molecules that parse (hydrogens kept) are used as both queries and
    candidates for ``get_Tanimoto_NNs``; the resulting neighbour indices are
    passed on to ``get_MCSs``. Nothing is returned.
    """
    supplier = SDMolSupplier(sdf_file, removeHs=False)
    mols = np.array([mol for mol in supplier if mol])

    neighbour_ids = get_Tanimoto_NNs(
        mols, mols, 3, fps_nbits=512, order=1, nns=10)

    # Unpacking checks that get_MCSs yields four results.
    mcss, mcs_matches, nn_mols, nn_mcs_matches = get_MCSs(
        mols, mols, nns_indices=neighbour_ids)
예제 #12
0
def split(sdf, label_col, folder, splitfold=5):
    """
    Stratified splitting of an SDF dataset into k folds.

    For every fold a ``trainset_<fold>.sdf`` and a ``testset_<fold>.sdf`` file
    is written into the output folder.

    :param sdf: Path of the input SDF file
    :param label_col: Name of the molecule property holding the labels used
        for stratification
    :param folder: Output folder; when None, a folder named after the SDF
        file (the text before its first dot) is created next to it
    :param splitfold: k number of folds
    :return: Tuple of a dict with the keys 'train_files' and 'test_files'
        (lists of written file paths) and the output folder
    """

    if folder is None:
        sdf_path = pathlib.Path(sdf)
        sdf_name = sdf_path.name.partition('.')[0]

        folder = sdf_path.parent.joinpath(sdf_name)
        folder.mkdir(parents=True, exist_ok=True)
        folder = folder.absolute()
    else:
        pathlib.Path(folder).mkdir(parents=True, exist_ok=True)

    train_files = []
    test_files = []

    # Records that cannot be parsed come back as None from the supplier; they
    # carry no label and cannot be written, so leave them out of the split.
    mols = [mol for mol in SDMolSupplier(sdf) if mol is not None]
    labels = [mol.GetProp(label_col) for mol in mols]

    skf = StratifiedKFold(n_splits=splitfold)
    for fold, (train_ix, test_ix) in enumerate(skf.split(mols, labels)):
        test_set_fn = "{}/testset_{}.sdf".format(folder, fold)
        train_set_fn = "{}/trainset_{}.sdf".format(folder, fold)

        for set_fn, indices in ((train_set_fn, train_ix),
                                (test_set_fn, test_ix)):
            writer = SDWriter(set_fn)
            try:
                for i in indices:
                    writer.write(mols[i])
            finally:
                # Close even if a write fails so the file handle is released.
                writer.close()

        train_files.append(train_set_fn)
        test_files.append(test_set_fn)

    return {'train_files': train_files,
            'test_files': test_files}, folder
예제 #13
0
파일: io.py 프로젝트: bkbonde/chembl_beaker
def _parseMolData(data, sanitize=True, removeHs=True, strictParsing=True):
    """Parse SD data via a temporary file and return the molecules that parsed.

    Args:
        data: SD file content, as bytes or text (text is encoded as UTF-8).
        sanitize: Forwarded to ``SDMolSupplier``.
        removeHs: Forwarded to ``SDMolSupplier``.
        strictParsing: Forwarded to ``SDMolSupplier``.

    Returns:
        List of the truthy molecules yielded by the supplier.
    """
    # The file is written in binary mode, so text has to be encoded first.
    if not isinstance(data, bytes):
        data = data.encode("utf-8")

    fd, fpath = tempfile.mkstemp(text=True)
    try:
        with os.fdopen(fd, "wb") as handle:
            handle.write(data)
        suppl = SDMolSupplier(fpath,
                              sanitize=sanitize,
                              removeHs=removeHs,
                              strictParsing=strictParsing)
        return [x for x in suppl if x]
    finally:
        # Remove the temporary file even when parsing raises.
        os.remove(fpath)
예제 #14
0
파일: helpers.py 프로젝트: santi921/ML_CO2
def morgan(bit_length=256, dir="../data/sdf/DB3/", bit=True):
    """Compute Morgan fingerprints for the SDF files that have a data record.

    ``merge_dir_and_data`` supplies the SDF file names and a list of
    colon-separated records whose first field is a name and whose second and
    third fields are floats. A file is kept when its name without the last
    four characters equals the name field of the record at the same index
    or, failing that, of the record at the next index. Files that cannot be
    processed are skipped.

    Args:
        bit_length: Number of bits of the bit-vector fingerprint.
        dir: Directory prefix that is prepended to each file name.
        bit: When true compute bit-vector fingerprints, otherwise the
            fingerprints returned by ``GetMorganFingerprint``.

    Returns:
        ``(names, morgan, homo, homo1, diff)``: the kept file names, a numpy
        array built from their fingerprints, the second and third record
        fields as floats, and their difference.
    """
    fingerprints = []
    names = []
    homo = []
    homo1 = []
    diff = []

    dir_fl_names, list_to_sort = merge_dir_and_data(dir=dir)
    #---------------------------------------------------------------------------
    for tmp, item in enumerate(dir_fl_names):
        try:
            suppl = SDMolSupplier(dir + item)

            try:
                if bit:
                    fp = AllChem.GetMorganFingerprintAsBitVect(
                        suppl[0], 2, nBits=int(bit_length))
                else:
                    fp = AllChem.GetMorganFingerprint(suppl[0], 2)
            except Exception:
                if not bit:
                    print("error")
                # Skip this file; carrying on would attach the previous
                # molecule's fingerprint to this file name.
                continue

            stem = item[0:-4]
            record = list_to_sort[tmp].split(":")
            values = None
            if stem == record[0]:
                values = (float(record[1]), float(record[2]))
            else:
                try:
                    record = list_to_sort[tmp + 1].split(":")
                    if stem == record[0]:
                        values = (float(record[1]), float(record[2]))
                except Exception:
                    print(list_to_sort[tmp].split(":")[0], stem)

            # Both floats are parsed before anything is appended so that the
            # five result lists always stay the same length.
            if values is not None:
                homo_temp, homo1_temp = values
                fingerprints.append(fp)
                names.append(item)
                homo.append(homo_temp)
                homo1.append(homo1_temp)
                diff.append(homo_temp - homo1_temp)

            sys.stdout.write("\r %s /" % tmp + str(len(dir_fl_names)))
            sys.stdout.flush()
        except Exception:
            # Best effort: a file that cannot be processed is left out.
            continue

    return names, np.array(fingerprints), homo, homo1, diff
예제 #15
0
파일: helpers.py 프로젝트: dbim-chem/ML_CO2
def layer(dir="../data/sdf/DB/"):
    """Compute layered fingerprints for every SDF file in a directory.

    The directory is listed with ``ls`` and the first molecule of each listed
    file is fingerprinted.

    Args:
        dir: Directory containing the SDF files.

    Returns:
        A numpy array built from the list of fingerprints, in listing order.
    """
    file_names = os.popen("ls " + dir).read().split()

    fingerprints = []
    for file_name in file_names:
        # Load from the requested directory: the path used to be hard-coded
        # to "../data/sdf/DB/", so any other directory was listed but never
        # read.
        mol = SDMolSupplier(os.path.join(dir, file_name))[0]
        fingerprints.append(AllChem.LayeredFingerprint(mol))

    return np.array(fingerprints)
예제 #16
0
파일: helpers.py 프로젝트: dbim-chem/ML_CO2
def rdk(dir="../data/sdf/DB/"):
    """Compute RDKit fingerprints for every SDF file in a directory.

    The directory is listed with ``ls`` and the first molecule of each listed
    file is fingerprinted with ``maxPath=2``.

    Args:
        dir: Directory containing the SDF files.

    Returns:
        A numpy array built from the list of fingerprints, in listing order.
    """
    file_names = os.popen("ls " + dir).read().split()

    fingerprints = []
    for file_name in file_names:
        # Load from the requested directory: the path used to be hard-coded
        # to "../data/sdf/DB/", so any other directory was listed but never
        # read.
        mol = SDMolSupplier(os.path.join(dir, file_name))[0]
        fingerprints.append(AllChem.RDKFingerprint(mol, maxPath=2))

    return np.array(fingerprints)
예제 #17
0
def sdf_to_info(sdf_folder, save_folder):
    """Summarise the first molecule of every SDF file into ``info.csv``.

    For each ``*.sdf`` file in ``sdf_folder`` the file name (text before the
    first dot), the molecule's ``SMILES`` property and its heavy-atom count
    are recorded. The rows are sorted by heavy-atom count and written to
    ``info.csv`` in ``save_folder``. With no SDF files only the header row is
    written.

    Args:
        sdf_folder: Folder searched for ``*.sdf`` files.
        save_folder: Folder that receives ``info.csv``.
    """
    columns = ["file_name", "SMILES", "n_heavy"]

    # Collect plain dicts and build the frame once: DataFrame.append copied
    # the whole frame on every call and no longer exists in pandas 2.x.
    records = []
    for sdf in glob(osp.join(sdf_folder, "*.sdf")):
        mol = SDMolSupplier(sdf)[0]
        records.append({
            "file_name": osp.basename(sdf).split(".")[0],
            "SMILES": mol.GetProp("SMILES"),
            "n_heavy": mol.GetNumHeavyAtoms(),
        })

    result = pd.DataFrame(records, columns=columns)
    result = result.sort_values(by="n_heavy")
    result.to_csv(osp.join(save_folder, "info.csv"), index=False)
예제 #18
0
파일: helpers.py 프로젝트: dbim-chem/ML_CO2
def aval(dir="../data/sdf/DB/", bit_length=128):
    """Compute Avalon fingerprints for every SDF file in a directory.

    The directory is listed with ``ls`` and the first molecule of each listed
    file is fingerprinted.

    Args:
        dir: Directory containing the SDF files.
        bit_length: Number of bits passed to ``GetAvalonFP``.

    Returns:
        A numpy array built from the list of fingerprints, in listing order.
    """
    file_names = os.popen("ls " + dir).read().split()

    fingerprints = []
    for file_name in file_names:
        # Load from the requested directory: the path used to be hard-coded
        # to "../data/sdf/DB/", so any other directory was listed but never
        # read.
        mol = SDMolSupplier(os.path.join(dir, file_name))[0]
        fingerprints.append(pyAvalonTools.GetAvalonFP(mol, bit_length))

    return np.array(fingerprints)
예제 #19
0
def rd_kit_morgan(dir_sdf="../data/sdf/"):
    """Compute layered fingerprints for every SDF file in a directory.

    Despite its name, this function uses ``AllChem.LayeredFingerprint``. The
    directory is listed with ``ls`` and the first molecule of each listed
    file is fingerprinted.

    Args:
        dir_sdf: Directory containing the SDF files.

    Returns:
        A numpy array built from the list of fingerprints, in listing order.
    """
    file_names = os.popen("ls " + dir_sdf).read().split()

    fingerprints = []
    for file_name in file_names:
        # Load from dir_sdf itself: the path used to be hard-coded to
        # "../data/sdf/", so any other directory was listed but never read.
        mol = SDMolSupplier(os.path.join(dir_sdf, file_name))[0]
        fingerprints.append(AllChem.LayeredFingerprint(mol))

    # Convert once, after the loop: converting inside the loop replaced the
    # list with an array, so the second append raised AttributeError.
    return np.array(fingerprints)
예제 #20
0
def load_props(mols_dir):
    """Collect the properties of the first molecule of each SDF into a table.

    Every ``*.sdf`` file in ``mols_dir`` contributes one row made of its first
    molecule's property dict plus an ``id`` column holding the file name
    without ``.sdf``. The returned frame is restricted to ``molId``, the
    ``<root>_experimental`` / ``<root>_prediction`` / ``<root>_error`` triple
    for every root found in a column containing ``prediction`` (roots in
    sorted order), then ``Similarity_Tanimoto``, ``NN`` and ``id``.
    """
    records = []
    for sdf_path in glob(join(mols_dir, '*.sdf')):
        record = SDMolSupplier(sdf_path)[0].GetPropsAsDict()
        record['id'] = split(sdf_path)[-1].replace('.sdf', '')
        records.append(record)
    df = pd.DataFrame(records)

    # Limit the frame to the necessary columns.
    roots = sorted(
        {col.split('_')[0] for col in df.columns if 'prediction' in col})
    selected = ['molId']
    for root in roots:
        selected.extend(['%s_experimental' % root,
                         '%s_prediction' % root,
                         '%s_error' % root])
    selected.extend(['Similarity_Tanimoto', 'NN', 'id'])

    return df[selected]
예제 #21
0
def min_sdf():
    """Write the lowest-energy conformer of each conformer SDF to its own file.

    For every ``*.sdf`` file in ``raw/openchem_logP_confs`` the molecule with
    the smallest ``energy_abs`` property is written to
    ``raw/openchem_logP_mmff_sdfs/<id>.mmff.sdf``, where ``<id>`` is the file
    name up to its first dot and then up to its first underscore. A file that
    raises an error is reported with ``print`` and skipped.
    """
    files = glob("raw/openchem_logP_confs/*.sdf")
    for f in tqdm(files):
        try:
            suppl = SDMolSupplier(f, removeHs=False)
            lowest_e = np.inf
            selected_mol = None
            for mol in suppl:
                energy = float(mol.GetProp("energy_abs"))
                if energy < lowest_e:
                    lowest_e = energy
                    selected_mol = mol
            if selected_mol is not None:
                mol_id = osp.basename(f).split('.')[0].split('_')[0]
                writer = SDWriter(
                    f"raw/openchem_logP_mmff_sdfs/{mol_id}.mmff.sdf")
                try:
                    writer.write(selected_mol)
                finally:
                    # The writer was never closed before, leaving one open
                    # handle (and possibly unflushed output) per input file.
                    writer.close()
        except Exception as e:
            print(e)
예제 #22
0
def test_preprocess_mols(sdf_file, session_id):
    """Run ``preprocess_mols`` on an SDF file with a clean session folder.

    The ``uploads/<session_id>`` folder is removed before and after the run;
    removal errors are printed and otherwise ignored.

    Returns:
        Whatever ``preprocess_mols`` returns for the parsed molecules.
    """

    def _purge_session_dir():
        """Delete the session's upload folder, printing any error."""
        try:
            rmtree(join('uploads', session_id))
        except Exception as err:
            print(err)

    _purge_session_dir()

    supplier = SDMolSupplier(sdf_file, removeHs=False)
    parsed = np.array([mol for mol in supplier if mol])
    df = preprocess_mols(parsed, session_id)

    _purge_session_dir()

    return df
예제 #23
0
파일: helpers.py 프로젝트: santi921/ML_CO2
def rdk(dir="../data/sdf/DB/"):
    """Compute RDKit fingerprints for the SDF files that have a data record.

    ``merge_dir_and_data`` supplies the SDF file names and a list of
    colon-separated records whose first field is a name and whose second and
    third fields are floats. A file is kept when its name without the last
    four characters equals the name field of the record at the same index
    or, failing that, of the record at the next index. Files that cannot be
    processed are skipped.

    Args:
        dir: Directory prefix that is prepended to each file name.

    Returns:
        ``(names, rdk, homo, homo1, diff)``: the kept file names, a numpy
        array built from their fingerprints (``maxPath=2``), the second and
        third record fields as floats, and their difference.
    """
    fingerprints = []
    names = []
    homo = []
    homo1 = []
    diff = []

    dir_fl_names, list_to_sort = merge_dir_and_data(dir=dir)
    #---------------------------------------------------------------------------
    for tmp, item in enumerate(dir_fl_names):
        try:
            suppl = SDMolSupplier(dir + item)
            fp_rdk = AllChem.RDKFingerprint(suppl[0], maxPath=2)

            stem = item[0:-4]
            record = list_to_sort[tmp].split(":")
            values = None
            if stem == record[0]:
                values = (float(record[1]), float(record[2]))
            else:
                try:
                    record = list_to_sort[tmp + 1].split(":")
                    if stem == record[0]:
                        values = (float(record[1]), float(record[2]))
                except Exception:
                    print(list_to_sort[tmp].split(":")[0], stem)

            # Both floats are parsed before anything is appended so that the
            # five result lists always stay the same length.
            if values is not None:
                homo_temp, homo1_temp = values
                fingerprints.append(fp_rdk)
                names.append(item)
                homo.append(homo_temp)
                homo1.append(homo1_temp)
                diff.append(homo_temp - homo1_temp)

            sys.stdout.write("\r %s /" % tmp + str(len(dir_fl_names)))
            sys.stdout.flush()
        except Exception:
            # Best effort: a file that cannot be processed is left out.
            continue

    return names, np.array(fingerprints), homo, homo1, diff
예제 #24
0
def check_sdf_matches(dg,
                      sdf_file,
                      draw_structures=True,
                      print_unmatching=False):
    """
    After generating the network, try to see if any structures match with those in SDF files
    These files were usually created manually, storing structures reported in experimental
    studies. The purpose is to match our simulations with experiments.

    Each SDF molecule is kekulized, converted to a kekule SMILES and loaded as
    a graph, which is then compared for isomorphism against every vertex graph
    of the derivation graph. A summary line with the number of matches is
    printed at the end.

    Keyword arguments:
    dg              -- the derivation graph of the network
    sdf_file        -- path to the SDF file
    draw_structures -- whether or not to print the structures in the summary pdf
    print_unmatching -- whether or not to also print the structures without a match
    """
    matching_structs = []
    not_matching = []
    postSection('Matching Structures')
    print(f"Checking for matches with structures in {sdf_file}")
    sdfile = SDMolSupplier(sdf_file)
    for mol in sdfile:
        Kekulize(mol)
        smi = MolToSmiles(mol, kekuleSmiles=True)
        mol_graph = smiles(smi, add=False)
        for v in dg.vertices:  #dg_new.vertices
            if v.graph.isomorphism(mol_graph) == 1:
                matching_structs.append(mol_graph)
                print(
                    f"Structure {v.graph.smiles} in the network matches a test set molecule!"
                )
        if mol_graph not in matching_structs:
            not_matching.append(mol_graph)
    # NOTE(review): `p` is not defined in this function; it is presumably a
    # module-level printer object -- confirm.
    if draw_structures:
        for g in matching_structs:
            g.print(p)
    if print_unmatching:
        postSection("Structures not matched yet")
        for g in not_matching:
            g.print(p)

    # An empty SDF used to raise ZeroDivisionError here; report 0% instead.
    total = len(sdfile)
    percent = 100 * len(matching_structs) / total if total else 0.0
    print(
        f"{len(matching_structs)} of {total} ({percent}%)  total structures in the SDF are in the reaction network."
    )
예제 #25
0
파일: helpers.py 프로젝트: dbim-chem/ML_CO2
def morgan(bit_length=256, dir="../data/sdf/DB/", bit=True):
    """Compute Morgan fingerprints for every SDF file in a directory.

    The directory is listed with ``ls`` and the first molecule of each listed
    file is fingerprinted with radius 2.

    Args:
        bit_length: Number of bits of the bit-vector fingerprint.
        dir: Directory containing the SDF files.
        bit: When true compute bit-vector fingerprints, otherwise the
            fingerprints returned by ``GetMorganFingerprint``.

    Returns:
        A numpy array built from the list of fingerprints, in listing order.
    """
    file_names = os.popen("ls " + dir).read().split()

    fingerprints = []
    for file_name in file_names:
        # Load from the requested directory: the path used to be hard-coded
        # to "../data/sdf/DB/", so any other directory was listed but never
        # read.
        mol = SDMolSupplier(os.path.join(dir, file_name))[0]
        if bit:
            fingerprint = AllChem.GetMorganFingerprintAsBitVect(
                mol, 2, nBits=bit_length)
        else:
            fingerprint = AllChem.GetMorganFingerprint(mol, 2)
        fingerprints.append(fingerprint)

    return np.array(fingerprints)
예제 #26
0
파일: helpers.py 프로젝트: santi921/ML_CO2
def aval(dir="../data/sdf/DB/", bit_length=256):
    """Compute Avalon fingerprints for the SDF files that have a data record.

    ``merge_dir_and_data`` supplies the SDF file names and a list of
    colon-separated records whose first field is a name and whose second and
    third fields are floats. A file is kept when its name without the last
    four characters equals the name field of the record at the same index
    or, failing that, of the record at the next index. Files that cannot be
    processed are skipped.

    Args:
        dir: Directory prefix that is prepended to each file name.
        bit_length: Number of bits passed to ``GetAvalonFP``.

    Returns:
        ``(names, aval, homo, homo1, diff)``: the kept file names, a numpy
        array built from their fingerprints, the second and third record
        fields as floats, and their difference.
    """
    fingerprints = []
    names = []
    homo = []
    homo1 = []
    diff = []

    dir_fl_names, list_to_sort = merge_dir_and_data(dir=dir)
    #---------------------------------------------------------------------------
    for tmp, item in enumerate(dir_fl_names):
        try:
            suppl = SDMolSupplier(dir + item)
            fp_aval = pyAvalonTools.GetAvalonFP(suppl[0], bit_length)

            stem = item[0:-4]
            record = list_to_sort[tmp].split(":")
            values = None
            if stem == record[0]:
                values = (float(record[1]), float(record[2]))
            else:
                try:
                    record = list_to_sort[tmp + 1].split(":")
                    if stem == record[0]:
                        values = (float(record[1]), float(record[2]))
                except Exception:
                    print(list_to_sort[tmp].split(":")[0], stem)

            # Both floats are parsed before anything is appended so that the
            # five result lists always stay the same length.
            if values is not None:
                homo_temp, homo1_temp = values
                fingerprints.append(fp_aval)
                names.append(item)
                homo.append(homo_temp)
                homo1.append(homo1_temp)
                diff.append(homo_temp - homo1_temp)

            sys.stdout.write("\r %s /" % tmp + str(len(dir_fl_names)))
            sys.stdout.flush()
        except Exception:
            # Best effort: a file that cannot be processed is left out.
            continue

    # Return the collected Avalon fingerprints; this used to convert the
    # unrelated name `layer` instead.
    return names, np.array(fingerprints), homo, homo1, diff
예제 #27
0
def time_fps(sdf_file, png_file, radius):
    """Plot the ``speed_tests`` results against fingerprint size and save them.

    The molecules that parse from ``sdf_file`` (hydrogens kept) are passed to
    ``speed_tests`` together with ``radius`` and the fingerprint sizes
    2**5 .. 2**11. The returned correlations are plotted against the exponent,
    each point is annotated with its timing, and the figure is saved to
    ``png_file``.
    """
    supplier = SDMolSupplier(sdf_file, removeHs=False)
    mols = [mol for mol in supplier if mol]

    exponents = np.arange(5, 12)
    corrs, timings = speed_tests(mols, radius, 2**exponents)

    sns.set(font_scale=1.5)
    fig, axis = plt.subplots(1, 1, figsize=(7, 5), dpi=300)
    axis.plot(exponents, corrs, '-o')

    axis.set_xlabel('size of fingerprint ($log_2$ scale)')
    axis.set_ylabel('Correlation')
    axis.set_xticks(exponents)
    tick_labels = ['%d' % 2**exponent for exponent in exponents]
    axis.set_xticklabels(tick_labels)

    # Put each timing slightly below its data point.
    for position, seconds in enumerate(timings):
        anchor = (exponents[position], corrs[position] - 0.05)
        axis.annotate('%.1fs' % seconds, anchor, fontsize='small')

    plt.tight_layout()
    plt.subplots_adjust(wspace=0.2, right=0.95)
    fig.savefig(png_file)
예제 #28
0
def docking(k):
    """Dock one ligand against its protein with smina.

    The ligand is read from its SDF file, falling back to the RCSB SDF and
    then to the mol2 file. Twenty conformers are embedded and UFF-optimised,
    the lowest-energy one is written as PDB, ligand and protein are converted
    to PDBQT with ``obabel``, and ``smina`` is run with the original ligand
    file defining the search box.

    Nothing is done when the output file already exists, when no ligand can
    be read, or when no conformer could be embedded.

    Args:
        k: Molecule id; it names the sub-directory and the files to use.
    """
    # mol_id = k.split("/")[-1]
    mol_id = k
    protein = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_protein.pdb")
    protein_pdbqt = os.path.join(pdbbind_dir, mol_id,
                                 f"{mol_id}_protein.pdbqt")
    ligand = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_ligand.sdf")
    ligand_mol2 = os.path.join(pdbbind_dir, mol_id, f"{mol_id}_ligand.mol2")
    log_name = os.path.join(log_dir, f'{mol_id}.log')
    out_name = os.path.join(out_pdbqt_dir, f'{mol_id}_out.pdbqt')
    pdb_name = os.path.join(pdb_dir, f'{mol_id}.pdb')
    pdbqt_name = os.path.join(pdbqt_dir, f'{mol_id}.pdbqt')
    ligand_rcsb = os.path.join(rcsb_dir, mol_id, f"{mol_id}.sdf")

    # Already docked.
    if os.path.exists(out_name):
        return

    # Read the ligand, trying the alternative sources in turn.
    m = SDMolSupplier(ligand)[0]
    if m is None and os.path.exists(ligand_rcsb):
        m = SDMolSupplier(ligand_rcsb)[0]

    if m is None:
        m = Chem.MolFromMol2File(ligand_mol2)

    if m is None:
        return

    Chem.SanitizeMol(m)

    # Adding hydrogen atoms to molecule
    m = Chem.AddHs(m)
    cids = AllChem.EmbedMultipleConfs(
        m,
        numConfs=20,
    )
    cenergy = []
    for conf in cids:
        AllChem.UFFOptimizeMolecule(m, confId=conf)
        cenergy.append(
            AllChem.UFFGetMoleculeForceField(m, confId=conf).CalcEnergy())

    # Embedding can fail and yield no conformers; min() on an empty list
    # would raise ValueError, so there is nothing to dock in that case.
    if not cenergy:
        return
    min_idx = cenergy.index(min(cenergy))

    m = Chem.RemoveHs(m)
    w = PDBWriter(pdb_name)
    w.write(m, min_idx)
    w.close()

    # pdb to pdbqt (both of ligand and protein)
    if not os.path.exists(pdbqt_name):
        os.system(f'obabel {pdb_name} -O {pdbqt_name}')
    if not os.path.exists(protein_pdbqt):
        os.system(f'obabel {protein} -O {protein_pdbqt}')

    command = f"smina \
            -r {protein_pdbqt} \
            -l {pdbqt_name} \
            --autobox_ligand {ligand} \
            --autobox_add 8 \
            --exhaustiveness 8 \
            --log {log_name} \
            -o {out_name} \
            --cpu 1 \
            --num_modes 100 \
            --seed 0"

    os.system(command)
예제 #29
0
파일: io.py 프로젝트: jir322/chembl_beaker
def _parseMolData(data):
    """Parse SD data given as text and return the molecules that parsed.

    ``data`` is converted with ``str`` and loaded into an ``SDMolSupplier``;
    falsy records are dropped.
    """
    supplier = SDMolSupplier()
    supplier.SetData(str(data))
    parsed = []
    for mol in supplier:
        if mol:
            parsed.append(mol)
    return parsed
from sys import argv

from rdkit.Chem import SDMolSupplier, SmilesWriter

# Convert the SDF file given as first argument into the SMILES file given as
# second argument, one line per molecule and without a header line.
sdm = SDMolSupplier(argv[1])
sw = SmilesWriter(argv[2], includeHeader=False, nameHeader='_Name')
try:
    for mol in sdm:
        # Records that cannot be parsed are yielded as None and cannot be
        # written; skip them instead of aborting the conversion.
        if mol is None:
            continue
        sw.write(mol)
finally:
    # Close the writer even if a write fails.
    sw.close()