def apply_fp(self, data):
    """Compute a fingerprint for each molecule in ``data`` and append it to ``self.x``.

    The input column is chosen from ``self.name``: ``'SMILES'`` for the
    ``'PubChem'`` and ``'FP4'`` fingerprints, ``'Conformer'`` for
    ``'3D pharmacophore'`` and ``'Mol'`` for every other fingerprint.

    A molecule whose fingerprint function raises is reported on stdout
    (SMILES and InChI key) and skipped, so ``self.x`` may end up with fewer
    entries than ``data`` has rows.

    :param data: table indexable by the column names ``'SMILES'``,
        ``'Inchi-Key'``, ``'Conformer'`` and ``'Mol'``.
        NOTE(review): rows are read as ``data[col][i]`` with ``i`` counting
        from 0 -- presumably a default integer index; confirm with callers.
    :return: None; results are accumulated in ``self.x``.
    """
    uses_smiles = self.name == 'PubChem' or self.name == 'FP4'
    if uses_smiles:
        mols = list(data['SMILES'])
    elif self.name == '3D pharmacophore':
        mols = list(data['Conformer'])
    else:
        mols = list(data['Mol'])

    for i, mol in enumerate(mols):
        try:
            fp = self.fp_fun(mol)
        except Exception:
            print(data['SMILES'][i] + '  ' + data['Inchi-Key'][i])
            # Only molecule objects can be drawn; a SMILES string would make
            # the debug drawing itself fail.
            if not isinstance(mol, str):
                MolToFile(mol, 'd.png')
            # Without a fresh fingerprint there is nothing valid to store:
            # falling through would reuse the previous molecule's value (or
            # hit an unbound name on the first molecule).
            continue

        # Some fingerprint functions return a tuple; its first item holds the bits.
        if isinstance(fp, tuple):
            fp = np.array(list(fp[0]))
        if isinstance(fp, rdkit.DataStructs.cDataStructs.ExplicitBitVect):
            fp = BitVect_to_NumpyArray(fp)
        elif uses_smiles:
            fp = IntListToBitArray(fp)
        else:
            fp = np.array(list(fp))
        self.x += [fp]
        if type(self.x[0]) is not np.ndarray:
            print("WARNING: type for ", self.name, "is ", type(self.x[0]))
Esempio n. 2
0
def generate_png(mol, pngpath, logfile=devnull, size=300):
    """Draw ``mol`` without hydrogens into the image file ``pngpath``.

    All work happens inside two nested ``stdout_redirected`` contexts: the
    outer one is called with ``to=sys.stdout, stdout=sys.stderr`` and the
    inner one with ``to=logfile, stdout=sys.stdout``.

    :param mol: molecule to draw
    :param pngpath: destination file path
    :param logfile: target handed to the inner ``stdout_redirected`` context
    :param size: edge length used for the square image; the atom label font
        size is derived from it as ``size / 25``
    """
    with stdout_redirected(to=sys.stdout, stdout=sys.stderr), \
            stdout_redirected(to=logfile, stdout=sys.stdout):
        # Strip hydrogens without sanitizing, then sanitize separately so
        # that problems are caught instead of raised.
        stripped = RemoveHs(mol,
                            implicitOnly=False,
                            updateExplicitCount=True,
                            sanitize=False)
        SanitizeMol(stripped, catchErrors=True)

        draw_options = DrawingOptions()
        draw_options.atomLabelFontSize = size / 25

        drawable = PrepareMolForDrawing(stripped,
                                        forceCoords=True,
                                        addChiralHs=True)
        MolToFile(drawable,
                  pngpath,
                  fitImage=True,
                  size=(size, size),
                  options=draw_options)
Esempio n. 3
0
    def make_structures_for_smiles(ligand_dict):
        """
        Write one SVG structure drawing per ligand into IMG_DIR.

        Each entry's SMILES string (stored under SMILE_COLUMNNAME) is parsed
        and drawn to a file named after the entry's "orig_id" value.

        :param ligand_dict: ligand dictionary which keeps all ligand information
        :return: None
        """
        out_dir = IMG_DIR
        for ligand_key in ligand_dict:
            record = ligand_dict[ligand_key]
            smiles_text = record[SMILE_COLUMNNAME]
            base_name = record["orig_id"]
            structure = Chem.MolFromSmiles(smiles_text)
            svg_path = os.path.join(out_dir, '{}.svg'.format(base_name))
            MolToFile(structure, svg_path)
Esempio n. 4
0
def model_process_fun(model_out, visdom, n):
    """Send the best molecule of a batch, and its scores, to ``visdom``.

    The molecule with the highest total reward is selected.  If its SMILES
    parses, an SVG drawing, the score components, the reward, the fraction
    of valid molecules and the atom count are appended to ``visdom``; if it
    does not parse, nothing is reported.

    :param model_out: mapping with an ``'info'`` entry (a ``(smiles, valid)``
        pair) and a ``'rewards'`` entry
    :param visdom: object whose ``append`` method receives the plots
    :param n: value used as the x coordinate of every line plot
    """
    # TODO: rephrase this to return a dict, instead of calling visdom directly
    from rdkit import Chem
    from rdkit.Chem.Draw import MolToFile

    smiles, valid = model_out['info']
    total_rewards = model_out['rewards']
    # Rewards with more than one dimension are summed along axis 1 first.
    if len(total_rewards.shape) > 1:
        total_rewards = total_rewards.sum(1)

    best_ind = torch.argmax(total_rewards).data.item()
    this_smile = smiles[best_ind]
    mol = Chem.MolFromSmiles(this_smile)
    pic_save_path = os.path.realpath(root_location + '/images/' + 'tmp.svg')

    if mol is None:
        return

    # Drawing is best effort: a failure is printed and the numeric plots
    # below are still sent.
    try:
        MolToFile(mol, pic_save_path, imageType='svg')
        with open(pic_save_path, 'r') as svg_file:
            svg_text = svg_file.read()
        svg_text = svg_text.replace('svg:', '')
        visdom.append('best molecule of batch', 'svg', svgstr=svg_text)
    except Exception as e:
        print(e)

    scores, norm_scores = scorer.get_scores([this_smile])
    component_row = (list(norm_scores[0]) + [norm_scores[0].sum()] +
                     [scores[0].sum()] + [desc.CalcNumAromaticRings(mol)])
    legend = [
        'logP', 'SA', 'cycle', 'norm_reward', 'reward', 'Aromatic rings'
    ]
    visdom.append('score component',
                  'line',
                  X=np.array([n]),
                  Y=np.array([component_row]),
                  opts={'legend': legend})
    visdom.append('reward',
                  'line',
                  X=np.array([n]),
                  Y=np.array([total_rewards[best_ind].item()]))
    visdom.append('fraction valid',
                  'line',
                  X=np.array([n]),
                  Y=np.array([valid.mean().data.item()]))
    visdom.append('num atoms',
                  'line',
                  X=np.array([n]),
                  Y=np.array([len(mol.GetAtoms())]))
Esempio n. 5
0
 def assign_ids(self):
     """Number compounds and reactions for ease of reference.

     Compounds are visited in ``(Generation, _id)`` order and each one whose
     ``'ID'`` is falsy receives ``'pkc'`` followed by a counter zero-padded
     to 7 digits (e.g. ``'pkc0003721'`` for 3721).  Reactions are visited in
     the same order; every reaction receives ``'pkr'`` plus a padded counter
     and an ``'ID_rxn'`` string assembled from the compound IDs.  The IDs
     are unique only to the CURRENT run.
     """

     def _generation_then_id(entry):
         return (entry['Generation'], entry['_id'])

     def _format_side(members):
         # One '(stoich) ID[c0]' term per participant, joined with ' + '.
         return ' + '.join(
             '(%s) %s[c0]' % (member.stoich, self.compounds[member.c_id]["ID"])
             for member in members)

     # If we were running a multiprocess expansion, this removes the dicts
     # from Manager control
     self.compounds = dict(self.compounds)
     self.reactions = dict(self.reactions)

     next_number = 1
     for comp in sorted(self.compounds.values(), key=_generation_then_id):
         if comp['ID']:
             # Compound already carries an ID; leave it alone.
             continue
         comp['ID'] = 'pkc{:07d}'.format(next_number)
         next_number += 1
         self.compounds[comp['_id']] = comp

         # If we are not loading into the mine, we generate the image here.
         if not self.image_dir or self.mine:
             continue
         mol = AllChem.MolFromSmiles(comp['SMILES'])
         try:
             image_path = os.path.join(self.image_dir, comp['ID'] + '.png')
             MolToFile(mol, image_path, fitImage=True, kekulize=False)
         except OSError:
             print("Unable to generate image for %s" % comp['SMILES'])

     ordered_reactions = sorted(self.reactions.values(),
                                key=_generation_then_id)
     for number, rxn in enumerate(ordered_reactions, start=1):
         rxn['ID_rxn'] = (_format_side(rxn["Reactants"]) + ' => ' +
                          _format_side(rxn["Products"]))
         rxn['ID'] = 'pkr{:07d}'.format(number)
         self.reactions[rxn['_id']] = rxn
 def testMakePNG(self):
     # Smoke test only: checks that an image file gets written, not that it
     # looks correct (that's outside the scope of this package).
     # The choices shown resulted (at last check) in 3 fragments, one of which has a branch
     try:
         silent_remove(TEST_PNG)
         kmc_result = create_sample_kmc_result(num_initial_monos=24,
                                               max_monos=24,
                                               seed=1,
                                               max_time=SHORT_TIME)
         adj_analysis_to_stdout(analyze_adj_matrix(kmc_result[ADJ_MATRIX]),
                                break_co_bonds=False)
         mono_nodes = kmc_result[MONO_LIST]
         adj_matrix = kmc_result[ADJ_MATRIX]
         mol = MolFromMolBlock(generate_mol(adj_matrix, mono_nodes))
         Compute2DCoords(mol)
         MolToFile(mol, TEST_PNG, size=(2000, 1200))
         self.assertTrue(os.path.isfile(TEST_PNG))
     finally:
         # Clean up the image unless removal has been disabled.
         silent_remove(TEST_PNG, disable=DISABLE_REMOVE)
Esempio n. 7
0
def make_image_grid(file_label,
                    smi_list,
                    labels=None,
                    out_dir=PNG_DIR,
                    mol_img_size=(400, 300),
                    write_output=True):
    """
    Given a molecular formula (or other label) and the set of SMI, make an image grid of all smiles within
    https://www.rdkit.org/docs/GettingStartedInPython.html
    :param file_label: str, such as chemical formula that corresponds to all smiles in SMILES set
    :param smi_list: list or set of SMILES strings; used to generate images
    :param labels: if None (or empty), will use the smi_list as labels; otherwise a list to use
    :param out_dir: directory where the file should be saved
    :param mol_img_size: tuple of ints to determine size of individual molecules
    :param write_output: boolean to determine whether to write to screen that a file was created
    :return: N/A, save a file
    """
    # Materialize once: a set can neither be indexed (single-molecule file
    # name below) nor relied on to pair up with the molecules as legends.
    # One list keeps molecules and default labels in the same order.
    smi_list = list(smi_list)

    mols = []
    for smi in smi_list:
        mol = Chem.MolFromSmiles(smi)
        Compute2DCoords(mol)
        mols.append(mol)

    img_labels = labels if labels else smi_list

    single_mol = len(mols) == 1
    if single_mol:
        # didn't see a way for RDKit to add a label to an image with a single molecule (grid image does not work
        # for one image), so add to file name
        file_label += '_' + img_labels[0]
    fname = create_out_fname(file_label, ext='png', base_dir=out_dir)
    if single_mol:
        MolToFile(mols[0], fname, size=mol_img_size)
    else:
        img_grid = MolsToGridImage(mols,
                                   molsPerRow=3,
                                   subImgSize=mol_img_size,
                                   legends=img_labels)
        img_grid.save(fname)
    if write_output:
        print(f"Wrote file: {os.path.relpath(fname)}")
Esempio n. 8
0
    def generate_comp_svgs(self):
        """Write an SVG drawing for every row of ``self.compounds``.

        Files go straight into the frontend's images directory
        (``../../frontend/app/data/images`` relative to this module) and
        are named ``<row label>-<value of the row's 'name' field>.svg``.

        Notes:
            It is expected that the user has installed an svg capable
            renderer for rdkit.  See http://www.rdkit.org for details.

        Returns:
            None

        """
        module_dir = os.path.dirname(__file__)
        structure_path = os.path.join(module_dir,
                                      '../../frontend/app/data/images')

        def _write_svg(row):
            # ``row.name`` is the attribute, ``row['name']`` the field value.
            structure = row.structure
            file_name = '{}-{}.svg'.format(row.name, row['name'])
            return MolToFile(structure,
                             os.path.join(structure_path, file_name))

        self.compounds.apply(_write_svg, axis=1)
Esempio n. 9
0
def produce_output(adj_matrix, mono_list, cfg):
    """Write the requested output formats for one structure.

    Unless SMILES output is suppressed and none of JSON/PNG/SVG is requested,
    a molecule is built from ``adj_matrix`` and ``mono_list`` and its SMILES
    string is either saved (``cfg[SAVE_SMI]``) or printed.  Then each format
    enabled in ``cfg`` (TCL, JSON, PNG, SVG) is written to a file whose name
    derives from ``cfg[BASENAME]`` and ``cfg[OUT_DIR]``.

    :param adj_matrix: adjacency matrix passed to ``generate_mol``/``gen_tcl``
    :param mono_list: monomer list passed to ``generate_mol``/``gen_tcl``
    :param cfg: configuration mapping read with the ``SAVE_*``, ``SUPPRESS_SMI``,
        ``BASENAME``, ``OUT_DIR``, ``CHAIN_ID``, ``PSF_FNAME``, ``TOPPAR_DIR``
        and ``IMAGE_SIZE`` keys
    :raises InvalidDataError: if the SMILES string cannot be produced
    """
    if cfg[SUPPRESS_SMI] and not (cfg[SAVE_JSON] or cfg[SAVE_PNG] or cfg[SAVE_SVG]):
        format_list = [SAVE_TCL]
        mol = None  # Make IDE happy
    else:
        # Default out is SMILES, which requires getting an rdKit molecule object; also required for everything
        #    except the TCL format
        format_list = [SAVE_TCL, SAVE_JSON, SAVE_PNG, SAVE_SVG]
        block = generate_mol(adj_matrix, mono_list)
        mol = MolFromMolBlock(block)
        try:
            smiles = MolToSmiles(mol)
        except Exception as e:
            # Keep the original cause attached, and let KeyboardInterrupt /
            # SystemExit pass through untouched.
            raise InvalidDataError("Error in producing SMILES string.") from e
        # if SMI is to be saved, don't output to stdout
        if cfg[SAVE_SMI]:
            fname = create_out_fname(cfg[BASENAME], base_dir=cfg[OUT_DIR], ext=SAVE_SMI)
            str_to_file(smiles + '\n', fname, print_info=True)
        else:
            print("\nSMILES representation: \n", smiles, "\n")
        if cfg[SAVE_PNG] or cfg[SAVE_SVG] or cfg[SAVE_JSON]:
            # PNG and SVG make 2D images and thus need coordinates
            # JSON will save coordinates--zero's if not computed; might as well compute and save non-zero values
            Compute2DCoords(mol)

    for save_format in format_list:
        if cfg[save_format]:
            fname = create_out_fname(cfg[BASENAME], base_dir=cfg[OUT_DIR], ext=save_format)
            if save_format == SAVE_TCL:
                gen_tcl(adj_matrix, mono_list, tcl_fname=fname, chain_id=cfg[CHAIN_ID],
                        psf_fname=cfg[PSF_FNAME], toppar_dir=cfg[TOPPAR_DIR], out_dir=cfg[OUT_DIR])
            if save_format == SAVE_JSON:
                json_str = MolToJSON(mol)
                str_to_file(json_str + '\n', fname)
            elif save_format == SAVE_PNG or save_format == SAVE_SVG:
                MolToFile(mol, fname, size=cfg[IMAGE_SIZE])
            print(f"Wrote file: {fname}")
 def model_process_fun(model_out, visdom, n):
     """Send the best molecule of a batch, and its scores, to ``visdom``.

     The molecule with the highest reward (summed along axis 1) is selected.
     If its SMILES parses, an SVG drawing, the score components and the
     fraction of valid molecules are appended to ``visdom``; otherwise
     nothing is reported.

     :param model_out: 5-item sequence ``(actions, logits, rewards,
         terminals, info)``; only ``rewards`` and ``info`` (a
         ``(smiles, valid)`` pair) are used
     :param visdom: object whose ``append`` method receives the plots
     :param n: value used as the x coordinate of every line plot
     """
     from rdkit import Chem
     from rdkit.Chem.Draw import MolToFile
     _actions, _logits, rewards, _terminals, info = model_out
     smiles, valid = info
     total_rewards = rewards.sum(1)
     best_ind = torch.argmax(total_rewards).data.item()
     this_smile = smiles[best_ind]
     mol = Chem.MolFromSmiles(this_smile)
     pic_save_path = root_location + 'images/' + 'test.svg'
     if mol is not None:
         # Drawing stays best effort, but a failure is now reported instead
         # of being discarded, so a broken renderer or path is visible.
         try:
             MolToFile(mol, pic_save_path, imageType='svg')
             with open(pic_save_path, 'r') as myfile:
                 data = myfile.read()
             data = data.replace('svg:', '')
             visdom.append('best molecule of batch', 'svg', svgstr=data)
         except Exception as e:
             print(e)
         scores, norm_scores = scorer.get_scores([this_smile])
         visdom.append(
             'score component',
             'line',
             X=np.array([n]),
             Y=np.array(
                 [[x for x in norm_scores[0]] + [norm_scores[0].sum()] +
                  [scores[0].sum()] + [desc.CalcNumAromaticRings(mol)]]),
             opts={
                 'legend': [
                     'logP', 'SA', 'cycle', 'norm_reward', 'reward',
                     'Aromatic rings'
                 ]
             })
         visdom.append('fraction valid',
                       'line',
                       X=np.array([n]),
                       Y=np.array([valid.mean().data.item()]))
Esempio n. 11
0
def plot_molecule(molecule, file_name=None):
    """Draw ``molecule`` to ``file_name`` with RDKit.

    The molecule is converted with ``get_graph``, its hydrogens are removed,
    and the result is drawn with ``size=(200, 200)``.
    """
    heavy_atom_graph = Chem.RemoveHs(get_graph(molecule))
    MolToFile(heavy_atom_graph, file_name, size=(200, 200))
Esempio n. 12
0
def weight_visulize_py(smiles, atom_weight):
    """Draw ``smiles`` with the surroundings of its top-weighted atom highlighted.

    The atom with the largest weight is located, and that atom together with
    its neighbours out to three bonds is coloured on the 'Oranges' colour map:
    the atom and its first and second neighbours get the full weight, third
    neighbours half of it, every other atom zero.  Bonds inside that fragment
    are coloured with the mean weight of their two atoms.  The picture is
    written to ``./CYP2D6/<name>.png``.

    :param smiles: SMILES string of the molecule; also used for the file name
    :param atom_weight: per-atom weights; must support
        ``.squeeze().numpy().tolist()`` (presumably a torch tensor -- confirm)
    """
    print(smiles)
    atom_weight_list = atom_weight.squeeze().numpy().tolist()
    max_atom_weight_index = atom_weight_list.index(max(atom_weight_list))
    # NOTE(review): this indexes the un-squeezed ``atom_weight``, so the
    # value may still carry a trailing dimension; float() below absorbs it.
    significant_weight = atom_weight[max_atom_weight_index]
    mol = Chem.MolFromSmiles(smiles)
    norm = matplotlib.colors.Normalize(vmin=0, vmax=1)
    cmap = cm.get_cmap('Oranges')
    plt_colors = cm.ScalarMappable(norm=norm, cmap=cmap)
    atom_colors = {}
    bond_colors = {}
    atom_new_weight = [0 for x in range(mol.GetNumAtoms())]
    # generate most important significant circle fragment and attach significant weight
    atom = mol.GetAtomWithIdx(max_atom_weight_index)
    # find neighbors 1
    atom_neighbors_1 = [x.GetIdx() for x in atom.GetNeighbors()]
    # find neighbors 2
    atom_neighbors_2 = []
    for neighbors_1_index in atom_neighbors_1:
        neighbor_1_atom = mol.GetAtomWithIdx(neighbors_1_index)
        atom_neighbors_2.extend(x.GetIdx() for x in neighbor_1_atom.GetNeighbors())
    # The central atom appears here as a neighbour of its own neighbours;
    # drop one occurrence.  An atom without neighbours leaves the list empty,
    # so guard the removal instead of raising ValueError.
    if max_atom_weight_index in atom_neighbors_2:
        atom_neighbors_2.remove(max_atom_weight_index)
    # find neighbors 3
    atom_neighbors_3 = []
    for neighbors_2_index in atom_neighbors_2:
        neighbor_2_atom = mol.GetAtomWithIdx(neighbors_2_index)
        atom_neighbors_3.extend(x.GetIdx() for x in neighbor_2_atom.GetNeighbors())
    atom_neighbors_3 = [x for x in atom_neighbors_3 if x not in atom_neighbors_1]
    # Assign weights outermost shell first so inner shells overwrite it.
    for i in atom_neighbors_3:
        atom_new_weight[i] = significant_weight*0.5
    for i in atom_neighbors_2:
        atom_new_weight[i] = significant_weight
    for i in atom_neighbors_1:
        atom_new_weight[i] = significant_weight
    atom_new_weight[max_atom_weight_index] = significant_weight

    significant_fg_index = [max_atom_weight_index] + atom_neighbors_1 + atom_neighbors_2 + atom_neighbors_3

    for i in range(mol.GetNumAtoms()):
        atom_colors[i] = plt_colors.to_rgba(float(atom_new_weight[i]))

    for i in range(mol.GetNumBonds()):
        bond = mol.GetBondWithIdx(i)
        u = bond.GetBeginAtomIdx()
        v = bond.GetEndAtomIdx()
        x = atom_new_weight[u]
        y = atom_new_weight[v]
        bond_weight = (x+y)/2
        if u in significant_fg_index and v in significant_fg_index:
            bond_colors[i] = plt_colors.to_rgba(float(abs(bond_weight)))
        else:
            bond_colors[i] = plt_colors.to_rgba(float(abs(0)))
    rdDepictor.Compute2DCoords(mol)
    # NOTE(review): the drawer and its options are created but never handed
    # to MolToFile below; kept as-is pending confirmation they can go.
    drawer = rdMolDraw2D.MolDraw2DSVG(280, 280)
    drawer.SetFontSize(1)
    op = drawer.drawOptions()
    mol = rdMolDraw2D.PrepareMolForDrawing(mol)
    # Backslashes cannot stay in the file name.  The previous
    # eval(repr(...)) round trip turned each backslash into two '|'
    # characters (repr doubles the backslash first); the same name is built
    # here without evaluating input text as code.
    smiles_name = smiles.replace('\\', '||')
    MolToFile(mol, r'./CYP2D6/'+smiles_name+'.png', highlightAtoms=range(0, mol.GetNumAtoms()), highlightBonds=range(0, mol.GetNumBonds()),
              highlightAtomColors=atom_colors, highlightBondColors=bond_colors)
Esempio n. 13
0
                        help="The molecule to be fragmented in MOL2 format.")
    parser.add_argument("-outfolder", dest="OUT_FOLDER", required=False, default="fragments", type=str,
                        help="The folder name which will be created (or erased if it already exists) where the "
                             "PNG images of the fragments will be saved.")
    parser.add_argument("-fpradius", dest="FP_RADIUS", required=False, default=2, type=int,
                        help="The ECFP radius parameter value (distance in number of bonds). Default: %(default)s")

    args = parser.parse_args()
    return args

if __name__ == "__main__":
    # Draw the input molecule and one image per Morgan fingerprint bit into
    # a freshly (re)created output folder.
    args = cmdlineparse()
    mol = None
    if args.SMILES:
        mol = Chem.MolFromSmiles(args.SMILES)
    if args.MOL2:
        # A MOL2 file, when given, takes precedence over the SMILES string.
        mol = MolFromMol2File(args.MOL2, sanitize=False, removeHs=False)
    # Stop before touching the output folder when there is nothing to draw
    # (no input given, or the input could not be parsed).
    if mol is None:
        raise SystemExit("ERROR: no molecule could be loaded from the given SMILES string or MOL2 file.")

    if os.path.exists(args.OUT_FOLDER):
        shutil.rmtree(args.OUT_FOLDER)
    os.mkdir(args.OUT_FOLDER)

    # Write straight into the output folder instead of creating the image in
    # the working directory and moving it afterwards.
    MolToFile(mol, os.path.join(args.OUT_FOLDER, "original_molecule.png"))
    bi = {}
    # The call fills ``bi`` with one entry per set bit.
    fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=args.FP_RADIUS, bitInfo=bi)
    for k in bi:
        mfp2_svg = DrawMorganBit(mol, k, bi)
        mfp2_svg.save(fp=os.path.join(args.OUT_FOLDER, "%i_frag.png" % k), format="PNG")

    # TODO: show all fragments in one figure
    # https://stackoverflow.com/questions/37365824/pandas-ipython-notebook-include-and-display-an-image-in-a-dataframe