Ejemplo n.º 1
0
def compute_properties(args):
    """Compute extended properties for each molecule and write them out.

    Molecules are read from ``args.input`` (format ``args.iformat``); only
    those with more than five heavy atoms are processed.  When
    ``args.oformat`` is ``"sdf"`` the properties are attached to each molecule
    as data fields and the molecules are written to ``args.output``.
    Otherwise a tab-separated table is written, preceded by a column-name row
    when ``args.header`` is set.

    :param args: namespace providing ``input``, ``iformat``, ``output``,
        ``oformat`` and, for tabular output, ``header``.
    """
    as_sdf = args.oformat == "sdf"
    if as_sdf:
        outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
    else:
        outfile = open(args.output, "w")

    # Make sure the output handle is released even if reading the input or
    # computing the properties fails part-way through.
    try:
        if not as_sdf and args.header:
            # The column names are taken from the first molecule.  An empty
            # input produces no header row instead of raising StopIteration.
            first = next(pybel.readfile(args.iformat, args.input), None)
            if first is not None:
                metadata = cheminfolib.get_properties_ext(first)
                outfile.write("%s\n" % "\t".join(
                    cheminfolib.ColumnNames[key] for key in metadata))

        for mol in pybel.readfile(args.iformat, args.input):
            if mol.OBMol.NumHvyAtoms() <= 5:
                continue
            metadata = cheminfolib.get_properties_ext(mol)
            if as_sdf:
                mol.data.update({
                    cheminfolib.ColumnNames[key]: metadata[key]
                    for key in metadata
                })
                outfile.write(mol)
            else:
                outfile.write("%s\n" % "\t".join(
                    str(metadata[key]) for key in metadata))
    finally:
        outfile.close()
Ejemplo n.º 2
0
def pairwise_atomic_types(path, processed_dict, atom_types, atom_types_):
    """Count close ligand/pocket atom pairs per atomic-number combination.

    Every four-character entry of ``path`` is treated as a complex directory
    holding ``<name>_ligand.mol2`` and ``<name>_protein.pdb``.  Atom pairs
    selected by ``dist_filter(dm, 12)`` are tallied under the key
    ``(pocket atomic number, ligand atomic number)`` and the counts are stored
    as a list in ``processed_dict[name]['type_pair']``.

    :param path: directory containing one sub-directory per complex.
    :param processed_dict: mapping that already holds an entry per complex
        name; it is updated in place.
    :param atom_types: atomic numbers accepted for ligand atoms.
    :param atom_types_: atomic numbers accepted for pocket atoms.
    :return: ``processed_dict``.
    """
    pair_keys = [(poc_type, lig_type)
                 for poc_type in atom_types_
                 for lig_type in atom_types]

    for name in tqdm(os.listdir(path)):
        if len(name) != 4:
            continue

        ligand_file = '%s/%s/%s_ligand.mol2' % (path, name, name)
        ligand = next(pybel.readfile('mol2', ligand_file))
        pocket_file = '%s/%s/%s_protein.pdb' % (path, name, name)
        pocket = next(pybel.readfile('pdb', pocket_file))

        coords_lig = np.vstack([atom.coords for atom in ligand])
        coords_poc = np.vstack([atom.coords for atom in pocket])
        lig_numbers = [atom.atomicnum for atom in ligand]
        poc_numbers = [atom.atomicnum for atom in pocket]

        dm = distance_matrix(coords_lig, coords_poc)
        lig_indices, poc_indices = dist_filter(dm, 12)

        # Start every known pair at zero so the output list has a fixed
        # length and ordering.
        counts = dict.fromkeys(pair_keys, 0)
        for lig_idx, poc_idx in zip(lig_indices, poc_indices):
            lig_type = lig_numbers[lig_idx]
            poc_type = poc_numbers[poc_idx]
            if lig_type in atom_types and poc_type in atom_types_:
                counts[(poc_type, lig_type)] += 1

        processed_dict[name]['type_pair'] = list(counts.values())

    return processed_dict
Ejemplo n.º 3
0
    def LogRead(self):
        """Convert the optimised logs to XYZ files and read each job log.

        For every entry in ``self.smiles`` the molecules found in
        ``{path}/log/opt_molecule_{n}.log`` are written to
        ``xyz/data_{n}.xyz`` (a later molecule overwrites an earlier one).
        Then ``log/{name}_molecule_{n}.log`` is read and its lines are
        printed.  When ``self.name`` is ``'sp'`` the first line of that log
        is printed as well.

        :raises StopIteration: if ``self.name`` is ``'sp'`` and the log file
            is empty.
        """
        # NOTE(review): the directory is created under self.path while the
        # XYZ and job-log files are addressed relative to the current working
        # directory -- presumably the caller runs with cwd == self.path;
        # confirm.
        for n in range(len(self.smiles)):
            # exist_ok replaces the former bare "except" that was only there
            # to tolerate an already existing directory.
            os.makedirs(self.path + "/xyz", exist_ok=True)

            for molecule in pybel.readfile(
                    'g09', '{path}/log/opt_molecule_{n}.log'.format(
                        path=self.path, name=self.name, n=n)):
                output = pybel.Outputfile('xyz',
                                          'xyz/data_{n}.xyz'.format(n=n),
                                          overwrite=True)
                # Close the writer so the data is flushed and the handle
                # released.
                try:
                    output.write(molecule)
                finally:
                    output.close()

            with open(
                    'log/{name}_molecule_{n}.log'.format(name=self.name, n=n),
                    'r') as file:
                lines = file.readlines()
            print(lines)
            if str(self.name) == 'sp':
                # The previous filter condition was always true, so this has
                # always been the first line of the log.
                energy = next(iter(lines))
                print(energy)
Ejemplo n.º 4
0
def GetOptimizedMol(
    arc_file: str = None,
    inputmol: Chem.Mol = None,
    method: str = 'PM3',
    version: str = '7.1',
    verbose: bool = False,
    dispose: bool = True,
) -> Union[pybel.Molecule, None]:
    """Optimize molecule geometry with MOPAC and return the optimized molecule.

    In order not to optimize a molecule multiple times, an ARC file may be
    provided.  The MOPAC output file is then looked up in the directory of
    that ARC file.

    If not already optimized, a molecule may be provided instead.

    :param arc_file: Path to MOPAC .arc file
                     (takes precedence over inputmol when both are given).
    :param inputmol: molecule to optimize
                     (ignored if arc_file provided).
    :param method: semi-empirical method to apply
                   (ignored if arc_file provided).
    :param version: version of MOPAC to be used
                    (ignored if arc_file provided).
    :param verbose: whether to print progress messages
                    (ignored if arc_file provided).
    :param dispose: whether to remove generated MOPAC output files
                    (ignored if arc_file provided).
    :return: None when no MOPAC output is available.  Otherwise an RDKit
             molecule (built from a MOL2 block) when arc_file is given, or
             the pybel molecule read from the MOPAC output when inputmol is
             optimized.
    :raises ValueError: if neither arc_file nor inputmol is provided.
    """
    # NOTE(review): the two branches return different molecule types; kept
    # as-is because callers may depend on either one.
    if arc_file is None and inputmol is None:
        raise ValueError('Either ARC file or inputmolecule must be provided.')

    if arc_file is not None:
        pybelmol = _ReadLatestMopacOutput(os.path.dirname(arc_file))
        if pybelmol is None:
            return None
        return Chem.MolFromMol2Block(pybelmol.write(format='mol2'))

    res = GetARCFile(inputmol, method, version, verbose, False)
    if res is None:
        return None
    dir_, _ = res
    try:
        return _ReadLatestMopacOutput(dir_)
    finally:
        # Clean up the working directory on every exit path, including the
        # "no output found" and error cases.
        if dispose:
            Dispose(dir_)


def _ReadLatestMopacOutput(out_dir):
    """Return the molecule from the newest ``.out`` file in *out_dir*, or None."""
    candidates = GetFileInDirFromExt(out_dir, '.out')
    # Check for "nothing found" before picking the newest file, so the
    # selection helper is never called with an empty list.
    if len(candidates) == 0:
        return None
    out_path = candidates[0] if len(candidates) == 1 \
        else GetLastestCreatedFile(filepaths=candidates)
    if not out_path:
        return None
    return next(pybel.readfile('mopout', out_path))
Ejemplo n.º 5
0
def moved(output, reference, refformat):
    """Tell whether any atom of *output* is displaced from *reference*.

    The first molecule of the PDB file *output* is compared atom by atom, in
    file order, with the first molecule of *reference* (format *refformat*).
    Atoms are paired with ``zip``, so surplus atoms in the longer molecule
    are not compared.

    :return: True as soon as one pair is more than 1e-3 apart.  Otherwise
        the *output* path is printed and False is returned.
    """
    out_mol = next(pybel.readfile('pdb', output))
    ref_mol = next(pybel.readfile(refformat, reference))

    displaced = any(
        np.linalg.norm(np.array(out_atom.coords) - np.array(ref_atom.coords))
        > 1e-3
        for out_atom, ref_atom in zip(out_mol.atoms, ref_mol.atoms))

    if not displaced:
        print(output)
    return displaced
Ejemplo n.º 6
0
def eliminar_repetits(sdf_file):
    """Write the molecules of *sdf_file* with duplicates removed.

    Molecules are keyed by their InChI string; when several molecules share
    one InChI, the last one read is kept.  The result is written to
    ``<sdf_file without its last four characters>_uniques.sdf``, overwriting
    any existing file.

    :param sdf_file: path of the input SD file (expected to end in a
        four-character extension such as ``.sdf``).
    """
    # Single pass over the file: the earlier extra read into an unused list
    # only cost time and memory.
    unique_mols = {
        mol.write("inchi"): mol
        for mol in pybel.readfile("sdf", sdf_file)
    }
    outputsdf = pybel.Outputfile("sdf",
                                 str(sdf_file[:-4]) + "_uniques.sdf",
                                 overwrite=True)
    try:
        # .values() works on Python 2 and 3; .itervalues() is Python 2 only.
        for mol in unique_mols.values():
            outputsdf.write(mol)
    finally:
        outputsdf.close()
Ejemplo n.º 7
0
def create_pdbqt_from_pdb_file(pdb_filepath, pdbqt_filepath, pH=7.4):
    """
    Convert a PDB file to a PDBQT file.

    The first molecule of the PDB file is passed to
    ``optimize_structure_for_docking`` (which, per its name and the pH
    argument, prepares the structure for docking) and then written in PDBQT
    format, overwriting an existing file.

    Parameters
    ----------
    pdb_filepath: str or pathlib.Path
        Path to input PDB file. The suffix is forced to ``.pdb``.
    pdbqt_filepath: str or pathlib.Path
        Path to output PDBQT file. The suffix is forced to ``.pdbqt``.
    pH: float
        pH value for defining the protonation state of the atoms.

    Returns
    -------
    openbabel.pybel.Molecule
        Molecule object of PDB file optimized for docking.
    """
    pdb_path = Path(pdb_filepath).with_suffix(".pdb")
    pdbqt_path = Path(pdbqt_filepath).with_suffix(".pdbqt")

    # readfile() provides an iterator over the Molecules in a file.
    # To access the first (and possibly only) molecule in a file,
    # we use next()
    molecule = next(pybel.readfile("pdb", str(pdb_path)))
    optimize_structure_for_docking(molecule, protonate_for_pH=pH)
    molecule.write("pdbqt", str(pdbqt_path), overwrite=True)
    # Return the prepared molecule as documented (previously None).
    return molecule
def xyz_to_pyMol(xyz, cluster_bond_path=None):
    """Read an XYZ file as a pybel molecule, optionally adding extra bonds.

    :param xyz: path of the XYZ file; its first molecule is used.
    :param cluster_bond_path: optional path of a text file holding a Python
        expression that evaluates to a sequence of
        ``(begin_idx, end_idx, bond_order)`` entries.  When given, a new
        molecule is built from the atoms of the XYZ file, the bonds OpenBabel
        perceived for it, and these additional bonds.
    :return: the molecule as read when *cluster_bond_path* is falsy,
        otherwise the rebuilt ``pybel.Molecule``.
    """
    mol = next(pybel.readfile('xyz', xyz))
    if not cluster_bond_path:
        return mol

    m = pybel.ob.OBMol()
    m.BeginModify()
    for atom in mol:
        obatom = pybel.ob.OBAtom()
        obatom.thisown = 0
        obatom.SetAtomicNum(atom.atomicnum)
        obatom.SetVector(*atom.coords)
        m.AddAtom(obatom)
        del obatom

    with open(cluster_bond_path, 'r') as f:
        lines = f.read()
    # SECURITY(review): eval() executes whatever the file contains.  Only use
    # trusted files; if the content is always a plain literal, switch to
    # ast.literal_eval.
    cluster_bond = eval(lines)

    bonds = [(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(),
              bond.GetBondOrder())
             for bond in pybel.ob.OBMolBondIter(mol.OBMol)]
    bonds.extend(cluster_bond)
    for bond in bonds:
        m.AddBond(bond[0], bond[1], bond[2])
    # Pair the BeginModify() above so the molecule leaves modification mode
    # before it is handed out.
    m.EndModify()
    return pybel.Molecule(m)
Ejemplo n.º 9
0
def __main__():
    """
    Select compounds with certain properties from a small library.

    Returns True when the precalculated-property filter was used and None
    otherwise.
    """
    args = parse_command_line()

    if args.filters == "__filter_by_name__":
        filter_by_name(args)
        return

    # The filter definition arrives as a string generated from the XML file.
    # It is padded with white-space for readability and ends with a ',}'
    # that is not valid JSON, so both are removed before parsing.
    filters = json.loads((args.filters).replace(" ", "").replace(",}", "}"))

    if args.iformat == "sdf":
        # Look at the first molecule only: if it carries every required
        # property, assume the rest of the file does too and filter on the
        # stored values instead of recalculating them.
        first_mol = next(pybel.readfile("sdf", args.input))
        has_all_properties = all(
            cheminfolib.ColumnNames.get(key, key) in first_mol.data
            for key, _ in filters.items())
        if has_all_properties:
            filter_precalculated_compounds(args, filters)
            return True

    filter_new_compounds(args, filters)
Ejemplo n.º 10
0
def getProperties(mol):
    """Return per-atom residue, type and charge data obtained via OpenBabel.

    *mol* is written to a temporary PDB file through its ``write`` method,
    the file is read back with pybel, and the temporary file is removed
    again, also when writing or parsing fails.

    :param mol: object exposing ``write(filename)``.
    :return: one list per atom:
        ``[residue name, residue number, atom id, atom type,
        partial charge rounded to 3 decimals]``.
    :raises ImportError: if openbabel cannot be imported.
    """
    try:
        from openbabel import pybel
    except ImportError:
        raise ImportError(
            'Could not import openbabel. The atomtyper requires this dependency so please install it with `conda install openbabel -c acellera`'
        )

    # Keep the file on disk (delete=False) so the path stays reserved until
    # we remove it ourselves.
    with NamedTemporaryFile(suffix='.pdb', delete=False) as tmp:
        name = tmp.name

    try:
        mol.write(name)
        mpybel = next(pybel.readfile('pdb', name))

        residues = pybel.ob.OBResidueIter(mpybel.OBMol)
        atoms = [[
            r.GetName(),
            r.GetNum(),
            r.GetAtomID(at),
            at.GetType(),
            round(at.GetPartialCharge(), 3)
        ] for r in residues for at in pybel.ob.OBResidueAtomIter(r)]
    finally:
        os.remove(name)

    return atoms
Ejemplo n.º 11
0
def loadall(fname: str):
    """
    Read every molecule stored in a file.

    The format is derived from the file name via ``utils.molformat``.

    Parameters
    ----------
    fname: str
        File name

    Returns
    -------
    List of molecules
    """
    fmt = utils.molformat(fname)
    obmols = list(pybel.readfile(fmt, fname))

    # FIXME: Special handling for multi-model PDB files
    # See OpenBabel Issue #2097
    # When a PDB file yields more than one molecule, the last one is dropped.
    if fmt == "pdb" and len(obmols) > 1:
        return obmols[:-1]

    return obmols
Ejemplo n.º 12
0
def getLigandPrints(flist):
    '''
    Get list of ligand fingerprints

    Each file in *flist* is read according to its extension.  ``.smi`` and
    ``.ism`` files are parsed line by line as ``<SMILES> [<name>]``; any
    other file is read with ``pybel.readfile`` using the extension as the
    format.  An ECFP4 fingerprint is calculated for every molecule.

    :param flist: iterable of file names.
    :return: tuple ``(fingerprints, names)`` of equally long lists.  The
        name is the second column of a SMILES line (empty string if absent)
        or the molecule title for other formats.
    '''
    fingerprints = []
    names = []
    for fname in flist:
        ext = os.path.splitext(fname)[1].lstrip('.')
        if ext in ('smi', 'ism'):
            with open(fname, 'r') as f:
                for line in f:
                    contents = line.split()
                    if not contents:
                        # Blank line: nothing to parse.
                        continue
                    m = pybel.readstring('smi', contents[0])
                    fingerprints.append(m.calcfp('ecfp4'))
                    names.append(contents[1] if len(contents) > 1 else '')
        else:
            # Best effort: an unreadable file is reported and skipped.
            try:
                for m in pybel.readfile(ext, fname):
                    fingerprints.append(m.calcfp('ecfp4'))
                    # Keep names aligned with fingerprints for these
                    # formats as well.
                    names.append(m.title)
            except Exception as e:
                print(e)
    return (fingerprints, names)
Ejemplo n.º 13
0
def fitmol(fname, niters=10):
    """Grid the first molecule of an SD file and fit atoms back to the grid.

    The molecule is centred, hydrogens are added, and it is typed with the
    module-level ``typer`` and gridded with ``gmaker``.  The resulting grid
    is passed to ``simple_atom_fit``.

    :param fname: path of the SD file.
    :param niters: passed through to ``simple_atom_fit``.
    :return: tuple ``(struct, fittime, loss, fixes, rmsd)`` where the middle
        three values come from ``struct.info`` (``'time'``, ``'loss'``,
        ``'n_steps'``) and ``rmsd`` is ``np.inf`` when the RMSD calculation
        fails.
    """
    print('Reading {}'.format(fname))
    m = next(pybel.readfile('sdf', fname))
    m.OBMol.Center()  #put in center of box!
    m.addh()
    print('Typing input molecule')
    cset = molgrid.CoordinateSet(m, typer)
    print('Creating empty grid')
    mgrid_values = torch.zeros(gmaker.grid_dimensions(cset.num_types()),
                               dtype=torch.float32,
                               device=device)
    print('Calling gmaker forward')
    gmaker.forward((0, 0, 0), cset, mgrid_values)

    mgrid = generate.MolGrid(mgrid_values, channels, np.zeros(3), 0.5)
    types = generate.count_types(cset.type_index.tonumpy().astype(int),
                                 cset.num_types(),
                                 dtype=np.int16)

    grid = simple_atom_fit(mgrid, types, niters)
    struct = grid.info['src_struct']
    loss = struct.info['loss']
    fittime = struct.info['time']
    fixes = struct.info['n_steps']

    # A failed RMSD calculation is reported as infinity.  "except Exception"
    # keeps that best-effort behaviour without also swallowing
    # KeyboardInterrupt / SystemExit as the bare "except" did.
    try:
        rmsd = get_min_rmsd(cset.coords, cset.type_index.tonumpy(), struct.xyz,
                            struct.c)
    except Exception:
        rmsd = np.inf

    return struct, fittime, loss, fixes, rmsd
Ejemplo n.º 14
0
def patch_scores_sdf(sdf_in, outfile, scores):
    """Attach scores to the records of an SD file and write SDF + TSV output.

    Records of *sdf_in* are numbered from 0 in file order.  A record whose
    number is a key of *scores* gets the data field ``dls_deep_score`` and is
    written to ``<work_dir>/<outfile>.sdf``; a row
    ``index, SCORE, SCORE.norm, score`` is written to
    ``<work_dir>/<outfile>.tsv`` (missing fields become empty strings).
    Records without a score are logged and skipped.

    :param sdf_in: path of the input SD file.
    :param outfile: base name (no extension) of the two output files, which
        are placed in the module-level ``work_dir``.
    :param scores: mapping from record index to score.
    """
    sdf_path = "{0}{1}{2}.sdf".format(work_dir, os.path.sep, outfile)
    tsv_path = "{0}{1}{2}.tsv".format(work_dir, os.path.sep, outfile)
    utils.log("Writing results to {0} and {1}".format(sdf_path, tsv_path))
    with open(tsv_path, 'w') as tsv_file:
        sdf_file = pybel.Outputfile("sdf", sdf_path)
        # Close the SDF writer even when a record fails to process.
        try:
            for counter, mol in enumerate(pybel.readfile("sdf", sdf_in)):
                if counter not in scores:
                    utils.log("No score found for record", counter)
                    continue

                score = scores[counter]
                mol.data['dls_deep_score'] = score
                rdock_score = (mol.data['SCORE']
                               if 'SCORE' in mol.data else '')
                rdock_nscore = (mol.data['SCORE.norm']
                                if 'SCORE.norm' in mol.data else '')

                sdf_file.write(mol)
                tsv_file.write("{0}\t{1}\t{2}\t{3}\n".format(
                    counter, rdock_score, rdock_nscore, score))
        finally:
            sdf_file.close()
Ejemplo n.º 15
0
def filter_by_name(args):
    """Write the molecules whose title appears in a list of names.

    Titles and names are compared after stripping surrounding white-space.
    Each matching molecule is written once, even if its name is listed more
    than once.

    :param args: namespace providing ``input`` (SD file), ``list_of_names``
        (text file, one name per line), ``output`` and ``oformat``.
    """
    # Read the names once into a set instead of reopening and rescanning the
    # file for every molecule; this also closes the file handle.
    with open(args.list_of_names) as names_file:
        wanted = {line.strip() for line in names_file}

    outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
    try:
        for mol in pybel.readfile("sdf", args.input):
            if mol.title.strip() in wanted:
                outfile.write(mol)
    finally:
        outfile.close()
Ejemplo n.º 16
0
def readXYZ(xyz, bonds=None):
    """Build a ``gen3D.Molecule`` from an XYZ file with manually added bonds.

    The atoms of the first molecule in *xyz* are copied into a fresh OBMol.
    The bonds OpenBabel perceived for the file are added, followed by the
    extra bonds.

    :param xyz: path of the XYZ file.
    :param bonds: optional iterable of ``(begin_idx, end_idx, bond_order)``
        entries to add on top of the perceived bonds.  When omitted, the
        built-in bond list below is used, as before.
    :return: ``gen3D.Molecule`` wrapping the rebuilt OBMol.
    """
    if bonds is None:
        # Default extra bonds, kept for backward compatibility.  The argument
        # used to be overwritten, so this list was always applied.
        bonds = [(12, 14, 1), (12, 15, 1), (12, 16, 1), (12, 17, 1),
                 (12, 13, 1), (17, 23, 1), (16, 23, 1)]

    # extract molecule information from xyz
    mol = next(pb.readfile('xyz', xyz))
    # Manually give bond information
    # (in metal systems the bond information detected by openbabel usually
    # has some problems)
    m = Molecule(pb.ob.OBMol())
    obmol = m.OBMol
    obmol.BeginModify()
    for atom in mol:
        obatom = ob.OBAtom()
        obatom.thisown = 0
        obatom.SetAtomicNum(atom.atomicnum)
        obatom.SetVector(*atom.coords)
        obmol.AddAtom(obatom)
        del obatom

    all_bonds = [(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(),
                  bond.GetBondOrder())
                 for bond in pb.ob.OBMolBondIter(mol.OBMol)]
    all_bonds.extend(bonds)

    for bond in all_bonds:
        obmol.AddBond(bond[0], bond[1], bond[2])

    obmol.EndModify()
    mol_obj = gen3D.Molecule(obmol)
    return mol_obj
Ejemplo n.º 17
0
def getOpenBabelProperties(pdb, outfile):
    """Dump per-atom OpenBabel properties of a PDB file as comma-separated rows.

    One row is written per atom of the first molecule:
    ``index,residue name,residue number,atom id,atom type,partial charge``
    (charge with three decimals).

    The process exits with status 1 if openbabel cannot be imported, 2 if
    the PDB file cannot be read and 3 if writing the output fails.  In the
    last two cases the traceback is printed first.

    :param pdb: path of the input PDB file.
    :param outfile: path of the text file to write.
    """
    try:
        from openbabel import pybel
    except ImportError:
        print(
            "Could not import openbabel. The atomtyper requires this dependency so please install it with `conda install openbabel -c conda-forge`"
        )
        sys.exit(1)

    try:
        mpybel = next(pybel.readfile("pdb", pdb))
    except Exception:
        traceback.print_exc()
        sys.exit(2)

    def _rows():
        # Lazily produce one output line per atom, residue by residue.
        for r in pybel.ob.OBResidueIter(mpybel.OBMol):
            for at in pybel.ob.OBResidueAtomIter(r):
                yield f"{at.GetIndex()},{r.GetName()},{r.GetNum()},{r.GetAtomID(at)},{at.GetType()},{at.GetPartialCharge():.3f}\n"

    try:
        with open(outfile, "w") as f:
            f.writelines(_rows())
    except Exception:
        traceback.print_exc()
        sys.exit(3)
Ejemplo n.º 18
0
 def __init__(self, path, name, calc):
     """Store the job settings and load the molecules of ``{path}/smiles.smi``.

     ``normal``, ``error`` and ``charge`` start out as empty lists.
     """
     self.path, self.name, self.calc = path, name, calc
     self.normal, self.error = [], []
     smiles_file = '{}/smiles.smi'.format(path)
     self.smiles = [entry for entry in pybel.readfile('smi', smiles_file)]
     self.charge = []
Ejemplo n.º 19
0
def addh(args):
    """Re-add hydrogens to every molecule with more than five heavy atoms.

    For each qualifying molecule of ``args.input`` the existing hydrogens
    are removed and ``OBMol.AddHydrogens(args.polar, True, args.pH)`` is
    called.  The result is written to ``args.output`` in the input format
    (``args.iformat``).  Smaller molecules are dropped.
    """
    writer = pybel.Outputfile(args.iformat, args.output, overwrite=True)
    for molecule in pybel.readfile(args.iformat, args.input):
        obmol = molecule.OBMol
        if obmol.NumHvyAtoms() <= 5:
            continue
        molecule.removeh()
        obmol.AddHydrogens(args.polar, True, args.pH)
        writer.write(molecule)
    writer.close()
def default_open_input_sdf(filename):
    """Open the input as an SD file through ``pybel.readfile``.

    :param filename: The name of the file.
    :return: the pair ``(None, molecules)``, where *molecules* is whatever
        ``pybel.readfile("sdf", filename)`` returns.
    """
    return None, pybel.readfile("sdf", filename)
Ejemplo n.º 21
0
Archivo: enz.py Proyecto: UoMMIB/enz
 def pdb_to_pdbqt(pdb, save_path):
     """Add hydrogens to a single-molecule PDB file and save it as PDBQT.

     The input is asserted to contain exactly one molecule.  The output file
     is overwritten if it exists.

     :return: *save_path*.
     """
     molecules = list(pybel.readfile('pdb', pdb))
     assert len(molecules) == 1
     molecule = molecules[0]
     molecule.addh()
     # opt:r = rigid - less errors?? - revisit this
     write_options = {'r': True}
     molecule.write('pdbqt', save_path, opt=write_options, overwrite=True)
     return save_path
Ejemplo n.º 22
0
def are_similar(xyz, sdf):
    """Check that every atom of the XYZ molecule coincides with an SDF atom.

    For each atom of the first molecule in *xyz*, the first atom of the
    first molecule in *sdf* that lies closer than 0.1 is taken as its
    partner.  Two XYZ atoms must not share a partner (enforced by an
    assertion).

    :return: False as soon as an XYZ atom has no partner, True otherwise.
    """
    # NOTE(review): SDF atoms left without a partner are not detected, so
    # this is an injective match rather than a strict bijection -- confirm
    # that this is intended.
    mola = next(pybel.readfile('xyz', xyz))
    molb = next(pybel.readfile('sdf', sdf))

    # O(n^2) comparison of all atom pairs
    partner_of = {}
    used_b = set()
    for atom_a in mola.atoms:
        pos_a = np.array(atom_a.coords)
        partner = next(
            (atom_b for atom_b in molb.atoms
             if np.linalg.norm(pos_a - np.array(atom_b.coords)) < 0.1),
            None)
        if partner is None:  # nothing matched atom_a
            return False
        assert atom_a.idx not in partner_of
        assert partner.idx not in used_b
        partner_of[atom_a.idx] = partner.idx
        used_b.add(partner.idx)
    return True
Ejemplo n.º 23
0
def remove_protonation(args):
    """Set the formal charge of every atom to zero and rewrite the file.

    Molecules are read from ``args.input`` and written to ``args.output``,
    both in ``args.iformat``.  A stored ``inchi`` data field is removed so
    the modified molecule is the one that gets saved.
    """
    outfile = pybel.Outputfile(args.iformat, args.output, overwrite=True)
    # Close the writer even if a molecule fails to process.
    try:
        for mol in pybel.readfile(args.iformat, args.input):
            for atom in mol.atoms:
                atom.OBAtom.SetFormalCharge(0)
            if 'inchi' in mol.data:
                # remove inchi cache so modified mol is saved
                del mol.data['inchi']
            outfile.write(mol)
    finally:
        outfile.close()
Ejemplo n.º 24
0
def remove_ions(args):
    """Strip salts from each molecule and keep those that remain large enough.

    Molecules with at most five heavy atoms are skipped.  For the others
    ``OBMol.StripSalts(0)`` is called and the molecule is written to
    ``args.output`` (format ``args.iformat``) only if it still has more than
    five heavy atoms afterwards.
    """
    writer = pybel.Outputfile(args.iformat, args.output, overwrite=True)
    for molecule in pybel.readfile(args.iformat, args.input):
        obmol = molecule.OBMol
        if obmol.NumHvyAtoms() <= 5:
            continue
        obmol.StripSalts(0)
        # Stripping may leave only a small fragment; drop it in that case.
        if obmol.NumHvyAtoms() <= 5:
            continue
        writer.write(molecule)
    writer.close()
Ejemplo n.º 25
0
    def prepare_dataset(self,hdf_mode='w'):
        """Featurize every structure in ``self.data_dir`` into the HDF5 file.

        Each directory entry is expected to contain ``protein.mol2`` and
        ``cavity6.mol2``.  Coordinates are shifted by the protein centroid.
        Entries whose names differ only by a trailing ``_<digits>`` share one
        HDF5 group.  The first entry creates the group with the protein and
        pocket data.  Later entries append their pocket data, provided their
        centroid agrees with the stored one within 0.5; otherwise they are
        reported and skipped.

        :param hdf_mode: mode used to open ``self.hdf_path``.
        """
        trailing_index = re.compile('_[0-9]+$')
        data_path = self.data_dir
        multiple_pockets = {}

        def _store(group, key, data):
            # All datasets share the same dtype and compression settings.
            group.create_dataset(key, data=data, shape=data.shape, dtype='float32', compression='lzf')

        with h5py.File(self.hdf_path, mode=hdf_mode) as f:
            for structure_id in os.listdir(data_path):
                structure_dir = os.path.join(data_path, structure_id)
                protein = next(pybel.readfile('mol2', os.path.join(structure_dir, 'protein.mol2')))
                pocket = next(pybel.readfile('mol2', os.path.join(structure_dir, 'cavity6.mol2')))

                pocket_coords, pocket_features = self.featurizer.pocket_featurizer(pocket)
                prot_coords, prot_features = self.featurizer.protein_featurizer(protein)

                centroid = prot_coords.mean(axis=0)
                pocket_coords -= centroid
                prot_coords -= centroid

                group_id = trailing_index.sub('', structure_id)

                if group_id not in f:
                    # First structure of this group: store everything.
                    group = f.create_group(group_id)
                    _store(group, 'coords', prot_coords)
                    _store(group, 'features', prot_features)
                    _store(group, 'pocket_coords', pocket_coords)
                    _store(group, 'pocket_features', pocket_features)
                    _store(group, 'centroid', centroid)
                    continue

                group = f[group_id]
                if not np.allclose(centroid, group['centroid'][:], atol=0.5):
                    print('Structures for %s are not aligned, ignoring pocket %s' % (group_id, structure_id))
                    continue

                multiple_pockets[group_id] = multiple_pockets.get(group_id, 1) + 1

                # Append this pocket to the data already stored for the group.
                for key, extra in (('pocket_coords', pocket_coords),
                                   ('pocket_features', pocket_features)):
                    merged = np.concatenate((group[key][:], extra))
                    del group[key]
                    _store(group, key, merged)
Ejemplo n.º 26
0
    def Inputs(self):
        '''Turn smiles.smi into 3D structures and write the job input files.

        For every entry in ``self.smiles``:

        * if the word ``opt`` occurs in ``self.calc[2]``, a 3D geometry is
          generated from the SMILES entry (mmff94, 50 steps); otherwise the
          geometry is taken from ``{path}/log/opt_molecule_{n}.log``;
        * the geometry is written in XYZ format to
          ``input/{name}_input_{n}.com``; the first six items of
          ``self.header(n)`` are then appended to its first line and its
          second line is blanked;
        * items 6-9 of the header are written to
          ``input/{name}_job_{n}.sh``, which is made executable.
        '''
        # NOTE(review): the directory and the chmod target are addressed
        # under self.path while the .com/.sh files are written relative to
        # the current working directory -- presumably cwd == self.path;
        # confirm.
        for n in range(len(self.smiles)):
            com_path = 'input/{name}_input_{n}.com'.format(name=self.name,
                                                           n=n)
            # Check whether "opt" is requested; if so, start from the SMILES.
            if 'opt' in self.calc[2].lower().split():
                smi = self.smiles[n]
                smi.make3D(forcefield='mmff94', steps=50)
                # exist_ok replaces the former bare "except" that tolerated
                # an existing directory (and wrote the file twice).
                os.makedirs(self.path + "/input", exist_ok=True)
                molecules = [smi]
            else:
                molecules = pybel.readfile(
                    'g09', '{path}/log/opt_molecule_{n}.log'.format(
                        path=self.path, name=self.name, n=n))

            for molecule in molecules:
                output = pybel.Outputfile('xyz', com_path, overwrite=True)
                # Close the writer so the geometry is on disk before the
                # file is read back below.
                try:
                    output.write(molecule)
                finally:
                    output.close()

            with open(com_path, 'r') as file:
                lines = file.readlines()

            # Build the new content before reopening the file for writing,
            # so a failing header() call does not leave it truncated.
            a = self.header(n)
            lines[1] = '\n'
            for i in range(0, 6):
                lines[0] += a[i]
            lines[-1] += '\n'
            with open(com_path, 'w') as file:
                file.writelines(lines)

            with open('input/{name}_job_{n}.sh'.format(name=self.name, n=n),
                      'w') as file:
                file.write(a[6] + '\n' + a[7] + '\n' + a[8] + '\n' + a[9])
            # Argument list instead of a shell string: paths with spaces or
            # shell metacharacters are passed through unchanged.
            subprocess.run([
                'chmod', 'a+x', '{path}/input/{name}_job_{n}.sh'.format(
                    name=self.name, path=self.path, n=n)
            ])  # creates input.com and job.sh
Ejemplo n.º 27
0
def gen_feature(path, name, featurizer):
    """Collect coordinates, features, atom types and edges for one complex.

    Reads ``<path>/<name>/<name>_ligand.mol2`` and
    ``<path>/<name>/<name>_pocket.mol2``, featurizes both with *featurizer*
    and truncates the pocket data to the atom count reported by
    ``pocket_atom_num_from_mol2``.

    :return: dict with the keys ``lig_co``, ``lig_fea``, ``lig_atoms``,
        ``lig_eg``, ``pock_co``, ``pock_fea``, ``pock_atoms`` and
        ``pock_eg``.
    :raises AssertionError: if a non-hydrogen pocket atom beyond the
        truncation point does not have atomic number 8, or if the atom lists
        and feature arrays differ in length.
    """
    charge_idx = featurizer.FEATURE_NAMES.index('partialcharge')

    ligand_file = '%s/%s/%s_ligand.mol2' % (path, name, name)
    ligand = next(pybel.readfile('mol2', ligand_file))
    ligand_coords, ligand_features = featurizer.get_features(ligand, molcode=1)

    pocket_file = '%s/%s/%s_pocket.mol2' % (path, name, name)
    pocket = next(pybel.readfile('mol2', pocket_file))
    pocket_coords, pocket_features = featurizer.get_features(pocket,
                                                             molcode=-1)

    node_num = pocket_atom_num_from_mol2(name, path)
    pocket_coords = pocket_coords[:node_num]
    pocket_features = pocket_features[:node_num]

    # Soft sanity checks: a failure is only reported by printing the name.
    try:
        assert (ligand_features[:, charge_idx] != 0).any()
        assert (pocket_features[:, charge_idx] != 0).any()
        assert (ligand_features[:, :9].sum(1) != 0).all()
    except:
        print(name)

    # Atomic numbers of all non-hydrogen atoms.
    lig_atoms = [atom.atomicnum for atom in ligand if atom.atomicnum > 1]
    heavy_pocket = [atom.atomicnum for atom in pocket if atom.atomicnum > 1]

    # Everything cut off from the pocket must have atomic number 8.
    assert all(number == 8 for number in heavy_pocket[node_num:])
    pock_atoms = heavy_pocket[:node_num]
    assert len(lig_atoms) == len(ligand_features) and len(pock_atoms) == len(
        pocket_features)

    return {
        'lig_co': ligand_coords,
        'lig_fea': ligand_features,
        'lig_atoms': lig_atoms,
        'lig_eg': gen_pocket_graph(ligand),
        'pock_co': pocket_coords,
        'pock_fea': pocket_features,
        'pock_atoms': pock_atoms,
        'pock_eg': gen_pocket_graph(pocket)
    }
Ejemplo n.º 28
0
def MolFormatConversion(input_file: str,
                        output_file: str,
                        input_format="xyz",
                        output_format="sdf"):
    """Convert every molecule of *input_file* to another file format.

    :param input_file: path of the file to read.
    :param output_file: path of the file to write.
    :param input_format: format name of the input (default ``"xyz"``).
    :param output_format: format name of the output (default ``"sdf"``).

    The number of converted molecules is printed at the end; an input
    without molecules reports ``0`` instead of failing on an unbound counter.
    """
    molecules = readfile(input_format, input_file)
    output_file_writer = Outputfile(output_format, output_file)
    count = 0
    # Close the writer even if a molecule fails to convert.
    try:
        for count, molecule in enumerate(molecules, start=1):
            output_file_writer.write(molecule)
    finally:
        output_file_writer.close()
    print('%d molecules converted' % count)
Ejemplo n.º 29
0
def getsmilesfromcdxml(filecdxml, lmolecules):
    """Store the SMILES of each CDXML fragment in the matching molecule dict.

    The i-th molecule read from *filecdxml* is assigned to the entry of
    *lmolecules* with the i-th smallest integer ``'id'``.  The SMILES text
    (everything from the first tab to the end of each line removed, then
    stripped) is written to that entry's ``'smile'`` key.

    :param filecdxml: path of the CDXML file.
    :param lmolecules: list of dicts with an ``'id'`` key; updated in place.
    """
    # SMILES come out in fragment-id order, so rank the entries by id first.
    ids = [int(entry['id']) for entry in lmolecules]
    order_by_id = np.argsort(ids)

    for position, mol in enumerate(pybel.readfile("cdxml", filecdxml)):
        raw_smiles = mol.write("smi")
        # Drop everything from the tab to the end of the line.
        cleaned = re.sub(r'\t.*', '', raw_smiles, flags=re.M).strip()
        lmolecules[order_by_id[position]]['smile'] = cleaned
Ejemplo n.º 30
0
def coordinates(mol2_file):
    """Return the atom coordinates of the first molecule in a MOL2 file.

    :param mol2_file: path of the MOL2 file.
    :return: ``numpy`` array built from the per-atom coordinate tuples, in
        atom order.
    """
    first_molecule = next(pybel.readfile("mol2", mol2_file))
    return np.array([atom.coords for atom in first_molecule.atoms])