Beispiel #1
0
def merge_sdflist(lines, datadir=""):
    """Merge the conformers of several SDF files into one SDF string.

    The first entry of ``lines`` seeds the collection of kept conformers.
    Every following file is read and compared against that collection with
    ``merge_asymmetric``; a conformer is added to the collection only when
    no already-kept conformer was reported as similar to it.

    Args:
        lines: Sequence of file stems. Each file is read from
            ``datadir + stem + ".sdf"``.
        datadir: Prefix prepended verbatim to every stem (no path separator
            is inserted, so include a trailing slash if needed).

    Returns:
        The concatenated ``cheminfo.save_molobj`` output of every kept
        conformer, or an empty string when ``lines`` is empty.
    """

    if not lines:
        # Nothing to merge; avoids an IndexError on lines[0]
        return ""

    # The first file seeds the collection. Its molobj and atoms are reused
    # for every other file, so all files are assumed to hold conformers of
    # the same molecule.
    sdffile = datadir + lines[0] + ".sdf"

    molobj, atoms, keep_energies, keep_coordinates = worker.get_sdfcontent(
        sdffile, rtn_atoms=True)
    keep_representations = [
        sim.get_representation(atoms, coordinates)
        for coordinates in keep_coordinates
    ]

    for line in tqdm(lines[1:]):

        sdffile = datadir + line + ".sdf"
        from_energies, from_coordinates = worker.get_sdfcontent(sdffile)
        from_representations = [
            sim.get_representation(atoms, coordinates)
            for coordinates in from_coordinates
        ]

        # One entry per new conformer: the indices of the kept conformers
        # it matches (empty when it is new)
        idxs = merge_asymmetric(atoms, from_energies, keep_energies,
                                from_representations, keep_representations)

        for i, idx in enumerate(idxs):

            # If the conformation already exists, skip it. len() is used
            # because idx may be a numpy array, whose truth value is
            # ambiguous.
            if len(idx) > 0:
                continue

            # Add new unique conformation to collection
            keep_energies.append(from_energies[i])
            keep_coordinates.append(from_coordinates[i])
            keep_representations.append(from_representations[i])

    # join avoids the quadratic cost of repeated string concatenation
    return "".join(
        cheminfo.save_molobj(molobj, coordinates)
        for coordinates in keep_coordinates
    )
def merge_sdflist(lines, datadir=""):
    """Merge the conformers of several SDF files into one SDF string.

    The first entry of ``lines`` seeds the collection of kept conformers.
    Every conformer is wrapped in a ``workkernel.FchlRepresentation`` built
    with ``DPARAMETERS``. Every following file is compared against the
    collection with ``merge_asymmetric_objs``; a conformer is added only
    when no already-kept conformer was reported as similar to it.

    Args:
        lines: Sequence of file stems. Each file is read from
            ``datadir + stem + ".sdf"``.
        datadir: Prefix prepended verbatim to every stem (no path separator
            is inserted, so include a trailing slash if needed).

    Returns:
        The concatenated ``cheminfo.save_molobj`` output of every kept
        conformer, or an empty string when ``lines`` is empty.
    """

    if not lines:
        # Nothing to merge; avoids an IndexError on lines[0]
        return ""

    # The first file seeds the collection. Its molobj and atoms are reused
    # for every other file, so all files are assumed to hold conformers of
    # the same molecule.
    sdffile = datadir + lines[0] + ".sdf"

    molobj, atoms, keep_energies, keep_coordinates = worker.get_sdfcontent(
        sdffile, rtn_atoms=True)
    keep_representations = [
        workkernel.FchlRepresentation(atoms, coordinates, **DPARAMETERS)
        for coordinates in keep_coordinates
    ]

    for line in tqdm(lines[1:]):

        sdffile = datadir + line + ".sdf"
        from_energies, from_coordinates = worker.get_sdfcontent(sdffile)
        from_representations = [
            workkernel.FchlRepresentation(atoms, coordinates, **DPARAMETERS)
            for coordinates in from_coordinates
        ]

        # One entry per new conformer: the indices of the kept conformers
        # it matches (empty when it is new)
        idxs = merge_asymmetric_objs(from_energies, keep_energies,
                                     from_representations,
                                     keep_representations, **DPARAMETERS)

        for i, idx in enumerate(idxs):

            # If the conformation already exists, skip it. len() is used
            # because idx may be a numpy array, whose truth value is
            # ambiguous.
            if len(idx) > 0:
                continue

            # Add new unique conformation to collection
            keep_energies.append(from_energies[i])
            keep_coordinates.append(from_coordinates[i])
            keep_representations.append(from_representations[i])

    # join avoids the quadratic cost of repeated string concatenation
    return "".join(
        cheminfo.save_molobj(molobj, coordinates)
        for coordinates in keep_coordinates
    )
Beispiel #3
0
def run_jobline(origins, molobjs, tordbs, line,
    prefix=None,
    debug=False,
    dump=False):
    """Run the job described by one comma-separated line.

    The first field of ``line`` is an integer index selecting the molecule,
    its torsion entry and its origin coordinates. The molecule is deep
    copied, reset to the origin coordinates and handed to ``run_job``
    together with the stripped line.

    Args:
        origins: Indexable by molecule id; coordinates the copied molecule
            is reset to before the job runs.
        molobjs: Indexable by molecule id; the molecule objects.
        tordbs: Indexable by molecule id; passed through to ``run_job``.
        line: Job description, first comma-separated field is the
            molecule id.
        prefix: File stem for the dump file. When None, it is derived from
            ``line`` (spaces become ``_``, commas become ``.``).
        debug: Print timing and progress information.
        dump: Write the resulting conformers to
            ``_tmp_data/<prefix>.sdf``.

    Returns:
        Tuple ``(job_energies, job_coordinates)`` as returned by
        ``run_job``.
    """

    sep = ","

    # TODO multiple molobjs

    line = line.strip()

    # Locate molobj
    molid = int(line.split(sep)[0])

    molobj = molobjs[molid]
    tordb = tordbs[molid]

    # Work on a deep copy so the caller's molecule is left untouched
    molobj = copy.deepcopy(molobj)
    cheminfo.molobj_set_coordinates(molobj, origins[molid])

    # filename stays None unless a dump was requested; the debug output
    # below must not assume it exists
    filename = None
    if dump:
        if prefix is None:
            prefix = line.replace(" ", "_").replace(",", ".")

        filename = "_tmp_data/{:}.sdf".format(prefix)

    job_start = time.time()

    job_energies, job_coordinates = run_job(molobj, tordb, line)

    job_end = time.time()

    if debug:
        report = [line, "-", len(job_energies), "{:5.2f}".format(job_end-job_start)]
        if filename is not None:
            report.append(filename)
        print(*report)

    if dump:
        if debug:
            print("saving {:} confs to".format(len(job_energies)), filename)

        # Context manager guarantees the file is flushed and closed
        with open(filename, 'w') as fsdf:
            # zip keeps the number of written conformers bounded by the
            # number of energies; the energy itself is not written
            for _energy, coordinates in zip(job_energies, job_coordinates):
                fsdf.write(cheminfo.save_molobj(molobj, coordinates))

    return job_energies, job_coordinates
Beispiel #4
0
def calculate_forcefield(molobj, conformer, torsions, origin_angles, delta_angles,
    ffprop=None,
    ff=None,
    delta=10**-7,
    coord_decimals=6,
    grad_threshold=100):
    """Set torsion angles, relax with torsion constraints, then relax freely.

    Procedure:

    1. A copy of ``molobj`` is created through an SDF string round trip.
    2. On the copy, each torsion ``i`` is set to
       ``origin_angles[i] + delta_angles[i]`` and an MMFF torsion
       constraint of ``+/- delta`` around that angle is added.
    3. The constrained force field is minimized
       (``run_forcefield(ffc, 500)``).
    4. The resulting coordinates are rounded to ``coord_decimals`` and
       written into ``conformer``.
    5. ``ff`` is minimized with ``run_forcefield_prime(ff, 700, force=1e-4)``.
    6. The energy is read from ``ff``. If the minimization status is 0 but
       the gradient norm exceeds ``grad_threshold``, the status is set to 4.

    Disclaimer (original author): lots of hacks, sorry. Let me know if you
    have an alternative.

    Note (original author): There is an artifact where, if delta < 10**-16,
    the FF will find an *extremely* local minimum with very high energy
    (unphysical). Setting delta to about 10**-6 (numerical noise) should fix
    this.

    Note (original author): rdkit forcefield restrained optimization will
    optimize to a *very* local and very unphysical minimum which the global
    optimizer cannot get out of. Truncating the digits of the coordinates
    to six is a crude but effective way to slightly move the molecule out
    of this in a reproducible way.

    Args:
        molobj: Molecule object; copied for the constrained step.
        conformer: Conformer that receives the constrained-step coordinates
            and whose positions are returned. Presumably the conformer
            ``ff`` operates on -- verify against the caller.
        torsions: Sequence of atom-index tuples, one per torsion.
        origin_angles: Reference angle (degrees) per torsion.
        delta_angles: Angle offset (degrees) per torsion; its length
            determines how many torsions are processed.
        ffprop: MMFF properties. When either ``ffprop`` or ``ff`` is None,
            both are obtained from ``get_forcefield(molobj)``.
        ff: Force field used for the final minimization and energy.
        delta: Half-width of the torsion constraint window in degrees.
        coord_decimals: Number of decimals the constrained coordinates are
            rounded to.
        grad_threshold: Gradient norm above which a status of 0 is
            replaced by 4.

    Returns:
        Tuple ``(energy, pos, status)``: the energy from ``ff``, the
        positions of ``conformer`` and the minimization status.
    """

    if ffprop is None or ff is None:
        ffprop, ff = get_forcefield(molobj)

    # Independent copy of the molecule for the constrained optimization
    sdfstr = cheminfo.molobj_to_sdfstr(molobj)
    molobj_prime, _ = cheminfo.sdfstr_to_molobj(sdfstr)
    conformer_prime = molobj_prime.GetConformer()

    # Setup constrained forcefield
    ffc = ChemicalForceFields.MMFFGetMoleculeForceField(molobj_prime, ffprop)

    # Set angles and constraints for all torsions
    for i, angle in enumerate(delta_angles):

        set_angle = origin_angles[i] + angle
        torsion = torsions[i]

        # Set clockwork angle. This is best effort: RDKit may refuse to
        # set a dihedral directly, in which case the constraint below is
        # left to drive the torsion. Exception (not a bare except) keeps
        # KeyboardInterrupt/SystemExit working.
        try:
            Chem.rdMolTransforms.SetDihedralDeg(conformer_prime, *torsion, set_angle)
        except Exception:
            pass

        # Set forcefield constraint
        ffc.MMFFAddTorsionConstraint(*torsion, False,
            set_angle-delta, set_angle+delta, 1.0e10)

    # Minimize with constraints; the status of this step is not used
    run_forcefield(ffc, 500)

    # Transfer the result to the target conformer
    coordinates = conformer_prime.GetPositions()
    coordinates = np.round(coordinates, coord_decimals) # rdkit hack, read description
    cheminfo.conformer_set_coordinates(conformer, coordinates)

    # Minimize without constraints
    status = run_forcefield_prime(ff, 700, force=1e-4)

    # Get current energy
    energy = ff.CalcEnergy()

    if status == 0:

        # A status of 0 with a large remaining gradient is not trusted
        grad_norm = linalg.norm(np.array(ff.CalcGrad()))

        if grad_norm > grad_threshold:
            status = 4

    # Get current positions
    pos = conformer.GetPositions()

    return energy, pos, status
def merge_asymmetric(atoms,
                     energies_x,
                     energies_y,
                     coordinates_x,
                     coordinates_y,
                     decimals=1,
                     threshold=0.98,
                     molobj=None,
                     debug=False):
    """For every entry of x, list the entries of y it is similar to.

    Energies are rounded to ``decimals`` and used as a cheap pre-filter:
    only x and y entries with the same rounded energy are compared, using
    ``merge_asymmetric_similarity``.

    Side effect: when ``molobj`` is given and more than five y entries
    share a rounded energy, the aligned y structures for that energy are
    written to ``_tmp_test.sdf`` in the working directory (the file is
    overwritten each time).

    Args:
        atoms: Passed through to ``merge_asymmetric_similarity``.
        energies_x: Energies of the candidate entries.
        energies_y: Energies of the reference entries.
        coordinates_x: Per-entry data of the candidates; converted with
            ``np.asarray`` and indexed with integer arrays.
        coordinates_y: Per-entry data of the references; handled like
            ``coordinates_x``.
        decimals: Number of decimals the energies are rounded to.
        threshold: Passed through to ``merge_asymmetric_similarity``.
        molobj: Optional molecule object enabling the debug dump above.
        debug: Print the number of x and y entries per unique energy.

    Returns:
        A list with one item per entry of ``energies_x``. An item is an
        empty list when no y entry has the same rounded energy; otherwise
        it is an array of indices into y derived from the result of
        ``merge_asymmetric_similarity``.
    """

    coordinates_x = np.asarray(coordinates_x)
    coordinates_y = np.asarray(coordinates_y)

    energies_x = np.round(energies_x, decimals=decimals)
    energies_y = np.round(energies_y, decimals=decimals)

    new_unique_energies = np.unique(energies_x)

    # Return index from x, with idx of same similarity
    # if empty, it is new
    rtnidx = [[] for _ in energies_x]

    for uenergy in new_unique_energies:

        idx_x, = np.where(energies_x == uenergy)
        idx_y, = np.where(energies_y == uenergy)

        if debug:
            print("amerge", uenergy, len(idx_x), len(idx_y))

        if len(idx_y) == 0:
            # all unique, continue
            continue

        if len(idx_y) > 5 and molobj is not None:

            # Debug dump of the reference structures sharing this energy
            coordinates_dump = align_coordinates(coordinates_y[idx_y])

            # Context manager guarantees the file is closed even when
            # writing a structure fails
            with open("_tmp_test.sdf", 'w') as f:
                for coordinates in coordinates_dump:
                    f.write(cheminfo.save_molobj(molobj, coordinates))

        # list of list of idx
        similar = merge_asymmetric_similarity(atoms,
                                              coordinates_x[idx_x],
                                              coordinates_y[idx_y],
                                              threshold=threshold)

        # convert local similarity to idx_y
        for i, sidx in enumerate(similar):
            similar[i] = np.asarray([idx_y[j] for j in sidx])

        # Create rtn idx
        for i, idx in enumerate(idx_x):
            rtnidx[idx] = similar[i]

    return rtnidx