Beispiel #1
0
def main():
    """
    Write the subset of molecules selected by ``search_molcules`` to disk.

    Reads ``data/sdf/structures.sdf.gz`` and ``data/sdf/properties.csv``,
    writes the selected molecules and their property values to
    ``data/sdf/subset_structures.sdf`` and ``data/sdf/subset_properties.csv``
    and copies the matching ``<idx>.sdf`` / ``<idx>.energies.npy`` files from
    ``_tmp_ensemble_/`` to ``_tmp_subset_/conformers/``, renumbered in the
    order of the returned indices (starting at 0).

    """

    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--scratch',
                        action='store',
                        help='',
                        metavar="dir",
                        default="tmp2/")
    parser.add_argument('--randomseed',
                        action='store',
                        help='random seed',
                        metavar="int",
                        default=666)
    parser.add_argument('--sdf', action='store', help='', metavar="file")
    parser.add_argument('-j',
                        '--cpu',
                        action='store',
                        help='pararallize',
                        metavar="int",
                        default=0)

    # The options are parsed (so --help works and unknown options are
    # rejected) but none of them is used below; all paths are hard-coded.
    parser.parse_args()

    molecules = cheminfo.read_sdffile('data/sdf/structures.sdf.gz')

    # Context managers guarantee the handles are closed even if the search
    # or one of the writes raises.
    with open('data/sdf/properties.csv', 'r') as properties:
        sub_mol, sub_prop, idxs = search_molcules(molecules, properties)

    with open('data/sdf/subset_structures.sdf', 'w') as fm, \
            open('data/sdf/subset_properties.csv', 'w') as fp:

        for mol, prop in zip(sub_mol, sub_prop):

            sdf = cheminfo.molobj_to_sdfstr(mol)
            fm.write(sdf)
            fm.write("$$$$\n")
            fp.write(str(prop) + "\n")

    from_dir = "_tmp_ensemble_/"
    to_dir = "_tmp_subset_/conformers/"

    for i, idx in enumerate(idxs):

        # misc.shell is consumed with list() so the command has run to
        # completion before the next one starts (it appears to be lazy).
        cmd = "cp {:}{:}.sdf {:}{:}.sdf".format(from_dir, str(idx), to_dir,
                                                str(i))
        list(misc.shell(cmd))
        cmd = "cp {:}{:}.energies.npy {:}{:}.energies.npy".format(
            from_dir, str(idx), to_dir, str(i))
        list(misc.shell(cmd))

        print(cmd)

    return
Beispiel #2
0
def merge_sdfs(filenames):
    """
    Merge the conformers stored in several SDF files into one set.

    Every file is loaded with ``generate_sdf``; files that fail to load are
    skipped. The first file that loads seeds the merged set. For each later
    file ``merge_asymmetric`` is called and a conformer is added only when
    its entry in the returned index lists is empty (presumably: no matching
    conformer is already in the merged set).

    Relies on a module-level ``args`` object with ``debug`` and ``dump``
    attributes. With ``args.dump`` the merged set is printed as one SDF
    string. Nothing is returned.

    """

    molobjs = []
    energies = []
    coordinates = []
    representations = []
    atoms = []
    n_total = 0

    for filename in filenames:

        # Best effort: an unreadable file is skipped. ``Exception`` instead
        # of a bare except so Ctrl-C / SystemExit still stop the run.
        try:
            molobjs_next, energies_next, coordinates_next, representations_next = generate_sdf(
                filename)
        except Exception:
            continue

        if not molobjs:

            if not molobjs_next:
                # Nothing to seed the merged set with; try the next file.
                continue

            # Atom list is taken once, from the first conformer loaded.
            atoms, _ = cheminfo.molobj_to_xyz(molobjs_next[0])
            energies += energies_next
            coordinates += coordinates_next
            representations += representations_next
            molobjs += molobjs_next
            n_total += len(molobjs_next)
            continue

        if args.debug:
            print(" {:} = {:} confs".format(filename, len(molobjs_next)))

        idxs = merge_asymmetric(atoms, energies_next, energies,
                                representations_next, representations)

        n_new = 0
        for i, idxl in enumerate(idxs):

            # A non-empty index list means the conformer is not new.
            if len(idxl) > 0:
                continue

            energies.append(energies_next[i])
            coordinates.append(coordinates_next[i])
            representations.append(representations_next[i])
            molobjs.append(molobjs_next[i])
            n_new += 1

        # Keep the running total correct whether or not debug is on.
        n_total += n_new

        if args.debug:
            print(" - new", n_new)
            print("total", n_total)

    if args.dump:
        sdfstr = [cheminfo.molobj_to_sdfstr(molobj) for molobj in molobjs]
        sdfstr = "".join(sdfstr)
        print(sdfstr)

    return
Beispiel #3
0
def get_conformations(line, scr="_tmp_ensemble_/", **kwargs):
    """
    Generate conformers for one molecule and save them under ``scr``.

    line   -- ``(im, molecule)`` pair; ``im`` is used as the file stem
    scr    -- path prefix for the output files (used as-is, so it should
              end with a path separator)
    kwargs -- accepted and ignored

    Saves the energies returned by ``generate_conformers`` with
    ``misc.save_npy`` as ``<scr><im>.energies`` and the molecule as
    ``<scr><im>.sdf``, then prints the mean and standard deviation of the
    energies. Nothing is returned.

    """

    im, molecule = line

    energies = generate_conformers(molecule)

    misc.save_npy(scr + str(im) + ".energies", energies)

    txtsdf = cheminfo.molobj_to_sdfstr(molecule)

    # ``with`` closes the file even if the write fails.
    with open(scr + str(im) + ".sdf", 'w') as fsdf:
        fsdf.write(txtsdf)

    # The literal "smi" is a placeholder; no SMILES is computed here.
    print(im, "{:} {:5.2f} {:5.2f}".format("smi", energies.mean(),
                                           energies.std()))

    return
Beispiel #4
0
def dump_sdf(molobj, energies, coordinates, costs):
    """
    Print one SDF record per conformer of ``molobj`` to stdout.

    molobj      -- molecule object; it is modified in place (name cleared,
                   coordinates and the 'Energy' / 'Cost' properties are
                   overwritten for every conformer)
    energies    -- one energy per conformer
    coordinates -- one coordinate set per conformer
    costs       -- one cost per conformer

    The three sequences are iterated in lockstep with ``zip``, so the
    shortest one determines how many records are printed.

    """

    # Clear the title so every dumped record has an empty name.
    molobj.SetProp('_Name', '')

    records = []

    for energy, coord, cost in zip(energies, coordinates, costs):

        # Set coordinates
        cheminfo.molobj_set_coordinates(molobj, coord)

        molobj.SetProp('Energy', str(energy))
        molobj.SetProp('Cost', str(cost))

        records.append(cheminfo.molobj_to_sdfstr(molobj))

    print("".join(records))

    return
Beispiel #5
0
def ajax_submitquantum(request):
    """

    Setup quantum calculation

    Validates the SDF posted under the key "sdf", normalises its three
    header lines and hashes the result (md5). If a GamessCalculation with
    that hash already exists its ``created`` timestamp is refreshed and only
    the hash is returned; otherwise the molecule is handed to
    ``pipelines.gamess_quantum_pipeline``.

    Returns a dict: ``{'hashkey': ...}`` for a known calculation, the
    pipeline's message for a new one, or ``{'error': ..., 'message': ...}``
    when the request is empty, the "sdf" key is missing/empty, or the
    molecule cannot be parsed.

    """

    if not request.POST:
        return {
            'error': 'Error 128 - empty post',
            'message': "Error. Empty post."
        }

    # .get() so a missing key produces the error response below instead of
    # an uncaught KeyError.
    if not request.POST.get("sdf"):
        return {
            'error': 'Error 132 - sdf key error',
            'message': "Error. Missing information."
        }

    # Get coordinates from request
    sdfstr = request.POST["sdf"].encode('utf-8')

    # Get rdkit
    molobj, status = cheminfo.sdfstr_to_molobj(sdfstr)

    if molobj is None:
        # Only the text after the last "]" of the status is shown to the user.
        status = status.split("]")
        status = status[-1]
        return {'error': 'Error 141 - rdkit error', 'message': status}

    # Only checks that a conformer exists; the conformer itself is not used.
    try:
        molobj.GetConformer()
    except ValueError:
        # Error
        return {
            'error':
            'Error 141 - rdkit error',
            'message':
            "Error. Server was unable to generate conformations for this molecule"
        }

    # If hydrogens not added, assume graph and optimize with forcefield
    atoms = cheminfo.molobj_to_atoms(molobj)
    if 1 not in atoms:
        molobj = cheminfo.molobj_add_hydrogens(molobj)
        cheminfo.molobj_optimize(molobj)

    # TODO Check lengths of atoms
    # TODO Define max in settings

    # Fix sdfstr: drop the first three lines and replace them with empty
    # ones so the hash does not depend on the header block.
    sdfstr = sdfstr.decode('utf8')
    for _ in range(3):
        i = sdfstr.index('\n')
        sdfstr = sdfstr[i + 1:]
    sdfstr = "\n" * 3 + sdfstr

    # hash on sdf (conformer)
    hshobj = hashlib.md5(sdfstr.encode())
    hashkey = hshobj.hexdigest()

    calculation = request.dbsession.query(models.GamessCalculation) \
        .filter_by(hashkey=hashkey).first()

    if calculation is not None:

        msg = {'hashkey': hashkey}

        # Touch the existing entry instead of recalculating.
        calculation.created = datetime.datetime.now()
        return msg

    print("new:", hashkey)

    molecule_info = {"sdfstr": sdfstr, "molobj": molobj, "hashkey": hashkey}

    msg = pipelines.gamess_quantum_pipeline(request, molecule_info)

    return msg
Beispiel #6
0
def gamess_quantum_pipeline(request, molinfo):
    """

    Run the GAMESS calculations for one molecule and store the result.

    Assumed that rdkit understands the molecule

    The molecule is optimized first. The vibration, orbital and solvation
    calculations are then started as three separate processes and their
    results collected over pipes. On success a GamessCalculation row is
    added to ``request.dbsession`` and the per-SMILES Counter is created or
    incremented.

    request -- web request; only ``request.dbsession`` is used
    molinfo -- dict with the keys "molobj" and "sdfstr"

    Returns ``{"smiles": ..., "hashkey": ...}`` on success, or a dict with
    "error" and "message" if any of the four calculations failed.

    The process working directory is changed to ``data/<hashkey>`` (next to
    this file) while GAMESS runs and is always changed back to this file's
    directory afterwards, also on failure.

    """

    # TODO Read gamess settings from ini

    # Read input
    molobj = molinfo["molobj"]
    sdfstr = molinfo["sdfstr"]

    # Get that smile on your face
    smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

    # hash on sdf (conformer)
    hshobj = hashlib.md5(sdfstr.encode())
    hashkey = hshobj.hexdigest()

    # Start respond message
    msg = {"smiles": smiles, "hashkey": hashkey}

    # Create new calculation
    calculation = models.GamessCalculation()

    # check if folder exists
    here = os.path.abspath(os.path.dirname(__file__)) + "/"
    datahere = here + "data/"
    scratch = datahere + hashkey

    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(scratch, exist_ok=True)

    # TODO Add error messages when gamess fails
    # TODO add timeouts for all gamess calls

    gmsargs = {
        "scr": scratch,
        "autoclean": True,
        "debug": False,
    }

    # Input headers. The "{:}" placeholder is not filled in here; the
    # headers are passed on to gamess.calculate unchanged.
    vibheader = """
 $basis
     gbasis=PM3
 $end

 $contrl
    scftyp=RHF
    runtyp=hessian
    icharg={:}
    maxit=60
 $end
"""

    orbheader = """
 $contrl
 coord=cart
 units=angs
 scftyp=rhf
 icharg={:}
 maxit=60
 $end
 $basis gbasis=sto ngauss=3 $end
"""

    solheader = """
 $system
    mwords=125
 $end
 $basis
    gbasis=PM3
 $end
 $contrl
    scftyp=RHF
    runtyp=energy
    icharg={:}
 $end
 $pcm
    solvnt=water
    mxts=15000
    icav=1
    idisp=1
 $end
 $tescav
    mthall=4
    ntsall=60
 $end

"""

    headers = [vibheader, orbheader, solheader]
    readers = [
        gamess.read_properties_vibration, gamess.read_properties_orbitals,
        gamess.read_properties_solvation
    ]

    def procfunc(conn, reader, *args, **kwargs):
        # Runs in the child process. Always sends exactly one object (the
        # parsed properties, or None on any failure) so the parent never
        # waits for a result that will not come.
        try:
            stdout, status = gamess.calculate(*args, **kwargs)
            properties = reader(stdout)
        except Exception:
            # TODO Error reading properties
            properties = None
        conn.send(properties)
        conn.close()

    os.chdir(scratch)

    try:

        # Optimize molecule
        properties = gamess.calculate_optimize(molobj, **gmsargs)

        if properties is None:
            return {
                'error': 'Error g-80 - gamess optimization error',
                'message': "Error. Server was unable to optimize molecule"
            }

        print(smiles, list(properties.keys()))

        # Save and set coordinates
        coord = properties["coord"]
        calculation.coordinates = save_array(coord)
        calculation.enthalpy = properties["h"]
        cheminfo.molobj_set_coordinates(molobj, coord)

        # Optimization is finished, do other calculation async-like
        procs = []
        conns = []

        for header, reader in zip(headers, readers):

            parent_conn, child_conn = Pipe()
            p = Process(target=procfunc,
                        args=(child_conn, reader, molobj, header),
                        kwargs=gmsargs)
            p.start()

            # The parent does not write; closing its copy lets recv() raise
            # EOFError if the child dies without sending.
            child_conn.close()

            procs.append(p)
            conns.append(parent_conn)

        # Receive BEFORE joining: a child blocks in send() until its data
        # is read when the payload exceeds the pipe buffer, so joining
        # first can deadlock.
        results = []
        for conn in conns:
            try:
                results.append(conn.recv())
            except EOFError:
                results.append(None)

        for proc in procs:
            proc.join()

        properties_vib, properties_orb, properties_sol = results

        if properties_vib is None:
            return {
                'error': 'Error g-104 - gamess vibration error',
                'message': "Error. Server was unable to vibrate molecule"
            }

        print(smiles, list(properties_vib.keys()))

        calculation.islinear = properties_vib["linear"]
        calculation.vibjsmol = properties_vib["jsmol"]
        calculation.vibfreq = save_array(properties_vib["freq"])
        calculation.vibintens = save_array(properties_vib["intens"])
        calculation.thermo = save_array(properties_vib["thermo"])

        if properties_orb is None:
            return {
                'error': 'Error g-128 - gamess orbital error',
                'message': "Error. Server was unable to orbital the molecule"
            }

        print(smiles, list(properties_orb.keys()))
        calculation.orbitals = save_array(properties_orb["orbitals"])
        calculation.orbitalstxt = properties_orb["stdout"]

        if properties_sol is None:
            return {
                'error': 'Error g-159 - gamess solvation error',
                'message': "Error. Server was unable to run solvation calculation"
            }

        # 'charges', 'solvation_total', 'solvation_polar', 'solvation_nonpolar',
        # 'surface', 'total_charge', 'dipole', 'dipole_total'
        print(smiles, list(properties_sol.keys()))

        charges = properties_sol["charges"]
        calculation.charges = save_array(charges)
        calculation.soltotal = properties_sol["solvation_total"]
        calculation.solpolar = properties_sol["solvation_polar"]
        calculation.solnonpolar = properties_sol["solvation_nonpolar"]
        calculation.solsurface = properties_sol["surface"]
        calculation.soldipole = save_array(properties_sol["dipole"])
        calculation.soldipoletotal = properties_sol["dipole_total"]

    finally:
        # Restore the working directory on every exit path, including the
        # error returns above and unexpected exceptions.
        os.chdir(here)

    # Saveable sdf and reset title (first two lines replaced by empty ones)
    sdfstr = cheminfo.molobj_to_sdfstr(molobj)
    sdfstr = str(sdfstr)
    for _ in range(2):
        i = sdfstr.index('\n')
        sdfstr = sdfstr[i + 1:]
    sdfstr = "\n\n" + sdfstr

    # Save mol2 fmt
    mol2 = cheminfo.molobj_to_mol2(molobj, charges=charges)
    calculation.mol2 = mol2

    # Get a 2D Picture
    # TODO Compute 2D coordinates
    svgstr = cheminfo.molobj_to_svgstr(molobj, removeHs=True)

    # Success, setup database
    calculation.smiles = smiles
    calculation.hashkey = hashkey
    calculation.sdf = sdfstr
    calculation.svg = svgstr
    calculation.created = datetime.datetime.now()

    # Add calculation to the database
    request.dbsession.add(calculation)

    # Add smiles to counter
    countobj = request.dbsession.query(models.Counter) \
        .filter_by(smiles=smiles).first()

    if countobj is None:
        counter = models.Counter()
        counter.smiles = smiles
        counter.count = 1
        request.dbsession.add(counter)
    else:
        countobj.count += 1

    return msg
def main():
    """
    Filter an SDF file and its property file and save what passes.

    Molecules from ``--sdf`` and lines from ``--properties`` are read in
    lockstep. A pair is kept only if ``molobjfilter`` accepts the molecule
    and ``valuefilter`` accepts the line. Kept molecules are written to
    ``<scratch>structures.sdf.gz`` (each followed by "$$$$"), kept lines to
    ``<scratch>properties.csv``, and a SMILES -> values mapping (all
    whitespace-separated fields of the line except the first, as floats) is
    saved with ``misc.save_json`` and ``misc.save_obj`` under
    ``<scratch>molecules``.

    """

    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--scratch',
                        action='store',
                        help='',
                        metavar="DIR",
                        default="_tmp_")
    parser.add_argument('--sdf', action='store', help='',
                        metavar="FILE")  #, nargs="+", default=[])
    parser.add_argument('--properties',
                        action='store',
                        help='',
                        metavar="FILE")  #, nargs="+", default=[])
    parser.add_argument('-j',
                        '--procs',
                        action='store',
                        help='pararallize',
                        metavar="int",
                        default=0,
                        type=int)

    args = parser.parse_args()

    # Make the scratch prefix end with "/". An empty prefix is left alone
    # (it means the current directory; appending "/" would point at the
    # filesystem root, and indexing [-1] would raise IndexError).
    if args.scratch and not args.scratch.endswith("/"):
        args.scratch += "/"

    moledict = {}

    # Context managers close all three files even if a filter or a
    # conversion raises half-way through.
    with gzip.open(args.scratch + "structures.sdf.gz", 'w') as fsdf, \
            open(args.scratch + "properties.csv", 'w') as fprop:

        molecules = cheminfo.read_sdffile(args.sdf)

        with open(args.properties, 'r') as properties:

            for molobj, line in zip(molecules, properties):

                if not molobjfilter(molobj):
                    continue

                if not valuefilter(line):
                    continue

                smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

                print(smiles)

                sdfstr = cheminfo.molobj_to_sdfstr(molobj)
                sdfstr += "$$$$\n"
                # gzip file is opened in binary mode, hence the encode().
                fsdf.write(sdfstr.encode())
                fprop.write(line)

                # First field of the line is skipped; the rest are values.
                values = [float(x) for x in line.split()[1:]]
                moledict[smiles] = values

    misc.save_json(args.scratch + "molecules", moledict)
    misc.save_obj(args.scratch + "molecules", moledict)

    return
Beispiel #8
0
def calculate_forcefield(molobj, conformer, torsions, origin_angles, delta_angles,
    ffprop=None,
    ff=None,
    delta=10**-7,
    coord_decimals=6,
    grad_threshold=100):
    """

    Set the given torsions to ``origin_angles[i] + delta_angles[i]``, relax
    the structure and return its force-field energy.

    Works in two steps: a copy of the molecule is minimized with every
    torsion constrained to its target angle (+/- ``delta``), the resulting
    coordinates are rounded and written to ``conformer``, and then the
    unconstrained force field ``ff`` is minimized.

    molobj         -- molecule; copied through its SDF string for the
                      constrained step
    conformer      -- conformer that receives the coordinates (modified in
                      place)
    torsions       -- per torsion the atom indices, unpacked into the rdkit
                      dihedral calls
    origin_angles  -- reference angle per torsion, in degrees
    delta_angles   -- offset per torsion, added to the reference angle
    ffprop, ff     -- force-field properties and force field; both are
                      created with ``get_forcefield(molobj)`` if either one
                      is None
    delta          -- half-width of the torsion constraint window
    coord_decimals -- number of decimals the constrained coordinates are
                      rounded to
    grad_threshold -- gradient norm above which a converged minimization is
                      still flagged

    Returns ``(energy, pos, status)``: the energy from ``ff.CalcEnergy()``,
    the positions of ``conformer`` and the status of the global
    minimization, where a status of 0 is replaced by 4 if the gradient norm
    exceeds ``grad_threshold``.

    Disclaimer: lots of hacks, sorry. Let me know if you have an alternative.

    Note: There is an artifact where, if delta < 10**-16, the FF will find
    an *extremely* local minimum with very high energy (un-physical).
    Setting delta to about 10**-6 (numerical noise) should fix this; the
    default used here is 10**-7.

    Note: rdkit forcefield restrained optimization will optimize to a *very*
    local and very unphysical minimum which the global optimizer cannot get
    out of. Truncating the digits of the coordinates to six is a crude but
    effective way to slightly move the molecule out of this in a
    reproducible way.


    """

    if ffprop is None or ff is None:
        ffprop, ff = get_forcefield(molobj)

    sdfstr = cheminfo.molobj_to_sdfstr(molobj)
    molobj_prime, status = cheminfo.sdfstr_to_molobj(sdfstr)
    conformer_prime = molobj_prime.GetConformer()

    # Setup constrained forcefield
    # ffprop_prime, ffc = get_forcefield(molobj_prime)
    ffc = ChemicalForceFields.MMFFGetMoleculeForceField(molobj_prime, ffprop)

    # Set angles and constrains for all torsions
    for i, angle in enumerate(delta_angles):

        set_angle = origin_angles[i] + angle

        # Set clockwork angle. Deliberately best effort: if rdkit refuses
        # to set the dihedral, the constraint below still drives the
        # torsion. ``Exception`` (not a bare except) so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        try:
            Chem.rdMolTransforms.SetDihedralDeg(conformer_prime, *torsions[i], set_angle)
        except Exception:
            pass

        # Set forcefield constrain
        ffc.MMFFAddTorsionConstraint(*torsions[i], False,
            set_angle-delta, set_angle+delta, 1.0e10)

    # minimize constrains
    status = run_forcefield(ffc, 500)

    # Set result
    coordinates = conformer_prime.GetPositions()
    coordinates = np.round(coordinates, coord_decimals) # rdkit hack, read description
    cheminfo.conformer_set_coordinates(conformer, coordinates)

    # minimize global
    status = run_forcefield_prime(ff, 700, force=1e-4)

    # Get current energy
    energy = ff.CalcEnergy()

    if status == 0:

        # Reported as converged; still flag it if the gradient is large.
        grad = ff.CalcGrad()
        grad = np.array(grad)
        grad_norm = linalg.norm(grad)

        if grad_norm > grad_threshold:
            status = 4

    # Developer aid, disabled by default: dumps the offending structure
    # and terminates the program.
    debug = False
    if energy > 1000 and debug:

        print(torsions, origin_angles, delta_angles)
        print(energy, status)

        print("id")
        print(id(molobj_prime))
        print(id(molobj))

        molobj_test, status = cheminfo.sdfstr_to_molobj(sdfstr)
        coordinates = conformer.GetPositions()
        cheminfo.molobj_set_coordinates(molobj_test, coordinates)
        ffprop_t, ff_t = get_forcefield(molobj)
        run_forcefield(ff_t, 500)

        print(coordinates)


        for idxs in torsions:
            angle = Chem.rdMolTransforms.GetDihedralDeg(conformer, *idxs)
            print("ANGLE 1", angle)

        f = open("_test_dumpsdf.sdf", 'w')
        sdf = cheminfo.save_molobj(molobj)
        f.write(sdf)

        # prop, ff = get_forcefield(molobj)
        # status = run_forcefield(ff, 500)
        conformer = molobj_test.GetConformer()

        for idxs in torsions:
            angle = Chem.rdMolTransforms.GetDihedralDeg(conformer, *idxs)
            print("ANGLE 2",angle)

        print(energy, status)

        sdf = cheminfo.save_molobj(molobj_test)
        f.write(sdf)

        f.close()
        quit()

    # Get current positions
    pos = conformer.GetPositions()

    return energy, pos, status
Beispiel #9
0
def calculate_mopac(molobj, conformer, torsions, origin_angles, delta_angles,
    delta=10**-7,
    coord_decimals=6,
    atoms=None,
    ffprop=None,
    reference_smiles=None):
    """

    Set the given torsions to ``origin_angles[i] + delta_angles[i]``,
    pre-relax with a constrained force field and then optimize the
    structure with ``quantum.optmize_conformation``.

    molobj           -- molecule; copied through its SDF string
    conformer        -- not used by this function
    torsions         -- per torsion the atom indices, unpacked into the
                        rdkit dihedral calls
    origin_angles    -- reference angle per torsion, in degrees
    delta_angles     -- offset per torsion, added to the reference angle
    delta            -- half-width of the torsion constraint window
    coord_decimals   -- number of decimals the force-field coordinates are
                        rounded to before the quantum step
    atoms            -- atoms passed on to the quantum calls
    ffprop           -- force-field properties for the constrained step
    reference_smiles -- if given, the SMILES obtained from the optimized
                        structure is compared against it

    Returns ``(energy, coordinates, status)`` with status

        0 -- optimization succeeded
        5 -- succeeded, but the SMILES differs from ``reference_smiles``
        4 -- a quantum call raised; energy is then 0.0

    On status 4 the coordinates are the last ones assigned before the
    failure (the rounded force-field coordinates if the optimization itself
    failed).

    """

    sdfstr = cheminfo.molobj_to_sdfstr(molobj)
    molobj_prime, status = cheminfo.sdfstr_to_molobj(sdfstr)
    conformer_prime = molobj_prime.GetConformer()

    # Setup constrained forcefield
    # ffprop_prime, ffc = get_forcefield(molobj_prime)
    ffc = ChemicalForceFields.MMFFGetMoleculeForceField(molobj_prime, ffprop)

    # Set angles and constrains for all torsions
    for i, angle in enumerate(delta_angles):

        set_angle = origin_angles[i] + angle

        # Set clockwork angle. Deliberately best effort; the constraint
        # below still drives the torsion if this fails. ``Exception`` so
        # KeyboardInterrupt / SystemExit are not swallowed.
        try:
            Chem.rdMolTransforms.SetDihedralDeg(conformer_prime, *torsions[i], set_angle)
        except Exception:
            pass

        # Set forcefield constrain
        ffc.MMFFAddTorsionConstraint(*torsions[i], False,
            set_angle-delta, set_angle+delta, 1.0e10)

    # minimize constrains
    status = run_forcefield(ffc, 500)

    # Set result
    coordinates = conformer_prime.GetPositions()
    coordinates = np.round(coordinates, coord_decimals) # rdkit hack, same rounding as in the force-field variant

    try:
        energy, ocoordinates = quantum.optmize_conformation(atoms, coordinates)
        status = 0
        coordinates = ocoordinates

        if reference_smiles is not None:
            new_smiles = quantum.get_smiles(atoms, coordinates)

            # The optimization changed the molecular graph.
            if new_smiles != reference_smiles:
                status = 5
    except Exception:
        # Any failure of the quantum step is reported through the status
        # code instead of propagating.
        energy = 0.0
        status = 4

    return energy, coordinates, status
Beispiel #10
0
def main_folder():
    """
    Merge per-cost-combination SDF files into one conformer set.

    The first ``--sdf`` argument is used as a path prefix; files are looked
    up as ``<prefix><a>_<b>.sdf``. The set is seeded from ``<prefix>1_1.sdf``
    and then extended with the files for the first 15 combinations from
    ``clockwork.generate_costlist()``. A conformer is added only when its
    entry in the result of ``merge_asymmetric`` is empty. Files that fail
    to load are skipped.

    Output goes to ``_tmp_apentane_cum/``: ``all.sdf`` (all merged
    conformers), ``costs.csv`` (one "a b" line per conformer, the
    combination it came from) and an ``energies`` plot.

    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--version', action='version', version="1.0")
    parser.add_argument('--sdf',
                        nargs="+",
                        action='store',
                        help='',
                        metavar='FILE')
    args = parser.parse_args()

    # TODO Merge results from redis

    if args.sdf is None:
        print("error: actually we need sdfs to merge")
        quit()

    dumpdir = "_tmp_apentane_cum/"

    filename = args.sdf[0] + "{:}_{:}" + ".sdf"

    molobjs, energies, coordinates, representations = generate_sdf(
        filename.format(1, 1))

    atoms, _ = cheminfo.molobj_to_xyz(molobjs[0])

    # costcombos, costs = clockwork.generate_costlist(total_torsions=28)
    costcombos, _ = clockwork.generate_costlist()

    n_total = len(molobjs)
    # Cost combination per merged conformer; the seed file is (1, 1).
    molcosts = [(1, 1)] * n_total

    print("start", n_total)

    for combo in costcombos[:15]:

        # Best effort: a missing or unreadable file is skipped.
        # ``Exception`` so Ctrl-C / SystemExit still stop the run.
        try:
            molobjs_new, energies_new, coordinates_new, representations_new = generate_sdf(
                filename.format(*combo))
        except Exception:
            continue

        print(" merge", len(molobjs_new))

        idxs = merge_asymmetric(atoms, energies_new, energies,
                                representations_new, representations)

        n_new = 0
        for i, idxl in enumerate(idxs):

            # A non-empty index list means the conformer is not new.
            if len(idxl) > 0:
                continue

            energies.append(energies_new[i])
            coordinates.append(coordinates_new[i])
            representations.append(representations_new[i])
            molobjs.append(molobjs_new[i])

            n_new += 1

        molcosts += [combo] * n_new

        n_total += n_new
        print(" - new", n_new)
        print("total", n_total, combo)

    sdfstr = "".join(cheminfo.molobj_to_sdfstr(molobj) for molobj in molobjs)
    with open(dumpdir + "all.sdf", 'w') as f:
        f.write(sdfstr)

    costlines = "".join("{:} {:}".format(*combo) + "\n" for combo in molcosts)
    with open(dumpdir + "costs.csv", 'w') as f:
        f.write(costlines)

    plt.plot(energies, 'k.')
    plt.yscale("log")
    plt.savefig(dumpdir + "energies")

    return
Beispiel #11
0
def set_structures(datadict, scratch, procs=0):
    """
    take dict of smiles->value and generate sdf from smiles.
    Put in scratch/structures.sdf.gz
    Put values in scratch/properties.csv

    datadict -- mapping of SMILES to value(s)
    scratch  -- path prefix for the output files (used as-is, so it should
                end with a path separator)
    procs    -- 0: convert serially with ``prepare_sdf_and_csv``;
                > 0: convert with a pool of that many processes using
                ``prepare_sdf_and_csv_procs``;
                < 0: nothing is converted, empty files are written

    Entries whose conversion returns None are skipped. Each line of the
    csv starts with the entry's position in the result sequence, followed
    by its values. Nothing is returned.

    """

    keys = datadict.keys()
    results = []

    # no mp
    if procs == 0:

        # Lazy: each molecule is prepared only when the write loop below
        # asks for it.
        results = (prepare_sdf_and_csv(smi, datadict[smi]) for smi in keys)

    # scale it out
    elif procs > 0:

        import multiprocessing.util as util
        util.log_to_stderr(util.SUBDEBUG)

        # map() blocks until all results are in, so the pool can be shut
        # down by the context manager right away instead of leaking its
        # worker processes.
        with Pool(procs) as pool:
            results = pool.map(prepare_sdf_and_csv_procs,
                               list(datadict.items()))

    print("wating for results")

    # Context managers close both files even if a conversion or write fails.
    with gzip.open(scratch + "structures.sdf.gz", 'w') as fsdf, \
            open(scratch + "properties.csv", 'w') as fprop:

        for i, result in enumerate(results):

            if result is None:
                continue

            molobj, values = result

            mean = np.mean(values)

            prtstr = np.round(values, decimals=1)

            print("save {:4.2f}".format(mean), "-", prtstr)

            sdfstr = cheminfo.molobj_to_sdfstr(molobj)
            sdfstr += "$$$$\n"
            # gzip file is opened in binary mode, hence the encode().
            fsdf.write(sdfstr.encode())

            valuesstr = " ".join([str(x) for x in values])
            propstr = f"{i} " + valuesstr + "\n"
            fprop.write(propstr)

    return
Beispiel #12
0
def main(datafile, procs=0, scr="_tmp_"):
    """
    Convert a stored smiles->value database to an SDF and a property file.

    datafile -- file loaded with ``misc.load_obj``; expected to give a
                mapping of SMILES to value(s)
    procs    -- 0: convert serially with ``prepare_sdf_and_csv``;
                otherwise convert through ``misc.parallel`` with
                ``prepare_sdf_and_csv_procs``
    scr      -- not used; the output paths are fixed

    Writes ``data/sdf/structures.sdf.gz`` (each molecule followed by
    "$$$$") and ``data/sdf/properties.csv`` (one line per molecule: running
    index, then the values). Entries whose conversion returns None are
    skipped. Nothing is returned.

    """

    db = misc.load_obj(datafile)

    keys = db.keys()

    print("total keys:", len(keys))

    if procs == 0:

        def get_results():
            # Failed conversions are dropped here, so in serial mode the
            # index written below counts successful entries only.
            for smi in keys:
                result = prepare_sdf_and_csv(smi, db[smi])
                if result is None:
                    continue

                yield result

        results = get_results()

    else:

        def workpackages():
            for smi in keys:
                yield smi, db[smi]

        lines = workpackages()

        results = misc.parallel(lines,
                                prepare_sdf_and_csv_procs, [], {},
                                procs=procs)

        print("streaming results")

    # Write results
    with gzip.open("data/sdf/structures.sdf.gz", 'w') as fsdf, \
            open("data/sdf/properties.csv", 'w') as fprop:

        for i, result in enumerate(results):

            if result is None:
                continue

            molobj, values = result

            # Terminate every record so that the file stays a valid
            # multi-molecule SDF (same format set_structures writes).
            sdfstr = cheminfo.molobj_to_sdfstr(molobj)
            sdfstr += "$$$$\n"
            # gzip file is opened in binary mode, hence the encode().
            fsdf.write(sdfstr.encode())

            # str() because the values need not be strings; one line per
            # molecule.
            valuesstr = " ".join([str(x) for x in values])
            propstr = f"{i} " + valuesstr + "\n"
            fprop.write(propstr)

    return