Ejemplo n.º 1
0
def main():
    """Canonicalize the SMILES keys of a JSON data file and save the result.

    Reads the dictionary stored in ``--json``, converts every key to its
    canonical (hydrogen-free) SMILES, drops entries that cannot be parsed,
    that consist of several fragments, or that ``is_mol_allowed`` rejects,
    and writes the remaining entries to ``<scratch>/molecule_data`` via both
    ``misc.save_json`` and ``misc.save_obj``.
    """

    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--scratch', action='store', help='', metavar="DIR",
                     default="_tmp_")
    cli.add_argument('--json', action='store', help='', metavar="FILE")
    cli.add_argument('-j', '--procs', action='store', help='pararallize',
                     metavar="int", default=0, type=int)
    args = cli.parse_args()

    # The scratch folder is used as a string prefix, so it must end in "/"
    if args.scratch[-1] != "/":
        args.scratch = args.scratch + "/"

    data = misc.load_json(args.json)

    canonical_data = {}

    for raw_smiles, values in data.items():

        molobj, _ = cheminfo.smiles_to_molobj(raw_smiles)

        if molobj is None:
            print("error none mol:", raw_smiles)
        else:
            canonical = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

            if "." in canonical:
                # a dot in the SMILES means more than one fragment
                print("error multi mol:", canonical)
            elif not is_mol_allowed(cheminfo.molobj_to_atoms(molobj)):
                print("error heavy mol:", canonical)
            else:
                canonical_data[canonical] = values

    target = args.scratch + "molecule_data"
    misc.save_json(target, canonical_data)
    misc.save_obj(target, canonical_data)

    return
Ejemplo n.º 2
0
def search_molcules(mollist, proplist, conf_scr="_tmp_ensemble_"):
    """Select small carbon-containing molecules and parse their properties.

    A molecule is kept when it has between 3 and 8 carbon atoms (inclusive)
    and at most 10 non-hydrogen atoms. Atom symbols are read from
    ``molobj.GetAtoms()``.

    Each property is a string holding either ``"value stddev"`` or just
    ``"value"``; only the value is returned.

    Parameters
    ----------
    mollist : iterable
        Molecule objects exposing ``GetAtoms()`` whose items expose
        ``GetSymbol()``.
    proplist : iterable of str
        Property strings, paired with ``mollist`` by position.
    conf_scr : str
        Unused; kept for interface compatibility.

    Returns
    -------
    tuple of lists
        ``(molecules, values, indices)`` for the molecules that passed the
        filter; ``indices`` refer to positions in the input.

    Raises
    ------
    ValueError
        If a property string of a selected molecule cannot be parsed.
    """

    sublist_mol = []
    sublist_prop = []
    sublist_idxs = []

    for idx, (molobj, prop) in enumerate(zip(mollist, proplist)):

        symbols = np.array([atom.GetSymbol() for atom in molobj.GetAtoms()],
                           dtype=str)

        n_carbon = int(np.count_nonzero(symbols == 'C'))

        if n_carbon < 3 or n_carbon > 8:
            continue

        # Count every atom that is not hydrogen. The previous code looked up
        # 'C' a second time here, so it zeroed the carbon count and ended up
        # counting hydrogens as heavy atoms.
        n_heavy = int(np.count_nonzero(symbols != 'H'))

        if n_heavy > 10:
            continue

        text = prop.strip()

        try:
            value, stddev = text.split()
            value = float(value)
            float(stddev)  # validate only, the deviation is not returned
        except ValueError:
            # not exactly two numeric columns: treat the line as one value
            value = float(text)

        sublist_mol.append(molobj)
        sublist_prop.append(value)
        sublist_idxs.append(idx)

    return sublist_mol, sublist_prop, sublist_idxs
Ejemplo n.º 3
0
def main():
    """Collect property values per canonical SMILES from ochem SDF files.

    Every file given with ``--sdf`` is parsed with ``parse_ochem``; the values
    are grouped by the hydrogen-free SMILES of their molecule and the
    resulting dictionary is written to ``<scratch>/molecule_data`` via both
    ``misc.save_json`` and ``misc.save_obj``.
    """

    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--scratch', action='store', help='', metavar="DIR", default="_tmp_")
    # default=[] so that running without --sdf does not iterate over None
    parser.add_argument('--sdf', action='store', help='', metavar="FILE", nargs="+", default=[])
    parser.add_argument('-j', '--procs', action='store', help='pararallize', metavar="int", default=0, type=int)

    args = parser.parse_args()

    if args.scratch[-1] != "/":
        args.scratch += "/"

    # -1 means "use every available core"
    if args.procs == -1:
        args.procs = os.cpu_count()
        print("starting", args.procs, "procs")

    mol_val_dict = {}

    for sdf in args.sdf:

        print("reading", sdf)

        molobjs, values = parse_ochem(sdf, debug=True, procs=args.procs)

        for molobj, value in zip(molobjs, values):

            smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

            # Test the actual SMILES key. The previous check used the string
            # literal "smiles", which was never a key, so values of duplicate
            # molecules overwrote each other instead of being accumulated.
            if smiles in mol_val_dict:
                print("duplicate", smiles)
            else:
                mol_val_dict[smiles] = []

            mol_val_dict[smiles].append(value)

    print("TOTAL ITEMS", len(mol_val_dict))

    misc.save_json(args.scratch + "molecule_data", mol_val_dict)
    misc.save_obj(args.scratch + "molecule_data", mol_val_dict)

    return
Ejemplo n.º 4
0
def parse_molobj(molobj, debug=False, **kwargs):
    """Parse one molecule object together with its SDF properties.

    Parameters
    ----------
    molobj :
        Molecule object exposing ``GetPropsAsDict()``, or ``None``.
    debug : bool
        Unused; kept for interface compatibility.
    **kwargs :
        Ignored.

    Returns
    -------
    ``(None, None)`` when ``molobj`` is ``None``; otherwise whatever
    ``parse_molandprop(molobj, props)`` returns.
    """

    if molobj is None:
        return None, None

    # The SMILES string and the key view computed here before were never
    # used, so that work has been dropped.
    return parse_molandprop(molobj, molobj.GetPropsAsDict())
Ejemplo n.º 5
0
def clean_data(listdata):
    """Group values by canonical SMILES for the rows that pass the filter.

    Each row must provide a SMILES string at position 1 and a value
    convertible to ``float`` at position 3. Rows whose SMILES cannot be
    parsed are reported and skipped; rows rejected by ``is_mol_allowed`` are
    skipped silently. A count per atom type and the number of molecules are
    printed.

    Returns
    -------
    dict
        Canonical (hydrogen-free) SMILES mapped to the list of its values.
    """

    values_by_smiles = {}
    seen_atoms = []

    for row in listdata:

        raw_smiles = row[1]
        value = float(row[3])

        molobj, _ = cheminfo.smiles_to_molobj(raw_smiles)

        if molobj is None:
            print("error:", raw_smiles)
            continue

        canonical = cheminfo.molobj_to_smiles(molobj, remove_hs=True)
        atoms = cheminfo.molobj_to_atoms(molobj)

        # filter for organic chemistry
        if is_mol_allowed(atoms):
            seen_atoms.extend(atoms)
            values_by_smiles.setdefault(canonical, []).append(value)

    # Statistics over all atoms of the accepted molecules
    symbols, occurrences = np.unique(seen_atoms, return_counts=True)

    for pair in zip(symbols, occurrences):
        print(*pair)

    print("Total molecules", len(values_by_smiles))

    return values_by_smiles
Ejemplo n.º 6
0
def clean_data(df, scratch):
    """Group shifted ``mpC`` values by canonical SMILES and save them.

    Parameters
    ----------
    df :
        Table iterated with ``df.iterrows()``; every row must expose the
        attributes ``smiles`` and ``mpC``.
    scratch : str
        Path prefix; the result is written to ``scratch + "molecule_data"``
        via both ``misc.save_obj`` and ``misc.save_json``.

    Rows whose SMILES cannot be parsed are reported and skipped. A count per
    atom type is printed. Nothing is returned.
    """

    # NOTE: an unused `df.iloc[1]` lookup used to sit here; it raised
    # IndexError for tables with fewer than two rows and has been removed.

    data = {}

    atom_types = []

    for _, row in df.iterrows():

        smi = row.smiles
        # presumably degrees Celsius converted to Kelvin - confirm units
        value = row.mpC + 273.15

        molobj, _ = cheminfo.smiles_to_molobj(smi)

        if molobj is None:
            print("error:", smi)
            continue

        smi = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

        # Atoms
        atoms = cheminfo.molobj_to_atoms(molobj)
        atom_types.extend(atoms)

        data.setdefault(smi, []).append(value)

    atom_types, counts = np.unique(atom_types, return_counts=True)

    for atom, count in zip(atom_types, counts):
        print(atom, count)

    misc.save_obj(scratch + "molecule_data", data)
    misc.save_json(scratch + "molecule_data", data)

    return
Ejemplo n.º 7
0
def main():
    """Compare Joback ``Tm`` estimates against stored reference properties.

    Loads ``properties`` and ``structures.sdf.gz`` from the scratch folder,
    runs the Joback estimator on every molecule, prints the number of
    molecules it could handle and the RMSE on those, and saves a
    true-vs-predicted scatter plot (coloured by the number of atoms whose
    entry in the atom array differs from 1) as ``_fig_joback``.
    """

    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--scratch', action='store', help='', metavar="dir",
                     default="_tmp_")
    cli.add_argument('-j', '--procs', action='store', help='pararallize',
                     metavar="int", default=0, type=int)
    args = cli.parse_args()

    if args.scratch[-1] != "/":
        args.scratch = args.scratch + "/"

    # Read properties
    properties = misc.load_npy(args.scratch + "properties")
    molecules = list(cheminfo.read_sdffile(args.scratch + "structures.sdf.gz"))

    heavy_atoms = []
    predictions = []
    errors = []

    for mol, prop in zip(molecules, properties):

        smi = cheminfo.molobj_to_smiles(mol, remove_hs=True)
        joback = thermo.joback.Joback(smi)
        failed = "Did not match all atoms present" in joback.status

        # Number of atoms whose entry is not 1 (presumably hydrogen - verify)
        atoms, coord = cheminfo.molobj_to_xyz(mol)
        heavy_atoms.append(len(atoms[np.where(atoms != 1)]))

        if not failed:
            try:
                estimate = joback.estimate()
            except TypeError:
                failed = True

        if failed:
            # keep the lists aligned; the entry is filtered out below
            errors.append(1)
            predictions.append(float("nan"))
            continue

        errors.append(0)
        predictions.append(estimate["Tm"])

    errors = np.array(errors, dtype=int)

    idx_success, = np.where(errors == 0)

    # Keep only the molecules the estimator could handle
    heavy_atoms = np.array(heavy_atoms)[idx_success]
    predictions = np.array(predictions)[idx_success]
    properties = np.array(properties)[idx_success]

    print("total", errors.shape[0], "filter", idx_success.shape[0])
    print()
    print(rmse(properties, predictions))

    # Diagonal reference line plus the actual predictions
    plt.plot(properties, properties, "-k")
    plt.scatter(properties, predictions, s=0.95, alpha=0.8, c=heavy_atoms)

    plt.xlabel("True")
    plt.ylabel("Predicted")

    plt.savefig("_fig_joback")
    plt.clf()

    return
Ejemplo n.º 8
0
def gamess_quantum_pipeline(request, molinfo):
    """
    Run the GAMESS property pipeline for one molecule and store the result.

    Assumed that rdkit understands the molecule.

    The molecule is optimized first. Vibration, orbital and solvation
    calculations are then started in three separate processes and their
    results are collected through pipes. On success a new
    ``models.GamessCalculation`` is added to ``request.dbsession`` and the
    ``models.Counter`` entry of the SMILES is created or incremented.

    Parameters
    ----------
    request :
        Request object; ``request.POST`` and ``request.dbsession`` are used.
    molinfo : dict
        Must contain ``"molobj"`` (the molecule object) and ``"sdfstr"``
        (SDF text of the conformer, which is hashed to build the key).

    Returns
    -------
    dict
        ``{"smiles": ..., "hashkey": ...}`` on success, or a dict with the
        keys ``"error"`` and ``"message"`` when a GAMESS step failed.

    Notes
    -----
    The process working directory is changed to the calculation folder while
    GAMESS runs and is always set to this module's folder afterwards, also
    when an error dict is returned or an exception is raised.
    """

    # TODO Read gamess settings from ini

    # Read input
    molobj = molinfo["molobj"]
    sdfstr = molinfo["sdfstr"]

    # NOTE(review): the key tested here has a trailing space ("name "), so
    # this branch is presumably never taken. `name` is not used further down,
    # so the lookup is left as it was - confirm the intent before fixing.
    if "name " in request.POST:
        name = request.POST["name"].encode('utf-8')
    else:
        name = None

    # Get that smile on your face
    smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

    # hash on sdf (conformer)
    hshobj = hashlib.md5(sdfstr.encode())
    hashkey = hshobj.hexdigest()

    # Start respond message
    msg = {"smiles": smiles, "hashkey": hashkey}

    # Check if calculation already exists (deliberately disabled)
    if False:
        calculation = request.dbsession.query(models.GamessCalculation) \
            .filter_by(hashkey=hashkey).first()

        if calculation is not None:
            calculation.created = datetime.datetime.now()
            return msg

    # Create new calculation
    calculation = models.GamessCalculation()

    # Input headers of the three follow-up calculations; "{:}" is a format
    # placeholder left for the code that consumes the header.
    vibheader = """
 $basis
     gbasis=PM3
 $end

 $contrl
    scftyp=RHF
    runtyp=hessian
    icharg={:}
    maxit=60
 $end
"""

    orbheader = """
 $contrl
 coord=cart
 units=angs
 scftyp=rhf
 icharg={:}
 maxit=60
 $end
 $basis gbasis=sto ngauss=3 $end
"""

    solheader = """
 $system
    mwords=125
 $end
 $basis
    gbasis=PM3
 $end
 $contrl
    scftyp=RHF
    runtyp=energy
    icharg={:}
 $end
 $pcm
    solvnt=water
    mxts=15000
    icav=1
    idisp=1
 $end
 $tescav
    mthall=4
    ntsall=60
 $end

"""

    headers = [vibheader, orbheader, solheader]
    readers = [
        gamess.read_properties_vibration, gamess.read_properties_orbitals,
        gamess.read_properties_solvation
    ]

    def procfunc(conn, reader, *args, **kwargs):
        """Worker: run GAMESS, parse its output and send the result back."""
        try:
            stdout, status = gamess.calculate(*args, **kwargs)
            try:
                properties = reader(stdout)
            except Exception:
                # TODO Error reading properties
                properties = None
            conn.send(properties)
        finally:
            conn.close()

    # check if folder exists
    here = os.path.abspath(os.path.dirname(__file__)) + "/"
    datahere = here + "data/"
    workdir = datahere + hashkey

    # exist_ok avoids the race between an isdir() check and mkdir()
    os.makedirs(workdir, exist_ok=True)

    os.chdir(workdir)

    try:

        # GAMESS DEBUG

        # TODO Add error messages when gamess fails
        # TODO add timeouts for all gamess calls

        # Optimize molecule

        gmsargs = {
            "scr": workdir,
            "autoclean": True,
            "debug": False,
        }
        properties = gamess.calculate_optimize(molobj, **gmsargs)

        if properties is None:
            return {
                'error': 'Error g-80 - gamess optimization error',
                'message': "Error. Server was unable to optimize molecule"
            }

        print(smiles, list(properties.keys()))

        # Save and set coordinates
        coord = properties["coord"]
        calculation.coordinates = save_array(coord)
        calculation.enthalpy = properties["h"]
        cheminfo.molobj_set_coordinates(molobj, coord)

        # Optimization is finished, do other calculation async-like

        procs = []
        conns = []

        for header, reader in zip(headers, readers):

            parent_conn, child_conn = Pipe()
            p = Process(target=procfunc,
                        args=(child_conn, reader, molobj, header),
                        kwargs=gmsargs)
            p.start()

            # Drop the parent's copy of the child end, otherwise recv() can
            # never see EOF when the worker dies without sending anything.
            child_conn.close()

            procs.append(p)
            conns.append(parent_conn)

        # Receive BEFORE joining: a worker blocks in send() until its data
        # is read, so joining first can deadlock on large results.
        results = []

        for conn in conns:
            try:
                results.append(conn.recv())
            except EOFError:
                # worker exited without sending a result
                results.append(None)
            finally:
                conn.close()

        for proc in procs:
            proc.join()

        properties_vib, properties_orb, properties_sol = results

        if properties_vib is None:
            return {
                'error': 'Error g-104 - gamess vibration error',
                'message': "Error. Server was unable to vibrate molecule"
            }

        print(smiles, list(properties_vib.keys()))

        calculation.islinear = properties_vib["linear"]
        calculation.vibjsmol = properties_vib["jsmol"]
        calculation.vibfreq = save_array(properties_vib["freq"])
        calculation.vibintens = save_array(properties_vib["intens"])
        calculation.thermo = save_array(properties_vib["thermo"])

        if properties_orb is None:
            return {
                'error': 'Error g-128 - gamess orbital error',
                'message': "Error. Server was unable to orbital the molecule"
            }

        print(smiles, list(properties_orb.keys()))
        calculation.orbitals = save_array(properties_orb["orbitals"])
        calculation.orbitalstxt = properties_orb["stdout"]

        if properties_sol is None:
            return {
                'error': 'Error g-159 - gamess solvation error',
                'message': "Error. Server was unable to run solvation calculation"
            }

        # 'charges', 'solvation_total', 'solvation_polar', 'solvation_nonpolar',
        # 'surface', 'total_charge', 'dipole', 'dipole_total'
        print(smiles, list(properties_sol.keys()))

        charges = properties_sol["charges"]
        calculation.charges = save_array(charges)
        calculation.soltotal = properties_sol["solvation_total"]
        calculation.solpolar = properties_sol["solvation_polar"]
        calculation.solnonpolar = properties_sol["solvation_nonpolar"]
        calculation.solsurface = properties_sol["surface"]
        calculation.soldipole = save_array(properties_sol["dipole"])
        calculation.soldipoletotal = properties_sol["dipole_total"]

        # GAMESS DEBUG

    finally:
        # Always leave the calculation folder, also on the error returns
        os.chdir(here)

    # Saveable sdf and reset title: drop the first two lines of the SDF text
    # and put two empty lines in their place
    sdfstr = cheminfo.molobj_to_sdfstr(molobj)
    sdfstr = str(sdfstr)
    for _ in range(2):
        i = sdfstr.index('\n')
        sdfstr = sdfstr[i + 1:]
    sdfstr = "\n\n" + sdfstr

    # Save mol2 fmt

    mol2 = cheminfo.molobj_to_mol2(molobj, charges=charges)
    calculation.mol2 = mol2

    # Get a 2D Picture
    # TODO Compute 2D coordinates
    svgstr = cheminfo.molobj_to_svgstr(molobj, removeHs=True)

    # Success, setup database
    calculation.smiles = smiles
    calculation.hashkey = hashkey
    calculation.sdf = sdfstr
    calculation.svg = svgstr
    calculation.created = datetime.datetime.now()

    # Add calculation to the database
    request.dbsession.add(calculation)

    # Add smiles to counter
    countobj = request.dbsession.query(models.Counter) \
        .filter_by(smiles=smiles).first()

    if countobj is None:
        counter = models.Counter()
        counter.smiles = smiles
        counter.count = 1
        request.dbsession.add(counter)
    else:
        countobj.count += 1

    return msg
Ejemplo n.º 9
0
def main():
    """Filter an SDF file and its property file in lockstep and save both.

    Molecules from ``--sdf`` are paired line by line with ``--properties``.
    Pairs rejected by ``molobjfilter`` or ``valuefilter`` are dropped; the
    remaining ones are written to ``<scratch>/structures.sdf.gz`` and
    ``<scratch>/properties.csv``. A dictionary mapping the hydrogen-free
    SMILES to the float values found after the first column of the property
    line is saved as ``<scratch>/molecules`` via ``misc.save_json`` and
    ``misc.save_obj``.
    """

    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--scratch',
                        action='store',
                        help='',
                        metavar="DIR",
                        default="_tmp_")
    parser.add_argument('--sdf', action='store', help='',
                        metavar="FILE")  #, nargs="+", default=[])
    parser.add_argument('--properties',
                        action='store',
                        help='',
                        metavar="FILE")  #, nargs="+", default=[])
    parser.add_argument('-j',
                        '--procs',
                        action='store',
                        help='pararallize',
                        metavar="int",
                        default=0,
                        type=int)

    args = parser.parse_args()

    if args.scratch[-1] != "/":
        args.scratch += "/"

    moledict = {}

    # Context managers make sure all three files are flushed and closed even
    # when a molecule or a property line raises half-way through.
    with gzip.open(args.scratch + "structures.sdf.gz", 'w') as fsdf, \
            open(args.scratch + "properties.csv", 'w') as fprop, \
            open(args.properties, 'r') as properties:

        molecules = cheminfo.read_sdffile(args.sdf)

        for molobj, line in zip(molecules, properties):

            if not molobjfilter(molobj):
                continue

            if not valuefilter(line):
                continue

            smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

            print(smiles)

            # "$$$$" terminates one record in a multi-molecule SDF file
            sdfstr = cheminfo.molobj_to_sdfstr(molobj)
            sdfstr += "$$$$\n"
            fsdf.write(sdfstr.encode())
            fprop.write(line)

            # first column is skipped, the rest are the numeric values
            values = [float(x) for x in line.split()[1:]]
            moledict[smiles] = values

    misc.save_json(args.scratch + "molecules", moledict)
    misc.save_obj(args.scratch + "molecules", moledict)

    return
Ejemplo n.º 10
0
# Scan an SDF file for molecules with (nearly) overlapping atoms and rebuild
# those from their SMILES with a conformational search.


def _min_interatomic_distance(molobj):
    """Return the smallest off-diagonal entry of the 3D distance matrix."""
    dist = Chem.rdmolops.Get3DDistanceMatrix(molobj)
    # overwrite the self-distances on the diagonal so they cannot be the
    # minimum
    np.fill_diagonal(dist, 10.0)
    return np.min(dist)


molobjs = cheminfo.read_sdffile(filename)

for i, molobj in enumerate(molobjs):

    # NOTE: a `molobj = next(molobjs)` call used to sit here. It consumed a
    # second molecule per iteration, so every other molecule was skipped and
    # the printed index did not match the molecule that was checked.

    # stat = cheminfo.molobj_optimize(molobj)
    # print(stat)

    min_dist = _min_interatomic_distance(molobj)

    if min_dist < 0.01:
        print(i, min_dist)
        smi = cheminfo.molobj_to_smiles(molobj)
        molobj = cheminfo.conformationalsearch(smi)

        # check the regenerated structure again
        min_dist = _min_interatomic_distance(molobj)

        print(smi)
        print(min_dist)

    # atoms, coord = cheminfo.molobj_to_xyz(molobj)

    # atoms = list(atoms)
    # many_atoms = [atoms]
    # mbtypes = qml.representations.get_slatm_mbtypes(many_atoms)
Ejemplo n.º 11
0
def parse_results(molidx,
                  readtemplate,
                  molobjs,
                  dump_results=None,
                  debug=True,
                  **kwargs):
    """Re-optimize the stored conformers of one molecule and merge them.

    The conformers are read from ``readtemplate.format(molidx)``. Each one is
    passed to ``optmize_conformation``; conformers for which that call raises
    are reported as unconverged and skipped. Only conformers whose SMILES
    after optimization equals the SMILES of the reference molecule are kept,
    and ``merge.merge_cost`` selects the final set.

    Parameters
    ----------
    molidx : int
        Index into ``molobjs``; also inserted into both file templates.
    readtemplate : str
        Format string of the conformer file to read.
    molobjs : sequence
        Molecule objects.
    dump_results : str or None
        Format string of the output file. Nothing is written when ``None``.
    debug : bool
        Print progress. When ``debug`` is true and the output file already
        exists, the molecule is skipped.
    **kwargs :
        Ignored.

    Returns
    -------
    ``(energies, coordinates, costs)`` of the selected conformers, or
    ``None`` when the molecule was skipped because its output file exists.
    """

    if debug:

        # Only look for an existing dump when a template was given; with the
        # defaults (debug=True, dump_results=None) this used to fail with an
        # AttributeError on None.format.
        if dump_results is not None and os.path.exists(
                dump_results.format(molidx)):
            print("exists", molidx)
            return

        print("parsing", molidx)

    filename = readtemplate.format(molidx)
    molobj = molobjs[molidx]

    # NOTE(review): the reference is generated with remove_hs=True while the
    # SMILES compared against it below uses the default - confirm that both
    # calls produce comparable strings.
    reference_smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

    atoms = cheminfo.molobj_to_atoms(molobj)
    n_atoms = len(atoms)

    energies, coordinates, costs = merge.read_txt(filename, n_atoms)

    oenergies = []
    ocoordinates = []
    ocosts = []

    for i, (_energy, coord, cost) in enumerate(
            zip(energies, coordinates, costs)):

        filename = "_tmp_mopac_/_" + str(molidx) + "-" + str(i) + "_"

        try:
            oenergy, ocoord = optmize_conformation(atoms,
                                                   coord,
                                                   filename=filename)
        except Exception:
            # best effort: a failing optimization only drops this conformer
            print("unconverged", filename)
            continue

        m = get_molobj(atoms, ocoord)
        smiles = cheminfo.molobj_to_smiles(m)

        # keep the conformer only if the SMILES is unchanged
        if smiles == reference_smiles:
            oenergies.append(oenergy)
            ocoordinates.append(ocoord)
            ocosts.append(cost)

    idxs = merge.merge_cost(atoms, oenergies, ocoordinates, ocosts)

    renergies = [oenergies[idx] for idx in idxs]
    rcoords = [ocoordinates[idx] for idx in idxs]
    rcosts = [ocosts[idx] for idx in idxs]

    if dump_results is not None:

        out = merge.dump_txt(renergies, rcoords, rcosts)

        with open(dump_results.format(molidx), 'w') as f:
            f.write(out)

    return renergies, rcoords, rcosts
Ejemplo n.º 12
0
def main():
    """Merge molecule databases and optionally draw their Venn diagram.

    SDF files (``--sdf``) and saved dictionaries (``--dict``) are each turned
    into a set of SMILES. The dictionaries are merged by concatenating the
    value lists per SMILES, optionally passed through ``filter_dict``, and
    saved to ``<scratch>/molecule_data``. When ``--name`` is given and two or
    three databases were loaded, a Venn diagram is drawn; the current figure
    is saved as ``<scratch>/venndiagram`` whenever ``--name`` is given.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--scratch', action='store', help='', metavar="DIR",
                     default="_tmp_")
    cli.add_argument('--sdf', action='store', help='', metavar="FILE",
                     nargs="+", default=[])
    cli.add_argument('--dict', action='store', help='', metavar="FILE",
                     nargs="+", default=[])
    cli.add_argument('--name', action='store', help='', metavar="STR",
                     nargs="+")
    cli.add_argument('--filename', action='store', help='', metavar="STR")
    cli.add_argument('--filter', action='store_true', help='')
    cli.add_argument('-j', '--procs', action='store', help='pararallize',
                     metavar="int", default=0, type=int)
    args = cli.parse_args()

    if args.scratch[-1] != "/":
        args.scratch = args.scratch + "/"

    print()
    databases_set = []
    databases_dict = []

    # SDF databases only contribute their set of SMILES
    for sdf in args.sdf:
        unique_smiles = {
            cheminfo.molobj_to_smiles(molobj, remove_hs=True)
            for molobj in cheminfo.read_sdffile(sdf)
        }
        databases_set.append(unique_smiles)
        print(sdf, len(unique_smiles))

    # Dictionary databases contribute their keys and their values
    for filename in args.dict:
        data = misc.load_obj(filename)
        unique_smiles = set(data)
        databases_set.append(unique_smiles)
        databases_dict.append(data)
        print(filename, len(unique_smiles))

    if args.scratch is not None:

        # Merge databases
        everything = {}

        for data in databases_dict:
            for key in data:
                everything.setdefault(key, []).extend(data[key])

        if args.filter:
            everything = filter_dict(everything)

        print("n items", len(everything))

        # Save
        target = args.scratch + "molecule_data"
        misc.save_json(target, everything)
        misc.save_obj(target, everything)

    if args.name is not None:

        n_db = len(databases_set)

        # only two- and three-set diagrams are drawn
        if n_db == 3:
            venn3(databases_set, set_labels=args.name)
        elif n_db == 2:
            venn2(databases_set, set_labels=args.name)

        plt.savefig(args.scratch + "venndiagram")

    return