Esempio n. 1
0
def molobjfilter(molobj):
    """Tell whether every atom of `molobj` is on the small whitelist [1, 6].

    The atom list comes from `cheminfo.molobj_to_atoms` and is checked with
    `is_allowed_atoms` (presumably atomic numbers, i.e. hydrogen and carbon
    only -- verify against `cheminfo`).

    Returns:
        True when the molecule passes the filter, False otherwise.
    """

    # The ring count is still queried, but no ring-based rule is active.
    ring_info = molobj.GetRingInfo()
    ring_info.NumRings()

    whitelist = [1, 6]
    mol_atoms = cheminfo.molobj_to_atoms(molobj)

    if is_allowed_atoms(mol_atoms, allowed_atoms=whitelist):
        return True

    return False
Esempio n. 2
0
def prepare_sdf_and_csv(smi, values, debug=True, **kwargs):
    """Filter a SMILES string and run a conformational search for it.

    Args:
        smi: SMILES string of the molecule.
        values: values associated with the molecule; returned unchanged.
        debug: when true, print the mean and standard deviation of `values`.
        **kwargs: accepted for call compatibility and ignored.

    Returns:
        A `(molobj, values)` tuple, or None when the SMILES cannot be parsed,
        the molecule is rejected by the size/element filters, or the
        conformational search yields no molecule.
    """

    # Load molecule information
    molobj = Chem.MolFromSmiles(smi)

    # MolFromSmiles reports a parse failure by returning None, so stop here
    # before the molecule is handed to any helper.
    if molobj is None:
        return None

    atoms = cheminfo.molobj_to_atoms(molobj)
    n_atoms = len(atoms)

    # NOTE This is a choice
    # NOTE Filter organic chemistry
    if n_atoms > 50 or n_atoms < 4:
        return None

    if not is_allowed_atoms(atoms):
        return None

    # The search is started from the SMILES string itself; the molecule parsed
    # above is only needed for the filters.
    molobj = cheminfo.conformationalsearch(smi)

    if molobj is None:
        return None

    if debug:
        # Statistics are only needed for this report.
        kelvin = np.array(values)
        standard_deviation = np.std(kelvin)
        mean = np.mean(kelvin)
        print("{:4.1f}".format(mean), "{:1.2f}".format(standard_deviation))

    return molobj, values
Esempio n. 3
0
def main():
    """Re-key a JSON data set by the SMILES produced by `cheminfo`.

    Reads the file given with --json, converts every key to a molecule and
    back to SMILES (hydrogens removed), skips entries that fail to parse,
    contain a "." in the SMILES, or are rejected by `is_mol_allowed`, and
    stores the remaining entries under "<scratch>/molecule_data" via
    `misc.save_json` and `misc.save_obj`.
    """

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--scratch',
        action='store',
        help='',
        metavar="DIR",
        default="_tmp_",
    )
    parser.add_argument('--json', action='store', help='', metavar="FILE")
    parser.add_argument(
        '-j',
        '--procs',
        action='store',
        help='pararallize',
        metavar="int",
        default=0,
        type=int,
    )
    args = parser.parse_args()

    # Make sure the scratch directory ends with a path separator.
    scratch_dir = args.scratch
    if scratch_dir[-1] != "/":
        scratch_dir += "/"

    data = misc.load_json(args.json)

    canonical_data = {}

    for original_key in list(data.keys()):

        molobj, _ = cheminfo.smiles_to_molobj(original_key)

        if molobj is None:
            print("error none mol:", original_key)
            continue

        smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

        # A "." in the SMILES means more than one fragment.
        if "." in smiles:
            print("error multi mol:", smiles)
            continue

        if not is_mol_allowed(cheminfo.molobj_to_atoms(molobj)):
            print("error heavy mol:", smiles)
            continue

        canonical_data[smiles] = data[original_key]

    output_stem = scratch_dir + "molecule_data"
    misc.save_json(output_stem, canonical_data)
    misc.save_obj(output_stem, canonical_data)
Esempio n. 4
0
def merge_individual_mp(molobjs, filenames, procs=1, debug=True):
    """Run `merge_results_filename` over `filenames` via `easyusage.parallel`.

    Args:
        molobjs: molecules whose atom lists are passed to every worker call.
        filenames: items handed to `easyusage.parallel` as the work list.
        procs: number of processes forwarded to `easyusage.parallel`.
        debug: unused.
    """

    print("starting {:} procs".format(procs))

    atoms_list = []
    for molobj in molobjs:
        atoms_list.append(cheminfo.molobj_to_atoms(molobj))

    extra_args = [atoms_list]
    extra_kwargs = {}

    easyusage.parallel(
        filenames,
        merge_results_filename,
        extra_args,
        extra_kwargs,
        procs=procs,
    )
Esempio n. 5
0
def merge_results_cumulative(sdffile, filenames, debug=True, molid=0):
    """Walk through result files and accumulate conformers not seen before.

    The first non-empty result file seeds the collection. For every later
    file, `merge_asymmetric` is asked for matches against the collection and
    only entries with an empty match list are added.

    Args:
        sdffile: sequence whose first element is the SDF file to read.
        filenames: result files, processed in order.
        debug: when true, print the number of new and total conformers after
            each merged file.
        molid: unused.

    Returns:
        None. The accumulated lists are local to the function.
    """

    kept_energies = []
    kept_coordinates = []
    kept_representations = []
    n_total = 0

    molobjs = list(cheminfo.read_sdffile(sdffile[0]))
    atoms_list = [cheminfo.molobj_to_atoms(mol) for mol in molobjs]

    for filename in filenames:

        new_energies, new_coordinates, atoms = read_resulttxt(
            atoms_list, filename)
        new_representations = [
            sim.get_representation(atoms, xyz) for xyz in new_coordinates
        ]

        # Nothing collected yet: take this file as-is.
        if len(kept_energies) == 0:
            kept_energies.extend(new_energies)
            kept_coordinates.extend(new_coordinates)
            kept_representations.extend(new_representations)
            n_total += len(new_energies)
            continue

        matches = merge_asymmetric(atoms, new_energies, kept_energies,
                                   new_representations, kept_representations)

        n_new = 0
        for position, hits in enumerate(matches):

            # Only entries without any match in the collection are added.
            if len(hits) == 0:
                kept_energies.append(new_energies[position])
                kept_coordinates.append(new_coordinates[position])
                kept_representations.append(new_representations[position])
                n_new += 1

        if debug:
            n_total += n_new
            print(" - new", n_new)
            print("total", n_total)
Esempio n. 6
0
def merge_results_filenames(molobjs, filenames):
    """Call `merge_results_filename` once per entry of `filenames`.

    Args:
        molobjs: molecules; their atom lists are passed to every call.
        filenames: result files, processed sequentially in the given order.
    """

    print("filenames")

    atoms_list = []
    for molobj in molobjs:
        atoms_list.append(cheminfo.molobj_to_atoms(molobj))

    for result_file in filenames:
        merge_results_filename(result_file, atoms_list)
Esempio n. 7
0
def clean_data(listdata):
    """Group the values of `listdata` rows by SMILES.

    Each row provides a SMILES string at index 1 and a value at index 3. The
    SMILES is round-tripped through `cheminfo` (hydrogens removed) and used
    as dictionary key. Rows that fail to parse or are rejected by
    `is_mol_allowed` are skipped. A count per atom type and the number of
    molecules are printed.

    Returns:
        dict mapping SMILES to the list of float values found for it.
    """

    grouped = {}
    seen_atoms = []

    for row in listdata:

        raw_smiles = row[1]
        value = float(row[3])

        molobj, _ = cheminfo.smiles_to_molobj(raw_smiles)

        if molobj is None:
            print("error:", raw_smiles)
            continue

        smi_key = cheminfo.molobj_to_smiles(molobj, remove_hs=True)
        atoms = cheminfo.molobj_to_atoms(molobj)

        # filter for organic chemistry
        if not is_mol_allowed(atoms):
            continue

        seen_atoms.extend(atoms)
        grouped.setdefault(smi_key, []).append(value)

    unique_atoms, occurrences = np.unique(seen_atoms, return_counts=True)

    for atom, count in zip(unique_atoms, occurrences):
        print(atom, count)

    print("Total molecules", len(grouped))

    return grouped
Esempio n. 8
0
def filter_molobj(molobj):
    """Decide whether `molobj` passes the element and size filters.

    A molecule passes when `is_allowed_atoms` accepts its atoms, it has
    between 10 and 20 atoms with a value above 1 in the atom array (heavy
    atoms, assuming atomic numbers), and no more than 40 atoms in total.

    Returns:
        True when all checks pass, False otherwise.
    """

    # The ring count is still queried, but no ring-based rule is active.
    ring_info = molobj.GetRingInfo()
    ring_info.NumRings()

    atoms = cheminfo.molobj_to_atoms(molobj)
    if not is_allowed_atoms(atoms):
        return False

    total_count = len(atoms)
    (heavy_idx,) = np.where(atoms > 1)
    heavy_count = len(heavy_idx)

    if heavy_count < 10 or heavy_count > 20:
        return False

    if total_count > 40:
        return False

    return True
Esempio n. 9
0
def filter_dict(molecules):
    """Remove entries from `molecules` that fail the molecule or value filter.

    Keys are SMILES strings. A key is deleted when `filter_molobj` rejects
    its molecule or `filter_value` rejects its stored value. Keys that cannot
    be converted to a molecule are left in the dictionary untouched. The
    largest atom count among the surviving, parseable molecules is printed.

    Args:
        molecules: dict keyed by SMILES; modified in place.

    Returns:
        The same `molecules` dict.
    """

    largest = 0

    # Iterate over a snapshot of the keys because entries are deleted.
    for smiles in list(molecules.keys()):

        molobj, _ = cheminfo.smiles_to_molobj(smiles)

        if molobj is None:
            continue

        accepted = filter_molobj(molobj)

        if not accepted:
            molecules.pop(smiles)
            print(smiles, accepted)
            continue

        accepted = filter_value(molecules[smiles])

        if not accepted:
            print(accepted, smiles, molecules[smiles])
            molecules.pop(smiles)
            continue

        # Report
        atom_count = len(cheminfo.molobj_to_atoms(molobj))
        largest = max(largest, atom_count)

    print("max atoms: ", largest)

    return molecules
Esempio n. 10
0
def clean_data(df, scratch):
    """Group values (converted to kelvin) by SMILES and save them.

    Every row of `df` provides `smiles` and `mpC`; 273.15 is added to `mpC`.
    The SMILES is round-tripped through `cheminfo` (hydrogens removed) and
    used as key. Rows whose SMILES cannot be parsed are reported and skipped.
    A count per atom type is printed and the result is written to
    `scratch + "molecule_data"` with `misc.save_obj` and `misc.save_json`.

    Args:
        df: DataFrame with `smiles` and `mpC` columns.
        scratch: path prefix for the output files.

    Returns:
        None.
    """

    # NOTE The former unused `df.iloc[1]` lookup was removed: it raised
    # IndexError for frames with fewer than two rows.

    data = {}
    atom_types = []

    for _, row in df.iterrows():

        smi = row.smiles
        value = row.mpC + 273.15

        molobj, _status = cheminfo.smiles_to_molobj(smi)

        if molobj is None:
            print("error:", smi)
            continue

        smi = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

        # Atoms
        atoms = cheminfo.molobj_to_atoms(molobj)
        atom_types += list(atoms)

        data.setdefault(smi, []).append(value)

    atom_types, counts = np.unique(atom_types, return_counts=True)

    for atom, count in zip(atom_types, counts):
        print(atom, count)

    misc.save_obj(scratch + "molecule_data", data)
    misc.save_json(scratch + "molecule_data", data)
Esempio n. 11
0
def ajax_submitquantum(request):
    """Set up a quantum calculation for the SDF posted in `request`.

    The posted SDF text is parsed into a molecule. If the molecule carries no
    hydrogens it is treated as a bare graph: hydrogens are added and the
    structure is optimized. The SDF text (with its first three lines blanked)
    is hashed with MD5 to identify the calculation. A calculation already
    stored under that hash is refreshed and reported; otherwise the molecule
    is passed on to `pipelines.gamess_quantum_pipeline`.

    Args:
        request: request object providing `POST` and `dbsession`.

    Returns:
        dict. Either an `{'error': ..., 'message': ...}` payload, a
        `{'hashkey': ...}` payload for a known calculation, or whatever
        `pipelines.gamess_quantum_pipeline` returns for a new one.
    """

    if not request.POST:
        return {
            'error': 'Error 128 - empty post',
            'message': "Error. Empty post."
        }

    # A missing "sdf" key is reported the same way as an empty value instead
    # of escaping as a KeyError.
    sdf_text = request.POST.get("sdf")

    if not sdf_text:
        return {
            'error': 'Error 132 - sdf key error',
            'message': "Error. Missing information."
        }

    # Get rdkit
    molobj, status = cheminfo.sdfstr_to_molobj(sdf_text.encode('utf-8'))

    if molobj is None:
        # Keep only the text after the last "]" of the status message.
        status = status.split("]")
        status = status[-1]
        return {'error': 'Error 141 - rdkit error', 'message': status}

    # The molecule must carry a conformer; GetConformer raises otherwise.
    try:
        molobj.GetConformer()
    except ValueError:
        return {
            'error':
            'Error 141 - rdkit error',
            'message':
            "Error. Server was unable to generate conformations for this molecule"
        }

    # If hydrogens not added, assume graph and optimize with forcefield
    atoms = cheminfo.molobj_to_atoms(molobj)
    if 1 not in atoms:
        molobj = cheminfo.molobj_add_hydrogens(molobj)
        cheminfo.molobj_optimize(molobj)

    # TODO Check lengths of atoms
    # TODO Define max in settings

    # Replace the first three lines of the posted text with empty lines so
    # that their content does not influence the hash.
    sdfstr = sdf_text
    for _ in range(3):
        newline_at = sdfstr.index('\n')
        sdfstr = sdfstr[newline_at + 1:]
    sdfstr = "\n" * 3 + sdfstr

    # hash on sdf (conformer)
    hashkey = hashlib.md5(sdfstr.encode()).hexdigest()

    calculation = request.dbsession.query(models.GamessCalculation) \
        .filter_by(hashkey=hashkey).first()

    if calculation is not None:
        # Known calculation: refresh its timestamp and report the key.
        calculation.created = datetime.datetime.now()
        return {'hashkey': hashkey}

    print("new:", hashkey)

    molecule_info = {"sdfstr": sdfstr, "molobj": molobj, "hashkey": hashkey}

    # NOTE The legacy in-process setup that used to follow this return was
    # unreachable and has been removed.
    return pipelines.gamess_quantum_pipeline(request, molecule_info)
Esempio n. 12
0
def get_conformations(molobj, torsions, resolutions, method="sqm", debug=False):
    """Evaluate the molecule at every torsion-angle combination.

    Angle combinations come from `clockwork.generate_angles`. Before each
    evaluation the conformer is reset to its starting coordinates, then
    `calculate_forcefield` (method "ff") or `calculate_mopac` (any other
    method) is called.

    Args:
        molobj: molecule with a conformer; a deep copy is used internally.
        torsions: sequences of atom indices, one per torsion.
        resolutions: passed on to `clockwork.generate_angles`.
        method: "ff" selects the force field, anything else selects mopac.
        debug: when true, print timing, energy and status per evaluation.

    Returns:
        Three numpy arrays (energies, coordinates, states), or three empty
        lists when no force field could be set up.
    """

    # Operate on a deep copy of the molecule.
    molobj = copy.deepcopy(molobj)

    n_torsions = len(torsions)

    # no constraints
    ffprop, forcefield = get_forcefield(molobj)

    # Forcefield generation failed
    if forcefield is None:
        return [], [], []

    # Get conformer and origin
    conformer = molobj.GetConformer()
    origin = conformer.GetPositions()

    # HACK rdkit requires int type for index
    torsions = [[int(index) for index in torsion] for torsion in torsions]

    # Dihedral angles of the starting geometry
    origin_angles = [
        Chem.rdMolTransforms.GetDihedralDeg(conformer, *torsion)
        for torsion in torsions
    ]

    # Get resolution angles
    angle_iterator = clockwork.generate_angles(resolutions, n_torsions)

    # Select the calculator and its keyword arguments
    if method == "ff":
        # rdkit mmff
        calculate_method = calculate_forcefield
        cal_kwargs = {"ffprop": ffprop, "ff": forcefield}
    else:
        atoms = cheminfo.molobj_to_atoms(molobj)
        # NOTE symbol list is built but not used further down
        atoms_str = [cheminfo.convert_atom(atom) for atom in atoms]
        smiles = quantum.get_smiles(atoms, origin)
        calculate_method = calculate_mopac
        cal_kwargs = {
            "ffprop": ffprop,
            "atoms": atoms,
            "reference_smiles": smiles,
        }

    energies = []
    coordinates = []
    states = []

    for angle in angle_iterator:

        # reset coordinates
        set_coordinates(conformer, origin)

        if debug:
            start = time.time()

        energy, pos, status = calculate_method(
            molobj, conformer, torsions, origin_angles, angle, **cal_kwargs)

        if debug:
            end = time.time()
            print("{:6.5f}s".format(end-start), "{:6.2f}".format(energy), status)

        # collect
        energies.append(energy)
        coordinates.append(pos)
        states.append(status)

    return np.asarray(energies), np.asarray(coordinates), np.asarray(states)
Esempio n. 13
0
def parse_results(molidx,
                  readtemplate,
                  molobjs,
                  dump_results=None,
                  debug=True,
                  **kwargs):
    """Re-optimize stored conformers of one molecule and merge the survivors.

    Conformers are read with `merge.read_txt`, optimized one by one with
    `optmize_conformation`, kept only when the optimized structure yields the
    same SMILES as the reference molecule, and finally reduced with
    `merge.merge_cost`.

    Args:
        molidx: index into `molobjs`; also formatted into both templates.
        readtemplate: format string giving the input file for `molidx`.
        molobjs: indexable collection of molecules.
        dump_results: optional format string giving the output file.
        debug: when true, print progress and skip molecules whose output
            file already exists.
        **kwargs: accepted for call compatibility and ignored.

    Returns:
        `(energies, coordinates, costs)` lists of the merged conformers, or
        None when the molecule was skipped because its output file exists.
    """

    if debug:

        # dump_results is optional, so only look for an existing output file
        # when a template was actually supplied.
        if dump_results is not None:
            if os.path.exists(dump_results.format(molidx)):
                print("exists", molidx)
                return

        print("parsing", molidx)

    filename = readtemplate.format(molidx)
    molobj = molobjs[molidx]

    reference_smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

    atoms = cheminfo.molobj_to_atoms(molobj)
    n_atoms = len(atoms)

    energies, coordinates, costs = merge.read_txt(filename, n_atoms)

    oenergies = []
    ocoordinates = []
    ocosts = []

    for i, (_, coord, cost) in enumerate(zip(energies, coordinates, costs)):

        filename = "_tmp_mopac_/_" + str(molidx) + "-" + str(i) + "_"

        # Best effort: a conformer that fails to optimize is skipped.
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        try:
            oenergy, ocoord = optmize_conformation(atoms,
                                                   coord,
                                                   filename=filename)
        except Exception:
            print("unconverged", filename)
            continue

        # Keep the conformer only if the optimized structure still gives the
        # reference SMILES.
        optimized = get_molobj(atoms, ocoord)
        smiles = cheminfo.molobj_to_smiles(optimized)

        if smiles == reference_smiles:
            oenergies.append(oenergy)
            ocoordinates.append(ocoord)
            ocosts.append(cost)

    idxs = merge.merge_cost(atoms, oenergies, ocoordinates, ocosts)

    renergies = [oenergies[idx] for idx in idxs]
    rcoords = [ocoordinates[idx] for idx in idxs]
    rcosts = [ocosts[idx] for idx in idxs]

    if dump_results is not None:

        out = merge.dump_txt(renergies, rcoords, rcosts)

        # The context manager closes the file even if the write fails.
        with open(dump_results.format(molidx), 'w') as f:
            f.write(out)

    return renergies, rcoords, rcosts
Esempio n. 14
0
def merge_results_cumulative_prime(molid,
                                   molobj,
                                   filenametemplate,
                                   debug=True,
                                   dump_results=None,
                                   iolock=None):
    """Merge the result files of one molecule, in cost-list order.

    For every combination from `clockwork.generate_linear_costlist` the
    matching file is read. The first non-empty file seeds the collection.
    From later files only conformers for which `merge_asymmetric` reports no
    match in the collection are added. Each kept conformer records the
    combination it came from.

    Args:
        molid: molecule id, formatted into both templates.
        molobj: molecule providing the atom list.
        filenametemplate: format string taking `molid` and the combination.
        debug: when true, print new/total counts to stderr per merged file.
        dump_results: optional format string for the output file.
        iolock: passed on to `stdprint`.

    Returns:
        `(energies, coordinates, costs)` lists of the merged conformers.
    """

    # the G list
    combos = clockwork.generate_linear_costlist()

    # init
    energies = []
    coordinates = []
    representations = []
    costs = []
    n_total = 0

    atoms = cheminfo.molobj_to_atoms(molobj)
    n_atoms = len(atoms)

    for combo in combos:

        filename = filenametemplate.format(molid, *combo)

        stdprint(filename, std="err", iolock=iolock)

        energies_next, coordinates_next = read_txt(filename, n_atoms)

        # file did not exists
        if energies_next is None:
            continue

        representations_next = [
            sim.get_representation(atoms, coord) for coord in coordinates_next
        ]

        # Nothing collected yet: take this file as-is.
        if len(energies) == 0:
            n_new = len(energies_next)
            energies += energies_next
            coordinates += coordinates_next
            representations += representations_next
            costs += [combo] * n_new
            n_total += n_new
            continue

        idxs = merge_asymmetric(atoms, energies_next, energies,
                                representations_next, representations)

        n_new = 0
        for i, idxl in enumerate(idxs):

            # A non-empty match list means the conformer is already known.
            if len(idxl) > 0:
                continue

            energies.append(energies_next[i])
            coordinates.append(coordinates_next[i])
            representations.append(representations_next[i])
            costs.append(combo)
            n_new += 1

        # Keep the running total correct whether or not it is printed.
        n_total += n_new

        if debug:
            print(" - new", n_new, file=sys.stderr)
            print("total", n_total, file=sys.stderr)

    if dump_results:

        out = dump_txt(energies, coordinates, costs)

        # The context manager closes the file even if the write fails.
        with open(dump_results.format(molid), 'w') as f:
            f.write(out)

    return energies, coordinates, costs
Esempio n. 15
0
def parse_molandprop(*args, debug=False, **kwargs):
    """Turn a (molecule, properties) record into (molecule, value).

    The record is given either as two positional arguments or as a single
    pair. Records are rejected when the molecule is None, the properties lack
    a "SMILES" entry, the SMILES contains ".", structure optimization returns
    status 5 twice (second attempt rebuilt from the SMILES), the measured
    value contains "<" or ">", or the unit is neither "Celsius" nor "K".
    Celsius values get 273.15 added.

    Args:
        *args: `molobj, props` or a single `(molobj, props)` pair.
        debug: when true, report ignored multi-fragment SMILES.
        **kwargs: accepted for call compatibility and ignored.

    Returns:
        `(molobj, value)` with hydrogens added and the structure optimized,
        or `(None, None)` when the record is rejected.
    """

    if len(args) > 1:
        molobj, props = args[0], args[1]
    else:
        molobj, props = args[0]

    if molobj is None:
        return None, None

    keys = props.keys()

    if "SMILES" not in keys:
        return None, None

    prop_smiles = props["SMILES"]

    # Ignore multi molecules
    if "." in prop_smiles:
        if debug:
            print(f"ignore: {prop_smiles}")
        return None, None

    # NOTE atom list is fetched, but no size/element rule is applied to it
    atoms = cheminfo.molobj_to_atoms(molobj)

    # Add hydrogens and optimize structure
    molobj = cheminfo.molobj_add_hydrogens(molobj)
    status = cheminfo.molobj_optimize(molobj)

    # Status 5 is handled as "unconverged": retry from the SMILES string.
    if status == 5:

        molobj, status = cheminfo.smiles_to_molobj(prop_smiles)
        if molobj is None:
            print("error", props)
            return None, None

        molobj = cheminfo.molobj_add_hydrogens(molobj)
        status = cheminfo.molobj_optimize(molobj)

        if status == 5:
            print("error", props)
            return None, None

    # Raw measured entry; values given as bounds ("<", ">") are rejected.
    measured_keys = [key for key in keys if "{measured}" in key]
    raw_value = str(props[measured_keys[0]])
    if "<" in raw_value or ">" in raw_value:
        return None, None

    converted_keys = [key for key in keys if "measured, converted" in key]
    value_key = converted_keys[0]

    unit_keys = [key for key in keys if "UNIT" in key and "Point" in key]
    unit_key = unit_keys[0]

    prop_unit = props[unit_key]
    prop_value = props[value_key]

    if prop_unit == "Celsius":
        prop_value += 273.15
        return molobj, prop_value

    if prop_unit == "K":
        return molobj, prop_value

    print("error unknown unit", prop_unit, props)
    return None, None