Ejemplo n.º 1
0
def get_numRotatableBonds(mol):
    """Return the rotatable bond count of ``mol``.

    Thin wrapper that forwards ``mol`` unchanged to
    ``CalcNumRotatableBonds`` and returns its result.
    """
    n_rotatable = CalcNumRotatableBonds(mol)
    return n_rotatable
def populate_all_molecules(
    params,
    redo_size,
    redo_prop,
    mol_file=None
):
    """
    Compute descriptors, a 3D structure and a size for every molecule.

    For each molecule file (processed in sorted order) up to three
    output files are produced next to it, all named after the input
    with the ``_unopt.mol`` suffix removed:

    * ``<name>_prop.json`` -- descriptors calculated from the SMILES,
    * ``<name>_opt.mol`` -- structure returned by ``rdkf.ETKDG``,
    * ``<name>_size.csv`` -- written by ``rdkf.calc_molecule_diameter``.

    Molecules already in the failure list are skipped. Molecules whose
    SMILES contains ``*`` are added to the failure list
    (``failures.txt`` in ``params['molec_dir']``) and skipped.

    Parameters
    ----------
    params : mapping
        Must provide the keys ``vdwScale``, ``boxMargin``, ``spacing``,
        ``show_vdw``, ``plot_ellip``, ``N_conformers``, ``MW_thresh``,
        ``seed`` and ``molec_dir``. ``N_conformers`` and ``seed`` are
        converted with ``int``.
    redo_size
        If truthy, the optimised structure and the size file are
        regenerated even when they already exist.
    redo_prop
        If truthy, the property file is regenerated even when it
        already exists.
    mol_file
        If ``None``, all ``*_unopt.mol`` files in the current working
        directory are used. Otherwise it is passed to
        ``IO.read_molecule_list`` to obtain the molecules.

    """

    vdw_scale = params['vdwScale']
    box_margin = params['boxMargin']
    grid_spacing = params['spacing']
    show_vdw = params['show_vdw']
    plot_ellip = params['plot_ellip']
    n_conformers = int(params['N_conformers'])
    mw_thresh = params['MW_thresh']
    seed = int(params['seed'])

    fail_list = IO.fail_list_read(
        directory=params['molec_dir'],
        file_name='failures.txt'
    )
    print(mol_file)

    molecule_list = (
        glob.glob('*_unopt.mol') if mol_file is None
        else IO.read_molecule_list(mol_file)
    )
    n_molecules = len(molecule_list)

    print(f'{n_molecules} molecules in DB.')

    # The counter is 1-based and also advances for skipped molecules.
    for count, mol in enumerate(sorted(molecule_list), start=1):
        name = mol.replace('_unopt.mol', '')
        if name in fail_list:
            continue

        progress = f'{count} of {n_molecules}'
        opt_file = f'{name}_opt.mol'
        etkdg_fail = f'{name}_unopt.ETKDGFAILED'
        diam_file = f'{name}_size.csv'
        prop_file = f'{name}_prop.json'
        smiles = rdkf.read_structure_to_smiles(mol)

        # Generic structures (wildcard atoms) are recorded as failures.
        if '*' in smiles:
            IO.fail_list_write(
                new_name=name,
                directory=params['molec_dir'],
                file_name='failures.txt'
            )
            fail_list.append(name)
            continue

        # Molecular properties derived from the SMILES.
        if redo_prop or not exists(prop_file):
            print(
                f'>> calculating molecule descriptors of {mol}, {progress}'
            )
            rdkitmol = Chem.MolFromSmiles(smiles)
            rdkitmol.Compute2DCoords()
            Chem.SanitizeMol(rdkitmol)
            descriptors = {
                'logP': Descriptors.MolLogP(rdkitmol, includeHs=True),
                'logS': rdkf.get_logSw(rdkitmol),
                'Synth_score': rdkf.get_SynthA_score(rdkitmol),
                'NHA': rdkitmol.GetNumHeavyAtoms(),
                'MW': Descriptors.MolWt(rdkitmol),
            }
            n_rot_bonds = CalcNumRotatableBonds(rdkitmol)
            n_heavy_bonds = rdkitmol.GetNumBonds(onlyHeavy=True)
            descriptors['NRB'] = n_rot_bonds
            # Ratio of rotatable to heavy-atom bonds; 0.0 when there
            # are no bonds to avoid a division by zero.
            descriptors['NRBr'] = (
                n_rot_bonds / n_heavy_bonds if n_heavy_bonds else 0.0
            )
            descriptors['bertzCT'] = BertzCT(rdkitmol)
            descriptors['purchasability'] = chemcost_IO.is_purchasable(
                name=name,
                smiles=smiles
            )

            with open(prop_file, 'w') as f:
                json.dump(descriptors, f)

        # 3D structure via ETKDG, unless a previous attempt left a
        # failure marker file.
        needs_structure = redo_size or not exists(opt_file)
        if needs_structure and not exists(etkdg_fail):
            print(f'>> optimising {mol}, {progress}')
            optimised = rdkf.ETKDG(mol, seed=seed)
            if optimised is not None:
                rdkf.write_structure(opt_file, optimised)

        # Molecular size over all conformers, written to a csv that is
        # used in the analysis. The failure marker is checked again
        # here, after the ETKDG step above has run.
        needs_size = redo_size or not exists(diam_file)
        if needs_size and not exists(etkdg_fail):
            print(f'>> getting molecular size of {mol}, {progress}')
            # The return value is not needed; results go to out_file.
            rdkf.calc_molecule_diameter(
                name,
                smiles,
                out_file=diam_file,
                vdwScale=vdw_scale,
                boxMargin=box_margin,
                spacing=grid_spacing,
                MW_thresh=mw_thresh,
                show_vdw=show_vdw,
                plot_ellip=plot_ellip,
                N_conformers=n_conformers,
                rSeed=seed
            )
Ejemplo n.º 3
0
def mol_to_feature(m1, m1_uff, m2, interaction_data, pos_noise_std):
    """Assemble the feature dictionary for one ligand/protein pair.

    Hydrogens are stripped from ``m1`` (ligand) and ``m2`` (protein),
    Gaussian noise with standard deviation ``pos_noise_std`` is added to
    both sets of atom positions, and a randomly rotated copy of the
    ligand is used to build a second distance vector.

    ``m1_uff`` is only used to compute ``dsasa`` (SASA of ``m1`` minus
    SASA of ``m1_uff``). ``interaction_data`` is accepted but not used.

    Several entries are constant placeholders: ``charge1``/``charge2``
    are all zeros, ``valid1``/``valid2`` are all ones and ``delta_uff``
    is ``0.0``.

    Returns a dict with the keys ``h1``, ``adj1``, ``h2``, ``adj2``,
    ``A_int``, ``dmv``, ``dmv_rot``, ``pos1``, ``pos2``, ``sasa``,
    ``dsasa``, ``rotor``, ``charge1``, ``charge2``, ``vdw_radius1``,
    ``vdw_radius2``, ``vdw_epsilon``, ``vdw_sigma``, ``delta_uff``,
    ``valid1``, ``valid2``, ``no_metal1`` and ``no_metal2``.
    """
    metal_symbols = ["Zn", "Mn", "Co", "Mg", "Ni", "Fe", "Ca", "Cu"]

    def _noisy_positions(mol):
        # Positions of the first conformer with Gaussian jitter applied.
        coords = np.array(mol.GetConformers()[0].GetPositions())
        coords += np.random.normal(0.0, pos_noise_std, coords.shape)
        return coords

    def _non_metal_mask(mol):
        # 1 for atoms that are not one of the listed metals, else 0.
        return np.array([
            int(atom.GetSymbol() not in metal_symbols)
            for atom in mol.GetAtoms()
        ])

    ligand = Chem.RemoveHs(m1)
    protein = Chem.RemoveHs(m2)

    # An (angle, axis) pair is drawn and discarded before the pair that
    # is actually used. The discarded draw is kept on purpose so the
    # random number stream is consumed exactly as before.
    np.random.uniform(0, 360, 1)
    np.random.uniform(-1, 1, 3)
    rot_angle = np.random.uniform(0, 360, 1)[0]
    rot_axis = np.random.uniform(-1, 1, 3)
    ligand_rot = rotate(copy.deepcopy(ligand), rot_angle, rot_axis, True)

    # Ligand: noisy positions, rotated positions (no noise),
    # self-connected adjacency matrix and atom features.
    n1 = ligand.GetNumAtoms()
    d1 = _noisy_positions(ligand)
    d1_rot = np.array(ligand_rot.GetConformers()[0].GetPositions())
    adj1 = GetAdjacencyMatrix(ligand) + np.eye(n1)
    h1 = get_atom_feature(ligand, True)

    # Protein: same preparation, without a rotated copy.
    n2 = protein.GetNumAtoms()
    d2 = _noisy_positions(protein)
    adj2 = GetAdjacencyMatrix(protein) + np.eye(n2)
    h2 = get_atom_feature(protein, True)

    # Distance vectors for the original and the rotated ligand.
    dmv = dm_vector(d1, d2)
    dmv_rot = dm_vector(d1_rot, d2)

    # Interaction tensor, one slice per interaction type. The
    # assignment order is kept as-is because negative and positive
    # indices may refer to the same slice depending on the number of
    # interaction types.
    A_int = np.zeros((len(interaction_types), n1, n2))
    A_int[-2] = get_A_hydrophobic(ligand, protein)
    A_int[1] = get_A_hbond(ligand, protein)
    A_int[-1] = get_A_metal_complexes(ligand, protein)

    # Solvent accessible surface area and its difference to m1_uff.
    sasa = cal_sasa(ligand)
    dsasa = sasa - cal_sasa(m1_uff)

    rotor = CalcNumRotatableBonds(ligand)

    # Partial charges are currently placeholders (zeros); non-finite
    # values are still mapped to 0 as before.
    charge1 = np.nan_to_num(np.zeros((n1, )), nan=0, neginf=0, posinf=0)
    charge2 = np.nan_to_num(np.zeros((n2, )), nan=0, neginf=0, posinf=0)

    valid1 = np.ones((n1, ))
    valid2 = np.ones((n2, ))

    no_metal1 = _non_metal_mask(ligand)
    no_metal2 = _non_metal_mask(protein)

    vdw_radius1 = np.array([get_vdw_radius(a) for a in ligand.GetAtoms()])
    vdw_radius2 = np.array([get_vdw_radius(a) for a in protein.GetAtoms()])

    vdw_epsilon, vdw_sigma = get_epsilon_sigma(ligand, protein, False)

    return {
        "h1": h1,
        "adj1": adj1,
        "h2": h2,
        "adj2": adj2,
        "A_int": A_int,
        "dmv": dmv,
        "dmv_rot": dmv_rot,
        "pos1": d1,
        "pos2": d2,
        "sasa": sasa,
        "dsasa": dsasa,
        "rotor": rotor,
        "charge1": charge1,
        "charge2": charge2,
        "vdw_radius1": vdw_radius1,
        "vdw_radius2": vdw_radius2,
        "vdw_epsilon": vdw_epsilon,
        "vdw_sigma": vdw_sigma,
        # UFF energy difference is not computed; fixed placeholder.
        "delta_uff": 0.0,
        "valid1": valid1,
        "valid2": valid2,
        "no_metal1": no_metal1,
        "no_metal2": no_metal2,
    }
Ejemplo n.º 4
0
def predict():
    """Generate molecules for the requested conditions and rank them.

    Reads a JSON request body with the keys:

    * ``conditions`` -- target values passed through ``scaler``; they
      are compared against (melting point, boiling point, water
      solubility) of each candidate, so the same order is presumably
      expected -- confirm against the caller.
    * ``num_rounds`` -- number of generation rounds.
    * ``loyality`` -- minimum fraction (0..1) of the 12 rule checks a
      candidate must pass to be kept.
    * ``num_of_mols`` -- maximum number of results returned.

    Each round samples molecules from ``model``, standardises them,
    obtains predicted endpoints from the remote prediction service and
    scores every candidate against 12 rules. Candidates that pass are
    collected as ``(smiles, score, (melting point, boiling point,
    water solubility), mse)`` where ``mse`` is the mean squared error
    between the scaled candidate properties and the scaled conditions.

    Returns the ``num_of_mols`` candidates with the lowest ``mse`` as a
    JSON response. Candidates whose SMILES cannot be standardised or
    parsed are skipped instead of aborting the whole request.
    """
    req_data = request.get_json()
    print("Data requested")
    print(req_data)
    conditions = req_data["conditions"]
    num_rounds = req_data["num_rounds"]
    loyality = req_data["loyality"]
    num_of_mols = req_data["num_of_mols"]

    # Number of molecules requested from the model per round; also the
    # number of times the condition row is repeated below.
    number_generate = 100

    # The scaled target is the same for every round and every
    # candidate, so it is computed once.
    target_scaled = scaler.transform(np.array([conditions]))

    result_arr = []
    for round_idx in range(num_rounds):
        print(f"round {round_idx}")
        endp = torch.tensor(target_scaled)
        print(endp.shape)

        # One condition row per generated molecule, then stacked along
        # a new leading axis of size 3 as the model input expects
        # (NOTE(review): meaning of the factor 3 is not visible here).
        endp = endp.repeat(number_generate, 1)
        endp = endp.unsqueeze(0)
        endp = endp.repeat(3, 1, 1)

        endp = endp.float()
        endp = endp.cuda()
        res = model.sample(endp, number_generate, dataset.model)
        valid = len(res) * 100 / number_generate
        print("valid : {} %".format(valid))

        # robust_standardizer returns None for molecules it rejects.
        mols = [
            std for std in (robust_standardizer(mol) for mol in res)
            if std is not None
        ]
        print("Mols obtained")
        print(mols)

        # NOTE(review): no timeout is set on this request, so a stalled
        # backend blocks this handler indefinitely. Consider passing
        # ``timeout=...`` once an acceptable limit is agreed.
        vals_another = requests.post(
            "https://backend.syntelly.com/tempSmilesArrToPredict",
            json={'smiles': mols}).json()

        round_scores = []
        for entry in vals_another:
            # Predicted endpoints keyed by their human-readable name.
            props = {
                endpoints_id2name[e["endpoint_id"]]: e["value"]
                for e in entry["data"]
            }
            smiles = entry["smiles"]

            standardized = robust_standardizer(smiles)
            mol = (
                Chem.MolFromSmiles(standardized)
                if standardized is not None else None
            )
            if mol is None:
                # Unusable structure: skip it rather than failing the
                # descriptor calculations for the whole batch.
                continue

            melting_point = props['Melting point']
            boiling_point = props['Boiling point']
            water_solubility = props['Water Solubility']

            # Each satisfied rule contributes one point (max. 12).
            checks = (
                160 <= CalcExactMolWt(mol) <= 480,
                -0.4 <= MolLogP(mol) <= 5.6,
                20 <= mol.GetNumAtoms() <= 70,
                40 <= MolMR(mol) <= 130,
                props['Bioconcentration factor'] < 3,
                props['Developmental toxicity'] == 'Negative',
                props['Flash point'] > (350 - 273.15),
                boiling_point > (300 - 273.15),
                CalcNumRings(mol) > 0,
                CalcNumRotatableBonds(mol) < 5,
                NumHAcceptors(mol) <= 10,
                NumHDonors(mol) <= 5,
            )
            val = sum(checks)

            # A candidate is kept only with a non-zero score that
            # reaches the requested fraction of passed rules.
            accepted = val > 0 and val / len(checks) >= loyality
            round_scores.append(val if accepted else 0)
            if not accepted:
                continue

            candidate_props = (
                melting_point, boiling_point, water_solubility
            )
            result_arr.append((
                smiles,
                val,
                candidate_props,
                mean_squared_error(
                    scaler.transform(np.array([list(candidate_props)])),
                    target_scaled
                ),
            ))

        print(round_scores)

    # Closest match to the requested conditions first.
    result_arr.sort(key=lambda item: item[3])

    print(result_arr[:num_of_mols])
    return jsonify(result_arr[:num_of_mols])
Ejemplo n.º 5
0
 def calculate(self):
     """Return the rotatable bond count of ``self.mol``.

     The molecule is passed unchanged to ``CalcNumRotatableBonds``.
     """
     rotatable_bonds = CalcNumRotatableBonds(self.mol)
     return rotatable_bonds