def qeds(df):
        """Return mean and std of QED over the lowest-scoring rows of ``df``.

        The rows are sorted by ascending ``score`` and the first
        ``int(n_rows * threshold)`` of them are kept. ``threshold`` is not
        defined in this function; it is resolved from an enclosing scope.
        SMILES strings that RDKit cannot parse are skipped.

        Args:
            df: DataFrame with a ``score`` column and a ``smile`` column
                holding SMILES strings.

        Returns:
            Tuple ``(mean, std)`` of the QED values of the parsable molecules
            among the kept rows. When nothing is kept or parsable, NumPy
            yields ``nan`` for both values (and emits a RuntimeWarning).
        """
        n_rows = df.shape[0]
        n_kept = int(n_rows * threshold)
        # Keep the leading `threshold` fraction of the score-sorted rows.
        best = df.sort_values('score').iloc[:n_kept]
        # Parse every SMILES exactly once; MolFromSmiles returns None when the
        # string is invalid, so those entries are filtered out below.
        parsed = (Chem.MolFromSmiles(s) for s in best.smile)
        q = np.array([QED.default(m) for m in parsed if m is not None])

        return (np.mean(q), np.std(q))
    N = 3  # number of molecules kept for drawing

    # Load the scored OptiMol samples, keep the first 10000 rows, then draw a
    # random subset of 10 rows (no random_state is passed to sample()).
    # NOTE(review): an earlier comment here said "2500 first", which does not
    # match the 10000-row slice below — confirm the intended count.
    optimol = pd.read_csv(
        os.path.join(script_dir, '..',
                     'cbas/slurm/results/big_new_lr/optimol_scored.csv'))
    optimol = optimol[:10000]
    optimol = optimol.sample(10)

    # Top molecules: sort_values() sorts ascending by default, so the first
    # rows are the ones with the lowest 'score'.
    samples = optimol.sort_values('score')
    smiles, scores = samples.smile, samples.score

    # Keep the first N SMILES and build RDKit molecules from them.
    smiles = smiles[:N]
    mols = [Chem.MolFromSmiles(s) for s in smiles]
    # NOTE(review): this rebinds the name `qeds`, which is also the name of
    # the function defined just above — confirm that function is not needed
    # after this point. `qeds` and `sas` are not used again in the visible code.
    qeds = np.array([QED.default(m) for m in mols])
    # presumably a synthetic-accessibility score; verify against calculateScore
    sas = [calculateScore(m) for m in mols]
    scores = scores[:N]

    # Two one-column grids of the same molecules: img1 is labelled with each
    # score formatted to two decimals, img2 has no legends.
    img1 = Draw.MolsToGridImage(mols,
                                molsPerRow=1,
                                useSVG=False,
                                legends=[f'{sc:.2f}' for sc in scores])
    img2 = Draw.MolsToGridImage(mols, molsPerRow=1, useSVG=False)

    # NOTE(review): `img` is not assigned anywhere in the visible code (only
    # img1 and img2 are), and both grids are created with useSVG=False —
    # confirm which image should be exported and whether it must be SVG.
    svg2pdf(str(img), write_to='optimol_samp_2.pdf')
    # The bare string below is an expression statement with no runtime effect;
    # it appears to record the SMILES selected in an earlier run — TODO confirm.
    """
    ['Cc1ccccc1CCC(=O)OCC(=O)NCCc1ccc2ccccc2c1'
     'COCC1CCCCN(C(=O)C(=O)NCc2cc3ccccc3c3ccccc23)C1'
     'Cc1ccccc1CC1CCCN1C(=O)CC1Cc2ccccc2NC1=O']
    """
예제 #3
0
# Compute a fixed set of RDKit descriptors for every SMILES in `smiles`,
# collect them column-wise in the dict `d`, and merge the resulting table with
# the `DUD` DataFrame. `d`, `smiles` and `DUD` are defined outside this
# section.

# Names of the computed properties; each one becomes a column of `df`.
prop_names = [
    'QED', 'logP', 'molWt', 'maxCharge', 'minCharge', 'valence', 'TPSA', 'HBA',
    'HBD', 'jIndex'
]
for name in prop_names:
    d[name] = []

# Labels of the DUD rows whose SMILES are rejected. They are dropped in a
# single call after the loop: dropping inside the loop copies the whole
# DataFrame once per rejected row.
rejected = []

for i, s in enumerate(smiles):
    if i % 10000 == 0:
        print(i)  # progress indicator
    m = Chem.MolFromSmiles(s)
    # Reject SMILES that RDKit cannot parse (MolFromSmiles returns None) and
    # any SMILES containing 'i' or '.' ('.' separates disconnected fragments).
    # NOTE(review): the 'i' test presumably targets elements such as Si or
    # Li — confirm the intent.
    if m is None or 'i' in s or '.' in s:
        # The enumerate position is used as the DUD row label, as before.
        # NOTE(review): this assumes DUD has a default 0..n-1 index aligned
        # with `smiles` — confirm.
        rejected.append(i)
        print(s, i)
        continue

    d['QED'].append(QED.default(m))
    d['logP'].append(Crippen.MolLogP(m))
    d['molWt'].append(Descriptors.MolWt(m))
    d['maxCharge'].append(Descriptors.MaxPartialCharge(m))
    d['minCharge'].append(Descriptors.MinPartialCharge(m))
    d['valence'].append(Descriptors.NumValenceElectrons(m))
    d['TPSA'].append(rdMolDescriptors.CalcTPSA(m))
    d['HBA'].append(rdMolDescriptors.CalcNumHBA(m))
    d['HBD'].append(rdMolDescriptors.CalcNumHBD(m))
    d['jIndex'].append(GraphDescriptors.BalabanJ(m))

if rejected:
    DUD = DUD.drop(rejected)

df = pd.DataFrame.from_dict(d)

# NOTE(review): `on=df.index` passes an array-like key rather than a column
# name; this appears to rely on `df` and the filtered `DUD` having the same
# number of rows — confirm the merged result is the intended row pairing.
df_merge = pd.merge(df, DUD, on=df.index)

#df_merge.to_csv('/home/mcb/jboitr/data/DUD_full.csv')