def fcd_statistics(SMILES, n_jobs, gpu, out, canonicalize=False):
    """
    Pre-compute FCD statistics for a list of SMILES and pickle them to a file.

    :param SMILES: List SMILES
    :param n_jobs: Number of jobs for processing SMILES
    :param gpu: CUDA device identifier, interpolated into ``cuda:{gpu}``
    :param out: Path to output file; missing parent directories are created
    :param canonicalize: If True, each SMILES is round-tripped through
        ``Chem.MolFromSmiles`` / ``Chem.MolToSmiles`` first, and entries for
        which ``Chem.MolFromSmiles`` returns a falsy value are dropped
    :return: None; the result of ``fcd.precalc`` is written to ``out``
    """
    # Canonicalisation (if requested) is done here, so FCD is told not to
    # canonize on its own.
    fcd = FCD(device=f'cuda:{gpu}', n_jobs=n_jobs, canonize=False)

    if canonicalize:
        # Parse every SMILES exactly once and reuse the molecule object.
        parsed = (Chem.MolFromSmiles(smi) for smi in SMILES)
        smiles_for_stats = [Chem.MolToSmiles(mol) for mol in parsed if mol]
    else:
        smiles_for_stats = SMILES
    results = fcd.precalc(smiles_for_stats)

    # A bare filename has an empty dirname; os.makedirs('') would raise, so
    # only create a directory when the path actually names one.
    # exist_ok avoids the check-then-create race.
    out_dir = os.path.dirname(out)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(out, 'wb') as f:
        pkl.dump(results, f)
Beispiel #2
0
 def test_zero_molecule(self):
     """FCD called on two empty lists must yield a value unequal to itself."""
     empty_result = FCD()([], [])
     # Comparing the result with itself is intentional: the assertion only
     # passes for a value that is not equal to itself, which is how the NaN
     # named in the failure message is detected.
     self.assertNotEqual(
         empty_result,
         empty_result,
         msg="FCD should return np.nan on invalid situations",
     )
Beispiel #3
0
 def test_multiprocess(self):
     """FCD built with ``n_jobs=2`` must match ``self.output_keras`` to 4 places."""
     torch_score = FCD(n_jobs=2)(self.set1, self.set2)
     keras_score = self.output_keras
     gap = abs(keras_score - torch_score)
     # Build the failure message up front so the assertion call stays short.
     failure_note = (
         f"Outputs differ. keras={keras_score},"
         f"torch={torch_score}. diff is {gap}"
     )
     self.assertAlmostEqual(
         torch_score, keras_score, places=4, msg=failure_note)
Beispiel #4
0
            res = res.append(
                pd.DataFrame({
                    'input_file':
                    sampled_file,
                    'outcome': [
                        'KL divergence, hydrogen donors',
                        'KL divergence, hydrogen acceptors',
                        'Jensen-Shannon distance, hydrogen donors',
                        'Jensen-Shannon distance, hydrogen acceptors',
                        'Wasserstein distance, hydrogen donors',
                        'Wasserstein distance, hydrogen acceptors'
                    ],
                    'value': [
                        kl_donors, kl_acceptors, jsd_donors, jsd_acceptors,
                        emd_donors, emd_acceptors
                    ]
                }))

            ## outcome 20: Frechet ChemNet distance
            fcd = FCD(canonize=False)
            fcd_calc = fcd(gen_canonical, org_canonical)
            res = res.append(
                pd.DataFrame({
                    'input_file': sampled_file,
                    'outcome': 'Frechet ChemNet distance',
                    'value': [fcd_calc]
                }))

        # write output
        res.to_csv(output_file, index=False, compression='gzip')