Example #1
0
 def _unfolded_ecfps_nodupes(self):
     if self._ecfps_no_dupes is None:
         ecfp_nodupes_file = op.join(manysources_dataset_root(self.name),
                                     '03-ecfps-nodupes', '%s.ecfps.h5' % self.name)
         if not op.exists(ecfp_nodupes_file):
             ecfps = self._unfolded_ecfps()
             ufp = UnfoldedFingerprints(ecfps.molids,
                                        ecfps.i2s,
                                        zero_dupes(ecfps.csr, by_rows=False),  # Hack 1, this should be Configurable
                                        failed_molids=ecfps.failed_molids)
             ensure_dir(op.dirname(ecfp_nodupes_file))
             with open(op.join(op.dirname(ecfp_nodupes_file), 'config.txt'), 'w') as writer:
                 writer.write('Same as 02-ecfps, but removed columns that have the same value accross all rows.')
             ufp.save(ecfp_nodupes_file)
         self._ecfps = UnfoldedFingerprints.load(ecfp_nodupes_file)
     return self._ecfps
Example #2
0
 def _unfolded_ecfps(self):
     if self._ecfps is None:
         ecfp_file = op.join(manysources_dataset_root(self.name), '02-ecfps', '%s.ecfps.h5' % self.name)
         if not op.exists(ecfp_file):
             fingerprinter = RDKMorganFingerprinter()
             for molid, mol in self.mols():
                 print molid
                 fingerprinter.add_mol(molid, mol)
             ensure_dir(op.dirname(ecfp_file))
             with open(op.join(op.dirname(ecfp_file), 'config.json'), 'w') as writer:
                 writer.write(fingerprinter.what().id())
             fingerprinter.fingerprints().save(ecfp_file)
         self._ecfps = UnfoldedFingerprints.load(ecfp_file)
     return self._ecfps