Example #1
0
    def test_save_txt_errors(self):
        """Check errors/warnings raised when saving bitstrings.

        Two cases are exercised against the same temporary file:

        * ``savetxt`` on a database created with ``fp_type=CountFingerprint``
          must raise ``E3FPInvalidFingerprintError``.
        * ``savetxt`` on a database built from an empty ``3 x 2**15`` sparse
          boolean matrix must emit ``E3FPEfficiencyWarning`` (warnings are
          escalated to exceptions so ``assertRaises`` can observe it).
        """
        import warnings
        from scipy.sparse import csr_matrix
        from e3fp.util import E3FPEfficiencyWarning
        from e3fp.fingerprint.db import FingerprintDatabase
        from e3fp.fingerprint.fprint import CountFingerprint
        from e3fp.fingerprint.util import E3FPInvalidFingerprintError

        array = np.array(
            [[1, 0, 0, 1, 1], [0, 0, 0, 1, 0], [0, 1, 1, 1, 1]], dtype=np.bool_
        )
        db = FingerprintDatabase.from_array(
            array, ["1", "2", "3"], fp_type=CountFingerprint
        )

        desc, txt_file = tempfile.mkstemp(suffix=".txt.gz")
        os.close(desc)
        # Clean up in ``finally`` so that a failed expectation does not leave
        # the temporary file behind.
        try:
            with self.assertRaises(E3FPInvalidFingerprintError):
                db.savetxt(txt_file)

            array = csr_matrix((3, 2 ** 15), dtype=np.bool_)
            db = FingerprintDatabase.from_array(array, ["1", "2", "3"])
            with warnings.catch_warnings(record=True):
                # Turn every warning into an exception for ``assertRaises``.
                warnings.simplefilter("error")

                with self.assertRaises(E3FPEfficiencyWarning):
                    db.savetxt(txt_file)
        finally:
            os.unlink(txt_file)
Example #2
0
    def test_create_from_array(self):
        """Compare a database built from an array with one built from fprints.

        ``db1`` is created with ``fp_type=CountFingerprint`` and filled through
        ``add_fingerprints``; ``db2`` is created by ``from_array`` from the same
        random ``uint16`` array. Both must report the same ``fp_type`` and the
        same dense array contents.
        """
        from e3fp.fingerprint.fprint import Fingerprint, CountFingerprint
        from e3fp.fingerprint.db import FingerprintDatabase

        random_bits = np.random.uniform(0, 1, size=(10, 1024)) > 0.9
        array = random_bits.astype(np.uint16)
        fprints = [Fingerprint.from_vector(array[i, :]) for i in range(10)]
        fp_names = [str(i) for i in range(len(fprints))]
        # Label each fingerprint with its row index and the shared level.
        for fprint, fp_name in zip(fprints, fp_names):
            fprint.name = fp_name
            fprint.level = 5

        db1 = FingerprintDatabase(
            fp_type=CountFingerprint, level=5, name="Test"
        )
        db1.add_fingerprints(fprints)
        db2 = FingerprintDatabase.from_array(
            array, fp_names, level=5, name="Test"
        )

        self.assertEqual(db1.fp_type, db2.fp_type)
        dense1 = db1.array.todense().getA()
        dense2 = db2.array.todense().getA()
        np.testing.assert_array_equal(dense1, dense2)
Example #3
0
    def test_load_efficiency_warning(self):
        """Check when loading a saved database emits an efficiency warning.

        ``scipy.__version__`` is temporarily overridden. With ``"0.19"``,
        ``load`` must emit ``E3FPEfficiencyWarning``; with ``"1.0"`` it must
        complete without any warning, since warnings are escalated to
        exceptions inside the ``catch_warnings`` block.
        """
        import warnings
        from e3fp.util import E3FPEfficiencyWarning
        from e3fp.fingerprint.db import FingerprintDatabase
        import scipy

        array = (np.random.uniform(0, 1, size=(10, 1024)) > 0.9).astype(
            np.uint16
        )
        fp_names = [str(i) for i in range(array.shape[0])]
        db = FingerprintDatabase.from_array(
            array, fp_names=fp_names, level=5, props={"index": range(10)}
        )
        desc, db_file = tempfile.mkstemp(suffix=".fpz")
        os.close(desc)

        # Remember the real version so the override below cannot leak into
        # other tests running in the same process.
        real_scipy_version = scipy.__version__
        try:
            db.savez(db_file)

            with warnings.catch_warnings(record=True):
                warnings.simplefilter("error")

                scipy.__version__ = "0.19"
                with self.assertRaises(E3FPEfficiencyWarning):
                    db.load(db_file)

                scipy.__version__ = "1.0"
                db.load(db_file)
        finally:
            # Restore global state and remove the file even on failure.
            scipy.__version__ = real_scipy_version
            os.unlink(db_file)
Example #4
0
    def test_save_txt(self):
        """Ensure bitstrings saved to txt correctly.

        The database is written to a fresh temporary ``.txt.gz`` file twice,
        once with default arguments and once with ``with_names=False``. Each
        time the file contents read back through ``smart_open`` must equal the
        expected bitstring lines.
        """
        from e3fp.fingerprint.db import FingerprintDatabase
        from python_utilities.io_tools import smart_open

        array = np.array(
            [[1, 0, 0, 1, 1], [0, 0, 0, 1, 0], [0, 1, 1, 1, 1]], dtype=np.bool_
        )
        db = FingerprintDatabase.from_array(array, ["1", "2", "3"])

        # (extra ``savetxt`` keyword arguments, expected file contents)
        cases = [
            ({}, b"10011 1\n00010 2\n01111 3\n"),
            ({"with_names": False}, b"10011\n00010\n01111\n"),
        ]
        for save_kwargs, exp_bitstring in cases:
            desc, txt_file = tempfile.mkstemp(suffix=".txt.gz")
            os.close(desc)
            # Remove the temporary file even if saving or the assertion fails.
            try:
                db.savetxt(txt_file, **save_kwargs)
                with smart_open(txt_file, "r") as f:
                    bitstring = f.read()
                self.assertEqual(bitstring, exp_bitstring)
            finally:
                os.unlink(txt_file)
Example #5
0
 def test_get_db_subset(self):
     """Check that ``get_subset`` keeps level, type and the selected rows.

     A ten-fingerprint database is subset by all but its last two names. The
     subset must report the same ``level`` and ``fp_type`` as the parent, and
     its array must match the parent's array without the last two rows.
     """
     from e3fp.fingerprint.db import FingerprintDatabase
     random_bits = np.random.uniform(0, 1, size=(10, 1024)) > .9
     array = random_bits.astype(np.uint16)
     fp_names = [str(row) for row in range(array.shape[0])]
     full_db = FingerprintDatabase.from_array(array,
                                              fp_names=fp_names,
                                              level=5)
     kept_names = fp_names[:-2]
     subset_db = full_db.get_subset(kept_names)
     self.assertEqual(full_db.level, subset_db.level)
     self.assertEqual(full_db.fp_type, subset_db.fp_type)
     # The difference of the two arrays must hold no nonzero entries.
     row_difference = full_db.array[:-2, :] - subset_db.array
     self.assertEqual(row_difference.nnz, 0)
Example #6
0
    def test_fingerprint_has_props(self):
        """Check that props given to ``from_array`` reach the fingerprints.

        The database is built with an ``"index"`` prop holding one float per
        fingerprint. The first fingerprint returned for each name must expose
        the matching value through ``get_prop("index")``.
        """
        from e3fp.fingerprint.db import FingerprintDatabase

        random_bits = np.random.uniform(0, 1, size=(10, 1024)) > 0.9
        array = random_bits.astype(np.uint16)
        fp_names = list(map(str, range(10)))
        indices = list(map(float, range(10)))
        db = FingerprintDatabase.from_array(
            array, fp_names, level=5, name="Test", props={"index": indices}
        )
        for fp_name, expected_index in zip(fp_names, indices):
            fprint = db[fp_name][0]
            self.assertEqual(fprint.get_prop("index"), expected_index)
Example #7
0
 def test_lookup(self):
     """Check fingerprints can be looked up by position and by name.

     For every row of a random 2 x 1024 boolean array, the fingerprint built
     from that row must equal both ``db[i]`` and the first entry returned
     when indexing the database with the corresponding name.
     """
     from e3fp.fingerprint.fprint import Fingerprint
     from e3fp.fingerprint.db import FingerprintDatabase
     array = (np.random.uniform(0, 1, size=(2, 1024)) > .9).astype(np.bool_)
     fp_names = ["fp" + str(i) for i in range(array.shape[0])]
     db = FingerprintDatabase.from_array(array, fp_names, name="Test")
     for i in range(array.shape[0]):
         # Build the reference fingerprint once and reuse it for both lookups.
         expected = Fingerprint.from_vector(array[i, :])
         self.assertEqual(expected, db[i])
         self.assertEqual(expected, db[db.fp_names[i]][0])
Example #8
0
 def test_concat_dbs(self):
     """Check that ``concat`` of row-wise pieces reproduces the full array.

     A random 10 x 1024 array is split into five two-row databases, each with
     its slice of names and ``"index"`` props. The dense array of the
     concatenated database must equal the original array.
     """
     from e3fp.fingerprint.db import concat, FingerprintDatabase
     random_bits = np.random.uniform(0, 1, size=(10, 1024)) > .9
     array = random_bits.astype(np.uint16)
     fp_names = list(map(str, range(10)))
     indices = list(map(float, range(10)))
     # One database per consecutive pair of rows: 0-1, 2-3, ..., 8-9.
     dbs = [
         FingerprintDatabase.from_array(
             array[start:start + 2, :],
             fp_names[start:start + 2],
             level=5,
             name="Test",
             props={"index": indices[start:start + 2]})
         for start in range(0, 10, 2)
     ]
     join_db = concat(dbs)
     joined_dense = join_db.array.todense().getA()
     np.testing.assert_array_equal(joined_dense, array)
Example #9
0
 def test_roundtrip_zlib(self):
     """Ensure DB is the same after saving with savez and loading.

     The database is saved to a temporary ``.fpz`` file and loaded back. The
     loaded database must compare equal to the original, and its ``"index"``
     prop must contain the integers 0 through 9.
     """
     from e3fp.fingerprint.db import FingerprintDatabase
     array = (np.random.uniform(0, 1, size=(10, 1024)) > .9).astype(
         np.uint16)
     fp_names = [str(i) for i in range(array.shape[0])]
     db = FingerprintDatabase.from_array(array,
                                         fp_names=fp_names,
                                         level=5,
                                         props={"index": range(10)})
     desc, db_file = tempfile.mkstemp(suffix=".fpz")
     os.close(desc)
     # Remove the temporary file even if saving or loading raises.
     try:
         db.savez(db_file)
         db2 = db.load(db_file)
     finally:
         os.unlink(db_file)
     self.assertEqual(db, db2)
     self.assertListEqual(db2.get_prop("index").tolist(), list(range(10)))