Example #1
0
 def test_elba(self):
     """Insert 32 random vectors and check each query finds its own index.

     One linear layer is passed as both encoder arguments, so searching
     with the inserted data itself is expected to return exactly the
     index under which each row was inserted.
     """
     #using the same function for fq and fd
     encoder = torch.nn.Linear(784, 128)
     elba = EfficientLearnableBinaryAccess(encoder, encoder,
                                           HashingMultiBernoulliSDS(1, 1))

     # fixed seed keeps the random inputs reproducible across runs
     rng = np.random.RandomState(0xcafe)
     data = torch.tensor(rng.normal(0, 1, (32, 784)), dtype=torch.float32)
     elba.batch_insert(data, range(32))

     expected = [{i} for i in range(32)]
     self.assertEqual(elba.batch_search(data), expected)

     # only the first item yielded by each per-query generator is compared
     first_hits = [next(gen) for gen in elba.batch_itersearch(data)]
     self.assertEqual(first_hits, expected)
Example #2
0
    def test_itersearch(self):
        """Check the sets yielded by ``itersearch`` for the two fixture queries.

        Each query is searched twice: once with only the fixture
        log-probabilities, and once with those values stacked together with
        their explicit complement ``log(1 - p)``.  Both input forms must
        yield the same sequence of sets.
        """
        expected0 = [
            {0},  #with [0, 0, 1] -> {0}, {}, {},
            #with [0, 1, 1] -> {}, {}, {},
        ]

        expected1 = [
            {1},
            {2},  #with [0, 1, 0] -> {}, {1}, {2},
            {0},  #with [0, 0, 0] -> {2}, {}, {0}, (but 2 duplicate)
        ]

        mbht = HashingMultiBernoulliSDS(3, 2)
        mbht.batch_insert(self.docs_log_probs, self.indexes)
        result0 = list(mbht.itersearch(self.qurs_log_probs[0], nlookups=2))
        result1 = list(mbht.itersearch(self.qurs_log_probs[1], nlookups=2))
        self.assertEqual(result0, expected0)
        self.assertEqual(result1, expected1)

        #given log_probs1
        log_probs1 = np.log(1 - np.exp(self.qurs_log_probs))
        # stacking on axis=1 keeps queries on the leading axis, so
        # log_probs[i] holds both rows (given and complement) for query i
        log_probs = np.stack([self.qurs_log_probs, log_probs1], axis=1)
        # search with the stacked input; previously the plain fixture was
        # passed again, leaving this input form untested
        result0 = list(mbht.itersearch(log_probs[0], nlookups=2))
        result1 = list(mbht.itersearch(log_probs[1], nlookups=2))
        self.assertEqual(result0, expected0)
        self.assertEqual(result1, expected1)
Example #3
0
    def test_state(self):
        """Round-trip the tables through ``get_state`` and ``set_state``."""
        original = HashingMultiBernoulliSDS(3, 2)
        original.batch_insert(self.docs_log_probs, self.indexes)
        snapshot = original.get_state()

        # the value returned by set_state is used as the restored structure
        restored = HashingMultiBernoulliSDS(3, 2).set_state(snapshot)
        self.assertEqual(original.tables, restored.tables)
 def test_raise(self):
     """``step`` must raise ``ValueError`` with 16 outputs and ``nindex=3``."""
     first_net = torch.nn.Linear(32, 16)
     second_net = torch.nn.Linear(32, 16)
     model = Fbeta(
         first_net,
         second_net,
         HashingMultiBernoulliSDS(1, 1),
         -np.log(32),  # log(1/bs)
         match_dist=0,
         nindex=3,  #16%3 != 0
     )
     # identity inputs, with the identity matrix reused as the boolean mask
     inputs = torch.eye(32, dtype=torch.float)
     mask = torch.eye(32, dtype=torch.bool)
     with self.assertRaises(ValueError):
         model.step(inputs, inputs, mask)
Example #5
0
def evaluation(results,
               prefix,
               nbits,
               elba,
               queries,
               documents,
               relevances,
               batch_size=100,
               sswrs_timeout=30):
    """Encode queries and documents, then record retrieval statistics.

    Appends Hamming-count and precision/recall arrays to ``results`` under
    keys starting with ``prefix``, then calls ``eval_sswr`` once for a
    ``HammingRadiusSDS`` index and once for a ``HashingMultiBernoulliSDS``
    index, both built from the document encodings.

    Args:
        results: mapping whose values support ``append``. The keys
            ``prefix`` + ``'_pos_counts'`` / ``'_neg_counts'`` /
            ``'_precisions'`` / ``'_recalls'`` are indexed directly, so they
            must exist already or be created by the mapping on access.
            It is also handed to ``eval_sswr``.
        prefix: string prepended to every key used here.
        nbits: forwarded to ``HammingRadiusSDS`` as ``nbits``.
        elba: object providing ``fd``, ``fq`` and ``_log_sigmoid_pairs``.
        queries: inputs passed in batches to ``elba.fq``.
        documents: inputs passed in batches to ``elba.fd``.
        relevances: forwarded to ``conditional_hamming_counts`` and
            ``eval_sswr``.
        batch_size: forwarded to ``batch_call``.
        sswrs_timeout: forwarded to ``eval_sswr``.

    Returns:
        Tuple ``(hr_sswr, mb_sswr)`` holding the two ``eval_sswr`` results.
    """
    ndocs = len(documents)

    # encoding only; no gradients are needed for evaluation
    with torch.no_grad():
        doc_batches = batch_call(elba.fd, documents, batch_size)
        dlogits = torch.cat(doc_batches, dim=0)
        qry_batches = batch_call(elba.fq, queries, batch_size)
        qlogits = torch.cat(qry_batches, dim=0)
        dls_pairs = elba._log_sigmoid_pairs(dlogits)
        qls_pairs = elba._log_sigmoid_pairs(qlogits)

    # a bit is set wherever the logit is strictly positive
    dbits = 0 < dlogits
    qbits = 0 < qlogits

    pos_counts, neg_counts = conditional_hamming_counts(
        dbits, qbits, relevances)
    precisions, recalls = conditional_counts_to_pr_curve(
        pos_counts, neg_counts)

    recorded = (
        ('_pos_counts', pos_counts),
        ('_neg_counts', neg_counts),
        ('_precisions', precisions),
        ('_recalls', recalls),
    )
    for suffix, values in recorded:
        results[prefix + suffix].append(values.cpu().numpy())

    # Hamming-radius index over the hard bits.
    # NOTE(review): 2081 == 1 + 64 + C(64, 2), i.e. the number of codes
    # within radius 2 for 64 bits -- presumably a lookup budget; confirm.
    hr_index = HammingRadiusSDS(nbits=nbits, radius=2)
    hr = hr_index.batch_insert(dbits, range(ndocs))
    hr_gens = hr.batch_itersearch(qbits, yield_empty=True)
    hr_sswr = eval_sswr(results, prefix + '_2081hr', ndocs, relevances,
                        hr_gens, 2081, sswrs_timeout)

    # multi-Bernoulli index over the log-sigmoid pairs
    mb_index = HashingMultiBernoulliSDS(1, 1)
    mb = mb_index.batch_insert(dls_pairs, range(ndocs))
    mb_gens = mb.batch_itersearch(qls_pairs, yield_empty=True)
    mb_sswr = eval_sswr(results, prefix + '_5001mb', ndocs, relevances,
                        mb_gens, 5001, sswrs_timeout)

    return hr_sswr, mb_sswr
 def test_step(self):
     """Check that repeated ``step`` calls lower the value ``step`` returns.

     The value returned by the very first ``step`` is compared with the one
     returned after 500 further steps on the same inputs (presumably a
     training loss -- confirm against ``HashNet.step``).
     """
     torch.manual_seed(0)  # fixed seed so the layer initialisation is stable
     model = HashNet(
         torch.nn.Linear(32, 64),
         torch.nn.Linear(32, 64),
         HashingMultiBernoulliSDS(1, 1),
         1 / 32,
     )
     x = torch.eye(32, dtype=torch.float)
     r = torch.eye(32, dtype=torch.bool)
     l0 = model.step(x, x, r)
     for _ in range(500):
         model.step(x, x, r)
     l1 = model.step(x, x, r)
     # assertLess reports both operands on failure, unlike assertTrue(l1 < l0)
     self.assertLess(l1, l0)
 def test_step(self):
     """Run 500 steps, then check every row retrieves exactly its own index."""
     torch.manual_seed(0)
     first_net = torch.nn.Linear(32, 16)
     second_net = torch.nn.Linear(32, 16)
     model = Fbeta(
         first_net,
         second_net,
         HashingMultiBernoulliSDS(1, 1),
         -np.log(32),  # log(1/bs)
         match_dist=0,
     )
     # identity inputs, with the identity matrix reused as the boolean mask
     inputs = torch.eye(32, dtype=torch.float)
     mask = torch.eye(32, dtype=torch.bool)
     for _ in range(500):
         model.step(inputs, inputs, mask)

     model.batch_insert(inputs, range(32))
     found = model.batch_search(inputs)
     expected = [{i} for i in range(32)]
     self.assertEqual(found, expected)
Example #8
0
    def test_batch_search(self):
        """Check ``batch_search`` results for both accepted query forms.

        The fixture queries are searched first on their own, then stacked
        with their explicit complement ``log(1 - p)``; both calls must give
        the same per-query sets.
        """
        table = HashingMultiBernoulliSDS(3, 2)
        table.batch_insert(self.docs_log_probs, self.indexes)

        wanted = [
            {0},  #for query 0
            {0, 1, 2},  #for query 1
        ]
        found = table.batch_search(self.qurs_log_probs, nlookups=2)
        self.assertEqual(found, wanted)

        #given log_probs1
        complement = np.log(1 - np.exp(self.qurs_log_probs))
        stacked = np.stack([self.qurs_log_probs, complement], axis=1)
        found = list(table.batch_search(stacked, nlookups=2))
        self.assertEqual(found, wanted)
Example #9
0
 def test_get_and_set_state(self):
     """Smoke test: feeding ``get_state`` output to ``set_state`` must run."""
     encoder = torch.nn.Linear(784, 128)
     sds = HashingMultiBernoulliSDS(1, 1)
     elba = EfficientLearnableBinaryAccess(encoder, encoder, sds)
     state = elba.get_state()
     elba.set_state(state)
Example #10
0
 def test_get_generator_error(self):
     """Inserting a 3-dimensional array must raise ``ValueError``."""
     bad_input = np.zeros((1, 2, 3))
     table = HashingMultiBernoulliSDS(3, 2)
     self.assertRaises(ValueError, table.insert, bad_input, 1)
Example #11
0
 def test_bucket_stats(self):
     """After inserting the fixture, both bucket-size stats are ``[1, 1, 1]``."""
     table = HashingMultiBernoulliSDS(3, 2)
     table.batch_insert(self.docs_log_probs, self.indexes)
     repr(table)  #make sure it runs

     ones = [1, 1, 1]
     avg_sizes = table.get_buckets_avg_size()
     self.assertEqual(avg_sizes, ones)
     max_sizes = table.get_buckets_max_size()
     self.assertEqual(max_sizes, ones)
Example #12
0
 def test_batch_insert(self):
     """Smoke test: ``batch_insert`` on a fresh structure must not raise."""
     table = HashingMultiBernoulliSDS(3, 2)
     docs, ids = self.docs_log_probs, self.indexes
     table.batch_insert(docs, ids)
Example #13
0
 def test_numpy_cast(self):
     """Smoke test: ``batch_insert`` must accept the fixture as a torch tensor.

     NOTE(review): judging by the test name this presumably exercises a
     tensor-to-NumPy conversion inside the structure -- confirm.
     """
     table = HashingMultiBernoulliSDS(3, 2)
     docs_tensor = torch.tensor(self.docs_log_probs)
     table.batch_insert(docs_tensor, self.indexes)