def test_get_descriptors(self):
    """
    Check that descriptors added to a ``MemoryDescriptorIndex`` can be
    fetched back by UUID, both one at a time and several at once.
    """
    # Five descriptors from ``random_descriptor()``; referred to below by
    # their position (0 through 4) in this list.
    elements = [random_descriptor() for _ in range(5)]

    mem_index = MemoryDescriptorIndex()
    mem_index.add_many_descriptors(elements)

    # Single-descriptor look-up by UUID.
    fetched = mem_index.get_descriptor(elements[1].uuid())
    ntools.assert_equal(fetched, elements[1])

    # Multi-descriptor look-up. The result is compared as a set, so the
    # order in which the index yields the descriptors is not asserted.
    requested_uuids = [elements[0].uuid(), elements[3].uuid()]
    fetched_many = list(mem_index.get_many_descriptors(requested_uuids))
    ntools.assert_equal(len(fetched_many), 2)
    ntools.assert_equal(set(fetched_many), {elements[0], elements[3]})
def test_get_descriptors(self):
    """
    Check that descriptors added to a ``MemoryDescriptorIndex`` can be
    fetched back by UUID, both one at a time and several at once.
    """
    # Five descriptors from ``random_descriptor()``; referred to below by
    # their position (0 through 4) in this list.
    elements = [random_descriptor() for _ in range(5)]

    mem_index = MemoryDescriptorIndex()
    mem_index.add_many_descriptors(elements)

    # Single-descriptor look-up by UUID.
    fetched = mem_index.get_descriptor(elements[1].uuid())
    ntools.assert_equal(fetched, elements[1])

    # Multi-descriptor look-up. The result is compared as a set, so the
    # order in which the index yields the descriptors is not asserted.
    requested_uuids = [elements[0].uuid(), elements[3].uuid()]
    fetched_many = list(mem_index.get_many_descriptors(requested_uuids))
    ntools.assert_equal(len(fetched_many), 2)
    ntools.assert_equal(set(fetched_many), {elements[0], elements[3]})
# Score every phone number by the best "positive" classification among the
# image descriptors associated with it, then write the scores out as CSV.
log.info("Classifying phone imagery descriptors")

# Collect the index's keys once, up front, so each loop iteration only has to
# perform a set intersection.
descriptor_index_shas = set(descriptor_index.iterkeys())

for i, p in enumerate(phone2shas, 1):
    log.info('%s (%d / %d)', p, i, len(phone2shas))

    # Not all source "images" have descriptors since some URLs returned
    # non-image files. Intersect the phone's SHAs with what was actually
    # computed, and raise if that leaves nothing to classify.
    indexed_shas = set(phone2shas[p]) & descriptor_index_shas
    if not indexed_shas:
        raise RuntimeError(
            "Phone number '%s' has no valid images associated "
            "with it.\nBefore:\n%s\n\nAfter:\n%s"
            % (p, phone2shas[p], indexed_shas))

    # NOTE(review): the SHAs are passed as unpacked positional arguments;
    # confirm this matches the index's ``get_many_descriptors`` signature.
    descriptor_elems = descriptor_index.get_many_descriptors(*indexed_shas)
    e2c = classifier.classify_async(descriptor_elems, c_factory,
                                    use_multiprocessing=True, ri=1.)
    pos_scores = [c['positive'] for c in e2c.values()]

    # Max of pool: a phone's score is its single highest positive score.
    phone2score[p] = max(pos_scores)

log.info("Saving score map")
# Use a context manager so the output file is flushed and closed
# deterministically instead of relying on garbage collection.
with open(PHONE2SCORE_OUTPUT_FILEPATH, 'w') as score_file:
    csv.writer(score_file).writerows(sorted(phone2score.iteritems()))
# Score every phone number by the best "positive" classification among the
# image descriptors associated with it, then write the scores out as CSV.
log.info("Classifying phone imagery descriptors")

# Collect the index's keys once, up front, so each loop iteration only has to
# perform a set intersection.
descriptor_index_shas = set(descriptor_index.iterkeys())

for i, p in enumerate(phone2shas, 1):
    log.info('%s (%d / %d)', p, i, len(phone2shas))

    # Not all source "images" have descriptors since some URLs returned
    # non-image files. Intersect the phone's SHAs with what was actually
    # computed, and raise if that leaves nothing to classify.
    indexed_shas = set(phone2shas[p]) & descriptor_index_shas
    if not indexed_shas:
        raise RuntimeError(
            "Phone number '%s' has no valid images associated "
            "with it.\nBefore:\n%s\n\nAfter:\n%s"
            % (p, phone2shas[p], indexed_shas))

    # NOTE(review): the SHAs are passed as unpacked positional arguments;
    # confirm this matches the index's ``get_many_descriptors`` signature.
    descriptor_elems = descriptor_index.get_many_descriptors(*indexed_shas)
    e2c = classifier.classify_async(descriptor_elems, c_factory,
                                    use_multiprocessing=True, ri=1.)
    pos_scores = [c['positive'] for c in e2c.values()]

    # Max of pool: a phone's score is its single highest positive score.
    phone2score[p] = max(pos_scores)

log.info("Saving score map")
# Use a context manager so the output file is flushed and closed
# deterministically (and before "Done" is logged) instead of relying on
# garbage collection.
with open(PHONE2SCORE_OUTPUT_FILEPATH, 'w') as score_file:
    csv.writer(score_file).writerows(sorted(phone2score.iteritems()))

log.info("Done")