log.info("Loading plugins") descriptor_index = MemoryDescriptorIndex( file_cache=DESCRIPTOR_INDEX_FILE_CACHE) #: :type: smqtk.algorithms.Classifier classifier = from_plugin_config(classifier_config['plugins']['classifier'], get_classifier_impls()) c_factory = ClassificationElementFactory(MemoryClassificationElement, {}) #: :type: dict[str, list[str]] phone2shas = json.load(open(PHONE_SHA1_JSON)) #: :type: dict[str, float] phone2score = {} log.info("Classifying phone imagery descriptors") i = 0 descriptor_index_shas = set(descriptor_index.iterkeys()) for p in phone2shas: log.info('%s (%d / %d)', p, i + 1, len(phone2shas)) # Not all source "images" have descriptors since some URLs returned # non-image files. Intersect phone sha's with what was actually # computed. Warn if this reduces descriptors for classification to zero. indexed_shas = set(phone2shas[p]) & descriptor_index_shas if not indexed_shas: raise RuntimeError( "Phone number '%s' has no valid images associated " "with it.\nBefore:\n%s\n\nAfter:\n%s" % (p, phone2shas[p], indexed_shas)) descriptor_elems = descriptor_index.get_many_descriptors(*indexed_shas) e2c = classifier.classify_async(descriptor_elems, c_factory,
def test_iterkeys(self):
    # iterkeys() should yield exactly the UUIDs of the descriptors that
    # were added to the index -- no more, no fewer.
    index = MemoryDescriptorIndex()
    descriptors = [random_descriptor() for _ in range(100)]
    index.add_many_descriptors(descriptors)

    # Compare as sets: only membership is checked, not iteration order.
    actual_uuids = set(index.iterkeys())
    expected_uuids = set(elem.uuid() for elem in descriptors)
    ntools.assert_equal(actual_uuids, expected_uuids)
def test_iterkeys(self):
    # Verify that iterkeys() yields exactly the UUIDs of the descriptors
    # previously added to a fresh in-memory index.
    i = MemoryDescriptorIndex()
    # ``range`` rather than the Python 2-only ``xrange``: the latter raises
    # NameError on Python 3, and for 100 items the list-vs-iterator
    # difference on Python 2 is irrelevant.
    descrs = [random_descriptor() for _ in range(100)]
    i.add_many_descriptors(descrs)
    # Compare as sets: only membership is checked, not iteration order.
    ntools.assert_equal(set(i.iterkeys()),
                        set(d.uuid() for d in descrs))
# Load the descriptor index and the configured classifier plugin, then
# classify the image descriptors associated with each phone number.
log.info("Loading plugins")
descriptor_index = MemoryDescriptorIndex(
    file_cache=DESCRIPTOR_INDEX_FILE_CACHE
)
#: :type: smqtk.algorithms.Classifier
classifier = from_plugin_config(classifier_config['plugins']['classifier'],
                                get_classifier_impls())
c_factory = ClassificationElementFactory(MemoryClassificationElement, {})

# Read the phone -> SHA1 list mapping. The context manager closes the file
# handle as soon as parsing finishes instead of leaving it to the garbage
# collector.
with open(PHONE_SHA1_JSON) as phone_sha1_file:
    #: :type: dict[str, list[str]]
    phone2shas = json.load(phone_sha1_file)
#: :type: dict[str, float]
phone2score = {}

log.info("Classifying phone imagery descriptors")
# Progress counter used only in the log line below.
# NOTE(review): ``i`` is not advanced in the visible part of the loop --
# presumably it is incremented further down; confirm.
i = 0
# Materialize the index keys once so each per-phone intersection below does
# not have to re-iterate the whole index.
descriptor_index_shas = set(descriptor_index.iterkeys())
for p in phone2shas:
    log.info('%s (%d / %d)', p, i + 1, len(phone2shas))
    # Not all source "images" have descriptors since some URLs returned
    # non-image files. Intersect phone sha's with what was actually
    # computed. Raise if this reduces descriptors for classification to
    # zero.
    indexed_shas = set(phone2shas[p]) & descriptor_index_shas
    if not indexed_shas:
        raise RuntimeError(
            "Phone number '%s' has no valid images associated "
            "with it.\nBefore:\n%s\n\nAfter:\n%s"
            % (p, phone2shas[p], indexed_shas))

    descriptor_elems = descriptor_index.get_many_descriptors(*indexed_shas)
    e2c = classifier.classify_async(descriptor_elems, c_factory,
                                    use_multiprocessing=True, ri=1.)