def update_index(cls, dr):
    """Load index entries selected by ``dr`` that are not loaded yet.

    The query filters start as a copy of ``dr.source_filters`` and are
    narrowed to completed events plus the indexer / approximator shasums of
    ``dr``. Every matching entry with a positive ``count`` that is missing
    from ``visual_index.loaded_entries`` is loaded into the retriever stored
    at ``cls._visual_retriever[dr.pk]``, and the range of positions it
    occupies is recorded under the entry's primary key.

    :param dr: object exposing ``source_filters``, ``indexer_shasum``,
        ``approximator_shasum`` and ``pk``.
    :return: None
    """
    source_filters = dr.source_filters.copy()
    # Only select entries with completed events, otherwise indexes might not
    # be synced or complete.
    source_filters['event__completed'] = True
    if dr.indexer_shasum:
        source_filters['indexer_shasum'] = dr.indexer_shasum
    if dr.approximator_shasum:
        source_filters['approximator_shasum'] = dr.approximator_shasum
    else:
        # Required otherwise approximate index entries are selected
        source_filters['approximator_shasum'] = None
    index_entries = IndexEntries.objects.filter(**source_filters)
    visual_index = cls._visual_retriever[dr.pk]
    for index_entry in index_entries:
        # Skip entries that are already loaded or that contain nothing.
        if index_entry.pk in visual_index.loaded_entries or index_entry.count <= 0:
            continue
        if visual_index.algorithm == "LOPQ":
            # Only the entries are handed to the LOPQ retriever; the first
            # element returned by load_index() is not used here.
            _, entries = index_entry.load_index()
            logging.info("loading approximate index {}".format(index_entry.pk))
            start_index = len(visual_index.entries)
            visual_index.load_index(entries=entries)
            visual_index.loaded_entries[index_entry.pk] = indexer.IndexRange(
                start=start_index, end=len(visual_index.entries) - 1)
        elif visual_index.algorithm == 'FAISS':
            index_file_path, entries = index_entry.load_index()
            logging.info("loading FAISS index {}".format(index_entry.pk))
            start_index = visual_index.findex
            visual_index.load_index(index_file_path, entries)
            visual_index.loaded_entries[index_entry.pk] = indexer.IndexRange(
                start=start_index, end=visual_index.findex - 1)
        else:
            vectors, entries = index_entry.load_index()
            logging.info("Starting {} in {} with shape {}".format(
                index_entry.video_id, visual_index.name, vectors.shape))
            try:
                start_index = visual_index.findex
                visual_index.load_index(vectors, entries)
                visual_index.loaded_entries[index_entry.pk] = indexer.IndexRange(
                    start=start_index, end=visual_index.findex - 1)
            except Exception:
                # Best effort: one bad entry must not stop the remaining
                # entries from loading. Catching Exception (not a bare
                # except) lets KeyboardInterrupt/SystemExit propagate, and
                # logging.exception keeps the traceback.
                logging.exception(
                    "ERROR Failed to load {} vectors shape {} entries {}".format(
                        index_entry.video_id, vectors.shape, len(entries)))
            else:
                logging.info("finished {} in {}".format(
                    index_entry.pk, visual_index.name))
def update_index(cls, dr):
    """Load index entries selected by ``dr`` that are not loaded yet.

    The query filters start as a copy of ``dr.source_filters`` and are
    narrowed by the indexer / approximator shasums of ``dr``. Every matching
    entry with a positive ``count`` that is missing from
    ``visual_index.loaded_entries`` is loaded into the retriever stored at
    ``cls._visual_retriever[dr.pk]``, and the range of positions it occupies
    is recorded under the entry's primary key.

    :param dr: object exposing ``source_filters``, ``indexer_shasum``,
        ``approximator_shasum`` and ``pk``.
    :return: None
    """
    source_filters = dr.source_filters.copy()
    if dr.indexer_shasum:
        source_filters['indexer_shasum'] = dr.indexer_shasum
    if dr.approximator_shasum:
        source_filters['approximator_shasum'] = dr.approximator_shasum
    else:
        # Required otherwise approximate index entries are selected
        source_filters['approximator_shasum'] = None
    index_entries = IndexEntries.objects.filter(**source_filters)
    visual_index = cls._visual_retriever[dr.pk]
    for index_entry in index_entries:
        # Skip entries that are already loaded or that contain nothing.
        if index_entry.pk in visual_index.loaded_entries or index_entry.count <= 0:
            continue
        vectors, entries = index_entry.load_index()
        if visual_index.algorithm == "LOPQ":
            # The LOPQ retriever is given the entries only.
            logging.info("loading approximate index {}".format(index_entry.pk))
            start_index = len(visual_index.entries)
            visual_index.load_index(entries=entries)
            visual_index.loaded_entries[index_entry.pk] = indexer.IndexRange(
                start=start_index, end=len(visual_index.entries) - 1)
            continue
        logging.info("Starting {} in {} with shape {}".format(
            index_entry.video_id, visual_index.name, vectors.shape))
        try:
            start_index = visual_index.findex
            visual_index.load_index(vectors, entries)
            visual_index.loaded_entries[index_entry.pk] = indexer.IndexRange(
                start=start_index, end=visual_index.findex - 1)
        except Exception:
            # Best effort: one bad entry must not stop the remaining entries
            # from loading. Catching Exception (not a bare except) lets
            # KeyboardInterrupt/SystemExit propagate, and logging.exception
            # keeps the traceback.
            logging.exception(
                "ERROR Failed to load {} vectors shape {} entries {}".format(
                    index_entry.video_id, vectors.shape, len(entries)))
        else:
            loaded_range = visual_index.loaded_entries[index_entry.pk]
            # The format string previously had three placeholders for five
            # arguments, so the range bounds were silently dropped.
            logging.info(
                "finished {} in {}, current shape {}, range {} - {}".format(
                    index_entry.video_id,
                    visual_index.name,
                    visual_index.index.shape,
                    loaded_range.start,
                    loaded_range.end))
def update_index(self, dr):
    """Load index entries selected by ``dr`` that are not loaded yet.

    Entries are queried with ``dr.source_filters``. For each entry with a
    positive ``count`` that is missing from ``visual_index.loaded_entries``,
    the feature vectors and the JSON entries file are read from
    ``<MEDIA_ROOT>/<video_id>/indexes/`` and passed to the retriever stored
    at ``RetrieverTask._visual_retriever[dr.pk]``.

    :param dr: object exposing ``source_filters`` and ``pk``.
    :return: None
    """
    index_entries = IndexEntries.objects.filter(**dr.source_filters)
    visual_index = RetrieverTask._visual_retriever[dr.pk]
    for index_entry in index_entries:
        # Skip entries that are already loaded or that contain nothing.
        if index_entry.pk in visual_index.loaded_entries or index_entry.count <= 0:
            continue
        index_dir = "{}/{}/indexes".format(
            settings.MEDIA_ROOT, index_entry.video_id)
        vectors = indexer.np.load(
            "{}/{}".format(index_dir, index_entry.features_file_name))
        # Use a context manager so the handle is closed; the previous
        # json.load(file(...)) leaked it and file() is Python 2 only.
        with open("{}/{}".format(
                index_dir, index_entry.entries_file_name)) as entries_fh:
            vector_entries = json.load(entries_fh)
        logging.info("Starting {} in {} with shape {}".format(
            index_entry.video_id, visual_index.name, vectors.shape))
        start_index = visual_index.findex
        try:
            visual_index.load_index(vectors, vector_entries)
        except Exception:
            # Best effort: keep going with the remaining entries. Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate, and logging.exception keeps the traceback.
            logging.exception(
                "ERROR Failed to load {} vectors shape {} entries {}".format(
                    index_entry.video_id, vectors.shape, len(vector_entries)))
        # The entry is recorded even when loading failed (unchanged
        # behaviour), so the guard above skips it on later calls.
        # NOTE(review): confirm that not retrying failed entries is intended.
        loaded_range = indexer.IndexRange(
            start=start_index, end=visual_index.findex - 1)
        visual_index.loaded_entries[index_entry.pk] = loaded_range
        # The format string previously had three placeholders for five
        # arguments, so the range bounds were silently dropped.
        logging.info(
            "finished {} in {}, current shape {}, range {} - {}".format(
                index_entry.video_id,
                visual_index.name,
                visual_index.index.shape,
                loaded_range.start,
                loaded_range.end))
def refresh_index(self, index_name):
    """Load every not-yet-loaded index entry belonging to ``index_name``.

    All ``IndexEntries`` rows are scanned. Those whose ``algorithm`` equals
    ``index_name`` and whose primary key is missing from
    ``visual_index.loaded_entries`` have their feature vectors and JSON
    entries file read from ``<MEDIA_ROOT>/<video_id>/indexes/`` and passed to
    ``self.visual_indexer[index_name]``.

    :param index_name: key into ``self.visual_indexer``; also compared with
        each entry's ``algorithm``.
    :return: None
    """
    index_entries = IndexEntries.objects.all()
    visual_index = self.visual_indexer[index_name]
    for index_entry in index_entries:
        # Skip entries that are already loaded or belong to another index.
        if index_entry.pk in visual_index.loaded_entries or index_entry.algorithm != index_name:
            continue
        index_dir = "{}/{}/indexes".format(
            settings.MEDIA_ROOT, index_entry.video_id)
        vectors = indexer.np.load(
            "{}/{}".format(index_dir, index_entry.features_file_name))
        # Use a context manager so the handle is closed; the previous
        # json.load(file(...)) leaked it and file() is Python 2 only.
        with open("{}/{}".format(
                index_dir, index_entry.entries_file_name)) as entries_fh:
            vector_entries = json.load(entries_fh)
        logging.info("Starting {} in {}".format(
            index_entry.video_id, visual_index.name))
        start_index = visual_index.findex
        try:
            visual_index.load_index(vectors, vector_entries)
        except Exception:
            # Best effort: keep going with the remaining entries. Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate, and logging.exception keeps the traceback.
            logging.exception(
                "ERROR Failed to load {} ".format(index_entry.video_id))
        # The entry is recorded even when loading failed (unchanged
        # behaviour), so the guard above skips it on later calls.
        # NOTE(review): confirm that not retrying failed entries is intended.
        loaded_range = indexer.IndexRange(
            start=start_index, end=visual_index.findex - 1)
        visual_index.loaded_entries[index_entry.pk] = loaded_range
        # The format string previously had three placeholders for five
        # arguments, so the range bounds were silently dropped.
        logging.info(
            "finished {} in {}, current shape {}, range {} - {}".format(
                index_entry.video_id,
                visual_index.name,
                visual_index.index.shape,
                loaded_range.start,
                loaded_range.end))
def refresh_index(self, index_name):
    """Load every not-yet-loaded, non-empty index entry of ``index_name``.

    All ``IndexEntries`` rows are scanned. Those whose ``algorithm`` equals
    ``index_name``, whose ``count`` is positive and whose primary key is
    missing from ``visual_index.loaded_entries`` have their feature vectors
    and JSON entries file read from ``<MEDIA_ROOT>/<video_id>/indexes/`` and
    passed to ``self.visual_indexer[index_name]``.

    # TODO: speed this up by skipping refreshes when count is unchanged.

    :param index_name: key into ``self.visual_indexer``; also compared with
        each entry's ``algorithm``.
    :return: None
    """
    index_entries = IndexEntries.objects.all()
    visual_index = self.visual_indexer[index_name]
    for index_entry in index_entries:
        # Skip entries that are already loaded, belong to another index, or
        # contain nothing. Checks run in the original order.
        if (index_entry.pk in visual_index.loaded_entries
                or index_entry.algorithm != index_name
                or index_entry.count <= 0):
            continue
        index_dir = "{}/{}/indexes".format(
            settings.MEDIA_ROOT, index_entry.video_id)
        vectors = indexer.np.load(
            "{}/{}".format(index_dir, index_entry.features_file_name))
        # Use a context manager so the handle is closed; the previous
        # json.load(file(...)) leaked it and file() is Python 2 only.
        with open("{}/{}".format(
                index_dir, index_entry.entries_file_name)) as entries_fh:
            vector_entries = json.load(entries_fh)
        logging.info("Starting {} in {} with shape {}".format(
            index_entry.video_id, visual_index.name, vectors.shape))
        start_index = visual_index.findex
        try:
            visual_index.load_index(vectors, vector_entries)
        except Exception:
            # Best effort: keep going with the remaining entries. Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate, and logging.exception keeps the traceback.
            logging.exception(
                "ERROR Failed to load {} vectors shape {} entries {}".format(
                    index_entry.video_id, vectors.shape, len(vector_entries)))
        # The entry is recorded even when loading failed (unchanged
        # behaviour), so the guard above skips it on later calls.
        # NOTE(review): confirm that not retrying failed entries is intended.
        loaded_range = indexer.IndexRange(
            start=start_index, end=visual_index.findex - 1)
        visual_index.loaded_entries[index_entry.pk] = loaded_range
        # The format string previously had three placeholders for five
        # arguments, so the range bounds were silently dropped.
        logging.info(
            "finished {} in {}, current shape {}, range {} - {}".format(
                index_entry.video_id,
                visual_index.name,
                visual_index.index.shape,
                loaded_range.start,
                loaded_range.end))