def update(index_id):
    """Fetch posts newer than the latest stored one and index them.

    Every post yielded by ``api.iterate_posts`` is added to the database
    session. Image posts whose image can be read are analyzed, marked as
    ``PostStatus.INDEXED`` and have their features merged into the session.
    The msgpack-encoded feature vectors collected along the way are pushed
    onto the Redis list ``rep0st-latest-feature-vectors-index-<index_id>``.

    Args:
        index_id: Identifier appended to the Redis list key.
    """
    latest_id = rep.database.latest_post_id()
    log.info("getting new posts. latest post {}", latest_id)

    counter = 0
    packed_vectors = []
    for post in api.iterate_posts(latest_id):
        counter += 1
        rep.database.get_session().add(post)

        # Only image posts are analyzed; everything else is just stored.
        if post.type != PostType.IMAGE:
            continue
        image = rep.read_image(post)
        if image is None:
            continue

        result = analyze.analyze_image(image)
        post.status = PostStatus.INDEXED
        for feature_type, data in result.items():
            # NOTE(review): merge goes through `rep.database.session` while
            # add/commit use `get_session()` - confirm both are the same
            # session object.
            rep.database.session.merge(
                Feature.from_analyzeresult(post, feature_type, data))
            if feature_type == FeatureType.FEATURE_VECTOR:
                packed_vectors.append(
                    msgpack.packb({
                        'id': post.id,
                        'data': data
                    }))

    # NOTE(review): a single commit after the loop is assumed here; confirm
    # that the posts are not meant to be committed one by one.
    rep.database.get_session().commit()

    # Guard the push: there may be no new feature vectors at all.
    if packed_vectors:
        rep.redis.lpush('rep0st-latest-feature-vectors-index-' + str(index_id),
                        *packed_vectors)

    log.info("finished getting new posts. added {} posts to database", counter)
def analyze_post(self, post):
    """Analyze the image belonging to ``post``.

    Args:
        post: Post whose image is loaded through ``self.read_image``.

    Returns:
        ``None`` when ``self.read_image`` yields no image, otherwise the
        tuple ``(post, result)`` where ``result`` is whatever
        ``analyze_image`` returns for the loaded image.
    """
    loaded = self.read_image(post)
    # Guard clause: nothing to analyze without an image.
    if loaded is None:
        return None
    return post, analyze_image(loaded)
def search(self, image, k=-1):
    """Search for posts whose feature vector is close to that of ``image``.

    Candidates come from two sources: the Annoy index held in
    ``self.annoy_index`` and the feature vectors stored in the Redis list
    ``rep0st-latest-feature-vectors-index-<self.current_index>``. Both are
    fed into one ``SimplePriorityQueue(k)`` as (distance, post id) pairs.

    Args:
        image: Image passed to ``analyze_image`` to obtain the query
            feature vector.
        k: Number of neighbours requested from the Annoy index and the
            argument given to ``SimplePriorityQueue``. ``-1`` selects
            ``config.index_config['default_k']``.

    Returns:
        A list of ``SearchResult(post, priority)``, one per item yielded
        by the priority queue, in the queue's iteration order.
    """
    start = time()
    if k == -1:
        k = config.index_config['default_k']

    nearest = SimplePriorityQueue(k)
    fv = analyze_image(image)[FeatureType.FEATURE_VECTOR]
    # The feature vector is treated as raw bytes; each byte becomes one
    # float32 component.
    arr = np.asarray(bytearray(fv)).astype(np.float32)

    # With include_distances=True Annoy returns a pair (ids, distances).
    found_ids, found_distances = self.annoy_index.get_nns_by_vector(
        arr,
        k,
        search_k=config.index_config['search_k'],
        include_distances=True)[:2]
    for post_id, distance in zip(found_ids, found_distances):
        nearest.add(distance, post_id)

    # Vectors in the Redis list are compared against the query one by one.
    redis_key = ('rep0st-latest-feature-vectors-index-' +
                 str(self.current_index))
    for packed in self.rep0st.redis.lrange(redis_key, 0, -1):
        element = msgpack.unpackb(packed)
        data = np.asarray(bytearray(element['data'])).astype(np.float32)
        # `dist` is defined elsewhere - presumably the same metric the
        # Annoy index uses; TODO confirm.
        nearest.add(dist(arr, data), element['id'])

    results = [
        SearchResult(self.rep0st.database.get_post_by_id(item.value),
                     item.priority) for item in nearest
    ]

    stop = time()
    log.debug("query with search_k={} and {} trees took {}ms",
              config.index_config['search_k'],
              config.index_config['tree_count'], str((stop - start) * 1000))
    return results