Ejemplo n.º 1
0
def update(index_id):
    """Fetch posts newer than the latest stored one, then persist and index them.

    Every post yielded by ``api.iterate_posts`` is added to the database
    session and committed on its own. For image posts whose image can be
    read, the image is analyzed, the post is marked as indexed and every
    analysis result is merged into the database as a ``Feature``. Feature
    vectors are additionally msgpack-encoded and, once all posts have been
    processed, pushed in a single call to the Redis list
    ``rep0st-latest-feature-vectors-index-<index_id>``.

    Args:
        index_id: Value appended (via ``str``) to the Redis list key that
            receives the newly computed feature vectors.
    """
    latest_id = rep.database.latest_post_id()
    log.info("getting new posts. latest post {}", latest_id)
    counter = 0

    # msgpack-encoded {'id', 'data'} payloads destined for the Redis list.
    packed_vectors = []
    for post in api.iterate_posts(latest_id):
        counter += 1
        rep.database.get_session().add(post)
        if post.type == PostType.IMAGE:
            image = rep.read_image(post)
            if image is not None:
                result = analyze.analyze_image(image)
                post.status = PostStatus.INDEXED

                for feature_type, data in result.items():
                    # NOTE(review): this uses `database.session` while the
                    # rest of the function uses `database.get_session()` --
                    # confirm both refer to the same session.
                    rep.database.session.merge(
                        Feature.from_analyzeresult(post, feature_type, data))
                    if feature_type == FeatureType.FEATURE_VECTOR:
                        packed_vectors.append(
                            msgpack.packb({
                                'id': post.id,
                                'data': data
                            }))

        # Commit once per post, whether or not it could be analyzed.
        rep.database.get_session().commit()

    # Only call lpush when there is at least one value to push.
    if packed_vectors:
        rep.redis.lpush('rep0st-latest-feature-vectors-index-' + str(index_id),
                        *packed_vectors)

    log.info("finished getting new posts. added {} posts to database", counter)
Ejemplo n.º 2
0
 def analyze_post(self, post):
     """Read the image belonging to ``post`` and analyze it.

     Returns:
         ``None`` when ``self.read_image`` yields no image; otherwise the
         tuple ``(post, result)``, where ``result`` is whatever
         ``analyze_image`` returns for that image.
     """
     picture = self.read_image(post)
     if picture is None:
         # Nothing to analyze for this post.
         return None
     return post, analyze_image(picture)
Ejemplo n.º 3
0
    def search(self, image, k=-1):
        """Look up the posts whose feature vectors are closest to ``image``.

        Candidates come from two sources: the Annoy index held in
        ``self.annoy_index`` and the msgpack-encoded feature vectors stored
        in the Redis list ``rep0st-latest-feature-vectors-index-<current_index>``.
        All candidates are fed into a ``SimplePriorityQueue(k)`` keyed by
        distance; each item left in the queue is resolved to its post and
        wrapped in a ``SearchResult``.

        Args:
            image: Image passed to ``analyze_image`` to obtain the query
                feature vector.
            k: Number of neighbours requested from the Annoy index and the
                argument given to the priority queue. ``-1`` selects
                ``config.index_config['default_k']``.

        Returns:
            A list of ``SearchResult(post, distance)`` objects in the
            iteration order of the priority queue.
        """
        started = time()

        if k == -1:
            k = config.index_config['default_k']

        candidates = SimplePriorityQueue(k)

        # Each byte of the stored feature vector becomes one float32 entry.
        raw_vector = analyze_image(image)[FeatureType.FEATURE_VECTOR]
        query = np.asarray(bytearray(raw_vector)).astype(np.float32)

        # With include_distances=True the index returns (ids, distances).
        neighbour_ids, neighbour_distances = \
            self.annoy_index.get_nns_by_vector(
                query,
                k,
                search_k=config.index_config['search_k'],
                include_distances=True)
        for distance, post_id in zip(neighbour_distances, neighbour_ids):
            candidates.add(distance, post_id)

        # Vectors pushed to Redis are compared against the query directly.
        redis_key = ('rep0st-latest-feature-vectors-index-' +
                     str(self.current_index))
        for packed in self.rep0st.redis.lrange(redis_key, 0, -1):
            entry = msgpack.unpackb(packed)
            vector = np.asarray(bytearray(entry['data'])).astype(np.float32)
            candidates.add(dist(query, vector), entry['id'])

        results = [
            SearchResult(
                self.rep0st.database.get_post_by_id(item.value),
                item.priority)
            for item in candidates
        ]

        elapsed_ms = (time() - started) * 1000

        log.debug("query with search_k={} and {} trees took {}ms",
                  config.index_config['search_k'],
                  config.index_config['tree_count'], str(elapsed_ms))

        return results