Example #1
0
def _record_refresh_state(retriever_pk, delta):
    """Record a completed index refresh in the "retriever_state" Redis hash.

    The hash field is "<worker_pk>,<queue_name>,<retriever_pk>" and the value is a
    JSON blob with the refresh duration and a timestamp, so external monitors can
    see when each (worker, retriever) pair last refreshed.
    """
    redis_client.hset(
        "retriever_state",
        "{},{},{}".format(W.pk, W.queue_name, retriever_pk),
        json.dumps({
            "delta": delta,
            'worker_id': W.pk,
            'retriever_id': retriever_pk,
            'queue_name': W.queue_name,
            'ts': time.time(),
        }))


def refresh_retriever():
    """Refresh retriever indexes for the worker's queue and record state in Redis.

    On the global retriever queue every known retriever is refreshed; on a
    per-retriever queue ("retriever_<pk>") only that retriever is refreshed.

    Raises:
        ValueError: if the worker's queue name is not a retriever queue.
    """
    global W
    if W.queue_name == settings.GLOBAL_RETRIEVER:
        for dr in Retrievers._selector_to_dr.values():
            logging.info("Starting index refresh on queue {} for retriever {}".format(W.queue_name, dr.pk))
            start_ts = time.time()
            Retrievers.refresh_index(dr)
            _record_refresh_state(dr.pk, time.time() - start_ts)
            logging.info("Finished index refresh on queue {} for retriever {}".format(W.queue_name, dr.pk))
    elif 'retriever_' in W.queue_name:
        pk = int(W.queue_name.split('_')[-1])
        logging.info("Starting index refresh on queue {} for retriever {}".format(W.queue_name, pk))
        start_ts = time.time()
        _, dr = Retrievers.get_retriever(args={'retriever_selector': {'pk': pk}})
        Retrievers.refresh_index(dr)
        # NOTE: as in the original code, the recorded delta includes the
        # get_retriever lookup time, not just the refresh itself.
        _record_refresh_state(dr.pk, time.time() - start_ts)
        logging.info("Finished index refresh on queue {} for retriever {}".format(W.queue_name, pk))
    else:
        raise ValueError("{} is not valid for retriever".format(W.queue_name))
Example #2
0
def handle_perform_indexing(start):
    """Run an indexer over the queryset selected by *start*'s arguments.

    The indexer is resolved by name ('index') or primary key ('indexer_pk').
    Depending on 'target' (default 'frames'):

    - 'query': index the single query image; stash the vector in Redis under
      the event pk and return False (caller should not sync).
    - 'query_regions': index each query region; stash vectors in a Redis hash
      and persist them as QueryRegionIndexVector rows; return False.
    - 'regions' / 'frames': ensure files locally (or hand cloud paths straight
      to the indexer when supported and enabled) and index the queryset;
      return True.

    Returns:
        bool: whether the caller should perform a sync afterwards.
    """
    args = start.arguments
    target = args.get('target', 'frames')
    if 'index' in args:
        visual_index, di = indexing.Indexers.get_index_by_name(args['index'])
    else:
        visual_index, di = indexing.Indexers.get_index_by_pk(
            args['indexer_pk'])

    if target == 'query':
        query_path = task_shared.download_and_get_query_path(start)
        query_vector = visual_index.apply(query_path)
        # TODO: figure out a better way to store numpy arrays.
        buf = io.BytesIO()
        np.save(buf, query_vector)
        redis_client.set(start.pk, buf.getvalue())
        return False

    if target == 'query_regions':
        queryset, target = task_shared.build_queryset(args=start.arguments)
        region_paths = task_shared.download_and_get_query_region_path(
            start, queryset)
        for position, region in enumerate(queryset):
            region_vector = visual_index.apply(region_paths[position])
            buf = io.BytesIO()
            np.save(buf, region_vector)
            # can be replaced by Redis instead of using DB
            redis_client.hset(start.pk, region.pk, buf.getvalue())
            _ = models.QueryRegionIndexVector.objects.create(
                vector=buf.getvalue(), event=start, query_region=region)
        return False

    if target == 'regions':
        # For regions simply download/ensure files exists.
        queryset, target = task_shared.build_queryset(args=start.arguments,
                                                      video_id=start.video_id)
        task_shared.ensure_files(queryset, target)
        indexing.Indexers.index_queryset(di, visual_index, start, target,
                                         queryset)
    elif target == 'frames':
        queryset, target = task_shared.build_queryset(args=start.arguments,
                                                      video_id=start.video_id)
        cloud_native = (visual_index.cloud_fs_support
                        and settings.ENABLE_CLOUDFS
                        and not settings.KUBE_MODE)
        if cloud_native:
            # TODO Re-enable this in Kube Mode when issues with GCS are resolved.
            # if NFS is disabled and index supports cloud file systems natively (e.g. like Tensorflow)
            indexing.Indexers.index_queryset(di,
                                             visual_index,
                                             start,
                                             target,
                                             queryset,
                                             cloud_paths=True)
        else:
            # Otherwise download and ensure that the files exist
            task_shared.ensure_files(queryset, target)
            indexing.Indexers.index_queryset(di, visual_index, start, target,
                                             queryset)
    return True
def handle_perform_indexing(start):
    """Run an indexer over the queryset selected by *start*'s arguments.

    NOTE(review): this redefines handle_perform_indexing declared earlier in
    the file; at import time this later definition wins. Unlike the earlier
    one, it namespaces the Redis keys ("query_vector_…",
    "query_region_vectors_…"), does not persist region vectors to the DB, and
    does not special-case KUBE_MODE.

    Returns:
        bool: whether the caller should perform a sync afterwards (False for
        the 'query' / 'query_regions' targets, True otherwise).
    """
    args = start.arguments
    target = args.get('target', 'frames')
    if 'index' in args:
        visual_index, di = indexing.Indexers.get_index_by_name(args['index'])
    else:
        visual_index, di = indexing.Indexers.get_index_by_pk(
            args['indexer_pk'])

    if target == 'query':
        query_path = task_shared.download_and_get_query_path(start)
        query_vector = visual_index.apply(query_path)
        # TODO: figure out a better way to store numpy arrays.
        buf = io.BytesIO()
        np.save(buf, query_vector)
        redis_client.set("query_vector_{}".format(start.pk), buf.getvalue())
        return False

    if target == 'query_regions':
        queryset, target = task_shared.build_queryset(args=start.arguments)
        region_paths = task_shared.download_and_get_query_region_path(
            start, queryset)
        for position, region in enumerate(queryset):
            region_vector = visual_index.apply(region_paths[position])
            buf = io.BytesIO()
            np.save(buf, region_vector)
            redis_client.hset("query_region_vectors_{}".format(start.pk),
                              region.pk, buf.getvalue())
        return False

    if target == 'regions':
        # For regions simply download/ensure files exists.
        queryset, target = task_shared.build_queryset(args=start.arguments,
                                                      video_id=start.video_id)
        task_shared.ensure_files(queryset, target)
        indexing.Indexers.index_queryset(di, visual_index, start, target,
                                         queryset)
    elif target == 'frames':
        queryset, target = task_shared.build_queryset(args=start.arguments,
                                                      video_id=start.video_id)
        if visual_index.cloud_fs_support and settings.ENABLE_CLOUDFS:
            # if NFS is disabled and index supports cloud file systems natively (e.g. like Tensorflow)
            indexing.Indexers.index_queryset(di,
                                             visual_index,
                                             start,
                                             target,
                                             queryset,
                                             cloud_paths=True)
        else:
            # Otherwise download and ensure that the files exist
            task_shared.ensure_files(queryset, target)
            indexing.Indexers.index_queryset(di, visual_index, start, target,
                                             queryset)
    return True