コード例 #1
0
ファイル: app.py プロジェクト: saman-moeinsadat/examples
def main(task, batch_size, top_k):
    """Run the index or query flow over the siftsmall ``.fvecs`` files.

    Reads ``TMP_DATA_DIR`` from the environment (a ``KeyError`` is raised if it
    is unset) and stores the result of ``get_random_ws`` in ``WORKDIR``.

    :param task: ``'index'`` or ``'query'``.
    :param batch_size: forwarded as ``batch_size`` to ``index_ndarray``.
    :param top_k: forwarded to ``save_topk`` and, as ``top_k``, to ``search_ndarray``.
    :raises NotImplementedError: if ``task`` is neither ``'index'`` nor ``'query'``.
    """
    os.environ['WORKDIR'] = get_random_ws(os.environ['TMP_DATA_DIR'])
    if task == 'index':
        data_path = os.path.join(os.environ['TMP_DATA_DIR'],
                                 'siftsmall_base.fvecs')
        # NOTE(review): this only prints a banner; indexing below still runs.
        # The banner speaks of a "directory", yet data_path is the input file
        # handed to read_data() below -- confirm which path was meant to be checked.
        if os.path.exists(data_path):
            print(
                f'\n +---------------------------------------------------------------------------------+ \
                    \n |                                   ������                                        | \
                    \n | The directory {data_path} already exists. Please remove it before indexing again. | \
                    \n |                                   ������                                        | \
                    \n +---------------------------------------------------------------------------------+'
            )

        flow = Flow().load_config('flow-index.yml')
        with flow.build() as fl:
            fl.index_ndarray(read_data(data_path), batch_size=batch_size)
    elif task == 'query':
        data_path = os.path.join(os.environ['TMP_DATA_DIR'],
                                 'siftsmall_query.fvecs')
        flow = Flow().load_config('flow-query.yml')
        with flow.build() as fl:
            # Callback passed as output_fn: hands each response to save_topk
            # together with the results path under TMP_DATA_DIR and top_k.
            ppr = lambda x: save_topk(
                x, os.path.join(os.environ['TMP_DATA_DIR'], 'query_results.txt'
                                ), top_k)
            fl.search_ndarray(read_data(data_path), output_fn=ppr, top_k=top_k)
    else:
        raise NotImplementedError(
            f'unknown task: {task}. A valid task is either `index` or `query`.'
        )
コード例 #2
0
def main(task, num_docs, top_k, path):
    """Index the ``jpg`` data folder, or query it with default or custom images.

    ``TMP_DATA_DIR`` must be set in the environment; ``TMP_WORKSPACE`` is set
    from ``get_random_ws``.

    :param task: ``'index'`` or ``'query'``.
    :param num_docs: passed to ``read_data`` when indexing.
    :param top_k: forwarded as ``top_k`` to ``search``.
    :param path: optional user-supplied JPG file or directory; when truthy the
        query input comes from ``read_custom_data(path, 5)`` instead of
        ``read_data``.
    :raises NotImplementedError: if ``task`` is neither ``'index'`` nor ``'query'``.
    """
    os.environ['TMP_WORKSPACE'] = get_random_ws(os.environ['TMP_DATA_DIR'])
    default_source = os.path.join(os.environ['TMP_DATA_DIR'], 'jpg')

    if task == 'index':
        index_flow = Flow().load_config('flow-index.yml')
        with index_flow.build() as runner:
            runner.index(raw_bytes=read_data(default_source, num_docs), batch_size=2)
        return

    if task != 'query':
        raise NotImplementedError(
            f'unknown task: {task}. A valid task is either `index` or `query`.')

    if not path:
        # Offer the user the chance to quit and rerun with their own images.
        cmd_prompt = '\033[{}mpython {} -t query -p <JPG file or directory>\033[0m'.format(32, sys.argv[0])
        answer = input(f'You can specify a JPG file or directory you own to query: {cmd_prompt}\nDo you want? Please input y or n: ')
        if answer == 'y':
            sys.exit(0)

    if path:
        reader, source = read_custom_data, path
    else:
        reader, source = read_data, default_source

    def store_results(response):
        # The results path is resolved when the callback runs, not when it is defined.
        return save_topk(response, os.path.join(os.environ['TMP_DATA_DIR'], 'query_results.png'))

    query_flow = Flow().load_config('flow-query.yml')
    with query_flow.build() as runner:
        runner.search(reader(source, 5), callback=store_results, top_k=top_k)
コード例 #3
0
def main(task, num_docs, top_k):
    """Index or query the ``jpg`` data folder under ``TMP_DATA_DIR``.

    ``TMP_DATA_DIR`` must be set in the environment; ``TMP_WORKSPACE`` is set
    from ``get_random_ws``.

    :param task: ``'index'`` or ``'query'``.
    :param num_docs: passed to ``read_data`` when indexing.
    :param top_k: forwarded as ``top_k`` to ``search``.
    :raises NotImplementedError: if ``task`` is neither ``'index'`` nor ``'query'``.
    """
    os.environ['TMP_WORKSPACE'] = get_random_ws(os.environ['TMP_DATA_DIR'])
    image_dir = os.path.join(os.environ['TMP_DATA_DIR'], 'jpg')

    if task == 'index':
        index_flow = Flow().load_config('flow-index.yml')
        with index_flow.build() as runner:
            runner.index(raw_bytes=read_data(image_dir, num_docs), batch_size=2)
        return

    if task != 'query':
        raise NotImplementedError(
            f'unknown task: {task}. A valid task is either `index` or `query`.')

    def store_results(response):
        # The results path is resolved when the callback runs, not when it is defined.
        return save_topk(response, os.path.join(os.environ['TMP_DATA_DIR'], 'query_results.png'))

    query_flow = Flow().load_config('flow-query.yml')
    with query_flow.build() as runner:
        # The query side always asks read_data for 5 items.
        runner.search(read_data(image_dir, 5), callback=store_results, top_k=top_k)
コード例 #4
0
def main(task, num_docs, top_k):
    """Index ``character-lines.csv`` or run an interactive sentence search.

    ``TMP_DATA_DIR`` must be set in the environment; ``TMP_WORKSPACE`` is set
    from ``get_random_ws``.

    :param task: ``'index'`` or ``'query'``.
    :param num_docs: passed to ``read_data`` when indexing.
    :param top_k: forwarded as ``topk`` to ``search``.
    :raises NotImplementedError: if ``task`` is neither ``'index'`` nor ``'query'``.
    """
    os.environ['TMP_WORKSPACE'] = get_random_ws(os.environ['TMP_DATA_DIR'])
    csv_path = os.path.join(os.environ['TMP_DATA_DIR'], 'character-lines.csv')

    if task == 'index':
        index_flow = Flow().load_config('flow-index.yml')
        with index_flow.build() as runner:
            runner.index(buffer=read_data(csv_path, num_docs), batch_size=8)
        print('done')
        return

    if task != 'query':
        raise NotImplementedError(
            f'unknown task: {task}. A valid task is either `index` or `query`.'
        )

    query_flow = Flow().load_config('flow-query.yml')
    with query_flow.build() as runner:
        # Keep prompting until the user submits an empty line.
        for text in iter(lambda: input('please type a sentence: '), ''):

            def show_results(response):
                return print_topk(response, text)

            runner.search(read_query_data(text), callback=show_results, topk=top_k)
コード例 #5
0
ファイル: app.py プロジェクト: rutulgandhi05/examples
def main(task, batch_size, top_k):
    """Run the index or query flow over the siftsmall ``.fvecs`` files.

    ``TMP_DATA_DIR`` must be set in the environment; ``TMP_WORKSPACE`` is set
    from ``get_random_ws``.

    :param task: ``'index'`` or ``'query'``.
    :param batch_size: forwarded as ``batch_size`` to ``index_ndarray``.
    :param top_k: forwarded to ``save_topk`` and, as ``top_k``, to ``search_ndarray``.
    :raises NotImplementedError: if ``task`` is neither ``'index'`` nor ``'query'``.
    """
    os.environ['TMP_WORKSPACE'] = get_random_ws(os.environ['TMP_DATA_DIR'])

    if task == 'index':
        base_vectors = os.path.join(os.environ['TMP_DATA_DIR'], 'siftsmall_base.fvecs')
        index_flow = Flow().load_config('flow-index.yml')
        with index_flow.build() as runner:
            runner.index_ndarray(read_data(base_vectors), batch_size=batch_size)
        return

    if task != 'query':
        raise NotImplementedError(
            f'unknown task: {task}. A valid task is either `index` or `query`.'
        )

    query_vectors = os.path.join(os.environ['TMP_DATA_DIR'], 'siftsmall_query.fvecs')

    def store_results(response):
        # The results path is resolved when the callback runs, not when it is defined.
        results_file = os.path.join(os.environ['TMP_DATA_DIR'], 'query_results.txt')
        return save_topk(response, results_file, top_k)

    query_flow = Flow().load_config('flow-query.yml')
    with query_flow.build() as runner:
        runner.search_ndarray(read_data(query_vectors), output_fn=store_results, top_k=top_k)
コード例 #6
0
def main(task, num_docs, top_k):
    """Index the urbandict JSON file or run an interactive definition search.

    Uses the fixed directory ``/tmp/jina/urbandict`` for both the workspace
    seed and the data file; ``TMP_WORKSPACE`` is set from ``get_random_ws``
    and printed.

    :param task: ``'index'`` or ``'query'``.
    :param num_docs: passed to ``read_data`` when indexing.
    :param top_k: forwarded as ``topk`` to ``search``.
    :raises NotImplementedError: if ``task`` is neither ``'index'`` nor ``'query'``.
    """
    base_dir = '/tmp/jina/urbandict'
    os.environ['TMP_WORKSPACE'] = get_random_ws(base_dir)
    print(os.environ['TMP_WORKSPACE'])
    definitions_file = os.path.join(base_dir, "urbandict-word-defs.json")

    if task == 'index':
        index_flow = Flow().load_config('flow-index.yml')
        with index_flow.build() as runner:
            runner.index(buffer=read_data(definitions_file, num_docs), batch_size=16)
        return

    if task != 'query':
        raise NotImplementedError(
            f'unknown task: {task}. A valid task is either `index` or `query`.')

    query_flow = Flow().load_config('flow-query.yml')
    with query_flow.build() as runner:
        # Keep prompting until the user submits an empty line.
        for text in iter(lambda: input('word definition: '), ''):

            def show_results(response):
                return print_topk(response, text)

            runner.search(read_query_data(text), callback=show_results, topk=top_k)
コード例 #7
0
ファイル: app.py プロジェクト: jina-ai/stress-test
def f2():
    """Index the raw contents of the GIF files through a log-server-enabled flow.

    Builds a flow with ``logserver=True`` and a single ``gif2chunk2.yml`` pod
    (``replicas`` copies), then indexes the bytes of at most ``num_docs``
    files matching ``GIF_BLOB`` in batches of 8.
    """
    f = Flow(logserver=True, logserver_config='test-server-config.yml').add(
        yaml_path='gif2chunk2.yml', replicas=replicas)

    def bytes_gen():
        """Yield the full binary content of each selected file, one at a time."""
        for g in glob.glob(GIF_BLOB)[:num_docs]:
            with open(g, 'rb') as fp:
                content = fp.read()
            # Yield only after the file is closed so no handle stays open
            # while the generator is suspended (or if it is abandoned).
            yield content

    with f.build() as fl:
        fl.index(bytes_gen(), batch_size=8)
コード例 #8
0
ファイル: app.py プロジェクト: jina-ai/stress-test
def f1():
    """Index the encoded file names matching ``GIF_BLOB`` through ``gif2chunk.yml``.

    Note that the documents sent are the matched paths encoded to bytes, not
    the file contents. At most ``num_docs`` paths are used, in batches of 128.
    """
    flow = Flow().add(yaml_path='gif2chunk.yml', replicas=replicas)
    selected_paths = glob.glob(GIF_BLOB)[:num_docs]
    encoded_paths = map(str.encode, selected_paths)

    with flow.build() as runner:
        runner.index(encoded_paths, batch_size=128)