예제 #1
0
def _assert_head_ok(args):
    """Install a global urllib opener for ``args`` and HEAD-check the index URL.

    A ``ProxyHandler`` covering both http and https is added to the opener
    only when ``args.download_proxy`` is truthy. The opener is installed
    globally via ``urllib.request.install_opener``.

    Args:
        args: parsed hello-world arguments; ``download_proxy`` and
            ``index_data_url`` are read.

    Raises:
        AssertionError: if the HEAD request does not answer with status 200.
    """
    import urllib.request

    opener = urllib.request.build_opener()
    if args.download_proxy:
        proxy = urllib.request.ProxyHandler(
            {'http': args.download_proxy, 'https': args.download_proxy})
        opener.add_handler(proxy)
    urllib.request.install_opener(opener)

    # head check; the context manager closes the connection afterwards
    req = urllib.request.Request(args.index_data_url, method="HEAD")
    with urllib.request.urlopen(req, timeout=5) as response:
        assert response.status == 200


def test_download_proxy():
    """Check that the index data URL is reachable without and with a proxy.

    The proxied check uses the ``HTTP_PROXY`` environment variable and is
    skipped when that variable is unset or empty, because there is no proxy
    address to hand to ``--download-proxy`` in that case.
    """
    # first test no proxy
    _assert_head_ok(set_hw_parser().parse_args([]))

    # test with proxy
    proxy_url = os.getenv("HTTP_PROXY")
    if not proxy_url:
        # never pass None as a command-line token
        return
    _assert_head_ok(set_hw_parser().parse_args(["--download-proxy", proxy_url]))
예제 #2
0
def test_helloworld_flow(tmpdir):
    """Download the hello-world data into ``tmpdir`` and index it with the index flow."""
    hw_args = set_hw_parser().parse_args([])

    # exported before the flow YAML is loaded
    os.environ.update({
        'RESOURCE_DIR': resource_filename('jina', 'resources'),
        'SHARDS': str(hw_args.shards),
        'PARALLEL': str(hw_args.parallel),
        'HW_WORKDIR': str(tmpdir),
    })

    flow_yaml = resource_filename(
        'jina', '/'.join(('resources', 'helloworld.flow.index.yml')))
    index_flow = Flow.load_config(flow_yaml)

    # one entry per dataset: where to fetch it from and where to store it
    sources = (('index', hw_args.index_data_url),
               ('query', hw_args.query_data_url))
    targets = {}
    for split, url in sources:
        targets[split] = {
            'url': url,
            'filename': os.path.join(tmpdir, f'{split}-original'),
        }

    # download the data
    Path(tmpdir).mkdir(parents=True, exist_ok=True)
    # presumably download_data stores the loaded array under each entry's
    # 'data' key, which is read below -- TODO confirm against download_data
    download_data(targets)

    # run it!
    with index_flow:
        index_docs = _input_ndarray(targets['index']['data'])
        index_flow.index(index_docs, request_size=hw_args.index_request_size)
예제 #3
0
def test_helloworld_flow_dry_run(tmpdir):
    """Enter and leave the index flow and then the query flow without sending data."""
    hw_args = set_hw_parser().parse_args([])

    # exported before the flow YAMLs are loaded
    os.environ.update({
        'RESOURCE_DIR': resource_filename('jina', 'resources'),
        'SHARDS': str(hw_args.shards),
        'PARALLEL': str(hw_args.parallel),
        'HW_WORKDIR': str(tmpdir),
    })

    # run it! index flow first, then query flow
    for yaml_name in ('helloworld.flow.index.yml', 'helloworld.flow.query.yml'):
        yaml_path = resource_filename('jina', '/'.join(('resources', yaml_name)))
        with Flow.load_config(yaml_path):
            pass
예제 #4
0
def test_helloworld_flow_dry_run(tmpdir):
    """Enter and leave the index and query flows, loading each YAML by bare file name.

    The packaged ``resources/fashion`` directory is appended to ``PATH``
    before the two flow configs are loaded by name.
    """
    hw_args = set_hw_parser().parse_args([])

    fashion_dir = resource_filename('jina', 'resources/fashion')
    os.environ['PATH'] = os.environ['PATH'] + os.pathsep + fashion_dir
    os.environ.update({
        'SHARDS': str(hw_args.shards),
        'PARALLEL': str(hw_args.parallel),
        'HW_WORKDIR': str(tmpdir),
    })

    # run it! index flow first, then query flow
    for yaml_name in ('helloworld.flow.index.yml', 'helloworld.flow.query.yml'):
        with Flow.load_config(yaml_name):
            pass
예제 #5
0
def test_helloworld_py(tmpdir):
    """Run the hello-world demo in ``tmpdir`` and check the HTML report it leaves there."""
    from jina.helloworld import hello_world

    workdir = str(tmpdir)
    hw_args = set_hw_parser().parse_args(['--workdir', workdir])
    hello_world(hw_args)

    report_path = os.path.join(workdir, 'hello-world.html')
    check_hello_world_results(report_path)
예제 #6
0
파일: app.py 프로젝트: luojiguicai/jina
        )

        # f.search(
        #     query_generator(
        #         num_docs=args.num_query, target=targets, with_groundtruth=True
        #     ),
        #     shuffle=True,
        #     on_done=print_result,
        #     request_size=args.request_size,
        #     parameters={'top_k': args.top_k},
        # )

        f.post(
            '/eval',
            query_generator(num_docs=args.num_query,
                            target=targets,
                            with_groundtruth=True),
            shuffle=True,
            on_done=print_result,
            request_size=args.request_size,
            parameters={'top_k': args.top_k},
        )

        # write result to html
        write_html(os.path.join(args.workdir, 'demo.html'))


# Script entry point: build the hello-world argument parser, parse the
# arguments (parse_args() is called without an explicit argv list) and run
# hello_world with the result.
if __name__ == '__main__':
    args = set_hw_parser().parse_args()
    hello_world(args)
예제 #7
0
def test_helloworld_py(tmpdir):
    """Run the hello-world demo with ``tmpdir`` as its working directory."""
    from jina.helloworld import hello_world

    cli_tokens = ['--workdir', str(tmpdir)]
    hw_args = set_hw_parser().parse_args(cli_tokens)
    hello_world(hw_args)
예제 #8
0
def helloworld_args(tmpdir):
    """Return hello-world arguments parsed with ``--workdir`` set to ``tmpdir``."""
    parser = set_hw_parser()
    cli_tokens = ['--workdir', str(tmpdir)]
    return parser.parse_args(cli_tokens)
예제 #9
0
def _benchmark_qps() -> Dict[str, float]:
    """Benchmark Jina Core indexing and querying.

    The hello-world label and data files are downloaded into
    ``<cwd>/original``. A two-executor flow (``MyEncoder`` then
    ``MyIndexer``) is started, and the index and search phases are each
    timed with ``time.perf_counter``. The number of queries is fixed at 4096.

    If anything inside the flow section raises, the error is logged and the
    process exits with status 1.

    Returns:
        A dict with four float entries:
            ``index_time``: seconds spent in the index call.
            ``query_time``: seconds spent in the search call.
            ``index_qps``: indexed documents per second.
            ``query_qps``: queries per second.
    """
    args = set_hw_parser().parse_args()
    args.workdir = os.path.join(os.getcwd(), 'original')
    args.num_query = 4096

    targets = {
        'index-labels': {
            'url': args.index_labels_url,
            'filename': os.path.join(args.workdir, 'index-labels'),
        },
        'query-labels': {
            'url': args.query_labels_url,
            'filename': os.path.join(args.workdir, 'query-labels'),
        },
        'index': {
            'url': args.index_data_url,
            'filename': os.path.join(args.workdir, 'index-original'),
        },
        'query': {
            'url': args.query_data_url,
            'filename': os.path.join(args.workdir, 'query-original'),
        },
    }

    # download the data
    Path(args.workdir).mkdir(parents=True, exist_ok=True)
    # presumably download_data stores the loaded arrays under each entry's
    # 'data' key, which is read below -- TODO confirm against download_data
    download_data(targets, args.download_proxy)

    try:
        # first dimension of the index array is used as the document count
        num_index_docs = targets['index']['data'].shape[0]

        f = Flow().add(uses=MyEncoder).add(workspace='./', uses=MyIndexer)

        with f:
            # do index
            log.info('Benchmarking index')
            st = time.perf_counter()
            f.index(
                index_generator(num_docs=num_index_docs, target=targets),
                show_progress=True,
            )
            index_time = time.perf_counter() - st
            # durations are floats: %.2f keeps sub-second precision, where
            # %d would truncate them to whole seconds
            log.info(
                'Indexed %d docs within %.2f seconds',
                num_index_docs,
                index_time,
            )

            # do query
            log.info('Benchmarking query')
            st = time.perf_counter()
            f.search(
                query_generator(num_docs=args.num_query, target=targets),
                shuffle=True,
                parameters={'top_k': args.top_k},
                show_progress=True,
            )
            query_time = time.perf_counter() - st
            log.info('%d query within %.2f seconds', args.num_query, query_time)

    except Exception as e:
        # top-level boundary of the benchmark: report and abort with a
        # non-zero exit status
        log.error(e)
        sys.exit(1)

    return {
        'index_time': index_time,
        'query_time': query_time,
        'index_qps': num_index_docs / index_time,
        'query_qps': args.num_query / query_time,
    }