def test_download_proxy():
    """Verify the index-data URL answers an HTTP HEAD request both without a
    proxy and with the proxy taken from the ``HTTP_PROXY`` environment
    variable (passed via ``--download-proxy``).

    The original body duplicated the opener/HEAD logic verbatim for the two
    cases; it is factored into one helper parameterized by the CLI argv.
    """

    def _head_check(argv):
        # Parse hello-world args, build an opener (optionally routed through
        # --download-proxy), install it process-wide, and assert the index
        # data URL responds 200 to a HEAD probe.
        import urllib.request

        args = set_hw_parser().parse_args(argv)
        opener = urllib.request.build_opener()
        if args.download_proxy:
            proxy = urllib.request.ProxyHandler(
                {'http': args.download_proxy, 'https': args.download_proxy}
            )
            opener.add_handler(proxy)
        # NOTE: install_opener mutates global urllib state, as the original did
        urllib.request.install_opener(opener)
        req = urllib.request.Request(args.index_data_url, method="HEAD")
        response = urllib.request.urlopen(req, timeout=5)
        assert response.status == 200

    # first test no proxy
    _head_check([])
    # then with the proxy from the environment (same failure mode as the
    # original if HTTP_PROXY is unset: argparse receives None)
    _head_check(["--download-proxy", os.getenv("HTTP_PROXY")])
def test_helloworld_flow(tmpdir):
    """Download the hello-world fashion data and index it through the flow
    loaded from the packaged ``helloworld.flow.index.yml`` config."""
    hw_args = set_hw_parser().parse_args([])

    # environment the flow YAML expects to be present
    os.environ['RESOURCE_DIR'] = resource_filename('jina', 'resources')
    os.environ['SHARDS'] = str(hw_args.shards)
    os.environ['PARALLEL'] = str(hw_args.parallel)
    os.environ['HW_WORKDIR'] = str(tmpdir)

    flow = Flow.load_config(
        resource_filename(
            'jina', '/'.join(('resources', 'helloworld.flow.index.yml'))
        )
    )

    targets = {
        'index': {
            'url': hw_args.index_data_url,
            'filename': os.path.join(tmpdir, 'index-original'),
        },
        'query': {
            'url': hw_args.query_data_url,
            'filename': os.path.join(tmpdir, 'query-original'),
        },
    }

    # fetch the datasets into the temporary workdir
    Path(tmpdir).mkdir(parents=True, exist_ok=True)
    download_data(targets)

    # run the indexing flow end-to-end
    with flow:
        flow.index(
            _input_ndarray(targets['index']['data']),
            request_size=hw_args.index_request_size,
        )
def test_helloworld_flow_dry_run(tmpdir):
    """Dry-run: load each packaged hello-world flow config and open/close it
    without sending any requests."""
    hw_args = set_hw_parser().parse_args([])

    # environment the flow YAMLs expect to be present
    os.environ['RESOURCE_DIR'] = resource_filename('jina', 'resources')
    os.environ['SHARDS'] = str(hw_args.shards)
    os.environ['PARALLEL'] = str(hw_args.parallel)
    os.environ['HW_WORKDIR'] = str(tmpdir)

    # entering the context builds the flow; exiting tears it down again
    for config_name in ('helloworld.flow.index.yml', 'helloworld.flow.query.yml'):
        with Flow.load_config(
            resource_filename('jina', '/'.join(('resources', config_name)))
        ):
            pass
def test_helloworld_flow_dry_run(tmpdir):
    """Dry-run: resolve both hello-world flow configs by name (via the
    extended PATH) and open/close each without sending requests."""
    hw_args = set_hw_parser().parse_args([])

    # make the fashion resource dir discoverable for config lookup by name
    os.environ['PATH'] += os.pathsep + resource_filename(
        'jina', 'resources/fashion'
    )
    os.environ['SHARDS'] = str(hw_args.shards)
    os.environ['PARALLEL'] = str(hw_args.parallel)
    os.environ['HW_WORKDIR'] = str(tmpdir)

    # entering the context builds the flow; exiting tears it down again
    for config_name in ('helloworld.flow.index.yml', 'helloworld.flow.query.yml'):
        with Flow.load_config(config_name):
            pass
def test_helloworld_py(tmpdir):
    """Run the full hello-world demo and validate the generated HTML report."""
    from jina.helloworld import hello_world

    demo_args = set_hw_parser().parse_args(['--workdir', str(tmpdir)])
    hello_world(demo_args)
    # the demo writes its report here; validate it
    check_hello_world_results(os.path.join(str(tmpdir), 'hello-world.html'))
) # f.search( # query_generator( # num_docs=args.num_query, target=targets, with_groundtruth=True # ), # shuffle=True, # on_done=print_result, # request_size=args.request_size, # parameters={'top_k': args.top_k}, # ) f.post( '/eval', query_generator(num_docs=args.num_query, target=targets, with_groundtruth=True), shuffle=True, on_done=print_result, request_size=args.request_size, parameters={'top_k': args.top_k}, ) # write result to html write_html(os.path.join(args.workdir, 'demo.html')) if __name__ == '__main__': args = set_hw_parser().parse_args() hello_world(args)
def test_helloworld_py(tmpdir):
    """Smoke-test the hello-world demo end-to-end in a temp workdir."""
    from jina.helloworld import hello_world

    demo_args = set_hw_parser().parse_args(['--workdir', str(tmpdir)])
    hello_world(demo_args)
def helloworld_args(tmpdir):
    """Return hello-world CLI args with the workdir pointed at *tmpdir*."""
    argv = ['--workdir', str(tmpdir)]
    return set_hw_parser().parse_args(argv)
def _benchmark_qps() -> Dict[str, float]:
    """Benchmark Jina Core Indexing and Query.

    Downloads the fashion dataset, indexes it through a two-stage flow
    (encoder + indexer), then runs queries against it, timing both phases.

    Returns:
        A dict mapping keys ``index_time``, ``query_time`` (seconds) and
        ``index_qps``, ``query_qps`` (documents per second).
    """
    args = set_hw_parser().parse_args()
    # override defaults: work in ./original and use a fixed query count
    args.workdir = os.path.join(os.getcwd(), 'original')
    args.num_query = 4096
    # URL -> local filename mapping consumed by download_data
    targets = {
        'index-labels': {
            'url': args.index_labels_url,
            'filename': os.path.join(args.workdir, 'index-labels'),
        },
        'query-labels': {
            'url': args.query_labels_url,
            'filename': os.path.join(args.workdir, 'query-labels'),
        },
        'index': {
            'url': args.index_data_url,
            'filename': os.path.join(args.workdir, 'index-original'),
        },
        'query': {
            'url': args.query_data_url,
            'filename': os.path.join(args.workdir, 'query-original'),
        },
    }
    # download the data
    Path(args.workdir).mkdir(parents=True, exist_ok=True)
    download_data(targets, args.download_proxy)
    try:
        f = Flow().add(uses=MyEncoder).add(workspace='./', uses=MyIndexer)
        with f:
            # do index — time the whole f.index() call
            log.info('Benchmarking index')
            st = time.perf_counter()
            f.index(
                # presumably download_data fills in the 'data' array — TODO confirm
                index_generator(num_docs=targets['index']['data'].shape[0], target=targets),
                show_progress=True,
            )
            index_time = time.perf_counter() - st
            log.info(
                'Indexed %d docs within %d seconds',
                targets['index']['data'].shape[0],
                index_time,
            )
            # do query — time the whole f.search() call
            log.info('Benchmarking query')
            st = time.perf_counter()
            f.search(
                query_generator(num_docs=args.num_query, target=targets),
                shuffle=True,
                parameters={'top_k': args.top_k},
                show_progress=True,
            )
            query_time = time.perf_counter() - st
            log.info('%d query within %d seconds', args.num_query, query_time)
    except Exception as e:
        # any failure aborts the benchmark with a non-zero exit status
        log.error(e)
        sys.exit(1)
    return {
        'index_time': index_time,
        'query_time': query_time,
        'index_qps': targets['index']['data'].shape[0] / index_time,
        'query_qps': args.num_query / query_time,
    }