def search(
    workspace: Path,
    data_dir: Path,
    flow: Flow,
    threshold: Optional[float],
    top_k: int,
    num_queries: int,
):
    """
    Send generated audio queries to ``/search`` and write the results as ``demo.html``.

    :param workspace: directory that must already exist (created by a prior indexing
        run); the HTML report is written to ``workspace / 'demo.html'``
    :param data_dir: data directory; the glob ``<data_dir>/query/*.mp3`` is handed to
        ``create_docs`` to build the query documents
    :param flow: Flow used as a context manager; it receives the ``/search`` request
    :param threshold: passed through unchanged to ``report_results``
    :param top_k: passed through to ``report_results`` and ``write_html``
    :param num_queries: passed through to ``create_query_audios``
    :raises FileNotFoundError: if ``workspace`` does not exist
    """
    if not workspace.exists():
        message = f'The directory {workspace} does not exist. Please index first via `python app.py index`'
        raise FileNotFoundError(message)

    with flow:
        # Keep this order: the query audios are created before the query
        # documents are built from the glob pattern.
        create_query_audios(num_queries, data_dir)
        query_glob = os.path.join(data_dir, 'query', '*.mp3')
        query_docs = create_docs(query_glob)
        search_responses = flow.post(
            '/search',
            inputs=query_docs,
            return_results=True,
        )

        # NOTE(review): the flattened source does not show whether the report
        # step ran inside or after the `with` block; it is kept inside here,
        # which only delays closing the flow — confirm against the original.
        html_body, measured_accuracy = report_results(search_responses, threshold, top_k)
        report_path = str(workspace / 'demo.html')
        write_html(report_path, html_body, measured_accuracy, top_k)
def hello_world(args):
    """
    Run the hello-world demo: download data, index it, evaluate queries, render a report.

    Usage:
        Use it via CLI :command:`jina hello-world`; more options are listed by
        :command:`jina hello-world --help`.

    Description:
        Creates ``args.workdir`` if needed and downloads the index/query data and
        labels into it. All downloaded index documents are indexed through a Flow of
        ``MyEncoder`` (``parallel=2``) -> ``MyIndexer`` -> ``MyEvaluator``. Then
        ``args.num_query`` query documents (with ground truth) are posted to ``/eval``
        and ``demo.html`` is written into the working directory.

        Side effect: sets the environment variables ``JINA_ARRAY_QUANT`` and
        ``HW_WORKDIR`` for the current process.

    :param args: Argparse object
    """
    workdir = args.workdir
    Path(workdir).mkdir(parents=True, exist_ok=True)

    # target name -> (download url, local file name); insertion order matches
    # the resulting `targets` mapping.
    artifacts = {
        'index-labels': (args.index_labels_url, 'index-labels'),
        'query-labels': (args.query_labels_url, 'query-labels'),
        'index': (args.index_data_url, 'index-original'),
        'query': (args.query_data_url, 'query-original'),
    }
    targets = {
        key: {'url': url, 'filename': os.path.join(workdir, basename)}
        for key, (url, basename) in artifacts.items()
    }

    # Fetch everything. `targets[...]['data']` is read further down but never
    # set in this function, so download_data presumably attaches the loaded
    # arrays under that key — TODO confirm.
    download_data(targets, args.download_proxy)

    # reduce the network load by using `fp16`, or even `uint8`
    os.environ['JINA_ARRAY_QUANT'] = 'fp16'
    os.environ['HW_WORKDIR'] = workdir

    # Assemble the Flow in code: encoder -> indexer -> evaluator.
    f = Flow()
    f = f.add(uses=MyEncoder, parallel=2)
    f = f.add(uses=MyIndexer)
    f = f.add(uses=MyEvaluator)

    with f:
        # Index every downloaded index document.
        index_count = targets['index']['data'].shape[0]
        index_docs = index_generator(num_docs=index_count, target=targets)
        f.index(index_docs, request_size=args.request_size)

        # Short pause with a banner before switching to query mode.
        # NOTE(review): the source shows this literal broken across a line
        # boundary right after 'behold! '; it is reproduced here as one line.
        banner = colored(
            'behold! im going to switch to query mode',
            'cyan',
            attrs=['underline', 'bold', 'reverse'],
        )
        countdown(3, reason=banner)

        # Evaluate: queries carry their ground truth and go to `/eval`.
        eval_docs = query_generator(
            num_docs=args.num_query, target=targets, with_groundtruth=True
        )
        f.post(
            '/eval',
            eval_docs,
            shuffle=True,
            on_done=print_result,
            request_size=args.request_size,
            parameters={'top_k': args.top_k},
        )

        # write result to html
        # NOTE(review): the flattened source does not show whether this call
        # sat inside or after the `with` block; kept inside — confirm.
        write_html(os.path.join(workdir, 'demo.html'))
def hello_world(args):
    """
    Run the fashion hello-world demo: download data, index it, search, render a report.

    Usage:
        Use it via CLI :command:`jina hello fashion`; see that command's ``--help``
        output for more options.

    Description:
        Creates ``args.workdir`` if needed and downloads the index/query data and
        labels into it. All downloaded index documents are indexed through a Flow of
        ``MyEncoder`` (``replicas=2``) -> ``MyIndexer`` (with ``workspace`` set to the
        working directory). Then ``args.num_query`` query documents are posted to
        ``/search``; each response is handed to ``print_result`` together with the
        ground truths, and ``demo.html`` is written into the working directory.

    :param args: Argparse object
    """
    workdir = args.workdir
    Path(workdir).mkdir(parents=True, exist_ok=True)

    # target name -> (download url, local file name); insertion order matches
    # the resulting `targets` mapping.
    artifacts = {
        'index-labels': (args.index_labels_url, 'index-labels'),
        'query-labels': (args.query_labels_url, 'query-labels'),
        'index': (args.index_data_url, 'index-original'),
        'query': (args.query_data_url, 'query-original'),
    }
    targets = {
        key: {'url': url, 'filename': os.path.join(workdir, basename)}
        for key, (url, basename) in artifacts.items()
    }

    # Fetch everything. `targets[...]['data']` is read further down but never
    # set in this function, so download_data presumably attaches the loaded
    # arrays under that key — TODO confirm.
    download_data(targets, args.download_proxy)

    # Assemble the Flow in code: encoder -> indexer.
    f = Flow()
    f = f.add(uses=MyEncoder, replicas=2)
    f = f.add(uses=MyIndexer, workspace=workdir)

    with f:
        # Index every downloaded index document.
        index_count = targets['index']['data'].shape[0]
        index_docs = index_generator(num_docs=index_count, target=targets)
        f.index(index_docs, show_progress=True)

        # Bind the ground truths as the first argument of print_result.
        # functools.partial objects have no `__name__` of their own, so one is
        # assigned explicitly (presumably the Flow client reads it — verify).
        on_search_done = partial(print_result, get_groundtruths(targets))
        setattr(on_search_done, '__name__', 'evaluate_print_callback')

        search_docs = query_generator(num_docs=args.num_query, target=targets)
        f.post(
            '/search',
            search_docs,
            shuffle=True,
            on_done=on_search_done,
            parameters={'top_k': args.top_k},
            show_progress=True,
        )

        # write result to html
        # NOTE(review): the flattened source does not show whether this call
        # sat inside or after the `with` block; kept inside — confirm.
        write_html(os.path.join(workdir, 'demo.html'))