Example #1
0
def search(
    workspace: Path,
    data_dir: Path,
    flow: Flow,
    threshold: Optional[float],
    top_k: int,
    num_queries: int,
):
    """
    Query an existing index with generated audio clips and write an HTML report.

    The Flow is kept open while the query audio files are generated and the
    ``/search`` request is sent; the report is built after the Flow has closed.

    :param workspace: directory that must already exist; ``demo.html`` is
        written into it
    :param data_dir: directory handed to ``create_query_audios``; the glob
        ``<data_dir>/query/*.mp3`` is then passed to ``create_docs``
    :param flow: Flow used as a context manager and to post the request
    :param threshold: forwarded unchanged to ``report_results``
    :param top_k: forwarded to ``report_results`` and ``write_html``
    :param num_queries: forwarded to ``create_query_audios``
    :raises FileNotFoundError: if ``workspace`` does not exist
    """
    # Guard clause: without a workspace there is nothing to search against.
    if not workspace.exists():
        message = f'The directory {workspace} does not exist. Please index first via `python app.py index`'
        raise FileNotFoundError(message)

    with flow:
        # The query clips are generated first, then matched by the glob below.
        create_query_audios(num_queries, data_dir)
        query_glob = os.path.join(data_dir, 'query', '*.mp3')
        query_docs = create_docs(query_glob)
        responses = flow.post('/search', inputs=query_docs, return_results=True)

    html_body, score = report_results(responses, threshold, top_k)
    report_path = workspace / 'demo.html'
    write_html(str(report_path), html_body, score, top_k)
Example #2
0
def hello_world(args):
    """
    Runs Jina's Hello World.

    Usage:
        Use it via CLI :command:`jina hello-world`.

    Description:
        Creates ``args.workdir``, downloads the index/query data and label
        files, then runs a Flow of ``MyEncoder`` (``parallel=2``),
        ``MyIndexer`` and ``MyEvaluator``. The Flow first indexes the
        downloaded index set, pauses for a 3 second countdown, posts
        ``args.num_query`` queries (with ground truth) to ``/eval`` and
        finally writes ``demo.html`` into the working directory.

    More options can be found in :command:`jina hello-world --help`

    :param args: Argparse object
    """
    workdir = args.workdir
    Path(workdir).mkdir(parents=True, exist_ok=True)

    # (target key, download url, file name inside the working directory)
    sources = (
        ('index-labels', args.index_labels_url, 'index-labels'),
        ('query-labels', args.query_labels_url, 'query-labels'),
        ('index', args.index_data_url, 'index-original'),
        ('query', args.query_data_url, 'query-original'),
    )
    targets = {
        key: {'url': url, 'filename': os.path.join(workdir, basename)}
        for key, url, basename in sources
    }

    # download the data
    # NOTE(review): `download_data` is expected to add a 'data' entry to each
    # target, since it is read below -- confirm against its definition.
    download_data(targets, args.download_proxy)

    # reduce the network load by using `fp16`, or even `uint8`;
    # the working directory is exported through the environment as well
    os.environ.update({'JINA_ARRAY_QUANT': 'fp16', 'HW_WORKDIR': workdir})

    # now comes the real work
    # assemble the flow: encoder -> indexer -> evaluator
    flow = Flow()
    flow = flow.add(uses=MyEncoder, parallel=2)
    flow = flow.add(uses=MyIndexer)
    flow = flow.add(uses=MyEvaluator)

    # run it!
    with flow:
        index_docs = index_generator(
            num_docs=targets['index']['data'].shape[0], target=targets
        )
        flow.index(index_docs, request_size=args.request_size)

        # wait for couple of seconds
        switch_notice = colored(
            'behold! im going to switch to query mode',
            'cyan',
            attrs=['underline', 'bold', 'reverse'],
        )
        countdown(3, reason=switch_notice)

        eval_docs = query_generator(
            num_docs=args.num_query, target=targets, with_groundtruth=True
        )
        flow.post(
            '/eval',
            eval_docs,
            shuffle=True,
            on_done=print_result,
            request_size=args.request_size,
            parameters={'top_k': args.top_k},
        )

        # write result to html
        write_html(os.path.join(workdir, 'demo.html'))
Example #3
0
def hello_world(args):
    """
    Runs Jina's Hello World.

    Usage:
        Use it via CLI :command:`jina hello fashion`.

    Description:
        Creates ``args.workdir``, downloads the index/query data and label
        files, then runs a Flow of ``MyEncoder`` (``replicas=2``) and
        ``MyIndexer`` (with ``args.workdir`` as its workspace). The Flow
        first indexes the downloaded index set, then posts ``args.num_query``
        queries to ``/search``; each response is handed to ``print_result``
        together with the ground truths. Finally ``demo.html`` is written
        into the working directory.

    More options can be found in :command:`jina hello fashion --help`

    :param args: Argparse object
    """
    workdir = args.workdir
    Path(workdir).mkdir(parents=True, exist_ok=True)

    # (target key, download url, file name inside the working directory)
    sources = (
        ('index-labels', args.index_labels_url, 'index-labels'),
        ('query-labels', args.query_labels_url, 'query-labels'),
        ('index', args.index_data_url, 'index-original'),
        ('query', args.query_data_url, 'query-original'),
    )
    targets = {
        key: {'url': url, 'filename': os.path.join(workdir, basename)}
        for key, url, basename in sources
    }

    # download the data
    # NOTE(review): `download_data` is expected to add a 'data' entry to each
    # target, since it is read below -- confirm against its definition.
    download_data(targets, args.download_proxy)

    # now comes the real work
    # assemble the flow: encoder -> indexer
    flow = Flow()
    flow = flow.add(uses=MyEncoder, replicas=2)
    flow = flow.add(uses=MyIndexer, workspace=workdir)

    # run it!
    with flow:
        index_docs = index_generator(
            num_docs=targets['index']['data'].shape[0], target=targets
        )
        flow.index(index_docs, show_progress=True)

        # Bind the ground truths as the callback's first argument.
        evaluate_print_callback = partial(print_result, get_groundtruths(targets))
        # A `partial` object has no `__name__` of its own, so one is set here;
        # presumably the Flow reads the callback's name -- TODO confirm.
        evaluate_print_callback.__name__ = 'evaluate_print_callback'

        query_docs = query_generator(num_docs=args.num_query, target=targets)
        flow.post(
            '/search',
            query_docs,
            shuffle=True,
            on_done=evaluate_print_callback,
            parameters={'top_k': args.top_k},
            show_progress=True,
        )

        # write result to html
        write_html(os.path.join(workdir, 'demo.html'))