def finfindr_feature_extract_aid_batch(ibs, aid_list, jobs=None, **kwargs):
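    """ Extracts finfindr features for a batch of annotation chips.

    Ensures up to MAXJOBS finfindr Docker container clones, assigns each
    annotation chip to a backend URL in round-robin order, and runs the
    extraction helper in parallel, returning one JSON result per annotation.
    """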

    MAXJOBS = 4

    if jobs is None:
        jobs = ut.num_cpus()

    jobs = min(jobs, len(aid_list))
    jobs = min(jobs, MAXJOBS)

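    # Ensure one finfindr container clone per worker and collect its backend URL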
    url_clone_list = []
    for job in range(jobs):
        container_name = 'flukebook_finfindr'
        urls_clone = ibs.docker_ensure(container_name, clone=job)

        if len(urls_clone) == 0:
            raise RuntimeError('Could not ensure container clone')
        elif len(urls_clone) == 1:
            url_clone = urls_clone[0]
        else:
            url_clone = urls_clone[0]
            args = (
                urls_clone,
                url_clone,
            )
            logger.info(
                '[WARNING] Multiple BACKEND_URLS:\n\tFound: %r\n\tUsing: %r' %
                args)
        url_clone_list.append(url_clone)

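    # Ensure the annotation chips exist on disk as .jpg files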
    config = {
        'ext': '.jpg',
    }
    fpath_list = ibs.get_annot_chip_fpath(aid_list,
                                          ensure=True,
                                          config2_=config)

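    # Assign each chip path to a backend URL in round-robin order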
    url_list = []
    index = 0
    for fpath in fpath_list:
        url = url_clone_list[index]
        url_list.append(url)

        index += 1
        index %= len(url_clone_list)

    args_list = list(zip(url_list, fpath_list))

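    # Run the extraction helper in parallel across the container clones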
    json_result_gen = ut.generate2(
        finfindr_feature_extract_aid_helper,
        args_list,
        nTasks=len(args_list),
        nprocs=jobs,
        ordered=True,
    )

    json_result_list = list(json_result_gen)

    return json_result_list
Example #2
def resize_imagelist_generator(gpath_list, new_gpath_list, newsize_list, **kwargs):
    """ Resizes images and yeilds results asynchronously  """
    # Compute and write thumbnail in asychronous process
    kwargs['force_serial'] = kwargs.get('force_serial', True)
    kwargs['ordered']      = kwargs.get('ordered', True)
    arg_iter = zip(gpath_list, new_gpath_list, newsize_list)
    arg_list = list(arg_iter)
    return ut.generate2(_resize_worker, arg_list, **kwargs)
Example #3
def classify(vector_list, weight_filepath, verbose=VERBOSE_SVM, **kwargs):
    """
    Args:
        thumbail_list (list of str): the list of image thumbnails that need classifying

    Returns:
        iter
    """
    import multiprocessing
    import numpy as np

    # Get correct weight if specified with shorthand
    if weight_filepath in CONFIG_URL_DICT:
        weight_url = CONFIG_URL_DICT[weight_filepath]
        if weight_url.endswith('.zip'):
            weight_filepath = ut.grab_zipped_url(weight_url, appname='ibeis')
        else:
            weight_filepath = ut.grab_file_url(weight_url, appname='ibeis',
                                               check_hash=True)

    # Get ensemble
    is_ensemble = isdir(weight_filepath)
    if is_ensemble:
        weight_filepath_list = sorted([
            join(weight_filepath, filename) for filename in listdir(weight_filepath)
            if isfile(join(weight_filepath, filename))
        ])
    else:
        weight_filepath_list = [weight_filepath]
    num_weights = len(weight_filepath_list)
    assert num_weights > 0

    # Form dictionaries
    num_vectors = len(vector_list)
    index_list = list(range(num_vectors))

    # Generate parallelized wrapper
    OLD = False
    if is_ensemble and OLD:
        vectors_list = [ vector_list for _ in range(num_weights) ]
        args_list = zip(weight_filepath_list, vectors_list)
        nTasks = num_weights
        print('Processing ensemble in parallel using %d weight files' % (num_weights, ))
    else:
        num_cpus = multiprocessing.cpu_count()
        vector_batch = int(np.ceil(float(num_vectors) / num_cpus))
        vector_rounds = int(np.ceil(float(num_vectors) / vector_batch))

        args_list = []
        for vector_round in range(vector_rounds):
            start_index = vector_round * vector_batch
            stop_index = (vector_round + 1) * vector_batch
            assert start_index < num_vectors
            stop_index = min(stop_index, num_vectors)
            # print('Slicing index range: [%r, %r)' % (start_index, stop_index, ))

            # Slice gids and get feature data
            index_list_ = list(range(start_index, stop_index))
            vector_list_ = vector_list[start_index: stop_index]
            assert len(index_list_) == len(vector_list_)
            for weight_filepath in weight_filepath_list:
                args = (weight_filepath, vector_list_, index_list_)
                args_list.append(args)

        nTasks = len(args_list)
        print('Processing vectors in parallel using vector_batch = %r' % (vector_batch, ))

    # Perform inference
    classify_iter = ut.generate2(classify_helper, args_list, nTasks=nTasks,
                                 ordered=True, force_serial=False)

    # Classify with SVM for each image vector
    score_dict = { index: [] for index in index_list }
    class_dict = { index: [] for index in index_list }
    for score_dict_, class_dict_ in classify_iter:
        for index in index_list:
            if index in score_dict_:
                score_dict[index] += score_dict_[index]
            if index in class_dict_:
                class_dict[index] += class_dict_[index]

    # Organize and compute mode and average for class and score
    for index in index_list:
        score_list_ = score_dict[index]
        class_list_ = class_dict[index]
        score_ = sum(score_list_) / len(score_list_)
        class_ = max(set(class_list_), key=class_list_.count)
        class_ = 'positive' if int(class_) == 1 else 'negative'
        yield score_, class_
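All three examples above funnel their work through ut.generate2, which maps a worker function over a list of argument tuples and yields the results. The sketch below is a minimal, hypothetical illustration of that shared pattern, not code from the source: the worker and its arguments are placeholders, the keyword names simply mirror how the examples above call generate2, and it assumes generate2 unpacks each argument tuple into the worker (as the multi-argument tuples above suggest).

import utool as ut

def _square_worker(value):
    # Placeholder worker: any top-level function of the unpacked argument tuple
    return value * value

args_list = [(index, ) for index in range(8)]
result_gen = ut.generate2(
    _square_worker,
    args_list,
    nTasks=len(args_list),  # total number of tasks, as in the examples above
    ordered=True,           # yield results in the same order as args_list
    force_serial=False,     # set True to run everything in the calling process
)
result_list = list(result_gen)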