Example #1
    def __init__(self, server, sizing_mode="stretch_both", **kwargs):
        self.server = server
        self.counter_figures = {}
        self.counter_sources = {}
        self.digest_figures = {}
        self.digest_sources = {}
        self.sizing_mode = sizing_mode

        if self.server.digests:
            for name in self.server.digests:
                self.add_digest_figure(name)
        for name in self.server.counters:
            self.add_counter_figure(name)

        figures = merge(self.digest_figures, self.counter_figures)
        figures = [figures[k] for k in sorted(figures)]

        if len(figures) <= 5:
            self.root = column(figures, sizing_mode=sizing_mode)
        else:
            self.root = column(
                *[
                    row(*pair, sizing_mode=sizing_mode)
                    for pair in partition_all(2, figures)
                ],
                sizing_mode=sizing_mode,
            )
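
The layout logic above hinges on `toolz.partition_all(2, figures)`, which yields successive pairs and a final 1-tuple when the figure count is odd, so each Bokeh `row` receives at most two figures. A minimal sketch of that pairing, using placeholder strings instead of real Bokeh figures:

from toolz import partition_all

figures = ["fig_a", "fig_b", "fig_c", "fig_d", "fig_e"]  # stand-ins for Bokeh figures
print(list(partition_all(2, figures)))
# [('fig_a', 'fig_b'), ('fig_c', 'fig_d'), ('fig_e',)]
# row(*pair, ...) therefore gets two figures per row, or a single figure in the last row.
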
Example #2
async def test_broken_worker_during_computation(c, s, a, b):
    s.allowed_failures = 100
    n = await Nanny(s.address, nthreads=2, loop=s.loop)

    start = time()
    while len(s.nthreads) < 3:
        await asyncio.sleep(0.01)
        assert time() < start + 5

    N = 256
    expected_result = N * (N + 1) // 2
    i = 0
    L = c.map(inc, range(N), key=["inc-%d-%d" % (i, j) for j in range(N)])
    while len(L) > 1:
        i += 1
        L = c.map(
            slowadd,
            *zip(*partition_all(2, L)),
            key=["add-%d-%d" % (i, j) for j in range(len(L) // 2)],
        )

    await asyncio.sleep(random.random() / 20)
    with suppress(CommClosedError):  # comm will be closed abruptly
        await c._run(os._exit, 1, workers=[n.worker_address])

    await asyncio.sleep(random.random() / 20)
    while len(s.workers) < 3:
        await asyncio.sleep(0.01)

    # perhaps new worker can't be contacted yet
    with suppress(CommClosedError, EnvironmentError):
        await c._run(os._exit, 1, workers=[n.worker_address])

    [result] = await c.gather(L)
    assert isinstance(result, int)
    assert result == expected_result

    await n.close()
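
Each pass through the `while len(L) > 1` loop halves the list of futures: `partition_all(2, L)` groups adjacent futures into pairs, and `zip(*...)` transposes those pairs into two aligned argument lists for `c.map(slowadd, ...)`. A small sketch of that transposition on plain strings (the test passes futures, and keeps the list length even by starting from a power of two):

from toolz import partition_all

L = ["x0", "x1", "x2", "x3"]
firsts, seconds = zip(*partition_all(2, L))
print(firsts, seconds)  # ('x0', 'x2') ('x1', 'x3')
# c.map(slowadd, firsts, seconds) then computes slowadd('x0', 'x1') and slowadd('x2', 'x3').
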
Example #3
def _reduction(
    ph: PartitionedHistogram,
    split_every: int | None = None,
) -> AggHistogram:
    if split_every is None:
        split_every = 4
    if split_every is False:
        split_every = ph.npartitions

    token = tokenize(ph, sum, split_every)
    name = f"hist-aggregate-{token}"
    k = ph.npartitions
    b = ph.name
    d = 0
    dsk = {}
    while k > split_every:
        c = f"{name}{d}"
        for i, inds in enumerate(partition_all(split_every, range(k))):
            dsk[(c, i)] = (
                empty_safe_aggregate,
                sum,
                [(b, j) for j in inds],
                False,
            )
        k = i + 1
        b = c
        d += 1
    dsk[(name, 0)] = (
        empty_safe_aggregate,
        sum,
        [(b, j) for j in range(k)],
        True,
    )

    dsk[name] = dsk.pop((name, 0))  # type: ignore
    g = HighLevelGraph.from_collections(name, dsk, dependencies=[ph])
    return AggHistogram(g, name, histref=ph.histref)
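
The `while k > split_every` loop builds the aggregation tree one layer at a time: each pass groups the current `k` keys into batches of at most `split_every` with `partition_all`, so `k` shrinks to `ceil(k / split_every)` per layer until a single final aggregate remains. A quick sketch of how the layer widths evolve, using hypothetical numbers (20 partitions, split_every=4):

from toolz import partition_all

k, split_every = 20, 4  # hypothetical partition count and fan-in
while k > split_every:
    groups = list(partition_all(split_every, range(k)))
    print(k, "->", len(groups), "intermediate aggregates")
    k = len(groups)  # same as ceil(k / split_every)
# 20 -> 5 intermediate aggregates
# 5 -> 2 intermediate aggregates
# The remaining 2 keys feed the single (name, 0) task.
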
Example #4
def test_text_blocks_to_pandas_blocked(reader, files):
    header = files["2014-01-01.csv"].split(b"\n")[0] + b"\n"
    blocks = []
    for k in sorted(files):
        b = files[k]
        lines = b.split(b"\n")
        blocks.append([b"\n".join(bs) for bs in partition_all(2, lines)])

    df = text_blocks_to_pandas(reader, blocks, header, expected.head(), {})
    assert_eq(
        df.compute().reset_index(drop=True),
        expected.reset_index(drop=True),
        check_dtype=False,
    )

    expected2 = expected[["name", "id"]]
    df = text_blocks_to_pandas(
        reader, blocks, header, expected2.head(), {"usecols": ["name", "id"]}
    )
    assert_eq(
        df.compute().reset_index(drop=True),
        expected2.reset_index(drop=True),
        check_dtype=False,
    )
Example #5
def partial_reduce(func,
                   x,
                   split_every,
                   keepdims=False,
                   dtype=None,
                   name=None,
                   reduced_meta=None):
    """Partial reduction across multiple axes.

    Parameters
    ----------
    func : function
    x : Array
    split_every : dict
        Maximum reduction block sizes in each dimension.

    Examples
    --------
    Reduce across axis 0 and 2, merging a maximum of 1 block in the 0th
    dimension, and 3 blocks in the 2nd dimension:

    >>> partial_reduce(np.min, x, {0: 1, 2: 3})    # doctest: +SKIP
    """
    name = ((name or funcname(func)) + "-" +
            tokenize(func, x, split_every, keepdims, dtype))
    parts = [
        list(partition_all(split_every.get(i, 1), range(n)))
        for (i, n) in enumerate(x.numblocks)
    ]
    keys = product(*map(range, map(len, parts)))
    out_chunks = [
        tuple(1 for p in partition_all(split_every[i], c))
        if i in split_every else c for (i, c) in enumerate(x.chunks)
    ]
    if not keepdims:
        out_axis = [i for i in range(x.ndim) if i not in split_every]
        getter = lambda k: get(out_axis, k)
        keys = map(getter, keys)
        out_chunks = list(getter(out_chunks))
    dsk = {}
    for k, p in zip(keys, product(*parts)):
        decided = {i: j[0] for i, j in enumerate(p) if len(j) == 1}
        dummy = {i: j for i, j in enumerate(p) if i not in decided}
        g = lol_tuples((x.name, ), range(x.ndim), decided, dummy)
        dsk[(name, ) + k] = (func, g)
    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[x])

    meta = x._meta
    if reduced_meta is not None:
        try:
            meta = func(reduced_meta, computing_meta=True)
        # no meta keyword argument exists for func, and it isn't required
        except TypeError:
            meta = func(reduced_meta)
        # when no work can be computed on the empty array (e.g., func is a ufunc)
        except ValueError:
            pass

    # some functions can't compute empty arrays (those for which reduced_meta
    # fall into the ValueError exception) and we have to rely on reshaping
    # the array according to len(out_chunks)
    if is_arraylike(meta) and meta.ndim != len(out_chunks):
        if len(out_chunks) == 0:
            meta = meta.sum()
        else:
            meta = meta.reshape((0, ) * len(out_chunks))

    if np.isscalar(meta):
        return Array(graph, name, out_chunks, dtype=dtype)
    else:
        with ignoring(AttributeError):
            meta = meta.astype(dtype)
        return Array(graph, name, out_chunks, meta=meta)
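
The output shape comes from the `out_chunks` expression: for every axis listed in `split_every`, each group of up to `split_every[i]` input blocks collapses to one output block of size 1, while the other axes keep their original chunking. A short sketch of that computation with made-up chunk sizes:

from toolz import partition_all

chunks = ((2, 2, 2, 2, 2), (3, 3), (4, 4, 4))  # hypothetical block sizes per axis
split_every = {0: 3, 2: 2}                     # reduce axes 0 and 2

out_chunks = [
    tuple(1 for _ in partition_all(split_every[i], c)) if i in split_every else c
    for i, c in enumerate(chunks)
]
print(out_chunks)  # [(1, 1), (3, 3), (1, 1)]
# Axis 0: 5 blocks grouped 3 at a time -> 2 output blocks; axis 2: 3 blocks grouped 2 at a time -> 2.
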
Example #6
def reduction(
    args,
    chunk=None,
    aggregate=None,
    combine=None,
    meta=None,
    token=None,
    chunk_kwargs=None,
    aggregate_kwargs=None,
    combine_kwargs=None,
    split_every=None,
    **kwargs,
):
    """Generic tree reduction operation.

    Parameters
    ----------
    args :
        Positional arguments for the `chunk` function. All `dask.dataframe`
        objects should be partitioned and indexed equivalently.
    chunk : function [block-per-arg] -> block
        Function to operate on each block of data
    aggregate : function list-of-blocks -> block
        Function to operate on the list of results of chunk
    combine : function list-of-blocks -> block, optional
        Function to operate on intermediate lists of results of chunk
        in a tree-reduction. If not provided, defaults to aggregate.
    $META
    token : str, optional
        The name to use for the output keys.
    chunk_kwargs : dict, optional
        Keywords for the chunk function only.
    aggregate_kwargs : dict, optional
        Keywords for the aggregate function only.
    combine_kwargs : dict, optional
        Keywords for the combine function only.
    split_every : int, optional
        Group partitions into groups of this size while performing a
        tree-reduction. If set to False, no tree-reduction will be used,
        and all intermediates will be concatenated and passed to ``aggregate``.
        Default is 8.
    kwargs :
        All remaining keywords will be passed to ``chunk``, ``aggregate``, and
        ``combine``.
    """
    if chunk_kwargs is None:
        chunk_kwargs = dict()
    if aggregate_kwargs is None:
        aggregate_kwargs = dict()
    chunk_kwargs.update(kwargs)
    aggregate_kwargs.update(kwargs)

    if combine is None:
        if combine_kwargs:
            raise ValueError("`combine_kwargs` provided with no `combine`")
        combine = aggregate
        combine_kwargs = aggregate_kwargs
    else:
        if combine_kwargs is None:
            combine_kwargs = dict()
        combine_kwargs.update(kwargs)

    if not isinstance(args, (tuple, list)):
        args = [args]

    npartitions = {arg.npartitions for arg in args if isinstance(arg, _Frame)}
    if len(npartitions) > 1:
        raise ValueError("All arguments must have same number of partitions")
    npartitions = npartitions.pop()

    if split_every is None:
        split_every = 8
    elif split_every is False:
        split_every = npartitions
    elif not isinstance(split_every, int) or split_every < 2:
        raise ValueError("split_every must be an integer >= 2")

    token_key = tokenize(
        token or (chunk, aggregate),
        meta,
        args,
        chunk_kwargs,
        aggregate_kwargs,
        combine_kwargs,
        split_every,
    )

    # Chunk
    a = f"{token or funcname(chunk)}-chunk-{token_key}"
    if len(args) == 1 and isinstance(args[0], _Frame) and not chunk_kwargs:
        dsk = {(a, 0, i): (chunk, key)
               for i, key in enumerate(args[0].__dask_keys__())}
    else:
        dsk = {
            (a, 0, i): (
                apply,
                chunk,
                [(x._name, i) if isinstance(x, _Frame) else x for x in args],
                chunk_kwargs,
            )
            for i in range(args[0].npartitions)
        }

    # Combine
    b = f"{token or funcname(combine)}-combine-{token_key}"
    k = npartitions
    depth = 0
    while k > split_every:
        for part_i, inds in enumerate(partition_all(split_every, range(k))):
            conc = (list, [(a, depth, i) for i in inds])
            dsk[(b, depth + 1, part_i)] = (
                (apply, combine, [conc], combine_kwargs)
                if combine_kwargs
                else (combine, conc)
            )
        k = part_i + 1
        a = b
        depth += 1

    # Aggregate
    b = f"{token or funcname(aggregate)}-agg-{token_key}"
    conc = (list, [(a, depth, i) for i in range(k)])
    if aggregate_kwargs:
        dsk[(b, 0)] = (apply, aggregate, [conc], aggregate_kwargs)
    else:
        dsk[(b, 0)] = (aggregate, conc)

    if meta is None:
        meta_chunk = _emulate(apply, chunk, args, chunk_kwargs)
        meta = _emulate(apply, aggregate, [[meta_chunk]], aggregate_kwargs)
    meta = dask_make_meta(meta)

    graph = HighLevelGraph.from_collections(b, dsk, dependencies=args)
    return dd.core.new_dd_object(graph, b, meta, (None, None))
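
This helper is normally reached through the `reduction` method on dask DataFrames and Series rather than called directly. A hedged usage sketch of that public method (assuming a dask.dataframe version with the classic `.reduction` API; the numbers are made up): `chunk` returns one value per partition and `aggregate` receives the per-partition results collected into a pandas object.

import pandas as pd
import dask.dataframe as dd

ddf = dd.from_pandas(pd.DataFrame({"x": range(100)}), npartitions=10)

# Count rows per partition with `chunk`, then tree-reduce the per-partition
# counts with `aggregate`, grouping at most 4 intermediates per tree node.
total = ddf.reduction(
    lambda part: part.count(),
    aggregate=lambda counts: counts.sum(),
    split_every=4,
)
print(total.compute())  # x    100
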
Example #7
def categorize(df, columns=None, index=None, split_every=None, **kwargs):
    """Convert columns of the DataFrame to category dtype.

    Parameters
    ----------
    columns : list, optional
        A list of column names to convert to categoricals. By default any
        column with an object dtype is converted to a categorical, and any
        unknown categoricals are made known.
    index : bool, optional
        Whether to categorize the index. By default, object indices are
        converted to categorical, and unknown categorical indices are made
        known. Set True to always categorize the index, False to never.
    split_every : int, optional
        Group partitions into groups of this size while performing a
        tree-reduction. If set to False, no tree-reduction will be used.
        Default is 16.
    kwargs
        Keyword arguments are passed on to compute.
    """
    meta = df._meta
    if columns is None:
        columns = list(meta.select_dtypes(["object", "category"]).columns)
    elif is_scalar(columns):
        columns = [columns]

    # Filter out known categorical columns
    columns = [
        c for c in columns if not (
            is_categorical_dtype(meta[c]) and has_known_categories(meta[c]))
    ]

    if index is not False:
        if is_categorical_dtype(meta.index):
            index = not has_known_categories(meta.index)
        elif index is None:
            index = meta.index.dtype == object

    # Nothing to do
    if not len(columns) and index is False:
        return df

    if split_every is None:
        split_every = 16
    elif split_every is False:
        split_every = df.npartitions
    elif not isinstance(split_every, Integral) or split_every < 2:
        raise ValueError("split_every must be an integer >= 2")

    token = tokenize(df, columns, index, split_every)
    a = "get-categories-chunk-" + token
    dsk = {(a, i): (_get_categories, key, columns, index)
           for (i, key) in enumerate(df.__dask_keys__())}

    prefix = "get-categories-agg-" + token
    k = df.npartitions
    depth = 0
    while k > split_every:
        b = prefix + str(depth)
        for part_i, inds in enumerate(partition_all(split_every, range(k))):
            dsk[(b, part_i)] = (_get_categories_agg, [(a, i) for i in inds])
        k = part_i + 1
        a = b
        depth += 1

    dsk[(prefix, 0)] = (_get_categories_agg, [(a, i) for i in range(k)])
    dsk.update(df.dask)

    # Compute the categories
    categories, index = compute_as_if_collection(df.__class__, dsk,
                                                 (prefix, 0), **kwargs)

    # some operations like get_dummies() rely on the order of categories
    categories = {k: v.sort_values() for k, v in categories.items()}

    # Categorize each partition
    return df.map_partitions(_categorize_block, categories, index)
Example #8
def _correct_errors(ra, err_rate, p_value=0.05):

    # True: use Dask's broadcast (ra is transferred via inproc/tcp)
    # False: each worker reads ra.pickle from disk
    use_dask_broadcast = False

    log.debug(
        "Available CPU / RAM: {} / {} GB".format(
            _get_cpu_count(), int(_get_available_memory() / 1024 ** 3)
        ),
        module_name="rmt_correction",
    )

    n_workers = _calc_max_workers(ra)

    log.debug(
        "Estimated optimum n_workers: {}".format(n_workers),
        module_name="rmt_correction",
    )

    if int(os.environ.get("SEQC_MAX_WORKERS", 0)) > 0:
        n_workers = int(os.environ.get("SEQC_MAX_WORKERS"))
        log.debug(
            "n_workers overridden with SEQC_MAX_WORKERS: {}".format(n_workers),
            module_name="rmt_correction",
        )

    # n_workers = 1
    # p_value = 0.005

    # configure dask.distributed
    # memory_terminate_fraction doesn't work for some reason
    # https://github.com/dask/distributed/issues/3519
    # https://docs.dask.org/en/latest/setup/single-distributed.html#localcluster
    # https://docs.dask.org/en/latest/scheduling.html#local-threads
    worker_kwargs = {
        "n_workers": n_workers,
        "threads_per_worker": 1,
        "processes": True,
        "memory_limit": "64G",
        "memory_target_fraction": 0.95,
        "memory_spill_fraction": 0.99,
        "memory_pause_fraction": False,
        # "memory_terminate_fraction": False,
    }

    # do not kill worker at 95% memory level
    dask.config.set({"distributed.worker.memory.terminate": False})
    dask.config.set({"distributed.scheduler.allowed-failures": 50})

    # setup Dask distributed client
    cluster = LocalCluster(**worker_kwargs)
    client = Client(cluster)

    # debug message
    log.debug(
        "Dask processes={} threads={}".format(
            len(client.nthreads().values()), np.sum(list(client.nthreads().values()))
        ),
        module_name="rmt_correction",
    )
    log.debug(
        "Dask worker_kwargs "
        + " ".join([f"{k}={v}" for k, v in worker_kwargs.items()]),
        module_name="rmt_correction",
    )
    log.debug("Dask Dashboard=" + client.dashboard_link, module_name="rmt_correction")

    # group by cells (same cell barcodes as one group)
    log.debug("Grouping...", module_name="rmt_correction")
    indices_grouped_by_cells = ra.group_indices_by_cell()

    if use_dask_broadcast:
        # send readarray in advance to all workers (i.e. broadcast=True)
        # this way, we reduce the serialization time
        log.debug("Scattering ReadArray...", module_name="rmt_correction")
        [future_ra] = client.scatter([ra], broadcast=True)
    else:
        # write ra to a pickle file that will be read later for parallel RMT correction
        with open("pre-correction-ra.pickle", "wb") as fout:
            pickle.dump(ra, fout, protocol=4)

    # correct errors per cell group in parallel
    log.debug("Submitting jobs to Dask...", module_name="rmt_correction")
    with performance_report(filename="dask-report.html"):
        futures = []

        # distribute chunks to workers evenly
        n_chunks = math.ceil(len(indices_grouped_by_cells) / n_workers)
        chunks = partition_all(n_chunks, indices_grouped_by_cells)

        for chunk in tqdm(chunks, disable=None):

            future = client.submit(
                _correct_errors_by_cell_group_chunks,
                future_ra if use_dask_broadcast else None,
                chunk,
                err_rate,
                p_value,
            )
            futures.append(future)

        # wait until all done
        log.debug("Waiting untill all tasks complete...", module_name="rmt_correction")
        completed, not_completed = wait(futures)

    if len(not_completed) > 0:
        raise Exception("There are uncompleted tasks!")

    # gather the results and release
    log.debug(
        "Collecting the task results from the workers...", module_name="rmt_correction"
    )
    results = []
    for future in tqdm(completed, disable=None):
        # this returns a list of lists
        # len(result) should be the number of chunks e.g. 50
        result = future.result()

        # remove empty lists
        result = list(filter(lambda x: len(x) > 0, result))

        # aggregate and release
        results.extend(result)
        future.release()

    # clean up
    del futures
    del completed
    del not_completed

    client.shutdown()
    client.close()

    # iterate through the list of returned read indices and donor rmts
    # create a mapping table of pre-/post-correction
    mapping = set()
    for result in results:
        for idx, idx_corrected_rmt in result:

            # record the pre-/post-correction pair
            # skip if this correction has already been recorded
            if (
                ra.data["cell"][idx],
                ra.data["rmt"][idx],
                ra.data["rmt"][idx_corrected_rmt],
            ) in mapping:
                continue

            mapping.add(
                (
                    ra.data["cell"][idx],
                    ra.data["rmt"][idx],
                    ra.data["rmt"][idx_corrected_rmt],
                )
            )

    # iterate through the list of returned read indices and donor rmts again
    # and update the read array object with the corrected UMIs
    for result in results:
        for idx, idx_corrected_rmt in result:

            # skip if it's already marked as rmt error
            if ra.data["status"][idx_corrected_rmt] & ra.filter_codes["rmt_error"]:
                continue

            # correct
            ra.data["rmt"][idx] = ra.data["rmt"][idx_corrected_rmt]

            # report error
            ra.data["status"][idx] |= ra.filter_codes["rmt_error"]

    return pd.DataFrame(mapping, columns=["CB", "UR", "UB"])
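
The chunking step above spreads the cell groups evenly across the Dask workers: with a chunk size of `ceil(len(groups) / n_workers)` (the variable named `n_chunks` above), `partition_all` yields at most `n_workers` chunks, so each worker receives roughly one `client.submit` call. A tiny sketch of that arithmetic with hypothetical sizes:

import math
from toolz import partition_all

indices_grouped_by_cells = list(range(10))  # hypothetical: 10 cell groups
n_workers = 4

chunk_size = math.ceil(len(indices_grouped_by_cells) / n_workers)  # 3 items per chunk
chunks = list(partition_all(chunk_size, indices_grouped_by_cells))
print(len(chunks), chunks)
# 4 [(0, 1, 2), (3, 4, 5), (6, 7, 8), (9,)]
# One client.submit(...) per chunk, i.e. at most n_workers tasks in flight.
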
Example #9
def npoclass(inputs,
             gpu_core=True,
             model_path=None,
             ntee_type='bc',
             n_jobs=4,
             backend='multiprocessing',
             batch_size_dl=64,
             verbose=1):

    # Set the seed value all over the place to make this reproducible.
    seed_val = 42
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)

    # Check model files.
    if ntee_type == 'bc' and model_path is None:
        raise ValueError(
            "Make sure model files/path are correct. Please download from https://jima.me/open/npoclass_model_bc.zip, unzip, and specify model_path (default set to None)."
        )
    if ntee_type == 'mg' and model_path is None:
        raise ValueError(
            "Make sure model files/path are correct. Please download from https://jima.me/open/npoclass_model_mg.zip, unzip, and specify model_path (default set to None)."
        )

    # Check ntee type.
    if ntee_type == 'bc':
        le_file_name = 'le_broad_cat.pkl'
    elif ntee_type == 'mg':
        le_file_name = 'le_major_group.pkl'
    else:
        raise ValueError(
            "ntee_type must be 'bc' (broad category) or 'mg' (major group)")

    # Read the model and label encoder, if not already loaded.
    global model_loaded, tokenizer_loaded, label_encoder
    try:
        assert model_loaded
        assert tokenizer_loaded
        assert label_encoder
    except (NameError, AssertionError):
        # Load a pretrained model and tokenizer.
        model_loaded = BertForSequenceClassification.from_pretrained(
            model_path)
        tokenizer_loaded = BertTokenizer.from_pretrained(model_path)
        # Read label encoder.
        with open(model_path + le_file_name, 'rb') as label_encoder_pkl:
            label_encoder = pickle.load(label_encoder_pkl)

    # Select acceleration method.
    if gpu_core and torch.cuda.is_available():
        print('There are %d GPU(s) available.' % torch.cuda.device_count(),
              'Using GPU:', torch.cuda.get_device_name(0))
        torch.cuda.manual_seed_all(seed_val)
        device = torch.device('cuda')
        model_loaded.cuda()
    else:
        print('No GPU acceleration available or gpu_core=False, using CPU.')
        device = torch.device('cpu')
        model_loaded.cpu()
    print('Encoding inputs ...')
    sleep(.5)  # Pause briefly so printed output stays ordered.

    # Encode inputs.
    global func_encode_string, func_encode_string_batch  # Define as global, otherwise cannot pickle or very slow.

    def func_encode_string(text_string):
        encoded_dict = tokenizer_loaded.encode_plus(
            text_string,
            add_special_tokens=True,  # Add '[CLS]' and '[SEP]'
            truncation='longest_first',
            padding='max_length',  # Max length accepted by model.
            return_attention_mask=True,  # Construct attn. masks.
            return_tensors='pt',  # Return pytorch tensors.
        )
        return encoded_dict

    def func_encode_string_batch(text_strings):
        encoded_dicts = []
        for text_string in text_strings:
            encoded_dicts += [func_encode_string(text_string)]
        return encoded_dicts

    # Tokenize all of the sentences and map the tokens to their word IDs.
    input_ids = []
    attention_masks = []
    # Encode input string(s).
    if isinstance(inputs, list):
        if backend == 'multiprocessing':  # Multiprocessing is faster than loky in processing large objects.
            encoded_outputs = Parallel(
                n_jobs=n_jobs,
                backend="multiprocessing",
                batch_size='auto',
                verbose=verbose)(delayed(func_encode_string)(text_string)
                                 for text_string in inputs)
            for encoded_output in encoded_outputs:
                # Add the encoded sentence to the list.
                input_ids.append(encoded_output['input_ids'])
                # And its attention mask (simply differentiates padding from non-padding).
                attention_masks.append(encoded_output['attention_mask'])
        elif backend == 'sequential':
            for text_string in tqdm(inputs):
                encoded_output = func_encode_string(text_string)
                # Add the encoded sentence to the list.
                input_ids.append(encoded_output['input_ids'])
                # And its attention mask (simply differentiates padding from non-padding).
                attention_masks.append(encoded_output['attention_mask'])
        elif backend == 'dask':
            # Assumes a dask.distributed Client named `client` is already in scope.
            with joblib.parallel_backend('dask'):
                n_jobs = len(
                    client.scheduler_info()['workers'])  # Get the number of workers.
                string_chunks = partition_all(
                    math.ceil(len(inputs) / n_jobs),
                    inputs)  # Collect into groups sized by the worker count.
                encoded_outputs = Parallel(
                    n_jobs=-1, batch_size='auto', verbose=verbose)(
                        delayed(func_encode_string_batch)(text_strings)
                        for text_strings in string_chunks)
                encoded_outputs = itertools.chain(*encoded_outputs)
            for encoded_output in encoded_outputs:
                # Add the encoded sentence to the list.
                input_ids.append(encoded_output['input_ids'])
                # And its attention mask (simply differentiates padding from non-padding).
                attention_masks.append(encoded_output['attention_mask'])
    if isinstance(inputs, str):
        encoded_output = func_encode_string(inputs)
        input_ids = [encoded_output['input_ids']]
        attention_masks = [encoded_output['attention_mask']]

    # Convert the lists into tensors.
    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)

    # Prepare dataloader for efficient calculation.
    pred_data = TensorDataset(input_ids, attention_masks)
    pred_sampler = SequentialSampler(pred_data)
    pred_dataloader = DataLoader(pred_data,
                                 sampler=pred_sampler,
                                 batch_size=batch_size_dl)

    # Start prediction.
    model_loaded.eval()
    logits_all = []
    print('Predicting categories ...')
    sleep(.5)  # Pause briefly so printed output stays ordered.
    for batch in tqdm(pred_dataloader, mininterval=10):
        # Add batch to the pre-chosen device
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask = batch
        with torch.no_grad():
            outputs = model_loaded(b_input_ids,
                                   token_type_ids=None,
                                   attention_mask=b_input_mask)
        logits_all += outputs[0].tolist()

    # Calculate probabilities from the logits.
    logits_prob = tf.nn.sigmoid(logits_all).numpy().tolist()
    # Find the positions of max values in logits.
    logits_max = np.argmax(logits_prob, axis=1)
    # Transfer to labels.
    logits_labels = label_encoder.inverse_transform(logits_max)

    # Compile results to be returned.
    result_list = []
    for list_index in range(0, len(logits_labels)):
        result_dict = {}
        result_dict['recommended'] = logits_labels[list_index]
        conf_prob = logits_prob[list_index][logits_max[list_index]]
        if conf_prob >= .99:
            result_dict['confidence'] = 'high (>=.99)'
        elif conf_prob >= .95:
            result_dict['confidence'] = 'medium (<.99|>=.95)'
        else:
            result_dict['confidence'] = 'low (<.95)'
        prob_dict = {}
        for label_index in range(0, len(label_encoder.classes_)):
            prob_dict[label_encoder.classes_[label_index]] = logits_prob[
                list_index][label_index]
        result_dict['probabilities'] = prob_dict
        result_list += [result_dict]

    return result_list