def cross_analysis(iteration, raw):
    """Cross analysis: exclusive and part of shared & ctrl"""
    nonlocal shared_counts, shared_score
    nonlocal shared_ctrl_counts, shared_ctrl_score

    def partial_shared_update(i):
        """Perform shared and shared-control taxa partial evaluations"""
        nonlocal shared_counts, shared_score
        nonlocal shared_ctrl_counts, shared_ctrl_score
        if i == 0:  # 1st iteration: Initialize shared abundance and score
            shared_counts.update(sub_shared_counts)
            shared_score.update(sub_shared_score)
        elif i < controls:  # Just update shared abundance and score
            shared_counts &= sub_shared_counts
            shared_score &= sub_shared_score
        elif i == controls:  # Initialize shared-control counters
            shared_counts &= sub_shared_counts
            shared_score &= sub_shared_score
            shared_ctrl_counts.update(sub_shared_counts)
            shared_ctrl_score.update(sub_shared_score)
        elif controls:  # Accumulate both shared and shared-control counters
            shared_counts &= sub_shared_counts
            shared_score &= sub_shared_score
            shared_ctrl_counts &= sub_shared_counts
            shared_ctrl_score &= sub_shared_score
        else:  # No controls: Accumulate shared abundance and score only
            shared_counts &= sub_shared_counts
            shared_score &= sub_shared_score

    exclude: Set[Id] = set()
    # Get taxids at this rank that are present in the other samples
    for sample in (smpl for smpl in raws if smpl != raw):
        exclude.update(taxids[sample][rank])
    exclude.update(excluding)  # Add explicit excluding taxa, if any
    output.write(f'  \033[90mExclusive: From \033[0m{raw}\033[90m '
                 f'excluding {len(exclude)} taxa. '
                 f'Generating sample...\033[0m')
    exclude_tree = TaxTree()
    exclude_out = SampleDataById(['counts', 'scores', 'accs'])
    exclude_tree.allin1(ontology=ontology,
                        counts=counts[raw],
                        scores=scores[raw],
                        min_taxa=mintaxas[raw],
                        min_rank=rank,
                        just_min_rank=True,
                        include=including,
                        exclude=exclude,
                        out=exclude_out)
    exclude_out.purge_counters()
    if exclude_out.counts:  # Avoid adding empty samples
        sample = Sample(f'{raw}_{STR_EXCLUSIVE}_{rank.name.lower()}')
        samples.append(sample)
        counts[sample] = exclude_out.get_counts()
        accs[sample] = exclude_out.get_accs()
        scores[sample] = exclude_out.get_scores()
        output.write('\033[92m OK! \033[0m\n')
    else:
        output.write('\033[93m VOID \033[0m\n')
    # Get partial abundance and score for the shared analysis
    sub_shared_tree = TaxTree()
    sub_shared_out = SampleDataById(['shared', 'accs'])
    sub_shared_tree.allin1(ontology=ontology,
                           counts=counts[raw],
                           scores=scores[raw],
                           min_taxa=mintaxas[raw],
                           min_rank=rank,
                           just_min_rank=True,
                           include=including,
                           exclude=excluding,
                           out=sub_shared_out)
    sub_shared_out.purge_counters()
    # Scale scores by abundance
    sub_shared_counts: SharedCounter = sub_shared_out.get_shared_counts()
    sub_shared_score: SharedCounter = sub_shared_out.get_shared_scores()
    sub_shared_score *= sub_shared_counts
    partial_shared_update(iteration)
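# --- Illustrative sketch (not part of the pipeline above) ---
# partial_shared_update() accumulates the taxa shared across samples by
# combining counters: update() on the first iteration, then '&=' afterwards.
# Assuming SharedCounter is Counter-like under '&' (keep only keys present in
# both operands, at the minimum value), the accumulation reduces to:

def _demo_shared_accumulation() -> None:
    """Sketch of intersection-with-minimum across successive samples."""
    from collections import Counter

    samples = [Counter({'tax_A': 5, 'tax_B': 2}),  # hypothetical abundances
               Counter({'tax_A': 3, 'tax_C': 9}),
               Counter({'tax_A': 7, 'tax_B': 1})]
    shared: Counter = Counter()
    for i, sub in enumerate(samples):
        if i == 0:
            shared.update(sub)  # 1st iteration: initialize
        else:
            shared &= sub       # later iterations: intersect (min count)
    # Only taxa present in every sample survive, at their minimum abundance
    assert shared == Counter({'tax_A': 3})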
def process_output(
        *args, **kwargs
) -> Tuple[Sample, TaxTree, SampleDataByTaxId, SampleStats, Err]:
    """
    Process Centrifuge/LMAT output files (usually called in parallel!).
    """
    # Timing initialization
    start_time: float = time.perf_counter()
    # Recover input and parameters
    target_file: Filename = args[0]
    debug: bool = kwargs['debug']
    is_ctrl: bool = args[1]
    if debug:
        print(gray('Processing'), blue('ctrl' if is_ctrl else 'sample'),
              target_file, gray('...'))
        sys.stdout.flush()
    taxonomy: Taxonomy = kwargs['taxonomy']
    mintaxa: int = kwargs['ctrlmintaxa'] if is_ctrl else kwargs['mintaxa']
    minscore: Score = kwargs['ctrlminscore'] if is_ctrl else kwargs['minscore']
    including: Set[TaxId] = taxonomy.including
    excluding: Set[TaxId] = taxonomy.excluding
    scoring: Scoring = kwargs['scoring']
    lmat: bool = kwargs['lmat']
    output: io.StringIO = io.StringIO(newline='')

    def vwrite(*args):
        """Print only if verbose/debug mode is enabled"""
        if kwargs['debug']:
            output.write(' '.join(str(item) for item in args))

    sample: Sample = Sample(os.path.splitext(target_file)[0])
    error: Err = Err.NO_ERROR
    # Read Centrifuge/LMAT output files to get abundances
    read_method: Callable[[Filename, Scoring, Optional[Score]],  # Input
                          Tuple[str, SampleStats,
                                Counter[TaxId], Dict[TaxId, Score]]  # Output
                          ]
    if lmat:
        read_method = read_lmat_output
    else:
        read_method = read_output
    log: str
    stat: SampleStats
    counts: Counter[TaxId]
    scores: Dict[TaxId, Score]
    log, stat, counts, scores = read_method(target_file, scoring, minscore)
    output.write(log)
    # Update field in stat about control nature of the sample
    stat.is_ctrl = is_ctrl
    # Move cellular_organisms counts to root, if needed
    if taxonomy.collapse and counts[CELLULAR_ORGANISMS]:
        vwrite(gray('Moving'), counts[CELLULAR_ORGANISMS],
               gray('"CELLULAR_ORGANISMS" reads to "ROOT"... '))
        if counts[ROOT]:
            stat.num_taxa -= 1
            scores[ROOT] = (
                (scores[CELLULAR_ORGANISMS] * counts[CELLULAR_ORGANISMS]
                 + scores[ROOT] * counts[ROOT])
                / (counts[CELLULAR_ORGANISMS] + counts[ROOT]))
        else:
            scores[ROOT] = scores[CELLULAR_ORGANISMS]
        counts[ROOT] += counts[CELLULAR_ORGANISMS]
        counts[CELLULAR_ORGANISMS] = 0
        scores[CELLULAR_ORGANISMS] = NO_SCORE
    # Remove root counts, if requested
    if kwargs['root'] and counts[ROOT]:
        vwrite(gray('Removing'), counts[ROOT], gray('"ROOT" reads... '))
        stat.seq = stat.seq._replace(filt=stat.seq.filt - counts[ROOT])
        stat.num_taxa -= 1
        counts[ROOT] = 0
        scores[ROOT] = NO_SCORE
        vwrite(green('OK!'), '\n')
    # Building taxonomy tree
    output.write(gray('Building from raw data... '))
    vwrite(gray('\n  Building taxonomy tree with all-in-1... '))
    tree = TaxTree()
    ancestors: Set[TaxId]
    orphans: Set[TaxId]
    ancestors, orphans = taxonomy.get_ancestors(counts.keys())
    out = SampleDataByTaxId(['all'])
    tree.allin1(taxonomy=taxonomy, counts=counts, scores=scores,
                ancestors=ancestors, min_taxa=mintaxa,
                include=including, exclude=excluding, out=out)
    out.purge_counters()
    vwrite(green('OK!'), '\n')
    # Give stats about orphan taxids
    if debug:
        vwrite(gray('  Checking taxid loss (orphans)... '))
        lost: int = 0
        if orphans:
            for orphan in orphans:
                vwrite(yellow('Warning!'), f'Orphan taxid={orphan}\n')
                lost += counts[orphan]
            vwrite(yellow('WARNING!'),
                   f'{len(orphans)} orphan taxids ('
                   f'{len(orphans)/len(counts):.2%} of total)\n'
                   f'{lost} orphan sequences ('
                   f'{lost/sum(counts.values()):.3%} of total)\n')
        else:
            vwrite(green('OK!\n'))
    # Check the loss of taxids (plasmids typically) under some conditions
    if debug and not excluding and not including:
        vwrite(gray('  Additional checking of taxid loss... '))
        lost = 0
        for taxid in counts:
            if not out.counts[taxid]:
                lost += 1
                vwrite(yellow('Warning!'), f'Lost taxid={taxid}: '
                       f'{taxonomy.get_name(taxid)}\n')
        if lost:
            vwrite(yellow('WARNING!'), f'Lost {lost} taxids ('
                   f'{lost/len(counts):.2%} of total)\n')
        else:
            vwrite(green('OK!\n'))
    # Print last message and check if the sample is void
    if out.counts:
        output.write(sample + blue(' ctrl ' if is_ctrl else ' sample ')
                     + green('OK!\n'))
    elif is_ctrl:
        output.write(sample + red(' ctrl VOID!\n'))
        error = Err.VOID_CTRL
    else:
        output.write(sample + blue(' sample ') + yellow('VOID\n'))
        error = Err.VOID_SAMPLE
    # Timing results
    output.write(gray('Load elapsed time: ')
                 + f'{time.perf_counter() - start_time:.3g}'
                 + gray(' sec\n'))
    print(output.getvalue())
    sys.stdout.flush()
    return sample, tree, out, stat, error
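# --- Illustrative sketch (not part of the library) ---
# When collapsing CELLULAR_ORGANISMS into ROOT above, the new ROOT score is
# the read-count-weighted mean of the two scores, so the merged score stays
# consistent with the total number of reads assigned. With hypothetical
# numbers:

def _demo_weighted_score_merge() -> float:
    """Sketch of the count-weighted mean used when folding into ROOT."""
    cell_score, cell_counts = 32.0, 300  # hypothetical CELLULAR_ORGANISMS
    root_score, root_counts = 40.0, 100  # hypothetical ROOT
    merged = ((cell_score * cell_counts + root_score * root_counts)
              / (cell_counts + root_counts))
    assert merged == 34.0  # pulled toward the score of the more abundant taxon
    return merged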
def control_analysis():
    """Perform last steps of control and shared-controls analysis"""
    nonlocal shared_ctrl_counts, shared_ctrl_score

    def robust_contamination_removal():
        """Implement robust contamination removal algorithm."""
        nonlocal exclude_sets, shared_crossover

        def compute_qn(data: List[float], dist: str = "Gauss") -> float:
            """Compute Qn robust estimator of scale (Rousseeuw, 1993)"""
            c_d: float  # Select d parameter depending on the distribution
            if dist == "Gauss":
                c_d = 2.2219
            elif dist == "Cauchy":  # Heavy-tailed distribution
                c_d = 1.2071
            elif dist == "NegExp":  # Negative exponential (asymmetric)
                c_d = 3.4760
            else:
                raise Exception(red('\nERROR! ') + 'Unknown distribution')
            num: int = len(data)
            sort_data = sorted(data)
            pairwisedifs: List[float] = []
            for (i, x_val) in enumerate(sort_data):
                for y_val in sort_data[i + 1:]:
                    pairwisedifs.append(abs(x_val - y_val))
            k: int = int(num * (num / 2 + 1) / 4)
            return c_d * sorted(pairwisedifs)[k - 1]

        exclude_sets = {smpl: set() for smpl in raws[controls:]}
        vwrite(gray('Robust contamination removal: '
                    'Searching for contaminants...\n'))
        for tid in exclude_candidates:
            relfreq_ctrl: List[float] = [accs[ctrl][tid]
                                         / accs[ctrl][ontology.ROOT]
                                         for ctrl in raws[:controls]]
            relfreq_smpl: List[float] = [accs[smpl][tid]
                                         / accs[smpl][ontology.ROOT]
                                         for smpl in raws[controls:]]
            relfreq: List[float] = relfreq_ctrl + relfreq_smpl
            crossover: List[bool]  # Crossover source (yes/no)
            # Just-controls contamination check
            if all([rf < EPS for rf in relfreq_smpl]):
                vwrite(cyan('just-ctrl:\t'), tid, ontology.get_name(tid),
                       gray('relfreq:'),
                       fltlst2str(relfreq_ctrl) + fltlst2str(relfreq_smpl),
                       '\n')
                continue  # Go for next candidate
            # Critical contamination check
            if all([rf > SEVR_CONTM_MIN_RELFREQ for rf in relfreq_ctrl]):
                vwrite(red('critical:\t'), tid, ontology.get_name(tid),
                       gray('relfreq:'),
                       fltlst2str(relfreq_ctrl) + fltlst2str(relfreq_smpl),
                       '\n')
                for exclude_set in exclude_sets.values():
                    exclude_set.add(tid)
                continue  # Go for next candidate
            # Severe contamination check
            if any([rf > SEVR_CONTM_MIN_RELFREQ for rf in relfreq_ctrl]):
                vwrite(yellow('severe: \t'), tid, ontology.get_name(tid),
                       gray('relfreq:'),
                       fltlst2str(relfreq_ctrl) + fltlst2str(relfreq_smpl),
                       '\n')
                for exclude_set in exclude_sets.values():
                    exclude_set.add(tid)
                continue  # Go for next candidate
            # Mild contamination check
            if all([rf > MILD_CONTM_MIN_RELFREQ for rf in relfreq_ctrl]):
                vwrite(blue('mild cont:\t'), tid, ontology.get_name(tid),
                       gray('relfreq:'),
                       fltlst2str(relfreq_ctrl) + fltlst2str(relfreq_smpl),
                       '\n')
                for exclude_set in exclude_sets.values():
                    exclude_set.add(tid)
                continue  # Go for next candidate
            # Calculate the median and Qn estimator of scale (all samples,
            #  including controls)
            mdn: float = statistics.median(relfreq)
            # mad: float = statistics.mean([abs(mdn - rf) for rf in relfreq])
            q_n: float = compute_qn(relfreq, dist="NegExp")
            # Calculate crossover in samples
            outlier_lim: float = mdn + ROBUST_XOVER_OUTLIER * q_n
            ordomag_lim: float = (max(relfreq_ctrl)
                                  * 10 ** ROBUST_XOVER_ORD_MAG)
            crossover = [rf > outlier_lim and rf > ordomag_lim
                         for rf in relfreq[controls:]]
            # Crossover contamination check
            if any(crossover):
                vwrite(magenta('crossover:\t'), tid, ontology.get_name(tid),
                       green(f'lims: [{outlier_lim:.1g}]'
                             + ('<' if outlier_lim < ordomag_lim else '>')
                             + f'[{ordomag_lim:.1g}]'),
                       gray('relfreq:'),
                       fltlst2str(relfreq_ctrl) + fltlst2str(relfreq_smpl),
                       gray('crossover:'), blst2str(crossover), '\n')
                # Exclude just for contaminated samples (not the source)
                vwrite(magenta('\t->'), gray(f'Include {tid} just in:'))
                for i in range(len(raws[controls:])):
                    if not crossover[i]:
                        exclude_sets[raws[i + controls]].add(tid)
                    else:
                        vwrite(f' {raws[i + controls]}')
                if all(crossover):  # Shared taxon contaminating control(s)
                    vwrite(' (', yellow('Shared crossover taxon!'), ')')
                    shared_crossover.add(tid)
                vwrite('\n')
                continue
            # Other contamination: remove from all samples
            vwrite(gray('other cont:\t'), tid, ontology.get_name(tid),
                   green(f'lims: [{outlier_lim:.1g}]'
                         + ('<' if outlier_lim < ordomag_lim else '>')
                         + f'[{ordomag_lim:.1g}]'),
                   gray('relfreq:'),
                   fltlst2str(relfreq_ctrl) + fltlst2str(relfreq_smpl), '\n')
            for exclude_set in exclude_sets.values():
                exclude_set.add(tid)

    # Get taxids at this rank that are present in the control samples
    exclude_candidates: Set[Id] = set()
    for i in range(controls):
        exclude_candidates.update(taxids[raws[i]][rank])
    exclude_sets: Dict[Sample, Set[Id]]
    shared_crossover: Set[Id] = set()  # Shared taxa contaminating controls
    if controls and (len(raws) - controls >= ROBUST_MIN_SAMPLES):
        robust_contamination_removal()
    else:  # Otherwise, just apply the strict control approach
        exclude_sets = {file: exclude_candidates
                        for file in raws[controls:]}
    # Add explicit excluding taxa (if any) to exclude sets
    for exclude_set in exclude_sets.values():
        exclude_set.update(excluding)
    exclude_candidates.update(excluding)
    # Process each sample, excluding control taxa
    for raw in raws[controls:]:
        output.write(gray('  Ctrl: From') + f' {raw} '
                     + gray(f'excluding {len(exclude_sets[raw])} ctrl taxa. '
                            f'Generating sample... '))
        ctrl_tree = TaxTree()
        ctrl_out = SampleDataById(['counts', 'scores', 'accs'])
        ctrl_tree.allin1(ontology=ontology,
                         counts=counts[raw],
                         scores=scores[raw],
                         min_taxa=mintaxas[raw],
                         min_rank=rank,
                         just_min_rank=True,
                         include=including,
                         exclude=exclude_sets[raw],
                         out=ctrl_out)
        ctrl_out.purge_counters()
        if ctrl_out.counts:  # Avoid adding empty samples
            sample = Sample(f'{raw}_{STR_CONTROL}_{rank.name.lower()}')
            samples.append(sample)
            counts[sample] = ctrl_out.get_counts()
            accs[sample] = ctrl_out.get_accs()
            scores[sample] = ctrl_out.get_scores()
            output.write(green('OK!\n'))
        else:
            output.write(yellow('VOID\n'))

    def shared_ctrl_analysis():
        """Perform last steps of shared taxa analysis"""
        shared_ctrl_tree: TaxTree = TaxTree()
        shared_ctrl_out: SampleDataById = SampleDataById(['shared', 'accs'])
        shared_ctrl_tree.allin1(ontology=ontology,
                                counts=shared_ctrl_counts,
                                scores=shared_ctrl_score,
                                min_taxa=get_shared_mintaxa(),
                                include=including,
                                exclude=(exclude_candidates
                                         - shared_crossover),
                                out=shared_ctrl_out)
        shared_ctrl_out.purge_counters()
        out_counts: SharedCounter = shared_ctrl_out.get_shared_counts()
        output.write(gray(f'  Ctrl-shared: Including {len(out_counts)}'
                          ' shared taxa. Generating sample... '))
        if out_counts:
            sample = Sample(f'{STR_CONTROL_SHARED}_{rank.name.lower()}')
            samples.append(sample)
            counts[sample] = out_counts
            accs[sample] = shared_ctrl_out.get_accs()
            scores[sample] = shared_ctrl_out.get_shared_scores()
            output.write(green('OK!\n'))
        else:
            output.write(yellow('VOID\n'))

    # Shared-control taxa final analysis
    if shared_ctrl_counts:
        # Normalize scaled scores by total abundance
        shared_ctrl_score /= (+shared_ctrl_counts)
        # Get averaged abundance by number of samples minus control samples
        shared_ctrl_counts //= (len(raws) - controls)
        shared_ctrl_analysis()
    else:
        output.write(gray('  Ctrl-shared: No taxa! ') + yellow('VOID')
                     + gray(' sample.\n'))
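# --- Illustrative sketch (not part of recentrifuge) ---
# robust_contamination_removal() flags a sample as a crossover-contamination
# source when its relative frequency exceeds both a robust outlier limit
# (median + factor * Qn) and an order-of-magnitude limit over the controls.
# A standalone model of that decision follows; the outlier factor (5) and the
# order of magnitude (2) are hypothetical stand-ins for ROBUST_XOVER_OUTLIER
# and ROBUST_XOVER_ORD_MAG:

def _demo_crossover_limits() -> None:
    """Sketch of the Qn scale estimate and the two crossover thresholds."""
    import statistics

    def qn_scale(data, c_d=3.4760):  # c_d for negative exponential, as above
        """Qn estimator of scale (Rousseeuw, 1993), naive O(n^2) version."""
        srt = sorted(data)
        difs = sorted(abs(x - y)
                      for i, x in enumerate(srt) for y in srt[i + 1:])
        k = int(len(data) * (len(data) / 2 + 1) / 4)
        return c_d * difs[k - 1]

    relfreq = [1e-6, 2e-6, 1.5e-6, 2e-6, 8e-4]  # last sample is suspicious
    relfreq_ctrl = relfreq[:2]                  # hypothetical: 2 controls
    outlier_lim = statistics.median(relfreq) + 5 * qn_scale(relfreq)
    ordomag_lim = max(relfreq_ctrl) * 10 ** 2
    crossover = [rf > outlier_lim and rf > ordomag_lim for rf in relfreq[2:]]
    assert crossover == [False, False, True]    # only the outlier is flagged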
def process_output(
        *args, **kwargs
) -> Tuple[Sample, TaxTree, SampleDataById, SampleStats, Err]:
    """
    Process classifier output files (usually called in parallel!).
    """
    # Timing initialization
    start_time: float = time.perf_counter()
    # Recover input and parameters
    target_file: Filename = args[0]
    debug: bool = kwargs['debug']
    is_ctrl: bool = args[1]
    if debug:
        print(gray('Processing'), blue('ctrl' if is_ctrl else 'sample'),
              target_file, gray('...'))
        sys.stdout.flush()
    ontology: Ontology = kwargs['ontology']
    mintaxa: Optional[int] = (kwargs['ctrlmintaxa'] if is_ctrl
                              else kwargs['mintaxa'])
    minscore: Score = kwargs['ctrlminscore'] if is_ctrl else kwargs['minscore']
    including: Union[Tuple, Set[Id]] = ontology.including
    excluding: Union[Tuple, Set[Id]] = ontology.excluding
    scoring: Scoring = kwargs['scoring']
    classifier: Classifier = kwargs['classifier']
    genfmt: GenericFormat = kwargs['genfmt']
    output: io.StringIO = io.StringIO(newline='')

    def vwrite(*args):
        """Print only if verbose/debug mode is enabled"""
        if kwargs['debug']:
            output.write(' '.join(str(item) for item in args))

    sample: Sample = Sample(os.path.splitext(target_file)[0])
    error: Err = Err.NO_ERROR
    # Read taxonomic classifier output files to get abundances
    read_method: Callable[  # Format: [[Input], Output]
        [Filename, Scoring, Optional[Score]],
        Tuple[str, SampleStats, Counter[Id], Dict[Id, Score]]]
    log: str
    stat: SampleStats
    counts: Counter[Id]
    scores: Dict[Id, Score]
    if classifier is Classifier.GENERIC:  # Direct call to generic method
        log, stat, counts, scores = read_generic_output(
            target_file, scoring, minscore, genfmt)
    else:  # Use read_method
        if classifier is Classifier.KRAKEN:
            read_method = read_kraken_output
        elif classifier is Classifier.CLARK:
            read_method = read_clark_output
        elif classifier is Classifier.LMAT:
            read_method = read_lmat_output
        elif classifier is Classifier.CENTRIFUGE:
            read_method = read_output
        else:
            raise Exception(red('\nERROR!'),
                            f'taxclass: Unknown classifier "{classifier}".')
        log, stat, counts, scores = read_method(target_file, scoring,
                                                minscore)
    output.write(log)
    # Complete/Update fields in stats
    stat.is_ctrl = is_ctrl  # Set the control nature of the sample
    if mintaxa is not None:  # Manual mintaxa has precedence over automatic
        stat.mintaxa = mintaxa
    else:  # Update local value with the automatically guessed value
        mintaxa = stat.mintaxa
    # Move cellular_organisms counts to root, if needed
    if ontology.collapse and counts[CELLULAR_ORGANISMS]:
        vwrite(gray('Moving'), counts[CELLULAR_ORGANISMS],
               gray('"CELLULAR_ORGANISMS" reads to "ROOT"... \n'))
        if counts[ontology.ROOT]:
            stat.decrease_filtered_taxids()
            scores[ontology.ROOT] = Score(
                (scores[CELLULAR_ORGANISMS] * counts[CELLULAR_ORGANISMS]
                 + scores[ontology.ROOT] * counts[ontology.ROOT])
                / (counts[CELLULAR_ORGANISMS] + counts[ontology.ROOT]))
        else:
            scores[ontology.ROOT] = scores[CELLULAR_ORGANISMS]
        counts[ontology.ROOT] += counts[CELLULAR_ORGANISMS]
        counts[CELLULAR_ORGANISMS] = 0
        scores[CELLULAR_ORGANISMS] = NO_SCORE
    # Remove root counts, if requested
    if kwargs['root'] and counts[ontology.ROOT]:
        vwrite(gray('Removing'), counts[ontology.ROOT],
               gray('"ROOT" reads... '))
        stat.seq = stat.seq._replace(
            filt=stat.seq.filt - counts[ontology.ROOT])
        stat.decrease_filtered_taxids()
        counts[ontology.ROOT] = 0
        scores[ontology.ROOT] = NO_SCORE
        vwrite(green('OK!'), '\n')
    # Building ontology tree
    output.write(gray('Building from raw data with mintaxa = ')
                 + f'{mintaxa:_d}' + gray(' ... \n'))
    vwrite(gray('  Building ontology tree with all-in-1... '))
    tree = TaxTree()
    ancestors: Set[Id]
    orphans: Set[Id]
    ancestors, orphans = ontology.get_ancestors(counts.keys())
    out = SampleDataById(['all'])
    tree.allin1(ontology=ontology, counts=counts, scores=scores,
                ancestors=ancestors, min_taxa=mintaxa,
                include=including, exclude=excluding, out=out)
    out.purge_counters()
    vwrite(green('OK!'), '\n')
    # Stats: Complete final value for TaxIDs after tree building and folding
    final_taxids: int = len(out.counts) if out.counts is not None else 0
    stat.set_final_taxids(final_taxids)
    # Check for additional loss of reads (due to include/exclude and orphans)
    output.write(gray('  Check for more seqs lost ([in/ex]clude affects)... '))
    if out.counts is not None:
        discard: int = sum(counts.values()) - sum(out.counts.values())
        if discard:
            output.write(blue('\n  Info:') + f' {discard} '
                         + gray('additional seqs discarded (')
                         + f'{discard/sum(counts.values()):.3%} '
                         + gray('of accepted)\n'))
        else:
            output.write(green('OK!\n'))
    else:
        output.write(red('No counts in sample tree!\n'))
    # Warn or give detailed stats about orphan taxids and orphan seqs
    if debug:
        vwrite(gray('  Checking taxid loss (orphans)... '))
        lost: int = 0
        if orphans:
            for orphan in orphans:
                vwrite(yellow('  Warning!'), gray('Orphan taxid'),
                       f'{orphan}\n')
                lost += counts[orphan]
            vwrite(yellow('  WARNING!'),
                   f'{len(orphans)} orphan taxids ('
                   f'{len(orphans)/len(counts):.2%} of accepted)\n'
                   f' and {lost} orphan sequences ('
                   f'{lost/sum(counts.values()):.3%} of accepted)\n')
        else:
            vwrite(green('OK!\n'))
    elif orphans:
        output.write(yellow('\n  Warning!') + f' {len(orphans)} orphan taxids'
                     + gray(' (rerun with --debug for details)\n'))
    # Check the removal of TaxIDs (accumulation of leaves in parents)
    if debug and not excluding and including == {ontology.ROOT}:
        vwrite(gray('  Assess accumulation due to "folding the tree"...\n'))
        migrated: int = 0
        if out.counts is not None:
            for taxid in counts:
                if out.counts[taxid] == 0:
                    migrated += 1
                    vwrite(blue('  Info:'),
                           gray(f'Folded TaxID {taxid} (')
                           + f'{ontology.get_name(taxid)}' + gray(') with ')
                           + f'{counts[taxid]}' + gray(' original seqs\n'))
        if migrated:
            vwrite(blue('  INFO:'),
                   f'{migrated} TaxIDs folded ('
                   f'{migrated/len(+counts):.2%} of TAF '
                   '—TaxIDs after filtering—)\n')
            vwrite(blue('  INFO:'),
                   f'Final assigned TaxIDs: {final_taxids} '
                   f'(reduced to {final_taxids/len(+counts):.2%} of '
                   'number of TAF)\n')
        else:
            vwrite(blue('  INFO:'), gray('No migration!'), green('OK!\n'))
    # Print last message and check if the sample is void
    if out.counts:
        output.write(sample + blue(' ctrl ' if is_ctrl else ' sample ')
                     + green('OK!\n'))
    elif is_ctrl:
        output.write(sample + red(' ctrl VOID!\n'))
        error = Err.VOID_CTRL
    else:
        output.write(sample + blue(' sample ') + yellow('VOID\n'))
        error = Err.VOID_SAMPLE
    # Timing results
    output.write(gray('Load elapsed time: ')
                 + f'{time.perf_counter() - start_time:.3g}'
                 + gray(' sec\n'))
    print(output.getvalue())
    sys.stdout.flush()
    return sample, tree, out, stat, error
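# --- Illustrative sketch (not part of recentrifuge) ---
# The docstring notes that process_output() is usually called in parallel.
# Since it takes the target file and control flag as positional args and the
# shared settings as keyword args, one possible dispatch pattern (the helper
# name, 'files', 'num_ctrls', and 'settings' are hypothetical) is a process
# pool; the settings values must be picklable for this to work:

def _demo_parallel_dispatch(files, num_ctrls, settings):
    """Sketch: fan process_output out over samples with a process pool."""
    import multiprocessing as mp
    from functools import partial

    worker = partial(process_output, **settings)  # bind the shared kwargs
    with mp.Pool() as pool:
        # Per-file positional args: (target_file, is_ctrl);
        #  controls are assumed to come first in 'files'
        async_results = [pool.apply_async(worker, (fil, i < num_ctrls))
                         for i, fil in enumerate(files)]
        return [res.get() for res in async_results]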