def summarize_analysis(
        *args, **kwargs
) -> Tuple[Optional[Sample], Counter[Id], Counter[Id], Scores]:
    """
    Summarize for a cross-analysis (to be usually called in parallel!).

    Positional arguments:
        args[0]: name of the analysis; every sample whose name starts
            with it contributes to the summary.

    Keyword arguments:
        ontology: handed over to TaxTree.grow().
        counts: per-sample counters, indexed by sample.
        scores: per-sample score mappings, indexed by sample.
        samples: samples among which the targets are selected.

    Returns:
        Tuple (summary, counts, accumulated counts, scores). The first
        item is None when no positive count is left after the tree has
        been rebuilt, so that no empty sample is returned.

    Raises:
        AssertionError: if no sample name starts with the analysis name.
    """
    # Recover input and parameters
    analysis: str = args[0]
    ontology: Ontology = kwargs['ontology']
    counts: Dict[Sample, Counter[Id]] = kwargs['counts']
    scores: Dict[Sample, Dict[Id, Score]] = kwargs['scores']
    samples: List[Sample] = kwargs['samples']

    # Messages are collected here and printed at once before returning
    log: io.StringIO = io.StringIO(newline='')

    merged_counts: Counter[Id] = col.Counter()
    merged_accs: Counter[Id] = col.Counter()
    merged_scores: Scores = Scores({})
    summary: Optional[Sample] = None

    log.write(gray('Summary for ') + analysis + gray('... '))

    # Select the samples that belong to this analysis (prefix match)
    chosen: List[Sample] = []
    for name in samples:
        if name.startswith(analysis):
            chosen.append(name)
    assert chosen, \
        red('ERROR! ') + analysis + gray(' has no samples to summarize!')

    # Merge counts and scores of all the selected samples
    for name in chosen:
        merged_counts += counts[name]
        merged_scores.update(scores[name])

    # Rebuild a tree from the merged data
    tree = TaxTree()
    tree.grow(ontology=ontology, counts=merged_counts, scores=merged_scores)
    tree.subtract()
    tree.shape()

    # The same containers are emptied in place and refilled from the tree.
    # No including/excluding is applied here: get_taxa is not as 'clever'
    # as allin1 and the taxa are already included/excluded in the derived
    # samples.
    merged_counts.clear()
    merged_scores.clear()
    tree.get_taxa(counts=merged_counts, accs=merged_accs,
                  scores=merged_scores)
    merged_counts = +merged_counts  # unary plus drops counts <= 0

    if not merged_counts:
        # Nothing left: summary stays None instead of an empty sample
        log.write(yellow(' VOID\n'))
    else:
        summary = Sample(f'{analysis}_{STR_SUMMARY}')
        log.write(
            gray('(') + cyan(f'{len(chosen)}') + gray(' samples)')
            + green(' OK!\n'))

    # Print output and return
    print(log.getvalue(), end='', flush=True)
    return summary, merged_counts, merged_accs, merged_scores
def summarize_analysis(
        *args, **kwargs
) -> Tuple[Sample, Counter[Id], Counter[Id], Scores]:
    """
    Summarize for a cross-analysis (to be usually called in parallel!).

    Positional arguments:
        args[0]: name of the analysis; every sample whose name starts
            with it contributes to the summary.

    Keyword arguments:
        ontology: handed over to TaxTree.grow(); its 'including' and
            'excluding' attributes are forwarded to TaxTree.get_taxa().
        counts: per-sample counters, indexed by sample.
        scores: per-sample score mappings, indexed by sample.
        samples: samples among which the targets are selected.

    Returns:
        Tuple (summary, counts, accumulated counts, scores). NOTE: the
        first item is None (in spite of the annotation) when no positive
        count is left, so that no empty sample is returned.

    Raises:
        AssertionError: if no sample name starts with the analysis name.
    """
    # Recover input and parameters
    analysis: str = args[0]
    ontology: Ontology = kwargs['ontology']
    taxa_in = ontology.including
    taxa_out = ontology.excluding
    counts: Dict[Sample, Counter[Id]] = kwargs['counts']
    scores: Dict[Sample, Dict[Id, Score]] = kwargs['scores']
    samples: List[Sample] = kwargs['samples']

    # Messages are collected here and printed at once before returning
    log: io.StringIO = io.StringIO(newline='')

    total_counts: Counter[Id] = Counter()
    total_accs: Counter[Id] = Counter()
    total_scores: Scores = Scores({})
    summary: Sample = None

    log.write(gray('Summary for ') + analysis + gray('... '))

    # Select the samples that belong to this analysis (prefix match)
    selected: List[Sample] = []
    for candidate in samples:
        if candidate.startswith(analysis):
            selected.append(candidate)
    assert selected, \
        red('ERROR! ') + analysis + gray(' has no samples to summarize!')

    # Merge counts and scores of all the selected samples
    for member in selected:
        total_counts += counts[member]
        total_scores.update(scores[member])

    # Rebuild a tree from the merged data
    tree = TaxTree()
    tree.grow(ontology=ontology, counts=total_counts, scores=total_scores)
    tree.subtract()
    tree.shape()

    # The same containers are emptied in place and refilled from the tree,
    # this time applying the include/exclude sets of the ontology
    total_counts.clear()
    total_scores.clear()
    tree.get_taxa(counts=total_counts, accs=total_accs,
                  scores=total_scores,
                  include=taxa_in, exclude=taxa_out)
    total_counts = +total_counts  # unary plus drops counts <= 0

    if not total_counts:
        # Nothing left: summary stays None instead of an empty sample
        log.write(yellow(' VOID\n'))
    else:
        summary = Sample(f'{analysis}_{STR_SUMMARY}')
        log.write(
            gray('(') + cyan(f'{len(selected)}') + gray(' samples)')
            + green(' OK!\n'))

    # Print output and return
    print(log.getvalue(), end='', flush=True)
    return summary, total_counts, total_accs, total_scores
def process_report(
        *args, **kwargs
) -> Tuple[Sample, TaxTree, SampleDataByTaxId, SampleStats, Err]:
    """
    Process Centrifuge/Kraken report files (to be usually called in parallel!).

    Positional arguments:
        args[0]: name of the report file; it is also used to create the
            sample.

    Keyword arguments:
        taxonomy: used to grow the trees; its 'collapse', 'including'
            and 'excluding' attributes drive pruning and filtering.
        mintaxa: forwarded to TaxTree.prune().
        debug: enables the verbose messages and is forwarded to prune().
        root: if true, the counts of ROOT are set to zero.

    Returns:
        Tuple (sample, pruned tree, sample data with counts/ranks/accs,
        a fresh SampleStats(), Err.NO_ERROR).
    """
    # TODO: Full review to report support

    # Recover input and parameters
    filerep: Filename = args[0]
    taxonomy: Taxonomy = kwargs['taxonomy']
    mintaxa: int = kwargs['mintaxa']
    collapse: bool = taxonomy.collapse
    including: Set[TaxId] = taxonomy.including
    excluding: Set[TaxId] = taxonomy.excluding
    debug: bool = kwargs['debug']

    # Messages are collected here and printed at once before returning
    buffer: io.StringIO = io.StringIO(newline='')
    ok_mark = '\033[92m OK! \033[0m\n'

    def vwrite(*items):
        """Print only if verbose/debug mode is enabled"""
        if not debug:
            return
        buffer.write(' '.join(map(str, items)))

    sample: Sample = Sample(filerep)

    # Read Centrifuge/Kraken report file to get abundances
    log: str
    abundances: Counter[TaxId]
    log, abundances, _ = read_report(filerep)
    buffer.write(log)

    # Remove root counts, in case
    if kwargs['root']:
        vwrite(gray('Removing'), abundances[ROOT], gray('"ROOT" reads... '))
        abundances[ROOT] = 0
        vwrite(green('OK!'), '\n')

    # Build taxonomy tree, growing it from the root node
    buffer.write(' \033[90mBuilding taxonomy tree...\033[0m')
    tree = TaxTree()
    tree.grow(taxonomy=taxonomy, counts=abundances)
    buffer.write(ok_mark)

    # Prune the tree
    buffer.write(' \033[90mPruning taxonomy tree...\033[0m')
    tree.prune(mintaxa, None, collapse, debug)
    tree.shape()
    buffer.write(ok_mark)

    # Get the taxa with their abundances and taxonomical levels
    buffer.write(' \033[90mFiltering taxa...\033[0m')
    taxa_abund: Counter[TaxId] = col.Counter()
    taxa_accs: Counter[TaxId] = col.Counter()
    ranks: Ranks = Ranks({})
    tree.get_taxa(abundance=taxa_abund,
                  accs=taxa_accs,
                  ranks=ranks,
                  mindepth=0,
                  maxdepth=0,
                  include=including,
                  exclude=excluding)
    taxa_abund = +taxa_abund  # unary plus drops zero and negative counts

    if including or excluding:
        # Recalculate accumulated counts from a tree grown with only the
        # filtered abundances
        filtered_tree = TaxTree()
        filtered_tree.grow(taxonomy, taxa_abund)
        filtered_tree.shape()
        taxa_abund = col.Counter()  # Reset abundances
        taxa_accs = col.Counter()  # Reset accumulated
        filtered_tree.get_taxa(taxa_abund, taxa_accs)

    out: SampleDataByTaxId = SampleDataByTaxId()
    out.set(counts=taxa_abund, ranks=ranks, accs=taxa_accs)
    buffer.write(ok_mark)

    # Print output and return
    print(buffer.getvalue(), flush=True)
    return sample, tree, out, SampleStats(), Err.NO_ERROR