def test_top_contigs(self):
    """
    Check that one top contig comes back for the test reference.

    Opens ``self.REFERENCE`` and passes 2 as the second argument of
    ``get_top_contigs_from_ref_entry`` (presumably the maximum number of
    contigs to return -- confirm against that helper).
    """
    reference = openReference(self.REFERENCE)
    top_contigs = get_top_contigs_from_ref_entry(reference, 2)
    self.assertEqual(1, len(top_contigs))
def get_name_untruncator(repo_path, truncation_regex=r'\s'):
    """Return a dictionary that maps truncated reference names to full
    reference names.

    :param repo_path: Path to the reference repository that contains the
                      full reference names.
    :param truncation_regex: Regular expression matching the point at which
                             reference names are truncated. For SAM/BAM
                             files, this is whitespace (the default).
    :returns: dict from truncated name to full name
    :raises: ReferenceTruncationError if multiple full names truncate to the
             same name.
    """
    ref_entry = openReference(repo_path)
    truncated_to_full = {}
    for contig in ref_entry.contigs:
        full_ref_name = contig.header
        # Only the text before the first match is needed, so stop splitting
        # after one match instead of scanning the whole header.
        truncated_ref_name = re.split(
            truncation_regex, full_ref_name, maxsplit=1)[0]
        if truncated_ref_name in truncated_to_full:
            # Two distinct headers collapsing to one key would make the
            # mapping ambiguous, so fail loudly rather than overwrite.
            msg = ("The full reference '{r}' truncates to '{t}', "
                   "but another reference also truncates to '{t}'.".format(
                       r=full_ref_name, t=truncated_ref_name))
            raise ReferenceTruncationError(msg)
        truncated_to_full[truncated_ref_name] = full_ref_name
    return truncated_to_full
def make_variants_report(aln_summ_gff, variants_gff, reference,
                         max_contigs_to_plot, report, output_dir,
                         dpi=72, dumpdata=True):
    """
    Build the variants report and write it out as JSON.

    :param aln_summ_gff: (str) path to alignment_summary.gff
    :param variants_gff: (str) path to variants_gff
    :param reference: (str) path to reference_dir
    :param max_contigs_to_plot: (int) max number of contigs to plot
    :param report: (str) file name of the JSON report, joined onto output_dir
    :param output_dir: (str) directory handed to the plot-group builder and
                       in which the JSON report is written
    :param dpi: not used inside this function
    :param dumpdata: not used inside this function
    :returns: the Report object after ``spec.apply_view`` has been applied
    """
    named_inputs = [('aln_summ_gff', aln_summ_gff),
                    ('variants_gff', variants_gff),
                    ('reference', reference)]
    _validate_inputs(named_inputs)

    # Open the reference and pick the contigs that will be plotted.
    ref_entry = openReference(reference)
    contigs = get_top_contigs_from_ref_entry(ref_entry, max_contigs_to_plot)

    # Pull the GFF data: alignment summary first, then the variants on top.
    summary_data, variants_by_contig = _extract_alignment_summ_data(
        aln_summ_gff, contigs)
    _append_variants_gff_data(summary_data, variants_gff)

    # Assemble the pieces of the report.
    consensus_table, attributes = _get_consensus_table_and_attributes(
        summary_data, ref_entry)
    variants_plots = _create_variants_plot_grp(
        contigs, variants_by_contig, output_dir)

    raw_report = Report(Constants.R_ID,
                        plotgroups=[variants_plots],
                        attributes=attributes,
                        tables=[consensus_table],
                        dataset_uuids=(ReferenceSet(reference).uuid,))
    final_report = spec.apply_view(raw_report)

    json_path = os.path.join(output_dir, report)
    final_report.write_json(json_path)
    return final_report
def get_name_untruncator(repo_path, truncation_regex=r'\s'):
    """Return a dictionary that maps truncated reference names to full
    reference names.

    :param repo_path: Path to the reference repository that contains the
                      full reference names.
    :param truncation_regex: Regular expression matching the point at which
                             reference names are truncated. For SAM/BAM
                             files, this is whitespace (the default).
    :returns: dict from truncated name to full name
    :raises: ReferenceTruncationError if multiple full names truncate to the
             same name.
    """
    ref_entry = openReference(repo_path)
    truncated_to_full = {}
    for contig in ref_entry.contigs:
        full_ref_name = contig.header
        # Everything after the first match is discarded, so a single split
        # is enough to obtain the truncated name.
        truncated_ref_name = re.split(
            truncation_regex, full_ref_name, maxsplit=1)[0]
        if truncated_ref_name in truncated_to_full:
            # A collision means the truncated name cannot be mapped back to
            # a unique full name; report it instead of silently overwriting.
            msg = ("The full reference '{r}' truncates to '{t}', "
                   "but another reference also truncates to '{t}'."
                   .format(r=full_ref_name, t=truncated_ref_name))
            raise ReferenceTruncationError(msg)
        truncated_to_full[truncated_ref_name] = full_ref_name
    return truncated_to_full
def __init__(self, variantsGff, referenceDir, howMany=100,
             batchSortSize=10000):
    """Store the settings and open the reference.

    variantsGff   -- source file, which can be a .gz
    referenceDir  -- referenceRepository dir, so we can fetch real contig
                     names
    howMany       -- top N variants
    batchSortSize -- the size of intermediate lists we sort
    """
    self._variantsGff = variantsGff
    self._howMany = howMany
    self._batchSortSize = batchSortSize
    self._rezip = False
    # Opened last, after all the plain settings have been stored.
    self._reference = openReference(referenceDir)
def _get_reference_entry(self):
    """Return the result of calling openReference on ``self.REFERENCE``."""
    reference_path = self.REFERENCE
    return openReference(reference_path)