def test_top_contigs(self):
    """
    Check how many contigs are returned when asking for the top two.

    Opens the reference at ``self.REFERENCE`` and expects
    ``get_top_contigs_from_ref_entry`` to return exactly one contig even
    though up to two are requested.
    """
    reference = openReference(self.REFERENCE)
    top_contigs = get_top_contigs_from_ref_entry(reference, 2)
    self.assertEqual(1, len(top_contigs))
Esempio n. 2
0
def get_name_untruncator(repo_path, truncation_regex=r'\s'):
    """Return a dictionary that maps truncated reference names to full
    reference names.

    Each contig header is cut at the first match of ``truncation_regex``;
    the text before that match is the truncated name.

    :param repo_path: Path to the reference repository that contains the
        full reference names.
    :param truncation_regex: Regular expression matching the point at which
        reference names are truncated. For SAM/BAM files, this is whitespace.

    :returns: dict from truncated name to full name

    :raises: ReferenceTruncationError if multiple full names truncate to the
        same name.
    """

    ref_entry = openReference(repo_path)

    # Compile once: the same pattern is applied to every contig header.
    truncation_pattern = re.compile(truncation_regex)

    truncated_to_full = {}
    for contig in ref_entry.contigs:
        full_ref_name = contig.header
        # Only the text before the first match is needed, so stop splitting
        # after one match instead of splitting the whole header.
        truncated_ref_name = truncation_pattern.split(full_ref_name, 1)[0]

        if truncated_ref_name in truncated_to_full:
            msg = ("The full reference '{r}' truncates to '{t}', "
                   "but another reference also truncates to '{t}'.".format(
                       r=full_ref_name, t=truncated_ref_name))
            raise ReferenceTruncationError(msg)

        truncated_to_full[truncated_ref_name] = full_ref_name

    return truncated_to_full
Esempio n. 3
0
def make_variants_report(aln_summ_gff, variants_gff, reference, max_contigs_to_plot, report, output_dir, dpi=72, dumpdata=True):
    """
    Build the variants report and write it to disk as JSON.

    :param aln_summ_gff: (str) path to alignment_summary.gff
    :param variants_gff: (str) path to variants_gff
    :param reference: (str) path to reference_dir
    :param max_contigs_to_plot: (int) max number of contigs to plot
    :param report: name of the JSON file, joined onto ``output_dir``
    :param output_dir: directory passed to the plot-group builder and used
        as the parent of the JSON report
    :param dpi: accepted but not referenced in this function body
    :param dumpdata: accepted but not referenced in this function body
    :returns: the report object returned by ``spec.apply_view``
    """
    required_inputs = [('aln_summ_gff', aln_summ_gff),
                       ('variants_gff', variants_gff),
                       ('reference', reference)]
    _validate_inputs(required_inputs)

    # Open the reference and pick the contigs that will be plotted.
    reference_entry = openReference(reference)
    contigs_to_plot = get_top_contigs_from_ref_entry(
        reference_entry, max_contigs_to_plot)

    # Read the alignment summary first, then fold the variants GFF into
    # the same per-reference data.
    summary_data, variant_data = _extract_alignment_summ_data(
        aln_summ_gff, contigs_to_plot)
    _append_variants_gff_data(summary_data, variants_gff)

    # Assemble the pieces of the report.
    consensus_table, attributes = _get_consensus_table_and_attributes(
        summary_data, reference_entry)
    variants_plots = _create_variants_plot_grp(
        contigs_to_plot, variant_data, output_dir)

    raw_report = Report(Constants.R_ID,
                        plotgroups=[variants_plots],
                        attributes=attributes,
                        tables=[consensus_table],
                        dataset_uuids=(ReferenceSet(reference).uuid,))

    viewed_report = spec.apply_view(raw_report)
    viewed_report.write_json(os.path.join(output_dir, report))
    return viewed_report
Esempio n. 4
0
def get_name_untruncator(repo_path, truncation_regex=r'\s'):
    """Return a dictionary that maps truncated reference names to full
    reference names.

    Each contig header is cut at the first match of ``truncation_regex``;
    the text before that match is the truncated name.

    :param repo_path: Path to the reference repository that contains the
        full reference names.
    :param truncation_regex: Regular expression matching the point at which
        reference names are truncated. For SAM/BAM files, this is whitespace.

    :returns: dict from truncated name to full name

    :raises: ReferenceTruncationError if multiple full names truncate to the
        same name.
    """

    ref_entry = openReference(repo_path)

    # Compile once: the same pattern is applied to every contig header.
    truncation_pattern = re.compile(truncation_regex)

    truncated_to_full = {}
    for contig in ref_entry.contigs:
        full_ref_name = contig.header
        # Only the text before the first match is needed, so stop splitting
        # after one match instead of splitting the whole header.
        truncated_ref_name = truncation_pattern.split(full_ref_name, 1)[0]

        if truncated_ref_name in truncated_to_full:
            msg = ("The full reference '{r}' truncates to '{t}', "
                   "but another reference also truncates to '{t}'."
                   .format(r=full_ref_name, t=truncated_ref_name))
            raise ReferenceTruncationError(msg)

        truncated_to_full[truncated_ref_name] = full_ref_name

    return truncated_to_full
Esempio n. 5
0
 def test_top_contigs(self):
     """
     Check how many contigs are returned when asking for the top two.

     Opens the reference at ``self.REFERENCE`` and expects
     ``get_top_contigs_from_ref_entry`` to return exactly one contig even
     though up to two are requested.
     """
     reference = openReference(self.REFERENCE)
     top_contigs = get_top_contigs_from_ref_entry(reference, 2)
     self.assertEqual(1, len(top_contigs))
Esempio n. 6
0
 def __init__(self,
              variantsGff,
              referenceDir,
              howMany=100,
              batchSortSize=10000):
     """Store the settings and open the reference.

     variantsGff = source file, which can be a .gz
     referenceDir = referenceRepository dir, so we can fetch real contig names
     howMany = top N variants
     batchSortSize = the size of intermediate lists we sort
     """
     self._variantsGff = variantsGff
     self._howMany = howMany
     self._batchSortSize = batchSortSize
     # NOTE(review): presumably flags whether the input has to be
     # re-compressed later; confirm against the code that sets it.
     self._rezip = False
     # Opened last, after all plain attributes are in place.
     self._reference = openReference(referenceDir)
Esempio n. 7
0
 def __init__(self, variantsGff, referenceDir, howMany=100, batchSortSize=10000):
     """Store the settings and open the reference.

     variantsGff = source file, which can be a .gz
     referenceDir = referenceRepository dir, so we can fetch real contig names
     howMany = top N variants
     batchSortSize = the size of intermediate lists we sort
     """
     self._variantsGff = variantsGff
     self._howMany = howMany
     self._batchSortSize = batchSortSize
     # NOTE(review): presumably flags whether the input has to be
     # re-compressed later; confirm against the code that sets it.
     self._rezip = False
     # Opened last, after all plain attributes are in place.
     self._reference = openReference(referenceDir)
 def _get_reference_entry(self):
     """Open the reference at ``self.REFERENCE`` and return the result."""
     reference_path = self.REFERENCE
     return openReference(reference_path)
Esempio n. 9
0
 def _get_reference_entry(self):
     """Open the reference at ``self.REFERENCE`` and return the result."""
     reference_path = self.REFERENCE
     return openReference(reference_path)