def _get_sam_groups(ignored_ids_writer=None):
    """Collect every record group yielded by iter_gmap_sam into a list.

    Query lengths are looked up from READS_DS and alignments are read from
    SORTED_GMAP_SAM. The alignment coverage (0.99) and identity (0.85)
    thresholds are fixed here; ignored_ids_writer is forwarded to
    iter_gmap_sam unchanged.
    """
    read_lengths = ContigSetReaderWrapper.name_to_len_dict(READS_DS)
    sam_group_iter = iter_gmap_sam(sam_filename=SORTED_GMAP_SAM,
                                   query_len_dict=read_lengths,
                                   min_aln_coverage=0.99,
                                   min_aln_identity=0.85,
                                   ignored_ids_writer=ignored_ids_writer)
    # Materialize the iterator so callers get a reusable list.
    return list(sam_group_iter)
Example #2
0
def _get_sam_groups(ignored_ids_writer=None):
    """Return a list of the SAM record groups produced by iter_gmap_sam.

    Reads alignments from SORTED_GMAP_SAM, using the name-to-length mapping
    built from READS_DS, with fixed thresholds of 0.99 alignment coverage
    and 0.85 alignment identity. ignored_ids_writer is passed straight
    through to iter_gmap_sam.
    """
    # Build the length lookup first, exactly as the iteration needs it.
    lengths_by_name = ContigSetReaderWrapper.name_to_len_dict(READS_DS)
    gmap_options = {
        'sam_filename': SORTED_GMAP_SAM,
        'query_len_dict': lengths_by_name,
        'min_aln_coverage': 0.99,
        'min_aln_identity': 0.85,
        'ignored_ids_writer': ignored_ids_writer,
    }
    return list(iter_gmap_sam(**gmap_options))
    def run(self,
            allow_extra_5exon,
            skip_5_exon_alt,
            ignored_ids_fn,
            good_gff_fn,
            bad_gff_fn,
            group_fn,
            tolerate_end=100):
        """
        Process the whole SAM file:
          (1) Group SAM records based on where they mapped to and strands
          (2) Collapse records, write collapsed isoforms to *_gff_writer,
              write supportive records associated with each collapsed isoforms
              to group_writer.

        Parameters:
          allow_extra_5exon, skip_5_exon_alt, tolerate_end -- forwarded
              unchanged to collapse_sam_records for every group.
          ignored_ids_fn -- if truthy, opened for writing and handed to
              iter_gmap_sam as ignored_ids_writer; otherwise None is passed.
          good_gff_fn, bad_gff_fn -- if truthy, wrapped in a
              CollapseGffWriter; otherwise the writer is None.
          group_fn -- if truthy, wrapped in a GroupWriter; otherwise None.

        Every writer that was created is closed before this method returns,
        including when reading or collapsing raises.
        """
        # Start from None so the cleanup below is safe even if creating one
        # of the later writers fails.
        ignored_ids_writer = None
        good_gff_writer = None
        bad_gff_writer = None
        group_writer = None

        try:
            if ignored_ids_fn:
                ignored_ids_writer = open(ignored_ids_fn, 'w')
            if good_gff_fn:
                good_gff_writer = CollapseGffWriter(good_gff_fn)
            if bad_gff_fn:
                bad_gff_writer = CollapseGffWriter(bad_gff_fn)
            if group_fn:
                group_writer = GroupWriter(group_fn)

            # Incremented once per non-empty group that gets collapsed.
            cuff_index = 1
            for recs in iter_gmap_sam(sam_filename=self.sam_filename,
                                      query_len_dict=self.isoform_len_dict,
                                      min_aln_coverage=self.min_aln_coverage,
                                      min_aln_identity=self.min_aln_identity,
                                      ignored_ids_writer=ignored_ids_writer):
                # Iterate over groups of overlapping SAM records
                for records in recs.itervalues():
                    if len(records) > 0:
                        # records: a list of overlapping SAM records,
                        # same strands
                        collapse_sam_records(
                            records=records,
                            cuff_index=cuff_index,
                            cov_threshold=self.cov_threshold,
                            allow_extra_5exon=allow_extra_5exon,
                            skip_5_exon_alt=skip_5_exon_alt,
                            good_gff_writer=good_gff_writer,
                            bad_gff_writer=bad_gff_writer,
                            group_writer=group_writer,
                            tolerate_end=tolerate_end)
                        cuff_index += 1
        finally:
            # Close each writer exactly once. group_writer was previously
            # left open because bad_gff_writer was listed twice.
            # NOTE(review): assumes GroupWriter exposes close() like the
            # other writers -- confirm against its definition.
            for writer in (ignored_ids_writer, good_gff_writer,
                           bad_gff_writer, group_writer):
                if writer is not None:
                    writer.close()
    def run(self, allow_extra_5exon, skip_5_exon_alt,
            ignored_ids_fn, good_gff_fn, bad_gff_fn, group_fn,
            tolerate_end=100):
        """
        Process the whole SAM file:
          (1) Group SAM records based on where they mapped to and strands
          (2) Collapse records, write collapsed isoforms to *_gff_writer,
              write supportive records associated with each collapsed isoforms
              to group_writer.

        allow_extra_5exon, skip_5_exon_alt and tolerate_end are passed through
        to collapse_sam_records. Each *_fn argument is optional: when it is
        falsy the corresponding writer is None; otherwise ignored_ids_fn is
        opened for writing, good_gff_fn / bad_gff_fn are wrapped in
        CollapseGffWriter and group_fn in GroupWriter.

        All writers that were created are closed on exit, whether the run
        finishes normally or raises.
        """
        # Pre-bind to None so the finally clause can always inspect them.
        ignored_ids_writer = good_gff_writer = None
        bad_gff_writer = group_writer = None

        try:
            if ignored_ids_fn:
                ignored_ids_writer = open(ignored_ids_fn, 'w')
            if good_gff_fn:
                good_gff_writer = CollapseGffWriter(good_gff_fn)
            if bad_gff_fn:
                bad_gff_writer = CollapseGffWriter(bad_gff_fn)
            if group_fn:
                group_writer = GroupWriter(group_fn)

            # One index per collapsed (non-empty) group of records.
            cuff_index = 1
            for recs in iter_gmap_sam(sam_filename=self.sam_filename,
                                      query_len_dict=self.isoform_len_dict,
                                      min_aln_coverage=self.min_aln_coverage,
                                      min_aln_identity=self.min_aln_identity,
                                      ignored_ids_writer=ignored_ids_writer):
                # Iterate over groups of overlapping SAM records
                for records in recs.itervalues():
                    if len(records) > 0:
                        # records: a list of overlapping SAM records,
                        # same strands
                        collapse_sam_records(
                            records=records, cuff_index=cuff_index,
                            cov_threshold=self.cov_threshold,
                            allow_extra_5exon=allow_extra_5exon,
                            skip_5_exon_alt=skip_5_exon_alt,
                            good_gff_writer=good_gff_writer,
                            bad_gff_writer=bad_gff_writer,
                            group_writer=group_writer,
                            tolerate_end=tolerate_end)
                        cuff_index += 1
        finally:
            # The original loop named bad_gff_writer twice and skipped
            # group_writer, so the group output was never closed here.
            # NOTE(review): assumes GroupWriter provides close() -- confirm.
            for writer in (ignored_ids_writer, good_gff_writer,
                           bad_gff_writer, group_writer):
                if writer is not None:
                    writer.close()