def _get_sam_groups(ignored_ids_writer=None):
    """Load every group of SAM records found in SORTED_GMAP_SAM.

    Query lengths are looked up from READS_DS. Records are read through
    ``iter_gmap_sam`` with a minimum alignment coverage of 0.99 and a
    minimum alignment identity of 0.85.

    ``ignored_ids_writer`` is handed to ``iter_gmap_sam`` unchanged
    (None by default).

    Returns a list holding each group yielded by ``iter_gmap_sam``.
    """
    read_lengths = ContigSetReaderWrapper.name_to_len_dict(READS_DS)
    group_iter = iter_gmap_sam(sam_filename=SORTED_GMAP_SAM,
                               query_len_dict=read_lengths,
                               min_aln_coverage=0.99,
                               min_aln_identity=0.85,
                               ignored_ids_writer=ignored_ids_writer)
    # Materialize the iterator so callers receive a plain list.
    return list(group_iter)
def _get_sam_groups(ignored_ids_writer=None):
    """Collect the grouped SAM records of SORTED_GMAP_SAM into a list.

    The name-to-length mapping of the queries comes from READS_DS, and
    ``iter_gmap_sam`` is invoked with min_aln_coverage=0.99 and
    min_aln_identity=0.85. The optional ``ignored_ids_writer`` is forwarded
    to ``iter_gmap_sam`` as given.

    Returns the list of groups produced by ``iter_gmap_sam``.
    """
    name_to_len = ContigSetReaderWrapper.name_to_len_dict(READS_DS)
    sam_kwargs = dict(sam_filename=SORTED_GMAP_SAM,
                      query_len_dict=name_to_len,
                      min_aln_coverage=0.99,
                      min_aln_identity=0.85,
                      ignored_ids_writer=ignored_ids_writer)
    # Drain the iterator eagerly so the result is a concrete list.
    return list(iter_gmap_sam(**sam_kwargs))
def run(self, allow_extra_5exon, skip_5_exon_alt,
        ignored_ids_fn, good_gff_fn, bad_gff_fn, group_fn,
        tolerate_end=100):
    """
    Process the whole SAM file:
      (1) Group SAM records based on where they mapped to and strands
      (2) Collapse records, write collapsed isoforms to *_gff_writer,
          write supportive records associated with each collapsed
          isoforms to group_writer.

    Parameters:
      allow_extra_5exon, skip_5_exon_alt, tolerate_end -- forwarded
          unchanged to collapse_sam_records.
      ignored_ids_fn -- output path opened in text write mode and passed
          to iter_gmap_sam as ignored_ids_writer; a falsy value disables it.
      good_gff_fn, bad_gff_fn -- output paths wrapped in CollapseGffWriter;
          a falsy value disables the corresponding writer.
      group_fn -- output path wrapped in GroupWriter; a falsy value
          disables it.

    Every writer that was opened is closed before returning, including
    when an exception is raised while processing.
    """
    ignored_ids_writer = None
    good_gff_writer = None
    bad_gff_writer = None
    group_writer = None
    try:
        # Open writers inside the try block so that a failure while opening
        # a later one still closes those already opened.
        if ignored_ids_fn:
            ignored_ids_writer = open(ignored_ids_fn, 'w')
        if good_gff_fn:
            good_gff_writer = CollapseGffWriter(good_gff_fn)
        if bad_gff_fn:
            bad_gff_writer = CollapseGffWriter(bad_gff_fn)
        if group_fn:
            group_writer = GroupWriter(group_fn)

        cuff_index = 1
        for recs in iter_gmap_sam(sam_filename=self.sam_filename,
                                  query_len_dict=self.isoform_len_dict,
                                  min_aln_coverage=self.min_aln_coverage,
                                  min_aln_identity=self.min_aln_identity,
                                  ignored_ids_writer=ignored_ids_writer):
            # Iterate over groups of overlapping SAM records
            for records in recs.itervalues():
                if len(records) == 0:
                    continue
                # records: a list of overlapping SAM records, same strands
                collapse_sam_records(records=records,
                                     cuff_index=cuff_index,
                                     cov_threshold=self.cov_threshold,
                                     allow_extra_5exon=allow_extra_5exon,
                                     skip_5_exon_alt=skip_5_exon_alt,
                                     good_gff_writer=good_gff_writer,
                                     bad_gff_writer=bad_gff_writer,
                                     group_writer=group_writer,
                                     tolerate_end=tolerate_end)
                # Only non-empty groups consume an index.
                cuff_index += 1
    finally:
        # Close each writer exactly once. group_writer must be in this list:
        # previously bad_gff_writer appeared twice and group_writer was
        # never closed.
        for writer in (ignored_ids_writer, good_gff_writer,
                       bad_gff_writer, group_writer):
            if writer is not None:
                writer.close()
def run(self, allow_extra_5exon, skip_5_exon_alt,
        ignored_ids_fn, good_gff_fn, bad_gff_fn, group_fn,
        tolerate_end=100):
    """
    Process the whole SAM file:
      (1) Group SAM records based on where they mapped to and strands
      (2) Collapse records, write collapsed isoforms to *_gff_writer,
          write supportive records associated with each collapsed
          isoforms to group_writer.

    Parameters:
      allow_extra_5exon, skip_5_exon_alt, tolerate_end -- forwarded
          unchanged to collapse_sam_records.
      ignored_ids_fn -- output path opened in text write mode and passed
          to iter_gmap_sam as ignored_ids_writer; a falsy value disables it.
      good_gff_fn, bad_gff_fn -- output paths wrapped in CollapseGffWriter;
          a falsy value disables the corresponding writer.
      group_fn -- output path wrapped in GroupWriter; a falsy value
          disables it.

    Every writer that was opened is closed before returning, including
    when an exception is raised while processing.
    """
    ignored_ids_writer = None
    good_gff_writer = None
    bad_gff_writer = None
    group_writer = None
    try:
        # Open writers inside the try block so that a failure while opening
        # a later one still closes those already opened.
        if ignored_ids_fn:
            ignored_ids_writer = open(ignored_ids_fn, 'w')
        if good_gff_fn:
            good_gff_writer = CollapseGffWriter(good_gff_fn)
        if bad_gff_fn:
            bad_gff_writer = CollapseGffWriter(bad_gff_fn)
        if group_fn:
            group_writer = GroupWriter(group_fn)

        cuff_index = 1
        for recs in iter_gmap_sam(sam_filename=self.sam_filename,
                                  query_len_dict=self.isoform_len_dict,
                                  min_aln_coverage=self.min_aln_coverage,
                                  min_aln_identity=self.min_aln_identity,
                                  ignored_ids_writer=ignored_ids_writer):
            # Iterate over groups of overlapping SAM records
            for records in recs.itervalues():
                if len(records) == 0:
                    continue
                # records: a list of overlapping SAM records, same strands
                collapse_sam_records(records=records,
                                     cuff_index=cuff_index,
                                     cov_threshold=self.cov_threshold,
                                     allow_extra_5exon=allow_extra_5exon,
                                     skip_5_exon_alt=skip_5_exon_alt,
                                     good_gff_writer=good_gff_writer,
                                     bad_gff_writer=bad_gff_writer,
                                     group_writer=group_writer,
                                     tolerate_end=tolerate_end)
                # Only non-empty groups consume an index.
                cuff_index += 1
    finally:
        # Close each writer exactly once. group_writer must be in this list:
        # previously bad_gff_writer appeared twice and group_writer was
        # never closed.
        for writer in (ignored_ids_writer, good_gff_writer,
                       bad_gff_writer, group_writer):
            if writer is not None:
                writer.close()