Example #1
0
    def output_exons_as_gff(self,
                            base_diff=6,
                            const_only=False,
                            cds_only=False):
        """
        Output exons for all genes as GFF, plus a gene-to-exons table.

        Writes one GFF file holding the selected exons of every gene in
        self.genes and, in the same directory, a tab-separated
        "<basename>.to_genes.txt" file with columns "gene_id" and "exons"
        (comma-separated exon labels, or self.na_val when the gene has
        no exons of the selected kind).

        If the GFF file already exists, nothing is written and the
        method returns immediately.

        - base_diff: passed through to gene.compute_const_exons; only
          used when const_only is True
        - const_only: if True, output only constitutive exons (files go
          to self.const_exons_dir instead of self.exons_dir)
        - cds_only: if True, output CDS only exons
        """
        exons_type = "exons"
        exons_outdir = self.exons_dir
        if const_only:
            exons_type = "const_exons"
            exons_outdir = self.const_exons_dir
        # Both output files share this stem, so the mapping filename is
        # derived from it directly rather than by substring replacement
        # (which would also rewrite a ".gff" occurring inside the source
        # name).
        if cds_only:
            exons_stem = "%s.cds_only.%s" % (self.source, exons_type)
        else:
            exons_stem = "%s.%s" % (self.source, exons_type)
        gff_output_filename = os.path.join(exons_outdir,
                                           exons_stem + ".gff")
        # Single-argument parenthesised prints behave identically as a
        # Python 2 print statement and as a Python 3 function call.
        print("Outputting exons...")
        print("  - Exons type: %s" % (exons_type,))
        print("  - Output file: %s" % (gff_output_filename,))
        print("  - CDS only: %s" % (cds_only,))
        if os.path.isfile(gff_output_filename):
            print("%s exists. Skipping.." % (gff_output_filename,))
            return
        # Output a map from genes to exons for convenience
        genes_to_exons_fname = os.path.join(exons_outdir,
                                            exons_stem + ".to_genes.txt")
        rec_type = "exon"
        genes_to_exons = []
        genes_to_exons_header = ["gene_id", "exons"]
        # The context manager guarantees the GFF file is flushed and
        # closed even if writing one of the genes raises.
        with open(gff_output_filename, "w") as gff_file:
            gff_out = gff_utils.Writer(gff_file)
            for gene_id, gene in self.genes.items():
                if const_only:
                    # Get only constitutive exons
                    exons = gene.compute_const_exons(base_diff=base_diff,
                                                     cds_only=cds_only)
                elif cds_only:
                    # Get all CDS exons
                    exons = gene.cds_parts
                else:
                    # Get all exons
                    exons = gene.parts
                exon_labels = [e.label for e in exons]
                if exon_labels:
                    exon_labels = ",".join(exon_labels)
                else:
                    exon_labels = self.na_val
                genes_to_exons.append({"gene_id": gene_id,
                                       "exons": exon_labels})
                # Output this gene's selected exons to the GFF file
                GeneModel.output_parts_as_gff(gff_out,
                                              exons,
                                              gene.chrom,
                                              gene.strand,
                                              source=self.source,
                                              rec_type=rec_type,
                                              gene_id=gene_id)
        # Fix the column order when building the frame instead of via
        # to_csv's "cols" keyword, which newer pandas no longer accepts;
        # this also yields a header-only table when there are no genes.
        genes_to_exons = pandas.DataFrame(genes_to_exons,
                                          columns=genes_to_exons_header)
        genes_to_exons.to_csv(genes_to_exons_fname,
                              index=False,
                              sep="\t")