Python copyAlignment Examples

Programming Language: Python

Namespace/Package Name: alignlib

Method/Function: copyAlignment

Examples at hotexamples.com: 5

Python copyAlignment - 5 examples found. These are the top rated real world Python examples of alignlib.copyAlignment extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: Prediction.py Project: siping/cgat

    def getCopy( self ):
        """return a new copy.
        """

        new_entry = Prediction()

        new_entry.mExpand = self.mExpand 
        
        new_entry.mPredictionId = self.mPredictionId 
        new_entry.mQueryToken = self.mQueryToken 
        new_entry.mQueryFrom = self.mQueryFrom 
        new_entry.mQueryTo = self.mQueryTo 
        new_entry.mSbjctToken = self.mSbjctToken 
        new_entry.mSbjctStrand = self.mSbjctStrand 
        new_entry.mSbjctFrom = self.mSbjctFrom 
        new_entry.mSbjctTo = self.mSbjctTo 
        new_entry.mRank = self.mRank 
        new_entry.score = self.score 
        new_entry.mQueryLength = self.mQueryLength 
        new_entry.mQueryCoverage = self.mQueryCoverage 
        new_entry.mNGaps = self.mNGaps 
        new_entry.mNFrameShifts = self.mNFrameShifts 
        new_entry.mNIntrons = self.mNIntrons 
        new_entry.mNSplits = self.mNSplits 
        new_entry.mNStopCodons = self.mNStopCodons 
        new_entry.mPercentIdentity = self.mPercentIdentity 
        new_entry.mPercentSimilarity = self.mPercentSimilarity 
        new_entry.mTranslation = self.mTranslation 
        new_entry.mSbjctGenomeFrom = self.mSbjctGenomeFrom 
        new_entry.mSbjctGenomeTo = self.mSbjctGenomeTo 
        new_entry.mAlignmentString = self.mAlignmentString 
        new_entry.mQueryAli = self.mQueryAli 
        new_entry.mSbjctAli = self.mSbjctAli 

        if self.mExpand:
            new_entry.mMapPeptide2Translation = alignlib.makeAlignmentVector()
            alignlib.copyAlignment( new_entry.mMapPeptide2Translation, self.mMapPeptide2Translation)
            new_entry.mMapPeptide2Genome = Genomics.String2Alignment( new_entry.mAlignmentString) 
        else:
            new_entry.mMapPeptide2Translation = self.mMapPeptide2Translation = None
            new_entry.mMapPeptide2Genome = self.mMapPeptide2Genome = None

        return new_entry

Example #2

Show file

File: links2exons.py Project: nishantthakur/cgat

            map_row2col.clear()
            alignlib.AlignmentFormatExplicit(link.mQueryFrom, link.mQueryAli, link.mSbjctFrom, link.mSbjctAli).copy(
                map_row2col
            )

            ## test all combinations, the alignment might be a suboptimal alignment in case
            ## of repeats.
            for e1 in cds[link.mQueryToken]:
                for e2 in cds[link.mSbjctToken]:
                    tmp_map_row2col.clear()
                    if param_expand:
                        alignlib.copyAlignment(
                            tmp_map_row2col,
                            map_row2col,
                            e1.mPeptideFrom + 1,
                            e1.mPeptideTo,
                            e2.mPeptideFrom + 1,
                            e2.mPeptideTo,
                        )
                    else:
                        alignlib.copyAlignment(
                            tmp_map_row2col,
                            map_row2col,
                            e1.mPeptideFrom / 3 + 1,
                            e1.mPeptideTo / 3 + 1,
                            e2.mPeptideFrom / 3 + 1,
                            e2.mPeptideTo / 3 + 1,
                        )

                    ## in case of split codons, there is an alignment of length 1. Skip that.
                    if tmp_map_row2col.getLength() > 1:

Example #3

Show file

File: compare_predictions2exons.py Project: siping/cgat

                ref_genome_from -= ref_exons_offset
                ref_genome_to   -= ref_exons_offset

                ## get percent identity for exon
                exon_percent_identity = 0
                exon_percent_similarity = 0
                
                if query_sequence and sbjct_sequence:
                    
                    tmp_ali = alignlib.makeAlignmentVector()

                    xquery_from = exon_from / 3
                    xquery_to = exon_to / 3

                    alignlib.copyAlignment( tmp_ali, entry.mMapPeptide2Translation, xquery_from, xquery_to )

                    if tmp_ali.getLength() == 0:
                        options.stdlog.write( "# WARNING: empty alignment %s\n" % str((ref_from, exon_from, ref_to, exon_to, xquery_from, xquery_to)))
                        nempty_alignments += 1
                    else:
                        if options.loglevel >= 5:
                            options.stdlog.write( "# %s\n" % str( alignlib.AlignmentFormatExplicit( tmp_ali, query_sequence, sbjct_sequence ) ) )

                        exon_percent_identity = alignlib.calculatePercentIdentity( tmp_ali,
                                                                                   query_sequence,
                                                                                   sbjct_sequence ) * 100
                        exon_percent_similarity = alignlib.calculatePercentSimilarity( tmp_ali ) * 100

                if exon_percent_identity >= min_pide:
                    is_good_exon = 1

Example #4

Show file

File: compare_predictions2exons.py Project: BioinformaticsArchive/cgat

def main( argv = None ):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv == None: argv = sys.argv

    parser = E.OptionParser( version = "%prog version: $Id: gpipe/compare_predictions2exons.py 2011 2008-07-04 10:40:51Z andreas $",
                                    usage = globals()["__doc__"] )

    parser.add_option( "-g", "--genome-file", dest="genome_file", type="string",
                       help="filename with genome."  )

    parser.add_option( "-b", "--boundaries", dest="filename_boundaries", type="string",
                       help="filename with exon boundaries."  )

    parser.add_option( "-e", "--exons", dest="filename_exons", type="string",
                       help="filename with exons (output)."  )

    parser.add_option( "-p", "--peptides", dest="filename_peptides", type="string",
                       help="filename with peptide sequences."  )

    parser.add_option( "-w", "--write-notfound", dest="write_notfound", action="store_true",
                       help="print exons for predictions not found in reference."  )

    parser.add_option( "-q", "--quality-pide", dest="quality_threshold_pide", type="int",
                       help="quality threshold (pide) for exons."  )

    parser.set_defaults( 
        genome_file = "genome",
        filename_boundaries = None,
        filename_exons = None,
        filename_peptides = None,
        quality_threshold_pide = 0,
        write_notfound = False,
        ## allowed number of nucleotides for exon boundaries to
        ## be considered equivalent.
        slipping_exon_boundary = 9,
        ## stop codons to search for        
        stop_codons = ("TAG", "TAA", "TGA"), )


    (options, args) = E.Start( parser, add_pipe_options = True )

    if len(args) > 0:
        print USAGE, "no arguments required."
        sys.exit(2)

    reference_exon_boundaries = {}
    if options.filename_boundaries:
        reference_exon_boundaries = Exons.ReadExonBoundaries( open( options.filename_boundaries, "r"),
                                                              do_invert = 1,
                                                              remove_utr = 1)
        E.info( "read exon boundaries for %i queries" % len(reference_exon_boundaries) )
                
    if options.filename_exons:
        outfile_exons = open( options.filename_exons, "w")
        outfile_exons.write( "%s\n" % "\t".join( (
                    "prediction_id",
                    "exon_id",
                    "exon_from",
                    "exon_to",
                    "exon_frame",
                    "reference_id",
                    "reference_from",
                    "reference_to",
                    "reference_phase",
                    "pidentity",
                    "psimilarity",
                    "nframeshifts",
                    "ngaps",
                    "nstopcodons",
                    "is_ok",
                    "genome_exon_from",
                    "genome_exon_to") ) )

    else:
        outfile_exons = None

    if options.filename_peptides:
        peptide_sequences = Genomics.ReadPeptideSequences( open(options.filename_peptides, "r") )
        E.info("read peptide sequences for %i queries" % len(peptide_sequences) )
    else:
        peptide_sequences = {}

    entry = PredictionParser.PredictionParserEntry()
    last_filename_genome = None
    
    nfound, nmissed_exons, nmissed_length = 0, 0, 0
    nempty_alignments = 0

    fasta = IndexedFasta.IndexedFasta( options.genome_file )

    options.stdout.write( "%s\n" % "\t".join( (
                "prediction_id", 
                "number",
                "dubious_exons",
                "boundaries_sum",
                "boundaries_max",
                "identical_exons",
                "inserted_exons",
                "deleted_exons",
                "inserted_introns",
                "deleted_introns",
                "truncated_Nterminus",
                "truncated_Cterminus",
                "deleted_Nexons",
                "deleted_Cexons",
                "inserted_Nexons",
                "inserted_Cexons" ) ) )

    for line in sys.stdin:

        if line[0] == "#": continue
        
        try:
            entry.Read(line)
        except ValueError, msg:
            print "# parsing failed with msg %s in line %s" % (msg, line[:-1])
            sys.exit(1)

        exons = Genomics.Alignment2ExonBoundaries( entry.mMapPeptide2Genome,
                                                   query_from = entry.mQueryFrom,
                                                   sbjct_from = entry.mSbjctGenomeFrom,
                                                   add_stop_codon = 0 )

        if exons[-1][4] != entry.mSbjctGenomeTo:
            print "# WARNING: discrepancy in exon calculation!!!"
            for e in exons:
                print "#", str(e)
            print "#", str(entry)

        if options.loglevel >= 5:
            for e in exons:
                print "#", str(e)
        
        genomic_fragment = fasta.getSequence( entry.mSbjctToken, entry.mSbjctStrand,
                                              entry.mSbjctGenomeFrom, entry.mSbjctGenomeTo )
        
        skip = False
        if peptide_sequences.has_key( entry.mQueryToken ):
            
            query_sequence = alignlib.makeSequence(peptide_sequences[entry.mQueryToken])
            sbjct_sequence = alignlib.makeSequence(entry.mTranslation)
            
            percent_similarity, percent_identity = 0, 0
            if query_sequence.getLength() < entry.mMapPeptide2Translation.getRowTo():
                print "# WARNING: query sequence %s is too short: %i %i" % ( entry.mQueryToken,
                                                                             query_sequence.getLength(),
                                                                             entry.mMapPeptide2Translation.getRowTo())
                sys.stdout.flush()
                nmissed_length += 1
                skip = True
                
            elif sbjct_sequence.getLength() < entry.mMapPeptide2Translation.getColTo():
                print "# WARNING: sbjct sequence %s is too short: %i %i" % ( entry.mSbjctToken,
                                                                       sbjct_sequence.getLength(),
                                                                       entry.mMapPeptide2Translation.getColTo())
                sys.stdout.flush()                
                nmissed_length += 1
                skip = True
            else:
                alignlib.rescoreAlignment( entry.mMapPeptide2Translation, 
                                           query_sequence, 
                                           sbjct_sequence,
                                           alignlib.makeScorer( query_sequence, sbjct_sequence ) )
                percent_identity = alignlib.calculatePercentIdentity( entry.mMapPeptide2Translation,
                                                                      query_sequence,
                                                                      sbjct_sequence ) * 100
                percent_similarity = alignlib.calculatePercentSimilarity( entry.mMapPeptide2Translation ) * 100
                
            E.debug( "prediction %s: percent identity/similarity: before=%5.2f/%5.2f, realigned=%5.2f/%5.2f" % (
                    str(entry.mPredictionId), 
                    entry.mPercentSimilarity,
                    entry.mPercentIdentity,
                    percent_similarity,
                    percent_identity ) )
                
        else:
            query_sequence = None
            sbjct_sequence = None

        # default values
        exons_num_exons = "na"
        exons_boundaries_sum = "na"
        exons_boundaries_max = "na"
        dubious_exons = "na"

        ndeleted_exons, ninserted_exons, ndeleted_introns, ninserted_introns, nidentical_exons = 0,0,0,0,0
        truncated_Nterminal_exon, truncated_Cterminal_exon = 0,0
        ndeleted_Nexons, ndeleted_Cexons = 0, 0
        ninserted_Nexons, ninserted_Cexons = 0, 0
        
        exons_offset = exons[0][3]

        if not reference_exon_boundaries.has_key( entry.mQueryToken ):
            print "# WARNING: sequence %s has no exon boundaries" % ( entry.mQueryToken )
            sys.stdout.flush()
            nmissed_exons += 1
            skip = True
        
        if not skip:

            nfound += 1
            
            ref_exons = reference_exon_boundaries[entry.mQueryToken]

            ref_exons_offset = ref_exons[0].mGenomeFrom
            
            exons_num_exons = len(ref_exons) - len(exons)
            exons_boundaries_sum = 0
            exons_phase = 0
            exons_boundaries_max = 0
            dubious_exons = 0
            
            inserted_exons = 0
            temp_inserted_exons = 0
            
            if options.loglevel >= 3:
                for e in exons:
                    options.stdlog.write( "# %s\n" % str(e) )
                for e in ref_exons:
                    options.stdlog.write( "# %s\n" % str(e) )

            min_pide = entry.mPercentIdentity * options.quality_threshold_pide / 100

            in_sync = 0
            e,r = 0,0

            while e < len(exons) and r < len(ref_exons):

                this_e, this_r = e+1, r+1
                percent_identity = 0
                percent_similarity = 0
                is_good_exon = 0

                if options.loglevel >= 4:
                    options.stdlog.write( "# current exons: %i and %i\n" % (e, r) )
                    sys.stdout.flush()
                    
                exon_from, exon_to, exon_phase, exon_genome_from, exon_genome_to, exon_ali = exons[e][0:6]
                ref_from, ref_to, ref_phase, ref_genome_from, ref_genome_to = (ref_exons[r].mPeptideFrom,
                                                                               ref_exons[r].mPeptideTo,
                                                                               ref_exons[r].frame,
                                                                               ref_exons[r].mGenomeFrom,
                                                                               ref_exons[r].mGenomeTo)

                ref_genome_from -= ref_exons_offset
                ref_genome_to   -= ref_exons_offset

                ## get percent identity for exon
                exon_percent_identity = 0
                exon_percent_similarity = 0
                
                if query_sequence and sbjct_sequence:
                    
                    tmp_ali = alignlib.makeAlignmentVector()

                    xquery_from = exon_from / 3
                    xquery_to = exon_to / 3

                    alignlib.copyAlignment( tmp_ali, entry.mMapPeptide2Translation, xquery_from, xquery_to )

                    if tmp_ali.getLength() == 0:
                        options.stdlog.write( "# WARNING: empty alignment %s\n" % str((ref_from, exon_from, ref_to, exon_to, xquery_from, xquery_to)))
                        nempty_alignments += 1
                    else:
                        if options.loglevel >= 5:
                            options.stdlog.write( "# %s\n" % str( alignlib.AlignmentFormatExplicit( tmp_ali, query_sequence, sbjct_sequence ) ) )

                        exon_percent_identity = alignlib.calculatePercentIdentity( tmp_ali,
                                                                                   query_sequence,
                                                                                   sbjct_sequence ) * 100
                        exon_percent_similarity = alignlib.calculatePercentSimilarity( tmp_ali ) * 100

                if exon_percent_identity >= min_pide:
                    is_good_exon = 1
                else:
                    is_good_exon = 0
                    
                if e < len(exons) -1 :
                    (next_exon_from, next_exon_to, next_exon_phase,
                     next_exon_genome_from, next_exon_genome_to, next_exon_ali) = exons[e+1][0:6]
                else:
                    (next_exon_from, next_exon_to, next_exon_phase,
                     next_exon_genome_from, next_exon_genome_to, next_exon_ali) = 0, 0, 0, 0, 0, []
                    
                if r < len(ref_exons) - 1:
                    next_ref_from, next_ref_to, next_ref_phase = (ref_exons[r+1].mPeptideFrom,
                                                                  ref_exons[r+1].mPeptideTo,
                                                                  ref_exons[r+1].frame)
                else:
                    next_ref_from, next_ref_to, next_ref_phase = 0, 0, 0
                    
                if options.loglevel >= 2:
                    options.stdlog.write( "# %s\n" % "\t".join( map(str, (entry.mQueryToken,
                                                                          exon_from, exon_to, exon_phase,
                                                                          exon_genome_from, exon_genome_to,
                                                                          ref_from, ref_to, ref_phase ))))
                    sys.stdout.flush()                    

                # beware of small exons.
                # if less than options.slipping_exon_boundary: boundary is 0
                # check if end is more than options.splipping_exon_boundary apart as well.
                if exon_to - exon_from <= options.slipping_exon_boundary or \
                        ref_to - ref_from <= options.slipping_exon_boundary:
                    boundary = 0
                else:
                    boundary = options.slipping_exon_boundary
                    
                if ref_to <= exon_from + boundary and \
                   ref_to <= exon_to - options.slipping_exon_boundary:
                    ## no overlap 
                    is_good_exon = 0
                    if e == 0:
                        ndeleted_Nexons += 1
                    else:
                        ndeleted_exons += 1
                    r += 1
                    exon_from, exon_to, exon_phase, exon_genome_from, exon_genome_to = 0, 0, 0, 0, 0
                    overlap = 0
                elif exon_to <= ref_from + boundary and \
                         exon_to <= ref_to - options.slipping_exon_boundary:
                    ## no overlap
                    is_good_exon = 0
                    if r == 0:
                        ninserted_Nexons += 1
                    else:
                        ninserted_exons += 1
                    e += 1
                    ref_from, ref_to, ref_phase = 0, 0, 0
                    overlap = 0
                else:
                    ## overlap
                    overlap = 1
                    dfrom = int(math.fabs(exon_from - ref_from))
                    dto = int(math.fabs(exon_to - ref_to))

                    ## get percent identity for overlapping fragment 
                    if query_sequence and sbjct_sequence:
                        ## this the problem
                        tmp_ali = alignlib.makeAlignmentVector()
                        
                        xquery_from = max( ref_from / 3, exon_from / 3)
                        xquery_to = min(ref_to / 3, exon_to / 3)

                        alignlib.copyAlignment( tmp_ali, entry.mMapPeptide2Translation, xquery_from, xquery_to )

                        if tmp_ali.getLength() == 0:
                            options.stdlog.write( "# warning: empty alignment %s\n" % str((ref_from, exon_from, ref_to, exon_to, xquery_from, xquery_to )))
                            percent_identity = 0
                            percent_similarity = 0
                        else:
                            if options.loglevel >= 5:
                                print str( alignlib.AlignmentFormatExplicit( tmp_ali, query_sequence, sbjct_sequence ) )

                            percent_identity = alignlib.calculatePercentIdentity( tmp_ali,
                                                                                  query_sequence,
                                                                                  sbjct_sequence ) * 100
                            percent_similarity = alignlib.calculatePercentSimilarity( tmp_ali ) * 100
                            
                    if percent_identity >= min_pide:
                        is_good_exon = 1
                    else:
                        is_good_exon = 0
                        dubious_exons += 1

                    ## adjust regions for terminal exons
                    if e == 0 and r == 0 and dfrom <= (entry.mQueryFrom - 1) * 3 and dfrom > 0:
                        if is_good_exon:                        
                            truncated_Nterminal_exon = dfrom
                        dfrom = 0
                            
                    ## truncated terminal exons
                    if e == len(exons)-1 and r == len(ref_exons)-1 and dto <= (entry.mQueryLength - entry.mQueryTo) * 3 and dto > 0:
                        if is_good_exon:                        
                            truncated_Cterminal_exon = dto
                        dto = 0

                    ## do not count deviations for terminal query exons
                    if e == 0 and dfrom <= entry.mQueryFrom * 3 and dfrom > 0:
                        dfrom = 0
                            
                    if e == len(exons)-1 and dto <= (entry.mQueryLength - entry.mQueryTo) * 3 and dto > 0:
                        dto = 0

                    ## permit difference of one codon (assumed to be stop)
                    if e == len(exons)-1 and r == len(ref_exons)-1 and dto == 3:
                        dto = 0

                    ## deal with different boundary conditions:
                    if dfrom == 0 and dto == 0:
                        if is_good_exon: nidentical_exons += 1
                        e += 1
                        r += 1
                    ## next exon within this ref_exon
                    elif exon_to < ref_to and next_exon_to and next_exon_to <= ref_to + options.slipping_exon_boundary:
                        if is_good_exon: ninserted_introns += 1
                        e += 1
                        in_sync = 1
                        dto = 0
                    ## next ref_exon within this exon
                    elif ref_to < exon_to and next_ref_to and next_ref_to <= exon_to + options.slipping_exon_boundary:
                        if is_good_exon: ndeleted_introns += 1
                        r += 1
                        in_sync = 1
                        dto = 0
                    else:
                        e += 1
                        r += 1
                        if in_sync:
                            dfrom = 0

                    if is_good_exon:
                        exons_boundaries_sum += dfrom + dto
                        exons_boundaries_max = max( dfrom, exons_boundaries_max )
                        exons_boundaries_max = max( dto, exons_boundaries_max )
                    
                        
                    ###########################################################
                    ## count inserted/deleted introns and misplaced boundaries
                    ##
                    ## if exon and next_exon in ref_exon: inserted intron
                    ## if ref_exon and next_ref_exon in exon: deleted intron
                    
                if outfile_exons:

                    if genomic_fragment and exon_genome_to:
                        nintrons, nframeshifts, ngaps, nsplits, nstopcodons, disruptions = Genomics.CountGeneFeatures( exon_genome_from - entry.mSbjctGenomeFrom,
                                                                                                                       exon_ali,
                                                                                                                       genomic_fragment,
                                                                                                                       border_stop_codon = 0
                                                                                                                       )
                    else:
                        nintrons, nframeshifts, ngaps, nsplits, nstopcodons = 0, 0, 0, 0, 0

                    if exon_to == 0: this_e = 0
                    if ref_to == 0: this_r = 0
                    outfile_exons.write( string.join( map(str, (entry.mPredictionId,
                                                                this_e, exon_from, exon_to, exon_phase,
                                                                this_r, ref_from, ref_to, ref_phase,
                                                                percent_identity, percent_similarity,
                                                                nframeshifts, ngaps, nstopcodons,
                                                                is_good_exon,
                                                                exon_genome_from, exon_genome_to,
                                                                )), "\t") + "\n")
                    
            while e < len(exons):
                exon_from, exon_to, exon_phase, exon_genome_from, exon_genome_to = exons[e][0:5]
                e += 1
                ninserted_Cexons += 1

                if outfile_exons:
                    outfile_exons.write( string.join( map(str, (entry.mPredictionId, 
                                                                e, exon_from, exon_to, exon_phase,
                                                                0, 0, 0, 0,
                                                                0, 0,
                                                                0, 0, 0,
                                                                1,
                                                                exon_genome_from, exon_genome_to,
                                                                )), "\t") + "\n")
                    
            while r < len(ref_exons):
                ref_from, ref_to, ref_phase, ref_genome_from, ref_genome_to = (ref_exons[r].mPeptideFrom,
                                                                               ref_exons[r].mPeptideTo,
                                                                               ref_exons[r].frame,
                                                                               ref_exons[r].mGenomeFrom,
                                                                               ref_exons[r].mGenomeTo)
                ndeleted_Cexons += 1
                ref_genome_from -= ref_exons_offset
                ref_genome_to -= ref_exons_offset
                r += 1
                if outfile_exons:
                    outfile_exons.write( string.join( map(str, (entry.mPredictionId,
                                                                0, 0, 0, 0,
                                                                r, ref_from, ref_to, ref_phase, 
                                                                0, 0,
                                                                0, 0, 0,
                                                                0,
                                                                0, 0,
                                                                )), "\t") + "\n")
        else:
            if options.write_notfound:
                this_e = 0
                ## use prediction's identity/similarity for exons.
                ## This will still then flag stop-codons in later analysis
                percent_identity = entry.mPercentIdentity
                percent_similarity = entry.mPercentSimilarity
            
                for exon in exons:
                    this_e += 1
                    exon_from, exon_to, exon_phase, exon_genome_from, exon_genome_to, exon_ali = exon[0:6]
                    if genomic_fragment:
                        nintrons, nframeshifts, ngaps, nsplits, nstopcodons, disruptions = Genomics.CountGeneFeatures( exon_genome_from - entry.mSbjctGenomeFrom,
                                                                                                                       exon_ali,
                                                                                                                       genomic_fragment )
                    
                    outfile_exons.write( string.join( map(str, (entry.mPredictionId,
                                                                this_e, exon_from, exon_to, exon_phase,
                                                                0, 0, 0, 0,
                                                                percent_identity, percent_similarity,
                                                                nframeshifts, ngaps, nstopcodons,
                                                                1,
                                                                exon_genome_from, exon_genome_to,
                                                                )), "\t") + "\n")
            
        options.stdout.write( "\t".join(map(str,
                              (entry.mPredictionId,
                               exons_num_exons,
                               dubious_exons,
                               exons_boundaries_sum,
                               exons_boundaries_max,
                               nidentical_exons,
                               ninserted_exons, ndeleted_exons,
                               ninserted_introns, ndeleted_introns,
                               truncated_Nterminal_exon, truncated_Cterminal_exon,
                               ndeleted_Nexons, ndeleted_Cexons,
                               ninserted_Nexons, ninserted_Cexons))) + "\n" )

Example #5

Show file

File: psl2psl.py Project: siping/cgat

def pslMap( options ):
    """thread psl alignments using intervals.

    """

    if options.format == "gtf":
        use_copy = False
    else:
        use_copy = True

    ninput, noutput, ndiscarded, nskipped, nskipped_small_queries = 0, 0, 0, 0, 0

    min_length = options.min_aligned

    for match, qx, tx in iterator_psl_intervals( options ):

        map_query2target = match.getMapQuery2Target()

        ninput += 1

        ## if no filter on qx or tx, use full segment
        if qx == None:
            qx = [ (match.mQueryFrom,match.mQueryTo,0) ]
        elif tx == None:
            tx = [ (match.mSbjctFrom,match.mSbjctTo,0) ]

        ## if no overlap: return
        if not qx or not tx: 
            nskipped += 1
            continue

        for query in qx:

            qstart, qend, qval = query

            # skip elements that are too small
            if qend - qstart < min_length: 
                E.debug( "query too small - skipped at %s:%i-%i" % (match.mQueryId, qstart, qend) )
                nskipped_small_queries += 1
                continue

            E.debug( "working on query %s:%i-%i" % (match.mQueryId, qstart, qend) )

            mqstart, mqend = ( map_query2target.mapRowToCol(qstart, 
                                                            alignlib.RIGHT), 
                               map_query2target.mapRowToCol(qend, 
                                                            alignlib.LEFT) )
                        
                
            if match.strand == "-":
                qstart, qend = match.mQueryLength - qend, match.mQueryLength - qstart

            for target in tx:

                tstart, tend, tval = target
                if tstart >= mqend or tend <= mqstart: continue
                if tend - tstart < min_length: continue

                new = alignlib.makeAlignmentBlocks()
                    
                if use_copy:
                    # do copy with range filter
                    if options.loglevel >= 3:

                        mtstart, mtend = map_query2target.mapColToRow(tstart), map_query2target.mapColToRow(tend) 
                        E.debug( "query: %i-%i (len=%i)-> %i-%i(len=%i); target: %i-%i (len=%i)-> %i-%i (len=%i)" % \
                                     (qstart, qend,
                                      qend - qstart,
                                      mqstart, mqend,
                                      mqend - mqstart,
                                      tstart, tend,
                                      tend - tstart,
                                      mtstart, mtend,
                                      mtend - mtstart ) )
                                     
                    alignlib.copyAlignment( 
                        new, 
                        map_query2target,
                        qstart, qend,
                        tstart, tend )
                else:
                    # do copy with alignment filter
                    map_query = qval
                    if map_query:
                        tmp = alignlib.makeAlignmentBlocks()                        
                        alignlib.copyAlignment( tmp, map_query2target, map_query, alignlib.RR )
                        if options.loglevel >= 5:
                            options.stdlog.write( "######## mapping query ###########\n" )
                            options.stdlog.write( "# %s\n" % str(alignlib.AlignmentFormatEmissions( map_query2target ) ))
                            options.stdlog.write( "# %s\n" % str(alignlib.AlignmentFormatEmissions( map_query ) ))
                            options.stdlog.write( "# %s\n" % str(alignlib.AlignmentFormatEmissions( tmp ) ))
                    else:
                        tmp = map_query2target
                        
                    map_target = tval
                    if map_target:
                        new = alignlib.makeAlignmentBlocks()
                        alignlib.copyAlignment( new, tmp, map_target, alignlib.CR )                        
                        if options.loglevel >= 5:
                            options.stdlog.write( "######## mapping target ###########\n" )
                            options.stdlog.write( "# before: %s\n" % str(alignlib.AlignmentFormatEmissions( tmp ) ))
                            options.stdlog.write( "# map   : %s\n" % str(alignlib.AlignmentFormatEmissions( map_target ) ))
                            options.stdlog.write( "# after : %s\n" % str(alignlib.AlignmentFormatEmissions( new ) ))
                    else:
                        new = tmp

                if options.loglevel >= 4:
                    E.debug("putative match with intervals: %s and %s: %i-%i" % \
                                (str(query), str(target), qstart, qend ))
                    if options.loglevel >= 5:
                        E.debug( "input : %s" % str(alignlib.AlignmentFormatEmissions( map_query2target ) ))
                        E.debug( "final : %s" % str(alignlib.AlignmentFormatEmissions( new ) ) )

                    if new.getLength() > 0:
                        n = match.copy()
                        n.fromMap( new, use_strand = True )
                        E.info( "match : %s" % (str(n)))

                if new.getNumAligned() > options.min_aligned:
                    n = match.copy()
                    n.fromMap( new, use_strand = True )
                    options.stdout.write( str(n) + "\n" )
                    noutput += 1
                else:
                    ndiscarded += 1

    E.info( "map: ninput=%i, noutput=%i, nskipped=%i, ndiscarded=%i, nsmall_queries=%i" % \
                (ninput, noutput, nskipped, ndiscarded, nskipped_small_queries) )