Esempio n. 1
0
def _orf_overlap_rank(theorf, orflist, orfpointer):
    """
    Rank theorf on coding propensity among the Orfs that surround it in orflist

    @type  theorf: Orf object
    @param theorf: the Orf to rank; its startPY and endPY attributes are read

    @type  orflist: list
    @param orflist: Orf objects; theorf is expected at index orfpointer

    @type  orfpointer: integer
    @param orfpointer: index of theorf in orflist

    @rtype:  integer
    @return: 1-based position of the first Orf whose startPY equals that of
             theorf, after ordering the collected Orfs on 'coding_propensity'
             with reversed=True

    @attention: NOTE(review): both scans stop at the first Orf that fails the
                test, which presumably relies on orflist being ordered on
                startPY - confirm against the caller. The backward scan tests
                endPY, so an Orf further back that ends later is not collected.
    """
    # scan forward from theorf; stop at the first Orf starting beyond its end
    following = []
    for candidate in orflist[orfpointer + 1:]:
        if candidate.startPY > theorf.endPY:
            break
        following.append(candidate)

    # scan backward from theorf; stop at the first Orf ending before its start
    preceding = []
    for index in reversed(range(orfpointer)):
        candidate = orflist[index]
        if candidate.endPY < theorf.startPY:
            break
        preceding.append(candidate)
    # collected nearest-first; flip to restore the order of orflist
    preceding.reverse()

    # order the collected Orfs (theorf included) on coding propensity
    ranked = order_list_by_attribute(
        preceding + [theorf] + following,
        order_by='coding_propensity',
        reversed=True)

    # theorf is recognized by its start coordinate
    for position, candidate in enumerate(ranked):
        if candidate.startPY == theorf.startPY:
            return position + 1
    # no match: fall back on the number of Orfs inspected
    return len(ranked)
Esempio n. 2
0
def _orf_overlap_rank(theorf, orflist, orfpointer):
    """
    Return the coding propensity rank of theorf within its neighbourhood

    The neighbourhood is theorf itself plus the Orfs adjacent to it in
    orflist: going forward until an Orf starts beyond theorf.endPY, and going
    backward until an Orf ends before theorf.startPY.

    @type  theorf: Orf object
    @param theorf: Orf to rank (startPY and endPY attributes are read)

    @type  orflist: list
    @param orflist: list of Orf objects that holds theorf at orfpointer

    @type  orfpointer: integer
    @param orfpointer: position of theorf in orflist

    @rtype:  integer
    @return: rank (counting from 1) of the first Orf with the same startPY as
             theorf in the neighbourhood ordered on 'coding_propensity'
             (reversed=True)

    @attention: NOTE(review): stopping at the first Orf outside the window
                presumably assumes orflist is ordered on startPY - verify
                against the caller
    """
    cluster = [theorf]

    # grow the neighbourhood towards the end of the list
    for successor in orflist[orfpointer + 1:]:
        if successor.startPY > theorf.endPY:
            break
        cluster.append(successor)

    # grow the neighbourhood towards the front of the list
    cursor = orfpointer - 1
    while cursor >= 0:
        predecessor = orflist[cursor]
        if predecessor.endPY < theorf.startPY:
            break
        # prepend, so the neighbourhood keeps the order of orflist
        cluster[:0] = [predecessor]
        cursor -= 1

    # order the neighbourhood on coding propensity
    cluster = order_list_by_attribute(cluster,
                                      order_by='coding_propensity',
                                      reversed=True)

    # count up to and including the Orf that starts where theorf starts
    rank = 0
    target_start = theorf.startPY
    for rank, member in enumerate(cluster, 1):
        if member.startPY == target_start:
            break
    return rank
Esempio n. 3
0
def assembleorf(orflist,start,end,max_errors=2,minimum_errors_only=True):
    """
    Chain Orfs together to span start..end and report the cheapest chains

    Every Orf in orflist (longest first) is used once as a seed. A seed is
    extended with other Orfs, one at a time, until the chain reaches `start`
    on the left and `end`-3 on the right. For each completed chain the frames
    of its Orfs are handed to _getorfgroupframedistance; the chains are then
    sorted and printed as PotentialSequenceErrorWarning messages.

    @type  orflist: list
    @param orflist: Orf objects; their startPY, endPY, frame and id
                    attributes are read

    @type  start: integer
    @param start: left coordinate the chain must reach

    @type  end: integer
    @param end: right coordinate; 3 is subtracted (STOP codon length)

    @type  max_errors: integer
    @param max_errors: only applied when minimum_errors_only is False; chains
                    with distance[0] above this value are discarded

    @type  minimum_errors_only: Boolean
    @param minimum_errors_only: if True, reporting stops at the first chain
                    whose distance[0] exceeds that of the first chain in the
                    sorted list

    @rtype:  None
    @return: nothing is returned; results are printed

    @attention: a seed whose chain cannot be extended far enough to span the
                region is skipped (this situation used to loop forever)
    """
    orflist = order_list_by_attribute(orflist,order_by='length',reversed=True)
    orfidcombis = []
    for centralorf in orflist:
        orfset = [ ( centralorf.startPY, centralorf.endPY, centralorf.frame, centralorf ) ]
        minpos = centralorf.startPY
        maxpos = centralorf.endPY
        spans_region = True
        # correct 'end' orf gff track position with STOP codon length!
        while minpos > start or maxpos < end-3:
            # orfset only changes right before a break, so the ids already
            # in the chain are constant during one pass over orflist
            present_ids = [ tup[-1].id for tup in orfset ]
            for orf in orflist:
                if orf.id in present_ids: continue
                entry = ( orf.startPY, orf.endPY, orf.frame, orf )

                # extend the chain on its left side
                if minpos > start and orf.startPY < minpos and orf.endPY >= minpos-3:
                    orfset.insert(0, entry)
                    minpos = min([ tup[0] for tup in orfset ])
                    maxpos = max([ tup[1] for tup in orfset ])
                    # this orf also delimits the right side of the chain:
                    # list it at that end too
                    if orf.endPY == maxpos:
                        orfset.append(entry)
                    # restart the scan with the updated chain
                    break

                # extend the chain on its right side
                if maxpos < end-3 and orf.endPY > maxpos and orf.startPY <= maxpos+3:
                    orfset.append(entry)
                    minpos = min([ tup[0] for tup in orfset ])
                    maxpos = max([ tup[1] for tup in orfset ])
                    # this orf also delimits the left side of the chain:
                    # list it at that end too
                    if orf.startPY == minpos:
                        orfset.insert(0, entry)
                    # restart the scan with the updated chain
                    break
            else:
                # a full pass added nothing, so minpos/maxpos can not change
                # anymore; give up on this seed instead of looping forever
                spans_region = False
                break

        # an incomplete chain can not explain the region start..end
        if not spans_region: continue

        # calculate minimal amount of sequence changes needed
        frames = [ tup[2] for tup in orfset ]
        distance = _getorfgroupframedistance(frames)
        # only store if not too many errors
        if not minimum_errors_only and distance[0] > max_errors:
            continue
        combi = ( sum(distance), distance, orfset )
        if combi not in orfidcombis: orfidcombis.append( combi )

    # now find the most likely explanation; sorting puts the chain with the
    # lowest summed distance first
    orfidcombis.sort()
    minimum_error_count = None
    for (summed,distance,orfset) in orfidcombis:
        # the first chain in the sorted list defines the reference error count
        if minimum_error_count is None: minimum_error_count = distance[0]
        # break when more than the reference number of sequence errors
        if minimum_errors_only and distance[0] > minimum_error_count: break

        # print PotentialSequenceErrorWarning
        message = [ str(distance[1:]), "(insertions,deletions,errors)" ]
        message.extend([ str(tup[-1]) for tup in orfset ])
        # parenthesized single argument: same output from the Python 2
        # print statement, and valid as a Python 3 function call
        print(PotentialSequenceErrorWarning( message ))
Esempio n. 4
0
 def order_list_by_attribute(self, order_by='', reversed=False):
     """
     Re-order self.codingblockgraphs and store the result on this object

     @type  order_by: string
     @param order_by: passed on unchanged as order_by

     @type  reversed: Boolean
     @param reversed: passed on unchanged as reversed

     @attention: see graphAbgp.ordering.order_list_by_attribute
     """
     # the name below is the module-level function, not this method
     ordered = order_list_by_attribute(
         self.codingblockgraphs,
         order_by=order_by,
         reversed=reversed,
     )
     self.codingblockgraphs = ordered
Esempio n. 5
0
 def order_list_by_attribute(self, order_by='', reversed=False):
     """
     Replace self.codingblockgraphs by its ordered counterpart

     Thin wrapper: both arguments are forwarded as keyword arguments to the
     module-level order_list_by_attribute function; nothing is returned.

     @attention: see graphAbgp.ordering.order_list_by_attribute
     """
     current = self.codingblockgraphs
     self.codingblockgraphs = order_list_by_attribute(
         current, order_by=order_by, reversed=reversed)
Esempio n. 6
0
        print explain()
        sysExit()
        ## TEMPORARILY backwards-compatibility with old input_data_struct.txt file
        #input = eval(open('input_data_struct.txt').read().strip())

    # do geneconfirmation; this includes rungetorf() function call
    # (input is rebound to the returned structure; gene_status comes with it)
    input, gene_status = geneconfirmation(input, verbose=True)

    # process tcode data; the returned structure replaces input
    input = obtaintcodedata(input)

    # one entry per key of input; each entry holds its Orf objects
    # under ['orfs'].orfs
    for org in input.keys():

        # take all the Orf objects of this entry, ordered on startPY
        orflist = input[org]['orfs'].orfs
        orflist = order_list_by_attribute(orflist, 'startPY')
        for orfpointer in range(0, len(orflist)):
            orf = orflist[orfpointer]
            # store the coding propensity on the Orf object:
            # its tcode score multiplied by its length
            orf.coding_propensity = orf.tcode_score() * orf.length

        # separate pass, after coding_propensity is set on every Orf
        for orfpointer in range(0, len(orflist)):
            orf = orflist[orfpointer]
            # rank this Orf with _orf_overlap_rank, given its position in the
            # startPY-ordered list, and store the rank on the Orf object
            rank = _orf_overlap_rank(orf, orflist, orfpointer)
            orf._orf_overlap_rank = rank

        # re-order orflist on coding_propensity (reversed=True; presumably
        # highest first - confirm in order_list_by_attribute)
        orflist = order_list_by_attribute(orflist,
                                          'coding_propensity',
                                          reversed=True)
Esempio n. 7
0
        print explain()
        sysExit()
        ## TEMPORARILY backwards-compatibility with old input_data_struct.txt file
        #input = eval(open('input_data_struct.txt').read().strip())

    # do geneconfirmation; this includes rungetorf() function call
    # (input is rebound to the returned structure; gene_status comes with it)
    input,gene_status = geneconfirmation(input,verbose=True)

    # process tcode data; the returned structure replaces input
    input = obtaintcodedata(input)

    # one entry per key of input; each entry holds its Orf objects
    # under ['orfs'].orfs
    for org in input.keys():

        # take all the Orf objects of this entry, ordered on startPY
        orflist = input[org]['orfs'].orfs
        orflist = order_list_by_attribute(orflist,'startPY')
        for orfpointer in range(0,len(orflist)):
            orf = orflist[orfpointer]
            # store the coding propensity on the Orf object:
            # its tcode score multiplied by its length
            orf.coding_propensity = orf.tcode_score() * orf.length

        # separate pass, after coding_propensity is set on every Orf
        for orfpointer in range(0,len(orflist)):
            orf = orflist[orfpointer]
            # rank this Orf with _orf_overlap_rank, given its position in the
            # startPY-ordered list, and store the rank on the Orf object
            rank = _orf_overlap_rank(orf,orflist,orfpointer)
            orf._orf_overlap_rank = rank

        # re-order orflist on coding_propensity (reversed=True; presumably
        # highest first - confirm in order_list_by_attribute)
        orflist = order_list_by_attribute(orflist,'coding_propensity',reversed=True)
        cnt = 1
        for orf in orflist: