Ejemplo n.º 1
0
    def get_pacbps_by_nodes(self, node1=None, node2=None, order_by='bits'):
        """
        Get the pacbp(s) from the CodingBlockGraph by node, optionally two nodes

        @type  node1: *
        @param node1: node identifier; must be a node of this graph

        @type  node2: *
        @param node2: node identifier (or None); when given, it must be a
                      node of this graph as well

        @type  order_by: string
        @param order_by: 'bits' (DESC) or 'length' (DESC); any other value
                         silently falls back to the default 'bits'

        @rtype:  list
        @return: list of pacbp objects that involve node1 (and node2 when
                 given); [] when no pacbps are stored in the graph yet

        @raise InproperlyAppliedArgument: node1, or a given node2, is not a
                                          node of this graph

        @attention: when only a single node is requested, pacbp(s) are swapped
                    to make the requested node the query node
        @attention: when two nodes are requested, pacbp(s) are returned as
                    they are stored (no swapping is done)
        """
        graph_nodes = self.get_nodes()
        if node1 not in graph_nodes:
            message = "node1 `%s` not in graph: %s" % (node1, graph_nodes)
            # call-form raise is valid on both Python 2 and Python 3
            raise InproperlyAppliedArgument(message)
        if node2 and node2 not in graph_nodes:
            message = "node2 `%s` not in graph: %s" % (node2, graph_nodes)
            raise InproperlyAppliedArgument(message)
        if order_by not in ('bits', 'length'):
            order_by = 'bits'

        # if no pacbps are stored into the object yet, return []
        if not self.pacbps:
            return []

        thepacbps = []
        for (_key, nodeA, nodeB), pacbporf in self.pacbps.items():
            if not (nodeA == node1 or nodeB == node1):
                # this pacbp does not involve node1 at all
                continue
            if not node2:
                if nodeA == node1:
                    thepacbps.append(pacbporf)
                else:
                    # node1 is the sbjct here: swap query and sbjct!
                    thepacbps.append(pacb.swap_query_and_sbjct(pacbporf))
            elif nodeA == node2 or nodeB == node2:
                thepacbps.append(pacbporf)

        # order the requested pacbps (descending) on the validated attribute
        return ordering.order_list_by_attribute(thepacbps,
                                                order_by,
                                                reversed=True)
Ejemplo n.º 2
0
def hmmresults2splittedpacbps(results, hmmcoords, target, informant, inwpCBG,
                              input, gapsize=2, min_bitscore=0):
    """
    Convert HMM hits into gap-splitted pacbporf objects, ordered on bitscore

    @type  results: iterable
    @param results: hmm hits; each hit is a 10-item sequence
                    ( sbjct_header, sbjct_start, sbjct_end, query_start,
                    query_end, query, match, sbjct, score, expect ) of which
                    the sbjct_header has the form `<prefix>_orf_<orfid>`

    @type  hmmcoords: dict
    @param hmmcoords: hmmcoords[target] holds the query coordinates; its
                      min() and max() are handed to hmmhit2pacbp

    @type  target: *
    @param target: identifier of the query (target) organism

    @type  informant: *
    @param informant: identifier of the sbjct (informant) organism

    @type  inwpCBG: *
    @param inwpCBG: object providing get_orfs_of_graph(target); its first
                    orf is used as the query orf

    @type  input: dict
    @param input: input[informant]['orfs'] must provide get_orf_by_id();
                  the name shadows the `input` builtin but is kept because
                  callers may pass it by keyword

    @type  gapsize: integer
    @param gapsize: passed to split_pacb_on_gaps; default 2

    @type  min_bitscore: number
    @param min_bitscore: hits with a lower score, and splitted parts with a
                         lower bitscore, are discarded; default 0

    @rtype:  list
    @return: splitted pacbporfs, ordered by _order_list_by_attribute on
             'bitscore' with reversed=True

    @raise RuntimeError: the query node reported by hmmhit2pacbp is neither
                         the target node nor the informant node
    """
    hmm_pacbporf_list = []
    for hmmhit in results:
        # only the header and the score are needed here; unpacking the full
        # hit still enforces that it contains exactly 10 items
        (sbjct_header, _sbjct_start, _sbjct_end, _query_start, _query_end,
         _query, _match, _sbjct, score, _expect) = hmmhit
        if score < min_bitscore:
            continue
        _org, orfid = sbjct_header.split('_orf_')
        orfSbjct = input[informant]['orfs'].get_orf_by_id(int(orfid))
        orfQuery = inwpCBG.get_orfs_of_graph(target)[0]
        querycoords = (min(hmmcoords[target]), max(hmmcoords[target]))
        key_data, hmmpacbporf = hmmhit2pacbp(orfQuery, target, querycoords,
                                             orfSbjct, informant, hmmhit)

        # check if hmmpacbporf creation was succesfull
        if not hmmpacbporf:
            continue

        # if here, unextend and split on gapsize
        (_pacbpkey, qNode, _sNode) = key_data
        hmmpacbporf.unextend_pacbporf()
        splittedhmmpacbporfs, _splittedstatus = split_pacb_on_gaps(
            hmmpacbporf, gapsize=gapsize)

        # the nodes are the same for every splitted part of this hit
        queryNode = (target, orfQuery.id)
        sbjctNode = (informant, orfSbjct.id)

        # loop over the splitted ones and store high(er) scoring fractions
        # NOTE: a former safeguard that stripped unmatched (gapped) ends from
        # each part was disabled by the original author as no longer needed
        for splittedhmmpf in splittedhmmpacbporfs:
            if splittedhmmpf.bitscore < min_bitscore:
                continue
            # check if query/sbjct must be swapped
            if qNode == sbjctNode and qNode != queryNode:
                # swap query and sbjct!
                splittedhmmpf = swap_query_and_sbjct(splittedhmmpf)
            elif qNode != queryNode:
                # string exceptions (`raise "..."`) are not supported since
                # Python 2.6; raise a real exception that names the nodes
                raise RuntimeError(
                    "UNEXPECTED EVENT: query node %s is neither %s nor %s" %
                    (qNode, queryNode, sbjctNode))

            # append to hmm_pacbporf_list
            hmm_pacbporf_list.append(splittedhmmpf)

    # return bitscore ordered list of hmmpacbporfs
    return _order_list_by_attribute(hmm_pacbporf_list,
                                    order_by='bitscore',
                                    reversed=True)
Ejemplo n.º 3
0
    def get_pacbps_by_organism(self, organism, order_by=None):
        """
        Get the pacbp(s) from the CodingBlockGraph of a single organism

        @type  organism: * (string)
        @param organism: Organism identifier

        @type  order_by: string
        @param order_by: 'length' (DESC), 'bits' (DESC) or None (ordered on
                         the sbjct node); any other value, including 'node',
                         is treated as None; default None

        @rtype:  list
        @return: list of pacbps objects; [] when no pacbps are stored yet

        @raise OrganismNotPresentInGraph: organism is not in organism_set()

        @attention: pacbps are swapped such that `organism` is always the query!
        @attention: pacbps are ordered by their sbjct nodes; pacbps sharing a
                    sbjct node keep the iteration order of self.pacbps
        """
        # check if requested organism is present in this graph
        if organism not in self.organism_set():
            raise OrganismNotPresentInGraph

        # if no pacbps are stored into the object yet, return []
        if not self.pacbps:
            return []

        # reset order_by if falsely assigned
        if order_by not in (None, 'bits', 'length'):
            order_by = None

        node_and_pacbp = []
        for (_key, (org1, orf1), (org2, orf2)), pacbporf in self.pacbps.items():
            if organism == org1:
                node_and_pacbp.append(((org2, orf2), pacbporf))
            elif organism == org2:
                # swap query and sbjct!
                swapped = pacb.swap_query_and_sbjct(pacbporf)
                node_and_pacbp.append(((org1, orf1), swapped))

        # sort the requested pacbps on (sbjct) Node only; sorting the full
        # (node, pacbporf) tuples would compare the pacbporf objects whenever
        # two nodes are equal, which is arbitrary on Python 2 and raises a
        # TypeError on Python 3 for objects without ordering
        node_and_pacbp.sort(key=lambda pair: pair[0])
        thepacbps = [pacbporf for _node, pacbporf in node_and_pacbp]

        # order the requested pacbps if requested for ('bits' or 'length')
        if order_by is not None:
            thepacbps = ordering.order_list_by_attribute(thepacbps,
                                                         order_by,
                                                         reversed=True)

        # return the requested pacbps
        return thepacbps
Ejemplo n.º 4
0
def hmmresults2splittedpacbps(results,
                              hmmcoords,
                              target,
                              informant,
                              inwpCBG,
                              input,
                              gapsize=2,
                              min_bitscore=0):
    """
    Convert HMM hits into gap-splitted pacbporf objects, ordered on bitscore

    @type  results: iterable
    @param results: hmm hits; each hit is a 10-item sequence
                    ( sbjct_header, sbjct_start, sbjct_end, query_start,
                    query_end, query, match, sbjct, score, expect ) of which
                    the sbjct_header has the form `<prefix>_orf_<orfid>`

    @type  hmmcoords: dict
    @param hmmcoords: hmmcoords[target] holds the query coordinates; its
                      min() and max() are handed to hmmhit2pacbp

    @type  target: *
    @param target: identifier of the query (target) organism

    @type  informant: *
    @param informant: identifier of the sbjct (informant) organism

    @type  inwpCBG: *
    @param inwpCBG: object providing get_orfs_of_graph(target); its first
                    orf is used as the query orf

    @type  input: dict
    @param input: input[informant]['orfs'] must provide get_orf_by_id();
                  the name shadows the `input` builtin but is kept because
                  callers may pass it by keyword

    @type  gapsize: integer
    @param gapsize: passed to split_pacb_on_gaps; default 2

    @type  min_bitscore: number
    @param min_bitscore: hits with a lower score, and splitted parts with a
                         lower bitscore, are discarded; default 0

    @rtype:  list
    @return: splitted pacbporfs, ordered by _order_list_by_attribute on
             'bitscore' with reversed=True

    @raise RuntimeError: the query node reported by hmmhit2pacbp is neither
                         the target node nor the informant node
    """
    hmm_pacbporf_list = []
    for hmmhit in results:
        # only the header and the score are needed here; unpacking the full
        # hit still enforces that it contains exactly 10 items
        (sbjct_header, _sbjct_start, _sbjct_end, _query_start, _query_end,
         _query, _match, _sbjct, score, _expect) = hmmhit
        if score < min_bitscore:
            continue
        _org, orfid = sbjct_header.split('_orf_')
        orfSbjct = input[informant]['orfs'].get_orf_by_id(int(orfid))
        orfQuery = inwpCBG.get_orfs_of_graph(target)[0]
        querycoords = (min(hmmcoords[target]), max(hmmcoords[target]))
        key_data, hmmpacbporf = hmmhit2pacbp(orfQuery, target, querycoords,
                                             orfSbjct, informant, hmmhit)

        # check if hmmpacbporf creation was succesfull
        if not hmmpacbporf:
            continue

        # if here, unextend and split on gapsize
        (_pacbpkey, qNode, _sNode) = key_data
        hmmpacbporf.unextend_pacbporf()
        splittedhmmpacbporfs, _splittedstatus = split_pacb_on_gaps(
            hmmpacbporf, gapsize=gapsize)

        # the nodes are the same for every splitted part of this hit
        queryNode = (target, orfQuery.id)
        sbjctNode = (informant, orfSbjct.id)

        # loop over the splitted ones and store high(er) scoring fractions
        # NOTE: a former safeguard that stripped unmatched (gapped) ends from
        # each part was disabled by the original author as no longer needed
        for splittedhmmpf in splittedhmmpacbporfs:
            if splittedhmmpf.bitscore < min_bitscore:
                continue
            # check if query/sbjct must be swapped
            if qNode == sbjctNode and qNode != queryNode:
                # swap query and sbjct!
                splittedhmmpf = swap_query_and_sbjct(splittedhmmpf)
            elif qNode != queryNode:
                # string exceptions (`raise "..."`) are not supported since
                # Python 2.6; raise a real exception that names the nodes
                raise RuntimeError(
                    "UNEXPECTED EVENT: query node %s is neither %s nor %s" %
                    (qNode, queryNode, sbjctNode))

            # append to hmm_pacbporf_list
            hmm_pacbporf_list.append(splittedhmmpf)

    # return bitscore ordered list of hmmpacbporfs
    return _order_list_by_attribute(hmm_pacbporf_list,
                                    order_by='bitscore',
                                    reversed=True)