def get_pacbps_by_nodes(self, node1=None, node2=None, order_by='bits'):
    """
    Get the pacbp(s) from the CodingBlockGraph by node, optional two nodes

    @type  node1: *
    @param node1: node identifier

    @type  node2: *
    @param node2: node identifier (or None); any falsy value is treated
                  as `no second node requested`

    @type  order_by: string
    @param order_by: 'bits' (DESC) or 'length' (DESC); any other value
                     falls back to 'bits'

    @rtype:  list
    @return: list of pacbps objects; [] when no pacbps are stored

    @raise InproperlyAppliedArgument: node1 (or a given node2) is not a
                     node of this graph

    @attention: when only a single node is requested, pacbp(s) are swapped
                to make the requested node the query node
    @attention: when two nodes are requested, pacbp(s) are returned as
                stored (no swapping is done)
    """
    # validate the requested node(s) before anything else
    nodes = self.get_nodes()
    if node1 not in nodes:
        message = "node1 `%s` not in graph: %s" % (node1, nodes)
        raise InproperlyAppliedArgument(message)
    if node2 and node2 not in nodes:
        message = "node2 `%s` not in graph: %s" % (node2, nodes)
        raise InproperlyAppliedArgument(message)

    # unknown ordering requests silently fall back to the default
    if order_by not in ('bits', 'length'):
        order_by = 'bits'

    # if no pacbps are stored into the object yet, return []
    if not self.pacbps:
        return []

    thepacbps = []
    # self.pacbps is keyed on (key, nodeA, nodeB) triples
    for (key, nodeA, nodeB), pacbporf in self.pacbps.items():
        if node1 not in (nodeA, nodeB):
            continue
        if node2:
            # two nodes requested: keep the pacbp only if node2 is involved
            if nodeA == node2 or nodeB == node2:
                thepacbps.append(pacbporf)
        elif nodeA == node1:
            # requested node already is the query
            thepacbps.append(pacbporf)
        else:
            # swap query and sbjct to make node1 the query
            thepacbps.append(pacb.swap_query_and_sbjct(pacbporf))

    # order_by is 'bits' or 'length' here; both are ordered descending
    return ordering.order_list_by_attribute(
        thepacbps, order_by, reversed=True)
def hmmresults2splittedpacbps(results, hmmcoords, target, informant,
        inwpCBG, input, gapsize=2, min_bitscore=0):
    """
    Convert HMM hits into gap-splitted pacbporfs, ordered on bitscore

    @type  results: list
    @param results: HMM hits; each hit is a 10-tuple ( sbjct_header,
                    sbjct_start, sbjct_end, query_start, query_end, query,
                    match, sbjct, score, expect ) whose sbjct_header looks
                    like `<organism>_orf_<integer orf id>`

    @type  hmmcoords: dict
    @param hmmcoords: hmmcoords[target] holds the coordinates of which the
                    min and max are passed on as query coordinates

    @type  target: *
    @param target: identifier of the query organism

    @type  informant: *
    @param informant: identifier of the sbjct organism

    @type  inwpCBG: *
    @param inwpCBG: object providing get_orfs_of_graph(target); its first
                    Orf is used as the query Orf

    @type  input: dict
    @param input: input[informant]['orfs'] must provide get_orf_by_id()

    @type  gapsize: integer
    @param gapsize: passed to split_pacb_on_gaps()

    @type  min_bitscore: number
    @param min_bitscore: hits and splitted pacbporfs scoring lower than
                    this are discarded

    @rtype:  list
    @return: result of _order_list_by_attribute() on the collected
             pacbporfs (order_by='bitscore', reversed=True)

    @raise RuntimeError: the query node reported by hmmhit2pacbp() is
             neither the target node nor the informant node

    @attention: NOTE(review): a function with this same name is defined a
                second time further down in the visible source; the later
                definition is the one that ends up bound to the name
    """
    hmm_pacbporf_list = []
    for hmmhit in results:
        # unpacking doubles as a shape check on the hit
        (sbjct_header, _sbjct_start, _sbjct_end, _query_start, _query_end,
         _query, _match, _sbjct, score, _expect) = hmmhit
        if score < min_bitscore:
            continue

        _org, orfid = sbjct_header.split('_orf_')
        orfSbjct = input[informant]['orfs'].get_orf_by_id(int(orfid))
        orfQuery = inwpCBG.get_orfs_of_graph(target)[0]
        querycoords = (min(hmmcoords[target]), max(hmmcoords[target]))
        key_data, hmmpacbporf = hmmhit2pacbp(
            orfQuery, target, querycoords, orfSbjct, informant, hmmhit)

        # check if hmmpacbporf creation was succesfull
        if not hmmpacbporf:
            continue

        # if here, unextend and split on gapsize
        (_pacbpkey, qNode, _sNode) = key_data
        hmmpacbporf.unextend_pacbporf()
        splittedhmmpacbporfs, _splittedstatus = split_pacb_on_gaps(
            hmmpacbporf, gapsize=gapsize)

        queryNode = (target, orfQuery.id)
        sbjctNode = (informant, orfSbjct.id)

        # loop over the splitted ones and store high(er) scoring fractions
        # (stripping of unmatched leading/trailing gaps used to be done
        #  here; it was disabled as no longer deemed necessary)
        for splittedhmmpf in splittedhmmpacbporfs:
            if splittedhmmpf.bitscore < min_bitscore:
                continue

            # check if query/sbjct must be swapped
            if qNode == queryNode:
                pass
            elif qNode == sbjctNode:
                # swap query and sbjct!
                splittedhmmpf = swap_query_and_sbjct(splittedhmmpf)
            else:
                # string exceptions are not raisable on Python >= 2.6;
                # raise a proper exception that names the offending nodes
                raise RuntimeError(
                    "UNEXPECTED EVENT: query node %r is neither %r nor %r"
                    % (qNode, queryNode, sbjctNode))

            hmm_pacbporf_list.append(splittedhmmpf)

    # return bitscore ordered list of hmmpacbporfs
    return _order_list_by_attribute(
        hmm_pacbporf_list, order_by='bitscore', reversed=True)
def get_pacbps_by_organism(self, organism, order_by=None):
    """
    Get the pacbp(s) from the CodingBlockGraph of a single organism

    @type  organism: * (string)
    @param organism: Organism identifier

    @type  order_by: string
    @param order_by: 'length' (DESC), 'bits' (DESC) or None (on sbjct
                     node, ASC); any other value is treated as None

    @rtype:  list
    @return: list of pacbps objects; [] when no pacbps are stored

    @raise OrganismNotPresentInGraph: organism is not in this graph

    @attention: pacbps are swapped such that `organism` is always the query!
    @attention: pacbps are ordered by their sbjct nodes; pacbps that share
                a sbjct node keep the order in which self.pacbps yields them
    """
    # check if requested organism is present in this graph
    if organism not in self.organism_set():
        raise OrganismNotPresentInGraph

    # if no pacbps are stored into the object yet, return []
    if not self.pacbps:
        return []

    # reset order_by if falsely assigned
    if order_by not in (None, 'bits', 'length'):
        order_by = None

    # collect (sbjct node, pacbporf) pairs with `organism` as the query
    node_and_pacbp = []
    for (key, (org1, orf1), (org2, orf2)), pacbporf in self.pacbps.items():
        if organism == org1:
            node_and_pacbp.append(((org2, orf2), pacbporf))
        elif organism == org2:
            # swap query and sbjct!
            swapped = pacb.swap_query_and_sbjct(pacbporf)
            node_and_pacbp.append(((org1, orf1), swapped))

    # sort on the sbjct node ONLY; sorting the bare tuples would fall back
    # to comparing the pacbporf objects themselves whenever nodes tie
    node_and_pacbp.sort(key=lambda item: item[0])
    thepacbps = [pacbporf for _node, pacbporf in node_and_pacbp]

    # re-order on the requested attribute (descending) if asked for
    if order_by is not None:
        thepacbps = ordering.order_list_by_attribute(
            thepacbps, order_by, reversed=True)

    return thepacbps
def hmmresults2splittedpacbps(results, hmmcoords, target, informant,
        inwpCBG, input, gapsize=2, min_bitscore=0):
    """
    Convert HMM hits into gap-splitted pacbporfs, ordered on bitscore

    @type  results: list
    @param results: HMM hits; each hit is a 10-tuple ( sbjct_header,
                    sbjct_start, sbjct_end, query_start, query_end, query,
                    match, sbjct, score, expect ) whose sbjct_header looks
                    like `<organism>_orf_<integer orf id>`

    @type  hmmcoords: dict
    @param hmmcoords: hmmcoords[target] holds the coordinates of which the
                    min and max are passed on as query coordinates

    @type  target: *
    @param target: identifier of the query organism

    @type  informant: *
    @param informant: identifier of the sbjct organism

    @type  inwpCBG: *
    @param inwpCBG: object providing get_orfs_of_graph(target); its first
                    Orf is used as the query Orf

    @type  input: dict
    @param input: input[informant]['orfs'] must provide get_orf_by_id()

    @type  gapsize: integer
    @param gapsize: passed to split_pacb_on_gaps()

    @type  min_bitscore: number
    @param min_bitscore: hits and splitted pacbporfs scoring lower than
                    this are discarded

    @rtype:  list
    @return: result of _order_list_by_attribute() on the collected
             pacbporfs (order_by='bitscore', reversed=True)

    @raise RuntimeError: the query node reported by hmmhit2pacbp() is
             neither the target node nor the informant node

    @attention: NOTE(review): a function with this same name is also
                defined earlier in the visible source; this later
                definition is the one that ends up bound to the name
    """
    hmm_pacbporf_list = []
    for hmmhit in results:
        # unpacking doubles as a shape check on the hit
        (sbjct_header, _sbjct_start, _sbjct_end, _query_start, _query_end,
         _query, _match, _sbjct, score, _expect) = hmmhit
        if score < min_bitscore:
            continue

        _org, orfid = sbjct_header.split('_orf_')
        orfSbjct = input[informant]['orfs'].get_orf_by_id(int(orfid))
        orfQuery = inwpCBG.get_orfs_of_graph(target)[0]
        querycoords = (min(hmmcoords[target]), max(hmmcoords[target]))
        key_data, hmmpacbporf = hmmhit2pacbp(
            orfQuery, target, querycoords, orfSbjct, informant, hmmhit)

        # check if hmmpacbporf creation was succesfull
        if not hmmpacbporf:
            continue

        # if here, unextend and split on gapsize
        (_pacbpkey, qNode, _sNode) = key_data
        hmmpacbporf.unextend_pacbporf()
        splittedhmmpacbporfs, _splittedstatus = split_pacb_on_gaps(
            hmmpacbporf, gapsize=gapsize)

        queryNode = (target, orfQuery.id)
        sbjctNode = (informant, orfSbjct.id)

        # loop over the splitted ones and store high(er) scoring fractions
        # (stripping of unmatched leading/trailing gaps used to be done
        #  here; it was disabled as no longer deemed necessary)
        for splittedhmmpf in splittedhmmpacbporfs:
            if splittedhmmpf.bitscore < min_bitscore:
                continue

            # check if query/sbjct must be swapped
            if qNode == queryNode:
                pass
            elif qNode == sbjctNode:
                # swap query and sbjct!
                splittedhmmpf = swap_query_and_sbjct(splittedhmmpf)
            else:
                # string exceptions are not raisable on Python >= 2.6;
                # raise a proper exception that names the offending nodes
                raise RuntimeError(
                    "UNEXPECTED EVENT: query node %r is neither %r nor %r"
                    % (qNode, queryNode, sbjctNode))

            hmm_pacbporf_list.append(splittedhmmpf)

    # return bitscore ordered list of hmmpacbporfs
    return _order_list_by_attribute(
        hmm_pacbporf_list, order_by='bitscore', reversed=True)