def PacbpCollectionGraph2CodingBlockGraph(pcg):
    """
    Wrap the content of a PacbpCollectionGraph in a new CodingBlockGraph

    @attention: plain conversion only; no sanity checks are done here!

    @type  pcg: PacbpCollectionGraph
    @param pcg: PacbpCollectionGraph instance

    @rtype:  CodingBlockGraph
    @return: CodingBlockGraph instance whose nodes, weights and pacbps
             attributes are assigned directly from pcg (this function
             makes no copies of them)
    """
    from graph_codingblock import CodingBlockGraph

    converted = CodingBlockGraph()
    # hand the graph content over attribute by attribute, by reference
    for attribute in ("nodes", "weights", "pacbps"):
        setattr(converted, attribute, getattr(pcg, attribute))
    return converted

def _adopt_flanking_pacbporfs(cbg, flankingcbg, at_start):
    """
    Swap pacbporfs in cbg for those of a flanking CBG that reach at least as far

    For every pacbp of cbg whose node pair is also an edge in flankingcbg,
    the first pacbporf that flankingcbg returns for that node pair is
    compared with the one in cbg. When the flanking pacbporf reaches at
    least as far on BOTH query and sbjct, it replaces the pacbporf in
    cbg.pacbps under a freshly constructed unique key.

    @type  cbg: CodingBlockGraph
    @param cbg: CodingBlockGraph instance; cbg.pacbps is changed in place

    @type  flankingcbg: CodingBlockGraph
    @param flankingcbg: CodingBlockGraph that may share edges with cbg

    @type  at_start: Boolean
    @param at_start: True  -> compare original alignment START positions
                              (flanking one must start at or before)
                     False -> compare original alignment END positions
                              (flanking one must end at or after)
    """
    # collect first, replace afterwards: cbg.pacbps must not be changed
    # while it is being iterated over
    replacements = {}
    for (key, node1, node2), pacbporf in cbg.pacbps.iteritems():
        if not flankingcbg.has_edge(node1, node2):
            continue
        flankpacbporf = flankingcbg.get_pacbps_by_nodes(
                node1=node1, node2=node2)[0]
        if at_start:
            pos = pacbporf._get_original_alignment_pos_start()
            flankpos = flankpacbporf._get_original_alignment_pos_start()
            reaches_further = (flankpos.query_pos <= pos.query_pos and
                               flankpos.sbjct_pos <= pos.sbjct_pos)
        else:
            pos = pacbporf._get_original_alignment_pos_end()
            flankpos = flankpacbporf._get_original_alignment_pos_end()
            # both coordinates must lie at or beyond the current end; the
            # sbjct test used to be `<=`, which contradicted the mirrored
            # start-side test above
            reaches_further = (flankpos.query_pos >= pos.query_pos and
                               flankpos.sbjct_pos >= pos.sbjct_pos)
        if reaches_further:
            replacements[(key, node1, node2)] = flankpacbporf

    for (key, node1, node2), pacbporf in replacements.iteritems():
        del cbg.pacbps[(key, node1, node2)]
        newkey = pacbporf.construct_unique_key(node1, node2)
        cbg.pacbps[(newkey, node1, node2)] = pacbporf


def ExonCollectionGraph2CodingBlockGraph(gra, is_first=False, is_last=False,
        firstCBG=None, lastCBG=None):
    """
    Convert ECG -> CodingBlockGraph

    @type  gra: ExonCollectionGraph
    @param gra: ExonCollectionGraph instance

    @type  is_first: Boolean
    @param is_first: True or False (default False); when True a start codon
                     graph is set up instead of taking the splice acceptor
                     graph from the ECG, and acceptor sites are harvested
                     on the new CBG itself

    @type  is_last: Boolean
    @param is_last: True or False (default False); when True donor sites
                    are harvested on the new CBG itself

    @type  firstCBG: CodingBlockGraph (or None)
    @param firstCBG: when given, pacbporfs of this CBG replace those of the
                     new CBG on shared edges if they start at or before them
                     on both query and sbjct

    @type  lastCBG: CodingBlockGraph (or None)
    @param lastCBG: when given, pacbporfs of this CBG replace those of the
                    new CBG on shared edges if they end at or after them
                    on both query and sbjct

    @attention: make sure to specify arguments correctly!
    @attention: gra.make_pacbps_for_edges() is called on gra when its
                pacbps are missing or incomplete

    @rtype:  CodingBlockGraph
    @return: CodingBlockGraph instance or None when failed!
    """
    from graph_codingblock import CodingBlockGraph

    cbg = CodingBlockGraph()
    cbg.MINIMAL_OVERAL_SPANNING_RANGE_SIZE = 1
    for ecgnode in gra.get_nodes():
        cbg.add_node(gra.ecgnode2cbgnode(ecgnode))

    # make Pacbp objects for the edges if not done yet
    if not gra.pacbps or len(gra.pacbps) != gra.edge_count():
        gra.make_pacbps_for_edges()

    # check if number of pacbps matches number of edges
    if len(gra.pacbps) != gra.edge_count():
        # pacbp creation failed for at least 1 edge -> no CBG!
        return None

    # check if connectivitysaturation == 1.0
    if gra.connectivitysaturation() != 1.0:
        # no edge listed between some of the nodes!
        return None

    # transfer pacbps and edges to new CBG
    for (key, ecgnode1, ecgnode2), pacbporf in gra.pacbps.iteritems():
        # convert ECGnode to CBGnode
        cbg_node1 = gra.ecgnode2cbgnode(ecgnode1)
        cbg_node2 = gra.ecgnode2cbgnode(ecgnode2)
        bitscore = key[0]
        cbg.add_edge(cbg_node1, cbg_node2, wt=bitscore)
        cbg.pacbps[(key, cbg_node1, cbg_node2)] = pacbporf

    # fix pacbps that are already present in what is currently
    # the first CBG. This is the case for nodes (exons) in the firstExonGraph
    # that end with a CodingBlockEnd.
    if firstCBG:
        _adopt_flanking_pacbporfs(cbg, firstCBG, at_start=True)

    # idem for pacbps that are already present in what is currently
    # the last CBG, judged on where the alignments end
    if lastCBG:
        _adopt_flanking_pacbporfs(cbg, lastCBG, at_start=False)

    # Now make sure this CodingBlockGraph is fully compatible in the
    # Genestructure. That means, do some site scanning because this newcbg
    # is inserted AFTER all the sitescanning has been done!
    if not cbg.has_overall_minimal_spanning_range():
        return None

    # set the footprint of where this CBG came from
    # this can be deleted later on, but is not required per se
    # this footprint is needed because the surrounding CBGs
    # must get some (additional) forced splice sites that depend
    # on the sites in the ExonCollectionGraph
    cbg._ExonCollectionGraph = gra

    # DO NOT update edge weights!!
    # this wipes out the alignment evidence outside of the OMSR region
    #cbg.update_edge_weights_by_minimal_spanning_range()

    # make AlignedStopCodonGraph
    cbg.align_stop_codons()

    # create splicedonorgraph & align sites
    cbg._splicedonorgraph = ExonCollectionGraph2DonorSiteCollectionGraph(gra)
    cbg._splicedonorgraph.collection2alignedsites(
            edges=gra.node_count()-1,
            minimal_edges=gra.node_count()-1
            )

    if is_first:
        # create tssgraph & align sites
        dummyTSScolgra = \
            ExonCollectionGraph2TranslationalStartSiteCollectionGraph(gra)
        dummyTSScolgra.collection2alignedsites(edges=gra.node_count()-1)
        # make the *TRUE* TSScolgra and align them
        cbg.harvest_elegiable_tss_sites()
        cbg._startcodongraph.collection2alignedsites()
        # place the dummyTSScolgra in front of the queue
        # NOTE(review): indexing [0] fails when dummyTSScolgra has no
        # aligned sites at all -- confirm that callers rule this out
        cbg._startcodongraph.alignedsites.insert(
                0, dummyTSScolgra.alignedsites[0])
        # and assign _codingblockgraph attribute to all alignedsites;
        # this is needed to do an is_optimal() check
        for _algtss in cbg._startcodongraph.alignedsites:
            _algtss._codingblockgraph = cbg
    else:
        # create spliceacceptorgraph & align sites
        cbg._spliceacceptorgraph = \
            ExonCollectionGraph2AcceptorSiteCollectionGraph(gra)
        cbg._spliceacceptorgraph.collection2alignedsites(
                edges=gra.node_count()-1,
                minimal_edges=gra.node_count()-1
                )

    if is_first:
        # make SpliceSiteCollectionGraphs
        cbg.harvest_elegiable_acceptor_sites(
                projected_acceptors={},
                forced_codingblock_ends={},
                prev=None
                )
        # do site alignment of acceptors
        cbg._spliceacceptorgraph.collection2alignedsites(
                edges=cbg.node_count()-1, minimal_edges=2)

    if is_last:
        # make SpliceSiteCollectionGraphs
        cbg.harvest_elegiable_donor_sites(
                projected_donors={},
                forced_codingblock_ends={},
                next=None
                )
        # do site alignment of donors
        cbg._splicedonorgraph.collection2alignedsites(
                edges=cbg.node_count()-1, minimal_edges=2)

    # done! return the CBG
    return cbg