Ejemplo n.º 1
0
def PacbpCollectionGraph2CodingBlockGraph(pcg):
    """
    Build a CodingBlockGraph out of a PacbpCollectionGraph

    @attention: plain conversion only; no validity check is done here!

    @type  pcg: PacbpCollectionGraph
    @param pcg: PacbpCollectionGraph instance to convert

    @rtype:  CodingBlockGraph
    @return: new CodingBlockGraph instance that refers to the very same
             nodes, weights and pacbps objects as pcg (nothing is copied)
    """
    from graph_codingblock import CodingBlockGraph
    converted = CodingBlockGraph()
    # hand over the graph data by reference, one attribute at a time
    for attribute in ('nodes', 'weights', 'pacbps'):
        setattr(converted, attribute, getattr(pcg, attribute))
    return converted
Ejemplo n.º 2
0
def PacbpCollectionGraph2CodingBlockGraph(pcg):
    """
    Turn a PacbpCollectionGraph into a CodingBlockGraph

    @attention: this is a bare conversion, nothing is validated here!

    @type  pcg: PacbpCollectionGraph
    @param pcg: the PacbpCollectionGraph instance to convert

    @rtype:  CodingBlockGraph
    @return: fresh CodingBlockGraph instance sharing the nodes, weights
             and pacbps objects of pcg (assigned by reference, not copied)
    """
    from graph_codingblock import CodingBlockGraph
    result = CodingBlockGraph()
    # same three assignments as a loop; order nodes -> weights -> pacbps
    for name in ('nodes', 'weights', 'pacbps'):
        setattr(result, name, getattr(pcg, name))
    return result
Ejemplo n.º 3
0
def _adopt_pacbps_from_flanking_cbg(cbg,flankingCBG,compare_starts):
    """
    Replace pacbps in cbg by those of flankingCBG that reach at least as far

    For each pacbp in cbg whose node pair is an edge in flankingCBG as well,
    the first pacbp listed for that edge in flankingCBG is compared to it:
    on the original alignment start position when compare_starts is True
    (flanking pacbp must start at or before it on query AND sbjct), on the
    original alignment end position otherwise (flanking pacbp must end at or
    after it on query AND sbjct). Pacbps that pass are swapped in under a
    freshly constructed unique key.

    @type  cbg: CodingBlockGraph
    @param cbg: CodingBlockGraph instance; its pacbps dict is changed in place

    @type  flankingCBG: CodingBlockGraph
    @param flankingCBG: CodingBlockGraph instance to take pacbps from

    @type  compare_starts: Boolean
    @param compare_starts: True -> compare starts, False -> compare ends
    """
    replacements = {}
    for (key,node1,node2), pacbporf in cbg.pacbps.items():
        if not flankingCBG.has_edge(node1,node2):
            continue
        if compare_starts:
            ownPos = pacbporf._get_original_alignment_pos_start()
            flankpacbporf = flankingCBG.get_pacbps_by_nodes(node1=node1,node2=node2)[0]
            flankPos = flankpacbporf._get_original_alignment_pos_start()
            reaches_as_far = flankPos.query_pos <= ownPos.query_pos and\
                             flankPos.sbjct_pos <= ownPos.sbjct_pos
        else:
            ownPos = pacbporf._get_original_alignment_pos_end()
            flankpacbporf = flankingCBG.get_pacbps_by_nodes(node1=node1,node2=node2)[0]
            flankPos = flankpacbporf._get_original_alignment_pos_end()
            # mirror image of the start check: >= on query AND on sbjct
            reaches_as_far = flankPos.query_pos >= ownPos.query_pos and\
                             flankPos.sbjct_pos >= ownPos.sbjct_pos
        if reaches_as_far:
            replacements[(key,node1,node2)] = flankpacbporf

    # do the actual swapping in a second pass, so that cbg.pacbps
    # is not changed while it is being iterated over
    for (key,node1,node2), flankpacbporf in replacements.items():
        del( cbg.pacbps[(key,node1,node2)] )
        newkey = flankpacbporf.construct_unique_key(node1,node2)
        cbg.pacbps[(newkey,node1,node2)] = flankpacbporf


def ExonCollectionGraph2CodingBlockGraph(gra,is_first=False,is_last=False,firstCBG=None,lastCBG=None):
    """
    Convert ECG -> CodingBlockGraph

    @type  gra: ExonCollectionGraph
    @param gra: ExonCollectionGraph instance

    @type  is_first: Boolean
    @param is_first: True or False (default False); when True a
                     translational start site graph is made and acceptor
                     sites are harvested from the new CBG itself, when False
                     the acceptor site graph is converted from gra

    @type  is_last: Boolean
    @param is_last: True or False (default False); when True donor sites
                    are harvested from the new CBG and aligned again

    @type  firstCBG: CodingBlockGraph (or None)
    @param firstCBG: when given, its pacbps replace those of the new CBG
                     on shared edges if they start at or before them

    @type  lastCBG: CodingBlockGraph (or None)
    @param lastCBG: when given, its pacbps replace those of the new CBG
                    on shared edges if they end at or after them

    @attention: make sure to specify arguments correctly!
    @attention: gra.make_pacbps_for_edges() is called when gra has no
                pacbp for each of its edges yet

    @rtype:  CodingBlockGraph
    @return: CodingBlockGraph instance or None when failed! (not a pacbp
             for every edge, connectivitysaturation != 1.0 or no overall
             minimal spanning range)
    """
    from graph_codingblock import CodingBlockGraph
    cbg = CodingBlockGraph()
    cbg.MINIMAL_OVERAL_SPANNING_RANGE_SIZE = 1
    for ecgnode in gra.get_nodes():
        cbg.add_node( gra.ecgnode2cbgnode(ecgnode) )

    # make Pacbp objects for the edges if not done yet
    if not gra.pacbps or len(gra.pacbps) != gra.edge_count():
        gra.make_pacbps_for_edges()

    # check if number of pacbps matches number of edges
    if len(gra.pacbps) != gra.edge_count():
        # pacbp creation failed for at least 1 edge -> no CBG!
        return None

    # check if connectivitysaturation == 1.0:
    if gra.connectivitysaturation() != 1.0:
        # no edge listed between some of the nodes!
        return None

    # transfer pacbps and edges to new CBG
    for (key,ecgnode1,ecgnode2), pacbporf in gra.pacbps.items():
        # convert ECGnode to CBGnode
        cbg_node1 = gra.ecgnode2cbgnode(ecgnode1)
        cbg_node2 = gra.ecgnode2cbgnode(ecgnode2)
        # first element of the pacbp key is used as the edge weight
        bitscore = key[0]
        cbg.add_edge(cbg_node1,cbg_node2,wt=bitscore)
        cbg.pacbps[(key,cbg_node1,cbg_node2)] = pacbporf

    # fix pacbps that are already present in what is currently
    # the first CBG. This is the case for nodes (exons) in the firstExonGraph
    # that end with a CodingBlockEnd.
    if firstCBG:
        _adopt_pacbps_from_flanking_cbg(cbg,firstCBG,True)

    # idem for pacbps that are already present in what is currently
    # the last CBG; here the alignment end positions are compared
    if lastCBG:
        _adopt_pacbps_from_flanking_cbg(cbg,lastCBG,False)

    # Now make sure this CodingBlockGraph is fully compatible in the Genestructure
    # That means, do some site scanning because this newcbg is inserted AFTER
    # all the sitescanning has been done!
    if not cbg.has_overall_minimal_spanning_range():
        ### print "ecg2cbg:", cbg, cbg.edge_count(), len(cbg.pacbps)
        return None

    # set the footprint of where this CBG came from
    # this can be deleted later on, but is not required per se
    # this footprint is needed because the surrounding CBGs
    # must get some (additional) forced splice sites that depend
    # on the sites in the ExonCollectionGraph
    cbg._ExonCollectionGraph = gra

    # DO NOT update edge weights!!
    # this wipes out the alignment evidence outside of the OMSR region
    #cbg.update_edge_weights_by_minimal_spanning_range()

    # make AlignedStopCodonGraph
    cbg.align_stop_codons()

    # create splicedonorgraph & align sites
    cbg._splicedonorgraph = ExonCollectionGraph2DonorSiteCollectionGraph(gra)
    cbg._splicedonorgraph.collection2alignedsites(
            edges=gra.node_count()-1,
            minimal_edges=gra.node_count()-1
            )

    if is_first:
        # create tssgraph & align sites
        dummyTSScolgra = ExonCollectionGraph2TranslationalStartSiteCollectionGraph(gra)
        dummyTSScolgra.collection2alignedsites( edges=gra.node_count()-1 )
        # make the *TRUE* TSScolgra and align them
        cbg.harvest_elegiable_tss_sites()
        cbg._startcodongraph.collection2alignedsites()
        # place the first aligned site of dummyTSScolgra in front of the queue
        cbg._startcodongraph.alignedsites.insert( 0, dummyTSScolgra.alignedsites[0] )
        # and assign _codingblockgraph attribute to all alignedsites;
        # this is needed to do an is_optimal() check
        for _algtss in cbg._startcodongraph.alignedsites:
            _algtss._codingblockgraph = cbg

    else:
        # create spliceacceptorgraph & align sites
        cbg._spliceacceptorgraph = ExonCollectionGraph2AcceptorSiteCollectionGraph(gra)
        cbg._spliceacceptorgraph.collection2alignedsites(
                edges=gra.node_count()-1,
                minimal_edges=gra.node_count()-1
                )

    if is_first:
        # make SpliceSiteCollectionGraphs
        ### print "ecg2cbg:(1)", cbg
        cbg.harvest_elegiable_acceptor_sites(projected_acceptors={},forced_codingblock_ends={},prev=None)

        # do site alignment of acceptors
        cbg._spliceacceptorgraph.collection2alignedsites(edges=cbg.node_count()-1,minimal_edges=2)

    if is_last:
        # make SpliceSiteCollectionGraphs
        cbg.harvest_elegiable_donor_sites(projected_donors={},forced_codingblock_ends={},next=None)

        # do site alignment of donors
        cbg._splicedonorgraph.collection2alignedsites(edges=cbg.node_count()-1,minimal_edges=2)

    # done! return the CBG
    ### print "ecg2cbg:done!", cbg
    return cbg