예제 #1
0
def LowSimilarityRegionCodingBlockGraph2GeneTreeGraph(cbg):
    """
    Build a GeneTreeGraph out of a LowSimilarityRegion (lsrCBG)

    Each node of the input graph is represented in the GeneTreeGraph by
    its organism identifier. Each pairwise node combination becomes an
    edge; its weight is the identityscore of the first pacbp of that
    edge, or 0.0 when the edge or its pacbp(s) are absent. Finally the
    GeneTreeGraph is completed with zero-wt edges.

    @attention: conversion only; no error check is performed here!

    @type  cbg: LowSimilarityRegion
    @param cbg: LowSimilarityRegion instance

    @rtype:  GeneTreeGraph
    @return: GeneTreeGraph instance
    """
    genetree = GeneTreeGraph()

    # translate every lsrCBG node into its organism and remember the mapping
    node2organism = {}
    for cbgnode in cbg.get_nodes():
        organism = cbg._organism_from_node(cbgnode)
        genetree.add_node(organism)
        node2organism[cbgnode] = organism

    # the returned range is not used in this function; the call itself is
    # kept so that whatever it does on the cbg object remains unchanged
    cbg.overall_minimal_spanning_range()

    for nodepair in cbg.pairwisecrosscombinations_node():
        nodeA, nodeB = nodepair
        # default weight: edge absent, or edge without pacbp (happens often)
        weight = 0.0
        if cbg.has_edge(nodeA, nodeB):
            edgepacbps = cbg.get_pacbps_by_nodes(node1=nodeA, node2=nodeB)
            if edgepacbps:
                weight = edgepacbps[0].identityscore
        # edges in the genetree are defined between organism identifiers
        genetree.add_edge(node2organism[nodeA], node2organism[nodeB],
                          wt=weight)

    # saturate the graph: any edge still missing is added as zero-wt edge
    genetree.makecompletegraph(wt=0.0)
    return genetree
예제 #2
0
파일: conversion.py 프로젝트: IanReid/ABFGP
def LowSimilarityRegionCodingBlockGraph2GeneTreeGraph(cbg):
    """
    Convert LowSimilarityRegion 2 GeneTree

    The nodes of the resulting GeneTreeGraph are the organism identifiers
    of the nodes in the LowSimilarityRegion. The weight of an edge is the
    identityscore of the first pacbp found for the corresponding node
    pair; a pair without edge or without pacbp gets weight 0.0.

    @attention: function just converts, error check is not performed here!

    @type  cbg: LowSimilarityRegion
    @param cbg: LowSimilarityRegion instance

    @rtype:  GeneTreeGraph
    @return: GeneTreeGraph instance
    """
    tree = GeneTreeGraph()
    organism_of = {}
    for lsrnode in cbg.get_nodes():
        organism = cbg._organism_from_node(lsrnode)
        tree.add_node(organism)
        # keep track of which organism belongs to which node
        organism_of[lsrnode] = organism

    # NOTE: return value is not needed below; only the call is retained
    cbg.overall_minimal_spanning_range()

    for (first, second) in cbg.pairwisecrosscombinations_node():
        # only ask for pacbps when the edge exists in the lsrCBG
        if cbg.has_edge(first, second):
            pacbplist = cbg.get_pacbps_by_nodes(node1=first, node2=second)
        else:
            pacbplist = None
        # absent edge or edge without pacbp -> zero weight
        score = pacbplist[0].identityscore if pacbplist else 0.0
        orgA, orgB = organism_of[first], organism_of[second]
        tree.add_edge(orgA, orgB, wt=score)

    # make the graph complete; missing edges are added with zero weight
    tree.makecompletegraph(wt=0.0)
    return tree
예제 #3
0
파일: conversion.py 프로젝트: IanReid/ABFGP
def CodingBlockGraph2GeneTreeGraph(cbg):
    """
    Convert CodingBlockGraph 2 GeneTree

    Every node of the CBG is added to a new GeneTreeGraph as its organism
    identifier. For each pairwise node combination that has an edge in the
    CBG, the first pacbp of that edge is restricted to the OMSR (overall
    minimal spanning range) of the first node, and from that part of the
    alignment the edge weight (identityscore) and the additional
    statistics (aa identity, bitscore ratio) are obtained. The nt identity
    is taken from the unextended aligned DNA sequences of the pacbp.

    @attention: function just converts, error check is not performed here!
    @attention: a node pair WITHOUT edge in the CBG gets a zero-wt edge and
                NO entries in the statistics dictionaries. Previously this
                case raised a NameError (first pair) or silently re-used
                the statistics of the previously processed pair.

    @type  cbg: CodingBlockGraph
    @param cbg: CodingBlockGraph instance

    @rtype:  GeneTreeGraph
    @return: GeneTreeGraph instance
    """
    gtg = GeneTreeGraph()
    cbgnode2orgnode = {}
    for node in cbg.get_nodes():
        org = cbg._organism_from_node(node)
        gtg.add_node(org)
        # add node/org combi to mapping dict
        cbgnode2orgnode[node] = org

    # now add all the edges
    omsr = cbg.overall_minimal_spanning_range()
    for (n1, n2) in cbg.pairwisecrosscombinations_node():
        if not cbg.has_edge(n1, n2):
            # This edge is absent in the CBG. By definition a CBG should
            # have all edges at this stage, so report it. There is no pacbp
            # to take statistics from: add a zero-wt edge and move on.
            print("WARNING: edge missing in CodingBlockGraph; "
                  "zero-wt edge added, statistics skipped")
            print(cbg)
            print("%s %s missing: %s" % (
                cbg.node_count(), cbg.edge_count(), str((n1, n2))))
            gtg.add_edge(cbgnode2orgnode[n1], cbgnode2orgnode[n2], wt=0.0)
            continue

        # get pacbp(orf) object
        thepacbp = cbg.get_pacbps_by_nodes(node1=n1, node2=n2)[0]
        # get relative coordinates of the OMSR part of the alignment
        omsrQs = thepacbp.alignmentposition_by_query_pos(min(omsr[n1]))
        omsrQe = thepacbp.alignmentposition_by_query_pos(max(omsr[n1]))

        # CHECK these coordinates; pacb.exceptions.CoordinateOutOfRange can
        # occur in freaky cases. They shouldn't, but do without discovered
        # reason. However, in the majority of cases, it is just a 1/few aa
        # offset, which can be easily corrected here.
        # The out-of-range case is recognized by comparing string
        # representations, exactly as the original code did.
        if str(omsrQs) == str(pacb.exceptions.CoordinateOutOfRange):
            if thepacbp.__class__.__name__ == 'PacbP':
                # solve by taking thepacbp.query_start
                omsrQs = thepacbp.alignmentposition_by_query_pos(
                    thepacbp.query_start)
            else:
                # thepacbp.__class__.__name__ in ['PacbPDNA','PacbPORF']:
                # solve by taking original alignment position start
                omsrQs = thepacbp.alignmentposition_by_query_pos(
                    thepacbp._get_original_alignment_pos_start().query_pos)

        if str(omsrQe) == str(pacb.exceptions.CoordinateOutOfRange):
            if thepacbp.__class__.__name__ == 'PacbP':
                # solve by taking thepacbp.query_end
                # NOTE(review): no +1 is applied in this branch, unlike the
                # two other branches; presumably query_end already is a
                # range coordinate -- confirm.
                omsrQe = thepacbp.alignmentposition_by_query_pos(
                    thepacbp.query_end)
            else:
                # thepacbp.__class__.__name__ in ['PacbPDNA','PacbPORF']:
                # solve by taking original alignment position end;
                # add +1 to create a python list range coordinate
                omsrQe = thepacbp.alignmentposition_by_query_pos(
                    thepacbp._get_original_alignment_pos_end().query_pos
                    ) + 1
        else:
            # omsrQe was nicely an integer; add +1 because max(OMSR) is
            # not a range coordinate
            omsrQe += 1

        # the OMSR part of the alignment serves both aa statistics
        omsralignment = thepacbp.alignment[omsrQs:omsrQe]

        # get organism identifiers from node and add edge
        o1, o2 = cbgnode2orgnode[n1], cbgnode2orgnode[n2]

        # Wt used is identityscore == Identity + 0.5* Similarity
        identityscore = pacb.calculate_identityscore(omsralignment)
        gtg.add_edge(o1, o2, wt=identityscore)

        # add additional statistics to gtg object, stored in both
        # directions. identitypercentage is TRUE aa identity %
        identityperc = pacb.calculate_identity(omsralignment)
        gtg._aa_identity_percentages[(o1, o2)] = identityperc
        gtg._aa_identity_percentages[(o2, o1)] = identityperc

        # bitscoreratio is ratio of bits / max bits
        bitscoreratio = pacb.calculate_bitscoreratio(
            thepacbp.query[omsrQs:omsrQe],
            thepacbp.sbjct[omsrQs:omsrQe],
            matrix=thepacbp.MATRIX
            )
        gtg._bitscore_ratios[(o1, o2)] = bitscoreratio
        gtg._bitscore_ratios[(o2, o1)] = bitscoreratio

        # ntidentity is obviously nt identity%
        dnaQseq, dnaSseq = thepacbp.get_unextended_aligned_dna_sequences()
        ntidentity = sequence_identity_ratio(dnaQseq, dnaSseq)
        gtg._nt_identity_percentages[(o1, o2)] = ntidentity
        gtg._nt_identity_percentages[(o2, o1)] = ntidentity

    # check if the graph is saturated (complete)
    # if not (organism/node/orf missing), add this as a zero-wt edge
    gtg.makecompletegraph(wt=0.0)
    # and return this new genetree graph
    return gtg
예제 #4
0
def CodingBlockGraph2GeneTreeGraph(cbg):
    """
    Convert CodingBlockGraph 2 GeneTree

    The organism identifier of each CBG node becomes a node in a new
    GeneTreeGraph. For every pairwise node combination with an edge in the
    CBG, the first pacbp of the edge is sliced to the OMSR (overall minimal
    spanning range) of the first node. That slice yields the edge weight
    (identityscore), the aa identity and the bitscore ratio; the nt
    identity comes from the unextended aligned DNA sequences of the pacbp.

    @attention: function just converts, error check is not performed here!
    @attention: when an edge is absent in the CBG, a zero-wt edge is added
                and the statistics dictionaries get no entry for that pair
                (before, this situation ended in a NameError or in
                statistics copied from the preceding pair).

    @type  cbg: CodingBlockGraph
    @param cbg: CodingBlockGraph instance

    @rtype:  GeneTreeGraph
    @return: GeneTreeGraph instance
    """
    gtg = GeneTreeGraph()
    cbgnode2orgnode = {}
    for node in cbg.get_nodes():
        org = cbg._organism_from_node(node)
        gtg.add_node(org)
        # add node/org combi to mapping dict
        cbgnode2orgnode[node] = org

    # now add all the edges
    omsr = cbg.overall_minimal_spanning_range()
    for (n1, n2) in cbg.pairwisecrosscombinations_node():
        if not cbg.has_edge(n1, n2):
            # By definition, a CBG MUST HAVE ALL EDGES at this stage, so an
            # absent edge is reported. Without pacbp no statistics can be
            # calculated; only the zero-wt edge is added for this pair.
            print("WARNING: edge missing in CodingBlockGraph; "
                  "zero-wt edge added, statistics skipped")
            print(cbg)
            print("%s %s missing: %s" % (
                cbg.node_count(), cbg.edge_count(), str((n1, n2))))
            gtg.add_edge(cbgnode2orgnode[n1], cbgnode2orgnode[n2], wt=0.0)
            continue

        # get pacbp(orf) object
        thepacbp = cbg.get_pacbps_by_nodes(node1=n1, node2=n2)[0]
        # get relative coordinates of the OMSR part of the alignment
        omsrQs = thepacbp.alignmentposition_by_query_pos(min(omsr[n1]))
        omsrQe = thepacbp.alignmentposition_by_query_pos(max(omsr[n1]))

        # CHECK these coordinates; pacb.exceptions.CoordinateOutOfRange can
        # occur in freaky cases. They shouldn't, but do without discovered
        # reason. However, in the majority of cases, it is just a 1/few aa
        # offset, which can be easily corrected here.
        # Detection is done on the string representation of the returned
        # value, identical to how the original code tested for it.
        if str(omsrQs) == str(pacb.exceptions.CoordinateOutOfRange):
            if thepacbp.__class__.__name__ == 'PacbP':
                # solve by taking thepacbp.query_start
                omsrQs = thepacbp.alignmentposition_by_query_pos(
                    thepacbp.query_start)
            else:
                # thepacbp.__class__.__name__ in ['PacbPDNA','PacbPORF']:
                # solve by taking original alignment position start
                omsrQs = thepacbp.alignmentposition_by_query_pos(
                    thepacbp._get_original_alignment_pos_start().query_pos)

        if str(omsrQe) == str(pacb.exceptions.CoordinateOutOfRange):
            if thepacbp.__class__.__name__ == 'PacbP':
                # solve by taking thepacbp.query_end
                # NOTE(review): this branch does not add +1 whereas both
                # other branches do; presumably query_end is already a
                # range coordinate -- to be confirmed.
                omsrQe = thepacbp.alignmentposition_by_query_pos(
                    thepacbp.query_end)
            else:
                # thepacbp.__class__.__name__ in ['PacbPDNA','PacbPORF']:
                # solve by taking original alignment position end
                omsrQe = thepacbp.alignmentposition_by_query_pos(
                    thepacbp._get_original_alignment_pos_end().query_pos
                ) + 1  # add +1 to create a python list range coordinate
        else:
            # omsrQe was nicely an integer; add +1 because max(OMSR) is
            # not a range coord
            omsrQe += 1

        # slice the alignment once; used for identityscore and identity
        omsr_alignment = thepacbp.alignment[omsrQs:omsrQe]

        # get organism identifiers from node and add edge
        o1, o2 = cbgnode2orgnode[n1], cbgnode2orgnode[n2]

        # Wt used is identityscore == Identity + 0.5* Similarity
        identityscore = pacb.calculate_identityscore(omsr_alignment)
        gtg.add_edge(o1, o2, wt=identityscore)

        # add additional statistics to gtg object; every statistic is
        # stored for (o1,o2) as well as for (o2,o1).
        # identitypercentage is TRUE aa identity %
        identityperc = pacb.calculate_identity(omsr_alignment)
        gtg._aa_identity_percentages[(o1, o2)] = identityperc
        gtg._aa_identity_percentages[(o2, o1)] = identityperc

        # bitscoreratio is ratio of bits / max bits
        bitscoreratio = pacb.calculate_bitscoreratio(
            thepacbp.query[omsrQs:omsrQe],
            thepacbp.sbjct[omsrQs:omsrQe],
            matrix=thepacbp.MATRIX)
        gtg._bitscore_ratios[(o1, o2)] = bitscoreratio
        gtg._bitscore_ratios[(o2, o1)] = bitscoreratio

        # ntidentity is obviously nt identity%
        dnaQseq, dnaSseq = thepacbp.get_unextended_aligned_dna_sequences()
        ntidentity = sequence_identity_ratio(dnaQseq, dnaSseq)
        gtg._nt_identity_percentages[(o1, o2)] = ntidentity
        gtg._nt_identity_percentages[(o2, o1)] = ntidentity

    # check if the graph is saturated (complete)
    # if not (organism/node/orf missing), add this as a zero-wt edge
    gtg.makecompletegraph(wt=0.0)
    # and return this new genetree graph
    return gtg