예제 #1
0
def _is_intermediate_overlapping_cbg_a_gsg_scaffold_enrichment(gsg,
    cbgA,cbgB,cbgC,minimal_scaffold_aa_enrichment=5):
    """
    Does the intermediate cbgB enlarge the scaffold spanned by cbgA and cbgC?

    The overall minimal spanning range (OMSR) of a temporary gene structure
    is computed twice: once for [ cbgA, cbgB, cbgC ] and once for
    [ cbgA, cbgC ]. For each node that cbgA and cbgC have in common, the
    OMSR lengths of the corresponding organism are compared.

    @type  gsg: GenestructureOfCodingBlockGraphs
    @param gsg: gene structure that supplies `input`, `_GENETREE` and
                `organism_by_node()`

    @type  cbgA: CodingBlockGraph (presumably; not verifiable from here)
    @param cbgA: first CBG of the series

    @type  cbgB: CodingBlockGraph (presumably; not verifiable from here)
    @param cbgB: intermediate CBG that is being evaluated

    @type  cbgC: CodingBlockGraph (presumably; not verifiable from here)
    @param cbgC: last CBG of the series

    @type  minimal_scaffold_aa_enrichment: integer
    @param minimal_scaffold_aa_enrichment: minimal gain in OMSR length
                (judging by the name expressed in amino acids) required
                for at least one organism

    @rtype:  Boolean
    @return: True when at least one organism gains at least
             `minimal_scaffold_aa_enrichment`; False otherwise, and also
             False when any argument is falsy or when cbgA and cbgC have
             no mutual nodes
    """
    # any missing/empty argument (most likely cbgA or cbgC is not
    # defined) -> nothing to evaluate
    if not gsg or not cbgA or not cbgB or not cbgC:
        return False

    # without mutual nodes between the outer CBGs this series is not a
    # suitable gene structure scaffold
    if not cbgA.mutual_nodes(cbgC):
        return False

    # temporary gene structure, used only to obtain both OMSRs
    from graph_genestructure import GenestructureOfCodingBlockGraphs
    scratchGSG = GenestructureOfCodingBlockGraphs(gsg.input)
    scratchGSG.codingblockgraphs = [ cbgA, cbgB, cbgC ]
    scratchGSG._GENETREE = gsg._GENETREE
    omsr_with_b = scratchGSG.overall_minimal_spanning_range()
    scratchGSG.codingblockgraphs = [ cbgA, cbgC ]
    omsr_without_b = scratchGSG.overall_minimal_spanning_range()

    # one flag per mutual node: is the gain large enough for its organism?
    enrichment_flags = []
    for shared_node in cbgA.mutual_nodes(cbgC):
        organism = gsg.organism_by_node(shared_node)
        gained = len(omsr_with_b[organism]) - len(omsr_without_b[organism])
        enrichment_flags.append(gained >= minimal_scaffold_aa_enrichment)

    # a single sufficiently enriched organism is enough
    return any(enrichment_flags)
예제 #2
0
    def replace_scaffold_breaking_cbgs(self,verbose=False):
        """
        (Try to) replace CBGs that break the GSG scaffold by other CBGs

        For each eligible CBG, the CBGs at higher positions are scanned for
        the first one that shares nodes with it. When one or more CBGs
        without mutual nodes sit in between, these intermediates are either
        removed (when both flanking CBGs are at most 1 apart in every
        organism according to omsr_distance_between_codingblocks) or
        replaced by newly created intermediate CBGs, provided that the new
        ones have a higher summed total_weight.

        CBGs of class LowSimilarityRegionCodingBlockGraph and CBGs with
        fewer than self.EXACT_SG_NODE_COUNT nodes are not used as starting
        point.

        @type  verbose: Boolean
        @param verbose: print status/debugging messages to STDOUT

        @rtype:  Boolean
        @return: Is any CBG replaced (or removed)?
        """
        # Boolean return value
        scaffold_breaking_cbg_replaced = False

        # The first position returned by the reversed iterator is skipped.
        # All list edits below touch positions > cbgpos only, so positions
        # that still have to be visited stay valid (assuming the reversed
        # iterator yields descending positions).
        for cbgpos in self.cbgpositerator(reversed=True)[1:]:
            cbg = self.codingblockgraphs[cbgpos]
            if cbg.__class__.__name__ == 'LowSimilarityRegionCodingBlockGraph':
                continue
            if cbg.node_count() < self.EXACT_SG_NODE_COUNT:
                continue

            # loop forwards through the GSG (positions after cbgpos) and
            # stop at the first CBG that has mutual nodes with this one
            identical_nodes = []
            for partnerpos in range(cbgpos+1,len(self)):
                comparecbg = self.codingblockgraphs[partnerpos]
                if cbg.mutual_nodes(comparecbg):
                    identical_nodes.append(True)
                    break
                identical_nodes.append(False)

            if not identical_nodes:
                continue    # final CBG -> continue
            if identical_nodes == [True]:
                continue    # neighboring CBG has mutual nodes -> continue
            if True not in identical_nodes:
                continue    # no mutual nodes at all -> continue

            # this is what we are looking for, a list like
            # [ False, ... True ] with >=1 False
            # get total_weights of the intermediate CBGs
            tws = [ self.codingblockgraphs[_pos].total_weight() for
                    _pos in range(cbgpos+1,partnerpos) ]
            ####################################################################
            if verbose:
                print("%s %s %s" % (cbgpos, partnerpos, identical_nodes))
                print(cbg)
                print(tws)
                print(comparecbg)
            ####################################################################

            # first, check if the CBGs are already partially overlapping
            # if so, get rid of the intermediate CBG(s)
            omsrdist = cbg.omsr_distance_between_codingblocks(comparecbg)
            if max(omsrdist.values()) <= 1:
                # yes, all organisms glue these CBGs perfectly together
                # just remove the intermediate(s) without further checks;
                # the facing interfaces are reset because the CBGs become
                # direct neighbors
                cbg._CBGinterface3p        = None
                comparecbg._CBGinterface5p = None
                # slice covers the intermediates AND comparecbg itself
                self.codingblockgraphs[cbgpos+1:partnerpos+1] = [ comparecbg ]
                scaffold_breaking_cbg_replaced = True
                # go to the next cbg in the list
                continue

            # do a more elaborate check by trying to create a CBG
            # in this large scaffold gap
            from graph_genestructure import GenestructureOfCodingBlockGraphs
            partialGSG = GenestructureOfCodingBlockGraphs(self.input)
            partialGSG.codingblockgraphs = [ cbg, comparecbg ]
            partialGSG._GENETREE = self._GENETREE
            partialGSG.create_large_intermediate_cbg_for_scaffold_gap(
                    sprdif_min_node_count = 2,
                    cbg_min_node_count = self.EXACT_SG_NODE_COUNT,
                    verbose = verbose
                    )
            if len(partialGSG) == 2:
                # still only the 2 flanking CBGs -> nothing was created
                ################################################################
                if verbose: print("NO scaffold CBGs found!")
                ################################################################
                continue

            # compare the newly created intermediates to the current ones
            new_tws = [ _cbg.total_weight() for _cbg in
                    partialGSG.codingblockgraphs[1:-1] ]
            new_cbgs_are_better = sum(new_tws) > sum(tws)
            if new_cbgs_are_better:
                # replace! slice covers the intermediates only
                self.codingblockgraphs[cbgpos+1:partnerpos] =\
                        partialGSG.codingblockgraphs[1:-1]
                scaffold_breaking_cbg_replaced = True
            ####################################################################
            if verbose:
                if new_cbgs_are_better:
                    print("REPLACING scaffold-breaking CBGs!!!")
                else:
                    print("MAINTAINING scaffold-breaking CBGs!!!")
                for partialcbg in partialGSG: print(partialcbg)
            ####################################################################

        # return if CBGs are removed or replaced
        return scaffold_breaking_cbg_replaced