Example #1
0
    def collection2alignedsites(self,edges=None,minimal_edges=2):
        """
        Align the conserved sites of this collection and store them, ordered
        on cumulative score, in self.alignedsites

        First all sites found with `edges` edges are converted into aligned
        site graphs. As long as the last element of the ordered list is still
        a (non-aligned) TranslationalStartSiteCollectionGraph, that element is
        taken out and re-aligned with one edge less, down to and including
        `minimal_edges`.

        @type  edges: number (or None)
        @param edges: number of edges used for the first (basal) alignment;
                      checked by self._handle_edges_argument() before use

        @type  minimal_edges: number
        @param minimal_edges: lowest number of edges that is still tried when
                      re-aligning the non-aligned remainder

        @attention: nothing is returned; the result is self.alignedsites
        """
        # handle edges argument (error check etc.)
        edges = self._handle_edges_argument(edges)

        # invariants for all conversions done in this function
        max_node_count = self.organism_set_size()
        to_aligned = conversion.TranslationalStartSiteCollectionGraph2AlignedTranslationalStartSiteGraph

        # do the first basal alignment and order all the sites on cumulative
        # score; a single sort suffices
        self.alignedsites = sort_by_cumulative_score(
                [ to_aligned(algsite,max_node_count=max_node_count)
                  for algsite in self.find_conserved_sites(edges=edges) ]
                )

        # now keep on aligning the remaining fraction of non-aligned sites
        for current_edges in range(edges-1,minimal_edges-1,-1):
            if not self.alignedsites or\
            self.alignedsites[-1].__class__.__name__ != 'TranslationalStartSiteCollectionGraph':
                # no non-aligned remainder at the end of the list; trying
                # even fewer edges can not change anything anymore
                break
            # redo this non-aligned part with one edge less
            gra = self.alignedsites.pop()
            self.alignedsites.extend(
                    [ to_aligned(algsite,max_node_count=max_node_count)
                      for algsite in gra.find_conserved_sites(edges=current_edges) ]
                    )
            self.alignedsites = sort_by_cumulative_score(self.alignedsites)
Example #2
0
    def recombine_into_completegraphs(self,edges=None,verbose=False):
        """
        Create all possible ExonCollectionGraphs by organism node recombination 

        @type  edges: number
        @param edges: number of outgoing edges of a node in a FCG

        @rtype:  list
        @return: list with ExonCollectionGraph of the requested properties
        """
        from codingblock_splitting import cross

        # if edges is not applied get by definition from the OrganismGraph
        if not edges: edges = self.organism_set_size() - 1

        # currently, this function has only a hard-set max_missing_edges == 0
        max_missing_edges = 0

        retlist = []
        # make a cross of all the pacbp positions in the lists of alternatives 
        allcombis = cross([ self.get_organism_nodes(org) for org in self.organism_set() ])
        if verbose: print "combinations:", len(allcombis)
        # gather a list of missing edges in the ECG
        missing_edges = []
        for (node1,node2) in self.pairwisecrosscombinations_node():
            if self._organism_from_node(node1) == self._organism_from_node(node2): continue
            if not self.has_edge(node1,node2):
                missing_edges.append((node1,node2))
        if verbose: print "missing edges:", len(missing_edges)

        # check for combinations that nodes that are listed as a missing edge
        # these are not relevant because max_missing_edges == 0
        if edges == self.organism_set_size() - 1:
            for pos in range(len(allcombis)-1,-1,-1):
                combi = allcombis[pos]
                for node1,node2 in missing_edges:
                    if node1 in combi and node2 in combi:
                        allcombis.pop(pos)
                        break
        if verbose: print "relevant:", len(allcombis)

        for combi in allcombis:
            sg = ExonCollectionGraph()
            for node in combi:
                # get the exon object from the main ExonCollectionGraph
                exon = self.get_node_object(node)
                # add node & object to the subgraph
                sg.add_node_and_object(node,exon)

            # create the edges in the subgraph
            for (node1,node2) in sg.pairwisecrosscombinations_node():
                if self.has_edge(node1,node2):
                    wt = self.get_edge_weight(node1,node2)
                    sg.add_edge(node1,node2,wt=wt)

            ## now check if is a succesfull recombination
            #if sg.node_count() != self.organism_set_size():
            #    continue
            #if sg.edge_count() < sum(range(0,self.organism_set_size())) - max_missing_edges:
            #    continue

            # remove nodes that have zero edges
            sg.remove_low_connectivity_nodes(min_connectivity=1)

            # do not check nr. of nodes on organism_set_size, but on variable `edges`!
            if sg.node_count() < edges+1:
                continue

            if sg.edge_count() < sum(range(0,edges+1)) - max_missing_edges:
                continue

            # if here -> accepted!
            if edges == self.organism_set_size() - 1:
                retlist.append(sg)
            else:
                # hmm... recombination with allowing missing organisms/nodes
                # that means that there are duplicates in the subgraphs
                # that come to this point. Check if this subgraph is already
                # present in retlist before addition
                for alt in retlist:
                    if len(alt.node_set().difference(sg.node_set())) == 0:
                        break
                else:
                    # not recognized -> add!
                    retlist.append(sg)

        # update the attributes dicts
        for sg in retlist: sg._update_after_changes()
        # and return a ordered/prioritized list
        return sort_by_cumulative_score(retlist)
Example #3
0
    def collection2alignedsites(self,edges=None,minimal_edges=2):
        """
        Align the conserved splice sites of this collection and store them,
        ordered on cumulative score, in self.alignedsites

        Steps, in this order:
        (1) convert all sites found with `edges` edges into aligned graphs
        (2) re-align the non-aligned remainder (last element of the list)
            with a decreasing number of edges, down to `minimal_edges`
        (3) remove Aligned*SiteWithPhaseShiftGraphs in which not all
            organisms of this collection are represented
        (4) merge incomplete Aligned*SiteGraphs of identical phase that
            share nodes and complement each other's organisms

        @type  edges: number (or None)
        @param edges: number of edges used for the first (basal) alignment;
                      checked by self._handle_edges_argument() before use

        @type  minimal_edges: number
        @param minimal_edges: lowest number of edges that is still tried in
                      step (2)

        @attention: nothing is returned; the result is self.alignedsites
        """
        # handle edges argument (error check etc.)
        edges = self._handle_edges_argument(edges)

        # do the first basal alignment
        self.alignedsites = sort_by_cumulative_score(
                [ conversion.SpliceSiteCollectionGraph2AlignedSpliceSiteGraph(algsite,max_node_count=self.organism_set_size()) for algsite in self.find_conserved_sites(edges=edges) ]
                )

        # now keep on aligning the remaining fraction of non-aligned sites;
        # only the LAST element of the ordered list is inspected each round
        for current_edges in range(edges-1,minimal_edges-1,-1):
            if self.alignedsites and self.alignedsites[-1].__class__.__name__ in\
            ['DonorSiteCollectionGraph','AcceptorSiteCollectionGraph','SpliceSiteCollectionGraph']:
                # redo this non-aligned part
                gra = self.alignedsites.pop()
                self.alignedsites.extend(
                        [ conversion.SpliceSiteCollectionGraph2AlignedSpliceSiteGraph(algsite,max_node_count=self.organism_set_size()) for algsite in gra.find_conserved_sites(edges=current_edges) ]
                        )
                self.alignedsites = sort_by_cumulative_score(self.alignedsites)

        # if there are AlignedSpliceSiteWithPhaseShiftGraphs, remove all that have not all organisms represented
        # (iterate back to front, so pop() does not shift positions still to visit)
        lenrange = range(len(self.alignedsites)-1,-1,-1) 
        for pos in lenrange: 
            if self.alignedsites[pos].__class__.__name__ in ['AlignedDonorSiteWithPhaseShiftGraph',
            'AlignedAcceptorSiteWithPhaseShiftGraph','AlignedSpliceSiteWithPhaseShiftGraph']:
                if self.alignedsites[pos].organism_set_size() != self.organism_set_size():
                    # the removed graph is discarded; `_removed` is not used
                    _removed = self.alignedsites.pop(pos)

        # finally, merge AlignedSites that are separated due to possibly erroneous alignments
        # around (aligned) inframe-introns. Due to ALIGNED_DONOR_MAX_TRIPLET_DISTANCE,
        # situations like the following can occur. Suppose organism A-E, E having an inframe intron
        # that is split in some Pacbps, and aligned in others. Due to the differences in the location
        # where BLAST places the gaps, offsets can arise, resulting in 2 instead of 1 AlignedSite:
        # A(x)-B(x)-C(x)-D(x) and A(x)-B(x)-C(x)-E(x), where A(x)-B(x)-C(x) are the same sites!
        if self.alignedsites:
            # position in the list from which the (re)scan for mergeable sites starts
            currentpos = 0
            while True:
                for pos in range(currentpos,len(self.alignedsites)):
                    site_merged = False
                    site = self.alignedsites[pos]
                    # sites that already contain all organisms need no completion
                    if site.organism_set_size() == self.organism_set_size():
                        continue
                    # only plain aligned site graphs are candidates for merging
                    if site.__class__.__name__ not in\
                    ['AlignedDonorSiteGraph','AlignedAcceptorSiteGraph','AlignedSpliceSiteGraph']:
                        continue
                    # compare `site` only with sites further down the list
                    for otherpos in range(pos+1,len(self.alignedsites)):
                        othersite = self.alignedsites[otherpos]
                        # same two candidate checks as done for `site`
                        if othersite.organism_set_size() == self.organism_set_size():
                            continue
                        if othersite.__class__.__name__ not in\
                        ['AlignedDonorSiteGraph','AlignedAcceptorSiteGraph','AlignedSpliceSiteGraph']:
                            continue
                        if site.phase() == othersite.phase():
                            mutual_nodes = graphPlus.comparison.mutual_nodes(site,othersite)
                            if not mutual_nodes: continue
                            # check if the difference in nodes completes `site`:
                            # nodes only present in `othersite` must belong to
                            # organisms that are not yet present in `site`
                            new_nodes = othersite.node_set().difference( site.get_nodes() )
                            new_orgs  = [ othersite._organism_from_node(node) for node in new_nodes]
                            if not site.organism_set().intersection(new_orgs):
                                # yes, there are mutual nodes! Now update `site` with the nodes
                                # that are only occurring in `othersite`
                                # NOTE(review): update() also overwrites the entries of the
                                # mutual nodes with those of `othersite` -- presumably these
                                # attributes are dicts keyed on node/edge; TODO confirm
                                site.nodes.update(othersite.nodes)
                                site.weights.update(othersite.weights)
                                site._node_pssm.update(othersite._node_pssm)
                                site._node_object.update(othersite._node_object)
                                site._edge_binary_entropies.update(othersite._edge_binary_entropies)
                                # set site_merged to True to make sure the outer for loop
                                # is broken, remove `othersite` and break out
                                site_merged = True
                                self.alignedsites.pop(otherpos)
                                break
                    if site_merged:
                        # yes, a successful merge; break the outer for loop because the
                        # length of the list self.alignedsites has changed! The scan is
                        # restarted at `pos`, so the merged `site` is tried again
                        currentpos = pos
                        break
                else:
                    # end of list reached without a merge; break the while loop
                    break
            # re-sort the sites; this is done whenever self.alignedsites is
            # not empty, regardless of whether any sites were merged
            self.alignedsites = sort_by_cumulative_score(self.alignedsites)