Esempio n. 1
0
    def __init__(self, target, splice_graph, ID, cutoff, upstream=None, downstream=None):
        '''
        Set up the search for constitutive exons flanking a target exon.

        target is the exon that needs to be "flanked", given as a
        (start, end) node of the splice graph. splice_graph is the splice
        graph representation of the gene structure; its get_graph() method
        and strand attribute are used here. ID is stored to prevent
        overwriting of files in self.save_path_info. cutoff is stored
        unchanged as self.cutoff. upstream and downstream, if set, are the
        user specified upstream or downstream exons.

        Depending on how many biconnected components contain the target,
        the matching *_biconnected_case method is called.

        Raises utils.PrimerSeqError if the target is not a node of the
        graph, and ValueError if the target is found in more than two
        biconnected components.
        '''
        self.id = ID  # id is to prevent overwriting files in self.save_path_info
        self.cutoff = cutoff
        self.target = target  # (start, end)
        self.upstream, self.downstream = upstream, downstream  # if set, the user specified upstream or downstream exons
        self.graph = splice_graph.get_graph()  # convenience variable (could just use splice_graph)
        if self.target not in self.graph.nodes():
            raise utils.PrimerSeqError('Error: The target was not found in the splice graph')

        self.strand = splice_graph.strand  # convenience variable
        self.splice_graph = splice_graph

        # A real list (not filter()) so that len() and indexing below also
        # work on Python 3, where filter()/map() return one-shot iterators.
        biconnected_comp = [comp for comp in algs.get_biconnected(self.graph)
                            if target in comp]

        self.total_components = None  # these will be defined after calling methods
        self.psi_upstream, self.psi_target, self.psi_downstream = None, None, None  # these will be defined after calling methods
        self.all_paths = None

        def by_position(exon):
            # order exons by (start, end)
            return (exon[0], exon[1])

        # list() accepts both the lists returned by older NetworkX releases
        # and the iterators returned by newer ones.
        has_predecessor = bool(list(self.graph.predecessors(self.target)))
        has_successor = bool(list(self.graph.successors(self.target)))

        self.num_of_biconnected = len(biconnected_comp)
        if not has_predecessor or not has_successor:
            self.component = None  # no flanking exon case
        elif self.num_of_biconnected == 0:
            self.no_biconnected_case()
        elif self.num_of_biconnected == 1:
            self.component = sorted(biconnected_comp[0], key=by_position)  # make sure component is sorted by position
            self.one_biconnected_case()
        elif self.num_of_biconnected == 2:
            # one position-sorted list per component, kept as a list of lists
            self.component = [sorted(comp, key=by_position) for comp in biconnected_comp]
            self.two_biconnected_case()
        else:
            raise ValueError('Error: There to be either 0, 1, or 2 biconnected components. Received %s' % self.num_of_biconnected)
Esempio n. 2
0
def get_flanking_biconnected_exons(name, target, sGraph, genome):
    '''
    Find the flanking exons of target that cannot be skipped in the
    graph structure. These exons are treated as 100% included, so no
    estimation of inclusion level is needed.

    Every biconnected component of the splice graph is sorted by exon
    position. The first component holding target strictly between its
    first and last exon supplies the flanking exons: its two outermost
    exons, assigned to upstream/downstream according to strand.

    On success an 11 item list is returned:
    [strand, name without its first character, 'NA', upstream coordinate,
    '1.0', downstream coordinate, '1.0', AllPaths object, upstream
    sequence, target sequence, downstream sequence]. Coordinates are
    formatted as chr:start-end and sequences are upper case strings.
    If no component qualifies, a one item list holding an error message
    is returned instead.
    '''
    graph = sGraph.get_graph()  # nx.DiGraph
    for exons in algs.get_biconnected(graph):
        # order by position so the outermost exons sit at either end
        ordered = list(exons)
        ordered.sort(key=lambda exon: (exon[0], exon[1]))
        if target not in ordered[1:-1]:
            continue  # target is absent or is itself an outermost exon

        strand, chrom = sGraph.strand, sGraph.chr
        first, last = ordered[0], ordered[-1]
        # any strand other than '+' takes the last exon as upstream
        upstream, downstream = (first, last) if strand == '+' else (last, first)

        # path lengths are not set here; per the original note that is
        # done in primer.py
        all_paths = algs.AllPaths(sGraph, ordered, target,
                                  chr=chrom, strand=strand)
        all_paths.set_all_path_coordinates()

        # sequences of the upstream/target/downstream combo
        genome_chr = genome[chrom]  # chr object from pygr
        seqs = [genome_chr[exon[0]:exon[1]]
                for exon in (upstream, target, downstream)]
        if strand == '-':
            # negate for the minus strand (presumably pygr's reverse
            # complement -- confirm against the pygr documentation)
            seqs = [-seq for seq in seqs]

        result = [strand, name[1:], 'NA']
        for flank in (upstream, downstream):
            result += [chrom + ':' + '-'.join(map(str, flank)), '1.0']
        result.append(all_paths)
        result.extend(str(seq).upper() for seq in seqs)
        return result
    return ['Error: ' + name + ' was not found in a biconnected component']
Esempio n. 3
0
def get_flanking_biconnected_exons(name, target, sGraph, genome):
    '''
    Locate the exons flanking target that cannot be skipped in the graph
    structure. Such exons are treated as 100% included and need no
    estimation of inclusion level.

    Biconnected components of the splice graph are examined in turn, each
    sorted by exon position. The first one that contains target anywhere
    but at its two ends is used; its outermost exons become the upstream
    and downstream exons (swapped when the strand is not '+').

    Returns, on success, the list
    [strand, name[1:], 'NA', upstream 'chr:start-end', '1.0',
    downstream 'chr:start-end', '1.0', AllPaths object, upstream sequence,
    target sequence, downstream sequence] with sequences as upper case
    strings. Returns a single element list with an error message when no
    component contains target as an interior exon.
    '''
    def by_position(exon):
        # sort key: (start, end)
        return (exon[0], exon[1])

    def locus(exon):
        # format an exon as chr:start-end
        return sGraph.chr + ':' + '-'.join(map(str, exon))

    def as_text(seq):
        # upper case string form of a sequence object
        return str(seq).upper()

    for candidate in algs.get_biconnected(sGraph.get_graph()):
        exons = sorted(candidate, key=by_position)  # first item is first exon, etc
        if target in exons[1:-1]:
            # outermost exons are the flanking exons; order depends on strand
            if sGraph.strand == '+':
                upstream, downstream = exons[0], exons[-1]
            else:
                upstream, downstream = exons[-1], exons[0]

            # possible paths; lengths are handled in primer.py per the
            # original note, so only coordinates are set here
            all_paths = algs.AllPaths(
                sGraph, exons, target, chr=sGraph.chr, strand=sGraph.strand)
            all_paths.set_all_path_coordinates()

            chromosome = genome[sGraph.chr]  # chr object from pygr
            upstream_seq = chromosome[upstream[0]:upstream[1]]
            target_seq = chromosome[target[0]:target[1]]
            downstream_seq = chromosome[downstream[0]:downstream[1]]
            if sGraph.strand == '-':
                # minus strand: negate each sequence (presumably pygr's
                # reverse complement -- confirm)
                upstream_seq = -upstream_seq
                target_seq = -target_seq
                downstream_seq = -downstream_seq

            return [
                sGraph.strand,
                name[1:],
                'NA',
                locus(upstream),
                '1.0',
                locus(downstream),
                '1.0',
                all_paths,
                as_text(upstream_seq),
                as_text(target_seq),
                as_text(downstream_seq),
            ]
    return ['Error: ' + name + ' was not found in a biconnected component']