def __init__(self, target, splice_graph, ID, cutoff, upstream=None, downstream=None):
    '''
    As the purpose of ExonSeek is to find flanking constitutive exons, it is
    necessary to know what needs to be "flanked" (the target) and to have
    a splice graph representation of gene structure (splice_graph).

    Parameters:
      target       -- exon to be flanked, a (start, end) tuple that must be
                      a node of the splice graph
      splice_graph -- object exposing get_graph() and a `strand` attribute
      ID           -- identifier meant to prevent overwriting of files
      cutoff       -- stored as self.cutoff; not used in the constructor
      upstream     -- if set, the user specified upstream exon
      downstream   -- if set, the user specified downstream exon

    Raises:
      utils.PrimerSeqError -- the target is not a node of the graph
      ValueError           -- the target belongs to more than two
                              biconnected components
    '''
    self.id = ID  # id is to prevent overwriting files in self.save_path_info
    self.cutoff = cutoff
    self.target = target  # (start, end)
    # if set, the user specified upstream or downstream exons
    self.upstream, self.downstream = upstream, downstream
    self.graph = splice_graph.get_graph()  # convenience variable (could just use splice_graph)
    if self.target not in self.graph.nodes():
        raise utils.PrimerSeqError('Error: The target was not found in the splice graph')
    self.strand = splice_graph.strand  # convenience variable
    self.splice_graph = splice_graph

    # Build a real list rather than using filter(): on Python 3 filter()
    # is lazy, which would break the len() and [0] uses below.
    biconnected_comp = [comp for comp in algs.get_biconnected(self.graph)
                        if target in comp]

    # these will be defined after calling methods
    self.total_components = None
    self.psi_upstream, self.psi_target, self.psi_downstream = None, None, None
    self.all_paths = None
    self.num_of_biconnected = len(biconnected_comp)

    # list(...) keeps the emptiness check valid whether predecessors() /
    # successors() hand back a list or an iterator.
    if (len(list(self.graph.predecessors(self.target))) == 0
            or len(list(self.graph.successors(self.target))) == 0):
        self.component = None  # no flanking exon case
    elif self.num_of_biconnected == 0:
        self.no_biconnected_case()
    elif self.num_of_biconnected == 1:
        # make sure component is sorted by position
        self.component = sorted(biconnected_comp[0], key=lambda x: (x[0], x[1]))
        self.one_biconnected_case()
    elif self.num_of_biconnected == 2:
        # one position-sorted list per component (a list, not a lazy map)
        self.component = [sorted(comp, key=lambda y: (y[0], y[1]))
                          for comp in biconnected_comp]
        self.two_biconnected_case()
    else:
        raise ValueError('Error: There to be either 0, 1, or 2 biconnected components. Received %s' % self.num_of_biconnected)
def get_flanking_biconnected_exons(name, target, sGraph, genome):
    '''
    Find the flanking exons of `target` as the two end exons (by position)
    of the biconnected component that holds `target` strictly between them.
    Such exons cannot be skipped in the graph structure, so no estimation of
    their inclusion level is performed.

    On success returns the list
      [strand, name[1:], 'NA', upstream coordinates, '1.0',
       downstream coordinates, '1.0', all_paths,
       upstream sequence, target sequence, downstream sequence]
    where coordinates look like '<chr>:<start>-<end>' and sequences are
    upper-cased strings. If no component qualifies, a one-element list with
    an error message is returned instead.

    NOTE(review): a second definition with the same name follows this one in
    the visible source; if both live in the same module the later one wins.
    '''
    def coordinate_label(exon):
        # '<chr>:<start>-<end>' text for one exon
        return sGraph.chr + ':' + '-'.join(map(str, exon))

    for exons in algs.get_biconnected(sGraph.get_graph()):
        # order by (start, end) so the ends of the list are the outer exons
        ordered = sorted(exons, key=lambda exon: (exon[0], exon[1]))
        if target not in ordered[1:-1]:
            continue  # target is absent or is itself an end exon

        # upstream/downstream depend on the strand's reading direction
        first, last = ordered[0], ordered[-1]
        upstream, downstream = (first, last) if sGraph.strand == '+' else (last, first)

        # path lengths are computed elsewhere; only coordinates are set here
        all_paths = algs.AllPaths(sGraph, ordered, target,
                                  chr=sGraph.chr, strand=sGraph.strand)
        all_paths.set_all_path_coordinates()

        # pull the three exon sequences from the chromosome object
        chrom = genome[sGraph.chr]
        upstream_seq = chrom[upstream[0]:upstream[1]]
        target_seq = chrom[target[0]:target[1]]
        downstream_seq = chrom[downstream[0]:downstream[1]]
        if sGraph.strand == '-':
            # unary minus on the sequence objects (presumably the reverse
            # complement for pygr sequences -- confirm)
            upstream_seq = -upstream_seq
            target_seq = -target_seq
            downstream_seq = -downstream_seq

        return [
            sGraph.strand,
            name[1:],
            'NA',
            coordinate_label(upstream),
            '1.0',
            coordinate_label(downstream),
            '1.0',
            all_paths,
            str(upstream_seq).upper(),
            str(target_seq).upper(),
            str(downstream_seq).upper(),
        ]

    return ['Error: ' + name + ' was not found in a biconnected component']
def get_flanking_biconnected_exons(name, target, sGraph, genome):
    '''
    Defines flanking exons as exons that cannot be skipped in the graph
    structure: the outermost exons of the biconnected component in which
    `target` is an interior member. These exons are treated as 100% included
    and do not need estimation of inclusion level.

    Returns an 11-element list on success:
      strand, name without its first character, 'NA',
      upstream '<chr>:<start>-<end>', '1.0',
      downstream '<chr>:<start>-<end>', '1.0',
      the AllPaths object, and the upper-cased upstream, target and
      downstream sequences.
    Returns ['Error: ...'] when the target is not interior to any
    biconnected component.
    '''
    exon_graph = sGraph.get_graph()  # nx.DiGraph

    for bicomp in algs.get_biconnected(exon_graph):
        # sort by position so index 0 / -1 are the outermost exons
        by_position = sorted(bicomp, key=lambda node: (node[0], node[1]))
        interior = by_position[1:-1]
        if target in interior:
            # on '+' the lowest-coordinate exon is upstream; otherwise the
            # roles of the two outer exons are swapped
            if sGraph.strand == '+':
                upstream, downstream = by_position[0], by_position[-1]
            else:
                upstream, downstream = by_position[-1], by_position[0]

            # only path coordinates are set here; lengths are handled elsewhere
            all_paths = algs.AllPaths(sGraph, by_position, target,
                                      chr=sGraph.chr, strand=sGraph.strand)
            all_paths.set_all_path_coordinates()

            # sequences in upstream, target, downstream order
            genome_chr = genome[sGraph.chr]  # chr object from pygr
            seqs = [genome_chr[exon[0]:exon[1]]
                    for exon in (upstream, target, downstream)]
            if sGraph.strand == '-':
                seqs = [-seq for seq in seqs]

            coords = [sGraph.chr + ':' + '-'.join(map(str, exon))
                      for exon in (upstream, downstream)]
            header = [sGraph.strand, name[1:], 'NA',
                      coords[0], '1.0', coords[1], '1.0', all_paths]
            return header + [str(seq).upper() for seq in seqs]

    return ['Error: ' + name + ' was not found in a biconnected component']