def test_with_base(self):
    """Iterating with ``base=round`` from 0.4 or from 0.6 yields ``[1, 2]``."""
    # Both lower bounds are expected to produce the same integer points.
    for lower in (0.4, 0.6):
        produced = list(P.iterate(P.closed(lower, 2), step=1, base=round))
        assert produced == [1, 2]
def test_with_gaps(self):
    """Iterating a union of disjoint intervals with ``step=1``."""
    # (interval to iterate, values expected from the iteration)
    cases = [
        (P.closed(0, 1) | P.closed(5, 6), [0, 1, 5, 6]),
        (P.closed(0, 1) | P.closed(2.5, 4), [0, 1, 2.5, 3.5]),
        (P.open(0, 1) | P.open(1, 2), []),
        (P.open(0.5, 1) | P.open(1, 3), [2]),
    ]
    for union, expected in cases:
        assert list(P.iterate(union, step=1)) == expected
def test_reversed_iteration_with_open_intervals(self):
    """Reverse iteration over intervals with an infinite bound."""
    # Consuming a reverse iteration whose upper bound is +inf must raise.
    unbounded_above = P.closedopen(0, P.inf)
    with pytest.raises(ValueError):
        list(P.iterate(unbounded_above, step=-1, reverse=True))

    # An infinite lower bound is fine: values descend from the upper bound.
    descending = P.iterate(P.openclosed(-P.inf, 0), step=-1, reverse=True)
    for expected in (0, -1, -2):  # ... and so on
        assert next(descending) == expected
def test_open_intervals(self):
    """Forward iteration over intervals with an infinite bound."""
    # Consuming a forward iteration whose lower bound is -inf must raise.
    unbounded_below = P.openclosed(-P.inf, 2)
    with pytest.raises(ValueError):
        list(P.iterate(unbounded_below, step=1))

    # An infinite upper bound is fine: values ascend from the lower bound.
    ascending = P.iterate(P.closedopen(0, P.inf), step=1)
    for expected in (0, 1, 2):  # ... and so on
        assert next(ascending) == expected
def get_compatible_isoforms_stitcher(mol_list, isoform_dict_json,
                                     refskip_dict_json, h):
    """Tag every successfully stitched molecule with its compatible isoforms.

    Args:
        mol_list: iterable of ``(success, m)`` pairs. For successful entries
            ``m`` is parsed with ``pysam.AlignedRead.fromstring(m, h)``.
        isoform_dict_json: mapping ``interval-string -> "name1,name2,..."``
            describing which isoforms cover which reference intervals.
        refskip_dict_json: mapping of the same shape, looked up with the
            molecule's reference-skip intervals instead.
        h: passed to ``pysam.AlignedRead.fromstring`` as its second argument
            (presumably the alignment header -- confirm against the caller).

    Returns:
        A list of ``(success, string)`` pairs. Failed entries are passed
        through (converted with ``to_string()`` if they are not already
        strings). Successful entries carry a ``CT`` tag holding the
        comma-joined intersection of all candidate isoform sets. Molecules
        for which the intersection or tagging raises are dropped.
    """

    def load_interval_dict(json_dict):
        # Rebuild an IntervalDict from its JSON-friendly form.
        loaded = P.IntervalDict()
        for interval_string, names in json_dict.items():
            loaded[P.from_string(interval_string, conv=int)] = set(
                names.split(','))
        return loaded

    def candidate_sets(interval_dict, query):
        # Keep only name sets whose overlapping piece spans more than four
        # integer positions; uncovered parts of the query map to {'intronic'}.
        found = [
            names for piece, names in interval_dict.get(
                query, default={'intronic'}).items()
            if len(list(P.iterate(piece, step=1))) > 4
        ]
        # 'intronic' is only informative when nothing else matched, so one
        # occurrence is removed as soon as there is any other candidate.
        if {'intronic'} in found and len(found) > 1:
            del found[found.index({'intronic'})]
        return found

    isoform_dict = load_interval_dict(isoform_dict_json)
    refskip_dict = load_interval_dict(refskip_dict_json)

    new_mol_list = []
    for success, m in mol_list:
        if not success:
            # Failed entries are never parsed, only forwarded as text.
            text = m if isinstance(m, str) else m.to_string()
            new_mol_list.append((success, text))
            continue

        mol = pysam.AlignedRead.fromstring(m, h)
        i = interval(intervals_extract(mol.get_reference_positions()))

        # Operation codes of the non-empty deletion (2) and reference-skip (3)
        # CIGAR entries; assumed to line up one-to-one with the gaps between
        # consecutive aligned blocks -- TODO confirm.
        refskip_cigar = [
            t[0] for t in mol.cigartuples if t[1] > 0 and t[0] in [2, 3]
        ]
        blocks = mol.get_blocks()
        j = interval([(blocks[n][1], blocks[n + 1][0])
                      for n in range(len(blocks) - 1)
                      if refskip_cigar[n] == 3])

        set_list = candidate_sets(isoform_dict, i)
        set_refskip_list = candidate_sets(refskip_dict, j)

        try:
            compatible = set.intersection(*set_list)
            if set_refskip_list:
                compatible = compatible.intersection(*set_refskip_list)
            mol.set_tag('CT', ','.join(compatible))
            new_mol_list.append((success, mol.to_string()))
        except Exception:
            # Best effort: a molecule that cannot be tagged (e.g. no candidate
            # sets at all) is skipped. Only ordinary errors are swallowed, so
            # KeyboardInterrupt / SystemExit still propagate.
            continue
    return new_mol_list
def test_reversed_iteration(self):
    """Reverse iteration with negative steps, a custom base, and a union."""
    # (interval, extra keyword arguments for P.iterate, expected values)
    cases = [
        (P.closed(0, 1), {'step': -1}, [1, 0]),
        (P.open(0, 3), {'step': -1}, [2, 1]),
        (P.closed(0, 1), {'step': -0.5}, [1, 0.5, 0]),
        (P.closed(0, 2), {'step': -1, 'base': lambda x: x - 1}, [1, 0]),
        (P.closed(0, 2) | P.closed(4, 5), {'step': -1}, [5, 4, 2, 1, 0]),
    ]
    for source, options, expected in cases:
        assert list(P.iterate(source, reverse=True, **options)) == expected
def I_mid(X):
    """Return the grid points of the middle windows of ``X`` as one flat list.

    Relies on the names ``l_m``, ``dt``, ``step`` and ``p`` from the
    enclosing scope. The first window is ``(X[l_m], X[2 * l_m]]``; each
    following window starts where the previous one ended and doubles its
    end index, clamped so that the last window ends at ``X[dt]``.

    Args:
        X: indexable sequence of interval bounds (assumed sorted ascending
            -- TODO confirm against the caller).

    Returns:
        list: the values produced by ``p.iterate`` over every window, in
        order.
    """
    i = l_m
    j = 2 * l_m
    I = list(p.iterate(p.openclosed(X[i], X[j]), step=step))
    # Compare with `<` rather than testing equality: doubling can jump past
    # `dt`, and `j == dt is False` chains into `(j == dt) and (dt is False)`,
    # which is never true for a numeric `dt`. `0 < j` guarantees that doubling
    # makes progress.
    while 0 < j < dt:
        i, j = j, min(2 * j, dt)
        # extend (not append) keeps the result flat; list.append returns None,
        # so its result must not be assigned back.
        I.extend(p.iterate(p.openclosed(X[i], X[j]), step=step))
    return I
def get_compatible_isoforms_stitcher(mol_list, isoform_dict_json, h):
    """Tag every successfully stitched molecule with its compatible isoforms.

    Args:
        mol_list: iterable of ``(success, m)`` pairs. For successful entries
            ``m`` is parsed with ``pysam.AlignedRead.fromstring(m, h)``.
        isoform_dict_json: mapping ``interval-string -> "name1,name2,..."``
            describing which isoforms cover which reference intervals.
        h: passed to ``pysam.AlignedRead.fromstring`` as its second argument
            (presumably the alignment header -- confirm against the caller).

    Returns:
        A list of ``(success, string)`` pairs. Failed entries are passed
        through (converted with ``to_string()`` if they are not already
        strings). Successful entries carry a ``CT`` tag holding the
        comma-joined intersection of the candidate isoform sets. Molecules
        for which the intersection or tagging raises are dropped.
    """
    isoform_dict = P.IntervalDict()
    for interval_string, names in isoform_dict_json.items():
        isoform_dict[P.from_string(interval_string, conv=int)] = set(
            names.split(','))

    new_mol_list = []
    for success, m in mol_list:
        if not success:
            # Check `success` before parsing: a failed entry need not be a
            # parseable record, so it is forwarded as text untouched.
            text = m if isinstance(m, str) else m.to_string()
            new_mol_list.append((success, text))
            continue

        mol = pysam.AlignedRead.fromstring(m, h)
        i = interval(intervals_extract(mol.get_reference_positions()))

        # Keep only name sets whose overlapping piece spans more than four
        # integer positions; uncovered parts of the query map to {'intronic'}.
        set_list = [
            names for piece, names in isoform_dict.get(
                i, default={'intronic'}).items()
            if len(list(P.iterate(piece, step=1))) > 4
        ]
        # 'intronic' is only informative when nothing else matched.
        if {'intronic'} in set_list and len(set_list) > 1:
            del set_list[set_list.index({'intronic'})]

        try:
            mol.set_tag('CT', ','.join(set.intersection(*set_list)))
            new_mol_list.append((success, mol.to_string()))
        except Exception:
            # Best effort: a molecule that cannot be tagged (e.g. no candidate
            # sets at all) is skipped. Only ordinary errors are swallowed, so
            # KeyboardInterrupt / SystemExit still propagate.
            continue
    return new_mol_list
def continuous(P, Q, l_m, dim=1, step=0.01):
    """Compute a KL-divergence-style sum over grids derived from ``P`` and ``Q``.

    Each input is flattened and sorted, then expanded into a flat list of
    grid points spaced ``step`` apart. The grid is built from three parts:
    a first window ``(min(X), X[l_m]]``, middle windows starting at
    ``X[l_m]``, and a last window ``(X[dt], max(X))``, where
    ``dt = l_m * (t_m - 1)`` and ``t_m = int((size(P) / l_m) ** (1 / dim))``.
    The result is ``sum(P_n[i] * log(P_n[i] / Q_m[i]))`` over the grid of
    ``P``.

    Args:
        P: array-like of sample values.
        Q: array-like of sample values.
        l_m: index stride used to pick window bounds from the sorted data.
        dim: root applied when deriving ``t_m`` from ``size(P) / l_m``.
        step: spacing of the generated grid points.

    Returns:
        pandas.Series with the single entry ``'KL Divergence'``.
    """
    m = np.size(P)
    t_m = int(np.power((m / l_m), (1 / dim)))
    dt = l_m * (t_m - 1)

    def I_mid(X):
        # First middle window is (X[l_m], X[2 * l_m]]; every following window
        # starts where the previous one ended and doubles its end index,
        # clamped so that the last window ends at X[dt].
        i = l_m
        j = 2 * l_m
        points = list(p.iterate(p.openclosed(X[i], X[j]), step=step))
        # Compare with `<` rather than testing equality: doubling can jump
        # past `dt`, and `j == dt is False` chains into
        # `(j == dt) and (dt is False)`, which is never true for a numeric
        # `dt`. `0 < j` guarantees that doubling makes progress.
        while 0 < j < dt:
            i, j = j, min(2 * j, dt)
            # extend (not append) keeps the grid flat; list.append returns
            # None, so its result must not be assigned back.
            points.extend(p.iterate(p.openclosed(X[i], X[j]), step=step))
        return points

    def I_start(X):
        return list(p.iterate(p.openclosed(min(X), X[l_m]), step=step))

    def I_end(X):
        return list(p.iterate(p.open(X[dt], max(X)), step=step))

    def grid(X):
        return [*I_start(X), *I_mid(X), *I_end(X)]

    P_n = grid(np.sort(P, axis=None))
    Q_m = grid(np.sort(Q, axis=None))
    # NOTE(review): Q_m is indexed with positions of P_n, so this assumes
    # Q_m is at least as long as P_n -- confirm.
    D_kl = sum(P_n[i] * np.log(P_n[i] / Q_m[i])
               for i in range(0, np.size(P_n)))
    return pd.Series({'KL Divergence': D_kl})
def get_stereotype_ratio(self):
    '''
    Return the stereotype ratio of the underlying ReferencedSequence.

    The ratio is the share of sequence positions that fall inside at least
    one repetitive pattern. Every reference from position `i` to position
    `ref` marks the whole closed range between the two as patterned.

    Example: `seq` = [a, b (->1), c (->0), d]. 'b' refers to itself and
    'c' refers back to 'a', so positions 0..2 ('a', 'b', 'c') are
    patterned while 'd' is not. The ratio is therefore 3/4.
    '''
    refs = self.content['refs']
    # One closed span per reference, ordered so that lower <= upper.
    spans = (
        P.closed(*sorted([position, target]))
        for position in range(len(refs))
        for target in refs[position]
    )
    covered = P.empty()
    for span in spans:
        covered |= span
    # Count the integer positions inside the union of all spans.
    covered_positions = list(P.iterate(covered, step=1))
    return len(covered_positions) / len(refs)
def create_interval_dict_linear_time(gene, isoform_interval_dict):
    """Map each region of a gene to the set of transcripts covering it.

    Every integer position in the union of all transcript intervals is
    assigned the transcripts whose interval contains it. Positions sharing
    exactly the same transcripts are then merged back into intervals.

    Args:
        gene: returned unchanged as the first element of the result.
        isoform_interval_dict: mapping ``transcript -> interval``.

    Returns:
        tuple: ``(gene, d)`` where ``d`` is a ``P.IntervalDict`` mapping
        intervals to the ``set`` of transcripts covering them.
    """
    union = P.empty()
    for inter in isoform_interval_dict.values():
        union = union | inter

    # Group positions by the exact combination of covering transcripts. A
    # tuple is hashable, so it can key the dict directly; there is no need
    # to round-trip the list through repr()/eval().
    coords_by_transcripts = {}
    for position in P.iterate(union, step=1):
        covering = tuple(
            transcript
            for transcript, inter in isoform_interval_dict.items()
            if position in inter
        )
        coords_by_transcripts.setdefault(covering, []).append(position)

    d = P.IntervalDict()
    for covering, coords in coords_by_transcripts.items():
        d[interval(intervals_extract(coords))] = set(covering)
    return gene, d
def make_POS_and_CIGAR(stitched_m):
    """Build the POS value and CIGAR string for a stitched molecule.

    Reads ``stitched_m['ref_intervals']``, ``['skipped_intervals']`` and
    ``['del_intervals']``. Where reference and skipped intervals overlap,
    the overlap is removed from ``stitched_m['skipped_intervals']`` (the
    dict is modified in place) and reported as a conflict.

    Returns:
        tuple: ``(POS, CIGAR, conflict, nreads_conflict, interval_list)``
        where ``conflict`` tells whether an overlap existed,
        ``nreads_conflict`` is the number of integer positions in the
        overlap, and ``interval_list`` is the flat list of the overlap's
        lower/upper bounds.
    """

    def closed_bounds(records):
        # Each record is (left_closed, lower, upper, right_closed); an open
        # end is moved one position inwards so both bounds are inclusive.
        return [
            (lower if left_closed else lower + 1,
             upper if right_closed else upper - 1)
            for left_closed, lower, upper, right_closed in records
        ]

    overlap = stitched_m['ref_intervals'] & stitched_m['skipped_intervals']
    conflict = not overlap.empty
    nreads_conflict = 0
    interval_list = []
    if conflict:
        nreads_conflict = len(list(P.iterate(overlap, step=1)))
        stitched_m['skipped_intervals'] = (
            stitched_m['skipped_intervals'] - overlap)
        interval_list = [
            bound for record in P.to_data(overlap) for bound in record[1:-1]
        ]

    ref_tuples = closed_bounds(P.to_data(stitched_m['ref_intervals']))

    skipped = stitched_m['skipped_intervals']
    skipped_tuples = [] if skipped.empty else closed_bounds(P.to_data(skipped))

    deleted = stitched_m['del_intervals']
    # NOTE(review): the first and last deletion records are discarded --
    # presumably sentinels added upstream; confirm.
    del_tuples = ([] if deleted.empty
                  else closed_bounds(P.to_data(deleted)[1:-1]))

    # Taken before the loop below starts consuming ref_tuples.
    # NOTE(review): the +1 presumably converts to a 1-based position -- confirm.
    POS = ref_tuples[0][0] + 1

    pending = {'M': ref_tuples, 'N': skipped_tuples, 'D': del_tuples}
    pieces = []
    while any(pending.values()):
        # Emit whichever operation has the smallest next start position.
        next_start = {op: spans[0][0] for op, spans in pending.items() if spans}
        op = min(next_start, key=next_start.get)
        start, end = pending[op].pop(0)
        n_bases = np.int_(end - start) + 1
        if n_bases != 0:
            pieces.append('{}{}'.format(n_bases, op))
    CIGAR = ''.join(pieces)
    return POS, CIGAR, conflict, nreads_conflict, interval_list
def __getitem__(self, key: int):
    """Return the closed interval that corresponds to index ``key``.

    Without a base calendar the result is the degenerate interval
    ``[key, key]``. Otherwise the lengths in ``self.periods`` are applied
    cyclically, starting at ``self.reftime``, to find where the ``key``-th
    period begins and ends in the base calendar.
    """
    if self.basecalendar is None:
        return P.closed(key, key)

    cycle_length = len(self.periods)
    full_cycles, offset = divmod(key, cycle_length)

    # Time consumed by the complete passes through all periods ...
    elapsed = full_cycles * sum(self.periods)
    # ... plus the periods already consumed in the current pass.
    elapsed += sum(
        [self.periods[x % cycle_length] for x in range(0, offset)])

    start_time = self.reftime + elapsed
    period_length = self.periods[key % cycle_length]

    span = P.closed(self.basecalendar[start_time].lower,
                    self.basecalendar[start_time + period_length].lower)
    points = list(P.iterate(span, step=1, base=int))
    # The last point is the start of the following period, so the result
    # ends one before it.
    return P.closed(points[0], points[-1] - 1)
def test_default_parameters(self):
    """Iterating with ``step=1`` respects open and closed bounds."""
    # (interval to iterate, values expected from the iteration)
    cases = [
        (P.closed(0, 2), [0, 1, 2]),
        (P.closedopen(0, 2), [0, 1]),
        (P.openclosed(0, 2), [1, 2]),
        (P.open(0, 2), [1]),
        (P.open(0, 2.5), [1, 2]),
    ]
    for source, expected in cases:
        assert list(P.iterate(source, step=1)) == expected
def test_with_step(self):
    """Iterating ``[0, 6]`` with numeric steps and with a callable step."""
    # (step argument, values expected from the iteration)
    cases = [
        (2, [0, 2, 4, 6]),
        (4, [0, 4]),
        (lambda x: x + 2, [0, 2, 4, 6]),
    ]
    for step, expected in cases:
        assert list(P.iterate(P.closed(0, 6), step=step)) == expected
def test_empty_intervals(self):
    """Intervals that contain no iterable point yield nothing."""
    # The empty interval, and an open interval with no integer inside it.
    for source in (P.empty(), P.open(0, 1)):
        assert list(P.iterate(source, step=1)) == []