def get_compatible_isoforms_stitcher(mol_list, isoform_dict_json,
                                     refskip_dict_json, h):
    """Annotate stitched molecules with a ``CT`` tag of compatible names.

    Args:
        mol_list: Iterable of ``(success, molecule)`` pairs. Successful
            molecules are strings parsed with
            ``pysam.AlignedRead.fromstring``; failed ones are forwarded
            (unchanged when already a string, otherwise via ``to_string()``).
        isoform_dict_json: Mapping from an interval string (parsed with
            ``P.from_string(..., conv=int)``) to a comma-separated string of
            names; looked up with the reference positions of each molecule.
        refskip_dict_json: Same layout as ``isoform_dict_json``; looked up
            with the op-3 gaps found between the molecule's blocks.
        h: Header object handed to ``pysam.AlignedRead.fromstring``.

    Returns:
        A list of ``(success, molecule_string)`` pairs. A successful molecule
        whose tag cannot be computed or serialised is dropped.
    """
    isoform_dict = P.IntervalDict()
    for key, names in isoform_dict_json.items():
        isoform_dict[P.from_string(key, conv=int)] = set(names.split(','))
    refskip_dict = P.IntervalDict()
    for key, names in refskip_dict_json.items():
        refskip_dict[P.from_string(key, conv=int)] = set(names.split(','))

    new_mol_list = []
    for success, m in mol_list:
        if not success:
            # Failed molecules are only normalised to a string, never parsed.
            new_mol_list.append(
                (success, m if isinstance(m, str) else m.to_string()))
            continue

        mol = pysam.AlignedRead.fromstring(m, h)
        covered = interval(intervals_extract(mol.get_reference_positions()))

        # CIGAR operations 2 and 3 with a non-zero length, in read order.
        refskip_cigar = [
            t[0] for t in mol.cigartuples if t[1] > 0 and t[0] in [2, 3]
        ]
        blocks = mol.get_blocks()
        gaps = []
        # NOTE(review): this indexes refskip_cigar by block gap, i.e. it
        # assumes one op-2/op-3 entry per pair of consecutive blocks; an
        # IndexError here is not caught -- confirm against pysam.get_blocks.
        for n in range(len(blocks) - 1):
            if refskip_cigar[n] == 3:
                gaps.append((blocks[n][1], blocks[n + 1][0]))
        skipped = interval(gaps)

        # Only segments spanning more than 4 integer positions contribute.
        set_list = [
            s for k, s in isoform_dict.get(
                covered, default={'intronic'}).items()
            if len(list(P.iterate(k, step=1))) > 4
        ]
        set_refskip_list = [
            s for k, s in refskip_dict.get(
                skipped, default={'intronic'}).items()
            if len(list(P.iterate(k, step=1))) > 4
        ]

        # Drop one 'intronic' placeholder when real annotations exist too.
        if {'intronic'} in set_list and len(set_list) > 1:
            set_list.remove({'intronic'})
        if {'intronic'} in set_refskip_list and len(set_refskip_list) > 1:
            set_refskip_list.remove({'intronic'})

        try:
            # set.intersection() raises TypeError when set_list is empty.
            compatible = set.intersection(*set_list)
            if len(set_refskip_list) > 0:
                compatible = compatible.intersection(*set_refskip_list)
            mol.set_tag('CT', ','.join(list(compatible)))
            new_mol_list.append((success, mol.to_string()))
        except Exception:
            # Best effort: skip molecules that cannot be tagged, but let
            # KeyboardInterrupt / SystemExit propagate.
            continue
    return new_mol_list
def test_setdefault_with_intervals(self):
    """setdefault on an interval fills only the part that has no value yet."""
    # Requested interval is disjoint from the stored one.
    store = P.IntervalDict([(P.closed(0, 2), 0)])
    returned = store.setdefault(P.closed(-2, -1), -1)
    expected_returned = {P.closed(-2, -1): -1}
    expected_store = {P.closed(-2, -1): -1, P.closed(0, 2): 0}
    assert returned.as_dict() == expected_returned
    assert store.as_dict() == expected_store

    # Requested interval overlaps the stored one.
    store = P.IntervalDict([(P.closed(0, 2), 0)])
    returned = store.setdefault(P.closed(-1, 1), 2)
    expected_returned = {P.closedopen(-1, 0): 2, P.closed(0, 1): 0}
    expected_store = {P.closedopen(-1, 0): 2, P.closed(0, 2): 0}
    assert returned.as_dict() == expected_returned
    assert store.as_dict() == expected_store
def test_setdefault_with_intervals(self):
    """setdefault on an interval fills only the part that has no value yet."""
    # Requested interval is disjoint from the stored one.
    store = P.IntervalDict([(P.closed(0, 2), 0)])
    returned = store.setdefault(P.closed(-2, -1), -1)
    expected_returned = [(P.closed(-2, -1), -1)]
    expected_store = [(P.closed(-2, -1), -1), (P.closed(0, 2), 0)]
    assert returned.items() == expected_returned
    assert store.items() == expected_store

    # Requested interval overlaps the stored one.
    store = P.IntervalDict([(P.closed(0, 2), 0)])
    returned = store.setdefault(P.closed(-1, 1), 2)
    expected_returned = [(P.closedopen(-1, 0), 2), (P.closed(0, 1), 0)]
    expected_store = [(P.closedopen(-1, 0), 2), (P.closed(0, 2), 0)]
    assert returned.items() == expected_returned
    assert store.items() == expected_store
def test_update_with_intervaldict(self):
    """update() from another IntervalDict overrides and adds entries."""
    target = P.IntervalDict()
    source = P.IntervalDict()
    target[1] = 'c'
    source[1] = 'a'
    source[2] = 'b'

    target.update(source)

    for key, value in ((1, 'a'), (2, 'b')):
        assert target[key] == value
    assert len(target) == 2
def test_copy_and_update(self):
    """Copies are independent; update() accepts a mapping or pairs."""
    original = P.IntervalDict({P.closed(0, 2): 0, P.closed(4, 5): 1})
    assert original == P.IntervalDict(
        [(P.closed(0, 2), 0), (P.closed(4, 5), 1)])

    from_mapping = original.copy()
    from_pairs = original.copy()
    from_mapping.update({P.closed(-1, 1): 2})
    from_pairs.update([[P.closed(-1, 1), 2]])

    # The copies changed, the original did not.
    assert from_mapping != original
    assert from_mapping == from_pairs
    # Comparison with an unrelated type is simply unequal.
    assert from_mapping != 1
    expected = {
        P.closed(-1, 1): 2,
        P.openclosed(1, 2): 0,
        P.closed(4, 5): 1,
    }
    assert from_mapping.as_dict() == expected

    # Equality does not depend on construction order.
    forward = P.IntervalDict([(0, 0), (1, 1)])
    backward = P.IntervalDict([(1, 1), (0, 0)])
    assert forward == backward
def test_containment(self):
    """``in`` works for single values and for whole intervals."""
    store = P.IntervalDict([(P.closed(0, 3), 0)])

    # Single values.
    assert 0 in store
    assert -1 not in store

    # Intervals: outside, fully inside, and only partly inside the domain.
    outside = P.closed(-2, -1)
    inside = P.closed(1, 2)
    straddling = P.closed(1, 4)
    assert outside not in store
    assert inside in store
    assert straddling not in store
def get_compatible_isoforms_stitcher(mol_list, isoform_dict_json, h):
    """Annotate stitched molecules with a ``CT`` tag of compatible names.

    Args:
        mol_list: Iterable of ``(success, molecule)`` pairs. Successful
            molecules are strings parsed with
            ``pysam.AlignedRead.fromstring``; failed ones are forwarded
            (unchanged when already a string, otherwise via ``to_string()``).
        isoform_dict_json: Mapping from an interval string (parsed with
            ``P.from_string(..., conv=int)``) to a comma-separated string of
            names; looked up with the reference positions of each molecule.
        h: Header object handed to ``pysam.AlignedRead.fromstring``.

    Returns:
        A list of ``(success, molecule_string)`` pairs. A successful molecule
        whose tag cannot be computed or serialised is dropped.
    """
    isoform_dict = P.IntervalDict()
    for key, names in isoform_dict_json.items():
        isoform_dict[P.from_string(key, conv=int)] = set(names.split(','))

    new_mol_list = []
    for success, m in mol_list:
        if not success:
            # Failed entries are never parsed: previously every entry went
            # through fromstring() first and the string check compared a type
            # with the literal 'str', so it could never match.
            new_mol_list.append(
                (success, m if isinstance(m, str) else m.to_string()))
            continue

        mol = pysam.AlignedRead.fromstring(m, h)
        covered = interval(intervals_extract(mol.get_reference_positions()))

        # Only segments spanning more than 4 integer positions contribute.
        set_list = [
            s for k, s in isoform_dict.get(
                covered, default={'intronic'}).items()
            if len(list(P.iterate(k, step=1))) > 4
        ]
        # Drop one 'intronic' placeholder when real annotations exist too.
        if {'intronic'} in set_list and len(set_list) > 1:
            set_list.remove({'intronic'})

        try:
            # set.intersection() raises TypeError when set_list is empty.
            mol.set_tag('CT', ','.join(list(set.intersection(*set_list))))
            new_mol_list.append((success, mol.to_string()))
        except Exception:
            # Best effort: skip molecules that cannot be tagged, but let
            # KeyboardInterrupt / SystemExit propagate.
            continue
    return new_mol_list
def test_pop_missing_value(self):
    """pop() of an absent key raises unless a default is supplied."""
    store = P.IntervalDict([(P.closed(0, 3), 0)])
    missing_key = 4

    with pytest.raises(KeyError):
        store.pop(missing_key)

    fallback = store.pop(missing_key, 1)
    assert fallback == 1
def set_rate_amount(apps, price, amount, start_date=None, end_date=None):
    """Rebuild the dated rates of ``price`` with ``amount`` applied.

    Mirrors the ``set_rate_amount`` method of the current Price model. All
    existing rate rows of ``price`` are deleted and recreated from an
    interval dictionary in which the new period overrides the old ones.
    """
    RatePerDate = apps.get_model("core", "RatePerDate")
    requested_period = create_interval(start_date, end_date)
    stored_rows = price.rates_per_date.all()

    rates = P.IntervalDict()
    for row in stored_rows:
        rates[create_interval(row.start_date, row.end_date)] = row.rate

    # Every period is regenerated from scratch, which avoids complicated
    # merging logic.
    stored_rows.delete()
    rates[requested_period] = amount

    for composite in rates.keys():
        # A key may be a union of several atomic intervals.
        for atomic in list(composite):
            # Bounds that are not dates are stored as None.
            start = None
            if isinstance(atomic.lower, date):
                start = atomic.lower
            end = None
            if isinstance(atomic.upper, date):
                end = atomic.upper

            # NOTE(review): the ``or date.today()`` fallback only applies if
            # the interval is falsy -- confirm when that can happen.
            lookup = P.closedopen(atomic.lower, atomic.upper) or date.today()
            matching = rates[lookup]
            rate = matching.values()[0]

            RatePerDate(
                start_date=start, end_date=end, rate=rate, main_rate=price
            ).save()
def test_with_intervals(self):
    """Interval keys return sub-dicts; single keys return plain values."""
    store = P.IntervalDict([(P.closed(0, 2), 0)])

    # Indexing with an interval keeps only the covered part.
    getitem_cases = [
        (P.open(-P.inf, P.inf), {P.closed(0, 2): 0}),
        (P.closed(0, 2), {P.closed(0, 2): 0}),
        (P.closed(-1, 0), {P.singleton(0): 0}),
        (P.closed(-2, -1), {}),
    ]
    for key, expected in getitem_cases:
        assert store[key].as_dict() == expected

    # get() with an interval fills the uncovered part with the default.
    get_cases = [
        (P.closed(0, 2), {P.closed(0, 2): 0}),
        (P.closed(-2, -1), {P.closed(-2, -1): None}),
        (P.closed(-1, 0), {P.closedopen(-1, 0): None, P.singleton(0): 0}),
    ]
    for key, expected in get_cases:
        assert store.get(key).as_dict() == expected

    # Overwriting part of the domain splits the existing entry.
    store[P.closed(1, 3)] = 1
    assert store.as_dict() == {P.closedopen(0, 1): 0, P.closed(1, 3): 1}
    assert len(store) == 2

    for key, value in ((0, 0), (1, 1), (3, 1)):
        assert store[key] == value
        assert store.get(key, -1) == value

    with pytest.raises(KeyError):
        store[4]
    assert store.get(4, -1) == -1
def test_update_with_mapping(self):
    """update() accepts a plain dict of single-value keys."""
    store = P.IntervalDict()
    mapping = {1: 'a', 2: 'b'}

    store.update(mapping)

    for key, value in ((1, 'a'), (2, 'b')):
        assert store[key] == value
    assert len(store) == 2
def test_update_with_iterable(self):
    """update() accepts an iterable of (key, value) pairs."""
    store = P.IntervalDict()
    mapping = {1: 'a', 2: 'b'}
    pairs = mapping.items()

    store.update(pairs)

    for key, value in ((1, 'a'), (2, 'b')):
        assert store[key] == value
    assert len(store) == 2
def test_delete_missing_value(self):
    """Deleting an absent key raises; a deleted key is gone afterwards."""
    store = P.IntervalDict([(P.closed(0, 2), 0)])

    # 3 lies outside the stored interval.
    with pytest.raises(KeyError):
        del store[3]

    # 1 lies inside it; once removed, looking it up must fail.
    del store[1]
    with pytest.raises(KeyError):
        store[1]
def test_set(self):
    """Assignments add, replace, split and merge entries as expected."""
    store = P.IntervalDict([(P.closed(0, 2), 0)])
    # Each step is (key, value, expected content after the assignment).
    steps = [
        (3, 2, {P.closed(0, 2): 0, P.singleton(3): 2}),
        (3, 3, {P.closed(0, 2): 0, P.singleton(3): 3}),
        (P.closed(0, 2), 1, {P.closed(0, 2): 1, P.singleton(3): 3}),
        (
            P.closed(-1, 1),
            2,
            {
                P.closed(-1, 1): 2,
                P.openclosed(1, 2): 1,
                P.singleton(3): 3,
            },
        ),
    ]
    for key, value, expected in steps:
        store[key] = value
        assert store.as_dict() == expected

    # A covering interval replaces the old entry; an equal value on a
    # disjoint interval is stored under one combined key.
    store = P.IntervalDict([(P.closed(0, 2), 0)])
    store[P.closed(-1, 4)] = 1
    assert store.as_dict() == {P.closed(-1, 4): 1}
    store[P.closed(5, 6)] = 1
    assert store.as_dict() == {P.closed(-1, 4) | P.closed(5, 6): 1}
def test_pop_interval(self):
    """pop() with an interval removes and returns that part of the dict."""
    store = P.IntervalDict([(P.closed(0, 3), 0)])

    # Fully covered interval: popped part and remainder split the entry.
    popped = store.pop(P.closed(0, 1))
    assert popped.as_dict() == {P.closed(0, 1): 0}
    assert store.as_dict() == {P.openclosed(1, 3): 0}

    # Partly covered interval: the default fills the uncovered part.
    popped = store.pop(P.closed(0, 2), 1)
    expected_popped = {P.closed(0, 1): 1, P.openclosed(1, 2): 0}
    assert popped.as_dict() == expected_popped
    assert store.as_dict() == {P.openclosed(2, 3): 0}
def test_pop_interval(self):
    """pop() with an interval removes and returns that part of the dict."""
    store = P.IntervalDict([(P.closed(0, 3), 0)])

    # Fully covered interval: popped part and remainder split the entry.
    popped = store.pop(P.closed(0, 1))
    assert popped.items() == [(P.closed(0, 1), 0)]
    assert store.items() == [(P.openclosed(1, 3), 0)]

    # Partly covered interval: the default fills the uncovered part.
    popped = store.pop(P.closed(0, 2), 1)
    expected_popped = [(P.closed(0, 1), 1), (P.openclosed(1, 2), 0)]
    assert popped.items() == expected_popped
    assert store.items() == [(P.openclosed(2, 3), 0)]
def test_set(self):
    """Assignments add, replace, split and merge entries as expected."""
    store = P.IntervalDict([(P.closed(0, 2), 0)])
    # Each step is (key, value, expected items after the assignment).
    steps = [
        (3, 2, [(P.closed(0, 2), 0), (P.singleton(3), 2)]),
        (3, 3, [(P.closed(0, 2), 0), (P.singleton(3), 3)]),
        (P.closed(0, 2), 1, [(P.closed(0, 2), 1), (P.singleton(3), 3)]),
        (
            P.closed(-1, 1),
            2,
            [
                (P.closed(-1, 1), 2),
                (P.openclosed(1, 2), 1),
                (P.singleton(3), 3),
            ],
        ),
    ]
    for key, value, expected in steps:
        store[key] = value
        assert store.items() == expected

    # A covering interval replaces the old entry; an equal value on a
    # disjoint interval is stored under one combined key.
    store = P.IntervalDict([(P.closed(0, 2), 0)])
    store[P.closed(-1, 4)] = 1
    assert store.items() == [(P.closed(-1, 4), 1)]
    store[P.closed(5, 6)] = 1
    assert store.items() == [(P.closed(-1, 4) | P.closed(5, 6), 1)]
def test_with_single_values(self):
    """Single-value lookups hit the stored interval or report a miss."""
    store = P.IntervalDict()
    store[P.closed(0, 2)] = 0
    assert len(store) == 1

    # 2 is the closed upper bound, so it is covered.
    inside = 2
    assert store[inside] == 0
    assert store.get(inside) == 0

    # 3 is not covered: indexing raises, get() yields None.
    outside = 3
    with pytest.raises(KeyError):
        store[outside]
    assert store.get(outside) is None
def test_iterators(self):
    """keys(), values(), items() and iteration expose the stored entries."""
    store = P.IntervalDict([
        (P.closedopen(0, 1), 0),
        (P.closedopen(1, 3), 1),
        (P.singleton(3), 2),
    ])

    expected_keys = {P.closedopen(0, 1), P.closedopen(1, 3), P.singleton(3)}
    assert set(store.keys()) == expected_keys
    assert store.domain() == P.closed(0, 3)
    assert set(store.values()) == {0, 1, 2}

    expected_items = {
        (P.closedopen(0, 1), 0),
        (P.closedopen(1, 3), 1),
        (P.singleton(3), 2),
    }
    assert set(store.items()) == expected_items

    # Iterating the dict itself yields its keys.
    assert set(store) == set(store.keys())
def test_combine_nonempty(self):
    """combine() applies the function on overlaps and is symmetric for +."""

    def add(x, y):
        return x + y

    # Composite keys that overlap pairwise.
    left = P.IntervalDict([(P.closed(1, 3) | P.closed(5, 7), 1)])
    right = P.IntervalDict([(P.closed(2, 4) | P.closed(6, 8), 2)])
    expected = P.IntervalDict([
        (P.closedopen(1, 2) | P.closedopen(5, 6), 1),
        (P.closed(2, 3) | P.closed(6, 7), 3),
        (P.openclosed(3, 4) | P.openclosed(7, 8), 2),
    ])
    assert left.combine(right, add) == right.combine(left, add)
    assert left.combine(right, add) == expected

    # Entries that only touch at single points.
    left = P.IntervalDict({P.closed(0, 1): 2, P.closed(3, 4): 2})
    right = P.IntervalDict({P.closed(1, 3): 3, P.closed(4, 5): 1})
    expected = P.IntervalDict({
        P.closedopen(0, 1): 2,
        P.singleton(1): 5,
        P.open(1, 3): 3,
        P.singleton(3): 5,
        P.open(3, 4): 2,
        P.singleton(4): 3,
        P.openclosed(4, 5): 1,
    })
    assert left.combine(right, add) == right.combine(left, add)
    assert left.combine(right, add) == expected
def test_combine_empty(self):
    """Combining with an empty dict leaves the other operand unchanged."""

    def add(x, y):
        return x + y

    # Empty with empty stays empty.
    combined = P.IntervalDict().combine(P.IntervalDict(), add)
    assert combined == P.IntervalDict()

    # An empty operand on either side gives back the non-empty dict.
    filled = P.IntervalDict([(P.closed(0, 3), 0)])
    assert P.IntervalDict().combine(filled, add) == filled
    assert filled.combine(P.IntervalDict(), add) == filled
def test_combine_empty(self):
    """Combining with an empty dict leaves the other operand unchanged."""

    def add(left_value, right_value):
        return left_value + right_value

    # Empty with empty stays empty.
    first_empty, second_empty = P.IntervalDict(), P.IntervalDict()
    combined = first_empty.combine(second_empty, add)
    assert combined == P.IntervalDict()

    # An empty operand on either side gives back the non-empty dict.
    filled = P.IntervalDict([(P.closed(0, 3), 0)])
    empty_then_filled = P.IntervalDict().combine(filled, add)
    assert empty_then_filled == filled
    filled_then_empty = filled.combine(P.IntervalDict(), add)
    assert filled_then_empty == filled
def test_views(self):
    """keys/values/items views are sized, ordered and follow later edits."""
    store = P.IntervalDict({P.closed(0, 2): 3, P.closed(3, 4): 2})
    key_view = store.keys()
    value_view = store.values()
    item_view = store.items()

    assert len(key_view) == len(value_view) == len(item_view) == len(store)
    assert list(key_view) == [P.closed(0, 2), P.closed(3, 4)]
    assert list(value_view) == [3, 2]
    assert list(item_view) == [(P.closed(0, 2), 3), (P.closed(3, 4), 2)]

    # The views were taken before this insertion and must still match.
    store[5] = 4
    assert list(key_view) == list(store.keys())
    assert list(value_view) == list(store.values())
    assert list(item_view) == list(store.items())
def test_iterators(self):
    """keys(), values(), items() and iteration return ordered lists."""
    store = P.IntervalDict([
        (P.closedopen(0, 1), 0),
        (P.closedopen(1, 3), 1),
        (P.singleton(3), 2),
    ])

    expected_keys = [P.closedopen(0, 1), P.closedopen(1, 3), P.singleton(3)]
    assert store.keys() == expected_keys
    assert store.domain() == P.closed(0, 3)
    assert store.values() == [0, 1, 2]

    # items() pairs keys and values position by position.
    paired = list(zip(store.keys(), store.values()))
    assert store.items() == paired

    # Iterating the dict itself yields its keys.
    assert list(store) == store.keys()
def test_as_dict(self):
    """as_dict() keeps composite keys unless ``atomic=True`` is given."""
    composite = {
        P.closed(1, 2) | P.closed(4, 5): 1,
        P.open(7, 8) | P.closed(10, 12): 2,
    }
    store = P.IntervalDict(composite)

    # Default and explicit atomic=False both keep the unions as keys.
    assert store.as_dict() == composite
    assert store.as_dict(atomic=False) == composite

    # atomic=True yields one entry per atomic interval.
    split = {
        P.closed(1, 2): 1,
        P.closed(4, 5): 1,
        P.open(7, 8): 2,
        P.closed(10, 12): 2,
    }
    assert store.as_dict(atomic=True) == split
def tiles(seqs):
    """Split the anchors of several sequences into shared atomic tiles.

    Args:
        seqs: Sequence of mappings whose ``items()`` yield ``(I, anc)``
            pairs. ``I`` is an interval and ``anc`` exposes ``a`` (a key in
            ``range(len(seqs))``) and ``i`` (an interval).
            NOTE(review): the exact anchor type is not visible here --
            confirm against the caller.

    Returns:
        A ``defaultdict(set)`` mapping ``(anc.a, atom)`` to the set of
        ``(sequence_index, interval)`` members mapped onto that atom.

    A ``RuntimeWarning`` is emitted for every member whose ``area`` differs
    from the area of its tile. This replaces a leftover ``ipdb`` breakpoint,
    which blocked (or failed to import) outside an interactive session.
    """
    import warnings

    def union(a, b):
        return a.union(b)

    def shift(iv, old, new):
        # Translate ``iv`` from the coordinates of ``old`` to those of ``new``.
        return P.closedopen(iv.lower - old.lower + new.lower,
                            iv.upper - old.upper + new.upper)

    partitions = {i: P.IntervalDict() for i in range(len(seqs))}

    # Get the atomic partitions: each anchor interval is labelled with the
    # (sequence, position) pairs that cover it.
    for i, seq in enumerate(seqs):
        print(f"sequence {i}")
        for j, (_, anc) in enumerate(seq.items()):
            partitions[anc.a] = partitions[anc.a].combine(
                P.IntervalDict({anc.i: {(i, j)}}), union)

    # Drop the keys that never received an interval.
    partitions = {k: v for k, v in partitions.items() if len(v) != 0}

    tiling = defaultdict(set)
    for i, seq in enumerate(seqs):
        for (I, anc) in seq.items():
            for ai in partitions[anc.a][anc.i].keys():
                for atom in ai:
                    tiling[(anc.a, atom)].add((i, I & shift(atom, anc.i, I)))

    # Sanity check: every member should cover the same area as its tile.
    for key, tile in tiling.items():
        for member in tile:
            if area(key[1]) != area(member[1]):
                warnings.warn(
                    f"tile {key!r} and member {member!r} differ in area",
                    RuntimeWarning,
                    stacklevel=2,
                )
    return tiling
def create_interval_dict_linear_time(gene, isoform_interval_dict):
    """Map every covered position range to the transcripts covering it.

    Args:
        gene: Identifier returned unchanged alongside the result.
        isoform_interval_dict: Mapping from transcript to the interval it
            covers.

    Returns:
        A ``(gene, IntervalDict)`` tuple. The dictionary maps intervals,
        built from runs of positions with ``intervals_extract`` and
        ``interval``, to the set of transcripts that contain them.
    """
    union = P.empty()
    for inter in isoform_interval_dict.values():
        union = union | inter

    # Group positions by the exact combination of transcripts covering them.
    # A tuple key replaces the earlier repr()/eval() round-trip: it keeps the
    # same grouping and order without evaluating strings as code.
    coords_by_transcripts = {}
    for p in P.iterate(union, step=1):
        covering = tuple(
            transcript
            for transcript, inter in isoform_interval_dict.items()
            if p in inter
        )
        coords_by_transcripts.setdefault(covering, []).append(p)

    d = P.IntervalDict()
    for covering, coords in coords_by_transcripts.items():
        d[interval(intervals_extract(coords))] = set(covering)
    return gene, d
def test_or_ior(self):
    """``|`` builds a merged dict; ``|=`` merges into the left operand."""
    # https://github.com/AlexandreDecan/portion/issues/37
    left = P.IntervalDict({P.closed(0, 1): 1, P.closed(3, 4): 2})
    right = P.IntervalDict({P.closed(0.5, 2): 3})
    merged = P.IntervalDict({
        P.closedopen(0, 0.5): 1,
        P.closed(0.5, 2): 3,
        P.closed(3, 4): 2,
    })

    # The binary operator leaves both operands untouched.
    assert left | right == merged
    assert left == P.IntervalDict({P.closed(0, 1): 1, P.closed(3, 4): 2})
    assert right == P.IntervalDict({P.closed(0.5, 2): 3})

    # The in-place operator changes only the left operand.
    left |= right
    assert left == merged
    assert right == P.IntervalDict({P.closed(0.5, 2): 3})
def test_with_intervals(self):
    """Interval keys return sub-dicts; single keys return plain values."""
    store = P.IntervalDict([(P.closed(0, 2), 0)])

    # Indexing with an interval keeps only the covered part.
    getitem_cases = [
        (P.open(-P.inf, P.inf), [(P.closed(0, 2), 0)]),
        (P.closed(0, 2), [(P.closed(0, 2), 0)]),
        (P.closed(-1, 0), [(P.singleton(0), 0)]),
        (P.closed(-2, -1), []),
    ]
    for key, expected in getitem_cases:
        assert store[key].items() == expected

    # get() with an interval fills the uncovered part with the default.
    get_cases = [
        (P.closed(0, 2), [(P.closed(0, 2), 0)]),
        (P.closed(-2, -1), [(P.closed(-2, -1), None)]),
        (P.closed(-1, 0), [(P.closedopen(-1, 0), None), (P.singleton(0), 0)]),
    ]
    for key, expected in get_cases:
        assert store.get(key).items() == expected

    # Overwriting part of the domain splits the existing entry.
    store[P.closed(1, 3)] = 1
    assert store.items() == [(P.closedopen(0, 1), 0), (P.closed(1, 3), 1)]
    assert len(store) == 2

    for key, value in ((0, 0), (1, 1), (3, 1)):
        assert store[key] == value
        assert store.get(key, -1) == value

    with pytest.raises(KeyError):
        store[4]
    assert store.get(4, -1) == -1
def test_parameters(self):
    """dict_from_data honours ``conv`` and custom infinity markers."""
    lower_part = (P.OPEN, 'lowest', '4', P.CLOSED)
    upper_part = (P.CLOSED, '6', 'highest', P.OPEN)
    data = {(lower_part, upper_part): 'abcd'}

    result = P.dict_from_data(
        data, conv=int, pinf='highest', ninf='lowest')

    expected_key = P.openclosed(-P.inf, 4) | P.closedopen(6, P.inf)
    assert result == P.IntervalDict({expected_key: 'abcd'})