Example #1
0
 def test_with_base(self):
     """Iterating with a rounding ``base`` must yield [1, 2] for both intervals."""
     # round(0.4) == 0 and round(0.6) == 1; either way the expected
     # sequence over [lower, 2] with a unit step is the same.
     for lower in (0.4, 0.6):
         produced = list(P.iterate(P.closed(lower, 2), step=1, base=round))
         assert produced == [1, 2]
Example #2
0
 def test_with_gaps(self):
     """Unit-step iteration over unions of disjoint intervals."""
     # Each entry pairs a (possibly non-atomic) interval with the values
     # that iterating it with step=1 is expected to produce.
     scenarios = [
         (P.closed(0, 1) | P.closed(5, 6), [0, 1, 5, 6]),
         (P.closed(0, 1) | P.closed(2.5, 4), [0, 1, 2.5, 3.5]),
         (P.open(0, 1) | P.open(1, 2), []),
         (P.open(0.5, 1) | P.open(1, 3), [2]),
     ]
     for domain, expected in scenarios:
         assert list(P.iterate(domain, step=1)) == expected
Example #3
0
    def test_reversed_iteration_with_open_intervals(self):
        """Reverse iteration over intervals with an infinite bound.

        Starting from an infinite upper bound must raise ``ValueError``;
        an infinite lower bound is fine because it is never reached.
        """
        with pytest.raises(ValueError):
            list(P.iterate(P.closedopen(0, P.inf), step=-1, reverse=True))

        descending = P.iterate(P.openclosed(-P.inf, 0), step=-1, reverse=True)
        # Only the first few values are checked; the generator is endless.
        first_three = [next(descending) for _ in range(3)]
        assert first_three == [0, -1, -2]
Example #4
0
    def test_open_intervals(self):
        """Forward iteration over intervals with an infinite bound.

        Starting from an infinite lower bound must raise ``ValueError``;
        an infinite upper bound simply yields an endless sequence.
        """
        with pytest.raises(ValueError):
            list(P.iterate(P.openclosed(-P.inf, 2), step=1))

        ascending = P.iterate(P.closedopen(0, P.inf), step=1)
        # Only the first few values are checked; the generator is endless.
        first_three = [next(ascending) for _ in range(3)]
        assert first_three == [0, 1, 2]
Example #5
0
def get_compatible_isoforms_stitcher(mol_list, isoform_dict_json,
                                     refskip_dict_json, h):
    """Tag stitched molecules with the isoforms they are compatible with.

    Args:
        mol_list: iterable of ``(success, m)`` pairs. For successful entries
            ``m`` is handed to ``pysam.AlignedRead.fromstring``; for failed
            entries ``m`` is either a string or an object exposing
            ``to_string()``.
        isoform_dict_json: mapping of interval strings (parsed with
            ``P.from_string(..., conv=int)``) to comma-separated names.
        refskip_dict_json: same layout as ``isoform_dict_json``; looked up
            with the reference-skip (CIGAR op 3) gaps of each molecule.
        h: header object forwarded to ``pysam.AlignedRead.fromstring``.

    Returns:
        A list of ``(success, string)`` pairs. Failed entries are passed
        through as strings. Successful entries carry a ``CT`` tag holding the
        comma-joined intersection of the matching name sets. Successful
        entries whose tag cannot be computed or set are dropped.
    """
    isoform_dict = P.IntervalDict()
    for key, names in isoform_dict_json.items():
        isoform_dict[P.from_string(key, conv=int)] = set(names.split(','))
    refskip_dict = P.IntervalDict()
    for key, names in refskip_dict_json.items():
        refskip_dict[P.from_string(key, conv=int)] = set(names.split(','))

    def n_points(piece):
        # Number of integer-step positions in the interval, counted lazily.
        return sum(1 for _ in P.iterate(piece, step=1))

    new_mol_list = []
    for success, m in mol_list:
        if not success:
            # Failed molecules are forwarded untouched (serialised if needed).
            new_mol_list.append(
                (success, m if isinstance(m, str) else m.to_string()))
            continue

        mol = pysam.AlignedRead.fromstring(m, h)
        covered = interval(intervals_extract(mol.get_reference_positions()))

        # CIGAR operations 2 and 3 with non-zero length, in alignment order;
        # entry n is matched against the gap between block n and block n + 1.
        refskip_cigar = [
            t[0] for t in mol.cigartuples if t[1] > 0 and t[0] in (2, 3)
        ]
        blocks = mol.get_blocks()
        junctions = interval([(blocks[n][1], blocks[n + 1][0])
                              for n in range(len(blocks) - 1)
                              if refskip_cigar[n] == 3])

        # Keep only name sets whose overlapping piece has more than 4 points.
        set_list = [
            s for k, s in isoform_dict.get(covered,
                                           default={'intronic'}).items()
            if n_points(k) > 4
        ]
        set_refskip_list = [
            s for k, s in refskip_dict.get(junctions,
                                           default={'intronic'}).items()
            if n_points(k) > 4
        ]

        # Drop one 'intronic' placeholder when real annotations are present,
        # so it does not empty the intersection below.
        for candidates in (set_list, set_refskip_list):
            if len(candidates) > 1 and {'intronic'} in candidates:
                candidates.remove({'intronic'})

        try:
            compatible = set.intersection(*set_list)
            if set_refskip_list:
                compatible = compatible.intersection(*set_refskip_list)
            mol.set_tag('CT', ','.join(compatible))
            new_mol_list.append((success, mol.to_string()))
        except Exception:
            # Best effort: a molecule that cannot be tagged (for example an
            # empty ``set_list``) is skipped. ``Exception`` rather than a bare
            # ``except`` keeps KeyboardInterrupt/SystemExit propagating.
            continue
    return new_mol_list
Example #6
0
 def test_reversed_iteration(self):
     """Reverse iteration with negative steps, a base function and gaps."""
     # (interval, extra keyword arguments for P.iterate, expected values)
     scenarios = [
         (P.closed(0, 1), {'step': -1}, [1, 0]),
         (P.open(0, 3), {'step': -1}, [2, 1]),
         (P.closed(0, 1), {'step': -0.5}, [1, 0.5, 0]),
         (P.closed(0, 2), {'step': -1, 'base': lambda x: x - 1}, [1, 0]),
         (P.closed(0, 2) | P.closed(4, 5), {'step': -1}, [5, 4, 2, 1, 0]),
     ]
     for domain, options, expected in scenarios:
         assert list(P.iterate(domain, reverse=True, **options)) == expected
Example #7
0
 def I_mid(X):
     """Collect the iterated grid points between ``X[l_m]`` and ``X[dt]``.

     ``l_m``, ``dt``, ``step`` and ``p`` are free variables taken from the
     surrounding scope. The first sub-interval is ``(X[l_m], X[2*l_m]]``;
     further sub-intervals are appended back to back, doubling the upper
     index each time, until index ``dt`` is reached.

     Returns a flat list of the values produced by ``p.iterate``.
     """
     i = l_m
     j = 2*l_m

     I = list(p.iterate(p.openclosed(X[i], X[j]), step=step))

     # The previous condition, ``j == dt is False``, is a chained comparison
     # (``j == dt and dt is False``) that is never true, so the loop never ran.
     # ``j < dt`` with a clamp also stops when doubling would jump past ``dt``.
     while j < dt:
         i = j                  # equals the old ``2*i`` because j == 2*i here
         j = min(2*j, dt)
         # ``list.append`` returns None and would nest the values; extend
         # keeps ``I`` a flat list.
         I.extend(p.iterate(p.openclosed(X[i], X[j]), step=step))
     return I
Example #8
0
def get_compatible_isoforms_stitcher(mol_list, isoform_dict_json, h):
    """Tag stitched molecules with the isoforms they are compatible with.

    Args:
        mol_list: iterable of ``(success, m)`` pairs. For successful entries
            ``m`` is handed to ``pysam.AlignedRead.fromstring``; for failed
            entries ``m`` is either a string or an object exposing
            ``to_string()``.
        isoform_dict_json: mapping of interval strings (parsed with
            ``P.from_string(..., conv=int)``) to comma-separated names.
        h: header object forwarded to ``pysam.AlignedRead.fromstring``.

    Returns:
        A list of ``(success, string)`` pairs. Failed entries are passed
        through as strings. Successful entries carry a ``CT`` tag holding the
        comma-joined intersection of the matching name sets. Successful
        entries whose tag cannot be computed or set are dropped.
    """
    isoform_dict = P.IntervalDict()
    for key, names in isoform_dict_json.items():
        isoform_dict[P.from_string(key, conv=int)] = set(names.split(','))

    new_mol_list = []
    for success, m in mol_list:
        if not success:
            # The old check ``type(mol) == 'str'`` compared a type with a
            # string literal and was always False, and every entry had already
            # been parsed. Failed entries are now forwarded without parsing:
            # strings as-is, anything else via ``to_string()``.
            new_mol_list.append(
                (success, m if isinstance(m, str) else m.to_string()))
            continue

        # Parse lazily, one successful molecule at a time.
        mol = pysam.AlignedRead.fromstring(m, h)
        covered = interval(intervals_extract(mol.get_reference_positions()))

        # Keep only name sets whose overlapping piece has more than 4 points.
        set_list = [
            s for k, s in isoform_dict.get(covered,
                                           default={'intronic'}).items()
            if sum(1 for _ in P.iterate(k, step=1)) > 4
        ]
        # Drop one 'intronic' placeholder when real annotations are present.
        if len(set_list) > 1 and {'intronic'} in set_list:
            set_list.remove({'intronic'})

        try:
            mol.set_tag('CT', ','.join(set.intersection(*set_list)))
            new_mol_list.append((success, mol.to_string()))
        except Exception:
            # Best effort: a molecule that cannot be tagged (for example an
            # empty ``set_list``) is skipped. ``Exception`` rather than a bare
            # ``except`` keeps KeyboardInterrupt/SystemExit propagating.
            continue
    return new_mol_list
Example #9
0
def continuous(P, Q, l_m, dim=1, step=0.01):
    """Compute a KL-divergence style sum from two samples.

    Both samples are flattened and sorted, then turned into lists of grid
    points by iterating (with ``step``) over consecutive intervals whose
    bounds are picked from the sorted values at indices derived from ``l_m``.
    The result is ``sum(P_n[i] * log(P_n[i] / Q_m[i]))`` over the indices of
    the first grid.

    Args:
        P, Q: array-likes accepted by ``np.size`` / ``np.sort``.
        l_m: index stride used to pick the interval bounds.
        dim: exponent divisor in ``t_m = int((m / l_m) ** (1 / dim))``.
        step: increment passed to ``p.iterate``.

    Returns:
        ``pd.Series`` with a single entry ``'KL Divergence'``.

    NOTE(review): ``P`` here is a data argument; the interval library is
    referenced through the lowercase name ``p`` from the enclosing module.
    NOTE(review): the final sum indexes ``Q_m`` with indices of ``P_n`` and
    raises ``IndexError`` when ``Q_m`` is shorter -- confirm whether both
    grids are expected to have equal length.
    """
    m = np.size(P)

    t_m = int(np.power((m/l_m), (1/dim)))

    # Index of the bound where the middle section ends and the tail starts.
    dt = l_m*(t_m-1)

    def I_start(X):
        # Grid points from the minimum up to and including X[l_m].
        return list(p.iterate(p.openclosed(min(X), X[l_m]), step=step))

    def I_mid(X):
        # Grid points of the back-to-back sub-intervals between X[l_m] and
        # X[dt]; the upper index doubles each round.
        i = l_m
        j = 2*l_m

        I = list(p.iterate(p.openclosed(X[i], X[j]), step=step))

        # The previous condition, ``j == dt is False``, is a chained
        # comparison (``j == dt and dt is False``) that is never true, so the
        # loop never ran. ``j < dt`` with a clamp also stops when doubling
        # would jump past ``dt``.
        while j < dt:
            i = j              # equals the old ``2*i`` because j == 2*i here
            j = min(2*j, dt)
            # ``list.append`` returns None and would nest the values; extend
            # keeps ``I`` a flat list.
            I.extend(p.iterate(p.openclosed(X[i], X[j]), step=step))
        return I

    def I_end(X):
        # Grid points strictly between X[dt] and the maximum.
        return list(p.iterate(p.open(X[dt], max(X)), step=step))

    def I(X):
        return [*I_start(X), *I_mid(X), *I_end(X)]

    P_n = I(np.sort(P, axis=None))
    Q_m = I(np.sort(Q, axis=None))

    D_kl = sum(P_n[i]*np.log(P_n[i]/Q_m[i]) for i in range(len(P_n)))

    return pd.Series({'KL Divergence': D_kl})
    def get_stereotype_ratio(self):
        '''
        Calculate stereotype ratio for the underlying ReferencedSequence.

        Stereotype ratio is the fraction of sequence elements that belong to
        some repetitive pattern. E.g., consider the following case:
        `seq` = [a, b (->1), c (->0), d]. Element 'b' refers to index 1
        (itself), and 'c' refers to index 0, i.e. to 'a'. Each reference
        marks the closed index range between the element and its target as
        patterned, so 'a', 'b' and 'c' are patterned while 'd' is not.
        So, ratio here is 3/4.

        Returns 0.0 for an empty sequence (no elements, nothing patterned)
        instead of raising ZeroDivisionError.
        '''
        refs = self.content['refs']
        if len(refs) == 0:
            return 0.0

        patterned_area = P.empty()
        for i, targets in enumerate(refs):
            for ref in targets:
                # sorted() makes the bounds valid whether the reference
                # points backwards or forwards.
                patterned_area |= P.closed(*sorted([i, ref]))

        # Count the integer positions covered, without building a list.
        n_patterned = sum(1 for _ in P.iterate(patterned_area, step=1))
        return n_patterned / len(refs)
Example #11
0
def create_interval_dict_linear_time(gene, isoform_interval_dict):
    """Build an IntervalDict mapping positions to the transcripts covering them.

    Every integer-step position in the union of all transcript intervals is
    grouped by the exact combination of transcripts that contain it. Each
    group of positions is then converted back to an interval (through
    ``intervals_extract`` / ``interval``) and mapped to that transcript set.

    Args:
        gene: identifier returned unchanged alongside the result.
        isoform_interval_dict: mapping of transcript -> interval.

    Returns:
        ``(gene, d)`` where ``d`` is a ``P.IntervalDict`` whose values are
        sets of transcripts.
    """
    d = P.IntervalDict()
    union = P.empty()
    for inter in isoform_interval_dict.values():
        union = union | inter

    # Keyed by a tuple of transcripts (in dict order) rather than by
    # repr()/eval() of a list: no code is evaluated, and transcript keys
    # without an eval-able repr work too.
    coords_by_transcripts = {}
    for pos in P.iterate(union, step=1):
        members = tuple(transcript
                        for transcript, inter in isoform_interval_dict.items()
                        if pos in inter)
        coords_by_transcripts.setdefault(members, []).append(pos)

    for members, coords in coords_by_transcripts.items():
        d[interval(intervals_extract(coords))] = set(members)
    return gene, d
Example #12
0
def make_POS_and_CIGAR(stitched_m):
    """Derive the start position and CIGAR string of a stitched molecule.

    Reads ``stitched_m['ref_intervals']``, ``['skipped_intervals']`` and
    ``['del_intervals']``. When the reference and skipped intervals overlap,
    the overlap is removed from ``stitched_m['skipped_intervals']`` in place
    and reported as a conflict.

    Returns:
        ``(POS, CIGAR, conflict, nreads_conflict, interval_list)`` where
        ``POS`` is the first reference bound plus one, ``CIGAR`` is the
        concatenation of ``<length><op>`` chunks (``M`` reference, ``N``
        skipped, ``D`` deleted), ``conflict`` tells whether an overlap was
        found, ``nreads_conflict`` is the number of unit-step positions in
        the overlap, and ``interval_list`` holds the flattened lower/upper
        bounds of the overlap.
    """

    def inclusive_bounds(records):
        # Each record is (left, lower, upper, right): a falsy ``left`` moves
        # the lower bound up by one, a falsy ``right`` moves the upper bound
        # down by one (presumably open bounds -- see portion's ``to_data``).
        return [(lower if left else lower + 1, upper if right else upper - 1)
                for left, lower, upper, right in records]

    overlap = stitched_m['ref_intervals'] & stitched_m['skipped_intervals']
    conflict = not overlap.empty
    if conflict:
        nreads_conflict = len(list(P.iterate(overlap, step=1)))
        stitched_m['skipped_intervals'] = (
            stitched_m['skipped_intervals'] - overlap)
        interval_list = [
            bound for record in P.to_data(overlap) for bound in record[1:-1]
        ]
    else:
        nreads_conflict = 0
        interval_list = []

    ref_tuples = inclusive_bounds(P.to_data(stitched_m['ref_intervals']))

    skipped = stitched_m['skipped_intervals']
    skipped_tuples = ([] if skipped.empty
                      else inclusive_bounds(P.to_data(skipped)))

    deleted = stitched_m['del_intervals']
    # NOTE(review): the first and last deletion records are discarded here
    # ([1:-1] on the record list) -- confirm this is intentional.
    del_tuples = ([] if deleted.empty
                  else inclusive_bounds(P.to_data(deleted)[1:-1]))

    POS = ref_tuples[0][0] + 1

    pending = {'M': ref_tuples, 'N': skipped_tuples, 'D': del_tuples}
    chunks = []
    while any(pending.values()):
        # Pick the operation whose next segment starts first; ties go to the
        # earlier key in M, N, D order.
        heads = {op: segments[0][0]
                 for op, segments in pending.items() if segments}
        op = min(heads, key=heads.get)
        start, end = pending[op].pop(0)
        n_bases = np.int_(end - start) + 1
        if n_bases != 0:
            chunks.append('{}{}'.format(n_bases, op))
    return POS, ''.join(chunks), conflict, nreads_conflict, interval_list
Example #13
0
    def __getitem__(self, key: int):
        """Return the closed interval for period number ``key``.

        Without a base calendar the result is the degenerate interval
        ``[key, key]``. Otherwise the period's start offset is the sum of all
        period lengths before ``key`` (cycling through ``self.periods``),
        added to ``self.reftime``. The result spans from the lower bound of
        the base-calendar entry at that offset to one less than the last
        integer-step point before or at the lower bound of the entry one
        period length later.
        """
        if self.basecalendar is None:
            return P.closed(key, key)

        n_periods = len(self.periods)
        full_cycles, position = divmod(key, n_periods)

        # Time consumed by complete cycles plus the periods already passed
        # in the current cycle.
        elapsed = full_cycles * sum(self.periods)
        elapsed += sum([self.periods[x % n_periods] for x in range(position)])

        start = self.reftime + elapsed
        length = self.periods[position]

        span = P.closed(self.basecalendar[start].lower,
                        self.basecalendar[start + length].lower)
        points = list(P.iterate(span, step=1, base=int))
        return P.closed(points[0], points[-1] - 1)
Example #14
0
 def test_default_parameters(self):
     """Unit-step iteration honours closed and open bounds."""
     # Each entry pairs an interval with the values expected for step=1.
     scenarios = [
         (P.closed(0, 2), [0, 1, 2]),
         (P.closedopen(0, 2), [0, 1]),
         (P.openclosed(0, 2), [1, 2]),
         (P.open(0, 2), [1]),
         (P.open(0, 2.5), [1, 2]),
     ]
     for domain, expected in scenarios:
         assert list(P.iterate(domain, step=1)) == expected
Example #15
0
 def test_with_step(self):
     """``step`` may be a number or a callable producing the next value."""
     domain = P.closed(0, 6)
     # Each entry pairs a step with the values expected over [0, 6].
     scenarios = [
         (2, [0, 2, 4, 6]),
         (4, [0, 4]),
         (lambda x: x + 2, [0, 2, 4, 6]),
     ]
     for step, expected in scenarios:
         assert list(P.iterate(domain, step=step)) == expected
Example #16
0
 def test_empty_intervals(self):
     """Intervals containing no unit-step point iterate to nothing."""
     for domain in (P.empty(), P.open(0, 1)):
         assert list(P.iterate(domain, step=1)) == []