def parse_torrent(path_or_fin, *, encoding):
    """Parse a .torrent file and sanity-check its number of pieces.

    Parameters:
        path_or_fin: either an already opened binary file object (anything
            exposing a ``readable`` attribute) or a path, which is opened in
            ``'rb'`` mode and closed again before returning.
        encoding: forwarded to ``parse_torrent__file``; the file itself is
            binary, but the strings inside it need an encoding to be decoded
            (the original note names utf8 as the usual choice).

    Returns:
        The mapping produced by ``parse_torrent__file`` (unchanged).

    Raises:
        AssertionError: when ``len(info['pieces'])`` differs from
            ``ceil_div(total_bytes, piece_length)``.  The relevant numbers
            are reported through ``print_err`` before re-raising.
    """
    if hasattr(path_or_fin, 'readable'):
        ordered_dict_ = parse_torrent__file(path_or_fin, encoding=encoding)
    else:
        with open(path_or_fin, 'rb') as fin:
            ordered_dict_ = parse_torrent__file(fin, encoding=encoding)

    info = ordered_dict_['info']
    # Multi-file torrents list their files under 'files'; single-file
    # torrents carry one top-level 'length' instead (BitTorrent BEP 3).
    if 'files' in info:
        file_lengths = [file_info['length'] for file_info in info['files']]
    else:
        file_lengths = [info['length']]
    total_bytes = sum(file_lengths)
    piece_length = info['piece length']
    num_pieces = len(info['pieces'])
    expected_num_pieces = ceil_div(total_bytes, piece_length)

    # Pieces span file boundaries, so the count is derived from the total
    # size, not from rounding every file up separately.  Observed on
    # r_2213000.torrent
    # (http://libgen.io/libgen/repository_torrent/r_2213000.torrent):
    #     total = 21526239267
    #     ceil_div(total, piece_length) = 5133
    #     num_pieces = 5133
    #     fake_num_pieces = 5682
    try:
        assert expected_num_pieces == num_pieces
    except AssertionError:
        # Per-file rounding is computed only for the diagnostic output; the
        # handler used to reference this name without ever defining it.
        fake_num_pieces = sum(
            ceil_div(length, piece_length) for length in file_lengths)
        print_err('total_bytes =', total_bytes)
        print_err('ceil_div(total_bytes, piece_length) =', expected_num_pieces)
        print_err('num_pieces =', num_pieces)
        print_err('fake_num_pieces =', fake_num_pieces)
        raise
    return ordered_dict_
def uint2iter_bits(is_big_endian, u, *, length=None):
    """Return an iterator over the bits of the unsigned integer ``u``.

    Parameters:
        is_big_endian: forwarded to ``uint2bytes`` and ``bytes2iter_bits``.
        u: the unsigned integer to expand.
        length: optional number of bits wanted.  When given, ``u`` is
            rendered into ``ceil_div(length, 8)`` bytes and the first
            ``byte_length * 8 - length`` bits of the stream are skipped.
            When omitted, the leading falsy bits of the stream are skipped.

    Returns:
        ``null_iter`` when ``uint2bytes`` produces no bytes, otherwise a lazy
        iterator (``islice`` / ``dropwhile``) over ``bytes2iter_bits``.
    """
    # NOTE(review): skipping from the *front* of the stream presumably
    # targets big-endian padding bits -- confirm the behaviour of
    # bytes2iter_bits for is_big_endian=False.
    if length is not None:
        num_bytes = ceil_div(length, 8)
        raw = uint2bytes(is_big_endian, u, length=num_bytes)
        if not raw:
            return null_iter
        padding_bits = num_bytes * 8 - length
        bits = bytes2iter_bits(is_big_endian, raw)
        return islice(bits, padding_bits, None)

    raw = uint2bytes(is_big_endian, u)
    if not raw:
        return null_iter
    bits = bytes2iter_bits(is_big_endian, raw)
    return dropwhile(lambda bit: not bit, bits)
def _group_at_most_2__via_remain_bytes(remain_bit_length, tmps, *, reverse):
    """Group one or two ``Tmp`` items by their remaining unsigned value.

    remain_bytes -> [Tmp]{1..2} -> [[Tmp]]

    Parameters:
        remain_bit_length: number of remaining bits; rounded up to whole
            bytes with ``ceil_div(remain_bit_length, 8)``.
        tmps: sequence of exactly one or two items exposing ``.u``; every
            item is expected to have the same remaining bits.
        reverse: forwarded to ``group_at_most_2``; needed because the
            unsigned values are compared directly.

    Returns:
        ``[tmps]`` for a single item, otherwise the result of
        ``group_at_most_2`` keyed on ``Tmp.get_remain_uint``.

    Side effects:
        For two items, sets ``remain_u`` on each of them.
    """
    if len(tmps) == 1:
        return [tmps]

    # Unpacking doubles as a check that exactly two items were supplied.
    _first, _second = tmps
    byte_count = ceil_div(remain_bit_length, 8)
    for tmp in tmps:
        # Render as big-endian bytes, then read them back as an integer.
        big_endian_bytes = uint2bytes(True, tmp.u, length=byte_count)
        tmp.remain_u = int.from_bytes(big_endian_bytes, 'big')

    return group_at_most_2(
        tmps,
        key=Tmp.get_remain_uint,
        reverse=reverse,
        with_key=False,
    )
Example #4
0
def calc_num_blocks(array_length, block_size):
    """Return the number of blocks needed to cover ``array_length`` items.

    calc_num_blocks array_length block_size = ceil(array_length/block_size)

    Usage noted by the original author:
        num_normal_blocks = calc_num_blocks(p, complete_normal_block_size)
        num_super_blocks = calc_num_blocks(p, complete_super_block_size)

    Parameters:
        array_length: non-negative number of items.
        block_size: positive capacity of one block.

    Returns:
        ``ceil_div(array_length, block_size)``.

    Raises:
        AssertionError: if ``array_length < 0`` or ``block_size <= 0``
            (only while assertions are enabled).

    example:
        >>> calc_num_blocks(0, 1)
        0
        >>> calc_num_blocks(0, 2)
        0
        >>> calc_num_blocks(1, 1)
        1
        >>> calc_num_blocks(1, 2)
        1
        >>> calc_num_blocks(2, 1)
        2
        >>> calc_num_blocks(2, 2)
        1
        >>> calc_num_blocks(2, 3)
        1
        >>> calc_num_blocks(3, 1)
        3
        >>> calc_num_blocks(3, 2)
        2
        >>> calc_num_blocks(3, 3)
        1
        >>> calc_num_blocks(3, 4)
        1
        >>> calc_num_blocks(3, 5)
        1
        >>> calc_num_blocks(3, 6)
        1
    """
    assert array_length >= 0
    assert block_size > 0
    # Same value as (array_length + block_size - 1) // block_size; the
    # unreachable second ``return`` spelling that out has been dropped.
    return ceil_div(array_length, block_size)
def uint2byte_length(u):
    """Return the number of whole bytes needed for the bits of ``u``.

    The bit count comes from ``uint2bit_length(u)`` and is rounded up to a
    multiple of eight with ``ceil_div``.
    """
    return ceil_div(uint2bit_length(u), 8)
Example #6
0
    def this(alphabet_size, string):
        L = len(string)

        ######################## basic case
        may_SA = handle_if_basic_case(alphabet_size, string)
        if may_SA is not None:
            return may_SA
        if L == 0:
            return []

        ######################## useless
        ######################## optional handle if all chars are different
        # when all chars are different:
        #   the suffix-tree is flatten
        #   we can simple sort chars
        if False:
            #see above and below instead
            #   above handle the global input string
            #   below handle before recur call
            #   so, this stmt body is useless

            #if is_strict_sorted(string): return list(range(L))
            # def key
            key = string.__getitem__
            # O(L)
            sorted_string_indices = bucket_sort(alphabet_size,
                                                range(L),
                                                key=key)
            if is_strict_sorted(sorted_string_indices, key=key):
                SA = sorted_string_indices
                return SA
            del key

        ######################## non-basic case
        # BEGIN: radix_sort singleton_or_pair_ls
        '''
        # bucket_sort all snd of pairs
        #   i.e. all snd of string[3z+2:...+2]
        #   i.e. string[3z+3]
        tmp = bucket_sort(range(1,Lx, 2)[:(|-1)], key=\i->string[i//2*3+2 +1])
            = bucket_sort(range(1,Lx-1, 2), key=\i->string[i//2*3+2 +1])
            when the last i2 not followed by i0 then exclude it
            i.e. when L = i2+1 = 3z+2+1 = 3x > 0
            i.e. when Lx = 2x > 0
        sorted_indices_of_singleton_or_pair_ls =
            bucket_sort(range(0,Lx,2)+ may last i2 +tmp
                        , key=\i->string[i//2*3+1+bool(i&1)])
        '''
        # def Lx
        Lx = L - ceil_div(L, 3)  # len(singleton_or_pair_ls)
        assert 0 <= Lx < L
        assert L == 1 or Lx > 0
        # O(L/3) half of the first bucket_sort round
        tmp_half_1round = bucket_sort(alphabet_size,
                                      range(1, Lx - 1, 2),
                                      key=lambda i: string[i // 2 * 3 + 2 + 1])
        # the second bucket_sort round
        # last i2 = 2z+1 == Lx-1
        may_last_i2 = [Lx - 1] if is_even(Lx) and Lx > 0 else []
        sorted_indices_of_singleton_or_pair_ls = (
            #or: bucket_sort(alphabet_size, chain(may_last_i2, range(0,Lx,2), tmp)
            bucket_sort(alphabet_size,
                        chain(range(0, Lx, 2), may_last_i2, tmp_half_1round),
                        key=lambda i: string[i // 2 * 3 + 1 + bool(i & 1)]))

        # END: radix_sort singleton_or_pair_ls

        ############################
        # BEGIN: make_array_idx2group_idx(singleton_or_pair_ls)
        # def key
        def key(i: 'suffix_begin_of_singleton_or_pair_ls'):
            if is_odd(i):
                # i2
                i2 = i // 2 * 3 + 2
                return string[i2:i2 + 2]  # len == 1 or 2
            i1 = i // 2 * 3 + 1
            return string[i1:i1 + 1]  # len == 1

        singleton_or_pair_ls_idx2group_idx = [None] * Lx
        gs = groupby(sorted_indices_of_singleton_or_pair_ls, key=key)
        group_idx = -1
        for group_idx, (_, g) in enumerate(gs):
            for singleton_or_pair_ls_idx in g:
                assert singleton_or_pair_ls_idx2group_idx[
                    singleton_or_pair_ls_idx] is None
                singleton_or_pair_ls_idx2group_idx[singleton_or_pair_ls_idx]\
                    = group_idx
        group_idx_upper_bound = group_idx + 1
        assert all(idx is not None
                   for idx in singleton_or_pair_ls_idx2group_idx)

        assert 0 <= group_idx_upper_bound <= Lx < L
        assert 0 <= group_idx_upper_bound <= alphabet_size**2 + alphabet_size
        # may: group_idx_upper_bound > alphabet_size
        # END: make_array_idx2group_idx(singleton_or_pair_ls)

        # calc SA_1_2
        #if is_strict_sorted(sorted_indices_of_singleton_or_pair_ls, key=key):
        if group_idx_upper_bound == Lx:
            assert is_strict_sorted(sorted_indices_of_singleton_or_pair_ls,
                                    key=key)
            # all chars are different for SA_1_2
            SA_1_2 = sorted_indices_of_singleton_or_pair_ls
        else:
            # recur call
            assert not is_strict_sorted(sorted_indices_of_singleton_or_pair_ls,
                                        key=key)
            SA_1_2 = this(group_idx_upper_bound,
                          singleton_or_pair_ls_idx2group_idx)
        del key
        ########################### SA_1_2 DONE

        ########################### SA_0
        '''
        def SA_0
        SA_0 = sorted(range(len(suffices_0)), key=\i->suffices_0[i])
            = sorted(range(len(suffices_0)), key=\i->string[3*i])
            where suffices_0 = [string[i:] for i in range(L) if i==3*_]
        def SA_1
        SA_1 = sorted(range(len(suffices_1)), key=\i->suffices_1[i])
            = sorted(range(len(suffices_1)), key=\i->string[3*i+1])
            where suffices_1 = [string[i:] for i in range(L) if i==3*_+1]

        invSA_1_2 = invUIntSA<singleton_or_pair_ls> = invSA<SA_1_2>
            # calc invSA is easy


        # calc:
        SA_1 = [i_xy//2 | i_xy <- SA_1_2, i_xy&1==0]
        # calc:
        SA_0
            = radix_sort range(0,L, 3) with key=\i->(string[3*i], invSA_1_2[3*i+1])

            # can save the first bucket_sort (i.e. with key[-1])
            #   since SA_1 has known
            = [may last i0 if ...] + bucket_sort(SA_1, key=\i0->string[i0*3])
                # if last i0 not follow a i1
                #   i.e. L = 3*i0+1
                #   i.e. Lx = 2*i0
        '''
        invSA_1_2 = inverse_uint_bijection_array(SA_1_2)
        SA_1 = [i_xy // 2 for i_xy in SA_1_2 if is_even(i_xy)]
        may_last_i0 = [Lx >> 1] if divs(3, L - 1) else []
        #bug: tmp = bucket_sort(len(SA_1), SA_1, key=lambda i0: string[i0*3])
        tmpSA_0 = bucket_sort(alphabet_size,
                              SA_1,
                              key=lambda i0: string[i0 * 3])
        SA_0 = may_last_i0 + tmpSA_0 if may_last_i0 else tmpSA_0
        ########################### SA_0 DONE

        ########################### merge SA_0 and SA_1_2
        '''
        SA = merge le (map (3*) SA_0) (map i_xy_to_i_str SA_1_2)
            where
                Left = Right = id
                #le i_str_0 i_str_1_2
                le (3*i0) (3*i1+1) =
                    (string[3*i0], invSA_1_2[i_str_1_2_to_i_xy(3*i0+1)])
                    <=
                    (string[3*i1+1], invSA_1_2[i_str_1_2_to_i_xy(3*i1+2)])
                le (3*i0) (3*i2+2) =
                    (string[3*i0], string[3*i0+1]
                        , invSA_1_2[i_str_1_2_to_i_xy(3*i0+2)])
                    <=
                    (string[3*i2+2], string[3*i2+3], invSA_1_2[i_str_1_2_to_i_xy(3*i2+4)])
                i_xy_to_i_str i_xy = i_xy//2*3+1 +bool(i_xy&1)
                i_str_1_2_to_i_xy i_str = # inverse i_xy_to_i_str
                    if i_str == 3*i0+1 then 2*i0
                    elif i_str == 3*i0+2 then 2*i0+1
                    else undefined
        '''

        def i_xy_to_i_str(i_xy):
            return i_xy // 2 * 3 + 1 + bool(i_xy & 1)

        def le(i_str_0, i_str_1_2):
            # assert divs(3, i_str_0)
            # assert not divs(3, i_str_1_2)
            i0 = i_str_0 // 3
            i_xy_base = i_str_1_2 // 3 * 2
            r = i_str_1_2 % 3

            def may_invSA_1_2(i_xy):
                n = len(invSA_1_2)
                if n == i_xy:
                    return -1
                return invSA_1_2[i_xy]

            if r == 1:
                i_str_1 = i_str_1_2
                i1 = i_xy_base  # + 0
                i2_after_i1 = i1 + 1

                i1_after_i0 = 2 * i0  # + 0
                lhs = (string[i_str_0], may_invSA_1_2(i1_after_i0))
                rhs = (string[i_str_1], may_invSA_1_2(i2_after_i1))
            else:
                assert r == 2
                i_str_2 = i_str_1_2
                i2 = i_xy_base + 1
                i1_after_i2 = i2 + 1

                i2_after_i0 = 2 * i0 + 1
                lhs = (string[i_str_0:i_str_0 + 2], may_invSA_1_2(i2_after_i0))
                rhs = (string[i_str_2:i_str_2 + 2], may_invSA_1_2(i1_after_i2))
            return lhs <= rhs

        #SA = merge le (map (3*) SA_0) (map i_xy_to_i_str SA_1_2)
        idc_str_0 = map(lambda i0: 3 * i0, SA_0)
        idc_str_1_2 = map(i_xy_to_i_str, SA_1_2)
        [*SA] = merge_two_sorted_iterables(idc_str_0, idc_str_1_2, __le__=le)
        if __debug__:
            singleton_or_pair_ls = [
                string[i:i + 1 + divs(3, i - 2)] for i in range(L)
                if not divs(3, i)
            ]
            print(f'''
string
    {string}
singleton_or_pair_ls
    {singleton_or_pair_ls}
tmp_half_1round
    {tmp_half_1round}
sorted_indices_of_singleton_or_pair_ls
    {sorted_indices_of_singleton_or_pair_ls}
SA_1_2
    {SA_1_2}
SA_0
    {SA_0}
SA
    {SA}
''')
        input('...')
        return SA