def parse_torrent(path_or_fin, *, encoding):
    """Parse a torrent file and sanity-check its piece count.

    Args:
        path_or_fin: either an already-open binary file object (detected by
            the presence of a ``readable`` attribute) or a path, which is
            opened in ``'rb'`` mode and closed again before returning.
        encoding: keyword-only; forwarded to ``parse_torrent__file`` so the
            strings stored inside the binary torrent data can be decoded.
            NOTE(review): an older comment here said "default: utf8", but
            the signature declares no default -- confirm the intent.

    Returns:
        The mapping produced by ``parse_torrent__file`` (unchanged).

    Raises:
        AssertionError: if ``ceil_div(total_bytes, piece_length)`` differs
            from ``len(info['pieces'])``.  Diagnostics are written with
            ``print_err`` first.
    """
    if hasattr(path_or_fin, 'readable'):
        ordered_dict_ = parse_torrent__file(path_or_fin, encoding=encoding)
    else:
        with open(path_or_fin, 'rb') as fin:
            ordered_dict_ = parse_torrent__file(fin, encoding=encoding)

    info = ordered_dict_['info']
    # NOTE(review): only the multi-file layout (info['files']) is read here;
    # a torrent without a 'files' entry would raise KeyError -- confirm
    # whether such inputs need to be supported.
    files = info['files']
    total_bytes = sum(file_info['length'] for file_info in files)
    piece_length = info['piece length']
    num_pieces = len(info['pieces'])

    # Pieces span file boundaries, so the count is derived from the total
    # size, not from rounding each file up separately.  Recorded example
    # (r_2213000.torrent,
    #  http://libgen.io/libgen/repository_torrent/r_2213000.torrent):
    #   total = 21526239267
    #   ceil_div(total, piece_length) = 5133
    #   num_pieces = 5133
    #   fake_num_pieces = 5682
    expected_num_pieces = ceil_div(total_bytes, piece_length)
    if expected_num_pieces != num_pieces:
        # Per-file rounding is computed only for the diagnostic output.
        # (Previously this name was never assigned, so the handler itself
        # crashed with NameError and hid the real failure.)
        fake_num_pieces = sum(
            ceil_div(file_info['length'], piece_length)
            for file_info in files)
        print_err('total_bytes =', total_bytes)
        print_err('ceil_div(total_bytes, piece_length) =',
                  expected_num_pieces)
        print_err('num_pieces =', num_pieces)
        print_err('fake_num_pieces =', fake_num_pieces)
        raise AssertionError(
            'piece count mismatch: expected %r, found %r'
            % (expected_num_pieces, num_pieces))
    return ordered_dict_
def uint2iter_bits(is_big_endian, u, *, length=None):
    """Return an iterator over the bits of the unsigned integer ``u``.

    The integer is first turned into bytes with ``uint2bytes`` and those
    bytes into bits with ``bytes2iter_bits``; both receive
    ``is_big_endian`` unchanged.

    Args:
        is_big_endian: byte/bit order flag passed through to the helpers.
        u: the unsigned integer to expand.
        length: keyword-only bit count.  When ``None`` the falsy bits at
            the front of the bit stream are dropped; otherwise ``u`` is
            rendered into ``ceil_div(length, 8)`` bytes and the first
            ``byte_length * 8 - length`` bits of the stream are skipped.

    Returns:
        ``null_iter`` when ``uint2bytes`` yields no bytes, otherwise a
        lazy iterator of bits.
    """
    # NOTE(review): in both modes the trimming happens at the *front* of
    # the stream produced by bytes2iter_bits; whether that is the padding
    # side for little-endian input depends on that helper -- confirm.
    trim_leading_zero_bits = length is None
    if trim_leading_zero_bits:
        raw = uint2bytes(is_big_endian, u)
    else:
        num_bytes = ceil_div(length, 8)
        raw = uint2bytes(is_big_endian, u, length=num_bytes)

    if not raw:
        return null_iter

    if trim_leading_zero_bits:
        bits = bytes2iter_bits(is_big_endian, raw)
        return dropwhile(lambda bit: not bit, bits)

    # Whole bytes were produced, so skip the surplus bits in front.
    surplus_bits = num_bytes * 8 - length
    bits = bytes2iter_bits(is_big_endian, raw)
    return islice(bits, surplus_bits, None)
def _group_at_most_2__via_remain_bytes(remain_bit_length, tmps, *, reverse):
    """Group one or two ``Tmp`` items by their remaining unsigned value.

    remain_bytes -> [Tmp]{1..2} -> [[Tmp]]

    Args:
        remain_bit_length: bit count; rounded up to whole bytes with
            ``ceil_div(remain_bit_length, 8)``.
        tmps: non-empty sequence holding one or two items.  For two items
            each item's ``remain_u`` attribute is (re)assigned here.
        reverse: keyword-only; forwarded to ``group_at_most_2`` (needed
            because the uints are compared directly).

    Returns:
        ``[tmps]`` for a single item, otherwise whatever
        ``group_at_most_2`` returns for key ``Tmp.get_remain_uint``.
    """
    assert tmps
    if len(tmps) == 1:
        return [tmps]

    # Unpacking doubles as the "exactly two items" check (ValueError
    # otherwise); the names themselves are not needed afterwards.
    _first, _second = tmps

    num_remain_bytes = ceil_div(remain_bit_length, 8)
    for item in tmps:
        # Render as big-endian bytes of the fixed width, then read back.
        big_endian_bytes = uint2bytes(True, item.u, length=num_remain_bytes)
        item.remain_u = int.from_bytes(big_endian_bytes, 'big')

    return group_at_most_2(
        tmps,
        key=Tmp.get_remain_uint,
        reverse=reverse,
        with_key=False,
    )
def calc_num_blocks(array_length, block_size):
    '''Return the number of blocks needed to cover ``array_length`` items.

    calc_num_blocks array_length block_size = ceil(array_length/block_size)

    def num_normal_blocks = calc_num_blocks(p, complete_normal_block_size)
    def num_super_blocks = calc_num_blocks(p, complete_super_block_size)

    Both arguments are checked with ``assert``: ``array_length`` must be
    >= 0 and ``block_size`` must be > 0.  The rounding itself is delegated
    to ``ceil_div``.

    example:
        >>> calc_num_blocks(0, 1)
        0
        >>> calc_num_blocks(0, 2)
        0
        >>> calc_num_blocks(1, 1)
        1
        >>> calc_num_blocks(1, 2)
        1
        >>> calc_num_blocks(2, 1)
        2
        >>> calc_num_blocks(2, 2)
        1
        >>> calc_num_blocks(2, 3)
        1
        >>> calc_num_blocks(3, 1)
        3
        >>> calc_num_blocks(3, 2)
        2
        >>> calc_num_blocks(3, 3)
        1
        >>> calc_num_blocks(3, 4)
        1
        >>> calc_num_blocks(3, 5)
        1
        >>> calc_num_blocks(3, 6)
        1
    '''
    assert array_length >= 0
    assert block_size > 0
    # The former second return,
    # ``(array_length + block_size - 1) // block_size``, was unreachable
    # and has been dropped.
    return ceil_div(array_length, block_size)
def uint2byte_length(u):
    """Return how many whole bytes are needed for ``u``.

    The bit length comes from ``uint2bit_length(u)`` and is rounded up to
    a multiple of eight via ``ceil_div``.
    """
    return ceil_div(uint2bit_length(u), 8)
def this(alphabet_size, string, *, verbose=False):
    """Build a suffix array of ``string`` with a mod-3 split and recursion.

    Positions are split into i0 (index % 3 == 0), i1 (% 3 == 1) and
    i2 (% 3 == 2).  The i1/i2 suffixes are ranked first (recursively if
    needed), the i0 suffixes are derived from that ranking, and both
    sorted sequences are merged.

    Args:
        alphabet_size: bucket count handed to ``bucket_sort``; the items of
            ``string`` are used directly as bucket keys.
        string: indexable, sliceable sequence (the recursive call passes a
            list of group indices).
        verbose: keyword-only.  When true, the intermediate arrays are
            printed for every (recursive) call.  This replaces the former
            unconditional dump that also blocked on ``input('...')``.

    Returns:
        Whatever ``handle_if_basic_case`` returns when it is not ``None``;
        ``[]`` for an empty input; otherwise a list of start positions.
    """
    L = len(string)

    ######################## basic case
    may_SA = handle_if_basic_case(alphabet_size, string)
    if may_SA is not None:
        return may_SA
    if L == 0:
        return []

    # An optional shortcut for "all chars are different" (bucket-sort the
    # positions and return them if strictly sorted) used to sit here under
    # ``if False:``; it never ran and has been removed.  The equivalent
    # check is done below just before the recursive call.

    ######################## non-basic case
    # BEGIN: radix_sort singleton_or_pair_ls
    #
    # singleton_or_pair_ls holds, for every position that is not a
    # multiple of 3, either string[i1:i1+1] (a singleton) or
    # string[i2:i2+2] (a pair).  Its index i_xy maps to the string
    # position i_xy//2*3 + 1 + bool(i_xy & 1).
    #
    # Round one sorts the pairs by their second char, string[3z+3].
    # The last i2 has no second char exactly when it is the final position
    # of the string, i.e. when L == 3x; it is then kept out of round one
    # and placed ahead of the other pairs in round two.
    Lx = L - ceil_div(L, 3)  # len(singleton_or_pair_ls)
    assert 0 <= Lx < L
    assert L == 1 or Lx > 0

    # Fix: this used to be keyed on "Lx is even", which is also true for
    # L == 3x + 1, where the last pair *does* have a second char and must
    # take part in round one like every other pair.
    last_i2_is_short = L % 3 == 0
    odd_stop = Lx - 1 if last_i2_is_short else Lx

    # half of the first bucket_sort round
    tmp_half_1round = bucket_sort(
        alphabet_size,
        range(1, odd_stop, 2),
        key=lambda i: string[i // 2 * 3 + 2 + 1])

    # the second bucket_sort round (by first char); relies on bucket_sort
    # keeping the incoming order of equal keys.
    # last i2 = 2z+1 == Lx-1
    may_last_i2 = [Lx - 1] if last_i2_is_short else []
    sorted_indices_of_singleton_or_pair_ls = bucket_sort(
        alphabet_size,
        chain(range(0, Lx, 2), may_last_i2, tmp_half_1round),
        key=lambda i: string[i // 2 * 3 + 1 + bool(i & 1)])
    # END: radix_sort singleton_or_pair_ls

    ############################
    # BEGIN: make_array_idx2group_idx(singleton_or_pair_ls)
    def key(i):
        # i: suffix_begin_of_singleton_or_pair_ls
        if is_odd(i):
            # i2
            i2 = i // 2 * 3 + 2
            return string[i2:i2 + 2]  # len == 1 or 2
        i1 = i // 2 * 3 + 1
        return string[i1:i1 + 1]  # len == 1

    # NOTE(review): a singleton always ranks below a pair that starts with
    # the same char, whatever follows the singleton.  Tracing 'xaaaab' by
    # hand gives positions 1, 4, 2, 5 for the i1/i2 suffixes although
    # 'aaab' (pos 2) < 'ab' (pos 4).  Unless handle_if_basic_case covers
    # such inputs, the reduction itself needs revisiting -- TODO confirm.
    singleton_or_pair_ls_idx2group_idx = [None] * Lx
    gs = groupby(sorted_indices_of_singleton_or_pair_ls, key=key)
    group_idx = -1  # stays -1 when there is nothing to group
    for group_idx, (_, g) in enumerate(gs):
        for singleton_or_pair_ls_idx in g:
            assert (singleton_or_pair_ls_idx2group_idx[
                singleton_or_pair_ls_idx] is None)
            singleton_or_pair_ls_idx2group_idx[singleton_or_pair_ls_idx] = (
                group_idx)
    group_idx_upper_bound = group_idx + 1
    assert all(idx is not None for idx in singleton_or_pair_ls_idx2group_idx)
    assert 0 <= group_idx_upper_bound <= Lx < L
    assert 0 <= group_idx_upper_bound <= alphabet_size**2 + alphabet_size
    # may: group_idx_upper_bound > alphabet_size
    # END: make_array_idx2group_idx(singleton_or_pair_ls)

    # calc SA_1_2
    if group_idx_upper_bound == Lx:
        # every group has one member: all "chars" differ, no recursion
        assert is_strict_sorted(
            sorted_indices_of_singleton_or_pair_ls, key=key)
        SA_1_2 = sorted_indices_of_singleton_or_pair_ls
    else:
        # recur call on the string of group indices
        assert not is_strict_sorted(
            sorted_indices_of_singleton_or_pair_ls, key=key)
        SA_1_2 = this(
            group_idx_upper_bound,
            singleton_or_pair_ls_idx2group_idx,
            verbose=verbose)
    del key
    ########################### SA_1_2 DONE

    ########################### SA_0
    # SA_0 orders the i0 suffixes by (string[3*i0], rank of suffix 3*i0+1).
    # SA_1 (the i0 values in rank order of their following i1 suffix) is
    # already the result of the pass on the second component, so a single
    # stable bucket_sort on the first char finishes the job.
    # If the last i0 is not followed by an i1 (L == 3*i0 + 1, hence
    # Lx == 2*i0), its second component is the empty suffix, which ranks
    # lowest: it has to enter the sort *ahead of* SA_1.
    invSA_1_2 = inverse_uint_bijection_array(SA_1_2)
    SA_1 = [i_xy // 2 for i_xy in SA_1_2 if is_even(i_xy)]
    may_last_i0 = [Lx >> 1] if divs(3, L - 1) else []
    # Fix: the last i0 used to be prepended *after* sorting, which put it
    # first overall even when its char was not the smallest.
    # (older note: bucket count must be alphabet_size, not len(SA_1))
    SA_0 = bucket_sort(
        alphabet_size,
        chain(may_last_i0, SA_1),
        key=lambda i0: string[i0 * 3])
    ########################### SA_0 DONE

    ########################### merge SA_0 and SA_1_2
    # SA = merge le (map (3*) SA_0) (map i_xy_to_i_str SA_1_2)
    #   le (3*i0) (3*i1+1) =
    #       (string[3*i0], rank[3*i0+1])
    #       <= (string[3*i1+1], rank[3*i1+2])
    #   le (3*i0) (3*i2+2) =
    #       (string[3*i0], string[3*i0+1], rank[3*i0+2])
    #       <= (string[3*i2+2], string[3*i2+3], rank[3*i2+4])
    #   where rank[p] = invSA_1_2[i_xy of string position p]
    def i_xy_to_i_str(i_xy):
        return i_xy // 2 * 3 + 1 + bool(i_xy & 1)

    def may_invSA_1_2(i_xy):
        # Positions past the end stand for the empty suffix (rank -1).
        # Fix: only ``i_xy == n`` was handled; ``n + 1`` is reachable
        # (last i0 at L-1 compared against an i2) and raised IndexError.
        if i_xy >= len(invSA_1_2):
            return -1
        return invSA_1_2[i_xy]

    def le(i_str_0, i_str_1_2):
        # i_str_0 is a multiple of 3; i_str_1_2 is not.
        i0 = i_str_0 // 3
        i_xy_base = i_str_1_2 // 3 * 2
        r = i_str_1_2 % 3
        if r == 1:
            i_str_1 = i_str_1_2
            i1 = i_xy_base  # + 0
            i2_after_i1 = i1 + 1
            i1_after_i0 = 2 * i0  # + 0
            lhs = (string[i_str_0], may_invSA_1_2(i1_after_i0))
            rhs = (string[i_str_1], may_invSA_1_2(i2_after_i1))
        else:
            assert r == 2
            i_str_2 = i_str_1_2
            i2 = i_xy_base + 1
            i1_after_i2 = i2 + 1
            i2_after_i0 = 2 * i0 + 1
            lhs = (string[i_str_0:i_str_0 + 2], may_invSA_1_2(i2_after_i0))
            rhs = (string[i_str_2:i_str_2 + 2], may_invSA_1_2(i1_after_i2))
        return lhs <= rhs

    idc_str_0 = map(lambda i0: 3 * i0, SA_0)
    idc_str_1_2 = map(i_xy_to_i_str, SA_1_2)
    SA = list(
        merge_two_sorted_iterables(idc_str_0, idc_str_1_2, __le__=le))

    if verbose:
        # Same dump as the former ``if __debug__:`` block, minus the
        # blocking ``input('...')`` that made the function unusable
        # without an interactive stdin.
        singleton_or_pair_ls = [
            string[i:i + 1 + divs(3, i - 2)]
            for i in range(L) if not divs(3, i)
        ]
        print(
            f' string {string}'
            f' singleton_or_pair_ls {singleton_or_pair_ls}'
            f' tmp_half_1round {tmp_half_1round}'
            f' sorted_indices_of_singleton_or_pair_ls'
            f' {sorted_indices_of_singleton_or_pair_ls}'
            f' SA_1_2 {SA_1_2}'
            f' SA_0 {SA_0}'
            f' SA {SA} '
        )
    return SA