def fit_transform(self, database):
    def func(col):
        mean, std = np.nanmean(col.data), np.nanstd(col.data)
        col.data = (col.data - mean) / std
        np.nan_to_num(col.data, copy=False)

    parallel(func, database.num_columns)
def crawl_pkgs_meta(packages, target_dir, workers):
    pkgs_dict = LazyBucketDict(target_dir)
    args_list = [(name, pkgs_dict) for name in packages]
    if workers > 1:
        utils.parallel(save_pkg_meta, zip(*args_list), workers=workers)
    else:
        for args in args_list:
            save_pkg_meta(*args)
    pkgs_dict.save()
def fit_transform(self, database):
    todo_list = []
    for table in database.tables.values():
        for cat_col in table.cat_columns:
            for num_col in table.num_columns:
                if not cat_col.keyindex.is_one:
                    todo_list.append((cat_col, num_col))

    def func(args):
        cat_col, num_col = args
        mean, std, skew, kurt = moment_group_by(cat_col.keyindex, num_col.data)
        mean[0] = std[0] = skew[0] = kurt[0] = np.nan
        return mean[cat_col.data], std[cat_col.data], \
            skew[cat_col.data], kurt[cat_col.data]

    rets = parallel(func, todo_list)
    for (cat_col, num_col), (mean, std, skew, kurt) in zip(todo_list, rets):
        self.engine.cache_column(
            cat_col.table.name,
            f'nMean({num_col.name})GroupBy({cat_col.name})', 'num', mean)
        self.engine.cache_column(
            cat_col.table.name,
            f'nStd({num_col.name})GroupBy({cat_col.name})', 'num', std)
        self.engine.cache_column(
            cat_col.table.name,
            f'nSkew({num_col.name})GroupBy({cat_col.name})', 'num', skew)
        self.engine.cache_column(
            cat_col.table.name,
            f'nKurt({num_col.name})GroupBy({cat_col.name})', 'num', kurt)
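# Illustrative sketch (not part of the original code): `moment_group_by` and the
# `keyindex` structure above are project-internal and not shown here. The pattern
# in use is "per-group statistics broadcast back to rows via group codes"; a rough
# pandas/NumPy equivalent, with hypothetical names, could look like this.
import numpy as np
import pandas as pd

def grouped_moments_broadcast(group_codes, values):
    # Compute mean/std/skew/kurtosis per group code, then map each group's
    # statistics back onto its rows, mirroring `mean[cat_col.data]` above.
    s = pd.Series(values)
    g = s.groupby(np.asarray(group_codes))
    stats = pd.DataFrame({
        'mean': g.mean(),
        'std': g.std(),
        'skew': g.skew(),
        'kurt': g.apply(pd.Series.kurt),
    })
    per_row = stats.reindex(np.asarray(group_codes))
    return (per_row['mean'].to_numpy(), per_row['std'].to_numpy(),
            per_row['skew'].to_numpy(), per_row['kurt'].to_numpy())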
def snapshot(self, community):
    home = self.home / community.country
    tarname = '{}-{}.tar.bz2'.format(
        '-'.join(sorted(set(pathlib.PurePath(e.path).stem for e in self.endpoints))),
        int(time()))
    home.mkdir(parents=True, exist_ok=True)
    tar = tarfile.open(str(home / tarname), mode='w:bz2')
    with Cache(home / 'timestamps.json') as cache:
        resources = ((resource, cache.get(resource.url))
                     for resource in chain(*(s.resources(self.endpoints) for s in community)))
        total_waited_time = 0
        for resource, request, timestamp, total in utils.parallel(resources, Snapshot.fetch):
            total_waited_time += total
            cache.set(resource.url, timestamp)
            filename = str(resource.server) + '/' + resource.endpoint
            info = tarfile.TarInfo(name=filename)
            info.size = len(request.content)
            data = io.BytesIO(request.content)
            tar.addfile(info, fileobj=data)
    tar.close()
    return total_waited_time
def fit_transform(self, database):
    # Build keyindex
    def func(col):
        order = col.table.order
        order_data = order.data if order is not None else None
        # TODO
        # set_time = True if order is not None and order.type == 'time' else False
        set_time = False
        col.keyindex = build_key_index(col.data, order_data, col.block['unique'], set_time)

    parallel(func, database.cat_columns)

    # Remove all order attributes
    for table in database.tables.values():
        order = table.order
        if order is not None and order.type == 'order':
            table.drop_column(order.name)
def fit_transform(self, database):
    todo_list = database.attr_columns

    def func(col):
        return is_dup_column(col.type, col.data)

    rets = parallel(func, todo_list)
    for col, is_drop in zip(todo_list, rets):
        if is_drop:
            col.table.drop_column(col.name)
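# Illustrative sketch (assumption, not the project's actual helper): the body of
# `is_dup_column` is not shown above. One plausible reading is "drop a column
# dominated by a single repeated value"; a minimal version of that check:
import pandas as pd

def is_dup_column_sketch(col_type, data, threshold=0.99):
    # Hypothetical: flag the column when one value covers >= `threshold` of its
    # non-null rows. `col_type` is accepted only to mirror the call shape above.
    s = pd.Series(data).dropna()
    if s.empty:
        return True
    return bool(s.value_counts(normalize=True).iloc[0] >= threshold)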
def step_opt(args: Namespace) -> int:
    args.opt_bc_list = []
    for opt in args.opt_levels or ["0"]:
        ret = parallel(
            f"timeout 1m opt {{}} -O{opt} {args.opt_flags} -o {{.}}-O{opt}.bc",
            args.bc_list,
        )
        if ret > 0 and args.exit_on_error:
            return ret
        args.opt_bc_list.extend(
            [os.path.splitext(x)[0] + f"-O{opt}.bc" for x in args.bc_list]
        )
    return 0
def run(self, dataset, test=False):
    mode = 'test' if test else 'train'
    U.log(f'Running algorithm in {mode} mode.')

    def _extract(user):
        return pd.DataFrame(self.extractor(user, self.meta, test))

    grouped = dataset.groupby('installation_id', sort=False)
    users = (g for _, g in grouped)
    if self.pbar:
        users = tqdm(users, total=grouped.ngroups)
    datasets = U.parallel(_extract, users, num_workers=self.num_workers)
    dataset = pd.concat(datasets, axis=0)
    dataset = dataset.reset_index(drop=True)
    return dataset
def main():
    dump_dir = sys.argv[1]
    workers = int(os.environ.get('WORKERS', "1"))
    pypi_fetcher_dir = os.environ.get('pypi_fetcher')
    print(f'Index directory: {pypi_fetcher_dir}')
    assert isdir(pypi_fetcher_dir)
    for bucket in LazyBucketDict.bucket_keys():
        pypi_dict = LazyBucketDict(f"{pypi_fetcher_dir}/pypi")
        dump_dict = LazyBucketDict(dump_dir, restrict_to_bucket=bucket)
        print(f"Prune bucket {bucket}")
        prune_entries(bucket, pypi_dict, dump_dict)
        pypi_dict.save()
        dump_dict.save()
        print(f"Calculating jobs for bucket {bucket}")
        jobs = list(get_jobs(bucket, pypi_dict, dump_dict))
        if not jobs:
            continue
        print(f"Starting batch with {len(jobs)} jobs")
        func = mine_wheel_metadata_full_download
        if workers > 1:
            def f(job):
                return exec_or_return_exc(func, job)
            result = parallel(f, (jobs,), workers=workers)
        else:
            result = [exec_or_return_exc(func, job) for job in jobs]
        for r in result:
            if isinstance(r, Exception):
                continue
            name = r.job.name
            ver = r.job.ver
            pyver = r.job.pyver
            fn = r.job.filename
            if name not in dump_dict:
                dump_dict[name] = {}
            if pyver not in dump_dict[name]:
                dump_dict[name][pyver] = {}
            if ver not in dump_dict[name][pyver]:
                dump_dict[name][pyver][ver] = {}
            dump_dict[name][pyver][ver][fn] = {}
            for key in ('requires_dist', 'provides_extras',
                        'requires_external', 'requires_python'):
                val = getattr(r, key)
                if val:
                    dump_dict[name][pyver][ver][fn][key] = val
        compress(dump_dict)
        dump_dict.save()
def forward(self, img):
    if self.quantize:
        img = np.float_(np.int_(img * 255.0))
    img_parallel = parallel(img, stride=1)
    for i in range(img_parallel.shape[1]):
        patch = img_parallel[:, i].reshape((8, 8))
        # patch = (patch - patch.mean()) / 10
        img_parallel[:, i] = convolve2d(patch, self.param, mode='same').reshape(
            img_parallel[:, i].shape)
    img_parallel = 0.5 * np.sign(img_parallel + self.bias) + 0.5
    img_ht = np.where(
        unparallel_grad(img_parallel, img, stride=1) > 32.0,
        np.ones_like(img), np.zeros_like(img))
    return img_ht
def extract_event_data(df, features=DEFAULT_EVENT_FEATUERS,
                       num_workers=cpu_count(), pbar=False, **opts):
    parse_row = EventParser(features)
    event_data = df.event_data
    if pbar:
        event_data = tqdm(event_data, desc='Processing events')
    df = pd.DataFrame(parallel(parse_row, event_data, num_workers))
    df = fillna(df, 'game_time', method='mean')
    df = fillna(df, 'coordinates.x', method='mode')
    df = fillna(df, 'coordinates.y', method='mode')
    df = fillna(df, 'coordinates.stage_height', method='mode')
    df = fillna(df, 'coordinates.stage_width', method='mode')
    df = fillna(df, 'description', method='mode', fallback='none')
    df = fillna(df, 'media_type', method='const', value='none')
    df = fillna(df, 'identifier', method='const', value='none')
    df = fillna(df, 'duration', method='mean')
    df = fillna(df, 'total_duration', method='mean')
    return df
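# Illustrative sketch (assumption): the `fillna` helper called above is not shown.
# A minimal version matching those calls (fill one column by its mean, its most
# frequent value, or a constant) might look like this; `fillna_sketch` is a
# hypothetical name.
import pandas as pd

def fillna_sketch(df, column, method='mean', value=None, fallback=None):
    if column not in df.columns:
        return df
    col = df[column]
    if method == 'mean':
        fill = col.mean()
    elif method == 'mode':
        modes = col.mode(dropna=True)
        fill = modes.iloc[0] if not modes.empty else fallback
    elif method == 'const':
        fill = value
    else:
        raise ValueError(f'unknown method: {method}')
    if fill is None or pd.isna(fill):
        # nothing sensible to fill with (e.g. all-NaN column)
        return df
    df[column] = col.fillna(fill)
    return df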
def update_cache_columns(self):
    """Flush cache_columns into their tables.

    Drops columns with too many NaNs or too many duplicated values.
    Currently only new 'num' attribute columns are supported.
    """
    def is_drop(args):
        return bool(is_nan_column(*args) or is_dup_column(*args))
        # return False

    self._cache_columns = [(k, v) for k, v in self._cache_columns.items()]
    self._cache_columns.sort()
    ret = parallel(is_drop, [v for k, v in self._cache_columns])
    rest_table2data = {}
    for ((tname, attr), (typ, data)), drop_flag in zip(self._cache_columns, ret):
        if not drop_flag:
            self.database.tables[tname].add_column(attr, typ, data)
    self._cache_columns = {}
def main():
    workers = int(os.environ.get('WORKERS', "1"))
    pypi_fetcher_dir = os.environ.get('pypi_fetcher', '/tmp/pypi_fetcher')
    ensure_pypi_fetcher(pypi_fetcher_dir)
    init_db()
    build_base(store=os.environ.get('STORE', None))
    P = Package
    with Measure('Get processed pkgs from DB'):
        processed = set((p.name, p.version)
                        for p in P.select(P.name, P.version).distinct())
        print(f"DB contains {len(processed)} pkgs at this time")
    for bucket in LazyBucketDict.bucket_keys():
        with Measure("getting jobs"):
            jobs = get_jobs(pypi_fetcher_dir, bucket, processed, amount=1000)
        if not jobs:
            continue
        with Measure('batch'):
            if workers > 1:
                pool_results = utils.parallel(extract_requirements, (jobs,),
                                              workers=workers, use_processes=False)
            else:
                pool_results = [extract_requirements(args) for args in jobs]
        results = []
        for i, res in enumerate(pool_results):
            if isinstance(res, Exception):
                print(f"Problem with {jobs[i].name}:{jobs[i].version}")
                if isinstance(res, sp.CalledProcessError):
                    print(res.stderr)
                traceback.print_exception(res, res, res.__traceback__)
            else:
                for r in res:
                    results.append(r)
        sleep(1)
        with db.atomic():
            with Measure('bulk insert'):
                Package.bulk_create([Package(**r) for r in results])
    if os.environ.get('CLEANUP', None):
        cleanup()
def fit_transform(self, database):
    for table in sorted(database.tables.values()):
        id_col = table.id
        if id_col is None:
            todo_list = table.cat_columns

            def func(col):
                return pd.Series(col.data).nunique(dropna=True)

            n_unique = np.array(parallel(func, todo_list), dtype=np.int32)
            idx = np.where(n_unique == table.n_lines)[0]
            if len(idx) == 1:
                table.info[todo_list[idx[0]].name] = 'id'
            elif len(idx) > 1:
                for i in idx[1:]:
                    table.drop_column(todo_list[i].name)
                logging.warning(
                    'More than one column in table %s has unique values: %s'
                    ', we have dropped the unnecessary ones.' %
                    (table.name, todo_list[idx[0]].global_name))
                table.info[todo_list[idx[0]].name] = 'id'
        else:
            assert pd.Series(id_col.data).nunique(dropna=True) == table.n_lines, \
                '%s is not an id column' % id_col.global_name
def CW_attack_fast(img_0, mean_cat_attack, cov_cat_attack, pi_cat_attack,
                   mean_grass_attack, cov_grass_attack, pi_grass_attack,
                   mean_cat_defense, cov_cat_defense, pi_cat_defense,
                   mean_grass_defense, cov_grass_defense, pi_grass_defense,
                   original_img, truth, l=5, target_index=1, stride=8,
                   alpha=0.0001, display_iter=300, title='', path='./Outputs',
                   preprocessing=[None, None], attack_type='blackbox'):
    iter_num = 0
    parallel_img_0 = parallel(img_0, stride=stride)
    img_k = img_0
    W_cat, w_cat, w_0_cat = get_parameters(mean_cat_attack, cov_cat_attack, pi_cat_attack)
    W_grass, w_grass, w_0_grass = get_parameters(mean_grass_attack, cov_grass_attack, pi_grass_attack)
    while iter_num < 300:
        iter_num += 1
        parallel_img_k = parallel(img_k, stride=stride)
        if attack_type == 'whitebox' and preprocessing[0] is not None:
            parallel_img_k = preprocessing[0].forward(parallel_img_k)
            parallel_img_0 = preprocessing[0].forward(parallel_img_0)
        current_grad = gradient_CW(patch_vec_k=parallel_img_k,
                                   patch_vec_0=parallel_img_0,
                                   mean_cat=mean_cat_attack,
                                   cov_cat=cov_cat_attack,
                                   pi_cat=pi_cat_attack,
                                   mean_grass=mean_grass_attack,
                                   cov_grass=cov_grass_attack,
                                   pi_grass=pi_grass_attack,
                                   W_cat=W_cat, w_cat=w_cat, w_0_cat=w_0_cat,
                                   W_grass=W_grass, w_grass=w_grass, w_0_grass=w_0_grass,
                                   l=l, target_index=target_index)
        grad = unparallel_grad(current_grad, img_0, stride=stride)
        img_k_1 = np.clip(img_k - alpha * grad, 0, 1)
        change = np.linalg.norm(img_k_1 - img_k)
        img_k = img_k_1
        if iter_num % display_iter == 0:
            print("\n")
            display_image(img_perturbed=img_k_1,
                          mean_cat=mean_cat_defense,
                          cov_cat=cov_cat_defense,
                          pi_cat=pi_cat_defense,
                          mean_grass=mean_grass_defense,
                          cov_grass=cov_grass_defense,
                          pi_grass=pi_grass_defense,
                          original_img=original_img, truth=truth,
                          title=title + 'iter_' + str(iter_num),
                          stride=stride, preprocessing=preprocessing[1],
                          path=path)
            print(' Change:{}'.format(change))
        if change < 0.001 and stride == 8:
            print("\n\nMax Iteration:" + str(iter_num))
            break
        elif change < 0.01 and stride == 1:
            print("\n\nMax Iteration:" + str(iter_num))
            break
    return img_k_1
def test_fapkc_encryption(Ring, block_size, stream_length, test_uncompiled=False, print_data=False):
    print("FAPKC encryption / decryption test")
    print(" algebra:", Ring, ", data block size =", block_size, ", stream length =", stream_length)
    Automaton = automaton_factory(Ring)
    ConstVector = Automaton.base_const_vector
    for memory_size in range(1, 33):
        print()
        print(" memory_size =", memory_size)
        text = [ConstVector.random(block_size) for i in range(stream_length)]

        print(" generating FAPKC0 key pair")
        start_time = time()
        encrypt, decrypt = Automaton.fapkc0(block_size=block_size, memory_size=memory_size)
        print(" time:", int(time() - start_time))

        if test_uncompiled:
            print(" encryption/decryption test")
            print(" encrypt... length =", stream_length)
            start_time = time()
            cipher_1 = list(encrypt(text))
            print(" time:", int(time() - start_time))
            if print_data:
                print(''.join(['{:02x}'.format(int(_x)) for _x in cipher_1]))

            print(" decrypt... length =", stream_length)
            start_time = time()
            text_1 = list(decrypt(cipher_1))
            print(" time:", int(time() - start_time))
            if print_data:
                print(''.join([' '] * memory_size + ['{:02x}'.format(int(_x)) for _x in text]))
                print(''.join(['{:02x}'.format(int(_x)) for _x in text_1]))
            assert text_1[memory_size:] == text[:-memory_size]

        compiler = Compiler()
        with parallel(0):
            print(" compiling encrypt automaton")
            start_time = time()
            encrypt.compile('encrypt', compiler)
            print(" time:", int(time() - start_time))

            print(" compiling decrypt automaton")
            start_time = time()
            decrypt.compile('decrypt', compiler)
            print(" time:", int(time() - start_time))

        print(" code generation")
        code = compiler.compile()
        encrypt = encrypt.wrap_compiled('encrypt', code)
        decrypt = decrypt.wrap_compiled('decrypt', code)
        print(" time:", int(time() - start_time))

        print(" testing compiled automata")
        with code:
            print(" encrypt... length =", stream_length)
            start_time = time()
            cipher_2 = list(encrypt(text))
            print(" time:", int(time() - start_time))
            if print_data:
                print(''.join(['{:02x}'.format(int(_x)) for _x in cipher_2]))

            print(" decrypt... length =", stream_length)
            start_time = time()
            text_2 = list(decrypt(cipher_2))
            print(" time:", int(time() - start_time))
            if print_data:
                print(''.join([' '] * memory_size + ['{:02x}'.format(int(_x)) for _x in text]))
                print(''.join(['{:02x}'.format(int(_x)) for _x in text_2]))
def step_strip(args: Namespace) -> int:
    ret = parallel("opt {} -strip-debug -strip -o {.}-strip.bc", args.bc_list)
    if ret > 0 and args.exit_on_error:
        return ret
    args.bc_list = [os.path.splitext(x)[0] + "-strip.bc" for x in args.bc_list]
    return 0
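# Illustrative sketch (assumption): the `parallel(cmd_template, files)` helper used
# by these step_* functions is not shown; its '{}' / '{.}' placeholders follow the
# GNU parallel convention (input path / input path without extension). A minimal
# thread-pool stand-in with that interface could look like this; the name
# `run_parallel_sketch` and the return convention are assumptions.
import os
import subprocess
from concurrent.futures import ThreadPoolExecutor

def run_parallel_sketch(cmd_template, files, workers=os.cpu_count()):
    # Expand the placeholders per file, run each command in a shell,
    # and return the number of commands that exited non-zero.
    def run_one(path):
        cmd = cmd_template.replace('{.}', os.path.splitext(path)[0]).replace('{}', path)
        return subprocess.run(cmd, shell=True).returncode

    with ThreadPoolExecutor(max_workers=workers) as pool:
        return sum(1 for rc in pool.map(run_one, files) if rc != 0)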
def automaton_test_suite(verbose=False):
    if verbose: print("running test suite")

    Automaton = automaton_factory(BooleanRing.get_algebra())
    Vector = Automaton.base_const_vector
    zero_v = Vector.zero(8)

    '''
    print()
    print("Testing nonlinear automata")
    for memory_size in range(1, 5):
        print()
        print("test for memory size", memory_size)
        print(" generating automata...")
        ls, li = Automaton.nonlinear_nodelay_wifa_pair(block_size=8, memory_size=memory_size)
        print(" compiling automata...")
        compiler = Compiler()
        with parallel(0):
            ls.compile('ls', compiler)
            li.compile('li', compiler)
        code = compiler.compile()
        ls = ls.wrap_compiled('ls', code)
        li = li.wrap_compiled('li', code)
        xi = [Vector.random(8) for _i in range(1024)]
        print(" xi =", ''.join(['{:02x}'.format(int(_x)) for _x in xi]))
        y = list(ls(xi))
        print(" y =", ''.join(['{:02x}'.format(int(_x)) for _x in y]))
        xo = list(li(y))
        print(" xo =", ''.join(['{:02x}'.format(int(_x)) for _x in xo]))
        assert xi == xo
        print(" ok", memory_size)

    print()
    print("Testing linear automata")
    for memory_size in range(1, 5):
        print()
        print("test for memory size", memory_size)
        print(" generating automata...")
        ls, li = Automaton.linear_delay_wifa_pair(block_size=8, memory_size=memory_size)
        print(" compiling automata...")
        compiler = Compiler()
        with parallel(0):
            ls.compile('ls', compiler)
            li.compile('li', compiler)
        code = compiler.compile()
        ls = ls.wrap_compiled('ls', code)
        li = li.wrap_compiled('li', code)
        xi = [Vector.random(8) for _i in range(1024)]
        print(" xi =", ''.join(['{:02x}'.format(int(_x)) for _x in xi]))
        y = list(ls(xi + [Vector.random(8) for _i in range(memory_size)]))
        print(" y =", ''.join(['{:02x}'.format(int(_x)) for _x in y]))
        xo = list(li(y))[memory_size:]
        print(" xo =", ''.join(['{:02x}'.format(int(_x)) for _x in xo]))
        assert xi == xo
        print(" ok", memory_size)
    '''

    print()
    print("Testing FAPKC0")
    for memory_size in range(1, 5):
        print()
        print("test for memory size", memory_size)
        print(" generating automata...")
        ls, li = Automaton.fapkc0(block_size=8, memory_size=memory_size)
        print(" composing identity automaton...")
        ll = ls @ li
        ll.optimize()
        print(ll.output_transition)
        print(ll.state_transition)
        print(" compiling automata...")
        compiler = Compiler()
        with parallel(0):
            ls.compile('ls', compiler)
            li.compile('li', compiler)
            ll.compile('ll', compiler)
        code = compiler.compile()
        ls = ls.wrap_compiled('ls', code)
        li = li.wrap_compiled('li', code)
        ll = ll.wrap_compiled('ll', code)
        xi = [Vector.random(8) for _i in range(1024)]
        print(" xi =", ''.join(['{:02x}'.format(int(_x)) for _x in xi]))
        y = list(ls(xi + [Vector.random(8) for _i in range(memory_size)]))
        print(" y =", ''.join(['{:02x}'.format(int(_x)) for _x in y]))
        xo = list(li(y))[memory_size:]
        print(" xo =", ''.join(['{:02x}'.format(int(_x)) for _x in xo]))
        assert xi == xo, "Encryption / decryption test failed."
        print(" testing identity automaton...")
        xr = list(ll(xi + [Vector.random(8) for _i in range(memory_size)]))[memory_size:]
        assert xi == xr, "Identity automaton test failed."
        print(" ok")

    #quit()
    #Automaton.fapkc0(memory_size=6)

    '''
    for i in (2, 3, 4, 5, 16, 64, 128, 512, 1024):
        if verbose: print()
        if verbose: print("test ModularRing(size={})".format(i))
        ring = ModularRing.get_algebra(size=i)
        if verbose: print(" automaton test")
        test_automaton_composition(ring)
    '''

    if verbose: print()
    if verbose: print("test BooleanRing()")
    ring = BooleanRing.get_algebra()
    if verbose: print(" automaton test")
    test_automaton_composition(ring)

    '''
    for i in (2, 3, 4, 5, 16, 64, 128, 512, 1024):
        if verbose: print()
        if verbose: print("test GaloisRing(size={})".format(i))
        field = GaloisField.get_algebra(size=i)
        if verbose: print(" automaton test")
        test_automaton_composition(field)

    assert BinaryRing.get_algebra(exponent=1)(1) != RijndaelRing(1)

    for i in (2, 3, 4, 5, 8, 9, 10):
        if verbose: print()
        if verbose: print("test BinaryRing(exponent={})".format(i))
        field = BinaryRing.get_algebra(exponent=i)
        if verbose: print(" automaton test")
        test_automaton_composition(field)
    '''

    if verbose: print()
    if verbose: print("test RijndaelField()")
    field = RijndaelField
    if verbose: print(" automaton test")
    test_automaton_composition(field)
def fit_transform(self, database):
    def func(col):
        np.nan_to_num(col.data, copy=False)

    parallel(func, database.num_columns)
x_draw = np.linspace(0, big_zero + 1000, 500)
y_draw = utils.f_2(x_draw, params[0], params[1], params[2])
print(params, 'stop=', stop)
file = open('final_df/r6(20180916~20180930)all/params.txt', 'w')
file.write(str(params[0]) + ',' + str(params[1]) + ',' + str(params[2]) + '\n' + str(big_zero))
file.close()
print('========================')
plt.plot(x_draw, y_draw, 'g-', label='fitting curve')
plt.legend(loc='best')
plt.show()

# All weight and time processing is done; start the parallel iterative computation:
print(data.shape)
data, user_ts, goal_ts = utils.parallel(data, params, big_zero)[:3]

# Take the compensation term into account and update the ts_pts values:
print('ts is updating...')
data = utils.parallel_ts(data, user_ts, goal_ts)
print('DATA is saving...')
data.to_csv('final_df/r6(20180916~20180930)all/data_round6.csv', index=False)
print('==========================================')
print('school rank1 is saving...')
frame1 = result.school_rank(data, user_ts)
frame1.to_csv('final_df/r6(20180916~20180930)all/school_rank_round6.csv')
print('==========================================')
def step_souper(args: Namespace) -> int:
    return parallel("souper {} > {.}.souper", args.bc_list)
def step_dis(args: Namespace) -> int:
    return parallel("llvm-dis", args.bc_list) + parallel("llvm-dis", args.opt_bc_list)
def generate_encrypt_aes_128_fsm(key):
    print("Composing AES round prefix automaton...")
    #print(" generate_clock:", [str(_x) for _x in generate_clock_fsm.output_transition], [str(_x) for _x in generate_clock_fsm.state_transition])
    #print(" delay 16:", [str(_x) for _x in delay_16_fsm.output_transition], [str(_x) for _x in delay_16_fsm.state_transition])
    aes_encrypt_round_prefix_fsm = generate_clock_fsm @ delay_16_fsm
    #print(" unoptimized:", [_x.circuit_size() for _x in aes_encrypt_round_prefix_fsm.output_transition], [_x.circuit_size() for _x in aes_encrypt_round_prefix_fsm.state_transition])
    aes_encrypt_round_prefix_fsm.optimize()
    #print(" optimized:", [str(_x) for _x in aes_encrypt_round_prefix_fsm.output_transition], [str(_x) for _x in aes_encrypt_round_prefix_fsm.state_transition])

    print("Composing AES round suffix automaton...")
    print(" sub_bytes:",
          [_x.circuit_size() for _x in encrypt_sub_bytes_fsm.output_transition],
          [_x.circuit_size() for _x in encrypt_sub_bytes_fsm.state_transition])
    print("  ",
          [len(_x.variables_set()) for _x in encrypt_sub_bytes_fsm.output_transition],
          [len(_x.variables_set()) for _x in encrypt_sub_bytes_fsm.state_transition])
    print(" shift_rows:",
          [_x.circuit_size() for _x in encrypt_shift_rows_fsm.output_transition],
          [_x.circuit_size() for _x in encrypt_shift_rows_fsm.state_transition])
    print("  ",
          [len(_x.variables_set()) for _x in encrypt_shift_rows_fsm.output_transition],
          [len(_x.variables_set()) for _x in encrypt_shift_rows_fsm.state_transition])
    print(" mix_columns:",
          [_x.circuit_size() for _x in encrypt_mix_columns_fsm.output_transition],
          [_x.circuit_size() for _x in encrypt_mix_columns_fsm.state_transition])
    print("  ",
          [len(_x.variables_set()) for _x in encrypt_mix_columns_fsm.output_transition],
          [len(_x.variables_set()) for _x in encrypt_mix_columns_fsm.state_transition])
    print(" delay_16:",
          [_x.circuit_size() for _x in delay_16_fsm.output_transition],
          [_x.circuit_size() for _x in delay_16_fsm.state_transition])
    print("  ",
          [len(_x.variables_set()) for _x in delay_16_fsm.output_transition],
          [len(_x.variables_set()) for _x in delay_16_fsm.state_transition])
    print(" remove_clock:",
          [_x.circuit_size() for _x in remove_clock_fsm.output_transition],
          [_x.circuit_size() for _x in remove_clock_fsm.state_transition])
    print("  ",
          [len(_x.variables_set()) for _x in remove_clock_fsm.output_transition],
          [len(_x.variables_set()) for _x in remove_clock_fsm.state_transition])

    with parallel():
        aes_encrypt_round_suffix_fsm = (encrypt_sub_bytes_fsm @ encrypt_shift_rows_fsm
                                        @ encrypt_mix_columns_fsm @ delay_16_fsm
                                        @ remove_clock_fsm)
        print(" unoptimized:",
              [_x.circuit_size() for _x in aes_encrypt_round_suffix_fsm.output_transition],
              [_x.circuit_size() for _x in aes_encrypt_round_suffix_fsm.state_transition])
        aes_encrypt_round_suffix_fsm.optimize()
        print(" optimized:",
              [_x.circuit_size() for _x in aes_encrypt_round_suffix_fsm.output_transition],
              [_x.circuit_size() for _x in aes_encrypt_round_suffix_fsm.state_transition])
        print("  ",
              [_x.circuit_size() for _x in aes_encrypt_round_suffix_fsm.output_transition],
              [_x.circuit_size() for _x in aes_encrypt_round_suffix_fsm.state_transition])

    print("Calculating AES 128 key automaton...")
    argument = vector(Automaton.x[_i] for _i in range(18))
    history = deque(vector(Automaton.s[_j, _i] for _i in range(8)) for _j in range(1, 17))
    generator = add_round_key_128(key, [argument], history)
    result = next(generator)
    exhaust(generator)
    add_round_key_128_fsm = Automaton(output_transition=result,
                                      state_transition=history[0])
    add_round_key_128_fsm.optimize()
    print("  ",
          [_x.circuit_size() for _x in add_round_key_128_fsm.output_transition],
          [_x.circuit_size() for _x in add_round_key_128_fsm.state_transition])

    print("Composing AES single round automaton...")
    encrypt_aes_128_fsm = aes_encrypt_round_prefix_fsm @ add_round_key_128_fsm @ aes_encrypt_round_suffix_fsm
    encrypt_aes_128_fsm.optimize()
    print("  ",
          [_x.circuit_size() for _x in encrypt_aes_128_fsm.output_transition],
          [_x.circuit_size() for _x in encrypt_aes_128_fsm.state_transition])
def test_homomorphic_encryption(Ring, block_size, memblock_size, length):
    print("Gonzalez-Llamas homomorphic encryption test")
    print(" algebra:", Ring, ", data block size:", block_size,
          ", memory block size:", memblock_size, ", stream length:", length)

    Automaton = automaton_factory(Ring)
    Vector = Automaton.base_vector
    ConstVector = Automaton.base_const_vector

    x = Vector([Automaton.x[_i] for _i in range(block_size)])
    s_1 = Vector([Automaton.s[1, _i] for _i in range(memblock_size)])
    s_2 = Vector([Automaton.s[2, _i] for _i in range(memblock_size)])
    s_3 = Vector([Automaton.s[3, _i] for _i in range(memblock_size)])
    variables = list(x) + list(s_1) + list(s_2) + list(s_3)

    def automaton_input():
        for i in range(length):
            yield ConstVector.random(block_size)

    for i in range(1, 5):
        print()
        print(" round", i)
        print(" generating automata...")
        memory_size = i + 4
        #mixer, unmixer = Automaton.linear_nodelay_wifa_pair(block_size=block_size, memory_size=memory_size)
        mixer, unmixer = Automaton.fapkc0(block_size=block_size, memory_size=memory_size)
        plain_automaton = Automaton(
            Vector.random(dimension=block_size, variables=variables, order=3),
            Vector.random(dimension=memblock_size, variables=variables, order=3))

        print(" optimizing automata...")
        start_time = time()
        print(f" mixer: {mixer.output_transition.circuit_size()} {mixer.state_transition.circuit_size()} {mixer.output_transition.dimension} {mixer.state_transition.dimension}")
        mixer.optimize()
        print(f"  {mixer.output_transition.circuit_size()} {mixer.state_transition.circuit_size()}")
        print(f" unmixer: {unmixer.output_transition.circuit_size()} {unmixer.state_transition.circuit_size()} {unmixer.output_transition.dimension} {unmixer.state_transition.dimension}")
        unmixer.optimize()
        print(f"  {unmixer.output_transition.circuit_size()} {unmixer.state_transition.circuit_size()}")
        print(f" plain: {plain_automaton.output_transition.circuit_size()} {plain_automaton.state_transition.circuit_size()} {plain_automaton.output_transition.dimension} {plain_automaton.state_transition.dimension}")
        plain_automaton.optimize()
        print(f"  {plain_automaton.output_transition.circuit_size()} {plain_automaton.state_transition.circuit_size()}")
        print(" time:", int(time() - start_time))

        print(" composing automata...")
        start_time = time()
        homo_automaton = mixer @ plain_automaton @ unmixer
        print(" time:", int(time() - start_time))

        print(" mixing states")
        start_time = time()
        homo_automaton.mix_states()
        print(" time:", int(time() - start_time))

        print(" optimizing automata...")
        start_time = time()
        print(f" homomorphic: {homo_automaton.output_transition.circuit_size()} {homo_automaton.state_transition.circuit_size()} {homo_automaton.output_transition.dimension} {homo_automaton.state_transition.dimension}")
        print(f"  {[_circuit.circuit_size() for _circuit in homo_automaton.output_transition]} {[_circuit.circuit_size() for _circuit in homo_automaton.state_transition]}")
        homo_automaton.optimize()
        print(f"  {homo_automaton.output_transition.circuit_size()} {homo_automaton.state_transition.circuit_size()}")
        print(f"  {[_circuit.circuit_size() for _circuit in homo_automaton.output_transition]} {[_circuit.circuit_size() for _circuit in homo_automaton.state_transition]}")
        print(" time:", int(time() - start_time))

        print(" compiling automata...")
        start_time = time()
        compiler = Compiler()
        #try:
        #    Ring.compile_tables('RijndaelField', compiler)
        #except AttributeError:
        #    pass
        with parallel(0):
            mixer.compile('m', compiler)
            unmixer.compile('u', compiler)
            plain_automaton.compile('p', compiler)
            homo_automaton.compile('h', compiler)
        code = compiler.compile()
        #Path('automaton_' + str(i) + '.bc').write_bytes(code.modules[0].as_bitcode())
        mixer = mixer.wrap_compiled('m', code)
        unmixer = unmixer.wrap_compiled('u', code)
        plain_automaton = plain_automaton.wrap_compiled('p', code)
        homo_automaton = homo_automaton.wrap_compiled('h', code)
        print(" time:", int(time() - start_time))

        print(" encryption/decryption test...")
        text = list(automaton_input())
        start_time = time()
        with code:
            result1 = list(homo_automaton(text))
            result2 = list(mixer(plain_automaton(unmixer(text))))
        print(" actual: ", ''.join(['{:02x}'.format(int(_ch)) for _ch in result1]))
        print(" predicted:", ''.join(['{:02x}'.format(int(_ch)) for _ch in result2]))
        assert result1 == result2
        print(" time:", int(time() - start_time))

        Path(f'homomorphic_{i}.ll').write_text(str(compiler))
print(ls.state_transition.circuit_size(), [_x.circuit_size() for _x in ls.state_transition])
print()
print(li.output_transition.circuit_size(), [_x.circuit_size() for _x in li.output_transition])
print(li.state_transition.circuit_size(), [_x.circuit_size() for _x in li.state_transition])

ns, ni = Automaton.nonlinear_nodelay_wifa_pair(block_size=8, memory_size=5)

print()
print(ns.output_transition.circuit_size(), [_x.circuit_size() for _x in ns.output_transition])
print(ns.state_transition.circuit_size(), [_x.circuit_size() for _x in ns.state_transition])
print()
print(ni.output_transition.circuit_size(), [_x.circuit_size() for _x in ni.output_transition])
print(ni.state_transition.circuit_size(), [_x.circuit_size() for _x in ni.state_transition])

straight = ns @ ls
inverse = li @ ni

print()
print(straight.output_transition.circuit_size(), [_x.circuit_size() for _x in straight.output_transition])
print(straight.state_transition.circuit_size(), [_x.circuit_size() for _x in straight.state_transition])
print()
print(inverse.output_transition.circuit_size(), [_x.circuit_size() for _x in inverse.output_transition])
print(inverse.state_transition.circuit_size(), [_x.circuit_size() for _x in inverse.state_transition])
'''

with parallel():
    #test_fapkc_encryption(BooleanRing.get_algebra(), 8, 64, print_data=True)
    test_homomorphic_encryption(BooleanRing.get_algebra(), 8, 8, 128)
def main():
    # settings related to performance/parallelization
    amount_buckets = int(os.environ.get('AMOUNT_BUCKETS', "256"))
    limit_names = set(filter(lambda n: bool(n), os.environ.get('LIMIT_NAMES', "").split(',')))
    max_minutes = int(os.environ.get('MAX_MINUTES', "0"))
    bucket_jobs = int(os.environ.get('BUCKET_JOBS', "0"))
    start_bucket = int(os.environ.get('BUCKET_START', "0"))
    workers = int(os.environ.get('WORKERS', multiprocessing.cpu_count() * 2))

    # general settings
    dump_dir = os.environ.get('DUMP_DIR', "./sdist")
    extractor_src = os.environ.get("EXTRACTOR_SRC")
    if not extractor_src:
        raise Exception("Set env variable 'EXTRACTOR_SRC to {mach-nix}/lib/extractor'")
    min_free_gb = int(os.environ.get('MIN_FREE_GB', "0"))
    py_vers_short = os.environ.get('PYTHON_VERSIONS', "27,36,37,38,39,310").strip().split(',')
    pypi_fetcher_dir = os.environ.get('PYPI_FETCHER', '/tmp/pypi_fetcher')
    store = os.environ.get('STORE', None)
    deadline_total = time() + max_minutes * 60 if max_minutes else None

    # cache build time deps, otherwise first job will be slow
    with Measure("ensure build time deps"):
        build_base(extractor_src, py_vers_short, store=store)

    garbage_collected = False
    for idx, bucket in enumerate(LazyBucketDict.bucket_keys()):
        # calculate per bucket deadline if MAX_MINUTES is used
        if deadline_total:
            amount = min(amount_buckets, 256 - start_bucket)
            deadline = time() + (deadline_total - time()) / amount
        else:
            deadline = None
        if idx < start_bucket or idx >= start_bucket + amount_buckets:
            continue
        pkgs_dict = LazyBucketDict(dump_dir, restrict_to_bucket=bucket)
        pypi_index = LazyBucketDict(f"{pypi_fetcher_dir}/pypi", restrict_to_bucket=bucket)
        # load error data
        error_dict = LazyBucketDict(dump_dir + "-errors", restrict_to_bucket=bucket)
        decompress(error_dict.by_bucket(bucket))
        with Measure('Get processed pkgs'):
            print(f"DB contains {len(list(pkgs_dict.keys()))} pkgs at this time for bucket {bucket}")
        with Measure("decompressing data"):
            decompress(pkgs_dict.by_bucket(bucket))
        # purge data for old python versions and packages which got deleted from pypi
        with Measure("purging packages"):
            purge(pypi_index, pkgs_dict, bucket, py_vers_short)
        with Measure("getting jobs"):
            jobs = get_jobs(pypi_index, error_dict, pkgs_dict, bucket, py_vers_short,
                            limit_num=bucket_jobs, limit_names=limit_names)
            if not jobs:
                continue
            compute_drvs(jobs, extractor_src, store=store)
        # ensure that all the build time dependencies are cached before starting,
        # otherwise jobs might time out
        if garbage_collected:
            with Measure("ensure build time deps"):
                build_base(extractor_src, py_vers_short, store=store)
        with Measure('executing jobs'):
            if workers > 1:
                pool_results = utils.parallel(
                    extract_requirements,
                    (jobs,
                     (deadline,) * len(jobs),
                     (len(jobs),) * len(jobs),
                     (store,) * len(jobs)),
                    workers=workers,
                    use_processes=False)
            else:
                pool_results = [extract_requirements(args, deadline, store) for args in jobs]

        # filter out exceptions
        results = []
        for i, res in enumerate(pool_results):
            if not isinstance(res, Exception):
                for r in res:
                    results.append(r)

        # insert new data
        for pkg in sorted(results, key=lambda pkg: (pkg.name, pkg.version, sort_key_pyver(pkg.py_ver))):
            py_ver = ''.join(filter(lambda c: c.isdigit(), pkg.py_ver))
            if pkg.error:
                target = error_dict
            else:
                target = pkgs_dict
            insert(py_ver, pkg.name, pkg.version, pkg_to_dict(pkg), target, error=pkg.error)

        # compress and save
        with Measure("compressing data"):
            compress(pkgs_dict.by_bucket(bucket))
            compress(error_dict.by_bucket(bucket))
            print("finished compressing data")
        with Measure("saving data"):
            pkgs_dict.save()
            error_dict.save()

        # collect garbage if free space < MIN_FREE_GB
        if shutil.disk_usage(store or "/nix/store").free / (1000**3) < min_free_gb:
            with Measure("collecting nix store garbage"):
                sp.run(f"nix-collect-garbage {f'--store {store}' if store else ''}",
                       capture_output=True, shell=True)
            garbage_collected = True

        # stop execution if deadline occurred
        if deadline_total and time() > deadline_total:
            print(f"Deadline occurred. Stopping execution. Last Bucket was {bucket}")
            break
def all(countries=None):
    """Scrapes servers for all communities at once in parallel."""
    action = lambda c: (c, servers(c))
    return utils.parallel(countries or communities(), action)
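# Illustrative sketch (assumption): the `utils.parallel` helpers used throughout
# these snippets are project-local and their signatures differ (function-first,
# iterable-first, or a shell-command template). For the call shape used here,
# `parallel(items, action)`, a thread-pool stand-in could look like this;
# `parallel_map_sketch` is a hypothetical name.
from concurrent.futures import ThreadPoolExecutor

def parallel_map_sketch(items, action, workers=8):
    # Apply `action` to every item on a thread pool and return the results
    # as a list, preserving input order.
    with ThreadPoolExecutor(max_workers=workers) as pool:
        return list(pool.map(action, items))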