def worker_main(self, idx):
    """
    Data generation worker loop.

    Repeatedly takes a ``(function, cur_round)`` job from ``self.joq_queue``
    without blocking and stops as soon as the queue is empty. For each job:

    * a temporary directory is obtained from ``self.gen_randomdir``,
    * the generator configuration is built (``egenerator.get_config`` when
      ``self.is_function_egen(function)`` is true, otherwise a plain dict
      with ``algorithm``, ``round`` and ``seed``),
    * ``self.data_generator`` produces the data file, which is copied into
      ``self.args.results_dir`` unless the target already exists and is
      that very file,
    * the configuration is written as JSON into the results directory, and
    * ``(data_file, cfgname, config_js)`` is stored in ``self.res_map``
      under ``self.res_lock``.

    The temporary directory is removed and ``task_done()`` is called on
    every exit path of a job (success, invalid data file, exception).
    Any exception raised while processing a job is logged and converted
    into ``sys.exit(1)``.

    :param idx: worker index, used only in log messages
    :return: None
    """
    logger.info('Starting worker %d', idx)
    while True:
        try:
            function, cur_round = self.joq_queue.get_nowait()
        except QEmpty:
            # Nothing left to do for this worker.
            break

        # Stays None if gen_randomdir itself fails, so cleanup can be skipped.
        tmpdir = None
        try:
            tmpdir = self.gen_randomdir(function, cur_round)
            if self.is_function_egen(function):
                config_js = egenerator.get_config(
                    function_name=function, rounds=cur_round, data=self.data_to_gen)
            else:
                config_js = {'algorithm': function, 'round': cur_round, 'seed': self.seed}

            logger.info('Generating %s:%s', function, cur_round)
            data_file = self.data_generator(
                tmpdir=tmpdir, function=function, cur_round=cur_round, config_js=config_js)
            if data_file is None:
                logger.error('Data file is invalid')
                # The finally clause below still cleans up and marks the job done.
                continue

            new_data_file = os.path.join(self.args.results_dir, os.path.basename(data_file))
            already_in_place = os.path.exists(new_data_file) \
                and os.path.samefile(data_file, new_data_file)
            if not already_in_place:
                logger.info("Copying to %s", new_data_file)
                shutil.copy(data_file, new_data_file)

            cfgname = 'config_%s_r%d_%04dMB' % (
                function, cur_round, self.data_to_gen // 1024 // 1024)
            with open(os.path.join(self.args.results_dir, cfgname), 'w') as fh:
                json.dump(config_js, fh, indent=2)

            # NOTE(review): the recorded path is the generator's output path,
            # not the copy in results_dir - confirm consumers do not need the
            # file after the temporary directory is removed.
            with self.res_lock:
                self.res_map[(function, cur_round)] = (data_file, cfgname, config_js)

            logger.info('Generated %s:%s', function, cur_round)

        except Exception as e:
            logger.error('Exception when computing %s:%s : %s', function, cur_round, e)
            logger.debug(traceback.format_exc())
            sys.exit(1)

        finally:
            # Remove the test dir on every path; nested so that the job is
            # marked finished even when the cleanup itself raises.
            try:
                if tmpdir is not None:
                    self.clean_temp_dir(tmpdir)
            finally:
                self.joq_queue.task_done()

    logger.info('Terminating worker %d', idx)
def egen_benchmark(self):
    """
    Benchmarks egenerator speed.

    Logs the function names listed in ``egenerator.ESTREAM``, ``SHA3`` and
    ``BLOCK`` and then, for every function in ``egenerator.ROUNDS`` (taken
    in sorted order), runs ``self.eacirc_generator`` with the highest listed
    round count and ``data=10 * 1024 * 1024``, measuring the elapsed time
    of the generator call only.

    A function whose generation raises is logged and left out of the
    results; its temporary directory is removed either way. The collected
    timings are finally printed to stdout as JSON, a separator line, and
    ``function,time`` CSV rows.

    :return: None
    """
    fnc_estream = sorted(egenerator.ESTREAM.keys())
    fnc_sha3 = sorted(egenerator.SHA3.keys())
    fnc_block = sorted(egenerator.BLOCK.keys())
    fnc_rounds = sorted(egenerator.ROUNDS.keys())

    logger.info('Egenerator benchmark. Total functions:')
    logger.info(' Estream: %s', fnc_estream)
    logger.info(' Sha3: %s', fnc_sha3)
    logger.info(' Block: %s', fnc_block)
    logger.info(' Round reduced targets: %s', fnc_rounds)

    data_to_gen = 1024 * 1024 * 10

    # A monotonic, high-resolution clock is the right tool for measuring
    # durations; fall back to time.time on interpreters without it.
    timer = getattr(time, 'perf_counter', time.time)

    results = {}
    for fnc in fnc_rounds:
        cur_round = max(egenerator.ROUNDS[fnc])
        logger.info('Testing %s, round %s', fnc, cur_round)

        tmpdir = self.gen_randomdir(fnc, cur_round)
        try:
            config_js = egenerator.get_config(
                function_name=fnc, rounds=cur_round, data=data_to_gen)

            time_start = timer()
            self.eacirc_generator(
                tmpdir=tmpdir, generator_path=self.generator_path, config_js=config_js)
            time_elapsed = timer() - time_start

            results[fnc] = time_elapsed
            logger.info('Finished, time: %s', time_elapsed)

        except Exception as e:
            # Best effort: one failing function must not abort the benchmark.
            logger.error('Exception in generating %s: %s', fnc, e)
            logger.debug(traceback.format_exc())

        finally:
            self.clean_temp_dir(tmpdir)

    # JSON + CSV output
    print(json.dumps(results, indent=2))
    print('-' * 80)
    print('function,time')
    for fnc, elapsed in results.items():
        print('%s,%s' % (fnc, elapsed))
def work(self):
    """
    Main entry point - data processing.

    After ``self.init_params()`` this either runs the egenerator benchmark
    (when ``self.args.egen_benchmark`` is set) and returns, or walks the
    whole test matrix: every function of ``self.get_test_battery()`` in
    sorted order, every round listed for it, and every test case yielded by
    ``self.test_case_generator`` for the configured sizes, block sizes,
    degrees and combination degrees.

    For each test case that is neither skipped by the ``test_manuals`` /
    ``test_stride`` check nor already has a result file
    (``self.check_res_file``), input data is obtained from
    ``self.data_generator``, ``self.testcase`` is run and its result is
    written as JSON into ``self.results_dir``.

    The temporary directory created for a (function, round) pair is removed
    once that pair is finished - also when an exception is raised, in which
    case the exception still propagates to the caller.

    :return: None
    """
    self.init_params()

    # Special code path: benchmarking egenerator
    if self.args.egen_benchmark:
        self.egen_benchmark()
        return

    # Init logic, analysis.
    # Define test set.
    test_sizes_mb = self.args.matrix_size
    test_block_sizes = self.args.matrix_block
    test_degree = self.args.matrix_deg
    test_comb_k = self.args.matrix_comb_deg

    logger.info(
        'Computing test matrix for sizes: %s, blocks: %s, degree: %s, comb degree: %s',
        test_sizes_mb, test_block_sizes, test_degree, test_comb_k)

    # Test all functions
    battery = self.get_test_battery()
    functions = sorted(battery.keys())
    # Generate enough data for the largest test size.
    self.data_to_gen = max(test_sizes_mb) * 1024 * 1024

    logger.info('Battery of functions to test: %s', battery)

    total_test_idx = 0
    for function in functions:
        # Generate random tmpdir, generate data, test it there...
        for cur_round in battery[function]:
            tmpdir = self.gen_randomdir(function, cur_round)
            try:
                if self.is_function_egen(function):
                    self.config_js = egenerator.get_config(
                        function_name=function, rounds=cur_round, data=self.data_to_gen)
                else:
                    self.config_js = {
                        'algorithm': function,
                        'round': cur_round,
                        'seed': self.seed,
                    }

                # Reseed testcase scenario random generator
                test_rand_seed = self.test_random.randint(0, 2**64 - 1)
                self.test_random.seed(test_rand_seed)

                # Generate test cases, run the analysis.
                for test_case in self.test_case_generator(
                        test_sizes_mb, test_block_sizes, test_degree, test_comb_k):
                    data_size, block_size, degree, comb_deg = test_case
                    # Counted before any skip so indices stay stable across runs.
                    total_test_idx += 1

                    test_desc = 'idx: %04d, data: %04d, block: %d, deg: %d, comb-deg: %d, fun: %s, round: %s' \
                                % (total_test_idx, data_size, block_size, degree, comb_deg,
                                   function, cur_round)

                    # NOTE(review): presumably partitions the test indices among
                    # several independently started runs - confirm with caller.
                    if self.test_manuals > 1 \
                            and (total_test_idx % self.test_manuals) != self.test_stride:
                        logger.info('Skipping test %s', test_desc)
                        continue

                    res_file = '%s-r%02d-seed%s-%04dMB-%sbl-%sdeg-%sk.json' \
                               % (function, cur_round, self.config_js['seed'],
                                  data_size, block_size, degree, comb_deg)
                    res_file_path = os.path.join(self.results_dir, res_file)
                    if self.check_res_file(res_file_path):
                        logger.info('Already computed test %s', test_desc)
                        continue

                    data_file = self.data_generator(
                        tmpdir=tmpdir, function=function, cur_round=cur_round)
                    if data_file is None:
                        logger.error('Data file is invalid')
                        continue

                    logger.info('Working on test: %s', test_desc)
                    jsres = self.testcase(function, cur_round, data_size, block_size,
                                          degree, comb_deg, data_file, tmpdir)

                    with open(res_file_path, 'w') as fh:
                        json.dump(jsres, fh, indent=2)

            finally:
                # Remove test dir, even if this round failed part-way.
                self.clean_temp_dir(tmpdir)