def reset(self):
    """
    Resets internal stats - for use with a new test vector set.
    :return:
    """
    self.total_n = 0
    self.total_rounds = 0
    self.total_hws = [[0] * common.comb(self.blocklen, x, True) for x in range(self.deg + 1)]
    self.ref_total_hws = [[0] * common.comb(self.blocklen, x, True) for x in range(self.deg + 1)]
    self.input_poly_hws = [0] * len(self.input_poly)
    self.input_poly_ref_hws = [0] * len(self.input_poly)
    self.last_res = None
    self.input_poly_last_res = None

def independence_test(self, term_eval, ddeg=3, vvar=10):
    """
    Experimental verification of term independence.
    :param term_eval: TermEval instance used to evaluate terms on the input values
    :param ddeg: degree of the generated terms
    :param vvar: number of variables the terms are generated over
    :return:
    """
    tterms = common.comb(vvar, ddeg)
    print('Independence test C(%d, %d) = %s' % (vvar, ddeg, tterms))

    ones = [0] * common.comb(vvar, ddeg, True)
    for val in common.pos_generator(dim=vvar, maxelem=1):
        for idx, term in enumerate(common.term_generator(ddeg, vvar - 1)):
            ones[idx] += term_eval.eval_term_raw_single(term, val)
    print('Done')
    print(ones)

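# Illustrative sketch (not part of the original module): a standalone version of
# the independence check above, using only the standard library. It assumes
# common.term_generator(ddeg, vvar - 1) enumerates all C(vvar, ddeg) degree-ddeg
# variable-index combinations, and that a term evaluates to the AND of its
# variables. Every degree-ddeg AND term is satisfied by exactly 2**(vvar - ddeg)
# of the 2**vvar assignments, so with the defaults vvar=10, ddeg=3 every counter
# should end up at 2**7 = 128.
def _example_independence_counts(ddeg=3, vvar=10):
    import itertools
    terms = list(itertools.combinations(range(vvar), ddeg))   # C(vvar, ddeg) terms
    ones = [0] * len(terms)
    for val in itertools.product([0, 1], repeat=vvar):        # all 2**vvar assignments
        for idx, term in enumerate(terms):
            ones[idx] += all(val[v] for v in term)            # AND of the term's bits
    return ones
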
def init(self):
    """
    Initializes the state: the term_eval engine and the expected probabilities
    of the input polynomials.
    :return:
    """
    logger.info('Initializing HWanalysis')

    if not self.no_term_map:
        logger.info('Precomputing term mappings')
        self.term_map = common.build_term_map(self.deg, self.blocklen)

    self.term_eval = common.TermEval(blocklen=self.blocklen, deg=self.deg)
    self.ref_term_eval = common.TermEval(blocklen=self.blocklen, deg=self.deg)
    self.total_hws = [[0] * common.comb(self.blocklen, x, True) for x in range(self.deg + 1)]
    self.ref_total_hws = [[0] * common.comb(self.blocklen, x, True) for x in range(self.deg + 1)]
    self.input_poly_exp = [0] * len(self.input_poly)
    self.input_poly_hws = [0] * len(self.input_poly)
    self.input_poly_ref_hws = [0] * len(self.input_poly)
    self.precompute_input_poly()

    if self.best_x_combinations is not None and self.best_x_combinations <= 0:
        self.best_x_combinations = None

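# Illustrative sketch (not part of the original module): the statistic the
# precomputed expected probabilities feed into. For a degree-d monomial
# evaluated on n independent blocks of random data, the expected hit
# probability is p = 2**-d, and the observed Hamming weight is normalized
# with the usual binomial z-score. This is only a sketch of the standard
# normal approximation; the actual computation lives in TermEval / HWAnalysis.
def _example_term_zscore(observed_hw, n_blocks, deg):
    import math
    p = 2.0 ** -deg                              # P[AND of deg independent bits == 1]
    mean = n_blocks * p                          # expected Hamming weight
    std = math.sqrt(n_blocks * p * (1.0 - p))    # binomial standard deviation
    return (observed_hw - mean) / std
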
def work(self):
    """
    Main entry point - data processing.

    RandVerif is used to benchmark particular data sources with different seeds.
    It takes e.g. AES-CTR(SHA256(random)), runs it 1000 times, and stores the results.
    This particular computation was used to determine the reference z-scores of the test.

    Another usual scenario is to take java.util.Random, seed it 1000 times with
    different random seeds, and analyze the results.

    RandVerif supports supplying custom distinguishers so the whole space is not
    searched. This setup is used to assess found distinguishers on more data /
    independent data streams with different seeds.

    RandVerif writes its results to STDOUT. The output contains multiple sections,
    separated by -----BEGIN SECTION----- markers. Sections contain stats
    (avg. z-score), all z-scores, the best distinguishers for further analysis, etc.
    :return:
    """
    self.blocklen = int(self.defset(self.args.blocklen, 128))
    deg = int(self.defset(self.args.degree, 3))
    tvsize_orig = int(self.defset(self.process_size(self.args.tvsize), 1024 * 256))
    zscore_thresh = float(self.args.conf)
    rounds = int(self.args.rounds) if self.args.rounds is not None else None
    top_k = int(self.args.topk) if self.args.topk is not None else None
    top_comb = int(self.defset(self.args.combdeg, 2))
    reffile = self.defset(self.args.reffile)
    all_deg = self.args.alldeg
    tvsize = tvsize_orig
    top_distinguishers = []

    # Load input polynomials
    self.load_input_poly()
    script_path = common.get_script_path()

    logger.info('Basic settings, deg: %s, blocklen: %s, TV size: %s, rounds: %s'
                % (deg, self.blocklen, tvsize_orig, rounds))

    total_terms = int(common.comb(self.blocklen, deg, True))
    hwanalysis = HWAnalysis()
    hwanalysis.deg = deg
    hwanalysis.blocklen = self.blocklen
    hwanalysis.top_comb = top_comb
    hwanalysis.comb_random = self.args.comb_random
    hwanalysis.top_k = top_k
    hwanalysis.combine_all_deg = all_deg
    hwanalysis.zscore_thresh = zscore_thresh
    hwanalysis.do_ref = reffile is not None
    hwanalysis.skip_print_res = True
    hwanalysis.input_poly = self.input_poly
    hwanalysis.no_comb_and = self.args.no_comb_and
    hwanalysis.no_comb_xor = self.args.no_comb_xor
    hwanalysis.prob_comb = self.args.prob_comb
    hwanalysis.all_deg_compute = len(self.input_poly) == 0
    hwanalysis.do_only_top_comb = self.args.only_top_comb
    hwanalysis.do_only_top_deg = self.args.only_top_deg
    hwanalysis.no_term_map = self.args.no_term_map
    hwanalysis.use_zscore_heap = self.args.topterm_heap
    hwanalysis.sort_best_zscores = max(common.replace_none([self.args.topterm_heap_k, top_k, 100]))
    hwanalysis.best_x_combinations = self.args.best_x_combinations

    logger.info('Initializing test')
    hwanalysis.init()

    dist_result_map = {}
    for idx, poly in enumerate(self.input_poly):
        dist_result_map[idx] = []

    for test_idx in range(self.args.tests):
        seed = random.randint(0, 2**32 - 1)
        iobj = None

        if self.args.test_randc:
            path = os.path.realpath(os.path.join(script_path, '../assets/rndgen-c/rand'))
            cmd = '%s %s' % (path, seed)
            iobj = common.CommandStdoutInputObject(cmd=cmd, seed=seed, desc='randc-%s' % seed)

        elif self.args.test_randc_small:
            path = os.path.realpath(os.path.join(script_path, '../assets/rndgen-c-small/rand'))
            cmd = '%s %s' % (path, seed)
            iobj = common.CommandStdoutInputObject(cmd=cmd, seed=seed, desc='randc-small-%s' % seed)

        elif self.args.test_java:
            path = os.path.realpath(os.path.join(script_path, '../assets/rndgen-java/'))
            cmd = 'java -cp %s Main %s' % (path, seed)
            iobj = common.CommandStdoutInputObject(cmd=cmd, seed=seed, desc='randjava-%s' % seed)

        elif self.args.test_aes:
            iobj = common.AESInputObject(seed=seed)
        else:
            raise ValueError('No generator to test')

        size = iobj.size()
        logger.info('Testing input object: %s, size: %d kB, iteration: %d'
                    % (iobj, size / 1024.0, test_idx))

        # Size smaller than TV? Then adapt the TV to the data size.
        if size >= 0 and size < tvsize:
            logger.info('File size is smaller than TV, updating TV to %d' % size)
            tvsize = size

        if tvsize * 8 % self.blocklen != 0:
            rem = tvsize * 8 % self.blocklen
            logger.warning('Input data size not aligned to the block size. '
                           'Input bytes: %d, block bits: %d, rem: %d' % (tvsize, self.blocklen, rem))
            tvsize -= rem // 8
            logger.info('Updating TV to %d' % tvsize)

        hwanalysis.reset()
        logger.info('BlockLength: %d, deg: %d, terms: %d' % (self.blocklen, deg, total_terms))

        with iobj:
            data_read = 0
            cur_round = 0

            while size < 0 or data_read < size:
                if rounds is not None and cur_round > rounds:
                    break

                data = iobj.read(tvsize)
                data_read += len(data)  # track consumed bytes so the size bound terminates the loop
                bits = common.to_bitarray(data)
                if len(bits) == 0:
                    logger.info('File read completely')
                    break

                logger.info('Pre-computing with TV, deg: %d, blocklen: %04d, tvsize: %08d = %8.2f kB = %8.2f MB, '
                            'round: %d, avail: %d'
                            % (deg, self.blocklen, tvsize, tvsize / 1024.0, tvsize / 1024.0 / 1024.0,
                               cur_round, len(bits)))

                hwanalysis.process_chunk(bits, None)
                cur_round += 1

        res = hwanalysis.input_poly_last_res
        if res is not None and len(res) > 0:
            res_top = res[0]
            top_distinguishers.append((res_top, seed))
            for cur in res:
                dist_result_map[cur.idx].append(cur.zscore)

        elif hwanalysis.last_res is not None and len(hwanalysis.last_res) > 0:
            res_top = hwanalysis.last_res[0]
            top_distinguishers.append((res_top, seed))

        else:
            raise ValueError('No data from the analysis')

        logger.info('Finished processing %s ' % iobj)
        logger.info('Data read %s ' % iobj.data_read)
        logger.info('Read data hash %s ' % iobj.sha1.hexdigest())

    all_zscores = []
    print('-----BEGIN JSON-----')
    js = []
    for dist in top_distinguishers:
        cr = collections.OrderedDict()
        cr['z'] = dist[0].zscore
        try:
            cr['d'] = dist[0].idx
        except AttributeError:
            pass

        cr['seed'] = dist[1]
        js.append(cr)
        all_zscores.append(dist[0].zscore)
    print(json.dumps(js, indent=2))

    print('-----BEGIN JSON-STATS-----')
    js = []
    for idx in dist_result_map:
        cur = dist_result_map[idx]
        cr = collections.OrderedDict()
        cr['idx'] = idx
        cr['poly'] = common.poly2str(self.input_poly[idx])
        cr['avg'] = sum([abs(x) for x in cur]) / float(len(cur))
        cr['cnt'] = len(cur)
        cr['zscores'] = cur
        js.append(cr)
    print(json.dumps(js, indent=2))

    print('-----BEGIN RUN-CONFIG-----')
    js = collections.OrderedDict()
    js['block'] = self.blocklen
    js['deg'] = deg
    js['top_comb'] = top_comb
    js['top_k'] = top_k
    js['tvsize'] = tvsize
    js['tests'] = self.args.tests
    js['prob_comb'] = self.args.prob_comb
    js['all_deg'] = all_deg
    print(json.dumps(js))

    print('-----BEGIN Z-SCORES-NORM-----')
    print(all_zscores)

    print('-----BEGIN Z-SCORES-ABS-----')
    print([abs(x) for x in all_zscores])

    print('-----BEGIN Z-SCORES-AVG-----')
    print(sum([abs(x) for x in all_zscores]) / float(len(all_zscores)))

    print('-----BEGIN Z-SCORES-NAVG-----')
    print(sum([x for x in all_zscores]) / float(len(all_zscores)))

    if self.args.csv_zscore:
        print('-----BEGIN Z-SCORES-CSV-----')
        print('zscore')
        for x in [abs(x) for x in all_zscores]:
            print(x)

    logger.info('Processing finished')

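# Illustrative sketch (not part of the original module): splitting the sectioned
# STDOUT produced by work() above back into named chunks. The section names and
# the '-----BEGIN ...-----' marker format are taken from the print statements
# above; the helper itself is hypothetical. JSON sections can then be decoded,
# e.g. json.loads(sections['JSON']).
def _example_split_sections(output):
    import re
    sections, name = {}, None
    for line in output.splitlines():
        m = re.match(r'^-----BEGIN (.+?)-----$', line)
        if m:
            name = m.group(1)          # e.g. 'JSON', 'JSON-STATS', 'Z-SCORES-ABS'
            sections[name] = []
        elif name is not None:
            sections[name].append(line)
    return {k: '\n'.join(v) for k, v in sections.items()}
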
def work(self, bin_data=None):
    """
    Main entry point - data processing.
    :return:
    """
    config = self.config_data
    if self.args.config_file:
        with open(self.args.config_file) as fh:
            config = json.load(fh)

    NRES_TO_DUMP = 128
    self.timer_data_read.reset()
    self.timer_data_bins.reset()
    self.timer_process.reset()
    cpu_pcnt_load_before = misc.try_get_cpu_percent()
    cpu_load_before = misc.try_get_cpu_load()

    hw_cfg = config['hwanalysis']
    test_run = config['config']
    data_file = common.defvalkeys(config, 'spec.data_file')
    skip_finished = common.defvalkey(config, 'skip_finished', False)
    self.do_halving = common.defvalkey(config, 'halving', False)
    self.halving_top = common.defvalkey(config, 'halving_top', NRES_TO_DUMP)
    res_file = common.defvalkey(config, 'res_file')
    backup_dir = common.defvalkey(config, 'backup_dir')
    all_zscores = common.defvalkey(config, 'all_zscores')

    if res_file and self.check_res_file(res_file):
        if skip_finished:
            logger.info('Already computed in %s' % res_file)
            return
        elif backup_dir:
            misc.file_backup(res_file, backup_dir=backup_dir)

    self.hw_cfg = hw_cfg
    self.hwanalysis = HWAnalysis()
    self.hwanalysis.from_json(hw_cfg)
    self.check_ref_db(self.hwanalysis)

    self.blocklen = self.hwanalysis.blocklen
    self.deg = self.hwanalysis.deg
    self.top_comb = self.hwanalysis.top_comb
    self.top_k = self.hwanalysis.top_k
    self.all_deg = self.hwanalysis.all_deg_compute
    self.zscore_thresh = self.hwanalysis.zscore_thresh
    self.json_nice = True
    self.json_top = min(NRES_TO_DUMP, self.halving_top)

    if all_zscores:
        self.hwanalysis.all_zscore_comp = True

    self.rounds = common.defvalkey(test_run['spec'], 'data_rounds')
    tvsize = common.defvalkey(test_run['spec'], 'data_size')

    # Load input polynomials
    # self.load_input_poly()

    logger.info('Basic settings, deg: %s, blocklen: %s, TV size: %s'
                % (self.hwanalysis.deg, self.hwanalysis.blocklen, tvsize))

    total_terms = int(common.comb(self.hwanalysis.blocklen, self.hwanalysis.deg, True))

    logger.info('Initializing test')
    time_test_start = time.time()
    self.hwanalysis.init()

    # Process the input object - file, explicit binary data, or stdin
    if data_file:
        iobj = common.FileInputObject(data_file)
    elif bin_data:
        iobj = common.BinaryInputObject(bin_data)
    else:
        iobj = common.StdinInputObject('stdin')

    size = iobj.size()
    logger.info('Testing input object: %s, size: %d kB' % (iobj, size / 1024.0))

    # Size smaller than TV? Then adapt the TV to the data size.
    tvsize = self.adjust_tvsize(tvsize, size)

    self.hwanalysis.reset()
    logger.info('BlockLength: %d, deg: %d, terms: %d'
                % (self.hwanalysis.blocklen, self.hwanalysis.deg, total_terms))

    jscres = []
    with iobj:
        self.analyze_iobj(iobj, 0, tvsize, jscres)

    data_hash = iobj.sha1.hexdigest()
    logger.info('Finished processing %s ' % iobj)
    logger.info('Data read %s ' % iobj.data_read)
    logger.info('Read data hash %s ' % data_hash)

    # All z-score list for statistical processing / theory check
    if all_zscores and res_file:
        self.all_zscore_process(res_file, self.hwanalysis)
        return

    # RESULT process...
    total_results = len(self.hwanalysis.last_res) if self.hwanalysis.last_res else 0
    best_dists = self.hwanalysis.last_res[0:min(NRES_TO_DUMP, total_results)] if self.hwanalysis.last_res else None
    halving_pvals_ok = False
    best_dist_hlv_zscore = None

    if self.do_halving and len(jscres) > 1 and 'halvings' in jscres[1] and jscres[1]['halvings']:
        halving_pvals_ok = True

        # Re-sort the best distinguishers by the halving ordering
        sorder = self.build_poly_sort_index([common.jsunwrap(x['poly']) for x in jscres[1]['halvings']])
        best_dists.sort(key=lambda x: sorder[common.immutable_poly(common.jsunwrap(x.poly))])

        # Dists from halving
        try:
            dists1s = sorted(jscres[1]['dists'],
                             key=lambda x: sorder[common.immutable_poly(common.jsunwrap(x['poly']))])
            best_dist_hlv_zscore = dists1s[0]['zscore']
        except Exception as e:
            logger.warning('Exception best_dist_hlv_zscore: %s' % (e,), exc_info=e)

        # Add the p-value from the halving to the best distinguishers
        mrange = min(len(jscres[1]['halvings']), len(best_dists))
        best_dists = [(list(best_dists[ix]) + [jscres[1]['halvings'][ix]['pval']]) for ix in range(mrange)]

    best_dists_json = [NoIndent(x) for x in best_dists] if best_dists is not None else None

    jsres = collections.OrderedDict()
    if best_dists:
        jsres['best_zscore'] = best_dists[0][4]  # .zscore
        jsres['best_poly'] = NoIndent(best_dists[0][0])  # .poly
        if halving_pvals_ok:
            jsres['best_pval'] = jscres[1]['halvings'][0]['pval']
            jsres['best_zscore_hlv'] = best_dist_hlv_zscore

    for ix, rr in enumerate(jscres):
        if 'dists' in rr:
            rr['dists'] = [NoIndent(common.jsunwrap(x)) for x in rr['dists']]
        if 'halvings' in rr:
            rr['halvings'] = [NoIndent(common.jsunwrap(x)) for x in rr['halvings']]

    jsres['blocklen'] = self.hwanalysis.blocklen
    jsres['degree'] = self.hwanalysis.deg
    jsres['comb_degree'] = self.hwanalysis.top_comb
    jsres['top_k'] = self.top_k
    jsres['all_deg'] = self.all_deg
    jsres['time_elapsed'] = time.time() - time_test_start
    jsres['time_data_read'] = self.timer_data_read.total()
    jsres['time_data_bins'] = self.timer_data_bins.total()
    jsres['time_process'] = self.timer_process.total()

    jsres['data_hash'] = data_hash
    jsres['data_read'] = iobj.data_read
    jsres['generator'] = self.config_js
    jsres['best_dists'] = best_dists_json
    jsres['config'] = config
    jsres['booltest_res'] = jscres

    if self.dump_cpu_info:
        jsres['hostname'] = misc.try_get_hostname()
        jsres['cpu_pcnt_load_before'] = cpu_pcnt_load_before
        jsres['cpu_load_before'] = cpu_load_before
        jsres['cpu_pcnt_load_after'] = misc.try_get_cpu_percent()
        jsres['cpu_load_after'] = misc.try_get_cpu_load()
        jsres['cpu'] = misc.try_get_cpu_info()

    if res_file:
        with open(res_file, 'w+') as fh:
            fh.write(common.json_dumps(jsres, indent=2))
        misc.try_chmod_gr(res_file)

    return common.jsunwrap(jsres)

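# Illustrative sketch (not part of the original module): the minimal shape of
# the config dict consumed by work() above, inferred from the defvalkey /
# defvalkeys reads. All values are hypothetical placeholders; 'hwanalysis' is
# the dict handed to HWAnalysis.from_json(), and 'config.spec' carries the
# test-vector sizing.
_EXAMPLE_WORK_CONFIG = {
    'hwanalysis': {'blocklen': 128, 'deg': 2, 'top_comb': 2},        # HWAnalysis.from_json() input
    'config': {'spec': {'data_size': 1024 * 1024, 'data_rounds': None}},
    'spec': {'data_file': '/tmp/data.bin'},                          # hypothetical input path
    'halving': False,
    'skip_finished': False,
    'res_file': None,                                                # optional JSON result dump
}
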
def testcase(self, function, cur_round, size_mb, blocklen, degree, comb_deg, data_file, tmpdir):
    """
    Test case executor
    :param function:
    :param cur_round:
    :param size_mb:
    :param blocklen:
    :param degree:
    :param comb_deg:
    :param data_file:
    :param tmpdir:
    :return:
    """
    rounds = 0
    tvsize = 1024 * 1024 * size_mb

    # Load input polynomials
    self.load_input_poly()
    script_path = common.get_script_path()

    logger.info('Basic settings, deg: %s, blocklen: %s, TV size: %s' % (degree, blocklen, tvsize))

    total_terms = int(common.comb(blocklen, degree, True))
    hwanalysis = HWAnalysis()
    hwanalysis.deg = degree
    hwanalysis.blocklen = blocklen
    hwanalysis.top_comb = comb_deg
    hwanalysis.comb_random = self.args.comb_random
    hwanalysis.top_k = self.top_k
    hwanalysis.combine_all_deg = self.all_deg
    hwanalysis.zscore_thresh = self.zscore_thresh
    hwanalysis.do_ref = None
    hwanalysis.skip_print_res = True
    hwanalysis.input_poly = self.input_poly
    hwanalysis.no_comb_and = self.args.no_comb_and
    hwanalysis.no_comb_xor = self.args.no_comb_xor
    hwanalysis.prob_comb = self.args.prob_comb
    hwanalysis.all_deg_compute = len(self.input_poly) == 0
    hwanalysis.do_only_top_comb = self.args.only_top_comb
    hwanalysis.do_only_top_deg = self.args.only_top_deg
    hwanalysis.no_term_map = self.args.no_term_map
    hwanalysis.use_zscore_heap = self.args.topterm_heap
    hwanalysis.sort_best_zscores = max(common.replace_none([self.args.topterm_heap_k, self.top_k, 100]))
    hwanalysis.best_x_combinations = self.args.best_x_combinations

    logger.info('Initializing test')
    time_test_start = time.time()
    hwanalysis.init()

    # Process input object
    iobj = common.FileInputObject(data_file)
    size = iobj.size()
    logger.info('Testing input object: %s, size: %d kB' % (iobj, size / 1024.0))

    # Size smaller than TV? Then adapt the TV to the data size.
    if size >= 0 and size < tvsize:
        logger.info('File size is smaller than TV, updating TV to %d' % size)
        tvsize = size

    if tvsize * 8 % blocklen != 0:
        rem = tvsize * 8 % blocklen
        logger.warning('Input data size not aligned to the block size. '
                       'Input bytes: %d, block bits: %d, rem: %d' % (tvsize, blocklen, rem))
        tvsize -= rem // 8
        logger.info('Updating TV to %d' % tvsize)

    hwanalysis.reset()
    logger.info('BlockLength: %d, deg: %d, terms: %d' % (blocklen, degree, total_terms))

    with iobj:
        data_read = 0
        cur_round = 0

        while size < 0 or data_read < size:
            if rounds is not None and cur_round > rounds:
                break

            data = iobj.read(tvsize)
            data_read += len(data)  # track consumed bytes so the size bound terminates the loop
            bits = common.to_bitarray(data)
            if len(bits) == 0:
                logger.info('File read completely')
                break

            logger.info('Pre-computing with TV, deg: %d, blocklen: %04d, tvsize: %08d = %8.2f kB = %8.2f MB, '
                        'round: %d, avail: %d'
                        % (degree, blocklen, tvsize, tvsize / 1024.0, tvsize / 1024.0 / 1024.0,
                           cur_round, len(bits)))

            hwanalysis.process_chunk(bits, None)
            cur_round += 1

    # RESULT process...
    total_results = len(hwanalysis.last_res)
    best_dists = hwanalysis.last_res[0:min(128, total_results)]
    data_hash = iobj.sha1.hexdigest()

    jsres = collections.OrderedDict()
    jsres['best_zscore'] = best_dists[0].zscore
    jsres['best_poly'] = best_dists[0].poly

    jsres['blocklen'] = blocklen
    jsres['degree'] = degree
    jsres['comb_degree'] = comb_deg
    jsres['top_k'] = self.top_k
    jsres['all_deg'] = self.all_deg
    jsres['time_elapsed'] = time.time() - time_test_start

    jsres['data_hash'] = data_hash
    jsres['data_read'] = iobj.data_read
    jsres['generator'] = self.config_js
    jsres['best_dists'] = best_dists

    logger.info('Finished processing %s ' % iobj)
    logger.info('Data read %s ' % iobj.data_read)
    logger.info('Read data hash %s ' % data_hash)
    return jsres

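# Illustrative usage sketch (not part of the original module): invoking
# testcase() directly. All argument values and the `runner` instance are
# hypothetical; testcase() reads the generator output from data_file and
# returns the result dict built above.
#
#   jsres = runner.testcase(function='AES', cur_round=10, size_mb=10,
#                           blocklen=128, degree=2, comb_deg=2,
#                           data_file='/tmp/aes_r10.bin', tmpdir='/tmp')
#   print(jsres['best_zscore'], common.poly2str(jsres['best_poly']))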