def proces_chunk(self, bits, ref_bits=None):
    """
    Run one chunk of input bits through term evaluation and analysis.

    NOTE(review): another ``proces_chunk`` definition appears later in this
    file; if both live in the same class, the later one replaces this one.

    :param bits: input bit chunk, handed to ``self.term_eval.load``
    :param ref_bits: optional reference chunk, forwarded to ``process_ref``
    :return: None
    """
    evaluator = self.term_eval

    # Basis computation. When input polynomials name a variable set, only
    # those variables are evaluated; otherwise everything is.
    only_vars = self.input_poly_vars if len(self.input_poly_vars) != 0 else None
    evaluator.load(bits, eval_only_vars=only_vars)

    ln = len(bits)
    hws2 = None
    hws_input = None

    # All terms of degrees 1..deg.
    if self.all_deg_compute:
        logger.info('Evaluating all terms, bitlen: %d, bytes: %d' % (ln, ln // 8))
        hws2 = evaluator.eval_all_terms(self.deg)
        logger.info('Done: %s' % [len(x) for x in hws2])

        # Fold this chunk into the running totals. On the very first round
        # the fresh array is adopted as-is, which avoids a copy.
        if self.total_rounds != 0:
            for d in range(1, self.deg + 1):
                totals = self.total_hws[d]
                fresh = hws2[d]
                for i in common.range2(len(totals)):
                    totals[i] += fresh[i]
            logger.info('HWS merged - merge')
        else:
            self.total_hws = hws2
            logger.info('HWS merged - move')

        # NOTE(review): source indentation was lost; the round counter is
        # assumed to belong to the all-degree branch - confirm.
        self.total_rounds += 1

    # Explicitly supplied input polynomials.
    if len(self.input_poly) > 0:
        comb_res = evaluator.new_buffer()
        comb_subres = evaluator.new_buffer()
        hws_input = []
        for idx, poly in enumerate(self.input_poly):
            evaluated = evaluator.eval_poly(poly, res=comb_res, subres=comb_subres)
            obs_cnt = evaluator.hw(evaluated)
            hws_input.append(obs_cnt)
            self.input_poly_hws[idx] += obs_cnt

    # NOTE(review): assumed to run for every chunk, not only when input
    # polynomials are present - confirm against the original layout.
    self.total_n += evaluator.cur_evals

    # Reference stream, if any.
    ref_hws = self.process_ref(ref_bits, ln)

    # Hand everything over to the analysis step.
    self.analyse(
        num_evals=evaluator.cur_evals,
        hws=hws2,
        hws_input=hws_input,
        ref_hws=ref_hws,
    )
def best_zscored_base_poly_heap(self, deg, zscores, zscores_ref, num_evals, hws=None, ref_hws=None, exp_count=None):
    """
    Rank the base terms of one degree by |observed - expected| with a heap.

    The best entries are written to ``zscores[deg]`` as ``(zscore, idx, hw)``
    tuples, largest difference first.

    :param deg: degree whose hamming weights are ranked
    :param zscores: per-degree result lists; ``zscores[deg]`` is written
    :param zscores_ref: not used by this method
    :param num_evals: number of evaluations behind the hamming weights
    :param hws: per-degree hamming weights; ``hws[deg]`` is iterated
    :param ref_hws: must be None; a reference stream is rejected here
    :param exp_count: per-degree expected counts; ``exp_count[deg]`` is read
    :return: (zscore mean, number of terms whose difference reaches the threshold)
    :raises ValueError: if ``ref_hws`` is not None
    """
    logger.info('Find best with heap start deg: %d' % deg)
    expected = exp_count[deg]
    zscore_denom = common.zscore_denominator(expected, num_evals)
    if ref_hws is not None:
        raise ValueError('Heap optimization not allowed with ref stream')

    # zscore = hw_diff / num_evals / zscore_denom, so ranking and thresholding
    # can be done on the raw difference; the z-score threshold is scaled into
    # the same units once, up front.
    diff_threshold = self.zscore_thresh * zscore_denom * num_evals
    limit = self.sort_best_zscores

    diff_total = 0
    over_threshold = 0
    heap = []
    for idx, hw in enumerate(hws[deg]):
        diff = abs(hw - expected)
        diff_total += diff
        if diff >= diff_threshold:
            over_threshold += 1

        entry = (diff, hw, idx)
        # A negative limit keeps everything. The `<=` admits limit + 1
        # entries; only the first `limit` of them are reported below.
        if limit < 0 or len(heap) <= limit:
            heapq.heappush(heap, entry)
        elif diff > heap[0][0]:
            # Larger than the current minimum of the heap: swap it in.
            heapq.heapreplace(heap, entry)

    logger.info('Heap done: %d' % len(heap))

    # Make sure zscores[deg] has room for the reported entries.
    top_range = len(heap) if limit < 0 else min(len(heap), limit)
    if len(zscores[deg]) < top_range:
        zscores[deg] = [0] * top_range

    # The heap is about as large as the number of wanted entries, so a full
    # descending sort is used to extract them.
    ranked = sorted(heap, reverse=True)
    logger.info('Heap sorted, len: %s' % top_range)

    target = zscores[deg]
    for i in common.range2(top_range):
        _, hw, idx = ranked[i]
        target[i] = common.zscore_den(hw, expected, num_evals, zscore_denom), idx, hw

    # Statistics over all terms of this degree, not only the kept ones.
    term_count = float(len(hws[deg]))
    zscore_mean = diff_total / zscore_denom / num_evals / term_count
    logger.info('Stats done [%d], mean zscore: %s' % (deg, zscore_mean))
    return zscore_mean, over_threshold
def process_ref(self, ref_bits, ln):
    """
    Evaluate the reference data stream and add it to the reference totals.

    :param ref_bits: reference bit chunk, or None when there is none
    :param ln: length the reference chunk is required to have
    :return: per-degree reference hamming weights, or None when there is no
        reference data or all-degree computation is switched off
    :raises ValueError: if ``ref_bits`` has a length other than ``ln``
    """
    if ref_bits is None:
        return None

    if len(ref_bits) != ln:
        raise ValueError('Reference data stream has a different size')

    logger.info('Evaluating ref data stream')
    if not self.all_deg_compute:
        return None

    self.ref_term_eval.load(ref_bits)
    ref_hws = self.ref_term_eval.eval_all_terms(self.deg)

    # Accumulate degree by degree into the running reference totals.
    for d in range(1, self.deg + 1):
        totals = self.ref_total_hws[d]
        fresh = ref_hws[d]
        for i in common.range2(len(totals)):
            totals[i] += fresh[i]

    return ref_hws
def analyse(self, num_evals, hws=None, hws_input=None, ref_hws=None):
    """
    Analyse hamming weights.

    Input polynomials are always analysed. When all-degree computation is
    enabled, the base terms of each degree are ranked and reported, the best
    ones are collected, and XOR / AND combinations of them are evaluated.

    :param num_evals: number of evaluations behind the hamming weights
    :param hws: hamming weights on results for all degrees
    :param hws_input: hamming weights on results for input polynomials
    :param ref_hws: reference hamming weights
    :return: sorted list of combination results (also stored in
        ``self.last_res``), or None when all-degree computation is disabled
        or ``self.top_k`` is None
    """
    # scipy.misc.comb is gone from current SciPy releases; scipy.special.comb
    # is the replacement. Imported locally so the block is self-contained.
    from scipy.special import comb as binomial

    # Input polynomials
    self.analyse_input(num_evals=num_evals, hws_input=hws_input)

    # All degrees polynomials + combinations
    if not self.all_deg_compute:
        return

    probab = [self.term_eval.expp_term_deg(deg) for deg in range(0, self.deg + 1)]
    exp_count = [num_evals * x for x in probab]
    logger.info('Probabilities: %s, expected count: %s' % (probab, exp_count))

    top_terms = []
    zscores = [[0] * len(x) for x in hws]
    zscores_ref = [[0] * len(x) for x in hws]
    start_deg = self.deg if self.do_only_top_deg else 1
    for deg in range(start_deg, self.deg + 1):
        # Compute (zscore, idx, hw) entries for this degree; the callee fills
        # zscores[deg] and returns the mean z-score and the fail count.
        mean_zscore, fails = self.best_zscored_base_poly(deg, zscores, zscores_ref, num_evals, hws, ref_hws, exp_count)

        # Report the leading entries.
        for idx, x in enumerate(zscores[deg][0:15]):
            fail = 'x' if abs(x[0]) > self.zscore_thresh else ' '
            self.tprint(' - zscore[deg=%d]: %+05.5f, %+05.5f, observed: %08d, expected: %08d %s idx: %6d, term: %s'
                        % (deg, x[0], zscores_ref[deg][idx] - x[0], x[2], exp_count[deg], fail, x[1],
                           self.unrank(deg, x[1])))

        # Take top X best polynomials
        if self.top_k is None:
            continue

        logger.info('Comb...')
        if self.combine_all_deg or deg == self.deg:
            take = None if self.top_k < 0 else self.top_k  # negative top_k means "all"
            top_terms += [self.unrank(deg, x[1]) for x in zscores[deg][:take]]

            # NOTE(review): source indentation was lost; the random subset is
            # assumed to be nested under the condition above - confirm.
            if self.comb_random > 0:
                # Clamp so random.sample cannot raise on a short population.
                sample_size = min(self.comb_random, len(zscores[deg]))
                random_subset = random.sample(zscores[deg], sample_size)
                top_terms += [self.unrank(deg, x[1]) for x in random_subset]

        logger.info('Stats...')
        ranked_count = len(zscores[deg])
        # Guard the empty case instead of dividing by zero.
        fails_fraction = float(fails) / ranked_count if ranked_count else 0.0

        self.tprint('Mean zscore[deg=%d]: %s' % (deg, mean_zscore))
        self.tprint('Num of fails[deg=%d]: %s = %02.5f%%' % (deg, fails, 100.0 * fails_fraction))

    if self.top_k is None:
        return

    # Combine & store the results - XOR, AND combination
    top_res = []
    logger.info('Combining %d terms in %d degree, total = %s evals, keep best limit: %s'
                % (len(top_terms), self.top_comb, binomial(len(top_terms), self.top_comb, exact=True),
                   self.best_x_combinations))

    # Fresh buffers are stored on the instance before the combination calls.
    self.comb_res = self.term_eval.new_buffer()
    self.comb_subres = self.term_eval.new_buffer()
    start_deg = self.top_comb if self.do_only_top_comb else 1
    for top_comb_cur in common.range2(start_deg, self.top_comb + 1):
        # Combine & store results - XOR
        if not self.no_comb_xor:
            self.comb_xor(top_comb_cur=top_comb_cur, top_terms=top_terms, top_res=top_res, num_evals=num_evals,
                          ref_hws=ref_hws)

        # Combine & store results - AND
        if not self.no_comb_and:
            self.comb_and(top_comb_cur=top_comb_cur, top_terms=top_terms, top_res=top_res, num_evals=num_evals,
                          ref_hws=ref_hws)

    logger.info('Evaluating')
    top_res = self.sort_top_res(top_res)

    # Report at most the 30 leading combinations.
    for comb in top_res[:30]:
        self.tprint(' - best poly zscore %9.5f, expp: %.4f, exp: %4d, obs: %s, diff: %f %%, poly: %s'
                    % (comb.zscore, comb.expp, comb.exp_cnt, comb.obs_cnt,
                       100.0 * (comb.exp_cnt - comb.obs_cnt) / comb.exp_cnt, sorted(comb.poly)))

    self.last_res = top_res
    return top_res
def proces_chunk(self, bits, ref_bits=None):
    """
    Processes input chunk of bits for analysis, on CPU, GPU, or both.

    With ``self.verify`` set, all terms are evaluated on both GPU and CPU and
    the results are compared; otherwise ``self.compute_on_cpu`` selects the
    backend. Timings of the load, evaluation (verify mode) and analysis steps
    are printed to stdout.

    NOTE(review): an earlier ``proces_chunk`` definition appears in this
    file; if both live in the same class, this later one is the one in effect.

    :param bits: input bit chunk, handed to ``self.term_eval.load``
    :param ref_bits: optional reference chunk, forwarded to ``process_ref``
    :return: None
    :raises AssertionError: in verify mode, if GPU and CPU results differ
    """
    # Compute the basis.
    # Input polynomials optimization - evaluate basis only for variables used in polynomials.
    start = time.time()
    self.term_eval.load(bits, eval_only_vars=None if len(self.input_poly_vars) == 0 else self.input_poly_vars)
    end = time.time()
    print(end-start)

    ln = len(bits)
    hws_gpu, hws2, hws_input = None, None, None

    if not self.compute_on_cpu:
        # Basic CUDA information so the user can see which setup is used.
        # Change 0 to the device number the program should run on
        # (with a single GPU the default is 0).
        device = cuda.Device(0)
        attrs = device.get_attributes()
        logger.info('### Basic GPU info for user: ###')
        logger.info('Name of device used: %s, total memory: %d MB'
                    % (device.name(), (device.total_memory()/(1024*1024))))

        # Attributes worth reporting, keyed by the attribute's string form.
        reported = {
            "MAX_THREADS_PER_BLOCK": 'Max threads per block: %s',
            "WARP_SIZE": 'Warp size of device: %s',
            "MAX_REGISTERS_PER_BLOCK": 'Max registers per block allocated: %s',
        }
        # items() rather than iteritems(): the latter does not exist on
        # Python 3 dicts, while items() works on both Python 2 and 3.
        for key, value in attrs.items():
            message = reported.get(str(key))
            if message is not None:
                logger.info(message % str(value))

    # Evaluate all terms of degrees 1..deg
    if self.all_deg_compute:
        if self.verify:
            # Evaluate on both backends and compare degree by degree.
            start = time.time()
            logger.info('Evaluating all terms on GPU, bitlen: %d, bytes: %d' % (ln, ln//8))
            hws_gpu = self.term_eval.eval_all_terms_GPU(self.deg)
            logger.info('Done: %s' % [len(x) for x in hws_gpu])
            end = time.time()
            print("GPU:",end-start)

            start = time.time()
            logger.info('Evaluating all terms on CPU, bitlen: %d, bytes: %d' % (ln, ln//8))
            hws2 = self.term_eval.eval_all_terms(self.deg)
            logger.info('Done: %s' % [len(x) for x in hws2])
            end = time.time()
            print("CPU:",end-start)

            logger.info('Starting verification of calculated data after evaluating all terms.')
            for i in range(0, self.deg + 1):
                np.testing.assert_array_equal(hws_gpu[i], hws2[i])
            logger.info('All evaluations verified. No errors found.')

        elif self.compute_on_cpu:
            logger.info('Evaluating all terms on CPU, bitlen: %d, bytes: %d' % (ln, ln//8))
            hws2 = self.term_eval.eval_all_terms(self.deg)
            logger.info('Done: %s' % [len(x) for x in hws2])

        else:
            logger.info('Evaluating all terms on GPU, bitlen: %d, bytes: %d' % (ln, ln//8))
            hws2 = self.term_eval.eval_all_terms_GPU(self.deg)
            logger.info('Done: %s' % [len(x) for x in hws2])

        # Accumulate hws to the results.
        # If the first round, use the returned array directly to reduce time & memory for copying.
        if self.total_rounds == 0:
            self.total_hws = hws2
            logger.info('HWS merged - move')
        else:
            for d in range(1, self.deg+1):
                for i in common.range2(len(self.total_hws[d])):
                    self.total_hws[d][i] += hws2[d][i]
            logger.info('HWS merged - merge')

        # NOTE(review): source indentation was lost; the round counter is
        # assumed to belong to the all-degree branch - confirm.
        self.total_rounds += 1

    # Evaluate given input polynomials
    if len(self.input_poly) > 0:
        comb_res = self.term_eval.new_buffer()
        comb_subres = self.term_eval.new_buffer()
        hws_input = [0] * len(self.input_poly)
        for idx, poly in enumerate(self.input_poly):
            obs_cnt = self.term_eval.hw(self.term_eval.eval_poly(poly, res=comb_res, subres=comb_subres))
            hws_input[idx] = obs_cnt
            self.input_poly_hws[idx] += obs_cnt

    # NOTE(review): assumed to run for every chunk, not only when input
    # polynomials are present - confirm against the original layout.
    self.total_n += self.term_eval.cur_evals

    # Reference stream
    ref_hws = self.process_ref(ref_bits, ln)

    # Done.
    start = time.time()
    self.analyse(num_evals=self.term_eval.cur_evals, hws=hws2, hws_input=hws_input, ref_hws=ref_hws)
    end = time.time()
    print(end-start)