def compute_metrics(self, pos, ex):
    """Rank the positive score against sampled negatives, once per flag value.

    Args:
        pos: Score of the positive example. It is appended after all negative
            scores and its rank is what gets returned.
        ex: The example handed to the negative sampler and to ``pack_negs``.

    Returns:
        Tuple ``(s_rank, t_rank)``: the rank of ``pos`` among the negatives
        produced with the boolean flag ``False`` and ``True`` respectively
        (presumably source-side and target-side corruptions -- confirm
        against ``neg_sampler``).
    """
    num_negs = 1000

    def calc_scores(batches):
        # Flatten every batch *before* joining them: the last batch may be
        # shorter than the others, and np.array() over ragged rows produces
        # an object array (or raises on recent NumPy) instead of a flat
        # vector of scores.
        batch_scores = [
            np.ravel(self.model.predict(self.params, b)) for b in batches
        ]
        if batch_scores:
            scores = np.concatenate(batch_scores)
        else:
            scores = np.array([])
        scores = np.append(scores, pos)
        assert pos == scores[-1]
        ranks = util.ranks(scores.flatten(), ascending=False)
        # The positive was appended last, so its rank is the last entry.
        return ranks[-1]

    if self.is_dev:
        s_negs = self.neg_sampler.sample(ex, num_negs, False)
        t_negs = self.neg_sampler.sample(ex, num_negs, True)
    else:
        s_negs = self.neg_sampler.bordes_negs(ex, False, num_negs)
        t_negs = self.neg_sampler.bordes_negs(ex, True, num_negs)
    s_negs = self.pack_negs(ex, s_negs, False)
    t_negs = self.pack_negs(ex, t_negs, True)
    negs_s = util.chunk(s_negs, constants.test_batch_size)
    negs_t = util.chunk(t_negs, constants.test_batch_size)
    s_rank = calc_scores(negs_s)
    t_rank = calc_scores(negs_t)
    return s_rank, t_rank
def calc_obj(self, data, f, sample=True):
    """Average ``f`` over ``data``, evaluated in batches of 100.

    Args:
        data: Examples to evaluate.
        f: Callable invoked as ``f(self.params, batch)``, where ``batch`` is
            one chunk converted with ``np.asarray``.
        sample: When true, evaluate on ``util.sample(data, self.dev_samples)``
            instead of on all of ``data``.

    Returns:
        ``np.nanmean`` over the per-batch values, so NaN batches are ignored.
    """
    batch_len = 100
    pool = util.sample(data, self.dev_samples) if sample else data
    per_batch = []
    for batch in util.chunk(pool, batch_len):
        per_batch.append(f(self.params, np.asarray(batch)))
    return np.nanmean(per_batch)
def evaluate(self, data, num_samples, sample=True):
    """Return the NaN-ignoring mean of ``self.evaluator.evaluate`` over batches.

    Args:
        data: Examples to evaluate.
        num_samples: Number of examples drawn with ``util.sample`` when
            ``sample`` is true; it also caps the batch size in that case.
        sample: When false, all of ``data`` is evaluated in batches of
            ``self.test_batch_size``.
    """
    if not sample:
        batches = util.chunk(data, self.test_batch_size)
    else:
        batch_size = np.minimum(num_samples, self.test_batch_size)
        drawn = util.sample(data, num_samples)
        batches = util.chunk(drawn, batch_size)
    scores = []
    for batch in batches:
        scores.append(self.evaluator.evaluate(batch))
    return np.nanmean(scores)
def c12_detect_ecb(oracle, block_size):
    """Report whether ``oracle`` produces repeated ciphertext blocks.

    The oracle is fed four blocks' worth of zero bytes. If any
    ``block_size``-sized chunk of its output occurs more than once, the
    function returns ``True``, which is the signature of ECB mode.
    """
    probe = bytes(4 * block_size)
    blocks = chunk(oracle(probe), block_size)
    return len(set(blocks)) != len(blocks)
def minimize(self):
    """Run the training loop for at most ``self.num_epochs`` epochs.

    Each epoch shuffles a copy of ``self.train``, splits it into batches of
    ``self.batch_size`` and performs one optimizer step per batch, clipping
    the gradient norm at 100. Progress is reported every
    ``self.report_steps`` steps, and the model is saved after every epoch.
    Returns as soon as ``self.halt`` is set.
    """
    print("Training...")
    for epoch in range(self.num_epochs):
        start = time.time()
        train_cp = list(self.train)
        np.random.shuffle(train_cp)
        batches = util.chunk(train_cp, self.batch_size)
        for step, batch in enumerate(batches):
            self.optim.zero_grad()
            loss = self.fprop(batch)
            loss.backward()
            g_norm = torch.nn.utils.clip_grad_norm(self.model.parameters(), 100)
            self.optim.step()
            if step % self.report_steps == 0:
                self.report(step, g_norm)
        # The step counter restarts with every epoch, so reset the
        # bookkeeping fields as well (presumably read by report() -- confirm).
        self.prev_steps = 0
        self.prev_time = time.time()
        end = time.time()
        # divmod keeps the minutes integral; plain "/" yields a float such as
        # 1.5333 under Python 3.
        mins, secs = divmod(int(end - start), 60)
        print("Epoch {} took {} minutes {} seconds".format(epoch+1,mins,secs))
        # Refresh
        self.save(self.dump)
        # Only one epoch for Dynamic Samplers
        if isinstance(self.ns, (Dynamic_Sampler, Policy_Sampler)):
            self.dump = False
            # NOTE(review): confirm the five-epoch cut-off is meant to apply
            # to dynamic/policy samplers only.
            if epoch >= 4:
                self.halt = True
        if self.halt:
            return
def _iex_aggregator(fn_name: str, **kwargs: object) -> dict:
    """Call ``iex.Stock(...).<fn_name>(**kwargs)`` for all symbols, 100 at a time.

    Args:
        fn_name: Name of the method to look up on each ``iex.Stock`` batch.
        **kwargs: Passed through unchanged to that method.

    Returns:
        A dict keyed by ticker. List payloads are grouped per ``'symbol'``
        into lists; dict payloads are merged as-is. For ``get_financials``
        each value is reduced to the first entry of its ``'financials'``
        list, or ``None`` when that entry is missing or empty.

    Raises:
        RuntimeError: If the payload is neither a list nor a dict.
    """
    agg = {}
    for batch in util.chunk(symbols(remove='expired'), 100):
        tickers = list(batch)
        fn = getattr(iex.Stock(tickers), fn_name)
        retrieved = fn(**kwargs)
        if isinstance(retrieved, list):
            data = defaultdict(list)
            for datum in retrieved:
                data[datum['symbol']].append(datum)
        elif isinstance(retrieved, dict):
            data = dict(retrieved)
        else:
            # Wrap in a 1-tuple so that a tuple payload is reported rather
            # than being unpacked as %-format arguments.
            raise RuntimeError(
                "Error: `%s' (retrieved) is neither dict nor list" % (retrieved,))
        agg.update(data)

    if fn_name == 'get_financials':
        # Yes, iexfinance is returning this mess for financials, which we need to correct
        # "or [None]" also covers an empty 'financials' list, which would
        # otherwise raise IndexError.
        return {
            ticker: (datum.get('financials') or [None])[0]
            for ticker, datum in agg.items()
        }
    return dict(agg)
def evaluate(data, evaluater, results_dir, is_dev):
    """Evaluate ``data`` batch by batch and write rank/metric files.

    Running means of the reciprocal rank and HITS@10 returned by
    ``evaluater.evaluate`` are maintained per batch and printed after every
    batch. Afterwards ``evaluater.all_ranks`` and the final metrics are
    written into ``results_dir``, with the split name (``dev`` or ``test``)
    in the file names.
    """
    print("Evaluating")
    mrr = 0.0
    h10 = 0.0
    tick = time.time()
    report_period = 1
    for count, d in enumerate(util.chunk(data, constants.test_batch_size)):
        rr, hits_10 = evaluater.evaluate(d, constants.num_test_negs)
        # Incremental mean: previous mean weighted by the batches seen so far.
        h10 = (h10 * count + hits_10) / float(count + 1)
        mrr = (mrr * count + rr) / (count + 1)
        if count % report_period != 0:
            continue
        secs = time.time() - tick
        speed = report_period * constants.test_batch_size / float(secs)
        print("Speed {} queries per second".format(speed))
        print("Query Count : {}".format(count))
        print("Mean Reciprocal Rank : {:.4f}, HITS@10 : {:.4f}".format(
            mrr, h10))
        tick = time.time()

    print('Writing Results.')
    split = 'test'
    if is_dev:
        split = 'dev'
    ranks_path = os.path.join(results_dir, 'ranks_{}'.format(split))
    with open(ranks_path, 'w') as f:
        f.write("\n".join(map(str, evaluater.all_ranks)))
    results_path = os.path.join(results_dir, 'results_{}.'.format(split))
    with open(results_path, 'w') as f:
        f.write("Mean Reciprocal Rank : {:.4f}\nHITS@10 : {:.4f}\n".format(
            mrr, h10))
def refresh(ctx):
    """Re-load the account five tickers at a time and print each batch's table.

    For every batch a fresh ``account.Account`` is built for just those
    tickers, rendered with the column/sort/filter settings of the ``'all'``
    view, and printed together with ``util.debug.measurements()``. The
    function then sleeps 100 seconds before the next batch (presumably for
    rate limiting -- confirm).
    """
    full_account = account.Account()
    batch_size = 5
    view_spec = _VIEWS['all']
    columns = view_spec['columns']
    sort_by = view_spec['sort_by']
    filter_by = view_spec['filter_by']

    all_tickers = full_account.stocks.ticker.to_list()
    for tickers in util.chunk(all_tickers, batch_size):
        joined = ','.join(tickers)
        print("Processing next batch of %d (%s)..." % (batch_size, joined))
        batch_account = account.Account(tickers)
        table = util.output.mktable(
            batch_account.stocks,
            columns,
            tickers=tickers,
            filter_by=filter_by,
            sort_by=sort_by,
            reverse=False,
            limit=0,
        )
        util.output.prtable(table)
        print(util.debug.measurements())
        time.sleep(100)
def decryptData(key, encryptedData):
    """Decrypts the apk data using the specified AES key.

    The input is split into chunks of ``blockSize + paddingSize``; each chunk
    is decrypted with AES-ECB, passed through ``util.unpad``, and the pieces
    are joined back together.
    """
    cipher = AES.new(key, AES.MODE_ECB)
    stride = constants.blockSize + constants.paddingSize
    plain_parts = []
    for block in util.chunk(encryptedData, stride):
        plain_parts.append(util.unpad(cipher.decrypt(block)))
    return ''.join(plain_parts)
def c13():
    """Forge an admin profile by cutting and pasting ECB blocks (S2C13).

    Step 1 finds the ciphertext block that encrypts a PKCS#7-padded
    ``b"admin"``: the e-mail address is prefixed with enough ``A`` bytes to
    force two identical ciphertext blocks, and the block right after that
    repeat is taken as the target. Step 2 tries e-mail paddings of
    ``0..block_size-1`` bytes, replaces the last ciphertext block with the
    target, and succeeds once the decrypted profile has ``role == admin``.

    Raises:
        RuntimeError: If no target block could be located.
    """
    block_size = 16
    secret_key = get_random_bytes(block_size)

    def encryptor(email_address):
        profile = c13_profile_for(email_address)
        return aes128_ecb_encode(secret_key, pkcs7_pad(profile, block_size))

    def decryptor(cipher_text):
        plain = aes128_ecb_decode(secret_key, cipher_text)
        return c13_parse_kv(pkcs7_unpad(plain, block_size))

    def block_after_first_repeat(blocks):
        # Return the block following the first block that equals its
        # predecessor, or b'' when there is no such block.
        previous = b''
        take_next = False
        for block in blocks:
            if take_next:
                return block
            take_next = (block == previous)
            previous = block
        return b''

    # The minimum amount of prefix padding to cause a duplicated block
    # will give us the target block in the next block
    target_cipher_block = b''
    for repeat_pad_size in range(2 * block_size - 1, 3 * block_size):
        trick_email_address = (b"A" * repeat_pad_size
                               + pkcs7_pad(b"admin", block_size)
                               + b"@example.com")
        blocks = chunk(encryptor(trick_email_address), block_size)
        target_cipher_block = block_after_first_repeat(blocks)
        if target_cipher_block != b'':
            break

    if target_cipher_block == b'':
        raise RuntimeError("Didn't find target cipher block")

    # At some padding between 0..block_size the end block should
    # be 'user<pkcspadding>'. If so, replacing it with our
    # target cipher block should give us something which will decode
    # to our desired plaintext
    for padding_size in range(0, block_size):
        padded_email_address = (b"A" * padding_size) + b"@example.com"
        spliced = bytearray(encryptor(padded_email_address))
        # Splice in target block
        spliced[-block_size:] = target_cipher_block
        forged = bytes(spliced)
        try:
            profile = decryptor(forged)
            if profile[b"role"] == b"admin":
                print("S2C13 - did it! got an admin role")
                return
        except (KeyError, ValueError):
            # Wrong alignment for this padding size; try the next one.
            continue

    print("S2C13 fail. Bad coder, no biscuit")
def parseMessage(message):
    """Split ``message`` into two-line entries and decode each second line.

    The second line of every entry is left-padded with zeros to eight hex
    digits and converted to raw bytes in place. The list of entries produced
    by ``util.chunk`` is returned.
    """
    diffs = util.chunk(message.splitlines(), 2)
    for entry in diffs:
        padded_hex = entry[1].zfill(8)
        entry[1] = binascii.unhexlify(padded_hex)
    return diffs
def decrypt_cbc(message, iv_value, key):
    """Decrypt ``message`` in CBC mode using 16-byte blocks.

    Every ciphertext block is handed to ``_decrypt_cbc_block`` together with
    the block that precedes it in the chain; the IV takes that role for the
    first block. The decrypted blocks are concatenated and returned.
    """
    chain = [iv_value] + chunk(message, 16)
    return b''.join(
        _decrypt_cbc_block(current, key, previous)
        for previous, current in zip(chain, chain[1:])
    )
def train(self, training_data, log=True):
    """Shuffle ``training_data`` and train on it one batch at a time.

    Args:
        training_data: Mutable sequence of training examples. It is shuffled
            in place before being split into batches of ``self.batch_size``.
        log: When true, print the error returned by ``train_batch`` after
            each batch and the total elapsed time at the end.
    """
    start = time()
    random.shuffle(training_data)
    batches = util.chunk(training_data, self.batch_size)
    for epoch, batch in enumerate(batches):
        error = self.train_batch(batch)
        if log:
            print("Epoch:", epoch, "Error:", error)
    if log:
        # Keep the fractional part: the format asks for two decimals, which
        # the previous int() truncation always turned into ".00".
        print('Training took {0:.2f} seconds'.format(time() - start))
def kd_to_pt_score(kd):
    """Score the decryption obtained for the key size in ``kd[0]``.

    ``cipher_text`` is read from the enclosing scope. It is split into
    ``key_size`` chunks and transposed, each column is solved with
    ``c4_best_single_byte_xor`` (element ``[1]`` of its result is used as the
    key byte), and the resulting key decrypts the whole text.

    Returns:
        Tuple ``(score, plain_text)`` with ``score`` from ``english_score``.
    """
    columns = transpose(chunk(cipher_text, kd[0]))
    key = bytes(c4_best_single_byte_xor(column)[1] for column in columns)
    plain_text = decrypt_xor(key, cipher_text)
    return (english_score(plain_text), plain_text)
def c14():
    """Recover the oracle's hidden plaintext one byte at a time (S2C14).

    On every round a ``block_size - 1`` byte prefix is built from the tail of
    the plaintext recovered so far (left-padded with ``pad_char``). A
    dictionary of candidate cipher blocks for each possible next byte is
    built from it, and the oracle is queried repeatedly until one of its
    output blocks appears in that dictionary.

    Raises:
        RuntimeError: When no next byte is found within ``10 * block_size``
            oracle queries. As written this is also the only way the loop
            ends, including once the message is exhausted.
    """
    unknown_key = get_random_bytes(16)

    def oracle(pt):
        return c14_encryption_oracle(unknown_key, pt)

    block_size = 16
    pad_char = b'A'
    max_attempts = 10 * block_size
    recovered_plain_text = bytearray()

    while True:
        # We construct a (block_size - 1) piece plain text. Which
        # ends in the our recovered plain text and is prepended with enough
        # pad_char to make the size
        chosen_plain_text = recovered_plain_text[-(block_size - 1):]
        added_pad = max(0, (block_size - 1) - len(chosen_plain_text))
        chosen_plain_text = bytearray(pad_char * added_pad) + chosen_plain_text
        assert len(
            chosen_plain_text
        ) == block_size - 1, "Using correct size chosen_plain_text block"

        # By prepending with enough pad_chars and appending with bytes 0->255,
        # and repeating until we get block_size different
        # answers, we find 'block_size' candidate cipher blocks for each possible end byte
        dictionary = c14_dictionary_for_block(oracle, block_size,
                                              chosen_plain_text)

        next_byte = None
        pad = pad_char * added_pad
        for _ in range(max_attempts):
            cipher_text = oracle(pad)
            for c in chunk(cipher_text, block_size):
                try:
                    next_byte = dictionary[c]
                    break
                except KeyError:
                    pass
            # Stop querying once a block matched; the inner break alone only
            # left the chunk loop and the oracle kept being called.
            if next_byte is not None:
                break

        if next_byte is None:
            raise RuntimeError(
                "Failed to find next byte in {} iterations".format(
                    max_attempts))
        recovered_plain_text.append(next_byte)
        print("{}".format(recovered_plain_text.decode('ascii')))

    # NOTE(review): unreachable as written -- the loop above only exits by
    # raising. Kept, with its placeholder now actually formatted, for when a
    # clean end-of-message exit is added.
    print("S2C14 msg is {}".format(recovered_plain_text))
def c11():
    """Guess ten times whether the S2C11 oracle used ECB or CBC.

    Each round encrypts ten blocks of zero bytes; a repeated ciphertext block
    is taken as evidence of ECB, otherwise CBC is guessed. The guess is
    printed.
    """
    block_size = 16  # we're doing AES128
    for _ in range(10):
        # A lot of repetition, which repeats under ECB
        plain_text = bytes(block_size * 10)
        blocks = chunk(c11_encrypt_ecb_or_cbc_oracle(plain_text), block_size)
        has_repeat = len(blocks) != len(set(blocks))
        if has_repeat:
            print("S2C11 - guess ECB!")
        else:
            print("S2C11 - guess CBC!")
def c8():
    """Print the index of the first line of ``8.txt`` with a repeated block.

    Every line is split into 16-unit chunks. The first line in which some
    chunk occurs more than once is reported (zero-based) and the scan stops.
    """
    block_size = 16
    for lineno, line in enumerate(slurp_hex_file_as_lines("8.txt")):
        blocks = chunk(line, block_size)
        if len(set(blocks)) != len(blocks):
            print("Line {} has dup chunks".format(lineno))
            break
def threaded_aggregate():
    """
    Fetch website screenshot color data with a pool of NUM_THREADS workers.

    The URL list is split with ``chunk`` and every part is handed to
    ``aggregate`` together with its own temporary log file. Once all workers
    have finished, the temporary log files are removed.
    """
    pool = Pool(NUM_THREADS)
    logfiles = ["Thread{}.tmp.log".format(i) for i in range(NUM_THREADS)]
    try:
        urls = chunk(get_urls(), NUM_THREADS)
        pool.starmap(aggregate, zip(urls, logfiles))
    finally:
        # Release the workers even when a task raised.
        pool.close()
        pool.join()

    # Suppress per file: one suppress() around the whole loop would leave it
    # at the first missing file and keep every later log file on disk.
    for logfile in logfiles:
        with contextlib.suppress(FileNotFoundError):
            os.remove(logfile)
def c12_make_block_dictionary(oracle, block_size, prefix):
    """Map the first cipher block of ``prefix + byte`` to ``byte``, for 0..255.

    Args:
        oracle: Callable that encrypts a message and returns the ciphertext.
        block_size: Cipher block size; ``prefix`` must be one byte shorter.
        prefix: The known ``block_size - 1`` bytes preceding the unknown byte.

    Raises:
        RuntimeError: If ``prefix`` does not have length ``block_size - 1``.
    """
    expected_len = block_size - 1
    if len(prefix) != expected_len:
        raise RuntimeError("sanity violation: {} != {}".format(
            block_size - 1, len(prefix)))

    def first_block_for(last_byte):
        probe = bytearray(prefix)
        probe.append(last_byte)
        return chunk(oracle(probe), block_size)[0]

    return {first_block_for(b): b for b in range(0, 256)}
def find_block_after_duplicates(buf, block_size):
    """Return the first block that follows a run of repeated blocks.

    ``buf`` is split into ``block_size`` chunks. Once a chunk has been seen
    that equals the last distinct chunk, the next chunk that differs from it
    is returned.

    Raises:
        RuntimeError: If no such block exists in ``buf``.
    """
    seen_repeat = False
    previous = b''
    for block in chunk(buf, block_size):
        if block != previous:
            if seen_repeat:
                return block
            previous = block
        else:
            # Continue while we keep seeing duplicates
            seen_repeat = True
    raise RuntimeError("Didn't find block after duplicates")
def minimize(self):
    """Run SGD passes over the training data until ``self.halt`` is set.

    When the model is typed and ``self.alpha == 1.0`` the typed data is
    folded into the regular training data once and typed handling is
    switched off. Otherwise, while ``self.is_typed`` is set, every pass first
    runs ``self.sgd`` on the shuffled typed data with its second argument
    ``True``, then on the shuffled training data.
    """
    self.steps = 0
    self.save()
    train_cp = list(self.train)

    merge_typed = self.is_typed and self.alpha == 1.0
    if merge_typed:
        train_cp.extend(self.typed_data)
        message = "alpha 1.0, combining training data, current training data triples {}"
        print(message.format(len(train_cp)))
        self.is_typed = False

    while True:
        if self.is_typed:
            typed_cp = list(self.typed_data)
            np.random.shuffle(typed_cp)
            # For typed regularizer
            self.sgd(util.chunk(typed_cp, self.batch_size), True)
        np.random.shuffle(train_cp)
        self.sgd(util.chunk(train_cp, self.batch_size))
        if self.halt:
            break
def c12():
    """Byte-at-a-time ECB decryption of the S2C12 oracle's secret.

    After discovering the block size and checking for ECB, the secret is
    recovered one byte per iteration. The oracle is queried with a zero
    prefix that places the unknown byte last in its block, and that block is
    looked up in a dictionary built from the already-known preceding bytes.
    The loop stops at the first block with no dictionary match, and the
    recovered bytes are PKCS#7-unpadded and printed.
    """
    unknown_key = get_random_bytes(16)

    def oracle(pt):
        return c12_encryption_oracle(unknown_key, pt)

    # Shim is number of bytes to fill a block
    block_size, _shim_size = c12_discover_block_and_shim_sizes(oracle)
    print("S2C12 - found block size {}".format(block_size))

    is_ecb = c12_detect_ecb(oracle, block_size)
    print("S2C12 - is ECB?: {}".format(is_ecb))

    known_bytes = bytearray()
    for index in range(0, 10 * block_size):
        chunk_index = index % block_size

        # Enough zero bytes to push the next unknown byte to the end of a block.
        needed_pad_len = (block_size - 1) - chunk_index
        needed_pad = bytes(needed_pad_len)

        # The block_size - 1 bytes that precede the unknown byte: zero
        # padding followed by everything recovered so far.
        trick_block = bytearray(block_size) + known_bytes
        trick_block = trick_block[-(block_size - 1):]

        block_dictionary = c12_make_block_dictionary(oracle, block_size,
                                                     trick_block)
        cipher_text = oracle(needed_pad)
        cipher_chunks = chunk(cipher_text, block_size)
        interesting_chunk = cipher_chunks[index // block_size]
        try:
            plain_text_byte = block_dictionary[interesting_chunk]
        except KeyError:
            # No candidate matches: stop recovering bytes.
            break
        known_bytes.append(plain_text_byte)

    plain_text = pkcs7_unpad(known_bytes, block_size)
    # Format the placeholder; it used to be printed literally as "{}" with
    # the message passed as a second print argument.
    print("S2C12 - got msg: {}".format(plain_text.decode('ascii')))
def eval_obj(self, data):
    """Sum the loss of ``self.fprop`` over all of ``data`` and return it.

    The data is processed in batches of ``constants.test_batch_size``. Each
    batch's loss is obtained with ``volatile=True``, moved to the CPU as a
    NumPy value and added to the running total, which is printed per batch.
    """
    print("Evaluating object ...")
    samples = list(data)
    print("All samples: {}".format(len(samples)))

    loss = 0.0
    batches = util.chunk(samples, constants.test_batch_size)
    for count, s in enumerate(batches, 1):
        loss += self.fprop(s, volatile=True).data.cpu().numpy()
        print("Loss (at {}): {}".format(count, loss))

    print("Evaluating object: done")
    return loss
def play(data=None, **kwargs):
    """Play 16-bit audio ``data`` through PyAudio.

    Args:
        data: Sample data to play. When ``None`` it is generated with
            ``gensound(**kwargs)``.
        **kwargs: Forwarded to ``gensound`` only when ``data`` is ``None``.

    The output stream and the PyAudio instance are released even if writing
    fails part-way through.
    """
    if data is None:
        data = gensound(**kwargs)
    sounds.set_samplerate(_DEFAULT_SAMPLERATE)
    # A quarter of a second of frames per buffer.
    bufperiod = int(sounds.SAMPLERATE/4)

    p = pyaudio.PyAudio()
    try:
        out = p.open(
            rate=_DEFAULT_SAMPLERATE,
            format=pyaudio.paInt16,
            channels=_CHANNELS,
            frames_per_buffer=bufperiod,
            output=True,
        )
        try:
            for bs in chunk(data, bufperiod*_CHANNELS):
                out.write(bs)
        finally:
            out.close()
    finally:
        p.terminate()
def aes128_cbc_encode(key, iv, plain_text):
    """Encrypt ``plain_text`` in CBC mode, built on top of AES-ECB.

    Each plaintext block is XORed with the previous ciphertext block (the IV
    for the first one) and then encrypted with ECB.

    Raises:
        RuntimeError: If ``plain_text`` is not a whole number of blocks, or
            ``iv`` is not exactly one block long.
    """
    ecb_cipher = AES.new(key, AES.MODE_ECB)
    block_size = ecb_cipher.block_size

    if len(plain_text) % block_size != 0:
        raise RuntimeError("CBC requires padding to block size")
    if len(iv) != block_size:
        raise RuntimeError("IV must be one block")

    encrypted = []
    feedback = iv
    for block in chunk(plain_text, block_size):
        feedback = ecb_cipher.encrypt(xor_buf(block, feedback))
        encrypted.append(feedback)
    return b''.join(encrypted)
def guess_keysizes(message):
    '''Return top 5 keysize candidates.

    For every key size from 1 to 40 the message is split into chunks of that
    size and the Hamming distance between neighbouring chunks is averaged and
    normalised by the key size. The last chunk is left out of the comparison.
    The key sizes with the smallest normalised distance are returned, best
    first. Key sizes that leave fewer than two comparable chunks are skipped,
    so fewer than five candidates may come back for a very short message.
    '''
    Result = namedtuple('Result', ['distance', 'keysize'])  # pylint: disable=invalid-name
    max_keysize = 40
    result = []
    for keysize in range(1, max_keysize + 1):
        chunks = chunk(message, keysize)
        comparable = chunks[:-1]
        pairs = list(zip(comparable, comparable[1:]))
        if not pairs:
            # Nothing to compare. Scoring this key size would divide by zero
            # or produce a meaningless 0.0 that sorts ahead of every real
            # candidate.
            continue
        distance = sum(
            calculate_hamming_distance(left, right) for left, right in pairs)
        # Average over the number of pairs actually compared.
        normalized_diff_occurence = (distance / len(pairs)) / keysize
        result.append(Result(normalized_diff_occurence, keysize))
    ranked = sorted(result, key=operator.attrgetter('distance'))
    return [candidate.keysize for candidate in ranked[:5]]
def aes128_cbc_decode(key, iv, cipher_text):
    """Decrypt ``cipher_text`` in CBC mode, built on top of AES-ECB.

    Each ciphertext block is decrypted with ECB and XORed with the previous
    ciphertext block (the IV for the first one).

    Raises:
        RuntimeError: If ``cipher_text`` is not a whole number of blocks, or
            ``iv`` is not exactly one block long.
    """
    cipher_text = bytes(cipher_text)
    ecb_cipher = AES.new(key, AES.MODE_ECB)
    block_size = ecb_cipher.block_size

    if len(cipher_text) % block_size != 0:
        raise RuntimeError("CBC requires padding to block size")
    if len(iv) != block_size:
        raise RuntimeError("IV must be one block")

    decrypted = []
    previous = iv
    for block in chunk(cipher_text, block_size):
        decrypted.append(xor_buf(previous, ecb_cipher.decrypt(block)))
        previous = block
    return b''.join(decrypted)
def play(input, dur):
    """Play ``input`` for ``dur`` through the default ALSA PCM device.

    The device is configured for ``CHANNELS`` channels of signed 16-bit
    little-endian samples. The sample rate and period size used are the
    values returned by the device's setters. Samples come from
    ``Sampler(input, rate, dur)`` and are written one period at a time, with
    a message printed whenever a write reports a count other than the period
    size. The device is closed even if playback fails.
    """
    import alsaaudio
    from util import chunk

    out = alsaaudio.PCM()
    try:
        out.setchannels(CHANNELS)
        out.setformat(alsaaudio.PCM_FORMAT_S16_LE)
        samplerate = out.setrate(DEFAULT_SAMPLERATE)
        print(samplerate)
        # Ask for quarter-second periods; use whatever the device reports.
        period = out.setperiodsize(samplerate//4)

        total = 0
        for bs in chunk(Sampler(input, samplerate, dur), period*CHANNELS):
            wrote = out.write(bs)
            total += wrote
            print(wrote, total)
            if wrote != period:
                print("Huh? Only wrote {}/{}".format(wrote, period))
        print('Closing...')
    finally:
        out.close()
def c17():
    """Recover the S3C17 plaintext block by block via ``c17_break_block``.

    The IV is placed in front of the ciphertext blocks so that every block
    can be broken together with its predecessor. The recovered blocks are
    joined, PKCS#7-unpadded and printed.
    """
    block_size = 16
    random_key = get_random_bytes(block_size)
    random_iv = get_random_bytes(block_size)

    cipher_text = c17_encryptor(block_size, random_key, random_iv)
    cipher_blocks = [random_iv] + chunk(cipher_text, block_size)

    def c17_decryptor(cipher_text):
        return c17_decryptor_good_padding(block_size, random_key, random_iv,
                                          cipher_text)

    recovered = [
        c17_break_block(previous, current, c17_decryptor)
        for previous, current in zip(cipher_blocks, cipher_blocks[1:])
    ]
    plain_text = pkcs7_unpad(b''.join(recovered), block_size)
    print("S3C17: {}".format(plain_text))
def sweep(data_dir='experience/'):
    """Train the model once over every experience file in ``data_dir``.

    Hidden files and files whose name starts with ``tmp`` are skipped. The
    remaining files are shuffled, grouped into batches of
    ``args.batch_size`` and passed to ``model.train`` with
    ``args.batch_steps``. Files that have disappeared since the listing are
    dropped from their batch. The average time per processed file is
    printed, and the model is saved at the end.

    Args:
        data_dir: Directory containing the experience files, with or without
            a trailing path separator.
    """
    i = 0
    start_time = time.time()

    # .DS_Store, temp files
    files = [
        f for f in os.listdir(data_dir)
        if not f.startswith((".", "tmp"))
    ]
    np.random.shuffle(files)

    for batch in util.chunk(files, args.batch_size):
        # os.path.join rather than "+": plain concatenation produced wrong
        # paths whenever data_dir lacked a trailing separator.
        paths = [os.path.join(data_dir, f) for f in batch]
        paths = [path for path in paths if os.path.exists(path)]
        print("Step", i)
        model.train(paths, args.batch_steps)
        i += len(paths)

    if i > 0:
        total_time = time.time() - start_time
        print("time/experience", total_time / i)
    model.save()
def _assert_block_size():
    '''Assert that the oracle shows ECB behaviour and a 16 byte block size.

    A message of three blocks of NUL characters is encrypted under a random
    32 byte key. The ciphertext must contain at least one repeated block, and
    ``detect_block_size`` must report 16.
    '''
    # AES keys can be 16, 24, or 32 bytes.
    # This doesn't affect the block size, which is fixed at 16 bytes.
    key_size = 32
    block_size = 16
    key = generate_random_bytes(key_size)

    # Pad out user input with 3 times blocksize
    # This guarantees at least 2 complete blocks of identical data
    user_input = '\x00' * (block_size * 3)
    blocks = chunk(oracle(user_input, key), block_size)
    has_duplicates = len(blocks) != len(set(blocks))
    assert has_duplicates, 'There should be duplicate blocks using ECB.'

    detected_size = detect_block_size(key, oracle)
    assert detected_size == block_size, (
        'This is supposed to be AES encrypted, 16 byte block sizes. '
        f'Detected block size of {detected_size} instead.')
def minimize(self):
    """Train for up to ``self.num_epochs`` epochs with verbose logging.

    Each epoch shuffles a copy of ``self.train`` and takes one optimizer
    step per batch of ``self.batch_size``, clipping the gradient norm at
    100.0 and counting steps in ``self.steps``. After each epoch the model
    is saved and the last gradient norm is reported. Training stops early
    once ``self.halt`` is set.
    """
    train_cp = list(self.train)
    total = len(train_cp)
    print("Length of train_cp: {}".format(total))
    self.steps = 0
    g_norm = 0.0
    print("Minimizing ...")
    for epoch in range(self.num_epochs):
        print( "epoch (minimizing) ... ",epoch)
        np.random.shuffle(train_cp)
        for batch in util.chunk(train_cp, self.batch_size):
            print(" batch {} (minimizing) ... {}".format(self.steps,batch))
            # A batch must never be the whole training set.
            assert len(batch) != len(train_cp)
            self.optimizer.zero_grad()
            loss = self.fprop(batch)
            loss.backward()
            g_norm = torch.nn.utils.clip_grad_norm(
                self.model.parameters(), 100.0)
            self.optimizer.step()
            self.steps += 1
        self.save()
        self.report(g_norm)
        if self.halt:
            break
def encryptData(key, data):
    """Encrypts the apk data using the specified AES key.

    The input is split into chunks of ``blockSize``; each chunk is padded
    with ``util.pad(chunk, paddingSize)``, encrypted with AES-ECB, and the
    encrypted pieces are joined together.
    """
    cipher = AES.new(key, AES.MODE_ECB)
    encrypted_parts = []
    for block in util.chunk(data, constants.blockSize):
        padded = util.pad(block, constants.paddingSize)
        encrypted_parts.append(cipher.encrypt(padded))
    return ''.join(encrypted_parts)
def decryptData(key, encryptedData):
    """Decrypts the apk data using the specified AES key.

    Works on chunks of ``blockSize + paddingSize``: every chunk is decrypted
    with AES-ECB and stripped with ``util.unpad`` before the results are
    concatenated.
    """
    cipher = AES.new(key, AES.MODE_ECB)
    encrypted_block_len = constants.blockSize + constants.paddingSize
    blocks = util.chunk(encryptedData, encrypted_block_len)
    decrypted = [util.unpad(cipher.decrypt(block)) for block in blocks]
    return ''.join(decrypted)