def admin_get():
    block_size = AES.block_size
    final_ct = b''

    # get number of bytes needed to get 'role=user' on a block boundary
    _ct = encrypt_profile_for('')
    offset = 0
    for i in range(1, block_size):
        if len(encrypt_profile_for('A'*i)) != len(_ct):
            offset = i
            break

    # shift 'role=' to end of block and make our email end in '.io'
    dummy = 'A'*(offset+1) + '.io'
    ct = encrypt_profile_for(dummy)
    final_ct += chunks(ct, block_size)[-2]

    # first block contains [email protected]
    # make next block start with 'admin' and look like the last block by padding it
    dummy = 'baz@foobar' + pkcs7_pad(b'admin', block_size).decode('utf8')
    ct = encrypt_profile_for(dummy)

    # put email in ciphertext in first block
    final_ct = chunks(ct, block_size)[0] + final_ct
    final_ct += chunks(ct, block_size)[1]
    return final_ct
def test_chunk_function(self):
    """Tests the function to split a collection into chunks."""
    list_ = [1, 2, 3, 4, 5, 6]
    chunk_bigger_than_len = list(h.chunks(list_, 7))
    chunk_half = list(h.chunks(list_, 3))
    chunk_zero = list(h.chunks(list_, 0))
    self.assertEqual(chunk_bigger_than_len[0], list_)
    self.assertEqual(chunk_half[0], [1, 2, 3])
    self.assertEqual(chunk_half[1], [4, 5, 6])
    self.assertEqual(len(chunk_zero), len(list_))
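# Every snippet in this collection leans on a `chunks` helper imported from
# elsewhere (h.chunks, helpers.chunks, or a bare chunks); its source is not
# included here. The following is only a minimal sketch consistent with
# test_chunk_function above (size 0 falls back to single-element chunks), not
# necessarily the implementation any of these projects actually use.
def chunks(seq, n):
    """Split seq (a list, bytes, or any sliceable sequence) into n-sized pieces."""
    n = max(1, n)  # guard the size-0 case exercised by the test
    return [seq[i:i + n] for i in range(0, len(seq), n)]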
def curveSpec(self, x, target, **kargs):
    if self.ty.__class__ in SYMETRIC_DOMAINS:
        return self.boxSpec(x, target, **kargs)

    batch_size = x.size()[0]
    newTargs = [None for i in range(batch_size)]
    newSpecs = [None for i in range(batch_size)]
    bestSpecs = [None for i in range(batch_size)]

    for i in range(batch_size):
        newTarg = target[i]
        newTargs[i] = newTarg
        newSpec = x[i]
        best_x = newSpec
        best_dist = float("inf")
        for j in range(batch_size):
            potTarg = target[j]
            potSpec = x[j]
            if (not newTarg.data.equal(potTarg.data)) or i == j:
                continue
            curr_dist = (newSpec - potSpec).norm(1).item()  # must experiment with the type of norm here
            if curr_dist <= best_dist:
                best_x = potSpec
                best_dist = curr_dist  # track the running minimum
        newSpecs[i] = newSpec
        bestSpecs[i] = best_x

    new_batch_size = self.sub_batch_size
    batchedTargs = h.chunks(newTargs, new_batch_size)
    batchedSpecs = h.chunks(newSpecs, new_batch_size)
    batchedBest = h.chunks(bestSpecs, new_batch_size)

    def batch(t, s, b):
        t = h.lten(t)
        s = torch.stack(s)
        b = torch.stack(b)
        if h.use_cuda:
            t.cuda()
            s.cuda()
            b.cuda()
        m = self.ty.line(s, b, w=self.curve_width, **kargs)
        return (m, t)

    return [batch(t, s, b)
            for t, s, b in zip(batchedTargs, batchedSpecs, batchedBest)]
def getContributors(conn, wiki, pageIDs):
    sqlTpl = "select rev_actor, count(*) as COUNT from revision where rev_page IN ({}) and rev_timestamp like %s group by rev_actor"
    chunkList = chunks(pageIDs, CHUNK_SIZE)
    wikiContribs = {}
    for chunk in chunkList:
        sqlTemplateFormatted = sqlTpl.format(whereIn(chunk))
        sqlParams = chunk + ['{}%'.format(YEAR)]
        result = runQuery(conn, sqlTemplateFormatted, tuple(sqlParams))
        formattedResult = {f['rev_actor']: f['COUNT'] for f in result}
        for userID in formattedResult:
            userID = int(userID)
            if userID in wikiContribs:
                wikiContribs[userID] += formattedResult[userID]
            else:
                wikiContribs.update({userID: formattedResult[userID]})
    userNames = getUserNames(conn, wikiContribs.keys())
    retData = []
    for user in wikiContribs:
        userName = userNames[user] if user in userNames else 'UNKNOWN - {}'.format(user)
        retData.append('\t'.join(map(str, [wiki, userName, wikiContribs[user]])))
    return '\n'.join(retData)
def padding_like_a_boss():
    plaintext = b''
    blksz = AES.block_size
    blocks = chunks(IV + ciphertext, blksz)
    for i in range(1, len(blocks)):
        plaintext += break_dat_block(blocks[i], blocks[i - 1])
    return pkcs7_unpad(plaintext, blksz)
def write(self, values):
    chunked = chunks(values, 3)
    # convert list from RGB to GRB internally
    # yay code reuse
    filt = RGBtoGRBLambentOutputFilter()
    filtered = [filt.do_filter(i) for i in chunked]
    values = list(itertools.chain.from_iterable(filtered))
    structd = struct.pack('B' * len(values), *values)
    for a in self.addr:
        self.socket.sendto(structd, (a, self.port))
def bruteforce_ecb(blocknum, offset):
    # detect block size and ECB mode
    blksz = find_blocksize(encryption_oracle)
    blockpos = blocknum * blksz
    if not detect_ecb(encryption_oracle(b'A' * (3 * blksz)), blksz):
        raise Exception("Cipher not in ECB mode")

    ciphertext = encryption_oracle(b'')
    plaintext = b'B' * blksz
    pt_block = b''

    # the fun part
    for i in range(blockpos + 1, len(ciphertext) + offset):
        # calculate target ciphertext by correctly aligning things.
        # must prepend dummy to align for random prefix
        dummy = b'A' * offset
        targetpt = dummy + plaintext[-(blksz - i % 16):] if i % 16 != 0 else dummy
        targetct = encryption_oracle(targetpt)
        targetblknum = (len(plaintext) // blksz - 1) + blocknum
        targetblk = chunks(targetct, blksz)[targetblknum]

        # brute-force one byte at a time by comparing each guess to target
        # only difference from p12 is that we compare later in the ciphertext
        for j in range(0, 255 + 1):
            guesspt = targetpt + pt_block + bytes([j])
            guessct = encryption_oracle(guesspt)
            guessblk = chunks(guessct, blksz)[blocknum]
            if guessblk == targetblk:
                pt_block += bytes([j])
                if len(pt_block) == blksz:
                    plaintext += pt_block
                    pt_block = b''
                break

    # add any leftover bytes at the end
    if pt_block:
        plaintext += pt_block

    # Since I'm not trying to decrypt padding, I just strip off the last byte
    return plaintext[blksz:-1]
def write(self, values):
    chunked = list(chunks(values, 3))
    if self.shuffle:
        random.shuffle(chunked)  # operates in place yay
    # materialize the filter so the list can be iterated twice (write, then disconnect)
    self.peripherals = list(filter(None, [self._connect_single(a) for a in self.device_addresses]))
    for p in self.peripherals:
        value = chunked.pop()
        self._write_rgb(*value, periph=p)
    for p in self.peripherals:
        p.disconnect()
def see_exp(experiment_name, page=1):
    '''An experiment is selected from the main page and the result is given as a
    variable on this page. This page shows the files associated with this experiment'''
    files = Files.select().join(Experiment).where(Experiment.name == experiment_name).order_by(Files.discovered_date.desc()).paginate(int(page), 45)
    # structure the files into groups of three for the template
    file_struct = helpers.chunks(files.iterator(), 3)
    # kwargs = {'files': files}
    return render_template("experiment_view.html", files=file_struct, page=int(page), expname=experiment_name)
def bruteforce_ecb():
    # detect block size and ECB mode
    block_size = find_blocksize(encryption_oracle)
    if not detect_ecb(encryption_oracle(b'A' * (3 * block_size)), block_size):
        raise Exception("Cipher not in ECB mode")

    # initialize things
    ciphertext = encryption_oracle(b'')
    plaintext = b'A' * block_size
    pt_block = b''

    # the fun part
    for i in range(1, len(ciphertext) + 1):
        # calculate target ciphertext by correctly aligning things
        targetpt = plaintext[-(block_size - i % 16):] if i % 16 != 0 else b''
        targetct = encryption_oracle(targetpt)
        targetpos = (len(plaintext) // block_size - 1)
        targetblk = chunks(targetct, block_size)[targetpos]

        # brute-force one byte at a time by comparing each guess to target
        for j in range(0, 255 + 1):
            guesspt = targetpt + pt_block + bytes([j])
            guessct = encryption_oracle(guesspt)
            guessblk = chunks(guessct, block_size)[0]
            if guessblk == targetblk:
                pt_block += bytes([j])
                if len(pt_block) == block_size:
                    plaintext += pt_block
                    pt_block = b''
                break

    # add any leftover bytes at the end
    if pt_block:
        plaintext += pt_block

    # Since I'm not trying to decrypt padding, I just strip off the last byte
    return plaintext[block_size:-1]
def find_known_index(cryptf):
    """
    Find the index of our known plaintext in the ciphertext output, given the
    encryption oracle cryptf.

    Returns:
    blocknum -- index of block where our plaintext begins
    offset   -- N bytes needed to get known plaintext to start on the block boundary
    """
    ct1 = cryptf(b'A')
    ct2 = cryptf(b'B')
    bsz = find_blocksize(cryptf)

    # find where our plaintext block is by comparing two ciphertexts with equal size inputs
    blocknum = -1
    for i, (blk1, blk2) in enumerate(zip(chunks(ct1, bsz), chunks(ct2, bsz))):
        if blk1 != blk2:
            blocknum = i + 1
            break
    else:
        raise Exception("Could not find delta block!")

    nextblock = chunks(ct1, bsz)[blocknum]

    # find offset by creating blocks of equal size but different last byte
    # when the next block differs, we know we've passed a block boundary
    offset = -1
    for i in range(bsz + 1):
        nextblock1 = chunks(cryptf(b'A' * i + b'A'), bsz)[blocknum]
        nextblock2 = chunks(cryptf(b'A' * i + b'B'), bsz)[blocknum]
        if nextblock1 != nextblock2:
            offset = i
            break
    else:
        raise Exception("Could not find alignment offset!")

    return blocknum, offset
def applyParallelFetch(date_start):
    chunked_stocks = chunks(sorted(stocks.keys()), 500)
    n_screens = len(chunked_stocks)
    print "Number of screens in parallel computation: " + str(n_screens)
    PIK = "pickled_stock_sequences.dat"
    pick_dir = os.getcwd() + "/Pickle/"
    if not os.path.exists(pick_dir):
        os.makedirs(pick_dir)
    pick_dir += PIK
    with open(pick_dir, "wb") as f:
        pickle.dump(chunked_stocks, f)
    for i in range(0, n_screens):
        execution_string = "screen -d -m -L python -c \"import fetchData; fetchData.fetch('" + date_start + "', stock_sequence='" + str(i) + "\')\""
        print execution_string
        subprocess.call(execution_string, shell=True)
def manage(self, elements, process_name):
    """Manage the processing, splitting the task between the workers.

    Args:
        elements (list): elements that feed the process.
        process_name (str): name of the task to be parallelized.
    """
    process = getattr(self.processor, process_name)
    n_bundles = int(len(elements) / self.n_workers)
    bundles = list(helpers.chunks(elements, n_bundles))
    for bundle in bundles:
        self.queue.put(bundle)
        worker = Task(process, self.queue)
        worker.start()
        self.workers.append(worker)
    while threading.active_count() > self.thread_count:
        pass
def getPageIds(conn, pageTitles):
    sqlTpl = "select page_id from page where page_namespace=0 and page_title in ({})"
    chunkList = chunks(pageTitles, CHUNK_SIZE)
    pageIDs = []
    for chunk in chunkList:
        formattedTitles = [f.replace(' ', '_') for f in chunk]
        sqlTemplateFormatted = sqlTpl.format(whereIn(formattedTitles))
        result = runQuery(conn, sqlTemplateFormatted, tuple(formattedTitles))
        pageIDs.extend([f['page_id'] for f in result if 'page_id' in f])
        #print(result)
    return pageIDs
def getUserNames(conn, actorIDs):
    sqlTpl = "select actor_id, actor_name from actor_revision where actor_id in ({})"
    actorIDs = list(set(actorIDs))
    chunkList = chunks(actorIDs, CHUNK_SIZE)
    wikiContribs = {}
    for chunk in chunkList:
        sqlTemplateFormatted = sqlTpl.format(whereIn(chunk))
        result = runQuery(conn, sqlTemplateFormatted, tuple(chunk))
        formattedResult = {f['actor_id']: encode_if_necessary(f['actor_name']) for f in result}
        wikiContribs.update(formattedResult)
    return wikiContribs
def train_models(corpus, model_name="models_compressed.pkl"):
    """Takes in a preferably long string (corpus/training data), splits that
    string into a list of words, and "chunkifies" it into a list of 2-element
    lists. Finally we create a dictionary, where each key = first elem and each
    value = Counter([second elems]).

    Will save/pickle the model by default ('models_compressed.pkl'). Set the
    second argument to a falsy value if you do not wish to save the model.
    """
    # "preparation" step
    # word is in WORDS
    global WORDS
    WORDS = helpers.re_split(corpus)

    # first model -> P(word)
    global WORDS_MODEL
    WORDS_MODEL = collections.Counter(WORDS)

    # another preparation step
    # wordA, wordB are in WORDS
    global WORD_TUPLES
    WORD_TUPLES = list(helpers.chunks(WORDS, 2))

    # second model -> P(wordA|wordB)
    global WORD_TUPLES_MODEL
    WORD_TUPLES_MODEL = {first: collections.Counter()
                         for first, second in WORD_TUPLES}

    for tup in WORD_TUPLES:
        try:
            WORD_TUPLES_MODEL[tup[0]].update([tup[1]])
        except:
            # hack-y fix for uneven # of elements in WORD_TUPLES
            pass

    if model_name:
        save_models(os.path.join(os.path.dirname(__file__), model_name))
def train_models(corpus, path=None):
    """Takes in a preferably long string (corpus), splits that string into a
    list, "chunks" (partitions) it into a list of 2-element lists, and creates
    a dictionary where each key = first elem; each value = Counter([second elems]).

    Optionally include a path if you intend on pickling the model (RECOMMENDED!)
    """
    # "preparation" step
    # word is in WORDS
    global WORDS
    WORDS = helpers.re_split(corpus)

    # first model -> P(word)
    global WORDS_MODEL
    WORDS_MODEL = collections.Counter(WORDS)

    # another preparation step
    # wordA, wordB are in WORDS
    global WORD_TUPLES
    WORD_TUPLES = list(helpers.chunks(WORDS, 2))

    # second model -> P(wordA|wordB)
    global WORD_TUPLES_MODEL
    WORD_TUPLES_MODEL = {first: collections.Counter()
                         for first, second in WORD_TUPLES}

    for tup in WORD_TUPLES:
        try:
            WORD_TUPLES_MODEL[tup[0]].update([tup[1]])
        except:
            # hack-y fix for uneven # of elements in WORD_TUPLES
            pass

    save_models(os.path.dirname(__file__))
def test_batch_get_items_one_table(self):
    # If you want to stress test batch_get_items_one_table, use bigger numbers
    num_of_items = 5
    query_from = 2
    query_till = 4
    expected_items = query_till - query_from

    # Write items
    operations = []
    query_keys = []
    for i in range(num_of_items):
        item = {self.HASH_COL: f'cat{i % 2}', self.RANGE_COL: i}
        operations.append({'Put': self.dynamo_client.build_put_query(item)})
        query_keys.append(item)

    for operations_chunk in chunks(operations, 10):
        self.dynamo_client.dynamo_client.transact_write_items(TransactItems=operations_chunk)
        time.sleep(1)  # cause the table has 10 write/sec capacity

    # Batch get items
    results = self.dynamo_client.batch_get_items_one_table(
        keys_list=query_keys[query_from:query_till])

    self.assertEqual(expected_items, len(results))
def cbc_decrypt(ct, cipher, iv):
    """
    Decrypt ciphertext bytes in CBC mode

    Arguments:
    ct     -- Ciphertext bytes to decrypt
    cipher -- Cipher object used to decrypt (must expose decrypt(ciphertext) method and block_size member)
    iv     -- Initialization vector

    Returns:
    Bytes decrypted in CBC mode
    """
    pt = []
    ct = [iv] + chunks(ct, cipher.block_size)
    for i in range(1, len(ct)):
        pt += [fixed_xor(ct[i-1], cipher.decrypt(ct[i]))]
    return flatten(pt)
def cbc_encrypt(pt, cipher, iv):
    """
    Encrypt plaintext bytes in CBC mode

    Arguments:
    pt     -- Plaintext bytes to encrypt
    cipher -- Cipher object used to encrypt (must expose encrypt(plaintext) method and block_size member)
    iv     -- Initialization vector

    Returns:
    Bytes encrypted in CBC mode
    """
    ct = [iv]
    pt = chunks(pt, cipher.block_size)
    for i in range(len(pt)):
        ct += [cipher.encrypt(bytes(fixed_xor(pt[i], ct[i])))]
    return flatten(ct[1:])
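# A rough round-trip sketch for the CBC helpers above, not part of the original
# snippets. It assumes a PyCryptodome AES object in ECB mode as the raw block
# cipher, plus the pkcs7_pad/pkcs7_unpad, flatten, and fixed_xor helpers used
# elsewhere in this collection; the bytes() calls cover the case where flatten
# returns a list of ints rather than bytes.
from os import urandom
from Crypto.Cipher import AES

def cbc_roundtrip_demo(message=b'attack at dawn'):
    key, iv = urandom(16), urandom(16)
    cipher = AES.new(key, AES.MODE_ECB)  # chaining itself is done by cbc_encrypt/cbc_decrypt
    ct = bytes(cbc_encrypt(pkcs7_pad(message, AES.block_size), cipher, iv))
    pt = pkcs7_unpad(bytes(cbc_decrypt(ct, cipher, iv)), AES.block_size)
    assert pt == message
    return ct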
                       redirects='yes')['entities']
    for entity in reqData:
        entData = reqData[entity]
        currSitelinks = {}
        if 'sitelinks' in entData:
            currSitelinks = {f: entData['sitelinks'][f]['title']
                             for f in entData['sitelinks']
                             if f.endswith('wiki') and not f == 'commonswiki'}
        ALL_DATA.update({entity: currSitelinks})


with open(fileNameInput, 'r', encoding='utf-8') as inputFile:
    wdItemList = json.loads(inputFile.read())

allChunks = chunks(wdItemList, CHUNK_SIZE)
for chunk in allChunks:
    oneBatch(chunk)

with open(fileOutputRaw, 'w', encoding='utf-8') as fileSave:
    fileSave.write(json.dumps(ALL_DATA, ensure_ascii=False))

formatData()

with open(fileOutput, 'w', encoding='utf-8') as fileSave:
    fileSave.write(json.dumps(BY_LANG, ensure_ascii=False))
def dispatch (self, level, type, parallelization):

    # get directory
    directory = self.directory (level, type)

    # copy parallelization for further modifications
    parallelization = copy.deepcopy (parallelization)

    # if available, use scheduler's dispatch routine
    if self.scheduler.dispatch != None:

        # set label
        label = self.label (level, type)

        jobs = [ self.job (args, wrap=False) for args in self.batch ]

        # dispatch and get info
        info = self.scheduler.dispatch (self.batch, jobs, directory, label, parallelization)

        # empty queue
        self.batch = []

        return info

    # if batch mode -> submit batch job(s)
    if local.cluster and parallelization.batch:

        # suffix format for batch jobs and ensembles
        suffix_format = '.%s%03d'

        # split batch job into smaller batches according to 'parallelization.batchmax'
        if parallelization.batchmax:
            batches = helpers.chunks (self.batch, parallelization.batchmax)
        else:
            batches = [ self.batch [:] ]

        # if merging into ensembles is disabled
        if not local.ensembles:

            # submit each batch
            for index, batch in enumerate (batches):

                # set batch in parallelization (last batch might be smaller)
                parallelization.batch = len (batch)

                # construct batch job from all jobs in the current batch
                batch = '\n'.join ( [ self.wrap (self.job (args), args ['sample']) for args in batch ] )

                # set suffix
                suffix = suffix_format % ('b', index + 1)

                # set label
                label = self.label (level, type, suffix=suffix)

                # submit
                self.execute ( self.submit (batch, parallelization, label, directory, suffix=suffix, timer=1), directory )

            # empty queue
            self.batch = []

            return ''

        # else if merging into ensembles is enabled
        else:

            # check if blocks need to be split into subblocks
            subblocks = max (1, local.min_cores / parallelization.cores)

            # form blocks each containing grouped 'subblocks' batch jobs
            blocks = helpers.chunks (batches, subblocks)

            # warn if the first block is not fully utilized
            if len (blocks) > 1:
                utilized = parallelization.cores * len (blocks [0]) >= local.min_cores
            else:
                return 'SKIPPED'
            '''
            if not utilized:
                message = 'Requested number of cores and samples does not fully utilize the smallest block'
                details = '%s * %s < %s' % ( helpers.intf (parallelization.cores), helpers.intf (len (blocks [0])), helpers.intf (local.min_cores) )
                advice = 'Increase parallelization ratio for this level'
                helpers.warning (message, details=details, advice=advice)
                # TODO: in such case, should batchsize be reduced (for all under-utilized blocks) to improve the utilization?
            '''

            # check if the number of sub-blocks does not exceed machine limit
            if local.max_ensemble != None and len (blocks) * subblocks > local.max_ensemble:
                message = 'Maximum number of ensemble jobs exceeded:'
                details = '%d > %d' % (len (blocks) * subblocks, local.max_ensemble)
                advice = 'Reduce the number of ensemble jobs or use more nodes per job and apply batching.'
                helpers.error (message, details, advice)

            # split blocks into ensembles (with ensemble sizes being powers of 2)
            binary = bin ( len (blocks) )
            decomposition = [ 2**(len(binary) - 1 - power) if flag == '1' else 0 for power, flag in enumerate(binary) ]
            decomposition = [ size for size in decomposition if size != 0 ]

            # respect parallelization.mergemax
            filtered = []
            for i, size in enumerate (decomposition):
                if parallelization.mergemax == None or size * subblocks <= parallelization.mergemax:
                    filtered += [size]
                else:
                    chunks = 2 ** int ( math.ceil ( math.log ( float (size * subblocks) / parallelization.mergemax, 2) ) )
                    filtered += [ size / chunks ] * chunks
            decomposition = filtered

            # submit each ensemble
            index = 0
            submitted = 0
            for i, merge in enumerate (decomposition):

                # set suffix
                suffix = suffix_format % ('e', i + 1)

                # set label
                label = self.label (level, type, suffix=suffix)

                # initialize ensemble job
                ensemble = ''

                # set batch and merge in parallelization
                parallelization.batch = len (blocks [0][0])
                parallelization.merge = merge

                # submit each block
                for block, batches in enumerate (blocks [submitted : submitted + merge]):

                    # header for the subensemble job
                    if local.block != None:
                        ensemble += '\n# === BLOCK %d\n' % block

                    # initialize subensemble job
                    subensemble = ''

                    # determine the shape of a subblock
                    shape = local.get_shape (parallelization.nodes)

                    # add corner initialization
                    if shape != None:
                        subensemble += local.corners % { 'block' : block, 'shape' : shape } + '\n'

                    # submit each batch
                    for corner, batch in enumerate (batches):

                        # increment 'index' counter
                        index += 1

                        # header for the batch job
                        subensemble += '\n# === BATCH JOB %d' % index

                        # additional header information
                        if local.block != None:
                            subensemble += ' [block %d, corner %d]' % (block, corner)

                        # end of header for the batch job
                        subensemble += '\n'

                        # append additional parameters to 'args'
                        jobs = []
                        for args in batch:
                            # add batch job of 'shape' to 'corner' within block which is part of an entire ensemble
                            jobs.append ( self.wrap (self.job (args, block, corner, shape), args ['sample']) )

                        # construct batch job
                        batch = '\n'.join (jobs)

                        # add timer
                        if local.timer:
                            batch = local.timer.rstrip() % { 'job' : '\n\n' + batch + '\n', 'timerfile' : self.timerfile + suffix_format % ('b', index) }

                        # fork to background (such that other batch jobs in subensemble could proceed)
                        if subblocks > 1:
                            batch = self.fork (batch)

                        # add batch job to the subensemble
                        subensemble += batch

                    # add synchronization
                    if subblocks > 1:
                        subensemble += self.sync ()

                    # add block booting and block freeing
                    subensemble = self.boot (subensemble, block)

                    # fork to background (such that other subensemble jobs in ensemble could proceed)
                    if merge > 1:
                        subensemble = self.fork (subensemble)

                    # add batch job to the ensemble
                    ensemble += subensemble

                # add synchronization
                if merge > 1:
                    ensemble += self.sync()

                # copy parallelization to prevent modifications
                submit_parallelization = copy.deepcopy (parallelization)

                # adjust parallelization according to the number of subblocks
                submit_parallelization.nodes *= subblocks
                submit_parallelization.cores *= subblocks

                # submit
                self.execute ( self.submit (ensemble, submit_parallelization, label, directory, suffix=suffix, boot=0, timer=0), directory )

                # update 'submitted' counter by the number of blocks consumed by this ensemble
                submitted += merge

            # empty queue
            self.batch = []

            # return information about ensembles
            from helpers import intf
            info = [ '%s (%s N)' % ( intf (subblocks * merge), intf (parallelization.nodes * subblocks * merge) ) for merge in decomposition ]
            return ' + '.join (info) + (' [not fully utilized]' if not utilized else '')

    return ''
def split_rgbvals(self, vals):
    return [i for i in chunks(vals, 3)]
def main():
    args = parse_arguments()
    api = get_twitter_api()
    usernames = load_file(args.filename)
    users_chunks = chunks(usernames, args.max_query_size)
    fetch_data(users_chunks, args.minutes_to_sleep, api, args.statuses)
import numpy as np
import pandas as pd
import requests
import math
import xlsxwriter
from scipy import stats

from helpers import portfolio_input, chunks

# Retrieve stocks
stocks = pd.read_csv('sp_500_stocks.csv')

# Retrieve API Key
from secrets import IEX_CLOUD_API_TOKEN

# Split up 500 stocks into groups of 100
symbol_groups = list(chunks(stocks['Ticker'], 100))
symbol_strings = []
for i in range(0, len(symbol_groups)):
    symbol_strings.append(','.join(symbol_groups[i]))

# Create DataFrame
columns = ['Ticker', 'Price', 'Price-to-Earnings Ratio', 'Number of Shares to Buy']
final_dataframe = pd.DataFrame(columns=columns)

# Make a batch API call for each group of symbols
for symbol_string in symbol_strings:
    batch_api_url = f"https://sandbox.iexapis.com/stable/stock/market/batch/?symbols={symbol_string}&types=quote&token={IEX_CLOUD_API_TOKEN}"
    data = requests.get(batch_api_url).json()
def detect_ecb(ct, blksz):
    blocks = chunks(ct, blksz)
    for i in range(len(blocks) - 1):
        if blocks[i] in blocks[i + 1:]:
            return True
    return False
def ctr_crypt(msg, cipher, nonce):
    ct = b''
    for blk in chunks(msg, cipher.block_size):
        ct += fixed_xor(cipher.encrypt(nonce)[:len(blk)], blk)
        nonce = next_nonce(nonce)
    return ct
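# A hedged usage note for ctr_crypt above, not part of the original snippets:
# CTR mode is symmetric, so the same call both encrypts and decrypts. This demo
# assumes a PyCryptodome AES object in ECB mode, a 16-byte all-zero initial
# counter block, and the next_nonce/fixed_xor helpers these snippets rely on.
from os import urandom
from Crypto.Cipher import AES

def ctr_roundtrip_demo(message=b'yellow submarine plus a few more bytes'):
    cipher = AES.new(urandom(16), AES.MODE_ECB)
    nonce = bytes(16)  # initial counter block (assumed format)
    ct = ctr_crypt(message, cipher, nonce)
    assert ctr_crypt(ct, cipher, nonce) == message  # the same function decrypts
    return ct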