def data_to_batches(self):
    """ Create batches from data stored in a (numpy) array. """
    self.num_batches = int((self.data.size - self.history_size) /
                           (self.batch_size * self.seq_length))
    # Raise an error when the data array is too small to build a single batch
    # (was `assert False`, which disappears under `python -O`)
    if self.num_batches == 0:
        raise ValueError("Cannot create batches ==> data size={}, "
                         "batch size={}, segment size={}".format(
                             self.data.size, self.batch_size, self.seq_length))
    self.data = self.data[:(self.num_batches * self.batch_size *
                            self.seq_length) + self.history_size]

    # Drop the last word from the input chunk and shift the target words
    input = self.data[:-1]
    target = np.copy(self.data)
    target = target[self.history_size:]

    input = np.array(
        chunk(input, (self.num_batches * self.seq_length) + self.history_size - 1,
              overlap=self.history_size - 1))
    target = np.array(
        chunk(target, (self.num_batches * self.seq_length), overlap=0))
    self.input = chunk(input, self.seq_length + self.history_size - 1,
                       overlap=self.history_size - 1)
    self.target = chunk(target, self.seq_length, overlap=0)
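# The snippets in this collection lean on a `chunk` helper that appears with
# several different signatures. For the overlapping variant used above, here
# is a minimal sketch consistent with the call sites -- an assumption, not the
# project's actual implementation (which may, e.g., treat a ragged final
# piece differently):
def chunk(seq, size, overlap=0):
    """Split seq into pieces of `size` elements; consecutive pieces share
    `overlap` elements, i.e. a sliding window when overlap > 0."""
    if size <= overlap:
        raise ValueError("size must be larger than overlap")
    step = size - overlap
    return [seq[i:i + size] for i in range(0, len(seq) - overlap, step)]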
def fairy_upload():
    data = request.data
    header = data[:(32 + 64)].decode()
    Cached = cache_uploadtoken.get(".".join(
        ["fairy", "security", "checkinfo", header[:32]]))
    if not Cached:
        raise Exceptions.InvalidToken()
    imagehex = header[32:]
    if model.gettexture_hash(imagehex):
        return Response(json.dumps({
            "error": "ForbiddenOperationException",
            "errorMessage": "Quantum repeatability does not hold here."
        }), status=403, mimetype='application/json; charset=utf-8')
    hexed = hashlib.sha256(data[(32 + 64):]).hexdigest()
    if Cached.get("sha256") != hexed:
        return Response(json.dumps({
            "error": "ForbiddenOperationException",
            "errorMessage": "Hash value does not match."
        }), status=403, mimetype='application/json; charset=utf-8')
    size = Cached.get("size")
    height = size.get("height")
    width = size.get("width")
    if len(data) - (32 + 64 + 8) != ((height * width) * 4):
        return Response(json.dumps({
            "error": "ForbiddenOperationException",
            "errorMessage": "Parsing does not provide a sufficient number of bytes."
        }), status=403, mimetype='application/json; charset=utf-8')
    if (len(data) - (32 + 64)) % 4 != 0:
        return Response(json.dumps({
            "error": "ForbiddenOperationException",
            "errorMessage": "Image is not correctly encoded."
        }), status=403, mimetype='application/json; charset=utf-8')
    # Reject unless both dimensions are valid; the original `not A and B`
    # only rejected when the height was invalid *and* the width was valid.
    if not (((height % 32 == 0) or (height % 17 == 0))
            and ((width % 64 == 0) or (width % 22 == 0))):
        return Response(json.dumps({
            "error": "ForbiddenOperationException",
            "errorMessage": "Image is not correctly encoded."
        }), status=403, mimetype='application/json; charset=utf-8')
    if height % 17 == 0:
        height = int(height / 17) * 32  # was `width / 17`, a likely copy-paste slip
    if width % 22 == 0:
        width = int(width / 22) * 32
    image = Image.new('RGBA', (width, height), (255, 255, 255, 255))
    draw = ImageDraw.Draw(image)
    # Skip the first two 4-byte groups (the 8 header bytes accounted for above)
    dots = utils.chunk(list(data[(32 + 64):]), 4)[2:]
    chunks = utils.chunk(dots, height)
    for x in range(len(chunks)):
        for y in range(len(chunks[x])):
            draw.point((x, y), fill=(chunks[x][y][1], chunks[x][y][2],
                                     chunks[x][y][3], chunks[x][y][0]))
    image.save("".join(["./data/texture/", imagehex, ".png"]), "PNG")
    skintype = Cached.get("type")
    skinmodel = None  # avoid a NameError below when skintype is not "SKIN"
    if skintype == "SKIN" and height % 64 == 0:
        skinmodel = ["STEVE", "ALEX"][sv3d.isSilmSkin(image)]
    texture = model.textures(
        userid=Token.gettoken_strict(Cached.get("accessToken")).get("user"),
        photoname=Cached.get("name"),
        height=height,
        width=width,
        model=skinmodel,
        type=skintype,
        hash=hexed
    )
    texture.save()
    return Response(model.kf_format_textures(texture),
                    mimetype='application/json; charset=utf-8')
def parse(self, raw_data):
    chunks = raw_data.split('$')
    self.name = chunks[0]
    self.timeout = utils.chunk(chunks, 1, mapping=utils.convert_string_to_integer)
    self.begin_time = utils.chunk(chunks, 2, mapping=utils.convert_string_to_integer)
    self.end_time = utils.chunk(chunks, 3, mapping=utils.convert_string_to_integer)
    self.begin_date = utils.chunk(chunks, 4, mapping=utils.convert_string_to_integer)
    self.end_date = utils.chunk(chunks, 5, mapping=utils.convert_string_to_integer)
    self.begin_lock_date = utils.chunk(
        chunks, 6, mapping=utils.convert_string_to_integer)
    self.end_lock_date = utils.chunk(
        chunks, 7, mapping=utils.convert_string_to_integer)
    self.day_mask = utils.chunk(chunks, 8, mapping=utils.convert_string_to_integer)
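# Note that `utils.chunk` is used here with an entirely different signature:
# (parts, index, mapping=...) reads one field out of a split record. A
# plausible standalone rendition of that behavior, under the hypothetical
# name `chunk_field` -- purely an assumption drawn from this call site:
def chunk_field(parts, index, mapping=None):
    """Safely fetch parts[index], applying mapping to it when given.
    Returns None for a missing or empty field."""
    if index >= len(parts) or parts[index] == '':
        return None
    return mapping(parts[index]) if mapping is not None else parts[index]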
async def osu_toprange(ctx: Context, rankstart: int = 1, rankend: int = 10,
                       *, u: Optional[str] = None):
    if (rankstart < 1 or rankend < 1 or rankend > 100
            or rankstart > rankend or rankend - rankstart >= 30):
        return await ctx.send(
            'invalid score rank range (max 30 scores, ranks must be between 1-100)')
    if not u:
        u = get_osuid(ctx)
    if not u:
        return await ctx.send('invalid user')
    topScores = get_top_scores(u, rankend)
    if not topScores:
        return await ctx.send(
            f'No top scores found for user {u}. '
            'Make sure to provide a valid osu username/id.')
    scores = topScores[rankstart - 1:rankend]
    chunkedScores = chunk(scores, 10)
    user = get_user(u)
    first = True
    for scoreChunk in chunkedScores:
        toprangeEmbed = Embed(
            type='rich',
            color=EMBED_COLOR,
            description='\n'.join(map(format_score_inline, scoreChunk))
        )
        if first:
            toprangeEmbed.set_author(
                name=f'Top {rankstart} - {rankend} scores for {user["username"]}',
                url=osu.profile_link(user["user_id"]),
                icon_url=osu.profile_thumb(user["user_id"]),
            )
            first = False
        await ctx.send(embed=toprangeEmbed)
def roll_list(l, n):
    """Split l into n rows of n - 1 items and read them back along wrapped
    diagonals: row j contributes column (i + j) mod (n - 1)."""
    res = []
    l = list(chunk(l, n - 1))
    for i in range(n - 1):
        for j in range(n):
            res.append(l[j][(i + j) % (n - 1)])
    return res
def get_batch_stream(self, batch_size, n_repeat):
    '''
    Generator over the DataSource.
    :param batch_size:
    :param n_repeat: number of times to iterate over the whole dataset
    :yield: Xs, a dict mapping field_name to SparseInput or DenseInput;
            Ys, an array of labels
    '''
    n_repeat = n_repeat if n_repeat > 0 else sys.maxsize
    for _ in range(n_repeat):
        random.shuffle(self._lines)
        for batch_lines in utils.chunk(self._lines, batch_size):
            Xs = {}
            Ys = []
            for field_name in CATEGORY_FIELDS:
                Xs[field_name] = SparseInput(len(batch_lines), [], [], [])
            for field_name in DENSE_FIELDS:
                # dense input takes the form [[example1_value], [example2_value], ...]
                Xs[field_name] = []
            for example_id, line in enumerate(batch_lines):
                features, label = self.parse_line(line)
                Ys.append(label)
                for field in CATEGORY_FIELDS:
                    # Handles missing values: a missing field is effectively set to 0
                    if field in features:
                        Xs[field].add(example_id, features[field], 1)
                for field in DENSE_FIELDS:
                    Xs[field].append([features[field]])
            yield Xs, np.asarray(Ys)
def wreath_yor_par(alpha, _parts, prefix='/local/hopan/', par=8):
    '''
    alpha: weak partition of 8 into 3 parts?
    _parts: list of partitions of each part of alpha
    Return a dict mapping each group element of S_8 -> rep.
    The rep actually needs to be a dictionary of tuples (i, j) -> matrix,
    where i, j denote the (i, j) block in the matrix.
    Ex:
        alpha = (0, 0, 0, 0, 1, 1, 1, 1)
        _parts = [(2,2), (3,1)]
    '''
    # print('Wreath yor with {} processes'.format(par))
    n = sum(alpha)
    _sn = perm2.sn(n, prefix)
    young_sub = young_subgroup_perm(alpha)
    young_sub_set = tup_set(young_sub)
    young_yor = young_subgroup_yor(alpha, _parts, os.path.join(prefix, 'irreps'))
    reps = coset_reps(_sn, young_sub)
    sn_chunks = chunk(_sn, par)

    manager = Manager()
    rep_dict = manager.dict()
    nprocs = []
    for i in range(par):
        perms = sn_chunks[i]
        proc = Process(target=_proc_yor,
                       args=[perms, young_yor, young_sub_set, reps, rep_dict])
        nprocs.append(proc)
    for p in nprocs:
        p.start()
    for p in nprocs:
        p.join()
    return rep_dict
def _batch_by_source(pairs, *, batch_size=None):
    by_source_length = lambda pair: len(pair[0])
    pairs = sorted(pairs, key=by_source_length)
    # The inner loop variable is renamed to `minibatch`; the original
    # comprehension rebound `pairs`, which worked but read as a bug.
    return tuple(
        _make_minibatch_pair(minibatch)
        for _, batch in groupby(pairs, key=by_source_length)
        for minibatch in chunk(batch, batch_size))
def generate(seed=b'That day', n=6, max_len=(1000 * 5),
             show_metrics=True) -> Generator[bytes, None, None]:
    start = time()
    tokens: List[bytes] = [m.group(0) for m in list(chunk(seed))[-n:]]
    no_tokens = len(tokens)
    succ = np.array([0 for _ in range(n + 1)], dtype='uint32')
    ps: Dict[bytes, float] = get_chunk_ps()
    chunks: ndarray = np.array(list(ps.keys()))
    chunk_ps: ndarray = np.array(list(ps.values()))
    with ThreadPool(max_workers=NO_CPUS, thread_name_prefix='markov/w') as pool:
        # submit takes the callable positionally; passing it as fn=... breaks
        # on newer Python versions where fn is positional-only
        lookup = ChainMap(*[
            task.result()
            for task in [pool.submit(get_nchunks_ps, n=i)
                         for i in range(n, 0, -1)]
        ])
    yield seed
    # token generation
    while no_tokens * AVG_CHUNK_LEN < max_len:
        found = False
        for m in range(n, 0, -1):
            ngram = tuple(tokens[-m:])
            maybe_ps: Optional[Dict[bytes, float]] = lookup.get(ngram, None)
            if maybe_ps is not None and len(maybe_ps) > 1:
                found = True
                succ[m] += 1
                next_chunk: bytes = choice(
                    a=list(maybe_ps.keys()),
                    p=list(maybe_ps.values()),
                )
                yield next_chunk
                tokens.append(next_chunk)
                tokens = tokens[-n:]
                break
        if not found:
            succ[0] += 1
            next_chunk = choice(a=chunks, p=chunk_ps)
            yield next_chunk
            tokens.append(next_chunk)
            tokens = tokens[-n:]
        no_tokens += 1
    if show_metrics:
        # metrics
        log.info('-' * (1 + 6 + 15 + 2))
        log.info('%9s%s' % (' ', 'METRICS'))
        log.info('-' * (1 + 6 + 15 + 2))
        log.info('%-1s %-6s %-15s' % ('#', 'PROB', 'NO EXAMPLES'))
        log.info('%-1s %-6s %-15s' % ('-' * 1, '-' * 6, '-' * 15))
        no_gen_tokens: int = sum(succ)
        for i in range(n, -1, -1):
            log.info('%-1d %-6.4f %-15d' % (i, succ[i] / no_gen_tokens, succ[i]))
    log.debug(f'[finished] generating text (took {time() - start:4.2f}s)')
def test(self, iter_no):
    write_log_file(self.log_path, "Starting testing ...")
    test_query_ids = self.text_data.split_ids['test']
    success = {1: 0, 5: 0, 10: 0}
    total_test_scores = []
    test_start = datetime.now()
    for test_chunk in chunk(test_query_ids, 100):
        one_chunk_scores = []
        for i, query_id in enumerate(test_chunk):
            rank_ids, one_row_scores = self.retrieve_rank(
                query_id, test_chunk, self.text_data, self.code_data)
            one_chunk_scores.append(one_row_scores)
            for k in success.keys():
                if query_id in rank_ids[:k]:
                    success[k] += 1
        total_test_scores.append(one_chunk_scores)
    write_log_file(
        self.log_path,
        "\n&Testing Iteration {}: finished for {} queries. Time elapsed = {}."
        .format(iter_no, len(test_query_ids), datetime.now() - test_start))
    all_mrr = []
    for i in range(len(total_test_scores)):
        one_chunk_square_score = total_test_scores[i]
        one_chunk_square_score = np.vstack(one_chunk_square_score)
        assert one_chunk_square_score.shape[0] == one_chunk_square_score.shape[1], \
            "Every chunk must be square"
        mrr_array = self.calculate_square_mrr(one_chunk_square_score)
        all_mrr.extend(mrr_array)
    mrr = np.array(all_mrr).mean()
    self.test_iter.append(iter_no)
    self.test_mrr.append(mrr)
    write_log_file(self.log_path,
                   "&Testing Iteration {}: MRR = &{}&".format(iter_no, mrr))
    for k, v in success.items():
        value = v * 1.0 / len(test_query_ids)
        write_log_file(
            self.log_path,
            "&Testing Iteration {}: S@{}@ = &{}&".format(iter_no, k, value))
        if k == 1:
            self.test_s1.append(value)
        elif k == 5:
            self.test_s5.append(value)
        elif k == 10:
            self.test_s10.append(value)
        else:
            print('cannot find k!')
    write_log_file(
        self.log_path,
        "S@1, S@5, S@10\n{}, {}, {}".format(self.test_s1[-1], self.test_s5[-1],
                                            self.test_s10[-1]))
def detector(black_box, block_size=16):
    # Feed three identical plaintext blocks; under ECB, identical plaintext
    # blocks encrypt to identical ciphertext blocks.
    pt = array('B', b'A' * (block_size * 3))  # bytes literal for Python 3
    ct = black_box(pt)
    chunks = chunk(block_size, ct)
    nxt = next(chunks)  # Python 3: next(it), not it.next()
    for block in chunks:
        if len(block) and block == nxt:
            return 'ECB'
        nxt = block
    return 'NOT ECB'
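# This snippet assumes a chunk(size, data) variant with the argument order
# flipped relative to most other snippets here. A minimal sketch of that
# variant plus a toy oracle to exercise the detector; `toy_ecb_box` is
# purely hypothetical (a stateless byte-wise transform standing in for a
# real ECB-mode cipher), not part of the original code:
from array import array

def chunk(size, data):
    """Yield consecutive size-byte blocks of data."""
    for i in range(0, len(data), size):
        yield data[i:i + size]

def toy_ecb_box(pt, key=7):
    # Stateless transform: identical plaintext blocks give identical
    # ciphertext blocks, which is exactly the ECB leak being detected.
    return array('B', ((b + key) % 256 for b in pt))

print(detector(toy_ecb_box))  # -> 'ECB'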
def convert_in_parallel():
    paths = glob.glob('./sounds/mp3/**/*.mp3')
    chunks = chunk(paths, cpu_count() * 2)
    processes = []
    for path_chunk in chunks:  # renamed from `list`, which shadowed the builtin
        process = Process(target=convert_paths, args=(path_chunk, ))
        process.start()
        processes.append(process)
    for process in processes:
        process.join()
    print("All done!")
def remove_repeats(self, lst):
    rs_bits = []
    for bit_lst in chunk(lst, self.t):
        if 1 in bit_lst and 0 in bit_lst:
            raise BECError('Both 0 and 1 found in chunk of repeated bits')
        if 1 in bit_lst:
            rs_bits.append(1)
        elif 0 in bit_lst:
            rs_bits.append(0)
        else:
            rs_bits.append(None)
    return rs_bits
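# An illustration of the intended behavior as a standalone sketch: the
# repetition factor t is passed explicitly, BECError is swapped for
# ValueError, and the simple chunk(seq, size) sketch from the top of this
# file is reused. Each group of t repeated bits collapses to one bit; a
# group that is all None (a fully erased chunk) stays None.
def remove_repeats_demo(lst, t):
    out = []
    for bit_lst in chunk(lst, t):
        if 1 in bit_lst and 0 in bit_lst:
            raise ValueError('Both 0 and 1 found in chunk of repeated bits')
        out.append(1 if 1 in bit_lst else (0 if 0 in bit_lst else None))
    return out

assert remove_repeats_demo([1, 1, None, 0, 0, 0, None, None, None], 3) == [1, 0, None]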
def make_pictures(grids, ordering, width):
    grid = {title: grid for (title, grid, _, _) in grids}
    rows = list(utils.chunk(ordering, width))  # the original enumerate was unused
    joined = [[crop(grid[x]) for x in row] for row in rows]
    picture = []
    for blocks in joined:
        for i in range(len(blocks[0])):
            picture.append(''.join([block[i] for block in blocks]))
    return [flipv(fliph(rot90(picture))), flipv(picture), fliph(picture),
            flipv(fliph(picture)), rot90(picture),
            fliph(rot90(picture)), flipv(rot90(picture)), picture]
async def get_lots_of_contributors(org: str, repos: List[str]):
    results = defaultdict(int)
    with yaspin(text=f"Fetching all {org} contributors..") as spinner:
        tasks = [get_contributors_by_org_repo(org, repo) for repo in repos]
        pieces = chunk(tasks, 20)
        for piece in pieces:
            repos = await asyncio.gather(*piece)
            flat_repos = reduce(lambda x, y: x + y, repos)
            for contributor in flat_repos:
                results[contributor['login']] += contributor['contributions']
            await asyncio.sleep(1)
        spinner.ok('✅ ')
    return results
def decode(self, received):
    rs_bits = self.remove_repeats(received)
    rs_code = [0] * (len(rs_bits) // self.c)
    erase_pos = []
    for idx, char_lst in enumerate(chunk(rs_bits, self.c)):
        if None in char_lst:
            erase_pos.append(idx)
        else:
            rs_code[idx] = lst_to_int(char_lst)
    raw_none = received.count(None) if isinstance(received, list) else 0
    contract_none = rs_bits.count(None)
    erased_count = len(erase_pos)
    # print("Decode: Raw None = " + str(raw_none) + " / " + str(len(received))
    #       + " \t Contracted None = " + str(contract_none) + " / " + str(len(rs_bits))
    #       + " \t Erased characters = " + str(erased_count) + " / " + str(len(rs_code))
    #       + " = " + "{0:.3f}".format(erased_count / len(rs_code)))
    return self._codec.decode(rs_code, erase_pos=erase_pos, only_erasures=True)
async def osu_leaderboard(ctx: Context, *, modeString: Optional[str] = '0'):
    mode = get_mode(modeString)
    if mode is None:
        return await ctx.send(f'Invalid gamemode {modeString}')
    gid = ctx.guild.id
    # renamed to allUserData; the original rebound `userData` inside the
    # loop that iterated over it
    allUserData = backend.read_all_data(backend.USER_DATA)
    guildUsers: List[osu.User] = []
    for uid, userData in allUserData.items():
        if 'osuid' not in userData or 'guilds' not in userData:
            continue
        registeredGuilds = userData['guilds']
        if gid in registeredGuilds:
            user = get_user(userData['osuid'], mode)
            if user:
                guildUsers.append(user)
            else:
                await ctx.send(
                    f'Profile retrieval failed for user '
                    f'{osu.profile_link(userData["osuid"])} <@{uid}>'
                )
    guildUsers.sort(key=lambda user: (int(user['pp_rank'] or 0) or float('inf'),
                                      -float(user['level'] or 0)))
    chunksize = 10
    chunkedGuildUsers = chunk(guildUsers, chunksize)
    first = True
    for cidx, userChunk in enumerate(chunkedGuildUsers):
        leaderboardRows = []
        for i, user in enumerate(userChunk):
            leaderboardRows.append(
                f'**#{(cidx * chunksize) + i + 1}** '
                f'{flag(user["country"])} [{user["username"]}]({osu.profile_link(user["user_id"])}) - '
                f'#{int(user["pp_rank"] or 0):n} | '
                f'{float(user["pp_raw"] or 0):n}pp | '
                f'LVL {float(user["level"] or 0):.2f}'
            )
        leaderboardEmbed = Embed(
            type='rich',
            color=EMBED_COLOR,
            description='\n'.join(leaderboardRows)
        )
        if first:
            leaderboardEmbed.set_author(
                name=f'{osu.MODE_STRING_ENUM[mode]} leaderboard for {ctx.guild.name}',
                icon_url=str(ctx.guild.icon_url) or Embed.Empty,
            )
            first = False
        await ctx.send(embed=leaderboardEmbed)
def test_images_generator(test_dir, max_target_image_size):
    def preprocess_image(image):
        image, _ = preprocessing.square_padding(image, [])
        image = resize_image_if_neccessary(image, max_target_image_size)
        return np.asarray(image)

    def load_image(image_name):
        image = Image.open(path.join_path(test_dir, image_name))
        return image_name, np.asarray(image), preprocess_image(image)

    image_names = path.list_all_images(test_dir)
    for names in utils.chunk(image_names, 1):
        if not names:
            break
        yield list(zip(*map(load_image, names)))
def main(args):
    print('args: {}'.format(args))
    print('split dir: {}'.format(args.splitdir))
    print('pkl dir : {}'.format(args.pkldir))
    print('save dir : {}'.format(args.savedir))
    print('Evaluating irrep: {}'.format(args.alpha))
    if not os.path.exists(args.savedir):
        os.makedirs(args.savedir)
    split_files = [
        os.path.join(args.splitdir, f) for f in os.listdir(args.splitdir)
        if args.suffix in f
    ]
    split_chunks = chunk(split_files, args.par)
    parts = ast.literal_eval(args.parts)
    alpha = ast.literal_eval(args.alpha)
    # assert all(sum(parts[i]) == alpha[i] for i in range(len(parts))), 'Invalid partition for alpha!'
    print('About to full transform: {}'.format(split_chunks))
    full_transform(args, alpha, parts, split_chunks)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("image_dir", help="path to directory with images")
    parser.add_argument("save_prefix", help="start of saved file names")
    parser.add_argument("--save_dir", help="directory to save outputs",
                        default="pose_data")
    parser.add_argument("--threads", help="number of threads to use",
                        type=int, default=8)
    parser.add_argument("--max_frames", help="maximum frames to compute",
                        type=int, default=None)
    args = parser.parse_args()
    pool = Pool(processes=args.threads)

    def to_full_path(file_name):
        return os.path.join(args.image_dir, file_name)

    paths = list(map(to_full_path, get_ordered_files(args.image_dir)))
    if args.max_frames is not None:
        paths = paths[:args.max_frames]
    poses_and_subsets = pool.map(get_poses, chunk(paths, args.threads))
    print("after pool")
    full_poses = []
    full_subsets = []
    for poses, subsets in poses_and_subsets:
        full_poses.extend(poses)
        full_subsets.extend(subsets)
    to_iter = (("poses", full_poses), ("subsets", full_subsets))
    for name, values in to_iter:
        np.save(os.path.join(args.save_dir, args.save_prefix + name), values)
def parseLines(self, lines):
    """
    Parse the lines in blocks of nseq + 2 items. Each block contains:
    - nseq sequence lines
    - one line for the alignment outcome (skipped)
    - a separator line between blocks (skipped)
    """
    sequences = {}
    for block in utils.chunk(self._skipHeader(lines), self.nseq + 2):
        for line in block[0:self.nseq]:
            key, rawSequence, bases = self._parseLine(line)
            sequence = sequences.get(key, AlignmentSequence(key)).extend(
                rawSequence, bases)
            if key not in sequences:
                sequences[key] = sequence
    return sequences
def write_entities_to_datastore(ds_client, entities):
    logger.debug(
        f"Writing {len(entities)} entities to Cloud Datastore for project beachbirbys..."
    )
    # Cloud Datastore accepts at most 500 mutations per commit, hence the chunk size
    chunks = list(utils.chunk(entities, 500))
    logger.debug(f"Split entities into {len(chunks)} chunks.")
    for chunk in chunks:
        try:
            with ds_client.batch():
                logger.debug("Writing chunk...")
                logger.debug(chunk)
                ds_client.put_multi(chunk)
        except Exception as e:
            logger.exception(e)
            logger.error(chunk)
            logger.error(entities)
            raise
    logger.info(
        f"Wrote {len(entities)} entities to Cloud Datastore for project beachbirbys."
    )
    return
def get_batch_stream(self, batch_size, n_repeat=1):
    n_repeat = n_repeat if n_repeat > 0 else sys.maxsize
    for _ in range(n_repeat):
        random.shuffle(self._lines)
        for batch_lines in utils.chunk(self._lines, batch_size):
            Xs = {}
            ys = []
            # ------------- allocate for categorical features
            for field in CATEGORY_FIELDS:
                Xs[field] = SparseInput(n_total_examples=len(batch_lines),
                                        example_indices=[],
                                        feature_ids=[],
                                        feature_values=[])
            # ------------- allocate for numeric features
            for field in DENSE_FIELDS:
                Xs[field] = []
            # ------------- loop and add
            for example_index, line in enumerate(batch_lines):
                # Iterating in order guarantees that the non-zeros inserted
                # into SparseInput are sorted by example_index, ascending
                current_features, label = self.parse_line(line)
                ys.append(label)
                # add categorical features
                for field in CATEGORY_FIELDS:
                    if field in current_features:
                        Xs[field].add(example_idx=example_index,
                                      feat_id=current_features[field],
                                      feat_val=1)
                # add numeric features
                for field in DENSE_FIELDS:
                    # wrap into a one-element list, since we are adding one row
                    Xs[field].append([current_features[field]])
            yield Xs, np.asarray(ys)
def ParallelLSTDQ(D, env, w, damping=0.001, ncpus=None):
    """
    D : source of samples (s,a,r,s',a')
    env: environment containing k,phi,gamma
    w : weights for the linear policy evaluation
    damping : keeps the result relatively stable
    ncpus : the number of cpus to use
    """
    if ncpus:
        nprocess = ncpus
    else:
        nprocess = cpu_count()
    pool = Pool(nprocess)
    indx = chunk(len(D), nprocess)
    results = []
    for (i, j) in indx:
        # note that damping needs to be zero here
        r = pool.apply_async(dict_loop, (D[i:j], env, w, 0.0))
        results.append(r)
    k = len(w)
    A = sp.identity(k, format='csr') * damping
    b = sp_create(k, 1, 'csr')
    for r in results:
        T, t = r.get()
        A = A + T
        b = b + t
    # close out the pool of workers
    pool.close()
    pool.join()
    w, info = solve(A, b, method="spsolve")
    return A, b, w, info
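# Here `chunk(len(D), nprocess)` is yet another variant: it takes a length
# and a worker count and yields (start, end) index pairs for slicing D. A
# plausible standalone rendition under the hypothetical name
# `chunk_index_ranges` -- an assumption about the helper; the real one may
# balance the remainder differently:
def chunk_index_ranges(n, nprocess):
    """Split range(n) into nprocess contiguous (start, end) index pairs."""
    size, rem = divmod(n, nprocess)
    pairs, start = [], 0
    for p in range(nprocess):
        end = start + size + (1 if p < rem else 0)
        pairs.append((start, end))
        start = end
    return pairs

# e.g. chunk_index_ranges(10, 3) -> [(0, 4), (4, 7), (7, 10)]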
def ParallelLSTDQRmax(D, env, w, track, damping=0.001, rmax=1.0, ncpus=None):
    """
    D : source of samples (s,a,r,s',a')
    env: environment containing k,phi,gamma
    w : weights for the linear policy evaluation
    track : an object that records what is known
    damping : keeps the result relatively stable (solves some difficulties
        with oscillation if A is singular)
    rmax : the maximum reward
    ncpus : the number of cpus to use
    """
    if ncpus:
        nprocess = ncpus
    else:
        nprocess = cpu_count()
    pool = Pool(nprocess)
    indx = chunk(len(D), nprocess)
    results = []
    for (i, j) in indx:
        # note that damping needs to be zero here
        r = pool.apply_async(drmax_loop, (D[i:j], env, w, track, 0.0, rmax))
        results.append(r)
    k = len(w)
    A = sp.identity(k, format='csr') * damping
    b = sp_create(k, 1, 'csr')
    for r in results:
        T, t = r.get()
        A = A + T
        b = b + t
    # close out the pool of workers
    pool.close()
    pool.join()
    w, info = solve(A, b, method="spsolve")
    return A, b, w, info
""" Apply the DIPCompute transform to DATA and store it. IDX used for multiprocessing. """ data_subset = Subset(data, data_idx) dataloader = DataLoader(data_subset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True) gpu_num = idx % (torch.cuda.device_count() - 1) + 1 comp = DIPCompute(dataloader, save_dir, (3, 32, 32), num_loops=CONST_NUM_LOOPS, iters=iters, input_noise_std=0.03, device=f'cuda:{gpu_num}') comp.run_all() if __name__ == '__main__': mp.set_start_method('spawn') nproc = 32 data_idxs = list(range(len(data) // 2, len(data))) # TODO do other half chks = chunk(data_idxs, nproc) with mp.Pool(processes=nproc) as pool: for chk in chks: results = [] for chk_idx, chk in enumerate(chks): #subset_data = Subset(data, chk) results.append(pool.apply_async(proc_func, (chk, chk_idx))) for res in results: res.get()
def _fill_table(self, sentences, headers, max_sentence_length):
    total = len(sentences)
    chunk_size = max(total // self.MAX_CHUNK_NUM, self.MIN_CHUNK_SIZE)
    for chunk_id, chunk in enumerate(utils.chunk(sentences, chunk_size)):
        shift = chunk_id * chunk_size
        self.emit(QtCore.SIGNAL(InputDataLookup.SENTENCES_LOADED_EVENT),
                  chunk, shift, total)
def load_tles(filename):
    d = {}
    # A TLE entry is three lines: the satellite name followed by two data lines
    for a, b, c in utils.chunk(open(filename), 3):  # open(), not the Python 2 file()
        name = a.strip()
        d[name] = ephem.readtle(a, b, c)
    return d
def main():
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
    usage = "usage: %prog -o OUTPUT_DIR [options] [input, config [,starting_rules]]..."
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-p",
        "--print_only",
        action="store_false",
        dest="run",
        default=True,
        help="Only show the configuration, do not run it",
    )
    parser.add_option("-o", "--output", dest="output", default=None,
                      help="Destination")
    parser.add_option(
        "-s",
        "--starting_rules",
        action="store_true",
        dest="starting_rules",
        default=False,
        help="Script expects triples of data (input, config, starting_rules) "
        + "instead of doubles (input, config), if used.",
    )
    options, args = parser.parse_args()
    if options.output is None:
        print("No output dir specified!")
        return
    print("artifact directory:", options.output)
    chunk_size = 3 if options.starting_rules else 2
    if not args or len(args) % chunk_size != 0:
        print("Invalid number of arguments!")
        return
    tasks = []
    # enumerate fixes the original counter, which printed "Task 0" for every task
    for i, (input_file, config_file, *rest) in enumerate(chunk(args, chunk_size)):
        print("Task", i)
        task = TaskModel()
        print("\tinput:", input_file)
        task.data_configuration = input_file
        print("\tconfig:", config_file)
        task.params_configuration = config_file
        starting_population = None
        if rest:
            starting_population = rest[0]
            print("\tstarting population:", starting_population)
        task.population_configuration = starting_population
        tasks.append(task)
    if options.run:
        print("Starting run")
        executor = SimulationExecutor()
        runner = Runner()
        for i, x in enumerate(tasks):
            run_func, configuration, population_printer = executor.prepare_simulation(
                runner, i, x.data_configuration, x.params_configuration,
                x.population_configuration)
            result = run_func(configuration)
            collected = False
            while not collected:
                try:
                    _collect_task(x, result, i, configuration,
                                  population_printer, executor, options.output)
                except PermissionError:
                    collected = False
                    print("not collected!")
                else:
                    collected = True
#!/usr/bin/env python3
import sys

import utils
import dasm.assembler

if len(sys.argv) != 2:
    print('usage: asm.py program.asm')
    exit(1)

asm_filename = sys.argv[1]
asm_listing = open(asm_filename, 'r').read()
word_list, statements = dasm.assembler.assemble_listing(asm_listing)
# print(word_list)

output_hex = []
for pos, chunk in enumerate(utils.chunk(word_list, 8)):
    if len(chunk) < 8:
        chunk.extend([0] * (8 - len(chunk)))
    output_hex.append('{:>4x}: '.format(pos * 8) +
                      ' '.join('{:0>4x}'.format(x) for x in chunk))
print('\n'.join(output_hex))

output_filename = asm_filename.rsplit('.', 1)[0]
output_file = open(output_filename + '.hex', 'w')
output_file.write('\n'.join(output_hex) + '\n')

# for instr in instructions:
#     print('{:>4x}: '.format(instr.addr) + ' '.join("0x{:0>4x}".format(x) for x in instr.assemble()))
def main(args):
    # seed the random number generator (RNG)
    seed = args.seed
    np.random.seed(seed * 13 // 7)
    if args.mode == 0:
        train(args)
    elif args.mode == 1:
        generate(args)
    elif args.mode == 2:
        # compute per-sentence test perplexity
        train_data = read_corpus(args.train)
        # Construct vocab
        vocab = Vocab(train_data, int(args.size), int(args.freq_cutoff))
        # Load the trained model
        model = LM_LSTM(hidden_size=args.hidden_dimension,
                        embedding_dim=args.embedding_dimension,
                        output_size=len(vocab.train),
                        n_layers=args.n_layers,
                        is_gru=args.is_gru)
        loss_fn = torch.nn.CrossEntropyLoss(reduce=False)  # loss function / optimizer
        if torch.cuda.is_available():
            model = model.cuda()
            loss_fn = loss_fn.cuda()
        model.load_state_dict(
            torch.load(args.model_dir + '/' + args.model_file))
        model.eval()
        dev_data = read_corpus(args.test)
        file = open('sentence_ppl.txt', 'w')
        j = 0
        for sentence in dev_data:
            words = []
            for word in sentence:
                words.append(vocab.train.word2id[word]
                             if word in vocab.train.word2id
                             else vocab.train.word2id['<unk>'])
            words = np.array(words).reshape(-1, 1)
            test_loss, test_ppl = get_val_loss(model, loss_fn, words,
                                               args.seq_len, 1)
            file.write(' '.join(sentence) + " -> " + str(test_ppl))
            j += 1
            if j > 100:
                break
        file.close()
    elif args.mode == 3:
        # compute test perplexity
        train_data = read_corpus(args.train)
        # Construct vocab
        vocab = Vocab(train_data, int(args.size), int(args.freq_cutoff))
        # Load the trained model
        model = LM_LSTM(hidden_size=args.hidden_dimension,
                        embedding_dim=args.embedding_dimension,
                        output_size=len(vocab.train),
                        n_layers=args.n_layers,
                        is_gru=args.is_gru)
        loss_fn = torch.nn.CrossEntropyLoss(reduce=False)  # loss function / optimizer
        if torch.cuda.is_available():
            model = model.cuda()
            loss_fn = loss_fn.cuda()
        model.load_state_dict(
            torch.load(args.model_dir + '/' + args.model_file))
        model.eval()
        dev_data = read_corpus(args.test)
        dev_data = convert_to_idx(dev_data, vocab)
        dev = chunk(dev_data, chunk_size=args.chunk_size, is_evaluation=True)
        test_loss, test_ppl = get_val_loss(model, loss_fn, dev, args.seq_len,
                                           args.chunk_size)
        print('Test set perplexity: {}'.format(test_ppl))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', '-d', type=str, required=True)
    parser.add_argument('--task', '-t', type=str, required=True)
    parser.add_argument('--output_folder_name', '-o', type=str,
                        default="bert_output")
    parser.add_argument('--bert-model', type=str, default='bert-base-uncased')
    parser.add_argument('--batch-size', '-b', type=int, default=16)
    parser.add_argument('--finetuned_model', '-f', type=str)
    parser.add_argument('--layers', type=int, nargs='+',
                        default=list(range(12)))
    parser.add_argument(
        "--attention_mask_heads",
        default="",
        type=str,
        nargs="*",
        help="[layer]:[head1],[head2]..."
    )
    parser.add_argument(
        '--reverse_head_mask',
        action='store_true',
        help="Mask all heads except those specified by `--attention-mask-heads`"
    )
    parser.add_argument(
        "--actually_prune",
        action='store_true',
        help="Really prune (like, for real)"
    )
    args = parser.parse_args()

    # prepare output dir
    output_dir = os.path.join(args.output_folder_name, args.task)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # prepare model
    if args.finetuned_model is None:
        model = BertModel.from_pretrained(args.bert_model)
    else:
        model = BertForSequenceClassification.from_pretrained(args.bert_model)
        state_dict = torch.load(args.finetuned_model)
        model.load_state_dict(state_dict)
        model = model.bert
    model.cuda()
    model.eval()

    # TODO: must be dynamic with respect to pruning
    num_attention_heads = model.encoder.layer[0].attention.self.n_heads
    print(f'TASK - {args.task}')
    print(f'layers - {args.layers}')
    print(f'num heads - {num_attention_heads}')

    # Parse pruning descriptor
    to_prune = pruning.parse_head_pruning_descriptors(
        args.attention_mask_heads,
        reverse_descriptors=args.reverse_head_mask,
    )
    print(f'masked heads - {args.attention_mask_heads}')
    # Mask heads
    if args.actually_prune:
        model.prune_heads(to_prune)
    else:
        model.mask_heads(to_prune)

    # hooks
    hooks = []
    for idx in args.layers:
        attn_hook = BundleAveragingHook()
        hook_bert_layer_attn(model, idx, attn_hook)
        hooks.append((f'attn-l{idx}', attn_hook))

    # load data
    data_file = os.path.join(args.data_dir, args.task, "test.tsv")
    df = pd.read_csv(data_file, sep='\t', quoting=3)
    print("total size :", len(df))
    tokenizer = BertTokenizer.from_pretrained(args.bert_model)

    # processing input data
    token_file = open(os.path.join(output_dir, 'tokens.tsv'), 'wt')
    tsv_writer = csv.writer(token_file, delimiter='\t')
    for data in tqdm(list(chunk(list(df.itertuples()), args.batch_size))):
        if "CoLA" in data_file:
            _, _, sentences = list(zip(*data))
        elif "SST-2" in data_file:
            _, _, sentences = list(zip(*data))
        else:
            raise Exception('Unhandled dataset')
        lower_sentences = [
            ' '.join(tokenizer.tokenize(s.lower())) for s in sentences
        ]
        bundle = SingleInputBundle(list(map(str.split, lower_sentences)),
                                   tokenizer.vocab)
        bundle.cuda()
        for _, hook in hooks:
            hook.bundle = bundle
        with torch.no_grad():
            model(bundle.token_ids, bundle.segment_ids, bundle.input_mask)
        for padded_sentence in bundle.padded_sentences:
            tsv_writer.writerow(padded_sentence)

    # storing activations
    for name, hook in hooks:
        combined = torch.cat(hook.data)
        print(name, combined.shape)
        torch.save(combined, os.path.join(output_dir, f'{name}.pt'))
        new_shape = combined.size()[:-1] + (num_attention_heads, -1)
        combined = combined.view(*new_shape).permute((1, 0, 2))
        for subhead_idx, subhead_data in enumerate(combined):
            torch.save(subhead_data,
                       os.path.join(output_dir, f'{name}-{subhead_idx}.pt'))
            print(f'\t{name}-{subhead_idx}', subhead_data.shape)
# cmap="YlOrRd", # cmap="hot", cmap="Blues", annot=display_values, square=True, ) g.set_title(title, fontsize=16) g.set_xlabel("Predicted Label", fontsize=14) g.set_ylabel("True Label", fontsize=14) plt.savefig(save_path, bbox_inches="tight") plt.show() conversations, labels = load_corpus_data(corpus, detail_level) conversations = chunk(conversations, max_nr_utterances) labels = chunk(labels, max_nr_utterances) n_tags = len(get_id2tag(corpus, detail_level=detail_level)) tokenizer = get_tokenizer(rebuild_from_all_words=False) word2id = tokenizer.word_index X, y = make_model_readable_data(conversations, labels, tokenizer, max_nr_utterances, max_nr_words) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, shuffle=True)
q_out = queue.Queue()  # Python 3: the stdlib module is `queue`, not `Queue`
p = audiobackend.Play(channels=1, queue=q_out)

x = utils.rand_gen(10240)

Fcarr = 2000
Fsampl = 8000
K = 6

signal = modulate(x, Fcarr, Fsampl, K)
y = demodulate(signal, Fcarr, Fsampl, K)

x = x.tolist()
# print(len(x))
# print(len(y))
# print(x)
# print(y)

if not utils.contains(x[0:196], y):
    # if not utils.contains(x, y):
    print("data error")
else:
    print("data ok")

s = utils.conv_to_audio(signal)
p.start()
for x in utils.chunk(s, size):
    q_out.put(x)
p.samples_ready()
p.done()
p.join()
            if value is not None:
                output_buffers[index].append(value)
                if len(output_buffers[index]) == 3:
                    address, X, Y = output_buffers[index]
                    output_buffers[index] = []
                    if address < size:
                        nodes[address].input.list += [X, Y]
                        nodes[address].input.idle = False
                    else:
                        NAT = (X, Y)
                    yield address, X, Y, False
        if all(node.input.idle for node in nodes) and NAT is not None:
            address = 0
            X, Y = NAT
            yield address, X, Y, True
            nodes[address].input.list += [X, Y]
            nodes[address].input.idle = False


only_NAT = partial(filter, unpack(lambda address, X, Y, NAT_origin: NAT_origin))
first_repeat_Y = lambda packets: next(p2[2] for p1, p2 in chunk(packets, 2)
                                      if p1[2] == p2[2])

one = compose(
    lambda packets: next(Y for address, X, Y, NAT_origin in packets
                         if address == 255),
    partial(run, 50),
    parse)
two = compose(first_repeat_Y, only_NAT, partial(run, 50), parse)