def async_build_examples(
        self, data_type: str,
        dials: List[Tuple[str, dict]]) -> Tuple[list, list]:
    """Use multiprocessing to process raw dialogue data.

    Args:
        data_type: train, dev or test
        dials: raw dialogues data

    Returns:
        new examples by all processes
    """
    neg_examples = Manager().list()
    pos_examples = Manager().list()
    dials4single_process = (len(dials) - 1) // self.config['num_processes'] + 1
    print(f'Each process handles {dials4single_process} dials ...')
    pool = Pool(self.config['num_processes'])
    for i in range(self.config['num_processes']):
        pool.apply_async(func=self.iter_dials,
                         args=(dials[dials4single_process * i:
                                     dials4single_process * (i + 1)],
                               data_type, pos_examples, neg_examples, i))
    pool.close()
    pool.join()

    pos_examples = list(pos_examples)
    neg_examples = list(neg_examples)
    return neg_examples, pos_examples
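The snippet above relies on `self.iter_dials` appending results to the two `Manager().list()` proxies it receives. A minimal, self-contained sketch of that sharing pattern, with a hypothetical `process_chunk` worker standing in for `iter_dials`, might look like this:

# Minimal sketch: workers append to Manager list proxies passed via
# apply_async; the parent reads them after join(). `process_chunk` is
# a hypothetical stand-in, not the original iter_dials.
from multiprocessing import Pool, Manager


def process_chunk(chunk, pos_examples, neg_examples, worker_id):
    for item in chunk:
        # Updates on the proxy objects are forwarded to the Manager process.
        (pos_examples if item % 2 == 0 else neg_examples).append(
            (worker_id, item))


if __name__ == '__main__':
    manager = Manager()
    pos, neg = manager.list(), manager.list()
    data = list(range(20))
    num_procs = 4
    chunk_size = (len(data) - 1) // num_procs + 1
    pool = Pool(num_procs)
    for i in range(num_procs):
        pool.apply_async(process_chunk,
                         args=(data[chunk_size * i:chunk_size * (i + 1)],
                               pos, neg, i))
    pool.close()
    pool.join()
    print(list(pos), list(neg))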
def load_async(pool: Pool, fn: Callable, *args, callback: Callable = None,
               **kwargs) -> Any:
    """Load data asynchronously and serialize data via dill.

    Args:
        pool: multiprocessing pool to use for :func:`apply_async`
        fn: function to load a single sample
        *args: positional arguments to dump with dill
        callback: optional callback. defaults to None.
        **kwargs: keyword arguments to dump with dill

    Returns:
        Any: reference to obtain data with :func:`get`
    """
    if not DILL_AVAILABLE:
        raise RuntimeError('dill is not installed. For async loading '
                           'please install it')
    payload = dill.dumps((fn, args, kwargs))
    return pool.apply_async(dill_helper, (payload,), callback=callback)
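`dill_helper` is not shown in this snippet. A plausible sketch, assuming it simply runs in the worker process, deserializes the `(fn, args, kwargs)` payload, and calls the function:

# Hypothetical sketch of the dill_helper referenced above; the actual
# helper in the original codebase may differ.
import dill


def dill_helper(payload):
    fn, args, kwargs = dill.loads(payload)
    return fn(*args, **kwargs)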
def propagate(nnf, feat_A, feat_AP, feat_B, feat_BP, patch_size, iters=2,
              rand_search_radius=200):
    print("\tpatch_size:{}; num_iters:{}; rand_search_radius:{}".format(
        patch_size, iters, rand_search_radius))

    nnd = np.zeros(nnf.shape[:2])
    A_size = feat_A.shape[:2]
    B_size = feat_B.shape[:2]

    for ay in range(A_size[0]):
        for ax in range(A_size[1]):
            by, bx = nnf[ay, ax]
            nnd[ay, ax] = cal_dist(ay, ax, by, bx, feat_A, feat_AP, feat_B,
                                   feat_BP, A_size, B_size, patch_size)

    manager = mp.Manager()
    q = manager.Queue(A_size[1] * A_size[0])
    cpus = min(mp.cpu_count(), A_size[0] // 20 + 1)

    for i in range(iters):
        p = Pool(cpus)

        ay_start = 0
        while ay_start < A_size[0]:
            ax_start = 0
            while ax_start < A_size[1]:
                p.apply_async(pixelmatch,
                              args=(q, ax_start, ay_start, cpus, nnf, nnd,
                                    A_size, B_size, feat_A, feat_AP, feat_B,
                                    feat_BP, patch_size, rand_search_radius))
                ax_start += A_size[1] // cpus + 1
            ay_start += A_size[0] // cpus + 1

        p.close()
        p.join()

        while not q.empty():
            ax, ay, xbest, ybest, dbest = q.get()
            nnf[ay, ax] = np.array([ybest, xbest])
            nnd[ay, ax] = dbest

    return nnf, nnd
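Here `pixelmatch` is expected to push `(ax, ay, xbest, ybest, dbest)` tuples onto the manager queue, which the parent drains after `join()`. A minimal sketch of that queue-based collection pattern, with a hypothetical `worker` standing in for `pixelmatch`:

# Minimal sketch: workers put results on a Manager Queue, the parent
# drains it after join(). `worker` is illustrative only.
import multiprocessing as mp
from multiprocessing import Pool


def worker(q, start, step, n):
    for idx in range(start, n, step):
        q.put((idx, idx * idx))  # e.g. (index, result)


if __name__ == '__main__':
    manager = mp.Manager()
    q = manager.Queue()
    cpus = min(mp.cpu_count(), 4)
    p = Pool(cpus)
    for i in range(cpus):
        p.apply_async(worker, args=(q, i, cpus, 100))
    p.close()
    p.join()
    results = {}
    while not q.empty():
        idx, val = q.get()
        results[idx] = val
    print(len(results), 'results collected')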
def match(self):
    pool = Pool(self.num_processes)
    results = []
    total = len(self.pairs_to_match)
    counter = 0
    matches_pairs_fname = os.path.join(
        self.input_dir, "matches_pairs" + self.suffix + ".txt")
    mp_file = open(matches_pairs_fname, 'w')
    for pair in tqdm(self.pairs_to_match):
        orig_name1 = self.image_names[pair[0]]
        orig_name2 = self.image_names[pair[1]]
        name1 = os.path.join(self.input_dir, orig_name1)
        name2 = os.path.join(self.input_dir, orig_name2)
        image_id1 = self.image_ids[pair[0]]
        image_id2 = self.image_ids[pair[1]]
        if self.cuda:
            # use single process
            data = self.matchPairCached(orig_name1, orig_name2, image_id1,
                                        image_id2, self.recompute, self.cuda)
            self.db.add_matches(data[0], data[1], data[2])
            mp_file.write(data[3] + " " + data[4] + "\n")
            counter += 1
            if counter % 50000 == 0:
                counter = 0
                self.db.commit()
            # print("Matched", counter, "out of", total)
        else:
            # use multiple processes
            res = pool.apply_async(self.matchPairCached,
                                   (orig_name1, orig_name2, image_id1,
                                    image_id2, self.recompute, self.cuda))
            results.append(res)

    if not self.cuda:
        # if more processes were used, collect the results
        counter = 0
        for res in tqdm(results):
            data = res.get()
            if data is None:
                continue
            self.db.add_matches(data[0], data[1], data[2])
            name1 = data[3]
            name2 = data[4]
            mp_file.write(name1 + " " + name2 + "\n")
            counter += 1
            if counter % 50000 == 0:
                counter = 0
                self.db.commit()

    # be sure to commit everything at the end
    self.db.commit()
    mp_file.close()
def save(self):
    try:
        mp.set_start_method('spawn')
    except RuntimeError:
        pass
    pool = Pool(processes=4)
    self.labels = []
    item = 0
    for label_name in label_map.keys():
        images = self._listdir(os.path.join(self.path, label_name))
        for i in range(len(images)):
            rows, cols = [], []
            files = glob.glob(
                os.path.join(self.path, label_name, images[i],
                             str(self.magnify), '*.jpeg'))
            for file in files:
                filename = os.path.basename(file)
                nums = filename.split('_')
                row, col = int(nums[0]), int(nums[1])
                rows.append(row)
                cols.append(col)
            num_row = max(rows) - min(rows) + 1
            num_col = max(cols) - min(cols) + 1
            patches = np.chararray((num_row, num_col), itemsize=1024)
            for file in files:
                filename = os.path.basename(file)
                nums = filename.split('_')
                row, col = int(nums[0]), int(nums[1])
                patches[row - min(rows), col - min(cols)] = file
            self.labels.append(label_map[label_name])
            # Save feature vector
            pool.apply_async(self.doit,
                             args=(item, patches, num_row, num_col),
                             error_callback=self.print_error)
            item += 1
    # Save labels
    torch.save(self.labels, self._get_label_file())
    pool.close()
    pool.join()
    print('done')
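Without `error_callback`, exceptions raised inside `self.doit` would be swallowed silently because nothing ever calls `.get()` on the async results. A minimal sketch of this pattern with hypothetical function names:

# Minimal sketch of apply_async with an error_callback: exceptions
# raised in the worker are delivered to the callback in the parent
# instead of being silently dropped. `flaky_task` and `print_error`
# are illustrative stand-ins.
from multiprocessing import Pool


def flaky_task(x):
    if x == 3:
        raise ValueError('bad input: %d' % x)
    return x * 10


def print_error(err):
    # err is the exception instance raised in the worker process
    print('worker failed:', err)


if __name__ == '__main__':
    pool = Pool(processes=2)
    for x in range(5):
        pool.apply_async(flaky_task, args=(x,), error_callback=print_error)
    pool.close()
    pool.join()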
def mul_infer(args):
    setuplogging()
    set_start_method('spawn', force=True)
    root_data_dir = os.path.join(args.root_data_dir, 'testdata')

    checkpoint = torch.load(os.path.join(args.model_dir, args.load_ckpt_name),
                            map_location=torch.device('cpu'))
    subcategory_dict = checkpoint['subcategory_dict']
    category_dict = checkpoint['category_dict']
    logging.info('load ckpt: {}'.format(args.load_ckpt_name))

    check_preprocess_result(args, root_data_dir, mode='test',
                            category=category_dict,
                            subcategory=subcategory_dict)
    logging.info('finish the preprocess of docfeatures')

    docid_features, category_dict, subcategory_dict = read_news(
        args, root_data_dir)

    news_index = {}
    news_feature = []
    cnt = 0
    for k, v in docid_features.items():
        news_index[k] = cnt
        news_feature.append(v)
        cnt += 1

    news_num = len(news_feature)
    logging.info('news_num:{}'.format(news_num))

    pool = Pool(processes=args.world_size)
    results = []
    single_size = news_num // args.world_size
    for rank in range(args.world_size):
        start = single_size * rank
        end = single_size * (rank + 1)
        if rank == args.world_size - 1:
            end = news_num
        local_features = news_feature[start:end]
        result = pool.apply_async(sigle_process_infer,
                                  args=(rank, local_features, checkpoint,
                                        args))
        results.append(result)
    pool.close()
    pool.join()

    results = [x.get() for x in results]
    news_vecs = np.concatenate(results, 0)
    return news_index, news_vecs
def fit(self, train_data, train_label):
    a = time()
    pool = Pool(16)
    results = []
    for i in range(self.round):
        print(i)
        bag_index = np.random.choice(np.arange(train_label.shape[0]),
                                     train_label.shape[0])
        data = train_data[bag_index]
        label = train_label[bag_index]
        results.append(
            pool.apply_async(self.parallel_fit,
                             args=(self.clf[i], data, label)))
    pool.close()
    pool.join()
    for i, result in enumerate(results):
        self.clf[i] = result.get()
        print('Class %d cost %.1f seconds' % (i, time() - a))
def train(self, data, labels, val_data=None, val_labels=None,
          warm_start=False):
    """
    :param data:
    :param labels:
    :param val_data:
    :param val_labels:
    :param warm_start:
    :return:
    """
    # initialize variable
    data = torch.from_numpy(data).float()
    labels = torch.from_numpy(labels).char()
    if val_data is None:
        train_data, val_data = data, data
        train_labels, val_labels = labels, labels
    else:
        train_data = data
        train_labels = labels
        val_data = torch.from_numpy(val_data).float()
        val_labels = torch.from_numpy(val_labels).char()
    orig_cols = train_data.size(1)

    # counting class and get their index
    self.plus_row_index = []
    self.minus_row_index = []
    self.orig_minus = 0
    self.orig_plus = 0
    for idx, value in enumerate(train_labels):
        if value == 1:
            self.orig_plus += 1
            self.plus_row_index.append(idx)
        else:
            self.orig_minus += 1
            self.minus_row_index.append(idx)

    # balanced pick rows and cols
    plus = max(2, int(self.orig_plus * self.nrows))
    minus = max(2, int(self.orig_minus * self.nrows))
    num_cols = max(min(5, orig_cols), int(self.nfeatures * orig_cols))

    # initialize up triangle matrix and reference index
    rows_sum = plus + minus
    self.yp = torch.ones((self.width, rows_sum), dtype=torch.int8)
    self.ref_full_index1 = torch.repeat_interleave(
        torch.arange(self.updated_features * self.step.shape[0]).view(
            (-1, 1)), rows_sum, dim=1)
    self.ref_full_index2 = torch.repeat_interleave(
        torch.arange(self.hidden_nodes * self.step.shape[0]).view((-1, 1)),
        rows_sum, dim=1)

    # multi-process
    c = self.round // self.num_gpus
    for r in range(c + 1):
        pool = Pool(self.n_jobs)
        results = []
        for t in range(min(self.n_jobs, self.round - r * self.num_gpus)):
            if warm_start and self.w_index != []:
                column_indices = self.w_index[r * self.num_gpus + t]
                w1 = self.w1[:, :, r * self.num_gpus + t]
                w2 = self.w2[:, r * self.num_gpus + t]
            else:
                column_indices = np.random.choice(np.arange(orig_cols),
                                                  num_cols, replace=False)
                # column_indices = np.arange(orig_cols)
                self.w_index.append(column_indices)
            results.append(
                pool.apply_async(self.single_run,
                                 args=(train_data, train_labels, plus, minus,
                                       val_data, val_labels, column_indices,
                                       t % self.num_gpus)))
        pool.close()
        pool.join()

        df = pd.DataFrame(columns=[])
        for i, result in enumerate(results):
            temp_w1, temp_b1, temp_w2, temp_b2, temp_obj, uba, ba = \
                result.get()
            df['vote %d imbalanced acc' % i] = uba
            df['vote %d balanced acc' % i] = ba
            # temp_w1, temp_b1, temp_w2, temp_b2, temp_obj = self.single_run(
            #     train_data, train_labels, plus, minus, val_data, val_labels,
            #     w1, w2, column_indices, r % self.num_gpus)
            if warm_start:
                self.w1[:, :, i] = temp_w1
                self.w2[:, i] = temp_w2
                self.b1[:, i] = temp_b1
                self.b2[i] = temp_b2
                self.obj[i] = temp_obj
            else:
                self.w1.append(temp_w1)
                self.w2.append(temp_w2)
                self.b1.append(temp_b1)
                self.b2.append(temp_b2)
                self.obj.append(temp_obj)
        del pool, results

    df.to_csv('v15.csv', index=False)

    if warm_start is False:
        self.w1 = torch.stack(self.w1, dim=2)
        self.w2 = torch.stack(self.w2, dim=1)
        self.b1 = torch.stack(self.b1, dim=1)
        self.b2 = torch.Tensor(self.b2)
        self.obj = torch.Tensor(self.obj)
    best_index = self.obj.argmax()
    self.best_acc = self.obj[best_index]
    self.best_w1 = self.w1[:, :, best_index]
    self.best_w2 = self.w2[:, best_index]
    self.best_b1 = self.b1[:, best_index]
    self.best_b2 = self.b2[best_index]
    self.best_w_index = self.w_index[best_index]
    return
def detection_by_tracking(
        frame_dir,
        json_file,
        tracker_model,
        detection_threshold=0.9,
        tracking_threshold=0.9,
        save_json_file="data/demo_tracking/detection_by_tracking.tracking_json",
        offset=0,
        low=None,
        high=None,
        step=1,
        parallel=False,
        multithreading=False):
    # Load annotations
    data = json.load(open(json_file, "r"))
    annotations = dict()
    for annotation in data['annotations']:
        if annotation['image_id'] in annotations:
            annotations[annotation['image_id']] += [annotation]
        else:
            annotations[annotation['image_id']] = [annotation]

    # Load frames
    frame_files = general_utils.get_all_files(frame_dir, keep_dir=True,
                                              sort=True)
    num_frame = len(frame_files)

    tracking_data = dict()
    tracking_data["images"] = data["images"]
    tracking_data["categories"] = data["categories"]
    tracking_data["annotations"] = list()

    if low is None:
        low = -int(1e9)
    if high is None:
        high = int(1e9)

    start = time.time()
    last_count = 0

    # Set up parallel processing
    if parallel:
        mp.set_start_method('spawn', force=True)
        mp.set_sharing_strategy('file_system')
        pool = Pool()
    else:
        pool = None
    results = [None for _ in range(num_frame)]

    # Set up multithreading processing
    if multithreading:
        executor = ThreadPoolExecutor()
    else:
        executor = None

    # Loop over frames
    for frame_id in range(num_frame):
        # Align id
        frame_id += offset
        num_box = len(annotations[frame_id])

        # Count boxes with high confidence
        count = 0
        for box_id in range(num_box):
            score = annotations[frame_id][box_id]["score"]
            if score > detection_threshold:
                count += 1

        # If this frame has more boxes, track from it for certain;
        # else check skip criteria
        if count <= last_count:
            last_count = count
            # Skip frame
            if frame_id % step != 0:
                continue
        else:
            last_count = count

        print("Process frame ", frame_id)
        forward_tracker = build_tracker(tracker_model)
        backward_tracker = build_tracker(tracker_model)

        # Loop over detection boxes
        for box_id in range(num_box):
            # print("=> Process box ", box_id)
            # Filter by detection score
            score = annotations[frame_id][box_id]["score"]
            if score < detection_threshold:
                # print("==> Skip")
                continue

            if multithreading:
                print(f"---> Multithread tracking for box {box_id} frame {frame_id}")
                executor.submit(single_box_in_single_frame_tracking,
                                (frame_files, frame_id, box_id, annotations,
                                 tracking_threshold, forward_tracker,
                                 backward_tracker, offset, low, high))

            if parallel:
                print(f"---> Parallel tracking for box {box_id} frame {frame_id}")
                results[frame_id - offset] = pool.apply_async(
                    single_box_in_single_frame_tracking,
                    [frame_files, frame_id, box_id, annotations,
                     tracking_threshold, forward_tracker, backward_tracker,
                     offset, low, high])

            if not multithreading and not parallel:
                tracking_data["annotations"] += \
                    single_box_in_single_frame_tracking(
                        frame_files, frame_id, box_id, annotations,
                        tracking_threshold, forward_tracker, backward_tracker,
                        offset, low, high)

    for result in results:
        if result is not None:
            tracking_data["annotations"] += result.get()

    end = time.time()
    print(f"Total time: {(end - start)} s")

    with open(save_json_file, "w") as outfile:
        json.dump(tracking_data, outfile)
def main():
    opt = parse_args()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    random.seed(opt.seed)
    numpy.random.seed(opt.seed)
    torch.manual_seed(opt.seed)

    data_path = 'traffic-data/state-action-cost/data_i80_v0'

    dataloader = DataLoader(None, opt, 'i80')
    (
        forward_model,
        value_function,
        policy_network_il,
        policy_network_mper,
        data_stats
    ) = load_models(opt, data_path, device)
    splits = torch.load(path.join(data_path, 'splits.pth'))

    if opt.u_reg > 0.0:
        forward_model.train()
        forward_model.opt.u_hinge = opt.u_hinge
        if hasattr(forward_model, 'value_function'):
            forward_model.value_function.train()
        planning.estimate_uncertainty_stats(
            forward_model, dataloader, n_batches=50, npred=opt.npred)

    gym.envs.registration.register(
        id='I-80-v1',
        entry_point='map_i80_ctrl:ControlledI80',
        kwargs=dict(
            fps=10,
            nb_states=opt.ncond,
            display=False,
            delta_t=0.1,
            store_simulator_video=opt.save_sim_video,
            show_frame_count=False,
        )
    )

    print('Building the environment (loading data, if any)')
    env_names = {
        'i80': 'I-80-v1',
    }
    env = gym.make(env_names[opt.map])

    plan_file = build_plan_file_name(opt)
    print(f'[saving to {path.join(opt.save_dir, plan_file)}]')

    # different performance metrics
    time_travelled, distance_travelled, road_completed = [], [], []
    collided, offscreen = [], []
    # values saved for later inspection
    action_sequences, state_sequences, cost_sequences = [], [], []
    image_sequences = []

    writer = utils.create_tensorboard_writer(opt)

    n_test = len(splits['test_indx'])

    set_start_method('spawn')
    pool = Pool(opt.num_processes)

    async_results = []

    time_started = time.time()
    total_images = 0

    for j in range(n_test):
        # print(type(splits), len(splits['test_indx']), splits['test_indx'].shape, list(dataloader.car_sizes.keys())[0:5], list(dataloader.car_sizes[list(dataloader.car_sizes.keys())[0]].keys())[0:5], dataloader.car_sizes[list(dataloader.car_sizes.keys())[0]][list(dataloader.car_sizes[list(dataloader.car_sizes.keys())[0]].keys())[0]])
        car_path = dataloader.ids[splits['test_indx'][j]]
        timeslot, car_id = utils.parse_car_path(car_path)
        car_sizes = torch.tensor(
            dataloader.car_sizes[sorted(
                list(dataloader.car_sizes.keys()))[timeslot]][car_id]
        )[None, :]
        async_results.append(
            pool.apply_async(
                process_one_episode,
                (opt, env, car_path, forward_model, policy_network_il,
                 data_stats, plan_file, j, car_sizes)
            )
        )

    for j in range(n_test):
        simulation_result = async_results[j].get()

        time_travelled.append(simulation_result.time_travelled)
        distance_travelled.append(simulation_result.distance_travelled)
        road_completed.append(simulation_result.road_completed)
        action_sequences.append(
            torch.from_numpy(simulation_result.action_sequence))
        state_sequences.append(
            torch.from_numpy(simulation_result.state_sequence))
        # image_sequences.append(
        #     torch.from_numpy(simulation_result.image_sequence))
        cost_sequences.append(simulation_result.cost_sequence)
        total_images += time_travelled[-1]

        collided.append(simulation_result.has_collided)
        offscreen.append(simulation_result.off_screen)

        log_string = ' | '.join((
            f'ep: {j + 1:3d}/{n_test}',
            f'time: {time_travelled[-1]}',
            f'distance: {distance_travelled[-1]:.0f}',
            f'success: {road_completed[-1]:d}',
            f'mean time: {torch.Tensor(time_travelled).mean():.0f}',
            f'mean distance: {torch.Tensor(distance_travelled).mean():.0f}',
            f'mean success: {torch.Tensor(road_completed).mean():.3f}',
        ))
        print(log_string)
        utils.log(path.join(opt.save_dir, f'{plan_file}.log'), log_string)

        if writer is not None:
            # writer.add_video(
            #     f'Video/success={simulation_result.road_completed:d}_{j}',
            #     simulation_result.images.unsqueeze(0),
            #     j
            # )
            writer.add_scalar('ByEpisode/Success',
                              simulation_result.road_completed, j)
            writer.add_scalar('ByEpisode/Collision',
                              simulation_result.has_collided, j)
            writer.add_scalar('ByEpisode/OffScreen',
                              simulation_result.off_screen, j)
            writer.add_scalar('ByEpisode/Distance',
                              simulation_result.distance_travelled, j)

    pool.close()
    pool.join()

    diff_time = time.time() - time_started
    print('avg time travelled per second is', total_images / diff_time)

    torch.save({"road_completed": road_completed,
                "collided": collided,
                "offscreen": offscreen},
               path.join(opt.save_dir, f'{plan_file}.others'))
    torch.save(action_sequences,
               path.join(opt.save_dir, f'{plan_file}.actions'))
    torch.save(state_sequences, path.join(opt.save_dir, f'{plan_file}.states'))
    # torch.save(image_sequences,
    #            path.join(opt.save_dir, f'{plan_file}.images'))
    torch.save(cost_sequences, path.join(opt.save_dir, f'{plan_file}.costs'))

    if writer is not None:
        writer.close()
    with open(os.path.join(output_root, "%s_list.Score" % tgtname),
              "a") as F:
        F.write(score_output)
    pbar3.update()
    return

for i in range(datasize):
    tplname = template_name_list[i]
    observations = torch.load(
        os.path.join(obs_path, '%s-%s.pth' % (tplname, tgt['name'])),
        pickle_module=pickle)
    observations = observations.float()
    observations = torch.mul(observations, Node_Weight)
    pool.apply_async(compute_alignment,
                     args=(tplname + tpl_type, observations, transitions,
                           pair_distance, disc_method.tolist(),
                           ADMM_ITERATION, edge_type, Node_Weight),
                     callback=getoutput)
pool.close()
pool.join()
pbar3.close()
print('time: %.3f s' % (time.time() - start))
print('finish admm algorithm')
print('start generate output..')
print('sort by index: %d' % sort_col)
sortoutput(tgt['name'],
           os.path.join(output_root, tgt['name'] + '_list.Score'),
           args.k, sort_col)
print("finish %d alignment generation and save them in %s" %
      (datasize, output_root))
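In this pattern, `callback=getoutput` means the return value of `compute_alignment` is handed to `getoutput` in the parent process as each job finishes. A minimal sketch of callback-based collection, with hypothetical `score_pair` and `collect` standing in for `compute_alignment` and `getoutput`:

# Minimal sketch: each worker's return value is passed to the callback,
# which runs in the parent process. Names are illustrative only.
from multiprocessing import Pool

scores = []


def score_pair(name, value):
    return name, value * 2


def collect(result):
    # Invoked in the parent when the corresponding job completes.
    scores.append(result)


if __name__ == '__main__':
    pool = Pool(4)
    for i, name in enumerate(['a', 'b', 'c', 'd']):
        pool.apply_async(score_pair, args=(name, i), callback=collect)
    pool.close()
    pool.join()
    print(scores)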
def train(self, data, labels, val_data=None, val_labels=None,
          warm_start=False):
    """
    :param data:
    :param labels:
    :param val_data:
    :param val_labels:
    :param warm_start:
    :return:
    """
    # print('start train')
    # initialize variable
    data = torch.from_numpy(data).float()
    labels = torch.from_numpy(labels).char()
    if val_data is None:
        train_data, val_data = data, data
        train_labels, val_labels = labels, labels
    else:
        train_data = data
        train_labels = labels
        val_data = torch.from_numpy(val_data).float()
        val_labels = torch.from_numpy(val_labels).char()
    orig_cols = train_data.size(1)

    # counting class and get their index
    self.plus_row_index = []
    self.minus_row_index = []
    self.orig_minus = 0
    self.orig_plus = 0
    for idx, value in enumerate(train_labels):
        if value == 1:
            self.orig_plus += 1
            self.plus_row_index.append(idx)
        else:
            self.orig_minus += 1
            self.minus_row_index.append(idx)

    # balanced pick rows and cols
    plus = max(2, int(self.orig_plus * self.nrows))
    minus = max(2, int(self.orig_minus * self.nrows))
    num_cols = max(min(5, orig_cols), int(self.nfeatures * orig_cols))

    # initialize up triangle matrix and reference index
    rows_sum = plus + minus
    if self.adv_train:
        rows_sum = rows_sum * 2
        # plus = plus * 2
        # minus = minus * 2
    self.yp = torch.ones((self.width, rows_sum), dtype=torch.int8)
    self.ref_full_index = torch.repeat_interleave(
        torch.arange(self.updated_features * self.step.shape[0]).view(
            (-1, 1)), rows_sum, dim=1)

    # multi-process
    c = self.round // self.num_gpus
    results = []
    logs = {}
    # print('enter pool')
    for r in range(c + 1):
        pool = Pool(self.n_jobs)
        results = []
        for t in range(min(self.n_jobs, self.round - r * self.num_gpus)):
            if warm_start and self.w_index != []:
                column_indices = self.w_index[r * self.num_gpus + t]
                w1 = self.w1[:, :, r * self.num_gpus + t]
                w2 = self.w2[:, r * self.num_gpus + t]
            else:
                column_indices = np.random.choice(np.arange(orig_cols),
                                                  num_cols, replace=False)
                # column_indices = np.arange(orig_cols)
                self.w_index.append(column_indices)
            results.append(
                pool.apply_async(self.single_run,
                                 args=(train_data, train_labels, plus, minus,
                                       val_data, val_labels, column_indices,
                                       t % self.num_gpus)))
        pool.close()
        pool.join()

        for i, result in enumerate(results):
            temp_w, temp_b, temp_obj = result.get()
            # logs['vote%d_train' % i] = train_log
            # logs['vote%d_test' % i] = test_log
            if warm_start:
                self.w[:, i] = temp_w
                self.b[:, i] = temp_b
                self.obj[i] = temp_obj
            else:
                self.w.append(temp_w.view((-1, 1)))
                self.b.append(temp_b.view((1, 1)))
                self.obj.append(temp_obj)
        del pool, results

    if warm_start is False:
        self.w = torch.cat(self.w, dim=1)
        self.b = torch.cat(self.b, dim=1)
        self.obj = torch.Tensor(self.obj)
    best_index = self.obj.argmax()
    self.best_acc = self.obj[best_index]
    self.best_w = self.w[:, best_index]
    self.best_b = self.b[:, best_index]
    self.best_w_index = self.w_index[best_index]
    del self.yp, self.ref_full_index
    return
def learn_selfplay_client(self):
    """
    Process that continuously generates self-play data
    """
    manager = mp.Manager()
    sharedQ = manager.Queue()
    statedict_name = "Default"

    # Create num_selfplay_procs queues for sending nn eval results to
    # selfplay procs.
    queues = []
    for j in range(self.args.num_selfplay_procs):
        queues.append(manager.Queue())

    # Create num_gpu_procs queues for sending state_dict update info to
    # nn procs.
    nn_update_pipes1 = []
    nn_update_pipes2 = []
    for j in range(self.args.num_gpu_procs):
        c1, c2 = mp.Pipe()
        nn_update_pipes1.append(c1)
        nn_update_pipes2.append(c2)

    # Create num_gpu_procs nnProcess
    nnProcs = []
    for j in range(self.args.num_gpu_procs):
        # Run nnProc
        nnProc = mp.Process(
            target=JanggiCoach.nnProcess,
            args=[(self.game, nn_update_pipes1[j], sharedQ,
                   self.args.gpus_to_use[j % len(self.args.gpus_to_use)],
                   queues, self.args.checkpoint_folder)])
        nnProc.daemon = True
        nnProc.start()
        nnProcs.append(nnProc)

    # Create a queue for receiving info of finished jobs
    nextSelfplayQ = manager.Queue()

    # Create self-play process pool
    selfplayPool = Pool(self.args.num_selfplay_procs)

    # Run the first num_selfplay_procs process
    ibs = pickle.loads(
        requests.get(url=self.args.request_base_url + "/getIBS").content)
    for j in range(self.args.num_selfplay_procs):
        selfplayPool.apply_async(
            JanggiCoach.executeEpisode,
            [(Game(self.game.c1, self.game.c2, mode=ibs), self.args,
              sharedQ, queues[j], j, nextSelfplayQ, None)])

    # Continuously generate self-plays
    while True:
        # Check for any network updates
        new_sd = pickle.loads(
            requests.get(url=self.args.request_base_url + "/getSD").content)
        if statedict_name != new_sd:
            statedict_name = new_sd
            sharedStateDictFile = JanggiCoach.getSharedStateDictFile(
                self.args.remote_checkpoint_folder)
            if self.args.scp_base_url is not None:
                JanggiCoach.checkpointSCP(
                    self.args.scp_base_url + ":" + sharedStateDictFile,
                    sharedStateDictFile)
            for q in nn_update_pipes2:
                q.send(statedict_name)
                q.recv()
            log.info('Alerted the nn procs to update the network')

        # Wait for a selfplay result
        is_selfplay, q_data = nextSelfplayQ.get()
        if is_selfplay:
            data, finished_id = q_data
            self.selfPlaysPlayed += 1
            log.info(str(self.selfPlaysPlayed) +
                     ' selfplay games played. Data length = ' +
                     str(len(data)))
            requests.post(url=self.args.request_base_url + "/postData",
                          data=pickle.dumps(data))
        else:
            checkpoint, is_rp, did_win = q_data
            log.info("Evaluated (" + str(checkpoint) + ", " + str(is_rp) +
                     ", " + str(did_win) + ")")
            requests.post(url=self.args.request_base_url + "/uploadEvalRes",
                          data=pickle.dumps((checkpoint, is_rp, did_win)))

        # Run new selfplay
        ibs = pickle.loads(
            requests.get(url=self.args.request_base_url + "/getIBS").content)
        next_game = pickle.loads(
            requests.get(url=self.args.request_base_url +
                         "/getNextGame").content)
        if next_game is None:
            selfplayPool.apply_async(
                JanggiCoach.executeEpisode,
                [(Game(self.game.c1, self.game.c2, mode=ibs), self.args,
                  sharedQ, queues[finished_id], finished_id, nextSelfplayQ,
                  None)])
        else:
            checkpoint, is_rp, is_p1 = next_game
            assert False
def train(self, data, labels, val_data=None, val_labels=None,
          warm_start=False):
    """
    :param data:
    :param labels:
    :param val_data:
    :param val_labels:
    :param warm_start:
    :return:
    """
    # initialize variable
    data = torch.from_numpy(data).float()
    labels = torch.from_numpy(labels).char()
    if val_data is None:
        # rows = labels.shape[0]
        # index = np.random.permutation(rows)
        # val_size = rows // 5
        # val_data = data[index[:val_size]]
        # val_labels = labels[index[:val_size]]
        # train_data = data[index[val_size:]]
        # train_labels = labels[index[val_size:]]
        train_data, val_data = data, data
        train_labels, val_labels = labels, labels
    else:
        train_data = data
        train_labels = labels
        val_data = torch.from_numpy(val_data).float()
        val_labels = torch.from_numpy(val_labels).char()
    orig_cols = train_data.size(1)

    # counting class and get their index
    self.plus_row_index = []
    self.minus_row_index = []
    self.orig_minus = 0
    self.orig_plus = 0
    for idx, value in enumerate(train_labels):
        if value == 1:
            self.orig_plus += 1
            self.plus_row_index.append(idx)
        else:
            self.orig_minus += 1
            self.minus_row_index.append(idx)

    # balanced pick rows and cols
    plus = max(2, int(self.orig_plus * self.nrows))
    minus = max(2, int(self.orig_minus * self.nrows))
    num_cols = max(min(5, orig_cols), int(self.nfeatures * orig_cols))

    # initialize up triangle matrix and reference index
    rows_sum = plus + minus
    if self.adv_train:
        rows_sum = rows_sum * 2
        # plus = plus * 2
        # minus = minus * 2
    self.yp = torch.ones((rows_sum, rows_sum), dtype=torch.int8).triu_(0)
    self.ref_full_index = torch.repeat_interleave(
        torch.arange(self.updated_features * self.step.shape[0]).view(
            (-1, 1)), rows_sum, dim=1)

    # multi-process
    pool = Pool(self.n_jobs)
    results = []
    for r in range(self.round):
        if warm_start and self.w_index != []:
            column_indices = self.w_index[r]
            w = self.w[:, r]
        else:
            column_indices = np.random.choice(np.arange(orig_cols),
                                              num_cols, replace=False)
            self.w_index.append(column_indices)
            w = np.random.uniform(-1, 1,
                                  size=(num_cols,)).astype(np.float32)
        results.append(
            pool.apply_async(
                self.single_run_adv if self.adv_train else self.single_run,
                args=(train_data, train_labels, plus, minus, val_data,
                      val_labels, w, column_indices, r % self.num_gpus)))
    pool.close()
    pool.join()

    for i, result in enumerate(results):
        temp_w, temp_b, temp_obj = result.get()
        if warm_start:
            self.w[:, i] = temp_w
            self.b[:, i] = temp_b
            self.obj[i] = temp_obj
        else:
            self.w.append(temp_w.view((-1, 1)))
            self.b.append(temp_b.view((1, 1)))
            self.obj.append(temp_obj)

    if warm_start is False:
        self.w = torch.cat(self.w, dim=1)
        self.b = torch.cat(self.b, dim=1)
        self.obj = torch.Tensor(self.obj)
    best_index = self.obj.argmax()
    self.best_acc = self.obj[best_index]
    self.best_w = self.w[:, best_index]
    self.best_b = self.b[:, best_index]
    self.best_w_index = self.w_index[best_index]
                       tplname + '-' + tgtname + '.fasta'), 'w') as f:
        f.write(alignment_output)
    with open(os.path.join(outputname, "%s_list.Score" % tgt['name']),
              "a") as F:
        F.write(score_output)
    pbar.update()
    return

for i in range(datasize):
    tplname = template_name_list[i]
    observations = torch.load(
        os.path.join(obs_path, "%s-%s.pth" % (tplname, tgt['name'])),
        pickle_module=pickle)
    observations = observations.float()
    pool.apply_async(generateAlign,
                     args=(tplname + tpl_type, tgt['name'] + tgt_type,
                           observations, transitions),
                     callback=getoutput)
pool.close()
pool.join()
pbar.close()
print("finish calculating alignment in %.2fs" % (time.time() - start))
sortoutput(tgt['name'],
           os.path.join(outputname, tgt['name'] + '_list.Score'), args.k)
print("finish %d alignment generation and save them in %s" %
      (datasize, outputname))
print("Date: %s" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))
print("Command: %s" % " ".join(sys.argv))
print("Query name: %s" % tgt['name'])
print("Template_list: %s" % os.path.basename(args.l))
print("Output path: %s" % outputname)
    pbar.update()
    return

for i in range(datasize):
    if len(pair_name_list[i].split('-')) == 3:
        tgtname, domainID, tplname = pair_name_list[i].split('-')
        tgtname = "%s-%s" % (tgtname, domainID)
    else:
        tgtname, tplname = pair_name_list[i].split('-')
    observations = torch.load(
        os.path.join(s1_path, "%s-%s.DRNF.Score.pkl" % (tplname, tgtname)),
        pickle_module=pickle)
    observations = observations.float()
    pool0.apply_async(generateAlign,
                      args=(tplname + tpl_type, tgtname + tgt_type,
                            observations, transitions),
                      callback=getoutput_init)
pool0.close()
pool0.join()
pbar.close()
print("finish initial alignment in %.2fs" % (time.time() - start))

# empty the cache
if args.s1 == "" and args.s2 == '':
    del observation, featdata, seqX, seqY, maskX, maskY
    del obsmodel, model1
    del data_generator, AlignmentSet
    with torch.cuda.device(GPU):
        torch.cuda.empty_cache()
    del obs_group, crf_group, crfmodel