# Episodic training loop for discrete-action MADDPG.
# Imports assumed by this snippet; SummaryWriter is the tensorboardX writer,
# which provides export_scalars_to_json (torch.utils.tensorboard does not).
import os
from collections import deque

import numpy as np
from tensorboardX import SummaryWriter


def learn_episodic_MADDPG(args):
    # e.g. args.env = "simple_speaker_listener", args.discrete_action = True
    env = make_multiagent_env(args.env)
    if not args.use_writer:
        print("not using writer")
    n_agents = len(env.agents)
    action_spaces = [act_sp.n for act_sp in env.action_space]
    observation_spaces = [ob_sp.shape[0] for ob_sp in env.observation_space]
    log_dir = "maddpg_test_run"
    writer = SummaryWriter(log_dir) if args.use_writer else None
    running_rewards = deque([], maxlen=args.lograte)

    # discrete-action MADDPG trainer
    trainer = MADDPG_Trainer(n_agents, action_spaces, observation_spaces,
                             writer, args)
    trainer.eval()

    timesteps = 0
    episode_rewards = [0.0]
    for ep in range(args.n_eps):
        observations = env.reset()
        trainer.reset()
        done = False
        for t in range(args.T):
            timesteps += 1
            actions = trainer.get_actions(observations)
            actions = [a.cpu().numpy() for a in actions]
            next_obs, rewards, dones, _ = env.step(actions)
            trainer.store_transitions(*map_to_tensors(
                observations, actions, rewards, next_obs, dones))
            # t never reaches args.T inside range(args.T), so cut the
            # episode off on the final step instead
            done = all(dones) or t >= args.T - 1
            if timesteps % args.train_freq == 0:
                trainer.prep_training()
                trainer.sample_and_train(args.batch_size)
                trainer.eval()
            observations = next_obs
            if args.render:
                env.render()
            episode_rewards[-1] += np.sum(rewards)
            if done:
                break
        if args.use_writer:
            writer.add_scalar('rewards', episode_rewards[-1] / n_agents, ep)
        running_rewards.append(episode_rewards[-1] / n_agents)
        episode_rewards.append(0.0)
        if (ep + 1) % args.lograte == 0:
            print(f"episode: {ep}, running episode rewards: "
                  f"{np.mean(running_rewards)}")
            # TODO: add logging to the writer here

    if args.use_writer:
        # log_dir is a plain string, so join with os.path rather than `/`
        writer.export_scalars_to_json(os.path.join(log_dir, 'summary.json'))
        writer.close()
    return 0
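# --- Hedged sketch (not from the source) -----------------------------------
# The loop above calls a `map_to_tensors` helper that is referenced but not
# defined in this snippet. A minimal version consistent with how it is used
# (per-agent lists of numpy observations/actions plus per-agent rewards and
# done flags, converted to float tensors) could look like this; the exact
# dtypes and shapes are assumptions:
import torch


def map_to_tensors(observations, actions, rewards, next_obs, dones):
    """Convert one multi-agent transition into torch float tensors."""
    def to_t(xs):
        return [torch.as_tensor(x, dtype=torch.float32) for x in xs]

    return (to_t(observations), to_t(actions),
            torch.as_tensor(rewards, dtype=torch.float32),
            to_t(next_obs),
            torch.as_tensor(dones, dtype=torch.float32))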
# Training loop for the beer-review rationale model. Imports assumed by this
# snippet; SummaryWriter is the tensorboardX writer (export_scalars_to_json).
import datetime
import json
import os
import time

import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import (ExponentialLR, MultiStepLR,
                                      ReduceLROnPlateau)
from tensorboardX import SummaryWriter


def train():
    """
    Main training loop.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("device:", device)

    cfg = get_args()
    cfg = vars(cfg)

    for k, v in cfg.items():
        print("{:20} : {:10}".format(k, str(v)))

    num_iterations = cfg["num_iterations"]
    print_every = cfg["print_every"]
    eval_every = cfg["eval_every"]
    batch_size = cfg["batch_size"]
    eval_batch_size = cfg.get("eval_batch_size", batch_size)

    aspect = cfg["aspect"]

    if aspect > -1:
        assert "aspect" + str(aspect) in cfg["train_path"], \
            "chosen aspect does not match train file"
        assert "aspect" + str(aspect) in cfg["dev_path"], \
            "chosen aspect does not match dev file"

    # Load the data into memory.
    print("Loading data")
    train_data = list(beer_reader(
        cfg["train_path"], aspect=cfg["aspect"], max_len=cfg["max_len"]))
    dev_data = list(beer_reader(
        cfg["dev_path"], aspect=cfg["aspect"], max_len=cfg["max_len"]))
    test_data = beer_annotations_reader(cfg["test_path"],
                                        aspect=cfg["aspect"])

    print("train", len(train_data))
    print("dev", len(dev_data))
    print("test", len(test_data))

    iters_per_epoch = len(train_data) // batch_size

    if eval_every == -1:
        eval_every = iters_per_epoch
        print("eval_every set to 1 epoch = %d iters" % eval_every)

    if num_iterations < 0:
        # a negative value is interpreted as a number of epochs
        num_iterations = -num_iterations * iters_per_epoch
        print("num_iterations set to %d iters" % num_iterations)

    example = dev_data[0]
    print("First dev example tokens:", example.tokens)
    print("First dev example scores:", example.scores)

    print("Loading pre-trained word embeddings")
    vocab = Vocabulary()
    vectors = load_embeddings(cfg["embeddings"], vocab)

    # build model
    model = build_model(cfg["model"], vocab, cfg=cfg)
    initialize_model_(model)

    # load pre-trained word embeddings
    with torch.no_grad():
        model.embed.weight.data.copy_(torch.from_numpy(vectors))
        print("Embeddings fixed: {}".format(cfg["fix_emb"]))
        model.embed.weight.requires_grad = not cfg["fix_emb"]

    model = model.to(device)
    optimizer = Adam(model.parameters(), lr=cfg["lr"],
                     weight_decay=cfg["weight_decay"])

    # set learning rate scheduler
    if cfg["scheduler"] == "plateau":
        scheduler = ReduceLROnPlateau(
            optimizer, mode='min', factor=cfg["lr_decay"],
            patience=cfg["patience"], threshold=cfg["threshold"],
            threshold_mode='rel', cooldown=cfg["cooldown"],
            verbose=True, min_lr=cfg["min_lr"])
    elif cfg["scheduler"] == "exponential":
        scheduler = ExponentialLR(optimizer, gamma=cfg["lr_decay"])
    elif cfg["scheduler"] == "multistep":
        milestones = cfg["milestones"]
        print("milestones (epoch):", milestones)
        scheduler = MultiStepLR(optimizer, milestones=milestones,
                                gamma=cfg["lr_decay"])
    else:
        raise ValueError("Unknown scheduler")

    # print model and parameters
    print(model)
    print_parameters(model)

    writer = SummaryWriter(log_dir=cfg["save_path"])  # TensorBoard

    start = time.time()
    iter_i = 0
    epoch = 0
    best_eval = 1e12
    best_iter = 0
    pad_idx = vocab.w2i[PAD_TOKEN]

    # resume from a checkpoint
    if cfg.get("ckpt", ""):
        print("Resuming from ckpt: {}".format(cfg["ckpt"]))
        ckpt = torch.load(cfg["ckpt"])
        model.load_state_dict(ckpt["state_dict"])
        best_iter = ckpt["best_iter"]
        best_eval = ckpt["best_eval"]
        iter_i = ckpt["best_iter"]
        optimizer.load_state_dict(ckpt["optimizer_state_dict"])
        cur_lr = scheduler.optimizer.param_groups[0]["lr"]
        print("# lr = ", cur_lr)

    # main training loop
    while True:  # when we run out of examples, shuffle and continue
        for batch in get_minibatch(train_data, batch_size=batch_size,
                                   shuffle=True):

            # forward pass
            model.train()
            x, targets, _ = prepare_minibatch(batch, model.vocab,
                                              device=device)
            output = model(x)

            mask = (x != pad_idx)
            assert pad_idx == 1, "pad idx"

            loss, loss_optional = model.get_loss(output, targets, mask=mask)

            model.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=cfg["max_grad_norm"])
            optimizer.step()

            iter_i += 1

            # print info
            if iter_i % print_every == 0:

                # log main loss, lr, and optional stuff defined by the model
                writer.add_scalar('train/loss', loss.item(), iter_i)
                cur_lr = scheduler.optimizer.param_groups[0]["lr"]
                writer.add_scalar('train/lr', cur_lr, iter_i)

                for k, v in loss_optional.items():
                    writer.add_scalar('train/%s' % k, v, iter_i)

                # print info to console
                loss_str = "%.4f" % loss.item()
                opt_str = make_kv_string(loss_optional)
                seconds_since_start = time.time() - start
                hours = seconds_since_start / 60 // 60
                minutes = seconds_since_start % 3600 // 60
                seconds = seconds_since_start % 60

                print("Epoch %03d Iter %08d time %02d:%02d:%02d loss %s %s" %
                      (epoch, iter_i, hours, minutes, seconds, loss_str,
                       opt_str))

            # take an epoch step (for the per-epoch schedulers)
            if iter_i % iters_per_epoch == 0:
                cur_lr = scheduler.optimizer.param_groups[0]["lr"]
                if cur_lr > cfg["min_lr"]:
                    if isinstance(scheduler, (MultiStepLR, ExponentialLR)):
                        scheduler.step()
                cur_lr = scheduler.optimizer.param_groups[0]["lr"]
                print("#lr", cur_lr)
                scheduler.optimizer.param_groups[0]["lr"] = max(
                    cfg["min_lr"], cur_lr)

            # evaluate
            if iter_i % eval_every == 0:
                print("Evaluation starts - %s" %
                      str(datetime.datetime.now()))

                # print a few examples
                examples = get_examples(model, dev_data, num_examples=3,
                                        device=device)
                for i, example in enumerate(examples, 1):
                    print("Example %d:" % i, " ".join(example))
                    writer.add_text("examples/example_%d" % i,
                                    " ".join(example), iter_i)

                model.eval()
                print("Evaluating..", str(datetime.datetime.now()))
                dev_eval = evaluate_loss(
                    model, dev_data, batch_size=eval_batch_size,
                    device=device, cfg=cfg)
                for k, v in dev_eval.items():
                    writer.add_scalar('dev/' + k, v, iter_i)

                test_eval = evaluate_loss(
                    model, test_data, batch_size=eval_batch_size,
                    device=device, cfg=cfg)
                for k, v in test_eval.items():
                    writer.add_scalar('test/' + k, v, iter_i)

                # compute rationale precision for models that have z
                if hasattr(model, "z"):
                    path = os.path.join(
                        cfg["save_path"],
                        "rationales_i{:08d}_e{:03d}.txt".format(iter_i,
                                                                epoch))
                    test_precision, test_macro_prec = evaluate_rationale(
                        model, test_data, aspect=aspect, device=device,
                        path=path, batch_size=eval_batch_size)
                    writer.add_scalar('test/precision', test_precision,
                                      iter_i)
                    writer.add_scalar('test/macro_precision',
                                      test_macro_prec, iter_i)
                    test_eval["precision"] = test_precision
                    test_eval["macro_precision"] = test_macro_prec
                else:
                    test_eval["precision"] = 0.
                    test_eval["macro_precision"] = 0.

                print("Evaluation epoch %03d iter %08d dev %s test %s" % (
                    epoch, iter_i,
                    make_kv_string(dev_eval),
                    make_kv_string(test_eval)))
                print(str(datetime.datetime.now()))

                # save best model parameters (lower dev objective is better)
                compare_obj = dev_eval["obj"] if "obj" in dev_eval \
                    else dev_eval["loss"]
                dynamic_threshold = best_eval * (1 - cfg["threshold"])
                # only update after the first 5 epochs (for stability)
                if compare_obj < dynamic_threshold \
                        and iter_i > 5 * iters_per_epoch:
                    print("new best", compare_obj)
                    best_eval = compare_obj
                    best_iter = iter_i

                    if not os.path.exists(cfg["save_path"]):
                        os.makedirs(cfg["save_path"])

                    for k, v in dev_eval.items():
                        writer.add_scalar('best/dev/' + k, v, iter_i)
                    for k, v in test_eval.items():
                        writer.add_scalar('best/test/' + k, v, iter_i)

                    ckpt = {
                        "state_dict": model.state_dict(),
                        "cfg": cfg,
                        "best_eval": best_eval,
                        "best_iter": best_iter,
                        "optimizer_state_dict": optimizer.state_dict()
                    }
                    path = os.path.join(cfg["save_path"], "model.pt")
                    torch.save(ckpt, path)

                # update lr scheduler
                if isinstance(scheduler, ReduceLROnPlateau):
                    if iter_i > 5 * iters_per_epoch:
                        scheduler.step(compare_obj)

            # done training
            cur_lr = scheduler.optimizer.param_groups[0]["lr"]
            # if iter_i == num_iterations or cur_lr < stop_lr:
            if iter_i == num_iterations:
                print("Done training")
                print("Last lr: ", cur_lr)

                # export scalar data to JSON for external processing
                writer.export_scalars_to_json(
                    os.path.join(cfg["save_path"], "all_scalars.json"))
                writer.close()

                # evaluate on test with best model
                print("Loading best model")
                path = os.path.join(cfg["save_path"], "model.pt")
                ckpt = torch.load(path)
                model.load_state_dict(ckpt["state_dict"])

                print("Evaluating")
                dev_eval = evaluate_loss(
                    model, dev_data, batch_size=eval_batch_size,
                    device=device, cfg=cfg)
                test_eval = evaluate_loss(
                    model, test_data, batch_size=eval_batch_size,
                    device=device, cfg=cfg)

                if hasattr(model, "z"):
                    path = os.path.join(cfg["save_path"],
                                        "final_rationales.txt")
                    test_precision, test_macro_prec = evaluate_rationale(
                        model, test_data, aspect=aspect, device=device,
                        batch_size=eval_batch_size, path=path)
                else:
                    test_precision = 0.
                    test_macro_prec = 0.

                test_eval["precision"] = test_precision
                test_eval["macro_precision"] = test_macro_prec

                dev_s = make_kv_string(dev_eval)
                test_s = make_kv_string(test_eval)

                print("best model iter {:d} dev {} test {}".format(
                    best_iter, dev_s, test_s))

                # save result
                result_path = os.path.join(cfg["save_path"], "results.json")
                cfg["best_iter"] = best_iter
                for name, eval_result in zip(("dev", "test"),
                                             (dev_eval, test_eval)):
                    for k, v in eval_result.items():
                        cfg[name + '_' + k] = v

                with open(result_path, mode="w") as f:
                    json.dump(cfg, f)

                return

        epoch += 1
# BERT text-classification driver. The opening lines of main() were cut off
# in the source; the function header and the train-example block are
# restored here by symmetry with the test block below.
def main():
    # train dataloader
    examples = DataProcessor().get_train_examples(content[:NUM_TRAIN_DATA],
                                                  target[:NUM_TRAIN_DATA])
    train_dataset = convert_examples_to_features(
        examples,
        max_length=MAX_LEN,
        tokenizer=BertTokenizer.from_pretrained(MODEL_DIR))
    train_loader = DataLoader(train_dataset, shuffle=True,
                              batch_size=BATCH_SIZE)

    # test dataloader
    examples = DataProcessor().get_test_examples(content[NUM_TRAIN_DATA:],
                                                 target[NUM_TRAIN_DATA:])
    test_dataset = convert_examples_to_features(
        examples,
        max_length=MAX_LEN,
        tokenizer=BertTokenizer.from_pretrained(MODEL_DIR))
    test_loader = DataLoader(test_dataset, shuffle=False,
                             batch_size=BATCH_SIZE)

    # start training, evaluating on the test set via the callback
    # (an earlier variant evaluated on the training data instead):
    # train(train_loader, MODEL_DIR, num_labels=18, epochs=EPOCHS,
    #       eval_callback=evaluate, test_loader=train_loader)
    train(train_loader, MODEL_DIR, num_labels=18, epochs=EPOCHS,
          eval_callback=evaluate, test_loader=test_loader)


if __name__ == '__main__':
    main()
    # NOTE: `writer` must be a SummaryWriter created at module scope
    # elsewhere in this file; export_scalars_to_json is tensorboardX API.
    writer.export_scalars_to_json("./log.json")
    writer.close()
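# --- Hedged sketch (not from the source) -----------------------------------
# The `evaluate` callback passed to train() above is not shown. One plausible
# shape, assuming it receives the model and the test DataLoader and that each
# batch unpacks to (input_ids, attention_mask, labels); the name, signature,
# and batch layout are all assumptions:
import torch


def evaluate(model, test_loader, device="cpu"):
    """Return test accuracy for a sequence-classification model."""
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for batch in test_loader:
            input_ids, attention_mask, labels = (t.to(device) for t in batch)
            logits = model(input_ids, attention_mask=attention_mask)[0]
            correct += (logits.argmax(dim=-1) == labels).sum().item()
            total += labels.size(0)
    return correct / max(total, 1)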
# TensorBoard logging utility. Imports assumed by this snippet; SummaryWriter
# is the tensorboardX writer (export_scalars_to_json is not available in
# torch.utils.tensorboard).
import os

import numpy as np
from tensorboardX import SummaryWriter


class Logger:
    def __init__(self, log_dir, n_logged_samples=10, summary_writer=None):
        self._log_dir = log_dir
        print('########################')
        print('logging outputs to ', log_dir)
        print('########################')
        self._n_logged_samples = n_logged_samples
        self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1)

    def log_scalar(self, scalar, name, step_):
        self._summ_writer.add_scalar('{}'.format(name), scalar, step_)

    def log_scalars(self, scalar_dict, group_name, step, phase):
        """Will log all scalars in the same plot."""
        self._summ_writer.add_scalars('{}_{}'.format(group_name, phase),
                                      scalar_dict, step)

    def log_image(self, image, name, step):
        assert len(image.shape) == 3  # [C, H, W]
        self._summ_writer.add_image('{}'.format(name), image, step)

    def log_video(self, video_frames, name, step, fps=10):
        assert len(video_frames.shape) == 5, \
            "Need [N, T, C, H, W] input tensor for video logging!"
        self._summ_writer.add_video('{}'.format(name), video_frames, step,
                                    fps=fps)

    def log_paths_as_videos(self, paths, step, max_videos_to_save=2, fps=10,
                            video_title='video'):

        # reshape the rollouts from [T, H, W, C] to [T, C, H, W]
        videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in paths]

        # find the maximum rollout length
        max_videos_to_save = np.min([max_videos_to_save, len(videos)])
        max_length = videos[0].shape[0]
        for i in range(max_videos_to_save):
            if videos[i].shape[0] > max_length:
                max_length = videos[i].shape[0]

        # pad rollouts so they are all the same length
        for i in range(max_videos_to_save):
            if videos[i].shape[0] < max_length:
                padding = np.tile([videos[i][-1]],
                                  (max_length - videos[i].shape[0], 1, 1, 1))
                videos[i] = np.concatenate([videos[i], padding], 0)

        # log videos to the tensorboard event file
        print("Logging videos")
        videos = np.stack(videos[:max_videos_to_save], 0)
        self.log_video(videos, video_title, step, fps=fps)

    def log_figures(self, figure, name, step, phase):
        """figure: batch of matplotlib.pyplot figure handles"""
        assert figure.shape[0] > 0, \
            "Figure logging requires input shape [batch x figures]!"
        self._summ_writer.add_figure('{}_{}'.format(name, phase), figure,
                                     step)

    def log_figure(self, figure, name, step, phase):
        """figure: matplotlib.pyplot figure handle"""
        self._summ_writer.add_figure('{}_{}'.format(name, phase), figure,
                                     step)

    def log_graph(self, array, name, step, phase):
        """array: numpy array, rendered to an image via plot_graph"""
        im = plot_graph(array)
        self._summ_writer.add_image('{}_{}'.format(name, phase), im, step)

    def dump_scalars(self, log_path=None):
        log_path = os.path.join(self._log_dir, "scalar_data.json") \
            if log_path is None else log_path
        self._summ_writer.export_scalars_to_json(log_path)

    def flush(self):
        self._summ_writer.flush()
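# --- Usage sketch (assumes a writable logs/ directory) ----------------------
# Minimal example of driving the Logger above with scalar metrics:
logger = Logger("logs/example_run")
for step in range(100):
    loss = 1.0 / (step + 1)  # stand-in for a real training loss
    logger.log_scalar(loss, "train/loss", step)
    logger.log_scalars({"actor": loss, "critic": 2 * loss},
                       "losses", step, phase="train")
logger.flush()
logger.dump_scalars()  # writes scalar_data.json next to the event file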
# Experiment bookkeeping for the Deep Hex Agent. Imports assumed by this
# snippet; `args`, `logger`, `username`, `project_name`, `set_seed`,
# `copytree`, and `include_patterns` are defined elsewhere in the project,
# and SummaryWriter is the tensorboardX writer (export_scalars_to_json).
import os
import shutil
import socket
import sys
import time
from collections import defaultdict

import numpy as np
import pandas as pd
import torch
from tensorboardX import SummaryWriter


class Experiment(object):

    def __init__(self):
        set_seed()
        torch.set_num_threads(100)
        logger.info("Welcome to: Deep Hex Agent")
        logger.info(' ' * 26 + 'Simulation Hyperparameters')
        for k, v in vars(args).items():
            logger.info(' ' * 26 + k + ': ' + str(v))

        # consts
        self.uncertainty_samples = 1

        # parameters
        self.start_time = time.time()
        self.exptime = time.strftime("%Y%m%d_%H%M%S", time.localtime())
        self.device = torch.device("cuda:%d" % args.cuda)
        self.opt_level = "O1" if args.half else "O0"

        if "gpu" in socket.gethostname():
            self.root_dir = os.path.join('/home/dsi/', username, 'data',
                                         project_name)
        elif "root" == username:
            self.root_dir = os.path.join('/data/data', project_name)
        else:
            self.root_dir = os.path.join('/data/', username, project_name)

        self.base_dir = os.path.join(self.root_dir, 'results')
        for folder in [self.base_dir, self.root_dir]:
            if not os.path.exists(folder):
                os.makedirs(folder)

        dirs = os.listdir(self.base_dir)

        self.resume = args.num
        temp_name = "%s_%s_%s_exp" % (args.algorithm, args.identifier,
                                      args.environment.split('-')[0])
        self.exp_name = ""
        self.load_model = True
        if self.resume >= 0:
            for d in dirs:
                if "%s_%04d_" % (temp_name, self.resume) in d:
                    self.exp_name = d
                    self.exp_num = self.resume
                    break
        elif self.resume == -1:
            ds = [d for d in dirs if temp_name in d]
            ns = np.array([int(d.split("_")[-3]) for d in ds])
            if len(ns):
                self.exp_name = ds[np.argmax(ns)]
            else:
                raise Exception("Non-existing experiment")

        if not self.exp_name:
            # count similar experiments to pick the next experiment number
            n = max([-1] + [int(d.split("_")[-3]) for d in dirs
                            if temp_name in d]) + 1
            self.exp_name = "%s_%04d_%s" % (temp_name, n, self.exptime)
            self.exp_num = n
            self.load_model = False

        # init experiment parameters
        self.root = os.path.join(self.base_dir, self.exp_name)

        # set dirs
        self.tensorboard_dir = os.path.join(self.root, 'tensorboard')
        self.checkpoints_dir = os.path.join(self.root, 'checkpoints')
        self.results_dir = os.path.join(self.root, 'results')
        self.code_dir = os.path.join(self.root, 'code')

        self.checkpoint = os.path.join(self.checkpoints_dir, 'checkpoint')

        if self.load_model and args.reload:
            print("Resuming existing experiment")
        else:
            if not self.load_model:
                print("Creating new experiment")
            else:
                print("Deleting old experiment")
                shutil.rmtree(self.root)

            os.makedirs(self.root)
            os.makedirs(self.tensorboard_dir)
            os.makedirs(self.checkpoints_dir)
            os.makedirs(self.results_dir)

            # make log dirs
            os.makedirs(os.path.join(self.results_dir, 'train'))
            os.makedirs(os.path.join(self.results_dir, 'eval'))

            # copy code to dir
            copytree(os.path.dirname(os.path.realpath(__file__)),
                     self.code_dir,
                     ignore=include_patterns('*.py', '*.md', '*.ipynb'))

            # write args to file
            filename = os.path.join(self.root, "args.txt")
            with open(filename, 'w') as fp:
                fp.write('\n'.join(sys.argv[1:]))

            pd.to_pickle(vars(args), os.path.join(self.root, "args.pkl"))

        # initialize tensorboard writer
        if args.tensorboard:
            self.writer = SummaryWriter(log_dir=self.tensorboard_dir,
                                        comment=args.identifier)

    def log_data(self, train_results, test_results, n, alg=None):

        defaults_argv = defaultdict(dict)

        # collapse per-key value lists into their means
        for param, val in train_results['scalar'].items():
            if isinstance(val, dict):
                for p, v in val.items():
                    val[p] = np.mean(v)
            else:
                train_results['scalar'][param] = np.mean(val)

        if test_results is not None:
            for param, val in test_results['scalar'].items():
                if isinstance(val, dict):
                    for p, v in val.items():
                        val[p] = np.mean(v)
                else:
                    test_results['scalar'][param] = np.mean(val)

        if args.tensorboard:

            if alg is not None:
                networks = alg.get_networks()
                for net in networks:
                    for name, param in networks[net]():
                        try:
                            self.writer.add_histogram(
                                "weight_%s/%s" % (net, name),
                                param.data.cpu().numpy(), n,
                                bins='tensorflow')
                            self.writer.add_histogram(
                                "grad_%s/%s" % (net, name),
                                param.grad.cpu().numpy(), n,
                                bins='tensorflow')
                            if hasattr(param, 'intermediate'):
                                self.writer.add_histogram(
                                    "iterm_%s/%s" % (net, name),
                                    param.intermediate.cpu().numpy(), n,
                                    bins='tensorflow')
                        except Exception:
                            # skip parameters without gradients
                            pass

            # dispatch each result type to the matching writer method,
            # e.g. 'scalar' -> add_scalar
            for log_type in train_results:
                log_func = getattr(self.writer, f"add_{log_type}")
                for param in train_results[log_type]:
                    if isinstance(train_results[log_type][param], dict):
                        for p, v in train_results[log_type][param].items():
                            log_func(f"train_{param}/{p}", v, n,
                                     **defaults_argv[log_type])
                    elif isinstance(train_results[log_type][param], list):
                        log_func(f"eval/{param}",
                                 *train_results[log_type][param], n,
                                 **defaults_argv[log_type])
                    else:
                        log_func(f"train/{param}",
                                 train_results[log_type][param], n,
                                 **defaults_argv[log_type])

            if test_results is not None:
                for log_type in test_results:
                    log_func = getattr(self.writer, f"add_{log_type}")
                    for param in test_results[log_type]:
                        if isinstance(test_results[log_type][param], dict):
                            for p, v in \
                                    test_results[log_type][param].items():
                                log_func(f"eval_{param}/{p}", v, n,
                                         **defaults_argv[log_type])
                        elif isinstance(test_results[log_type][param], list):
                            log_func(f"eval/{param}",
                                     *test_results[log_type][param], n,
                                     **defaults_argv[log_type])
                        else:
                            log_func(f"eval/{param}",
                                     test_results[log_type][param], n,
                                     **defaults_argv[log_type])

        stat_line = 'Train: '
        for param in train_results['scalar']:
            if not isinstance(train_results['scalar'][param], dict):
                stat_line += ' %s %g \t|' % (
                    param, train_results['scalar'][param])
        logger.info(stat_line)

        path = os.path.join(self.results_dir, 'train')
        np.save(f'{path}/{n:06d}.npy', dict(train_results))

        if test_results is not None:
            stat_line = 'Eval: '
            for param in test_results['scalar']:
                if not isinstance(test_results['scalar'][param], dict):
                    stat_line += ' %s %g \t|' % (
                        param, test_results['scalar'][param])
            logger.info(stat_line)

            path = os.path.join(self.results_dir, 'eval')
            np.save(f'{path}/{n:06d}.npy', dict(test_results))

    def log_alg(self, alg):
        pass
        # self.writer.add_hparams(hparam_dict=vars(args),
        #                         metric_dict={'x': 0})
        # for name, net in alg.networks_dict:
        #     self.writer.add_graph(net)
        # self.writer.flush()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if args.tensorboard:
            self.writer.export_scalars_to_json(
                os.path.join(self.tensorboard_dir, "all_scalars.json"))
            self.writer.close()
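# --- Usage sketch (not from the source) -------------------------------------
# Experiment is designed as a context manager: __exit__ exports all scalars
# to JSON and closes the writer. A hypothetical driver loop, assuming the
# nested {'scalar': {...}} result dicts that log_data expects
# (`args.n_iterations` is a made-up flag for illustration):
with Experiment() as exp:
    for n in range(args.n_iterations):
        train_results = {'scalar': {'loss': [0.7, 0.6], 'score': [1.0]}}
        test_results = {'scalar': {'score': [1.2]}}
        exp.log_data(train_results, test_results, n)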