import os
from collections import deque

import numpy as np
from tensorboardX import SummaryWriter

# Project-specific helpers assumed in scope:
# make_multiagent_env, MADDPG_Trainer, map_to_tensors


def learn_episodic_MADDPG(args):
    # NOTE: the environment is hard-coded here, overriding args.env
    args.env = "simple_speaker_listener"
    env = make_multiagent_env(args.env)

    if not args.use_writer:
        print("not using writer")
    n_agents = len(env.agents)
    action_spaces = [act_sp.n for act_sp in env.action_space]
    observation_spaces = [ob_sp.shape[0] for ob_sp in env.observation_space]
    log_dir = "maddpg_test_run"
    writer = SummaryWriter(log_dir) if args.use_writer else None
    running_rewards = deque([], maxlen=args.lograte)
    # discrete-action MADDPG agent
    trainer = MADDPG_Trainer(n_agents, action_spaces, observation_spaces,
                             writer, args)
    trainer.eval()
    timesteps = 0
    episode_rewards = [0.0]
    for ep in range(args.n_eps):
        observations = env.reset()
        trainer.reset()
        done = False
        for t in range(args.T):
            timesteps += 1
            actions = trainer.get_actions(observations)
            actions = [a.cpu().numpy() for a in actions]
            next_obs, rewards, dones, _ = env.step(actions)
            trainer.store_transitions(*map_to_tensors(
                observations, actions, rewards, next_obs, dones))
            done = all(dones) or t >= args.T - 1  # all agents done, or horizon reached
            if timesteps % args.train_freq == 0:
                trainer.prep_training()
                trainer.sample_and_train(args.batch_size)
                trainer.eval()
            observations = next_obs

            if args.render:
                env.render()

            episode_rewards[-1] += np.sum(rewards)

            if done:
                break

        if args.use_writer:
            writer.add_scalar('rewards', episode_rewards[-1] / n_agents, ep)
        running_rewards.append(episode_rewards[-1] / n_agents)
        episode_rewards.append(0)
        if (ep + 1) % args.lograte == 0:
            print(
                f"episode: {ep}, running episode rewards: {np.mean(running_rewards)}"
            )
    if args.use_writer:
        writer.export_scalars_to_json(os.path.join(log_dir, 'summary.json'))
        writer.close()

    return 0
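
A minimal invocation sketch; the attribute names below are inferred from how `args` is used in the function, and the values are illustrative assumptions:

import argparse

args = argparse.Namespace(
    env="simple_speaker_listener",  # overridden inside the function anyway
    use_writer=False, render=False, lograte=100,
    n_eps=10000, T=25, train_freq=100, batch_size=1024)
learn_episodic_MADDPG(args)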
Example #2
import os
import json
import time
import datetime

import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import (ReduceLROnPlateau, ExponentialLR,
                                      MultiStepLR)
from tensorboardX import SummaryWriter

# Project-specific helpers assumed in scope: get_args, beer_reader,
# beer_annotations_reader, Vocabulary, load_embeddings, build_model,
# initialize_model_, print_parameters, get_minibatch, prepare_minibatch,
# get_examples, evaluate_loss, evaluate_rationale, make_kv_string, PAD_TOKEN


def train():
    """
    Main training loop.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("device:", device)

    cfg = get_args()
    cfg = vars(cfg)

    for k, v in cfg.items():
        print("{:20} : {:10}".format(k, str(v)))

    num_iterations = cfg["num_iterations"]
    print_every = cfg["print_every"]
    eval_every = cfg["eval_every"]
    batch_size = cfg["batch_size"]
    eval_batch_size = cfg.get("eval_batch_size", batch_size)
    aspect = cfg["aspect"]

    if aspect > -1:
        assert "aspect"+str(aspect) in cfg["train_path"], \
            "chosen aspect does not match train file"
        assert "aspect"+str(aspect) in cfg["dev_path"], \
            "chosen aspect does not match dev file"

    # Let's load the data into memory.
    print("Loading data")

    train_data = list(
        beer_reader(cfg["train_path"],
                    aspect=cfg["aspect"],
                    max_len=cfg["max_len"]))
    dev_data = list(
        beer_reader(cfg["dev_path"],
                    aspect=cfg["aspect"],
                    max_len=cfg["max_len"]))
    test_data = beer_annotations_reader(cfg["test_path"], aspect=cfg["aspect"])

    print("train", len(train_data))
    print("dev", len(dev_data))
    print("test", len(test_data))

    iters_per_epoch = len(train_data) // batch_size

    if eval_every == -1:
        eval_every = iters_per_epoch
        print("eval_every set to 1 epoch = %d iters" % eval_every)

    if num_iterations < 0:
        # a negative value is interpreted as a number of epochs
        num_iterations = -num_iterations * iters_per_epoch
        print("num_iterations set to %d iters" % num_iterations)

    example = dev_data[0]
    print("First dev example tokens:", example.tokens)
    print("First dev example scores:", example.scores)

    print("Loading pre-trained word embeddings")
    vocab = Vocabulary()
    vectors = load_embeddings(cfg["embeddings"], vocab)

    # build model
    model = build_model(cfg["model"], vocab, cfg=cfg)
    initialize_model_(model)

    # load pre-trained word embeddings
    with torch.no_grad():
        model.embed.weight.data.copy_(torch.from_numpy(vectors))
        print("Embeddings fixed: {}".format(cfg["fix_emb"]))
        model.embed.weight.requires_grad = not cfg["fix_emb"]

    model = model.to(device)
    optimizer = Adam(model.parameters(),
                     lr=cfg["lr"],
                     weight_decay=cfg["weight_decay"])

    # set learning rate scheduler
    if cfg["scheduler"] == "plateau":
        scheduler = ReduceLROnPlateau(optimizer,
                                      mode='min',
                                      factor=cfg["lr_decay"],
                                      patience=cfg["patience"],
                                      threshold=cfg["threshold"],
                                      threshold_mode='rel',
                                      cooldown=cfg["cooldown"],
                                      verbose=True,
                                      min_lr=cfg["min_lr"])
    elif cfg["scheduler"] == "exponential":
        scheduler = ExponentialLR(optimizer, gamma=cfg["lr_decay"])
    elif cfg["scheduler"] == "multistep":
        milestones = cfg["milestones"]
        print("milestones (epoch):", milestones)
        scheduler = MultiStepLR(optimizer,
                                milestones=milestones,
                                gamma=cfg["lr_decay"])
    else:
        raise ValueError("Unknown scheduler")

    # print model and parameters
    print(model)
    print_parameters(model)

    writer = SummaryWriter(log_dir=cfg["save_path"])  # TensorBoard
    start = time.time()
    iter_i = 0
    epoch = 0
    best_eval = 1e12
    best_iter = 0
    pad_idx = vocab.w2i[PAD_TOKEN]

    # resume from a checkpoint
    if cfg.get("ckpt", ""):
        print("Resuming from ckpt: {}".format(cfg["ckpt"]))
        ckpt = torch.load(cfg["ckpt"])
        model.load_state_dict(ckpt["state_dict"])
        best_iter = ckpt["best_iter"]
        best_eval = ckpt["best_eval"]
        iter_i = ckpt["best_iter"]
        optimizer.load_state_dict(ckpt["optimizer_state_dict"])
        cur_lr = scheduler.optimizer.param_groups[0]["lr"]
        print("# lr = ", cur_lr)

    # main training loop
    while True:  # when we run out of examples, shuffle and continue
        for batch in get_minibatch(train_data,
                                   batch_size=batch_size,
                                   shuffle=True):

            # forward pass
            model.train()
            x, targets, _ = prepare_minibatch(batch,
                                              model.vocab,
                                              device=device)

            output = model(x)

            mask = (x != pad_idx)
            assert pad_idx == 1, "expected PAD_TOKEN at index 1 in the vocabulary"
            loss, loss_optional = model.get_loss(output, targets, mask=mask)

            model.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=cfg["max_grad_norm"])
            optimizer.step()
            iter_i += 1

            # print info
            if iter_i % print_every == 0:

                # print main loss, lr, and optional stuff defined by the model
                writer.add_scalar('train/loss', loss.item(), iter_i)
                cur_lr = scheduler.optimizer.param_groups[0]["lr"]
                writer.add_scalar('train/lr', cur_lr, iter_i)

                for k, v in loss_optional.items():
                    writer.add_scalar('train/%s' % k, v, iter_i)

                # print info to console
                loss_str = "%.4f" % loss.item()
                opt_str = make_kv_string(loss_optional)
                seconds_since_start = time.time() - start
                hours = seconds_since_start // 3600
                minutes = (seconds_since_start % 3600) // 60
                seconds = seconds_since_start % 60
                print("Epoch %03d Iter %08d time %02d:%02d:%02d loss %s %s" %
                      (epoch, iter_i, hours, minutes, seconds, loss_str,
                       opt_str))

            # take epoch step (if using MultiStepLR scheduler)
            if iter_i % iters_per_epoch == 0:

                cur_lr = scheduler.optimizer.param_groups[0]["lr"]
                if cur_lr > cfg["min_lr"]:
                    if isinstance(scheduler, (MultiStepLR, ExponentialLR)):
                        scheduler.step()

                cur_lr = scheduler.optimizer.param_groups[0]["lr"]
                print("#lr", cur_lr)
                scheduler.optimizer.param_groups[0]["lr"] = max(
                    cfg["min_lr"], cur_lr)

            # evaluate
            if iter_i % eval_every == 0:

                print("Evaluation starts - %s" % str(datetime.datetime.now()))

                # print a few examples
                examples = get_examples(model,
                                        dev_data,
                                        num_examples=3,
                                        device=device)
                for i, example in enumerate(examples, 1):
                    print("Example %d:" % i, " ".join(example))
                    writer.add_text("examples/example_%d" % i,
                                    " ".join(example), iter_i)

                model.eval()

                print("Evaluating..", str(datetime.datetime.now()))

                dev_eval = evaluate_loss(model,
                                         dev_data,
                                         batch_size=eval_batch_size,
                                         device=device,
                                         cfg=cfg)

                for k, v in dev_eval.items():
                    writer.add_scalar('dev/' + k, v, iter_i)

                test_eval = evaluate_loss(model,
                                          test_data,
                                          batch_size=eval_batch_size,
                                          device=device,
                                          cfg=cfg)

                for k, v in test_eval.items():
                    writer.add_scalar('test/' + k, v, iter_i)

                # compute precision for models that have z
                if hasattr(model, "z"):
                    path = os.path.join(
                        cfg["save_path"],
                        "rationales_i{:08d}_e{:03d}.txt".format(iter_i, epoch))
                    test_precision, test_macro_prec = evaluate_rationale(
                        model,
                        test_data,
                        aspect=aspect,
                        device=device,
                        path=path,
                        batch_size=eval_batch_size)
                    writer.add_scalar('test/precision', test_precision, iter_i)
                    writer.add_scalar('test/macro_precision', test_macro_prec,
                                      iter_i)
                    test_eval["precision"] = test_precision
                    test_eval["macro_precision"] = test_macro_prec
                else:
                    test_eval["precision"] = 0.
                    test_eval["macro_precision"] = 0.

                print("Evaluation epoch %03d iter %08d dev %s test %s" %
                      (epoch, iter_i, make_kv_string(dev_eval),
                       make_kv_string(test_eval)))

                print(str(datetime.datetime.now()))

                # save best model parameters (lower is better)
                compare_obj = dev_eval["obj"] if "obj" in dev_eval \
                    else dev_eval["loss"]
                dynamic_threshold = best_eval * (1 - cfg["threshold"])
                # only update after first 5 epochs (for stability)
                if compare_obj < dynamic_threshold \
                        and iter_i > 5 * iters_per_epoch:
                    print("new highscore", compare_obj)
                    best_eval = compare_obj
                    best_iter = iter_i
                    if not os.path.exists(cfg["save_path"]):
                        os.makedirs(cfg["save_path"])

                    for k, v in dev_eval.items():
                        writer.add_scalar('best/dev/' + k, v, iter_i)

                    for k, v in test_eval.items():
                        writer.add_scalar('best/test/' + k, v, iter_i)

                    ckpt = {
                        "state_dict": model.state_dict(),
                        "cfg": cfg,
                        "best_eval": best_eval,
                        "best_iter": best_iter,
                        "optimizer_state_dict": optimizer.state_dict()
                    }

                    path = os.path.join(cfg["save_path"], "model.pt")
                    torch.save(ckpt, path)

                # update lr scheduler
                if isinstance(scheduler, ReduceLROnPlateau):
                    if iter_i > 5 * iters_per_epoch:
                        scheduler.step(compare_obj)

            # check whether training is done
            cur_lr = scheduler.optimizer.param_groups[0]["lr"]

            if iter_i == num_iterations:
                print("Done training")
                print("Last lr: ", cur_lr)

                # export scalar data to JSON for external processing
                writer.export_scalars_to_json(
                    os.path.join(cfg["save_path"], "all_scalars.json"))
                writer.close()

                # evaluate on test with best model
                print("Loading best model")
                path = os.path.join(cfg["save_path"], "model.pt")
                ckpt = torch.load(path)
                model.load_state_dict(ckpt["state_dict"])

                print("Evaluating")
                dev_eval = evaluate_loss(model,
                                         dev_data,
                                         batch_size=eval_batch_size,
                                         device=device,
                                         cfg=cfg)
                test_eval = evaluate_loss(model,
                                          test_data,
                                          batch_size=eval_batch_size,
                                          device=device,
                                          cfg=cfg)

                if hasattr(model, "z"):
                    path = os.path.join(cfg["save_path"],
                                        "final_rationales.txt")
                    test_precision, test_macro_prec = evaluate_rationale(
                        model,
                        test_data,
                        aspect=aspect,
                        device=device,
                        batch_size=eval_batch_size,
                        path=path)
                else:
                    test_precision = 0.
                    test_macro_prec = 0.
                test_eval["precision"] = test_precision
                test_eval["macro_precision"] = test_macro_prec

                dev_s = make_kv_string(dev_eval)
                test_s = make_kv_string(test_eval)

                print("best model iter {:d} dev {} test {}".format(
                    best_iter, dev_s, test_s))

                # save result
                result_path = os.path.join(cfg["save_path"], "results.json")

                cfg["best_iter"] = best_iter

                for name, eval_result in zip(("dev", "test"),
                                             (dev_eval, test_eval)):
                    for k, v in eval_result.items():
                        cfg[name + '_' + k] = v

                with open(result_path, mode="w") as f:
                    json.dump(cfg, f)

                return

        epoch += 1
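
A minimal sketch of reloading the checkpoint that this loop saves; the path is an assumption and `model` is presumed to have been built with build_model as above:

import torch

ckpt = torch.load("results/model.pt")  # assumed cfg["save_path"] location
model.load_state_dict(ckpt["state_dict"])
print("best iter:", ckpt["best_iter"], "best dev eval:", ckpt["best_eval"])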
Example #3
from torch.utils.data import DataLoader
from transformers import BertTokenizer

# Assumed in scope: DataProcessor, convert_examples_to_features, train,
# evaluate, content, target, NUM_TRAIN_DATA, MAX_LEN, MODEL_DIR,
# BATCH_SIZE, EPOCHS


def main():
    # train dataloader (the head of this snippet was truncated; it is
    # reconstructed here to mirror the test-dataloader code below, so
    # get_train_examples and the slicing are assumptions)
    examples = DataProcessor().get_train_examples(content[:NUM_TRAIN_DATA],
                                                  target[:NUM_TRAIN_DATA])
    train_dataset = convert_examples_to_features(
        examples,
        max_length=MAX_LEN,
        tokenizer=BertTokenizer.from_pretrained(MODEL_DIR))
    train_loader = DataLoader(train_dataset,
                              shuffle=True,
                              batch_size=BATCH_SIZE)

    # test dataloader
    examples = DataProcessor().get_test_examples(content[NUM_TRAIN_DATA:],
                                                 target[NUM_TRAIN_DATA:])
    test_dataset = convert_examples_to_features(
        examples,
        max_length=MAX_LEN,
        tokenizer=BertTokenizer.from_pretrained(MODEL_DIR))
    test_loader = DataLoader(test_dataset,
                             shuffle=False,
                             batch_size=BATCH_SIZE)

    # start training with an eval callback on the test loader
    train(train_loader,
          MODEL_DIR,
          num_labels=18,
          epochs=EPOCHS,
          eval_callback=evaluate,
          test_loader=test_loader)


if __name__ == '__main__':
    main()
    # `writer` is assumed to be a module-level SummaryWriter defined in the
    # original file this snippet was taken from
    writer.export_scalars_to_json("./log.json")
    writer.close()
Example #4

import os

import numpy as np
from tensorboardX import SummaryWriter

# Project-specific helper assumed in scope: plot_graph


class Logger:
    def __init__(self, log_dir, n_logged_samples=10, summary_writer=None):
        self._log_dir = log_dir
        print('########################')
        print('logging outputs to ', log_dir)
        print('########################')
        self._n_logged_samples = n_logged_samples
        self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1)

    def log_scalar(self, scalar, name, step_):
        self._summ_writer.add_scalar('{}'.format(name), scalar, step_)

    def log_scalars(self, scalar_dict, group_name, step, phase):
        """Will log all scalars in the same plot."""
        self._summ_writer.add_scalars('{}_{}'.format(group_name, phase),
                                      scalar_dict, step)

    def log_image(self, image, name, step):
        assert (len(image.shape) == 3)  # [C, H, W]
        self._summ_writer.add_image('{}'.format(name), image, step)

    def log_video(self, video_frames, name, step, fps=10):
        assert len(
            video_frames.shape
        ) == 5, "Need [N, T, C, H, W] input tensor for video logging!"
        self._summ_writer.add_video('{}'.format(name),
                                    video_frames,
                                    step,
                                    fps=fps)

    def log_paths_as_videos(self,
                            paths,
                            step,
                            max_videos_to_save=2,
                            fps=10,
                            video_title='video'):

        # reshape the rollouts
        videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in paths]

        # max rollout length
        max_videos_to_save = np.min([max_videos_to_save, len(videos)])
        max_length = videos[0].shape[0]
        for i in range(max_videos_to_save):
            if videos[i].shape[0] > max_length:
                max_length = videos[i].shape[0]

        # pad rollouts to all be same length
        for i in range(max_videos_to_save):
            if videos[i].shape[0] < max_length:
                padding = np.tile([videos[i][-1]],
                                  (max_length - videos[i].shape[0], 1, 1, 1))
                videos[i] = np.concatenate([videos[i], padding], 0)

        # log videos to tensorboard event file
        print("Logging videos")
        videos = np.stack(videos[:max_videos_to_save], 0)
        self.log_video(videos, video_title, step, fps=fps)

    def log_figures(self, figure, name, step, phase):
        """figure: matplotlib.pyplot figure handle"""
        assert figure.shape[
            0] > 0, "Figure logging requires input shape [batch x figures]!"
        self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step)

    def log_figure(self, figure, name, step, phase):
        """figure: matplotlib.pyplot figure handle"""
        self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step)

    def log_graph(self, array, name, step, phase):
        """figure: matplotlib.pyplot figure handle"""
        im = plot_graph(array)
        self._summ_writer.add_image('{}_{}'.format(name, phase), im, step)

    def dump_scalars(self, log_path=None):
        log_path = os.path.join(
            self._log_dir,
            "scalar_data.json") if log_path is None else log_path
        self._summ_writer.export_scalars_to_json(log_path)

    def flush(self):
        self._summ_writer.flush()
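
A minimal usage sketch for Logger; the log directory and the logged values are illustrative assumptions:

logger = Logger("runs/example")
for step in range(10):
    logger.log_scalar(1.0 / (step + 1), "train/loss", step)
logger.dump_scalars()  # writes scalar_data.json into the log dir
logger.flush()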
Example #5
import os
import sys
import time
import socket
import shutil
from collections import defaultdict
from shutil import copytree

import numpy as np
import pandas as pd
import torch
from tensorboardX import SummaryWriter

# Module-level globals assumed in scope: args, logger, username,
# project_name, set_seed, include_patterns


class Experiment(object):
    def __init__(self):

        set_seed()

        torch.set_num_threads(100)
        logger.info("Welcome to: Deep Hex Agent")
        logger.info(' ' * 26 + 'Simulation Hyperparameters')
        for k, v in vars(args).items():
            logger.info(' ' * 26 + k + ': ' + str(v))

        # consts

        self.uncertainty_samples = 1
        # parameters

        self.start_time = time.time()
        self.exptime = time.strftime("%Y%m%d_%H%M%S", time.localtime())
        self.device = torch.device("cuda:%d" % args.cuda)
        self.opt_level = "O1" if args.half else "O0"

        if "gpu" in socket.gethostname():
            self.root_dir = os.path.join('/home/dsi/', username, 'data',
                                         project_name)
        elif "root" == username:
            self.root_dir = os.path.join('/data/data', project_name)
        else:
            self.root_dir = os.path.join('/data/', username, project_name)

        self.base_dir = os.path.join(self.root_dir, 'results')

        for folder in [self.base_dir, self.root_dir]:
            if not os.path.exists(folder):
                os.makedirs(folder)

        dirs = os.listdir(self.base_dir)

        self.resume = args.num
        temp_name = "%s_%s_%s_exp" % (args.algorithm, args.identifier,
                                      args.environment.split('-')[0])
        self.exp_name = ""
        self.load_model = True
        if self.resume >= 0:
            for d in dirs:
                if "%s_%04d_" % (temp_name, self.resume) in d:
                    self.exp_name = d
                    self.exp_num = self.resume
                    break
        elif self.resume == -1:

            ds = [d for d in dirs if temp_name in d]
            ns = np.array([int(d.split("_")[-3]) for d in ds])
            if len(ns):
                self.exp_name = ds[np.argmax(ns)]
                self.exp_num = int(ns.max())
        else:
            raise Exception("Non-existing experiment")

        if not self.exp_name:
            # count similar experiments
            n = max([-1] +
                    [int(d.split("_")[-3])
                     for d in dirs if temp_name in d]) + 1
            self.exp_name = "%s_%04d_%s" % (temp_name, n, self.exptime)
            self.exp_num = n
            self.load_model = False

        # init experiment parameters
        self.root = os.path.join(self.base_dir, self.exp_name)

        # set dirs
        self.tensorboard_dir = os.path.join(self.root, 'tensorboard')
        self.checkpoints_dir = os.path.join(self.root, 'checkpoints')
        self.results_dir = os.path.join(self.root, 'results')
        self.code_dir = os.path.join(self.root, 'code')
        self.checkpoint = os.path.join(self.checkpoints_dir, 'checkpoint')

        if self.load_model and args.reload:
            print("Resuming existing experiment")

        else:

            if not self.load_model:
                print("Creating new experiment")

            else:
                print("Deleting old experiment")
                shutil.rmtree(self.root)

            os.makedirs(self.root)
            os.makedirs(self.tensorboard_dir)
            os.makedirs(self.checkpoints_dir)
            os.makedirs(self.results_dir)
            # os.makedirs(self.code_dir)

            # make log dirs
            os.makedirs(os.path.join(self.results_dir, 'train'))
            os.makedirs(os.path.join(self.results_dir, 'eval'))

            # copy code to dir
            # copy_tree(os.path.dirname(os.path.realpath(__file__)), self.code_dir)

            copytree(os.path.dirname(os.path.realpath(__file__)),
                     self.code_dir,
                     ignore=include_patterns('*.py', '*.md', '*.ipynb'))

            # write args to file
            filename = os.path.join(self.root, "args.txt")
            with open(filename, 'w') as fp:
                fp.write('\n'.join(sys.argv[1:]))

            pd.to_pickle(vars(args), os.path.join(self.root, "args.pkl"))

        # initialize tensorboard writer
        if args.tensorboard:
            self.writer = SummaryWriter(log_dir=self.tensorboard_dir,
                                        comment=args.identifier)

    def log_data(self, train_results, test_results, n, alg=None):

        defaults_argv = defaultdict(dict)

        for param, val in train_results['scalar'].items():
            if type(val) is dict:
                for p, v in val.items():
                    val[p] = np.mean(v)
            else:
                train_results['scalar'][param] = np.mean(val)

        if test_results is not None:
            for param, val in test_results['scalar'].items():
                if type(val) is dict:
                    for p, v in val.items():
                        val[p] = np.mean(v)
                else:
                    test_results['scalar'][param] = np.mean(val)

        if args.tensorboard:

            if alg is not None:
                networks = alg.get_networks()
                for net in networks:
                    # networks[net] is assumed to be a callable yielding
                    # (name, parameter) pairs, e.g. module.named_parameters
                    for name, param in networks[net]():
                        try:
                            self.writer.add_histogram("weight_%s/%s" %
                                                      (net, name),
                                                      param.data.cpu().numpy(),
                                                      n,
                                                      bins='tensorflow')
                            self.writer.add_histogram("grad_%s/%s" %
                                                      (net, name),
                                                      param.grad.cpu().numpy(),
                                                      n,
                                                      bins='tensorflow')
                            if hasattr(param, 'intermediate'):
                                self.writer.add_histogram(
                                    "iterm_%s/%s" % (net, name),
                                    param.intermediate.cpu().numpy(),
                                    n,
                                    bins='tensorflow')
                        except Exception:
                            # some params may have no grad (or no data) yet
                            pass

            for log_type in train_results:
                log_func = getattr(self.writer, f"add_{log_type}")
                for param in train_results[log_type]:

                    if type(train_results[log_type][param]) is dict:
                        for p, v in train_results[log_type][param].items():
                            log_func(f"train_{param}/{p}", v, n,
                                     **defaults_argv[log_type])
                    elif type(train_results[log_type][param]) is list:
                        log_func(f"train/{param}",
                                 *train_results[log_type][param], n,
                                 **defaults_argv[log_type])
                    else:
                        log_func(f"train/{param}",
                                 train_results[log_type][param], n,
                                 **defaults_argv[log_type])

            if test_results is not None:
                for log_type in test_results:
                    log_func = getattr(self.writer, f"add_{log_type}")
                    for param in test_results[log_type]:

                        if type(test_results[log_type][param]) is dict:
                            for p, v in test_results[log_type][param].items():
                                log_func(f"eval_{param}/{p}", v, n,
                                         **defaults_argv[log_type])
                        elif type(test_results[log_type][param]) is list:
                            log_func(f"eval/{param}",
                                     *test_results[log_type][param], n,
                                     **defaults_argv[log_type])
                        else:
                            log_func(f"eval/{param}",
                                     test_results[log_type][param], n,
                                     **defaults_argv[log_type])

        stat_line = 'Train: '
        for param in train_results['scalar']:
            if type(train_results['scalar'][param]) is not dict:
                stat_line += '  %s %g \t|' % (param,
                                              train_results['scalar'][param])
        logger.info(stat_line)
        path = os.path.join(self.results_dir, 'train')
        np.save(f'{path}/{n:06d}.npy', dict(train_results))

        if test_results is not None:
            stat_line = 'Eval: '
            for param in test_results['scalar']:
                if type(test_results['scalar'][param]) is not dict:
                    stat_line += '  %s %g \t|' % (
                        param, test_results['scalar'][param])
            logger.info(stat_line)

            path = os.path.join(self.results_dir, 'eval')
            np.save(f'{path}/{n:06d}.npy', dict(test_results))

    def log_alg(self, alg):
        pass
        # self.writer.add_hparams(hparam_dict=vars(args), metric_dict={'x': 0})
        # for name, net in alg.networks_dict:
        #     self.writer.add_graph(net)
        #     self.writer.flush()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if args.tensorboard:
            self.writer.export_scalars_to_json(
                os.path.join(self.tensorboard_dir, "all_scalars.json"))
            self.writer.close()
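
A minimal sketch of running Experiment as a context manager; it assumes the module-level `args` (with tensorboard enabled) and the other globals noted above are fully configured, and the results values are illustrative:

with Experiment() as exp:
    train_results = {"scalar": {"loss": [0.31, 0.27, 0.24]}}
    exp.log_data(train_results, test_results=None, n=0)
# on exit, scalars are exported to all_scalars.json and the writer is closed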