Example #1
def setup_ignite(
    engine: Engine,
    params: SimpleNamespace,
    exp_source,
    run_name: str,
    extra_metrics: Iterable[str] = (),
):
    warnings.simplefilter("ignore", category=UserWarning)
    handler = ptan_ignite.EndOfEpisodeHandler(exp_source, bound_avg_reward=params.stop_reward)
    handler.attach(engine)
    ptan_ignite.EpisodeFPSHandler().attach(engine)

    @engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
    def episode_completed(trainer: Engine):
        passed = trainer.state.metrics.get("time_passed", 0)
        print(
            "Episode %d: reward=%.2f, steps=%s, "
            "speed=%.1f f/s, elapsed=%s"
            % (
                trainer.state.episode,
                trainer.state.episode_reward,
                trainer.state.episode_steps,
                trainer.state.metrics.get("avg_fps", 0),
                timedelta(seconds=int(passed)),
            )
        )

    @engine.on(ptan_ignite.EpisodeEvents.BOUND_REWARD_REACHED)
    def game_solved(trainer: Engine):
        passed = trainer.state.metrics["time_passed"]
        print(
            "Game solved in %s, after %d episodes "
            "and %d iterations!"
            % (timedelta(seconds=int(passed)), trainer.state.episode, trainer.state.iteration)
        )
        trainer.should_terminate = True

    now = datetime.now().isoformat(timespec="minutes")
    logdir = f"runs/{now}-{params.run_name}-{run_name}"
    tb = tb_logger.TensorboardLogger(log_dir=logdir)
    run_avg = RunningAverage(output_transform=lambda v: v["loss"])
    run_avg.attach(engine, "avg_loss")

    metrics = ["reward", "steps", "avg_reward"]
    handler = tb_logger.OutputHandler(tag="episodes", metric_names=metrics)
    event = ptan_ignite.EpisodeEvents.EPISODE_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)

    # write to tensorboard every 100 iterations
    ptan_ignite.PeriodicEvents().attach(engine)
    metrics = ["avg_loss", "avg_fps"]
    metrics.extend(extra_metrics)
    handler = tb_logger.OutputHandler(
        tag="train", metric_names=metrics, output_transform=lambda a: a
    )
    event = ptan_ignite.PeriodEvents.ITERS_100_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)
def setup_ignite(engine: Engine,
                 params: SimpleNamespace,
                 exp_source,
                 run_name: str,
                 extra_metrics: Iterable[str] = ()):
    # get rid of missing metrics warning
    warnings.simplefilter("ignore", category=UserWarning)

    # this handler fires end-of-episode events (it itself listens to ITERATION_COMPLETED)
    ptan_ignite.EndOfEpisodeHandler(
        exp_source, bound_avg_reward=params.stop_reward).attach(engine)
    ptan_ignite.EpisodeFPSHandler().attach(engine)

    @engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
    def episode_completed(trainer: Engine):
        passed = trainer.state.metrics.get('time_passed', 0)
        print("Episode %d: reward=%.0f, steps=%s, "
              "speed=%.1f f/s, elapsed=%s" %
              (trainer.state.episode, trainer.state.episode_reward,
               trainer.state.episode_steps,
               trainer.state.metrics.get('avg_fps',
                                         0), timedelta(seconds=int(passed))))

    @engine.on(ptan_ignite.EpisodeEvents.BOUND_REWARD_REACHED)
    def game_solved(trainer: Engine):
        passed = trainer.state.metrics['time_passed']
        print("Game solved in %s, after %d episodes "
              "and %d iterations!" %
              (timedelta(seconds=int(passed)), trainer.state.episode,
               trainer.state.iteration))
        trainer.should_terminate = True

    RunningAverage(output_transform=lambda v: v['loss']).attach(
        engine, "avg_loss")

    now = datetime.now().isoformat(timespec='minutes')
    now = now.replace(":", "")
    logdir = f"runs/{now}-{params.run_name}-{run_name}"
    tb = tb_logger.TensorboardLogger(log_dir=logdir)

    handler = tb_logger.OutputHandler(
        tag="episodes", metric_names=['reward', 'steps', 'avg_reward'])
    tb.attach(engine,
              log_handler=handler,
              event_name=ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)

    # write to tensorboard every 100 iterations
    ptan_ignite.PeriodicEvents().attach(engine)

    metrics = ['avg_loss', 'avg_fps']
    metrics.extend(extra_metrics)
    handler = tb_logger.OutputHandler(tag="train",
                                      metric_names=metrics,
                                      output_transform=lambda a: a)
    tb.attach(engine,
              log_handler=handler,
              event_name=ptan_ignite.PeriodEvents.ITERS_100_COMPLETED)
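
For readers new to the Ignite pieces these helpers build on, here is a minimal, self-contained sketch of the same logging pattern (a RunningAverage over the step output plus a TensorBoard OutputHandler) without the ptan-specific handlers. The dummy process_batch and the runs/sketch log directory are illustrative assumptions, not part of the examples above.

from ignite.engine import Engine, Events
from ignite.metrics import RunningAverage
from ignite.contrib.handlers import tensorboard_logger as tb_logger

def process_batch(engine, batch):
    # a real training step would compute and optimize a loss here
    return {"loss": float(batch)}

engine = Engine(process_batch)
RunningAverage(output_transform=lambda v: v["loss"]).attach(engine, "avg_loss")

tb = tb_logger.TensorboardLogger(log_dir="runs/sketch")
handler = tb_logger.OutputHandler(tag="train", metric_names=["avg_loss"])
tb.attach(engine, log_handler=handler, event_name=Events.ITERATION_COMPLETED)

engine.run(data=range(100), max_epochs=1)
tb.close()
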
Example #3
def setup_ignite(engine: Engine,
                 exp_source,
                 run_name: str,
                 extra_metrics: Iterable[str] = ()):
    # get rid of missing metrics warning
    warnings.simplefilter("ignore", category=UserWarning)

    handler = ptan_ignite.EndOfEpisodeHandler(exp_source)
    handler.attach(engine)
    ptan_ignite.EpisodeFPSHandler().attach(engine)

    @engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
    def episode_completed(trainer: Engine):
        passed = trainer.state.metrics.get("time_passed", 0)
        avg_steps = trainer.state.metrics.get("avg_steps", 50)
        avg_reward = trainer.state.metrics.get("avg_reward", 0.0)
        print("Episode %d: reward=%.0f (avg %.2f), "
              "steps=%s (avg %.2f), speed=%.1f f/s, "
              "elapsed=%s" % (
                  trainer.state.episode,
                  trainer.state.episode_reward,
                  avg_reward,
                  trainer.state.episode_steps,
                  avg_steps,
                  trainer.state.metrics.get("avg_fps", 0),
                  timedelta(seconds=int(passed)),
              ))

        if avg_steps < 15 and trainer.state.episode > 100:
            print("Average steps has fallen below 10, stop training")
            trainer.should_terminate = True

    now = datetime.now().isoformat(timespec="minutes")
    logdir = f"runs/{now}-{run_name}"
    tb = tb_logger.TensorboardLogger(log_dir=logdir)
    run_avg = RunningAverage(output_transform=lambda v: v["loss"])
    run_avg.attach(engine, "avg_loss")

    metrics = ["reward", "steps", "avg_reward", "avg_steps"]
    handler = tb_logger.OutputHandler(tag="episodes", metric_names=metrics)
    event = ptan_ignite.EpisodeEvents.EPISODE_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)

    # write to tensorboard every 100 iterations
    ptan_ignite.PeriodicEvents().attach(engine)
    metrics = ["avg_loss", "avg_fps"]
    metrics.extend(extra_metrics)
    handler = tb_logger.OutputHandler(tag="train",
                                      metric_names=metrics,
                                      output_transform=lambda a: a)
    event = ptan_ignite.PeriodEvents.ITERS_100_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)
    def init_handlers(self, trainer: ie.Engine, evaluator: ie.Engine,
                      model: nn.Module, optimizer):
        self.tb_logger.attach(trainer,
                              log_handler=tbl.OutputHandler(
                                  tag='training', metric_names='all'),
                              event_name=ie.Events.ITERATION_COMPLETED)
        self.tb_logger.attach(trainer,
                              log_handler=tbl.OptimizerParamsHandler(
                                  optimizer, tag='training'),
                              event_name=ie.Events.ITERATION_COMPLETED)
        self.tb_logger.attach(trainer,
                              log_handler=tbl.OutputHandler(
                                  tag='train', metric_names='all'),
                              event_name=ie.Events.EPOCH_COMPLETED)
        self.tb_logger.attach(
            evaluator,
            log_handler=tbl.OutputHandler(
                tag='dev',
                metric_names='all',
                global_step_transform=tbl.global_step_from_engine(trainer)),
            event_name=ie.Events.EPOCH_COMPLETED)

        if self.opts.debug:
            self.tb_logger.attach(trainer,
                                  log_handler=tbl.OptimizerParamsHandler(
                                      optimizer, tag=self.DEBUG_TAG),
                                  event_name=ie.Events.ITERATION_COMPLETED)
            self.tb_logger.attach(trainer,
                                  log_handler=tbl.WeightsHistHandler(
                                      model, tag=self.DEBUG_TAG),
                                  event_name=ie.Events.ITERATION_COMPLETED)
            self.tb_logger.attach(trainer,
                                  log_handler=tbl.WeightsScalarHandler(
                                      model, tag=self.DEBUG_TAG),
                                  event_name=ie.Events.ITERATION_COMPLETED)
            self.tb_logger.attach(trainer,
                                  log_handler=tbl.GradsHistHandler(
                                      model, tag=self.DEBUG_TAG),
                                  event_name=ie.Events.EPOCH_COMPLETED)
            self.tb_logger.attach(trainer,
                                  log_handler=tbl.GradsScalarHandler(
                                      model, tag=self.DEBUG_TAG),
                                  event_name=ie.Events.ITERATION_COMPLETED)
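
The weight and gradient handlers used above can also be exercised in isolation; the sketch below wires WeightsScalarHandler and GradsHistHandler to a toy supervised trainer. The tiny model, random dataset, and tb_debug_sketch directory are assumptions made purely for illustration.

import torch
from torch import nn
from ignite.engine import Events, create_supervised_trainer
from ignite.contrib.handlers import tensorboard_logger as tbl

model = nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
trainer = create_supervised_trainer(model, optimizer, nn.MSELoss())

tb = tbl.TensorboardLogger(log_dir="tb_debug_sketch")
tb.attach(trainer,
          log_handler=tbl.WeightsScalarHandler(model),
          event_name=Events.ITERATION_COMPLETED)
tb.attach(trainer,
          log_handler=tbl.GradsHistHandler(model),
          event_name=Events.EPOCH_COMPLETED)

data = [(torch.randn(8, 4), torch.randn(8, 1)) for _ in range(10)]
trainer.run(data, max_epochs=2)
tb.close()
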
def setup_ignite(engine: Engine,
                 exp_source,
                 run_name: str,
                 extra_metrics: Iterable[str] = ()):
    warnings.simplefilter('ignore', category=UserWarning)

    handler = ptan_ignite.EndOfEpisodeHandler(exp_source,
                                              subsample_end_of_episode=100)

    handler.attach(engine)
    ptan_ignite.EpisodeFPSHandler().attach(engine)

    @engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
    def episode_completed(trainer: Engine):
        passed = trainer.state.metrics.get('time_passed', 0)
        print('Episode %d: reward=%.0f, steps=%s, speed=%.1f f/s, elapsed=%s' %
              (trainer.state.episode, trainer.state.episode_reward,
               trainer.state.episode_steps,
               trainer.state.metrics.get('avg_fps',
                                         0), timedelta(seconds=int(passed))))

    now = datetime.now().isoformat(timespec='minutes')
    logdir = f'runs-{now}-{run_name}'.replace(':', '')
    tb = tb_logger.TensorboardLogger(log_dir=logdir)
    run_avg = RunningAverage(output_transform=lambda v: v['loss'])
    run_avg.attach(engine, 'avg_loss')

    metrics = ['reward', 'steps', 'avg_reward']
    handler = tb_logger.OutputHandler(tag='episodes', metric_names=metrics)

    event = ptan_ignite.EpisodeEvents.EPISODE_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)

    # write to tensorboard every 1000 iterations
    ptan_ignite.PeriodicEvents().attach(engine)
    metrics = ['avg_loss', 'avg_fps']
    metrics.extend(extra_metrics)
    handler = tb_logger.OutputHandler(tag='train',
                                      metric_names=metrics,
                                      output_transform=lambda a: a)

    event = ptan_ignite.PeriodEvents.ITERS_1000_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)

    return tb
Example #6
def setup_ignite(engine: Engine,
                 exp_source,
                 run_name: str,
                 extra_metrics: Iterable[str] = ()):
    # get rid of missing metrics warning
    warnings.simplefilter("ignore", category=UserWarning)

    handler = ptan_ignite.EndOfEpisodeHandler(exp_source,
                                              subsample_end_of_episode=100)
    handler.attach(engine)
    ptan_ignite.EpisodeFPSHandler().attach(engine)

    @engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
    def episode_completed(trainer: Engine):
        passed = trainer.state.metrics.get("time_passed", 0)
        print("Episode %d: reward=%.0f, steps=%s, "
              "speed=%.1f f/s, elapsed=%s" % (
                  trainer.state.episode,
                  trainer.state.episode_reward,
                  trainer.state.episode_steps,
                  trainer.state.metrics.get("avg_fps", 0),
                  timedelta(seconds=int(passed)),
              ))

    now = datetime.now().isoformat(timespec="minutes")
    logdir = f"runs/{now}-{run_name}"
    tb = tb_logger.TensorboardLogger(log_dir=logdir)
    run_avg = RunningAverage(output_transform=lambda v: v["loss"])
    run_avg.attach(engine, "avg_loss")

    metrics = ["reward", "steps", "avg_reward"]
    handler = tb_logger.OutputHandler(tag="episodes", metric_names=metrics)
    event = ptan_ignite.EpisodeEvents.EPISODE_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)

    ptan_ignite.PeriodicEvents().attach(engine)
    metrics = ["avg_loss", "avg_fps"]
    metrics.extend(extra_metrics)
    handler = tb_logger.OutputHandler(tag="train",
                                      metric_names=metrics,
                                      output_transform=lambda a: a)
    event = ptan_ignite.PeriodEvents.ITERS_1000_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)
    return tb
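
Several of these helpers hang their training metrics on ptan's PeriodEvents, which only exist on the engine after PeriodicEvents() has been attached. A minimal standalone sketch of that mechanism (assuming ptan is installed; the no-op process function and range data are illustrative only):

from ignite.engine import Engine
import ptan.ignite as ptan_ignite

engine = Engine(lambda eng, batch: {"loss": 0.0})
ptan_ignite.PeriodicEvents().attach(engine)

@engine.on(ptan_ignite.PeriodEvents.ITERS_100_COMPLETED)
def every_100_iters(eng: Engine):
    # fires once per 100 processed iterations
    print("100-iteration event at iteration", eng.state.iteration)

engine.run(data=range(500), max_epochs=1)
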
Example #7
def setup_ignite(engine: Engine,
                 params: SimpleNamespace,
                 exp_source,
                 run_name: str,
                 net,
                 extra_metrics: Iterable[str] = ()):
    warnings.simplefilter("ignore", category=UserWarning)
    handler = ptan_ignite.EndOfEpisodeHandler(exp_source,
                                              subsample_end_of_episode=100)
    handler.attach(engine)
    ptan_ignite.EpisodeFPSHandler().attach(engine)

    @engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
    def episode_completed(trainer: Engine):
        passed = trainer.state.metrics.get('time_passed', 0)
        print("Episode %d: reward=%.0f, steps=%s, "
              "elapsed=%s" %
              (trainer.state.episode, trainer.state.episode_reward,
               trainer.state.episode_steps, timedelta(seconds=int(passed))))
        # note: the ./saves directory must already exist
        path = './saves/episode-%d.data' % trainer.state.episode
        torch.save(net.state_dict(), path)

    now = datetime.now().isoformat(timespec='minutes').replace(':', '')
    logdir = f"runs2/{now}-{params.run_name}-{run_name}"
    tb = tb_logger.TensorboardLogger(log_dir=logdir)
    run_avg = RunningAverage(output_transform=lambda v: v['loss'])
    run_avg.attach(engine, 'avg_loss')

    metrics = ['reward', 'steps', 'avg_reward']
    handler = tb_logger.OutputHandler(tag='episodes', metric_names=metrics)
    event = ptan_ignite.EpisodeEvents.EPISODE_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)

    # write to TensorBoard every 1000 iterations
    ptan_ignite.PeriodicEvents().attach(engine)
    metrics = ['avg_loss', 'avg_fps']
    metrics.extend(extra_metrics)
    handler = tb_logger.OutputHandler(tag="train",
                                      metric_names=metrics,
                                      output_transform=lambda a: a)
    event = ptan_ignite.PeriodEvents.ITERS_1000_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)
    return tb
Example #8
        print("%d: val:%s" % (engine.state.iteration, res))

        for key, val in res.items():
            engine.state.metrics[key + "_val"] = val

        val_reward = res["episode_reward"]

        if getattr(engine.state, "best_val_reward", None) is None:
            engine.state.best_val_reward = val_reward

        if engine.state.best_val_reward < val_reward:
            print("Best validation reward updated: %.3f -> %.3f, model saved" %
                  (engine.state.best_val_reward, val_reward))

            engine.state.best_val_reward = val_reward
            path = saves_path / ("val_reward-%.3f.data" % val_reward)
            torch.save(net.state_dict(), path)

    event = ptan.ignite.PeriodEvents.ITERS_10000_COMPLETED
    tst_metrics = [m + "_tst" for m in validation.METRICS]
    tst_handler = tb_logger.OutputHandler(tag="test", metric_names=tst_metrics)

    tb.attach(engine, log_handler=tst_handler, event_name=event)

    val_metrics = [m + "_val" for m in validation.METRICS]
    val_handler = tb_logger.OutputHandler(tag="validation",
                                          metric_names=val_metrics)

    tb.attach(engine, log_handler=val_handler, event_name=event)
    engine.run(common.batch_generator(buffer, REPLAY_INITIAL, BATCH_SIZE))
    @engine.on(ptan_ignite.EpisodeEvents.BOUND_REWARD_REACHED)
    def game_solved(trainer: Engine):
        print('Game solved in %s, after %d episodes and %d iterations' %
              (timedelta(seconds=trainer.state.metrics['time_passed']),
               trainer.state.episode, trainer.state.iteration))

        trainer.should_terminate = True

    logdir = f'runs-{params.run_name}-{NAME}'

    tb = tb_logger.TensorboardLogger(log_dir=logdir)
    RunningAverage(output_transform=lambda v: v["loss"]).attach(
        engine, "avg_loss")

    episode_handler = tb_logger.OutputHandler(
        tag="episodes", metric_names=["reward", "steps", "avg_reward"])
    tb.attach(engine,
              log_handler=episode_handler,
              event_name=ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)

    # write to tensorboard every 100 iterations
    ptan_ignite.PeriodicEvents().attach(engine)
    handler = tb_logger.OutputHandler(tag="train",
                                      metric_names=["avg_loss", "avg_fps"],
                                      output_transform=lambda a: a)

    tb.attach(engine,
              log_handler=handler,
              event_name=ptan_ignite.PeriodEvents.ITERS_100_COMPLETED)

    engine.run(
Example #10
            real_img = vutils.make_grid(batch_v.data[:], normalize=False)
            trainer.tb.writer.add_image("real", real_img,
                                        trainer.state.iteration)
            trainer.tb.writer.flush()

        return dis_loss.item(), gen_loss.item()

    engine = Engine(process_batch)
    tb = tb_logger.TensorboardLogger(log_dir=None)
    engine.tb = tb
    # process_batch returns (dis_loss, gen_loss)
    RunningAverage(output_transform=lambda out: out[1]).attach(
        engine, "avg_loss_gen")
    RunningAverage(output_transform=lambda out: out[0]).attach(
        engine, "avg_loss_dis")

    handler = tb_logger.OutputHandler(
        tag="train", metric_names=["avg_loss_gen", "avg_loss_dis"])
    tb.attach(engine,
              log_handler=handler,
              event_name=Events.ITERATION_COMPLETED)

    @engine.on(Events.ITERATION_COMPLETED)
    def log_losses(trainer):
        if trainer.state.iteration % REPORT_EVERY_ITER == 0:
            log.info("%d: gen_loss=%f, dis_loss=%f", trainer.state.iteration,
                     trainer.state.metrics["avg_loss_gen"],
                     trainer.state.metrics["avg_loss_dis"])

    engine.run(data=iterate_batches(envs), epoch_length=20000)
def setup_ignite(
        engine: Engine,
        params: SimpleNamespace,
        exp_source,
        run_name: str,
        model,
        optimizer,
        extra_metrics: Iterable[str] = (),
):
    warnings.simplefilter("ignore", category=UserWarning)
    handler = ptan_ignite.EndOfEpisodeHandler(
        exp_source, bound_avg_reward=params.stop_reward)
    handler.attach(engine)
    ptan_ignite.EpisodeFPSHandler().attach(engine)

    objects_to_checkpoint = {
        'model': model,
        'optimizer': optimizer,
        'trainer': engine
    }
    checkpoint_dir = Path("models")
    saver = DiskSaver(str(checkpoint_dir),
                      create_dir=True,
                      require_empty=False)
    handler = Checkpoint(objects_to_checkpoint, saver, n_saved=2)
    engine.add_event_handler(Events.ITERATION_COMPLETED(every=1000), handler)

    checkpoints_paths = sorted(checkpoint_dir.iterdir(),
                               key=lambda p: p.stat().st_mtime)
    if checkpoints_paths:
        checkpoint = torch.load(checkpoints_paths[-1])
        print(f"Loading checkpoint {checkpoints_paths[-1].name}")
        Checkpoint.load_objects(to_load=objects_to_checkpoint,
                                checkpoint=checkpoint)

    @engine.on(ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)
    def episode_completed(trainer: Engine):
        passed = trainer.state.metrics.get('time_passed', 0)
        print("Episode %d: reward=%.2f, steps=%s, "
              "speed=%.1f f/s, elapsed=%s" %
              (trainer.state.episode, trainer.state.episode_reward,
               trainer.state.episode_steps,
               trainer.state.metrics.get('avg_fps',
                                         0), timedelta(seconds=int(passed))))

    @engine.on(ptan_ignite.EpisodeEvents.BOUND_REWARD_REACHED)
    def game_solved(trainer: Engine):
        passed = trainer.state.metrics['time_passed']
        print("Game solved in %s, after %d episodes "
              "and %d iterations!" %
              (timedelta(seconds=int(passed)), trainer.state.episode,
               trainer.state.iteration))
        trainer.should_terminate = True

    now = datetime.now().isoformat(timespec='minutes').replace(":", "-")
    logdir = f"runs/{now}-{params.run_name}-{run_name}"
    tb = tb_logger.TensorboardLogger(log_dir=logdir)
    run_avg = RunningAverage(output_transform=lambda v: v['loss'])
    run_avg.attach(engine, "avg_loss")

    metrics = ['reward', 'steps', 'avg_reward']
    handler = tb_logger.OutputHandler(tag="episodes", metric_names=metrics)
    event = ptan_ignite.EpisodeEvents.EPISODE_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)

    # write to tensorboard every 100 iterations
    ptan_ignite.PeriodicEvents().attach(engine)
    metrics = ['avg_loss', 'avg_fps']
    metrics.extend(extra_metrics)
    handler = tb_logger.OutputHandler(tag="train",
                                      metric_names=metrics,
                                      output_transform=lambda a: a)
    event = ptan_ignite.PeriodEvents.ITERS_100_COMPLETED
    tb.attach(engine, log_handler=handler, event_name=event)
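
The checkpoint/resume logic above can be tried on its own; the sketch below saves a toy model, optimizer, and engine with Checkpoint + DiskSaver and then reloads the most recent file. The toy objects and the sketch_models directory are assumptions for illustration only.

import torch
from torch import nn
from pathlib import Path
from ignite.engine import Engine, Events
from ignite.handlers import Checkpoint, DiskSaver

model = nn.Linear(2, 1)
optimizer = torch.optim.Adam(model.parameters())
engine = Engine(lambda eng, batch: None)

to_save = {'model': model, 'optimizer': optimizer, 'trainer': engine}
saver = DiskSaver("sketch_models", create_dir=True, require_empty=False)
engine.add_event_handler(Events.ITERATION_COMPLETED(every=5),
                         Checkpoint(to_save, saver, n_saved=2))
engine.run(data=range(20), max_epochs=1)

# resume: load the most recent checkpoint back into the same objects
files = sorted(Path("sketch_models").iterdir(), key=lambda p: p.stat().st_mtime)
Checkpoint.load_objects(to_load=to_save, checkpoint=torch.load(files[-1]))
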
Example #12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--cuda",
                        default=False,
                        action="store_true",
                        help="Enable cuda computation")
    args = parser.parse_args()

    device = torch.device("cuda" if args.cuda else "cpu")
    envs = [
        InputWrapper(gym.make(name))
        for name in ("Breakout-v0", "AirRaid-v0", "Pong-v0")
    ]
    input_shape = envs[0].observation_space.shape

    net_discr = Discriminator(input_shape=input_shape).to(device)
    net_gener = Generator(output_shape=input_shape).to(device)

    objective = nn.BCELoss()
    gen_optimizer = optim.Adam(params=net_gener.parameters(),
                               lr=LEARNING_RATE,
                               betas=(0.5, 0.999))
    dis_optimizer = optim.Adam(params=net_discr.parameters(),
                               lr=LEARNING_RATE,
                               betas=(0.5, 0.999))

    true_labels_v = torch.ones(BATCH_SIZE, device=device)
    fake_labels_v = torch.zeros(BATCH_SIZE, device=device)

    def process_batch(trainer, batch):
        gen_input_v = torch.FloatTensor(BATCH_SIZE, LATENT_VECTOR_SIZE, 1, 1)
        gen_input_v.normal_(0, 1)
        gen_input_v = gen_input_v.to(device)
        batch_v = batch.to(device)
        gen_output_v = net_gener(gen_input_v)

        # train discriminator
        dis_optimizer.zero_grad()
        dis_output_true_v = net_discr(batch_v)
        dis_output_fake_v = net_discr(gen_output_v.detach())
        dis_loss = objective(dis_output_true_v, true_labels_v) + objective(
            dis_output_fake_v, fake_labels_v)
        dis_loss.backward()
        dis_optimizer.step()

        # train generator
        gen_optimizer.zero_grad()
        dis_output_v = net_discr(gen_output_v)
        gen_loss = objective(dis_output_v, true_labels_v)
        gen_loss.backward()
        gen_optimizer.step()

        if trainer.state.iteration % SAVE_IMAGE_EVERY_ITER == 0:
            fake_img = vutils.make_grid(gen_output_v.data[:64], normalize=True)
            trainer.tb.writer.add_image("fake", fake_img,
                                        trainer.state.iteration)
            real_img = vutils.make_grid(batch_v.data[:64], normalize=True)
            trainer.tb.writer.add_image("real", real_img,
                                        trainer.state.iteration)
            trainer.tb.writer.flush()
        return dis_loss.item(), gen_loss.item()

    engine = Engine(process_batch)
    tb = tb_logger.TensorboardLogger(log_dir=None)
    engine.tb = tb
    RunningAverage(output_transform=lambda out: out[1]).attach(
        engine, "avg_loss_gen")
    RunningAverage(output_transform=lambda out: out[0]).attach(
        engine, "avg_loss_dis")

    handler = tb_logger.OutputHandler(
        tag="train", metric_names=["avg_loss_gen", "avg_loss_dis"])
    tb.attach(engine,
              log_handler=handler,
              event_name=Events.ITERATION_COMPLETED)

    @engine.on(Events.ITERATION_COMPLETED)
    def log_losses(trainer):
        if trainer.state.iteration % REPORT_EVERY_ITER == 0:
            log.info(
                "%d: gen_loss=%f, dis_loss=%f",
                trainer.state.iteration,
                trainer.state.metrics["avg_loss_gen"],
                trainer.state.metrics["avg_loss_dis"],
            )

    engine.run(data=iterate_batches(envs))
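
One detail worth noting in the discriminator step above is the call to gen_output_v.detach(): it prevents gradients from the discriminator loss from flowing back into the generator. A tiny self-contained illustration (the toy linear "generator" and "discriminator" are assumptions for demonstration only):

import torch
from torch import nn

gen = nn.Linear(4, 4)
disc = nn.Linear(4, 1)
z = torch.randn(8, 4)

fake = gen(z)
# discriminator step: detach so gradients do not reach the generator
disc(fake.detach()).sum().backward()
print(gen.weight.grad is None)        # True: generator untouched
print(disc.weight.grad is not None)   # True: discriminator got gradients
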
Example #13
def train(args, hyper_params):

    print(args)
    print(hyper_params)

    args.channels.sort(
        key=lambda x: src.dataset.Traffic4CastSample.channel_to_index[x])

    model = MODELS[args.model_type](**filter_dict(hyper_params, "model"))
    slice_size = model.past + model.future

    assert model.future == 3

    if args.model is not None:
        model_path = args.model
        model_name = os.path.basename(args.model)
        model.load(model_path)
    else:
        model_name = f"{args.model_type}_" + "_".join(args.channels +
                                                      args.cities)
        model_path = f"output/models/{model_name}.pth"

    if model.num_channels != len(args.channels):
        print(f"ERROR: Model to channels missmatch. Model can predict "
              f"{model.num_channels} channels. {len(args.channels)} were "
              "selected.")
        sys.exit(1)

    transforms = [
        lambda x: x.float(),
        lambda x: x / 255,
        src.dataset.Traffic4CastSample.Transforms.Permute("TCHW"),
        src.dataset.Traffic4CastSample.Transforms.SelectChannels(
            args.channels),
    ]
    train_dataset = src.dataset.Traffic4CastDataset(ROOT, "training",
                                                    args.cities, transforms)
    valid_dataset = src.dataset.Traffic4CastDataset(ROOT, "validation",
                                                    args.cities, transforms)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=1,
        collate_fn=src.dataset.Traffic4CastDataset.collate_list,
        shuffle=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=1,
        collate_fn=src.dataset.Traffic4CastDataset.collate_list,
        shuffle=False)

    ignite_train = ignite_selected(
        train_loader,
        slice_size=slice_size,
        **filter_dict(hyper_params, "ignite_selected"),
    )

    optimizer = torch.optim.Adam(
        model.parameters(),
        **filter_dict(hyper_params, "optimizer"),
    )
    loss = nn.MSELoss()

    best_loss = 1.0

    device = args.device
    if device.find('cuda') != -1 and not torch.cuda.is_available():
        device = 'cpu'
    trainer = engine.create_supervised_trainer(
        model,
        optimizer,
        loss,
        device=device,
        prepare_batch=model.ignite_batch)
    evaluator = engine.create_supervised_evaluator(
        model,
        metrics={'loss': ignite.metrics.Loss(loss)},
        device=device,
        prepare_batch=model.ignite_batch)

    @trainer.on(engine.Events.ITERATION_COMPLETED)
    def log_training_loss(trainer):
        print("Epoch {:3d} Train loss: {:8.6f}".format(trainer.state.epoch,
                                                       trainer.state.output))

    @trainer.on(engine.Events.EPOCH_COMPLETED)
    def log_validation_loss(trainer):
        evaluator.run(ignite_selected(valid_loader, slice_size=slice_size))
        metrics = evaluator.state.metrics
        print("Epoch {:3d} Valid loss: {:8.6f} ←".format(
            trainer.state.epoch, metrics['loss']))
        trainer.state.dataloader = ignite_selected(train_loader,
                                                   slice_size=slice_size,
                                                   **filter_dict(
                                                       hyper_params,
                                                       "ignite_selected"))
        nonlocal best_loss
        best_loss = min(best_loss, metrics['loss'])

    if "learning-rate-scheduler" in args.callbacks:
        lr_reduce = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   verbose=args.verbose,
                                                   **LR_REDUCE_PARAMS)

        @evaluator.on(engine.Events.COMPLETED)
        def update_lr_reduce(engine):
            loss = engine.state.metrics['loss']
            lr_reduce.step(loss)

    def score_function(engine):
        return -engine.state.metrics['loss']

    if "early-stopping" in args.callbacks:
        early_stopping_handler = ignite.handlers.EarlyStopping(
            patience=PATIENCE, score_function=score_function, trainer=trainer)
        evaluator.add_event_handler(engine.Events.EPOCH_COMPLETED,
                                    early_stopping_handler)

    if "model-checkpoint" in args.callbacks:
        checkpoint_handler = ignite.handlers.ModelCheckpoint(
            "output/models/checkpoints",
            model_name,
            score_function=score_function,
            n_saved=1,
            require_empty=False,
            create_dir=True)
        evaluator.add_event_handler(engine.Events.EPOCH_COMPLETED,
                                    checkpoint_handler, {"model": model})

    if "tensorboard" in args.callbacks:
        logger = tensorboard_logger.TensorboardLogger(
            log_dir=f"output/tensorboard/{model_name}")
        logger.attach(trainer,
                      log_handler=tensorboard_logger.OutputHandler(
                          tag="training",
                          output_transform=lambda loss: {'loss': loss}),
                      event_name=engine.Events.ITERATION_COMPLETED)
        logger.attach(evaluator,
                      log_handler=tensorboard_logger.OutputHandler(
                          tag="validation",
                          metric_names=["loss"],
                          another_engine=trainer),
                      event_name=engine.Events.EPOCH_COMPLETED)

    trainer.run(ignite_train, **filter_dict(hyper_params, "trainer_run"))

    if "save-model" in args.callbacks and not "model-checkpoint" in args.callbacks:
        torch.save(model.state_dict(), model_path)
        print("Model saved at:", model_path)
    elif "save-model" in args.callbacks:
        # Move best model from checkpoint directory to output/models
        checkpoints_dir = "output/models/checkpoints"
        source, *_ = [
            f for f in reversed(utils.sorted_ls(checkpoints_dir))
            if f.startswith(model_name)
        ]  # get most recent model
        os.rename(os.path.join(checkpoints_dir, source), model_path)
        print("Model saved at:", model_path)

    return {
        'loss': best_loss,  # HpBandSter always minimizes!
        'info': {
            'args': vars(args),
            'hyper-params': hyper_params,
        },
    }
Example #14
    def fit(self,
            train_loader: _data.DataLoader,
            val_loader: _data.DataLoader,
            epochs: int = 1,
            batches: int = None,
            learning_rate: float = 1e-3) -> None:
        if batches is None:
            batches = VocalExtractor.get_number_of_batches(train_loader)

        loss_fn = nn.BCELoss()
        optimizer = _optim.Adam(self.model.parameters(), lr=learning_rate)

        trainer = _engine.create_supervised_trainer(self.model,
                                                    optimizer,
                                                    loss_fn,
                                                    device=self.device)

        _metrics.RunningAverage(output_transform=lambda x: x,
                                device=self.device).attach(trainer, 'loss')
        progressbar = _chandlers.ProgressBar(
            bar_format=
            "{desc}[{n_fmt}/{total_fmt}] {percentage:3.0f}%|{bar:20}| "
            "[{elapsed}<{remaining}]{postfix}",
            persist=True,
            ascii=" #")
        progressbar.attach(trainer, ['loss'])

        def get_metrics_fn() -> Dict[str, _metrics.Metric]:
            def rounded_transform(output):
                y_pred, y = output
                return torch.round(y_pred), y

            transform = rounded_transform
            accuracy = _metrics.Accuracy(transform, device=self.device)
            precision = _metrics.Precision(transform, device=self.device)
            recall = _metrics.Recall(transform, device=self.device)
            f1 = precision * recall * 2 / (precision + recall + 1e-20)
            return {
                'loss': _metrics.Loss(loss_fn),
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1': f1
            }

        evaluator = _engine.create_supervised_evaluator(
            self.model, metrics=get_metrics_fn(), device=self.device)

        score_fn_name = "f1"

        def score_function(engine: _engine.Engine):
            return engine.state.metrics[score_fn_name]

        best_model_saver = _handlers.ModelCheckpoint(
            dirname="best_models",
            filename_prefix="vocal_extractor",
            score_name=score_fn_name,
            score_function=score_function,
            n_saved=5,
            create_dir=True)
        evaluator.add_event_handler(_engine.Events.COMPLETED, best_model_saver,
                                    {"model": self.model})

        each_model_saver = _handlers.ModelCheckpoint(
            dirname="all_models",
            filename_prefix="vocal_extractor",
            score_name=score_fn_name,
            score_function=score_function,
            n_saved=None,
            create_dir=True)
        evaluator.add_event_handler(_engine.Events.COMPLETED, each_model_saver,
                                    {"model": self.model})

        @trainer.on(_engine.Events.EPOCH_COMPLETED)
        def on_epoch_completed(engine: _engine.Engine) -> None:
            metrics = VocalExtractor.compute_metrics(val_loader, evaluator)
            string = ", ".join(f"val_{k}: {v:.4f}" for k, v in metrics.items())
            progressbar.log_message(string + "\n")

        with _tb_logger.TensorboardLogger(log_dir="tb_logs") as tb_logger:
            global_step = _tb_logger.global_step_from_engine(trainer)

            train_running_loss_log_handler = _tb_logger.OutputHandler(
                tag="training", output_transform=lambda x: {'running_loss': x})
            tb_logger.attach(trainer,
                             log_handler=train_running_loss_log_handler,
                             event_name=_engine.Events.ITERATION_COMPLETED)

            val_metrics_log_handler = _tb_logger.OutputHandler(
                tag="validation",
                metric_names=[name for name, _ in get_metrics_fn().items()],
                global_step_transform=global_step)
            tb_logger.attach(evaluator,
                             log_handler=val_metrics_log_handler,
                             event_name=_engine.Events.EPOCH_COMPLETED)

            tb_logger.attach(
                trainer,
                log_handler=_tb_logger.OptimizerParamsHandler(optimizer),
                event_name=_engine.Events.ITERATION_STARTED)

            tb_logger.attach(trainer,
                             log_handler=_tb_logger.WeightsScalarHandler(
                                 self.model),
                             event_name=_engine.Events.ITERATION_COMPLETED)
            tb_logger.attach(trainer,
                             log_handler=_tb_logger.WeightsHistHandler(
                                 self.model),
                             event_name=_engine.Events.EPOCH_COMPLETED)

            tb_logger.attach(trainer,
                             log_handler=_tb_logger.GradsScalarHandler(
                                 self.model),
                             event_name=_engine.Events.ITERATION_COMPLETED)
            tb_logger.attach(trainer,
                             log_handler=_tb_logger.GradsHistHandler(
                                 self.model),
                             event_name=_engine.Events.EPOCH_COMPLETED)

        torchsummary.summary(self.model,
                             input_size=(1, self.freq_bins, self.time_bins),
                             batch_size=train_loader.batch_size,
                             device=self.device)
        trainer.run(data=train_loader, epoch_length=batches, max_epochs=epochs)
                                         normalize=True),
                trainer.state.iteration)
            trainer.tb.writer.add_image(
                'real', vutils.make_grid(batch_v.data[:64], normalize=True),
                trainer.state.iteration)

        return dis_loss.item(), gen_loss.item()

    engine = Engine(process_batch)
    tb = tb_logger.TensorboardLogger(log_dir=None)
    engine.tb = tb
    RunningAverage(output_transform=lambda out: out[0]).attach(
        engine, 'avg_loss_dis')
    RunningAverage(output_transform=lambda out: out[1]).attach(
        engine, 'avg_loss_gen')
    handler = tb_logger.OutputHandler(
        tag='train', metric_names=['avg_loss_dis', 'avg_loss_gen'])
    tb.attach(engine,
              log_handler=handler,
              event_name=Events.ITERATION_COMPLETED)

    @engine.on(Events.ITERATION_COMPLETED)
    def log_losses(trainer):
        if trainer.state.iteration % REPORT_EVERY_ITER == 0:
            log.info('Iter %d: gen_loss=%.3f, dis_loss=%.3f',
                     trainer.state.iteration,
                     trainer.state.metrics['avg_loss_gen'],
                     trainer.state.metrics['avg_loss_dis'])

    engine.run(data=iterate_batches(envs))
Example #16
             trainer.state.episode_steps, trainer.state.metrics.get('fps', 0),
             timedelta(seconds=trainer.state.metrics.get('time_passed', 0))))

    @engine.on(ptan_ignite.EpisodeEvents.BOUND_REWARD_REACHED)
    def game_solved(trainer: Engine):
        print("Game solved in %s, after %d episodes and %d iterations!" %
              (timedelta(seconds=trainer.state.metrics['time_passed']),
               trainer.state.episode, trainer.state.iteration))
        trainer.should_terminate = True

    logdir = f"runs/{datetime.now().isoformat(timespec='minutes')}-{params.run_name}-{NAME}={args.envs}"
    tb = tb_logger.TensorboardLogger(log_dir=logdir)
    RunningAverage(output_transform=lambda v: v['loss']).attach(
        engine, "avg_loss")

    episode_handler = tb_logger.OutputHandler(
        tag="episodes", metric_names=['reward', 'steps', 'avg_reward'])
    tb.attach(engine,
              log_handler=episode_handler,
              event_name=ptan_ignite.EpisodeEvents.EPISODE_COMPLETED)

    # write to tensorboard every 100 iterations
    ptan_ignite.PeriodicEvents().attach(engine)
    handler = tb_logger.OutputHandler(tag="train",
                                      metric_names=['avg_loss', 'avg_fps'],
                                      output_transform=lambda a: a)
    tb.attach(engine,
              log_handler=handler,
              event_name=ptan_ignite.PeriodEvents.ITERS_100_COMPLETED)

    engine.run(
        batch_generator(buffer, params.replay_initial, params.batch_size,
Example #17
def train():
    learning_rate = 0.0001
    save_on_iter_count = 100
    device = "cuda"
    envs = [
        ObservationScaler(gym.make(name))
        for name in ("Breakout-v0", "Pong-v0", "AirRaid-v0")
    ]
    discriminator = Discriminator(img_size=64).to(device)
    generator = Generator().to(device)
    objective = nn.BCELoss()
    discr_optimizer = optim.Adam(params=discriminator.parameters(),
                                 lr=learning_rate,
                                 betas=(0.5, 0.999))
    gen_optimizer = optim.Adam(params=generator.parameters(),
                               lr=learning_rate,
                               betas=(0.5, 0.999))

    def process_batch(trainer, batch):
        batch_size = batch.shape[0]
        gen_input_size = 10

        # get labels and inputs
        generator_inputs = torch.randn(
            (batch_size, gen_input_size, 1, 1)).to(device)
        fake_inputs = generator(generator_inputs).to(device)
        true_inputs = batch.to(device)
        fake_image_labels = torch.zeros((batch_size, )).to(device)
        true_image_labels = torch.ones((batch_size, )).to(device)

        # train discriminator
        discr_optimizer.zero_grad()
        discr_fake_image_output = discriminator(fake_inputs.detach())
        discr_true_image_output = discriminator(true_inputs)

        discr_loss = objective(discr_fake_image_output,
                               fake_image_labels) + objective(
                                   discr_true_image_output, true_image_labels)

        discr_loss.backward()
        discr_optimizer.step()

        # train generator
        gen_optimizer.zero_grad()
        discr_output = discriminator(fake_inputs)
        gen_loss = objective(discr_output, true_image_labels)
        gen_loss.backward()
        gen_optimizer.step()

        # save images
        if trainer.state.iteration % save_on_iter_count == 0:
            fake_img = vutils.make_grid(fake_inputs.data[:64], normalize=True)
            trainer.tb.writer.add_image("fake", fake_img,
                                        trainer.state.iteration)
            real_img = vutils.make_grid(true_inputs.data[:64], normalize=True)
            trainer.tb.writer.add_image("real", real_img,
                                        trainer.state.iteration)
            trainer.tb.writer.flush()
        return discr_loss.item(), gen_loss.item()

    engine = Engine(process_batch)
    tb = tb_logger.TensorboardLogger(log_dir=None)
    engine.tb = tb
    RunningAverage(output_transform=lambda out: out[1]).attach(
        engine, "avg_loss_gen")
    RunningAverage(output_transform=lambda out: out[0]).attach(
        engine, "avg_loss_dis")

    handler = tb_logger.OutputHandler(
        tag="train", metric_names=["avg_loss_gen", "avg_loss_dis"])
    tb.attach(engine,
              log_handler=handler,
              event_name=Events.ITERATION_COMPLETED)

    @engine.on(Events.ITERATION_COMPLETED(every=100))
    def log_training_loss(engine):
        print(f"Epoch[{engine.state.iteration}] Loss:", engine.state.output)

    engine.run(data=generate_batch(envs))