def __call__(self, trainer: chainer.training.Trainer):
    if self.writer is None:
        self.writer = SummaryWriter(Path(trainer.out))

    observations = trainer.observation
    n_iter = trainer.updater.iteration
    for n, v in observations.items():
        if isinstance(v, chainer.Variable):
            v = v.data
        if isinstance(v, chainer.cuda.cupy.ndarray):
            v = chainer.cuda.to_cpu(v)
        self.writer.add_scalar(n, v, n_iter)
class TensorBoardReport(chainer.training.Extension):
    def __init__(self, writer=None, isOnGPU=True):
        self.writer = writer
        self.isOnGPU = isOnGPU

    def __call__(self, trainer: chainer.training.Trainer):
        if self.writer is None:
            self.writer = SummaryWriter(Path(trainer.out))

        observations = trainer.observation
        n_iter = trainer.updater.iteration
        for n, v in observations.items():
            if isinstance(v, chainer.Variable):
                v = v.data
            if self.isOnGPU and isinstance(v, chainer.cuda.cupy.ndarray):
                v = chainer.cuda.to_cpu(v)
            self.writer.add_scalar(n, v, n_iter)
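# Usage sketch (an assumption, not from the original source): the extension above
# lazily creates its SummaryWriter under the trainer's output directory on first
# call, so registering it only needs a trigger; the 100-iteration interval is
# purely illustrative.
def attach_tensorboard_report(trainer, log_every=100):
    trainer.extend(TensorBoardReport(), trigger=(log_every, 'iteration'))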
class Tensorboard(extension.Extension):
    """Trainer extension that writes accumulated results to TensorBoard
    (used with ABEJA Platform).

    This extension uses the log accumulated by a :class:`LogReport` extension
    and writes the specified entries of the log as TensorBoard scalars.

    Args:
        entries (list of str): List of keys of observations to write.
        log_report (str or LogReport): Log report that accumulates the
            observations. This is either the name of a LogReport extension
            registered to the trainer, or a LogReport instance to use
            internally.
    """

    def __init__(self, entries, out_dir='logs', log_report='LogReport'):
        self._entries = entries
        self._log_report = log_report
        self._log_len = 0  # number of observations already printed
        self.writer = SummaryWriter(out_dir)

    def __call__(self, trainer):
        log_report = self._log_report
        if isinstance(log_report, str):
            log_report = trainer.get_extension(log_report)
        elif isinstance(log_report, log_report_module.LogReport):
            log_report(trainer)  # update the log report
        else:
            raise TypeError('log report has a wrong type %s' % type(log_report))

        log = log_report.log
        log_len = self._log_len
        while len(log) > log_len:
            self._print(log[log_len])
            log_len += 1
        self._log_len = log_len

    def serialize(self, serializer):
        log_report = self._log_report
        if isinstance(log_report, log_report_module.LogReport):
            log_report.serialize(serializer['_log_report'])

    def _print(self, observation):
        epoch = observation['epoch']
        for key, value in observation.items():
            self.writer.add_scalar(key, value, epoch)
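# Usage sketch (an assumption, not part of the original source): the extension
# above reads rows out of a LogReport, so a LogReport has to be registered first.
# The entry names below are illustrative.
from chainer.training import extensions

def attach_tensorboard(trainer):
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(Tensorboard(['main/loss', 'validation/main/loss'], out_dir='logs'),
                   trigger=(1, 'epoch'))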
def __call__(self, trainer: chainer.training.Trainer):
    if self.writer is None:
        self.writer = SummaryWriter(Path(trainer.out))

    observations = trainer.observation
    n_iter = trainer.updater.iteration
    for n, v in observations.items():
        if isinstance(v, chainer.Variable):
            v = v.data
        if isinstance(v, chainer.cuda.cupy.ndarray):
            v = chainer.cuda.to_cpu(v)
        self.writer.add_scalar(n, v, n_iter)

    link = trainer.updater.get_optimizer('main').target
    for name, param in link.namedparams():
        self.writer.add_histogram(name, chainer.cuda.to_cpu(param.data),
                                  n_iter, bins=100)
class TensorBoardReport(chainer.training.Extension):
    def __init__(self, out_dir):
        self.writer = SummaryWriter(out_dir)

    def __call__(self, trainer):
        observations = trainer.observation
        n_iter = trainer.updater.iteration

        for n, v in observations.items():
            if isinstance(v, chainer.Variable):
                value = v.data
            elif isinstance(v, chainer.backends.cuda.ndarray):
                value = chainer.cuda.to_cpu(v)
            else:
                value = v
            self.writer.add_scalar(n, value, n_iter)

        # Histograms of the parameters of the optimizer's target link
        link = trainer.updater.get_optimizer('main').target
        for name, param in link.namedparams():
            self.writer.add_histogram(name, chainer.cuda.to_cpu(param.data), n_iter)
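# Usage sketch (assumed, not from the original source): because this variant also
# writes one histogram per parameter, it is typically registered with a coarser
# trigger than plain scalar logging; the interval below is illustrative.
def add_tensorboard_logging(trainer, out_dir='runs/experiment', log_every=500):
    trainer.extend(TensorBoardReport(out_dir), trigger=(log_every, 'iteration'))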
    train_act_v = Variable(np.asarray(train_act, dtype=np.int32))
    return train_obs_v, train_act_v, reward_bound, reward_mean


if __name__ == "__main__":
    env = gym.make("CartPole-v0")
    # env = gym.wrappers.Monitor(env, directory="mon", force=True)
    obs_size = env.observation_space.shape[0]
    n_actions = env.action_space.n
    print("obs_space:%d\tn_actions:%d" % (obs_size, n_actions))

    net = Net(obs_size, HIDDEN_SIZE, n_actions)
    net.cleargrads()  # zero_grad
    optimizer = optimizers.Adam(alpha=0.01)
    optimizer.setup(net)
    writer = SummaryWriter(comment="-cartpole")

    for iter_no, batch in enumerate(iterate_batches(env, net, BATCH_SIZE)):
        obs_v, acts_v, reward_b, reward_m = filter_batch(batch, PERCENTILE)
        net.cleargrads()  # gradients accumulate in Chainer, so clear them every iteration
        action_scores_v = net(obs_v)
        loss_v = F.softmax_cross_entropy(action_scores_v, acts_v)
        loss_v.backward()
        optimizer.update()
        print("%d: loss=%.3f, reward_mean=%.1f, reward_bound=%.1f" %
              (iter_no, loss_v.data, reward_m, reward_b))
        writer.add_scalar("loss", loss_v.data, iter_no)
        writer.add_scalar("reward_bound", reward_b, iter_no)
        writer.add_scalar("reward_mean", reward_m, iter_no)
def main():
    print_config()

    relative_paths = np.loadtxt(CONFIG['imagefile_path'], dtype=str)
    images_base_path = os.path.dirname(CONFIG['imagefile_path'])
    absolute_paths = [images_base_path + i.strip('.') for i in relative_paths]
    bboxes = np.load(CONFIG['boxfile_path'], allow_pickle=True)

    env = TextLocEnv(absolute_paths, bboxes, CONFIG['gpu_id'])

    n_actions = env.action_space.n
    q_func = chainerrl.q_functions.SingleModelStateQFunctionWithDiscreteAction(
        CustomModel(n_actions))
    if CONFIG['gpu_id'] != -1:
        q_func = q_func.to_gpu(CONFIG['gpu_id'])

    # Use Adam to optimize q_func. eps=1e-2 is for stability.
    optimizer = chainer.optimizers.Adam(eps=CONFIG['epsilon'], amsgrad=True,
                                        alpha=CONFIG['learning_rate'])
    optimizer.setup(q_func)

    # Use epsilon-greedy for exploration
    explorer = chainerrl.explorers.LinearDecayEpsilonGreedy(
        start_epsilon=CONFIG['start_epsilon'],
        end_epsilon=CONFIG['end_epsilon'],
        decay_steps=CONFIG['decay_steps'],
        random_action_func=env.action_space.sample)

    # DQN uses Experience Replay.
    # Specify a replay buffer and its capacity.
    replay_buffer = chainerrl.replay_buffer.EpisodicReplayBuffer(
        capacity=CONFIG['replay_buffer_capacity'])

    # Now create an agent that will interact with the environment.
    agent = chainerrl.agents.DQN(
        q_func, optimizer, replay_buffer, CONFIG['gamma'], explorer,
        gpu=CONFIG['gpu_id'],
        replay_start_size=CONFIG['replay_start_size'],
        update_interval=CONFIG['update_interval'],
        target_update_interval=CONFIG['target_update_interval'])

    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format='')

    eval_run_count = 10
    timestr = time.strftime("%Y%m%d-%H%M%S")
    agentClassName = agent.__class__.__name__[:10]

    step_hooks = []
    logger = None
    if CONFIG['use_tensorboard']:
        writer = SummaryWriter("tensorboard/tensorBoard_exp_" + timestr +
                               "_" + agentClassName)

        step_hooks = [TensorBoardLoggingStepHook(writer)]
        handler = TensorBoardEvaluationLoggingHandler(writer, agent, eval_run_count)
        logger = logging.getLogger()
        logger.addHandler(handler)

        gradients_weights_log_interval = 100
        optimizer.add_hook(
            TensorboardGradientPlotter(
                summary_writer=writer,
                log_interval=gradients_weights_log_interval))

    # save config file to results dir after initializing agent
    write_config()

    # Overwrite the normal evaluation method
    # chainerrl.experiments.evaluator.run_evaluation_episodes = run_localization_evaluation_episodes

    train_agent_with_evaluation(
        agent,
        env,
        steps=CONFIG['steps'],                # Train the agent for this many steps
        eval_n_episodes=CONFIG['eval_n_episodes'],  # Episodes sampled for each evaluation
        eval_n_steps=None,
        train_max_episode_len=CONFIG['train_max_episode_len'],  # Maximum length of each episode
        eval_interval=CONFIG['eval_interval'],  # Evaluate the agent after every this many steps
        outdir=CONFIG['resultdir_path'],      # Save everything to this directory
        step_hooks=step_hooks,
        logger=logger)

    agent.save('agent_' + timestr + "_" + agentClassName)
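# Hedged sketch (an assumption; the real TensorBoardLoggingStepHook used above is
# defined elsewhere in this project): a chainerrl step hook that forwards agent
# statistics to a SummaryWriter could look roughly like this.
import chainerrl


class ScalarLoggingStepHook(chainerrl.experiments.StepHook):
    def __init__(self, writer):
        self.writer = writer

    def __call__(self, env, agent, step):
        # agent.get_statistics() yields (name, value) pairs such as average_q
        for name, value in agent.get_statistics():
            if value is not None:
                self.writer.add_scalar(name, value, step)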
def __init__(self, out_dir):
    self.writer = SummaryWriter(out_dir)
def main(steps, gpu, imagefile, boxfile, tensorboard):
    print(steps)
    print(gpu)
    print(imagefile)
    print(boxfile)

    relative_paths = np.loadtxt(imagefile, dtype=str)
    images_base_path = os.path.dirname(imagefile)
    absolute_paths = [images_base_path + i.strip('.') for i in relative_paths]
    bboxes = np.load(boxfile)

    env = TextLocEnv(absolute_paths, bboxes, gpu)

    obs_size = 2139
    n_actions = env.action_space.n
    q_func = chainerrl.q_functions.FCStateQFunctionWithDiscreteAction(
        obs_size, n_actions,
        n_hidden_layers=2, n_hidden_channels=1024)
    if gpu != -1:
        q_func = q_func.to_gpu(gpu)

    # Use Adam to optimize q_func. eps=1e-2 is for stability.
    optimizer = chainer.optimizers.Adam(eps=1e-2)
    optimizer.setup(q_func)

    # Set the discount factor that discounts future rewards.
    gamma = 0.95

    # Use epsilon-greedy for exploration
    explorer = chainerrl.explorers.LinearDecayEpsilonGreedy(
        start_epsilon=1.0, end_epsilon=0.1, decay_steps=300000,
        random_action_func=env.action_space.sample)

    # DQN uses Experience Replay.
    # Specify a replay buffer and its capacity.
    replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity=10 ** 6)

    # Now create an agent that will interact with the environment.
    agent = chainerrl.agents.DQN(
        q_func, optimizer, replay_buffer, gamma, explorer,
        gpu=gpu, replay_start_size=500, update_interval=1,
        target_update_interval=100)

    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format='')

    eval_run_count = 10
    timestr = time.strftime("%Y%m%d-%H%M%S")
    agentClassName = agent.__class__.__name__[:10]

    step_hooks = []
    logger = None
    if tensorboard:
        writer = SummaryWriter("tensorboard/tensorBoard_exp_" + timestr +
                               "_" + agentClassName)
        step_hooks = [TensorBoardLoggingStepHook(writer)]
        handler = TensorBoardEvaluationLoggingHandler(writer, agent, eval_run_count)
        logger = logging.getLogger()
        logger.addHandler(handler)

    # Overwrite the normal evaluation method
    chainerrl.experiments.evaluator.run_evaluation_episodes = \
        run_localization_evaluation_episodes

    train_agent_with_evaluation(
        agent, env,
        steps=steps,                  # Train the agent for this many steps
        eval_n_runs=eval_run_count,   # 10 episodes are sampled for each evaluation
        max_episode_len=50,           # Maximum length of each episode
        eval_interval=500,            # Evaluate the agent after every 500 steps
        outdir='result',              # Save everything to the 'result' directory
        step_hooks=step_hooks,
        logger=logger)

    agent.save('agent_' + timestr + "_" + agentClassName)
    id='FrozenLakeNotSlippery-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'is_slippery': False},
    max_episode_steps=100,
)

# env = gym.wrappers.TimeLimit(env, max_episode_steps=100)
env = DiscreteOneHotWrapper(gym.make('FrozenLakeNotSlippery-v0'))
obs_size = env.observation_space.shape[0]
n_actions = env.action_space.n

net = Net(obs_size, HIDDEN_SIZE, n_actions)
net.cleargrads()  # zero gradients
optimizer = optimizers.Adam(alpha=0.001)
optimizer.setup(net)
writer = SummaryWriter(comment='-frozenlake-nonslippery')

full_batch = []
for iter_no, batch in enumerate(iterate_batches(env, net, BATCH_SIZE)):
    reward_mean = float(np.mean([s.reward for s in batch]))
    full_batch, obs, acts, reward_bound = filter_batch(full_batch + batch, PERCENTILE)
    if not full_batch:
        continue
    obs_v = Variable(np.asarray(obs, dtype=np.float32))
    acts_v = Variable(np.asarray(acts, dtype=np.int32))  # int32 labels for softmax_cross_entropy
    full_batch = full_batch[-500:]

    action_scores_v = net(obs_v)
    loss_v = F.softmax_cross_entropy(action_scores_v, acts_v)
def main():
    parser = argparse.ArgumentParser(description='Train script')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--dataset_type', choices=['mug', 'mnist'],
                        default='mug', help="dataset type")
    parser.add_argument('--dataset', default='data/dataset/train',
                        help="dataset root path")
    parser.add_argument('--batchsize', type=int, default=100, help="batchsize")
    parser.add_argument('--max_epoch', type=int, default=1000,
                        help="num learning epochs")
    parser.add_argument('--model', type=str,
                        choices=['normal', 'cgan', 'infogan'],
                        default="normal", help="MoCoGAN model")
    parser.add_argument('--save_name',
                        default=datetime.now(timezone('Asia/Tokyo')).strftime("%Y_%m%d_%H%M"),
                        help="save path for log, snapshot etc")
    parser.add_argument('--display_interval', type=int, default=1,
                        help='interval of displaying log to console')
    parser.add_argument('--snapshot_interval', type=int, default=10,
                        help='interval of snapshot')
    parser.add_argument('--log_tensorboard_interval', type=int, default=10,
                        help='interval of log to tensorboard (generate samples too)')
    parser.add_argument('--num_gen_samples', type=int, default=36,
                        help='num generate samples')
    parser.add_argument('--dim_zc', type=int, default=50,
                        help='number of dimensions of z content')
    parser.add_argument('--dim_zm', type=int, default=10,
                        help='number of dimensions of z motion')
    parser.add_argument('--n_filters_gen', type=int, default=64,
                        help='number of channels of image generator')
    parser.add_argument('--n_filters_idis', type=int, default=64,
                        help='number of channels of image discriminator')
    parser.add_argument('--n_filters_vdis', type=int, default=64,
                        help='number of channels of video discriminator')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    # parameters
    size = 64          # image size
    channel = 3        # num channels
    video_length = 16  # video length
    dim_zc = args.dim_zc
    dim_zm = args.dim_zm
    n_filters_gen = args.n_filters_gen
    n_filters_idis = args.n_filters_idis
    n_filters_vdis = args.n_filters_vdis
    use_noise = True
    noise_sigma = 0.2

    # Set up dataset
    if args.dataset_type == "mug":
        num_labels = 6
        train_dataset = MugDataset(args.dataset, video_length)
    elif args.dataset_type == "mnist":
        num_labels = 0
        train_dataset = MovingMnistDataset(args.dataset, video_length)
    train_iter = chainer.iterators.SerialIterator(train_dataset, args.batchsize)

    # Set up models
    if args.model == "normal":
        use_label = False
        image_gen = ImageGenerator(dim_zc, dim_zm, num_labels, channel,
                                   n_filters_gen, video_length)
        image_dis = ImageDiscriminator(channel, 1, n_filters_gen,
                                       use_noise, noise_sigma)
        video_dis = VideoDiscriminator(channel, 1, n_filters_gen,
                                       use_noise, noise_sigma)
    elif args.model == "cgan":
        if num_labels == 0:
            raise ValueError("Called cgan model, but dataset has no label.")
        use_label = True
        image_gen = ImageGenerator(dim_zc, dim_zm, num_labels, channel,
                                   n_filters_gen, video_length)
        image_dis = ImageDiscriminator(channel + num_labels, 1, n_filters_gen,
                                       use_noise, noise_sigma)
        video_dis = VideoDiscriminator(channel + num_labels, 1, n_filters_gen,
                                       use_noise, noise_sigma)
    elif args.model == "infogan":
        if num_labels == 0:
            raise ValueError("Called infogan model, but dataset has no label.")
        use_label = True
        image_gen = ImageGenerator(dim_zc, dim_zm, num_labels, channel,
                                   n_filters_gen, video_length)
        image_dis = ImageDiscriminator(channel, 1 + num_labels, n_filters_gen,
                                       use_noise, noise_sigma)
        video_dis = VideoDiscriminator(channel, 1 + num_labels, n_filters_gen,
                                       use_noise, noise_sigma)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        image_gen.to_gpu()
        image_dis.to_gpu()
        video_dis.to_gpu()

    def make_optimizer(model, alpha=1e-3, beta1=0.9, beta2=0.999):
        # pass beta2 through as well so the argument is not silently ignored
        optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1, beta2=beta2)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(1e-5), 'hook_dec')
        return optimizer

    opt_image_gen = make_optimizer(image_gen, 2e-4, 5e-5, 0.999)
    opt_image_dis = make_optimizer(image_dis, 2e-4, 5e-5, 0.999)
    opt_video_dis = make_optimizer(video_dis, 2e-4, 5e-5, 0.999)

    # tensorboard writer
    writer = SummaryWriter(Path('runs') / args.save_name)

    # updater args
    updater_args = {
        "model": args.model,
        "models": (image_gen, image_dis, video_dis),
        "video_length": video_length,
        "img_size": size,
        "channel": channel,
        "dim_zl": num_labels,
        "iterator": train_iter,
        "tensorboard_writer": writer,
        "optimizer": {
            'image_gen': opt_image_gen,
            'image_dis': opt_image_dis,
            'video_dis': opt_video_dis,
        },
        "device": args.gpu,
    }

    # Setup updater
    updater = Updater(**updater_args)

    # Setup logging
    save_path = Path('result') / args.save_name
    save_path.mkdir(parents=True, exist_ok=True)

    # trainer
    trainer = training.Trainer(updater, (args.max_epoch, 'epoch'), out=save_path)

    # snapshot setting
    snapshot_interval = (args.snapshot_interval, 'epoch')
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        image_gen, 'image_gen_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        image_dis, 'image_dis_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        video_dis, 'video_dis_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)

    # loss setting
    display_interval = (args.display_interval, 'epoch')
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'image_gen/loss', 'image_dis/loss',
        'video_dis/loss'
    ]), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1))

    # tensorboard-chainer
    log_tensorboard_interval = (args.log_tensorboard_interval, 'epoch')
    if np.sqrt(args.num_gen_samples) % 1.0 != 0:
        raise ValueError('--num_gen_samples must be n^2 (n: natural number).')
    trainer.extend(log_tensorboard(image_gen, args.num_gen_samples,
                                   video_length, writer),
                   trigger=log_tensorboard_interval)

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    print('[ Training configuration ]')
    print('# gpu: {}'.format(args.gpu))
    print('# minibatch size: {}'.format(args.batchsize))
    print('# max epoch: {}'.format(args.max_epoch))
    print('# num batches: {}'.format(len(train_dataset) // args.batchsize))
    print('# data size: {}'.format(len(train_dataset)))
    print('# data shape: {}'.format(train_dataset[0][0].shape))
    print('# num filters igen: {}'.format(n_filters_gen))
    print('# num filters idis: {}'.format(n_filters_idis))
    print('# num filters vdis: {}'.format(n_filters_vdis))
    print('# use noise: {} (sigma={})'.format(use_noise, noise_sigma))
    print('# use label: {}'.format(use_label))
    print('# snapshot interval: {}'.format(args.snapshot_interval))
    print('# log tensorboard interval: {}'.format(args.log_tensorboard_interval))
    print('# num generate samples: {}'.format(args.num_gen_samples))
    print('')

    # start training
    trainer.run()

    if args.gpu >= 0:
        image_gen.to_cpu()
        image_dis.to_cpu()
        video_dis.to_cpu()

    chainer.serializers.save_npz(save_path / 'image_gen_epoch_final.npz', image_gen)
    chainer.serializers.save_npz(save_path / 'image_dis_epoch_final.npz', image_dis)
    chainer.serializers.save_npz(save_path / 'video_dis_epoch_final.npz', video_dis)
class MLP(chainer.Chain):
    def __init__(self, n_units, n_out):
        super(MLP, self).__init__()
        with self.init_scope():
            self.l1 = L.Linear(None, n_units)  # n_in -> n_units
            self.l2 = L.Linear(None, n_units)  # n_units -> n_units
            self.l3 = L.Linear(None, n_out)    # n_units -> n_out

    @within_name_scope('MLP')
    def __call__(self, x):
        with name_scope('linear1', self.l1.params()):
            h1 = F.relu(self.l1(x))
        with name_scope('linear2', self.l2.params()):
            h2 = F.relu(self.l2(h1))
        with name_scope('linear3', self.l3.params()):
            o = self.l3(h2)
        return o


model = L.Classifier(MLP(1000, 10))
res = model(chainer.Variable(np.random.rand(1, 784).astype(np.float32)),
            chainer.Variable(np.random.rand(1).astype(np.int32)))

writer = SummaryWriter('runs/' + datetime.now().strftime('%B%d %H:%M:%S'))
writer.add_graph([res])
writer.add_all_variable_images([res], pattern='.*MLP.*')
writer.add_all_parameter_histograms([res], pattern='.*MLP.*')
writer.close()
        return total_reward

    def value_iteration(self):
        for state in range(self.env.observation_space.n):
            state_values = [
                self.calc_action_value(state, action)
                for action in range(self.env.action_space.n)
            ]
            self.values[state] = max(state_values)


if __name__ == "__main__":
    test_env = gym.make(ENV_NAME)
    agent = Agent()
    writer = SummaryWriter(comment='-v-iteration')

    iter_no = 0
    best_reward = 0.0
    while True:
        iter_no += 1
        agent.play_n_random_steps(100)
        agent.value_iteration()

        reward = 0.0
        for _ in range(TEST_EPISODES):
            reward += agent.play_episode(test_env)
        reward /= TEST_EPISODES
        writer.add_scalar("reward", reward, iter_no)
        if reward > best_reward:
import chainer
from chainer.dataset import convert
import chainer.links as L
import chainer.functions as F
from chainer import serializers
from chainer.datasets import get_cifar10
from chainer.datasets import get_cifar100

import utils

# In[2]:

from tb_chainer import SummaryWriter, name_scope, within_name_scope

try:
    writer = SummaryWriter('runs/{}_{}'.format(__file__, utils.now(isabout=True)))
except:
    writer = SummaryWriter('runs/' + utils.now(isabout=True))

# In[3]:

from easydict import EasyDict

args = EasyDict({
    'bs': 64,
    'epoch': 100,
    'lr': 0.05,
    'gpu': 0,
    'out': 'result',
    'resume': '',
    'n_in': 32,
    loss_config=config.loss,
    predictor=predictor,
    discriminator=discriminator,
    device=config.train.gpu,
    iterator=train_iter,
    optimizer=opts,
    converter=converter,
)

# trainer
trigger_log = (config.train.log_iteration, 'iteration')
trigger_snapshot = (config.train.snapshot_iteration, 'iteration')
trigger_stop = (config.train.stop_iteration, 'iteration') \
    if config.train.stop_iteration is not None else None

trainer = training.Trainer(updater, stop_trigger=trigger_stop, out=arguments.output)
tb_writer = SummaryWriter(Path(arguments.output))

ext = extensions.Evaluator(test_iter, models, converter,
                           device=config.train.gpu, eval_func=updater.forward)
trainer.extend(ext, name='test', trigger=trigger_log)
ext = extensions.Evaluator(train_eval_iter, models, converter,
                           device=config.train.gpu, eval_func=updater.forward)
trainer.extend(ext, name='train', trigger=trigger_log)

trainer.extend(extensions.dump_graph('predictor/loss'))

ext = extensions.snapshot_object(predictor,
                                 filename='predictor_{.updater.iteration}.npz')
trainer.extend(ext, trigger=trigger_snapshot)

trainer.extend(extensions.LogReport(trigger=trigger_log))
trainer.extend(TensorBoardReport(writer=tb_writer), trigger=trigger_log)

if trigger_stop is not None:
def train_agent(experiments_dir='./experiments'):
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format='')
    print_config()

    dataset = load_dataset(CONFIG['dataset'], CONFIG['dataset_path'])
    env = create_env(dataset, CONFIG)
    agent = create_agent(env, CONFIG)

    # Seeding for reproducible experiments
    set_random_seed(CONFIG['seed_agent'], gpus=[CONFIG['gpu_id']])
    env.seed(CONFIG['seed_environment'])

    # Prepare experiment directory
    now_date = datetime.datetime.now()
    timestr = now_date.strftime("%Y-%m-%d+%H-%M-%S")
    experiment_path = os.path.join(experiments_dir,
                                   CONFIG['experiment_id'] + "_" + timestr)
    ensure_folder(experiment_path)
    write_config(experiment_path)

    step_hooks = []
    logger = None
    if CONFIG['use_tensorboard']:
        tensorboard_path = os.path.join(experiment_path, "tensorboard")
        ensure_folder(tensorboard_path)
        eval_run_count = 10
        writer = SummaryWriter(tensorboard_path)
        step_hooks = [TensorBoardLoggingStepHook(writer)]
        handler = TensorBoardEvaluationLoggingHandler(writer, agent, eval_run_count)
        logger = logging.getLogger()
        logger.addHandler(handler)

    # Inject hook for recording custom stats during training
    record_stats = chainerrl.experiments.evaluator.record_stats
    chainerrl.experiments.evaluator.record_stats = create_stats_decorator(env)(
        record_stats)

    train_agent_with_evaluation(
        agent,
        env,
        steps=CONFIG['steps'],                      # Train the agent for this many steps
        eval_n_episodes=CONFIG['eval_n_episodes'],  # Episodes sampled for each evaluation
        eval_n_steps=None,
        train_max_episode_len=CONFIG['train_max_episode_len'],  # Maximum length of each episode
        eval_interval=CONFIG['eval_interval'],      # Evaluate the agent after every this many steps
        outdir=experiment_path,                     # Save everything to the experiment directory
        step_hooks=step_hooks,
        logger=logger)

    # Save the final model
    agent_classname = agent.__class__.__name__[:10]
    agent_path = os.path.join(experiment_path, "agent" + "_" + agent_classname)
    ensure_folder(agent_path)
    agent.save(agent_path)

    # Plot training summary
    if not os.path.exists(os.path.join(experiment_path, 'training')):
        plot_training_summary(experiment_path)

    return experiment_path
def __init__(self, entries, out_dir='logs', log_report='LogReport'):
    self._entries = entries
    self._log_report = log_report
    self._log_len = 0  # number of observations already printed
    self.writer = SummaryWriter(out_dir)
import math

import chainer
import numpy as np
from datetime import datetime
from tb_chainer import utils, SummaryWriter

vgg = chainer.links.VGG16Layers()
writer = SummaryWriter('runs/' + datetime.now().strftime('%B%d %H:%M:%S'))

sample_rate = 44100
freqs = [262, 294, 330, 349, 392, 440, 440, 440, 440, 440, 440]

for n_iter in range(100):
    M_global = np.random.rand(1)  # value to keep
    writer.add_scalar('M_global', M_global[0], n_iter)

    x = np.random.rand(32, 3, 64, 64)  # output from network
    if n_iter % 10 == 0:
        x = utils.make_grid(x)
        writer.add_image('Image', x, n_iter)

        x = np.zeros(sample_rate * 2)
        for i in range(x.shape[0]):
            # sound amplitude should be in [-1, 1]
            x[i] = np.cos(freqs[n_iter // 10] * np.pi * float(i) / float(sample_rate))
        writer.add_audio('Audio', x, n_iter)

        for name, param in vgg.namedparams():
            writer.add_histogram(name, chainer.cuda.to_cpu(param.data), n_iter)

        writer.add_text('Text', 'text logged at step:' + str(n_iter), n_iter)
        writer.add_text('another Text', 'another text logged at step:' + str(n_iter), n_iter)

writer.close()
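# Note (not part of the original snippet): the event files written under 'runs/'
# can be inspected with the standard TensorBoard CLI, e.g. `tensorboard --logdir runs`.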
def write_image(image, path):
    image *= 255
    image = image.transpose(1, 2, 0)
    image = image.astype(np.uint8)
    result = Image.fromarray(image)
    result.save(path)


if args.images:
    sequencelist = [args.images]
else:
    sequencelist = load_list(args.sequences, args.root)

if args.tensorboard:
    writer = SummaryWriter('runs/' + datetime.now().strftime('%B%d %H:%M:%S'))

if args.test == True:
    logf = open('prediction_loss.csv', 'w')
    for seq in range(len(sequencelist)):
        imagelist = load_list(sequencelist[seq], args.root)
        prednet.reset_state()
        loss = 0
        batchSize = 1
        x_batch = np.ndarray((batchSize, args.channels[0], args.size[1], args.size[0]),
                             dtype=np.float32)
        y_batch = np.ndarray((batchSize, args.channels[0], args.size[1], args.size[0]),
                             dtype=np.float32)
        for i in range(0, len(imagelist) - 1):
            print('frameNo:' + str(i))