def get(self):
    logging.info("SummaryTask starting...")

    # init class and variables
    bucket_name = os.environ.get(
        'BUCKET_NAME', app_identity.get_default_gcs_bucket_name())
    bucket = '/' + bucket_name
    trendManager = TrendManager()
    dataModelConverter = DataModelConverter()
    csvUtils = CsvUtils()
    cloudStorageUtils = CloudStorageUtils()
    previous_day_timestamp = int(time.time()) - Globals._1_DAY
    q_futures = []

    for region in self.getRegions():
        try:
            date = TimezoneAwareDate(region, self.request.get('date'))
            trendsJson = self.getTrends(region, trendManager)
            self.saveToCloudStorage(dataModelConverter, csvUtils,
                                    cloudStorageUtils, trendsJson, region,
                                    bucket, date)
            self.saveToDatastore(q_futures, trendsJson, region, date)
            self.deleteFromDatastore(q_futures, region, previous_day_timestamp)
        except Exception as e:
            traceback.print_exc()
            Error(msg=str(e), timestamp=int(time.time())).put()
            SendEmail().send('Error on SummaryTask', str(e))
            self.retry()
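
# The get() handler above follows Google App Engine's webapp2 request-handler
# interface (self.request.get(...), a GET entry point), so it is presumably run as a
# cron- or task-queue-triggered handler. A minimal sketch of how such a handler is
# usually registered is below; the '/tasks/summary' route is an assumption, while the
# SummaryTask name comes from the log messages in the handler itself.
import webapp2

class SummaryTask(webapp2.RequestHandler):
    # the get() method defined above would live here
    pass

app = webapp2.WSGIApplication([
    ('/tasks/summary', SummaryTask),  # assumed path; cron.yaml / queue.yaml would target it
])
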
def run():
    game = ple.games.flappybird.FlappyBird()
    # game = ple.games.snake.Snake(width=512, height=512)
    # game = ple.games.pong.Pong(width=512, height=512)
    p = ple.PLE(game, fps=30, display_screen=args.is_render)
    p.init()

    plt.figure()
    all_scores = []
    all_losses = []
    all_t = []

    agent = PGAgent(len(p.getGameState()), len(p.getActionSet()))
    is_end = p.game_over()

    for e in range(args.episodes):
        p.reset_game()
        s_t0 = np.asarray(list(p.getGameState().values()), dtype=np.float32)
        reward_total = 0
        pipes = 0
        transitions = []

        for t in range(args.max_steps):
            a_t0_idx = agent.act(s_t0)
            a_t0 = p.getActionSet()[a_t0_idx]
            r_t1 = p.act(a_t0)
            is_end = p.game_over()
            s_t1 = np.asarray(list(p.getGameState().values()), dtype=np.float32)
            reward_total += r_t1

            if r_t1 == 1.0:
                pipes += 1

            if t == args.max_steps - 1:
                r_t1 = -100
                is_end = True

            transitions.append([s_t0, a_t0_idx, r_t1])
            s_t0 = s_t1

            if is_end:
                all_scores.append(reward_total)
                break

        # compute the discounted return for every step of the episode
        for t in range(len(transitions)):
            R = 0
            for t_c, (s_t0, a_t0_idx, r_t) in enumerate(transitions[t:]):
                R += args.gamma**t_c * r_t
            s_t0, a_t0_idx, r_t1 = transitions[t]
            tr = [s_t0, a_t0_idx, R]
            agent.replay_memory.push(tr)

        loss = 0
        if len(agent.replay_memory) > args.batch_size:
            loss = agent.replay()
            all_losses.append(loss)

        all_t.append(t)

        metrics_episode = {
            'loss': loss,
            'score': reward_total,
            't': t,
            'e': agent.epsilon,
            'pipes': pipes
        }

        if args.is_csv is True:
            CsvUtils.add_hparams(
                sequence_dir=os.path.join('.', args.sequence_name),
                sequence_name=args.sequence_name,
                run_name=args.run_name,
                args_dict=args.__dict__,
                metrics_dict=metrics_episode,
                global_step=e
            )
        else:
            logging.info(f'episode: {e}/{args.episodes} {metrics_episode}')
            print(f'episode: {e}/{args.episodes} ', metrics_episode)

        if e % 100 == 0 and not args.is_inference:
            # save logs, graphics and weights during training
            plt.clf()
            plt.subplot(3, 1, 1)
            plt.ylabel('Score')
            plt.plot(all_scores)
            plt.subplot(3, 1, 2)
            plt.ylabel('Loss')
            plt.plot(all_losses)
            plt.subplot(3, 1, 3)
            plt.ylabel('Steps')
            plt.plot(all_t)
            plt.xlabel('Episode')
            plt.savefig(os.path.join(seq_run_name, f'plt-{e}.png'))
            torch.save(agent.p_model.cpu().state_dict(),
                       os.path.join(seq_run_name, f'model-{e}.pt'))
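
# The nested loop above recomputes the discounted return from scratch for every step,
# which is O(n^2) in episode length. An equivalent O(n) backward accumulation, written
# here as a standalone helper that is not part of the original code:
def discounted_returns(rewards, gamma):
    # returns[i] = rewards[i] + gamma * rewards[i+1] + gamma^2 * rewards[i+2] + ...
    returns = [0.0] * len(rewards)
    running = 0.0
    for i in reversed(range(len(rewards))):
        running = rewards[i] + gamma * running
        returns[i] = running
    return returns
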
def run():
    # environment name
    env = gym.make('LunarLander-v2')

    plt.figure()
    all_scores = []
    all_losses = []
    all_t = []

    agent = DDQNAgent(
        env.observation_space.shape[0],
        # first 2 state values are the x and y position (height), the next 2 are the x and y
        # velocity terms, then lander angle and angular velocity, and the left and right leg
        # contact points (bool)
        env.action_space.n,
        args)
    is_end = False

    t_total = 0
    for e in range(args.episodes):
        s_t0 = env.reset()
        reward_total = 0
        episode_loss = []
        is_win = False

        for t in range(args.max_steps):
            t_total += 1
            if t_total % args.target_update == 0:
                agent.update_q_t_model()

            if args.is_render and len(all_scores):  # and all_scores[-1] > 0:
                # if e % 10 == 0 and all_scores[-1] > 0:
                env.render()

            a_t0 = agent.act(s_t0)
            s_t1, r_t1, is_end, _ = env.step(a_t0)
            reward_total += r_t1

            if t == args.max_steps - 1:
                r_t1 = -100
                is_end = True

            agent.replay_memory.push((s_t0, a_t0, r_t1, s_t1, is_end))
            s_t0 = s_t1

            if len(agent.replay_memory) > args.batch_size:
                loss = agent.replay()
                episode_loss.append(loss)

            if is_end:
                all_scores.append(reward_total)
                all_losses.append(np.mean(episode_loss))
                # a terminal reward of +100 means the lander has landed
                # https://github.com/openai/gym/blob/master/gym/envs/box2d/lunar_lander.py#L381
                if r_t1 >= 100:
                    is_win = True
                break

        all_t.append(t)

        metrics_episode = {
            'loss': all_losses[-1],
            'score': reward_total,
            't': t,
            'e': agent.epsilon,
            'is_win': is_win
        }

        if args.is_csv is True:
            CsvUtils.add_hparams(
                sequence_dir=os.path.join('.', args.sequence_name),
                sequence_name=args.sequence_name,
                run_name=args.run_name,
                args_dict=args.__dict__,
                metrics_dict=metrics_episode,
                global_step=e
            )
        else:
            logging.info(f'episode: {e}/{args.episodes} {metrics_episode}')
            print(f'episode: {e}/{args.episodes} ', metrics_episode)

        if e % 100 == 0 and not args.is_inference:
            # save logs, graphics and weights during training
            plt.clf()
            plt.subplot(3, 1, 1)
            plt.ylabel('Score')
            plt.plot(all_scores)
            plt.subplot(3, 1, 2)
            plt.ylabel('Loss')
            plt.plot(all_losses)
            plt.subplot(3, 1, 3)
            plt.ylabel('Steps')
            plt.plot(all_t)
            plt.xlabel('Episode')
            plt.savefig(os.path.join(seq_run_name, f'plt-{e}.png'))
            torch.save(agent.q_model.cpu().state_dict(),
                       os.path.join(seq_run_name, f'model-{e}.pt'))

    env.close()
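
# DDQNAgent is defined elsewhere. A minimal sketch of the two pieces the loop above
# relies on: the periodic target-network sync behind update_q_t_model() and the
# double-DQN target, where the online network picks the next action and the target
# network scores it. The attribute name q_t_model is an assumption; q_model matches
# the torch.save call above.
import torch

def update_q_t_model_sketch(agent):
    # hard copy of the online-network weights into the target network
    agent.q_t_model.load_state_dict(agent.q_model.state_dict())

def ddqn_target_sketch(agent, r_t1, s_t1, is_end, gamma):
    # r_t1: (B,) rewards, s_t1: (B, state_dim) next states, is_end: (B,) 0/1 float flags
    with torch.no_grad():
        a_star = agent.q_model(s_t1).argmax(dim=-1, keepdim=True)      # online net chooses action
        q_next = agent.q_t_model(s_t1).gather(-1, a_star).squeeze(-1)  # target net evaluates it
    return r_t1 + gamma * q_next * (1.0 - is_end)
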
def main():
    data_loader_train = torch.utils.data.DataLoader(
        dataset=DatasetFashionMNIST(is_train=True, dataset_path=args.dataset_path),
        batch_size=BATCH_SIZE,
        shuffle=True)
    data_loader_test = torch.utils.data.DataLoader(
        dataset=DatasetFashionMNIST(is_train=False, dataset_path=args.dataset_path),
        batch_size=BATCH_SIZE,
        shuffle=False)

    model = ResNet(in_channels=1, n_classes=10)
    model = model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    metrics = {}
    for stage in ['train', 'test']:
        for metric in ['loss', 'acc']:
            metrics[f'{stage}_{metric}'] = []

    for epoch in range(EPOCHS):
        metrics_epoch = {key: [] for key in metrics.keys()}

        for data_loader in [data_loader_train, data_loader_test]:
            stage = 'train'
            torch.set_grad_enabled(True)
            if data_loader == data_loader_test:
                stage = 'test'
                torch.set_grad_enabled(False)  # inference

            for x, y in data_loader:
                x = x.to(DEVICE)
                y = y.to(DEVICE)

                y_prim = model.forward(x)
                loss = -torch.mean(y * torch.log(y_prim + 1e-8))

                if data_loader == data_loader_train:
                    loss.backward()
                    optimizer.step()
                    optimizer.zero_grad()

                # calculate metrics per batch
                metrics_epoch[f'{stage}_loss'].append(loss.cpu().item())  # Tensor(0.1) => 0.1f
                metrics_epoch[f'{stage}_acc'].append(acc(y_prim, y))

        # calculate metrics per epoch
        metrics_epoch_str = []
        for key in metrics_epoch.keys():
            metrics_epoch[key] = np.mean(metrics_epoch[key])
            metrics_epoch_str.append(f'{key}: {round(metrics_epoch[key], 2)}')

        summary_writer.flush()
        print(f'epoch: {epoch} {" ".join(metrics_epoch_str)}')

        # add hparams
        summary_writer.add_hparams(hparam_dict=args.__dict__,
                                   metric_dict=metrics_epoch,
                                   name=args.run_name,
                                   global_step=epoch)
        if args.is_csv is True:
            CsvUtils.add_hparams(
                sequence_dir=os.path.join('.', f'{args.sequence_name}-csv'),
                sequence_name=args.sequence_name,
                run_name=args.run_name,
                args_dict=args.__dict__,
                metrics_dict=metrics_epoch,
                global_step=epoch)

        # append metrics per epoch to global metrics
        for key in metrics_epoch.keys():
            metrics[key].append(metrics_epoch[key])

    summary_writer.flush()
    summary_writer.close()
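
# The acc() helper used above is not shown. Given that the loss treats y as one-hot
# labels and y_prim as softmax probabilities, a minimal sketch consistent with that
# usage (an assumption, not the project's actual helper):
import numpy as np

def acc(y_prim, y):
    # fraction of samples where the predicted class matches the one-hot label
    idx_prim = y_prim.cpu().data.numpy().argmax(axis=-1)
    idx = y.cpu().data.numpy().argmax(axis=-1)
    return float(np.mean(idx_prim == idx))
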
def write(self, data, output_csv):
    CsvUtils.write_to_csv(data, output_csv, self.csv_delimiter)
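
# CsvUtils.write_to_csv itself is not shown here. A plausible minimal implementation
# using the standard csv module, assuming `data` is an iterable of rows; this is a
# sketch, not the project's actual helper:
import csv

def write_to_csv(data, output_csv, delimiter=','):
    with open(output_csv, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=delimiter)
        writer.writerows(data)
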
def run():
    game = ple.games.flappybird.FlappyBird()
    # game = ple.games.snake.Snake(width=512, height=512)
    # game = ple.games.pong.Pong(width=512, height=512)
    p = ple.PLE(game, fps=30, display_screen=args.is_render)
    p.init()

    plt.figure()
    all_scores = []
    all_losses = []
    all_losses_a = []
    all_losses_c = []
    all_t = []

    agent = A2CAgent(len(p.getGameState()), len(p.getActionSet()))
    is_end = p.game_over()

    for e in range(args.episodes):
        p.reset_game()
        s_t0 = np.asarray(list(p.getGameState().values()), dtype=np.float32)
        reward_total = 0
        pipes = 0
        transitions = []
        states_t1 = []
        end_t1 = []

        for t in range(args.max_steps):
            a_t0_idx = agent.act(s_t0)
            a_t0 = p.getActionSet()[a_t0_idx]
            r_t1 = p.act(a_t0)
            is_end = p.game_over()
            s_t1 = np.asarray(list(p.getGameState().values()), dtype=np.float32)
            end_t1.append(is_end)
            reward_total += r_t1

            if r_t1 == 1.0:
                pipes += 1

            transitions.append([s_t0, a_t0_idx, r_t1])
            states_t1.append(s_t1)
            s_t0 = s_t1

            if is_end:
                all_scores.append(reward_total)
                break

        # bootstrap the value of each next state with the critic
        t_states_t1 = torch.FloatTensor(states_t1).to(args.device)
        v_t1 = agent.model_c.forward(t_states_t1)
        np_v_t1 = v_t1.cpu().data.numpy().squeeze()

        for t in range(len(transitions)):
            s_t0, a_t0_idx, r_t1 = transitions[t]
            is_end = end_t1[t]
            delta = r_t1
            if not is_end:
                delta = r_t1 + args.gamma * np_v_t1[t]
            agent.replay_memory.push([s_t0, a_t0_idx, delta])

        loss = loss_a = loss_c = 0
        if len(agent.replay_memory) > args.batch_size:
            loss_a, loss_c = agent.replay()
            loss = loss_a + loss_c
            all_losses.append(loss)
            all_losses_a.append(loss_a)
            all_losses_c.append(loss_c)

        all_t.append(t)

        metrics_episode = {
            'loss': loss,
            'loss_a': loss_a,
            'loss_c': loss_c,
            'score': reward_total,
            't': t,
            'e': agent.epsilon,
            'pipes': pipes
        }

        if args.is_csv is True:
            CsvUtils.add_hparams(
                sequence_dir=os.path.join('.', args.sequence_name),
                sequence_name=args.sequence_name,
                run_name=args.run_name,
                args_dict=args.__dict__,
                metrics_dict=metrics_episode,
                global_step=e
            )
        else:
            logging.info(f'episode: {e}/{args.episodes} {metrics_episode}')
            print(f'episode: {e}/{args.episodes} ', metrics_episode)

        if e % 100 == 0:
            plt.clf()
            plt.subplot(5, 1, 1)
            plt.ylabel('Score')
            plt.plot(all_scores)
            plt.subplot(5, 1, 2)
            plt.ylabel('Loss')
            plt.plot(all_losses)
            plt.subplot(5, 1, 3)
            plt.ylabel('Loss Actor')
            plt.plot(all_losses_a)
            plt.subplot(5, 1, 4)
            plt.ylabel('Loss Critic')
            plt.plot(all_losses_c)
            plt.subplot(5, 1, 5)
            plt.ylabel('Steps')
            plt.plot(all_t)
            plt.xlabel('Episode')
            plt.savefig(os.path.join(seq_run_name, f'plt-{e}.png'))
            torch.save(agent.model_c.cpu().state_dict(),
                       os.path.join(seq_run_name, f'model-{e}-c.pt'))
            torch.save(agent.model_a.cpu().state_dict(),
                       os.path.join(seq_run_name, f'model-{e}-a.pt'))
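
# A2CAgent is defined elsewhere. A minimal sketch of the actor-critic update that
# replay() presumably performs on the pushed (state, action_idx, delta) tuples, where
# delta is the bootstrapped target r + gamma * V(s'). The model_a / model_c names
# match the usage above; replay_memory.memory and the two optimizers are assumptions.
import random
import numpy as np
import torch

def a2c_replay_sketch(agent, batch_size, device='cpu'):
    batch = random.sample(agent.replay_memory.memory, batch_size)  # assumed storage attribute
    states = torch.from_numpy(np.stack([t[0] for t in batch])).float().to(device)
    actions = torch.tensor([t[1] for t in batch], dtype=torch.long, device=device)
    deltas = torch.tensor([t[2] for t in batch], dtype=torch.float32, device=device)

    v_s = agent.model_c(states).squeeze(-1)        # critic value estimates
    advantage = deltas - v_s.detach()              # TD-style advantage for the actor

    probs = agent.model_a(states)                  # actor action probabilities (assumed softmax output)
    log_prob_a = torch.log(probs[torch.arange(len(batch)), actions] + 1e-8)

    loss_a = -(log_prob_a * advantage).mean()      # policy-gradient (actor) loss
    loss_c = torch.mean((deltas - v_s) ** 2)       # value-regression (critic) loss

    agent.optimizer_a.zero_grad()
    loss_a.backward()
    agent.optimizer_a.step()

    agent.optimizer_c.zero_grad()
    loss_c.backward()
    agent.optimizer_c.step()
    return loss_a.item(), loss_c.item()
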
def run():
    game = ple.games.flappybird.FlappyBird()
    # game = ple.games.snake.Snake(width=512, height=512)
    # game = ple.games.pong.Pong(width=512, height=512)
    p = ple.PLE(game, fps=30, display_screen=args.is_render)
    p.init()

    plt.figure()
    all_scores = []
    all_losses = []
    all_t = []

    agent = DQNAgent(len(p.getGameState()), len(p.getActionSet()), args)
    is_end = p.game_over()

    for e in range(args.episodes):
        p.reset_game()
        s_t0 = np.asarray(list(p.getGameState().values()), dtype=np.float32)
        reward_total = 0
        pipes = 0
        episode_loss = []

        for t in range(args.max_steps):
            a_t0_idx = agent.act(s_t0)
            a_t0 = p.getActionSet()[a_t0_idx]
            r_t1 = p.act(a_t0)
            is_end = p.game_over()
            s_t1 = np.asarray(list(p.getGameState().values()), dtype=np.float32)
            reward_total += r_t1

            '''
            from /PyGame-Learning-Environment/ple/games/base/pygamewrapper.py
            self.rewards = {
                "positive": 1.0,
                "negative": -1.0,
                "tick": 0,
                "loss": -5.0,
                "win": 5.0
            }
            '''
            if r_t1 == 1.0:
                pipes += 1

            if t == args.max_steps - 1:
                r_t1 = -100
                is_end = True

            agent.replay_memory.push(
                (s_t0, a_t0_idx, r_t1, s_t1, is_end)
            )
            s_t0 = s_t1

            if len(agent.replay_memory) > args.batch_size:
                loss = agent.replay()
                episode_loss.append(loss)

            if is_end:
                all_scores.append(reward_total)
                all_losses.append(np.mean(episode_loss))
                break

        all_t.append(t)

        metrics_episode = {
            'loss': all_losses[-1],
            'score': reward_total,
            't': t,
            'e': agent.epsilon,
            'pipes': pipes
        }

        if args.is_csv is True:
            CsvUtils.add_hparams(
                sequence_dir=os.path.join('.', args.sequence_name),
                sequence_name=args.sequence_name,
                run_name=args.run_name,
                args_dict=args.__dict__,
                metrics_dict=metrics_episode,
                global_step=e
            )
        else:
            logging.info(f'episode: {e}/{args.episodes} {metrics_episode}')
            print(f'episode: {e}/{args.episodes} ', metrics_episode)

        if e % 100 == 0 and not args.is_inference:
            # save logs, graphics and weights during training
            plt.clf()
            plt.subplot(3, 1, 1)
            plt.ylabel('Score')
            plt.plot(all_scores)
            plt.subplot(3, 1, 2)
            plt.ylabel('Loss')
            plt.plot(all_losses)
            plt.subplot(3, 1, 3)
            plt.ylabel('Steps')
            plt.plot(all_t)
            plt.xlabel('Episode')
            plt.savefig(os.path.join(seq_run_name, f'plt-{e}.png'))
            torch.save(agent.q_model.cpu().state_dict(),
                       os.path.join(seq_run_name, f'model-{e}.pt'))
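
# DQNAgent is defined elsewhere. Since the loop logs agent.epsilon, act() is presumably
# an epsilon-greedy choice over the Q-network's outputs; a minimal sketch under that
# assumption (the q_model and epsilon attribute names are not confirmed by this file):
import random
import numpy as np
import torch

def act_sketch(agent, s_t0, n_actions):
    if random.random() < agent.epsilon:
        return random.randrange(n_actions)        # explore: random action index
    with torch.no_grad():
        q = agent.q_model(torch.from_numpy(np.asarray(s_t0)).float().unsqueeze(0))
    return int(q.argmax(dim=-1).item())           # exploit: greedy action index
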