def test_all_available(self):
    agent_data, final_count, roles = get_data("all_available")
    agents = get_agent(agent_data, "all_available", roles)
    agents_count = self.count_available(agents, roles)
    self.assertEqual(agents_count, final_count)
def test_least_busy(self):
    agent_data, time_available, roles = get_data("least_busy")
    agents = get_agent(agent_data, "least_busy", roles)
    agents_count = self.count_available(agents, roles)
    self.assertEqual(agents_count, 1)
    self.assertEqual(agents[0]['available_since'], time_available)
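# The availability tests above and below rely on a count_available() helper
# that is not shown here. A minimal sketch of what it might look like,
# assuming each agent is a dict with 'role' and 'available' keys (both key
# names are assumptions, not taken from the repo):
def count_available(self, agents, roles):
    """Count agents whose role is in `roles` and that are marked available."""
    return sum(
        1 for agent in agents
        if agent.get('role') in roles and agent.get('available', True)
    )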
def main():
    # create experiment config
    config = get_config('pqnet')('train')

    # create network and training agent
    tr_agent = get_agent(config)

    # load from checkpoint if provided
    if config.cont:
        tr_agent.load_ckpt(config.ckpt)

    # create dataloader
    train_loader = get_dataloader('train', config)
    val_loader = get_dataloader('val', config)
    val_loader = cycle(val_loader)

    # start training
    clock = tr_agent.clock
    for e in range(clock.epoch, config.nr_epochs):
        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # train step
            outputs, losses = tr_agent.train_func(data)

            # visualize
            if config.vis and clock.step % config.vis_frequency == 0:
                tr_agent.visualize_batch(data, 'train', outputs=outputs)

            pbar.set_description("EPOCH[{}][{}]".format(e, b))
            pbar.set_postfix(OrderedDict({k: v.item() for k, v in losses.items()}))

            # validation step
            if clock.step % config.val_frequency == 0:
                data = next(val_loader)
                outputs, losses = tr_agent.val_func(data)

                if config.vis and clock.step % config.vis_frequency == 0:
                    tr_agent.visualize_batch(data, 'validation', outputs=outputs)

            clock.tick()

        # update lr by scheduler
        tr_agent.update_learning_rate()

        # update teacher forcing ratio
        if config.module == 'seq2seq':
            tr_agent.update_teacher_forcing_ratio()

        clock.tock()

        if clock.epoch % config.save_frequency == 0:
            tr_agent.save_ckpt()
        tr_agent.save_ckpt('latest')
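# The training scripts in this collection wrap their validation loader in a
# cycle() helper that is never defined. It cannot be itertools.cycle, which
# would cache every batch from the first pass; a minimal sketch of the usual
# re-iterating generator these loops assume:
def cycle(iterable):
    """Yield items from `iterable` forever, restarting it when exhausted."""
    while True:
        for item in iterable:
            yield item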
def test_random(self):
    agent_data = get_data("random")
    role = ['management']
    agents = get_agent(agent_data, "random", role)
    agents_count = self.count_available(agents, role)
    self.assertEqual(agents_count, 1)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--continue', dest='cont', action='store_true',
                        help="continue training from checkpoint")
    parser.add_argument('--ckpt', type=str, default='latest', required=False,
                        help="desired checkpoint to restore")
    parser.add_argument('-g', '--gpu_ids', type=int, default=0, required=False,
                        help="specify gpu ids")
    parser.add_argument('--vis', action='store_true', default=False,
                        help="visualize output in training")
    args = parser.parse_args()

    # create experiment config
    config = get_config(args)
    print(config)

    # create network and training agent
    tr_agent = get_agent(config)
    print(tr_agent.net)

    # load from checkpoint if provided
    if args.cont:
        tr_agent.load_ckpt(args.ckpt)

    # writer = SummaryWriter()

    # create dataloader
    # train_loader = get_dataloader(PHASE_TRAINING, batch_size=config.batch_size, num_workers=2,
    #                               dataset_json="/home/huydd/train_noise/result_json/result.json")
    val_loader = get_dataloader(
        PHASE_TESTING, batch_size=config.batch_size, num_workers=2,
        dataset_json="/home/huydd/other_done/result_json/result.json")
    val_loader_step = get_dataloader(
        PHASE_TESTING, batch_size=config.batch_size, num_workers=2,
        dataset_json="/home/huydd/other_done/result_json/result.json")
    val_loader_step = cycle(val_loader_step)

    epoch_acc = tr_agent.evaluate(val_loader)
    print(epoch_acc)
def main():
    # create experiment config containing all hyperparameters
    config = get_config('train')

    # create network and training agent
    tr_agent = get_agent(config)

    # load from checkpoint if provided
    if config.cont:
        tr_agent.load_ckpt(config.ckpt)

    # create dataloader
    train_loader = get_dataloader('train', config)
    val_loader = get_dataloader('validation', config)
    val_loader = cycle(val_loader)

    # start training
    clock = tr_agent.clock
    for e in range(clock.epoch, config.nr_epochs):
        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # train step
            tr_agent.train_func(data)

            # visualize
            if config.vis and clock.step % config.vis_frequency == 0:
                tr_agent.visualize_batch(data, "train")

            pbar.set_description("EPOCH[{}][{}]".format(e, b))
            losses = tr_agent.collect_loss()
            pbar.set_postfix(OrderedDict({k: v.item() for k, v in losses.items()}))

            # validation step
            if clock.step % config.val_frequency == 0:
                data = next(val_loader)
                tr_agent.val_func(data)

                if config.vis and clock.step % config.vis_frequency == 0:
                    tr_agent.visualize_batch(data, "validation")

            clock.tick()

        tr_agent.update_learning_rate()
        clock.tock()

        if clock.epoch % config.save_frequency == 0:
            tr_agent.save_ckpt()
        tr_agent.save_ckpt('latest')
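# These agents expose a `clock` whose tick() runs once per batch and tock()
# once per epoch, with `epoch` and `step` read back for scheduling and
# checkpoint resumption. A minimal sketch consistent with that usage (the
# class name and starting values are assumptions):
class TrainClock:
    """Track the global step, the current epoch, and the step within it."""

    def __init__(self):
        self.epoch = 0
        self.minibatch = 0
        self.step = 0

    def tick(self):
        # advance one training batch
        self.minibatch += 1
        self.step += 1

    def tock(self):
        # advance one epoch
        self.epoch += 1
        self.minibatch = 0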
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--continue', dest='cont', action='store_true',
                        help="continue training from checkpoint")
    parser.add_argument('--ckpt', type=str, default='latest', required=False,
                        help="desired checkpoint to restore")
    parser.add_argument('-g', '--gpu_ids', type=int, default=0, required=False,
                        help="specify gpu ids")
    parser.add_argument('--vis', action='store_true', default=False,
                        help="visualize output in training")
    args = parser.parse_args()

    # create experiment config
    config = get_config(args)
    print(config)

    # create network and training agent
    tr_agent = get_agent(config)
    print(tr_agent.net)

    # load from checkpoint if provided
    if args.cont:
        tr_agent.load_ckpt(args.ckpt)

    # create dataloader
    train_loader = get_dataloader(PHASE_TRAINING, batch_size=config.batch_size,
                                  num_workers=config.num_workers)
    val_loader = get_dataloader(PHASE_TESTING, batch_size=config.batch_size,
                                num_workers=config.num_workers)
    val_loader_step = get_dataloader(PHASE_TESTING, batch_size=config.batch_size,
                                     num_workers=config.num_workers)
    val_loader_step = cycle(val_loader_step)
    # val_loader = cycle(val_loader)

    # start training
    clock = tr_agent.clock
    for e in range(clock.epoch, config.nr_epochs):
        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # train step
            outputs, losses = tr_agent.train_func(data)

            # visualize
            if args.vis and clock.step % config.visualize_frequency == 0:
                tr_agent.visualize_batch(data, PHASE_TRAINING, outputs)

            pbar.set_description("EPOCH[{}][{}]".format(e, b))
            pbar.set_postfix(OrderedDict({k: v.item() for k, v in losses.items()}))

            # validation step
            if clock.step % config.val_frequency == 0:
                # data = next(val_loader)
                data = next(val_loader_step)
                outputs, losses = tr_agent.val_func(data)

                if args.vis and clock.step % config.visualize_frequency == 0:
                    tr_agent.visualize_batch(data, PHASE_TESTING, outputs)

            clock.tick()

        tr_agent.evaluate(val_loader)
        tr_agent.update_learning_rate()
        clock.tock()

        if clock.epoch % config.save_frequency == 0:
            tr_agent.save_ckpt()
        tr_agent.save_ckpt('latest')
def main():
    test_data = True
    pretrain = True

    # create experiment config
    config = get_config()

    # create network and training agent
    tr_agent = get_agent(config)
    print(tr_agent.net)

    # load from checkpoint if provided
    if pretrain:
        tr_agent.load_ckpt("latest")

    # create dataloader
    train_loader = get_dataloader(config, 'train')
    val_loader = get_dataloader(config, 'validation')
    test_loader = get_dataloader(config, 'test')

    # start training
    clock = tr_agent.clock

    # test
    if test_data:
        pbar = tqdm(test_loader)
        writer = csv.writer(open("../result.csv", "w"))
        writer.writerow(["id", "clip_count"])
        for b, data in enumerate(pbar):
            outputs, losses = tr_agent.val_func(data[0].cuda(), data[1].cuda())
            outputs = outputs.argmax().cpu().numpy()
            writer.writerow([b + 25001, outputs])

    for e in range(clock.epoch, config.epochs):
        if e % config.val_frequency == 0:
            loss = 0
            for b, data in enumerate(val_loader):
                outputs, losses = tr_agent.val_func(data[0].cuda(), data[1].cuda())
                loss += losses['loss']
            loss /= len(val_loader)
            print("EPOCH {} valid loss : {}".format(e, loss))

        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # train step
            outputs, losses = tr_agent.train_func(data[0].cuda(), data[1].cuda())

            pbar.set_description("EPOCH[{}][{}]".format(e, b))
            pbar.set_postfix(OrderedDict({k: v.item() for k, v in losses.items()}))
            clock.tick()

        tr_agent.update_learning_rate()
        clock.tock()

        if clock.epoch % config.save_frequency == 0:
            tr_agent.save_ckpt()
        tr_agent.save_ckpt('latest')
# hyperparameters for the multi-agent run
time_steps = 80
episodes = 2000
interval = 500

# per-agent reward accumulator
acc_rew = np.zeros([hiders + seekers, episodes])

# build one agent per hider/seeker, optionally restoring saved weights
for i in range(hiders + seekers):
    agents.append(get_agent(env, i, model=2))
    if load_weights:
        agents[-1].load_weights("agent_%i_weights.h5f" % (i))

# per-episode bookkeeping, mirroring keras-rl's fit() loop:
# https://github.com/keras-rl/keras-rl/blob/master/rl/core.py
obs = None
rew = None
done = None
info = None
agent_obs = [[]] * (hiders + seekers)
agent_act = [[]] * (hiders + seekers)

if display:
    env.render()

for a in agents:
    a.training = True
    args = arg_parser.parse_args()
    return args


if __name__ == "__main__":
    # Parse Arguments
    args = parse_args()

    # Other Defaults
    individual_types = ['Susceptible', 'Infected', 'Immune', 'Vaccinated']
    color_list = ['black', 'red', 'white', 'blue']

    # RL Environment and Agent
    env = game_env(args.grid_size, individual_types, color_list, args.vax_size)
    agent = get_agent(env, args)

    # RL run
    episode_rewards = []
    eps_history = []
    for episode in range(args.max_epd):
        state = env.reset(args.grid_size)
        episode_reward = 0
        done = False
        step = 0
        while not done:
            action = agent.get_action(state)
            next_state, reward, done, _ = env.step(action)
            agent.learn(state, action, reward, next_state, done,
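# The loop above needs only get_action() and learn() from its agent; the
# learn() call is truncated in the source and likely takes more arguments.
# A minimal tabular epsilon-greedy Q-learning sketch with that interface
# (class name, hyperparameters, and the dict-keyed Q-table are assumptions,
# not this repo's implementation):
import random
from collections import defaultdict

import numpy as np


class QLearningAgent:
    def __init__(self, n_actions, alpha=0.1, gamma=0.99, epsilon=0.1):
        self.n_actions = n_actions
        self.alpha = alpha      # learning rate
        self.gamma = gamma      # discount factor
        self.epsilon = epsilon  # exploration rate
        self.q = defaultdict(lambda: np.zeros(n_actions))

    def _key(self, state):
        # flatten the grid state into a hashable Q-table key
        return tuple(np.asarray(state).flatten())

    def get_action(self, state):
        # explore with probability epsilon, otherwise act greedily
        if random.random() < self.epsilon:
            return random.randrange(self.n_actions)
        return int(np.argmax(self.q[self._key(state)]))

    def learn(self, state, action, reward, next_state, done):
        # one-step TD update toward the bootstrapped target
        key = self._key(state)
        target = reward if done else reward + self.gamma * np.max(self.q[self._key(next_state)])
        self.q[key][action] += self.alpha * (target - self.q[key][action])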
display = True
load_weights = False

env = hide_and_seek.make_env(n_hiders=hiders, n_seekers=seekers, n_boxes=boxes,
                             n_ramps=ramps, n_food=food, n_rooms=rooms)
# # probably shouldn't use those two, but was testing:
# rewardWrapper = hide_and_seek.HideAndSeekRewardWrapper(env, n_hiders=hiders, n_seekers=seekers)
# trackStatW = hide_and_seek.TrackStatWrapper(env, boxes, ramps, food)

# run one episode
env.seed(42)
env.reset()

agents = []
for i in range(hiders + seekers):
    agents.append(get_agent(env, i))
    if load_weights:
        agents[-1].load_weights("agent_%i_weights.h5f" % (i))

# https://github.com/keras-rl/keras-rl/blob/master/rl/core.py
obs = None
rew = None
done = None
info = None
agent_obs = [[]] * (hiders + seekers)
agent_act = [[]] * (hiders + seekers)

if display:
    env.render()

for a in agents:
    a.training = True
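# keras-rl agents drive training through forward()/backward() (see rl/core.py
# linked above), so a hand-rolled multi-agent step loop might look like the
# sketch below. How the joint observation and reward split across agents is
# an assumption about this environment, as is reusing the earlier time_steps
# horizon:
obs = env.reset()
for t in range(time_steps):
    # each agent picks an action from its own slice of the observation
    actions = [a.forward(obs[i]) for i, a in enumerate(agents)]
    obs, rew, done, info = env.step(actions)
    # backward() stores the experience and runs a training update
    for i, a in enumerate(agents):
        a.backward(rew[i], terminal=done)
    if display:
        env.render()
    if done:
        break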
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--continue', dest='continue_path', type=str, required=False)
    parser.add_argument('-g', '--gpu_ids', type=int, default=0, required=False,
                        help="specify gpu ids")
    parser.add_argument('--vis', action='store_true', default=False,
                        help="visualize output in training")
    args = parser.parse_args()

    # create experiment config ('stage' is expected to be defined at module level)
    config = get_config(stage)
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_ids)
    config.device = torch.device("cuda:0")
    print(config)

    # create soft link to experiment log directory
    if not os.path.exists('train_log'):
        os.symlink(config.exp_dir, 'train_log')

    # create network and training agent
    tr_agent = get_agent(config)

    # load from checkpoint if provided
    if args.continue_path:
        tr_agent.load_ckpt(args.continue_path)
    print(tr_agent.net)

    # create tensorboard writer
    train_tb = SummaryWriter(os.path.join(config.log_dir, 'train.events'))
    val_tb = SummaryWriter(os.path.join(config.log_dir, 'val.events'))

    # create dataloader
    train_loader = get_dataloader('train', batch_size=config.batch_size,
                                  num_workers=config.num_workers)
    val_loader = get_dataloader('validation', batch_size=config.batch_size,
                                num_workers=config.num_workers)
    val_loader = cycle(val_loader)

    # start training
    clock = tr_agent.clock
    for e in range(clock.epoch, config.nr_epochs):
        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # train step
            outputs, losses = tr_agent.train_func(data)
            losses_values = {k: v.item() for k, v in losses.items()}

            # record loss to tensorboard
            for k, v in losses_values.items():
                train_tb.add_scalar(k, v, clock.step)

            # visualize
            if args.vis and clock.step % config.visualize_frequency == 0:
                pass
                # with torch.no_grad():
                #     tr_agent.visualize_batch(data['path'][0], train_tb)

            pbar.set_description("EPOCH[{}][{}]".format(e, b))
            pbar.set_postfix(OrderedDict(losses_values))

            # validation step
            if clock.step % config.val_frequency == 0:
                data = next(val_loader)
                outputs, losses = tr_agent.val_func(data)
                losses_values = {k: v.item() for k, v in losses.items()}

                for k, v in losses_values.items():
                    val_tb.add_scalar(k, v, clock.step)

                if args.vis and clock.step % config.visualize_frequency == 0:
                    pass
                    # with torch.no_grad():
                    #     tr_agent.visualize_batch(data['path'][0], val_tb)

            clock.tick()

        train_tb.add_scalar('learning_rate',
                            tr_agent.optimizer.param_groups[-1]['lr'], clock.epoch)
        tr_agent.update_learning_rate()
        clock.tock()

        if clock.epoch % config.save_frequency == 0:
            tr_agent.save_ckpt()
        tr_agent.save_ckpt('latest.pth.tar')
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--continue', dest='cont', action='store_true',
                        help="continue training from checkpoint")
    parser.add_argument('--ckpt', type=str, default='latest', required=False,
                        help="desired checkpoint to restore")
    parser.add_argument('-g', '--gpu_ids', type=int, default=0, required=False,
                        help="specify gpu ids")
    parser.add_argument('--vis', action='store_true', default=False,
                        help="visualize output in training")
    args = parser.parse_args()

    # create experiment config
    config = get_config(args)
    print(config)

    # create network and training agent
    tr_agent = get_agent(config)
    print(tr_agent.net)

    # load from checkpoint if provided
    if args.cont:
        tr_agent.load_ckpt(args.ckpt)

    writer = SummaryWriter()

    # create dataloader
    train_loader = get_dataloader(
        PHASE_TRAINING, batch_size=config.batch_size, num_workers=2,
        dataset_json="/opt/hdd2/huydd/data_with_noise/train_data/result_json/train.json")
    val_loader = get_dataloader(
        PHASE_TESTING, batch_size=config.batch_size, num_workers=config.num_workers,
        dataset_json="/opt/hdd2/huydd/data_with_noise/val_data/result_json/val.json")
    val_loader_step = get_dataloader(
        PHASE_TESTING, batch_size=config.batch_size, num_workers=config.num_workers,
        dataset_json="/opt/hdd2/huydd/data_with_noise/val_data/result_json/val.json")
    val_loader_step = cycle(val_loader_step)
    # val_loader = cycle(val_loader)

    # start training
    clock = tr_agent.clock
    max_epoch_acc = 0
    for e in range(clock.epoch, config.nr_epochs):
        n = 0
        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # train step
            n += 1
            outputs, train_losses = tr_agent.train_func(data)
            if n == 1:
                train_losses_sum = train_losses['bce']
            else:
                train_losses_sum += train_losses['bce']

            # visualize
            # if args.vis and clock.step % config.visualize_frequency == 0:
            #     tr_agent.visualize_batch(data, "train", outputs)

            pbar.set_description("EPOCH[{}][{}]".format(e, b))
            pbar.set_postfix(OrderedDict({k: v.item() for k, v in train_losses.items()}))
            # print("\nTrain Loss {}".format(train_losses))

            # validation step
            if clock.step % config.val_frequency == 0:
                data = next(val_loader_step)
                outputs, losses = tr_agent.val_func(data)
                # print("Val Loss {}".format(losses))
                # visualize
                # if args.vis and clock.step % config.visualize_frequency == 0:
                #     tr_agent.visualize_batch(data, "validation", outputs)

            clock.tick()

        train_losses_sum = train_losses_sum / (n * config.batch_size)
        print("\nResult Epoch {} Train Loss {}".format(e, train_losses_sum))
        writer.add_scalar('Loss/train', train_losses_sum, e)

        # save the checkpoint with the best validation accuracy
        epoch_acc = tr_agent.evaluate(val_loader)
        print("Epoch {} - accuracy {}".format(e, epoch_acc))
        writer.add_scalar('Val accuracy', epoch_acc, e)
        if epoch_acc > max_epoch_acc:
            tr_agent.save_ckpt('best_acc')
            max_epoch_acc = epoch_acc

        tr_agent.update_learning_rate()
        clock.tock()

        if clock.epoch % config.save_frequency == 0:
            tr_agent.save_ckpt()
        tr_agent.save_ckpt('latest')

    writer.close()
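# The evaluate() calls above return a scalar accuracy over a loader. A
# minimal sketch of such a method, assuming classification batches of
# (inputs, labels) and a PyTorch network at self.net (both assumptions
# about this agent):
import torch


def evaluate(self, loader):
    """Return classification accuracy of self.net over `loader`."""
    self.net.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            logits = self.net(inputs.cuda())
            preds = logits.argmax(dim=1).cpu()
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return correct / total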
import gym
from tensorboardX import SummaryWriter

from agent import get_agent
from common import config
from train_process import single_ac_train
from wrapper import atari_env

if __name__ == '__main__':
    env = gym.make('CartPole-v1')
    # env = atari_env(config.game_name)
    actor = get_agent('actor', n_ac=config.n_ac, lr=1e-2, test=True)
    critic = get_agent('critic', lr=1e-2, discount=config.discount, test=True)
    single_ac_train(env, actor, critic, config.base_path, config.batch_size,
                    config.epsilon, config.save_interval, config.update_interval,
                    config.learning_starts, config.memory_size, config.max_epoch,
                    config.max_iter)