def main(): """Run main function for the program.""" args = get_args() args_dict = vars(args) print(args_dict) print('Argument list to program') print('\n'.join( ['--{0} {1}'.format(arg, args_dict[arg]) for arg in args_dict])) print('\n') gt = process_file(args.gt) pred = process_file(args.pred, mode='pred') # threshs = [0.5, 0.6, 0.7, 0.8, 0.9] scores = [0, 2000, 3000, 5000, 8000, 9000, 10000] scores = np.linspace(0, 16000, 5) total_p, total_r, fps, tps = (calculate_different_recalls_single_thresh( gt, pred, 0.5, scores)) plot_PR(total_p, total_r, [0.5]) plot_ROC(fps, tps)
def main():
    args = get_args()
    log_dir = create_log_dir(args)
    if not args.test:
        writer = SummaryWriter(log_dir)
    else:
        writer = None

    SEED = 721
    if args.ram_obs or args.env == "slimevolley_v0":
        obs_type = 'ram'
    else:
        obs_type = 'rgb_image'
    env = make_env(args.env, SEED, obs_type=obs_type)
    state_spaces = env.observation_spaces
    action_spaces = env.action_spaces
    print('state_spaces: ', state_spaces, ', action_spaces: ', action_spaces)
    learner_args = {'device': args.device}
    env.reset()
    print(env.agents)
    agents = env.agents

    if args.train_both:
        fixed_agents = []
    else:
        # SlimeVolley: opponent is the first, the second agent is the learnable one
        fixed_agents = ['first_0']

    if obs_type == 'ram':
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'MLP',
                                 fixed_agents, learner_args,
                                 **hyperparams).to(args.device)
    else:
        # model = PPODiscrete(state_space, action_space, 'CNN', learner_args, **hyperparams).to(device)
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'CNN',
                                 fixed_agents, learner_args,
                                 **hyperparams).to(args.device)
    load_model(model, args)

    # Share policy/value parameters and optimizer state across worker processes.
    for individual_model in model.agents.values():
        individual_model.policy.share_memory()
        individual_model.policy_old.share_memory()
        individual_model.value.share_memory()
        ShareParameters(individual_model.optimizer)

    path = 'model/' + args.env
    os.makedirs(path, exist_ok=True)
    if args.fictitious:
        path = path + '/fictitious_'

    processes = []
    for p in range(args.num_envs):
        # args contains both shared objects (model, writer) and per-process values
        process = Process(target=parallel_rollout,
                          args=(p, args.env, model, writer, max_eps,
                                max_timesteps, selfplay_interval, args.render,
                                path, args.against_baseline, args.selfplay,
                                args.fictitious, SEED))
        process.daemon = True  # child processes exit when the main process stops
        processes.append(process)
    [p.start() for p in processes]
    [p.join() for p in processes]  # wait for all workers to finish
    env.close()
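# A minimal sketch (assumption; the original entry-point guard is not shown in
# this fragment): because the rollout workers above are launched with
# multiprocessing.Process, main() should only run under the __main__ guard so
# that child processes re-importing this module do not spawn their own workers.
if __name__ == '__main__':
    main()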
            'n_iter': n_iter,
            'opts': opts,
            'val_l1_norm': val_l1_norm,
            'best_val_l1_norm': best_val_l1_norm
        }
        model_path = os.path.join(opts.save_path, 'model_best.net')
        torch.save(save_state, model_path)

    save_state = {
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'n_iter': n_iter,
        'opts': opts,
        'val_l1_norm': val_l1_norm,
        'best_val_l1_norm': best_val_l1_norm
    }
    model_path = os.path.join(opts.save_path, 'model_latest.net')
    torch.save(save_state, model_path)


if __name__ == '__main__':
    opts = get_args()
    set_random_seeds(opts.seed)

    if opts.mode == 'train':
        train(opts)
    else:
        raise NotImplementedError('Unrecognised mode')
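# A minimal sketch (assumption, not part of the original script): a checkpoint
# written by the torch.save() calls above can be restored by loading the saved
# dict and pushing its state_dicts back into the model and optimizer. The
# function name and arguments here are illustrative placeholders.
def load_checkpoint(checkpoint_path, model, optimizer, device='cpu'):
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    # bookkeeping values saved alongside the weights
    return checkpoint['epoch'], checkpoint['n_iter'], checkpoint['best_val_l1_norm']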
def main():
    args = get_args()
    log_dir = create_log_dir(args)
    if not args.test:
        writer = SummaryWriter(log_dir)
    else:
        writer = None

    SEED = 721
    if args.ram_obs or args.env == "slimevolley_v0":
        obs_type = 'ram'
    else:
        obs_type = 'rgb_image'

    # env = make_env(args.env, SEED, obs_type=obs_type)
    # https://github.com/thu-ml/tianshou/blob/master/tianshou/env/venvs.py
    VectorEnv = [DummyVectorEnv, SubprocVectorEnv][1]
    envs = VectorEnv([
        lambda: make_env(args.env, obs_type=obs_type)
        for _ in range(args.num_envs)
    ])
    envs.seed(np.random.randint(1000, size=args.num_envs).tolist())  # random seeding

    state_spaces = envs.observation_spaces[0]  # same for all env instances, so just take one
    action_spaces = envs.action_spaces[0]  # same for all env instances, so just take one
    print('state_spaces: ', state_spaces, ', action_spaces: ', action_spaces)
    learner_args = {'device': args.device}
    envs.reset()
    agents = envs.agents[0]  # same for all env instances, so just take one
    print('agents: ', agents)

    if args.train_both:
        fixed_agents = []
    else:
        # SlimeVolley: opponent is the first, the second agent is the learnable one
        fixed_agents = ['first_0']

    if obs_type == 'ram':
        model = ParallelMultiPPODiscrete(args.num_envs, agents, state_spaces,
                                         action_spaces, 'MLP', fixed_agents,
                                         learner_args,
                                         **hyperparams).to(args.device)
    else:
        model = ParallelMultiPPODiscrete(args.num_envs, agents, state_spaces,
                                         action_spaces, 'CNN', fixed_agents,
                                         learner_args,
                                         **hyperparams).to(args.device)
    load_model(model, args)

    path = f'model/{args.env}/'
    os.makedirs(path, exist_ok=True)
    if args.fictitious:
        path = path + 'fictitious_'

    parallel_rollout(envs, model, writer, max_eps=max_eps,
                     max_timesteps=max_timesteps,
                     selfplay_interval=selfplay_interval, render=args.render,
                     model_path=path, against_baseline=args.against_baseline,
                     selfplay=args.selfplay, fictitious=args.fictitious,
                     test=args.test, args=args)
    envs.close()
from utils.log import Train_Log
from utils.loss import hinge_loss, accuracy, bce_loss
from utils.arguments import get_args

import torch
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.optim import Adam
import numpy as np
from torchvision import transforms

import model
import dataset

shuffle_dataset = True
args = get_args()
val_split = args.test_split
logger = Train_Log(args)


def gen_split_sampler(dataset):
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    if shuffle_dataset:
        np.random.seed(42)
        np.random.shuffle(indices)
    val_size = int(np.floor(dataset_size * val_split))
    train_indices, val_indices = indices[val_size:], indices[:val_size]
    return SubsetRandomSampler(train_indices), SubsetRandomSampler(val_indices)


def evaluate(model, testset, device):
    # eval on valid set
    loss_val_, acc_val_ = 0., 0.
    model.eval()
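# A minimal usage sketch (assumption, not from the original file): the two
# samplers returned by gen_split_sampler() are intended to drive separate
# DataLoaders over the same underlying dataset. A TensorDataset stands in for
# the project's own dataset class here, and the batch size is arbitrary.
def _split_loaders_example():
    from torch.utils.data import TensorDataset
    toy_set = TensorDataset(torch.randn(100, 3, 32, 32),
                            torch.randint(0, 2, (100,)))
    train_sampler, val_sampler = gen_split_sampler(toy_set)
    train_loader = DataLoader(toy_set, batch_size=16, sampler=train_sampler)
    val_loader = DataLoader(toy_set, batch_size=16, sampler=val_sampler)
    return train_loader, val_loader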
def main():
    args = get_args()
    log_dir = create_log_dir(args)
    if not args.test:
        writer = SummaryWriter(log_dir)
    else:
        writer = None

    SEED = 721
    if args.ram_obs or args.env == "slimevolley_v0":
        obs_type = 'ram'
    else:
        obs_type = 'rgb_image'

    # TODO used for providing spaces info, can also modify SubprocVecEnv wrapper
    env = make_env(args.env, SEED, obs_type=obs_type)
    # https://stable-baselines.readthedocs.io/en/master/guide/vec_envs.html?highlight=multiprocessing
    envs = SubprocVecEnv([
        lambda: make_env(args.env, obs_type=obs_type)
        for _ in range(args.num_envs)
    ], start_method='spawn')
    # envs.seed(np.random.randint(1000, size=args.num_envs).tolist())  # random seeding
    envs.seed(SEED)  # fix seeding

    state_spaces = env.observation_spaces
    action_spaces = env.action_spaces
    print('state_spaces: ', state_spaces, ', action_spaces: ', action_spaces)
    learner_args = {'device': args.device}
    env.reset()
    agents = env.agents
    print('agents: ', agents)

    if args.train_both:
        fixed_agents = []
    else:
        # SlimeVolley: opponent is the first, the second agent is the learnable one
        fixed_agents = ['first_0']

    if obs_type == 'ram':
        model = ParallelMultiPPODiscrete(args.num_envs, agents, state_spaces,
                                         action_spaces, 'MLP', fixed_agents,
                                         learner_args,
                                         **hyperparams).to(args.device)
    else:
        model = ParallelMultiPPODiscrete(args.num_envs, agents, state_spaces,
                                         action_spaces, 'CNN', fixed_agents,
                                         learner_args,
                                         **hyperparams).to(args.device)
    load_model(model, args)

    path = f"model/{args.env}/"
    os.makedirs(path, exist_ok=True)
    if args.fictitious:
        path = path + 'fictitious_'

    parallel_rollout(envs, model, writer, max_eps=max_eps,
                     max_timesteps=max_timesteps,
                     selfplay_interval=selfplay_interval, render=args.render,
                     model_path=path, against_baseline=args.against_baseline,
                     selfplay=args.selfplay, fictitious=args.fictitious,
                     test=args.test)
    envs.close()
def main():
    args = get_args()
    print_args(args)
    log_dir = create_log_dir(args)
    if not args.test:
        writer = SummaryWriter(log_dir)
    else:
        writer = None

    SEED = 721
    if args.ram_obs or args.env == "slimevolley_v0":
        obs_type = 'ram'
    else:
        obs_type = 'rgb_image'
    env = make_env(args.env, SEED, obs_type=obs_type)
    state_spaces = env.observation_spaces
    action_spaces = env.action_spaces
    print('state_spaces: ', state_spaces, ', action_spaces: ', action_spaces)
    learner_args = {'device': args.device}
    env.reset()
    print(env.agents)
    agents = env.agents

    if args.train_both:
        fixed_agents = []
    else:
        # SlimeVolley: opponent is the first, the second agent is the learnable one
        fixed_agents = ['first_0']

    path = f"model/{args.env}/"
    os.makedirs(path, exist_ok=True)
    data_path = f"data/{args.env}/"
    os.makedirs(data_path, exist_ok=True)

    if obs_type == 'ram':
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'MLP',
                                 fixed_agents, learner_args,
                                 **hyperparams).to(args.device)
    else:
        # model = PPODiscrete(state_space, action_space, 'CNN', learner_args, **hyperparams).to(device)
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'CNN',
                                 fixed_agents, learner_args,
                                 **hyperparams).to(args.device)
        path = path + 'cnn_'

    if args.selfplay:
        os.makedirs(path + 'selfplay/', exist_ok=True)
    load_model(model, args)
    if args.fictitious:
        path = path + 'fictitious_'

    eval_env = make_env(args.env, np.random.randint(0, 100), obs_type=obs_type)
    evaluater = Evaluater(eval_env, max_timesteps)

    parallel_rollout(env, model, writer, evaluater=evaluater, max_eps=max_eps,
                     max_timesteps=max_timesteps,
                     selfplay_interval=selfplay_interval, render=args.render,
                     model_path=path, against_baseline=args.against_baseline,
                     selfplay=args.selfplay, fictitious=args.fictitious,
                     test=args.test)
    env.close()