def run(config):
    """Seed RNGs, build the dataset and launch training according to the k-fold config.

    config keys read here: "random_seed", "kfcv", "kfcv_serial", "folds";
    the full dict is forwarded to Dataset/ConvModel via **config.
    """
    print(config)
    checkConfigParams(config)
    # Seed both numpy and the stdlib RNG so runs are reproducible.
    np.random.seed(config["random_seed"])
    random.seed(config["random_seed"])
    dataset = Dataset(**config)
    if config["kfcv"] == 1 and config["kfcv_serial"] == 1:
        # Serial k-fold cross-validation: a fresh model per fold.
        # precondition: the dataset has generated a pool of data, and the folds have been generated
        for i in range(config["folds"]):
            neuralnet = ConvModel(dataset, **config)
            # validate that the folds work
            # dataset.kfcvPrintFoldInfo()
            gc.collect()  # release the previous fold's model before building the next
            runNet(neuralnet, config, i)
    else:
        # The original code had separate `elif kfcv == 1` and `else` branches whose
        # bodies were byte-identical (single model, single run), so they are merged.
        # precondition (kfcv == 1): pool of data and folds exist;
        # otherwise: the dataset provides distinct training and testing sets.
        neuralnet = ConvModel(dataset, **config)
        runNet(neuralnet, config)
def main():
    """Parse CLI args, create the output folder, load a trained model and run inference."""
    args = parser.parse_args()
    # Refuse to clobber a previous experiment with the same name.
    if os.path.isdir(args.output_folder):
        raise Exception("Experiment name " + args.output_folder + " already exists.")
    os.mkdir(args.output_folder)
    # Persist the parsed args so the run can be reproduced/inspected later.
    # (os.path.join instead of string "+" so the path is built portably.)
    with open(os.path.join(args.output_folder, "args.pckl"), "wb") as f:
        pickle.dump(args, f)
    transform = None
    if args.normalize:
        transform = NormalizeFixedFactor(1280)
    # Text-conditioned models need the token-aware dataset variant.
    if "Text" in args.model:
        dataset = FastTextPoseDataset(args.data, args.max_frames, transform,
                                      use_rand_tokens=args.rand_tokens)
    else:
        dataset = FastPoseDataset(args.data, args.max_frames, transform)
    loader = DataLoader(dataset, batch_size=1, collate_fn=collate_function)
    if args.model == "Conv":
        model = ConvModel(args.conv_channels, activation="ReLU", pos_emb=args.conv_pos_emb)
    elif args.model == "TransformerEncoder":
        model = TransformerEncoder(args, 100)
    elif args.model == "ConvTransformerEncoder":
        model = ConvTransformerEncoder(args, 21 * 2)
    elif args.model == "TransformerEnc":
        model = TransformerEnc(ninp=12 * 2, nhead=4, nhid=100, nout=21 * 2,
                               nlayers=4, dropout=0.0)
    elif args.model == "TextPoseTransformer":
        model = TextPoseTransformer(n_tokens=1000, n_joints=12, joints_dim=2, nhead=4,
                                    nhid=128, nout=21 * 2, n_enc_layers=4,
                                    n_dec_layers=4, dropout=args.transformer_dropout)
    else:
        # Was a bare ValueError(); include the offending value for debuggability.
        raise ValueError("Unknown model type: {}".format(args.model))
    model.load_state_dict(torch.load(args.model_checkpoint))
    infer_utterance(model, loader, args)
def run(args, use_cuda, output_dir):
    """Run all trials: for each trial, build the configured network, save its initial
    weights, then sweep every (lambda, learning_rate) pair from the same init.

    Args:
        args: parsed CLI namespace (n_trials, network_type, model hyper-params, ...).
        use_cuda: move the model to GPU when True.
        output_dir: root directory; per-trial/per-lambda subdirs are created inside.
    """
    trial_list = list(range(args.n_trials))
    np.random.shuffle(trial_list)  # randomize trial execution order
    for trial_i in trial_list:
        trial_dir = os.path.join(output_dir, 'trial_{}'.format(trial_i))
        os.makedirs(trial_dir, exist_ok=True)
        loaders, params = get_dataloaders(args.batch_size, trial_i, args.dataset,
                                          args.augment_data, early_stop=args.early_stop)
        if args.network_type == 'fc':
            model = DenseModel(input_dim=np.prod(params['input_shape']),
                               output_dim=params['output_dim'],
                               hidden_nodes=args.hidden_nodes,
                               num_modules=args.n_modules,
                               activation=args.activation)
        elif args.network_type == 'conv':
            model = ConvModel(input_shape=params['input_shape'],
                              output_dim=params['output_dim'],
                              num_filters=args.filters,
                              kernel_sizes=args.kernels,
                              strides=args.strides,
                              dilations=args.dilations,
                              num_modules=args.n_modules,
                              activation=args.activation,
                              final_layer=args.conv_final_layer)
        elif args.network_type == 'densenet':
            model = DenseNet(input_shape=params['input_shape'],
                             output_dim=params['output_dim'],
                             growth_rate=args.densenet_k,
                             depth=args.densenet_depth,
                             reduction=args.densenet_reduction,
                             bottleneck=args.densenet_bottleneck,
                             num_modules=args.n_modules)
        else:
            # Previously an unknown type fell through silently and `model`
            # was undefined, crashing later with UnboundLocalError.
            raise ValueError('Unknown network_type: {}'.format(args.network_type))
        logging.debug(args)
        logging.debug('Parameters: {}'.format(model.n_parameters()))
        device = torch.device("cuda" if use_cuda else "cpu")
        model = model.to(device)
        model.reset_parameters()
        # Snapshot the initial weights so every lambda starts from the same point.
        weight_path = os.path.join(trial_dir, 'initial_weights.pt')
        torch.save(model.state_dict(), weight_path)
        # (dropped unused `lambda_i` from the original enumerate.)
        for lambda_, learning_rate in zip(args.lambda_values, args.learning_rates):
            model.load_state_dict(torch.load(weight_path))
            lambda_dir = os.path.join(trial_dir, str(lambda_))
            os.makedirs(lambda_dir, exist_ok=True)
            do_lambda_value(model, lambda_, learning_rate, args, loaders,
                            params['distribution'], device, lambda_dir)
def main():
    """Build the transform pipeline and dataset from CLI args, restore a trained
    model from its checkpoint, and run utterance-level inference on the h5 data."""
    args = parser.parse_args()
    if os.path.isfile(args.out_h5data):
        raise Exception("Experiment name " + args.out_h5data + " already exists.")

    # Assemble the preprocessing pipeline step by step.
    steps = []
    # TODO Encode body points also differentially to some joint not only hand wrt wrist
    if args.dif_encoding:
        steps.extend([WristDifference(), ChestDifference()])
    # TODO Change Normalization scheme to fixed bone dist
    if args.normalize:
        steps.append(NormalizeFixedFactor(1280))

    # Target selection fixes the joint counts and appends the item builder.
    if args.predict == "right_index":
        joints_in, joints_out = 12 + 17, 4
        steps.append(BuildIndexItem())
    elif args.predict == "right_3fingers":
        joints_in, joints_out = 12 + 9, 12
        steps.append(Build3fingerItem())
    elif args.predict == "right_hand":
        # n_input = 12
        joints_in, joints_out = 8, 21
        steps.append(BuildRightHandItem())
    else:
        raise ValueError()
    pipeline = torchvision.transforms.Compose(steps)

    # Text-conditioned models read the paired text/pose h5 dataset.
    if "Text" in args.model:
        ds = TextPoseH5Dataset(args.valid_h5data, args.valid_textdata,
                               args.max_frames, pipeline,
                               selection=args.frames_selection,
                               use_rand_tokens=args.rand_tokens)
    else:
        ds = FastPoseDataset(args.data, args.max_frames, pipeline)
    batch_loader = DataLoader(ds, batch_size=128, collate_fn=collate_function_h5)

    if args.model == "Conv":
        net = ConvModel(args.conv_channels, "ReLU", pos_emb=args.conv_pos_emb)
    elif args.model == "ConvTransformerEncoder":
        net = ConvTransformerEncoder(args, 21 * 2)
    elif args.model == "TransformerEnc":
        net = TransformerEnc(ninp=12 * 2, nhead=4, nhid=128, nout=21 * 2,
                             nlayers=4, dropout=args.transformer_dropout)
    elif args.model == "TextPoseTransformer":
        net = TextPoseTransformer(n_tokens=1000, n_joints=joints_in, joints_dim=2,
                                  nhead=4, nhid=128, nout=joints_out * 2,
                                  n_enc_layers=4, n_dec_layers=4,
                                  dropout=args.transformer_dropout)
    else:
        raise ValueError()

    net.load_state_dict(torch.load(args.model_checkpoint))
    infer_utterance_h5(net, batch_loader, args)
def create_model_and_optimizer(args, dataset, params=None):
    """Create the model from the dataset vocabularies and load/initialize its parameters.

    Args:
        args: namespace with model type, embedding sizes, init range, lr, cuda flag.
        dataset: provides .vocab["word"/"ent"/"num"/"label"] with "w2i" maps.
        params: optional state_dict to restore; when None, weights are
            uniform-initialized and pad embeddings zeroed.

    Returns:
        (model, optimizer) tuple; optimizer is plain SGD over all parameters.
    """
    vocab_sizes = [len(dataset.vocab[k]["w2i"]) for k in ["word", "ent", "num"]]
    embed_sizes = [
        args.word_embed_size,
        args.entdist_embed_size,
        args.numdist_embed_size,
    ]
    # Padding indices for each of the three embedding tables.
    pads = [dataset.vocab[k]["w2i"]["PAD"] for k in ["word", "ent", "num"]]
    if args.model == "LSTM":
        model = BLSTMModel(
            vocab_sizes,
            embed_sizes,
            sum(embed_sizes),
            args.blstm_fc_hidden_dim,
            len(dataset.vocab["label"]["w2i"]),
            pads,
            args.dropout,
        )
    elif args.model == "CNN":
        model = ConvModel(
            vocab_sizes,
            embed_sizes,
            sum(embed_sizes),
            args.conv_fc_hidden_dim,
            len(dataset.vocab["label"]["w2i"]),
            pads,
            args.num_filters,
            args.dropout,
        )
    else:
        # Previously an unknown model name fell through with `model` undefined,
        # producing an UnboundLocalError further down instead of a clear error.
        raise ValueError("Unknown model type: {}".format(args.model))
    if params is not None:
        model.load_state_dict(params)
    else:
        # initialize all the model weights
        for p in model.parameters():
            torch.nn.init.uniform_(p, -args.uniform_init, args.uniform_init)
        # Make sure that pad vectors are zero
        model.embed.pad_init()
    if args.cuda:
        model = model.to("cuda")
    optimizer = torch.optim.SGD(model.parameters(), lr=args.initial_lr)
    return model, optimizer
# Build train/validation datasets and loaders from the CLI args, then select the
# model architecture by name. (This fragment is the interior of a larger training
# entry point — `args`/`transform` are defined before this chunk.)
train_dataset = FastPoseDataset(args.train_data, args.max_frames, transform)
valid_dataset = FastPoseDataset(args.valid_data, args.max_frames, transform)
train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, collate_fn=collate_function)
valid_dataloader = DataLoader(valid_dataset, batch_size=args.batch_size, collate_fn=collate_function)
# Instantiate the requested architecture; unknown names fail fast.
if args.model == "Conv":
    model = ConvModel(args.conv_channels, "ReLU", pos_emb=args.conv_pos_emb)
elif args.model == "TransformerEncoder":
    model = TransformerEncoder(args, 100)
elif args.model == "ConvTransformerEncoder":
    # 21 joints * 2 coordinates output
    model = ConvTransformerEncoder(args, 21 * 2)
elif args.model == "TransformerEnc":
    model = TransformerEnc(ninp=12 * 2, nhead=4, nhid=100, nout=21 * 2, nlayers=4, dropout=0.0)
else:
    raise ValueError()
print(args.resume)
def create_model(self):
    """Build a ConvModel sized to the training set's channel counts, using the
    dropout rate from the CNN options."""
    in_channels = self.train_set.in_channels()
    out_channels = self.train_set.out_channels()
    dropout_rate = self.helper.opt.cnn.dropout
    return ConvModel(in_channels, out_channels, dropout=dropout_rate)
# Inference script: run a trained ConvModel over the preprocessed KPI test set
# and collect the argmax class predictions.
import sys
import pandas as pd
import torch
from torch.utils.data import DataLoader
from datasets import KPIDataset
from models import ConvModel
from util import print_progress_bar

# evaluate=True presumably switches the dataset to unlabeled/test mode — TODO confirm in datasets.py
dataset = KPIDataset('../data/test_preprocessed.csv', seq_length=1001, step_width=1, evaluate=True)
model = ConvModel(1001)
# Restore trained weights from the local checkpoint file.
model.load_state_dict(torch.load('./state'))
model = model.cuda()
# Positional args: batch_size=256, shuffle=False (order must match the CSV rows).
loader = DataLoader(dataset, 256, False)
iter_per_epoch = len(loader)
result = []
with torch.no_grad():
    for i, x in enumerate(loader):
        x = x.cuda()
        out = model(x).data.cpu().numpy()
        # Class index with highest score per sample.
        result.extend(list(out.argmax(1)))
        print_progress_bar(i, iter_per_epoch)
# Re-read the source CSV (presumably to attach predictions to it further below).
df = pd.read_csv('../data/test_preprocessed.csv')
def __init__(self, gpu, mode, args):
    """Set up one distributed worker: model, checkpoint restore, datasets,
    loaders, optimizer/scheduler — then immediately start via self.run().

    Args:
        gpu: this process's GPU index, used as the distributed rank.
        mode: 'train' or an evaluation mode; gates optimizer/scheduler setup.
        args: full experiment namespace (paths, hyper-params, gpus list, ...).
    """
    self.rank = gpu
    self.mode = mode
    self.args = args
    # Hyper-params with defaults when absent from args.
    self.lr = getattr(self.args, 'lr', 1e-5)
    self.l2 = getattr(self.args, 'l2', 0.0001)
    self.model_type = getattr(self.args, 'model_type', 'ConvRNN')
    if self.args.on_cpu:
        self.device = torch.device('cpu')
    else:
        self.device = torch.device('cuda:{}'.format(self.rank))
        # NOTE(review): reconstructed placement — process group is only needed
        # on the multi-GPU path; confirm it was not meant to run unconditionally.
        dist.init_process_group('nccl', world_size=len(self.args.gpus), rank=self.rank)
    torch.manual_seed(self.args.seed)
    if self.rank == 0:
        print('Creating model...')
    if self.model_type == 'CNN':
        self.model = ConvModel(self.args, self.device).to(self.device)
    else:
        self.model = ConvRNNModel(self.args, self.device).to(self.device)
    if not self.args.on_cpu:
        self.model = torch.nn.parallel.DistributedDataParallel(self.model, device_ids=[self.rank], find_unused_parameters=False)
    # Resolve which checkpoint (if any) to restore.
    ckpt_file = None
    if self.mode != 'train':
        # ckpt_file = self.args.out_dir
        if self.args.train:
            # Walk backwards from the last epoch until an existing checkpoint is found.
            ckpt_epoch_offset = 1
            ckpt_file = os.path.join(self.args.model_save_dir, self.args.ckpt_file_tmplt.format(self.args.epochs - ckpt_epoch_offset))
            while not os.path.exists(ckpt_file) and self.args.epochs - ckpt_epoch_offset >= 0:
                ckpt_epoch_offset += 1
                ckpt_file = os.path.join(self.args.model_save_dir, self.args.ckpt_file_tmplt.format(self.args.epochs - ckpt_epoch_offset))
        else:
            ckpt_file = self.args.ckpt_file
    if ckpt_file is not None:
        if self.rank == 0:
            print('Loading model from checkpoint...')
        # Remap tensors saved on cuda:0 onto this worker's GPU.
        map_location = {'cuda:{}'.format(0): 'cuda:{}'.format(gpu_id) for gpu_id in args.gpus}
        state_dict = torch.load(ckpt_file, map_location=map_location)
        self.model.load_state_dict(state_dict)  # , strict=False
    if self.rank == 0:
        print('Loading dataset...')
    dataset_start_time = time.time()
    if self.model_type == 'CNN':
        # undersample probability of 1.0 means "keep everything" -> pass None.
        frame_undersample_p = getattr(self.args, 'frame_undersample_p', 1.0)
        frame_undersample_p = None if frame_undersample_p == 1.0 else frame_undersample_p
        self.dataset = PhysicsFramesDataset(self.mode, self.args.data, self.args, undersample=frame_undersample_p)
    else:
        self.dataset = PhysicsDataset(self.mode, self.args.data, self.args)
    if self.rank == 0:
        print('Time to read dataset: {0:.2f}s'.format(time.time() - dataset_start_time))
    # Cross-entropy class weights, either from the dataset or uniform.
    if getattr(self.args, 'weight_xent_loss', False):
        self.label_weights = self.dataset.label_weights
    else:
        self.label_weights = [1.0, 1.0]
    if self.rank == 0:
        print('Setting label weights to {}...'.format(self.label_weights))
    # NOTE(review): `.module` assumes the DDP wrapper; on the on_cpu path the
    # model is unwrapped and this attribute would not exist — verify.
    self.model.module.set_label_weights(self.label_weights)
    if self.args.on_cpu:
        data_sampler = None
    else:
        data_sampler = torch.utils.data.distributed.DistributedSampler(self.dataset, num_replicas=args.world_size, rank=self.rank, shuffle=True if self.mode == 'train' else False)
    # shuffle=False because the DistributedSampler (when present) handles shuffling.
    self.data_loader = DataLoader(self.dataset, batch_size=self.args.batch_size, shuffle=False, num_workers=self.args.n_data_workers, pin_memory=True, sampler=data_sampler)
    # Iterations per epoch across all GPUs.
    self.n_iters = int(math.ceil(len(self.dataset) / (self.args.batch_size * len(self.args.gpus))))
    # Optional auxiliary (dev) dataset for periodic evaluation during training.
    self.aux_dataset = None
    self.aux_data_loader = None
    self.aux_n_iters = None
    if self.mode == 'train' and (self.args.eval or self.args.eval_every > 0):
        if self.rank == 0:
            print('Loading aux dataset...')
        if self.model_type == 'CNN':
            frame_undersample_p = getattr(self.args, 'frame_undersample_p', 1.0)
            frame_undersample_p = None if frame_undersample_p == 1.0 else frame_undersample_p
            self.aux_dataset = PhysicsFramesDataset('dev', self.args.data, self.args, force_unbalanced=True, undersample=frame_undersample_p)
        else:
            self.aux_dataset = PhysicsDataset('dev', self.args.data, self.args, force_unbalanced=True, disallow_supersample=True)
        if self.args.on_cpu:
            aux_data_sampler = None
        else:
            aux_data_sampler = torch.utils.data.distributed.DistributedSampler(self.aux_dataset, num_replicas=args.world_size, rank=self.rank)
        self.aux_data_loader = DataLoader(self.aux_dataset, batch_size=self.args.batch_size, shuffle=False, num_workers=self.args.n_data_workers, pin_memory=True, sampler=aux_data_sampler)
        self.aux_n_iters = int(math.ceil(len(self.aux_dataset) /
                                         (self.args.batch_size * len(self.args.gpus))))
    # TensorBoard writer only on the rank-0 training worker.
    self.summary_writer = None
    if self.rank == 0 and self.mode == 'train':
        self.summary_writer = SummaryWriter(log_dir=self.args.tb_dir)
    self.n_epochs = 1
    if self.mode == 'train':
        self.n_epochs = self.args.epochs
        opt_parms = filter(lambda p: p.requires_grad, self.model.parameters())
        self.optimizer = optim.Adam(opt_parms, lr=self.lr, weight_decay=self.l2)
        n_total_iters = self.n_iters * self.n_epochs
        n_warmup_steps = self.args.warmup_proportion * n_total_iters
        if n_warmup_steps > 0:
            # get_linear_schedule_with_warmup
            self.scheduler = get_cosine_schedule_with_warmup(self.optimizer, num_warmup_steps=n_warmup_steps, num_training_steps=n_total_iters)
            # self.scheduler = get_linear_schedule_with_warmup(self.optimizer,
            #                                                  num_warmup_steps=n_warmup_steps,
            #                                                  num_training_steps=n_total_iters)
        else:
            self.scheduler = None
    # Kick off the worker's main loop immediately after construction.
    self.run()
def main(name, test=False, chkpt=None, device="cuda"):
    """Train (or watch, with test=True) a DQN agent on Breakout.

    Args:
        name: wandb run name (training mode only).
        test: when True, render the env and act greedily instead of training.
        chkpt: optional path to a saved state_dict to resume from.
        device: torch device string for the online/target networks.
    """
    if not test:
        wandb.init(project="dqn-tutorial", name=name)
    do_boltzman_exploration = False
    memory_size = 1000000
    min_rb_size = 50000
    sample_size = 32
    lr = 0.0001
    # eps_max = 1.0
    eps_min = 0.1
    eps_decay = 0.999999
    env_steps_before_train = 16
    tgt_model_update = 5000
    epochs_before_test = 1500
    env = gym.make("Breakout-v0")
    env = FrameStackingAndResizingEnv(env, 84, 84, 4)
    test_env = gym.make("Breakout-v0")
    test_env = FrameStackingAndResizingEnv(test_env, 84, 84, 4)
    last_observation = env.reset()
    m = ConvModel(env.observation_space.shape, env.action_space.n, lr=lr).to(device)
    if chkpt is not None:
        m.load_state_dict(torch.load(chkpt))
    tgt = ConvModel(env.observation_space.shape, env.action_space.n).to(device)
    update_tgt_model(m, tgt)
    rb = ReplayBuffer()
    steps_since_train = 0
    epochs_since_tgt = 0
    epochs_since_test = 0
    # Negative start: fill the replay buffer for min_rb_size steps before learning.
    step_num = -1 * min_rb_size
    episode_rewards = []
    rolling_reward = 0
    tq = tqdm()
    try:
        while True:
            if test:
                env.render()
                time.sleep(0.05)
            tq.update(1)
            # BUG FIX: eps_min was defined but never applied, so epsilon decayed
            # toward zero; clamp exploration at its intended floor.
            eps = max(eps_min, eps_decay ** step_num)
            if test:
                eps = 0
            if do_boltzman_exploration:
                logits = m(torch.Tensor(last_observation).unsqueeze(0).to(device))[0]
                action = torch.distributions.Categorical(logits=logits).sample().item()
            else:
                # Epsilon-greedy: random action with prob eps, else greedy argmax-Q.
                if random() < eps:
                    action = env.action_space.sample()
                else:
                    action = m(torch.Tensor(last_observation).unsqueeze(0).to(device)).max(-1)[-1].item()
            observation, reward, done, info = env.step(action)
            rolling_reward += reward
            rb.insert(Sarsd(last_observation, action, reward, observation, done))
            last_observation = observation
            if done:
                episode_rewards.append(rolling_reward)
                if test:
                    print(rolling_reward)
                rolling_reward = 0
                # BUG FIX: the reset observation was assigned to `observation`
                # and discarded, so the next episode started from the previous
                # episode's terminal frame.
                last_observation = env.reset()
            steps_since_train += 1
            step_num += 1
            if (
                (not test)
                and rb.idx > min_rb_size
                and steps_since_train > env_steps_before_train
            ):
                loss = train_step(m, rb.sample(sample_size), tgt, env.action_space.n, device)
                wandb.log(
                    {
                        "loss": loss.detach().cpu().item(),
                        "eps": eps,
                        "avg_reward": np.mean(episode_rewards),
                    },
                    step=step_num,
                )
                episode_rewards = []
                epochs_since_tgt += 1
                epochs_since_test += 1
                if epochs_since_test > epochs_before_test:
                    rew, frames = run_test_episode(m, test_env, device)  # frames: T, H, W, C
                    wandb.log({'test_reward': rew, 'test_video': wandb.Video(frames.transpose(0, 3, 1, 2), str(rew), fps=25, format='mp4')})
                    epochs_since_test = 0
                if epochs_since_tgt > tgt_model_update:
                    print("updating target model")
                    update_tgt_model(m, tgt)
                    epochs_since_tgt = 0
                    torch.save(tgt.state_dict(), f"models/{step_num}.pth")
                steps_since_train = 0
    except KeyboardInterrupt:
        pass
    env.close()
def main(name=None, chkpt=None, test_run=False, local_run=False):
    """Train a DQN agent on BreakoutDeterministic-v4.

    Sync to wandb cloud as standard, but sync locally if local_run and not at
    all if test_run. Hyper-parameters (memory_size, lr, eps_*, ...) come from
    the constants module this file imports.

    Note: the original used bare string statements ("Create enviroments...")
    as section markers; those are runtime no-ops and are comments here.
    """
    if not test_run:
        if local_run:
            os.environ["WANDB_MODE"] = "dryrun"
        if name is None:  # was `name == None`
            name = input("Name the run: ")
        wandb.init(project="atari-breakout", name=name, config={
            'memory_size': memory_size,
            'min_rb_size': min_rb_size,
            'sample_size': sample_size,
            'lr': lr,
            'eps_min': eps_min,
            'eps_decay': eps_decay,
            'discount_factor': discount_factor,
            'env_steps_before_train': env_steps_before_train,
            'epochs_before_tgt_model_update': epochs_before_tgt_model_update,
            'epochs_before_test': epochs_before_test,
            'episode_max_steps': episode_max_steps,
            'optimizer_function': optimizer_function.__name__,
            'exploration_method': exploration_method.__name__,
            'env_type': env_type.__name__
        })
    # Create environments and reset.
    env = env_type(gym.make("BreakoutDeterministic-v4"), 84, 84, 4)
    test_env = env_type(gym.make("BreakoutDeterministic-v4"), 84, 84, 4)
    last_observation = env.reset()
    # Set the model and target model.
    m = ConvModel(env.observation_space.shape, env.action_space.n, lr=lr)
    if chkpt is not None:
        m.load_state_dict(torch.load(os.path.join(os.path.dirname(__file__), f"Models/{chkpt}")))
    target = ConvModel(env.observation_space.shape, env.action_space.n)
    update_target_model(m, target)
    # Create replay buffer and bookkeeping counters.
    rb = ReplayBuffer(memory_size)
    steps_since_train = 0
    epochs_since_tgt = 0
    epochs_since_test = 0
    step_num = -1 * min_rb_size  # Want to run the iteration for min_rb_size before starting to actually learn
    episode_rewards = []
    total_reward = 0
    tq = tqdm()
    try:
        while True:
            if test_run:
                env.render()
                time.sleep(0.05)
            tq.update(1)
            # Updating epsilon (clamped at eps_min; forced greedy in test runs).
            eps = eps_decay ** (step_num)
            if test_run:
                eps = 0
            elif eps < eps_min:
                eps = eps_min
            # Exploration vs exploitation: Boltzmann with eps_decay vs.
            # Epsilon Greedy (defined in constants.py).
            action = exploration_method(model=m, env=env, last_observation=last_observation, eps=eps)
            # Perform step and insert observation into the replay buffer.
            observation, reward, done, _ = env.step(action)
            total_reward += reward
            rb.insert(GameInformation(last_observation, action, reward, observation, done))
            last_observation = observation
            # Reset and append total_reward to episode_rewards if done.
            if done:
                episode_rewards.append(total_reward)
                if test_run:
                    print(total_reward)
                total_reward = 0
                # BUG FIX: the reset observation was assigned to `observation`
                # and discarded; the new episode must start from the reset frame.
                last_observation = env.reset()
            # Train if enough env steps have elapsed since the last update.
            steps_since_train += 1
            step_num += 1
            if ((not test_run) and rb.i > min_rb_size and steps_since_train > env_steps_before_train):
                loss = train_step(m, rb.sample(sample_size), target, env.action_space.n)
                if not local_run:
                    wandb.log(
                        {
                            "loss": loss.detach().item(),
                            "eps": eps,
                            "avg_reward": np.mean(episode_rewards),
                        },
                        step=step_num,
                    )
                episode_rewards = []
                epochs_since_tgt += 1
                epochs_since_test += 1
                # Run a test episode periodically.
                if epochs_since_test > epochs_before_test:
                    rew, frames = run_test_episode(m, test_env)
                    if not local_run:
                        wandb.log({'test_reward': rew, 'test_video': wandb.Video(frames.transpose(0, 3, 1, 2), str(rew), fps=25, format='mp4')})
                    epochs_since_test = 0
                # Update target model periodically and checkpoint it.
                if epochs_since_tgt > epochs_before_tgt_model_update:
                    print("updating target model")
                    update_target_model(m, target)
                    epochs_since_tgt = 0
                    torch.save(target.state_dict(), os.path.join(os.path.dirname(__file__), f"Models/{step_num}.pth"))
                steps_since_train = 0
    except KeyboardInterrupt:
        pass
    env.close()
# Training script: fit a ConvModel on the preprocessed KPI training data via
# the project's Trainer utility.
import sys
from torch.nn import CrossEntropyLoss
from util import Trainer
from datasets import KPIDataset
from models import ConvModel

dataset = KPIDataset(
    '../data/train_preprocessed.csv',
    seq_length=1001,
    step_width=1
)
model = ConvModel(1001)
# Adam-style optimizer keyword arguments forwarded to the Trainer.
args = {
    "lr": 0.5e-4,
    "betas": (0.9, 0.999),
    "eps": 1e-8,
    "weight_decay": 0.0
}
# NOTE(review): this Trainer(...) call is truncated in this chunk — the
# remaining keyword arguments and closing parenthesis continue past the
# visible source.
trainer = Trainer(
    model,
    dataset,
    batch_size=512,
    epochs=100,
    log_nth=800,
    validation_size=0.2,
    optim_args=args,
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True) # Map for labels, so that we can visualize the results better label_map = { 0: 'Apu', 1: 'Bart', 2: 'Mr. Burns', 3: 'Chief Wiggum', 4: 'Edna', 5: 'Grandpa', 6: 'Homer', 7: 'Krusty', 8: 'Lisa', 9: 'Marge', 10: 'Milhouse', 11: 'Moe', 12: 'Flanders', 13: 'Nelson', 14: 'Patty', 15: 'Skinner', 16: 'Selma', 17: 'Smithers' } # Load test dataset test_dataset = torchvision.datasets.ImageFolder('./data/test', transform=transform) validation_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, drop_last=True) from models import ConvModel model = ConvModel(channels=3, num_classes=18).to(device) criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate) print(model) trainer = create_supervised_trainer(model, optimizer, criterion, device=device) metrics = { "accuracy": Accuracy(), "loss": Loss(criterion) } evaluator = create_supervised_evaluator(model,metrics, device=device) @trainer.on(Events.ITERATION_COMPLETED(every=10)) def log_training_loss(engine):
def dqnmain(project_name, do_boltzman_exploration=False, test=False, chkpt=None,
            hypeparams=hyperparameter_defaults, steps=1000, device='cuda'):
    """Train (or evaluate, with test=True) a DQN agent on the env named in hypeparams.

    Breakout uses ConvModel with frame stacking; any other env uses the plain Model.
    In training mode hyper-parameters come from the wandb config; in test mode
    they come straight from `hypeparams` and the run stops after `steps` steps.
    """
    image_arr = []  # rendered frames collected in test mode
    if not test:
        wdbrun = wandb.init(project=project_name, config=hypeparams,
                            name=hypeparams['run_name'], reinit=True, monitor_gym=False)
        # run.save("*.pth")
        config = wdbrun.config
        max_reward = config.max_reward
        max_steps = config.max_steps
        memory_size = config.memory_size
        min_rb_size = config.min_rb_size
        sample_size = config.sample_size
        env_steps_before_train = config.env_steps_before_train
        tgt_model_update = config.tgt_model_update
        reward_scaler = config.reward_scaler
        eps_min = config.eps_min
        eps_decay = config.eps_decay
        gamma = config.gamma
        learning_rate = config.learning_rate
    else:
        max_reward = hypeparams['max_reward']
        max_steps = steps
        memory_size = hypeparams['memory_size']
        min_rb_size = hypeparams['min_rb_size']
        sample_size = hypeparams['sample_size']
        env_steps_before_train = hypeparams['env_steps_before_train']
        tgt_model_update = hypeparams['tgt_model_update']
        reward_scaler = hypeparams['reward_scaler']
        eps_min = hypeparams['eps_min']
        eps_decay = hypeparams['eps_decay']
        gamma = hypeparams['gamma']
        learning_rate = hypeparams['learning_rate']
    env = gym.make(hypeparams['env_name'])
    if hypeparams['env_name'] == 'Breakout-v0':  # TODO
        env = FrameStackingAndResizingEnv(env, 84, 84, 4)  # change stack size here
    env._max_episode_steps = 4000
    test_env = gym.make(hypeparams['env_name'])
    if hypeparams['env_name'] == 'Breakout-v0':  # TODO
        test_env = FrameStackingAndResizingEnv(test_env, 84, 84, 4)  # change stack size here
    test_env._max_episode_steps = 4000
    last_observation = env.reset()
    # Online network: conv for pixel input (Breakout), dense otherwise.
    if hypeparams['env_name'] == 'Breakout-v0':
        m = ConvModel(env.observation_space.shape, env.action_space.n, learning_rate).to(device)
    else:
        m = Model(env.observation_space.shape, env.action_space.n, learning_rate).to(device)
    if chkpt is not None:
        m.load_state_dict(torch.load(chkpt))
    # Target model, gets updated fewer times.
    if hypeparams['env_name'] == 'Breakout-v0':
        tgt = ConvModel(env.observation_space.shape, env.action_space.n).to(device)
    else:
        tgt = Model(env.observation_space.shape, env.action_space.n).to(device)
    update_tgt_model(m, tgt)
    rb = ReplayBuffer(memory_size)
    steps_since_train = 0
    epochs_since_tgt = 0
    step_num = -1 * min_rb_size  # fill the buffer before learning starts
    i = 0
    episode_rewards = []
    rolling_reward = 0
    solved = False
    try:
        while (not solved) and step_num < max_steps:
            if test:
                screen = env.render('rgb_array')
                image_arr.append(screen)
                eps = 0
            else:
                # BUG FIX: eps_min was read from the config but never applied,
                # so epsilon decayed toward zero; clamp at the intended floor.
                eps = max(eps_min, eps_decay ** step_num)
            if do_boltzman_exploration:
                # Sample from the softmax over Q-values instead of epsilon-greedy.
                if hypeparams['env_name'] == 'Breakout-v0':
                    logits = m(torch.Tensor(last_observation).unsqueeze(0).to(device))[0]
                else:
                    logits = m(torch.Tensor(last_observation).to(device))[0]
                action = torch.distributions.Categorical(logits=logits).sample().item()
            else:
                if random.random() < eps:
                    action = env.action_space.sample()
                else:
                    if hypeparams['env_name'] == 'Breakout-v0':
                        action = m(torch.Tensor(last_observation).unsqueeze(0).to(device)).max(-1)[-1].item()
                    else:
                        action = m(torch.Tensor(last_observation).to(device)).max(-1)[-1].item()
            observation, reward, done, info = env.step(action)
            rolling_reward += reward
            reward = reward / reward_scaler  # scale reward for training only
            rb.insert(SARS(last_observation, action, reward, done, observation))
            last_observation = observation
            if done:
                episode_rewards.append(rolling_reward)
                if test:
                    print(rolling_reward)
                rolling_reward = 0
                # BUG FIX: the reset observation was assigned to `observation`
                # and discarded; the new episode must start from the reset frame.
                last_observation = env.reset()
            steps_since_train += 1
            i += 1
            step_num += 1
            if (
                not test
            ) and rb.idx > min_rb_size and steps_since_train > env_steps_before_train:
                loss = train_step(m, rb.sample(sample_size), tgt, env.action_space.n, gamma, device)
                ave_reward = np.mean(episode_rewards)
                wdbrun.log(
                    {
                        'loss': loss.detach().cpu().item(),
                        'epsilon': eps,
                        'avg_reward': ave_reward
                    },
                    step=step_num)
                if ave_reward >= max_reward:
                    solved = True
                episode_rewards = []
                epochs_since_tgt += 1
                # print(step_num, loss.detach().item())
                if epochs_since_tgt > tgt_model_update:
                    # print('updating target model')
                    update_tgt_model(m, tgt)
                    rew, frames = run_test_episode(m, test_env, device)
                    # frames.shape == (T, H, W, C)
                    # wandb.log({'test_reward': rew, 'test_video': wandb.Video(frames.transpose(0, 3, 1, 2), str(rew), fps=25, format='mp4')})
                    wandb.log({'test_reward': rew})
                    epochs_since_tgt = 0
                    torch.save(
                        tgt.state_dict(),
                        f"{wandb.run.dir}/{hypeparams['run_name']}_{step_num}.pth"
                    )
                steps_since_train = 0
                if ave_reward >= max_reward:
                    solved = True
        wandb.join()
        env.close()
    except KeyboardInterrupt:
        sys.exit()