def main(args):
    # Read configs
    with open(args.cfg_path, "r") as fp:
        configs = json.load(fp)

    # Update the configs based on command line args
    arg_dict = vars(args)
    for key in arg_dict:
        if key in configs and arg_dict[key] is not None:
            configs[key] = arg_dict[key]

    configs = utils.ConfigMapper(configs)
    configs.attack_eps = float(configs.attack_eps)
    configs.attack_lr = float(configs.attack_lr)

    print("configs mode: ", configs.mode)
    print("configs lr: ", configs.lr)
    print("configs size: ", configs.size)

    configs.save_path = os.path.join(configs.save_path, configs.mode)
    experiment_name = exp_name(configs)
    configs.save_path = os.path.join(configs.save_path, experiment_name)
    pathlib.Path(configs.save_path).mkdir(parents=True, exist_ok=True)

    trainer = Trainer(configs)
    trainer.train()
    print("training is over!!!")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning-rate', '-lr', type=float, default=1e-3)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--data-parallel', action='store_true')
    parser.add_argument('--num-d-iterations', type=int, default=1)
    args = parser.parse_args()
    args.cuda = torch.cuda.is_available() and not args.no_cuda
    print(args)

    device = torch.device('cuda' if args.cuda else 'cpu')

    net_g = Generator(ch=128).to(device)
    net_d = Discriminator(ch=128).to(device)

    optim_g = optim.Adam(net_g.parameters(), lr=args.learning_rate,
                         betas=(0.5, 0.999))
    optim_d = optim.Adam(net_d.parameters(), lr=args.learning_rate,
                         betas=(0.5, 0.999))

    dataloader = get_cat_dataloader()

    trainer = Trainer(net_g, net_d, optim_g, optim_d, dataloader, device,
                      args.num_d_iterations)
    os.makedirs('samples', exist_ok=True)
    trainer.train(args.epochs)
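# A minimal sketch of the alternating update scheme a Trainer with
# num_d_iterations presumably runs: several discriminator steps per generator
# step, here with non-saturating BCE losses. This is an illustrative
# assumption, not the repo's actual Trainer; it assumes net_d outputs a
# (batch, 1) logit and net_g maps (batch, z_dim) noise to images.
import torch
import torch.nn as nn

def gan_step(net_g, net_d, optim_g, optim_d, real, z_dim, d_iters, device):
    bce = nn.BCEWithLogitsLoss()
    b = real.size(0)
    ones = torch.ones(b, 1, device=device)
    zeros = torch.zeros(b, 1, device=device)
    for _ in range(d_iters):
        # discriminator step: real -> 1, detached fakes -> 0
        fake = net_g(torch.randn(b, z_dim, device=device)).detach()
        loss_d = bce(net_d(real), ones) + bce(net_d(fake), zeros)
        optim_d.zero_grad()
        loss_d.backward()
        optim_d.step()
    # generator step: try to make the discriminator output 1 on fakes
    fake = net_g(torch.randn(b, z_dim, device=device))
    loss_g = bce(net_d(fake), ones)
    optim_g.zero_grad()
    loss_g.backward()
    optim_g.step()
    return loss_d.item(), loss_g.item()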
def main():
    create_logging()
    sess = tf.Session()
    dl = IMDBDataLoader(sess)
    model = Model(dl)
    trainer = Trainer(sess, model, dl)
    trainer.train()
def main(args):
    # Read configs
    with open(args.cfg_path, "r") as fp:
        configs = json.load(fp)

    # Update the configs based on command line args
    arg_dict = vars(args)
    for key in arg_dict:
        if key in configs and arg_dict[key] is not None:
            configs[key] = arg_dict[key]

    configs = utils.ConfigMapper(configs)
    # attack budgets are given in pixel units; rescale to the [0, 1] image range
    configs.attack_eps = float(configs.attack_eps) / 255
    configs.attack_lr = float(configs.attack_lr) / 255

    configs.save_path = os.path.join(configs.save_path, configs.mode, configs.alg)
    pathlib.Path(configs.save_path).mkdir(parents=True, exist_ok=True)

    if configs.mode == 'train':
        trainer = Trainer(configs)
        trainer.train()
    elif configs.mode == 'eval':
        evaluator = Evaluator(configs)
        evaluator.eval()
    elif configs.mode == 'vis':
        visualizer = Visualizer(configs)
        visualizer.visualize()
    else:
        raise ValueError('mode should be train, eval or vis')
def main(args):
    # Read configs
    with open(args.cfg_path, "r") as fp:
        configs = json.load(fp)

    # Update the configs based on command line args
    arg_dict = vars(args)
    for key in arg_dict:
        if key in configs and arg_dict[key] is not None:
            configs[key] = arg_dict[key]

    configs = utils.ConfigMapper(configs)
    configs.attack_eps = float(configs.attack_eps) / 255
    configs.attack_lr = float(configs.attack_lr) / 255

    print("configs mode: ", configs.mode)
    print("configs lr: ", configs.lr)

    configs.save_path = os.path.join(configs.save_path, configs.mode)
    experiment_name = exp_name(configs)
    configs.save_path = os.path.join(configs.save_path, experiment_name)
    pathlib.Path(configs.save_path).mkdir(parents=True, exist_ok=True)

    # settings (note: this reassignment is never used below; save_path was
    # already built from exp_name(configs) above)
    experiment_name = "resnet18_Adversarial_Training_margin" + '_lr_' + str(
        configs.lr) + '_alpha_' + str(configs.alpha) + '_seed_' + str(
        configs.seed) + '_epsilon_' + str(configs.attack_eps)

    trainer = Trainer(configs)
    trainer.train()
    print("training is over!!!")
def train2():
    global training_data2, n2, t2

    layers = []
    layers.append({'type': 'input', 'out_sx': 28, 'out_sy': 28, 'out_depth': 1})
    layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'sigmoid'})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'

    n2 = Net(layers)
    print 'Net made...'
    print n2

    t2 = Trainer(n2, {'method': 'adadelta', 'batch_size': 20, 'l2_decay': 0.001})
    print 'Trainer made...'

    print 'In training of smaller net...'
    print 'k', 'time\t\t ', 'loss\t ', 'training accuracy'
    print '----------------------------------------------------'
    try:
        for x, y in training_data2:
            stats = t2.train(x, y)
            print stats['k'], stats['time'], stats['loss'], stats['accuracy']
    except:  # hit control-c or other
        return
def run_small_net():
    global training_data2, n2, t2, testing_data

    layers = []
    layers.append({'type': 'input', 'out_sx': 24, 'out_sy': 24, 'out_depth': 1})
    #layers.append({'type': 'fc', 'num_neurons': 50, 'activation': 'relu'})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'

    n2 = Net(layers)
    print 'Smaller Net made...'
    print n2

    t2 = Trainer(n2, {'method': 'sgd', 'momentum': 0.0})
    print 'Trainer made for smaller net...'

    print 'In training of smaller net...'
    print 'k', 'time\t\t ', 'loss\t ', 'training accuracy'
    print '----------------------------------------------------'
    try:
        for x, y in training_data2:
            stats = t2.train(x, y)
            print stats['k'], stats['time'], stats['loss'], stats['accuracy']
    except:  # hit control-c or other
        pass

    print 'Testing smaller net: 5000 trials'
    right = 0
    count = 5000
    for x, y in sample(testing_data, count):
        n2.forward(x)
        right += n2.getPrediction() == y
    accuracy = float(right) / count * 100
    print accuracy
def main():
    # capture the config path from the run arguments,
    # then process the json configuration file
    try:
        args = get_args()
        config = process_config(args.config)
    except:
        print("missing or invalid arguments")
        exit(0)

    # create tensorflow session
    tf.reset_default_graph()
    sess = tf.Session(config=tf_config)
    # create an instance of the model you want
    model = DLPDE_Model(config)
    model.load(sess)
    # create your data generator
    train_data, test_data, input_train_extra = creat_dataset(config)
    # create tensorboard logger
    logger = Logger(sess, config)
    # create the trainer and pass all previous components to it
    trainer = Trainer(sess, model, train_data, test_data, config, logger)
    # here you train your model
    trainer.train()
def main(config):
    logger = config.get_logger('train')

    data_loader = config.init_obj('data_loader', module_dataloader)
    torch.hub.set_dir(config['weights_path'])
    model = config.init_obj('arch', module_model)
    logger.info(model)

    # FIXME: refactor needed
    if config['data_loader']['args']['self_supervised']:
        criterion = torch.nn.CrossEntropyLoss()
    else:
        criterion = torch.nn.CrossEntropyLoss(
            weight=data_loader.get_label_proportions().to('cuda'))
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler,
                                   optimizer)

    trainer = Trainer(model.get_model(), criterion, metrics, optimizer,
                      config=config,
                      train_data_loader=data_loader.train,
                      valid_data_loader=data_loader.val,
                      test_data_loader=data_loader.test,
                      lr_scheduler=lr_scheduler)
    trainer.train()
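# For reference, a minimal sketch of what an init_obj helper like the one
# above typically does (an assumption based on the common pytorch-template
# convention, not necessarily this repo's implementation): instantiate
# config[name]['type'] from `module` with config[name]['args'].
def init_obj(config, name, module, *args, **kwargs):
    cls_name = config[name]['type']
    cls_kwargs = dict(config[name]['args'])
    cls_kwargs.update(kwargs)  # explicit kwargs override the config file
    return getattr(module, cls_name)(*args, **cls_kwargs)

# e.g. config = {'optimizer': {'type': 'Adam', 'args': {'lr': 1e-3}}}
#      optimizer = init_obj(config, 'optimizer', torch.optim, model.parameters())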
def run_big_net():
    global training_data, testing_data, n, t, training_data2

    training_data = load_data()
    testing_data = load_data(False)
    training_data2 = []
    print 'Data loaded...'

    layers = []
    layers.append({'type': 'input', 'out_sx': 24, 'out_sy': 24, 'out_depth': 1})
    layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'relu',
                   'drop_prob': 0.5})
    #layers.append({'type': 'fc', 'num_neurons': 800, 'activation': 'relu', 'drop_prob': 0.5})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'

    n = Net(layers)
    print 'Net made...'
    print n

    t = Trainer(n, {'method': 'sgd', 'momentum': 0.0})
    print 'Trainer made...'

    print 'In training...'
    print 'k', 'time\t\t ', 'loss\t ', 'training accuracy'
    print '----------------------------------------------------'
    try:
        for x, y in training_data:
            stats = t.train(x, y)
            print stats['k'], stats['time'], stats['loss'], stats['accuracy']
            training_data2.append((x, n.getPrediction()))
    except:  # hit control-c or other
        pass

    print 'In testing: 5000 trials'
    right = 0
    count = 5000
    for x, y in sample(testing_data, count):
        n.forward(x)
        right += n.getPrediction() == y
    accuracy = float(right) / count * 100
    print accuracy
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='configs/config.json')
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--parallel', action='store_true')
    args = parser.parse_args()
    args.cuda = torch.cuda.is_available() and not args.no_cuda
    print(args)

    device = torch.device('cuda' if args.cuda else 'cpu')

    config = load_json(args.config)

    model = MNISTNet()
    if args.parallel:
        model = nn.DataParallel(model)
    model.to(device)

    optimizer = optim.Adam(model.parameters(), **config['adam'])
    scheduler = optim.lr_scheduler.StepLR(optimizer, **config['steplr'])

    train_loader, valid_loader = mnist_loader(**config['dataset'])

    trainer = Trainer(model, optimizer, train_loader, valid_loader, device)

    output_dir = os.path.join(config['output_dir'],
                              datetime.now().strftime('%Y%m%d_%H%M%S'))
    os.makedirs(output_dir, exist_ok=True)

    # save config to output dir
    save_json(config, os.path.join(output_dir, 'config.json'))

    for epoch in range(config['epochs']):
        # note: since PyTorch 1.1, scheduler.step() should be called after the
        # epoch's optimizer updates; calling it first is the pre-1.1 idiom
        scheduler.step()
        train_loss, train_acc = trainer.train()
        valid_loss, valid_acc = trainer.validate()

        print(
            'epoch: {}/{},'.format(epoch + 1, config['epochs']),
            'train loss: {:.4f}, train acc: {:.2f}%,'.format(
                train_loss, train_acc * 100),
            'valid loss: {:.4f}, valid acc: {:.2f}%'.format(
                valid_loss, valid_acc * 100))

        torch.save(
            model.state_dict(),
            os.path.join(output_dir, 'model_{:04d}.pt'.format(epoch + 1)))
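# A hypothetical sketch of the Trainer interface the loop above assumes
# (names and details are illustrative, not the repo's class): train() and
# validate() each run one epoch and return (mean loss, accuracy).
import torch
import torch.nn.functional as F

class SketchTrainer:
    def __init__(self, model, optimizer, train_loader, valid_loader, device):
        self.model, self.optimizer = model, optimizer
        self.train_loader, self.valid_loader = train_loader, valid_loader
        self.device = device

    def _run(self, loader, train):
        self.model.train(train)
        total_loss, correct, n = 0.0, 0, 0
        with torch.set_grad_enabled(train):
            for x, y in loader:
                x, y = x.to(self.device), y.to(self.device)
                out = self.model(x)
                loss = F.cross_entropy(out, y)
                if train:
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                total_loss += loss.item() * x.size(0)
                correct += (out.argmax(1) == y).sum().item()
                n += x.size(0)
        return total_loss / n, correct / n

    def train(self):
        return self._run(self.train_loader, train=True)

    def validate(self):
        return self._run(self.valid_loader, train=False)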
def start():
    global training_data, testing_data, n, t

    training_data = load_data()
    testing_data = load_data(False)
    print 'Data loaded...'

    layers = []
    layers.append({'type': 'input', 'out_sx': 28, 'out_sy': 28, 'out_depth': 1})
    layers.append({'type': 'capsule', 'num_neurons': 30, 'num_recog': 3,
                   'num_gen': 4, 'num_pose': 2, 'dx': 1, 'dy': 0})
    layers.append({'type': 'regression', 'num_neurons': 28 * 28})
    print 'Layers made...'

    n = Net(layers)
    print 'Net made...'
    print n

    t = Trainer(n, {'method': 'sgd', 'batch_size': 20, 'l2_decay': 0.001})
    print 'Trainer made...'
def start():
    global training_data, network, t, N

    training_data = load_data()
    print 'Data loaded...'

    layers = []
    layers.append({'type': 'input', 'out_sx': 1, 'out_sy': 1, 'out_depth': N})
    layers.append({'type': 'fc', 'num_neurons': 50, 'activation': 'sigmoid'})
    layers.append({'type': 'fc', 'num_neurons': 10, 'activation': 'sigmoid'})
    layers.append({'type': 'fc', 'num_neurons': 50, 'activation': 'sigmoid'})
    layers.append({'type': 'regression', 'num_neurons': N})
    print 'Layers made...'

    network = Net(layers)
    print 'Net made...'
    print network

    t = Trainer(network, {'method': 'adadelta', 'batch_size': 4, 'l2_decay': 0.0001})
def start():
    global network, t

    layers = []
    layers.append({'type': 'input', 'out_sx': 30, 'out_sy': 30, 'out_depth': 1})
    layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'sigmoid'})
    layers.append({'type': 'softmax', 'num_classes': 7})
    print 'Layers made...'

    network = Net(layers)
    print 'Net made...'
    print network

    t = Trainer(network, {'method': 'adadelta', 'batch_size': 20, 'l2_decay': 0.001})
    print 'Trainer made...'
    print t
def start():
    global training_data, testing_data, n, t

    training_data = load_data()
    testing_data = load_data(False)
    print 'Data loaded...'

    layers = []
    layers.append({'type': 'input', 'out_sx': 28, 'out_sy': 28, 'out_depth': 1})
    layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'sigmoid'})
    layers.append({'type': 'regression', 'num_neurons': 28 * 28})
    print 'Layers made...'

    n = Net(layers)
    print 'Net made...'
    print n

    t = Trainer(n, {'method': 'sgd', 'batch_size': 20, 'l2_decay': 0.001})
    print 'Trainer made...'
def start():
    global training_data, testing_data, network, t, N

    all_data = load_data()
    shuffle(all_data)
    size = int(len(all_data) * 0.1)
    training_data, testing_data = all_data[size:], all_data[:size]
    print 'Data loaded, size: {}...'.format(len(all_data))

    layers = []
    layers.append({'type': 'input', 'out_sx': 1, 'out_sy': 1, 'out_depth': N})
    layers.append({'type': 'fc', 'num_neurons': 50, 'activation': 'sigmoid'})
    layers.append({'type': 'fc', 'num_neurons': 10, 'activation': 'sigmoid'})
    layers.append({'type': 'fc', 'num_neurons': 50, 'activation': 'sigmoid'})
    layers.append({'type': 'softmax', 'num_classes': N})
    print 'Layers made...'

    network = Net(layers)
    print 'Net made...'
    print network

    t = Trainer(network, {'method': 'adadelta', 'batch_size': 10, 'l2_decay': 0.0001})
def main():
    # Get arguments parsed
    args = get_args()

    # Setup for logging
    output_dir = 'output/{}'.format(
        datetime.now(timezone('Asia/Shanghai')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3])
    create_dir(output_dir)
    LogHelper.setup(log_path='{}/training.log'.format(output_dir),
                    level_str='INFO')
    _logger = logging.getLogger(__name__)

    # Save the configuration for logging purposes
    save_yaml_config(args, path='{}/config.yaml'.format(output_dir))

    # Reproducibility
    set_seed(args.seed)

    # Get dataset
    dataset = RealDataset(args.batch_size)
    _logger.info('Finished generating dataset')

    device = get_device()
    model = VAE(args.z_dim, args.num_hidden, args.input_dim, device)

    trainer = Trainer(args.batch_size, args.num_epochs, args.learning_rate)
    trainer.train_model(model=model, dataset=dataset, output_dir=output_dir,
                        device=device, input_dim=args.input_dim)
    _logger.info('Finished training model')

    # Visualizations
    samples = sample_vae(model, args.z_dim, device)
    plot_samples(samples)
    plot_reconstructions(model, dataset, device)
    _logger.info('All Finished!')
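# Hypothetical sketch of the sample_vae helper used above: decode random
# latent vectors through the VAE's decoder. The decode() method and argument
# names are assumptions for illustration, not this repo's API.
import torch

def sample_vae(model, z_dim, device, n_samples=64):
    model.eval()
    with torch.no_grad():
        z = torch.randn(n_samples, z_dim, device=device)  # z ~ N(0, I)
        return model.decode(z).cpu()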
def run(config, norm2d):
    train_loader, valid_loader = cifar10_loader(config.root, config.batch_size)

    model = CIFAR10Net(norm2d=norm2d)
    if config.cuda:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=config.lr, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)

    trainer = Trainer(model, optimizer, train_loader, valid_loader,
                      use_cuda=config.cuda)

    valid_acc_list = []
    for epoch in range(config.epochs):
        start = time()
        # note: since PyTorch 1.1, scheduler.step() belongs after the epoch's
        # optimizer updates; this is the pre-1.1 calling order
        scheduler.step()
        train_loss, train_acc = trainer.train(epoch)
        valid_loss, valid_acc = trainer.validate()

        print(
            'epoch: {}/{},'.format(epoch + 1, config.epochs),
            'train loss: {:.4f}, train acc: {:.2f}%,'.format(
                train_loss, train_acc * 100),
            'valid loss: {:.4f}, valid acc: {:.2f}%,'.format(
                valid_loss, valid_acc * 100),
            'time: {:.2f}s'.format(time() - start))

        save_dir = os.path.join(config.save_dir, norm2d)
        os.makedirs(save_dir, exist_ok=True)
        torch.save(model.state_dict(),
                   os.path.join(save_dir, 'model_{:04d}.pt'.format(epoch + 1)))

        valid_acc_list.append(valid_acc)

    return valid_acc_list
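# Example driver (illustrative; the norm2d values below are assumptions about
# what CIFAR10Net accepts, not taken from the source):
#     results = {norm: run(config, norm) for norm in ('batch', 'group', 'instance')}
#     best = {norm: max(accs) for norm, accs in results.items()}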
def start():
    global network, sgd

    layers = []
    layers.append({'type': 'input', 'out_sx': 1, 'out_sy': 1, 'out_depth': 4})
    layers.append({'type': 'softmax', 'num_classes': 3})  # svm works too
    print 'Layers made...'

    network = Net(layers)
    print 'Net made...'
    print network

    sgd = Trainer(network, {'momentum': 0.1, 'l2_decay': 0.001})
    print 'Trainer made...'
    print sgd
def start():
    global network, sgd

    layers = []
    layers.append({'type': 'input', 'out_sx': 1, 'out_sy': 1, 'out_depth': 7})
    #layers.append({'type': 'fc', 'num_neurons': 30, 'activation': 'relu'})
    #layers.append({'type': 'fc', 'num_neurons': 30, 'activation': 'relu'})
    layers.append({'type': 'softmax', 'num_classes': 2})  # svm works too
    print 'Layers made...'

    network = Net(layers)
    print 'Net made...'
    print network

    sgd = Trainer(network, {'momentum': 0.2, 'l2_decay': 0.001})
    print 'Trainer made...'
    print sgd
def test_trainer(model, optimizer, scheduler, data_loader, criterion):
    args = DictConfig({'experiment': {'debug': False}})
    App.init(args)
    output_dirpath = Path.cwd()

    # basic training run of the model
    pre_training_model = copy.deepcopy(model)
    logger = Logger()
    trainer = Trainer(model=model, train_dataloader=data_loader,
                      val_dataloader=data_loader, optimizer=optimizer,
                      scheduler=scheduler, epochs=2, logger=logger,
                      debug=True, criterion=criterion,
                      output_path=output_dirpath)
    trainer.train()
    check_variable_change(pre_training_model, model)
    assert trainer.epochs_trained == 2
    assert len(trainer.logger.metrics_dict) == 4
    assert 0 <= trainer.logger.metrics_dict['accuracy'] <= 1

    # check that we can load from a checkpoint
    pre_training_model = copy.deepcopy(model)
    trainer = Trainer(model=model, train_dataloader=data_loader,
                      val_dataloader=data_loader, optimizer=optimizer,
                      scheduler=scheduler, epochs=4, logger=logger,
                      debug=True, criterion=criterion,
                      checkpoint='last.pth', output_path=output_dirpath)
    trainer.train()
    os.remove('last.pth')
    os.remove('best.pth')
    check_variable_change(pre_training_model, model)
    assert trainer.epochs_trained == 4
def main(not_parsed_args):
    if len(not_parsed_args) > 1:
        print("Unknown args:%s" % not_parsed_args)
        exit()

    dpt = os.path.join(DATA_DIR, FLAGS.data_file)
    testp = os.path.join(DATA_DIR, FLAGS.test_file)
    DL = MnistLoader(testp, FLAGS, dpt)
    md = ConvLSTMNetwork(FLAGS)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    sess = tf.Session(config=config, graph=md.graph)
    print('Sess created.')

    trainer = Trainer(DL, md, sess, FLAGS)
    trainer.load_model()
    trainer.train()

    sess.close()
def start(conv):
    global training_data, testing_data, n, t

    training_data = load_data()
    testing_data = load_data(False)
    print 'Data loaded...'

    layers = []
    layers.append({'type': 'input', 'out_sx': 24, 'out_sy': 24, 'out_depth': 1})
    if conv:
        layers.append({'type': 'conv', 'sx': 5, 'filters': 8, 'stride': 1,
                       'pad': 2, 'activation': 'relu', 'drop_prob': 0.5})
        layers.append({'type': 'pool', 'sx': 2, 'stride': 2, 'drop_prob': 0.5})
    else:
        layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'relu'})
        #layers.append({'type': 'sim', 'num_neurons': 100, 'activation': 'mex'})
    layers.append({'type': 'softmax', 'num_classes': 10})
    print 'Layers made...'

    n = Net(layers)
    print 'Net made...'
    print n

    t = Trainer(n, {'method': 'adadelta', 'batch_size': 20, 'l2_decay': 0.001})
    print 'Trainer made...'
def start():
    global training_data, n, t

    training_data = load_data()
    print 'Data loaded...'

    layers = []
    layers.append({'type': 'input', 'out_sx': 1, 'out_sy': 1, 'out_depth': 255})
    layers.append({'type': 'fc', 'num_neurons': 100, 'activation': 'sigmoid'})
    layers.append({'type': 'softmax', 'num_classes': 255})
    print 'Layers made...'

    n = Net(layers)
    print 'Net made...'
    print n

    t = Trainer(n, {'method': 'adadelta', 'batch_size': 10, 'l2_decay': 0.0001})
    print 'Trainer made...'
class Brain(object):
    def __init__(self, num_states, num_actions, opt={}):
        """
        in number of time steps, of temporal memory.
        the ACTUAL input to the net will be (x,a) temporal_window times,
        followed by the current x. To feed no information from previous time
        steps into the value function, set this to 0.
        """
        self.temporal_window = getopt(opt, 'temporal_window', 1)
        """size of experience replay memory"""
        self.experience_size = getopt(opt, 'experience_size', 30000)
        """number of examples in experience replay memory before we begin learning"""
        self.start_learn_threshold = getopt(
            opt, 'start_learn_threshold',
            int(min(self.experience_size * 0.1, 1000)))
        """gamma is a crucial parameter that controls how much plan-ahead the agent does. In [0,1]"""
        self.gamma = getopt(opt, 'gamma', 0.8)
        """number of steps we will learn for"""
        self.learning_steps_total = getopt(opt, 'learning_steps_total', 100000)
        """how many steps of the above to perform only random actions (in the beginning)?"""
        self.learning_steps_burnin = getopt(opt, 'learning_steps_burnin', 3000)
        """what epsilon value do we bottom out on? 0.0 => purely deterministic policy at end"""
        self.epsilon_min = getopt(opt, 'epsilon_min', 0.05)
        """what epsilon to use at test time? (i.e. when learning is disabled)"""
        self.epsilon_test_time = getopt(opt, 'epsilon_test_time', 0.01)

        """
        advanced feature. Sometimes a random action should be biased towards
        some values; for example in flappy bird, we may want to choose to not
        flap more often.
        """
        if 'random_action_distribution' in opt:
            # this better sum to 1, and be of length num_actions
            self.random_action_distribution = opt['random_action_distribution']
            if len(self.random_action_distribution) != num_actions:
                print 'TROUBLE. random_action_distribution should be same length as num_actions.'
            s = sum(self.random_action_distribution)
            if abs(s - 1.0) > 0.0001:
                print 'TROUBLE. random_action_distribution should sum to 1!'
        else:
            self.random_action_distribution = []

        """
        states that go into the neural net to predict the optimal action look
        like x0,a0,x1,a1,x2,a2,...,xt; this variable controls the size of that
        temporal window. Actions are encoded as 1-of-k hot vectors.
        """
        self.net_inputs = num_states * self.temporal_window + \
            num_actions * self.temporal_window + num_states
        self.num_states = num_states
        self.num_actions = num_actions
        self.window_size = max(self.temporal_window, 2)  # must be at least 2, but more if we want more context
        self.state_window = zeros(self.window_size)
        self.action_window = zeros(self.window_size)
        self.reward_window = zeros(self.window_size)
        self.net_window = zeros(self.window_size)

        # create [state -> value of all possible actions] modeling net for the value function
        layers = []
        if 'layers' in opt:
            """
            this is an advanced usage feature, because the size of the input
            to the network and the number of actions must check out.
            """
            layers = opt['layers']
            if len(layers) < 2:
                print 'TROUBLE! must have at least 2 layers'
            if layers[0]['type'] != 'input':
                print 'TROUBLE! first layer must be input layer!'
            if layers[-1]['type'] != 'regression':
                print 'TROUBLE! last layer must be a regression layer!'
            if layers[0]['out_depth'] * layers[0]['out_sx'] * layers[0]['out_sy'] != self.net_inputs:
                print 'TROUBLE! Number of inputs must be num_states * temporal_window + num_actions * temporal_window + num_states!'
            if layers[-1]['num_neurons'] != self.num_actions:
                print 'TROUBLE! Number of regression neurons should be num_actions!'
        else:
            # create a very simple neural net by default
            layers.append({'type': 'input', 'out_sx': 1, 'out_sy': 1,
                           'out_depth': self.net_inputs})
            if 'hidden_layer_sizes' in opt:
                # allow user to specify this via the option, for convenience
                for size in opt['hidden_layer_sizes']:
                    layers.append({'type': 'fc', 'num_neurons': size,
                                   'activation': 'relu'})
            layers.append({'type': 'regression',
                           'num_neurons': self.num_actions})  # value function output

        self.value_net = Net(layers)

        # and finally we need a Temporal Difference Learning trainer!
        trainer_ops_default = {'learning_rate': 0.01, 'momentum': 0.0,
                               'batch_size': 64, 'l2_decay': 0.01}
        tdtrainer_options = getopt(opt, 'tdtrainer_options', trainer_ops_default)
        self.tdtrainer = Trainer(self.value_net, tdtrainer_options)

        # experience replay
        self.experience = []

        # various housekeeping variables
        self.age = 0  # incremented every backward()
        self.forward_passes = 0  # incremented every forward()
        self.epsilon = 1.0  # controls exploration/exploitation tradeoff; annealed over time
        self.latest_reward = 0
        self.last_input_array = []
        self.average_reward_window = Window(1000, 10)
        self.average_loss_window = Window(1000, 10)
        self.learning = True

    def random_action(self):
        """
        a bit of a helper function. It returns a random action.
        We abstract this away because in the future we may want to do more
        sophisticated things; for example some actions could be more or less
        likely at "rest"/default state.
        """
        if len(self.random_action_distribution) == 0:  # bug fix: `self.` was missing
            return randi(0, self.num_actions)
        else:
            # okay, let's do some fancier sampling
            p = randf(0, 1.0)
            cumprob = 0.0
            for k in xrange(self.num_actions):
                cumprob += self.random_action_distribution[k]
                if p < cumprob:
                    return k

    def policy(self, s):
        """
        compute the value of doing any action in this state and return the
        argmax action and its value
        """
        V = Vol(s)
        action_values = self.value_net.forward(V)
        weights = action_values.w
        max_val = max(weights)
        max_k = weights.index(max_val)  # bug fix: `maxval` was undefined
        return {'action': max_k, 'value': max_val}

    def getNetInput(self, xt):
        """
        return s = (x,a,x,a,x,a,xt) state vector: a concatenation of the last
        window_size (x,a) pairs and the current state x
        """
        w = []
        w.extend(xt)  # start with current state
        # now go backwards and append states and actions from history,
        # temporal_window times
        n = self.window_size
        for k in xrange(self.temporal_window):
            index = n - 1 - k
            w.extend(self.state_window[index])  # state
            # action, encoded as 1-of-k indicator vector. We scale it up a bit
            # because we don't want weight regularization to undervalue this
            # information, as it only exists once
            action1ofk = zeros(self.num_actions)
            # bug fix: index into the action vector with the recorded action,
            # not with the window position
            action1ofk[self.action_window[index]] = 1.0 * self.num_states
            w.extend(action1ofk)
        return w

    def forward(self, input_array):
        self.forward_passes += 1
        self.last_input_array = input_array

        # create network input
        action = None
        if self.forward_passes > self.temporal_window:
            # we have enough to actually do something reasonable
            net_input = self.getNetInput(input_array)
            if self.learning:
                # compute epsilon for the epsilon-greedy policy
                # (float() guards against Python 2 integer division)
                self.epsilon = min(
                    1.0,
                    max(
                        self.epsilon_min,
                        1.0 - float(self.age - self.learning_steps_burnin) /
                        (self.learning_steps_total - self.learning_steps_burnin)))
            else:
                self.epsilon = self.epsilon_test_time  # use test-time value

            rf = randf(0, 1)
            if rf < self.epsilon:
                # choose a random action with epsilon probability
                action = self.random_action()
            else:
                # otherwise use our policy to make the decision
                maxact = self.policy(net_input)
                action = maxact['action']
        else:
            # pathological case that happens in the first few iterations,
            # before we accumulate window_size inputs
            net_input = []
            action = self.random_action()

        # remember the state and action we took for the backward pass
        self.net_window.pop(0)
        self.net_window.append(net_input)
        self.state_window.pop(0)
        self.state_window.append(input_array)
        self.action_window.pop(0)
        self.action_window.append(action)

        return action  # bug fix: the chosen action must be returned to the caller

    def backward(self, reward):
        self.latest_reward = reward
        self.average_reward_window.add(reward)
        self.reward_window.pop(0)
        self.reward_window.append(reward)

        if not self.learning:
            return

        self.age += 1

        # it is time t+1 and we have to store (s_t, a_t, r_t, s_{t+1}) as new
        # experience (given that an appropriate number of state measurements
        # already exist, of course)
        if self.forward_passes > self.temporal_window + 1:
            n = self.window_size
            e = Experience(self.net_window[n - 2], self.action_window[n - 2],
                           self.reward_window[n - 2], self.net_window[n - 1])
            if len(self.experience) < self.experience_size:
                self.experience.append(e)
            else:
                ri = randi(0, self.experience_size)
                self.experience[ri] = e

        # learn based on experience, once we have some samples to go on;
        # this is where the magic happens...
        if len(self.experience) > self.start_learn_threshold:
            avcost = 0.0
            for k in xrange(self.tdtrainer.batch_size):
                re = randi(0, len(self.experience))
                e = self.experience[re]
                x = Vol(1, 1, self.net_inputs)
                x.w = e.state0
                maxact = self.policy(e.state1)
                r = e.reward0 + self.gamma * maxact['value']  # bug fix: policy() returns a dict
                ystruct = {'dim': e.action0, 'val': r}
                stats = self.tdtrainer.train(x, ystruct)
                avcost += stats['loss']
            avcost /= self.tdtrainer.batch_size
            self.average_loss_window.add(avcost)
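# For intuition: the epsilon-greedy schedule in forward() anneals epsilon
# linearly from 1.0 down to epsilon_min between learning_steps_burnin and
# learning_steps_total. A standalone sketch with the class's default values
# (the function name is illustrative):
def epsilon_at(age, burnin=3000, total=100000, eps_min=0.05):
    return min(1.0, max(eps_min, 1.0 - float(age - burnin) / (total - burnin)))

# epsilon_at(0) == 1.0, epsilon_at(51500) == 0.5, epsilon_at(100000) == 0.05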
agent = create_third_level_agent(
    concept_path, args.load_concept_id, args.n_concepts,
    noisy=noisy, n_heads=n_heads, init_log_alpha=args.init_log_alpha,
    latent_dim=args.vision_latent_dim, parallel=args.parallel_q_nets,
    lr=args.lr, lr_alpha=args.lr_alpha, lr_actor=args.lr_actor,
    min_entropy_factor=args.entropy_factor, lr_c=args.lr_c,
    lr_Alpha=args.lr_c_Alpha, entropy_update_rate=args.entropy_update_rate,
    init_Epsilon=args.init_epsilon_MC, delta_Epsilon=args.delta_epsilon_MC)

if args.load_id is not None:
    if args.load_best:
        agent.load(MODEL_PATH + env_name + '/best_', args.load_id)
    else:
        agent.load(MODEL_PATH + env_name + '/last_', args.load_id)

agents = collections.deque(maxlen=args.n_agents)
agents.append(agent)

os.makedirs(MODEL_PATH + env_name, exist_ok=True)

database = ExperienceBuffer(buffer_size, level=2)
trainer = Trainer(optimizer_kwargs=optimizer_kwargs)

returns = trainer.loop(
    env, agents, database, n_episodes=n_episodes, render=args.render,
    max_episode_steps=n_steps_in_second_level_episode,
    store_video=store_video, wandb_project=wandb_project,
    MODEL_PATH=MODEL_PATH, train=(not args.eval),
    initialization=initialization, init_buffer_size=init_buffer_size,
    save_step_each=save_step_each, train_each=args.train_each,
    n_step_td=n_step_td, train_n_MC=args.train_n_mc,
    rest_n_MC=args.rest_n_mc, eval_MC=args.eval_MC)

G = returns.mean()
print("Mean episode return: {:.2f}".format(G))
"--vision_latent_dim", default=DEFAULT_VISION_LATENT_DIM, help="Dimensionality of feature vector added to inner state, default=" + str(DEFAULT_VISION_LATENT_DIM)) args = parser.parse_args() render_kwargs = { 'pixels': { 'width': 168, 'height': 84, 'camera_name': 'front_camera' } } database = ExperienceBuffer(args.buffer_size, level=3) trainer = Trainer() env_model_pairs = load_env_model_pairs(args.file) n_envs = len(env_model_pairs) n_episodes = (args.buffer_size * args.save_step_each) // args.n_steps store_video = False for env_number, (env_name, model_id) in enumerate(env_model_pairs.items()): task_database = ExperienceBuffer(args.buffer_size // n_envs, level=2) env = AntPixelWrapper( PixelObservationWrapper(gym.make(env_name).unwrapped, pixels_only=False, render_kwargs=render_kwargs.copy())) agent = load_agent(env_name, model_id, args.load_best,
# (snippet picks up mid-way through building the generator; the preceding
# call is truncated in the source)
                            dim_z_motion, video_length)

image_discriminator = build_discriminator(image_discriminator,
                                          n_channels=n_channels,
                                          use_noise=use_noise,
                                          noise_sigma=noise_sigma)

video_discriminator = build_discriminator(video_discriminator,
                                          dim_categorical=dim_z_category,
                                          n_channels=n_channels,
                                          use_noise=use_noise,
                                          noise_sigma=noise_sigma)

if torch.cuda.is_available():
    generator.cuda()
    image_discriminator.cuda()
    video_discriminator.cuda()

trainer = Trainer(image_loader, video_loader,
                  image_loader, video_loader,
                  print_every, batches, log_folder,
                  use_cuda=torch.cuda.is_available(),
                  use_infogan=use_infogan,
                  use_categories=use_categories)

trainer.train(generator, image_discriminator, video_discriminator)
# (snippet picks up mid-way through dataloader construction)
                                 shuffle=not args.get('sort_dataset'),
                                 num_workers=args.get('num_workers'))
# test_dataset = TextDataset(test_data, dictionary, args.get('sort_dataset'),
#                            args.get('min_length'), args.get('max_length'))
# test_dataloader = TextDataLoader(dataset=test_dataset, dictionary=dictionary,
#                                  batch_size=args.get('batch_size'),
#                                  shuffle=not args.get('sort_dataset'))

logger.info("Training...")
trainable_params = [p for p in model.parameters() if p.requires_grad]  # bug fix: was commented out but used below
if args.get('optimizer') == 'Adam':
    optimizer = Adam(model.parameters(), lr=args.get('initial_lr'))
elif args.get('optimizer') == 'Adadelta':
    optimizer = Adadelta(params=trainable_params, lr=args.get('initial_lr'),
                         weight_decay=0.95)
else:
    raise NotImplementedError()
lr_plateau = lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.2, patience=5)
criterion = nn.CrossEntropyLoss()  # bug fix: instantiate the loss, not the class
trainer = Trainer(model, train_dataloader, val_dataloader,
                  criterion=criterion, optimizer=optimizer,
                  lr_schedule=args.get('lr_schedule'), lr_scheduler=lr_plateau,
                  use_gpu=args.get('use_gpu'), logger=logger)
trainer.run(epochs=args.get('epochs'))

logger.info("Evaluating...")
logger.info('Best Model: {}'.format(trainer.best_checkpoint_filepath))
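# Note: ReduceLROnPlateau only adjusts the learning rate when it is stepped
# with a monitored metric, so the Trainer presumably does something like the
# following once per epoch (illustrative; `validate` and `val_loss` are
# assumed names, not this repo's API):
#     val_loss = validate(model, val_dataloader)
#     lr_plateau.step(val_loss)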