def __init__(self, input_size, hidden_size, labels):
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = len(labels)
    self.labels = labels
    # Map each label to its index in the output vector.
    self.label_index = {label: i for i, label in enumerate(labels)}
    self.variables = self._init_variables()
    self.optimizer = SGD(self.variables)
def testNetwork():  # noqa: D103
    net = Network([Linear(10, 64), ReLU(), Linear(64, 2), Sigmoid()])
    x = np.random.randn(32, 10)
    y = np.random.randn(32, 2)
    mse = MSE()
    optim = SGD(0.001, 0.001)
    pred = net(x)
    _ = mse(pred, y)
    _ = net.backward(mse.grad)
    optim.step(net)
def test00_batch_creation(self):
    m = 52
    _sgd = SGD(batch_size=10, m=m)
    batches = _sgd.create_batches()
    # Every sample index must appear in one of the batches.
    seen = []
    for batch in batches:
        seen += batch
    for i in range(m):
        self.assertIn(i, seen)
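# --- Sketch: the test above only pins down the contract of create_batches:
# together the batches must cover every sample index 0..m-1. A minimal
# implementation consistent with that contract (the shuffle and the ragged
# final batch are assumptions, not taken from the source):
import numpy as np

class SGD:
    def __init__(self, batch_size, m):
        self.batch_size = batch_size
        self.m = m  # total number of samples

    def create_batches(self):
        # Shuffle the sample indices, then split them into batch_size-sized
        # chunks; the last chunk may be smaller (m=52, batch_size=10 gives
        # five batches of 10 and one of 2).
        indices = np.random.permutation(self.m)
        return [list(indices[i:i + self.batch_size])
                for i in range(0, self.m, self.batch_size)]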
def get_model_optimizer(args):
    model = SVM(c=args.c, penalty=args.penalty)
    if args.gpu >= 0:
        model.to_gpu()
    if args.penalty == 'L2':
        optimizer = optimizers.SGD(lr=args.lr)
    elif args.penalty == 'L1':
        optimizer = SGD(lr=args.lr)
    else:
        # Guard against `optimizer` being unbound below.
        raise ValueError('penalty must be "L1" or "L2", got %r' % args.penalty)
    optimizer.setup(model)
    return model, optimizer
def test02_sgd_sanity_with_epochs(self):
    C, W0, X, _, _ = create_C_W_X_d()
    optimizer = SGD(batch_size=256, m=X.shape[1])
    W = W0.copy()
    # Smoke test: the optimizer runs for several epochs without raising.
    for epoch in range(15):
        W = optimizer.optimize(W, X, C, objective_soft_max,
                               objective_soft_max_gradient_W, lr=1)
def main():
    batch_size = 10
    wordvec_size = 100
    hidden_size = 100
    time_size = 5
    lr = 0.1
    max_epoch = 100

    corpus, word_to_id, id_to_word = ptb.load_data('train')
    corpus_size = 1000
    corpus = corpus[:corpus_size]
    vocab_size = int(max(corpus) + 1)

    xs = corpus[:-1]  # inputs
    ts = corpus[1:]   # targets: the input sequence shifted by one step
    data_size = len(xs)
    print(f'corpus size: {corpus_size}, vocabulary size: {vocab_size}')

    # Each iteration consumes batch_size * time_size samples.
    max_iters = data_size // (batch_size * time_size)
    time_idx = 0
    total_loss = 0
    loss_count = 0
    ppl_list = []

    model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
    optimizer = SGD(lr)

    # Start each batch row at a fixed offset into the corpus.
    jump = (corpus_size - 1) // batch_size
    offsets = [i * jump for i in range(batch_size)]

    for epoch in range(1, max_epoch + 1):
        for iter_ in range(max_iters):
            batch_x = np.empty((batch_size, time_size), dtype=int)
            batch_t = np.empty((batch_size, time_size), dtype=int)
            for t in range(time_size):
                for i, offset in enumerate(offsets):
                    batch_x[i, t] = xs[(offset + time_idx) % data_size]
                    # Targets come from ts, not xs.
                    batch_t[i, t] = ts[(offset + time_idx) % data_size]
                time_idx += 1

            loss = model.forward(batch_x, batch_t)
            model.backward()
            optimizer.update(model.params, model.grads)
            total_loss += loss
            loss_count += 1

        # Perplexity is the exponential of the average cross-entropy loss.
        ppl = np.exp(total_loss / loss_count)
        print(f'| epoch {epoch} | perplexity {ppl}')
        ppl_list.append(float(ppl))
        total_loss, loss_count = 0, 0
    print('DONE')
def main():
    batch_size = 20
    wordvec_size = 650
    hidden_size = 650
    time_size = 35
    lr = 20.0
    # max_epoch = 40
    max_epoch = 1  # shortened run; the original setting is commented above
    max_grad = 0.25
    dropout = 0.5

    corpus, word_to_id, _ = ptb.load_data('train')
    corpus_val, _, _ = ptb.load_data('val')
    corpus_test, _, _ = ptb.load_data('test')
    vocab_size = len(word_to_id)
    xs = corpus[:-1]
    ts = corpus[1:]

    model = BetterRnnlm(vocab_size, wordvec_size, hidden_size, dropout)
    optimizer = SGD(lr)
    trainer = RnnlmTrainer(model, optimizer)

    best_ppl = float('inf')
    for _ in range(max_epoch):
        trainer.fit(xs, ts, 1, batch_size, time_size, max_grad,
                    eval_interval=20)
        model.reset_state()
        ppl = eval_perplexity(model, corpus_val)
        print(f'valid perplexity: {ppl}')
        if best_ppl > ppl:
            best_ppl = ppl
            model.save_params()
        else:
            # No improvement on the validation set: anneal the learning rate.
            lr /= 4.0
            optimizer.lr = lr
        model.reset_state()
        print('-' * 50)

    model.reset_state()
    ppl_test = eval_perplexity(model, corpus_test)
    print(f'test perplexity: {ppl_test}')
    print('DONE')
def test_categorical_classifier(
        M: int = 3,
        log_loss_function: Callable = softmax_cross_entropy_log_loss):
    """Test case for layer matmul class"""
    N = 10
    D = 2
    W = weights.he(M, D + 1)
    optimizer = SGD(lr=TYPE_FLOAT(0.1))
    X, T, V = linear_separable_sectors(n=N, d=D, m=M)
    assert X.shape == (N, D)
    X, T = transform_X_T(X, T)

    def callback(W):
        """No-op callback."""

    profiler = cProfile.Profile()
    profiler.enable()
    train_binary_classifier(N=N, D=D, M=M, X=X, T=T, W=W,
                            log_loss_function=log_loss_function,
                            optimizer=optimizer,
                            test_numerical_gradient=True,
                            log_level=logging.WARNING,
                            callback=callback)
    profiler.disable()
    profiler.print_stats(sort="cumtime")
def _test_binary_classifier(
        M: int = 2,
        log_loss_function: Callable = softmax_cross_entropy_log_loss,
        num_epochs: int = 100):
    """Test case for layer matmul class"""
    N = 50
    D = 2
    W = weights.he(M, D + 1)
    optimizer = SGD(lr=TYPE_FLOAT(0.1))
    X, T, V = linear_separable(d=D, n=N)
    # X, T = transform_X_T(X, T)

    def callback(W):
        return W

    train_binary_classifier(N=N, D=D, M=M, X=X, T=T, W=W,
                            log_loss_function=log_loss_function,
                            optimizer=optimizer,
                            num_epochs=num_epochs,
                            test_numerical_gradient=True,
                            callback=callback)
def main():
    batch_size = 20
    wordvec_size = 100
    hidden_size = 100
    time_size = 35
    lr = 20.0
    # max_epoch = 4
    max_epoch = 1  # shortened run; the original setting is commented above
    max_grad = 0.25

    corpus, word_to_id, _ = ptb.load_data('train')
    corpus_test, _, _ = ptb.load_data('test')
    vocab_size = len(word_to_id)
    xs = corpus[:-1]
    ts = corpus[1:]

    model = Rnnlm(vocab_size, wordvec_size, hidden_size)
    optimizer = SGD(lr)
    trainer = RnnlmTrainer(model, optimizer)

    trainer.fit(xs, ts, max_epoch, batch_size, time_size, max_grad,
                eval_interval=20)
    model.reset_state()
    ppl_test = eval_perplexity(model, corpus_test)
    print(f'test perplexity: {ppl_test}')
    model.save_params()
    print('DONE')
def test_matmul_bn_relu_classifier(M: int = 3):
    """Test case for layer matmul class"""
    N = 10
    D = 2
    W = weights.he(M, D + 1)
    optimizer = SGD(lr=TYPE_FLOAT(0.5))
    X, T, V = linear_separable_sectors(n=N, d=D, m=M)
    assert X.shape == (N, D)
    X, T = transform_X_T(X, T)

    def callback(W):
        """Dummy callback"""

    profiler = cProfile.Profile()
    profiler.enable()
    train_matmul_bn_relu_classifier(
        N=N, D=D, M=M, X=X, T=T, W=W,
        log_loss_function=softmax_cross_entropy_log_loss,
        optimizer=optimizer,
        test_numerical_gradient=True,
        callback=callback)
    profiler.disable()
    profiler.print_stats(sort="cumtime")
def run():
    global args
    args = parser.parse_args()
    with open(args.config) as f:
        # safe_load: yaml.load without an explicit Loader is deprecated/unsafe.
        config = yaml.safe_load(f)
    for key in config:
        for k, v in config[key].items():
            setattr(args, k, v)

    # Prepare MNIST data
    train_data, train_label, val_data, val_label = load_mnist_trainval()
    test_data, test_label = load_mnist_test()

    # Create a model
    if args.type == 'SoftmaxRegression':
        model = SoftmaxRegression()
    elif args.type == 'TwoLayerNet':
        model = TwoLayerNet(hidden_size=args.hidden_size)
    else:
        raise ValueError('unknown model type: {}'.format(args.type))

    # Optimizer
    optimizer = SGD(learning_rate=args.learning_rate, reg=args.reg)

    train_loss_history = []
    train_acc_history = []
    valid_loss_history = []
    valid_acc_history = []
    best_acc = 0.0
    best_model = None
    for epoch in range(args.epochs):
        batched_train_data, batched_train_label = generate_batched_data(
            train_data, train_label, batch_size=args.batch_size, shuffle=True)
        epoch_loss, epoch_acc = train(epoch, batched_train_data,
                                      batched_train_label, model, optimizer,
                                      args.debug)
        train_loss_history.append(epoch_loss)
        train_acc_history.append(epoch_acc)

        # evaluate on validation data
        batched_valid_data, batched_valid_label = generate_batched_data(
            val_data, val_label, batch_size=args.batch_size)
        valid_loss, valid_acc = evaluate(batched_valid_data,
                                         batched_valid_label, model,
                                         args.debug)
        if args.debug:
            print("* Validation Accuracy: {accuracy:.4f}".format(accuracy=valid_acc))
        valid_loss_history.append(valid_loss)
        valid_acc_history.append(valid_acc)

        # keep the model that does best on the validation set
        if valid_acc > best_acc:
            best_acc = valid_acc
            best_model = copy.deepcopy(model)

    plot_curves(train_loss_history, train_acc_history,
                valid_loss_history, valid_acc_history)

    # test the best model
    batched_test_data, batched_test_label = generate_batched_data(
        test_data, test_label, batch_size=args.batch_size)
    _, test_acc = evaluate(batched_test_data, batched_test_label, best_model)
    if args.debug:
        print("Final Accuracy on Train Data: {accuracy:.4f}".format(accuracy=train_acc_history[-1]))
        print("Final Accuracy on Validation Data: {accuracy:.4f}".format(accuracy=valid_acc_history[-1]))
        print("Final Accuracy on Test Data: {accuracy:.4f}".format(accuracy=test_acc))
    return train_loss_history, train_acc_history, valid_loss_history, valid_acc_history
def test_predict(self):
    gnn1 = GraphNeuralNetwork(2)
    gnn1.params["W"] = np.arange(1, 5).reshape(2, 2)
    gnn1.params["A"] = np.arange(1, 3)
    gnn1.params["b"] = np.array([1])
    sgd = SGD()
    trainer1 = Trainer(gnn1, sgd)
    graphs = [[[0, 0, 1, 0],
               [0, 0, 1, 1],
               [1, 1, 0, 1],
               [0, 1, 1, 0]]] * 10
    vertex_sizes = [4] * 10
    expected1 = [1] * 10
    actual1 = trainer1.predict(graphs, vertex_sizes)
    self.assertEqual(expected1, actual1)

    gnn2 = GraphNeuralNetwork(3)
    gnn2.params["W"] = -np.arange(1, 10).reshape(3, 3)
    gnn2.params["b"] = -np.array([1])
    trainer2 = Trainer(gnn2, sgd)
    expected2 = [0] * 10
    actual2 = trainer2.predict(graphs, vertex_sizes)
    self.assertEqual(expected2, actual2)
def __init__(self, learning_rate=1e-1, eps=1e-5, max_iter=1000,
             batch_size=10, decay='step', reg_lambda=0.1):
    # Ridge regression: squared loss with an L2 penalty.
    loss = LOSS["SumOfSquares"](reg_lambda, L2Normalizer())
    self.optimizer = SGD(learning_rate=learning_rate, eps=eps,
                         max_iter=max_iter, batch_size=batch_size,
                         loss=loss, decay=decay)
    self.logger = logger("Ridge")
    self.params = dict()
    self.trained = False
def test_020_matmul_build_specification():
    name = "matmul01"
    num_nodes = 8
    num_features = 2
    weights_initialization_scheme = "he"
    expected_spec = {
        _SCHEME: Matmul.class_id(),
        _PARAMETERS: {
            _NAME: name,
            _NUM_NODES: num_nodes,
            _NUM_FEATURES: num_features,  # NOT including bias
            _WEIGHTS: {
                _SCHEME: weights_initialization_scheme
            },
            _OPTIMIZER: SGD.specification(name="sgd")
        }
    }
    actual_spec = Matmul.specification(
        name=name,
        num_nodes=num_nodes,
        num_features=num_features,
        weights_initialization_scheme=weights_initialization_scheme,
    )
    assert expected_spec == actual_spec, \
        "expected\n%s\nactual\n%s\n" % (expected_spec, actual_spec)
def __init__(self, learning_rate=1e-3, eps=1e-5, max_iter=1000,
             batch_size=10, loss="LossWithSoftmax", decay='step',
             _lambda=0.1):
    self.loss = LOSS[loss](_lambda, L2Normalizer())
    # Pass the constructed loss object (not the name string) to the optimizer,
    # matching the sibling regressor constructors.
    self.optimizer = SGD(learning_rate=learning_rate, eps=eps,
                         max_iter=max_iter, batch_size=batch_size,
                         loss=self.loss, decay=decay)
    self.logger = logger("LogisticRegression")
    self.params = dict()
    self.trained = False
def __init__(self, learning_rate=1e-1, eps=1e-5, max_iter=1000,
             batch_size=10, decay='step'):
    # Plain linear regression: squared loss, no regularization.
    loss = LOSS["SumOfSquares"](0, ZeroNormalizer())
    self.optimizer = SGD(learning_rate=learning_rate, eps=eps,
                         max_iter=max_iter, batch_size=batch_size,
                         loss=loss, decay=decay)
    # self.optimizer = NormalEquation()
    self.logger = logger("LinearRegression")
    self.params = dict()
    self.trained = False
def train(weight_init_std, x_train, t_train, max_epochs):
    batch_norm_network = MultiLayerNet(
        input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
        output_size=10, weight_init_std=weight_init_std, use_batchnorm=True)
    no_batch_norm_network = MultiLayerNet(
        input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
        output_size=10, weight_init_std=weight_init_std)

    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.01
    max_iters = 1000000000  # effectively unbounded; the loop exits on max_epochs
    iters_per_epoch = max(int(train_size / batch_size), 1)
    optimizer = SGD(lr=learning_rate)

    bn_train_acc_list = []
    no_bn_train_acc_list = []
    epoch_cnt = 0
    for i in range(max_iters):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Train both networks on the same mini-batch.
        for network in (batch_norm_network, no_batch_norm_network):
            grads = network.gradient(x_batch, t_batch)
            optimizer.update(network.params, grads)

        if i % iters_per_epoch == 0:
            bn_train_acc = batch_norm_network.accuracy(x_train, t_train)
            no_bn_train_acc = no_batch_norm_network.accuracy(x_train, t_train)
            bn_train_acc_list.append(bn_train_acc)
            no_bn_train_acc_list.append(no_bn_train_acc)
            print("epoch:" + str(epoch_cnt) + " | "
                  + str(no_bn_train_acc) + " - " + str(bn_train_acc))
            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return no_bn_train_acc_list, bn_train_acc_list
def __train(weight_init_std):
    bn_network = MultiLayerNetExtend(
        input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
        output_size=10, weight_init_std=weight_init_std, use_batchnorm=True)
    network = MultiLayerNetExtend(
        input_size=784, hidden_size_list=[100, 100, 100, 100, 100],
        output_size=10, weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []
    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0
    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Train both networks on the same mini-batch.
        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)
            print("epoch:" + str(epoch_cnt) + " | "
                  + str(train_acc) + " - " + str(bn_train_acc))
            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list
def main():
    max_epoch = 300
    batch_size = 30
    hidden_size = 10
    learning_rate = 1.0

    x, t = spiral.load_data()
    model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
    optimizer = SGD(lr=learning_rate)
    trainer = Trainer(model, optimizer)
    trainer.fit(x, t, max_epoch, batch_size, eval_interval=10)
def test_sgd(self):
    model_list = [dict(type='Linear', in_dim=128, out_dim=10)]
    criterion = dict(type='SoftmaxCrossEntropy')
    model = ConvNet(model_list, criterion)
    optimizer = SGD(model)

    # forward once
    np.random.seed(1024)
    x = np.random.randn(32, 128)
    np.random.seed(1024)
    y = np.random.randint(10, size=32)
    _ = model.forward(x, y)
    model.backward()
    optimizer.update(model)

    # forward twice
    np.random.seed(512)
    x = np.random.randn(32, 128)
    np.random.seed(512)
    y = np.random.randint(10, size=32)
    _ = model.forward(x, y)
    model.backward()
    optimizer.update(model)

    # Compare against stored reference weights after two SGD updates.
    # assertAlmostEqual: assertAlmostEquals is a deprecated alias.
    expected_weights = np.load('tests/sgd_weights/w.npy')
    expected_bias = np.load('tests/sgd_weights/b.npy')
    self.assertAlmostEqual(
        np.sum(np.abs(expected_weights - model.modules[0].weight)), 0)
    self.assertAlmostEqual(
        np.sum(np.abs(expected_bias - model.modules[0].bias)), 0)
def __init__(self, layers, loss='cross_entropy', optimizer=SGD(),
             logger=get_logger()):
    super(Sequential, self).__init__(logger)
    self.layers = layers
    # Collect the trainable parameters of every layer that has any.
    self.params = list(itertools.chain(
        *[layer.params for layer in layers if hasattr(layer, 'params')]))
    self.optimizer = optimizer
def train(C_train, C_val, X_train, X_val, batch_size, epochs, lr, momentum=0):
    # ----------------- hyper params init -----------------
    W0 = randn(X_train.shape[0], C_train.shape[0])
    m, n = W0.shape
    W = W0.copy()
    optimizer = SGD(batch_size=batch_size, m=X_train.shape[1])
    # ------------------------------------------------------
    # ----------------- stats lists init -------------------
    W_history = zeros((W.shape[0] * W.shape[1], epochs))
    val_score = []
    train_score = []
    train_acc = []
    val_acc = []
    # ------------------------------------------------------
    for epoch in range(epochs):
        W = optimizer.optimize(W, X_train, C_train, objective_soft_max,
                               objective_soft_max_gradient_W,
                               lr=lr, momentum=momentum)
        W_history[:, epoch] = W.reshape(W.shape[0] * W.shape[1])
        train_score.append(objective_soft_max(X_train, W, C_train))
        val_score.append(objective_soft_max(X_val, W, C_val))
        train_acc.append(accuracy(X_train, W, C_train))
        val_acc.append(accuracy(X_val, W, C_val))

    # Evaluate the average of the iterates as the final weights.
    W_res = average(W_history, axis=1).reshape(m, n)
    train_score.append(objective_soft_max(X_train, W_res, C_train))
    val_score.append(objective_soft_max(X_val, W_res, C_val))
    # TODO: add an epoch vs. accuracy plot
    plot(range(len(train_score)), train_score)
    return train_score, train_acc, val_score, val_acc
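# --- Sketch: this function and the sanity test above both call
# optimizer.optimize(W, X, C, objective, gradient, lr, momentum). A minimal
# one-epoch implementation consistent with those call sites, assuming samples
# are the columns of X and C (hence m=X.shape[1]); the heavy-ball momentum
# buffer is an assumption, not taken from the source:
import numpy as np

class SGD:
    def __init__(self, batch_size, m):
        self.batch_size = batch_size
        self.m = m            # number of samples (columns of X)
        self._velocity = 0.0  # momentum buffer, kept across epochs

    def optimize(self, W, X, C, objective, gradient, lr, momentum=0):
        # One epoch of mini-batch SGD with optional momentum.
        indices = np.random.permutation(self.m)
        for start in range(0, self.m, self.batch_size):
            batch = indices[start:start + self.batch_size]
            grad = gradient(X[:, batch], W, C[:, batch])
            self._velocity = momentum * self._velocity - lr * grad
            W = W + self._velocity
        return W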
def train(net: NetWork,
          inputs: Tensor,
          targets: Tensor,
          epochs: int = 500,
          loss: Loss = MSE(),
          optimizer: Optimizer = SGD(),
          iterator: DataIterator = BatchIterator(),
          show_info: bool = False):
    for epoch in range(epochs):
        epoch_loss = .0
        for batch_inputs, batch_targets in iterator(inputs, targets):
            predictions = net.forward(batch_inputs)
            epoch_loss += loss.loss(predictions, batch_targets)
            # Backpropagate the loss gradient, then let the optimizer
            # apply the accumulated parameter gradients.
            grad = loss.grad(predictions, batch_targets)
            net.backward(grad)
            optimizer.step(net)
        if show_info:
            print('epoch:{}, loss:{}'.format(epoch, epoch_loss))
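# --- Sketch: the training loop above only assumes the optimizer exposes
# step(net). A minimal SGD.step consistent with that call, assuming the
# network yields (param, grad) pairs via a params_and_grads() method (that
# method name is an assumption, not taken from the source):
class SGD:
    def __init__(self, lr: float = 0.01) -> None:
        self.lr = lr

    def step(self, net) -> None:
        # Vanilla stochastic gradient descent: move each parameter
        # a small step against its gradient, in place.
        for param, grad in net.params_and_grads():
            param -= self.lr * grad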
def test_kfold_cross_val(self):
    gnn = GraphNeuralNetwork(2)
    sgd = SGD()
    trainer = Trainer(gnn, sgd)
    graphs = [[[0, 0, 1, 0],
               [0, 0, 1, 1],
               [1, 1, 0, 1],
               [0, 1, 1, 0]]] * 100
    vertex_sizes = [4] * 100
    labels = [0] * 100

    # Cross validation must not mutate the model's parameters.
    expected = gnn.params
    _ = trainer.kfold_cross_validation(graphs, vertex_sizes, labels)
    actual = gnn.params
    self.assertEqual(expected, actual)

    # Each invalid split must raise; check them separately so the second
    # call is actually executed.
    with self.assertRaises(SplitError):
        trainer.kfold_cross_validation(graphs, vertex_sizes, labels,
                                       minibatch_size=20)
    with self.assertRaises(SplitError):
        trainer.kfold_cross_validation(graphs, vertex_sizes, labels, k=20)
def test_accuracy(self):
    gnn = GraphNeuralNetwork(2)
    gnn.params["W"] = np.arange(1, 5).reshape(2, 2)
    gnn.params["A"] = np.arange(1, 3)
    gnn.params["b"] = np.array([1])
    sgd = SGD()
    trainer = Trainer(gnn, sgd)
    graphs = [[[0, 0, 1, 0],
               [0, 0, 1, 1],
               [1, 1, 0, 1],
               [0, 1, 1, 0]]] * 10
    vertex_sizes = [4] * 10

    labels1 = [1] * 10
    expected1 = 1.
    actual1 = trainer.accuracy(graphs, vertex_sizes, labels1)
    self.assertEqual(expected1, actual1)

    labels2 = [1] * 7 + [0] * 3
    expected2 = 0.7
    actual2 = trainer.accuracy(graphs, vertex_sizes, labels2)
    self.assertEqual(expected2, actual2)
def test():
    M = 1
    D = 2
    N = 100
    X, T, V = linear_separable(d=D, n=N)
    x_min, x_max = X[:, 0].min(), X[:, 0].max()
    y_min, y_max = X[:, 1].min(), X[:, 1].max()

    sigmoid_classifier_specification = {
        _NAME: "softmax_classifier",
        _NUM_NODES: M,
        _LOG_LEVEL: logging.ERROR,
        _COMPOSITE_LAYER_SPEC: {
            "matmul01": Matmul.specification(
                name="matmul",
                num_nodes=M,
                num_features=D,
                weights_initialization_scheme="he",
                weights_optimizer_specification=SGD.specification(
                    lr=TYPE_FLOAT(0.2),
                    l2=TYPE_FLOAT(1e-3))),
            "loss": CrossEntropyLogLoss.specification(
                name="loss",
                num_nodes=M,
                loss_function=sigmoid_cross_entropy_log_loss.__qualname__)
        }
    }
    logistic_classifier = SequentialNetwork.build(
        specification=sigmoid_classifier_specification,
    )

    for i in range(50):
        logistic_classifier.train(X=X, T=T)

    prediction = logistic_classifier.predict(
        np.array([-1., -1.], dtype=TYPE_FLOAT))
    # Assert the check instead of discarding its result: a binary
    # prediction must be either 0 or 1.
    assert np.all(np.isin(prediction, [0, 1]))
    print(prediction)
def test_update(self):
    sgd = SGD()
    gnn = GraphNeuralNetwork(vector_size=2)

    # With no gradients set, update() must leave the parameters unchanged.
    expected = gnn.params
    sgd.update(gnn)
    actual = gnn.params
    self.assertEqual(expected, actual)

    params = copy.deepcopy(gnn.params)
    for _ in range(100):
        gnn.grads["W"] = np.random.rand()
        gnn.grads["A"] = np.random.rand()
        gnn.grads["b"] = np.random.rand()
        sgd.update(gnn)
        for key, param in params.items():
            # Replay the expected update: param <- param - lr * grad.
            params[key] = param - gnn.grads[key] * sgd.lr
            expected = repr(params[key])
            actual = repr(gnn.params[key])
            self.assertEqual(expected, actual)
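# --- Sketch: the test above replays the update rule it expects,
# param <- param - lr * grad, so the update() under test plausibly looks
# like this minimal version (the default learning rate is an assumption;
# the test only reads sgd.lr, it never sets it):
class SGD:
    def __init__(self, lr=0.0001):
        self.lr = lr  # default value is an assumption

    def update(self, model):
        # Vanilla SGD step over every parameter that has a gradient.
        for key, grad in model.grads.items():
            model.params[key] = model.params[key] - self.lr * grad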
from tensor import Tensor
from optimizer import SGD
from layer import MSELoss, Linear, Tanh, Sigmoid
from model import Sequential
import numpy as np

# Toy example of using the Tensor class
np.random.seed(0)
data = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), requires_grad=True)
target = Tensor(np.array([[0], [1], [0], [1]]), requires_grad=True)

# Each weight in the model is itself a Tensor holding a weight matrix
model = Sequential(
    Linear(2, 3),
    Tanh(),
    Linear(3, 3),
    Tanh(),
    Linear(3, 1),
)
optim = SGD(parameters=model.get_parameters(), lr=0.1)
criterion = MSELoss()

for i in range(10):
    pred = model(data)
    loss = criterion(pred, target)
    # Seed backprop with an all-ones gradient of the loss.
    loss.backward(Tensor(np.ones_like(loss.data), is_grad=True))
    optim.step()
    print(loss.data)
    print("-" * 72)
from utils import load_mnist
# Assumed imports for the rest of the snippet:
import numpy as np
import matplotlib.pyplot as plt
# MultiLayerNet and SGD must come from the surrounding project, e.g.
# (paths are assumptions):
# from common.multi_layer_net import MultiLayerNet
# from common.optimizer import SGD

# Compare training loss under three weight-initialization schemes.
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}
(x_train, t_train), (x_test, t_test) = load_mnist(
    normalize=True, one_hot_label=True)
iters_num = 2000
train_size = x_train.shape[0]
batch_size = 100
train_loss = {}

for key, weight_type in weight_init_types.items():
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100],
                            output_size=10,
                            weight_init_std=weight_type)
    optimizer = SGD()
    train_loss[key] = []
    for i in range(iters_num):
        mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[mask]
        t_batch = t_train[mask]
        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)
        train_loss[key].append(network.loss(x_batch, t_batch))

markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
x = np.arange(iters_num)
for key in weight_init_types.keys():
    plt.plot(x, train_loss[key], marker=markers[key], markevery=100, label=key)
def main(namemark, ncpu, batchsize, generation, lr, sigma, vbn, vbn_test_g,
         gamename, logfile, modeltype):
    if modeltype == '2015':
        from model_15 import build_model
    elif modeltype == '2013':
        from model_13 import build_model

    setup_logging(logfile)
    logging.info("modeltype: %s", modeltype)
    logging.info("learning rate: %s", lr)
    logging.info("sigma: %s", sigma)
    logging.info("Game name: %s", gamename)
    logging.info("batchsize: %s", batchsize)
    logging.info("ncpu: %s", ncpu)
    logging.info("namemark: %s", namemark)
    print("learning rate:", lr)
    print("sigma:", sigma)
    print("gamename:", gamename)
    print("batchsize:", batchsize)
    print("ncpu:", ncpu)
    print("namemark:", namemark)

    checkpoint_name = (gamename + namemark + "-sigma" + str(sigma)
                       + '-lr' + str(lr) + '-model' + modeltype)

    import pandas as pd
    config = pd.read_csv('config.csv')
    CONFIG = dict()
    CONFIG['game'] = gamename + '-v0'
    CONFIG['ep_max_step'] = 1500  # max frames per training episode
    CONFIG['eval_threshold'] = config[config['gamename'] == gamename].iloc[0, 1]
    CONFIG['l2coeff'] = 0.005
    test_times = ncpu - 1
    logging.info("Settings: %s", str(CONFIG))

    env = gym.make(gamename + '-v0')
    CONFIG['n_action'] = env.action_space.n

    experiment_record = {}
    experiment_record['kid_rewards'] = []
    experiment_record['test_rewards'] = []

    device = torch.device("cpu")
    model = build_model(CONFIG).to(device)
    model_best = build_model(CONFIG)
    model_before = build_model(CONFIG)
    best_test_score = Small_value

    # Rank-transformation utilities: update parameters from reward ranks
    # instead of raw rewards.
    base = batchsize  # *2 for mirrored sampling
    if batchsize % 2 == 1:
        print("need an even batch size")
        exit()
    rank = np.arange(1, base + 1)
    util_ = np.maximum(0, np.log(base / 2 + 1) - np.log(rank))
    utility = util_ / util_.sum() - 1 / base

    optimizer = SGD(model.named_parameters(), lr)
    pool = mp.Pool(processes=ncpu)
    test_episodes = 15

    # Estimate mean and var for virtual batch normalization.
    if vbn:
        logging.info("start test reference batch statistic")
        print("start getting reference frames")
        reference_batch = explore_for_vbn(env, 0.01)
        reference_batch_torch = torch.zeros((reference_batch_size, 4, 84, 84))
        for i in range(reference_batch_size):
            reference_batch_torch[i] = reference_batch[i]

    # training
    mar = None  # moving average reward
    training_timestep_count = 0
    best_kid_mean = Small_value
    test_result_list = []
    for g in range(generation):
        t0 = time.time()
        model_before.load_state_dict(model.state_dict())
        model, kid_rewards, timestep_count = train(
            model, optimizer, pool, sigma, env, int(batchsize / 2),
            CONFIG, modeltype, reference_batch_torch)
        training_timestep_count += timestep_count
        timestep_count = timestep_count / 4
        if training_timestep_count > TIMESTEP_LIMIT:
            logging.info("satisfied timestep limit")
            logging.info("Now timestep %s" % training_timestep_count)
            break

        kid_rewards_mean = np.array(kid_rewards).mean()
        experiment_record['kid_rewards'].append([g, kid_rewards_mean])
        if g % 5 == 0:
            logging.info(
                'Gen: %s | Kid_avg_R: %.1f | Episodes Number: %s | '
                'timestep number: %s | Gen_T: %.2f'
                % (g, kid_rewards_mean, batchsize, timestep_count,
                   time.time() - t0))
            print('Gen:', g,
                  '| Kid_avg_R: %.1f' % kid_rewards_mean,
                  '| episodes number:', batchsize,
                  '| timestep number:', timestep_count,
                  '| Gen_T: %.2f' % (time.time() - t0))

        if kid_rewards_mean > best_kid_mean:
            best_kid_mean = kid_rewards_mean
            test_rewards, _ = test(model_before, pool, env, test_times,
                                   CONFIG, reference_batch_torch)
            test_rewards_mean = np.mean(np.array(test_rewards))
            experiment_record['test_rewards'].append([g, test_rewards])
            logging.info("Gen: %s, test model, Reward: %.1f"
                         % (g, test_rewards_mean))
            # logging.info("train progress %s/%s" % (training_timestep_count, TIMESTEP_LIMIT))
            print('Gen: ', g, '| Net_R: %.1f' % test_rewards_mean)
            if test_rewards_mean > best_test_score:
                best_test_score = test_rewards_mean
                model_best.load_state_dict(model_before.state_dict())
                # save when a better model is found
                # logging.info("Storing Best model")
                torch.save(model_best.state_dict(),
                           model_storage_path + checkpoint_name + 'best_model.pt')

        if g % 5 == 0:
            test_rewards, timestep_count = test(model, pool, env, test_times,
                                                CONFIG, reference_batch_torch)
            test_rewards_mean = np.mean(np.array(test_rewards))
            experiment_record['test_rewards'].append([g, test_rewards])
            # logging.info("test model, Reward: %.1f" % test_rewards_mean)
            test_result_list.append(test_rewards_mean)
            print('Gen: ', g, '| Net_R: %.1f' % test_rewards_mean)
            if test_rewards_mean > best_test_score:
                best_test_score = test_rewards_mean
                model_best.load_state_dict(model.state_dict())
                # save when a better model is found
                # logging.info("Storing Best model")
                torch.save(model_best.state_dict(),
                           model_storage_path + checkpoint_name + 'best_model.pt')

        if g % 40 == 0:
            logging.info("train progress %s/%s"
                         % (training_timestep_count, TIMESTEP_LIMIT))
            logging.info("best test result: %s" % best_test_score)
            logging.info("test result: %s" % str(test_result_list))
            test_result_list = []
        if (g - 1) % 500 == 500 - 1:
            CONFIG['ep_max_step'] += 150
            logging.info("Gen %s | adding max timestep" % g)
        if (g - 1) % 1000 == 1000 - 1:
            logging.info("Gen %s | storing model" % g)
            torch.save(model.state_dict(),
                       model_storage_path + checkpoint_name
                       + 'generation' + str(g) + '.pt')
            torch.save(model_best.state_dict(),
                       model_storage_path + checkpoint_name + 'best_model.pt')
            with open(model_storage_path + "experiment_record"
                      + checkpoint_name + 'generation' + str(g) + ".pickle",
                      "wb") as f:
                pickle.dump(experiment_record, f)

    test_rewards, _ = test(model, pool, env, test_times, CONFIG,
                           reference_batch_torch)
    test_rewards_mean = np.mean(np.array(test_rewards))
    logging.info("test final model, Mean Reward of %s times: %.1f"
                 % (test_times, test_rewards_mean))
    if test_rewards_mean > best_test_score:
        best_test_score = test_rewards_mean
        model_best.load_state_dict(model.state_dict())
        logging.info("storing Best model")
    print("best test results:", best_test_score)
    logging.info("best test results: %s" % best_test_score)

    # --------------- SAVE ---------------
    torch.save(model_best.state_dict(),
               model_storage_path + checkpoint_name + 'best_model.pt')
    torch.save(model.state_dict(),
               model_storage_path + checkpoint_name + '.pt')
    with open(model_storage_path + "experiment_record" + str(namemark)
              + ".pickle", "wb") as f:
        pickle.dump(experiment_record, f)