def __init__(self, out_shape: int, input_shape: tuple = None, eps=1e-5, axis=0,
             momentum=.9, beta_opt: Optimizer = None, gamma_opt: Optimizer = None,
             alpha: float = 1e-7) -> None:
    super().__init__(out_shape, input_shape, False)
    self.axis: int = axis
    self.gamma: np.ndarray = np.ones(self.out_shape, dtype=np.float64)
    self.beta: np.ndarray = np.zeros(self.out_shape, dtype=np.float64)
    self.running_mu: np.ndarray = np.zeros(self.out_shape, dtype=np.float64)
    self.running_var: np.ndarray = np.zeros(self.out_shape, dtype=np.float64)
    if axis == 1:
        self.gamma = self.gamma.reshape((-1, 1))
        self.beta = self.beta.reshape((-1, 1))
        self.running_mu = self.running_mu.reshape((-1, 1))
        self.running_var = self.running_var.reshape((-1, 1))
    self.mu = 0
    self.var = 0
    self.std = 0
    self.z = 0
    self.eps = eps
    self.alpha = alpha
    self.momentum = momentum
    self.beta_opt: Optimizer = beta_opt if beta_opt else Adam()
    self.gamma_opt: Optimizer = gamma_opt if gamma_opt else Adam()
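# The buffers above follow the usual batch-normalization bookkeeping. A minimal
# illustrative sketch of the training-time forward pass they support, written as a
# standalone NumPy function (hypothetical helper, not this layer's actual method):
import numpy as np

def bn_forward_train(x, gamma, beta, running_mu, running_var, momentum=0.9, eps=1e-5, axis=0):
    mu = x.mean(axis=axis, keepdims=True)                       # batch mean
    var = x.var(axis=axis, keepdims=True)                       # batch variance
    z = (x - mu) / np.sqrt(var + eps)                           # normalized activations
    # exponential moving averages used at inference time
    running_mu = momentum * running_mu + (1 - momentum) * mu
    running_var = momentum * running_var + (1 - momentum) * var
    return gamma * z + beta, running_mu, running_var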
def __init__(self):
    self.params = dict()
    self.grads = dict()
    self.params['W1'] = args.weight_init_std * np.random.randn(784, 256)
    self.params['B1'] = np.zeros(256)
    self.params['W2'] = args.weight_init_std * np.random.randn(256, 256)
    self.params['B2'] = np.zeros(256)
    self.params['W3'] = args.weight_init_std * np.random.randn(256, 10)
    self.params['B3'] = np.zeros(10)
    self.optimizer = Adam()
def main() -> None:
    (x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
    x_train, x_test = x_train[:, ::-1], x_test[:, ::-1]
    char_to_id, id_to_char = sequence.get_vocab()

    vocab_size = len(char_to_id)
    wordvec_size = 16
    hidden_size = 128
    batch_size = 128
    max_epoch = 25
    max_grad = 5.0

    model = PeekySeq2seq(vocab_size, wordvec_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    acc_list = []
    for epoch in range(1, max_epoch + 1):
        trainer.fit(x_train, t_train, max_epoch=1,
                    batch_size=batch_size, max_grad=max_grad)
        correct_num = 0
        for i in range(len(x_test)):
            question, correct = x_test[[i]], t_test[[i]]
            verbose = i < 10
            correct_num += eval_seq2seq(model, question, correct,
                                        id_to_char, verbose)
        acc = float(correct_num) / len(x_test)
        acc_list.append(acc)
        print(f'val acc {acc*100}%')
    print('DONE')
def primal_step(self, x, y, learning_rate, input_dim):
    mlp, cost, probs = self.create_model(x, y, input_dim)
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    updates = Adam(cost, weights)
    return mlp, updates, cost, probs
def primal_step(self, x, y, learning_rate, alpha, beta, input_dim, p):
    mlp, cost = self.create_model(x, y, input_dim, p)
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    updates = Adam(cost, weights, y, alpha, beta)
    return mlp, updates, -1 * cost
def primal_step(self, x, y, learning_rate, input_dim, p):
    self.model = self.model(x, y, input_dim, p)
    score, probs = self.model.create_model()
    criterion = self.alpha * p - self.beta * np.float32(1 - p)
    r = theano.shared(np.float32(0.0), name='tp+fp')
    q = theano.shared(np.float32(0.0), name='tn+fn')
    pos_criterion = T.lt(probs, 0.5) * -criterion * score
    neg_criterion = T.gt(probs, 0.5) * criterion * score
    cost_weighed = T.mean(pos_criterion * T.gt(criterion, 0) +
                          neg_criterion * T.lt(criterion, 0))
    cg = ComputationGraph([cost_weighed])

    # Reward version
    r_temp = (self.t * r + T.mean(score * T.gt(probs, 0.5))) / (self.t + 1)
    q_temp = (self.t * q + T.mean(score * T.lt(probs, 0.5))) / (self.t + 1)
    # True Count version
    # r_temp = (self.t*r + T.mean(1.0 * T.gt(probs, 0.5)))/(self.t + 1)
    # q_temp = (self.t*q + T.mean(1.0 * T.lt(probs, 0.5)))/(self.t + 1)

    primal_updates = [(r, r_temp), (q, q_temp), (self.t, self.t + 1)]
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    updates = Adam(cost_weighed, weights) + primal_updates
    # r = tp + fp
    # q = fp + fn
    primal_var = [r, q]
    return updates, cost_weighed, score, primal_var
def train(data_path, adaptor, classifier, summ):
    input_ = Input(FLAGS.train_batch_size, FLAGS.num_points)
    waves, labels = input_(data_path)

    # Calculate the loss of the model.
    if FLAGS.adp:
        logits = tf.stop_gradient(adaptor(waves))
        # logits = adaptor(waves)
        logits = classifier(logits)
    else:
        logits = classifier(waves, expand_dims=True)
    loss = LossClassification(FLAGS.num_classes)(logits, labels)

    opt = Adam(FLAGS.learning_rate, lr_decay=True,
               lr_decay_steps=FLAGS.lr_decay_steps,
               lr_decay_factor=FLAGS.lr_decay_factor)
    graph_regularizers = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_regularization_loss = tf.reduce_sum(graph_regularizers)
    train_op = opt(loss + total_regularization_loss)

    summ.register('train', 'train_loss', loss)
    train_summ_op = summ('train')
    return loss, train_op, train_summ_op
def main():
    window_size = 1
    hidden_size = 5
    batch_size = 3
    max_epoch = 1000

    text = 'You say goodbye and I say hello.'
    corpus, word_to_id, id_to_word = preprocess(text)
    vocab_size = len(word_to_id)
    contexts, target = create_context_target(corpus, window_size)
    one_hot_target = convert_one_hot(target, vocab_size)
    one_hot_contexts = convert_one_hot(contexts, vocab_size)

    model = SimpleCBOW(vocab_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)
    trainer.fit(one_hot_contexts, one_hot_target, max_epoch, batch_size)
    # trainer.plot()

    word_vecs = model.word_vecs
    for word_id, word in id_to_word.items():
        print(word, word_vecs[word_id])
    print('DONE')
def train(data_path, image_upsampler, wm_upsampler, blender, downsampler, extrator, summ):
    data_file = os.path.join(data_path, 'train_images.tfr')
    wm_file = os.path.join(data_path, 'watermark.mat')
    assert os.path.isfile(data_file), "Invalid file name"
    assert os.path.isfile(wm_file), "Invalid file name"

    input_ = Input(FLAGS.batch_size,
                   [FLAGS.img_height, FLAGS.img_width, FLAGS.num_chans])
    images = input_(data_file)
    wm = Watermark(wm_file)()

    image_upsampled = image_upsampler(images)
    wm_upsampled = wm_upsampler(wm)
    image_blended = blender(image_upsampled, wm_upsampled)
    image_downsampled = downsampler(image_blended, training=True)
    wm_extracted = extrator(image_downsampled, training=True)

    image_loss = LossRegression()(image_downsampled, images)
    wm_loss = LossRegression()(wm_extracted, wm)
    opt = Adam(FLAGS.learning_rate, lr_decay=FLAGS.lr_decay,
               lr_decay_steps=FLAGS.lr_decay_steps,
               lr_decay_factor=FLAGS.lr_decay_factor)
    train_op = opt(image_loss + wm_loss)

    summ.register('train', 'image_loss', image_loss)
    summ.register('train', 'wm_loss', wm_loss)
    train_summ_op = summ('train')
    return image_loss + wm_loss, train_op, train_summ_op
def main() -> None:
    window_size = 5
    hidden_size = 100
    batch_size = 100
    max_epoch = 10

    corpus, word_to_id, id_to_word = ptb.load_data('train')
    vocab_size = len(word_to_id)
    contexts, target = create_context_target(corpus, window_size)

    model = CBOW(vocab_size, hidden_size, window_size, corpus)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)
    trainer.fit(contexts, target, max_epoch, batch_size)
    # trainer.plot()

    word_vecs = model.word_vecs
    params = {
        'word_vecs': word_vecs.astype(np.float16),
        'word_to_id': word_to_id,
        'id_to_word': id_to_word
    }
    with open('cbow_params.pkl', 'wb') as f:
        pickle.dump(params, f, -1)
def __init__(self, out_shape: int, reg: Regularization = L2, ns: bool = False,
             opt: Optimizer = None, opt_bias_: Optimizer = None, eps=1e-3,
             alpha=1e-5, input_shape: tuple = None, lambda_=0, bias=True,
             reg_bias=False, opt_bias=True, init_W=stdScale, seed=-1) -> None:
    super().__init__(out_shape, input_shape, ns)
    self.seed = seed

    # general params
    self.eps: float = eps
    self.bias: bool = bias
    self.reg_bias: bool = reg_bias
    self.opt_bias: bool = opt_bias

    # layer params
    self.init_W = init_W
    self.W = None
    # TODO remove
    # np.random.seed(6)
    self.b = init_W((out_shape, ), eps) if bias else None
    self.__input_shape = input_shape
    if input_shape:
        assert len(input_shape) == 1
        # TODO remove
        if seed >= 0:
            np.random.seed(seed)
        self.W = init_W((input_shape[0], out_shape), eps)

    # hyper params
    self.alpha, self.lambda_ = alpha, lambda_

    # engine params
    self.reg: Regularization = reg
    self.opt: Optimizer = opt if opt else Adam()
    self.opt_bias_: Optimizer = opt_bias_ if opt_bias_ or not bias else Adam()
def primal_step(self, x, y, learning_rate, alpha, beta, input_dim, p):
    mlp, cost = self.create_model(x, y, input_dim, p)
    flag = T.eq(y, 1) * alpha + T.eq(y, 0) * beta
    cost_weighed = T.mean(cost * flag.dimshuffle(0, 'x'))
    cg = ComputationGraph([cost_weighed])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    updates = Adam(cost_weighed, weights)
    return mlp, updates, cost_weighed, cost
def __init__(self, inputDim=1, outputDim=1, optimizer=Adam()):
    self.inputDim = inputDim
    self.outputDim = outputDim
    self.mean = Dense(self.inputDim, self.outputDim, activation=Identity(),
                      optimizer=copy.copy(optimizer))
    self.logVar = Dense(self.inputDim, self.outputDim, activation=Identity(),
                        optimizer=copy.copy(optimizer))
def __init__(self, load_data_function, hidden_unit=16, learning_rate=0.01, weight_decay=5e-4):
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data_function()
    # renormalize the adjacency matrix
    self.adj = preprocess_adj(adj)
    # normalize the node features
    self.features = preprocess_features(features)

    # preprocess
    self.y_train, self.train_mask = y_train, train_mask
    self.y_val, self.val_mask = y_val, val_mask
    self.y_test, self.test_mask = y_test, test_mask

    # init
    # number of nodes
    self.n = adj.shape[0]
    # number of features (1433)
    self.f = features.shape[1]
    # number of classes (7)
    self.c = y_train.shape[1]
    # number of hidden units
    self.h = hidden_unit

    # init weights
    # first-layer weights
    self.weight_hidden = init_weight((self.f, self.h))
    # second-layer weights
    self.weight_outputs = init_weight((self.h, self.c))
    self.adam_weight_hidden = Adam(weights=self.weight_hidden, learning_rate=learning_rate)
    self.adam_weight_outputs = Adam(weights=self.weight_outputs, learning_rate=learning_rate)

    # second-layer input, kept for backpropagation
    self.hidden = np.zeros((self.n, self.h))
    # second-layer output, kept for backpropagation
    self.outputs = np.zeros((self.n, self.c))
    self.weight_decay = weight_decay

    # test
    self.grad_loss = None
    self.grad_weight_outputs = None
    self.grad_hidden = None
    self.grad_weight_hidden = None
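# For context, the `hidden` and `outputs` buffers above hold the activations of a
# standard two-layer GCN forward pass. A minimal illustrative sketch, assuming the
# renormalized adjacency and features are dense NumPy arrays (hypothetical helper,
# not this class's actual method):
import numpy as np

def gcn_forward(adj, features, weight_hidden, weight_outputs):
    hidden = np.maximum(0, adj @ features @ weight_hidden)     # layer 1: A_hat X W1, then ReLU
    logits = adj @ hidden @ weight_outputs                     # layer 2: A_hat H W2
    exp = np.exp(logits - logits.max(axis=1, keepdims=True))   # row-wise softmax
    return hidden, exp / exp.sum(axis=1, keepdims=True)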
def primal_step(self, x, y, learning_rate, input_dim, p, mask=None):
    if mask is None:
        self.model = self.model(x, y, input_dim, p)
    else:
        self.model = self.model(x, y, input_dim, p, mask=mask)
    probs = self.model.create_model()
    cost = T.sum((probs - y.dimshuffle(0, 'x'))**2)
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    updates = Adam(cost, weights)
    return updates, cost
def main():
    # optimizer = SGD(lr, weight_decay, mu=mu)
    optimizer = Adam(lr, weight_decay)
    model = ListModel(net=[Linear(784, 400), ReLU(),
                           Linear(400, 100), ReLU(),
                           Linear(100, 10), Softmax()],
                      loss=CrossEntropyLoss())
    for epoch in range(num_epochs):
        print('epoch number: {}'.format(epoch))
        train(model, optimizer)
        valid(model)
def get_opt():
    opt = parameter.optimization.lower()
    if opt == "adam":
        return Adam()
    elif opt == "adadelta":
        return Adadelta()
    elif opt == "adagrad":
        return AdaGrad()
    elif opt == "rmsprop":
        return RMSProp()
    elif opt == "nesterov":
        return Nesterov()
    elif opt == "momentum":
        return Momentum()
    else:
        return None
def primal_step(self, x, y, learning_rate, input_dim, p, mask=None):
    if mask is None:
        self.model = self.model(x, y, input_dim, p)
    else:
        self.model = self.model(x, y, input_dim, p, mask=mask)
    cost = self.model.create_model()

    flag = T.eq(y, 1) * (self.gamma[0] * self.alpha[0] + self.gamma[1] * self.beta[0]) + \
           T.eq(y, 0) * (self.gamma[0] * self.alpha[1] + self.gamma[1] * self.beta[0])

    q0 = theano.shared(np.float32(0), name='q0')
    q1 = theano.shared(np.float32(0), name='q1')
    r0 = theano.shared(np.float32(0), name='r0')
    r1 = theano.shared(np.float32(0), name='r1')

    q0_temp = q0 * self.t + T.mean(
        (T.eq(y, 1) * self.alpha[0] + T.eq(y, 0) * self.alpha[1]).dimshuffle(0, 'x') * cost)
    q1_temp = q1 * self.t + T.mean(
        (T.eq(y, 1) * self.beta[0] + T.eq(y, 0) * self.beta[1]).dimshuffle(0, 'x') * cost)

    # Update r
    r0_next = (r0 * self.t + T.mean(T.eq(y, 1).dimshuffle(0, 'x') * cost)) * 1.0 / (self.t + 1)
    r1_next = (r1 * self.t + T.mean(T.eq(y, 0).dimshuffle(0, 'x') * cost)) * 1.0 / (self.t + 1)

    # Update q
    q0_next = (q0_temp - self.dual_class.dual1_fn(self.alpha)) / (self.t + 1)
    q1_next = (q1_temp - self.dual_class.dual2_fn(self.beta)) / (self.t + 1)

    primal_updates = [(q0, q0_next), (q1, q1_next),
                      (r0, r0_next), (r1, r1_next),
                      (self.t, self.t + 1)]

    cost_weighed = T.mean(cost * flag.dimshuffle(0, 'x'))
    cg = ComputationGraph([cost_weighed])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    updates = Adam(cost_weighed, weights) + primal_updates

    primal_var = [[r0, r1], [q0, q1]]
    return updates, cost_weighed, cost, primal_var
def t1():
    # layer_param = [(8, 3, 1, 1, 'conv'), (8, 'pool'), (12, 3, 1, 1, 'conv'), (12, 3, 1, 1, 'conv'),
    #                (12, 3, 1, 1, 'conv'), (12, 'pool'), (36, 3, 1, 1, 'conv'), (36, 3, 1, 1, 'conv'),
    #                (36, 3, 1, 1, 'conv'), (36, 'pool'), (64, 'FC')]
    layer_param = [(6, 5, 1, 0, 'conv'),
                   (6, 'pool'),
                   (16, 5, 1, 0, 'conv'),
                   (16, 'pool'),
                   (120, 'FC'),
                   (84, 'FC')]
    regulation = L2Regulation()
    activation = ReluActivation()
    optimizer = Adam()
    cnn = CnnTest(layer_param)
    cnn.load_train_data(0.7)
    # For LeNet the learning rate must not be too large: around 10 ** -2 the
    # step size is so long that training fails to converge.
    bn_update = UpdateBN()
    cnn.train_random_search(10, 64, 0.9, [-3.0, -5.0], optimizer, activation,
                            [-3, -5], regulation, 10, 1, True)
def __init__(self, out_shape: int, act: Activation = ReLU, norm: NormLayer = None,
             reg: Regularization = L2, opt: Optimizer = Adam(),
             opt_bias_: Optimizer = Vanilla(), eps=1e-3, alpha=1e-5,
             input_shape: tuple = None, lambda_=0, bias=True, reg_bias=False,
             opt_bias=True, init_W=stdScale) -> None:
    super().__init__(out_shape, input_shape=input_shape)

    # general params
    self.eps: float = eps
    self.bias: bool = bias
    self.reg_bias: bool = reg_bias
    self.opt_bias: bool = opt_bias

    # layer params
    self.init_W = init_W
    self.W = None
    self.b = init_W((out_shape, ), eps) if bias else None
    self.__input_shape = input_shape
    if input_shape:
        assert len(input_shape) == 1
        self.W = init_W((input_shape[0], out_shape), eps)

    # hyper params
    self.alpha, self.lambda_ = alpha, lambda_

    # engine params
    self.act: Activation = act
    self.reg: Regularization = reg
    self.norm: NormLayer = norm
    self.opt: Optimizer = opt
    self.opt_bias_: Optimizer = opt_bias_

    if isinstance(self.act, ReLU):
        self.numeric_stability = False
def main() -> None:
    (x_train, t_train), (x_test, t_test) = sequence.load_data('data.txt')
    char_to_id, id_to_char = sequence.get_vocab()
    x_train, x_test = x_train[:, ::-1], x_test[:, ::-1]

    vocab_size = len(char_to_id)
    wordvec_size = 16
    hidden_size = 256
    batch_size = 128
    max_epoch = 10
    max_grad = 5.0

    model = AttentionSeq2seq(vocab_size, wordvec_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    acc_list = []
    for epoch in range(max_epoch):
        trainer.fit(x_train, t_train, max_epoch=1,
                    batch_size=batch_size, max_grad=max_grad)
        correct_num = 0
        for i in range(len(x_test)):
            question, correct = x_test[[i]], t_test[[i]]
            verbose = i < 10
            correct_num += eval_seq2seq(model, question, correct,
                                        id_to_char, verbose, is_reverse=True)
        acc = float(correct_num) / len(x_test)
        acc_list.append(acc)
        print(f"val acc {acc*100}%")

    model.save_params()
    print("DONE")
def get_fns(self, input_dim=123, p_learning_rate=0.01, loss='prec'):
    x = T.matrix('X')
    y = T.vector('y')
    loss_id = loss_functions.loss_dict[loss]
    scores = self.get_scores(x, input_dim)

    def get_score_fn():
        score_th_fn = theano.function([x], [scores])
        return score_th_fn

    cost = T.sum((scores * y.dimshuffle(0, 'x')))
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    updates = Adam(cost, weights)

    def get_update_fn():
        update_th_fn = theano.function([x, y], [cost], updates=updates)
        return update_th_fn

    score_th_fn = get_score_fn()

    def train_fn(x, y):
        y = 2 * y - 1
        update_th_fn = get_update_fn()
        scores = np.asarray(score_th_fn(x))
        beg = time.clock()
        Y_new = mvc.mvc(y.ravel(), scores, np.sum(y == 1), np.sum(y == -1), loss_id)
        print("TIME TAKEN", str(time.clock() - beg))
        update_th_fn(x, Y_new)

    def valid_fns(x, y):
        y = 2 * y - 1
        scores = np.asarray(score_th_fn(x)).ravel()
        pred_labels = 2 * ((scores.ravel() > 0).astype(np.float32)) - 1
        loss_fn = loss_functions.get_loss_fn(loss_id)
        loss = loss_fn(y.ravel(), pred_labels)
        print(np.sum(scores[y.ravel() == 1]))
        print(y.shape, np.sum(y), np.sum(pred_labels == 1), loss)
        return loss

    return train_fn, valid_fns
def main():
    # model parameters
    num_particles = 4
    num_hidden = 20
    interaction_param = 2.0
    ramp_up_speed = 0.01
    time_step = 0.001
    num_samples = 10000

    # initialize objects
    WaveFunction = FeedForwardNeuralNetwork(num_particles, num_hidden)
    Hamiltonian = CalogeroSutherland(WaveFunction, interaction_param, ramp_up_speed)
    Sampler = ImportanceSampling(Hamiltonian, time_step)
    Optimizer = Adam(WaveFunction)
    VMC = VariationalMonteCarlo(Optimizer, Sampler, num_samples)

    # run optimization
    VMC.minimize_energy()
def t3():
    layer_param = [(8, 3, 1, 1, 'conv'), (8, 'pool'),
                   (64, 3, 1, 1, 'conv'), (64, 3, 1, 1, 'conv'), (64, 'pool'),
                   (128, 3, 1, 1, 'conv'), (128, 3, 1, 1, 'conv'), (128, 'pool'),
                   (256, 3, 1, 1, 'conv'), (256, 3, 1, 1, 'conv'),
                   (64, 'FC')]
    # layer_param = [(6, 5, 1, 0, 'conv'),
    #                (6, 'pool'),
    #                (16, 5, 1, 0, 'conv'),
    #                (16, 'pool'),
    #                (120, 'FC'),
    #                (84, 'FC')]
    regulation = L2Regulation()
    activation = ReluActivation()
    optimizer = Adam()
    update_bn = UpdateBN()
    cnn = CnnTest2(layer_param)
    cnn.load_train_data(0.7)
    cnn.train_random_search(10, 64, 0.9, [-3.0, -4.0], optimizer, activation,
                            [-3, -5], regulation, 10, 1, update_bn, True)
def train(model, data_path, cur_iter, summ):
    input_ = Input(FLAGS.train_batch_size,
                   [FLAGS.img_height, FLAGS.img_width, FLAGS.num_channels])
    images, labels = input_(data_path)
    logits = model(images, cur_iter=cur_iter)

    # Calculate the loss of the model.
    loss = LossClassification(FLAGS.num_classes, gpu=True)(logits, labels)

    # Create an optimizer that performs gradient descent.
    optimizer = Adam(FLAGS.learning_rate, lr_decay=False,
                     lr_decay_steps=FLAGS.lr_decay_steps,
                     lr_decay_factor=FLAGS.lr_decay_factor)
    train_op = optimizer(loss)

    summ.register('train', 'train_loss', loss)
    summ.register('train', 'learning_rate', optimizer.lr)
    train_summ_op = summ('train')
    return loss, train_op, train_summ_op
def train():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
    network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

    iters_num = 20000
    train_size = x_train.shape[0]
    batch_size = 100
    train_loss_list = []
    train_acc_list = []
    test_acc_list = []
    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_list = []

    # create the optimizer once so its moment estimates persist across iterations
    optimizer = Adam()

    for i in range(iters_num):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        network.train(x_batch, t_batch, optimizer)
        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)

        if i % iter_per_epoch == 0:
            epoch_list.append(i / iter_per_epoch)
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print("train accuracy : " + str(train_acc) + ", test accuracy : " + str(test_acc))

    print("final loss : " + str(loss))
    draw_acc_graph(epoch_list, train_acc_list, test_acc_list)
    draw_loss_graph(train_loss_list)
def train(data_path, model, summ):
    input_ = Input(FLAGS.batch_size, [FLAGS.num_chans, FLAGS.num_points])
    data, labels = input_(data_path)
    '''
    print_op = tf.print("In train procedure -> label shape: ",
                        tf.shape(labels), output_stream=sys.stdout)
    with tf.control_dependencies([print_op]):
        logits = model(data)
    '''
    logits = model(data)
    loss = LossRegression()(logits, tf.expand_dims(labels, axis=-1))

    opt = Adam(FLAGS.learning_rate, lr_decay=FLAGS.lr_decay,
               lr_decay_steps=FLAGS.lr_decay_steps,
               lr_decay_factor=FLAGS.lr_decay_factor)
    train_op = opt(loss)

    summ.register('train', 'loss', loss)
    summ.register('train', 'learning_rate', opt.lr)
    train_summ_op = summ('train')
    return loss, train_op, train_summ_op
def test_update(self):
    for vector_size in range(1, 10):
        adam = Adam()
        gnn = GraphNeuralNetwork(vector_size)
        expected = gnn.params
        adam.update(gnn)
        actual = gnn.params
        self.assertEqual(expected, actual)

        gnn.grads["W"] = np.random.rand(vector_size, vector_size)
        gnn.grads["A"] = np.random.rand(vector_size)
        gnn.grads["b"] = np.random.rand(1)

        v = {}
        m = {}
        for key, grad in gnn.grads.items():
            v[key] = np.zeros_like(grad)
            m[key] = np.zeros_like(grad)
        params = copy.deepcopy(gnn.params)

        for i in range(1, 100):
            gnn.grads["W"] = np.random.rand(vector_size, vector_size)
            gnn.grads["A"] = np.random.rand(vector_size)
            gnn.grads["b"] = np.random.rand(1)
            adam.update(gnn)
            for key, param in params.items():
                m[key] = adam.beta1 * m[key] + ((1 - adam.beta1) * gnn.grads[key])
                v[key] = adam.beta2 * v[key] + ((1 - adam.beta2) * gnn.grads[key]**2)
                m_hat = m[key] / (1 - adam.beta1**i)
                v_hat = v[key] / (1 - adam.beta2**i)
                params[key] = param - adam.lr * m_hat / (np.sqrt(v_hat) + 1.0e-8)
                expected1 = repr(np.round(np.abs(params[key] - gnn.params[key]), 6))
                actual1 = repr(np.zeros_like(params[key]))
                self.assertEqual(expected1, actual1)
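# The reference computation in the test above mirrors the standard Adam update.
# As a standalone minimal sketch (hypothetical helper, assuming the usual default
# hyperparameters lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8; t starts at 1):
import numpy as np

def adam_step(param, grad, m, v, t, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    m = beta1 * m + (1 - beta1) * grad          # first-moment (mean) estimate
    v = beta2 * v + (1 - beta2) * grad ** 2     # second-moment (uncentered variance) estimate
    m_hat = m / (1 - beta1 ** t)                # bias-corrected moment estimates
    v_hat = v / (1 - beta2 ** t)
    param = param - lr * m_hat / (np.sqrt(v_hat) + eps)
    return param, m, v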
dim = 18 * 9 + 1
w = np.zeros([dim, 1])
x = np.concatenate((np.ones([12 * 471, 1]), x), axis=1).astype(float)
learning_rate = 100
iter_time = 100

# four different models:
adagrad_result, adagrad_loss = Adagrad(x, y, w, learning_rate, iter_time=1000, dim=dim)
rms_result, rms_loss = RMSProp(x, y, w, learning_rate, iter_time=1000, dim=dim)
sdg_result, sdg_loss = SGDm(x, y, w, learning_rate, iter_time=20, dim=dim)
adam_result, adam_loss = Adam(x, y, w, learning_rate, iter_time=1000, dim=dim)

# Visualization:
ax1 = plt.subplot(221)
ax1.plot(range(0, 1000, 100), adagrad_loss, color='b', linestyle=':', marker='o',
         markerfacecolor='r', markersize=6)
ax1.set_xlabel("Adagrad")
ax2 = plt.subplot(222)
ax2.plot(range(0, 1000, 100),
        # this is a dense layer
        newLayer = Dense()
        newLayer.load(layerFolder)
        model.layers.append(newLayer)
    self.layers = self.generator.layers + self.discriminator.layers


if __name__ == "__main__":
    dataset = Dataset(name="mnist", train_size=60000, test_size=10000, batch_size=128)
    LATENT_SIZE = 28 * 28

    # set the learning rate and optimizer for training
    optimizer = Adam(0.0002, 0.5)

    generator = MLP()
    generator.addLayer(
        Dense(inputDim=LATENT_SIZE,
              outputDim=256,
              activation=LeakyReLU(0.2),
              optimizer=optimizer))
    generator.addLayer(
        Dense(inputDim=256,
              outputDim=512,
              activation=LeakyReLU(0.2),
              optimizer=optimizer))
    generator.addLayer(
        Dense(inputDim=512,
              outputDim=1024,