def train_step(source_seq, target_seq, en_initial_states): """ Execute one training step (forward pass + backward pass) Args: source_seq: source sequences target_seq: input target sequences (<start> + ... + <end>) Returns: The loss value of the current pass """ loss = 0 with tf.GradientTape() as tape: en_outputs = encoder(source_seq, en_initial_states) en_states = en_outputs[1:] de_state_h, de_state_c = en_states # We need to create a loop to iterate through the target sequences for i in range(target_seq.shape[1]): # Input to the decoder must have shape of (batch_size, length) # so we need to expand one dimension decoder_in = tf.expand_dims(target_seq[:, i], 1) logit, de_state_h, de_state_c, _ = decoder( decoder_in, (de_state_h, de_state_c), en_outputs[0]) # The loss is now accumulated through the whole batch loss += loss_func(target_seq[:, i], logit) variables = encoder.trainable_variables + decoder.trainable_variables gradients = tape.gradient(loss, variables) optimizer.apply_gradients(zip(gradients, variables)) return loss / target_seq.shape[1]
def _get_loss(self, batch):
    self.model.train()
    batch = self._process_batch(batch)
    label_ids = batch['label_ids']
    logits = self.model(batch)
    loss = loss_func(logits, label_ids)
    return loss
def _get_preds(self, batch):
    self.model.eval()
    batch = self._process_batch(batch)
    with torch.no_grad():
        logits = self.model(batch)
        label_ids = batch['label_ids']
        loss_val = loss_func(logits, label_ids).item()
        label_ids, pred_ids, probs = decode(label_ids, logits)
    return label_ids, pred_ids, probs, loss_val
def main(n_samples, n_features):
    data_path = "data_{}k_{}.tsv".format(n_samples, n_features)
    n_samples *= 1000
    points, features, headings = tsv_points_features(data_path)
    n_features = features.shape[-1]

    y = points
    X = features
    w = stochastic_gradient_descent(n_features, n_samples, X, y)
    loss = loss_func(X, y, w)
    print("Loss {} samples: {}".format(n_samples, loss))
    output(w, "Q3")
def train_step(source_seq, target_seq, en_initial_states):
    loss = 0
    with tf.GradientTape() as tape:
        en_outputs = encoder(source_seq, en_initial_states)
        en_states = en_outputs[1:]
        de_states = en_states

        de_outputs = decoder(target_seq, de_states)
        logits = de_outputs[0]
        loss = loss_func(target_seq, logits)

    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    return loss
def main(n_samples, n_features):
    data_path = "data_{}k_{}.tsv".format(n_samples, n_features)
    n_samples *= 1000
    points, features, headings = tsv_points_features(data_path)
    n_features = features.shape[-1]

    y = points
    X = features
    w = np.linalg.inv(X.T @ X) @ (X.T @ y)
    loss = loss_func(X, y, w)
    print("Loss:", loss)
    output(w, "Q1")
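# A hedged side note on the normal-equation solve above (not from the original
# source): inverting X.T @ X directly is numerically fragile when X is
# ill-conditioned; np.linalg.lstsq computes the same least-squares solution with
# an SVD-based solver. `X` and `y` are the same arrays built in main().
import numpy as np

def least_squares_weights(X, y):
    # Solve min_w ||X @ w - y||^2 without forming the normal equations.
    w, _residuals, _rank, _singular_values = np.linalg.lstsq(X, y, rcond=None)
    return w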
def pretrain_generator(model_dict, optimizer_dict, scheduler_dict, dataloader,
                       vocab_size, max_norm=5.0, use_cuda=False):
    '''
    Get models, optimizers and schedulers.
    '''
    generator = model_dict["generator"]
    worker = generator.worker
    manager = generator.manager

    m_optimizer = optimizer_dict["manager"]
    w_optimizer = optimizer_dict["worker"]
    m_optimizer.zero_grad()
    w_optimizer.zero_grad()

    m_lr_scheduler = scheduler_dict["manager"]
    w_lr_scheduler = scheduler_dict["worker"]

    '''
    Perform pretrain step for real data.
    '''
    for i, sample in enumerate(dataloader):
        m_lr_scheduler.step()
        w_lr_scheduler.step()

        sample = Variable(sample)
        if use_cuda:
            sample = sample.cuda(non_blocking=True)  # `async=True` is a syntax error on Python >= 3.7

        # Calculate pretrain loss.
        pre_rets = recurrent_func("pre")(model_dict, sample, use_cuda)
        real_goal = pre_rets["real_goal"]
        prediction = pre_rets["prediction"]
        delta_feature = pre_rets["delta_feature"]

        m_loss = loss_func("pre_manager")(real_goal, delta_feature)
        torch.autograd.grad(m_loss, manager.parameters())
        clip_grad_norm_(manager.parameters(), max_norm=max_norm)  # clip_grad_norm (no underscore) is deprecated
        m_optimizer.step()
        m_optimizer.zero_grad()

        w_loss = loss_func("pre_worker")(sample, prediction, vocab_size, use_cuda)
        torch.autograd.grad(w_loss, worker.parameters())
        clip_grad_norm_(worker.parameters(), max_norm=max_norm)
        w_optimizer.step()
        w_optimizer.zero_grad()

    '''
    Update model_dict, optimizer_dict and scheduler_dict.
    '''
    generator.worker = worker
    generator.manager = manager
    model_dict["generator"] = generator

    optimizer_dict["manager"] = m_optimizer
    optimizer_dict["worker"] = w_optimizer

    scheduler_dict["manager"] = m_lr_scheduler
    scheduler_dict["worker"] = w_lr_scheduler

    return model_dict, optimizer_dict, scheduler_dict
def adversarial_train(model_dict, optimizer_dict, scheduler_dict,
                      dis_dataloader_params, vocab_size, pos_file, neg_file,
                      batch_size, gen_train_num=1, dis_train_epoch=5,
                      dis_train_num=3, max_norm=5.0, rollout_num=4,
                      use_cuda=False, temperature=1.0, epoch=1, tot_epoch=100):
    """
    Get all the models, optimizers and schedulers.
    """
    generator = model_dict["generator"]
    discriminator = model_dict["discriminator"]
    worker = generator.worker
    manager = generator.manager

    m_optimizer = optimizer_dict["manager"]
    w_optimizer = optimizer_dict["worker"]
    d_optimizer = optimizer_dict["discriminator"]

    # Why zero grad only for the manager and worker?
    m_optimizer.zero_grad()
    w_optimizer.zero_grad()

    m_lr_scheduler = scheduler_dict["manager"]
    w_lr_scheduler = scheduler_dict["worker"]
    d_lr_scheduler = scheduler_dict["discriminator"]

    # Adversarial training for the generator
    for _ in range(gen_train_num):
        m_lr_scheduler.step()
        w_lr_scheduler.step()
        m_optimizer.zero_grad()
        w_optimizer.zero_grad()

        # get all the return values
        adv_rets = recurrent_func("adv")(model_dict, use_cuda)
        real_goal = adv_rets["real_goal"]
        all_goal = adv_rets["all_goal"]
        prediction = adv_rets["prediction"]
        delta_feature = adv_rets["delta_feature"]
        delta_feature_for_worker = adv_rets["delta_feature_for_worker"]
        gen_token = adv_rets["gen_token"]
        rewards = get_rewards(model_dict, gen_token, rollout_num, use_cuda)

        m_loss = loss_func("adv_manager")(rewards, real_goal, delta_feature)
        w_loss = loss_func("adv_worker")(all_goal, delta_feature_for_worker,
                                         gen_token, prediction, vocab_size,
                                         use_cuda)

        # based on the losses, improve the parameters
        torch.autograd.grad(m_loss, manager.parameters())
        torch.autograd.grad(w_loss, worker.parameters())
        clip_grad_norm_(manager.parameters(), max_norm)
        clip_grad_norm_(worker.parameters(), max_norm)
        m_optimizer.step()
        w_optimizer.step()

        print("Adv-Manager loss: {:.5f} Adv-Worker loss: {:.5f}".format(
            m_loss, w_loss))

        del adv_rets
        del real_goal
        del all_goal
        del prediction
        del delta_feature
        del delta_feature_for_worker
        del gen_token
        del rewards

    # Adversarial training for the discriminator
    for n in range(dis_train_epoch):
        generate_samples(model_dict, neg_file, batch_size, use_cuda,
                         temperature)
        dis_dataloader_params["positive_filepath"] = pos_file
        dis_dataloader_params["negative_filepath"] = neg_file
        dataloader = dis_data_loader(**dis_dataloader_params)

        cross_entropy = nn.CrossEntropyLoss()
        if use_cuda:
            cross_entropy = cross_entropy.cuda()

        """
        for d-steps do
            Use current G, θm, θw to generate negative examples and combine
            with given positive examples S
            Train discriminator Dφ for k epochs by Eq. (2)
        end for
        """
        for _ in range(dis_train_num):
            for i, sample in enumerate(dataloader):
                data, label = sample["data"], sample["label"]
                data = Variable(data)
                label = Variable(label)
                if use_cuda:
                    data = data.cuda(non_blocking=True)  # `async=True` is a syntax error on Python >= 3.7
                    label = label.cuda(non_blocking=True)
                outs = discriminator(data)
                loss = cross_entropy(outs["score"], label.view(-1)) + \
                    discriminator.l2_loss()
                d_optimizer.zero_grad()
                d_lr_scheduler.step()
                loss.backward()
                d_optimizer.step()

        print("{}/{} Adv-Discriminator Loss: {:.5f}".format(
            n, dis_train_epoch, loss))

    # Save all changes
    model_dict["discriminator"] = discriminator
    generator.worker = worker
    generator.manager = manager
    model_dict["generator"] = generator

    optimizer_dict["manager"] = m_optimizer
    optimizer_dict["worker"] = w_optimizer
    optimizer_dict["discriminator"] = d_optimizer

    scheduler_dict["manager"] = m_lr_scheduler
    scheduler_dict["worker"] = w_lr_scheduler
    scheduler_dict["discriminator"] = d_lr_scheduler  # fixed key typo: was "disciminator"

    return model_dict, optimizer_dict, scheduler_dict
def pretrain_generator(model_dict, optimizer_dict, scheduler_dict, dataloader,
                       vocab_size, max_norm=5.0, use_cuda=False, epoch=1,
                       tot_epochs=100):
    # get the models of the generator
    generator = model_dict["generator"]
    worker = generator.worker
    manager = generator.manager

    # get the optimizers
    m_optimizer = optimizer_dict["manager"]
    w_optimizer = optimizer_dict["worker"]
    m_optimizer.zero_grad()
    w_optimizer.zero_grad()

    m_lr_scheduler = scheduler_dict["manager"]
    w_lr_scheduler = scheduler_dict["worker"]

    """
    Perform pretrain step for real data
    """
    for i, sample in enumerate(dataloader):
        # print("DataLoader: {}".format(dataloader))
        m_lr_scheduler.step()
        w_lr_scheduler.step()

        sample = Variable(sample)
        if use_cuda:
            sample = sample.cuda(non_blocking=True)  # `async=True` is a syntax error on Python >= 3.7

        # Calculate pretrain loss.
        # Sometimes a batch smaller than 64 (e.g. 16) is passed, so this check skips it.
        if sample.size() == torch.zeros([64, 20]).size():
            # print("Sample size: {}".format(sample.size()))
            pre_rets = recurrent_func("pre")(model_dict, sample, use_cuda)
            real_goal = pre_rets["real_goal"]
            prediction = pre_rets["prediction"]
            delta_feature = pre_rets["delta_feature"]

            m_loss = loss_func("pre_manager")(real_goal, delta_feature)
            torch.autograd.grad(m_loss, manager.parameters())
            clip_grad_norm_(manager.parameters(), max_norm=max_norm)
            m_optimizer.step()
            m_optimizer.zero_grad()

            w_loss = loss_func("pre_worker")(sample, prediction, vocab_size,
                                             use_cuda)
            torch.autograd.grad(w_loss, worker.parameters())
            clip_grad_norm_(worker.parameters(), max_norm=max_norm)
            w_optimizer.step()
            w_optimizer.zero_grad()

            if i == 63:
                print("Pre-Manager Loss: {:.5f}, Pre-Worker Loss: {:.5f}\n"
                      .format(m_loss, w_loss))

    """
    Update model_dict, optimizer_dict, and scheduler_dict
    """
    generator.worker = worker  # fixed typo: was `generator.woroker`
    generator.manager = manager
    model_dict["generator"] = generator

    optimizer_dict["manager"] = m_optimizer
    optimizer_dict["worker"] = w_optimizer

    scheduler_dict["manager"] = m_lr_scheduler
    scheduler_dict["worker"] = w_lr_scheduler

    return model_dict, optimizer_dict, scheduler_dict
def pretrain_generator(model_dict, optimizer_dict, scheduler_dict, dataloader,
                       vocab_size, max_norm=5.0, use_cuda=False, epoch=1,
                       tot_epochs=100):
    # get the models of the generator
    generator = model_dict["generator"]
    worker = generator.worker
    manager = generator.manager

    # get the optimizers
    m_optimizer = optimizer_dict["manager"]
    w_optimizer = optimizer_dict["worker"]
    m_optimizer.zero_grad()
    w_optimizer.zero_grad()

    m_lr_scheduler = scheduler_dict["manager"]
    w_lr_scheduler = scheduler_dict["worker"]

    """
    Perform pretrain step for real data
    """
    for i, sample in enumerate(dataloader):
        # print("DataLoader: {}".format(dataloader))
        m_lr_scheduler.step()
        w_lr_scheduler.step()

        sample = Variable(sample)
        if use_cuda:
            sample = sample.cuda()

        # Calculate pretrain loss.
        # Sometimes a batch smaller than 64 (e.g. 16) is passed, so this check skips it.
        # Could the check below be cheaper? Testing only whether size(0) == 64 should be enough.
        if sample.size() == torch.zeros([64, 20]).size():
            # print("Sample size: {}".format(sample.size()))  # sample: [batch_size, seq_len]
            pre_rets = recurrent_func("pre")(model_dict, sample, use_cuda)
            real_goal = pre_rets["real_goal"]
            prediction = pre_rets["prediction"]
            delta_feature = pre_rets["delta_feature"]

            # real_goal and delta_feature give the manager loss;
            # prediction and sample give the worker loss.
            m_loss = loss_func("pre_manager")(real_goal, delta_feature)
            # Doesn't this line leave the .grad of manager.parameters() unchanged? What is it for?
            torch.autograd.grad(m_loss, manager.parameters())
            clip_grad_norm_(manager.parameters(), max_norm=max_norm)
            m_optimizer.step()
            m_optimizer.zero_grad()
            # My current reading of the four lines above: the first two exist only so the
            # gradients can be clipped. This also differs from the usual training loop, which is
            # normally optimizer.zero_grad() -> loss -> loss.backward() -> optimizer.step().
            # Isn't loss.backward() needed here?

            w_loss = loss_func("pre_worker")(sample, prediction, vocab_size,
                                             use_cuda)
            # This computes d(w_loss)/d(worker.parameters()), but the result is not
            # assigned to any variable.
            torch.autograd.grad(w_loss, worker.parameters())
            # Why is this hyperparameter set to 5?
            clip_grad_norm_(worker.parameters(), max_norm=max_norm)
            w_optimizer.step()
            w_optimizer.zero_grad()

            if i == 63:
                print("Pre-Manager Loss: {:.5f}, Pre-Worker Loss: {:.5f}\n"
                      .format(m_loss, w_loss))

    """
    Update model_dict, optimizer_dict, and scheduler_dict
    """
    generator.worker = worker  # fixed typo: was `generator.woroker`
    generator.manager = manager
    model_dict["generator"] = generator

    optimizer_dict["manager"] = m_optimizer
    optimizer_dict["worker"] = w_optimizer

    scheduler_dict["manager"] = m_lr_scheduler
    scheduler_dict["worker"] = w_lr_scheduler

    return model_dict, optimizer_dict, scheduler_dict
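# A self-contained sketch addressing the questions in the translated comments above
# (not part of the original source): torch.autograd.grad(loss, params) returns
# gradients but does not write them into param.grad, so the following optimizer.step()
# has nothing new to apply. The conventional recipe is zero_grad -> backward ->
# clip_grad_norm_ -> step; the toy names below are hypothetical and only illustrate
# the pattern, not the original authors' intent.
import torch
import torch.nn as nn
from torch.nn.utils import clip_grad_norm_

toy_model = nn.Linear(4, 2)
toy_optimizer = torch.optim.Adam(toy_model.parameters(), lr=1e-3)
toy_input, toy_target = torch.randn(8, 4), torch.randn(8, 2)

toy_optimizer.zero_grad()
toy_loss = nn.functional.mse_loss(toy_model(toy_input), toy_target)
toy_loss.backward()                                    # populates .grad on the parameters
clip_grad_norm_(toy_model.parameters(), max_norm=5.0)  # clip before the update
toy_optimizer.step()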
def run(trainFile, trainLabelFile, testFile, testLabelFile, groupFile,
        suspFile, featureDistribution, loss):
    # reset graph
    tf.reset_default_graph()

    # Network Parameters
    n_input = numpy.array(featureDistribution).max()
    n_steps = len(featureDistribution)
    n_hidden = numpy.array(featureDistribution).max()
    n_classes = 2  # number of output classes

    # tf Graph input
    x = tf.placeholder("float", [None, n_steps, n_input])
    y = tf.placeholder("float", [None, n_classes])
    g = tf.placeholder(tf.int32, [None, 1])
    # dropout
    keep_prob = tf.placeholder(tf.float32)

    # Define weights
    weights = {
        # Hidden layer weights => 2*n_hidden because of forward + backward cells
        'out': tf.Variable(tf.random_normal([2 * n_hidden, n_classes]))
    }
    biases = {
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

    pred = BiRNN(x, weights, biases, n_hidden, n_steps, keep_prob)

    # Evaluate model
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # load datasets
    datasets = input.read_data_sets(trainFile, trainLabelFile, testFile,
                                    testLabelFile, groupFile)

    # load test data
    test_data = myrnn.fillMatrix(datasets.test.instances, featureDistribution)
    test_data = test_data.reshape((-1, n_steps, n_input))
    test_label = datasets.test.labels

    # Define loss and optimizer
    variables = tf.trainable_variables()
    regularizer = tf.add_n([
        tf.nn.l2_loss(v) for v in variables if 'bias' not in v.name
    ]) * L2_value  # l2 regularization
    cost = ut.loss_func(pred, y, loss, datasets, g)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost + regularizer)

    init = tf.global_variables_initializer()

    # Launch the graph
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(init)
        step = 1
        # Keep training until reaching max iterations
        # while step * batch_size < training_epochs:
        total_batch = int(datasets.train.num_instances / batch_size)
        for epoch in range(training_epochs):
            avg_cost = 0.
            # Loop over all batches
            for i in range(total_batch):
                batch_x, batch_y, batch_g = datasets.train.next_batch(batch_size)
                # Reshape data to (batch_size, n_steps, n_input)
                batch_x = myrnn.fillMatrix(batch_x, featureDistribution)
                batch_x = batch_x.reshape((batch_size, n_steps, n_input))
                # Run optimization op (backprop)
                _, c = sess.run([optimizer, cost],
                                feed_dict={x: batch_x, y: batch_y, g: batch_g,
                                           keep_prob: dropout_rate})
                # Compute average loss
                avg_cost += c / total_batch
                if epoch % display_step == 0 and i == (total_batch - 1):
                    print("Epoch " + str(epoch + 1) + ", cost = " +
                          "{:.6f}".format(avg_cost))
            if epoch % dump_step == (dump_step - 1):
                res = sess.run(tf.nn.softmax(pred),
                               feed_dict={x: test_data, y: test_label,
                                          keep_prob: 1.0})
                with open(suspFile + '-' + str(epoch + 1), 'w') as f:
                    for susp in res[:, 0]:
                        f.write(str(susp) + '\n')
        print("Optimization Finished!")
def train(float_model, predict, pred_dir, tblogs_dir, batchsize, learnrate,
          epochs):
    '''
    Variational encoder model
    '''
    image_dim = 28
    image_chan = 1
    input_layer = Input(shape=(image_dim, image_dim, image_chan))
    encoder_mu, encoder_log_variance, encoder_z = encoder.call(input_layer)
    dec_out = decoder.call(encoder_z)
    model = Model(inputs=input_layer, outputs=dec_out)

    '''
    Prepare MNIST dataset
    '''
    x_train, x_test, x_train_noisy, x_test_noisy = mnist_download()
    train_dataset = input_fn((x_train_noisy, x_train), batchsize, True)
    test_dataset = input_fn((x_test_noisy, x_test), batchsize, False)
    predict_dataset = input_fn((x_test_noisy), batchsize, False)

    '''
    Callbacks
    '''
    tb_call = TensorBoard(log_dir=tblogs_dir)
    chkpt_call = ModelCheckpoint(filepath=float_model,
                                 monitor='val_mse',
                                 mode='min',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True)
    callbacks_list = [tb_call, chkpt_call]

    '''
    Compile
    '''
    model.compile(optimizer=Adam(lr=learnrate),
                  loss=lambda y_true, y_predict: loss_func(
                      y_true, y_predict, encoder_mu, encoder_log_variance),
                  metrics=['mse'])

    '''
    Training
    '''
    print(_DIVIDER)
    print('Training...')
    print(_DIVIDER)

    # make folder for saving trained model checkpoint
    os.makedirs(os.path.dirname(float_model), exist_ok=True)

    # remake TensorBoard logs folder
    shutil.rmtree(tblogs_dir, ignore_errors=True)
    os.makedirs(tblogs_dir)

    train_history = model.fit(train_dataset,
                              epochs=epochs,
                              steps_per_epoch=len(x_train) // batchsize,
                              validation_data=test_dataset,
                              callbacks=callbacks_list,
                              verbose=1)

    '''
    Predictions
    '''
    if (predict):
        print(_DIVIDER)
        print('Making predictions...')
        print(_DIVIDER)

        # remake predictions folder
        shutil.rmtree(pred_dir, ignore_errors=True)
        os.makedirs(pred_dir)

        with custom_object_scope({'Sampling': Sampling}):
            model = load_model(float_model,
                               compile=False,
                               custom_objects={'Sampling': Sampling})
            model.compile(loss=lambda y_true, y_predict: loss_func(
                y_true, y_predict, encoder_mu, encoder_log_variance))

        predictions = model.predict(predict_dataset, verbose=1)

        # scale pixel values back up to range 0:255 then save as PNG
        for i in range(20):
            cv2.imwrite(pred_dir + '/pred_' + str(i) + '.png',
                        predictions[i] * 255.0)
            cv2.imwrite(pred_dir + '/input_' + str(i) + '.png',
                        x_test_noisy[i] * 255.0)
        print('Inputs and Predictions saved as images in ./' + pred_dir)

    print("\nTensorBoard can be opened with the command: "
          "tensorboard --logdir=./tb_logs --host localhost --port 6006")
    return
def run(trainFile, trainLabelFile, testFile, testLabelFile, groupFile,
        suspFile, loss, featureNum, nodeNum):
    tf.reset_default_graph()

    # Network Parameters
    n_classes = 2  # total output classes (0 or 1)
    n_input = featureNum  # total number of input features
    n_hidden_1 = nodeNum  # 1st layer number of nodes

    train_writer = tf.summary.FileWriter("./log", graph=tf.get_default_graph())

    # tf Graph input
    x = tf.placeholder("float", [None, 226])
    spec = tf.placeholder("float", [None, 34])
    mutation1 = tf.placeholder("float", [None, 35])
    mutation2 = tf.placeholder("float", [None, 35])
    mutation3 = tf.placeholder("float", [None, 35])
    mutation4 = tf.placeholder("float", [None, 35])
    mutation = tf.placeholder("float", [None, 140])
    complexity = tf.placeholder("float", [None, 37])
    similarity = tf.placeholder("float", [None, 15])
    y = tf.placeholder("float", [None, n_classes])
    g = tf.placeholder(tf.int32, [None, 1])
    is_training = tf.placeholder(tf.bool, name='is_training')

    # dropout parameter
    keep_prob = tf.placeholder(tf.float32)

    # Construct model
    pred = mutation_spec_first(spec, mutation1, mutation2, mutation3,
                               mutation4, complexity, similarity, keep_prob,
                               is_training)

    datasets = input.read_data_sets(trainFile, trainLabelFile, testFile,
                                    testLabelFile, groupFile)

    # Define loss and optimizer
    regu_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    y = tf.stop_gradient(y)
    cost = ut.loss_func(pred, y, loss, datasets, g)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    summary_op = tf.summary.merge_all()
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(cost + regu_losses)

    # Initializing the variables
    init = tf.global_variables_initializer()

    # Launch the graph
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(init)

        # Training cycle
        for epoch in range(training_epochs):
            avg_cost = 0.
            total_batch = int(datasets.train.num_instances / batch_size)
            # Loop over all batches
            for i in range(total_batch):
                batch_x, batch_y, batch_g = datasets.train.next_batch(
                    batch_size)
                # Run optimization op (backprop) and cost op (to get loss value)
                _, c, regu_loss = sess.run(
                    [optimizer, cost, regu_losses],
                    feed_dict={
                        spec: batch_x[:, :34],
                        mutation1: batch_x[:, 34:69],
                        mutation2: batch_x[:, 69:104],
                        mutation3: batch_x[:, 104:139],
                        mutation4: batch_x[:, 139:174],
                        complexity: batch_x[:, 174:211],
                        similarity: batch_x[:, -15:],
                        y: batch_y,
                        g: batch_g,
                        keep_prob: dropout_rate,
                        is_training: True
                    })
                # Compute average loss
                avg_cost += c / total_batch

            # Display logs per epoch step
            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch + 1), "cost=",
                      "{:.9f}".format(avg_cost), ", l2 loss= ",
                      numpy.sum(regu_loss))
            if epoch % dump_step == (dump_step - 1):
                # Write result
                res, step_summary = sess.run(
                    [tf.nn.softmax(pred), summary_op],
                    feed_dict={
                        spec: datasets.test.instances[:, :34],
                        mutation1: datasets.test.instances[:, 34:69],
                        mutation2: datasets.test.instances[:, 69:104],
                        mutation3: datasets.test.instances[:, 104:139],
                        mutation4: datasets.test.instances[:, 139:174],
                        complexity: datasets.test.instances[:, 174:211],
                        similarity: datasets.test.instances[:, -15:],
                        y: datasets.test.labels,
                        keep_prob: 1.0,
                        is_training: False
                    })
                train_writer.add_summary(step_summary)
                with open(suspFile + '-' + str(epoch + 1), 'w') as f:
                    for susp in res[:, 0]:
                        f.write(str(susp) + '\n')
dataloader = DataLoader(dataset,
                        batch_size=cfg.TRAIN_BATCH_SIZE,
                        shuffle=True,
                        num_workers=cfg.NUM_WORKERS)

net = Net().to(device)
optimizer = torch.optim.Adam(net.parameters())

for epoch in range(cfg.EPOCH):
    for i, (target_13, target_26, target_52, input) in enumerate(dataloader):
        target_13, target_26, target_52, input = (target_13.to(device),
                                                  target_26.to(device),
                                                  target_52.to(device),
                                                  input.to(device))

        output_13, output_26, output_52 = net(input)
        loss_13 = utils.loss_func(output_13, target_13, cfg.alpha)
        loss_26 = utils.loss_func(output_26, target_26, cfg.alpha)
        loss_52 = utils.loss_func(output_52, target_52, cfg.alpha)
        loss = loss_13 + loss_26 + loss_52  # total loss over the three scales

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(" loss:", loss.item(), " loss_13:", loss_13.item(),
              " loss_26:", loss_26.item(), "loss_52", loss_52.item())

    if epoch % 10 == 0:
        torch.save(net, "net.pth")
        print("epoch {} save success".format(epoch))
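# A hedged alternative to the checkpointing above (not from the original source):
# torch.save(net, "net.pth") pickles the whole module, which ties the checkpoint to
# the exact class definition. Saving the state_dict is the more portable pattern;
# "net_state.pth" is a hypothetical filename.
torch.save(net.state_dict(), "net_state.pth")
restored_net = Net().to(device)
restored_net.load_state_dict(torch.load("net_state.pth", map_location=device))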
def test_loss_func(use_cuda=False):
    '''
    Prepare model_dict.
    '''
    model_dict = prepare_model_dict(use_cuda)
    generator = model_dict["generator"]
    worker = generator.worker
    manager = generator.manager

    '''
    Prepare some fake data.
    '''
    dataloader = prepare_fake_data()

    '''
    Start testing all recurrent functions.
    '''
    m_optimizer = optim.Adam(manager.parameters(), lr=0.001)
    w_optimizer = optim.Adam(worker.parameters(), lr=0.001)
    m_optimizer.zero_grad()
    w_optimizer.zero_grad()

    for i, sample in enumerate(dataloader):
        sample = Variable(sample)
        if use_cuda:
            sample = sample.cuda(non_blocking=True)  # `async=True` is a syntax error on Python >= 3.7

        # Test pre.
        pre_rets = recurrent_func("pre")(model_dict, sample, use_cuda)
        real_goal = pre_rets["real_goal"]
        prediction = pre_rets["prediction"]
        delta_feature = pre_rets["delta_feature"]

        m_loss = loss_func("pre_manager")(real_goal, delta_feature)
        torch.autograd.grad(m_loss, manager.parameters())
        nn.utils.clip_grad_norm_(manager.parameters(), max_norm=5.0)  # clip_grad_norm (no underscore) is deprecated
        m_optimizer.step()
        m_optimizer.zero_grad()

        w_loss = loss_func("pre_worker")(sample, prediction, 5000, use_cuda)
        torch.autograd.grad(w_loss, worker.parameters())
        nn.utils.clip_grad_norm_(worker.parameters(), max_norm=5.0)
        w_optimizer.step()
        w_optimizer.zero_grad()

        # .item() replaces the old 0-dim indexing `loss.data[0]`
        print("pre_m_loss={}, pre_w_loss={}".format(m_loss.item(), w_loss.item()))
        print("Pretrain loss function test finished!")
        print("\n")

        # Test adv.
        adv_rets = recurrent_func('adv')(model_dict, use_cuda)
        real_goal = adv_rets["real_goal"]
        all_goal = adv_rets["all_goal"]
        prediction = adv_rets["prediction"]
        delta_feature = adv_rets["delta_feature"]
        delta_feature_for_worker = adv_rets["delta_feature_for_worker"]
        gen_token = adv_rets["gen_token"]
        rewards = get_rewards(model_dict, gen_token, 4, use_cuda)

        m_loss = loss_func("adv_manager")(rewards, real_goal, delta_feature)
        w_loss = loss_func("adv_worker")(all_goal, delta_feature_for_worker,
                                         gen_token, prediction, 5000, use_cuda)

        m_optimizer = optim.Adam(manager.parameters(), lr=0.001)
        w_optimizer = optim.Adam(worker.parameters(), lr=0.001)
        m_optimizer.zero_grad()
        w_optimizer.zero_grad()
        torch.autograd.grad(m_loss, manager.parameters())
        torch.autograd.grad(w_loss, worker.parameters())
        nn.utils.clip_grad_norm_(manager.parameters(), max_norm=5.0)
        nn.utils.clip_grad_norm_(worker.parameters(), max_norm=5.0)
        m_optimizer.step()
        w_optimizer.step()

        print("adv_m_loss={}, adv_w_loss={}".format(m_loss.item(), w_loss.item()))
        print("Adversarial training loss function test finished!")
        print("\n")

        if i > 0:
            break
def adversarial_train(model_dict, optimizer_dict, scheduler_dict,
                      dis_dataloader_params, vocab_size, positive_file,
                      negative_file, num_batches, gen_train_num=1,
                      dis_train_epoch=5, dis_train_num=3, max_norm=5.0,
                      rollout_num=4, use_cuda=False, temperature=1.0):
    '''
    Get models, optimizers and schedulers.
    '''
    generator = model_dict["generator"]
    discriminator = model_dict["discriminator"]
    worker = generator.worker
    manager = generator.manager

    m_optimizer = optimizer_dict["manager"]
    w_optimizer = optimizer_dict["worker"]
    d_optimizer = optimizer_dict["discriminator"]
    m_optimizer.zero_grad()
    w_optimizer.zero_grad()

    m_lr_scheduler = scheduler_dict["manager"]
    w_lr_scheduler = scheduler_dict["worker"]
    d_lr_scheduler = scheduler_dict["discriminator"]

    '''
    Adversarial train for generator.
    '''
    for _ in range(gen_train_num):
        m_lr_scheduler.step()
        w_lr_scheduler.step()
        m_optimizer.zero_grad()
        w_optimizer.zero_grad()

        adv_rets = recurrent_func('adv')(model_dict, use_cuda)
        real_goal = adv_rets["real_goal"]
        all_goal = adv_rets["all_goal"]
        prediction = adv_rets["prediction"]
        delta_feature = adv_rets["delta_feature"]
        delta_feature_for_worker = adv_rets["delta_feature_for_worker"]
        gen_token = adv_rets["gen_token"]
        rewards = get_rewards(model_dict, gen_token, rollout_num, use_cuda)

        m_loss = loss_func("adv_manager")(rewards, real_goal, delta_feature)
        w_loss = loss_func("adv_worker")(all_goal, delta_feature_for_worker,
                                         gen_token, prediction, vocab_size,
                                         use_cuda)
        torch.autograd.grad(m_loss, manager.parameters())
        torch.autograd.grad(w_loss, worker.parameters())
        clip_grad_norm_(manager.parameters(), max_norm=max_norm)  # clip_grad_norm (no underscore) is deprecated
        clip_grad_norm_(worker.parameters(), max_norm=max_norm)
        m_optimizer.step()
        w_optimizer.step()

        del adv_rets
        del real_goal
        del all_goal
        del prediction
        del delta_feature
        del delta_feature_for_worker
        del gen_token
        del rewards

    '''
    Adversarial train for discriminator.
    '''
    for _ in range(dis_train_epoch):
        generate_samples(model_dict, negative_file, num_batches, use_cuda,
                         temperature)
        dis_dataloader_params["positive_filepath"] = positive_file
        dis_dataloader_params["negative_filepath"] = negative_file
        dataloader = dis_data_loader(**dis_dataloader_params)

        cross_entropy = nn.CrossEntropyLoss()
        if use_cuda:
            cross_entropy = cross_entropy.cuda()

        for _ in range(dis_train_num):
            for i, sample in enumerate(dataloader):
                data, label = sample["data"], sample["label"]
                data = Variable(data)
                label = Variable(label)
                if use_cuda:
                    data = data.cuda(non_blocking=True)  # `async=True` is a syntax error on Python >= 3.7
                    label = label.cuda(non_blocking=True)
                outs = discriminator(data)
                loss = cross_entropy(outs["score"], label.view(-1)) + \
                    discriminator.l2_loss()
                d_optimizer.zero_grad()
                d_lr_scheduler.step()
                loss.backward()
                d_optimizer.step()

    model_dict["discriminator"] = discriminator
    generator.worker = worker
    generator.manager = manager
    model_dict["generator"] = generator

    optimizer_dict["manager"] = m_optimizer
    optimizer_dict["worker"] = w_optimizer
    optimizer_dict["discriminator"] = d_optimizer

    scheduler_dict["manager"] = m_lr_scheduler
    scheduler_dict["worker"] = w_lr_scheduler
    scheduler_dict["discriminator"] = d_lr_scheduler

    return model_dict, optimizer_dict, scheduler_dict
def run(trainFile, trainLabelFile, testFile, testLabelFile, groupFile,
        suspFile, loss, featureNum, nodeNum):
    tf.reset_default_graph()

    # Network Parameters
    n_classes = 2  # total output classes (0 or 1)
    n_input = featureNum  # total number of input features
    n_hidden_1 = nodeNum  # 1st layer number of nodes

    # tf Graph input
    x = tf.placeholder("float", [None, n_input])
    y = tf.placeholder("float", [None, n_classes])
    g = tf.placeholder(tf.int32, [None, 1])
    # dropout parameter
    keep_prob = tf.placeholder(tf.float32)

    # Store layers weight & bias
    weights = {
        'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
        'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden_1])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

    # Construct model
    pred = multilayer_perceptron(x, weights, biases, keep_prob)

    datasets = input.read_data_sets(trainFile, trainLabelFile, testFile,
                                    testLabelFile, groupFile)

    # Define loss and optimizer
    variables = tf.trainable_variables()
    regularizer = (tf.nn.l2_loss(weights['h1']) +
                   tf.nn.l2_loss(weights['out'])) * L2_value  # l2 regularization
    cost = ut.loss_func(pred, y, loss, datasets, g)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost + regularizer)

    # Initializing the variables
    init = tf.global_variables_initializer()

    # Launch the graph
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(init)

        # Training cycle
        for epoch in range(training_epochs):
            avg_cost = 0.
            total_batch = int(datasets.train.num_instances / batch_size)
            # Loop over all batches
            for i in range(total_batch):
                batch_x, batch_y, batch_g = datasets.train.next_batch(batch_size)
                # Run optimization op (backprop) and cost op (to get loss value)
                _, c = sess.run([optimizer, cost],
                                feed_dict={x: batch_x, y: batch_y, g: batch_g,
                                           keep_prob: dropout_rate})
                # Compute average loss
                avg_cost += c / total_batch

            # Display logs per epoch step
            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch + 1), "cost=",
                      "{:.9f}".format(avg_cost))
            if epoch % dump_step == (dump_step - 1):
                # Write result
                res = sess.run(tf.nn.softmax(pred),
                               feed_dict={x: datasets.test.instances,
                                          y: datasets.test.labels,
                                          keep_prob: 1.0})
                with open(suspFile + '-' + str(epoch + 1), 'w') as f:
                    for susp in res[:, 0]:
                        f.write(str(susp) + '\n')

        print("Optimization Finished!")
def main(args): dist.init_process_group(backend="nccl") torch.cuda.set_device(args.local_rank) ds = dataset(args.data_file, args.class_file, config) sampler = torch.utils.data.distributed.DistributedSampler(ds, shuffle=True) dl = DataLoader(ds, batch_size=args.batch_size, num_workers=args.num_workers, collate_fn=collate_fn, pin_memory=True, drop_last=False, sampler=sampler) batch_save_path = f"{args.model_dir}/batch_4.pth" epoch_save_path = f"{args.model_dir}/epoch_4.pth" net = model(config, ds.num_classes) if os.path.isfile(batch_save_path): log("载入模型中...", args.log_detail_path, args) try: net.load_state_dict(torch.load(batch_save_path)) log("模型载入完成!", args.log_detail_path, args) except Exception as e: log(f"{e}\n载入模型失败: {batch_save_path}", args.log_detail_path, args) else: log(f"没找到模型: {batch_save_path}", args.log_detail_path, args) config["cuda"] = config["cuda"] and torch.cuda.is_available() if config["cuda"]: # net = torch.nn.DataParallel(net.cuda()) net = torch.nn.parallel.DistributedDataParallel( net.cuda(), device_ids=[args.local_rank]) log("cuda", args.log_detail_path, args) criterion = loss_func(config) optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1, verbose=True, factor=args.lr_decay, threshold=1e-3) schedule_loss = [] net.train() for epoch in range(1, args.epochs + 1): log(f"{'='*30}\n[{epoch}|{args.epochs}]", args.log_detail_path, args) for num_batch, batch_data in enumerate(dl, 1): t = time.time() loss, box_loss, landmark_loss, cls_loss = train_batch( net, batch_data, criterion, optimizer, config["cuda"], args) t = time.time() - t loss, box_loss, landmark_loss, cls_loss = [ reduce_tensor(i).item() for i in [loss, box_loss, landmark_loss, cls_loss] ] msg = f" [{epoch}|{args.epochs}] num_batch:{num_batch}" \ + f" loss:{loss:.4f} box_loss:{box_loss:.4f} landmark_loss:{landmark_loss:.4f} cls_loss:{cls_loss:.4f} time:{t*1000:.1f}ms" log(msg, args.log_detail_path, args) if num_batch % args.num_show == 0: log(msg, args.log_path, args) if args.local_rank == 0: if num_batch % args.num_save == 0: save_model(net, batch_save_path) schedule_loss += [loss] if num_batch % args.num_adjuest_lr == 0: scheduler.step(np.mean(schedule_loss)) schedule_loss = [] if args.local_rank == 0: save_model(net, epoch_save_path)