def __init__(self, pre_path=''):
    self.all_data = ABSAData()
    self.train_iter = self.all_data.train_iter
    self.val_iter = self.all_data.val_iter
    self.test_iter = self.all_data.test_iter
    self.text_vocab = self.all_data.text_vocab
    self.aspect_vocab = self.all_data.aspect_vocab
    self.label_vocab = self.all_data.label_vocab
    self.device_dict = {
        -1: 'cpu',
        0: 'cuda:0',
        1: 'cuda:1',
        2: 'cuda:2',
    }
    self.model = config.model().to(self.device_dict[config.device])
    if config.pretrain:
        self.load_model(config.pretrain_path)
    self.criterion = config.criterion()
    # TODO: set momentum for the optimizer (momentum=0.9)
    self.optimizer = config.optimizer(
        filter(lambda p: p.requires_grad, self.model.parameters()),
        lr=config.learning_rate,
        lr_decay=config.lr_decay,
        weight_decay=0.001)
    if config.if_log:
        self.writer = SummaryWriter(log_dir=config.log_dir)
    # Create the pretrained-model folder
    if not config.pretrain:
        if pre_path != '':
            self.pre_dir = pre_path
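# Hedged sketch: the module-level `config` object the constructor above reads from.
# Only the attribute names are taken from the code; the concrete classes and values
# are illustrative assumptions (Adagrad is guessed because of the `lr_decay` keyword).
import torch.nn as nn
import torch.optim as optim


class _PlaceholderModel(nn.Module):
    """Stand-in for the project's real ABSA model class."""

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(300, 3)

    def forward(self, x):
        return self.fc(x)


class config:
    model = _PlaceholderModel
    device = -1                # index into device_dict above; -1 selects the CPU
    pretrain = False
    pretrain_path = ''
    criterion = nn.CrossEntropyLoss
    optimizer = optim.Adagrad  # assumed: Adagrad accepts the lr_decay keyword used above
    learning_rate = 0.01
    lr_decay = 0.0
    if_log = False
    log_dir = 'runs/absa'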
def __init__(self, model, train_dataset, checkpoint_path, best_model_path, log_path):
    os.environ['CUDA_VISIBLE_DEVICES'] = device_idx
    self.bare_model = model.to(device)
    self.model = nn.DataParallel(model) if num_gpu > 1 else model
    self.train_dataloader = data.DataLoader(train_dataset,
                                            batch_size=batch_size,
                                            shuffle=True,
                                            num_workers=num_workers)
    self.optim = optimizer(self.model.parameters(), lr=learning_rate)
    self.checkpoint_path = checkpoint_path
    self.best_model_path = best_model_path
    self.log_path = log_path
    # Sentinel: any real average loss will be smaller than this
    self.best_avg_loss = 987654321
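# Hedged sketch: the module-level settings the constructor above relies on.
# The names (device_idx, num_gpu, device, batch_size, num_workers, learning_rate,
# optimizer) come from the code; the values and the choice of Adam are assumptions.
import os

import torch
import torch.nn as nn
from torch import optim
from torch.utils import data

device_idx = '0,1'                       # string passed to CUDA_VISIBLE_DEVICES
num_gpu = torch.cuda.device_count()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 32
num_workers = 4
learning_rate = 1e-4
optimizer = optim.Adam                   # any torch.optim class taking (params, lr=...) works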
def train(epoch, global_step):
    optimizer = config.optimizer(steps=global_step, epoch=epoch)
    # return {'accuracy': 10.0, 'loss': 10.0}
    for batch in range(len(labels) // config.batch_size()):
        L = batch * config.batch_size()
        progress(epoch, (L * 10.0) / len(labels))
        R = L + config.batch_size()
        mini_batch_images = images[L:R]
        mini_batch_labels = labels[L:R]
        with tfe.GradientTape() as tape:
            s = tf.concat([
                tf.gather_nd(mini_batch_images, gather_map),
                tf.zeros(list(np.shape(gather_map)[:2]) + [2])
            ], -1)
            logits = model.flow(s)
            input_labels_one_hot = tf.one_hot(mini_batch_labels, 10)
            loss = tf.reduce_sum(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=input_labels_one_hot, logits=logits))
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(
            zip(grads, model.trainable_variables),
            global_step=tf.train.get_or_create_global_step())
        global_step += 1

    # Evaluation pass over the same data
    loss = 0.0
    success = 0.0
    for batch in range(len(labels) // config.batch_size()):
        L = batch * config.batch_size()
        R = L + config.batch_size()
        mini_batch_images = images[L:R]
        mini_batch_labels = labels[L:R]
        s = tf.concat([
            tf.gather_nd(mini_batch_images, gather_map),
            tf.zeros(list(np.shape(gather_map)[:2]) + [2])
        ], -1)
        logits = model.flow(s)
        classes = tf.argmax(logits, 1)
        # Recompute the one-hot labels for this mini-batch; the original reused
        # the stale tensor left over from the training loop.
        input_labels_one_hot = tf.one_hot(mini_batch_labels, 10)
        loss += tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=input_labels_one_hot, logits=logits))
        success += tf.reduce_sum(
            tf.cast(tf.equal(classes, mini_batch_labels), tf.float32))
    return {'accuracy': success * 100.0 / len(labels), 'loss': loss}
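# Hedged usage sketch: how train() above would typically be driven in TF 1.x
# eager mode. `config`, `labels` and the other module-level objects are the same
# ones the function assumes; `config.num_epochs` is an assumed field, and the
# caller must advance `global_step` itself because the function only mutates its
# local copy.
import tensorflow as tf
import tensorflow.contrib.eager as tfe

tf.enable_eager_execution()

global_step = 0
for epoch in range(config.num_epochs):
    metrics = train(epoch, global_step)
    global_step += len(labels) // config.batch_size()
    print('epoch {}: accuracy {:.2f}%, loss {:.4f}'.format(
        epoch, float(metrics['accuracy']), float(metrics['loss'])))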
def align_classification_train_crf(self, train_data, valid_data, test_data, embed, pretrain=True):
    init_aspect = np.array(np.load("initAspect.npy"))
    # init_aspect = init_aspect / np.linalg.norm(init_aspect, axis=-1, keepdims=True)
    init_aspect = torch.from_numpy(init_aspect)
    PreTrainABAE = clas_model.PreTrainABAE(init_aspect, embed).to(config.device)
    pre_trained_aspect = torch.load("AspectExtract/Aspect_Model.pkl")
    aspect_dict = PreTrainABAE.state_dict()
    pre_trained_dict = {
        k: v
        for k, v in pre_trained_aspect.items() if k in aspect_dict
    }
    aspect_dict.update(pre_trained_dict)
    PreTrainABAE.load_state_dict(aspect_dict)
    # PreTrainABAE = PreTrainABAE.eval()
    trained_aspect = pre_trained_aspect["aspect_lookup_mat"].data
    run = clas_model.CrfWdeRnnEncoder(300, 300, 50, embed, trained_aspect).to(config.device)
    # Load pretrained weights
    if pretrain is True:
        pre_trained_dict = torch.load(config.pretrained_model)
        # pre_trained_dict = torch.load(config.pretrained_model, map_location=lambda storage, loc: storage)
        model_dict = run.state_dict()
        pre_trained_dict = {
            k: v
            for k, v in pre_trained_dict.items() if k in model_dict
        }
        model_dict.update(pre_trained_dict)
        run.load_state_dict(model_dict)
    criterion = config.criterion()
    optimizer_rnn = config.optimizer(filter(lambda p: p.requires_grad, run.parameters()),
                                     lr=config.clas_lr)
    optimizer_abae = config.optimizer(filter(lambda p: p.requires_grad, PreTrainABAE.parameters()),
                                      lr=config.clas_lr)
    all_evaluate = []
    best_test = 0
    for epoch in range(config.epoch + 1):
        run_hidden = run.initHidden(config.batch_size)
        optimizer_rnn.zero_grad()
        optimizer_abae.zero_grad()
        run.zero_grad()
        for idx, sample_batch in enumerate(train_data):
            run = run.train()
            PreTrainABAE = PreTrainABAE.train()
            input_data = sample_batch['input'].to(config.device)
            label = sample_batch['label'].to(config.device)
            aspect_info, trained_aspect, reg = PreTrainABAE(input_data)
            input_data[:, 1] = aspect_info
            out = run(input_data, run_hidden, trained_aspect,
                      "train").view(config.batch_size, 2).to(config.device)
            # loss = criterion(out, label) + reg.float()
            loss = criterion(out, label)
            loss.backward()
            optimizer_rnn.step()
            optimizer_abae.step()
        print('epoch {} of {}: loss : {}'.format(epoch, config.epoch, loss))
        if epoch % 1 == 0:
            with torch.no_grad():
                total = 0
                correct = 0
                optimizer_rnn.zero_grad()
                optimizer_abae.zero_grad()
                run.zero_grad()
                PreTrainABAE.zero_grad()
                run_hidden = run.initHidden(1)
                for index, sample_batch in enumerate(valid_data):
                    run = run.eval()
                    PreTrainABAE = PreTrainABAE.eval()
                    input_data = sample_batch['input'].to(config.device)
                    label = sample_batch['label'].to(config.device)
                    aspect_info, trained_aspect, _ = PreTrainABAE(input_data)
                    input_data[:, 1] = aspect_info
                    outputs = run(input_data, run_hidden, trained_aspect,
                                  "test").view(1, 2).to(config.device)
                    _, predicted = torch.max(outputs.data, 1)
                    total += label.size(0)
                    correct += (predicted == label).sum().item()
                acc = correct / total
                print("acc rate :", acc)
                if acc > best_test:
                    best_test = acc
                    file_name = "ClassifyModelSave/Final_model.pkl"
                    file_name_aspect = "ClassifyModelSave/Final_model_aspect.pkl"
                    torch.save(run.state_dict(), file_name)
                    torch.save(PreTrainABAE.state_dict(), file_name_aspect)
                all_evaluate.append(acc)

    # Load the best models and begin the test
    PreTrainABAE_test = clas_model.PreTrainABAE(init_aspect, embed).to(config.device)
    pre_trained_aspect = torch.load("ClassifyModelSave/Final_model_aspect.pkl")
    aspect_dict = PreTrainABAE_test.state_dict()
    pre_trained_dict = {
        k: v
        for k, v in pre_trained_aspect.items() if k in aspect_dict
    }
    aspect_dict.update(pre_trained_dict)
    PreTrainABAE_test.load_state_dict(aspect_dict)
    trained_aspect = pre_trained_aspect["aspect_lookup_mat"].data
    model_test = clas_model.CrfWdeRnnEncoder(300, 300, 50, embed,
                                             trained_aspect).to(config.device)
    pre_trained_dict = torch.load("ClassifyModelSave/Final_model.pkl")
    model_dict = model_test.state_dict()
    pre_trained_dict = {
        k: v
        for k, v in pre_trained_dict.items() if k in model_dict
    }
    model_dict.update(pre_trained_dict)
    model_test.load_state_dict(model_dict)
    with torch.no_grad():
        total = 0
        correct = 0
        model_test.zero_grad()
        PreTrainABAE_test.zero_grad()
        run_hidden = model_test.initHidden(1)
        for index, sample_batch in enumerate(test_data):
            model_test = model_test.eval()
            input_data = sample_batch['input'].to(config.device)
            label = sample_batch['label'].to(config.device)
            aspect_info, trained_aspect, _ = PreTrainABAE_test(input_data)
            input_data[:, 1] = aspect_info
            outputs = model_test(input_data, run_hidden, trained_aspect,
                                 "test").view(1, 2).to(config.device)
            _, predicted = torch.max(outputs.data, 1)
            total += label.size(0)
            correct += (predicted == label).sum().item()
        acc = correct / total
        print("Test acc rate (final result) :", acc)
    return all_evaluate
def classification_train_fix(self, train_data, test_data, embed, pretrain=True):
    run = clas_model.AttentionEncoder(300, 300, 50, embed).to(config.device)
    params = []
    for param in run.parameters():
        if param.requires_grad:
            params.append(param)
    # Load pretrained weights
    if pretrain is True:
        pre_trained_dict = torch.load(
            config.pretrained_model,
            map_location=lambda storage, loc: storage)
        model_dict = run.state_dict()
        pre_trained_dict = {
            k: v
            for k, v in pre_trained_dict.items() if k in model_dict
        }
        model_dict.update(pre_trained_dict)
        run.load_state_dict(model_dict)
    optimizer = config.optimizer(params, lr=config.clas_lr)
    criterion = config.criterion()  # instantiate the loss; the original assigned the class itself
    all_evaluate = []
    for epoch in range(config.epoch):
        context = torch.ones((config.batch_size, 50))
        optimizer.zero_grad()
        for idx, sample_batch in enumerate(train_data):
            run = run.train()
            input_data = sample_batch['input'].to(config.device)
            label = sample_batch['label'].to(config.device)
            out = run(input_data, context.cuda()).view(config.batch_size,
                                                       2).to(config.device)
            loss = criterion(out, label)
            loss.backward()
            optimizer.step()
        print('epoch {} of {}: loss : {}'.format(epoch, config.epoch, loss))
        if epoch % 5 == 0:
            with torch.no_grad():
                total = 0
                correct = 0
                optimizer.zero_grad()
                run.zero_grad()
                context = torch.ones((1, 50))
                for index, sample_batch in enumerate(test_data):
                    run = run.eval()
                    input_data = sample_batch['input'].to(config.device)
                    label = sample_batch['label'].to(config.device)
                    outputs = run(input_data, context.cuda()).view(1, 2).to(config.device)
                    _, predicted = torch.max(outputs.data, 1)
                    total += label.size(0)
                    correct += (predicted == label).sum().item()
                print("acc rate :", correct / total)
                all_evaluate.append(correct / total)
    return all_evaluate
def weakly_train(self, train_data, test_pos, test_neg, embed, asp_list):
    # Earlier variant that ran a pre-trained ABAE aspect extractor in front of the encoder:
    # init_aspect = np.array(np.load("initAspect.npy"))
    # init_aspect = torch.from_numpy(init_aspect)
    # pre_train_abae = weak_model.PreTrainABAE(init_aspect, embed).to(config.device)
    # pre_trained_aspect = torch.load("AspectExtract/Aspect_Model.pkl")
    # aspect_dict = pre_train_abae.state_dict()
    # pre_trained_dict = {k: v for k, v in pre_trained_aspect.items() if k in aspect_dict}
    # aspect_dict.update(pre_trained_dict)
    # pre_train_abae.load_state_dict(aspect_dict)
    # pre_train_abae = pre_train_abae.eval()
    # trained_aspect = pre_trained_aspect["aspect_lookup_mat"].data
    # run = weak_model.WdeRnnEncoderFix(300, 300, 50, embed, trained_aspect).to(config.device)
    run = weak_model.WdeRnnEncoderFix(300, 300, 50, embed).to(config.device)
    # optimizer = optim.Adagrad(params, lr=0.003)
    # optimizer = optim.SGD(filter(lambda p: p.requires_grad, run.parameters()), lr=0.0001)
    optimizer = config.optimizer(filter(lambda p: p.requires_grad, run.parameters()),
                                 lr=config.weak_lr)
    loss_func = config.criterion(margin=config.margin, p=config.margin_p)
    for epoch in range(config.epoch):
        run_hidden = run.initHidden(config.batch_size)
        loss_last = torch.tensor([0], dtype=torch.float)
        optimizer.zero_grad()
        for idx, sample_batch in enumerate(train_data):
            run = run.train()
            input1 = sample_batch['input1'].to(config.device)
            input2 = sample_batch['input2'].to(config.device)
            input3 = sample_batch['input3'].to(config.device)
            aspect1 = sample_batch['aspect1'].to(config.device)
            aspect2 = sample_batch['aspect2'].to(config.device)
            aspect3 = sample_batch['aspect3'].to(config.device)
            # Encode the anchor, positive and negative samples
            out1 = run(input1, run_hidden, aspect1).view(config.batch_size, 300)
            out2 = run(input2, run_hidden, aspect2).view(config.batch_size, 300)
            out3 = run(input3, run_hidden, aspect3).view(config.batch_size, 300)
            # Triplet-style ranking loss
            loss_last = loss_func(out1, out2, out3)
            loss_last.backward()
            optimizer.step()
        if epoch % config.valid_step == 0:
            run.zero_grad()
            run = run.eval()
            valid_now = self.valid(asp_list, run, test_pos, test_neg, embed)
            a = round(loss_last.item(), 5)
            b = round(valid_now, 5)
            if config.save_model and valid_now > config.valid_thres:
                file_name = (config.save_model_path + "model_loss_" + str(a) +
                             "valid_" + str(b) + ".pkl")
                torch.save(run.state_dict(), file_name)
            print('epoch {} of {}: TEST : {}'.format(epoch, config.epoch, valid_now))
        print('epoch {} of {}: loss : {}'.format(epoch, config.epoch, loss_last.item()))
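# Hedged sketch: the loss/optimizer configuration weakly_train() assumes. The
# `margin` / `p` keywords match torch.nn.TripletMarginLoss, so that class is the
# natural fit for config.criterion here; all numeric values are illustrative.
import torch
import torch.nn as nn
import torch.optim as optim


class config:
    criterion = nn.TripletMarginLoss   # called as loss_func(anchor, positive, negative)
    margin = 1.0
    margin_p = 2                       # p-norm used inside the triplet loss
    optimizer = optim.Adagrad          # any optimizer taking (params, lr=...) works
    weak_lr = 0.003
    epoch = 100
    valid_step = 5
    batch_size = 64
    save_model = True
    valid_thres = 0.7
    save_model_path = "WeakModelSave/"
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')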
# Freeze all parameters, then unfreeze only the classifier head
for param in model.parameters():
    param.requires_grad = False
for param in model.classifier.parameters():
    param.requires_grad = True
model = model.to(device)

###### TRAIN NEURAL NET ######
dataloaders = {
    'train': train_loader,
    'val': val_loader,
    'test': test_loader
}
criterion = cfg.criterion
optimizer = cfg.optimizer(model.parameters(), weight_decay=0.1)

###### SAVE / LOAD MODEL #####
filename = "prediction_network_epochs" + str(num_epochs) + "_bs" + str(
    batch_size) + "_num_classes" + str(num_classes)
load_model = os.path.exists(filename)
# load_model = False
if load_model:
    model.load_state_dict(torch.load(filename))
else:
    model, val_acc_history = train_model(model,
                                         dataloaders,
                                         criterion,
                                         optimizer,
                                         num_epochs=num_epochs,
                                         filename=filename)
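# Hedged sketch of the setup this fine-tuning snippet assumes: a torchvision
# network with a `.classifier` head (VGG/DenseNet-style) matches the
# `model.classifier.parameters()` access above. `cfg`, the hyper-parameters and
# the choice of VGG-16/Adam are illustrative stand-ins, not the project's settings.
import torch
import torch.nn as nn
from torch import optim
from torchvision import models

num_classes = 10
num_epochs = 25
batch_size = 32
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = models.vgg16(pretrained=True)
# Replace the final classifier layer to match the target number of classes
model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)


class cfg:
    criterion = nn.CrossEntropyLoss()  # used as an instance, matching `criterion = cfg.criterion`
    optimizer = optim.Adam             # called as cfg.optimizer(params, weight_decay=...)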