def train(word2vec, entity2id, relation2id, bs=50, hd=100):
    """Train the ER_HG relation-extraction model on sentence bags plus KG paths.

    Args:
        word2vec: pretrained word-embedding matrix passed to ER_HG.
        entity2id: entity -> id mapping (only its size is used here).
        relation2id: relation -> id mapping (only its size is used here).
        bs: batch size, in bags per optimization step.
        hd: hidden dimension of the model.

    Side effects: prints progress, saves checkpoints under ../data/model/.
    Relies on module-level names: cuda, optim, Variable, np, torch, datetime,
    load_train, load_train_path, load_train_kg, ER_HG, and a project-level
    `eval` function (shadows the builtin — presumably returns average
    precision; verify against its definition).
    """
    # Direct sentence-level supervision: bags of token ids + position features.
    train_bag, train_label, train_pos1, train_pos2, train_entity = load_train()
    # Two-hop textual paths: path A (head->mid) and path B (mid->tail) bags.
    pa_bag, pa_label, pa_pos1, pa_pos2, pb_bag, pb_label, pb_pos1, pb_pos2, mid_entity = load_train_path()
    # Knowledge-graph side information aligned with the training pairs.
    kg_mid_entity, kg_path_relation = load_train_kg()
    entity_type = np.load("../data/type/train_ht_type.npy")
    # NOTE(review): pa_label / pb_label are reshaped here but never read below.
    pa_label = pa_label.reshape((-1, 100))
    pb_label = pb_label.reshape((-1, 100))
    # test_bag, test_label, test_pos1, test_pos2 = load_test()
    # model = torch.load("../data/model/sentence_model_4")
    model = ER_HG(word2vec, len(entity2id), len(relation2id), hd, bs)
    if cuda:
        model.cuda()
    # loss_function = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
    maxap = 0     # best AP observed so far
    patient = 0   # epochs since last improvement (early-stopping counter)
    maxepoch = 0  # epoch index that produced the best AP
    for epoch in range(50):
        # Shuffle bag indices each epoch instead of shuffling the arrays themselves.
        temp_order = list(range(len(train_bag)))
        np.random.shuffle(temp_order)
        # train_bag, train_label, train_pos1, train_pos2 = shuffle(train_bag, train_label, train_pos1, train_pos2)
        running_loss = 0.0
        print("每个epoch需要多个instance" + str(len(train_bag)))
        starttime = datetime.datetime.now()
        for i in range(int(len(train_bag) / bs)):
            optimizer.zero_grad()
            # 1. direct sentence encode
            index = temp_order[i * bs:(i + 1) * bs]
            batch_word = train_bag[index]
            batch_label = train_label[index]
            batch_pos1 = train_pos1[index]
            batch_pos2 = train_pos2[index]
            batch_entity = train_entity[index]
            # Flatten the bags into one long sequence tensor; `shape` records
            # the cumulative bag boundaries so the encoder can split it back.
            seq_word = Variable(
                torch.LongTensor(
                    np.array([s for bag in batch_word for s in bag])))
            seq_pos1 = Variable(
                torch.LongTensor(
                    np.array([s for bag in batch_pos1 for s in bag])))
            seq_pos2 = Variable(
                torch.LongTensor(
                    np.array([s for bag in batch_pos2 for s in bag])))
            seq_entity = Variable(torch.LongTensor(np.array(batch_entity)))
            if cuda:
                seq_word = seq_word.cuda()
                seq_pos1 = seq_pos1.cuda()
                seq_pos2 = seq_pos2.cuda()
                seq_entity = seq_entity.cuda()
            batch_length = [len(bag) for bag in batch_word]
            shape = [0]
            for j in range(len(batch_length)):
                shape.append(shape[j] + batch_length[j])
            _, sen_0, _ = model.sentence_encoder(seq_word, seq_pos1, seq_pos2, shape)
            sen_0 = torch.stack(sen_0)
            sen_0 = torch.squeeze(sen_0)
            # 2.1 path A encode (same flatten-and-split pattern as above)
            batch_word = pa_bag[index]
            batch_pos1 = pa_pos1[index]
            batch_pos2 = pa_pos2[index]
            seq_word = Variable(
                torch.LongTensor(
                    np.array([s for bag in batch_word for s in bag])))
            seq_pos1 = Variable(
                torch.LongTensor(
                    np.array([s for bag in batch_pos1 for s in bag])))
            seq_pos2 = Variable(
                torch.LongTensor(
                    np.array([s for bag in batch_pos2 for s in bag])))
            if cuda:
                seq_word = seq_word.cuda()
                seq_pos1 = seq_pos1.cuda()
                seq_pos2 = seq_pos2.cuda()
            batch_length = [len(bag) for bag in batch_word]
            shape = [0]
            for j in range(len(batch_length)):
                shape.append(shape[j] + batch_length[j])
            _, sen_a, _ = model.sentence_encoder(seq_word, seq_pos1, seq_pos2, shape)
            sen_a = torch.stack(sen_a)
            sen_a = torch.squeeze(sen_a)
            # loss_a = model.s_forward(sen_a, y_batch=batch_label)
            # loss_a, _, _ = model(seq_word, seq_pos1, seq_pos2, shape, batch_label)
            # 2.2 path B encode
            batch_word = pb_bag[index]
            batch_pos1 = pb_pos1[index]
            batch_pos2 = pb_pos2[index]
            seq_word = Variable(
                torch.LongTensor(
                    np.array([s for bag in batch_word for s in bag])))
            seq_pos1 = Variable(
                torch.LongTensor(
                    np.array([s for bag in batch_pos1 for s in bag])))
            seq_pos2 = Variable(
                torch.LongTensor(
                    np.array([s for bag in batch_pos2 for s in bag])))
            if cuda:
                seq_word = seq_word.cuda()
                seq_pos1 = seq_pos1.cuda()
                seq_pos2 = seq_pos2.cuda()
            batch_length = [len(bag) for bag in batch_word]
            shape = [0]
            for j in range(len(batch_length)):
                shape.append(shape[j] + batch_length[j])
            _, sen_b, _ = model.sentence_encoder(seq_word, seq_pos1, seq_pos2, shape)
            sen_b = torch.stack(sen_b)
            sen_b = torch.squeeze(sen_b)
            # loss_b, _, _ = model(seq_word, seq_pos1, seq_pos2, shape, batch_label)
            # all loss
            batch_mid_entity = mid_entity[index]
            seq_mid_entity = Variable(
                torch.LongTensor(np.array([s for s in batch_mid_entity])))
            if cuda:
                seq_mid_entity = seq_mid_entity.cuda()
            # s = [sen_a[0] + sen_b[0] for i in range(batch_size)]
            # # loss_path = model.s_forward(s, batch_label)
            # loss = loss_0  # + loss_a + loss_b
            batch_kg_mid_entity = kg_mid_entity[index]
            seq_kg_mid_entity = Variable(
                torch.LongTensor(np.array([s for s in batch_kg_mid_entity])))
            batch_kg_relation = kg_path_relation[index]
            seq_kg_relation = Variable(
                torch.LongTensor(np.array([s for s in batch_kg_relation])))
            batch_entity_type = entity_type[index]
            seq_entity_type = Variable(
                torch.LongTensor(np.array([s for s in batch_entity_type])))
            if cuda:
                seq_mid_entity = seq_mid_entity.cuda()
                seq_kg_mid_entity = seq_kg_mid_entity.cuda()
                seq_kg_relation = seq_kg_relation.cuda()
                seq_entity_type = seq_entity_type.cuda()
            # Fuse direct-sentence and path encodings with the KG features.
            loss, prob = model.er_gcn_layer(sen_0, sen_a, sen_b, seq_entity,
                                            seq_mid_entity, seq_kg_mid_entity,
                                            seq_kg_relation, seq_entity_type,
                                            batch_label)
            loss.backward()
            optimizer.step()
            # running_loss += loss.data[0]
            running_loss += loss.cpu().item()
            if i % 100 == 99:  # report mean loss every 100 mini-batches
                print('epoch,steps:[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 100))
                running_loss = 0.0
        endtime = datetime.datetime.now()
        print((endtime - starttime).seconds)
        starttime = endtime
        ap = eval(model,
                  model_name='hgds_edge_gcn_model_' + str(epoch))
        model.train()  # eval() presumably switched the model to eval mode
        if ap > maxap:
            maxap = ap
            maxepoch = epoch
            patient = 0
        else:
            patient += 1
            if patient > 4:  # early stop after 5 epochs without improvement
                break
        # NOTE(review): the mangled source makes the exact nesting of the two
        # trailing prints / save ambiguous; this per-epoch placement is the
        # reconstruction — confirm against the original layout.
        print('maxap:' + str(maxap) + ';in epoch:' + str(maxepoch))
        torch.save(model, "../data/model/hgds_edge_gcn_model_%s" % (str(epoch)))
    print('maxap:' + str(maxap) + ';in epoch:' + str(maxepoch))
# Test-time script: run a saved model over mask arrays and dump each predicted
# image as a numbered PNG into args.output. Relies on module-level names:
# parser, NumpyLoader, DataLoader, getClasses, cv, np, torch, and a
# project-level `eval` helper (shadows the builtin).
args = parser.parse_args()
if not os.path.exists(args.output):
    os.makedirs(args.output)
loader = NumpyLoader(args.masks)
# num_workers=4 parallelizes loading; shuffle=False keeps output order stable.
loader = DataLoader(loader, args.batch, False, num_workers=4)


def extend_matrix(matrix, device):
    """One-hot-expand a class-index matrix into a per-class boolean stack.

    For each class id i, builds a mask `matrix == i`; the stacked result has
    the class dimension at dim=1. `device` is accepted but unused here.
    """
    tensors = []
    for i in range(len(classes)):
        tensors.append(torch.eq(matrix, float(i)))
    return torch.stack(tensors, dim=1)


classes = getClasses(args.classes)
model = torch.load(args.model)
index = 0  # running output-file counter shared with onTestBatch


def onTestBatch(batch_id, features, output):
    """Write each image in a model output batch to a zero-padded PNG file."""
    global index
    for img in output:
        img = img.cpu().numpy()
        # CHW -> HWC and scale from [0, 1] to 8-bit range for cv.imwrite.
        img = np.transpose(img, (1, 2, 0)) * 255
        name = os.path.join(args.output, str(index).rjust(4, '0') + '.png')
        cv.imwrite(name, img)
        index += 1


eval(model, loader, args.device, features_transform=extend_matrix,
     onBatch=onTestBatch)
# optimizer.load_state_dict(torch.load("./models/optimizer.pkl")) #2,进行循环,进行训练 def train(epoch): train_dataloader = get_dataloader(train=True) bar = tqdm(enumerate(train_dataloader), total=len(train_dataloader)) total_loss = [] for idx, (input, target) in bar: input = input.to(conf.device) target = target.to(conf.device) #梯度置为0 optimizer.zero_grad() #计算得到预测值 output = model(input) #得到损失 loss = F.nll_loss(output, target) #反向传播计算损失 loss.backward() total_loss.append(loss.item()) #参数的更新 optimizer.step() #打印数据 if idx%10==0: bar.set_description("epoch:{} idx:{}, loss:{:.6f}".format(epoch, idx, np.mean(total_loss))) torch.save(model.state_dict(), "./models/model.pkl") torch.save(optimizer.state_dict(), "./models/optimizer.pkl") if __name__ == '__main__': for i in range(5): train(i) eval()
# Top-level inpainting training loop. Relies on module-level names defined
# above this chunk: nb_epoch, dataloader, optimizer, net, criterion,
# scheduler, eval (project-level, returns MSE/SSIM), and max_score.
for epoch in tqdm(range(nb_epoch)):
    for idx, (masked_img, mask, gt) in enumerate(dataloader):
        optimizer.zero_grad()
        masked_img = masked_img.cuda()
        mask = mask.cuda()
        gt = gt.cuda()
        pred = net(masked_img, mask)
        loss = criterion(pred, gt)
        loss.backward()
        optimizer.step()
    # Per-epoch validation; `loss` below is the last batch's training loss.
    mse, ssim = eval(net, epoch)
    # torchvision.utils.save_image([masked_img[0], pred[0], gt[0]], 'output/{}_train.jpg'.format(epoch))
    # printing training info
    # NOTE(review): scheduler.get_lr() is deprecated in newer PyTorch in
    # favor of get_last_lr() — confirm the pinned torch version.
    train_info = 'Epoch: [%i]\tMSE: %.5f\tSSIM: %.5f\tloss: %.4f\tLR:%.5f' % (
        epoch, mse, ssim, loss, scheduler.get_lr()[0])
    print(train_info)
    scheduler.step()
    # Combined score: lower MSE and higher SSIM are both better.
    score = 1 - mse / 100 + ssim
    # Save model when the score improves (max_score is presumably
    # initialized above this chunk — verify).
    if score > max_score:
        torch.save(net.state_dict(), 'models/best_model.pth.tar')
        max_score = score
# Test-time script: run a saved segmentation model over paired image/alpha
# inputs and write each predicted class map as a numbered PNG. Relies on
# module-level names: parser, ImagesLoader, ImageConverter, Numpy,
# DataLoader, getClasses, matrices_to_images, cv, torch, and a project-level
# `eval` helper (shadows the builtin).
parser.add_argument('-batch', type=int, default=4, help='batch size')
parser.add_argument('-output', type=str, default="Contents/Output/",
                    help='output directory')
args = parser.parse_args()
if not os.path.exists(args.output):
    os.makedirs(args.output)
images_loader = ImagesLoader(args.images)
images_loader = ImageConverter(images_loader)
# Alpha mattes are loaded single-channel.
alpha_loader = ImagesLoader(args.alpha, flag=cv.IMREAD_GRAYSCALE)
alpha_loader = ImageConverter(alpha_loader)
loader = Numpy((images_loader, alpha_loader))
# shuffle=False keeps output numbering aligned with input order.
loader = DataLoader(loader, args.batch, False, num_workers=4)
classes = getClasses(args.classes)
model = torch.load(args.model)
index = 0  # running output-file counter shared with onTestBatch


def onTestBatch(batch_id, features, output):
    """Convert a batch of class logits to images and write them as PNGs."""
    global index
    # Per-pixel argmax over the class dimension -> class-index matrices.
    output = torch.argmax(output, dim=1)
    output = matrices_to_images(output, classes, args.device)
    for img in output:
        img = img.cpu().numpy()
        name = os.path.join(args.output, str(index).rjust(4, '0') + '.png')
        cv.imwrite(name, img)
        index += 1


eval(model, loader, args.device, onBatch=onTestBatch)
def main(args):
    """Train and evaluate an English→Chinese sequence model end to end.

    Loads the parallel corpora, builds (or reloads) the vocabularies, trains
    with per-epoch dev evaluation and LR halving on plateau, then reports
    loss/accuracy on test and train sets.

    Args:
        args: argparse namespace carrying file paths, batch_size, model name,
            use_cuda, learning_rate, optimizer, num_epoches, eval_epoch.
            Several derived fields (num_train, en_total_words, …) are written
            back onto it as a side channel for the model constructor.

    Relies on module-level `utils`, `os`, `pickle`, `torch`, `optim`, `np`,
    `tqdm`, `EncoderDecoderModel`, and a project-level `eval` function
    (shadows the builtin) returning (correct_count, loss, num_words).
    """
    # load sentences (English and Chinese words)
    train_en, train_cn = utils.load_data(args.train_file)
    dev_en, dev_cn = utils.load_data(args.dev_file)
    args.num_train = len(train_en)
    args.num_dev = len(dev_en)
    # Build (or reload a cached) English and Chinese dictionary.
    # NOTE(review): these open() handles are never closed — consider `with`.
    if os.path.isfile(args.vocab_file):
        en_dict, cn_dict, en_total_words, cn_total_words = pickle.load(
            open(args.vocab_file, "rb"))
    else:
        en_dict, en_total_words = utils.build_dict(train_en)
        cn_dict, cn_total_words = utils.build_dict(train_cn)
        pickle.dump([en_dict, cn_dict, en_total_words, cn_total_words],
                    open(args.vocab_file, "wb"))
    args.en_total_words = en_total_words
    args.cn_total_words = cn_total_words
    # index-to-word dicts (inverse mappings; unused below in this chunk)
    inv_en_dict = {v: k for k, v in en_dict.items()}
    inv_cn_dict = {v: k for k, v in cn_dict.items()}
    # Encode train and dev sentences into indices.
    train_en, train_cn = utils.encode(train_en, train_cn, en_dict, cn_dict)
    # Batch into (x, x_mask, y, y_mask) numpy examples.
    train_data = utils.gen_examples(train_en, train_cn, args.batch_size)
    dev_en, dev_cn = utils.encode(dev_en, dev_cn, en_dict, cn_dict)
    dev_data = utils.gen_examples(dev_en, dev_cn, args.batch_size)
    # Resume from a checkpoint if present, otherwise build a fresh model.
    # NOTE(review): `model` is unbound if neither branch matches — verify
    # args.model is always "EncoderDecoderModel" when no checkpoint exists.
    if os.path.isfile(args.model_file):
        model = torch.load(args.model_file)
    elif args.model == "EncoderDecoderModel":
        model = EncoderDecoderModel(args)
    if args.use_cuda:
        model = model.cuda()
    crit = utils.LanguageModelCriterion()
    # Baseline dev evaluation before any training.
    print("start evaluating on dev...")
    correct_count, loss, num_words = eval(model, dev_data, args, crit)
    loss = loss / num_words
    acc = correct_count / num_words
    print("dev loss %s" % (loss))
    print("dev accuracy %f" % (acc))
    print("dev total number of words %f" % (num_words))
    best_acc = acc
    learning_rate = args.learning_rate
    # Look up the optimizer class by name (e.g. "Adam", "SGD").
    optimizer = getattr(optim, args.optimizer)(model.parameters(),
                                               lr=learning_rate)
    total_num_sentences = 0.
    total_time = 0.  # NOTE(review): accumulated nowhere in this chunk
    for epoch in range(args.num_epoches):
        np.random.shuffle(train_data)
        total_train_loss = 0.
        total_num_words = 0.
        for idx, (mb_x, mb_x_mask, mb_y, mb_y_mask) in tqdm(
                enumerate(train_data)):
            batch_size = mb_x.shape[0]
            total_num_sentences += batch_size
            # convert numpy ndarray to PyTorch tensors and variables
            mb_x = torch.from_numpy(mb_x).long()
            mb_x_mask = torch.from_numpy(mb_x_mask).long()
            hidden = model.init_hidden(batch_size)
            # Teacher forcing: input is y[:-1], target is y[1:] (shifted).
            mb_input = torch.from_numpy(mb_y[:, :-1]).long()
            mb_out = torch.from_numpy(mb_y[:, 1:]).long()
            mb_out_mask = torch.from_numpy(mb_y_mask[:, 1:])
            if args.use_cuda:
                mb_x = mb_x.cuda()
                mb_x_mask = mb_x_mask.cuda()
                mb_input = mb_input.cuda()
                mb_out = mb_out.cuda()
                mb_out_mask = mb_out_mask.cuda()
            mb_pred, hidden = model(mb_x, mb_x_mask, mb_input, hidden)
            loss = crit(mb_pred, mb_out, mb_out_mask)
            # Weight the (per-word) loss by the real word count of the batch.
            num_words = torch.sum(mb_out_mask).item()
            total_train_loss += loss.item() * num_words
            total_num_words += num_words
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print("training loss: %f" % (total_train_loss / total_num_words))
        # evaluate every eval_epoch
        if (epoch + 1) % args.eval_epoch == 0:
            print("start evaluating on dev...")
            correct_count, loss, num_words = eval(model, dev_data, args, crit)
            loss = loss / num_words
            acc = correct_count / num_words
            print("dev loss %s" % (loss))
            print("dev accuracy %f" % (acc))
            print("dev total number of words %f" % (num_words))
            # save model if we have the best accuracy
            if acc >= best_acc:
                torch.save(model, args.model_file)
                best_acc = acc
                print("model saved...")
            else:
                # Plateau: halve the LR by rebuilding the optimizer
                # (also resets optimizer state such as Adam moments).
                learning_rate *= 0.5
                optimizer = getattr(optim, args.optimizer)(model.parameters(),
                                                           lr=learning_rate)
            print("best dev accuracy: %f" % best_acc)
            print("#" * 60)
    # load test data
    test_en, test_cn = utils.load_data(args.test_file)
    args.num_test = len(test_en)
    test_en, test_cn = utils.encode(test_en, test_cn, en_dict, cn_dict)
    test_data = utils.gen_examples(test_en, test_cn, args.batch_size)
    # evaluate on test
    correct_count, loss, num_words = eval(model, test_data, args, crit)
    loss = loss / num_words
    acc = correct_count / num_words
    print("test loss %s" % (loss))
    print("test accuracy %f" % (acc))
    print("test total number of words %f" % (num_words))
    # evaluate on train
    correct_count, loss, num_words = eval(model, train_data, args, crit)
    loss = loss / num_words
    acc = correct_count / num_words
    print("train loss %s" % (loss))
    print("train accuracy %f" % (acc))