def test(args):
    """Run inference on the test set with a saved Transformer checkpoint.

    Loads the pickled word dictionary, restores the model weights from
    ./model/best_transformer_248.pt, predicts a label for every test
    example and dumps the predictions to data/json_ans.json.
    """
    with open('data/word_dict.pkl', 'rb') as file:
        word_dict = pkl.load(file)
    args.num_vocab = word_dict.word_num
    test_set = dataloader.MyDataset(
        'data/test_data.pkl',
        None,
        feature_path='./data/test_words_attributes_100.pkl',
        word_dict=word_dict,
        max_len=args.max_len)
    test_iter = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)
    # map_location keeps the load working on CPU-only machines.
    ckpt = torch.load("./model/best_transformer_248.pt", map_location=device)
    quora_model = trfencoder.Transformer(num_layer=3,
                                         middle_dim=10,
                                         modeldim=args.embedding_dim,
                                         dff=args.hidden_size * 4)
    quora_model.to(device)
    quora_model.load_state_dict(ckpt["model_state_dict"])
    quora_model.eval()  # inference only: disable dropout / BN updates
    embeddings = word_dict.embedding
    embeddings.to(device)
    preds = []
    with torch.no_grad():  # no gradients needed for prediction
        for i, (data, feature) in enumerate(test_iter):
            # Move *both* tensors to the model's device (the original
            # left `feature` on the CPU).
            data, feature = data.to(device), feature.to(device)
            data = embeddings(data)
            outputs = quora_model(data, feature)
            pred = torch.argmax(outputs, dim=1).cpu().numpy().tolist()
            preds.extend(pred)
    print(preds)
    with open("data/json_ans.json", 'w') as fr:
        json.dump(preds, fr)
    print("test ans saved")
def visualize(model_path):
    """Run the validation set through a saved TestModel and dump its
    attention weights plus predictions for later inspection.

    Writes ./data/attention.pkl (row-stacked attention matrices) and
    ./data/ans.pkl (predicted labels); prints the running validation F1.
    """
    quora_model = TestModel(args)
    # map_location keeps the load working on CPU-only machines.
    # (The original loaded the checkpoint a second time further down and
    # discarded the result — that redundant load is removed here.)
    check_point = torch.load(model_path, map_location=device)
    quora_model.load_state_dict(check_point['model_state_dict'])
    with open('data/word_dict.pkl', 'rb') as file:
        word_dict = pkl.load(file)
    embeddings = word_dict.embedding
    embeddings.to(device)
    quora_model.to(device)
    quora_model.eval()  # inference only: disable dropout / BN updates
    valid_set = dataloader.MyDataset('data/valid_data.pkl',
                                     'data/valid_label.pkl',
                                     word_dict=word_dict,
                                     max_len=args.max_len)
    valid_iter = DataLoader(valid_set,
                            batch_size=args.batch_size,
                            shuffle=False)
    print(len(valid_set))
    attention = []
    ans = []
    with torch.no_grad():
        f1s = []
        for i, (data, label) in enumerate(valid_iter):
            data, label = data.to(device), label.to(device)
            # 62706 is presumably the padding token id — TODO confirm
            # against the word dictionary.
            idx = padding_idx(data, 62706)
            word = data  # keep the raw token ids for the debug print below
            data = embeddings(data)
            outputs = quora_model(data, idx)
            pred = list(np.argmax(outputs.cpu().numpy(), axis=-1))
            ans = ans + pred
            # The model exposes its last attention map on `.att`.
            attention.append(quora_model.att.squeeze(-1).cpu().data.numpy())
            f1 = F1score(outputs, label)
            f1s.append(f1)
            if i % 1000 == 0:
                print('attention:', attention[i][0, :])
                print('word', word[0, :])
        val_f1 = sum(f1s) / len(f1s)
        print('val_f1:', val_f1)
    attention = np.vstack(attention)
    with open('./data/attention.pkl', 'wb') as file:
        pkl.dump(attention, file)
    with open('./data/ans.pkl', 'wb') as file:
        pkl.dump(ans, file)
    print(len(ans))
def main(args):
    """Ensemble-evaluate saved checkpoints and write test predictions.

    args.model lists model types ("test" -> TestModel, "quo" -> QuoraModel)
    and args.model_path their checkpoint files.  Per-class outputs of all
    models are averaged before the argmax.  Writes the validation
    predictions to model/<save>evaluation.txt and the test predictions to
    data/json_ans.json.
    """
    with open('data/word_dict.pkl', 'rb') as f:
        word_dict = pkl.load(f)
    embeddings = word_dict.embedding
    embeddings.to(device)
    valid_set = dataloader.MyDataset(
        'data/valid_data.pkl',
        'data/valid_label.pkl',
        feature_path="./data/valid_words_attributes_100.pkl",
        word_dict=word_dict,
        max_len=args.max_len)
    test_set = dataloader.MyDataset(
        'data/test_data.pkl',
        None,
        feature_path="./data/test_words_attributes_100.pkl",
        word_dict=word_dict,
        max_len=args.max_len)
    valid_iter = DataLoader(valid_set,
                            batch_size=args.batch_size,
                            shuffle=False)
    test_iter = DataLoader(test_set,
                           batch_size=args.batch_size,
                           shuffle=False)
    # BUG FIX: the original referenced the undefined name `arg`; the
    # parsed options are passed in as `args` (used everywhere else here).
    model_types = args.model
    model_paths = args.model_path
    # map_location keeps the load working on CPU-only machines.
    checkpoints = [
        torch.load(path, map_location=device) for path in model_paths
    ]
    # type() with the saved dict already copies every entry onto the new
    # class as attributes, so the original's extra setattr loop was
    # redundant and is dropped.
    model_args = [
        type('args', (object, ), c['model_args']) for c in checkpoints
    ]
    eval_models = []
    for i, typ in enumerate(model_types):
        if typ == "test":
            eval_model = model.TestModel(model_args[i])
        elif typ == "quo":
            eval_model = model.QuoraModel(model_args[i])
        else:
            continue  # unknown type: skip (original ignored it silently too)
        eval_model.to(device)
        eval_model.load_state_dict(checkpoints[i]["model_state_dict"])
        eval_model.eval()  # hoisted: the original re-called this per batch
        eval_models.append(eval_model)
    with torch.no_grad():
        # ---- validation set ----
        f1s = []
        mats = np.array([[0, 0], [0, 0]])
        predicts = torch.tensor([], device=device).long()
        for i, (data, label, feature) in enumerate(valid_iter):
            data, label, feature = data.to(device), label.to(
                device), feature.to(device)
            data = embeddings(data)
            final_output = torch.tensor([], device=device)
            for eval_model in eval_models:
                outputs = eval_model(data, feature)
                final_output = torch.cat((final_output, outputs), 0)
            # Average the per-model class scores across the ensemble.
            outputs = torch.mean(
                final_output.view(-1, outputs.size(0), outputs.size(1)), 0)
            f1, mat, predicted = F1score(outputs, label)
            predicts = torch.cat((predicts, predicted), 0)
            f1s.append(f1)
            mats += mat
        val_f1 = sum(f1s) / len(f1s)
        print("validation F1 score: %.3f" % (val_f1))
        print("confusion matrix: \n", mats)
        with open("model/" + args.save + "evaluation.txt", 'w') as f:
            f.write(",".join([str(c) for c in predicts.cpu().tolist()]))
        # ---- test set ----
        submit_ans = torch.tensor([], device=device).long()
        for data, feature in test_iter:
            data, feature = data.to(device), feature.to(device)
            data = embeddings(data)
            final_output = torch.tensor([], device=device)
            for eval_model in eval_models:
                outputs = eval_model(data, feature)
                final_output = torch.cat((final_output, outputs), 0)
            outputs = torch.mean(
                final_output.view(-1, outputs.size(0), outputs.size(1)), 0)
            _, predicted = torch.max(F.softmax(outputs, dim=1), 1)
            submit_ans = torch.cat((submit_ans, predicted), 0)
    with open("data/json_ans.json", 'w') as fr:
        json.dump(submit_ans.cpu().numpy().tolist(), fr)
    print("test ans saved")
def train(quora_model, args, word_dict):
    """Train `quora_model` with weighted oversampling of the positive class.

    Displays running loss/F1 every args.disp_freq batches, evaluates on the
    validation set periodically, steps the LR scheduler when validation F1
    fails to improve, and checkpoints the best model under model/.
    """
    # Snapshot every hyper-parameter so it can be stored in checkpoints.
    # NOTE(review): `args` is called here, so it appears to be a class
    # whose instance carries the options — confirm against the caller.
    args_dict = dict((name, getattr(args(), name)) for name in dir(args())
                     if not name.startswith("__"))
    train_set = dataloader.MyDataset(
        'data/train_data.pkl',
        'data/train_label.pkl',
        feature_path="./data/train_words_attributes_100.pkl",
        word_dict=word_dict,
        max_len=args.max_len)
    valid_set = dataloader.MyDataset('data/valid_data.pkl',
                                     'data/valid_label.pkl',
                                     './data/valid_words_attributes_100.pkl',
                                     word_dict=word_dict,
                                     max_len=args.max_len)
    # (The original also built the test set and loader here but never used
    # them, and computed an unused `bias`; both removed.)
    valid_iter = DataLoader(valid_set,
                            batch_size=args.batch_size,
                            shuffle=False)
    with open('data/train_label.pkl', 'rb') as file:
        train_labels = pkl.load(file)
    # Oversample the positive class proportionally to the negative/positive
    # ratio, scaled by args.sample_weight.
    rate_0vs1 = float(len(train_labels) - sum(train_labels)) / float(
        sum(train_labels))
    rate_0vs1 *= args.sample_weight
    weights = [rate_0vs1 if label == 1 else 1 for label in train_labels]
    from torch.utils.data.sampler import WeightedRandomSampler
    sampler = WeightedRandomSampler(weights,
                                    num_samples=len(train_labels),
                                    replacement=True)
    train_iter = DataLoader(train_set,
                            batch_size=args.batch_size,
                            shuffle=False,
                            sampler=sampler)
    len_train_iter = len(train_iter)
    embeddings = word_dict.embedding
    optimizer = optim.Adam(quora_model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=3,
                                          gamma=args.lr_decay)
    criterion = nn.CrossEntropyLoss()
    lossn = 0
    start_time = time.time()
    best_score = 0.0
    for epoch in range(args.num_epochs):
        running_loss = 0
        quora_model.train()
        for i, (data, label, feature) in enumerate(train_iter):
            data, label, feature = data.to(device), label.to(
                device), feature.to(device)
            data = embeddings(data)
            quora_model.zero_grad()
            outputs = quora_model(data, feature)
            loss = criterion(outputs, label)
            loss.backward()
            optimizer.step()
            f1 = F1score(outputs, label)
            running_loss += loss.item()
            if i % args.disp_freq == 0 and i > 0:
                average_loss = running_loss / args.disp_freq
                lossn += 1
                if pargs.visdom:
                    vis.line(np.array([[average_loss]]),
                             np.array([lossn]),
                             win="loss",
                             update="append")
                print("%.3f training loss: %.3f training F1 score: %.3f" %
                      (i / len_train_iter, average_loss, f1))
                dur_time = time.time() - start_time
                print("%s / %s batches trained, %d batches /s" %
                      (i, len_train_iter, args.disp_freq / dur_time))
                start_time = time.time()
                running_loss = 0
            if i % (args.disp_freq * 10) == 0:
                quora_model.eval()
                with torch.no_grad():
                    f1s = []
                    # BUG FIX: the original reused `i` (and data/label/
                    # feature) here, shadowing the training-batch counter.
                    for _, (vdata, vlabel, vfeat) in enumerate(valid_iter):
                        vdata, vlabel, vfeat = vdata.to(device), vlabel.to(
                            device), vfeat.to(device)
                        vdata = embeddings(vdata)
                        voutputs = quora_model(vdata, vfeat)
                        val_f1_batch = F1score(voutputs, vlabel)
                        f1s.append(val_f1_batch)
                    val_f1 = sum(f1s) / len(f1s)
                    if pargs.visdom:
                        vis.line(np.array([[val_f1]]),
                                 np.array([lossn]),
                                 win="f1",
                                 update="append")
                    print(
                        "EPOCH %s, %s validation F1 score: %.3f, previous best score: %.3f"
                        % (epoch, lossn, val_f1, best_score))
                    start_time = time.time()
                    if val_f1 < best_score:
                        # No improvement: decay the learning rate.
                        scheduler.step()
                    else:
                        best_score = val_f1
                        torch.save(
                            {
                                "model_args": args_dict,
                                "model_state_dict": quora_model.state_dict()
                            }, "model/" + "best" + args.name + "_" +
                            quora_model.name + "_" + str(lossn) + ".pt")
                quora_model.train()
                print("current lr: %s" % scheduler.get_lr())
def train(args):
    """Train a class-conditional GAN (projection discriminator) on
    anime-face images tagged with hair colours.

    Reads image_tag.csv to pair face images with one of the 13 hair-colour
    classes, then alternates n_dis discriminator steps with one generator
    step per batch, saving the generator every epoch.
    """
    print('Start')
    if torch.cuda.is_available():
        device = 'cuda'
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        device = 'cpu'
    hair_colors = [
        "blonde hair", "brown hair", "black hair", "blue hair", "pink hair",
        "purple hair", "green hair", "red hair", "silver hair", "white hair",
        "orange hair", "aqua hair", "grey hair"
    ]
    image_tag_df = pd.read_csv(os.path.join(args.data_dir, 'image_tag.csv'))
    image_tag_df = image_tag_df.dropna(how='any')
    all_img_path_list = glob.glob(args.data_dir + '/face_images/*/*.png')
    # BUG FIX: the original used str.lstrip(prefix), which strips a
    # *character set* rather than a prefix, mangling any file name that
    # starts with a character occurring in the prefix.  Slice the prefix
    # off by length instead.
    prefix = args.data_dir + '/face_images/'
    all_img_name_list = [
        all_img_path[len(prefix):] for all_img_path in all_img_path_list
    ]
    # Keep only images that actually have a tag row.
    img_name_list = list(
        set(image_tag_df['image name']) & set(all_img_name_list))
    img_path_list = [prefix + img_name for img_name in img_name_list]
    n_iter = 0
    lr = args.lr
    beta1 = args.beta1
    beta2 = args.beta2
    train_epoch = args.train_epoch
    n_dis = args.n_dis
    batch_size = args.batch_size
    num_classes = len(hair_colors)
    gen_num_features = args.gen_num_features
    gen_dim_z = args.gen_dim_z
    gen_bottom_width = args.gen_bottom_width
    gen_distribution = args.gen_distribution
    dis_num_features = args.dis_num_features
    dataset = dataloader.MyDataset(img_name_list, img_path_list, image_tag_df,
                                   hair_colors)
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=False)
    gen = model.ResNetGenerator(gen_num_features,
                                gen_dim_z,
                                gen_bottom_width,
                                activation=F.relu,
                                num_classes=num_classes).to(device)
    dis = model.SNResNetProjectionDiscriminator(dis_num_features, num_classes,
                                                F.relu).to(device)
    opt_gen = optim.Adam(gen.parameters(), lr, (beta1, beta2))
    opt_dis = optim.Adam(dis.parameters(), lr, (beta1, beta2))
    gen_criterion = model.GenLoss()
    dis_criterion = model.DisLoss()
    for epoch in range(train_epoch):
        print('Epoch : ', epoch)
        for x_batch, y_batch in train_loader:
            n_iter += 1
            print('n_iter : ', n_iter)
            for i in range(n_dis):
                if i == 0:
                    # One generator update per batch, before the first
                    # discriminator step.
                    fake, pseudo_y, _ = dataloader.sample_from_gen(
                        num_classes, batch_size, gen_dim_z, device, gen)
                    dis_fake = dis(fake, pseudo_y)
                    loss_gen = gen_criterion(dis_fake, None)
                    gen.zero_grad()
                    loss_gen.backward()
                    opt_gen.step()
                # Discriminator update on fresh fakes vs. the real batch.
                fake, pseudo_y, _ = dataloader.sample_from_gen(
                    num_classes, batch_size, gen_dim_z, device, gen)
                real, y = x_batch.type(
                    torch.float32).to(device), y_batch.to(device)
                dis_fake, dis_real = dis(fake, pseudo_y), dis(real, y)
                loss_dis = dis_criterion(dis_fake, dis_real)
                dis.zero_grad()
                loss_dis.backward()
                opt_dis.step()
        # `epoch % 1 == 0` in the original was always true — save every epoch.
        save_model(gen, args.model_dir, epoch)
def train(args):
    """Train an unconditional WGAN-GP generator on face images.

    Per batch: n_dis critic updates with gradient penalty, plus a single
    generator update on the first critic step; the generator is saved at
    the end of every epoch.
    """
    if torch.cuda.is_available():
        device = torch.device('cuda')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        device = torch.device('cpu')
        torch.set_default_tensor_type('torch.FloatTensor')

    img_path_list = glob.glob(args.data_dir + '/face_images/*/*.png')

    # Hyper-parameters pulled off the parsed options.
    n_iter = 0
    lr, beta1, beta2 = args.lr, args.beta1, args.beta2
    train_epoch, n_dis = args.train_epoch, args.n_dis
    batch_size, coef = args.batch_size, args.coef
    gen_num_features = args.gen_num_features
    gen_dim_z = args.gen_dim_z
    gen_bottom_width = args.gen_bottom_width
    dis_num_features = args.dis_num_features
    wd_list = []

    gen = model.ResNetGenerator(gen_num_features, gen_dim_z,
                                gen_bottom_width, F.relu).to(device)
    dis = model.ResNetDiscriminator(dis_num_features, F.relu).to(device)
    opt_gen = optim.Adam(gen.parameters(), lr=lr, betas=(beta1, beta2))
    opt_dis = optim.Adam(dis.parameters(), lr=lr, betas=(beta1, beta2))

    dataset = dataloader.MyDataset(img_path_list)
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=False)

    for epoch in range(train_epoch):
        print('Epoch: ', epoch)
        for batch in train_loader:
            # A short final batch would break the fixed-size noise
            # sampling, so drop it.
            if batch.shape[0] != batch_size:
                print('Skip')
                continue
            n_iter += 1
            print('n_iter: ', n_iter)
            for step in range(n_dis):
                if step == 0:
                    # One generator update per batch, on the first
                    # critic step.
                    gen.zero_grad()
                    fake, _ = dataloader.sample_from_gen(
                        batch_size, gen_dim_z, gen, device)
                    dis_fake = dis(fake).mean()
                    loss_gen = loss.cal_loss_gen(dis_fake)
                    loss_gen.backward()
                    opt_gen.step()
                # Critic update: fresh fakes, real batch, and the
                # WGAN-GP gradient penalty weighted by `coef`.
                dis.zero_grad()
                fake, _ = dataloader.sample_from_gen(batch_size, gen_dim_z,
                                                     gen, device)
                real = batch.type(torch.float32).to(device)
                dis_fake, dis_real = dis(fake).mean(), dis(real).mean()
                gradient_penalty = loss.cal_gradient_penalty(
                    dis, real, fake, coef, device)
                loss_dis = loss.cal_loss_dis(dis_fake, dis_real,
                                             gradient_penalty)
                loss_dis.backward()
                opt_dis.step()
        if epoch % 1 == 0:
            save_model(gen, args.model_dir, epoch)
def train(quora_model, args, word_dict):
    """Train `quora_model` with per-epoch re-weighted oversampling.

    Rebuilds the training sampler each epoch with a decaying positive-class
    weight, logs loss/precision/recall-based F1, validates periodically,
    steps the LR scheduler when validation F1 stalls, checkpoints improved
    models, and writes the loss/F1 curves to model/<name>_train.txt.
    """
    # Snapshot every hyper-parameter so it can be stored in checkpoints.
    # NOTE(review): `args` is called here, so it appears to be a class
    # whose instance carries the options — confirm against the caller.
    args_dict = dict((name, getattr(args(), name)) for name in dir(args())
                     if not name.startswith("__"))
    train_set = dataloader.MyDataset(
        'data/train_data.pkl',
        'data/train_label.pkl',
        feature_path="./data/train_words_attributes_100.pkl",
        word_dict=word_dict,
        max_len=args.max_len)
    valid_set = dataloader.MyDataset('data/valid_data.pkl',
                                     'data/valid_label.pkl',
                                     './data/valid_words_attributes_100.pkl',
                                     word_dict=word_dict,
                                     max_len=args.max_len)
    # (The original also built the test set and loader here but never used
    # them, and computed an unused `bias`; both removed.)
    valid_iter = DataLoader(valid_set,
                            batch_size=args.batch_size,
                            shuffle=False)
    with open('data/train_label.pkl', 'rb') as file:
        train_labels = pkl.load(file)
    # Negative/positive ratio used to oversample the minority class.
    rate_0vs1 = float(len(train_labels) - sum(train_labels)) / float(
        sum(train_labels))
    embeddings = word_dict.embedding
    optimizer = optim.Adam(quora_model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=3,
                                          gamma=args.lr_decay)
    criterion = nn.CrossEntropyLoss()
    lossn = 0
    losses = []
    training_f1s = []
    val_f1s = []
    start_time = time.time()
    best_score = 0.0
    for epoch in range(args.num_epochs):
        print("EPOCH %s: sample_weight: %.3f" %
              (epoch + 1, args.sample_weight))
        # The sampler is rebuilt every epoch with the decayed weight.
        train_iter = get_train_iter(rate_0vs1, args, train_set, train_labels)
        len_train_iter = len(train_iter)
        args.sample_weight *= args.sample_weight_decay
        running_loss = 0
        quora_model.train()
        precision, recall = 0, 0
        for i, (data, label, feature) in enumerate(train_iter):
            data, label, feature = data.to(device), label.to(
                device), feature.to(device)
            data = embeddings(data)
            quora_model.zero_grad()
            outputs = quora_model(data, feature)
            loss = criterion(outputs, label)
            loss.backward()
            optimizer.step()
            p, r, f1 = F1score(outputs, label)
            precision += p
            recall += r
            running_loss += loss.item()
            if i % args.disp_freq == 0 and i > 0:
                average_loss = running_loss / args.disp_freq
                lossn += 1
                losses.append(average_loss)
                if pargs.visdom:
                    vis.line(np.array([[average_loss]]),
                             np.array([lossn]),
                             win="loss",
                             update="append")
                print("%.3f training loss: %.3f training F1 score: %.3f" %
                      (i / len_train_iter, average_loss, f1))
                dur_time = time.time() - start_time
                print("%s / %s batches trained, %d batches /s" %
                      (i, len_train_iter, args.disp_freq / dur_time))
                start_time = time.time()
                running_loss = 0
            if i % (args.disp_freq * 10) == 0:
                quora_model.eval()
                with torch.no_grad():
                    f1s = []
                    # BUG FIX: the original reused `i` (and data/label/
                    # feature) here, shadowing the training-batch counter.
                    for _, (vdata, vlabel, vfeat) in enumerate(valid_iter):
                        vdata, vlabel, vfeat = vdata.to(device), vlabel.to(
                            device), vfeat.to(device)
                        vdata = embeddings(vdata)
                        voutputs = quora_model(vdata, vfeat)
                        _, _, vf1 = F1score(voutputs, vlabel)
                        f1s.append(vf1)
                    val_f1 = sum(f1s) / len(f1s)
                    val_f1s.append(val_f1)
                    if pargs.visdom:
                        vis.line(np.array([[val_f1]]),
                                 np.array([lossn]),
                                 win="f1",
                                 update="append")
                    # Averaged over the last disp_freq*10 batches; E is a
                    # smoothing epsilon defined at module level.
                    rec, prec = recall / args.disp_freq / 10, precision / args.disp_freq / 10
                    training_f1 = (2 * rec * prec + E) / (rec + prec + E)
                    training_f1s.append(training_f1)
                    precision, recall = 0, 0
                    print(
                        "EPOCH %s, %s training F1 score: %.3f, validation F1 score: %.3f\nprevious best score: %.3f"
                        % (epoch, lossn, training_f1, val_f1, best_score))
                    start_time = time.time()
                    if val_f1 < best_score:
                        # No improvement: decay the learning rate.
                        scheduler.step()
                    else:
                        best_score = val_f1
                        torch.save(
                            {
                                "model_args": args_dict,
                                "model_state_dict": quora_model.state_dict()
                            }, "model/" + args.name + "_" + quora_model.name +
                            "_" + str(lossn) + ".pt")
                quora_model.train()
                print("current lr: %s" % scheduler.get_lr()[0])
    # Persist the learning curves for later plotting.
    with open("model/" + args.name + "_" + "train.txt", "w") as f:
        f.write("training_loss,")
        f.write(",".join([str(item) for item in losses]))
        f.write("\n")
        f.write("training_f1,")
        f.write(",".join([str(item) for item in training_f1s]))
        f.write("\n")
        f.write("eval_f1,")
        f.write(",".join([str(item) for item in val_f1s]))