def process(input_filename, gs_filename):
    """Run the crowd-size experiment and write hypothesis-test rows to output.csv.

    Reads the raw annotation CSV (``input_filename``) and the gold-standard
    CSV (``gs_filename``), cleans both, then for every crowd size in
    [20, 80] computes precision / recall / F-measure and appends the
    hypothesis-test statistics for each metric as one output row.

    NOTE(review): relies on the module-level helpers ``clean``, ``correct``,
    ``metrics`` and ``hypothesis_tests`` defined elsewhere in this project.

    Args:
        input_filename: path to the CSV of crowd annotations.
        gs_filename: path to the gold-standard CSV.

    Side effects:
        Writes ``output.csv`` (semicolon-delimited) in the working directory.
    """
    # newline='' is what the csv module documentation requires for open();
    # without it, universal-newline translation can corrupt quoted fields
    # and produce blank rows on Windows.
    with open(input_filename, newline='') as infile:
        dataset = clean([row for row in csv.reader(infile)])

    with open(gs_filename, newline='') as gsfile:
        gs = [row for row in csv.reader(gsfile)]
    # Column index 3 presumably holds the true labels -- TODO confirm
    # against the `correct` helper's contract.
    y_true = correct(clean(gs), 3)

    output = []
    for crowd_size in range(20, 81):
        # Renamed the F-measure from `f` so it no longer shadows the
        # file-handle name used in the with-blocks above/below.
        p, r, f_measure = metrics(dataset, y_true, crowd_size)
        output.append([crowd_size, 'precision'] + hypothesis_tests(p).tolist())
        output.append([crowd_size, 'recall'] + hypothesis_tests(r).tolist())
        output.append([crowd_size, 'f_measure'] + hypothesis_tests(f_measure).tolist())

    with open('output.csv', 'w', newline='') as outfile:
        writer = csv.writer(outfile, delimiter=';')
        writer.writerows(output)
# NCF-style train/evaluate loop. Indentation reconstructed from a collapsed
# one-line paste -- block boundaries below are a best-effort reading of the
# statement order; verify against the original repository.
count = 0  # global step counter for TensorBoard scalars
for epoch in range(FLAGS.epochs):
    model.train()  # Enable dropout (if have).
    start_time = time.time()
    for idx, batch_data in enumerate(train_dataloader):
        # Assign the user and item on GPU later.
        user = batch_data['user'].long().cuda()
        item = batch_data['item'].long().cuda()
        label = batch_data['label'].float().cuda()
        model.zero_grad()
        prediction = model(user, item)
        loss = loss_function(prediction, label)
        loss.backward()
        # nn.utils.clip_grad_norm(model.parameters(), FLAGS.clip_norm)
        optimizer.step()
        # Log per-step training loss against the running step counter.
        writer.add_scalar('data/loss', loss.data.item(), count)
        count += 1
    model.eval()  # Disable dropout (if have).
    # HR/NDCG over the test set at the configured cut-off.
    HR, NDCG = evaluate.metrics(model, test_dataloader, FLAGS.top_k)
    elapsed_time = time.time() - start_time
    print("Epoch: %d" %epoch + " Epoch time: " + time.strftime(
        "%H: %M: %S", time.gmtime(elapsed_time)))
    print("Hit ratio is %.3f\tNdcg is %.3f" %(np.mean(HR), np.mean(NDCG)))
# NOTE(review): placement of this save is ambiguous in the collapsed source --
# it may belong inside the epoch loop; as written, the model is saved once
# after training. Saves the full module object, not just a state_dict.
torch.save(model, 'm.pt')
# Per-epoch body of an NCF training script (fragment -- `epoch`, `count`,
# `start_time`, `best_hr` etc. are defined before this visible chunk).
# Indentation reconstructed from a collapsed one-line paste.
train_loader.dataset.ng_sample()  # resample negative items for this epoch
for user, item, label in train_loader:
    user = user.cuda()
    item = item.cuda()
    label = label.float().cuda()
    model.zero_grad()
    prediction = model(user, item)
    loss = loss_function(prediction, label)
    loss.backward()
    optimizer.step()
    count += 1  # global step counter maintained outside this fragment
model.eval()
HR, NDCG = evaluate.metrics(model, test_loader, args.top_k)
elapsed_time = time.time() - start_time
print("The time elapse of epoch {:03d}".format(epoch) + " is: " +
        time.strftime("%H: %M: %S", time.gmtime(elapsed_time)))
print("HR: {:.3f}\tNDCG: {:.3f}".format(np.mean(HR), np.mean(NDCG)))
# Track the best epoch by hit ratio; only persist the model when it improves
# and the --out flag was given.
if HR > best_hr:
    best_hr, best_ndcg, best_epoch = HR, NDCG, epoch
    if args.out:
        if not os.path.exists(config.model_path):
            os.mkdir(config.model_path)
        torch.save(model, '{}{}.pth'.format(config.model_path, config.model))
# NOTE(review): the statement below is truncated in this chunk -- its
# arguments continue past the visible source.
print("End. Best epoch {:03d}: HR = {:.3f}, NDCG = {:.3f}".format(
# Per-epoch body of an FM-style training script (fragment -- `epoch`, `count`,
# `start_time`, `best_hr` are defined before this chunk). Indentation
# reconstructed from a collapsed one-line paste.
for features, feature_values, label in train_loader:
    features = features.cuda()
    feature_values = feature_values.cuda()
    label = label.cuda()
    model.zero_grad()
    prediction = model(features, feature_values)
    loss = criterion(prediction, label)
    loss.backward()
    optimizer.step()
    # writer.add_scalar('data/loss', loss.item(), count)
    count += 1
model.eval()
# RMSE on the training split; hr/ndcg are scalar summaries while HR/NDCG
# presumably hold the per-user lists saved below -- TODO confirm against
# evaluate.metrics.
train_result = evaluate.metric_rmse(model, train_loader)
hr, ndcg, HR, NDCG = evaluate.metrics(model, test_loader)
print("Runing Epoch {:03d} ".format(epoch) + "costs " + time.strftime(
        "%H: %M: %S", time.gmtime(time.time() - start_time)))
print("Train_RMSE: {:.4f}, Test_hr: {:.4f}, Test_ndcg: {:.4f}".format(
        train_result, hr, ndcg))
if hr > best_hr:
    best_hr, best_ndcg, best_epoch = hr, ndcg, epoch
    if args.out:
        if not os.path.exists(config.model_path):
            os.mkdir(config.model_path)
        torch.save(model, '{}{}.pth'.format(config.model_path, config.model))
# NOTE(review): placement ambiguous in the collapsed source -- these dumps of
# per-user metrics (for significance testing) may belong inside the if-block.
np.save('./hr_ttest.npy', np.array(HR))
np.save('./ndcg_ttest.npy', np.array(NDCG))
def main():
    """End-to-end training entry point for the TranSearch model.

    Parses hyper-parameters, builds train/test dataloaders with negative
    sampling, optionally loads frozen pre-trained visual/textual encoders,
    then trains for 20 epochs with a triplet loss, evaluating MRR / Hit
    ratio / NDCG after every epoch and reporting the best epoch by MRR.

    Indentation reconstructed from a collapsed paste; structure below is a
    best-effort reading -- verify against the original repository.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--embed_size", type=int, default=32,
        help="the final embedding size")
    parser.add_argument("--lr", type=float, default=0.001,
        help="the learning rate for optimization method")
    parser.add_argument("--dropout", type=float, default=0.5,
        help="the dropout rate")
    parser.add_argument("--neg_number", type=int, default=5,
        help="negative numbers for training the triplet model")
    parser.add_argument("--batch_size", type=int, default=512,
        help="batch size for training")
    parser.add_argument("--top_k", type=int, default=20,
        help="topk rank items for evaluating")
    parser.add_argument("--is_output", action='store_true', default=False,
        help="output the result for rank test")
    parser.add_argument("--mode", type=str, default='double',
        help="the model mode")
    parser.add_argument("--gpu", type=str, default='0',
        help="choose the gpu card number.")
    FLAGS = parser.parse_args()

    writer = SummaryWriter()  # for visualization

    opt_gpu = FLAGS.gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = opt_gpu
    cudnn.benchmark = True

    ############################# PREPARE DATASET ##########################
    data_train = TranSearchData(FLAGS.neg_number, is_training=True)
    data_test = TranSearchData(FLAGS.neg_number, is_training=False)
    print("Sampling negative items for each positive pairs......\n")
    data_train.sample_neg()
    dataloader_train = DataLoader(data_train,
        batch_size=FLAGS.batch_size, shuffle=True, num_workers=4)
    data_test.sample_neg()
    # Test set is iterated one sample at a time, in order.
    dataloader_test = DataLoader(data_test, shuffle=False, batch_size=1)

    ####################### LOAD PRE-TRAIN WEIGHTS ##########################
    # 'double' mode uses both modalities; load the pre-trained encoders,
    # strip one layer (presumably the dropout layer -- TODO confirm the
    # children() indices against the saved module), and freeze them.
    if os.path.exists(config.image_weights_path) and FLAGS.mode == 'double':
        visual_FC = torch.load(config.image_weights_path)
        # remove the dropout layer
        modules = list(visual_FC.children())[:2] + list(
            visual_FC.children())[3:]
        visual_FC = nn.Sequential(*modules)
        visual_FC.requires_grad = False

        textual_FC = torch.load(config.text_weights_path)
        modules = list(textual_FC.children())[:2] + list(
            textual_FC.children())[3:]
        textual_FC = nn.Sequential(*modules)
        textual_FC.requires_grad = False
    else:
        visual_FC = None
        textual_FC = None

    ############################## CREATE MODEL ###########################
    # User count is derived from the full interaction file.
    full_data = pd.read_csv(config.full_path, usecols=['userID'])
    user_size = len(full_data.userID.unique())

    # create model
    model = TranSearch(visual_FC, textual_FC,
        config.visual_size, config.textual_size, FLAGS.embed_size,
        user_size, FLAGS.mode, FLAGS.dropout, is_training=True)
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(),
        lr=FLAGS.lr, weight_decay=0.0001)

    best_mrr, best_hit, best_ndcg = 0.0, 0.0, 0.0
    best_epoch = 0
    print("Start training......\n")
    for epoch in range(20):
        model.is_training = True
        model.train()
        start_time = time.time()
        for idx, batch_data in enumerate(dataloader_train):
            user = batch_data['userID'].cuda()
            query = batch_data['query'].cuda()
            pos_vis = batch_data['pos_vis'].cuda()
            pos_text = batch_data['pos_text'].cuda()
            neg_vis = batch_data['neg_vis'].cuda()
            neg_text = batch_data['neg_text'].cuda()

            model.zero_grad()
            item_predict, pos_item, neg_items = model(user, query,
                pos_vis, pos_text, neg_vis, neg_text, False)
            loss = TripletLoss(item_predict, pos_item, neg_items)
            loss.backward()
            optimizer.step()
            # Global step = epoch * batches-per-epoch + batch index.
            writer.add_scalar('data/endtoend_loss', loss.data.item(),
                epoch * len(dataloader_train) + idx)

        # start testing
        model.eval()
        model.is_training = False
        Mrr, Hr, Ndcg = evaluate.metrics(model, data_test, dataloader_test,
            FLAGS.top_k, FLAGS.is_output, epoch)
        elapsed_time = time.time() - start_time
        print("Epoch: {:d}\t".format(epoch) + "Epoch time: " +
            time.strftime("%H: %M: %S", time.gmtime(elapsed_time)))
        print("Mrr is {:.3f}.\tHit ratio is {:.3f}.\tNdcg is {:.3f}.".format(
            Mrr, Hr, Ndcg))
        # Best epoch is selected by MRR alone.
        if Mrr > best_mrr:
            best_mrr = Mrr
            best_hit = Hr
            best_ndcg = Ndcg
            best_epoch = epoch
    print("\nThe best epoch is on {}".format(best_epoch), end=': ')
    print("Mrr is {:.3f}.\tHit ratio is {:.3f}.\tNdcg is {:.3f}.".format(
        best_mrr, best_hit, best_ndcg))
# Per-epoch body of an FM-style regression script (fragment -- `epoch`,
# `count`, `start_time`, `best_rmse` are defined before this chunk).
# Indentation reconstructed from a collapsed one-line paste.
for features, feature_values, label in train_loader:
    features = features.cuda()
    feature_values = feature_values.cuda()
    label = label.cuda()
    model.zero_grad()
    prediction = model(features, feature_values)
    loss = criterion(prediction, label)
    # L2 penalty on the embedding table, weighted by --lamda.
    loss += args.lamda * model.embeddings.weight.norm()
    loss.backward()
    optimizer.step()
    # writer.add_scalar('data/loss', loss.item(), count)
    count += 1
model.eval()
# RMSE on all three splits (per the print format below).
train_result = evaluate.metrics(model, train_loader)
valid_result = evaluate.metrics(model, valid_loader)
test_result = evaluate.metrics(model, test_loader)
print("Runing Epoch {:03d} ".format(epoch) + "costs " + time.strftime(
    "%H: %M: %S", time.gmtime(time.time()-start_time)))
print("Train_RMSE: {:.3f}, Valid_RMSE: {:.3f}, Test_RMSE: {:.3f}".format(
    train_result, valid_result, test_result))
# Lower RMSE is better; persist the model only on improvement with --out.
# NOTE(review): model selection uses the TEST split rather than the
# validation split -- looks intentional in the original, but worth flagging.
if test_result < best_rmse:
    best_rmse, best_epoch = test_result, epoch
    if args.out:
        if not os.path.exists(config.model_path):
            os.mkdir(config.model_path)
        torch.save(model, '{}{}.pth'.format(config.model_path, config.model))
def main():
    """Training entry point for an earlier TranSearch variant.

    Parses hyper-parameters, prepares train/test data with negative
    sampling, loads and freezes pre-trained visual/textual encoders from
    ./Variable/, then trains for 20 epochs with a triplet loss, printing
    MRR / Hit ratio / NDCG after each epoch.

    Indentation reconstructed from a collapsed paste; structure below is a
    best-effort reading -- verify against the original repository.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", default='MenClothing', type=str,
        help="choose dataset to process.")
    parser.add_argument("--embed_size", default=32, type=int,
        help="the final embedding size.")
    parser.add_argument("--lr", default=0.001, type=float,
        help="the learning rate for optimization method.")
    parser.add_argument("--dropout", default=0.5, type=float,
        help="the dropout rate.")
    parser.add_argument("--neg_number", default=5, type=int,
        help="negative numbers for training the triplet model.")
    parser.add_argument("--batch_size", default=512, type=int,
        help="batch size for training.")
    parser.add_argument("--top_k", default=20, type=int,
        help="topk rank items for evaluating.")
    # NOTE(review): type=bool on argparse does not parse "False" as False;
    # any non-empty string is truthy. Left as-is (doc-only pass).
    parser.add_argument("--is_output", default=False, type=bool,
        help="output the result for rank test.")
    parser.add_argument("--mode", default='double', type=str,
        help="the model mode.")
    parser.add_argument("--gpu", default='0', type=str,
        help="choose the gpu card number.")
    FLAGS = parser.parse_args()

    writer = SummaryWriter()  # For visualization

    opt_gpu = FLAGS.gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = opt_gpu

    ############################# PREPARE DATASET ##########################
    data_train = TranSearchData(
        FLAGS.dataset, 'train.csv', is_training=True)
    data_test = TranSearchData(
        FLAGS.dataset, 'test.csv', is_training=False)
    print("Sampling negative items for each positive pairs......\n")
    data_train.sample_neg(FLAGS.neg_number)
    dataloader_train = DataLoader(data_train,
        batch_size=FLAGS.batch_size, shuffle=True, num_workers=4)
    data_test.sample_neg(0)  # no negatives needed at evaluation time
    dataloader_test = DataLoader(data_test, shuffle=False, batch_size=1)

    ####################### LOAD PRE-TRAIN WEIGHTS ##########################
    visual_FC = torch.load('./Variable/visual_FC.pt')
    # First remove the dropout layer (presumably child index 2 -- TODO
    # confirm against the saved module), then freeze the encoder.
    modules = list(visual_FC.children())[:2] + list(visual_FC.children())[3:]
    visual_FC = nn.Sequential(*modules)
    visual_FC.requires_grad=False
    textual_FC = torch.load('./Variable/textual_FC.pt')
    modules = list(textual_FC.children())[:2] + list(textual_FC.children())[3:]
    textual_FC = nn.Sequential(*modules)
    textual_FC.requires_grad=False

    ############################## CREATE MODEL ###########################
    # User count is derived from the dataset's full interaction file.
    full_data = pd.read_csv(os.path.join(ROOT_DIR, FLAGS.dataset, 'full.csv'),
        usecols=['userID'])
    user_size = len(full_data.userID.unique())

    # Create model. 4096/512 are the visual/textual feature sizes.
    model = TranSearch(visual_FC, textual_FC, 4096, 512, FLAGS.embed_size,
        user_size, FLAGS.mode, FLAGS.dropout, is_training=True)
    model.cuda()
    # optimizer = torch.optim.SGD(
    #     model.parameters(), momentum=0.9, lr=0.01)
    #scheduler = ReduceLROnPlateau(optimizer, min_lr=1e-08, patience=30)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=FLAGS.lr, weight_decay=0.0001)

    print("Start training......\n")
    for epoch in range(20):
        model.is_training = True
        model.train()
        start_time = time.time()
        for idx, batch_data in enumerate(dataloader_train):
            user = batch_data['userID'].cuda()
            query = batch_data['query'].cuda()
            pos_vis = batch_data['pos_vis'].cuda()
            pos_text = batch_data['pos_text'].cuda()
            neg_vis = batch_data['neg_vis'].cuda()
            neg_text = batch_data['neg_text'].cuda()

            model.zero_grad()
            item_predict, pos_item, neg_items = model(user, query,
                pos_vis, pos_text, neg_vis, neg_text, False)
            loss = TripletLoss(item_predict, pos_item, neg_items)
            loss.backward()
            optimizer.step()
            # scheduler.step(loss.data[0])
            # Global step = epoch * batches-per-epoch + batch index.
            writer.add_scalar('data/endtoend_loss', loss.data.item(),
                epoch*len(dataloader_train)+idx)
        print("Epoch %d training is done!\n" %epoch)

        # Start testing
        model.eval()
        model.is_training = False
        Mrr, Hr, Ndcg = evaluate.metrics(model, data_test, dataloader_test,
            FLAGS.top_k, FLAGS.is_output, epoch)
        elapsed_time = time.time() - start_time
        print("Epoch: %d\t" %epoch + "Epoch time: " + time.strftime(
            "%H: %M: %S", time.gmtime(elapsed_time)))
        print("Mrr is %.3f.\nHit ratio is %.3f.\nNdcg is %.3f.\n" %(
            Mrr, Hr, Ndcg))
# Fragment of a two-phase (local then global) optimization step -- `pred`,
# `pos`, `neg`, the review/query tensors, both optimizers, `epoch` and
# `start_time` are all defined before this visible chunk. Indentation
# reconstructed from a collapsed one-line paste.
loss = criterion(pred, pos, neg)
loss.backward()
local_optimizer.step()

# ---------Global Update---------
model.zero_grad()
model.set_global()
for i in range(len(query_item_reviews_words)):
    # ---------Construct Batch---------
    pred, pos, neg = model(
        user_reviews_words, user_reviews_lengths,
        query_item_reviews_words[i], query_item_reviews_lengths[i],
        query_queries[i], 'train',
        query_negative_reviews_words[i],
        query_negative_reviews_lengths[i])
    loss = criterion(pred, pos, neg)
    loss.backward()
# NOTE(review): placement ambiguous in the collapsed source -- the global
# step may belong inside the loop (per-batch) rather than after it
# (accumulated gradients). Confirm against the original repository.
global_optimizer.step()

# Evaluation also receives the local optimizer and criterion, presumably
# for per-query local adaptation at test time -- verify against `metrics`.
Mrr, Hr, Ndcg = metrics(model, test_dataset, test_loader, 20,
                        local_optimizer, criterion)
print(
    "Running Epoch {:03d}/{:03d}".format(epoch + 1, config.epochs),
    "loss:{:.3f}".format(float(loss)),
    "Mrr {:.3f}, Hr {:.3f}, Ndcg {:.3f}".format(Mrr, Hr, Ndcg),
    "costs:", time.strftime("%H: %M: %S",
                            time.gmtime(time.time() - start_time)))
print(model.local_parameters)
# Fragment of an AdaptDL-instrumented NCF epoch: end of the inner batch
# loop, then per-epoch evaluation with replica-synchronized stats.
# `user`, `item`, `label`, `epoch`, `count`, `start_time`, `best_hr` are
# defined before this visible chunk; the chunk is truncated at the end.
# Indentation reconstructed from a collapsed one-line paste.
network.zero_grad()
prediction = network(user, item)
loss = loss_function(prediction, label)
loss.backward()
optimizer.step()
count += 1
# AdaptDL adaptive-batching telemetry for this epoch.
gain = network.gain
batchsize = train_loader.current_batch_size
accumulation_steps = train_loader.accumulation_steps
train_loader.to_tensorboard(writer, epoch, tag_prefix="AdaptDL/Data/")
network.to_tensorboard(writer, epoch, tag_prefix="AdaptDL/Model/")

network.eval()
stats = adl.Accumulator()
HR, NDCG = evaluate.metrics(network, test_loader, args.top_k)
# Accumulate per-replica HR so the logged value is the cross-replica mean.
stats['HR'] += HR
stats['replicas'] += 1.0
with stats.synchronized():
    writer.add_scalar('Loss/HR', stats['HR'] / stats['replicas'], epoch)
elapsed_time = time.time() - start_time
print("The time elapse of epoch {:03d}".format(epoch) + " is: " +
        time.strftime("%H: %M: %S", time.gmtime(elapsed_time)))
print("HR: {:.3f}\tNDCG: {:.3f}".format(np.mean(HR), np.mean(NDCG)))
if HR > best_hr:
    best_hr, best_ndcg, best_epoch = HR, NDCG, epoch
    # Only rank-0 writes the checkpoint to avoid replicas clobbering it.
    if args.out and adaptdl.env.replica_rank() == 0:
        # NOTE(review): block truncated in this chunk -- the body of this
        # `if` continues past the visible source.
        if not os.path.exists(model_path):