def main():
    """Train and evaluate the session-graph GNN on the dataset named by opt.

    Builds train/test loaders, a TensorBoard writer, the model, optimizer and
    a StepLR schedule, then alternates one training pass and one no-grad
    evaluation pass per epoch.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cur_dir = os.getcwd()

    train_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='train')
    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True)
    test_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='test')
    test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False)

    # One log directory per run: hyper-parameter string plus a timestamp.
    log_dir = cur_dir + '/../log/' + str(opt.dataset) + '/' + str(opt) + time.strftime(
        "%Y-%m-%d %H:%M:%S", time.localtime())
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logging.warning('logging to {}'.format(log_dir))
    writer = SummaryWriter(log_dir)

    # Item-vocabulary size per dataset (309 is the small sample dataset).
    if opt.dataset == 'diginetica':
        n_node = 43097
    elif opt.dataset == 'yoochoose1_64' or opt.dataset == 'yoochoose1_4':
        n_node = 37483
    else:
        n_node = 309

    model = GNNModel(hidden_size=opt.hidden_size, n_node=n_node).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.l2)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=opt.lr_dc_step, gamma=opt.lr_dc)
    logging.warning(model)

    for epoch in tqdm(range(opt.epoch)):
        forward(model, train_loader, device, writer, epoch, top_k=opt.top_k,
                optimizer=optimizer, train_flag=True)
        # Bug fix: step the LR scheduler AFTER the epoch's optimizer updates.
        # Stepping before training (as the original did) skips the first LR
        # value entirely (PyTorch >= 1.1 ordering requirement).
        scheduler.step()
        with torch.no_grad():
            forward(model, test_loader, device, writer, epoch, top_k=opt.top_k,
                    train_flag=False)
def main():
    """Train/evaluate the GNN model, or run prediction-only when opt.predict is set.

    Prediction path: loads a saved model from opt.model_path and writes a
    timestamped CSV under ../result/. Training path: builds loaders, logs to
    TensorBoard, and checkpoints the model to ../model/<dataset>/<run>.pth.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cur_dir = os.getcwd()

    if opt.predict:
        # Prediction-only branch: no training, write results CSV and return.
        save_dir = cur_dir + '/../result/'
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        save_path = save_dir + datetime.datetime.now().strftime("%Y_%m_%d_%H_%M") + ".csv"
        test_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='predict')
        test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False)
        predict(opt.model_path, test_loader, save_path, device)
        return

    train_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='train')
    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True)
    test_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset, phrase='test')
    test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False)

    # Build a compact run name from the hyper-parameters worth distinguishing.
    # (Renamed local from the original's typo "need_feild".)
    needed_fields = {"lr", "epoch", "batch_size"}
    log_name = "".join(k + "_" + str(v) for k, v in opt.__dict__.items() if k in needed_fields)
    log_dir = cur_dir + '/../log/' + str(opt.dataset) + '/' + log_name
    model_dir = cur_dir + '/../model/' + str(opt.dataset)
    model_path = model_dir + '/' + log_name + '.pth'
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logging.warning('logging to {}'.format(log_dir))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    # Bug fix: this message previously reported log_dir instead of model_dir.
    logging.warning('model save to {}'.format(model_dir))
    writer = SummaryWriter(log_dir)

    # Item-vocabulary size per dataset; 309 is the fallback sample dataset.
    node_d = {'diginetica': 43097, 'yoochoose1_64': 37483, 'yoochoose1_4': 37483,
              'debias': 117538}
    n_node = node_d.get(opt.dataset, 309)

    model = GNNModel(hidden_size=opt.hidden_size, n_node=n_node).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.l2)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=opt.lr_dc_step, gamma=opt.lr_dc)
    logging.warning(model)

    for epoch in tqdm(range(opt.epoch)):
        # NOTE: the scheduler is handed to forward() and stepped there
        # (the explicit scheduler.step() here was deliberately removed).
        forward(model, train_loader, device, writer, epoch, scheduler, top_k=opt.top_k,
                optimizer=optimizer, train_flag=True)
        with torch.no_grad():
            forward(model, test_loader, device, writer, epoch, top_k=opt.top_k,
                    train_flag=False)
        # Checkpoint after each epoch (overwrites the same file).
        # NOTE(review): original indentation was lost in the source; confirm
        # whether this save was intended per-epoch or once after the loop.
        torch.save(model, model_path)
def entropy_on_new(cur_dir, now, opt, model, device, current_win, win_size):
    """Score the new-window sessions by model entropy, then sample from them.

    Materialises `current_win` as a temporary dataset (phrase 'new'+now), runs
    an entropy forward pass to obtain per-session scores, removes the temporary
    processed file, and delegates the weighted sampling to random_on_new.

    Returns whatever random_on_new returns (the sampled window).
    """
    new_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset,
                                     phrase='new' + now, sampled_data=current_win)
    new_loader = DataLoader(new_dataset, batch_size=opt.batch_size, shuffle=False)
    # Largest item id appearing in the window's sessions (current_win[0]) or
    # targets (current_win[1]) — presumably bounds the score computation;
    # TODO(review): confirm against forward_entropy's signature.
    max_item = max(max(max(current_win[0])), max(current_win[1]))
    with torch.no_grad():
        pro = forward_entropy(model, new_loader, device, max_item)
    # Bug fix: anchor the cleanup path on cur_dir exactly like the dataset
    # construction above; the original used a bare relative path, which fails
    # whenever the process cwd differs from cur_dir.
    os.remove(cur_dir + '/../datasets/' + opt.dataset + '/processed/new' + now + '.pt')
    return random_on_new(current_win, win_size, p=pro)
def entropy_on_union(cur_dir, now, opt, model, device, current_res, current_win,
                     win_size, ent='entropy'):
    """Score the union R' = R U R^{new} with a chosen uncertainty measure, then sample.

    Concatenates the reservoir (`current_res`) with the new window
    (`current_win`), materialises the union as a temporary dataset, scores it
    with entropy / cross-entropy / Wasserstein depending on `ent`, removes the
    temporary processed file, and delegates sampling to random_on_union.

    Raises:
        ValueError: if `ent` is not one of 'entropy', 'cross', 'wass'.
            (Previously an unknown value fell through and crashed with a
            confusing NameError on `pro` at the return statement.)
    """
    # R' = R U R^{new}
    uni_x = current_res[0] + current_win[0]
    uni_y = current_res[1] + current_win[1]
    uni_user = current_res[2] + current_win[2]
    uni_data = (uni_x, uni_y, uni_user)
    uni_dataset = MultiSessionsGraph(cur_dir + '/../datasets/' + opt.dataset,
                                     phrase='uni' + now, sampled_data=uni_data)
    uni_loader = DataLoader(uni_dataset, batch_size=opt.batch_size, shuffle=False)
    # Largest item id in the NEW window (not the union) — matches the original;
    # TODO(review): confirm this bound is intentional for union scoring.
    max_item = max(max(max(current_win[0])), max(current_win[1]))
    with torch.no_grad():
        if ent == 'entropy':
            pro = forward_entropy(model, uni_loader, device, max_item)
        elif ent == 'cross':
            pro = forward_cross_entropy(model, uni_loader, device, max_item)
        elif ent == 'wass':
            pro = forward_wass(model, uni_loader, device, max_item)
        else:
            raise ValueError(
                "ent must be 'entropy', 'cross' or 'wass', got {!r}".format(ent))
    # Bug fix: anchor the cleanup path on cur_dir like the dataset construction
    # above, instead of a cwd-dependent relative path.
    os.remove(cur_dir + '/../datasets/' + opt.dataset + '/processed/uni' + now + '.pt')
    return random_on_union(current_res, current_win, win_size, p=pro)
def main():
    """Train and evaluate the session-graph GNN (datasets under ./datasets).

    Per epoch: one training pass, then one no-grad evaluation pass whose two
    metric values (h, m) are printed.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cur_dir = os.getcwd()

    # custom dataset
    train_dataset = MultiSessionsGraph(cur_dir + '/datasets/' + opt.dataset, phrase='train')
    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True)
    test_dataset = MultiSessionsGraph(cur_dir + '/datasets/' + opt.dataset, phrase='test')
    test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False)

    # Fixed log location (the timestamped per-run naming was abandoned).
    log_dir = cur_dir + '/log/' + str(opt.dataset) + '/' + 'model_log'
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logging.warning('logging to {}'.format(log_dir))
    writer = SummaryWriter(log_dir)

    # Item-vocabulary size per dataset (309 is the small sample dataset).
    if opt.dataset == 'diginetica':
        n_node = 43097
    elif opt.dataset == 'yoochoose1_64' or opt.dataset == 'yoochoose1_4':
        n_node = 37483
    else:
        n_node = 309

    # GNNModel was selected from several experimental architectures
    # (SortPool / Set2Set / GIN / virtual-node variants were tried and dropped).
    model = GNNModel(hidden_size=opt.hidden_size, n_node=n_node).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.l2)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=opt.lr_dc_step, gamma=opt.lr_dc)
    logging.warning(model)

    for epoch in tqdm(range(opt.epoch)):
        _, _ = forward(model, train_loader, device, writer, epoch, top_k=opt.top_k,
                       optimizer=optimizer, train_flag=True)
        # Bug fix: step the LR scheduler AFTER training, not before — stepping
        # first skips the initial LR value (PyTorch >= 1.1 ordering).
        scheduler.step()
        with torch.no_grad():
            h, m = forward(model, test_loader, device, writer, epoch, top_k=opt.top_k,
                           train_flag=False)
        print(h, m)
def main():
    """Train and evaluate the GNN on a DGL-style dataset layout.

    Dataset files live under ../../../_data/<dataset>/; yoochoose fractions
    share one directory with per-fraction file-name prefixes. Uses
    GraphDataLoader and steps the LR scheduler once per epoch after training.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cur_dir = os.getcwd()

    # yoochoose1_64 / yoochoose1_4 share the 'yoochoose' data directory and are
    # distinguished by a file-name prefix instead.
    train_filename, test_filename = 'train.txt', 'test.txt'
    dataset_name = opt.dataset
    if 'yoochoose' in opt.dataset:
        dataset_name = 'yoochoose'
        train_filename = opt.dataset + '-' + train_filename
        test_filename = opt.dataset + '-' + test_filename

    train_dataset = MultiSessionsGraph(
        name=train_filename,
        raw_dir=cur_dir + '/../../../_data/' + dataset_name + '/processed/',
        save_dir=cur_dir + '/../../../_data/' + dataset_name + '/saved/',
        force_reload=True)
    # Dead code removed: a SubsetRandomSampler over all indices was built here
    # but never passed to the loader (shuffle=True covers it).
    train_loader = GraphDataLoader(
        train_dataset,
        batch_size=opt.batch_size,
        shuffle=True,
        drop_last=False)

    test_dataset = MultiSessionsGraph(
        name=test_filename,
        raw_dir=cur_dir + '/../../../_data/' + dataset_name + '/processed/',
        save_dir=cur_dir + '/../../../_data/' + dataset_name + '/saved/',
        force_reload=True)
    test_loader = GraphDataLoader(
        test_dataset,
        batch_size=opt.batch_size,
        shuffle=False,
        drop_last=False)

    log_dir = os.path.join(cur_dir, 'log', str(opt.dataset), str(opt))
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logging.warning('logging to {}'.format(log_dir))
    writer = SummaryWriter(log_dir)

    # Item-vocabulary size per dataset family (309 is the sample dataset).
    if 'diginetica' in opt.dataset:
        n_node = 43097
    elif 'yoochoose' in opt.dataset:
        n_node = 37483
    else:
        n_node = 309

    model = GNNModel(hidden_size=opt.hidden_size, n_node=n_node).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.l2)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=opt.lr_dc_step, gamma=opt.lr_dc)
    logging.warning(model)

    for epoch in tqdm(range(opt.epoch)):
        forward(model, train_loader, device, writer, epoch, top_k=opt.top_k,
                optimizer=optimizer, train_flag=True)
        with torch.no_grad():
            forward(model, test_loader, device,
                    writer, epoch, top_k=opt.top_k, train_flag=False)
        # Correct ordering: scheduler steps after the epoch's optimizer updates.
        scheduler.step()