def objective(trial):
    start = time.time()
    data_dir = ['../data_luxury_5core/valid1/', '../data_luxury_5core/valid2/']
    score_sum = 0
    for data_path in data_dir:
        # load data
        dataset = dataloader.AmazonDataset(data_path)

        # user->item edges; for relation 0, also add the reverse item->user edge
        edges = [[r[0], r[1]] for r in dataset.triplet_df.values]
        for r in dataset.triplet_df.values:
            if r[2] == 0:
                edges.append([r[1], r[0]])

        #user_items_test_dict = pickle.load(open(data_path + 'user_items_test_dict.pickle', 'rb'))

        # build the graph
        G = nx.DiGraph()
        G.add_nodes_from([i for i in range(len(dataset.entity_list))])
        G.add_edges_from(edges)

        # hyperparameter
        alpha = trial.suggest_uniform('alpha', 0, 1)

        ranking_mat = get_ranking_mat(G, alpha, dataset)
        evaluater = evaluate.Evaluater(data_path)
        score = evaluater.topn_map(ranking_mat)
        score_sum += score

    mi, sec = time_since(time.time() - start)
    print('{}m{}s'.format(mi, sec))

    # Optuna minimizes, so return the negated mean score over the two splits
    return -1 * score_sum / 2
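# get_ranking_mat is defined elsewhere in the repository and not shown here.
# A minimal sketch of what it plausibly computes, assuming one personalized-
# PageRank run per user and that user nodes precede item nodes in entity_list
# (both are assumptions, not confirmed by this section):
import networkx as nx
import numpy as np

def get_ranking_mat_sketch(G, alpha, dataset):
    n_user = len(dataset.user_list)
    n_item = len(dataset.item_list)
    ranking_mat = []
    for u in range(n_user):
        # restart distribution concentrated on user u
        personalization = {n: 0.0 for n in G.nodes}
        personalization[u] = 1.0
        pr = nx.pagerank(G, alpha=alpha, personalization=personalization)
        item_scores = np.array([pr[n_user + i] for i in range(n_item)])
        # items sorted by descending PageRank score
        ranking_mat.append(np.argsort(-item_scores))
    return np.array(ranking_mat)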
def objective(trial):
    start = time.time()

    import gc
    gc.collect()

    score_sum = 0
    data_dirs = ['../' + data_path + '/valid1/bpr',
                 '../' + data_path + '/valid2/bpr']
    for data_dir in data_dirs:
        dataset = dataloader.AmazonDataset(data_dir)

        # model hyperparameters
        embedding_dim = trial.suggest_discrete_uniform('embedding_dim', 16, 128, 16)
        user_size = len(dataset.user_list)
        item_size = len(dataset.item_list)
        layer_size = trial.suggest_int('layer_size', 1, 3)
        nfm = model.NFM(int(embedding_dim), user_size, item_size, layer_size).to(device)

        # training hyperparameters
        batch_size = int(trial.suggest_discrete_uniform('batch_size', 128, 512, 128))
        lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
        weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
        warmup = 350
        lr_decay_every = 2
        lr_decay_rate = trial.suggest_uniform('lr_decay_rate', 0.5, 1)

        iterater = training.TrainIterater(batch_size=batch_size, data_dir=data_dir)
        score = iterater.iterate_epoch(nfm, lr, epoch=2000,
                                       weight_decay=weight_decay, warmup=warmup,
                                       lr_decay_rate=lr_decay_rate,
                                       lr_decay_every=lr_decay_every,
                                       eval_every=1e+5, early_stop=True)

        torch.cuda.empty_cache()
        score_sum += score

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    return -1 * score_sum / 2
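# None of these files show how the objectives are launched. A minimal driver
# sketch, assuming Optuna; the trial count is illustrative:
import optuna

study = optuna.create_study(direction='minimize')  # objectives return -score
study.optimize(objective, n_trials=100)
print(study.best_params)
print(-study.best_value)  # best mean score over the two validation splits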
def __init__(self, data_dir):
    self.dataset = dataloader.AmazonDataset(data_dir)
def load_params():
    return pickle.load(open('./result/best_param', 'rb'))


def time_since(runtime):
    mi = int(runtime / 60)
    sec = runtime - mi * 60
    return (mi, sec)


if __name__ == '__main__':
    data_dir = '../data_luxury_5core/bpr'
    params = load_params()
    dataset = dataloader.AmazonDataset(data_dir)
    #print(dataset.user_items_test_dict)

    # rebuild the model with the tuned hyperparameters
    embedding_dim = params['embedding_dim']
    user_size = len(dataset.user_list)
    item_size = len(dataset.item_list)
    layer_size = int(params['layer_size'])
    batch_size = int(params['batch_size'])
    lr = params['lr']
    weight_decay = params['weight_decay']
    warmup = 350
    lr_decay_every = 2
    lr_decay_rate = params['lr_decay_rate']
    nfm = model.NFM(int(embedding_dim), user_size, item_size,
                    layer_size).to(device)
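    # The original script breaks off after building the model. A plausible
    # continuation, mirroring the tuning objective above; the epoch count and
    # eval_every value are assumptions carried over from that script:
    iterater = training.TrainIterater(batch_size=batch_size, data_dir=data_dir)
    iterater.iterate_epoch(nfm, lr, epoch=2000, weight_decay=weight_decay,
                           warmup=warmup, lr_decay_rate=lr_decay_rate,
                           lr_decay_every=lr_decay_every, eval_every=1e+5)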
with open(data_path + '/entity_list.txt', 'r') as f:
    for l in f:
        entity_list.append(l.replace('\n', ''))
with open(data_path + '/user_list.txt', 'r') as f:
    for l in f:
        user_list.append(l.replace('\n', ''))
with open(data_path + '/item_list.txt', 'r') as f:
    for l in f:
        item_list.append(l.replace('\n', ''))

user_idx = [entity_list.index(u) for u in user_list]

dataset = dataloader.AmazonDataset(data_path)

# build the graph
# add both the user->item and item->user edges
edges = [[r[0], r[1]] for r in dataset.triplet_df.values]
for r in dataset.triplet_df.values:
    if r[2] == 0:
        edges.append([r[1], r[0]])

# load network
G = nx.DiGraph()
G.add_nodes_from([i for i in range(len(dataset.entity_list))])
G.add_edges_from(edges)

alpha = 0.85
ranking_mat = get_ranking_mat(alpha)
evaluater = evaluate.Evaluater(data_path)
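# This fragment ends right after constructing the Evaluater; presumably the
# ranking is scored next, as in the tuning objective at the top of this section:
score = evaluater.topn_map(ranking_mat)
print(score)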
def __init__(self, batch_size, data_dir):
    self.data_dir = data_dir
    self.dataset = dataloader.AmazonDataset(data_dir)
    self.batch_size = batch_size