def train_embed(data_dir, params, model_name):
    """Train a knowledge-graph embedding model (TransE or SparseTransE).

    Args:
        data_dir: directory containing the triplet dataset.
        params: dict of tuned hyper-parameters (embedding_dim, batch_size,
            lr, weight_decay, lr_decay_rate; plus alpha for SparseTransE).
        model_name: 'TransE' or 'SparseTransE'.

    Returns:
        The trained model instance.
    """
    # Load hyper-parameters
    embedding_dim = params['embedding_dim']
    batch_size = params['batch_size']
    lr = params['lr']
    weight_decay = params['weight_decay']
    warmup = 350  # fixed; previously read from params['warmup']
    lr_decay_every = 2  # fixed; previously read from params['lr_decay_every']
    lr_decay_rate = params['lr_decay_rate']
    if model_name == 'SparseTransE':
        alpha = params['alpha']  # sparsity regularization weight

    # Load dataset.
    # BUG FIX: was hard-coded to model_name='TransE' even for SparseTransE;
    # pass the actual model_name for consistency with the Optuna objective,
    # which loads the dataset with model_name=model_name.
    dataset = AmazonDataset(data_dir, model_name=model_name)
    relation_size = len(set(list(dataset.triplet_df['relation'].values)))
    entity_size = len(dataset.entity_list)

    if model_name == 'TransE':
        model = TransE(int(embedding_dim), relation_size, entity_size).to(device)
    elif model_name == 'SparseTransE':
        model = SparseTransE(int(embedding_dim), relation_size, entity_size,
                             alpha=alpha).to(device)

    iterater = TrainIterater(batch_size=int(batch_size), data_dir=data_dir,
                             model_name=model_name)
    iterater.iterate_epoch(model, lr=lr, epoch=3000, weight_decay=weight_decay,
                           warmup=warmup, lr_decay_rate=lr_decay_rate,
                           lr_decay_every=lr_decay_every, eval_every=1e+5,
                           early_stop=True)

    return model
def objective(trial):
    """Optuna objective for tuning PPR_TransE hyper-parameters.

    Samples PageRank mixing weights and TransE training hyper-parameters,
    trains on two validation splits, and returns the negated average
    validation score (Optuna minimizes, so better scores give smaller values).
    """
    start = time.time()

    # PageRank parameters
    mu = trial.suggest_uniform('mu', 0, 1)
    # BUG FIX: this value is used as `alpha` everywhere below, but was
    # registered under the misleading Optuna key 'beta'; store it as 'alpha'.
    alpha = trial.suggest_uniform('alpha', 0, 0.5)
    kappa1 = trial.suggest_uniform('kappa1', 0, 1)
    kappa2 = trial.suggest_uniform('kappa2', 0, 1)
    kappa3 = trial.suggest_uniform('kappa3', 0, 1)
    kappa = [kappa1, kappa2, kappa3]

    # Model parameters
    embedding_dim = int(
        trial.suggest_discrete_uniform('embedding_dim', 16, 128, 16))

    # Training parameters
    # BUG FIX: the Optuna key was misspelled 'lambada_'.
    lambda_ = trial.suggest_uniform('lambda_', 0, 1)
    batch_size = trial.suggest_int('batch_size', 256, 512, 128)  # step=128
    lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    warmup = trial.suggest_int('warmup', 10, 100)
    lr_decay_every = 2  # fixed; previously tuned via suggest_int
    lr_decay_rate = trial.suggest_uniform('lr_decay_rate', 0.5, 1)

    data_dir = ['../data_luxury_5core/valid1', '../data_luxury_5core/valid2']
    score_sum = 0
    for dir_path in data_dir:
        dataset = AmazonDataset(dir_path, model_name='TransE')
        relation_size = len(set(list(dataset.triplet_df['relation'].values)))
        entity_size = len(dataset.entity_list)

        ppr_transe = PPR_TransE(embedding_dim, relation_size, entity_size,
                                dir_path, alpha, mu, kappa).to(device)
        # NOTE(review): `model_name` is not defined in this function — it is
        # presumably a module-level global; confirm it is set before calling.
        iterater = TrainIterater(batch_size=int(batch_size), data_dir=dir_path,
                                 model_name=model_name)
        iterater.iterate_epoch(ppr_transe, lr=lr, epoch=2000,
                               weight_decay=weight_decay, lambda_=lambda_,
                               warmup=warmup, lr_decay_rate=lr_decay_rate,
                               lr_decay_every=lr_decay_every, eval_every=1e+5)

        # Score this validation split
        inf = Inference(dir_path)
        score_sum += inf.get_score(ppr_transe, kappa, mu, alpha)

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    # Average over the two splits, negated for minimization
    return -1 * score_sum / 2
def objective(trial):
    """Optuna objective for tuning a TransE / SparseTransE embedding model.

    Trains the embedding on two validation splits with early stopping and
    returns the negated average score (Optuna minimizes).
    """
    start = time.time()
    import gc
    gc.collect()

    data_dir = [data_path + '/valid1', data_path + '/valid2']
    score_sum = 0
    embed_model = {'TransE': TransE, 'SparseTransE': SparseTransE}

    # Hyper-parameters
    embedding_dim = trial.suggest_discrete_uniform('embedding_dim', 16, 128, 16)
    if model_name == 'SparseTransE':
        # Sparsity regularization weight (SparseTransE only)
        alpha = trial.suggest_loguniform('alpha', 1e-6, 1e-2)
    batch_size = trial.suggest_int('batch_size', 128, 512, 128)  # step=128
    lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    warmup = 350  # fixed; previously tuned via suggest_int
    lr_decay_every = 2  # fixed; previously tuned via suggest_int
    lr_decay_rate = trial.suggest_uniform('lr_decay_rate', 0.5, 1)

    for dir_path in data_dir:
        # Load the split
        dataset = AmazonDataset(dir_path, model_name=model_name)
        relation_size = len(set(list(dataset.triplet_df['relation'].values)))
        entity_size = len(dataset.entity_list)

        # BUG FIX: the sampled `alpha` was never passed to SparseTransE, so
        # the tuned value had no effect (cf. train_embed, which constructs
        # SparseTransE with alpha=alpha). Forward it here.
        if model_name == 'SparseTransE':
            model = embed_model[model_name](int(embedding_dim), relation_size,
                                            entity_size, alpha=alpha).to(device)
        else:
            model = embed_model[model_name](int(embedding_dim), relation_size,
                                            entity_size).to(device)

        iterater = TrainIterater(batch_size=int(batch_size), data_dir=dir_path,
                                 model_name=model_name)
        score = iterater.iterate_epoch(model, lr=lr, epoch=3000,
                                       weight_decay=weight_decay, warmup=warmup,
                                       lr_decay_rate=lr_decay_rate,
                                       lr_decay_every=lr_decay_every,
                                       eval_every=1e+5, early_stop=True)
        score_sum += score

    # Free cached GPU memory between trials
    torch.cuda.empty_cache()

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    # Average over the two splits, negated for minimization
    return -1 * score_sum / 2
def objective(trial):
    """Optuna objective for tuning BPR hyper-parameters.

    Builds a BPR model from the sampled settings, trains it, and returns
    the negated evaluation score so Optuna can minimize it.
    """
    start = time.time()
    import gc
    gc.collect()

    dataset = AmazonDataset('./data')

    embedding_dim = trial.suggest_discrete_uniform('embedding_dim', 16, 64, 16)
    bpr = BPR(int(embedding_dim), len(dataset.user_list),
              len(dataset.item_list)).to(device)

    batch_size = trial.suggest_discrete_uniform('batch_size', 64, 256, 64)
    iterater = TrainIterater(batch_size=int(batch_size))

    # Optimizer / schedule search space
    lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    warmup = trial.suggest_int('warmup', 100, 500)
    lr_decay_every = trial.suggest_int('lr_decay_every', 1, 5)
    lr_decay_rate = trial.suggest_uniform('lr_decay_rate', 0.5, 1)

    score = iterater.iterate_epoch(bpr, lr=lr, epoch=3000,
                                   weight_decay=weight_decay, warmup=warmup,
                                   lr_decay_rate=lr_decay_rate,
                                   lr_decay_every=lr_decay_every,
                                   eval_every=1e+5)

    # Release cached GPU memory before the next trial
    torch.cuda.empty_cache()

    elapsed_min, elapsed_sec = time_since(time.time() - start)
    print('{}m{}sec'.format(elapsed_min, elapsed_sec))

    return -1 * score
# dataload data_dir = '../' + data_path + '/test/' dataset = AmazonDataset(data_dir, model_name='SparseTransE') relation_size = len(set(list(dataset.triplet_df['relation'].values))) entity_size = len(dataset.entity_list) embedding_dim = params['embedding_dim'] alpha = params['alpha'] model = SparseTransE(int(embedding_dim), relation_size, entity_size, alpha=alpha).to(device) batch_size = params['batch_size'] iterater = TrainIterater(batch_size=int(batch_size), data_dir=data_dir, model_name=model_name) lr = params['lr'] weight_decay = params['weight_decay'] warmup = 350 lr_decay_every = 2 lr_decay_rate = params['lr_decay_rate'] score = iterater.iterate_epoch(model, lr=lr, epoch=3000, weight_decay=weight_decay, warmup=warmup, lr_decay_rate=lr_decay_rate,
if __name__ == '__main__':
    # Evaluate BPR on the test split using the tuned hyper-parameters.
    params = load_param('./result_beauty')
    embedding_dim = params['embedding_dim']
    batch_size = params['batch_size']
    lr = params['lr']
    weight_decay = params['weight_decay']
    warmup = params['warmup']
    lr_decay_every = params['lr_decay_every']
    lr_decay_rate = params['lr_decay_rate']

    data_dir = '../data_beauty_2core_es/test/bpr'
    dataset = AmazonDataset(data_dir)

    bpr = BPR(int(embedding_dim), len(dataset.user_list),
              len(dataset.item_list)).to(device)
    iterater = TrainIterater(batch_size=int(batch_size), data_dir=data_dir)
    score = iterater.iterate_epoch(bpr, lr=lr, epoch=3000,
                                   weight_decay=weight_decay, warmup=warmup,
                                   lr_decay_rate=lr_decay_rate,
                                   lr_decay_every=lr_decay_every,
                                   eval_every=1e+5, early_stop=True)

    # Record the test score
    np.savetxt('./result_beauty/score.txt', np.array([score]))