Example #1
def train_embed(data_dir, params, model_name):
    # load hyperparameters
    embedding_dim = params['embedding_dim']
    batch_size = params['batch_size']
    lr = params['lr']
    weight_decay = params['weight_decay']
    warmup = 350  # fixed here instead of tuned; params['warmup'] is unused
    lr_decay_every = 2  # fixed here instead of tuned; params['lr_decay_every'] is unused
    lr_decay_rate = params['lr_decay_rate']
    if model_name == 'SparseTransE':
        alpha = params['alpha']
    
    # load the dataset
    dataset = AmazonDataset(data_dir, model_name=model_name)
    relation_size = len(set(list(dataset.triplet_df['relation'].values)))
    entity_size = len(dataset.entity_list)
    if model_name == 'TransE':
        model = TransE(int(embedding_dim), relation_size, entity_size).to(device)
    elif model_name == 'SparseTransE':
        model = SparseTransE(int(embedding_dim), relation_size, entity_size, alpha=alpha).to(device)
    iterater = TrainIterater(batch_size=int(batch_size), data_dir=data_dir, model_name=model_name)
    iterater.iterate_epoch(model, lr=lr, epoch=3000, weight_decay=weight_decay, warmup=warmup,
                           lr_decay_rate=lr_decay_rate, lr_decay_every=lr_decay_every, eval_every=1e+5, 
                           early_stop=True)
    return model
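
For reference, a minimal usage sketch (not from the original source): it assumes load_param returns the tuned hyperparameter dict, as in Example #6, and the result directory and test-split path below are hypothetical.

params = load_param('./result_luxury')  # hypothetical result directory
model = train_embed('../data_luxury_5core/test/', params, 'TransE')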
Example #2
def objective(trial):
    start = time.time()
    # PageRank parameters
    mu = trial.suggest_uniform('mu', 0, 1)
    alpha = trial.suggest_uniform('alpha', 0, 0.5)
    kappa1 = trial.suggest_uniform('kappa1', 0, 1)
    kappa2 = trial.suggest_uniform('kappa2', 0, 1)
    kappa3 = trial.suggest_uniform('kappa3', 0, 1)
    kappa = [kappa1, kappa2, kappa3]

    # model parameters
    embedding_dim = int(
        trial.suggest_discrete_uniform('embedding_dim', 16, 128, 16))
    #alpha = trial.suggest_loguniform('alpha', 1e-6, 1e-2)  # only used for SparseTransE

    # training parameters
    lambda_ = trial.suggest_uniform('lambda_', 0, 1)
    batch_size = trial.suggest_int('batch_size', 256, 512, 128)
    lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    warmup = trial.suggest_int('warmup', 10, 100)
    lr_decay_every = 2  # fixed here instead of tuned
    lr_decay_rate = trial.suggest_uniform('lr_decay_rate', 0.5, 1)

    data_dir = ['../data_luxury_5core/valid1', '../data_luxury_5core/valid2']
    score_sum = 0
    for i in range(len(data_dir)):

        dataset = AmazonDataset(data_dir[i], model_name='TransE')
        relation_size = len(set(list(dataset.triplet_df['relation'].values)))
        entity_size = len(dataset.entity_list)

        ppr_transe = PPR_TransE(embedding_dim, relation_size, entity_size,
                                data_dir[i], alpha, mu, kappa).to(device)

        iterater = TrainIterater(batch_size=int(batch_size),
                                 data_dir=data_dir[i],
                                 model_name=model_name)

        iterater.iterate_epoch(ppr_transe,
                               lr=lr,
                               epoch=2000,
                               weight_decay=weight_decay,
                               lambda_=lambda_,
                               warmup=warmup,
                               lr_decay_rate=lr_decay_rate,
                               lr_decay_every=lr_decay_every,
                               eval_every=1e+5)

        # inference
        inf = Inference(data_dir[i])
        score = inf.get_score(ppr_transe, kappa, mu, alpha)
        score_sum += score

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

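    # negate because Optuna minimizes by default; average over the two validation splits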
    return -1 * score_sum / 2
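
time_since is called in every objective above but is not defined in these excerpts; the following is a minimal sketch consistent with its usage here (elapsed seconds in, minutes and seconds out), not the original implementation.

def time_since(elapsed):
    # Split elapsed seconds into whole minutes and remaining seconds.
    mi = int(elapsed // 60)
    sec = int(elapsed % 60)
    return mi, sec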
Example #3
File: run.py Project: szktkfm/kg_embedding
def objective(trial):
    start = time.time()
    import gc
    gc.collect()

    data_dir = [data_path + '/valid1', data_path + '/valid2']
    score_sum = 0

    embed_model = {'TransE': TransE, 'SparseTransE': SparseTransE}
    # hyperparameters
    embedding_dim = trial.suggest_discrete_uniform('embedding_dim', 16, 128,
                                                   16)

    if model_name == 'SparseTransE':
        alpha = trial.suggest_loguniform('alpha', 1e-6,
                                         1e-2)  # only used for SparseTransE

    batch_size = trial.suggest_int('batch_size', 128, 512, 128)
    lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    warmup = 350  # fixed here instead of tuned
    lr_decay_every = 2  # fixed here instead of tuned
    lr_decay_rate = trial.suggest_uniform('lr_decay_rate', 0.5, 1)

    for dir_path in data_dir:
        # load the dataset
        dataset = AmazonDataset(dir_path, model_name=model_name)
        relation_size = len(set(list(dataset.triplet_df['relation'].values)))
        entity_size = len(dataset.entity_list)
        model = embed_model[model_name](int(embedding_dim), relation_size,
                                        entity_size).to(device)
        iterater = TrainIterater(batch_size=int(batch_size),
                                 data_dir=dir_path,
                                 model_name=model_name)
        score = iterater.iterate_epoch(model,
                                       lr=lr,
                                       epoch=3000,
                                       weight_decay=weight_decay,
                                       warmup=warmup,
                                       lr_decay_rate=lr_decay_rate,
                                       lr_decay_every=lr_decay_every,
                                       eval_every=1e+5,
                                       early_stop=True)

        score_sum += score

    torch.cuda.empty_cache()

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    return -1 * score_sum / 2
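
These objectives return a negated score because optuna.create_study minimizes by default. A minimal sketch of how such an objective is typically driven (the trial count is illustrative, not from the source):

import optuna

study = optuna.create_study()            # direction='minimize' is the default
study.optimize(objective, n_trials=100)  # n_trials is an assumption
print(study.best_params)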
Example #4
def objective(trial):
    start = time.time()

    import gc
    gc.collect()

    dataset = AmazonDataset('./data')

    embedding_dim = trial.suggest_discrete_uniform('embedding_dim', 16, 64, 16)
    bpr = BPR(int(embedding_dim), len(dataset.user_list),
              len(dataset.item_list)).to(device)

    batch_size = trial.suggest_discrete_uniform('batch_size', 64, 256, 64)
    iterater = TrainIterater(batch_size=int(batch_size))

    lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    warmup = trial.suggest_int('warmup', 100, 500)
    lr_decay_every = trial.suggest_int('lr_decay_every', 1, 5)
    lr_decay_rate = trial.suggest_uniform('lr_decay_rate', 0.5, 1)

    score = iterater.iterate_epoch(bpr,
                                   lr=lr,
                                   epoch=3000,
                                   weight_decay=weight_decay,
                                   warmup=warmup,
                                   lr_decay_rate=lr_decay_rate,
                                   lr_decay_every=lr_decay_every,
                                   eval_every=1e+5)

    torch.cuda.empty_cache()

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    return -1 * score
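
Example #6 below reloads tuned parameters with load_param, whose counterpart is not shown in these excerpts. A hypothetical sketch of persisting study.best_params with pickle follows; the function name and storage format are assumptions.

import pickle

def save_param(result_dir, params):
    # Persist the best trial's hyperparameters for the test-time scripts to reload.
    with open(result_dir + '/best_param.pickle', 'wb') as f:
        pickle.dump(params, f)

# e.g. after study.optimize(...) finishes:
# save_param('./result_beauty', study.best_params)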
Example #5
    # load the dataset
    data_dir = '../' + data_path + '/test/'
    dataset = AmazonDataset(data_dir, model_name='SparseTransE')

    relation_size = len(set(list(dataset.triplet_df['relation'].values)))
    entity_size = len(dataset.entity_list)
    embedding_dim = params['embedding_dim']
    alpha = params['alpha']
    model = SparseTransE(int(embedding_dim),
                         relation_size,
                         entity_size,
                         alpha=alpha).to(device)

    batch_size = params['batch_size']
    iterater = TrainIterater(batch_size=int(batch_size),
                             data_dir=data_dir,
                             model_name=model_name)

    lr = params['lr']
    weight_decay = params['weight_decay']

    warmup = 350
    lr_decay_every = 2
    lr_decay_rate = params['lr_decay_rate']

    score = iterater.iterate_epoch(model,
                                   lr=lr,
                                   epoch=3000,
                                   weight_decay=weight_decay,
                                   warmup=warmup,
                                   lr_decay_rate=lr_decay_rate,
                                   lr_decay_every=lr_decay_every,
                                   eval_every=1e+5,
                                   early_stop=True)
Example #6

if __name__ == '__main__':
    params = load_param('./result_beauty')
    embedding_dim = params['embedding_dim']
    batch_size = params['batch_size']
    lr = params['lr']
    weight_decay = params['weight_decay']
    warmup = params['warmup']
    lr_decay_every = params['lr_decay_every']
    lr_decay_rate = params['lr_decay_rate']

    data_dir = '../data_beauty_2core_es/test/bpr'
    dataset = AmazonDataset(data_dir)
    bpr = BPR(int(embedding_dim), len(dataset.user_list),
              len(dataset.item_list)).to(device)
    iterater = TrainIterater(batch_size=int(batch_size), data_dir=data_dir)
    score = iterater.iterate_epoch(bpr,
                                   lr=lr,
                                   epoch=3000,
                                   weight_decay=weight_decay,
                                   warmup=warmup,
                                   lr_decay_rate=lr_decay_rate,
                                   lr_decay_every=lr_decay_every,
                                   eval_every=1e+5,
                                   early_stop=True)

    # record the test score
    np.savetxt('./result_beauty/score.txt', np.array([score]))