Esempio n. 1
0
# Neural-network settings.
latent_dim = 50
epochs_in_batch = 25
epochs_overall = 10
back_propagation_batch_size = 64
training_batch_size = 6000

# Size thresholds handed to dblp.extract_data as its skill/member filters.
min_skill_size = 0
min_member_size = 0

# Print the GPUs reported by the Keras TensorFlow backend helper.
print(K.tensorflow_backend._get_available_gpus())

# Raw dataset pickle that is checked for, and loaded, while rebuilding.
ae_dataset_path = '../dataset/ae_dataset.pkl'

# Rebuild the cached artefacts only when the preprocessed dataset or the
# train/test indices are missing; otherwise go straight to loading them.
if not (dblp.preprocessed_dataset_exist()
        and dblp.train_test_indices_exist()):
    if not dblp.ae_data_exist(file_path=ae_dataset_path):
        dblp.extract_data(
            filter_journals=True,
            skill_size_filter=min_skill_size,
            member_size_filter=min_member_size,
        )
    # The existence check is repeated here, after the optional extraction.
    if not (dblp.preprocessed_dataset_exist()
            and dblp.train_test_indices_exist()):
        dblp.dataset_preprocessing(
            dblp.load_ae_dataset(file_path=ae_dataset_path),
            seed=seed,
            kfolds=k_fold,
        )

# Both the cached and the rebuilt path finish with the same two loads.
dataset = dblp.load_preprocessed_dataset()
train_test_indices = dblp.load_train_test_indices()

# k_fold cross-validation: the score list starts out empty.
cvscores = []
Esempio n. 2
0
encoding_dim = 500

# Print the GPUs reported by the Keras TensorFlow backend helper.
print(K.tensorflow_backend._get_available_gpus())

# A fresh Team2Vec instance is passed through load_T2V_model once for skills
# and once for users; each model's vector size ends up in the dataset file name.
print('Skill embedding options')
t2v_model_skill = load_T2V_model(Team2Vec())
embedding_dim_skill = t2v_model_skill.model.vector_size

print('User embedding options')
t2v_model_user = load_T2V_model(Team2Vec())
embedding_dim_user = t2v_model_user.model.vector_size

# Paths used below: the embedding-specific dataset and the raw dataset.
t2v_full_dataset_path = (
    '../dataset/ae_t2v_dimSkill{}_dimUser{}_tFull_dataset.pkl'.format(
        embedding_dim_skill, embedding_dim_user))
ae_dataset_path = '../dataset/ae_dataset.pkl'

if dblp.ae_data_exist(file_path=t2v_full_dataset_path):
    dataset = dblp.load_ae_dataset(file_path=t2v_full_dataset_path)
else:
    # NOTE(review): in the lines visible here this branch never assigns
    # `dataset`; the snippet appears to be cut off -- confirm against the
    # full script.
    if not dblp.ae_data_exist(file_path=ae_dataset_path):
        dblp.extract_data(
            filter_journals=True,
            skill_size_filter=min_skill_size,
            member_size_filter=min_member_size,
        )
    if not (dblp.preprocessed_dataset_exist()
            and dblp.train_test_indices_exist()):
        dblp.dataset_preprocessing(
            dblp.load_ae_dataset(file_path=ae_dataset_path),
            seed=seed,
            kfolds=k_fold,
        )
Esempio n. 3
0
# Neural-network settings.
encoding_dim = 1000
epochs = 300
back_propagation_batch_size = 64
training_batch_size = 6000

# Size thresholds handed to dblp.extract_data as its skill/member filters.
min_skill_size = 0
min_member_size = 0

# Print the GPUs reported by the Keras TensorFlow backend helper.
print(K.tensorflow_backend._get_available_gpus())

# A fresh Team2Vec instance is passed through load_T2V_model; the resulting
# model's vector size is part of the dataset file name.
t2v_model = load_T2V_model(Team2Vec())
embedding_dim = t2v_model.model.vector_size

# Paths used below: the embedding-specific dataset and the raw dataset.
t2v_skill_dataset_path = '../dataset/ae_t2v_dim{}_tSkill_dataset.pkl'.format(embedding_dim)
ae_dataset_path = '../dataset/ae_dataset.pkl'

# Generate the embedding dataset file only when it is not already present.
if not dblp.ae_data_exist(file_path=t2v_skill_dataset_path):
    if not dblp.ae_data_exist(file_path=ae_dataset_path):
        dblp.extract_data(
            filter_journals=True,
            skill_size_filter=min_skill_size,
            member_size_filter=min_member_size,
        )
    if not (dblp.preprocessed_dataset_exist() and dblp.train_test_indices_exist()):
        dblp.dataset_preprocessing(
            dblp.load_ae_dataset(file_path=ae_dataset_path),
            seed=seed,
            kfolds=k_fold,
        )
    preprocessed_dataset = dblp.load_preprocessed_dataset()

    dblp.nn_t2v_dataset_generator(
        t2v_model,
        preprocessed_dataset,
        output_file_path=t2v_skill_dataset_path,
        mode='skill',
    )
    # Drop the reference to the intermediate dataset before loading the final one.
    del preprocessed_dataset

# Whether it was cached or just generated, the dataset is loaded from the same file.
dataset = dblp.load_ae_dataset(file_path=t2v_skill_dataset_path)



# reparameterization trick
# Integer values 1 .. k_max inclusive.
evaluation_k_set = np.arange(1, k_max + 1, 1)

# Neural-network settings.
latent_dim = 2
beta = 30
epochs = 2000
back_propagation_batch_size = 32
training_batch_size = 6000

# Size thresholds handed to dblp.extract_data as its skill/member filters.
min_skill_size = 0
min_member_size = 0

print(tf.test.is_gpu_available())

# File locations used throughout the bootstrap below.
m2v_path = '../dataset/embedding_dict.pkl'
ae_m2v_dataset_path = '../dataset/ae_e_m2v_tSkill_dataset.pkl'
ae_dataset_path = '../dataset/ae_dataset.pkl'
preprocessed_dataset_path = '../dataset/dblp_preprocessed_dataset.pkl'
train_test_indices_path = '../dataset/Train_Test_indices.pkl'

# Generate the embedding dataset file only when it is not already present.
if not dblp.ae_data_exist(file_path=ae_m2v_dataset_path):
    if not dblp.ae_data_exist(file_path=ae_dataset_path):
        dblp.extract_data(
            filter_journals=True,
            skill_size_filter=min_skill_size,
            member_size_filter=min_member_size,
            output_dir=ae_dataset_path,
        )
    if not (dblp.preprocessed_dataset_exist(file_path=preprocessed_dataset_path)
            and dblp.train_test_indices_exist(file_path=train_test_indices_path)):
        dblp.dataset_preprocessing(
            dblp.load_ae_dataset(file_path=ae_dataset_path),
            indices_dict_file_path=train_test_indices_path,
            preprocessed_dataset_file_path=preprocessed_dataset_path,
            seed=seed,
            kfolds=k_fold,
        )
    preprocessed_dataset = dblp.load_preprocessed_dataset(file_path=preprocessed_dataset_path)

    dblp.nn_m2v_embedding_dataset_generator(
        model_path=m2v_path,
        dataset=preprocessed_dataset,
        output_file_path=ae_m2v_dataset_path,
        mode='skill',
        max_length=22,
    )
    # Drop the reference to the intermediate dataset before loading the final one.
    del preprocessed_dataset

# Whether it was cached or just generated, the dataset is loaded from the same file.
dataset = dblp.load_ae_dataset(file_path=ae_m2v_dataset_path)



# reparameterization trick