Example #1
# (snippet excerpt: data_dir, args, load_param, real_adj_matrix, device, and model_data_type are defined earlier in the source script)
train_data_path = data_dir + 'train.txt'
train_instances = utils.read_instances_lines_from_file(train_data_path)
nb_train = len(train_instances)
print(nb_train)

validate_data_path = data_dir + 'validate.txt'
validate_instances = utils.read_instances_lines_from_file(validate_data_path)
nb_validate = len(validate_instances)
print(nb_validate)

test_data_path = data_dir + 'test.txt'
test_instances = utils.read_instances_lines_from_file(test_data_path)
nb_test = len(test_instances)
print(nb_test)

### build knowledge ###

print("@Build knowledge")
MAX_SEQ_LENGTH, item_dict, reversed_item_dict, item_probs = utils.build_knowledge(train_instances, validate_instances)

print("#Statistic")
NB_ITEMS = len(item_dict)
print(" + Maximum sequence length: ", MAX_SEQ_LENGTH)
print(" + Total items: ", NB_ITEMS)
print('density of C matrix: %.6f' % (real_adj_matrix.nnz * 1.0 / NB_ITEMS / NB_ITEMS))

batch_size = args.batch_size
# train_loader = data_utils.generate_data_loader(train_instances, load_param['batch_size'], item_dict, MAX_SEQ_LENGTH, is_bseq=True, is_shuffle=True)
# valid_loader = data_utils.generate_data_loader(validate_instances, load_param['batch_size'], item_dict, MAX_SEQ_LENGTH, is_bseq=True, is_shuffle=False)
test_loader = data_utils.generate_data_loader(test_instances, batch_size, item_dict, MAX_SEQ_LENGTH, is_bseq=True, is_shuffle=False)

pre_trained_model = model.RecSysModel(load_param, MAX_SEQ_LENGTH, item_probs, real_adj_matrix.todense(), device, model_data_type)
pre_trained_model.to(device, dtype=model_data_type)
optimizer = torch.optim.RMSprop(pre_trained_model.parameters(), lr=0.001)
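
All of these examples call utils.build_knowledge to derive vocabulary statistics from the raw instance lines. Its body is not shown on this page; below is a minimal sketch of what such a helper might compute, assuming one space-separated basket sequence per line (the function name and body are assumptions, not the repo's code).

from collections import Counter

# Minimal sketch (assumed): scan instance lines and return the longest
# sequence length, an item-to-index vocabulary, its inverse, and empirical
# item probabilities -- the values the examples on this page unpack.
def build_knowledge_sketch(*instance_sets):
    counter = Counter()
    max_seq_length = 0
    for instances in instance_sets:
        for line in instances:
            items = line.split()  # assumes space-separated item tokens
            max_seq_length = max(max_seq_length, len(items))
            counter.update(items)
    item_dict = {item: idx for idx, item in enumerate(sorted(counter))}
    reversed_item_dict = {idx: item for item, idx in item_dict.items()}
    total = sum(counter.values())
    item_probs = [counter[item] / total for item in sorted(counter)]
    return max_seq_length, item_dict, reversed_item_dict, item_probs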
Example #2
nb_predict = args.nb_predict
topk = args.topk
ex_file = args.example_file

# (snippet excerpt: args, f_dir, o_dir, model_name, and the POP class are defined earlier in the source script)
data_dir = f_dir
train_data_path = data_dir + 'train_lines.txt'
train_instances = utils.read_instances_lines_from_file(train_data_path)
nb_train = len(train_instances)
# print(nb_train)

test_data_path = data_dir + 'test_lines.txt'
test_instances = utils.read_instances_lines_from_file(test_data_path)
nb_test = len(test_instances)
# print(nb_test)
# print("---------------------@Build knowledge-------------------------------")
MAX_SEQ_LENGTH, item_dict, reversed_item_dict, item_probs, item_freq_dict, user_dict = utils.build_knowledge(
    train_instances + test_instances)

if not os.path.exists(o_dir):
    os.makedirs(o_dir)
saved_file = os.path.join(o_dir, model_name)
# print("Load model from ", saved_file)
f = np.load(saved_file)  # load the saved arrays; np.load takes only the file path
item_probs = f['item_probs']
# print(item_probs)
pop_model = POP(item_dict, reversed_item_dict, item_probs)

if ex_file is not None:
    ex_instances = utils.read_instances_lines_from_file(ex_file)
else:
    ex_instances = test_instances
for i in random.sample(ex_instances, nb_predict):
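
Example #2's POP model is a popularity baseline built from item_probs. Its class body is not shown here; a minimal sketch, assuming it simply ranks items by empirical probability (the class name, method, and body are assumptions):

import numpy as np

# Hypothetical popularity baseline (assumed behavior of the POP class above):
# rank all items by empirical probability; the top-k is the same for every input.
class PopBaselineSketch:
    def __init__(self, item_dict, reversed_item_dict, item_probs):
        self.reversed_item_dict = reversed_item_dict
        self.ranked_indices = np.argsort(item_probs)[::-1]  # most popular first

    def top_predicted_items(self, topk):
        return [self.reversed_item_dict[i] for i in self.ranked_indices[:topk]]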
Example #3
# (snippet excerpt: training_file, validate_file, testing_file, config, gpu_config, output_dir, and procedure are defined earlier in the source script)
training_instances = utils.read_file_as_lines(training_file)
nb_train = len(training_instances)
total_train_batches = utils.compute_total_batches(nb_train, config.batch_size)
print(" + Total training sequences: ", nb_train)
print(" + #batches in train ", total_train_batches)

validate_instances = utils.read_file_as_lines(validate_file)
nb_validate = len(validate_instances)
total_validate_batches = utils.compute_total_batches(nb_validate,
                                                     config.batch_size)
print(" + Total validating sequences: ", nb_validate)
print(" + #batches in validate ", total_validate_batches)

testing_instances = utils.read_file_as_lines(testing_file)
nb_test = len(testing_instances)
total_test_batches = utils.compute_total_batches(nb_test, config.batch_size)
print(" + Total testing sequences: ", nb_test)
print(" + #batches in test ", total_test_batches)

# Create dictionary
print("@Build knowledge")
MAX_SEQ_LENGTH, item_dict = utils.build_knowledge(training_instances,
                                                  validate_instances)

print("#Statistic")
NB_ITEMS = len(item_dict)
print(" + Maximum sequence length: ", MAX_SEQ_LENGTH)
print(" + Total items: ", NB_ITEMS)

model_dir = output_dir + "/models"
if config.train_mode:
    with tf.Session(config=gpu_config) as sess:
        # Init the network
        net = procedure.create_network(sess, MAX_SEQ_LENGTH, NB_ITEMS, config)
        sess.run(tf.global_variables_initializer())

        # Train the network
        train_generator = utils.seq_batch_generator(training_instances,
Example #4
# (snippet excerpt: data_dir is defined earlier in the source script)
train_data_path = data_dir + 'train.txt'
train_instances = utils.read_instances_lines_from_file(train_data_path)
nb_train = len(train_instances)
print(nb_train)

validate_data_path = data_dir + 'validate.txt'
valid_instances = utils.read_instances_lines_from_file(validate_data_path)
nb_validate = len(valid_instances)
print(nb_validate)

test_data_path = data_dir + 'test.txt'
test_instances = utils.read_instances_lines_from_file(test_data_path)
nb_test = len(test_instances)
print(nb_test)

### build knowledge ###

print("---------------------@Build knowledge-------------------------------")
MAX_SEQ_LENGTH, item_dict, reversed_item_dict, item_probs, user_consumption_dict = utils.build_knowledge(
    train_instances, valid_instances, test_instances)

config_param = dict()
config_param['batch_size'] = 16
print('---------------------Create data loader--------------------')
train_loader = data_utils.generate_data_loader(train_instances[:1000],
                                               config_param['batch_size'],
                                               item_dict,
                                               MAX_SEQ_LENGTH,
                                               is_bseq=True,
                                               is_shuffle=True)
valid_loader = data_utils.generate_data_loader(valid_instances[:500],
                                               config_param['batch_size'],
                                               item_dict,
                                               MAX_SEQ_LENGTH,
                                               is_bseq=True,
                                               is_shuffle=False)
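
A helper shared by these examples is utils.read_instances_lines_from_file (read_file_as_lines in Example #3), which appears to return one instance string per file line. A minimal sketch under that assumption (the body is an assumption, not the repo's code):

# Assumed sketch of the line-reading helpers used above: return each
# non-empty line of the file as one instance string.
def read_instances_lines_from_file(file_path):
    with open(file_path, 'r') as f:
        return [line.rstrip('\n') for line in f if line.strip()]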