Example #1
    vocab_size = args.vocab_size
    split_ratio = args.split_ratio

    print "=================================Preprocess Option Setting================================="
    print "\tsaving preprocessed aux path - %s" % aux_path
    print "\tsaving preprocessed data path - %s" % data_path
    print "\trating data path - %s" % path_rating
    print "\tdocument data path - %s" % path_itemtext
    print "\tmin_rating: %d\n\tmax_length_document: %d\n\tmax_df: %.1f\n\tvocab_size: %d\n\tsplit_ratio: %.1f" \
        % (min_rating, max_length, max_df, vocab_size, split_ratio)
    print "==========================================================================================="

    R, D_all = data_factory.preprocess(
        path_rating, path_itemtext, min_rating, max_length, max_df, vocab_size)
    data_factory.save(aux_path, R, D_all)
    data_factory.generate_train_valid_test_file_from_R(
        data_path, R, split_ratio)
else:
    res_dir = args.res_dir
    emb_dim = args.emb_dim
    pretrain_w2v = args.pretrain_w2v
    dimension = args.dimension
    lambda_u = args.lambda_u
    lambda_v = args.lambda_v
    max_iter = args.max_iter
    num_kernel_per_ws = args.num_kernel_per_ws
    give_item_weight = args.give_item_weight

    if res_dir is None:
        sys.exit("Argument missing - res_dir is required")
    if lambda_u is None:
        sys.exit("Argument missing - lambda_u is required")
Example #2
    print("\tsaving preprocessed aux path - %s" % aux_path)
    print("\tsaving preprocessed data path - %s" % data_path)
    print("\trating data path - %s" % path_rating)
    print("\tdocument data path - %s" % path_itemtext)
    print("\tprofile data path - %s" % path_usertext)
    print ("\tmin_rating: %d\n\tmax_length_document: %d\n\tmax_df: %.1f\n\tvocab_size: %d\n\tsplit_ratio: %.1f" \
        % (min_rating, max_length, max_df, vocab_size, split_ratio))
    print(
        "==========================================================================================="
    )

    R, D_all = data_factory.preprocess_ext(path_rating, path_itemtext,
                                           path_usertext, min_rating,
                                           max_length, max_df, vocab_size)
    data_factory.save(aux_path, R, D_all)
    data_factory.generate_train_valid_test_file_from_R(data_path, R,
                                                       split_ratio)
else:
    res_dir = args.res_dir
    emb_dim = args.emb_dim
    pretrain_w2v = args.pretrain_w2v
    dimension = args.dimension
    lambda_u = args.lambda_u
    lambda_v = args.lambda_v
    max_iter = args.max_iter
    num_kernel_per_ws = args.num_kernel_per_ws
    give_weight = args.give_weight
    threshold_doclen = args.threshold_length_document
    threshold_sentlen = args.threshold_length_sentence
    binary_rating = args.binary_rating

    if res_dir is None:
        sys.exit("Argument missing - res_dir is required")
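Example #1 exits via sys.exit when a required option is missing; the same guard can be written once for every mandatory option. A sketch (which options beyond res_dir are mandatory here is an assumption):

import sys

required = {"res_dir": res_dir, "lambda_u": lambda_u, "lambda_v": lambda_v}
for name, value in required.items():
    if value is None:
        sys.exit("Argument missing - %s is required" % name)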

Example #3

import pickle
import numpy as np
from scipy.sparse import csr_matrix

# Remap raw movie IDs to contiguous zero-based indices (rank in sorted order).
mapped_ids = sorted(list(set(movie_ids)))
for i in range(len(movie_ids)):
    movie_ids[i] = mapped_ids.index(movie_ids[i])
# Remap raw user IDs the same way.
mapped_ids = sorted(list(set(user_ids)))
for i in range(len(user_ids)):
    user_ids[i] = mapped_ids.index(user_ids[i])
# Serialize the remapped triples as user::movie::rating lines.
filestr = ""
for i in range(len(user_ids)):
    filestr += "%s::%s::%s\n" % (user_ids[i], movie_ids[i], ratings[i])

with open('ratings.dat', 'w') as f:
    f.write(filestr)

# Build a sparse rating matrix: data[k] is stored at (row[k], col[k]).
data = np.array([float(i) for i in ratings])
row = np.array([int(i) for i in user_ids])
col = np.array([int(i) for i in movie_ids])
R = csr_matrix((data, (row, col)))

with open('test/FS/ratings.all', 'wb') as f:
    pickle.dump(R, f)

# Reload the pickled matrix before generating the split.
with open('test/FS/ratings.all', 'rb') as f:
    R = pickle.load(f)

split_ratio = 0.2

##R = csr_matrix(([int(i) for i in ratings], ([int(i) for i in user_ids], [int(i) for i in movie_ids])))
data_factory.generate_train_valid_test_file_from_R('test/FS/', R, split_ratio)
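
The remapping loops in Example #3 call mapped_ids.index() for every rating, rescanning the list each time (quadratic overall). A dictionary lookup produces the same mapping in near-linear time; a sketch with identical behavior:

def remap(ids):
    # Rank of each unique ID in sorted order; equivalent to
    # sorted(set(ids)).index(x) for each x, but computed once.
    lookup = {v: i for i, v in enumerate(sorted(set(ids)))}
    return [lookup[x] for x in ids]

user_ids = remap(user_ids)
movie_ids = remap(movie_ids)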