Beispiel #1
0
    )
    # Report where the preprocessed output will be written and which raw
    # inputs (ratings, item documents, user profiles) are being read.
    print("\tsaving preprocessed aux path - %s" % aux_path)
    print("\tsaving preprocessed data path - %s" % data_path)
    print("\trating data path - %s" % path_rating)
    print("\tdocument data path - %s" % path_itemtext)
    print("\tprofile data path - %s" % path_usertext)
    # One formatted report covering all numeric preprocessing options; the
    # adjacent literals concatenate into a single format string.
    print(
        "\tmin_rating: %d\n"
        "\tmax_length_document: %d\n"
        "\tmax_df: %.1f\n"
        "\tvocab_size: %d\n"
        "\tsplit_ratio: %.1f"
        % (min_rating, max_length, max_df, vocab_size, split_ratio)
    )
    print("===========================================================================================")

    # Preprocess the three raw inputs into R and D_all, hand both to
    # data_factory.save together with aux_path, then generate the
    # train/valid/test files from R using data_path and split_ratio.
    R, D_all = data_factory.preprocess_ext(
        path_rating,
        path_itemtext,
        path_usertext,
        min_rating,
        max_length,
        max_df,
        vocab_size,
    )
    data_factory.save(aux_path, R, D_all)
    data_factory.generate_train_valid_test_file_from_R(
        data_path, R, split_ratio
    )
else:
    # Pull the settings used by this branch off `args` into plain names.
    res_dir = args.res_dir
    emb_dim = args.emb_dim
    pretrain_w2v = args.pretrain_w2v
    dimension = args.dimension
    # lambda_u / lambda_v: presumably regularization weights for the two
    # factor sets -- TODO confirm against the model code.
    lambda_u = args.lambda_u
    lambda_v = args.lambda_v
    max_iter = args.max_iter
    num_kernel_per_ws = args.num_kernel_per_ws
    give_weight = args.give_weight
    # Shortened local aliases for the document- and sentence-length
    # threshold options.
    threshold_doclen = args.threshold_length_document
    threshold_sentlen = args.threshold_length_sentence
    binary_rating = args.binary_rating
Beispiel #2
0
    max_df = args.max_df
    vocab_size = args.vocab_size
    split_ratio = args.split_ratio

    # Echo the preprocessing options.  Every print passes exactly one
    # parenthesized expression, which Python 2's print statement and
    # Python 3's print() function render identically, so this section is
    # no longer a SyntaxError under Python 3.
    print("=================================Preprocess Option Setting=================================")
    print("\tsaving preprocessed aux path - %s" % aux_path)
    print("\tsaving preprocessed data path - %s" % data_path)
    print("\trating data path - %s" % path_rating)
    print("\tdocument data path - %s" % path_itemtext)
    print("\tmin_rating: %d\n\tmax_length_document: %d\n\tmax_df: %.1f\n\tvocab_size: %d\n\tsplit_ratio: %.1f"
          % (min_rating, max_length, max_df, vocab_size, split_ratio))
    print("===========================================================================================")

    # Preprocess the raw rating and document inputs into R and D_all, pass
    # both to data_factory.save together with aux_path, then generate the
    # train/valid/test files from R using data_path and split_ratio.
    R, D_all = data_factory.preprocess(
        path_rating, path_itemtext, min_rating, max_length, max_df, vocab_size)
    data_factory.save(aux_path, R, D_all)
    data_factory.generate_train_valid_test_file_from_R(
        data_path, R, split_ratio)
else:
    # Pull the settings used by this branch off `args` into plain names.
    res_dir = args.res_dir
    emb_dim = args.emb_dim
    pretrain_w2v = args.pretrain_w2v
    dimension = args.dimension
    # lambda_u / lambda_v: presumably regularization weights for the two
    # factor sets -- TODO confirm against the model code.
    lambda_u = args.lambda_u
    lambda_v = args.lambda_v
    max_iter = args.max_iter
    num_kernel_per_ws = args.num_kernel_per_ws
    give_item_weight = args.give_item_weight

    # Abort with an explanatory message when res_dir was left as None.
    if res_dir is None:
        sys.exit("Argument missing - res_dir is required")
Beispiel #3
0
    # Echo the preprocessing options.  Every print passes exactly one
    # parenthesized expression, which Python 2's print statement and
    # Python 3's print() function render identically, so this section is
    # no longer a SyntaxError under Python 3.
    print("=================================Preprocess Option Setting=================================")
    print("\tsaving preprocessed aux path - %s" % aux_path)
    print("\tsaving preprocessed data path - %s" % data_path)
    print("\trating data path - %s" % path_rating)
    print("\tdocument data path - %s" % path_itemtext)
    print("\tmin_rating: %d\n\tmax_length_document: %d\n\tmax_df: %.1f\n\tvocab_size: %d\n\tsplit_ratio: %.1f"
          % (min_rating, max_length, max_df, vocab_size, split_ratio))
    print("\tnumber of new itmes: %d" % new_item)
    print("===========================================================================================")

    # new_item is reported above as a count and used here as the switch:
    # any truthy value selects the new-item preprocessing path.
    if new_item:
        print("Under construction")
        # This path yields separate "old" and "new" rating/document results.
        # Both pairs are passed to save (with an extra True flag), and R_new
        # is passed as an additional argument when generating the split files.
        R_old, R_new, D_old, D_new = data_factory.preprocess_newitem(
            path_rating, path_itemtext, min_rating, max_length, max_df,
            vocab_size, new_item)
        data_factory.save(aux_path, R_old, D_old, True, R_new, D_new)
        data_factory.generate_train_valid_test_file_from_R(
            data_path, R_old, split_ratio, R_new)

    else:
        # Regular path: one R / D_all pair, saved and split as-is.
        R, D_all = data_factory.preprocess(path_rating, path_itemtext,
                                           min_rating, max_length, max_df,
                                           vocab_size)
        data_factory.save(aux_path, R, D_all)
        data_factory.generate_train_valid_test_file_from_R(
            data_path, R, split_ratio)
else:
    # Pull the settings used by this branch off `args` into plain names.
    res_dir = args.res_dir
    emb_dim = args.emb_dim
    pretrain_w2v = args.pretrain_w2v
    dimension = args.dimension
Beispiel #4
0
data_factory = Data_Factory()

if do_preprocess:
    # Preprocessing mode: only the raw rating file, the minimum-rating
    # option and the split ratio are read from `args`.
    path_rating = args.raw_rating_data_path
    min_rating = args.min_rating
    split_ratio = args.split_ratio

    # Echo the preprocessing options.  Every print passes exactly one
    # parenthesized expression, which Python 2's print statement and
    # Python 3's print() function render identically, so this script is
    # no longer a SyntaxError under Python 3.
    print("=================================Preprocess Option Setting=================================")
    print("\tsaving preprocessed aux path - %s" % aux_path)
    print("\tsaving preprocessed data path - %s" % data_path)
    print("\trating data path - %s" % path_rating)
    print("\tmin_rating: %d\n\t split_ratio: %.1f" % (min_rating, split_ratio))
    print("===========================================================================================")

    # Preprocess the raw ratings into R, pass it to data_factory.save
    # together with aux_path, then generate the train/valid/test files
    # from R using data_path and split_ratio.
    R = data_factory.preprocess(path_rating, min_rating)
    data_factory.save(aux_path, R)
    data_factory.generate_train_valid_test_file_from_R(data_path, R,
                                                       split_ratio)
else:
    # Non-preprocessing mode: pull the settings used by this branch off
    # `args` into plain names.
    methods = args.flag
    dimension = args.dimension
    # lambda_u / lambda_v / lambda_p / lambda_q: presumably regularization
    # weights -- TODO confirm against the model code.
    lambda_u = args.lambda_u
    lambda_v = args.lambda_v
    lambda_p = args.lambda_p
    lambda_q = args.lambda_q

    max_iter = args.max_iter
    momentum_flag = args.momentum_flag

    # Abort with an explanatory message when lambda_u was left as None.
    if lambda_u is None:
        sys.exit("Argument missing - lambda_u is required")