Example #1
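Most of the snippets below are cut from longer scripts and begin mid-way through the argparse setup. A minimal sketch of the scaffolding they all assume (the -d/--data_path and -a/--aux_path options are taken verbatim from Example #3; each script's remaining options are elided here):

import argparse
import sys

from data_manager import Data_Factory

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--data_path", type=str,
                    help="Path to training, valid and test data sets")
parser.add_argument("-a", "--aux_path", type=str, help="Path to R, D_all, sets")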
parser.add_argument("-n", "--max_iter", type=int,
                    help="Value of max iteration (default: 200)", default=500)
parser.add_argument("-w", "--num_kernel_per_ws", type=int,
                    help="Number of kernels per window size for CNN module (default: 100)", default=100)
parser.add_argument("-F","--flag",type=str,help="class",default="ConvMF")
parser.add_argument("-G","--momentum_flag",type=int,help="momentum_flag",default=0)
args = parser.parse_args()
do_preprocess = args.do_preprocess
data_path = args.data_path
aux_path = args.aux_path
if data_path is None:
    sys.exit("Argument missing - data_path is required")
if aux_path is None:
    sys.exit("Argument missing - aux_path is required")

data_factory = Data_Factory()

if do_preprocess:
    path_rating = args.raw_rating_data_path
    path_itemtext = args.raw_item_document_data_path
    min_rating = args.min_rating
    max_length = args.max_length_document
    max_df = args.max_df
    vocab_size = args.vocab_size
    split_ratio = args.split_ratio

    print "=================================Preprocess Option Setting================================="
    print "\tsaving preprocessed aux path - %s" % aux_path
    print "\tsaving preprocessed data path - %s" % data_path
    print "\trating data path - %s" % path_rating
    print "\tdocument data path - %s" % path_itemtext #Plot.idmap
Example #2
parser.add_argument("-b",
                    "--binary_rating",
                    type=bool,
                    help="True or False to binarize ratings (default = False)",
                    default=False)

args = parser.parse_args()
do_preprocess = args.do_preprocess
data_path = args.data_path
aux_path = args.aux_path
if data_path is None:
    sys.exit("Argument missing - data_path is required")
if aux_path is None:
    sys.exit("Argument missing - aux_path is required")

data_factory = Data_Factory()

if do_preprocess:
    path_rating = args.raw_rating_data_path
    path_itemtext = args.raw_item_document_data_path
    path_usertext = args.raw_user_profile_data_path
    min_rating = args.min_rating
    max_length = args.max_length_document
    max_df = args.max_df
    vocab_size = args.vocab_size
    split_ratio = args.split_ratio

    print(
        "=================================Preprocess Option Setting================================="
    )
    print("\tsaving preprocessed aux path - %s" % aux_path)
parser.add_argument("-d", "--data_path", type=str,
                    help="Path to training, valid and test data sets")
parser.add_argument("-a", "--aux_path", type=str, help="Path to R, D_all, sets")



args = parser.parse_args()
do_preprocess = args.do_preprocess
data_path = args.data_path
aux_path = args.aux_path
if data_path is None:
    sys.exit("Argument missing - data_path is required")
if aux_path is None:
    sys.exit("Argument missing - aux_path is required")

data_factory = Data_Factory()

if do_preprocess:
    path_rating = args.raw_rating_data_path
    path_itemtext = args.raw_item_document_data_path
    path_usertext = args.raw_user_profile_data_path
    min_rating = args.min_rating
    max_length = args.max_length_document
    max_df = args.max_df
    vocab_size = args.vocab_size
    split_ratio = args.split_ratio

    print ("=================================Preprocess Option Setting=================================")
    print ("\tsaving preprocessed aux path - %s" % aux_path)
    print ("\tsaving preprocessed data path - %s" % data_path)
    print ("\trating data path - %s" % path_rating)
Example #4
    "-w",
    "--num_kernel_per_ws",
    type=int,
    help="Number of kernels per window size for CNN module (default: 100)",
    default=100)

args = parser.parse_args()
do_preprocess = args.do_preprocess
data_path = args.data_path
aux_path = args.aux_path
if data_path is None:
    sys.exit("Argument missing - data_path is required")
if aux_path is None:
    sys.exit("Argument missing - aux_path is required")

data_factory = Data_Factory()

if do_preprocess:
    path_rating = args.raw_rating_data_path
    path_itemtext = args.raw_item_document_data_path
    min_rating = args.min_rating
    max_length = args.max_length_document
    max_df = args.max_df
    vocab_size = args.vocab_size
    split_ratio = args.split_ratio
    new_item = args.new_item

    print "=================================Preprocess Option Setting================================="
    print "\tsaving preprocessed aux path - %s" % aux_path
    print "\tsaving preprocessed data path - %s" % data_path
    print "\trating data path - %s" % path_rating
Example #5
from data_manager import Data_Factory
from rating_models import PMF
import pickle
import sys
import os

data_factory = Data_Factory()

data = sys.argv[1]
# derive the dataset name from a path of the form ./data/<name>.<ext>
# (also avoids shadowing the built-in "file")
name = data.split('/')[2].split('.')[0]
test_file = "./test/" + name + "/"
output_file = "./outputs/" + name + "/"
if not os.path.exists(output_file):
    os.makedirs(output_file)

binary_rating = False

with open(test_file + 'ratings.all', 'rb') as f:
    R = pickle.load(f)

train_user = data_factory.read_rating(test_file + 'train_user.dat', binary_rating)
train_item = data_factory.read_rating(test_file + 'train_item.dat', binary_rating)
valid_user = data_factory.read_rating(test_file + 'valid_user.dat', binary_rating)
test_user = data_factory.read_rating(test_file + 'test_user.dat', binary_rating)

'''PMF'''
PMF(res_dir=output_file, lambda_u=0.1, lambda_v=0.1,
    train_user=train_user, train_item=train_item,
    valid_user=valid_user, test_user=test_user, R=R)
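The dataset path arrives as sys.argv[1] and must look like ./data/<name>.<ext> for the name extraction above to work. A hypothetical invocation (the script and file names are illustrative, not from the source):

python run_pmf.py ./data/ml_1m.ratings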
Example #6
#!/usr/bin/python
# coding=utf-8

from evaluation import eval_precision, eval_recall
from data_manager import Data_Factory
import numpy as np

data_factory = Data_Factory()
data_path = '/home/daicoolb/test_0724_0.8_amazon'
train_user = data_factory.read_rating(data_path + '/train_user.dat')
test_user = data_factory.read_rating(data_path + '/test_user.dat')
valid_user = data_factory.read_rating(data_path + '/valid_user.dat')
U = np.loadtxt(data_path + '/Result/U.dat')
V = np.loadtxt(data_path + '/Result/V.dat')
print "Precision @5: %.5f \n" % eval_precision(U, V, test_user, train_user,
                                               valid_user, 5)
print "Recall @5:% .5f \n" % eval_recall(U, V, test_user, train_user,
                                         valid_user, 5)

print "Precision @10: %.5f \n" % eval_precision(U, V, test_user, train_user,
                                                valid_user, 10)
print "Recall @10:% .5f \n" % eval_recall(U, V, test_user, train_user,
                                          valid_user, 10)

print "Precision @15: %.5f \n" % eval_precision(U, V, test_user, train_user,
                                                valid_user, 15)
print "Recall @15:% .5f \n" % eval_recall(U, V, test_user, train_user,
                                          valid_user, 15)

print "Precision @30: %.5f \n" % eval_precision(U, V, test_user, train_user,
Example #7
from data_manager import Data_Factory
import os
import numpy as np
import pickle
import sys

data_factory = Data_Factory()

data = sys.argv[1]
# derive the dataset name from a path of the form ./data/<name>.<ext>
# (also avoids shadowing the built-in "file")
name = data.split('/')[2].split('.')[0]
test_file = "./test/" + name + "/"
if not os.path.exists(test_file):
    os.makedirs(test_file)

with open(test_file + 'ratings.all', 'rb') as f:
    R = pickle.load(f)

split_ratio = 0.2

data_factory.generate_train_valid_test_file_from_R(test_file, R, split_ratio)
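As in Example #5, the dataset path is taken from the command line; a hypothetical call (names illustrative): python split_data.py ./data/ml_1m.ratings, which presumably writes the train/valid/test split files under ./test/ml_1m/.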
Example #8
import numpy as np


def eval_MAE(R, U, V, TS):
    """Mean absolute error over the test items TS, given factor matrices U and V."""
    num_user = U.shape[0]
    sub_mae = np.zeros(num_user)
    TS_count = 0
    for i in range(num_user):
        idx_item = TS[i]              # test item indices for user i
        if len(idx_item) == 0:
            continue
        TS_count += len(idx_item)
        approx_R_i = U[i].dot(V[idx_item].T)  # predicted ratings, i.e. approx_R[i, idx_item]
        R_i = R[i]                    # observed ratings for those items
        sub_mae[i] = np.abs(approx_R_i - R_i).sum()
    mae = sub_mae.sum() / TS_count
    return mae


from data_manager import Data_Factory
data_factory = Data_Factory()
data_path = '/home/daicoolb/CopeData/test_0723_0.8_100k'
#R,D_all,S=data_factory.load(data_path)
#train_user=data_factory.read_rating(data_path+'/train_user.dat')
test_user = data_factory.read_rating(data_path + '/test_user.dat')
U = np.loadtxt(data_path + '/Result/U.dat')
V = np.loadtxt(data_path + '/Result/V.dat')
print "MAE: %.5f \n " % eval_MAE(test_user[1], U, V, test_user[0])
Example #9
                    help="Value of item regularizer")
parser.add_argument("-n", "--max_iter", type=int,
                    help="Value of max iteration (default: 200)", default=200)
parser.add_argument("-w", "--num_kernel_per_ws", type=int,
                    help="Number of kernels per window size for CNN module (default: 100)", default=100)

args = parser.parse_args()
do_preprocess = args.do_preprocess
data_path = args.data_path
aux_path = args.aux_path
if data_path is None:
    sys.exit("Argument missing - data_path is required")
if aux_path is None:
    sys.exit("Argument missing - aux_path is required")

data_factory = Data_Factory()

if do_preprocess:
    path_rating = args.raw_rating_data_path
    path_itemtext = args.raw_item_document_data_path
    min_rating = args.min_rating
    max_length = args.max_length_document
    max_df = args.max_df
    vocab_size = args.vocab_size
    split_ratio = args.split_ratio

    print "=================================Preprocess Option Setting================================="
    print "\tsaving preprocessed aux path - %s" % aux_path
    print "\tsaving preprocessed data path - %s" % data_path
    print "\trating data path - %s" % path_rating
    print "\tdocument data path - %s" % path_itemtext
Example #10
                    "--learning_rate",
                    type=float,
                    help="learning rate used for ensemble")
args = parser.parse_args()
grid_search = args.grid_search
do_preprocess = args.do_preprocess
data_path = args.data_path
aux_path = args.aux_path
splits_dir = args.splits_dir
fold_num = args.fold_num
if data_path is None:
    sys.exit("Argument missing - data_path is required")
if aux_path is None:
    sys.exit("Argument missing - aux_path is required")

data_factory = Data_Factory()

if do_preprocess:
    path_rating = args.raw_rating_data_path
    path_itemtext = args.raw_item_document_data_path
    min_rating = args.min_rating
    max_length = args.max_length_document
    max_df = args.max_df
    vocab_size = args.vocab_size
    split_ratio = args.split_ratio

    print "=================================Preprocess Option Setting================================="
    print "\trating data path - %s" % path_rating
    print "\tdocument data path - %s" % path_itemtext
    print "\tmin_rating: %d\n\tmax_length_document: %d\n\tmax_df: %.1f\n\tvocab_size: %d" \
          % (min_rating, max_length, max_df, vocab_size)
Example #11
parser.add_argument("-G",
                    "--momentum_flag",
                    type=int,
                    help="momentum_flag",
                    default=1)

args = parser.parse_args()
do_preprocess = args.do_preprocess
data_path = args.data_path
aux_path = args.aux_path
if data_path is None:
    sys.exit("Argument missing - data_path is required")
if aux_path is None:
    sys.exit("Argument missing - aux_path is required")

data_factory = Data_Factory()

if do_preprocess:
    path_rating = args.raw_rating_data_path
    min_rating = args.min_rating
    split_ratio = args.split_ratio

    print "=================================Preprocess Option Setting================================="
    print "\tsaving preprocessed aux path - %s" % aux_path
    print "\tsaving preprocessed data path - %s" % data_path
    print "\trating data path - %s" % path_rating
    print "\tmin_rating: %d\n\t split_ratio: %.1f" % (min_rating, split_ratio)
    print "==========================================================================================="

    R = data_factory.preprocess(path_rating, min_rating)
    data_factory.save(aux_path, R)
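A plausible preprocessing invocation, assuming argparse's default long-option-to-attribute mapping (the flag names below are inferred from the args.* attributes read in the snippets and are not confirmed by the source; the script name is illustrative):

python run.py --do_preprocess True --data_path ./data/out/ --aux_path ./data/aux/ \
              --raw_rating_data_path ./data/ratings.dat --min_rating 1 --split_ratio 0.2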