n_items = len(unique_sid) def load_data(csv_file, shape=(n_users, n_items)): tp = pd.read_csv(csv_file) rows, cols, vals = np.array(tp['uid']), np.array( tp['sid']), np.array(tp['rating']) data = sparse.csr_matrix((vals, (rows, cols)), dtype=np.float32, shape=shape) return data train_data = load_data(os.path.join(DATA_DIR, 'train.csv')) vad_data = load_data(os.path.join(DATA_DIR, 'validation.csv')) test_data = load_data(os.path.join(DATA_DIR, 'test_full.csv')) train_data_imp = exp_to_imp(train_data, 0.5) vad_data_imp = exp_to_imp(vad_data, 0.5) dims = np.array([1, 2, 5, 10, 20, 50, 100]) train_data_coo = train_data_imp.tocoo() row_tr, col_tr = train_data_coo.row, train_data_coo.col vad_data_coo = vad_data_imp.tocoo() row_vd, col_vd = vad_data_coo.row, vad_data_coo.col for i, dim in enumerate(dims): print("dim", dim) pf = pmf.PoissonMF(n_components=dim, random_state=98765, verbose=True,
for dim in dims: U = np.loadtxt(CAUSEFIT_DIR + '/cause_pmf_k' + str(dim) + '_U.csv') V = np.loadtxt(CAUSEFIT_DIR + '/cause_pmf_k' + str(dim) + '_V.csv') U = (np.atleast_2d(U.T).T) V = (np.atleast_2d(V.T).T) reconstr_cau = U.dot(V.T) for i, K in enumerate(outdims): print("K0", dim, "K", K) D = train_data.shape[0] N = train_data.shape[1] weights = train_data * alpha cau = exp_to_imp(train_data) tf.reset_default_graph() sess = tf.InteractiveSession() idx_ph = tf.placeholder(tf.int32, M) cau_ph = tf.placeholder(tf.float32, [M, N]) sd_ph = tf.placeholder(tf.float32, [M, N]) reconstr_cau_ph = tf.placeholder(tf.float32, [M, N]) U = Gamma(0.3 * tf.ones([M, K]), 0.3 * tf.ones([M, K])) V = Gamma(0.3 * tf.ones([N, K]), 0.3 * tf.ones([N, K])) gamma = Gamma(tf.ones([M, 1]), tf.ones([M, 1])) beta0 = Gamma(0.3 * tf.ones([1, 1]), 0.3 * tf.ones([1, 1])) x = Poisson(tf.add(tf.matmul(U, V, transpose_b=True),\
data_te = sparse.csr_matrix( (ratings_te, (rows_te, cols_te)), dtype='float64', shape=(end_idx - start_idx + 1, n_items)) return data_tr, data_te train_data = load_train_data(os.path.join(DATA_DIR, 'train.csv')).tocsr() vad_data_tr, vad_data_te = load_tr_te_data( os.path.join(DATA_DIR, 'validation_tr.csv'), os.path.join(DATA_DIR, 'validation_te.csv')) test_data_tr, test_data_te = load_tr_te_data( os.path.join(DATA_DIR, 'test_tr.csv'), os.path.join(DATA_DIR, 'test_te.csv')) train_data = exp_to_imp(train_data, 0.5) vad_data_tr, vad_data_te = exp_to_imp(vad_data_tr, 0.5), exp_to_imp( vad_data_tr, 0.5) test_data_tr, test_data_te = exp_to_imp(test_data_tr, 0.5), exp_to_imp( test_data_tr, 0.5) dims = np.array([1, 2, 5, 10, 20, 50, 100]) train_data_coo = train_data.tocoo() vad_data_tr_coo = vad_data_tr.tocoo() test_data_tr_coo = test_data_tr.tocoo() for i, dim in enumerate(dims): print("dim", dim) pf = pmf.PoissonMF(n_components=dim, random_state=98765,
# NOTE(review): the nesting below was reconstructed from a
# whitespace-collapsed source -- confirm against the original layout.
for dim in dims:
    # Factor matrices stored under CAUSEFIT_DIR for this value of `dim`.
    saved_prefix = CAUSEFIT_DIR + '/cause_pmf_k' + str(dim)
    # A single-column file comes back from np.loadtxt as a 1-D array; the
    # transpose / atleast_2d / transpose round trip makes it an (n, 1)
    # column while 2-D input passes through unchanged.
    U = np.atleast_2d(np.loadtxt(saved_prefix + '_trainU.csv').T).T
    V = np.atleast_2d(np.loadtxt(saved_prefix + '_V.csv').T).T
    reconstr_cau_train = U.dot(V.T)

    for i, K in enumerate(outdims):
        print("K0", dim, "K", K)

        # NOTE(review): D is assigned here, while the placeholders below are
        # sized with M, which is defined outside this excerpt -- confirm.
        D = train_data.shape[0]
        N = train_data.shape[1]
        weights = train_data * alpha
        # exp_to_imp is defined outside this excerpt; it is called here
        # without an explicit threshold argument.
        cau = exp_to_imp(train_data)

        # Start every (dim, K) configuration from an empty default graph.
        tf.reset_default_graph()
        sess = tf.InteractiveSession()

        idx_ph = tf.placeholder(tf.int32, M)
        cau_ph = tf.placeholder(tf.float32, [M, N])
        sd_ph = tf.placeholder(tf.float32, [M, N])

        # From here on U and V name the model's latent variables and no
        # longer the arrays loaded above (reconstr_cau_train was computed
        # before the rebinding). Normal comes from outside this excerpt.
        U = Normal(loc=tf.zeros([M, K]), scale=pri_U * tf.ones([M, K]))
        V = Normal(loc=tf.zeros([N, K]), scale=pri_V * tf.ones([N, K]))
        # The mean is the element-wise product of cau_ph and U V^T; the
        # scale is all ones.
        x = Normal(
            loc=tf.multiply(cau_ph, tf.matmul(U, V, transpose_b=True)),
            scale=tf.ones([M, N]))
shape=(end_idx - start_idx + 1, n_items)) return data_tr, data_te train_data = load_train_data(os.path.join(DATA_DIR, 'train.csv')).tocsr() vad_data_tr, vad_data_te = load_tr_te_data( os.path.join(DATA_DIR, 'validation_tr.csv'), os.path.join(DATA_DIR, 'validation_te.csv')) test_data_tr, test_data_te = load_tr_te_data( os.path.join(DATA_DIR, 'test_tr.csv'), os.path.join(DATA_DIR, 'test_te.csv')) alpha = args.alpha print("alpha", alpha) cau = exp_to_imp(train_data, 0.5) if binary > 0: train_data = binarize_rating(train_data) vad_data_tr, vad_data_te = binarize_rating( vad_data_tr), binarize_rating(vad_data_te) test_data_tr, test_data_te = binarize_rating( test_data_tr), binarize_rating(test_data_te) train_data_coo = train_data.tocoo() vad_data_tr_coo = vad_data_tr.tocoo() test_data_tr_coo = test_data_tr.tocoo() model_name = 'sg_pmf_obs' print("model", model_name)