# Example 1
def evaluate(u: np.ndarray, v: np.ndarray, test_X, mode: str):
    """Compute, print, and return the per-rating MSE on the test set.

    :param u: user factor vector/matrix
    :param v: item factor vector/matrix
    :param test_X: sparse test ratings matrix (densified when mode != 'sparse')
    :param mode: 'sparse' selects ALSSparse; any other value densifies and uses ALS
    :return: mean squared error over the observed test entries
    """
    print(u.shape, v.shape, test_X.shape, mode)
    if mode == 'sparse':
        model = ALSSparse(u, v, test_X)
    else:
        model = ALS(u, v, test_X.toarray())
    # normalize the total squared error by the number of observed ratings
    mse = model.function_eval() / test_X.getnnz()
    print("Test set MSE:", mse)
    return mse
# Example 2
        def myfunction(seed):
            """Run one ALS experiment with the given seed and return its metrics.

            Copies the shared ``self.input_dict`` so the stored configuration
            is not mutated, overrides only the seed, runs a full experiment,
            and returns a copy of the per-metric results.
            """
            input_dict_copy = self.input_dict.copy()
            input_dict_copy['seed'] = seed

            als = ALS(**input_dict_copy)  # ** allows to pass arguments as dict
            als.learningManager.run_experiment()

            # result is a dict with keys as metric_strs and values as list of that metric per learning step
            result = als.learningManager.get_performance_results()
            # return a copy so the caller cannot mutate the manager's state
            return result.copy()
def main_func(m, n, k, V, USER_MATRIX_RANK, MF_RANK):
    """Build a synthetic rating matrix, factorize it with ALS, and score it.

    :param m: number of users (rows)
    :param n: number of items (columns)
    :param k: latent dimension of the user matrix
    :param V: item factor matrix, presumably of shape (k, n) — TODO confirm
    :param USER_MATRIX_RANK: target rank imposed on the user matrix
    :param MF_RANK: target rank imposed on the full rating matrix
    :return: RMSE computed on the unobserved (masked-out) entries
    """
    # 1. Build a user matrix with the requested rank
    M = np.random.rand(m, k)
    U = alter_rank(M, m, k, USER_MATRIX_RANK)

    # 2. Generate the (noise-free) true rating matrix; a noisy variant
    #    R = np.random.rand(m, n) * sigma + np.dot(U, V) was previously used.
    R = np.dot(U, V)
    R = alter_rank(R, m, n, MF_RANK)

    # 3. Sample some values out (each entry masked with probability mask_prob)
    mask_prob = .1
    mask = generate_mask(mask_prob, m, n)

    # 4. Matrix factorization: recover the missing values via ALS
    lam = .5  # regularization strength (fixed misspelling 'lamba')
    R_hat = ALS(R, mask, k, lam)

    rmse = calc_unobserved_rmse(U, V, R_hat, mask)
    return rmse
# Example 4
    def run(self, n_reps, n_als_to_perform, n_als_performed, n_jobs=4):
        """Run ALS ``n_reps`` times, varying only the seed, and collect results.

        :param n_reps: number of repetitions of als to perform with the exact
            same parameters (except seed)
        :param n_als_to_perform: total number of ALS runs planned, forwarded to
            the learning manager (presumably for progress reporting — confirm)
        :param n_als_performed: count of ALS runs already completed before this
            call; incremented once per repetition
        :param n_jobs: number of cores to use (currently unused: the joblib
            parallel path was disabled, so repetitions run sequentially)
        :return: nothing, self.results gets new values
        """
        # NOTE(review): the dead nested `myfunction` helper and the
        # commented-out Parallel(...) call that used it were removed; the
        # sequential loop below is the only execution path.
        self.results = []

        for seed in range(n_reps):
            # Each repetition differs only by its seed.
            self.input_dict['seed'] = seed
            als = ALS(**self.input_dict)  # ** unpacks the config dict as kwargs
            als.learningManager.run_experiment(
                n_als_performed=n_als_performed,
                n_als_to_perform=n_als_to_perform)
            n_als_performed = n_als_performed + 1
            # result is a dict: metric name -> list of that metric per learning step
            result = als.learningManager.get_performance_results()
            self.results.append(result)
            # persist intermediate results so an interrupted run loses nothing
            self.save_temp_results()
# Example 5
def run_experiment(data: MovieLensDataset,
                   sparse=True,
                   grad_sensibility=1e-8,
                   param_sensibility=1e-16,
                   num_experiments=1,
                   warmup=0,
                   workers=8):
    """Run (repeated) ALS factorization experiments on a MovieLens dataset.

    Loads a cached train/test split from /tmp when available (otherwise
    generates and caches a new one), optionally performs untimed warmup fits,
    averages statistics over ``num_experiments`` timed runs, evaluates on the
    test set, and dumps all stats to a JSON file under data/.

    :param data: dataset object providing ratings and split helpers
    :param sparse: use the sparse ALS implementation and sparse matrices
    :param grad_sensibility: gradient stopping threshold passed to ALS.fit
    :param param_sensibility: parameter-change stopping threshold for ALS.fit
    :param num_experiments: number of timed ALS runs to average over (must be
        >= 1, since the last fitted model is reused for train/test MSE)
    :param warmup: number of untimed warmup fits before measuring
    :param workers: worker count (only relevant to the disabled parallel split)
    :return: the last fitted ALS/ALSSparse instance
    """
    date = datetime.now().strftime("%d-%m-%Y_%H-%M-%S")
    # compute the mode suffix once instead of six inline conditionals
    kind = "sparse" if sparse else "full"

    # try to load cached matrices first
    try:
        print("Loading train and test split from /tmp/..")
        trainX = load_matrix(f'trainX_{kind}', sparse)
        testX = load_matrix(f'testX_{kind}', sparse)
    # narrowed from a bare `except:` so Ctrl-C / SystemExit still propagate;
    # any loading failure falls back to regenerating the split
    except Exception:
        print("Loading failed, generating train-test split now..")
        # 5% test size
        test_set_size = data.n_ratings // 20
        trainX, testX = data.train_test_split_simple(test_set_size)
        print("Saving train and test set to /tmp/ first..")
        save_matrix(f'trainX_{kind}', trainX)
        save_matrix(f'testX_{kind}', testX)

    # optional warmup: fit and discard, results are not averaged or timed
    for _ in range(warmup):
        u = init_vector(data.n_users, normalize=True)
        v = init_vector(data.n_movies, normalize=True)
        args = [u, v, trainX]
        als = ALSSparse(*args) if sparse else ALS(*args)
        u, v = als.fit(eps_g=grad_sensibility)

    stats = {}
    start = time.time()
    for i in range(num_experiments):
        u = init_vector(data.n_users, normalize=True)
        v = init_vector(data.n_movies, normalize=True)
        args = [u, v, trainX]
        als = ALSSparse(*args) if sparse else ALS(*args)
        # run Alternating Least Squares algorithm
        u, v = als.fit(eps_g=grad_sensibility, eps_params=param_sensibility)
        # maintain a running average over the experiments so far
        stats = average_stats(stats, als.stats, i + 1)
    end = time.time()

    # additional context info not depending on experiment results
    stats['number_of_ratings'] = trainX.getnnz(
    ) if sparse else np.count_nonzero(trainX)
    stats['dataset_path'] = data.path
    stats['grad_sensibility'] = grad_sensibility
    stats['param_sensibility'] = param_sensibility
    stats['theta_diff_sensibility'] = 1e-10
    stats['num_experiments'] = num_experiments
    stats['warmup_cycles'] = warmup
    stats['experiments_total_runtime'] = end - start
    stats['date'] = date
    stats['train_mse'] = als.function_eval() / stats['number_of_ratings']
    print("Train Mean Squared error is:", stats['train_mse'])

    # free memory before testing
    del trainX
    del data

    # evaluate factors of the last fitted model on the held-out test set
    test_mse = evaluate(als.u, als.v, testX, kind)
    stats['test_mse'] = test_mse

    # save results
    print("Saving results..")
    with open(f'data/als_{kind}_{date}.json', 'w') as f:
        json.dump(stats, f, indent=4)

    return als
# Example 6
            ]
            row = head + temp
            #          print(head)
            #          print(row)
            data.append(row)
        else:
            if len(head) > 0:
                head = []
            head.append(int(temp[0][:-1]))
    f.close()
    return data


# Load the ratings matrix and fit a rank-3 ALS model for 6 iterations.
X = load_movie_ratings()

model = ALS()
model.fit(X, k=3, max_iter=6)


def writeto(file_name, contant):
    """Append ``contant`` (stringified) followed by a newline to *file_name*.

    :param file_name: path of the file to append to (created if missing)
    :param contant: any value; converted with ``str()`` before writing
        (parameter name kept as-is for backward compatibility with
        keyword callers, despite the misspelling)
    """
    # The original used writelines() on plain strings, which only works
    # because a str iterates character-by-character; write() states the
    # intent directly and produces identical output.
    with open(file_name, "a+") as f:
        f.write(str(contant))
        f.write("\n")


def format(x):
    if x < 1:
        x = 1.0
    elif x > 5:
        x = 5.0
    else:
# Example 7
from aucReader import Dataloader

# Mode flags: exactly one of user/attention/als selects the model family
# below (first truthy flag wins, RnnRs is the fallback); `test` switches
# the config to a small sample dataset for a quick smoke run.
test = 0
user = 0
als = 0
attention = 1
config = Config()
if test:
    # shrink dataset and user count for the smoke-test configuration
    config.data_path = '../data/ml-1m/sample.csv'
    config.num_users = 50
    config.train_path = '../data/ml-1m/train2'

dl = Dataloader(config)
# Imports are deliberately local to each branch so only the selected
# model's module (and its dependencies) is loaded.
if user:
    from userRnn import UserRNN
    print('user model test:%d' % test)
    model = UserRNN(config, dl)
elif attention:

    from attURnn import AttUserRNN
    print('attention model test:%d' % test)
    model = AttUserRNN(config, dl)
elif als:
    from als import ALS
    print('als model test:%d' % test)
    model = ALS(config, dl)
else:
    from rnnRS import RnnRs
    print('rnn model test:%d' % test)
    model = RnnRs(config, dl)
model.train_and_evaluate()