Example #1
0
                mask = [0] * (len(all_movies) * 5)
                for movie_id, rat in profiles[userid]:
                    user_profile[all_movies.index(movie_id)] = rat
                    for _i in range(5):
                        mask[5 * all_movies.index(movie_id) + _i] = 1
                example = expand(np.array([user_profile])).astype('float32')
                bin_profiles[userid] = example
                masks[userid] = mask

            positions = {profile_id: pos for pos, profile_id
                         in enumerate(batch)}
            profile_batch = [bin_profiles[el] for el in batch]
            test_batch = np.array(profile_batch).reshape(size,
                                                         len(all_movies * 5))
            predict = sess.run(rbm.predict, feed_dict={rbm.input : test_batch})
            user_preds = revert_expected_value(predict)
 
            for profile_id in batch:
                test_movies = tests[profile_id]
                try:
                    for movie, rating in test_movies:
                        current_profile = user_preds[positions[profile_id]]
                        predicted = current_profile[all_movies.index(movie)]
                        rating = float(rating)
                        ratings.append(rating)
                        predictions.append(predicted)
                except Exception:
                    pass

        vabs = np.vectorize(abs)
        distances = np.array(ratings) - np.array(predictions)
Example #2
0
def run(name, dataset, config, all_users, all_movies, tests, initial_v, sep):
    """Train a CFRBM on a ratings file and score it on ``tests`` every epoch.

    Each epoch rebuilds the training function with that epoch's scheduled
    hyper-parameters, makes one mini-batch pass over every user profile,
    predicts the held-out ratings and rewrites
    ``experiments/<config name>_<name>.json`` with all results so far.

    Args:
        name: Suffix used in the results file name.
        dataset: Path of the ratings file.  Every line must split on ``sep``
            into exactly four fields: user id, movie id, rating, timestamp.
        config: Dict providing ``name``, ``number_hidden``, ``epochs``,
            ``ks``, ``momentums``, ``l_w``, ``l_v``, ``l_h``, ``decay`` and
            ``batch_size``.  ``ks``, ``momentums`` and the ``l_*`` entries
            are sequences: the epochs are cut into ``len(seq)`` equal stages
            and, once the stages run out, the last entry is used.
        all_users: Unused by this function.
        all_movies: List of movie ids; a movie's position selects its group
            of 5 consecutive visible units.
        tests: Mapping of user id to an iterable of ``(movie id, rating)``
            pairs to evaluate.
        initial_v: Unused by this function.
        sep: Field separator of ``dataset``.

    Raises:
        ValueError: If a dataset line does not have four fields, or a rated
            movie is missing from ``all_movies``.
    """
    config_name = config['name']
    number_hidden = config['number_hidden']
    epochs = config['epochs']
    ks = config['ks']
    momentums = config['momentums']
    l_w = config['l_w']
    l_v = config['l_v']
    l_h = config['l_h']
    decay = config['decay']
    batch_size = config['batch_size']

    config_result = config.copy()
    config_result['results'] = []

    vis = T.matrix()
    vmasks = T.matrix()

    n_movies = len(all_movies)
    n_visible = n_movies * 5  # presumably one unit per rating level 1..5

    rbm = CFRBM(n_visible, number_hidden)

    # Position of every movie, built once so that lookups are O(1) instead of
    # a linear ``all_movies.index`` scan per rating.  ``setdefault`` keeps the
    # first occurrence, which is what ``list.index`` returns.
    movie_pos = {}
    for pos, known_movie in enumerate(all_movies):
        movie_pos.setdefault(known_movie, pos)

    def movie_position(movie_id):
        """Return the index of ``movie_id``; ValueError if it is unknown."""
        try:
            return movie_pos[movie_id]
        except KeyError:
            # Same exception type ``all_movies.index`` would have raised.
            raise ValueError('{!r} is not in all_movies'.format(movie_id))

    def stage_index(epoch, col):
        """Return the index into schedule ``col`` for ``epoch`` (-1 = last)."""
        # Floor division keeps the index an int even under true division;
        # the ``max`` avoids a zero step when ``col`` is longer than the
        # number of epochs.
        step = max(epochs // len(col), 1)
        stage = epoch // step
        return stage if stage < len(col) else -1

    profiles = defaultdict(list)

    with open(dataset, 'rt') as data:
        for line in data:
            uid, mid, rat, _timestamp = line.strip().split(sep)
            profiles[uid].append((mid, float(rat)))

    print("Users and ratings loaded")

    def encode_users(batch, with_masks):
        """Build the visible matrix (and optionally the mask) for ``batch``."""
        size = min(len(batch), batch_size)
        rows = []
        mask_rows = []
        for userid in batch:
            user_profile = [0.] * n_movies
            mask = [0] * n_visible if with_masks else None

            # NOTE(review): ``profiles`` is a defaultdict, so looking up a
            # user without training ratings inserts an empty profile that
            # later training passes will see.  Kept as in the original.
            for movie_id, rat in profiles[userid]:
                pos = movie_position(movie_id)
                user_profile[pos] = rat
                if with_masks:
                    # Mark all 5 units of a rated movie as observed.
                    mask[5 * pos:5 * pos + 5] = [1] * 5

            rows.append(expand(np.array([user_profile])).astype('float32'))
            if with_masks:
                mask_rows.append(mask)

        visible = np.array(rows).reshape(size, n_visible)
        if not with_masks:
            return visible, None
        masks = np.array(mask_rows).reshape(size, n_visible)
        return visible, masks.astype('float32')

    for j in range(epochs):
        k = ks[stage_index(j, ks)]
        momentum = momentums[stage_index(j, momentums)]
        current_l_w = l_w[stage_index(j, l_w)]
        current_l_v = l_v[stage_index(j, l_v)]
        current_l_h = l_h[stage_index(j, l_h)]

        # Rebuilt every epoch because the hyper-parameters are passed in at
        # construction time.
        train = rbm.cdk_fun(vis,
                            vmasks,
                            k=k,
                            w_lr=current_l_w,
                            v_lr=current_l_v,
                            h_lr=current_l_h,
                            decay=decay,
                            momentum=momentum)
        predict = rbm.predict(vis)

        for batch in chunker(profiles.keys(), batch_size):
            train_batch, train_masks = encode_users(batch, with_masks=True)
            train(train_batch, train_masks)
            sys.stdout.write('.')
            sys.stdout.flush()

        ratings = []
        predictions = []

        for batch in chunker(tests.keys(), batch_size):
            # Masks are not needed for prediction, so they are not built.
            test_batch, _ = encode_users(batch, with_masks=False)
            positions = {profile_id: pos for pos, profile_id
                         in enumerate(batch)}
            user_preds = revert_expected_value(predict(test_batch))
            for profile_id in batch:
                test_movies = tests[profile_id]
                try:
                    current_profile = user_preds[positions[profile_id]]
                    for movie, rating in test_movies:
                        predicted = current_profile[movie_position(movie)]
                        rating = float(rating)
                        ratings.append(rating)
                        predictions.append(predicted)
                except (ValueError, TypeError, IndexError, KeyError):
                    # Best effort, as before: an unknown movie or malformed
                    # entry ends the evaluation of this user's remaining
                    # test ratings; anything else is a real bug and surfaces.
                    pass

        distances = np.array(ratings) - np.array(predictions)

        mae = np.abs(distances).mean()
        rmse = sqrt((distances ** 2).mean())

        iteration_result = {
            'iteration': j,
            'k': k,
            'momentum': momentum,
            'mae': mae,
            'rmse': rmse,
            'lrate': current_l_w
        }

        config_result['results'].append(iteration_result)

        print(iteration_str.format(j, k, current_l_w, momentum, mae, rmse))

        # Rewritten after every epoch so partial results survive an abort.
        with open('experiments/{}_{}.json'.format(config_name, name), 'wt') as res_output:
            res_output.write(json.dumps(config_result, indent=4))

        W, V, H = rbm.get_weights()
        print(H)
Example #3
0
def run(name, dataset, config, all_users, all_movies, tests, initial_v, sep):
    """Train a CFRBM on a ratings file and score it on ``tests`` every epoch.

    Each epoch rebuilds the training function with that epoch's scheduled
    hyper-parameters, makes one mini-batch pass over every user profile,
    predicts the held-out ratings and rewrites ``<config name>_<name>.json``
    (in the working directory) with all results so far.

    Args:
        name: Suffix used in the results file name.
        dataset: Path of the ratings file.  Every line must split on ``sep``
            into exactly four fields: user id, movie id, rating, timestamp.
        config: Dict providing ``name``, ``number_hidden``, ``epochs``,
            ``ks``, ``momentums``, ``l_w``, ``l_v``, ``l_h``, ``decay`` and
            ``batch_size``.  ``ks``, ``momentums`` and the ``l_*`` entries
            are sequences: the epochs are cut into ``len(seq)`` equal stages
            and, once the stages run out, the last entry is used.
        all_users: Unused by this function.
        all_movies: List of movie ids; a movie's position selects its group
            of 5 consecutive visible units.
        tests: Mapping of user id to an iterable of ``(movie id, rating)``
            pairs to evaluate.
        initial_v: Unused by this function.
        sep: Field separator of ``dataset``.

    Raises:
        ValueError: If a dataset line does not have four fields, or a rated
            movie is missing from ``all_movies``.
    """
    config_name = config['name']
    number_hidden = config['number_hidden']
    epochs = config['epochs']
    ks = config['ks']
    momentums = config['momentums']
    l_w = config['l_w']
    l_v = config['l_v']
    l_h = config['l_h']
    decay = config['decay']
    batch_size = config['batch_size']

    config_result = config.copy()
    config_result['results'] = []

    vis = T.matrix()
    vmasks = T.matrix()

    n_movies = len(all_movies)
    n_visible = n_movies * 5  # presumably one unit per rating level 1..5

    rbm = CFRBM(n_visible, number_hidden)

    # Position of every movie, built once so that lookups are O(1) instead of
    # a linear ``all_movies.index`` scan per rating.  ``setdefault`` keeps the
    # first occurrence, which is what ``list.index`` returns.
    movie_pos = {}
    for pos, known_movie in enumerate(all_movies):
        movie_pos.setdefault(known_movie, pos)

    def movie_position(movie_id):
        """Return the index of ``movie_id``; ValueError if it is unknown."""
        try:
            return movie_pos[movie_id]
        except KeyError:
            # Same exception type ``all_movies.index`` would have raised.
            raise ValueError('{!r} is not in all_movies'.format(movie_id))

    def stage_index(epoch, col):
        """Return the index into schedule ``col`` for ``epoch`` (-1 = last)."""
        # Floor division keeps the index an int even under true division;
        # the ``max`` avoids a zero step when ``col`` is longer than the
        # number of epochs.
        step = max(epochs // len(col), 1)
        stage = epoch // step
        return stage if stage < len(col) else -1

    profiles = defaultdict(list)

    with open(dataset, 'rt') as data:
        for line in data:
            uid, mid, rat, _timestamp = line.strip().split(sep)
            profiles[uid].append((mid, float(rat)))

    print("Users and ratings loaded")

    def encode_users(batch, with_masks):
        """Build the visible matrix (and optionally the mask) for ``batch``."""
        size = min(len(batch), batch_size)
        rows = []
        mask_rows = []
        for userid in batch:
            user_profile = [0.] * n_movies
            mask = [0] * n_visible if with_masks else None

            # NOTE(review): ``profiles`` is a defaultdict, so looking up a
            # user without training ratings inserts an empty profile that
            # later training passes will see.  Kept as in the original.
            for movie_id, rat in profiles[userid]:
                pos = movie_position(movie_id)
                user_profile[pos] = rat
                if with_masks:
                    # Mark all 5 units of a rated movie as observed.
                    mask[5 * pos:5 * pos + 5] = [1] * 5

            rows.append(expand(np.array([user_profile])).astype('float32'))
            if with_masks:
                mask_rows.append(mask)

        visible = np.array(rows).reshape(size, n_visible)
        if not with_masks:
            return visible, None
        masks = np.array(mask_rows).reshape(size, n_visible)
        return visible, masks.astype('float32')

    for j in range(epochs):
        k = ks[stage_index(j, ks)]
        momentum = momentums[stage_index(j, momentums)]
        current_l_w = l_w[stage_index(j, l_w)]
        current_l_v = l_v[stage_index(j, l_v)]
        current_l_h = l_h[stage_index(j, l_h)]

        # Rebuilt every epoch because the hyper-parameters are passed in at
        # construction time.
        train = rbm.cdk_fun(vis,
                            vmasks,
                            k=k,
                            w_lr=current_l_w,
                            v_lr=current_l_v,
                            h_lr=current_l_h,
                            decay=decay,
                            momentum=momentum)
        predict = rbm.predict(vis)

        for batch in chunker(profiles.keys(), batch_size):
            train_batch, train_masks = encode_users(batch, with_masks=True)
            train(train_batch, train_masks)
            sys.stdout.write('.')
            sys.stdout.flush()

        ratings = []
        predictions = []

        for batch in chunker(tests.keys(), batch_size):
            # Masks are not needed for prediction, so they are not built.
            test_batch, _ = encode_users(batch, with_masks=False)
            positions = {profile_id: pos for pos, profile_id
                         in enumerate(batch)}
            user_preds = revert_expected_value(predict(test_batch))
            for profile_id in batch:
                test_movies = tests[profile_id]
                try:
                    current_profile = user_preds[positions[profile_id]]
                    for movie, rating in test_movies:
                        predicted = current_profile[movie_position(movie)]
                        rating = float(rating)
                        ratings.append(rating)
                        predictions.append(predicted)
                except (ValueError, TypeError, IndexError, KeyError):
                    # Best effort, as before: an unknown movie or malformed
                    # entry ends the evaluation of this user's remaining
                    # test ratings; anything else is a real bug and surfaces.
                    pass

        distances = np.array(ratings) - np.array(predictions)

        mae = np.abs(distances).mean()
        rmse = sqrt((distances ** 2).mean())

        iteration_result = {
            'iteration': j,
            'k': k,
            'momentum': momentum,
            'mae': mae,
            'rmse': rmse,
            'lrate': current_l_w
        }

        config_result['results'].append(iteration_result)

        print(iteration_str.format(j, k, current_l_w, momentum, mae, rmse))

        # Rewritten after every epoch so partial results survive an abort.
        with open('{}_{}.json'.format(config_name, name), 'wt') as res_output:
            res_output.write(json.dumps(config_result, indent=4))
def run(name, dataset, user_info, config, all_users, all_movies, all_occupations, all_sex, all_ages, tests, initial_v, sep):
    """Train a CFRBM with rating, occupation, sex and age visible groups.

    Each epoch rebuilds the training function with that epoch's scheduled
    hyper-parameters, makes one timed mini-batch pass over every user
    profile, makes one timed prediction pass over ``tests`` and rewrites
    ``experiments/<config name>_<name>.json`` with all results so far
    (MAE, RMSE, timings and the precision/recall tuple).

    Args:
        name: Suffix used in the results file name.
        dataset: Path of the ratings file.  Every line must split on ``sep``
            into exactly four fields: user id, movie id, rating, timestamp.
        user_info: Path of a ``|``-delimited file; column 0 is the user id,
            columns 1-6 are read as age values, column 7 as sex and the
            remaining columns as occupation values (all parsed as ints).
        config: Dict providing ``name``, ``number_hidden``, ``epochs``,
            ``ks``, ``momentums``, ``l_w``, ``l_v``, ``l_h``, ``decay`` and
            ``batch_size``.  ``ks``, ``momentums`` and the ``l_*`` entries
            are sequences: the epochs are cut into ``len(seq)`` equal stages
            and, once the stages run out, the last entry is used.
        all_users: Unused by this function.
        all_movies: List of movie ids; a movie's position selects its group
            of 5 consecutive rating units.
        all_occupations: Only its length is used (occupation group width).
        all_sex: Unused by this function (the sex group has width 1).
        all_ages: Only its length is used (age group width).
        tests: Mapping of user id to an iterable of ``(movie id, rating)``
            pairs to evaluate.
        initial_v: Unused by this function.
        sep: Field separator of ``dataset``.

    Raises:
        ValueError: If a dataset line does not have four fields, a user-info
            value is not an integer, or a rated movie is missing from
            ``all_movies``.
    """
    config_name = config['name']
    number_hidden = config['number_hidden']
    epochs = config['epochs']
    ks = config['ks']
    momentums = config['momentums']
    l_w = config['l_w']
    l_v = config['l_v']
    l_h = config['l_h']
    decay = config['decay']
    batch_size = config['batch_size']

    config_result = config.copy()
    config_result['results'] = []

    vis_x = T.matrix()
    vis_o = T.matrix()
    vis_s = T.matrix()
    vis_a = T.matrix()
    vmasks_x = T.matrix()
    vmasks_o = T.matrix()
    vmasks_s = T.matrix()
    vmasks_a = T.matrix()

    n_movies = len(all_movies)
    n_ratings = n_movies * 5  # presumably one unit per rating level 1..5
    n_occupations = len(all_occupations)
    n_ages = len(all_ages)

    rbm = CFRBM(n_ratings, n_occupations, 1, n_ages, number_hidden)

    # Position of every movie, built once so that lookups are O(1) instead of
    # a linear ``all_movies.index`` scan per rating.  ``setdefault`` keeps the
    # first occurrence, which is what ``list.index`` returns.
    movie_pos = {}
    for pos, known_movie in enumerate(all_movies):
        movie_pos.setdefault(known_movie, pos)

    def movie_position(movie_id):
        """Return the index of ``movie_id``; ValueError if it is unknown."""
        try:
            return movie_pos[movie_id]
        except KeyError:
            # Same exception type ``all_movies.index`` would have raised.
            raise ValueError('{!r} is not in all_movies'.format(movie_id))

    def stage_index(epoch, col):
        """Return the index into schedule ``col`` for ``epoch`` (-1 = last)."""
        # Floor division keeps the index an int even under true division;
        # the ``max`` avoids a zero step when ``col`` is longer than the
        # number of epochs.
        step = max(epochs // len(col), 1)
        stage = epoch // step
        return stage if stage < len(col) else -1

    profiles = defaultdict(list)

    with open(dataset, 'rt') as data:
        for line in data:
            uid, mid, rat, _timestamp = line.strip().split(sep)
            profiles[uid].append((mid, float(rat)))

    print("Users and ratings loaded")

    user_occ = defaultdict(list)
    user_sex = defaultdict(list)
    user_age = defaultdict(list)

    # 'rb' is the Python 2 ``csv`` convention used by this file; the ``with``
    # block makes sure the handle is closed.
    with open(user_info, 'rb') as info_file:
        for row in csv.reader(info_file, delimiter='|'):
            user_age[row[0]] = [int(x) for x in row[1:7]]
            user_sex[row[0]] = [int(row[7])]
            user_occ[row[0]] = [int(x) for x in row[8:]]

    print("User info loaded")

    def encode_users(batch, with_masks):
        """Build the four visible matrices (and optionally masks) for ``batch``.

        Returns ``(visibles, masks)``; each is a tuple ordered ratings,
        occupation, sex, age.  ``masks`` is None when ``with_masks`` is false.
        """
        size = min(len(batch), batch_size)
        rating_rows = []
        occ_rows = []
        sex_rows = []
        age_rows = []
        mask_rows = []
        for userid in batch:
            user_profile = [0.] * n_movies
            mask_x = [0] * n_ratings if with_masks else None

            # NOTE(review): ``profiles`` is a defaultdict, so looking up a
            # user without training ratings inserts an empty profile that
            # later training passes will see.  Kept as in the original.
            for movie_id, rat in profiles[userid]:
                pos = movie_position(movie_id)
                user_profile[pos] = rat
                if with_masks:
                    # Mark all 5 units of a rated movie as observed.
                    mask_x[5 * pos:5 * pos + 5] = [1] * 5

            # NOTE(review): the demographic vectors are always all zeros;
            # ``user_occ`` / ``user_sex`` / ``user_age`` loaded above are
            # never copied into them.  Looks unintended - confirm before
            # relying on the demographic inputs.
            occ_profile = [0.] * n_occupations
            sex_profile = [0.] * 1
            age_profile = [0.] * n_ages

            rating_rows.append(
                expand(np.array([user_profile])).astype('float32'))
            occ_rows.append(
                expand(np.array([occ_profile]), k=1).astype('float32'))
            sex_rows.append(
                expand(np.array([sex_profile]), k=1).astype('float32'))
            age_rows.append(
                expand(np.array([age_profile]), k=1).astype('float32'))
            if with_masks:
                mask_rows.append(mask_x)

        visibles = (
            np.array(rating_rows).reshape(size, n_ratings),
            np.array(occ_rows).reshape(size, n_occupations),
            np.array(sex_rows).reshape(size, 1),
            np.array(age_rows).reshape(size, n_ages),
        )
        if not with_masks:
            return visibles, None

        # Demographic units are always treated as observed (mask of ones).
        masks = (
            np.array(mask_rows).reshape(size, n_ratings).astype('float32'),
            np.ones((size, n_occupations), dtype='float32'),
            np.ones((size, 1), dtype='float32'),
            np.ones((size, n_ages), dtype='float32'),
        )
        return visibles, masks

    for j in range(epochs):
        k = ks[stage_index(j, ks)]
        momentum = momentums[stage_index(j, momentums)]
        current_l_w = l_w[stage_index(j, l_w)]
        current_l_v = l_v[stage_index(j, l_v)]
        current_l_h = l_h[stage_index(j, l_h)]

        # Rebuilt every epoch because the hyper-parameters are passed in at
        # construction time.
        train = rbm.cdk_fun(vis_x,
                            vis_o,
                            vis_s,
                            vis_a,
                            vmasks_x,
                            vmasks_o,
                            vmasks_s,
                            vmasks_a,
                            k=k,
                            w_lr=current_l_w,
                            v_lr=current_l_v,
                            h_lr=current_l_h,
                            decay=decay,
                            momentum=momentum)
        predict = rbm.predict(vis_x, vis_o, vis_s, vis_a)

        start_time = time.time()

        for batch in chunker(profiles.keys(), batch_size):
            visibles, masks = encode_users(batch, with_masks=True)
            train_batch_x, train_batch_o, train_batch_s, train_batch_a = visibles
            train_masks_x, train_masks_o, train_masks_s, train_masks_a = masks
            train(train_batch_x, train_batch_o, train_batch_s, train_batch_a,
                  train_masks_x, train_masks_o, train_masks_s, train_masks_a)
            sys.stdout.write('.')
            sys.stdout.flush()

        train_time = time.time() - start_time

        ratings = []
        predictions = []

        start_time = time.time()

        for batch in chunker(tests.keys(), batch_size):
            # Masks are not needed for prediction, so they are not built.
            visibles, _ = encode_users(batch, with_masks=False)
            test_batch_x, test_batch_o, test_batch_s, test_batch_a = visibles
            positions = {profile_id: pos for pos, profile_id
                         in enumerate(batch)}
            user_preds = revert_expected_value(
                predict(test_batch_x, test_batch_o, test_batch_s, test_batch_a))
            for profile_id in batch:
                test_movies = tests[profile_id]
                try:
                    current_profile = user_preds[positions[profile_id]]
                    for movie, rating in test_movies:
                        predicted = current_profile[movie_position(movie)]
                        rating = float(rating)
                        ratings.append(rating)
                        predictions.append(predicted)
                except (ValueError, TypeError, IndexError, KeyError):
                    # Best effort, as before: an unknown movie or malformed
                    # entry ends the evaluation of this user's remaining
                    # test ratings; anything else is a real bug and surfaces.
                    pass

        test_time = time.time() - start_time

        # The uint8 cast truncates fractional values before thresholding.
        true_rat = np.array(ratings, dtype=np.uint8)
        pred_rat = np.array(predictions, dtype=np.uint8)

        # Binary task: "rating below 3" is the positive class.
        prec_rec = precision_recall_fscore_support(true_rat < 3, pred_rat < 3, average='binary')
        print(prec_rec)

        distances = np.array(ratings) - np.array(predictions)

        mae = np.abs(distances).mean()
        rmse = sqrt((distances ** 2).mean())

        iteration_result = {
            'iteration': j,
            'k': k,
            'momentum': momentum,
            'mae': mae,
            'rmse': rmse,
            'lrate': current_l_w,
            'train_time': train_time,
            'test_time': test_time,
            'prec_rec': prec_rec
        }

        config_result['results'].append(iteration_result)

        print(iteration_str.format(j, k, current_l_w, momentum, mae, rmse))

        # Rewritten after every epoch so partial results survive an abort.
        with open('experiments/{}_{}.json'.format(config_name, name), 'wt') as res_output:
            res_output.write(json.dumps(config_result, indent=4))
Example #5
0
def run(name, dataset, config, all_users, all_movies, tests, initial_v, sep):
    """Train a CFRBM with 20 visible units per item and score it every epoch.

    Each epoch decays the learning rates, rebuilds the training function,
    runs a prediction pass over ``tests`` before training (printing a
    corner of the first batch's predictions), makes one mini-batch training
    pass over every user profile, evaluates on ``tests``, rewrites
    ``<config name>_<name>.json`` with all results so far and saves the
    weight matrix to ``weights.npy`` via ``np.save('weights', ...)``.

    Args:
        name: Suffix used in the results file name.
        dataset: Path of the ratings file.  Every line must split on ``sep``
            into exactly three fields: user id, item id, rating.
        config: Dict providing ``name``, ``number_hidden``, ``epochs``,
            ``ks``, ``momentums``, ``l_w``, ``l_v``, ``l_h``, ``lr_decay``,
            ``decay`` and ``batch_size``.  Only the first element of
            ``l_w``/``l_v``/``l_h``/``lr_decay`` is used: the learning rates
            start there and are multiplied by the decay factor at the start
            of every epoch (including the first).  ``ks`` and ``momentums``
            are staged schedules: the epochs are cut into ``len(seq)`` equal
            stages and, once the stages run out, the last entry is used.
        all_users: Unused by this function.
        all_movies: List of item ids; an item's position selects its group
            of 20 consecutive visible units.
        tests: Mapping of user id to an iterable of ``(item id, rating)``
            pairs to evaluate.
        initial_v: Unused by this function.
        sep: Field separator of ``dataset``.

    Raises:
        ValueError: If a dataset line does not have three fields, or a rated
            item is missing from ``all_movies``.
    """
    config_name = config['name']
    number_hidden = config['number_hidden']
    epochs = config['epochs']
    ks = config['ks']
    momentums = config['momentums']
    l_w = config['l_w']
    l_v = config['l_v']
    l_h = config['l_h']
    lr_decay = config['lr_decay'][0]
    decay = config['decay']
    batch_size = config['batch_size']

    config_result = config.copy()
    config_result['results'] = []

    vis = T.matrix()
    vmasks = T.matrix()

    n_movies = len(all_movies)
    n_visible = n_movies * 20

    rbm = CFRBM(n_visible, number_hidden)

    # Position of every item, built once so that lookups are O(1) instead of
    # a linear ``all_movies.index`` scan per rating.  ``setdefault`` keeps the
    # first occurrence, which is what ``list.index`` returns.
    movie_pos = {}
    for pos, known_movie in enumerate(all_movies):
        movie_pos.setdefault(known_movie, pos)

    def movie_position(movie_id):
        """Return the index of ``movie_id``; ValueError if it is unknown."""
        try:
            return movie_pos[movie_id]
        except KeyError:
            # Same exception type ``all_movies.index`` would have raised.
            raise ValueError('{!r} is not in all_movies'.format(movie_id))

    def stage_index(epoch, col):
        """Return the index into schedule ``col`` for ``epoch`` (-1 = last)."""
        # Floor division keeps the index an int even under true division;
        # the ``max`` avoids a zero step when ``col`` is longer than the
        # number of epochs.
        step = max(epochs // len(col), 1)
        stage = epoch // step
        return stage if stage < len(col) else -1

    profiles = defaultdict(list)

    with open(dataset, 'rt') as data:
        for line in data:
            uid, mid, rat = line.strip().split(sep)
            profiles[uid].append((mid, float(rat)))

    current_l_w = l_w[0]
    current_l_v = l_v[0]
    current_l_h = l_h[0]

    print("Users and ratings loaded")

    def encode_users(batch, with_masks):
        """Build the visible matrix (and optionally the mask) for ``batch``."""
        size = min(len(batch), batch_size)
        rows = []
        mask_rows = []
        for userid in batch:
            user_profile = [0.] * n_movies
            mask = [0] * n_visible if with_masks else None

            # NOTE(review): ``profiles`` is a defaultdict, so looking up a
            # user without training ratings inserts an empty profile that
            # later training passes will see.  Kept as in the original.
            for movie_id, rat in profiles[userid]:
                pos = movie_position(movie_id)
                user_profile[pos] = rat
                if with_masks:
                    # Mark all 20 units of a rated item as observed.
                    mask[20 * pos:20 * pos + 20] = [1] * 20

            rows.append(expand(np.array([user_profile])).astype('float32'))
            if with_masks:
                mask_rows.append(mask)

        visible = np.array(rows).reshape(size, n_visible)
        if not with_masks:
            return visible, None
        masks = np.array(mask_rows).reshape(size, n_visible)
        return visible, masks.astype('float32')

    for j in range(epochs):

        # Same text the Python 2 statement ``print "epochs: ", j`` produced.
        print("epochs:  {}".format(j))

        k = ks[stage_index(j, ks)]
        momentum = momentums[stage_index(j, momentums)]
        current_l_w *= lr_decay
        current_l_v *= lr_decay
        current_l_h *= lr_decay

        # Rebuilt every epoch because the hyper-parameters are passed in at
        # construction time.
        train = rbm.cdk_fun(vis,
                            vmasks,
                            k=k,
                            w_lr=current_l_w,
                            v_lr=current_l_v,
                            h_lr=current_l_h,
                            decay=decay,
                            momentum=momentum)
        predict = rbm.predict(vis)

        # Pre-training prediction pass.  Only the first batch is printed.
        # NOTE(review): the remaining batches are still predicted because it
        # is not visible here whether ``predict`` advances any internal
        # state - confirm before trimming this loop.
        for n_batch, batch in enumerate(chunker(tests.keys(), batch_size), 1):
            test_batch, _ = encode_users(batch, with_masks=False)
            batch_preds = predict(test_batch)
            user_preds = revert_expected_value(batch_preds, do_round=False)
            if n_batch == 1:
                print(user_preds[:4, :5])

        for train_batch_i, batch in enumerate(
                chunker(profiles.keys(), batch_size), 1):
            train_batch, train_masks = encode_users(batch, with_masks=True)
            train(train_batch, train_masks)

            # Progress dot every 200 batches.
            if train_batch_i % 200 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()

        ratings = []
        predictions = []

        for batch in chunker(tests.keys(), batch_size):
            # Masks are not needed for prediction, so they are not built.
            test_batch, _ = encode_users(batch, with_masks=False)
            positions = {
                profile_id: pos
                for pos, profile_id in enumerate(batch)
            }
            batch_preds = predict(test_batch)
            user_preds = revert_expected_value(batch_preds, do_round=False)

            for profile_id in batch:
                test_movies = tests[profile_id]
                try:
                    current_profile = user_preds[positions[profile_id]]
                    for movie, rating in test_movies:
                        predicted = current_profile[movie_position(movie)]
                        rating = float(rating)
                        ratings.append(rating)
                        predictions.append(predicted)
                except (ValueError, TypeError, IndexError, KeyError):
                    # Best effort, as before: an unknown item or malformed
                    # entry ends the evaluation of this user's remaining
                    # test ratings; anything else is a real bug and surfaces.
                    pass

        distances = np.array(ratings) - np.array(predictions)

        mae = np.abs(distances).mean()
        rmse = sqrt((distances**2).mean())

        iteration_result = {
            'iteration': j,
            'k': k,
            'momentum': momentum,
            'mae': mae,
            'rmse': rmse,
            'lrate': current_l_w
        }

        config_result['results'].append(iteration_result)

        print(iteration_str.format(j, k, current_l_w, momentum, mae, rmse))

        # Rewritten after every epoch so partial results survive an abort.
        with open('{}_{}.json'.format(config_name, name), 'wt') as res_output:
            res_output.write(json.dumps(config_result, indent=4))

        # Snapshot of the weight matrix, overwritten every epoch.
        w = rbm.weights.eval()
        np.save('weights', w)
def run(name, dataset, config, all_users, all_movies, tests, initial_v, sep):
    """Train a CFRBM on per-user rating profiles and evaluate it every epoch.

    Each user becomes one training example with ``len(all_movies) * 5``
    visible units (five per movie; the encoding itself is delegated to
    ``expand``).  After every epoch the held-out ratings in ``tests`` are
    predicted, MAE / RMSE / precision-recall are computed, and the
    accumulated results are rewritten to
    ``experiments/<config name>_<name>.json``.

    Args:
        name: Suffix used in the name of the JSON result file.
        dataset: Path of a text file with one rating per line, made of four
            ``sep``-separated fields: user id, movie id, rating, timestamp.
        config: Dict with the keys ``name``, ``number_hidden``, ``epochs``,
            ``ks``, ``momentums``, ``l_w``, ``l_v``, ``l_h``, ``decay`` and
            ``batch_size``.  ``ks``, ``momentums`` and the ``l_*`` entries
            are sequences that are spread evenly over the epochs.
        all_users: Unused; kept for interface compatibility.
        all_movies: Sequence of movie ids; a movie's position defines its
            column in the visible layer.
        tests: Mapping of user id to an iterable of ``(movie id, rating)``
            pairs used for evaluation.
        initial_v: Unused; kept for interface compatibility.
        sep: Field separator of ``dataset``.

    Raises:
        ValueError: If a training rating refers to a movie that is not in
            ``all_movies`` or a non-blank dataset line does not have four
            fields.
    """
    config_name = config['name']
    number_hidden = config['number_hidden']
    epochs = config['epochs']
    ks = config['ks']
    momentums = config['momentums']
    l_w = config['l_w']
    l_v = config['l_v']
    l_h = config['l_h']
    decay = config['decay']
    batch_size = config['batch_size']

    config_result = config.copy()
    config_result['results'] = []

    vis = T.matrix()
    vmasks = T.matrix()

    n_movies = len(all_movies)
    n_visible = n_movies * 5
    rbm = CFRBM(n_visible, number_hidden)

    # movie id -> column.  Built once instead of calling all_movies.index()
    # (a linear scan) twice per rating; setdefault keeps the first
    # occurrence, exactly like list.index().
    movie_index = {}
    for column, movie_id in enumerate(all_movies):
        movie_index.setdefault(movie_id, column)

    profiles = defaultdict(list)
    with open(dataset, 'rt') as data:
        for line in data:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            uid, mid, rat, _timestamp = line.split(sep)
            profiles[uid].append((mid, float(rat)))

    print("Users and ratings loaded")

    def scheduled(values, epoch):
        """Return the entry of ``values`` that applies to ``epoch``."""
        # Floor division keeps the index an int on Python 3 as well; the
        # max() guards against a zero step when there are fewer epochs
        # than scheduled values.
        step = max(1, epochs // len(values))
        position = epoch // step
        # Once the schedule is exhausted keep using the last value.
        return values[position] if position < len(values) else values[-1]

    def encode(user_ratings):
        """Return ``(expanded profile, float32 mask)`` for one user."""
        dense = [0.] * n_movies
        mask = np.zeros(n_visible, dtype='float32')
        for movie_id, rat in user_ratings:
            column = movie_index.get(movie_id)
            if column is None:
                # Same exception type list.index() raised before.
                raise ValueError(
                    '{!r} is not in all_movies'.format(movie_id))
            dense[column] = rat
            # All five units of a rated movie are observed.
            mask[5 * column:5 * column + 5] = 1
        return expand(np.array([dense])).astype('float32'), mask

    for j in range(epochs):
        k = scheduled(ks, j)
        momentum = scheduled(momentums, j)
        current_l_w = scheduled(l_w, j)
        current_l_v = scheduled(l_v, j)
        current_l_h = scheduled(l_h, j)

        train = rbm.cdk_fun(vis,
                            vmasks,
                            k=k,
                            w_lr=current_l_w,
                            v_lr=current_l_v,
                            h_lr=current_l_h,
                            decay=decay,
                            momentum=momentum)
        predict = rbm.predict(vis)

        # ---- training pass -------------------------------------------
        start_time = time.time()

        # list(...) snapshots the keys so chunker gets a real sequence on
        # Python 3 too.
        for batch in chunker(list(profiles), batch_size):
            size = min(len(batch), batch_size)
            encoded = [encode(profiles[userid]) for userid in batch]

            train_batch = np.array(
                [example for example, _ in encoded]).reshape(size, n_visible)
            train_masks = np.array(
                [mask for _, mask in encoded]).reshape(size, n_visible)
            train_masks = train_masks.astype('float32')

            train(train_batch, train_masks)
            sys.stdout.write('.')
            sys.stdout.flush()

        train_time = time.time() - start_time

        # ---- evaluation pass -----------------------------------------
        ratings = []
        predictions = []

        start_time = time.time()

        for batch in chunker(list(tests), batch_size):
            size = min(len(batch), batch_size)

            # .get() instead of indexing: indexing the defaultdict would
            # insert an empty profile for every test-only user, and those
            # users would then be trained on in the following epochs.
            profile_batch = [encode(profiles.get(userid, ()))[0]
                             for userid in batch]
            test_batch = np.array(profile_batch).reshape(size, n_visible)
            user_preds = revert_expected_value(predict(test_batch))

            for position, profile_id in enumerate(batch):
                current_profile = user_preds[position]
                for entry in tests[profile_id]:
                    try:
                        movie, rating = entry
                        rating = float(rating)
                    except (TypeError, ValueError):
                        # Malformed test entry: skip it, keep the rest.
                        continue
                    column = movie_index.get(movie)
                    if column is None:
                        # Movie unknown to the model: skip only this
                        # rating instead of dropping the remainder of the
                        # user's test ratings.
                        continue
                    ratings.append(rating)
                    predictions.append(current_profile[column])

        test_time = time.time() - start_time

        # ---- metrics -------------------------------------------------
        distances = np.array(ratings) - np.array(predictions)

        true_rat = np.array(ratings, dtype=np.uint8)
        pred_rat = np.array(predictions, dtype=np.uint8)

        # Binary task: "rating below 3" is the positive class.
        prec_rec = precision_recall_fscore_support(true_rat < 3,
                                                   pred_rat < 3,
                                                   average='binary')
        print(prec_rec)

        mae = np.abs(distances).mean()
        rmse = sqrt((distances ** 2).mean())

        iteration_result = {
            'iteration': j,
            'k': k,
            'momentum': momentum,
            'mae': mae,
            'rmse': rmse,
            'lrate': current_l_w,
            'train_time': train_time,
            'test_time': test_time,
            'prec_rec': prec_rec
        }

        config_result['results'].append(iteration_result)

        print(iteration_str.format(j, k, current_l_w, momentum, mae, rmse))

        # Rewritten after every epoch so partial results survive a crash.
        with open('experiments/{}_{}.json'.format(config_name, name),
                  'wt') as res_output:
            res_output.write(json.dumps(config_result, indent=4))

        _, _, H = rbm.get_weights()
        print(H)
Example #7
0
def run(name, dataset, config, all_users, all_movies, tests, initial_v, sep):
    """Train a CFRBM on per-movie rating profiles and evaluate it every epoch.

    Each movie becomes one training example with ``len(all_users) * 5``
    visible units (five per user; the encoding itself is delegated to
    ``expand``).  After every epoch the held-out ratings in ``tests`` are
    predicted, MAE and RMSE are computed, and the accumulated results are
    rewritten to ``<config name>_<name>.json``.

    Args:
        name: Suffix used in the name of the JSON result file.
        dataset: Path of a text file with one rating per line, made of four
            ``sep``-separated fields: user id, movie id, rating, timestamp.
        config: Dict with the keys ``name``, ``number_hidden``, ``epochs``,
            ``ks``, ``momentums``, ``l_w``, ``l_v``, ``l_h`` and ``decay``.
            ``ks``, ``momentums`` and the ``l_*`` entries are sequences that
            are spread evenly over the epochs.
        all_users: Sequence of user ids; a user's position defines its
            column in the visible layer.
        all_movies: Unused; kept for interface compatibility.
        tests: Mapping of movie id to an iterable of ``(user id, rating)``
            pairs used for evaluation.
        initial_v: Unused; kept for interface compatibility.
        sep: Field separator of ``dataset``.

    Raises:
        ValueError: If a training rating refers to a user that is not in
            ``all_users`` or a non-blank dataset line does not have four
            fields.
    """
    config_name = config["name"]
    number_hidden = config["number_hidden"]
    epochs = config["epochs"]
    ks = config["ks"]
    momentums = config["momentums"]
    l_w = config["l_w"]
    l_v = config["l_v"]
    l_h = config["l_h"]
    decay = config["decay"]

    # Same batch size for the training and the evaluation pass.
    batch_size = 10

    config_result = config.copy()
    config_result["results"] = []

    vis = T.matrix()
    vmasks = T.matrix()

    n_users = len(all_users)
    n_visible = n_users * 5
    rbm = CFRBM(n_visible, number_hidden)

    # user id -> column.  Built once instead of calling all_users.index()
    # (a linear scan) twice per rating; setdefault keeps the first
    # occurrence, exactly like list.index().
    user_index = {}
    for column, user_id in enumerate(all_users):
        user_index.setdefault(user_id, column)

    profiles = defaultdict(list)
    with open(dataset, "rt") as data:
        for line in data:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            uid, mid, rat, _timestamp = line.split(sep)
            profiles[mid].append((uid, float(rat)))

    print("Users and ratings loaded")

    def scheduled(values, epoch):
        """Return the entry of ``values`` that applies to ``epoch``."""
        # Floor division keeps the index an int on Python 3 as well; the
        # max() guards against a zero step when there are fewer epochs
        # than scheduled values.
        step = max(1, epochs // len(values))
        position = epoch // step
        # Once the schedule is exhausted keep using the last value.
        return values[position] if position < len(values) else values[-1]

    def encode(movie_ratings):
        """Return ``(expanded profile, float32 mask)`` for one movie."""
        dense = [0.0] * n_users
        mask = np.zeros(n_visible, dtype="float32")
        for user_id, rat in movie_ratings:
            column = user_index.get(user_id)
            if column is None:
                # Same exception type list.index() raised before.
                raise ValueError("{!r} is not in all_users".format(user_id))
            dense[column] = rat
            # All five units of a rating user are observed.
            mask[5 * column:5 * column + 5] = 1
        return expand(np.array([dense])).astype("float32"), mask

    for j in range(epochs):
        k = scheduled(ks, j)
        momentum = scheduled(momentums, j)
        current_l_w = scheduled(l_w, j)
        current_l_v = scheduled(l_v, j)
        current_l_h = scheduled(l_h, j)

        train = rbm.cdk_fun(
            vis, vmasks, k=k, w_lr=current_l_w, v_lr=current_l_v,
            h_lr=current_l_h, decay=decay, momentum=momentum
        )
        predict = rbm.predict(vis)

        # ---- training pass -------------------------------------------
        # list(...) snapshots the keys so chunker gets a real sequence on
        # Python 3 too.
        for batch in utils.chunker(list(profiles), batch_size):
            size = min(len(batch), batch_size)
            encoded = [encode(profiles[movieid]) for movieid in batch]

            train_batch = np.array(
                [example for example, _ in encoded]).reshape(size, n_visible)
            train_masks = np.array(
                [mask for _, mask in encoded]).reshape(size, n_visible)
            train_masks = train_masks.astype("float32")

            train(train_batch, train_masks)
            sys.stdout.write(".")
            sys.stdout.flush()

        # ---- evaluation pass -----------------------------------------
        ratings = []
        predictions = []

        for batch in utils.chunker(list(tests), batch_size):
            size = min(len(batch), batch_size)

            # .get() instead of indexing: indexing the defaultdict would
            # insert an empty profile for every test-only movie, and those
            # movies would then be trained on in the following epochs.
            movies_batch = [encode(profiles.get(movieid, ()))[0]
                            for movieid in batch]
            test_batch = np.array(movies_batch).reshape(size, n_visible)
            movie_predictions = revert_expected_value(predict(test_batch))

            for position, movie_id in enumerate(batch):
                current_movie = movie_predictions[position]
                for entry in tests[movie_id]:
                    try:
                        user, rating = entry
                        rating = float(rating)
                    except (TypeError, ValueError):
                        # Malformed test entry: skip it, keep the rest.
                        continue
                    column = user_index.get(user)
                    if column is None:
                        # User unknown to the model: skip only this rating
                        # instead of dropping the remainder of the movie's
                        # test ratings.
                        continue
                    ratings.append(rating)
                    predictions.append(current_movie[column])

        # ---- metrics -------------------------------------------------
        distances = np.array(ratings) - np.array(predictions)

        mae = np.abs(distances).mean()
        rmse = sqrt((distances ** 2).mean())

        iteration_result = {
            "iteration": j,
            "k": k,
            "momentum": momentum,
            "mae": mae,
            "rmse": rmse,
            "lrate": current_l_w,
        }

        config_result["results"].append(iteration_result)

        print(iteration_str.format(j, k, current_l_w, momentum, mae, rmse))

        # Rewritten after every epoch so partial results survive a crash.
        with open("{}_{}.json".format(config_name, name), "wt") as res_output:
            res_output.write(json.dumps(config_result, indent=4))