Example #1
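This function appears to be lifted from a larger script: the imports and the small file-check helper it uses are not shown, and run() is presumably the project's own hyperparameter-search routine. A minimal sketch of the presumed context, with is_file_empty as a hypothetical helper:

# Presumed context for this snippet (not part of the original example):
import os

import numpy as np

from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.cross_validation import user_based_train_test_split


def is_file_empty(path):
    # Hypothetical helper: treat a missing or zero-byte file as empty.
    return not os.path.isfile(path) or os.path.getsize(path) == 0
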
def run_lstm_model():
    # Return cached results if a previous run already wrote them.
    if not is_file_empty("lstm_results.txt"):
        with open("lstm_results.txt", "r") as lstm_results:
            result_data = lstm_results.readlines()
        return result_data

    max_sequence_length = 200
    min_sequence_length = 20
    step_size = 200
    random_state = np.random.RandomState(100)

    dataset = get_movielens_dataset('1M')

    train, rest = user_based_train_test_split(dataset,
                                              random_state=random_state)
    test, validation = user_based_train_test_split(rest,
                                                   test_percentage=0.5,
                                                   random_state=random_state)
    train = train.to_sequence(max_sequence_length=max_sequence_length,
                              min_sequence_length=min_sequence_length,
                              step_size=step_size)
    test = test.to_sequence(max_sequence_length=max_sequence_length,
                            min_sequence_length=min_sequence_length,
                            step_size=step_size)
    validation = validation.to_sequence(
        max_sequence_length=max_sequence_length,
        min_sequence_length=min_sequence_length,
        step_size=step_size)

    # mode = sys.argv[1]  # originally taken from the command line
    mode = "lstm"

    # run() scans the saved result files and returns the best configuration.
    best_results = run(train, test, validation, random_state, mode)

    # TODO: pass the .txt result files and the best result to the frontend.
    print(best_results)

    return best_results
Example #2
def _get_synthetic_data(num_users=100,
                        num_items=100,
                        num_interactions=10000,
                        randomness=0.01,
                        order=2,
                        max_sequence_length=10,
                        random_state=None):

    interactions = synthetic.generate_sequential(
        num_users=num_users,
        num_items=num_items,
        num_interactions=num_interactions,
        concentration_parameter=randomness,
        order=order,
        random_state=random_state)

    # Report the highest empirical item probability in the generated interactions.
    print('Max prob {}'.format(
        (np.unique(interactions.item_ids, return_counts=True)[1] /
         num_interactions).max()))

    train, test = user_based_train_test_split(interactions,
                                              random_state=random_state)

    train = train.to_sequence(max_sequence_length=max_sequence_length,
                              step_size=None)
    test = test.to_sequence(max_sequence_length=max_sequence_length,
                            step_size=None)

    return train, test
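
One way the helper above might be exercised, assuming Spotlight's ImplicitSequenceModel and sequence_mrr_score together with the imports the helper itself presumes; none of this appears in the original example:

import numpy as np

from spotlight.datasets import synthetic
from spotlight.cross_validation import user_based_train_test_split
from spotlight.sequence.implicit import ImplicitSequenceModel
from spotlight.evaluation import sequence_mrr_score

random_state = np.random.RandomState(42)
train, test = _get_synthetic_data(random_state=random_state)

model = ImplicitSequenceModel(representation='lstm',
                              n_iter=3,
                              random_state=random_state)
model.fit(train, verbose=True)

# Mean reciprocal rank of the held-out next items.
print('Test MRR: {:.4f}'.format(sequence_mrr_score(model, test).mean()))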
Example #3
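The fixture below depends on module-level imports and constants that are not shown. A plausible reconstruction, where the exact seed and the CUDA flag are assumptions:

# Presumed context for this example (not part of the original):
import os

import numpy as np

from spotlight.datasets import movielens
from spotlight import cross_validation
from spotlight.cross_validation import user_based_train_test_split
from spotlight.sequence.implicit import ImplicitSequenceModel

CUDA = bool(os.environ.get('SPOTLIGHT_CUDA', False))  # assumed flag
RANDOM_STATE = np.random.RandomState(42)  # assumed seed
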
def data_implicit_sequence():

    max_sequence_length = 200
    min_sequence_length = 20
    step_size = 200

    interactions = movielens.get_movielens_dataset('100K')

    train, test = user_based_train_test_split(interactions,
                                              random_state=RANDOM_STATE)

    train = train.to_sequence(max_sequence_length=max_sequence_length,
                              min_sequence_length=min_sequence_length,
                              step_size=step_size)

    test = test.to_sequence(max_sequence_length=max_sequence_length,
                            min_sequence_length=min_sequence_length,
                            step_size=step_size)

    model = ImplicitSequenceModel(loss='adaptive_hinge',
                                  representation='lstm',
                                  batch_size=8,
                                  learning_rate=1e-2,
                                  l2=1e-3,
                                  n_iter=2,
                                  use_cuda=CUDA,
                                  random_state=RANDOM_STATE)

    model.fit(train, verbose=True)

    return train, test, model
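
The fixture returns the splits together with a fitted model; a short sketch of how it might be consumed, assuming Spotlight's sequence_mrr_score (not part of the original):

from spotlight.evaluation import sequence_mrr_score

train, test, model = data_implicit_sequence()

# Reciprocal rank of the true next item, one value per test sequence.
mrr = sequence_mrr_score(model, test)
print('Mean sequence MRR: {:.4f}'.format(mrr.mean()))
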
Example #4
def test_user_based_split():

    interactions = movielens.get_movielens_dataset('100K')

    train, test = (cross_validation.user_based_train_test_split(
        interactions, test_percentage=0.2, random_state=RANDOM_STATE))

    assert len(train) + len(test) == len(interactions)

    users_in_test = len(np.unique(test.user_ids))
    assert np.allclose(float(users_in_test) / interactions.num_users,
                       0.2,
                       atol=0.001)
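
The same check generalizes to other split fractions; a hedged variant, where the helper name and tolerance are purely illustrative:

def check_user_split_fraction(interactions, fraction, random_state):
    # Same assertions as above, for an arbitrary test fraction.
    train, test = cross_validation.user_based_train_test_split(
        interactions, test_percentage=fraction, random_state=random_state)

    assert len(train) + len(test) == len(interactions)

    users_in_test = len(np.unique(test.user_ids))
    assert np.allclose(float(users_in_test) / interactions.num_users,
                       fraction,
                       atol=0.001)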
Example #5
        results.save(hyperparameters, test_mrr.mean(), val_mrr.mean())

    return results


if __name__ == '__main__':

    max_sequence_length = 200
    min_sequence_length = 20
    step_size = 200
    random_state = np.random.RandomState(100)

    dataset = get_movielens_dataset('1M')

    train, rest = user_based_train_test_split(dataset,
                                              random_state=random_state)
    test, validation = user_based_train_test_split(rest,
                                                   test_percentage=0.5,
                                                   random_state=random_state)
    train = train.to_sequence(max_sequence_length=max_sequence_length,
                              min_sequence_length=min_sequence_length,
                              step_size=step_size)
    test = test.to_sequence(max_sequence_length=max_sequence_length,
                            min_sequence_length=min_sequence_length,
                            step_size=step_size)
    validation = validation.to_sequence(
        max_sequence_length=max_sequence_length,
        min_sequence_length=min_sequence_length,
        step_size=step_size)

    # mode = sys.argv[1]  # originally taken from the command line