def generateLinks():
    """Return the MovieLens links table without the ``imdbId`` column.

    The table is read from the data set located at ``ml-latest-small/``.

    Returns:
        A new frame holding the links table with ``imdbId`` removed.
    """
    db = ds.MovieLens('ml-latest-small/')
    # DataFrame.drop returns a new frame rather than modifying in place, so
    # its result has to be returned.  ``columns=`` replaces the positional
    # axis argument, which pandas 2.x no longer accepts.
    return db.links.drop(columns="imdbId")
Exemple #2
0
 def _readDatasetFile(self):
     """Open the MovieLens data set stored next to this source file.

     The dataset directory is ``self.datasetFolder`` appended to the
     directory containing this module; the resulting handle is stored on
     ``self.movieDataset``.
     """
     self.log("Opening dataset...")
     module_dir = os.path.dirname(os.path.abspath(__file__))
     dataset_path = module_dir + "/" + self.datasetFolder
     self.movieDataset = ds.MovieLens(dataset_path)
Exemple #3
0
We'll also use the LensKit API to implement our recommender systems algorithms.

***STEP 1***

**Step 1.1**
"""

# Notebook shell commands: install LensKit and upgrade numba.
!pip install lenskit
!pip install -U numba

import lenskit.datasets as ds
import pandas as pd

# Clone the lab repository; it is used below as the MovieLens data directory.
!git clone https://github.com/crash-course-ai/lab4-recommender-systems.git

# Create a MovieLens dataset handle for the cloned directory.
data = ds.MovieLens('lab4-recommender-systems/')

print("Successfully installed dataset.")

"""It's important to understand how a dataset is structured and to make sure that the dataset imported correctly.  Let's print out a few rows of the rating data. 

As you see, MovieLens stores a user's ID number (the first few rows look like they're all ratings from user 1), the item's ID (in this case each ID is a different movie), the rating the user gave this item, and a timestamp for when the rating was left.

**Step 1.2**
"""

# Preview the first `rows_to_show` rows of the MovieLens ratings table
# (the first rows in stored order, not the highest-rated entries).
rows_to_show = 10   
data.ratings.head(rows_to_show)

"""A big aspect of recommender system datasets is how they handle missing data. Recommender systems usually have a LOT of missing data, because most users only rate a few movies and most movies only receive ratings from a few users. 
Exemple #4
0
"""
This module defines the data sets that we are going to work with.
"""
import pandas as pd

from lenskit import datasets as ds

# MovieLens dataset handles, each pointing at its directory under data/.
ml20m = ds.MovieLens('data/ml-20m')
mlsmall = ds.MovieLens('data/ml-latest-small')
ml100k = ds.ML100K('data/ml-100k')
ml1m = ds.ML1M('data/ml-1m')
ml10m = ds.ML10M('data/ml-10M100K')

# `bx` is only defined when the imported lenskit.datasets module exposes
# a BookCrossing class; otherwise the name does not exist in this module.
if hasattr(ds, 'BookCrossing'):
    bx = ds.BookCrossing('data/bx')


def ds_diff(full, subset):
    """Return the entries of ``full`` whose index is not in ``subset``.

    Args:
        full: The complete pandas object.
        subset: A pandas object whose index labels identify the entries to
            leave out.

    Returns:
        ``full`` filtered down to the entries not selected by
        ``subset.index``.
    """
    # Start by keeping everything, then switch off the entries in the subset.
    keep = pd.Series(True, index=full.index)
    keep[subset.index] = False
    return full[keep]
 def load_data(link):
     """Return a MovieLens dataset handle for the location given by ``link``."""
     return ds.MovieLens(link)