예제 #1
0
 def _init_model(self):
     """Make sure ``self._model`` refers to an ``FM`` instance.

     * ``None``: a new ``FM`` is built from the hyper-parameters stored
       on this object.
     * ``str``: a new ``FM`` is built from the same hyper-parameters, with
       the string forwarded as ``model=`` (presumably a reference to a
       previously saved model -- confirm against ``FM``).
     * an ``FM`` instance: left untouched.

     Raises
     ------
     ValueError
         If ``self._model`` is none of the above.
     """
     current = self._model

     if current is not None and not isinstance(current, str):
         # Nothing to build: either it already is an FM, or it is invalid.
         if not isinstance(current, FM):
             raise ValueError("Model must be an instance of "
                              "divmachines.classifiers.FM class")
         return

     # Only the string case passes `model=`; every other argument is shared.
     fm_kwargs = {} if current is None else {"model": current}
     fm_kwargs.update(
         n_factors=self._n_factors,
         sparse=self._sparse,
         n_iter=self._n_iter,
         loss=self._loss,
         l2=self._l2,
         learning_rate=self._learning_rate,
         optimizer_func=self._optimizer_func,
         batch_size=self._batch_size,
         random_state=self._random_state,
         use_cuda=self._use_cuda,
         device_id=self._device_id,
         logger=self._logger,
         n_jobs=self._n_jobs,
         pin_memory=self._pin_memory,
         verbose=self._verbose,
         early_stopping=self._early_stopping,
         n_iter_no_change=self._n_iter_no_change,
         tol=self._tol,
         stopping=self._stopping,
     )
     self._model = FM(**fm_kwargs)
예제 #2
0
               verbose=VERBOSE,
               sparse=SPARSE)

# Load both CSV files; the first row of each is the column header.
train = pd.read_csv(train_path, header=0)
ground = pd.read_csv(ground_path, header=0)

# Users and catalogue rows are taken from the union of the two files.
dataset = pd.concat((train, ground))
users = dataset.user.unique()
# Distinct rows over the columns proj[1:-1]; `proj` is defined outside this
# fragment.
item_catalogue = dataset[proj[1:-1]].drop_duplicates().values

# cartesian2D is a project helper; presumably it pairs every user with every
# catalogue row -- TODO confirm.
values = cartesian2D(users.reshape(-1, 1), item_catalogue)

model0 = FM(n_iter=N_ITER,
            model=MODEL_PATH,
            n_jobs=N_JOBS,
            batch_size=BATCH_SIZE,
            n_factors=FACTORS,
            learning_rate=LEARNING_RATE,
            use_cuda=USE_CUDA,
            verbose=VERBOSE,
            sparse=SPARSE)

# Lay the flat predictions out as a (n_users, n_catalogue_rows) matrix.
rank = model0.predict(values) \
    .reshape(users.shape[0], item_catalogue.shape[0])

for b in tqdm([1.0], desc="sys.div.", leave=False):
    # Column 0 holds the user id; columns 1..TOP hold the output of
    # `model.predict` (`model` is created before this fragment).
    # Use the builtin `object`: the `np.object` alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    table = np.zeros((users.shape[0], TOP + 1), dtype=object)
    table[:, 0] = users
    table[:, 1:] = model.predict(values, top=TOP, b=b, rank=rank)
    np.savetxt("./results-b" + str(b), table, fmt="%s")
예제 #3
0
         "Musical | Mystery | Romance | Sci-Fi | Thriller | War | Western "
# Turn the " | "-separated header string into a list of column names.
header = header.replace(" |", "").split()
items = pd.read_csv(GENRE_PATH, sep="|", names=header, encoding='iso-8859-2')

# Columns kept for training: user, item, every header field from the sixth
# onward, and finally the rating.
proj = ['user', 'item'] + header[5:] + ['rating']
train = pd.merge(left=data, right=items, on='item', how='inner')[proj] \
    .sample(1000)

n_users = np.unique(train["user"].values).size
n_items = np.unique(train["item"].values).size

print("Number of users: %s" % n_users)
print("Number of items: %s" % n_items)

model = FM(verbose=True)

# The last column is the target, everything before it is the feature matrix.
interactions = train.values
x, y = interactions[:, :-1], interactions[:, -1]

gSearch = GridSearchCV(
    model,
    param_grid={
        "n_iter": [10],
        "learning_rate": [1],
        "l2": [0, 0.1, 0.2],
    },
    cv='kFold',
    metrics='mean_square_error',
    n_jobs=8,
    verbose=10,
    return_train_score=True,
)

gSearch.fit(x, y, fit_params={'dic':
                                  {'users': 0, 'items': 1},
예제 #4
0
# Keep only the training rows whose user is listed in `user_take`.
train = train[train['user'].isin(user_take)]

# Draw 5 random rows per user from `test`.
dr = (
    test.groupby('user', as_index=False)
    .apply(lambda grp: grp.sample(5))
    .reset_index(0, drop=True)
)

# Ground truth: the rows of `test` that were not drawn above.
ground = test.loc[~test.index.isin(dr.index)].dropna()

# The drawn rows are added to the training data.
train = pd.concat([train, dr])
users = ground['user'].unique()

model = FM(
    n_iter=100,
    learning_rate=1e-3,
    sparse=False,
    batch_size=4096,
    n_jobs=8,
    optimizer_func=Adam,
    use_cuda=True,
    verbose=True,
    stopping=True,
    early_stopping=True,
)

interactions = train[['user', 'item', 'rating']].values
np.random.shuffle(interactions)  # in-place shuffle of the rows
x, y = interactions[:, :-1], interactions[:, -1]

model.fit(x, y, {'users': 0, 'items': 1}, n_items=n_items, n_users=n_users)

# Predict for the user/item grid and view the result as a
# (n_users, n_items) matrix.
x = cartesian2D(users.reshape(-1, 1), item_cat.reshape(-1, 1))
pred = model.predict(x).reshape(len(users), len(item_cat))
예제 #5
0
         "Musical | Mystery | Romance | Sci-Fi | Thriller | War | Western "
# Split the " | "-separated header string into individual column names.
header = header.replace(" |", "").split()
items = pd.read_csv(GENRE_PATH, sep="|", names=header, encoding='iso-8859-2')

# Training columns: user, item, the header fields from index 5 onward,
# then the rating as the last column.
proj = ['user', 'item'] + header[5:] + ['rating']
train = pd.merge(left=data, right=items, on='item', how='inner')[proj]

n_users = np.unique(train["user"].values).size
n_items = np.unique(train["item"].values).size

print("Number of users: %s" % n_users)
print("Number of items: %s" % n_items)

model = FM(n_iter=10, learning_rate=0.1, use_cuda=False)

cv = KFold(folds=10, shuffle=True)

# Features are all columns but the last; the last column is the target.
interactions = train.values
x, y = interactions[:, :-1], interactions[:, -1]

for k, v in cross_validate(model,
                           x,
                           y,
                           cv=cv,
                           fit_params={
                               'dic': {
                                   'users': 0,
                                   'items': 1
예제 #6
0
df = pd.read_csv(PATH, sep=",", header=0)

# Only three columns of the feature file are used.
feats = pd.read_csv(FEATS_PATH, sep=",", header=0)
feats = feats[['artist_id', 'mode', 'tps_id']]
train = pd.merge(left=df, right=feats, on="tps_id")

# Cardinalities handed to the model when fitting.
n_users = len(df['user_id'].unique())
n_items = len(df['tps_id'].unique())
n_artists = len(feats['artist_id'].unique())

# The column order here defines the positional indices passed to fit().
columns = ['user_id', 'tps_id', 'artist_id', 'mode', 'playcounts']
interactions = train[columns].values

logger = TLogger()
model = FM(
    n_iter=10,
    learning_rate=0.1,
    logger=logger,
    n_jobs=4,
    use_cuda=False,
)

# Last column (playcounts) is the target; the rest are the features.
x, y = interactions[:, :-1], interactions[:, -1]
model.fit(
    x,
    y,
    {'users': 0, 'items': 1, 'artists': 2},
    n_users=n_users,
    n_items=n_items,
    lengths={"n_artists": n_artists},
)