def _init_model(self):
    """Lazily materialize the wrapped ``FM`` classifier.

    Three accepted states for ``self._model``:
      * ``None``  -> build a fresh ``FM`` from this wrapper's hyper-parameters;
      * ``str``   -> treated as a serialized-model path/identifier, forwarded
                     as ``FM(model=...)`` alongside the same hyper-parameters;
      * ``FM``    -> already initialized, left untouched.

    Raises
    ------
    ValueError
        If ``self._model`` is none of the above.
    """
    # FIX: the two construction branches previously repeated the full
    # 19-keyword argument list verbatim; collecting it once removes the
    # duplication so the branches cannot drift apart.
    fm_kwargs = dict(
        n_factors=self._n_factors,
        sparse=self._sparse,
        n_iter=self._n_iter,
        loss=self._loss,
        l2=self._l2,
        learning_rate=self._learning_rate,
        optimizer_func=self._optimizer_func,
        batch_size=self._batch_size,
        random_state=self._random_state,
        use_cuda=self._use_cuda,
        device_id=self._device_id,
        logger=self._logger,
        n_jobs=self._n_jobs,
        pin_memory=self._pin_memory,
        verbose=self._verbose,
        early_stopping=self._early_stopping,
        n_iter_no_change=self._n_iter_no_change,
        tol=self._tol,
        stopping=self._stopping,
    )
    if self._model is None:
        self._model = FM(**fm_kwargs)
    elif isinstance(self._model, str):
        # A string is interpreted as a pre-trained model to load.
        self._model = FM(model=self._model, **fm_kwargs)
    elif not isinstance(self._model, FM):
        raise ValueError("Model must be an instance of "
                         "divmachines.classifiers.FM class")
verbose=VERBOSE, sparse=SPARSE) train = pd.read_csv(train_path, header=0) ground = pd.read_csv(ground_path, header=0) dataset = pd.concat((train, ground)) users = dataset.user.unique() item_catalogue = dataset[proj[1:-1]].drop_duplicates().values values = cartesian2D(users.reshape(-1, 1), item_catalogue) model0 = FM(n_iter=N_ITER, model=MODEL_PATH, n_jobs=N_JOBS, batch_size=BATCH_SIZE, n_factors=FACTORS, learning_rate=LEARNING_RATE, use_cuda=USE_CUDA, verbose=VERBOSE, sparse=SPARSE) rank = model0.predict(values) \ .reshape(users.shape[0], item_catalogue.shape[0]) for b in tqdm([1.0], desc="sys.div.", leave=False): table = np.zeros((users.shape[0], TOP + 1), dtype=np.object) table[:, 0] = users table[:, 1:] = model.predict(values, top=TOP, b=b, rank=rank) np.savetxt("./results-b" + str(b), table, fmt="%s")
"Musical | Mystery | Romance | Sci-Fi | Thriller | War | Western " header = header.replace(" |", "") header = header.split() items = pd.read_csv(GENRE_PATH, sep="|", names=header, encoding='iso-8859-2') proj = ['user', 'item'] proj.extend(header[5:]) proj.append('rating') train = pd.merge(data, items, on='item', how='inner')[proj].sample(1000) n_users = np.unique(train[["user"]].values).shape[0] n_items = np.unique(train[["item"]].values).shape[0] print("Number of users: %s" % n_users) print("Number of items: %s" % n_items) model = FM(verbose=True) interactions = train.values x = interactions[:, :-1] y = interactions[:, -1] gSearch = GridSearchCV(model, param_grid={"n_iter": [10], "learning_rate": [1], "l2": [0, 0.1, 0.2]}, cv='kFold', metrics='mean_square_error', n_jobs=8, verbose=10, return_train_score=True) gSearch.fit(x, y, fit_params={'dic': {'users': 0, 'items': 1},
# Keep only the selected users in the training split.
train = train.loc[train['user'].isin(user_take)]
# Draw 5 interactions per user out of the test split; these get moved into
# train so every evaluated user has some training signal.
dr = test.groupby('user', as_index=False) \
    .apply(lambda g: g.sample(5)).reset_index(0, drop=True)
# Create Ground Truth
ground = test[~test.index.isin(dr.index)].dropna()
train = pd.concat((train, dr))
users = ground.user.unique()
model = FM(n_iter=100,
           learning_rate=1e-3,
           sparse=False,
           batch_size=4096,
           n_jobs=8,
           optimizer_func=Adam,
           use_cuda=True,
           verbose=True,
           stopping=True,
           early_stopping=True)
interactions = train[['user', 'item', 'rating']].values
# In-place shuffle so mini-batches are not ordered by user.
np.random.shuffle(interactions)
# Last column is the rating target; the first two are user/item ids.
x = interactions[:, :-1]
y = interactions[:, -1]
# The dict maps column index -> entity kind for the FM feature mapping.
model.fit(x, y, {'users': 0, 'items': 1}, n_items=n_items, n_users=n_users)
# Score the full (user, item) cross product and reshape to a
# users x items prediction matrix.
x = cartesian2D(users.reshape(-1, 1), item_cat.reshape(-1, 1))
pred = model.predict(x).reshape(users.shape[0], item_cat.shape[0])
"Musical | Mystery | Romance | Sci-Fi | Thriller | War | Western " header = header.replace(" |", "") header = header.split() items = pd.read_csv(GENRE_PATH, sep="|", names=header, encoding='iso-8859-2') proj = ['user', 'item'] proj.extend(header[5:]) proj.append('rating') train = pd.merge(data, items, on='item', how='inner')[proj] n_users = np.unique(train[["user"]].values).shape[0] n_items = np.unique(train[["item"]].values).shape[0] print("Number of users: %s" % n_users) print("Number of items: %s" % n_items) model = FM(n_iter=10, learning_rate=1e-1, use_cuda=False) cv = KFold(folds=10, shuffle=True) interactions = train.values x = interactions[:, :-1] y = interactions[:, -1] for k, v in cross_validate(model, x, y, cv=cv, fit_params={ 'dic': { 'users': 0, 'items': 1
# Interaction log: one row per (user, track) with playcounts.
df = pd.read_csv(PATH, sep=",", header=0)
# Track-side features; keep artist id and musical mode, keyed by tps_id.
feats = pd.read_csv(FEATS_PATH, sep=",", header=0)[['artist_id', 'mode', 'tps_id']]
train = pd.merge(df, feats, on="tps_id")
n_users = df.user_id.unique().shape[0]
n_items = df.tps_id.unique().shape[0]
n_artists = feats.artist_id.unique().shape[0]
interactions = train[['user_id', 'tps_id', 'artist_id',
                      'mode', 'playcounts']].values
logger = TLogger()
model = FM(n_iter=10,
           learning_rate=1e-1,
           logger=logger,
           n_jobs=4,
           use_cuda=False)
# Last column (playcounts) is the regression target; the rest is the input.
x = interactions[:, :-1]
y = interactions[:, -1]
# Column-index -> entity-kind mapping; artists is an extra categorical
# feature whose cardinality is supplied through `lengths`.
model.fit(x, y, {
    'users': 0,
    'items': 1,
    'artists': 2
}, n_users=n_users, n_items=n_items, lengths={"n_artists": n_artists})