예제 #1
0
def get_data():
    """Load MovieLens-100k and build a leave-latest-out train/test split.

    The ratings file is read from ``./data/ml-100k/u.data``, the ``user`` and
    ``item`` columns are re-indexed with ``reindex_col``, and for every user
    the interaction with the largest timestamp is held out for testing.

    Returns
    -------

    (df, train, test, train_sparse): tuple
        The re-indexed dataframe, the train and test ``Interactions``, and a
        binary CSR matrix of the training interactions shaped
        (number of users, number of items).
    """

    df = pd.read_csv('./data/ml-100k/u.data',
                     sep='\t',
                     names=['user', 'item', 'rating', 'timestamp'])

    df, idx_to_user = reindex_col(df, 'user')
    df, idx_to_item = reindex_col(df, 'item')

    # Hold out each user's most recent interaction; group_keys=False keeps the
    # original row index so the held-out rows can be removed from train below.
    test_df = df.groupby(
        ["user"], as_index=False,
        group_keys=False).apply(lambda x: x.nlargest(1, ["timestamp"]))
    train_df = df[~df.index.isin(test_df.index.values)]

    train_rows = train_df.user.values
    train_cols = train_df.item.values
    train_data = np.ones_like(train_rows)

    # The matrix is users x items, so size it from the id ranges rather than
    # from the number of interactions (which only happened to be large enough).
    # Assumes reindex_col yields non-negative integer ids -- TODO confirm.
    num_users = int(df.user.max()) + 1
    num_items = int(df.item.max()) + 1
    train_sparse = csr_matrix((train_data, (train_rows, train_cols)),
                              shape=(num_users, num_items))

    train = Interactions(user_ids=train_df.user.values,
                         item_ids=train_df.item.values)
    test = Interactions(user_ids=test_df.user.values,
                        item_ids=test_df.item.values)
    return df, train, test, train_sparse
예제 #2
0
def user_based_train_test_split(interactions,
                                test_percentage=0.2,
                                random_state=None):
    """
    Split interactions into train and test sets by user id, so that
    all interactions of a given user end up on the same side of the
    split.

    Parameters
    ----------

    interactions: :class:`spotlight.interactions.Interactions`
        The interactions to split.
    test_percentage: float, optional
        The fraction of users to place in the test set.
    random_state: np.random.RandomState, optional
        The random state used to draw the hashing seed.

    Returns
    -------

    (train, test): (:class:`spotlight.interactions.Interactions`,
                    :class:`spotlight.interactions.Interactions`)
         A tuple of (train data, test data)
    """

    rng = np.random.RandomState() if random_state is None else random_state

    # Draw a seed from the uint32 range for the hash below.
    uint32_range = np.iinfo(np.uint32)
    seed = rng.randint(uint32_range.min, uint32_range.max, dtype=np.int64)

    # Hash every user id into one of 100 buckets; a user is a test user
    # when its bucket fraction falls below the requested percentage.
    hashed_users = murmurhash3_32(interactions.user_ids,
                                  seed=seed,
                                  positive=True)
    bucket_fraction = hashed_users % 100 / 100.0
    in_test = bucket_fraction < test_percentage
    in_train = np.logical_not(in_test)

    def _select(mask):
        # Build a new Interactions holding only the rows flagged in mask.
        return Interactions(interactions.user_ids[mask],
                            interactions.item_ids[mask],
                            ratings=_index_or_none(interactions.ratings,
                                                   mask),
                            timestamps=_index_or_none(
                                interactions.timestamps, mask),
                            weights=_index_or_none(interactions.weights,
                                                   mask),
                            num_users=interactions.num_users,
                            num_items=interactions.num_items)

    train = _select(in_train)
    test = _select(in_test)

    return train, test
예제 #3
0
def timestamp_based_train_test_split(interactions, test_percentage=0.2):
    """
    Split every user's interactions between a train and a test set
    using a per-user timestamp threshold.

    For each user the threshold is
    ``floor(last_timestamp * (1 - test_percentage))``, where
    ``last_timestamp`` is the timestamp of the last row of that user's
    block. Rows whose timestamp is strictly greater than the threshold
    go to the test set, all others to the train set.

    The interactions must carry timestamps and are expected to be
    grouped by user in ascending user-id order: each user's block is
    taken as the rows between that user's first occurrence and the
    first occurrence of the next user id.

    Parameters
    ----------

    interactions: :class:`spotlight.interactions.Interactions`
        The interactions to split.
    test_percentage: float, optional
        Controls the per-user timestamp threshold described above.

    Returns
    -------

    (train, test): (:class:`spotlight.interactions.Interactions`,
                    :class:`spotlight.interactions.Interactions`)
         A tuple of (train data, test data)
    """

    num_interactions = len(interactions.user_ids)
    in_test = np.zeros(num_interactions, dtype=bool)

    # First row of every user's block. Blocks are addressed by position in
    # this array, not by the user id itself, so ids need not be 0..n-1.
    _, block_starts = np.unique(interactions.user_ids, return_index=True)
    block_ends = np.append(block_starts[1:], num_interactions)

    for start_idx, end_idx in zip(block_starts, block_ends):
        user_timestamps = interactions.timestamps[start_idx:end_idx]
        time_threshold = np.floor(user_timestamps[-1] * (1 - test_percentage))
        in_test[start_idx:end_idx] = user_timestamps > time_threshold

    in_train = np.logical_not(in_test)

    train = Interactions(interactions.user_ids[in_train],
                         interactions.item_ids[in_train],
                         ratings=_index_or_none(interactions.ratings,
                                                in_train),
                         timestamps=_index_or_none(interactions.timestamps,
                                                   in_train),
                         weights=_index_or_none(interactions.weights,
                                                in_train),
                         num_users=interactions.num_users,
                         num_items=interactions.num_items)
    test = Interactions(interactions.user_ids[in_test],
                        interactions.item_ids[in_test],
                        ratings=_index_or_none(interactions.ratings, in_test),
                        timestamps=_index_or_none(interactions.timestamps,
                                                  in_test),
                        weights=_index_or_none(interactions.weights, in_test),
                        num_users=interactions.num_users,
                        num_items=interactions.num_items)

    return train, test
예제 #4
0
def random_train_test_split(interactions,
                            test_percentage=0.2,
                            random_state=None):
    """
    Shuffle the interactions and cut them into a train and a test part.

    Parameters
    ----------

    interactions: :class:`spotlight.interactions.Interactions`
        The interactions to split.
    test_percentage: float, optional
        The fraction of interactions to place in the test set.
    random_state: np.random.RandomState, optional
        The random state used for the shuffle.

    Returns
    -------

    (train, test): (:class:`spotlight.interactions.Interactions`,
                    :class:`spotlight.interactions.Interactions`)
         A tuple of (train data, test data)
    """

    shuffled = shuffle_interactions(interactions,
                                    random_state=random_state)

    # Everything before the cutoff is train, everything after is test.
    cutoff = int((1.0 - test_percentage) * len(shuffled))

    parts = []
    for part_idx in (slice(None, cutoff), slice(cutoff, None)):
        parts.append(
            Interactions(shuffled.user_ids[part_idx],
                         shuffled.item_ids[part_idx],
                         ratings=_index_or_none(shuffled.ratings, part_idx),
                         timestamps=_index_or_none(shuffled.timestamps,
                                                   part_idx),
                         weights=_index_or_none(shuffled.weights, part_idx),
                         num_users=shuffled.num_users,
                         num_items=shuffled.num_items))

    train, test = parts

    return train, test
예제 #5
0
def test_known_output_step_1():
    """Check ``to_sequence`` output for one user with 5 items and step 1."""

    item_ids = np.arange(5) + 1
    interactions = Interactions(np.zeros(5),
                                item_ids,
                                timestamps=np.arange(5))

    sequence_interactions = interactions.to_sequence(max_sequence_length=5,
                                                     step_size=1)

    # One row per window, left-padded with zeros.
    expected = np.array([
        [1, 2, 3, 4, 5],
        [0, 1, 2, 3, 4],
        [0, 0, 1, 2, 3],
        [0, 0, 0, 1, 2],
        [0, 0, 0, 0, 1],
    ])

    assert np.all(sequence_interactions.sequences == expected)
    def obtener_interacciones_gui(self, ruta_ratings, sep_ratings, encoding_ratings):
        """
        Build the interactions needed to create the Spotlight models.

        This method is only used by the web interface. The resulting train
        and test interactions are stored in the module-level ``train`` and
        ``test`` names and saved with ``guardar_datos_pickle``.

        Parameters
        ----------

        ruta_ratings: str
            path of the file that contains the ratings.
        sep_ratings: str
            separator used in the ratings file.
        encoding_ratings: str
            encoding used in the ratings file.
        """

        global train, test

        # Load the ratings dataframe and order it by its first two columns
        ratings_df = Entrada.leer_csv(ruta_ratings, sep_ratings, encoding_ratings)
        column_names = ratings_df.columns.values
        ratings_df.sort_values([column_names[0], column_names[1]], inplace=True)

        def _column_as_array(position, dtype):
            # Column at the given position as a numpy array of the given dtype
            return np.asarray(ratings_df[column_names[position]].tolist(), dtype=dtype)

        # Arrays with the user ids and the item ids
        users_ids = _column_as_array(0, np.int32)
        items_ids = _column_as_array(1, np.int32)

        # Optional fields: timestamps when opcion_time is 1, ratings when opcion_modelo is 1
        with_timestamps = self.opcion_time == 1
        optional_fields = {}
        if with_timestamps:
            optional_fields['timestamps'] = _column_as_array(3, np.int32)
        if self.opcion_modelo == 1:
            optional_fields['ratings'] = _column_as_array(2, np.float32)

        # Turn the ratings into interactions usable by the models and split them
        interacciones = Interactions(users_ids, items_ids, **optional_fields)
        train, test = random_train_test_split(interacciones)

        # Model option 3 with timestamps works on sequences
        if with_timestamps and self.opcion_modelo == 3:
            train = train.to_sequence()
            test = test.to_sequence()

        # Save the train and test interactions
        print("Guarda las interacciones de train")
        guardar_datos_pickle(train, 'las interacciones de entrenamiento')
        print("Guarda las interacciones de test")
        guardar_datos_pickle(test, 'las interacciones de test')
예제 #7
0
def time_based_train_test_split(interactions,
                                test_percentage=0.2,
                                test_drop_unknown=True):
    """
    Split interactions chronologically into a train and a test set.

    Interactions are ordered by timestamp; the earliest
    ``1 - test_percentage`` fraction becomes the train set and the
    remainder the test set.

    Parameters
    ----------

    interactions: :class:`spotlight.interactions.Interactions`
        The interactions to split. Must have timestamps.
    test_percentage: float, optional
        The fraction of interactions to place in the test set.
    test_drop_unknown: bool, optional
        If True, test interactions whose item does not occur in the
        train set are dropped.

    Returns
    -------

    (train, test): (:class:`spotlight.interactions.Interactions`,
                    :class:`spotlight.interactions.Interactions`)
         A tuple of (train data, test data)
    """
    assert interactions.timestamps is not None

    cutoff = int((1.0 - test_percentage) * len(interactions))

    indices = np.argsort(interactions.timestamps)

    train_idx = slice(None, cutoff)
    test_idx = slice(cutoff, None)

    # Reorder once instead of re-applying the fancy index for every use.
    sorted_user_ids = interactions.user_ids[indices]
    sorted_item_ids = interactions.item_ids[indices]
    train_item_ids = sorted_item_ids[train_idx]
    test_item_ids = sorted_item_ids[test_idx]

    train = Interactions(sorted_user_ids[train_idx],
                         train_item_ids,
                         ratings=_index_or_none(interactions.ratings,
                                                train_idx, indices),
                         timestamps=_index_or_none(interactions.timestamps,
                                                   train_idx, indices),
                         weights=_index_or_none(interactions.weights,
                                                train_idx, indices),
                         num_users=interactions.num_users,
                         num_items=interactions.num_items)

    if test_drop_unknown:
        # Keep a test row only when its item was seen in training. This is
        # the vectorised form of the former per-element scan and also copes
        # with an empty test slice.
        mask = np.isin(test_item_ids, train_item_ids)
    else:
        mask = np.ones(test_item_ids.shape[0], dtype=bool)

    test = Interactions(sorted_user_ids[test_idx][mask],
                        test_item_ids[mask],
                        ratings=_index_or_none(
                            _index_or_none(interactions.ratings, test_idx,
                                           indices), mask),
                        timestamps=_index_or_none(
                            _index_or_none(interactions.timestamps, test_idx,
                                           indices), mask),
                        weights=_index_or_none(
                            _index_or_none(interactions.weights, test_idx,
                                           indices), mask),
                        num_users=interactions.num_users,
                        num_items=interactions.num_items)

    return train, test
예제 #8
0
def test_known_output_step_2():
    """Check ``to_sequence`` output for one user with 5 items and step 2."""

    item_ids = np.arange(5) + 1
    interactions = Interactions(np.zeros(5),
                                item_ids,
                                timestamps=np.arange(5))

    sequence_interactions = interactions.to_sequence(max_sequence_length=5,
                                                     step_size=2)

    # With a step of 2 every other window is emitted, left-padded with zeros.
    expected = np.array([[1, 2, 3, 4, 5], [0, 0, 1, 2, 3], [0, 0, 0, 0, 1]])

    assert np.all(sequence_interactions.sequences == expected)
예제 #9
0
def get_movielens_dataset(variant='100K'):
    """
    Fetch one of the Movielens datasets and wrap it in Interactions.

    Parameters
    ----------

    variant: string, optional
         Name of the Movielens dataset to fetch. Must be one of the
         entries of ``VARIANTS``.

    Returns
    -------

    Interactions: :class:`spotlight.interactions.Interactions`
        instance of the interactions class

    Raises
    ------

    ValueError
        If ``variant`` is not one of ``VARIANTS``.
    """

    if variant in VARIANTS:
        dataset_name = 'movielens_{}'.format(variant)
        dataset_fields = _get_movielens(dataset_name)
        return Interactions(*dataset_fields)

    raise ValueError('Variant must be one of {}, '
                     'got {}.'.format(VARIANTS, variant))
예제 #10
0
def shuffle_interactions(interactions, random_state=None):
    """
    Return a copy of the interactions with their rows in random order.

    Parameters
    ----------

    interactions: :class:`spotlight.interactions.Interactions`
        The interactions to shuffle.
    random_state: np.random.RandomState, optional
        The random state used for the shuffle.

    Returns
    -------

    interactions: :class:`spotlight.interactions.Interactions`
        The shuffled interactions.
    """

    rng = np.random.RandomState() if random_state is None else random_state

    # One permutation of row positions, applied to every field alike.
    permutation = np.arange(len(interactions.user_ids))
    rng.shuffle(permutation)

    shuffled_users = interactions.user_ids[permutation]
    shuffled_items = interactions.item_ids[permutation]
    shuffled_ratings = _index_or_none(interactions.ratings, permutation)
    shuffled_timestamps = _index_or_none(interactions.timestamps, permutation)
    shuffled_weights = _index_or_none(interactions.weights, permutation)

    return Interactions(shuffled_users,
                        shuffled_items,
                        ratings=shuffled_ratings,
                        timestamps=shuffled_timestamps,
                        weights=shuffled_weights,
                        num_users=interactions.num_users,
                        num_items=interactions.num_items)
    def build_interactions_object(self, df_interactions: pd.DataFrame, df_timestamps: pd.DataFrame, df_weights: pd.DataFrame) -> Interactions:
        """Build a Spotlight interactions object for user-cashtag interactions.

        Combines the user/item id mapping, the associated timestamps and the
        interaction counts (L2-normalised) into a single ``Interactions``
        instance and logs it.

        Args:
            df_interactions (pandas.DataFrame): User-item interactions; read through its
                ``user_id`` and ``item_tag_ids`` columns.
            df_timestamps (pandas.DataFrame): Timestamps for the rows of df_interactions;
                the first value of every row is used.
            df_weights (pandas.DataFrame): Weights for the rows of df_interactions; read
                through its ``count`` column.

        Returns:
            spotlight.interactions.Interactions: The assembled interactions object.

        """

        logger = logging.getLogger()

        user_ids = df_interactions['user_id'].values.astype(int)
        cashtag_ids = df_interactions['item_tag_ids'].values.astype(int)

        timestamp_rows = df_timestamps.values
        weights = np.array(df_weights['count'].values)

        # NOTE: flagged as incorrect by the original author -- the L2 norm is
        # taken over all weights at once instead of separately per id.
        l2_norm = np.sqrt(np.sum(weights**2))
        normalised_weights = weights / l2_norm

        first_timestamps = np.array([int(row[0]) for row in timestamp_rows])

        interactions = Interactions(
            user_ids=user_ids,
            item_ids=cashtag_ids,
            timestamps=first_timestamps,
            weights=normalised_weights
        )
        logger.info("Build interactions object: {}".format(interactions))
        return interactions
예제 #12
0
def get_test(myfile):
    """
    Load the testing set from ``myfile`` via ``test_load`` and wrap it
    in a spotlight.interactions.Interactions object.
    """

    # test_load yields the positional arguments for Interactions
    test_fields = test_load(myfile)
    return Interactions(*test_fields)
예제 #13
0
def get_train(myfile):
    """
    Load the training set from ``myfile`` via ``train_load`` and wrap it
    in a spotlight.interactions.Interactions object.
    """

    # train_load yields the positional arguments for Interactions
    train_fields = train_load(myfile)
    return Interactions(*train_fields)
def individual_predictions(df, model):
    """Predict item scores for each user from one of their sequences.

    Builds sequences (max length 15) from the ``user_id``, ``item_id`` and
    ``entry_at`` columns of ``df`` and fills one row of scores per user.
    A sequence is only used when its user id equals the running counter,
    which starts at 0 and advances by one after each match.

    Returns an array of shape (number of users, number of items + 1).
    """
    user_count = len(df['user_id'].unique())
    item_count = len(df['item_id'].unique())
    predictions = np.zeros(shape=(user_count, item_count + 1))

    dataset = Interactions(user_ids=np.array(df['user_id'], dtype='int32'),
                           item_ids=np.array(df['item_id'], dtype='int32'),
                           timestamps=df['entry_at'])
    sequences = dataset.to_sequence(max_sequence_length=15)

    expected_user = 0
    for user, sequence in zip(sequences.user_ids, sequences.sequences):
        # Skip every sequence that does not belong to the user we wait for.
        if not (user == expected_user):
            continue
        predictions[user] = model.predict(sequence)
        expected_user += 1

    return predictions
def create_input_for_spotlight(user_id, movie_id, ratings):
    """
    Wrap user ids, movie ids and ratings in an Interactions object.

    :param user_id: a list containing the ids of the users
    :param movie_id: a list containing the ids of the movies
    :param ratings: a list containing the corresponding ratings
    :return: Interactions object built from the three inputs, passed
        positionally in this order
    """
    interactions = Interactions(user_id, movie_id, ratings)
    return interactions
    def obtener_interacciones(self):
        """
        Build the interactions needed by the Spotlight models.

        This method is only used by the text interface. The resulting train
        and test interactions are stored in the module-level ``train`` and
        ``test`` names and saved with ``guardar_datos_pickle``.
        """

        global train, test

        # Load the ratings dataframe
        Entrada.obtener_datos()
        ratings_df = Entrada.ratings_df
        column_names = ratings_df.columns.values

        def _column_as_array(position, dtype):
            # Column at the given position as a numpy array of the given dtype
            return np.asarray(ratings_df[column_names[position]].tolist(), dtype=dtype)

        # Arrays with the user ids and the item ids
        users_ids = _column_as_array(0, np.int32)
        items_ids = _column_as_array(1, np.int32)

        # Optional fields: timestamps when opcion_time is 1, ratings when opcion_modelo is 1
        with_timestamps = self.opcion_time == 1
        optional_fields = {}
        if with_timestamps:
            optional_fields['timestamps'] = _column_as_array(3, np.int32)
        if self.opcion_modelo == 1:
            optional_fields['ratings'] = _column_as_array(2, np.float32)

        # Turn the ratings into interactions usable by the models and split them
        interacciones = Interactions(users_ids, items_ids, **optional_fields)
        train, test = random_train_test_split(interacciones)

        # Model option 3 with timestamps works on sequences
        if with_timestamps and self.opcion_modelo == 3:
            train = train.to_sequence()
            test = test.to_sequence()

        # Save the train and test interactions
        print("Guarda las interacciones de train")
        guardar_datos_pickle(train, 'las interacciones de entrenamiento')
        print("Guarda las interacciones de test")
        guardar_datos_pickle(test, 'las interacciones de test')
예제 #17
0
 def build_interactions_object(self, df_interactions: pd.DataFrame, df_timestamps: pd.DataFrame) -> Interactions:
     """Build a Spotlight interactions object from user/tag interaction counts.

     Args:
         df_interactions (pandas.DataFrame): Read through its ``user_id``,
             ``tag_id`` and ``count`` columns; ``count`` becomes the weights.
         df_timestamps (pandas.DataFrame): Timestamps for the rows of
             df_interactions; only used when ``self.filter`` is ``'hybrid'``.

     Returns:
         spotlight.interactions.Interactions: The assembled interactions object.
     """
     user_ids = df_interactions['user_id'].values.astype(int)
     cashtag_ids = df_interactions['tag_id'].values.astype(int)
     timestamps, weights = df_timestamps.values, np.array(df_interactions['count'].values)
     # Compare the filter name by value: `is` tests object identity, which is
     # not guaranteed for equal strings and warns on string literals.
     use_timestamps = self.filter == 'hybrid'
     interactions = Interactions(
         user_ids=user_ids,
         item_ids=cashtag_ids,
         timestamps=np.array([x for x in timestamps]) if use_timestamps else None,
         weights=weights
     )
     return interactions
예제 #18
0
def spotlight_algo(train, test, model, verbose=True):
    """Fit a Spotlight model on ``train`` and predict ratings for ``test``.

    Parameters
    ----------

    train: object with ``User``, ``Movie`` and ``Rating`` attributes
        Training ratings.
    test: object with ``User`` and ``Movie`` attributes
        The (user, movie) pairs to predict.
    model: object with ``fit`` and ``predict`` methods
        The model to train; fitted in place.
    verbose: bool, optional
        Forwarded to ``model.fit``.

    Returns
    -------

    predictions_df: pandas.DataFrame
        Columns ``User``, ``Movie`` and ``Rating`` (the predictions).
    """
    # Explicitly convert into datatypes needed by spotlight models
    user_tr = np.array(train.User, dtype=np.int32)
    movie_tr = np.array(train.Movie, dtype=np.int32)
    rating_tr = np.array(train.Rating, dtype=np.float32)
    user_te = np.array(test.User, dtype=np.int32)
    movie_te = np.array(test.Movie, dtype=np.int32)

    train_data = Interactions(user_ids=user_tr, item_ids=movie_tr, ratings=rating_tr)

    model.fit(train_data, verbose=verbose)

    # Prediction works on the raw id arrays, so no Interactions object is
    # built for the test pairs.
    predictions = model.predict(user_te, movie_te)

    predictions_df = pd.DataFrame()
    predictions_df['User'] = user_te
    predictions_df['Movie'] = movie_te
    predictions_df['Rating'] = predictions

    return predictions_df
예제 #19
0
def sparsify(interactions, drop_fraction, random_state=None):
    """
    Randomly drop a fraction of the interactions.

    Every interaction is kept independently when a uniform random draw
    exceeds ``drop_fraction``. Only user ids, item ids and timestamps are
    carried over to the result.

    Parameters
    ----------

    interactions: Interactions
        The interactions to thin out.
    drop_fraction: float
        Approximate fraction of interactions to drop.
    random_state: np.random.RandomState, optional
        The random state used for the draws.

    Returns
    -------

    Interactions
        The retained interactions.
    """

    if random_state is None:
        random_state = np.random.RandomState()

    indices = random_state.rand(len(interactions)) > drop_fraction

    it = interactions

    # Timestamps are optional; indexing None would raise a TypeError.
    timestamps = it.timestamps[indices] if it.timestamps is not None else None

    return Interactions(it.user_ids[indices],
                        it.item_ids[indices],
                        timestamps=timestamps,
                        num_users=it.num_users,
                        num_items=it.num_items)
예제 #20
0
def train(user_ids, item_ids, ratings, num_dimensions, verbose):
    """Fit an explicit factorization model and return its user embeddings.

    The ids are converted to int32 and the ratings to float32. The model
    uses the 'logistic' loss with ``num_dimensions`` embedding dimensions,
    and CUDA unless the module-level ``device`` is of type 'cpu'.

    Returns the user embedding weights as a numpy array.
    """
    user_array = np.array(user_ids, dtype=np.int32)
    item_array = np.array(item_ids, dtype=np.int32)
    rating_array = np.array(ratings, dtype=np.float32)
    dataset = Interactions(user_array, item_array, ratings=rating_array)

    is_cuda_available = device.type != 'cpu'

    model = ExplicitFactorizationModel(loss='logistic',
                                       use_cuda=is_cuda_available,
                                       embedding_dim=num_dimensions)
    model.fit(dataset, verbose=verbose)

    # Detach from the graph and move to host memory before converting.
    embedding_weights = model._net.user_embeddings.weight
    return embedding_weights.detach().cpu().numpy()
예제 #21
0
def get_goodbooks_dataset():
    """
    Fetch the goodbooks-10K dataset [2]_ and wrap it in Interactions.

    Returns
    -------

    Interactions: :class:`spotlight.interactions.Interactions`
        instance of the interactions class

    References
    ----------

    .. [2] https://github.com/zygmuntz/goodbooks-10k
    """

    # _get_dataset yields the positional arguments for Interactions
    dataset_fields = _get_dataset()
    return Interactions(*dataset_fields)
예제 #22
0
def load_interactions(path):
    """Load dumped interactions and their matching indexers.

    The CSV at ``path`` must provide ``user_id``, ``item_id``, ``timestamp``
    and ``weight`` columns (all cast to int32). The file's base name up to
    the first dot is parsed as an integer and used to locate the user and
    item indexer dumps under ``mrecsys.__dataset_path__``.

    Parameters
    ----------

    path: str
        Path of the interactions CSV file.

    Returns
    -------

    (interactions, reloaded_time, user_indexer, item_indexer), or None when
    anything goes wrong while loading (best-effort behaviour).
    """
    try:
        df = pd.read_csv(path)
        interactions = Interactions(
            user_ids=df['user_id'].astype(np.int32).values,
            item_ids=df['item_id'].astype(np.int32).values,
            timestamps=df['timestamp'].astype(np.int32).values,
            weights=df['weight'].astype(np.int32).values)
        reloaded_time = int(os.path.basename(path).split('.')[0])

        user_indexer = Indexer(dumped_filepath=os.path.join(
            mrecsys.__dataset_path__, 'dicts/user_to_index/{}.json'.format(
                reloaded_time)))
        item_indexer = Indexer(dumped_filepath=os.path.join(
            mrecsys.__dataset_path__, 'dicts/item_to_index/{}.json'.format(
                reloaded_time)))
        return interactions, reloaded_time, user_indexer, item_indexer
    except Exception:
        # Loading stays best-effort, but KeyboardInterrupt and SystemExit
        # are no longer swallowed as a bare `except:` would.
        return None
예제 #23
0
def preprocess_rsc15(density_value = 1.0, limit_train = None, limit_test = None):
    """
    Return index-normalized data and sequences for train and test.

    density_value: randomly filter out events (0.0-1.0, 1: keep all);
        a falsy value is treated as 1.0
    limit_train: limit in number of rows or None
    limit_test: limit in number of rows or None

    Returns a tuple (dat, dat_seq, ind2val): the train/test dataframes
    keyed by 'train' and 'test', the matching sequence objects (max
    sequence length 10), and per-column dicts mapping each 1-based index
    back to its original value.
    """
    data_path = 'ludewig/data/rsc15/single/'
    file_prefix = 'rsc15-clicks'
    density_value = density_value or 1.0

    train, test = loader.load_data(data_path, file_prefix,
                                   rows_train=limit_train,
                                   rows_test=limit_test,
                                   density=density_value)
    frames = {'train': train, 'test': test}
    for frame in frames.values():
        frame.columns = ['sessionId', 'itemId', 'time']

    ind2val, val2ind = {}, {}
    for col in ('sessionId', 'itemId'):
        # Index the union of train and test values, starting at 1.
        vals = np.unique(np.concatenate((train[col].values, test[col].values)))
        ind2val[col] = dict(enumerate(vals, start=1))
        val2ind[col] = {val: key for key, val in ind2val[col].items()}
        lookup = val2ind[col]
        for frame in frames.values():
            frame[col + "_idx"] = frame[col].map(lambda x: lookup[x])

    # Transform into sequence interaction objects
    dat_seq = {
        name: Interactions(
            user_ids=frame.sessionId_idx.values,
            item_ids=frame.itemId_idx.values,
            timestamps=frame.time.values).to_sequence(max_sequence_length=10)
        for name, frame in frames.items()
    }

    return frames, dat_seq, ind2val
def train_model(df, hyperparams):
    """Train an implicit sequence model on ``df`` and report its test MRR.

    ``df`` is read through its ``user_id``, ``item_id`` and ``entry_at``
    columns. Users are split 80/20 into train and test with a fixed seed
    (42), both parts are converted to sequences, the model is built from
    ``hyperparams`` and fitted, and the mean MRR on the test sequences is
    printed.

    Returns the fitted model.
    """
    # Fix random_state
    seed = 42
    set_seed(seed)
    random_state = np.random.RandomState(seed)

    # Shared settings for turning interactions into sequences
    sequence_options = dict(max_sequence_length=15,
                            min_sequence_length=2,
                            step_size=1)

    # Dataset from the interactions dataframe and its timestamps
    user_ids = np.array(df['user_id'], dtype='int32')
    item_ids = np.array(df['item_id'], dtype='int32')
    dataset = Interactions(user_ids=user_ids,
                           item_ids=item_ids,
                           timestamps=df['entry_at'])

    # 80/20 user-based split, then conversion to sequences
    train, test = user_based_train_test_split(dataset,
                                              test_percentage=0.2,
                                              random_state=random_state)
    train = train.to_sequence(**sequence_options)
    test = test.to_sequence(**sequence_options)

    print('data: {}'.format(train))

    # Initialize and train the model
    model = ImplicitSequenceModel(**hyperparams,
                                  use_cuda=CUDA,
                                  random_state=random_state)
    model.fit(train, verbose=True)

    # MRR score on the test set
    test_mrr = sequence_mrr_score(model, test).mean()
    print('MRR score on test set: {}'.format(test_mrr))

    return model
예제 #25
0
    def _interactions_sequence_from_obs(self,
                                        obs,
                                        timestamp_col='first_timestamp',
                                        max_sequence_length=10,
                                        min_sequence_length=None,
                                        step_size=None,
                                        **kwargs):
        """Encode observations and convert them to Spotlight sequences.

        Sets ``obs.timestamp_col`` to ``timestamp_col`` (a side effect on
        the caller's object), encodes user and item ids with the encoders
        of ``self.sparse_mat_builder`` and returns the result of
        ``Interactions.to_sequence`` with the given sequence options.
        Extra keyword arguments are accepted and ignored.
        """

        obs.timestamp_col = timestamp_col

        builder = self.sparse_mat_builder
        encoded_user_ids = builder.uid_encoder.transform(
            obs.user_ids.astype(str)).astype('int32')
        # Encoded item ids are shifted up by one -- presumably to keep 0
        # free for sequence padding; TODO confirm.
        encoded_item_ids = builder.iid_encoder.transform(
            obs.item_ids.astype(str)).astype('int32') + 1

        interactions = Interactions(user_ids=encoded_user_ids,
                                    item_ids=encoded_item_ids,
                                    ratings=obs.ratings,
                                    timestamps=obs.timestamps)

        return interactions.to_sequence(
            max_sequence_length=max_sequence_length,
            min_sequence_length=min_sequence_length,
            step_size=step_size)
예제 #26
0
파일: pred.py 프로젝트: 3v1l91l/dss4
    'gender': 'Sender_gender',
    'index': 'Sender_index'
},
                        inplace=True)
# Attach the receiver's user attributes: left-join `users` on Receiver_id
# against the index of `users`.
finder_decisions = finder_decisions.merge(users,
                                          how='left',
                                          left_on='Receiver_id',
                                          right_index=True)
# Prefix the freshly merged columns so they read as receiver attributes.
finder_decisions.rename(columns={
    'age': 'Receiver_age',
    'gender': 'Receiver_gender',
    'index': 'Receiver_index'
},
                        inplace=True)

# Encode every decision as a float32 rating: -1 for 'skip', +1 otherwise.
ratings = np.ones(len(finder_decisions))
ratings[finder_decisions['Decision'] == 'skip'] = -1
ratings = ratings.astype(np.float32)
# Senders take the user role and receivers the item role.
dataset = Interactions(finder_decisions['Sender_index'].values,
                       finder_decisions['Receiver_index'].values, ratings)

from spotlight.cross_validation import random_train_test_split

# Fixed seed so the split is reproducible.
train, test = random_train_test_split(dataset,
                                      random_state=np.random.RandomState(42))

# NOTE(review): torch.load unpickles the file -- only load model files from
# a trusted source.
spotlight_model = torch.load('spotlight.model')

predictions = spotlight_model.predict(test.user_ids, test.item_ids)
# Fraction of predictions exactly equal to the rating.
# NOTE(review): if predict returns continuous scores, exact equality with
# +/-1 will rarely hold -- confirm the model's output range.
print((predictions == test.ratings).sum() / len(predictions))
예제 #27
0
# Locations of the training ratings and of the submission template whose
# (user, movie) pairs are to be predicted.
train_dataset = "./datas/data_train.csv"
test_dataset = "./datas/sampleSubmission.csv"

train = load_dataset(train_dataset)
test = load_dataset(test_dataset)

# Explicitly convert into datatypes needed by spotlight models
user_tr = np.array(train.User, dtype=np.int32)
movie_tr = np.array(train.Movie, dtype=np.int32)
rating_tr = np.array(train.Rating, dtype=np.float32)
# The test arrays must come from the test frame; they were previously built
# from the training frame, which left `test` unused.
user_te = np.array(test.User, dtype=np.int32)
movie_te = np.array(test.Movie, dtype=np.int32)

# Transform into Spotlight interactions
train_data = Interactions(user_ids=user_tr,
                          item_ids=movie_tr,
                          ratings=rating_tr)
test_data = Interactions(user_ids=user_te, item_ids=movie_te)

# Candidate values for the hyper-parameter search
loss = ['regression', 'logistic', 'poisson']
n_iter = [50, 100, 200]
batch_size = [256, 512, 1024, 2048, 4096]
l2 = np.logspace(-10, -3, 8)
learning_rate = np.logspace(-10, -3, 8)
embedding_dim = [20, 50, 100, 150, 200]

def best_params_spotlight(losses,
                          n_iters,
                          batch_sizes,
                          l2s,
예제 #28
0
def spotlight_interactions_from_sparse(sp_mat):
    """Build Interactions from a sparse matrix.

    The matrix is converted to COO format; its row indices become the user
    ids, its column indices the item ids and its stored values the ratings.
    """
    coo = sp_mat.tocoo()
    user_ids, item_ids, ratings = coo.row, coo.col, coo.data
    return Interactions(user_ids=user_ids,
                        item_ids=item_ids,
                        ratings=ratings)
    hit_ratio, ndcg = -1, -1
    logging.info(
        '******** [Epoch {}]  Embs NDCG {:.4f}, Hit Ratio: {:.4f}, NDCG: {:.4f}'
        .format(epoch_num, pairs_ndcg, hit_ratio, ndcg))
    torch.save(net, model_store_dir + "/" + model_alias + "-" + str(epoch_num))


# Count users and items on the full training data, so the sizes do not
# depend on which rows survive the sampling below.
num_users = len(original_train_data["uindex"].unique())
num_items = len(original_train_data["vindex"].unique())

# Train on a random fraction (train_sample) of the rows.
train_data = original_train_data.sample(frac=train_sample)

# Positional arguments: user ids, item ids, then "pct_cvt" and
# "latest_watch_time" -- presumably the ratings and timestamps slots of
# Interactions; verify against its signature.
interactions = Interactions(
    train_data["uindex"].to_numpy(),
    train_data["vindex"].to_numpy(),
    train_data["pct_cvt"].to_numpy(),
    train_data["latest_watch_time"].to_numpy(),
    num_users=len(original_train_data["uindex"].unique()),
    num_items=len(original_train_data["vindex"].unique()))

# A dash-separated net_conf is read as
# "<factor_size>-<num_layers>-<loss_type>-<model_type>".
if "-" in net_conf:
    args = net_conf.split("-")
    config = {
        "factor_size": int(args[0]),
        "num_layers": int(args[1]),
        "loss_type": args[2],
        "model_type": args[3],
        "num_users": num_users,
        "num_items": num_items,
    }
 def interactions_to_sequence(f_train: Interactions, f_test: Interactions):
     """Convert the train and test interactions to sequences.

     Returns a tuple (train, test) holding the result of ``to_sequence()``
     for each input, called with its default arguments.
     """
     train_sequences = f_train.to_sequence()
     test_sequences = f_test.to_sequence()
     return train_sequences, test_sequences
예제 #31
0
    target_pos_y = ui.pos().y() + ui.textbox.pos().y()
    mouse.position = (target_pos_x, target_pos_y)
    mouse.click(Button.left)
    mouse.position = mouse_pos_before


def tray_icon_activated(reason):
    """Show the UI when the tray icon is activated by a left click."""
    # tray.Trigger is the activation reason for a left click
    is_left_click = reason == tray.Trigger
    if not is_left_click:
        return
    show_ui()


# Queues created once at start-up; song_queue is handed to the interactions
# object below.
song_queue = SongQueue()
image_queue = ImageQueue()

# Create the interactions object from the Spotify client, the auth data,
# the exit callback and the song queue
interactions = Interactions(sp, token_info, sp_oauth, exit_app, song_queue)

# UI
ui = Ui(interactions, sp)

# Create icon
icon = QIcon(f"{ASSETS_DIR}img{sep}logo_small.png")

# Create tray: assign the icon, make it visible and set its tooltip
tray = QSystemTrayIcon()
tray.setIcon(icon)
tray.setVisible(True)
tray.setToolTip("Spotlightify")

# Create menu
menu = QMenu()