Code example #1
def als_model(self, dataset):
    return WALSModel(
        dataset.n_students,
        dataset.n_courses,
        self.num_factors,
        regularization=self.regularization,
        unobserved_weight=0)
Code example #2
File: algo.py Project: sportsapril/RecEng
def run_wals(data,
             dim,
             reg,
             unobs,
             weights=False,
             wt_type=LINEAR_RATINGS,
             feature_wt_exp=None,
             obs_wt=LINEAR_OBS_W):
    """Create the WALSModel and input, row and col factor tensors.

  Inputs:
    data:           scipy coo_matrix of item ratings
    dim:            number of latent factors
    reg:            regularization constant
    unobs:          unobserved item weight
    weights:        True: set obs weights, False: obs weights = unobs weights
    wt_type:        feature weight type: linear (0) or log (1)
    feature_wt_exp: feature weight exponent constant
    obs_wt:         feature weight linear factor constant

  Outputs:
    input_tensor:   tensor holding the input ratings matrix
    row_factor:     tensor for row_factor
    col_factor:     tensor for col_factor
    model:          WALSModel instance
  """
    row_wts = None
    col_wts = None

    num_rows = data.shape[0]
    num_cols = data.shape[1]

    if weights:
        assert feature_wt_exp is not None
        row_wts = np.ones(num_rows)
        col_wts = make_wts(data, wt_type, obs_wt, feature_wt_exp, 0)

    row_factor = None
    col_factor = None

    with tf.Graph().as_default():

        input_tensor = tf.SparseTensor(
            # list() is needed here: tf.SparseTensor does not accept a
            # Python 3 zip iterator as its indices argument.
            indices=list(zip(data.row, data.col)),
            values=(data.data).astype(np.float32),
            dense_shape=data.shape)

        model = WALSModel(num_rows,
                          num_cols,
                          dim,
                          unobserved_weight=unobs,
                          regularization=reg,
                          row_weights=row_wts,
                          col_weights=col_wts)

        # retrieve the row and column factors
        row_factor = model.row_factors[0]
        col_factor = model.col_factors[0]

    return input_tensor, row_factor, col_factor, model
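
The tensors returned by run_wals live in a fresh graph, so training happens by re-entering that graph and running the WALS sweep ops in a session. Below is a minimal usage sketch mirroring the training loops further down this page; the epoch count and hyperparameter values are assumptions, not part of the original code.

# Hedged usage sketch for run_wals; `data` is a scipy coo_matrix as above,
# and the hyperparameter values are illustrative only.
input_tensor, row_factor, col_factor, model = run_wals(
    data, dim=30, reg=0.1, unobs=0.01)

with input_tensor.graph.as_default(), tf.Session() as sess:
    row_update_op = model.update_row_factors(sp_input=input_tensor)[1]
    col_update_op = model.update_col_factors(sp_input=input_tensor)[1]

    sess.run(model.initialize_op)
    sess.run(model.worker_init)
    for _ in range(20):  # number of ALS sweeps; an arbitrary choice
        # Alternate: solve for the row factors, then the column factors.
        sess.run(model.row_update_prep_gramian_op)
        sess.run(model.initialize_row_update_op)
        sess.run(row_update_op)
        sess.run(model.col_update_prep_gramian_op)
        sess.run(model.initialize_col_update_op)
        sess.run(col_update_op)

    output_row = row_factor.eval(session=sess)
    output_col = col_factor.eval(session=sess)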
Code example #3
def get_model(data, ncomponents=10, unobserved_weight=0, regularization=0.05):
    nrows, ncols = data.shape
    r_weight = np.ones(nrows)
    c_weight = np.ones(ncols)

    with tf.Graph().as_default():
        tensor = tf.SparseTensor(np.column_stack((data.row, data.col)),
                                 (data.data).astype(np.float32), data.shape)
        model = WALSModel(nrows,
                          ncols,
                          ncomponents,
                          unobserved_weight,
                          regularization,
                          row_weights=r_weight,
                          col_weights=c_weight)
    return tensor, model.row_factors[0], model.col_factors[0], model
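
As a hedged usage sketch of get_model: it expects a scipy coo_matrix, so a call might look like the following (the toy ratings are invented for illustration; tf, np and WALSModel are assumed imported as in the other examples).

# Hypothetical usage of get_model; toy data only.
import numpy as np
from scipy.sparse import coo_matrix

ratings = coo_matrix(
    (np.array([4.0, 5.0, 3.0]),        # observed ratings
     (np.array([0, 0, 1]),             # row (user) indices
      np.array([1, 2, 0]))),           # col (item) indices
    shape=(3, 4))

tensor, row_factor, col_factor, model = get_model(ratings, ncomponents=2)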
Code example #4
File: model.py Project: dhodun/movielens
def train_model(train_sparse,
                test_sparse,
                num_users,
                num_movies,
                args,
                verbose=False):
    tf.logging.info('Train Start: {:%Y-%m-%d %H:%M:%S}'.format(
        datetime.datetime.now()))

    with tf.Graph().as_default(), tf.Session() as sess:

        row_weights = np.ones(num_users)
        col_weights = np.ones(num_movies)

        if args.col_weight_bool:
            col_weights = make_weights(train_sparse,
                                       args.col_weight_factor,
                                       axis=0)

        if args.row_weight_bool:
            row_weights = make_weights(train_sparse,
                                       args.row_weight_factor,
                                       axis=1)

        # create model
        model = WALSModel(num_users,
                          num_movies,
                          args.num_factors,
                          regularization=args.regularization,
                          unobserved_weight=args.unobserved_weight,
                          row_weights=row_weights,
                          col_weights=col_weights)

        # create sparse tensor

        # list() is needed: tf.SparseTensor does not accept a Python 3
        # zip iterator as its indices argument.
        input_tensor = tf.SparseTensor(
            indices=list(zip(train_sparse.row, train_sparse.col)),
            values=(train_sparse.data).astype(np.float32),
            dense_shape=train_sparse.shape)

        test_tensor = tf.SparseTensor(
            indices=list(zip(test_sparse.row, test_sparse.col)),
            values=(test_sparse.data).astype(np.float32),
            dense_shape=test_sparse.shape)

        # train model

        rmse_op = rmse(model, input_tensor) if verbose else None
        rmse_test_op = rmse(model, test_tensor)

        row_update_op = model.update_row_factors(sp_input=input_tensor)[1]
        col_update_op = model.update_col_factors(sp_input=input_tensor)[1]

        model.initialize_op.run()
        model.worker_init.run()
        for _ in range(args.epochs):
            # Update Users
            model.row_update_prep_gramian_op.run()
            model.initialize_row_update_op.run()
            row_update_op.run()
            # Update Items
            model.col_update_prep_gramian_op.run()
            model.initialize_col_update_op.run()
            col_update_op.run()

            if verbose:
                train_metric = rmse_op.eval()
                test_metric = rmse_test_op.eval()
                tf.logging.info('RMSE Train: {:,.3f}'.format(train_metric))
                tf.logging.info('RMSE Test:  {:,.3f}'.format(test_metric))
                # TODO Collect these in variable for graphing later

        row_factor = model.row_factors[0].eval()
        col_factor = model.col_factors[0].eval()

    tf.logging.info('Train Finish: {:%Y-%m-%d %H:%M:%S}'.format(
        datetime.datetime.now()))

    return row_factor, col_factor
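
The rmse helper called above is not part of this snippet. One plausible sketch (an assumption, not necessarily the dhodun/movielens implementation) gathers the factor rows for each observed (user, movie) pair and compares the predicted ratings against the sparse values:

# Hedged sketch of an rmse(model, sparse_tensor) helper.
def rmse(model, input_tensor):
    row_factor = model.row_factors[0]
    col_factor = model.col_factors[0]
    # Split the (row, col) index pairs of the observed entries.
    row_idx, col_idx = tf.split(input_tensor.indices,
                                num_or_size_splits=2, axis=1)
    rows = tf.gather(row_factor, tf.squeeze(row_idx, axis=1))
    cols = tf.gather(col_factor, tf.squeeze(col_idx, axis=1))
    # Predicted rating = dot product of the user and movie factors.
    preds = tf.reduce_sum(rows * cols, axis=1)
    return tf.sqrt(tf.reduce_mean(tf.square(preds - input_tensor.values)))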
Code example #5
n_rows = len(users_from_idx)
n_cols = len(items_from_idx)
shape = (n_rows, n_cols)

P = tf.SparseTensor(indices, values, shape)

print(P)
print('Total values: {:,}'.format(n_rows * n_cols))

from tensorflow.contrib.factorization import WALSModel

k = 10
n = 10
reg = 1e-1

model = WALSModel(n_rows, n_cols, k, regularization=reg, unobserved_weight=0)

row_factors = tf.nn.embedding_lookup(params=model.row_factors,
                                     ids=tf.range(model._input_rows),
                                     partition_strategy="div")
col_factors = tf.nn.embedding_lookup(params=model.col_factors,
                                     ids=tf.range(model._input_cols),
                                     partition_strategy="div")

row_indices, col_indices = tf.split(P.indices, axis=1, num_or_size_splits=2)
gathered_row_factors = tf.gather(row_factors, row_indices)
gathered_col_factors = tf.gather(col_factors, col_indices)
approx_vals = tf.squeeze(
    tf.matmul(gathered_row_factors, gathered_col_factors, adjoint_b=True))
P_approx = tf.SparseTensor(indices=P.indices,
                           values=approx_vals,
                           dense_shape=P.dense_shape)
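
Since P and P_approx share the same index set, a reconstruction error over the observed entries is just an elementwise comparison of their values (a short sketch using the names above):

# RMSE over the observed entries only (sketch).
rmse_op = tf.sqrt(tf.reduce_mean(tf.square(P.values - approx_vals)))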
Code example #6
    with open(data_path, 'r') as f:
        data = json.load(f)

    indices = []
    values = []

    for idx, elem in enumerate(data):
        indices += zip([idx] * len(elem), elem)
        values += [1.0] * len(elem)
    with tf.Graph().as_default() as graph1:
        sp_mat = tf.SparseTensor(indices, values, [num_rows, num_cols])

        model = WALSModel(num_rows,
                          num_cols,
                          dimension,
                          unobserved_weight=0.5,
                          regularization=2.0,
                          row_weights=None,
                          col_weights=None)

        row_factors = model.row_factors[0]
        col_factors = model.col_factors[0]

        sess = tf.Session(graph=graph1)

        writer = tf.summary.FileWriter('walsmodels', graph1)

        row_update_op = model.update_row_factors(sp_mat)[1]
        col_update_op = model.update_col_factors(sp_mat)[1]

        sess.run(model.initialize_op)
Code example #7
File: model_old.py Project: dhodun/movielens
def als_model(self, dataset):
    return WALSModel(len(dataset["visitorid"].unique()),
                     len(dataset["itemid"].unique()),
                     self.num_factors,
                     regularization=self.regularization,
                     unobserved_weight=0)
Code example #8
File: reco.py Project: rfrowe/cse547
def reco(sess, inp, code, label, epsilon, train_dataset, dev_dataset, lr,
         weights_path):

    # Initialize hyperparameters
    # TODO: Proper tuning_threshold strategy, or is there a better stopping condition?
    # TODO: Grid search for reg_l2 tuning?  Currently only tune factor_dim
    factor_dim = 0
    reg_l2 = 0.1
    factor_loss_thresh = 1e-6
    tuning_thresh = 1e-6

    # Ratings matrix dimensions
    n_items = _train_utils.dataset_iter_len(
        sess,
        train_dataset.make_one_shot_iterator().get_next())
    n_users_train = 877
    n_users_dev = 110
    n_users_test = 110
    '''Placeholder labels
    label = np.random.randn(n_users_train + n_users_dev + n_users_test, 1)
    label = tf.convert_to_tensor(label, dtype=tf.float32)
    '''

    label_train = label[1:n_users_train + 1, -1]
    label_dev = label[n_users_train + 1:n_users_train + 1 + n_users_dev + 1,
                      -1]
    label_test = label[n_users_train + 1 + n_users_dev + 1:-1, -1]

    # Rating matrix
    # TODO: Random placeholder data for now.  Rating matrix must include all train/dev/test
    #       data.  Each row represents a user, and each column represents a feature. The label
    #       is to be included in the last feature column, with dev/test set labels removed.
    rating_matrix = np.random.randn(n_users_train + n_users_dev + n_users_test,
                                    n_items)

    input_tensor = tf.convert_to_tensor(rating_matrix, dtype=tf.float32)
    input_tensor = tf.contrib.layers.dense_to_sparse(input_tensor)

    # Tune model using increasing latent factor matrix dimension
    losscrit = np.inf
    while losscrit > tuning_thresh:

        factor_dim += 1

        # Weighted alternating least squares model (causes deprecation warning)
        model = WALSModel(n_users_train + n_users_dev + n_users_test,
                          n_items,
                          factor_dim,
                          regularization=reg_l2,
                          row_weights=None,
                          col_weights=None)

        # Retrieve row and column factors
        users_factor = model.row_factors[0]
        items_factor = model.col_factors[0]

        # Initialize training
        row_update_op = model.update_row_factors(sp_input=input_tensor)[1]
        col_update_op = model.update_col_factors(sp_input=input_tensor)[1]
        sess.run(model.initialize_op)
        sess.run(model.worker_init)

        # Update latent factor matrices via Alternating Least Squares until matrix decomposition converges
        u_factor_old = users_factor.eval(session=sess)
        i_factor_old = items_factor.eval(session=sess)
        factor_loss = np.inf
        while factor_loss > factor_loss_thresh:
            sess.run(model.row_update_prep_gramian_op)
            sess.run(model.initialize_row_update_op)
            sess.run(row_update_op)
            sess.run(model.col_update_prep_gramian_op)
            sess.run(model.initialize_col_update_op)
            sess.run(col_update_op)

            u_factor_new = users_factor.eval(session=sess)
            i_factor_new = items_factor.eval(session=sess)
            factor_loss = max(np.linalg.norm(u_factor_new - u_factor_old),
                              np.linalg.norm(i_factor_new - i_factor_old))

            u_factor_old = u_factor_new
            i_factor_old = i_factor_new

        # Predictions
        pred_fun = tf.matmul(users_factor, items_factor, transpose_b=True)
        pred = sess.run(pred_fun)
        pred_train = pred[1:n_users_train + 1, -1]
        pred_dev = pred[n_users_train + 1:n_users_train + 1 + n_users_dev + 1,
                        -1]
        pred_test = pred[n_users_train + 1 + n_users_dev + 1:-1, -1]

        # Performance
        loss_fun = tf.math.reduce_sum(tf.math.square(
            tf.abs(pred - label))) + tf.nn.l2_loss(
                users_factor) + tf.nn.l2_loss(items_factor)
        losscrit = sess.run(loss_fun)
        train_loss = sess.run(tf.reduce_mean(tf.abs(pred_train - label_train)))
        dev_loss = sess.run(tf.reduce_mean(tf.abs(pred_dev - label_dev)))
        test_loss = sess.run(tf.reduce_mean(tf.abs(pred_test - label_test)))
Code example #9
File: wrmf.py Project: WyAzx/RSinAction
class WRMFRecommender(object):
    def __init__(self, config):
        """
        推荐模型初始化
        :param config:
            data: 训练数据
            user_map: User映射文件
            item_map: Item映射文件
            weight_type: 权重矩阵初始化策略:['user'|'item']
            weights: 是否加权
            wt_type: 权重值线性或指数变换
            obs_wt: 权重线性变换参数
            feature_wt_exp: 权重指数变换参数
            dim: 隐状态维度
            unobs: 缺失值初始化大小
            reg: 正则化参数
            num_iterations: 迭代次数
            save_path: 模型保存路径
            topn: 推荐结果个数
        """
        self.data = config['data']
        self.test = config['val']
        self.user_map = config['user_map']
        self.item_map = config['item_map']
        self.weight_type = config['weight_type']
        self.weights = config['weights']
        self.wt_type = config['wt_type']
        self.obs_wt = config['obs_wt']
        self.feature_wt_exp = config['feature_wt_exp']
        self.dim = config['dim']
        self.unobs = config['unobs']
        self.reg = config['reg']
        self.num_iterations = config['num_iterations']
        self.save_path = config['save_path']
        self.topn = config['topn']

        self.output_row = None
        self.output_col = None
        self.row_wts = None
        self.col_wts = None

    def _build_model(self):
        """
        构建wALS算法计算图
        :return:
        """

        num_rows = self.data.shape[0]
        num_cols = self.data.shape[1]

        # Weight-matrix initialization strategies:
        # 1. User orientation: missing values are averaged within each user
        # 2. Item orientation: missing values are averaged within each item
        if self.weights:
            if self.weight_type == 'user':
                self.row_wts = np.ones(num_rows)
                self.col_wts = self._make_wts(self.data, self.wt_type,
                                              self.obs_wt, self.feature_wt_exp,
                                              0)
            elif self.weight_type == 'item':
                self.col_wts = np.ones(num_cols)
                self.row_wts = self._make_wts(self.data, self.wt_type,
                                              self.obs_wt, self.feature_wt_exp,
                                              1)

        with tf.Graph().as_default():
            self.input_tensor = tf.SparseTensor(
                indices=list(zip(self.data.row, self.data.col)),
                values=(self.data.data).astype(np.float32),
                dense_shape=self.data.shape)
            self.model = WALSModel(num_rows,
                                   num_cols,
                                   self.dim,
                                   unobserved_weight=self.unobs,
                                   regularization=self.reg,
                                   row_weights=self.row_wts,
                                   col_weights=self.col_wts)

            self.row_factor = self.model.row_factors[0]
            self.col_factor = self.model.col_factors[0]

    def eval_train_tf(self):
        """
        训练模型
        :return:
        """
        tf.logging.info('Train Start: {:%Y-%m-%d %H:%M:%S}'.format(
            datetime.datetime.now()))
        self._build_model()
        self.sess = tf.Session(graph=self.input_tensor.graph)
        self.saver = tf.train.Saver([self.row_factor, self.col_factor])

        with self.input_tensor.graph.as_default():
            self.load_tf_model()
            row_update_op = self.model.update_row_factors(
                sp_input=self.input_tensor)[1]
            col_update_op = self.model.update_col_factors(
                sp_input=self.input_tensor)[1]

            self.sess.run(self.model.initialize_op)
            self.sess.run(self.model.worker_init)
            for i in range(self.num_iterations):
                self.sess.run(self.model.row_update_prep_gramian_op)
                self.sess.run(self.model.initialize_row_update_op)
                self.sess.run(row_update_op)
                self.sess.run(self.model.col_update_prep_gramian_op)
                self.sess.run(self.model.initialize_col_update_op)
                self.sess.run(col_update_op)
                self.output_row = self.row_factor.eval(session=self.sess)
                self.output_col = self.col_factor.eval(session=self.sess)
                if i % 2 == 0:
                    self.eval_ranking(self.topn)
                    # self.save_tf_model(i)
        tf.logging.info('Train Finish: {:%Y-%m-%d %H:%M:%S}'.format(
            datetime.datetime.now()))
        self.sess.close()

    def eval_test(self, user_idx):
        """
        获取测试集特定用户的评价物品
        :param user_idx: 用户id
        :return: 测试集用户评价物品列表
        """
        return self.test.getrow(user_idx).indices

    def eval_recommend(self, user_idx, k):
        """
        为特定用户生成推荐列表
        :param user_idx: 用户id
        :param k: 推荐列表大小
        :return: 用户推荐列表
        """
        user_rated = self.data.getrow(user_idx).indices
        assert (self.output_col.shape[0] - len(user_rated)) >= k
        user_f = self.output_row[user_idx]
        pred_ratings = self.output_col.dot(user_f)
        k_r = k + len(user_rated)
        candidate_items = np.argsort(pred_ratings)[-k_r:]
        recommended_items = [i for i in candidate_items if i not in user_rated]
        recommended_items = recommended_items[-k:]
        recommended_items.reverse()

        return recommended_items

    def eval_ranking(self, N):
        """
        对模型进行评价
        :param N: 为每个用户推荐物品的个数
        :return:
        """
        rec_list = {}
        test_list = {}
        for ux in range(len(self.user_map)):
            recommended_items = self.eval_recommend(ux, N)
            rec_list[self.user_map[ux]] = recommended_items
            test_list[self.user_map[ux]] = self.eval_test(ux)
        self.measure = Metrics.ranking_measure(test_list, rec_list, N)

    def save_tf_model(self, step):
        """
        保存tf模型
        :param step: 全局总步数
        :return:
        """
        self.saver.save(self.sess,
                        os.path.join(self.save_path, 'tf'),
                        global_step=step)

    def load_tf_model(self):
        """
        加载tf模型
        :return:
        """
        ckpt = tf.train.get_checkpoint_state(self.save_path)
        if ckpt and ckpt.model_checkpoint_path:
            self.saver.restore(self.sess, ckpt.model_checkpoint_path)
        else:
            print("No checkpoint file.")

    def save_model(self):
        """
        使用numpy保存隐矩阵
        :return:
        """
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)
        np.save(os.path.join(self.save_path, 'user'), self.user_map)
        np.save(os.path.join(self.save_path, 'item'), self.item_map)
        np.save(os.path.join(self.save_path, 'row'), self.output_row)
        np.save(os.path.join(self.save_path, 'col'), self.output_col)

    def load_model(self):
        """
        加载隐矩阵
        :return:
        """
        self.user_map = np.load(os.path.join(self.save_path, 'user.npy'))
        self.item_map = np.load(os.path.join(self.save_path, 'item.npy'))
        self.output_row = np.load(os.path.join(self.save_path, 'row.npy'))
        self.output_col = np.load(os.path.join(self.save_path, 'col.npy'))

    def eval_train(self):
        """
        传统方法进行训练
        :return:
        """
        print('Start training...')
        num_rows = self.data.shape[0]
        num_cols = self.data.shape[1]
        if os.path.exists(os.path.join(
                self.save_path, 'row.npy')) and os.path.exists(
                    os.path.join(self.save_path, 'col.npy')):
            self.load_model()
        else:
            self.output_row = np.random.rand(num_rows, self.dim)  # X in the paper
            self.output_col = np.random.rand(num_cols, self.dim)  # Y in the paper
        iteration = 0
        while iteration < self.num_iterations:
            print('iteration:', iteration)
            self.loss = 0
            YtY = self.output_col.T.dot(self.output_col)
            I = np.ones(num_cols)
            for uid in range(len(self.user_map)):
                #C_u = np.ones(self.data.getSize(self.recType))
                val = []
                H = np.ones(num_cols)
                pos = []
                P_u = np.zeros(num_cols)
                for iid in self.data.getrow(uid).indices:
                    r_ui = float(
                        self.data.getrow(uid).getcol(iid).toarray()[0][0])
                    pos.append(iid)
                    val.append(r_ui)
                    H[iid] += r_ui
                    P_u[iid] = 1
                    error = (P_u[iid] -
                             self.output_row[uid].dot(self.output_col[iid]))
                    self.loss += error**2
                C_u = coo_matrix((val, (pos, pos)), shape=(num_cols, num_cols))
                # Compute the weight Wu, Wu = (YtCuY + lambda * I) ^ -1
                Au = (YtY +
                      np.dot(self.output_col.T, C_u.dot(self.output_col)) +
                      self.reg * np.eye(self.dim))
                Wu = np.linalg.inv(Au)
                # Update Xu (i.e. X[uid] here): Xu = Wu * YtCuPu
                self.output_row[uid] = np.dot(Wu,
                                              (self.output_col.T * H).dot(P_u))

            XtX = self.output_row.T.dot(self.output_row)
            I = np.ones(num_rows)
            for iid in range(len(self.item_map)):
                P_i = np.zeros(num_rows)
                H = np.ones(num_rows)
                val = []
                pos = []
                for uid in self.data.getcol(iid).indices:
                    r_ui = float(
                        self.data.getrow(uid).getcol(iid).toarray()[0][0])
                    pos.append(uid)
                    val.append(r_ui)
                    H[uid] += r_ui
                    P_i[uid] = 1
                C_i = coo_matrix((val, (pos, pos)), shape=(num_rows, num_rows))
                # Compute the weight Wi, Wi = (XtCiX + lambda * I) ^ -1
                Ai = (XtX +
                      np.dot(self.output_row.T, C_i.dot(self.output_row)) +
                      self.reg * np.eye(self.dim))
                Wi = np.linalg.inv(Ai)
                # Update Yi: Yi = Wi * XtCiPi
                self.output_col[iid] = np.dot(Wi,
                                              (self.output_row.T * H).dot(P_i))

            iteration += 1
            self.loss += self.reg * (
                (self.output_row * self.output_row).sum() +
                (self.output_col * self.output_col).sum())
            print('Loss:', self.loss)
            self.eval_ranking(self.topn)
            if iteration % 2 == 0:
                self.save_model()

    @staticmethod
    def _make_wts(data, wt_type, obs_wt, feature_wt_exp, axis):
        """
        计算缺失值初始化权重
        :param data: 训练数据集
        :param wt_type: 权重线性变换或指数变换
        :param obs_wt: 线性变换参数
        :param feature_wt_exp: 指数变换参数
        :param axis: 数据累加维度
        :return: 在一个维度上权重分布
        """
        frac = np.array(1.0 / (data > 0.0).sum(axis))
        frac[np.ma.masked_invalid(frac).mask] = 0.0
        if wt_type == 1:
            wts = np.array(np.power(frac, feature_wt_exp)).flatten()
        else:
            wts = np.array(obs_wt * frac).flatten()
        assert np.isfinite(wts).sum() == wts.shape[0]
        return wts
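
As a quick illustration of what _make_wts produces (the toy matrix below is invented): each weight is obs_wt / nonzero-count when wt_type == 0, or (1 / nonzero-count) ** feature_wt_exp when wt_type == 1, and rows or columns with no observations get weight 0.

# Toy demonstration of _make_wts; illustrative values only.
import numpy as np
from scipy.sparse import coo_matrix

m = coo_matrix(np.array([[1.0, 0.0, 2.0],
                         [0.0, 0.0, 3.0]]))
# Column-wise counts of observed entries are [1, 0, 2]; the zero count
# produces an infinite frac, which _make_wts resets to weight 0.
print(WRMFRecommender._make_wts(m, wt_type=0, obs_wt=2.0,
                                feature_wt_exp=None, axis=0))
# -> [2. 0. 1.]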
Code example #10
def wals(id,
         from_date,
         to_date,
         predict_moment,
         dimension=30,
         weight=0.5,
         coef=2.0,
         n_iter=30):

    data_path = 'wp_' + from_date + '_' + to_date + '_sparse.json'

    deal_dict = np.load('dict_' + from_date + '_' + to_date +
                        '_for_sparse.npy')
    user_dict = np.load('user_' + from_date + '_' + to_date + '.npy')

    if id not in user_dict:
        return -1
    else:
        user_index = np.where(user_dict == id)[0][0]

    num_rows = len(user_dict)
    num_cols = len(deal_dict)

    connect('wprec', host='mongodb://10.102.61.251:27017')

    deals = WepickDeal.objects(pk__gte=predict_moment + ' 20',
                               pk__lte=predict_moment + ' 99')
    deal_slots = []
    deal_ids = []
    predict_input = []
    for elem in deals:
        dealid = elem['deal'].id
        if dealid in deal_dict:
            deal_slots.append(int(elem.id[-2:]))
            deal_ids.append(elem['deal'].id)

    deal_finder = dict(zip(deal_dict, range(num_cols)))

    with open(data_path, 'r') as f:
        data = json.load(f)

    indices = []
    values = []

    for idx, elem in enumerate(data):
        indices += zip([idx] * len(elem), elem)
        values += [1.0] * len(elem)
    with tf.Graph().as_default() as graph1:
        sp_mat = tf.SparseTensor(indices, values, [num_rows, num_cols])

        model = WALSModel(num_rows,
                          num_cols,
                          dimension,
                          unobserved_weight=weight,
                          regularization=coef,
                          row_weights=None,
                          col_weights=None)

        row_factors = model.row_factors[0]
        col_factors = model.col_factors[0]

        sess = tf.Session(graph=graph1)

        row_update_op = model.update_row_factors(sp_mat)[1]
        col_update_op = model.update_col_factors(sp_mat)[1]

        sess.run(model.initialize_op)
        for _ in range(n_iter):
            sess.run(model.row_update_prep_gramian_op)
            sess.run(model.initialize_row_update_op)
            sess.run(row_update_op)
            sess.run(model.col_update_prep_gramian_op)
            sess.run(model.initialize_col_update_op)
            sess.run(col_update_op)

    output_row = row_factors.eval(sess)
    output_col = col_factors.eval(sess)

    sess.close()

    results = []

    for i in range(len(deal_ids)):
        deal_index = deal_finder[deal_ids[i]]
        results.append({
            'id':
            deal_ids[i],
            'slot':
            deal_slots[i],
            'score':
            sum(output_row[user_index][:] * output_col[deal_index])
        })
    return results
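
A small aside on the scoring line above: summing the elementwise product of a user row and an item row is a dot product, so an equivalent vectorized formulation (a sketch reusing the names from this example) would be:

# Equivalent vectorized scoring (sketch): one matrix-vector product
# covers all candidate deals at once.
deal_idx = [deal_finder[d] for d in deal_ids]
scores = output_col[deal_idx].dot(output_row[user_index])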
Code example #11
def wals_cate(from_date,
              to_date,
              dimension=10,
              weight=0.5,
              coef=2.0,
              n_iter=30):
    data_path = 'wp_' + from_date + '_' + to_date + '_cate.json'
    cate_dict = np.load('cate_dict.npy')
    user_dict = np.load('user_' + from_date + '_' + to_date + '_for_cate.npy')

    num_rows = len(user_dict)
    num_cols = len(cate_dict)

    with open(data_path, 'r') as f:
        data = json.load(f)

    indices = []
    values = []

    for idx, elem in enumerate(data):
        indices += zip([idx] * len(elem), elem)
        values += [1.0] * len(elem)

    with tf.Graph().as_default() as graph1:
        sp_mat = tf.SparseTensor(indices, values, [num_rows, num_cols])

        model = WALSModel(num_rows,
                          num_cols,
                          dimension,
                          unobserved_weight=weight,
                          regularization=coef,
                          row_weights=None,
                          col_weights=None)

        row_factors = model.row_factors[0]
        col_factors = model.col_factors[0]

        sess = tf.Session(graph=graph1)

        row_update_op = model.update_row_factors(sp_mat)[1]
        col_update_op = model.update_col_factors(sp_mat)[1]

        sess.run(model.initialize_op)
        for _ in range(n_iter):
            sess.run(model.row_update_prep_gramian_op)
            sess.run(model.initialize_row_update_op)
            sess.run(row_update_op)
            sess.run(model.col_update_prep_gramian_op)
            sess.run(model.initialize_col_update_op)
            sess.run(col_update_op)

    output_row = row_factors.eval(sess).tolist()
    output_col = col_factors.eval(sess).tolist()

    sess.close()

    # temporary naming mechanism for the generated matrices
    random.seed()
    temp_num = str(random.randrange(100))

    user_temp_name = 'temp_user' + temp_num
    item_temp_name = 'temp_item' + temp_num

    with open('../' + user_temp_name + '.json', 'w') as f:
        json.dump(output_row, f)
    with open('../' + item_temp_name + '.json', 'w') as f:
        json.dump(output_col, f)

    print('files saved')

    return dimension, user_temp_name, item_temp_name