def run_tests(predictor,
              model_file,
              dataset,
              get_full_recommendation_list=False,
              k=10):
    # Load model
    predictor.load(model_file)
    #predictor.load_last(os.path.dirname(model_file) + '/')
    # Prepare evaluator
    evaluator = evaluation.Evaluator(dataset, k=k)

    if get_full_recommendation_list:
        k = dataset.n_items

    count = 0
    for sequence, user_id in dataset.test_set(epochs=1):
        count += 1
        num_viewed = len(sequence) // 2
        viewed = sequence[:num_viewed]
        goal = [i[0] for i in sequence[num_viewed:]]  # list of item ids

        recommendations = predictor.top_k_recommendations(viewed,
                                                          user_id=user_id,
                                                          k=k)

        evaluator.add_instance(goal, recommendations)

        if len(goal) == 0:
            raise ValueError('Empty goal sequence in test set')
    return evaluator
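
# Hedged usage sketch for run_tests above: assumes `predictor` implements
# load() and top_k_recommendations() and `dataset` is this codebase's dataset
# object; 'model.npz' is a hypothetical path.
evaluator = run_tests(predictor, 'model.npz', dataset, k=10)
print('recall@10:', evaluator.average_recall())
print('sps@10:', evaluator.sps())
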
    def _compute_validation_metrics(self, metrics):
        clusters = np.zeros(self.n_clusters, dtype="int")
        used_items = []
        ev = evaluation.Evaluator(self.dataset, k=10)
        ev_clusters = evaluation.Evaluator(self.dataset, k=10)
        try:
            for batch, goal in self._gen_mini_batch(
                    self.dataset.validation_set(epochs=1), test=True):
                pred1, pred2, cl, i = self.test_function(batch)
                ev.add_instance(goal, pred1)
                ev_clusters.add_instance(goal, pred2)
                clusters[cl] += 1
                used_items.append(i)
        except Exception as e:
            print(e)

        if self.cluster_type == 'softmax':
            ignored_items = 0
            cluster_size = np.histogram(
                self.cluster_repartition.get_value(borrow=True).argmax(axis=1),
                bins=range(self.n_clusters + 1))[0].tolist()
        elif self.cluster_type == 'mix':
            ignored_items = 0
            sig_clusters = self.cluster_repartition.get_value(borrow=True) > 0.
            softmax_clusters = self.cluster_repartition.get_value(
                borrow=True).argmax(axis=1)
            for i in range(self.n_items):
                sig_clusters[i, softmax_clusters[i]] = True
            cluster_size = sig_clusters.sum(axis=0)
        else:
            ignored_items = (self.cluster_repartition.get_value(
                borrow=True).max(axis=1) < 0.).sum()
            cluster_size = (self.cluster_repartition.get_value(borrow=True) >
                            0.).sum(axis=0)

        metrics['recall'].append(ev.average_recall())
        metrics['cluster_recall'].append(ev_clusters.average_recall())
        metrics['sps'].append(ev.sps())
        metrics['cluster_sps'].append(ev_clusters.sps())
        metrics['assr'].append(self.n_items / np.mean(used_items))
        metrics['ignored_items'].append(ignored_items)
        metrics['cluster_use'].append(clusters)
        metrics['cluster_use_std'].append(np.std(clusters))
        metrics['cluster_size'].append(cluster_size)

        return metrics
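
# Sketch of the metrics dictionary the clustered variant above expects; the
# keys are taken from its appends, each mapped to an empty list before training.
metrics = {key: [] for key in (
    'recall', 'cluster_recall', 'sps', 'cluster_sps', 'assr',
    'ignored_items', 'cluster_use', 'cluster_use_std', 'cluster_size')}
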
    def _compute_validation_metrics(self, metrics):
        """Append the current validation metric values to the lists in the
        metrics dictionary."""
        ev = evaluation.Evaluator(self.dataset, k=10)
        if not self.iter:
            for batch_input, goal in self._gen_mini_batch(
                    self.dataset.validation_set(epochs=1), test=True):
                predictions = self.test_function(
                    [batch_input[0], batch_input[1]])

                # print("predictions")
                # print(predictions)
                ev.add_instance(goal, predictions)
        else:
            for sequence, user in self.dataset.validation_set(epochs=1):
                seq_lengths = list(range(1, len(sequence)))  # 1, 2, ..., len(sequence)-1
                for length in seq_lengths:
                    # Input tensor and mask (the mask handles variable-length sequences)
                    X = np.zeros((1, self.max_length, self._input_size()),
                                 dtype=self._input_type)
                    mask = np.zeros((1, self.max_length))

                    # Keep at most the last max_length items of the prefix
                    seq_by_max_length = sequence[max(length - self.max_length, 0):length]
                    X[0, :len(seq_by_max_length), :] = np.array(
                        [self._get_features(item) for item in seq_by_max_length])
                    mask[0, :len(seq_by_max_length)] = 1

                    predictions = self.test_function(
                        [X, mask.astype(theano.config.floatX)])
                    # print("predictions")
                    # print(predictions)
                    goal = sequence[length:][0]
                    ev.add_instance(goal, predictions)

        metrics['recall'].append(ev.average_recall())
        metrics['sps'].append(ev.sps())
        metrics['precision'].append(ev.average_precision())
        metrics['ndcg'].append(ev.average_ndcg())
        metrics['user_coverage'].append(ev.user_coverage())
        metrics['item_coverage'].append(ev.item_coverage())
        metrics['blockbuster_share'].append(ev.blockbuster_share())

        ev.instances = []  # release the stored instances

        return metrics
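
# Alternative sketch: a collections.defaultdict(list) avoids declaring every
# metric key up front; each metrics[key].append(...) creates the list on
# first use.
from collections import defaultdict
metrics = defaultdict(list)
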
def compute_validation_metrics(model, dataset, metrics):
    """
    Append the current validation metric values to the lists in the metrics
    dictionary.

    Assumes module-level globals: iter (an evaluation-mode flag that shadows
    the builtin), gen_mini_batch, max_length, n_items, input_type and
    get_features.
    """

    ev = evaluation.Evaluator(dataset, k=10)
    if not iter:
        for batch_input, goal in gen_mini_batch(dataset.validation_set(),
                                                test=False):
            output = model.predict_on_batch(batch_input)
            predictions = np.argpartition(-output, list(range(10)),
                                          axis=-1)[0, :10]
            # print("predictions")
            # print(predictions)
            ev.add_instance(goal, predictions)
    else:
        for sequence, user in dataset.validation_set(epochs=1):
            seq_lengths = list(range(1, len(sequence)))  # 1, 2, ..., len(sequence)-1
            for length in seq_lengths:
                # One-hot style input tensor for the RNN
                X = np.zeros((1, max_length, n_items), dtype=input_type)

                # Keep at most the last max_length items of the prefix
                seq_by_max_length = sequence[max(length - max_length, 0):length]
                X[0, :len(seq_by_max_length), :] = np.array(
                    [get_features(item) for item in seq_by_max_length])

                output = model.predict_on_batch(X)
                predictions = np.argpartition(-output,
                                              list(range(10)),
                                              axis=-1)[0, :10]
                # print("predictions")
                # print(predictions)
                goal = sequence[length:][0]
                ev.add_instance(goal, predictions)

    metrics['recall'].append(ev.average_recall())
    metrics['sps'].append(ev.sps())
    metrics['precision'].append(ev.average_precision())
    metrics['ndcg'].append(ev.average_ndcg())
    metrics['user_coverage'].append(ev.user_coverage())
    metrics['item_coverage'].append(ev.item_coverage())
    metrics['blockbuster_share'].append(ev.blockbuster_share())

    ev.instances = []  # release the stored instances

    return metrics
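
# Self-contained illustration of the top-10 extraction used above:
# np.argpartition with kth=range(10) puts the 10 smallest values of -output
# (i.e. the 10 largest scores) in sorted order at the front of the index array.
import numpy as np
scores = np.random.rand(1, 50)  # one row of item scores
top10 = np.argpartition(-scores, list(range(10)), axis=-1)[0, :10]
assert list(top10) == list(np.argsort(-scores[0])[:10])
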
Example #5
def run_tests(predictor,
              model_file,
              dataset,
              args,
              get_full_recommendation_list=False,
              k=10):

    predictor._load(model_file)
    #predictor.load_last(os.path.dirname(model_file) + '/')

    # Prepare evaluator
    evaluator = evaluation.Evaluator(dataset, k=k)

    if get_full_recommendation_list:
        k = dataset.n_items

    nb_of_dp = []
    start = time.perf_counter()  # time.clock() was removed in Python 3.8
    for sequence, user_id in dataset.test_set(epochs=1):
        if not args.test_iter:
            num_viewed = len(sequence) // 2
            viewed = sequence[:num_viewed]
            goal = [i[0] for i in sequence[num_viewed:]]  # list of movie ids

            recommendations = predictor.top_k_recommendations(viewed, k=k)

            # just make sure the recommendations (movie ids) are added correctly
            evaluator.add_instance(goal, recommendations)

            if len(goal) == 0:
                raise ValueError('Empty goal sequence in test set')
        else:
            seq_lengths = list(range(1, len(sequence)))

            for length in seq_lengths:
                viewed = sequence[:length]
                goal = sequence[length:][0]

                recommendations = predictor.top_k_recommendations(viewed, k=k)
                evaluator.add_instance(goal, recommendations)

    end = time.perf_counter()
    print('Timer: ', end - start)
    if len(nb_of_dp) == 0:  # nb_of_dp is never filled in this variant
        evaluator.nb_of_dp = dataset.n_items
    else:
        evaluator.nb_of_dp = np.mean(nb_of_dp)
    return evaluator
    def _compute_validation_metrics(self, metrics):
        ev = evaluation.Evaluator(self.dataset, k=10)
        for batch_input, goal in self._gen_mini_batch(
                self.dataset.validation_set(epochs=1), test=True):
            predictions = self.test_function(batch_input)
            ev.add_instance(goal, predictions)

        metrics['recall'].append(ev.average_recall())
        metrics['sps'].append(ev.sps())
        metrics['ndcg'].append(ev.average_ndcg())
        metrics['user_coverage'].append(ev.user_coverage())
        metrics['item_coverage'].append(ev.item_coverage())
        metrics['blockbuster_share'].append(ev.blockbuster_share())

        return metrics
Example #7
    def _compute_validation_metrics(self, metrics):
        ev = evaluation.Evaluator(self.dataset, k=10)
        for sequence, user_id in self.dataset.validation_set(epochs=1):
            top_k = self.top_k_recommendations(sequence[:len(sequence) // 2],
                                               user_id=int(user_id))
            goal = [i[0] for i in sequence[len(sequence) // 2:]]
            ev.add_instance(goal, top_k)

        metrics['recall'].append(ev.average_recall())
        metrics['sps'].append(ev.sps())
        metrics['ndcg'].append(ev.average_ndcg())
        metrics['user_coverage'].append(ev.user_coverage())
        metrics['item_coverage'].append(ev.item_coverage())
        metrics['blockbuster_share'].append(ev.blockbuster_share())

        return metrics
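
# Runnable illustration of the half-split protocol used above: the first half
# of an (item_id, rating)-style sequence is the input, and the item ids of the
# second half are the goal.
sequence = [(12, 4.0), (7, 3.0), (33, 5.0), (2, 2.0)]
viewed = sequence[:len(sequence) // 2]                # [(12, 4.0), (7, 3.0)]
goal = [i[0] for i in sequence[len(sequence) // 2:]]  # [33, 2]
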
Example #8
def run_tests(predictor,
              model_file,
              dataset,
              args,
              get_full_recommendation_list=False,
              k=10):
    # Load model
    predictor.load(model_file)
    #predictor.load_last(os.path.dirname(model_file) + '/')
    # Prepare evaluator
    evaluator = evaluation.Evaluator(dataset, k=k)

    if get_full_recommendation_list:
        k = dataset.n_items

    count = 0
    nb_of_dp = []
    start = time.perf_counter()
    for sequence, user_id in dataset.test_set(epochs=1):
        count += 1
        num_viewed = len(sequence) // 2
        viewed = sequence[:num_viewed]
        goal = [i[0] for i in sequence[num_viewed:]]  # list of item ids

        if args.clusters > 0:
            recommendations, n = predictor.top_k_recommendations(
                viewed, user_id=user_id, k=k)
            nb_of_dp.append(n)
        else:
            recommendations = predictor.top_k_recommendations(viewed,
                                                              user_id=user_id,
                                                              k=k)

        evaluator.add_instance(goal, recommendations)

        if len(goal) == 0:
            raise ValueError('Empty goal sequence in test set')
    end = time.perf_counter()
    print('Timer: ', end - start)
    if len(nb_of_dp) == 0:
        evaluator.nb_of_dp = dataset.n_items
    else:
        evaluator.nb_of_dp = np.mean(nb_of_dp)
    return evaluator
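
# Hedged usage sketch for the clustered run_tests above: `args` only needs a
# `clusters` attribute here, so an argparse.Namespace stands in for the real
# parser; 'model.npz', `predictor` and `dataset` are assumed to exist.
from argparse import Namespace
evaluator = run_tests(predictor, 'model.npz', dataset, Namespace(clusters=0))
print('recall@10:', evaluator.average_recall())
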
Example #9
    def _compute_validation_metrics(self, dataset, metrics):
        """
        add value to lists in v_metrics dictionary
        """
        self.dataset = dataset
        ev = evaluation.Evaluator(self.dataset, k=10)
        for sequence, user_id in self.dataset.validation_set(epochs=1):
            sequence = sequence[-min(self.max_length, len(sequence)):]
            num_viewed = int(len(sequence) / 2)
            viewed = sequence[:num_viewed]
            goal = [i[0] for i in sequence[num_viewed:]]  # list of movie ids

            # The ktf embedding layer takes a movie-id sequence, not one-hot vectors
            X = np.zeros((1, self.max_length), dtype=np.int32)
            X[0, :len(viewed)] = np.array([item[0] for item in viewed])

            output = self.model.predict_on_batch(X)
            # Optional: mask already-seen items, e.g.
            # output[[i[0] for i in viewed]] = -np.inf
            predictions = np.argpartition(-output, list(range(10)),
                                          axis=-1)[0, :10]
            ev.add_instance(goal, predictions)

        metrics['recall'].append(ev.average_recall())
        metrics['sps'].append(ev.sps())
        metrics['sps_short'].append(ev.sps_short())
        metrics['sps_long'].append(ev.sps_long())
        metrics['precision'].append(ev.average_precision())
        metrics['ndcg'].append(ev.average_ndcg())
        metrics['user_coverage'].append(ev.user_coverage())
        metrics['item_coverage'].append(ev.item_coverage())
        metrics['total_item_coverage'].append(ev.total_item_coverage())
        metrics['uniq_rec'].append(ev.uniq_rec())
        metrics['blockbuster_share'].append(ev.blockbuster_share())
        metrics['intra_list_similarity'].append(ev.average_intra_list_similarity())

        ev.nb_of_dp = self.dataset.n_items
        ev.instances = []  # release the stored instances

        return metrics
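
# Runnable illustration of the left-aligned id padding used above: item ids go
# into a fixed-width int32 row, and zeros pad the tail.
import numpy as np
max_length = 5
viewed = [(12, 4.0), (7, 3.0), (33, 5.0)]
X = np.zeros((1, max_length), dtype=np.int32)
X[0, :len(viewed)] = np.array([item[0] for item in viewed])
# X -> [[12  7 33  0  0]]
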
    def _compute_validation_metrics(self, metrics):
        """Append the current validation metric values to the lists in the
        metrics dictionary."""
        ev = evaluation.Evaluator(self.dataset, k=10)
        if not self.iter:
            for batch_input, goal in self._gen_mini_batch(
                    self.dataset.validation_set(epochs=1), test=True):
                output = self.sess.run(self.softmax,
                                       feed_dict={
                                           self.X: batch_input[0],
                                           self.length: batch_input[2]
                                       })
                predictions = np.argpartition(-output,
                                              list(range(10)),
                                              axis=-1)[0, :10]
                # print("predictions")
                # print(predictions)
                ev.add_instance(goal, predictions)
        else:
            for sequence, user in self.dataset.validation_set(epochs=1):
                seq_lengths = list(range(1, len(sequence)))  # 1, 2, ..., len(sequence)-1
                for seq_length in seq_lengths:
                    # Input tensor for the RNN
                    X = np.zeros(
                        (1, self.max_length, self._input_size()),
                        dtype=self._input_type)
                    length = np.zeros((1, ), dtype=np.int32)

                    # Keep at most the last max_length items of the prefix
                    seq_by_max_length = sequence[
                        max(seq_length - self.max_length, 0):seq_length]
                    X[0, :len(seq_by_max_length), :] = np.array(
                        [self._get_features(item) for item in seq_by_max_length])
                    length[0] = len(seq_by_max_length)

                    output = self.sess.run(self.softmax,
                                           feed_dict={
                                               self.X: X,
                                               self.length: length
                                           })
                    predictions = np.argpartition(-output,
                                                  list(range(10)),
                                                  axis=-1)[0, :10]
                    # print("predictions")
                    # print(predictions)
                    goal = sequence[seq_length:][0]
                    ev.add_instance(goal, predictions)

        metrics['recall'].append(ev.average_recall())
        metrics['sps'].append(ev.sps())
        metrics['precision'].append(ev.average_precision())
        metrics['ndcg'].append(ev.average_ndcg())
        metrics['user_coverage'].append(ev.user_coverage())
        metrics['item_coverage'].append(ev.item_coverage())
        metrics['blockbuster_share'].append(ev.blockbuster_share())

        ev.instances = []  # release the stored instances

        return metrics
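
# Sketch of the per-prefix (sliding-window) evaluation loop used in the
# `self.iter` branch above: every prefix of a sequence is an input and the
# next item is its goal.
sequence = ['a', 'b', 'c', 'd']
for length in range(1, len(sequence)):
    prefix, goal = sequence[:length], sequence[length]
    # prefix=['a'] goal='b'; prefix=['a','b'] goal='c'; prefix=['a','b','c'] goal='d'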