Example #1
0
    def fit(self, X: list, Y: list = None):
        """
        Fit the classifier on the given training data.

        The classifier is (re)instantiated first, the input is then passed
        through ``transform`` together with ``self.transformer``, and finally
        a pipeline wrapping ``self.clf`` is fitted. The fitted pipeline is
        stored in ``self.pipe``.

        Args:
            X (list): training samples, converted with ``self.transformer``
                before fitting
            Y (list): target values forwarded to the pipeline's ``fit``;
                defaults to None
        """
        self.__instantiate_classifier()

        features = transform(self.transformer, X)

        # Build the pipeline and keep whatever its ``fit`` returns
        self.pipe = make_pipeline(self.clf).fit(features, Y)
Example #2
0
    def fit(self, X: list, Y: list = None):
        """
        Fit the classifier on the given training data.

        The classifier is instantiated using the raw (not yet transformed)
        input, the input is then passed through ``transform`` together with
        ``self.transformer``, and a pipeline wrapping ``self.clf`` is fitted.
        The fitted pipeline is stored in ``self.pipe``.

        Args:
            X (list): training samples; handed as-is to the classifier
                instantiation and, once transformed, to the pipeline
            Y (list): target values forwarded to the pipeline's ``fit``;
                defaults to None
        """
        # The classifier set-up receives the untransformed input
        self.__instantiate_classifier(X)

        # Normalise the input representation (presumably handles dicts and
        # multiple representations -- confirm against ``transform``)
        features = transform(self.transformer, X)

        self.pipe = make_pipeline(self.clf).fit(features, Y)
Example #3
0
    def fit(self, X: list, Y: list = None):
        """
        Fit the classifier, preferring its calibrated variant.

        The input is transformed with ``self.transformer`` first. A calibrated
        classifier is then instantiated and fitted; if that attempt raises a
        ``ValueError``, a non-calibrated classifier is instantiated and fitted
        instead. The fitted pipeline is stored in ``self.pipe``.

        Args:
            X (list): training samples, converted with ``self.transformer``
                before fitting
            Y (list): target values forwarded to the pipeline's ``fit``;
                defaults to None
        """
        features = transform(self.transformer, X)

        def fit_pipeline(calibrated: bool):
            # Instantiate the requested classifier flavour and fit a pipeline
            # wrapping it on the transformed input
            self.__instantiate_classifier(calibrated=calibrated)
            return make_pipeline(self.clf).fit(features, Y)

        try:
            # First choice: the calibrated classifier
            self.pipe = fit_pipeline(True)
        except ValueError:
            # Fallback: the plain classifier; errors raised here propagate
            self.pipe = fit_pipeline(False)
Example #4
0
    def predict(self,
                ratings: pd.DataFrame,
                recs_number: int,
                items_directory: str,
                candidate_item_id_list: List = None) -> pd.DataFrame:
        """
        After computing the centroid of the positive rated items by the user and getting the similarity scores
        of said centroid compared with every unrated item, creates and returns a recommendation list of unrated
        items ordered by their similarity score with the centroid. A candidate_item_id_list can be passed
        which will be used instead of the unrated items.

            EXAMPLE:
                Creates a recommendation list of length 1 with the similarity to the centroid as score, only considering
                the item tt0114319 instead of all the unrated items. (Ratings is a DataFrame containing the ratings
                given by the user)
                    predict(ratings=ratings, recs_number=1, items_directory='.../somedir',
                    candidate_item_id_list=['tt0114319'])

        Args:
            candidate_item_id_list (list): list of the items that can be recommended, if None
                all unrated items will be used
            recs_number (int): how long the ranking will be
            ratings (pd.DataFrame): ratings of a user
            items_directory (str): name of the directory where the items are stored.

        Returns:
             scores (pd.DataFrame): DataFrame whose columns are the ids of the items (to_id),
                and the similarities between the items and the centroid (rating), sorted by
                descending rating and truncated to the first recs_number rows. An empty DataFrame
                with the same columns is returned when preprocessing raises ValueError or
                FileNotFoundError (the error is logged as a warning).
        """
        columns = ["to_id", "rating"]

        # Loads the items and extracts features from the unrated items, then
        # extracts features from the positive rated items
        # If exception, returns an empty score_frame
        try:
            rated_items, unrated_items, unrated_features_bag_list = \
                super().preprocessing(items_directory, ratings, candidate_item_id_list)
            positive_rated_features_bag_list = self.__calc_positive_rated_baglist(
                rated_items, ratings)
        except (ValueError, FileNotFoundError) as e:
            logger.warning(str(e))
            return pd.DataFrame(columns=columns)

        logger.info("Computing rated items centroid")
        positive_rated_items_array = transform(
            self.__transformer, positive_rated_features_bag_list)
        centroid = np.array(positive_rated_items_array).mean(axis=0)

        logger.info("Computing similarity between centroid and unrated items")
        unrated_items_array = transform(self.__transformer,
                                        unrated_features_bag_list)
        similarities = [
            self.__similarity.perform(centroid, item)
            for item in unrated_items_array
        ]

        # Build the frame in one shot: concatenating one row at a time copies
        # the whole frame on every iteration (quadratic) and relies on
        # concatenation with an empty frame
        records = [
            (item.content_id, similarity)
            for item, similarity in zip(unrated_items, similarities)
        ]
        score_frame = pd.DataFrame.from_records(records, columns=columns)

        score_frame = score_frame.sort_values(
            ['rating'], ascending=False).reset_index(drop=True)

        # Keep only the top recs_number entries of the ranking
        return score_frame[:recs_number]
Example #5
0
    def predict_proba(self, X_pred: list):
        """
        Return the output of the fitted pipeline's ``predict_proba``.

        The input is first passed through ``transform`` together with
        ``self.transformer``, then forwarded to ``self.pipe.predict_proba``.

        Args:
            X_pred (list): samples to compute the predictions for

        Returns:
            whatever ``self.pipe.predict_proba`` returns for the transformed
            samples
        """
        features = transform(self.transformer, X_pred)
        probabilities = self.pipe.predict_proba(features)

        return probabilities