예제 #1
0
    def fit(self, table):
        """Fit the text, per-user and vote-scaling components on ``table``.

        Reads ``table.text``, ``table.user_id`` and ``table.votes_useful``
        (presumably columns of a pandas DataFrame -- confirm against callers)
        and uses ``self.max_features`` and ``self.use_scale`` as settings.

        Sets the following attributes:

        * ``self.tfidf``    -- TF-IDF vectorizer fitted on the stemmed text.
        * ``self.pca``      -- PCA fitted on the dense TF-IDF features.
        * ``self.avg_user`` -- ``CategoryAverage`` fitted on user ids against
          the log-transformed useful votes.
        * ``self.scale``    -- multiplier for log-votes; ``1 / max(log-votes)``
          when ``self.use_scale`` is truthy, otherwise ``1``.

        Returns:
            ``self``, so calls can be chained.
        """
        stemmed_text = stem(table.text)

        self.tfidf = TfidfVectorizer(stop_words='english', max_features=self.max_features)
        # The vectorizer output is converted with ``toarray()`` before PCA is
        # fitted on it.
        text_features = self.tfidf.fit_transform(stemmed_text).toarray()

        # NOTE(review): the component count is hard-coded to 150; presumably
        # this needs at least 150 samples and 150 TF-IDF features -- confirm.
        self.pca = PCA(150).fit(text_features)

        # log(votes + 1) is the target used both for the per-user average and
        # for the scale, so compute it once.
        log_votes = log(table.votes_useful + 1)

        self.avg_user = CategoryAverage().fit(table.user_id, log_votes)

        # scale for votes
        if self.use_scale:
            max_log_votes = max(log_votes)
            # When every row has zero useful votes the maximum is log(1) == 0;
            # fall back to no scaling instead of dividing by zero.
            self.scale = 1 if max_log_votes == 0 else 1 / max_log_votes
        else:
            self.scale = 1

        return self