def predict(self, features):
    '''
    Predict importances for the given feature vectors.

    Only the first ``self.n_comp`` columns of ``features`` are used. They
    are passed through ``self.norm.transform``, fed to ``self.svr.predict``,
    and the raw predictions are then mapped back with
    ``self.std_scaler_i.inverse_transform``.

    :param features: 2-D array-like indexable as ``features[:, 0:k]``
        (one row per sample -- presumably a numpy array; confirm
        against the caller).
    :return: whatever ``self.std_scaler_i.inverse_transform`` returns for
        the predictions (presumably importances on their original scale,
        assuming the scaler was fitted on the training importances).
    '''
    # Keep only the leading n_comp feature columns before normalising.
    features = self.norm.transform(features[:, 0:self.n_comp])
    results = self.svr.predict(features)
    # Undo the scaling applied to the regression target.
    return self.std_scaler_i.inverse_transform(results)


if __name__ == '__main__':
    import corpus
    import featureExtractor
    from featureExtractor import FeatureExtractor

    # Single-argument parenthesised print behaves the same on Python 2
    # and Python 3.
    print('Loading corpus ...')
    # NOTE: this rebinds the name `corpus` from the module to the
    # TwitterCorpus instance; the module is not reachable by that name
    # afterwards.
    corpus = corpus.TwitterCorpus()
    tweets = corpus.all_tweets()
    importances = np.array(
        [featureExtractor.tweet_importance(t) for t in tweets])

    # Try to load cached feature vectors; on any ordinary failure
    # (missing or unreadable cache) recompute them and refresh the cache.
    # `except Exception` rather than a bare `except` so that Ctrl-C and
    # SystemExit are not swallowed and mistaken for a cache miss.
    try:
        v = joblib.load('data/cache/vectors.joblib')
    except Exception:
        print('FeatureExtractor fit transform ...')
        feat = FeatureExtractor()
        v = feat.train(tweets, importances)
        joblib.dump(v, 'data/cache/vectors.joblib')

    print('HotTweets train ...')
    ht = HotTweets()
    # Train on the first 1000 samples only.
    ht.train(v[0:1000], importances[0:1000])