Esempio n. 1
0
 def main(self):
     """Run the command-line prediction workflow.

     Parses the command-line arguments into ``self.data``, configures
     logging, loads the pickled classifier from ``self.data.model`` and
     transforms every item read from ``self.data.test_set``.  One JSON
     document per line is then written to the path returned by
     ``self.get_output()``; the stream is gzip-compressed when that path
     ends with ``.gz``.

     Each record is the tweet yielded by ``tweet_iterator`` extended with
     ``q_voc_ratio`` (the second element returned by
     ``svc.model.transform_q_voc_ratio``) and either ``klass`` (the
     prediction converted to ``str``) or, when
     ``self.data.decision_function`` is truthy, ``decision_function``.
     """
     self.data = self.parser.parse_args()
     logging.basicConfig(level=self.data.verbose)
     logging.getLogger('b4msa').setLevel(self.data.verbose)

     # NOTE: unpickling can execute arbitrary code, so the model file must
     # come from a trusted source.
     with open(self.data.model, 'rb') as model_file:
         svc = pickle.load(model_file)

     transformed = [svc.model.transform_q_voc_ratio(text)
                    for text in read_data(self.data.test_set)]
     ratios = [pair[1] for pair in transformed]
     features = [pair[0] for pair in transformed]

     target = self.get_output()
     gzip_flag = target.endswith('.gz')
     if gzip_flag:
         # Binary gzip stream: every line is encoded before being written.
         sink = gzip.open(target, 'wb')
     else:
         sink = open(target, 'w')

     with sink as fpt:
         use_scores = bool(self.data.decision_function)
         if use_scores:
             field = 'decision_function'
             outputs = svc.decision_function(features)
         else:
             field = 'klass'
             outputs = svc.predict(features)

         # The test set is read a second time to recover the full tweets
         # that are paired with each output and ratio.
         records = zip(tweet_iterator(self.data.test_set), outputs, ratios)
         for tweet, value, ratio in records:
             if use_scores:
                 # Objects exposing ``tolist`` are converted through it so
                 # json can serialise them; anything else is stored as is.
                 try:
                     value = value.tolist()
                 except AttributeError:
                     pass
             else:
                 value = str(value)
             tweet[field] = value
             tweet['q_voc_ratio'] = ratio
             line = json.dumps(tweet) + "\n"
             if gzip_flag:
                 line = bytes(line, encoding='utf-8')
             fpt.write(line)
Esempio n. 2
0
 def predict_file(self, fname, get_tweet='text', maxitems=1e100):
     """Predict every item read from ``fname``.

     ``fname``, ``get_tweet`` and ``maxitems`` are forwarded unchanged to
     ``read_data``; each item it yields is passed to
     ``self.predict_text``.

     Returns the list of ``predict_text`` results, in reading order.
     """
     predictions = []
     for text in read_data(fname, get_tweet=get_tweet, maxitems=maxitems):
         predictions.append(self.predict_text(text))
     return predictions