def main(self):
    """Classify the test set with a pickled model and write JSON lines.

    Parses the command-line arguments into ``self.data``, sets the logging
    level from ``self.data.verbose``, unpickles the model stored at
    ``self.data.model`` and transforms every item returned by
    ``read_data(self.data.test_set)`` with
    ``svc.model.transform_q_voc_ratio``.  Each transformed item is indexed
    as a pair: position 0 is handed to the model, position 1 is written
    back as ``q_voc_ratio``.

    One JSON document per line is written to the path returned by
    ``self.get_output()``; a path ending in ``.gz`` is written through
    ``gzip`` as UTF-8 bytes, anything else as plain text.  Depending on
    ``self.data.decision_function`` each record receives either a
    ``decision_function`` entry or a ``klass`` entry (the prediction
    converted with ``str``).
    """
    self.data = self.parser.parse_args()
    logging.basicConfig(level=self.data.verbose)
    logging.getLogger('b4msa').setLevel(self.data.verbose)

    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only point this at model files from a trusted source.
    with open(self.data.model, 'rb') as model_file:
        svc = pickle.load(model_file)

    pairs = [
        svc.model.transform_q_voc_ratio(item)
        for item in read_data(self.data.test_set)
    ]
    ratios = [pair[1] for pair in pairs]
    features = [pair[0] for pair in pairs]

    def _plain(value):
        # Objects exposing ``tolist`` are converted through it so that
        # json can serialise them; anything else is passed through as is.
        try:
            return value.tolist()
        except AttributeError:
            return value

    target = self.get_output()
    compressed = target.endswith('.gz')
    sink = gzip.open(target, 'wb') if compressed else open(target, 'w')

    with sink as out:
        if self.data.decision_function:
            scores = svc.decision_function(features)
            field, convert = 'decision_function', _plain
        else:
            scores = svc.predict(features)
            field, convert = 'klass', str

        # The test set is read a second time here to recover the full
        # records that the predictions are attached to.
        records = zip(tweet_iterator(self.data.test_set), scores, ratios)
        for tweet, score, ratio in records:
            tweet[field] = convert(score)
            tweet['q_voc_ratio'] = ratio
            line = json.dumps(tweet) + "\n"
            # The gzip handle is binary, so the line must be encoded first.
            out.write(line.encode('utf-8') if compressed else line)
def predict_file(self, fname, get_tweet='text', maxitems=1e100):
    """Return the predictions for every item read from ``fname``.

    ``fname``, ``get_tweet`` and ``maxitems`` are forwarded unchanged to
    ``read_data``; each item it yields is passed to ``self.predict_text``.

    :param fname: first argument given to ``read_data``
        (presumably the path of the data file -- confirm against
        ``read_data``).
    :param get_tweet: forwarded as the ``get_tweet`` keyword of
        ``read_data``.
    :param maxitems: forwarded as the ``maxitems`` keyword of
        ``read_data``.
    :returns: list with one ``self.predict_text`` result per item, in the
        order the items were produced.
    """
    items = read_data(fname, get_tweet=get_tweet, maxitems=maxitems)
    predictions = []
    for item in items:
        predictions.append(self.predict_text(item))
    return predictions