def main(self):
    """Predict every record of the test set and write them as JSON lines.

    Command-line arguments are parsed into ``self.data``. The model named
    by ``self.data.model`` is loaded with ``load_model`` and each text
    returned by ``read_data`` is transformed through ``svc.model[...]``.

    The destination is ``self.get_output()``. A path ending in ``.gz`` is
    written through ``gzip`` in binary mode (each line encoded as UTF-8);
    any other path is written as a plain text file.

    When ``self.data.decision_function`` is falsy each record gets a
    ``'klass'`` entry holding the stringified prediction; otherwise it gets
    a ``'decision_function'`` entry holding the decision values.
    """
    self.data = self.parser.parse_args()
    svc = load_model(self.data.model)
    X = [svc.model[text] for text in read_data(self.data.test_set)]

    path = self.get_output()
    gzip_flag = path.endswith('.gz')
    if gzip_flag:
        handle = gzip.open(path, 'wb')
    else:
        handle = open(path, 'w')

    with handle as fpt:
        want_scores = self.data.decision_function
        hy = svc.decision_function(X) if want_scores else svc.predict(X)
        # The test set is traversed a second time so the original records
        # can be paired with their predictions.
        for tweet, value in zip(tweet_iterator(self.data.test_set), hy):
            if want_scores:
                # Values exposing ``tolist`` (presumably numpy arrays or
                # scalars) are converted so ``json.dumps`` can handle them;
                # anything else is stored unchanged.
                try:
                    value = value.tolist()
                except AttributeError:
                    pass
                tweet['decision_function'] = value
            else:
                tweet['klass'] = str(value)
            line = json.dumps(tweet) + "\n"
            # The gzip handle is binary, the plain handle is text.
            fpt.write(line.encode('utf-8') if gzip_flag else line)
def main(self, args=None, model_svc_le=None):
    """Classify the test set, write the results as JSON lines, return them.

    Args:
        args: Argument list forwarded to ``self.parser.parse_args``; with
            ``None`` the parser falls back to its own default.
        model_svc_le: Optional ``(model, svc, le)`` triple. When ``None``
            the triple is unpickled from the file ``self.data.model``.

    Returns:
        The list of records produced by ``tweet_iterator``, each extended
        with ``'decision_function'``, ``'voc_affinity'`` and ``'klass'``.
        The same records are written, one JSON document per line, to
        ``self.get_output()``.
    """
    self.data = self.parser.parse_args(args=args)
    logging.basicConfig(level=self.data.verbose)

    if model_svc_le is not None:
        model, svc, le = model_svc_le
    else:
        # NOTE(review): unpickling can execute arbitrary code; the model
        # file must come from a trusted source.
        with open(self.data.model, 'rb') as model_file:
            model, svc, le = pickle.load(model_file)

    # ``model.vectorize`` returns a pair: the vector handed to the
    # classifier and a value reported back as 'voc_affinity'.
    veclist = []
    afflist = []
    for text in read_data(self.data.test_set):
        vector, affinity = model.vectorize(text)
        veclist.append(vector)
        afflist.append(affinity)

    hy = svc.decision_function(veclist)
    hyy = le.inverse_transform(svc.predict(veclist))

    L = []
    # Second pass over the test set to pair each record with its outputs.
    rows = zip(tweet_iterator(self.data.test_set), hy, hyy, afflist)
    for tweet, scores, klass, aff in rows:
        tweet['decision_function'] = scores.tolist()
        tweet['voc_affinity'] = aff
        tweet['klass'] = str(klass)
        L.append(tweet)

    with open(self.get_output(), 'w') as fpt:
        fpt.writelines(json.dumps(tweet) + "\n" for tweet in L)
    return L
def main(self, args=None, model_svc_le=None):
    """Predict the test set, write the results as JSON lines, return them.

    Args:
        args: Argument list forwarded to ``self.parser.parse_args``; with
            ``None`` the parser falls back to its own default.
        model_svc_le: Optional ``(model, svc, le)`` triple. When ``None``
            the triple is obtained from ``load_pickle(self.data.model)``.

    Returns:
        The list of records produced by ``tweet_iterator`` with the
        prediction fields added. The same records are written, one JSON
        document per line, to ``self.get_output()``.

    Two modes are selected by ``le``:

    * ``le is None``: the raw prediction is stored under ``VALUE``. If
      ``self.data.ordinal`` is set (format ``"start:end"``) the prediction
      is rounded, clamped to that range and stored as an ``int``.
    * otherwise: the predicted label (decoded with
      ``le.inverse_transform``) is stored under ``KLASS`` and
      ``'predicted'``, together with ``'voc_affinity'`` and, when the
      classifier offers them, ``'decision_function'`` or
      ``'predict_proba'``.
    """
    self.data = self.parser.parse_args(args=args)
    logging.basicConfig(level=self.data.verbose)

    if model_svc_le is not None:
        model, svc, le = model_svc_le
    else:
        model, svc, le = load_pickle(self.data.model)

    veclist = []
    afflist = []
    for text in read_data(self.data.test_set):
        vector, affinity = model.vectorize(text)
        veclist.append(vector)
        afflist.append(affinity)

    L = []
    if le is None:
        hy = svc.predict(veclist)
        if self.data.ordinal:
            low_text, high_text = self.data.ordinal.split(':')
            start = int(low_text)
            end = int(high_text)

            def as_value(pred):
                # Round, then clamp: the lower bound is tested first.
                c = round(pred)
                c = start if c < start else (end if c > end else c)
                if c == 0:
                    # handles IEEE's negative zero -0.0
                    c = 0
                return int(c)
        else:
            def as_value(pred):
                return pred

        for tweet, pred in zip(tweet_iterator(self.data.test_set), hy):
            tweet[VALUE] = as_value(pred)
            L.append(tweet)
    else:
        # Prefer decision values; fall back to probabilities; if the
        # classifier provides neither, no score field is emitted.
        try:
            score_key = 'decision_function'
            score_rows = svc.decision_function(veclist).tolist()
        except AttributeError:
            try:
                score_key = 'predict_proba'
                score_rows = svc.predict_proba(veclist).tolist()
            except AttributeError:
                score_key = score_rows = None

        hyy = le.inverse_transform(svc.predict(veclist))
        # Index-based access: a test set longer than the predictions
        # raises IndexError instead of being silently truncated.
        for i, tweet in enumerate(tweet_iterator(self.data.test_set)):
            if score_rows is not None:
                tweet[score_key] = score_rows[i]
            klass = hyy[i]
            aff = afflist[i]
            tweet['voc_affinity'] = aff
            tweet[KLASS] = str(klass)
            tweet['predicted'] = tweet[KLASS]
            L.append(tweet)

    with open(self.get_output(), 'w') as fpt:
        fpt.writelines(json.dumps(tweet) + "\n" for tweet in L)
    return L
def predict_file(self, fname, get_tweet='text', maxitems=1e100):
    """Return the ``predict_text`` result for every item read from a file.

    Args:
        fname: Passed to ``read_data`` as the source to read.
        get_tweet: Forwarded to ``read_data`` (presumably the name of the
            field holding the text; confirm against ``read_data``).
        maxitems: Forwarded to ``read_data`` (presumably an upper bound on
            the number of items read; confirm against ``read_data``).

    Returns:
        A list with one ``self.predict_text`` result per item, in the
        order ``read_data`` yields them.
    """
    predictions = []
    for text in read_data(fname, get_tweet=get_tweet, maxitems=maxitems):
        predictions.append(self.predict_text(text))
    return predictions
def predict_file(self, fname, get_tweet='text', maxitems=1e100):
    """Apply ``self.predict_text`` to each item ``read_data`` yields.

    Args:
        fname: Source handed to ``read_data``.
        get_tweet: Forwarded unchanged to ``read_data``.
        maxitems: Forwarded unchanged to ``read_data``.

    Returns:
        List of ``predict_text`` results, in reading order.
    """
    texts = read_data(fname, get_tweet=get_tweet, maxitems=maxitems)
    return list(self.predict_text(text) for text in texts)