예제 #1
0
 def main(self):
     """Predict the test set with a stored model and write JSON lines.

     Parses the command line into ``self.data``, loads the model named by
     ``self.data.model`` and maps every item yielded by
     ``read_data(self.data.test_set)`` through ``svc.model[...]``. Each
     tweet from ``tweet_iterator`` is then written, one JSON document per
     line, to the path returned by ``self.get_output()``; the file is
     opened through ``gzip`` when that path ends in ``.gz``.

     When ``self.data.decision_function`` is falsy each tweet gains a
     ``'klass'`` key with the stringified prediction; otherwise it gains
     a ``'decision_function'`` key with the decision values.
     """
     self.data = self.parser.parse_args()
     classifier = load_model(self.data.model)
     vectors = [classifier.model[text]
                for text in read_data(self.data.test_set)]
     target = self.get_output()
     gzip_flag = target.endswith('.gz')
     # The gzip stream is binary, the plain one is text: lines written to
     # the former have to be encoded first.
     stream = gzip.open(target, 'wb') if gzip_flag else open(target, 'w')
     with stream as fpt:
         if self.data.decision_function:
             as_scores = True
             predictions = classifier.decision_function(vectors)
         else:
             as_scores = False
             predictions = classifier.predict(vectors)
         pairs = zip(tweet_iterator(self.data.test_set), predictions)
         for tweet, value in pairs:
             if as_scores:
                 # Array-like rows are converted to plain lists; values
                 # without ``tolist`` are stored unchanged.
                 try:
                     value = value.tolist()
                 except AttributeError:
                     pass
                 tweet['decision_function'] = value
             else:
                 tweet['klass'] = str(value)
             line = json.dumps(tweet) + "\n"
             fpt.write(line.encode('utf-8') if gzip_flag else line)
예제 #2
0
    def main(self, args=None, model_svc_le=None):
        """Classify the test set and store the annotated tweets.

        Args:
            args: Passed through to ``self.parser.parse_args``.
            model_svc_le: Optional ``(model, svc, le)`` triple. When it is
                ``None`` the triple is unpickled from ``self.data.model``.

        Returns:
            The list of tweets read from ``self.data.test_set``, each
            extended with the keys ``'decision_function'``,
            ``'voc_affinity'`` and ``'klass'``. The same tweets are also
            written as JSON lines to ``self.get_output()``.
        """
        self.data = self.parser.parse_args(args=args)
        logging.basicConfig(level=self.data.verbose)
        if model_svc_le is not None:
            model, svc, le = model_svc_le
        else:
            # NOTE(review): unpickling runs arbitrary code from the file;
            # only load model files from a trusted source.
            with open(self.data.model, 'rb') as fpt:
                model, svc, le = pickle.load(fpt)

        vectors, affinities = [], []
        for text in read_data(self.data.test_set):
            vector, affinity = model.vectorize(text)
            vectors.append(vector)
            affinities.append(affinity)

        annotated = []
        all_scores = svc.decision_function(vectors)
        labels = le.inverse_transform(svc.predict(vectors))
        rows = zip(tweet_iterator(self.data.test_set),
                   all_scores, labels, affinities)
        for tweet, scores, label, affinity in rows:
            tweet['decision_function'] = scores.tolist()
            tweet['voc_affinity'] = affinity
            tweet['klass'] = str(label)
            annotated.append(tweet)

        # The output file is only opened once every tweet has been annotated.
        with open(self.get_output(), 'w') as fpt:
            fpt.writelines(json.dumps(tweet) + "\n" for tweet in annotated)

        return annotated
예제 #3
0
    def main(self, args=None, model_svc_le=None):
        """Predict the test set and store the annotated tweets.

        Args:
            args: Passed through to ``self.parser.parse_args``.
            model_svc_le: Optional ``(model, svc, le)`` triple. When it is
                ``None`` the triple comes from
                ``load_pickle(self.data.model)``.

        Returns:
            The list of annotated tweets, which is also written as JSON
            lines to ``self.get_output()``.

        When ``le`` is ``None`` each tweet receives the prediction under
        ``VALUE``; with ``self.data.ordinal`` set to ``"start:end"`` the
        prediction is rounded and clamped to that range first. Otherwise
        each tweet receives ``'voc_affinity'``, ``KLASS``, ``'predicted'``
        and, when the estimator offers them, ``'decision_function'`` or
        ``'predict_proba'``.
        """
        self.data = self.parser.parse_args(args=args)
        logging.basicConfig(level=self.data.verbose)
        if model_svc_le is not None:
            model, svc, le = model_svc_le
        else:
            model, svc, le = load_pickle(self.data.model)

        vectors, affinities = [], []
        for text in read_data(self.data.test_set):
            vector, affinity = model.vectorize(text)
            vectors.append(vector)
            affinities.append(affinity)

        results = []
        if le is not None:
            decision_function = predict_proba = None
            # Prefer decision values; fall back to probabilities; an
            # estimator offering neither simply contributes no scores.
            try:
                decision_function = svc.decision_function(vectors).tolist()
            except AttributeError:
                try:
                    predict_proba = svc.predict_proba(vectors).tolist()
                except AttributeError:
                    pass

            labels = le.inverse_transform(svc.predict(vectors))

            for idx, tweet in enumerate(tweet_iterator(self.data.test_set)):
                if decision_function is not None:
                    tweet['decision_function'] = decision_function[idx]
                if predict_proba is not None:
                    tweet['predict_proba'] = predict_proba[idx]

                label, affinity = labels[idx], affinities[idx]
                tweet['voc_affinity'] = affinity
                tweet[KLASS] = str(label)
                tweet['predicted'] = tweet[KLASS]
                results.append(tweet)
        else:
            predictions = svc.predict(vectors)

            if self.data.ordinal:
                low_text, high_text = self.data.ordinal.split(':')
                start, end = int(low_text), int(high_text)

                pairs = zip(tweet_iterator(self.data.test_set), predictions)
                for tweet, pred in pairs:
                    rounded = round(pred)
                    # Clamp into [start, end]; the lower bound is tested
                    # first.
                    level = (start if rounded < start
                             else end if rounded > end
                             else rounded)

                    if level == 0:  # normalizes IEEE negative zero (-0.0)
                        level = 0

                    tweet[VALUE] = int(level)
                    results.append(tweet)
            else:
                pairs = zip(tweet_iterator(self.data.test_set), predictions)
                for tweet, pred in pairs:
                    tweet[VALUE] = pred
                    results.append(tweet)

        # The output file is only opened once every tweet has been annotated.
        with open(self.get_output(), 'w') as fpt:
            fpt.writelines(json.dumps(tweet) + "\n" for tweet in results)

        return results
예제 #4
0
 def predict_file(self, fname, get_tweet='text', maxitems=1e100):
     """Return ``self.predict_text`` applied to every item of a file.

     ``fname``, ``get_tweet`` and ``maxitems`` are forwarded unchanged to
     ``read_data``; the predictions are returned as a list in the order
     the items are yielded.
     """
     texts = read_data(fname, get_tweet=get_tweet, maxitems=maxitems)
     return [self.predict_text(text) for text in texts]
예제 #5
0
 def predict_file(self, fname, get_tweet='text', maxitems=1e100):
     """Predict every item that ``read_data`` yields for ``fname``.

     ``get_tweet`` and ``maxitems`` are passed straight to ``read_data``.
     Returns a list holding the result of ``self.predict_text`` for each
     item, in iteration order.
     """
     predictions = []
     for text in read_data(fname, get_tweet=get_tweet, maxitems=maxitems):
         predictions.append(self.predict_text(text))
     return predictions