Esempio n. 1
0
    def main(self, args=None):
        """Write k-fold decision-function values for every training tweet.

        The first parameter set stored in ``params_fname`` configures a
        ``TextModel`` built over the training corpus.  For each ``KFold``
        split an ``SVC`` is fitted on the training indices and its
        ``decision_function`` output is stored at the held-out indices.
        Every tweet of the training set is then written to the output file
        as one JSON line carrying an extra ``'decision_function'`` entry.

        :param args: argument list forwarded to ``self.parser.parse_args``
        :return: list with one decision-function value per training example
        """
        self.data = self.parser.parse_args(args=args)
        assert not self.data.update_klass
        logging.basicConfig(level=self.data.verbose)
        logging.getLogger('b4msa').setLevel(self.data.verbose)
        best = load_json(self.data.params_fname)[0]
        print(self.data.params_fname, self.data.training_set)
        corpus, labels = read_data_labels(self.data.training_set)
        le = LabelEncoder()
        le.fit(labels)
        y = le.transform(labels)
        t = TextModel(corpus, **best)
        X = [t[x] for x in corpus]
        # One slot per example; each slot is filled by the fold that holds
        # that example out.
        hy = [None] * len(y)
        folds = KFold(n_splits=self.data.kratio,
                      shuffle=True,
                      random_state=self.data.seed)
        for tr, ts in folds.split(X):
            c = SVC(model=t)
            c.fit([X[x] for x in tr], [y[x] for x in tr])
            scores = c.decision_function([X[x] for x in ts])
            for k, v in zip(ts, scores):
                hy[k] = v

        with open(self.get_output(), 'w') as fpt:
            # Tweets are assumed to come back in the same order as the
            # corpus read above -- TODO confirm against tweet_iterator.
            for i, tweet in enumerate(tweet_iterator(self.data.training_set)):
                tweet['decision_function'] = hy[i].tolist()
                fpt.write(json.dumps(tweet) + "\n")
        return hy
Esempio n. 2
0
def test_test():
    """Run the ``params``, ``train`` and ``test`` entry points end to end.

    Each entry point is driven through ``sys.argv``.  The file written by
    ``test`` must yield a non-empty label list, and every JSON record in it
    must contain a ``'q_voc_ratio'`` key.
    """
    from b4msa.command_line import params, train, test
    from b4msa.utils import read_data_labels
    import os
    import sys
    import tempfile
    import json
    # NOTE(review): tempfile.mktemp() is deprecated and race-prone; it is
    # kept because the entry points presumably create the output file
    # themselves -- confirm before switching to mkstemp().
    output = tempfile.mktemp()
    # os.path.join stays correct when dirname(__file__) is empty, where
    # plain concatenation would produce the absolute path '/text.json'.
    fname = os.path.join(os.path.dirname(__file__), 'text.json')
    sys.argv = ['b4msa', '-o', output, '-k', '2', fname, '-s', '2']
    params()
    sys.argv = ['b4msa', '-m', output, fname, '-o', output]
    train()
    output2 = tempfile.mktemp()
    sys.argv = ['b4msa', '-m', output, fname, '-o', output2]
    test()
    X, y = read_data_labels(output2)
    print(y)
    os.unlink(output)
    with open(output2) as fpt:
        a = [json.loads(x) for x in fpt]
    os.unlink(output2)
    for x in a:
        assert 'q_voc_ratio' in x
    assert len(y)
Esempio n. 3
0
def test_kfold():
    """Check that 10-fold ``SVC.predict_kfold`` only yields known labels."""
    import os
    from b4msa.classifier import SVC
    from b4msa.utils import read_data_labels
    # os.path.join stays correct when dirname(__file__) is empty, where
    # plain concatenation would produce the absolute path '/text.json'.
    fname = os.path.join(os.path.dirname(__file__), 'text.json')
    X, y = read_data_labels(fname, get_klass='klass', get_tweet='text')
    hy = SVC.predict_kfold(X, y, n_folds=10)
    for x in hy:
        assert x in ['POS', 'NEU', 'NEG']
Esempio n. 4
0
 def fit_file(self, fname, get_tweet='text', get_klass='klass', maxitems=1e100):
     """Fit this classifier on the labelled texts stored in ``fname``.

     The texts and labels are loaded with ``read_data_labels``, each text
     is mapped through ``self.model`` and the result is passed to
     ``self.fit`` together with the labels.

     :param fname: path handed to ``read_data_labels``
     :param get_tweet: forwarded to ``read_data_labels`` as ``get_tweet``
     :param get_klass: forwarded to ``read_data_labels`` as ``get_klass``
     :param maxitems: forwarded to ``read_data_labels`` as ``maxitems``
     :return: ``self``, so calls can be chained
     """
     texts, klasses = read_data_labels(
         fname,
         get_klass=get_klass,
         get_tweet=get_tweet,
         maxitems=maxitems,
     )
     vectors = [self.model[text] for text in texts]
     self.fit(vectors, klasses)
     return self
Esempio n. 5
0
def test_kfold_pool():
    """Check 2-fold ``SVC.predict_kfold`` run through a 2-process pool."""
    import os
    from b4msa.classifier import SVC
    from b4msa.utils import read_data_labels
    from multiprocessing import Pool
    # os.path.join stays correct when dirname(__file__) is empty, where
    # plain concatenation would produce the absolute path '/text.json'.
    fname = os.path.join(os.path.dirname(__file__), 'text.json')
    X, y = read_data_labels(fname, get_klass='klass', get_tweet='text')
    pool = Pool(2)
    try:
        hy = SVC.predict_kfold(X, y, n_folds=2, pool=pool)
        for x in hy:
            assert x in ['POS', 'NEU', 'NEG']
    finally:
        # Release the worker processes even when the call or an assertion
        # fails, so a failing test does not leak them.
        pool.close()
        pool.join()
Esempio n. 6
0
def test_SVC_predict():
    """Fit an ``SVC`` on the fixture and classify one positive sentence."""
    from b4msa.classifier import SVC
    from b4msa.textmodel import TextModel
    from b4msa.utils import read_data_labels
    import os
    # os.path.join stays correct when dirname(__file__) is empty, where
    # plain concatenation would produce the absolute path '/text.json'.
    fname = os.path.join(os.path.dirname(__file__), 'text.json')
    # Only the texts are needed here; fit_file re-reads the labels itself.
    X, _ = read_data_labels(fname)
    t = TextModel(X)
    c = SVC(t)
    c.fit_file(fname)
    label = c.predict_text('Excelente dia b4msa')
    assert label == 'POS'
Esempio n. 7
0
def test_SVC_predict_from_file():
    """Fit an ``SVC`` on the fixture and predict that same file back."""
    from b4msa.classifier import SVC
    from b4msa.textmodel import TextModel
    from b4msa.utils import read_data_labels
    import os
    # os.path.join stays correct when dirname(__file__) is empty, where
    # plain concatenation would produce the absolute path '/text.json'.
    fname = os.path.join(os.path.dirname(__file__), 'text.json')
    # Only the texts are needed here; fit_file re-reads the labels itself.
    X, _ = read_data_labels(fname)
    t = TextModel(X)
    c = SVC(t)
    c.fit_file(fname)
    predictions = c.predict_file(fname)
    for label in predictions:
        assert label in ['POS', 'NEU', 'NEG']
Esempio n. 8
0
def test_SVC_predict_from_file():
    """Fit an ``SVC`` on ``text.json`` and print labels for ``test_text.json``.

    Both paths are relative, so this has to be run from the directory that
    contains the two files.  Nothing is asserted; the predictions are only
    printed.
    """
    from b4msa.classifier import SVC
    from b4msa.textmodel import TextModel
    from b4msa.utils import read_data_labels
    fname = 'text.json'
    X, y = read_data_labels(fname)
    t = TextModel(X)
    c = SVC(t)
    c.fit_file(fname)
    y = c.predict_file("test_text.json")
    # Single-argument call form prints the same on Python 2 and Python 3;
    # the original print statements were a SyntaxError on Python 3.
    print("Final Labels")
    print(y)
Esempio n. 9
0
    def predict_kfold_params(cls, fname, n_folds=10, score=None, numprocs=None, seed=0, param_kwargs=None):
        """Run a parameter search scored by k-fold evaluation on ``fname``.

        When ``numprocs`` is given, a process pool of that size is created.
        If ``n_folds`` is divisible by ``numprocs`` the pool is handed to the
        ``Wrapper``; otherwise it is handed to ``ParameterSelection.search``.
        Without ``numprocs`` no pool is used.

        :param fname: path handed to ``read_data_labels``
        :param n_folds: number of folds forwarded to ``Wrapper``
        :param score: scoring specification forwarded to ``Wrapper``
        :param numprocs: size of the process pool, or ``None`` for no pool
        :param seed: seed forwarded to ``Wrapper``
        :param param_kwargs: extra keyword arguments for ``search``;
            ``None`` means no extra arguments
        :return: whatever ``ParameterSelection().search`` returns
        """
        from b4msa.params import ParameterSelection, Wrapper
        if param_kwargs is None:
            param_kwargs = {}
        X, y = read_data_labels(fname)
        if numprocs is not None:
            pool = Pool(numprocs)
        else:
            pool = None
            numprocs = 1
        # Keep a handle for cleanup: ``pool`` may be reset to None below.
        workers = pool

        try:
            if n_folds % numprocs == 0:
                f = Wrapper(X, y, score, n_folds, cls, pool=pool, seed=seed)
                pool = None
            else:
                f = Wrapper(X, y, score, n_folds, cls, seed=seed)

            return ParameterSelection().search(f.f, pool=pool, **param_kwargs)
        finally:
            # The pool is created here, so it is released here; previously
            # the worker processes were never shut down.
            # NOTE(review): assumes ``search`` has consumed every pool result
            # before returning -- confirm.
            if workers is not None:
                workers.terminate()
                workers.join()
Esempio n. 10
0
 def fit_from_file(cls, fname, textModel_params=None):
     """Build a ``TextModel`` from ``fname`` and fit a classifier on it.

     :param fname: path handed to ``read_data_labels``
     :param textModel_params: keyword arguments for ``TextModel``;
         ``None`` means no extra arguments
     :return: the result of calling ``fit`` on the new ``cls`` instance
     """
     # ``None`` replaces the former mutable ``{}`` default; behaviour for
     # callers that omit the argument is unchanged.
     if textModel_params is None:
         textModel_params = {}
     X, y = read_data_labels(fname)
     model = TextModel(X, **textModel_params)
     svc = cls(model)
     return svc.fit([model[x] for x in X], y)
Esempio n. 11
0
def test_read_data_labels():
    """Smoke test: ``read_data_labels`` reads the fixture without raising."""
    from b4msa.utils import read_data_labels
    import os
    here = os.path.dirname(__file__)
    fixture = os.path.join(here, "text.json")
    read_data_labels(fixture)