Exemple #1
0
def test_predict_proba():
    """Check that predict_proba ranks the expected class highest.

    A FirstColFtClassifier is fitted on the ``txt``/``lbl`` columns of the
    ``_ftdf()`` fixture; 'woof woof' must score class 0 above class 1, and
    'meow meow' must score class 1 above class 0.
    """
    frame = _ftdf()
    classifier = FirstColFtClassifier()
    classifier.fit(frame[['txt']], frame['lbl'])

    woof_probs = classifier.predict_proba([['woof woof']])[0]
    assert woof_probs[1] < woof_probs[0]

    meow_probs = classifier.predict_proba([['meow meow']])[0]
    assert meow_probs[0] < meow_probs[1]
Exemple #2
0
def test_pickle_unfitted():
    """Check that an unfitted classifier survives a pickle round trip.

    The classifier is pickled to a temporary file *before* fitting and then
    loaded back.  Both the original and the loaded copy must raise
    ``NotFittedError`` on ``predict`` until fitted, and must predict the
    expected labels once fitted.

    The pickle goes to a ``tempfile.mkstemp`` file that is always removed,
    instead of a hard-coded path under the user's home directory whose
    parent directory might not exist and which was never cleaned up.
    """
    # Local import keeps this test self-contained.
    import tempfile

    ftdf = pd.DataFrame(data=[['woof woof', 0], ['meow meow', 1]],
                        columns=['txt', 'lbl'])
    expected = (
        ('woof woof', 0),
        ('meow meow', 1),
        ('meow', 1),
        ('woof lol', 0),
        ('meow lolz', 1),
    )

    def _check_fit_cycle(clf):
        # An unfitted model must refuse to predict.
        with pytest.raises(NotFittedError):
            clf.predict([['woof woof']])
        clf.fit(ftdf[['txt']], ftdf['lbl'])
        for text, label in expected:
            assert clf.predict([[text]])[0] == label

    ft_clf = FirstColFtClassifier()

    fd, pic_fpath = tempfile.mkstemp()
    try:
        # os.fdopen takes ownership of fd and closes it with the file object.
        with os.fdopen(fd, 'wb') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)

        _check_fit_cycle(ft_clf)

        # The unpickled object must be a distinct, still-unfitted classifier.
        assert ft_clf2 != ft_clf
        _check_fit_cycle(ft_clf2)
    finally:
        # Remove the temp file even when an assertion above fails.
        os.unlink(pic_fpath)
Exemple #3
0
def test_pickle():
    """Check that a fitted classifier predicts identically after pickling.

    The classifier is fitted on a two-row frame ('woof woof' -> 0,
    'meow meow' -> 1), verified, pickled to a temporary file, loaded back,
    and the loaded copy is verified against the same expectations.

    Cleanup runs in a ``finally`` block so the temporary file (and its
    descriptor) are released even when an assertion fails.
    """
    ftdf = pd.DataFrame(data=[['woof woof', 0], ['meow meow', 1]],
                        columns=['txt', 'lbl'])
    expected = (
        ('woof woof', 0),
        ('meow meow', 1),
        ('meow', 1),
        ('woof lol', 0),
        ('meow lolz', 1),
    )

    def _check_predictions(clf):
        for text, label in expected:
            assert clf.predict([[text]])[0] == label

    ft_clf = FirstColFtClassifier()
    ft_clf.fit(ftdf[['txt']], ftdf['lbl'])
    _check_predictions(ft_clf)

    fd, pic_fpath = tempfile.mkstemp()
    try:
        # os.fdopen takes ownership of fd, so closing the file closes fd too.
        with os.fdopen(fd, 'wb') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)

        # The loaded object must be a distinct instance that still predicts.
        assert ft_clf2 != ft_clf
        _check_predictions(ft_clf2)
    finally:
        # Clean up regardless of test outcome.
        os.unlink(pic_fpath)
Exemple #4
0
def test_predict():
    """Check label predictions of a classifier fitted on ``_ftdf()``.

    Texts containing 'woof' are expected to map to label 0 and texts
    containing 'meow' to label 1, including inputs with unseen words.
    """
    frame = _ftdf()
    classifier = FirstColFtClassifier()
    classifier.fit(frame[['txt']], frame['lbl'])

    expectations = (
        ('woof woof', 0),
        ('meow meow', 1),
        ('meow', 1),
        ('woof lol', 0),
        ('meow lolz', 1),
    )
    for text, label in expectations:
        assert classifier.predict([[text]])[0] == label
Exemple #5
0
def test_pickle():
    """Check that a fitted classifier predicts identically after pickling.

    Fits on a two-row frame ('woof woof' -> 0, 'meow meow' -> 1), verifies
    the predictions, round-trips the classifier through a pickle file, and
    verifies the loaded copy against the same expectations.

    The temporary file is removed in a ``finally`` block so that a failing
    assertion does not leak the file or its descriptor.
    """
    ftdf = pd.DataFrame(
        data=[['woof woof', 0], ['meow meow', 1]],
        columns=['txt', 'lbl']
    )
    expected = (
        ('woof woof', 0),
        ('meow meow', 1),
        ('meow', 1),
        ('woof lol', 0),
        ('meow lolz', 1),
    )

    def _check_predictions(clf):
        for text, label in expected:
            assert clf.predict([[text]])[0] == label

    ft_clf = FirstColFtClassifier()
    ft_clf.fit(ftdf[['txt']], ftdf['lbl'])
    _check_predictions(ft_clf)

    fd, pic_fpath = tempfile.mkstemp()
    try:
        # os.fdopen takes ownership of fd, so closing the file closes fd too.
        with os.fdopen(fd, 'wb') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)

        # The loaded object must be a distinct instance that still predicts.
        assert ft_clf2 != ft_clf
        _check_predictions(ft_clf2)
    finally:
        # Clean up regardless of test outcome.
        os.unlink(pic_fpath)
Exemple #6
0
def test_predict():
    """Fit on the ``_ftdf()`` fixture and verify predicted labels.

    Each sample text is predicted on its own and compared with the label
    it is expected to receive (0 for 'woof' texts, 1 for 'meow' texts).
    """
    data = _ftdf()
    model = FirstColFtClassifier()
    model.fit(data[['txt']], data['lbl'])

    label_by_text = [
        ('woof woof', 0),
        ('meow meow', 1),
        ('meow', 1),
        ('woof lol', 0),
        ('meow lolz', 1),
    ]
    for sample, wanted in label_by_text:
        predicted = model.predict([[sample]])[0]
        assert predicted == wanted
Exemple #7
0
def test_predict_proba():
    """Fit on the ``_ftdf()`` fixture and verify probability ordering.

    For 'woof woof' the first probability must exceed the second; for
    'meow meow' the second must exceed the first.
    """
    data = _ftdf()
    model = FirstColFtClassifier()
    model.fit(data[['txt']], data['lbl'])

    probs_for_woof = model.predict_proba([['woof woof']])[0]
    assert probs_for_woof[1] < probs_for_woof[0]

    probs_for_meow = model.predict_proba([['meow meow']])[0]
    assert probs_for_meow[0] < probs_for_meow[1]
Exemple #8
0
def test_cross_val():
    """Smoke-test that each classifier variant works with cross_val_score.

    Every variant is built with ``epoch=3`` and scored with 2-fold
    cross-validation on a fresh ``_big_ftdf()`` frame.  The scores are not
    inspected; the test only requires that no exception is raised.
    """
    # Factories defer construction so each classifier is created right
    # before its own data frame, as in a hand-written sequence.
    classifier_factories = (
        lambda: ColLblBasedFtClassifier('txt', epoch=3),
        lambda: IdxBasedFtClassifier(0, epoch=3),
        lambda: FirstColFtClassifier(epoch=3),
    )
    for make_classifier in classifier_factories:
        classifier = make_classifier()
        frame = _big_ftdf()
        cross_val_score(
            classifier,
            X=frame[['txt']],
            y=frame['lbl'],
            cv=2,
            scoring='accuracy',
        )
Exemple #9
0
def test_bad_shape():
    """Check that fit() rejects badly shaped inputs with ValueError.

    Two input pairs are tried on the same classifier instance: a flat
    ``X`` with a flat ``y``, and a nested ``X`` with a nested ``y``.
    """
    classifier = FirstColFtClassifier()
    bad_inputs = (
        ([7], [0]),
        ([[7]], [[0]]),
    )
    for features, labels in bad_inputs:
        with pytest.raises(ValueError):
            classifier.fit(features, labels)
Exemple #10
0
    # Wrap content_train in a list, convert to an array and transpose it.
    # NOTE(review): presumably this yields a single-column 2-D array with one
    # row per training text -- confirm against the shape of content_train.
    train_data_format = np.asarray([content_train]).T
    logger.info("complete formate train data")

    columns = train_data_df.columns.values.tolist()

    # Train one classifier per column of train_data_df
    logger.info("start train model")

    # Maps column name -> classifier fitted on that column's values.
    classifier_dict = dict()

    # The first two columns are skipped.
    # NOTE(review): presumably they hold non-label data (e.g. id and text);
    # verify against the layout of train_data_df.
    for column in columns[2:]:
        train_label = train_data_df[column]
        logger.info("start train %s model" % column)
        sk_clf = FirstColFtClassifier(lr=learning_rate,
                                      epoch=epoch,
                                      wordNgrams=word_ngrams,
                                      minCount=min_count,
                                      verbose=2)
        # Every classifier is fitted on the same formatted training data;
        # only the label column differs.
        sk_clf.fit(train_data_format, train_label)
        logger.info("complete train %s model" % column)
        classifier_dict[column] = sk_clf

    logger.info("complete train model")
    logger.info("start save model")

    model_path = config.model_path

    # Create the output directory if it does not exist yet.
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # All classifiers are dumped together as a single dict.
    # NOTE(review): the target is built by plain string concatenation, so
    # this relies on config.model_path ending with a path separator -- confirm.
    joblib.dump(classifier_dict, model_path + model_name)
Exemple #11
0
def test_bad_shape():
    """Verify that fit() raises ValueError for badly shaped inputs.

    The same classifier instance is fed a flat ``X``/``y`` pair and then
    a nested ``X``/``y`` pair; each call must raise.
    """
    model = FirstColFtClassifier()
    for bad_x, bad_y in [([7], [0]), ([[7]], [[0]])]:
        with pytest.raises(ValueError):
            model.fit(bad_x, bad_y)
 def create_model(self):
     """Return a new FirstColFtClassifier with fixed hyperparameters.

     The classifier is constructed with lr=1.0, epoch=10, wordNgrams=1,
     minCount=5 and verbose=2; no instance state is read.
     """
     return FirstColFtClassifier(
         lr=1.0,
         epoch=10,
         wordNgrams=1,
         minCount=5,
         verbose=2,
     )
Exemple #13
0
def test_pickle(quantize):
    """Check pickling of a fitted classifier, or the quantize failure path.

    Args:
        quantize: When truthy, the test only verifies that
            ``quantize(cutoff=1)`` raises ``ValueError`` on the fitted
            model and that the model stays unquantized, then returns.
            When falsy, the test verifies predictions before and after a
            pickle round trip through a temporary file.

    The former trailing ``if quantize:`` check was unreachable because of
    the early return and has been removed.  Cleanup runs in ``finally`` so
    the temporary file is removed even if an assertion fails.
    """
    ftdf = pd.DataFrame(data=[['woof woof', 0], ['meow meow', 1]],
                        columns=['txt', 'lbl'])
    ft_clf = FirstColFtClassifier()
    ft_clf.fit(ftdf[['txt']], ftdf['lbl'])

    if quantize:
        with pytest.raises(ValueError):
            ft_clf.quantize(cutoff=1)
        assert not ft_clf.is_quantized()
        return

    expected = (
        ('woof woof', 0),
        ('meow meow', 1),
        ('meow', 1),
        ('woof lol', 0),
        ('meow lolz', 1),
    )

    def _check_predictions(clf):
        for text, label in expected:
            assert clf.predict([[text]])[0] == label

    _check_predictions(ft_clf)

    fd, pic_fpath = tempfile.mkstemp()
    try:
        # os.fdopen takes ownership of fd, so closing the file closes fd too.
        with os.fdopen(fd, 'wb') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)

        # The loaded object must be a distinct instance that still predicts.
        assert ft_clf2 != ft_clf
        _check_predictions(ft_clf2)
    finally:
        # Clean up regardless of test outcome.
        os.unlink(pic_fpath)