Example #1
0
def test_pickle_unfitted():
    """Check that an unfitted classifier survives a pickle round trip.

    The classifier is pickled to a file *before* being fitted and then
    loaded back.  Both the original and the restored copy must raise
    ``NotFittedError`` on ``predict`` until they are fitted, and must
    predict the expected labels afterwards.
    """
    # Local import so this test does not rely on a module-level import.
    import tempfile

    ftdf = pd.DataFrame(data=[['woof woof', 0], ['meow meow', 1]],
                        columns=['txt', 'lbl'])
    ft_clf = FirstColFtClassifier()

    # Use a fresh temporary file instead of a fixed path under the home
    # directory: the fixed path fails when its parent directory is missing
    # and leaves a stray file behind after every run.
    fd, pic_fpath = tempfile.mkstemp()
    os.close(fd)  # Only the path is needed; avoid a file-handle leak
    try:
        with open(pic_fpath, 'wb') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)
    finally:
        os.unlink(pic_fpath)

    with pytest.raises(NotFittedError):
        assert ft_clf.predict([['woof woof']])[0] == 0

    ft_clf.fit(ftdf[['txt']], ftdf['lbl'])
    assert ft_clf.predict([['woof woof']])[0] == 0
    assert ft_clf.predict([['meow meow']])[0] == 1
    assert ft_clf.predict([['meow']])[0] == 1
    assert ft_clf.predict([['woof lol']])[0] == 0
    assert ft_clf.predict([['meow lolz']])[0] == 1

    # The restored object is a distinct, still-unfitted classifier.
    assert ft_clf2 != ft_clf
    with pytest.raises(NotFittedError):
        assert ft_clf2.predict([['woof woof']])[0] == 0

    ft_clf2.fit(ftdf[['txt']], ftdf['lbl'])
    assert ft_clf2.predict([['woof woof']])[0] == 0
    assert ft_clf2.predict([['meow meow']])[0] == 1
    assert ft_clf2.predict([['meow']])[0] == 1
    assert ft_clf2.predict([['woof lol']])[0] == 0
    assert ft_clf2.predict([['meow lolz']])[0] == 1
Example #2
0
def test_pickle():
    """Check that a fitted classifier predicts the same after pickling.

    The classifier is fitted on a two-row frame, pickled to a temporary
    file, loaded back, and the restored copy must give the same labels
    for the same inputs.
    """
    ftdf = pd.DataFrame(
        data=[['woof woof', 0], ['meow meow', 1]],
        columns=['txt', 'lbl']
    )
    ft_clf = FirstColFtClassifier()
    ft_clf.fit(ftdf[['txt']], ftdf['lbl'])

    assert ft_clf.predict([['woof woof']])[0] == 0
    assert ft_clf.predict([['meow meow']])[0] == 1
    assert ft_clf.predict([['meow']])[0] == 1
    assert ft_clf.predict([['woof lol']])[0] == 0
    assert ft_clf.predict([['meow lolz']])[0] == 1

    fd, pic_fpath = tempfile.mkstemp()
    # Close the descriptor right away: only the path is used below, and
    # keeping it open while re-opening the file leaks a handle on failure.
    os.close(fd)
    try:
        with open(pic_fpath, 'wb+') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)

        assert ft_clf2 != ft_clf
        assert ft_clf2.predict([['woof woof']])[0] == 0
        assert ft_clf2.predict([['meow meow']])[0] == 1
        assert ft_clf2.predict([['meow']])[0] == 1
        assert ft_clf2.predict([['woof lol']])[0] == 0
        assert ft_clf2.predict([['meow lolz']])[0] == 1
    finally:
        # Clean up even when an assertion above fails.
        os.unlink(pic_fpath)
Example #3
0
def test_pickle():
    """Verify a pickle round trip of a fitted classifier keeps predictions.

    Fits on two labelled rows, checks the predictions, dumps the classifier
    to a temporary file, loads it back, and checks that the loaded copy is
    a different object producing the same predictions.
    """
    ftdf = pd.DataFrame(data=[['woof woof', 0], ['meow meow', 1]],
                        columns=['txt', 'lbl'])
    ft_clf = FirstColFtClassifier()
    ft_clf.fit(ftdf[['txt']], ftdf['lbl'])

    assert ft_clf.predict([['woof woof']])[0] == 0
    assert ft_clf.predict([['meow meow']])[0] == 1
    assert ft_clf.predict([['meow']])[0] == 1
    assert ft_clf.predict([['woof lol']])[0] == 0
    assert ft_clf.predict([['meow lolz']])[0] == 1

    fd, pic_fpath = tempfile.mkstemp()
    try:
        # The descriptor itself is never used; release it before the file
        # is reopened by path so no handle is leaked.
        os.close(fd)

        with open(pic_fpath, 'wb+') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)

        assert ft_clf2 != ft_clf
        assert ft_clf2.predict([['woof woof']])[0] == 0
        assert ft_clf2.predict([['meow meow']])[0] == 1
        assert ft_clf2.predict([['meow']])[0] == 1
        assert ft_clf2.predict([['woof lol']])[0] == 0
        assert ft_clf2.predict([['meow lolz']])[0] == 1
    finally:
        # Remove the temporary file whether or not the assertions passed.
        os.unlink(pic_fpath)
Example #4
0
def test_predict_proba():
    """Check which class gets the higher probability for each sample text."""
    frame = _ftdf()
    classifier = FirstColFtClassifier()
    classifier.fit(frame[['txt']], frame['lbl'])

    def first_row(text):
        # Probabilities predicted for a single one-column sample.
        return classifier.predict_proba([[text]])[0]

    woof_probs = first_row('woof woof')
    assert woof_probs[0] > woof_probs[1]

    meow_probs = first_row('meow meow')
    assert meow_probs[1] > meow_probs[0]
Example #5
0
def test_predict_proba():
    """Assert predict_proba ranks the expected class index above the other."""
    training = _ftdf()
    model = FirstColFtClassifier()
    model.fit(training[['txt']], training['lbl'])

    # (sample text, index expected to score higher, index expected lower)
    cases = (
        ('woof woof', 0, 1),
        ('meow meow', 1, 0),
    )
    for text, likelier, other in cases:
        probs = model.predict_proba([[text]])[0]
        assert probs[likelier] > probs[other]
Example #6
0
def test_predict():
    """Check the label predicted for several single-sample inputs."""
    frame = _ftdf()
    classifier = FirstColFtClassifier()
    classifier.fit(frame[['txt']], frame['lbl'])

    # (sample text, expected label), checked in this order.
    expectations = (
        ('woof woof', 0),
        ('meow meow', 1),
        ('meow', 1),
        ('woof lol', 0),
        ('meow lolz', 1),
    )
    for text, expected_label in expectations:
        assert classifier.predict([[text]])[0] == expected_label
Example #7
0
def test_predict():
    """Fit on the shared sample frame and verify predicted labels."""
    samples = _ftdf()
    model = FirstColFtClassifier()
    model.fit(samples[['txt']], samples['lbl'])

    def predicted_label(text):
        # Label predicted for one single-column row.
        batch = [[text]]
        return model.predict(batch)[0]

    assert predicted_label('woof woof') == 0
    assert predicted_label('meow meow') == 1
    assert predicted_label('meow') == 1
    assert predicted_label('woof lol') == 0
    assert predicted_label('meow lolz') == 1
Example #8
0
def test_pickle(quantize):
    """Check pickling of a fitted classifier, or that quantization fails.

    Args:
        quantize: When truthy, the test only checks that
            ``quantize(cutoff=1)`` raises ``ValueError`` on this model and
            leaves it unquantized, then returns.  When falsy, the full
            pickle round trip is exercised.  Presumably supplied by a
            pytest parametrization or fixture not visible here.
    """
    ftdf = pd.DataFrame(data=[['woof woof', 0], ['meow meow', 1]],
                        columns=['txt', 'lbl'])
    ft_clf = FirstColFtClassifier()
    ft_clf.fit(ftdf[['txt']], ftdf['lbl'])
    if quantize:
        with pytest.raises(ValueError):
            ft_clf.quantize(cutoff=1)
        assert not ft_clf.is_quantized()
        return

    assert ft_clf.predict([['woof woof']])[0] == 0
    assert ft_clf.predict([['meow meow']])[0] == 1
    assert ft_clf.predict([['meow']])[0] == 1
    assert ft_clf.predict([['woof lol']])[0] == 0
    assert ft_clf.predict([['meow lolz']])[0] == 1

    fd, pic_fpath = tempfile.mkstemp()
    os.close(fd)  # Only the path is needed; prevent a file-handle leak
    try:
        with open(pic_fpath, 'wb+') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)

        assert ft_clf2 != ft_clf
        assert ft_clf2.predict([['woof woof']])[0] == 0
        assert ft_clf2.predict([['meow meow']])[0] == 1
        assert ft_clf2.predict([['meow']])[0] == 1
        assert ft_clf2.predict([['woof lol']])[0] == 0
        assert ft_clf2.predict([['meow lolz']])[0] == 1
        # No quantization check on ft_clf2 here: the quantize case has
        # already returned above, so such a check could never run.
    finally:
        # Clean up even when an assertion above fails.
        os.unlink(pic_fpath)
Example #9
0
def test_bad_shape():
    """Check that fit raises ValueError for each malformed (X, y) pair."""
    classifier = FirstColFtClassifier()
    # Both calls go to the same classifier instance, in this order.
    malformed_inputs = (
        ([7], [0]),
        ([[7]], [[0]]),
    )
    for features, labels in malformed_inputs:
        with pytest.raises(ValueError):
            classifier.fit(features, labels)
Example #10
0
    # Column names of the training frame, as a plain list.
    columns = train_data_df.columns.values.tolist()

    # Train the models: one classifier per label column.
    logger.info("start train model")

    # Maps each label column name to the classifier fitted on it.
    classifier_dict = dict()

    # The first two columns are skipped; presumably they hold an id and the
    # raw text rather than labels -- TODO confirm against the data loader.
    for column in columns[2:]:
        train_label = train_data_df[column]
        logger.info("start train %s model" % column)
        sk_clf = FirstColFtClassifier(lr=learning_rate,
                                      epoch=epoch,
                                      wordNgrams=word_ngrams,
                                      minCount=min_count,
                                      verbose=2)
        sk_clf.fit(train_data_format, train_label)
        logger.info("complete train %s model" % column)
        classifier_dict[column] = sk_clf

    logger.info("complete train model")
    logger.info("start save model")

    model_path = config.model_path

    # Create the output directory on first use.
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # All classifiers are dumped together as one dict.
    # NOTE(review): the path is built by plain string concatenation, so
    # model_path presumably ends with a path separator -- confirm.
    joblib.dump(classifier_dict, model_path + model_name)
    # NOTE(review): "svae" in the message below looks like a typo for "save";
    # left unchanged here because it is runtime log text.
    logger.info("complete svae model")

    # Validate the model
Example #11
0
def test_bad_shape():
    """Verify fit rejects a flat X and a nested y with ValueError."""
    clf = FirstColFtClassifier()

    # X given as a flat list instead of rows.
    flat_X, flat_y = [7], [0]
    with pytest.raises(ValueError):
        clf.fit(flat_X, flat_y)

    # X given as rows, but y nested one level deep.
    nested_X, nested_y = [[7]], [[0]]
    with pytest.raises(ValueError):
        clf.fit(nested_X, nested_y)