コード例 #1
0
ファイル: test_pickle.py プロジェクト: xtenex/skift
def test_pickle_unfitted():
    """Check that an unfitted classifier survives a pickle round trip.

    The classifier is pickled before it is fitted.  Both the original and
    the unpickled copy must raise ``NotFittedError`` on ``predict`` until
    they are fitted, and must predict the expected labels afterwards.

    The pickle is written to a fresh temporary file (rather than a fixed
    path under the home directory) so the test does not depend on a
    pre-existing directory, and the file is removed even when an assertion
    fails.
    """
    # Local import keeps this test self-contained regardless of what the
    # module already imports.
    import tempfile

    ftdf = pd.DataFrame(data=[['woof woof', 0], ['meow meow', 1]],
                        columns=['txt', 'lbl'])
    ft_clf = FirstColFtClassifier()

    fd, pic_fpath = tempfile.mkstemp(suffix='.ft')
    # Only the path is needed; close the descriptor right away so it cannot
    # leak and so the file can be reopened on every platform.
    os.close(fd)
    try:
        with open(pic_fpath, 'wb') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)

        with pytest.raises(NotFittedError):
            assert ft_clf.predict([['woof woof']])[0] == 0

        ft_clf.fit(ftdf[['txt']], ftdf['lbl'])
        assert ft_clf.predict([['woof woof']])[0] == 0
        assert ft_clf.predict([['meow meow']])[0] == 1
        assert ft_clf.predict([['meow']])[0] == 1
        assert ft_clf.predict([['woof lol']])[0] == 0
        assert ft_clf.predict([['meow lolz']])[0] == 1

        # The unpickled object is a distinct instance and is still unfitted.
        assert ft_clf2 != ft_clf
        with pytest.raises(NotFittedError):
            assert ft_clf2.predict([['woof woof']])[0] == 0

        ft_clf2.fit(ftdf[['txt']], ftdf['lbl'])
        assert ft_clf2.predict([['woof woof']])[0] == 0
        assert ft_clf2.predict([['meow meow']])[0] == 1
        assert ft_clf2.predict([['meow']])[0] == 1
        assert ft_clf2.predict([['woof lol']])[0] == 0
        assert ft_clf2.predict([['meow lolz']])[0] == 1
    finally:
        # Always remove the temporary pickle, pass or fail.
        os.unlink(pic_fpath)
コード例 #2
0
def test_pickle():
    """Check that a fitted classifier predicts identically after pickling.

    A classifier is fitted on a two-row frame, pickled to a temporary file,
    loaded back, and the loaded copy must give the same predictions as the
    original.  The temporary file is removed in a ``finally`` clause so a
    failing assertion does not leave the file (or its descriptor) behind.
    """
    ftdf = pd.DataFrame(data=[['woof woof', 0], ['meow meow', 1]],
                        columns=['txt', 'lbl'])
    ft_clf = FirstColFtClassifier()
    ft_clf.fit(ftdf[['txt']], ftdf['lbl'])

    assert ft_clf.predict([['woof woof']])[0] == 0
    assert ft_clf.predict([['meow meow']])[0] == 1
    assert ft_clf.predict([['meow']])[0] == 1
    assert ft_clf.predict([['woof lol']])[0] == 0
    assert ft_clf.predict([['meow lolz']])[0] == 1

    fd, pic_fpath = tempfile.mkstemp()
    # Only the path is used below; close the descriptor immediately so it
    # cannot leak if anything later raises.
    os.close(fd)
    try:
        with open(pic_fpath, 'wb+') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)

        # The loaded object is a distinct instance with the same behavior.
        assert ft_clf2 != ft_clf
        assert ft_clf2.predict([['woof woof']])[0] == 0
        assert ft_clf2.predict([['meow meow']])[0] == 1
        assert ft_clf2.predict([['meow']])[0] == 1
        assert ft_clf2.predict([['woof lol']])[0] == 0
        assert ft_clf2.predict([['meow lolz']])[0] == 1
    finally:
        # Clean up the temporary pickle whether or not the test passed.
        os.unlink(pic_fpath)
コード例 #3
0
ファイル: test_common.py プロジェクト: wojohowitz00/skift
def test_predict_proba():
    """Check that predict_proba ranks the expected label highest.

    After fitting on the frame returned by ``_ftdf()``, the probability at
    index 0 must dominate for 'woof woof' and the probability at index 1
    must dominate for 'meow meow'.
    """
    frame = _ftdf()
    model = FirstColFtClassifier()
    model.fit(frame[['txt']], frame['lbl'])

    woof_probs = model.predict_proba([['woof woof']])[0]
    assert woof_probs[0] > woof_probs[1]

    meow_probs = model.predict_proba([['meow meow']])[0]
    assert meow_probs[1] > meow_probs[0]
コード例 #4
0
ファイル: test_common.py プロジェクト: wojohowitz00/skift
def test_predict():
    """Check the predicted label for a handful of short texts.

    The classifier is fitted on the frame returned by ``_ftdf()`` and each
    (text, expected label) pair below is asserted in order.
    """
    frame = _ftdf()
    model = FirstColFtClassifier()
    model.fit(frame[['txt']], frame['lbl'])

    expectations = (
        ('woof woof', 0),
        ('meow meow', 1),
        ('meow', 1),
        ('woof lol', 0),
        ('meow lolz', 1),
    )
    for text, expected_label in expectations:
        assert model.predict([[text]])[0] == expected_label
コード例 #5
0
ファイル: test_common.py プロジェクト: xtenex/skift
def test_cross_val():
    """Run 2-fold cross-validation with each classifier variant.

    The test only checks that ``cross_val_score`` completes for every
    classifier; the returned scores are not inspected.
    """
    # Factories (not instances) so each classifier is created immediately
    # before its own data frame and scoring run, one variant at a time.
    factories = (
        lambda: ColLblBasedFtClassifier('txt', epoch=3),
        lambda: IdxBasedFtClassifier(0, epoch=3),
        lambda: FirstColFtClassifier(epoch=3),
    )
    for make_classifier in factories:
        estimator = make_classifier()
        frame = _big_ftdf()
        cross_val_score(
            estimator,
            X=frame[['txt']],
            y=frame['lbl'],
            cv=2,
            scoring='accuracy',
        )
コード例 #6
0
def test_pickle(quantize):
    """Check quantization refusal and pickle round-tripping.

    Args:
        quantize: When truthy, the test asserts that ``quantize(cutoff=1)``
            raises ``ValueError`` on the freshly fitted model, that the
            model is still reported as not quantized, and then returns.
            When falsy, the fitted model is pickled to a temporary file,
            loaded back, and the copy must predict like the original.

    The temporary file is removed in a ``finally`` clause so a failing
    assertion does not leave the file (or its descriptor) behind.
    """
    ftdf = pd.DataFrame(data=[['woof woof', 0], ['meow meow', 1]],
                        columns=['txt', 'lbl'])
    ft_clf = FirstColFtClassifier()
    ft_clf.fit(ftdf[['txt']], ftdf['lbl'])
    if quantize:
        with pytest.raises(ValueError):
            ft_clf.quantize(cutoff=1)
        assert not ft_clf.is_quantized()
        # Nothing below applies to the quantize case; every later statement
        # runs with quantize falsy.
        return

    assert ft_clf.predict([['woof woof']])[0] == 0
    assert ft_clf.predict([['meow meow']])[0] == 1
    assert ft_clf.predict([['meow']])[0] == 1
    assert ft_clf.predict([['woof lol']])[0] == 0
    assert ft_clf.predict([['meow lolz']])[0] == 1

    fd, pic_fpath = tempfile.mkstemp()
    # Only the path is used below; close the descriptor immediately so it
    # cannot leak if anything later raises.
    os.close(fd)
    try:
        with open(pic_fpath, 'wb+') as bfile:
            pickle.dump(ft_clf, bfile)
        with open(pic_fpath, 'rb') as bfile:
            ft_clf2 = pickle.load(bfile)

        # The loaded object is a distinct instance with the same behavior.
        assert ft_clf2 != ft_clf
        assert ft_clf2.predict([['woof woof']])[0] == 0
        assert ft_clf2.predict([['meow meow']])[0] == 1
        assert ft_clf2.predict([['meow']])[0] == 1
        assert ft_clf2.predict([['woof lol']])[0] == 0
        assert ft_clf2.predict([['meow lolz']])[0] == 1
    finally:
        # Clean up the temporary pickle whether or not the test passed.
        os.unlink(pic_fpath)
コード例 #7
0
ファイル: test_common.py プロジェクト: wojohowitz00/skift
def test_bad_shape():
    """Check that fit raises ValueError for each malformed (X, y) pair."""
    model = FirstColFtClassifier()
    malformed_inputs = (
        ([7], [0]),
        ([[7]], [[0]]),
    )
    for features, labels in malformed_inputs:
        with pytest.raises(ValueError):
            model.fit(features, labels)
コード例 #8
0
ファイル: main_train.py プロジェクト: blair101/AI-Competition
    # Wrap the training texts in a list and transpose; presumably this gives
    # one text per row in a single column -- TODO confirm the type/shape of
    # content_train, which is defined outside this excerpt.
    train_data_format = np.asarray([content_train]).T
    logger.info("complete formate train data")

    columns = train_data_df.columns.values.tolist()

    # model train
    logger.info("start train model")

    # Maps each label column name to the classifier fitted for it.
    classifier_dict = dict()

    # The first two columns are skipped; every remaining column is used as
    # the label vector for its own, independently fitted classifier.
    for column in columns[2:]:
        train_label = train_data_df[column]
        logger.info("start train %s model" % column)
        sk_clf = FirstColFtClassifier(lr=learning_rate,
                                      epoch=epoch,
                                      wordNgrams=word_ngrams,
                                      minCount=min_count,
                                      verbose=2)
        sk_clf.fit(train_data_format, train_label)
        logger.info("complete train %s model" % column)
        classifier_dict[column] = sk_clf

    logger.info("complete train model")
    logger.info("start save model")

    model_path = config.model_path

    # Create the output directory on first use.
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # All per-column classifiers are saved together as one dict.
    # NOTE(review): the target is built by plain string concatenation, so
    # model_path presumably ends with a path separator -- confirm.
    joblib.dump(classifier_dict, model_path + model_name)
コード例 #9
0
 def create_model(self):
     """Build and return a new FirstColFtClassifier with fixed settings."""
     hyperparams = {
         'lr': 1.0,
         'epoch': 10,
         'wordNgrams': 1,
         'minCount': 5,
         'verbose': 2,
     }
     return FirstColFtClassifier(**hyperparams)
コード例 #10
0
    logger.info("start seg train data...")
    # The second column of train_df holds the raw text to be segmented.
    content_train = train_df.iloc[:, 1]
    content_train = data_util.seg_words(args, content_train)

    logger.info("prepare train format...")
    # Presumably one segmented text per row in a single column, e.g.
    # array([[<segmented text 1>], [<segmented text 2>], ...]) -- TODO
    # confirm what seg_words returns.
    train_data_format = np.asarray([content_train]).T

    columns = train_df.columns.values.tolist()

    logger.info("start train model...")
    # Maps each label column name to the classifier fitted for it.
    classifier_dict = dict()
    for column in columns[2:]:  # label columns
        train_label = train_df[column]
        logger.info("start train %s model" % column)
        sk_clf = FirstColFtClassifier(lr=args.learning_rate, epoch=args.epoch,
                                      wordNgrams=args.word_ngrams,
                                      minCount=args.min_count, verbose=2)
        sk_clf.fit(train_data_format, train_label)
        logger.info("complete train %s model" % column)
        classifier_dict[column] = sk_clf

    logger.info("start save train model...")
    model_name = args.model_name
    # All per-column classifiers are saved together as one dict.
    joblib.dump(classifier_dict, model_name)

    logger.info("start seg valid data...")
    # Validation texts go through the same column selection and
    # segmentation as the training texts above.
    content_valid = valid_df.iloc[:, 1]
    content_valid = data_util.seg_words(args, content_valid)

    logger.info("prepare valid format")
    valid_data_format = np.asarray([content_valid]).T