Ejemplo n.º 1
0
# Two training samples, each a token -> count mapping.
train_datas = [{'monkey': 1, 'dog': 1, 'cat': 2, 'elephant': 4}, {'dog': 2, 'run': 5}]
# NOTE(review): `non_negative` was deprecated in scikit-learn 0.19 and removed
# in 0.21; newer releases expect `alternate_sign=False` instead. Left as-is to
# stay compatible with the release this script was written for -- confirm the
# installed version before changing it.
feature_hasher = FeatureHasher(n_features=2 ** 20, non_negative=True)
train_datas = feature_hasher.transform(train_datas)
# Disabled dense-array example, kept for reference only (never executed):
# X = np.array([[1, 2, 4, 1, 1, 1],
#               [3, 2, 4, 2, 2, 3],
#               [2, 2, 3, 4, 4, 1],
#               [2, 0, 3, 2, 3, 1],
#               [2, 0, 0, 3, 3, 3],
#               [2, 3, 1, 0, 3, 4]])
class_label = np.array([1, 2])
# Adjust the smoothing factor.
clf = MultinomialNB(alpha=0.01)
# fit() returns the estimator, so `train` is the fitted classifier.
train = clf.fit(train_datas, class_label)
test_datas = [{'monkey': 3, 'mouse': 1}]
test_datas = feature_hasher.transform(test_datas)
test = clf.predict(test_datas)
# Single-argument print(...) calls emit the same text under Python 2 and 3;
# the original bare print statements are a syntax error on Python 3.
print(train_datas)
print(test_datas)
print(train)
print(test)
# Private scikit-learn API: unnormalised per-class log scores for the sample.
print(clf._joint_log_likelihood(test_datas))
print(clf.__dict__)


# test metrics in sklearn
y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]
# Label and value are merged into one string so the call stays a
# single-argument print on Python 2 (which would otherwise print a tuple).
print("macro: %s" % (precision_score(y_true, y_pred, average='macro'),))
print("micro: %s" % (precision_score(y_true, y_pred, average='micro'),))
print("none: %s" % (precision_score(y_true, y_pred, average=None),))
Ejemplo n.º 2
0
    'run': 5
}]
# NOTE(review): `train_datas` is expected to come from the literal that ends
# just above this point; its opening lines are not visible here -- confirm.
# NOTE(review): `non_negative` was deprecated in scikit-learn 0.19 and removed
# in 0.21; newer releases expect `alternate_sign=False` instead. Left as-is to
# stay compatible with the release this script was written for.
feature_hasher = FeatureHasher(n_features=2**20, non_negative=True)
train_datas = feature_hasher.transform(train_datas)
# Disabled dense-array example, kept for reference only (never executed):
# X = np.array([[1, 2, 4, 1, 1, 1],
#               [3, 2, 4, 2, 2, 3],
#               [2, 2, 3, 4, 4, 1],
#               [2, 0, 3, 2, 3, 1],
#               [2, 0, 0, 3, 3, 3],
#               [2, 3, 1, 0, 3, 4]])
class_label = np.array([1, 2])
# Adjust the smoothing factor.
clf = MultinomialNB(alpha=0.01)
# fit() returns the estimator, so `train` is the fitted classifier.
train = clf.fit(train_datas, class_label)
test_datas = [{'monkey': 3, 'mouse': 1}]
test_datas = feature_hasher.transform(test_datas)
test = clf.predict(test_datas)
# Single-argument print(...) calls emit the same text under Python 2 and 3;
# the original bare print statements are a syntax error on Python 3.
print(train_datas)
print(test_datas)
print(train)
print(test)
# Private scikit-learn API: unnormalised per-class log scores for the sample.
print(clf._joint_log_likelihood(test_datas))
print(clf.__dict__)

# test metrics in sklearn
y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]
# Label and value are merged into one string so the call stays a
# single-argument print on Python 2 (which would otherwise print a tuple).
print("macro: %s" % (precision_score(y_true, y_pred, average='macro'),))
print("micro: %s" % (precision_score(y_true, y_pred, average='micro'),))
print("none: %s" % (precision_score(y_true, y_pred, average=None),))
Ejemplo n.º 3
0
def _naive_bayes_predict(table,
                         model,
                         suffix,
                         display_log_prob=False,
                         prediction_col='prediction',
                         prob_prefix='probability',
                         log_prob_prefix='log_probability',
                         display_joint_log_likelihood=False,
                         joint_log_likelihood_prefix='joint_log_likelihood'):
    """Score ``table`` with a naive Bayes model and append the results.

    Args:
        table: pandas DataFrame containing the feature columns. It is not
            modified; the result is built on a copy.
        model: Mapping describing the trained model. Keys read here:
            ``'features'`` (or, if absent, ``'feature_cols'``) for the
            feature column names; ``'nb_model'`` for a fitted estimator or,
            if absent, ``'table_1'`` for a table whose ``model_type``,
            ``pi`` and ``theta`` columns are used to rebuild one; and an
            optional ``'label_encoder'``.
        suffix: ``'label'`` names the per-class columns after
            ``label_encoder.classes_``; any other value uses class
            positions (0, 1, ...). Only consulted when a label encoder is
            present.
        display_log_prob: If truthy, append log-probability columns.
        prediction_col: Name of the column that receives the prediction.
        prob_prefix: Prefix of the per-class probability columns.
        log_prob_prefix: Prefix of the per-class log-probability columns.
        display_joint_log_likelihood: If truthy, append the estimator's
            joint log-likelihood columns.
        joint_log_likelihood_prefix: Prefix of those columns.

    Returns:
        ``{'out_table': DataFrame}``: a copy of ``table`` with the
        prediction column set, followed by the probability columns and any
        requested optional columns, in that order.
    """
    if 'features' in model:
        feature_cols = model['features']
    else:
        feature_cols = model['feature_cols']
    _, features = check_col_type(table, feature_cols)

    if 'nb_model' in model:
        nb_model = model['nb_model']
    else:
        model_table = model['table_1']
        if model_table.model_type[0] == 'multinomial':
            nb_model = MultinomialNB()
        else:
            nb_model = BernoulliNB()
        # Presumably the dummy fit only puts the estimator into a fitted
        # state; every learned attribute used for scoring is replaced below.
        nb_model.fit([[1]], [1])
        nb_model.classes_ = np.array([0, 1])
        nb_model.class_log_prior_ = model_table.pi.values
        nb_model.feature_log_prob_ = np.array(list(model_table.theta))

    prediction = nb_model.predict(features)
    prob = nb_model.predict_proba(features)

    if 'label_encoder' in model:
        label_encoder = model['label_encoder']
        prediction = label_encoder.inverse_transform(prediction)
        if suffix == 'label':
            suffixes = label_encoder.classes_
        else:
            suffixes = range(len(label_encoder.classes_))
    else:
        # One positional suffix per probability column: yields 0 and 1 for a
        # binary model and keeps working when there are more classes.
        suffixes = range(prob.shape[1])

    def _per_class_frame(values, prefix):
        """Wrap per-class scores in a frame that shares ``table``'s index."""
        columns = [
            '{prefix}_{suffix}'.format(prefix=prefix, suffix=class_suffix)
            for class_suffix in suffixes
        ]
        # Reusing table.index keeps the axis=1 concat row-aligned even when
        # the input does not carry a default 0..n-1 index.
        return pd.DataFrame(data=values, columns=columns, index=table.index)

    # Work on a copy so the caller's frame does not gain a prediction column.
    result = table.copy()
    result[prediction_col] = prediction

    frames = [result, _per_class_frame(prob, prob_prefix)]
    if display_log_prob:
        frames.append(
            _per_class_frame(nb_model.predict_log_proba(features),
                             log_prob_prefix))
    if display_joint_log_likelihood:
        # Only computed on request; relies on a private scikit-learn method.
        frames.append(
            _per_class_frame(nb_model._joint_log_likelihood(features),
                             joint_log_likelihood_prefix))

    return {'out_table': pd.concat(frames, axis=1)}