コード例 #1
0
ファイル: classifier_shogun.py プロジェクト: jubatus/jubakit
# Build the datasets; only the training data is shuffled.
train_dataset = Dataset(train_loader, schema).shuffle()
test_dataset = Dataset(test_loader, schema)

# Configure and launch a classifier service: 'PA' method, with the
# 'first_name' string split into unigrams using 'bin' sample/global weights.
cfg = Config(
  method = 'PA',
  converter = {
    'string_rules': [{'key': 'first_name', 'type': 'unigram', 'sample_weight': 'bin', 'global_weight': 'bin'}]
  }
)
classifier = Classifier.run(cfg)

# From here on the service is running; the try/finally guarantees that
# classifier.stop() is reached even if training or classification raises.
try:
  # Train the classifier. train() is iterated to completion; the values it
  # yields are not needed here.
  for _ in classifier.train(train_dataset):
    pass

  # Classify each test record and report whether the prediction matches.
  for (idx, label, result) in classifier.classify(test_dataset):
    true_family_name = label
    # NOTE(review): result[0][0] is presumably the top-ranked label -- confirm
    # against the jubakit classify() result format.
    pred_family_name = result[0][0]
    first_name = test_dataset.get(idx)['first_name']
    print("{0} {1} ({2})".format(
      pred_family_name,
      first_name,
      'correct!' if pred_family_name == true_family_name else 'incorrect'
    ))
finally:
  # Stop the classifier.
  classifier.stop()
コード例 #2
0
# Build the datasets; only the training data is shuffled.
train_dataset = Dataset(train_loader, schema).shuffle()
test_dataset = Dataset(test_loader, schema)

# Create a Classifier Service: 'PA' method, with the 'first_name' string
# split into unigrams using 'bin' sample/global weights.
cfg = Config(method='PA',
             converter={
                 'string_rules': [{
                     'key': 'first_name',
                     'type': 'unigram',
                     'sample_weight': 'bin',
                     'global_weight': 'bin'
                 }]
             })
classifier = Classifier.run(cfg)

# The service is running from this point on; try/finally makes sure
# classifier.stop() is called even when training or classification fails.
try:
    # Train the classifier. The generator is consumed to completion; the
    # values it yields are not needed here.
    for _ in classifier.train(train_dataset):
        pass

    # Classify using the classifier and print one line per test record.
    for (idx, label, result) in classifier.classify(test_dataset):
        true_family_name = label
        # NOTE(review): result[0][0] is presumably the top-ranked label --
        # confirm against the jubakit classify() result format.
        pred_family_name = result[0][0]
        first_name = test_dataset.get(idx)['first_name']
        print("{0} {1} ({2})".format(
            pred_family_name, first_name,
            'correct!' if pred_family_name == true_family_name else 'incorrect'))
finally:
    # Stop the classifier.
    classifier.stop()
コード例 #3
0
# Launch a classifier service using a default Config().
classifier = Classifier.run(Config())

# Training stops once the record index reaches this value.
n_train = 1000

train_banner = '---- Train: {0} tweets -------------------------------------'
print(train_banner.format(n_train))

# Feed records from the loader into the classifier, remembering every label
# that has been trained.
trained_labels = set()
dataset = Dataset(get_loader(), schema)
for (idx, label) in classifier.train(dataset):
    # Leave the loop as soon as the index hits the training limit.
    if idx == n_train:
        break

    trained_labels.add(label)
    # Drop newlines so each record prints on a single line.
    flat_text = dataset.get(idx)['.text'].replace('\n', '')
    train_line = 'Train[{0}]: language {1}  >> {2}'.format(idx, label, flat_text)
    print(train_line)

labels_text = str(trained_labels)
print('Languages Trained: {0}'.format(labels_text))

print('---- Prediction (Ctrl-C to stop) -------------------------------------')

try:
    # Classify tweets using the classifier.
    (y_true, y_pred) = ([], [])
    dataset = Dataset(get_loader(), schema)
    for (idx, label, result) in classifier.classify(dataset):
        (true_lang, pred_lang) = (label, result[0][0])
        text_summary = dataset.get(idx)['.text'].replace('\n', '')

        message = None