# Create a Dataset.
# The training data is shuffled; the test data keeps its loader order.
train_dataset = Dataset(train_loader, schema).shuffle()
test_dataset = Dataset(test_loader, schema)

# Create a Classifier Service.
# A single string rule is configured on the 'first_name' key.
cfg = Config(
    method='PA',
    converter={
        'string_rules': [
            {
                'key': 'first_name',
                'type': 'unigram',
                'sample_weight': 'bin',
                'global_weight': 'bin',
            },
        ],
    },
)
classifier = Classifier.run(cfg)

# Everything between run() and stop() is guarded so that the service is
# stopped even when training or classification raises.
try:
    # Train the classifier.
    # The yielded values are not needed here; the loop only exhausts the
    # iterable returned by train().
    for _ in classifier.train(train_dataset):
        pass

    # Classify using the classifier.
    for (idx, label, result) in classifier.classify(test_dataset):
        true_family_name = label
        # NOTE(review): result[0][0] is taken as the predicted label --
        # presumably the first entry is the best-scoring one; confirm.
        pred_family_name = result[0][0]
        first_name = test_dataset.get(idx)['first_name']
        print("{0} {1} ({2})".format(
            pred_family_name,
            first_name,
            'correct!' if pred_family_name == true_family_name else 'incorrect'
        ))
finally:
    # Stop the classifier.
    classifier.stop()
# Build the datasets: training data is shuffled, test data is left in the
# order produced by its loader.
train_dataset = Dataset(train_loader, schema).shuffle()
test_dataset = Dataset(test_loader, schema)

# Create a Classifier Service.
cfg = Config(method='PA', converter={
    'string_rules': [{
        'key': 'first_name',
        'type': 'unigram',
        'sample_weight': 'bin',
        'global_weight': 'bin'
    }]
})
classifier = Classifier.run(cfg)

# Guard the whole train/classify phase: without the finally clause an
# exception in either loop would skip classifier.stop().
try:
    # Train the classifier. Only the iteration itself matters here, so the
    # yielded items are discarded.
    for _ in classifier.train(train_dataset):
        pass

    # Classify using the classifier and report each prediction against the
    # label that came with the test record.
    for (idx, label, result) in classifier.classify(test_dataset):
        true_family_name = label
        # NOTE(review): assumes result[0][0] is the predicted label of the
        # leading entry in result -- confirm against the classify() contract.
        pred_family_name = result[0][0]
        first_name = test_dataset.get(idx)['first_name']
        print("{0} {1} ({2})".format(
            pred_family_name, first_name,
            'correct!' if pred_family_name == true_family_name else 'incorrect'))
finally:
    # Stop the classifier.
    classifier.stop()
classifier = Classifier.run(Config()) # Number of tweets used for training. n_train = 1000 print('---- Train: {0} tweets -------------------------------------'.format( n_train)) # Train the classifier using tweets from Twitter stream. trained_labels = set() dataset = Dataset(get_loader(), schema) for (idx, label) in classifier.train(dataset): if idx == n_train: break trained_labels.add(label) text_summary = dataset.get(idx)['.text'].replace('\n', '') print('Train[{0}]: language {1} >> {2}'.format(idx, label, text_summary)) print('Languages Trained: {0}'.format(str(trained_labels))) print('---- Prediction (Ctrl-C to stop) -------------------------------------') try: # Classify tweets using the classifier. (y_true, y_pred) = ([], []) dataset = Dataset(get_loader(), schema) for (idx, label, result) in classifier.classify(dataset): (true_lang, pred_lang) = (label, result[0][0]) text_summary = dataset.get(idx)['.text'].replace('\n', '') message = None