def run(self):
    '''Runs the pipeline step.

    Loads the label dictionary, vectorizer and model referenced by
    ``self.input``, reads the CSV at ``self.input['predictions']``,
    replaces every ``expected`` value with its entry in the label
    dictionary, stores the label name predicted for each ``name`` in an
    ``actual`` column and reports every row where the two differ through
    ``self.print``.

    Raises:
        KeyError: If an ``expected`` value (converted to a string) is not
            a key of the label dictionary.
    '''
    labels = persistence.json_to_obj(self.input['label_dict'])
    classifier = Classifier(
        persistence.bin_to_obj(self.input['TrainClassifier_vectorizer']),
        persistence.bin_to_obj(self.input['TrainClassifier_model']),
        labels)

    df = pd.read_csv(self.input['predictions'])

    # Build the new columns by iterating the source columns instead of a
    # row-wise ``DataFrame.apply``: for a CSV without data rows ``apply``
    # does not produce a Series, so assigning its result to a column fails.
    df['expected'] = [labels[str(code)] for code in df['expected']]
    df['actual'] = [
        classifier.predict(name)['label']['name'] for name in df['name']]

    # Reporting is a side effect, so use a plain loop rather than ``apply``
    # (which would also build a throwaway result of ``None`` values).
    rows = zip(df['name'], df['expected'], df['actual'])
    for name, expected, actual in rows:
        if actual != expected:
            self.print(
                '\'{name}\' [ expected: {expected}, actual: {actual} ]',
                name=name,
                expected=expected,
                actual=actual)
def __score_model(self, classifier, features, labels, title):
    '''Plots a confusion matrix for the classifier and saves it to disk.

    The matrix is normalised over the true labels (``normalize='true'``)
    and its tick labels are the values of the label dictionary loaded
    from ``self.input['label_dict']``.

    Args:
        classifier: The estimator handed to ``plot_confusion_matrix``.
        features: The samples to score (passed as ``X``).
        labels: The true labels for ``features`` (passed as ``y_true``).
        title (string): The plot title; also selects the output path via
            the ``'<title>_results'`` key of ``self.output``.
    '''
    classes = persistence.json_to_obj(self.input['label_dict'])
    plot_confusion_matrix(
        estimator=classifier,
        X=features,
        y_true=labels,
        # Materialise the dict view so a plain sequence is handed over.
        display_labels=list(classes.values()),
        normalize='true')
    try:
        plt.title(title)
        plt.savefig(self.output['{title}_results'.format(title=title)])
    finally:
        # Each call creates a new pyplot figure; close it so repeated
        # scoring runs do not accumulate open figures.
        plt.close()
def predict(filename, model_id='latest'):
    '''
    Classifies a filename with a stored classification model.

    Args:
        filename (string): The filename to evaluate.
        model_id (string): The id of the model to use.

    Returns:
        tuple: The ``label`` and ``probability`` entries of the
        classifier's result, in that order.
    '''
    base = __get_model_path(model_id)

    # Load the artifacts in the same order the classifier receives them.
    vectorizer = persistence.bin_to_obj(base + 'classifier_vec.pickle')
    model = persistence.bin_to_obj(base + 'classifier_mdl.pickle')
    label_dict = persistence.json_to_obj(
        'data/processed/label_dictionary.json')

    result = Classifier(vectorizer, model, label_dict).predict(filename)
    return result['label'], result['probability']
def fixture_labels():
    '''Returns the object loaded from ``models/label_dictionary.json``.'''
    label_dict = persistence.json_to_obj('models/label_dictionary.json')
    return label_dict
def test_json_to_obj_null_path_throws_exception():
    '''``json_to_obj`` must raise ``TypeError`` when the path is ``None``.'''
    null_path = None
    with pytest.raises(TypeError):
        persistence.json_to_obj(null_path)
def test_json_to_obj_not_null():
    '''``json_to_obj`` must return something for the label dictionary.'''
    loaded = persistence.json_to_obj('models/label_dictionary.json')
    assert loaded is not None
def test_json_to_obj_empty_path_throws_exception():
    '''``json_to_obj`` must raise ``FileNotFoundError`` for an empty path.'''
    empty_path = ''
    with pytest.raises(FileNotFoundError):
        persistence.json_to_obj(empty_path)
def fixture_model():
    '''Builds a ``Classifier`` from the artifacts stored under ``models/``.'''
    # Loaded in the order the ``Classifier`` constructor receives them.
    vectorizer = persistence.bin_to_obj('models/classifier_vec.pickle')
    model = persistence.bin_to_obj('models/classifier_mdl.pickle')
    label_dict = persistence.json_to_obj('models/label_dictionary.json')
    return Classifier(vectorizer, model, label_dict)