def run(self):
        '''Runs the pipeline step.

        Builds a Classifier from the vectorizer, model and label dictionary
        named in ``self.input``, reads the predictions CSV, replaces each
        row's ``expected`` value with its entry in the label dictionary,
        stores the predicted label name for each row's ``name`` in an
        ``actual`` column, and prints every row where the two differ.
        '''
        label_lookup = persistence.json_to_obj(self.input['label_dict'])
        vectorizer = persistence.bin_to_obj(
            self.input['TrainClassifier_vectorizer'])
        model = persistence.bin_to_obj(self.input['TrainClassifier_model'])
        classifier = Classifier(vectorizer, model, label_lookup)

        df = pd.read_csv(self.input['predictions'])

        def expected_label(row):
            # The label dictionary is keyed by the stringified raw value.
            key = str(row['expected'])
            return label_lookup[key]

        def predicted_label(row):
            result = classifier.predict(row['name'])
            return result['label']['name']

        def report_mismatch(row):
            # Only rows whose prediction differs from the expectation
            # are reported.
            if row['actual'] == row['expected']:
                return
            self.print(
                '\'{name}\' [ expected: {expected}, actual: {actual} ]',
                name=row['name'],
                expected=row['expected'],
                actual=row['actual'])

        df['expected'] = df.apply(expected_label, axis=1)
        df['actual'] = df.apply(predicted_label, axis=1)
        df.apply(report_mismatch, axis=1)
Ejemplo n.º 2
0
 def __score_model(self, classifier, features, labels, title):
     '''Plots and saves a confusion matrix for the classifier.

     The matrix is drawn with ``plot_confusion_matrix`` using the values
     of the label dictionary as display labels and ``normalize='true'``,
     titled with ``title`` and saved to the output entry named
     ``'<title>_results'``.
     '''
     label_names = persistence.json_to_obj(self.input['label_dict'])
     plot_confusion_matrix(
         classifier,
         features,
         labels,
         display_labels=label_names.values(),
         normalize='true')
     plt.title(title)

     output_key = '{title}_results'.format(title=title)
     plt.savefig(self.output[output_key])
Ejemplo n.º 3
0
def predict(filename, model_id='latest'):
    ''' Makes a prediction using the classification model.

    Args:
        filename (string): The filename to evaluate.
        model_id (string): The id of the model to use.

    Returns:
        tuple: The ``'label'`` and ``'probability'`` entries of the
        classification produced for ``filename``.
    '''
    model_dir = __get_model_path(model_id)

    # The vectorizer and model are resolved by appending the file name to
    # the model path as-is.
    vectorizer = persistence.bin_to_obj(model_dir + 'classifier_vec.pickle')
    model = persistence.bin_to_obj(model_dir + 'classifier_mdl.pickle')
    label_dictionary = persistence.json_to_obj(
        'data/processed/label_dictionary.json')

    result = Classifier(vectorizer, model, label_dictionary).predict(filename)
    return result['label'], result['probability']
def fixture_labels():
    '''Returns the object loaded from 'models/label_dictionary.json'.'''
    label_dictionary = persistence.json_to_obj('models/label_dictionary.json')
    return label_dictionary
Ejemplo n.º 5
0
def test_json_to_obj_null_path_throws_exception():
    '''json_to_obj raises TypeError when the path is None.'''
    null_path = None
    with pytest.raises(TypeError):
        persistence.json_to_obj(null_path)
Ejemplo n.º 6
0
def test_json_to_obj_not_null():
    '''json_to_obj returns a non-None object for the label dictionary.'''
    loaded = persistence.json_to_obj('models/label_dictionary.json')
    assert loaded is not None
Ejemplo n.º 7
0
def test_json_to_obj_empty_path_throws_exception():
    '''json_to_obj raises FileNotFoundError when the path is empty.'''
    empty_path = ''
    with pytest.raises(FileNotFoundError):
        persistence.json_to_obj(empty_path)
def fixture_model():
    '''Returns a Classifier built from the artifacts under 'models/'.'''
    vectorizer = persistence.bin_to_obj('models/classifier_vec.pickle')
    model = persistence.bin_to_obj('models/classifier_mdl.pickle')
    label_dictionary = persistence.json_to_obj('models/label_dictionary.json')
    return Classifier(vectorizer, model, label_dictionary)