コード例 #1
0
def run_number_classifier():
    """Run the number-classification pipeline from loading to validation.

    Each stage is announced on stdout. The data set is loaded through
    ``data_retriever.load_mnist``, split into training and test portions,
    passed through a ``BitmapFeatureExtractor`` that is fitted on the
    training portion only, used to train a classifier, and finally handed
    to ``validate_model`` with ``bitmap=True``. Nothing is returned.
    """
    # -1 requests the complete set; pick a smaller value (e.g. 5000) for
    # quicker training while testing.
    row_limit = -1
    print('-- Executing number classification')

    print('Loading data...')
    samples, targets = data_retriever.load_mnist(row_limit)

    print('Splitting data...')
    # NOTE(review): the result is unpacked as (train data, train labels,
    # test data, test labels) -- confirm this matches the order in which
    # split_and_shuffle_data_set returns its values.
    split = split_and_shuffle_data_set(samples, targets)
    train_samples, train_targets, test_samples, test_targets = split

    print('Extracting features...')
    bitmap_extractor = BitmapFeatureExtractor()
    # Fit on the training portion only, then apply the same transform to both.
    bitmap_extractor.fit(train_samples)
    train_features = bitmap_extractor.transform(train_samples)
    held_out_features = bitmap_extractor.transform(test_samples)

    print('Training classifier...')
    model = train_classifier(train_features, train_targets)

    print('Testing classifier...')
    validate_model(model, test_samples, held_out_features, test_targets, bitmap=True)
コード例 #2
0
ファイル: spam_filter.py プロジェクト: Itera/ml-scikit-intro
def run_spam_filter():
    """Run the spam-filter pipeline from loading to validation.

    The data set is loaded through ``data_retriever.load_sms`` (with
    ``cache_data=False``), split into training and test portions, passed
    through a ``FeatureExtractor`` fitted on the training portion, used to
    train a classifier, and finally handed to ``validate_model``. Nothing
    is returned.
    """
    # -1 is passed through as the row limit; lower it to some number below
    # 2000 if you are having performance problems.
    max_rows = -1
    print('-- Executing spam filter')
    print('-- Loading data')
    messages, targets = data_retriever.load_sms(cache_data=False, rows=max_rows)

    # Randomize and split the data.
    # NOTE(review): the result is unpacked as (train data, test data,
    # train labels, test labels) -- confirm this matches the order in which
    # split_and_shuffle_data_set returns its values.
    split = split_and_shuffle_data_set(messages, targets)
    train_messages, test_messages, train_targets, test_targets = split

    # Fit the transformer on the training portion only.
    feature_extractor = FeatureExtractor()
    feature_extractor.fit(train_messages)

    # Extract the features from the training data.
    train_features = feature_extractor.transform(train_messages)

    # Train the classifier.
    model = train_classifier(train_features, train_targets)

    # Extract the test features and generate the classification report.
    held_out_features = feature_extractor.transform(test_messages)
    validate_model(model, test_messages, held_out_features, test_targets)