Ejemplo n.º 1
0
def load_ds(flags=None):
    """Constructs the dataset that is set in flags.

    Args:
        flags: A FLAGS object with properties. If it's not set, use the global flags.

    Returns:
        The Dataset object that is set in the flags.

    Raises:
        ValueError: If the dataset named in the flags is not supported.
    """

    if flags is None:  # Load the default flags.
        flags = tf.app.flags.FLAGS

    # Normalize the dataset name once instead of re-lowering it per comparison.
    dataset_name = flags.dataset.lower()

    if dataset_name == 'mnist':
        ds = Mnist()
    elif dataset_name == 'f-mnist':
        ds = FMnist()
    elif dataset_name == 'celeba':
        # `attribute` is an optional flag; forward it only when present so the
        # two original branches collapse into one constructor call.
        kwargs = {'resize_size': flags.output_height}
        if hasattr(flags, 'attribute'):
            kwargs['attribute'] = flags.attribute
        ds = CelebA(**kwargs)
    else:
        raise ValueError('[!] Dataset {} is not supported.'.format(dataset_name))
    return ds
Ejemplo n.º 2
0
def create_generator(dataset_name,
                     split,
                     batch_size,
                     randomize,
                     attribute=None):
    """Creates a batch generator for the dataset.

    Args:
        dataset_name: `str`. The name of the dataset.
        split: `str`. The split of data. It can be `train`, `val`, or `test`.
        batch_size: An integer. The batch size.
        randomize: `bool`. Whether to randomize the order of images before
            batching.
        attribute (optional): Forwarded to the CelebA constructor; ignored for
            the other datasets. Presumably the CelebA attribute whose labels
            should be loaded — confirm against the CelebA class.

    Returns:
        A zero-argument callable; calling it returns a Python generator that
        yields (image_batch, label_batch) tuples.

    Raises:
        ValueError: If `dataset_name` is not supported.
    """
    # Normalize the dataset name once for all comparisons.
    name = dataset_name.lower()

    if name == 'mnist':
        ds = Mnist()
    elif name == 'f-mnist':
        ds = FMnist()
    elif name == 'cifar-10':
        ds = Cifar10()
    elif name == 'celeba':
        ds = CelebA(attribute=attribute)
    else:
        raise ValueError("Dataset {} is not supported.".format(dataset_name))

    ds.load(split=split, randomize=randomize)

    def get_gen():
        # NOTE(review): the stop bound `len(ds) - batch_size` drops the last
        # full batch plus any remainder — presumably intentional to guarantee
        # complete batches only; confirm before changing.
        for start in range(0, len(ds) - batch_size, batch_size):
            yield (ds.images[start:start + batch_size],
                   ds.labels[start:start + batch_size])

    return get_gen
Ejemplo n.º 3
0
# coding=utf-8

import sys

# Make the sibling package layout importable when this script is run from
# inside the examples directory.
sys.path.append('..')
sys.path.append('../..')
sys.path.append('../../cutedl')
'''
使用卷积神经网络实现的手写数字识别模型
'''
# (Bare string above says: handwritten-digit recognition model implemented
# with a convolutional neural network.)

from datasets.mnist import Mnist
'''
加载手写数字数据集
'''
# (Bare string above says: load the handwritten-digit dataset.)
# 64 is presumably the batch size passed to the loader — TODO confirm
# against Mnist.load.
mnist = Mnist('../datasets/mnist')
ds_train, ds_test = mnist.load(64)

import pdb
import numpy as np

from cutedl.model import Model
from cutedl.session import Session
from cutedl import session, losses, optimizers, utils
from cutedl import nn_layers as nn
from cutedl import cnn_layers as cnn

import fit_tools

# Output path prefixes for the fitting report images and the saved model.
# NOTE(review): "recoginze" is a typo for "recognize", but these are runtime
# path values other code may depend on — left unchanged.
report_path = "./pics/mnist-recoginze-"
model_path = "./model/mnist-recoginze-"
Ejemplo n.º 4
0
# coding=utf-8

import sys
# Make the sibling package layout importable when this script is run from
# inside the examples directory.
sys.path.append('..')
sys.path.append('../..')
sys.path.append('../../cutedl')
'''
手写数字识别模型
'''
# (Bare string above says: handwritten-digit recognition model.)

from datasets.mnist import Mnist
'''
加载手写数字数据集
'''
# (Bare string above says: load the handwritten-digit dataset.)
# 64 is presumably the batch size; flatting=True presumably flattens each
# image into a 1-D vector for a fully-connected model — TODO confirm
# against Mnist.load.
mnist = Mnist('../datasets/mnist')
ds_train, ds_test = mnist.load(64, flatting=True)

import pdb
import numpy as np

from cutedl.model import Model
from cutedl.session import Session
from cutedl import session, losses, optimizers, utils
from cutedl import nn_layers as nn

import matplotlib.pyplot as plt
'''
生成模型的拟合报告
'''
# (Bare string above says: generate the model's fitting report.)

Ejemplo n.º 5
0
    def test_classifier(self, input_split='test', save_result=False, model_name=None, labels_filename=None,
                        acc_filename=None, acc_filenames_i=None):
        """Predicts labels and compares them to ground truth labels from given split. Returns test accuracy.

        Args:
            input_split: What split to test on [train|val|test].
            save_result: Optional, boolean. If True saves predicted labels and accuracy.
            model_name:  For neural network classifiers, model name to load and use to predict.
            labels_filename: Optional, string. Path to save predicted labels in.
            acc_filename: Optional, string. Path to save predicted accuracy in.
            acc_filenames_i: Optional, array of strings. Path to save class-specific predicted labels in.

        Returns:
            predicted_labels: Predicted labels for the input split.
            accuracy: Accuracy on the input split.
            per_class_accuracies: Array of per-class accuracies on the input split.

        Raises:
            IOError: If an input error occurs when loading features, or an output error occurs when saving results.
            ValueError: If the specified dataset [mnist|f-mnist|celeba] or classifier type
            [svm|linear-svm|lmnn|logistic|knn|nn] is not supported.
        """

        # Single lookup; also replaces the Python-2-only dict.has_key() used
        # below (removed in Python 3) with the `in`-style access the rest of
        # this method already uses.
        verbose = self.classifier_params.get('verbose', False)

        # If save_result is True, but no labels_filename was specified, use default filename.
        if save_result and (labels_filename is None):
            output_dir = self.get_output_dir()
            labels_filename = self.get_labels_filename(input_split)
            labels_filename = os.path.join(output_dir, labels_filename)

        # If save_result is True, but no acc_filename was specified, use default filename.
        if save_result and (acc_filename is None):
            output_dir = self.get_output_dir()
            acc_filename, acc_filenames_i = self.get_acc_filename(input_split)
            acc_filename = os.path.join(output_dir, acc_filename)
            for i in range(self.classifier_params['num_classes']):
                acc_filenames_i[i] = os.path.join(output_dir, acc_filenames_i[i])

        # Load feature vectors. The saved feature file is named after the
        # train split; swap in the requested split's name.
        feature_dir = os.path.dirname(self.classifier_params['feature_file'])
        feature_file = os.path.basename(self.classifier_params['feature_file'])
        feature_file = feature_file.replace('train', input_split)
        feature_file = os.path.join(feature_dir, feature_file)

        try:
            with open(feature_file, 'r') as f:
                features = cPickle.load(f)
        except IOError as err:
            print('[!] I/O error({0}): {1}.'.format(err.errno, err.strerror))
            # Re-raise: continuing would hit an undefined `features` below,
            # and the documented contract is to raise IOError on load failure.
            raise

        if verbose:
            print('[*] Loaded feature vectors from {}.'.format(feature_file))

        # Initiate dataset object to load ground-truth labels.
        dataset = self.classifier_params['dataset']
        if dataset == 'mnist':
            ds = Mnist()
        elif dataset == 'f-mnist':
            ds = FMnist()
        elif dataset == 'celeba':
            ds = CelebA(resize_size=self.classifier_params['output_height'],
                        attribute=self.classifier_params['attribute'])
        else:
            raise ValueError('[!] Dataset {} is not supported.'.format(dataset))

        # Load ground-truth labels and truncate labels/features to a common length.
        _, labels, _ = ds.load(input_split)
        num_samples = min(np.shape(features)[0], len(labels))
        labels = labels[:num_samples]
        features = features[:num_samples, :]

        if verbose:
            print('[*] Loaded ground-truth labels from: {}.'.format(
                self.classifier_params['dataset']))

        # Predict labels.
        if self.classifier_type in ('svm', 'logistic', 'knn', 'linear-svm', 'lmnn'):
            predicted_labels = self.predict(features, save_result, labels_filename)
        elif self.classifier_type == 'nn':
            predicted_labels = self.predict(features, save_result, model_name, labels_filename)
        else:
            raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))

        # Compare predicted labels to ground-truth labels and calculate accuracy.
        num_correct = np.sum(np.equal(predicted_labels, labels))
        accuracy = num_correct / (1.0 * len(labels))
        per_class_accuracies = []
        for i in range(self.classifier_params['num_classes']):
            idx = np.where(np.equal(labels, i))[0]
            num_correct = np.sum(np.equal(predicted_labels[idx], labels[idx]))
            # NOTE(review): if a class is absent from the split this divides by
            # zero — presumably splits always cover all classes; confirm.
            accuracy_i = num_correct / (1.0 * len(labels[idx]))
            per_class_accuracies.append(accuracy_i)

        # Save results.
        if save_result:
            try:
                with open(acc_filename, 'w') as fp:
                    fp.write("{}".format(accuracy))
            except IOError as err:
                print("[!] I/O error({0}): {1}.".format(err.errno,
                                                        err.strerror))

            if verbose:
                print('[*] Saved predicted labels {}.'.format(labels_filename))
                print('[*] Saved predicted accuracy {}.'.format(acc_filename))

            for i in range(self.classifier_params['num_classes']):
                try:
                    with open(acc_filenames_i[i], 'w') as fp:
                        fp.write("{}".format(per_class_accuracies[i]))
                except IOError as err:
                    print("[!] I/O error({0}): {1}.".format(err.errno,
                                                            err.strerror))

        if verbose:
            print('[*] Testing complete. Accuracy on {} split {}.'.format(
                input_split, accuracy))
            for i in range(self.classifier_params['num_classes']):
                print('[*] Testing complete. Accuracy on {} split, class {}: {}.'.format(input_split, i,
                                                                                         per_class_accuracies[i]))

        return predicted_labels, accuracy, per_class_accuracies
Ejemplo n.º 6
0
    def validate(self):
        """Only needed for neural networks. Validates different checkpoints by testing them on the validation split and
        retaining the one with the top accuracy.

        Returns:
            best_model: Name of chosen best model (empty string if no validation was performed). An empty string is
            returned for non neural network classifiers.

        Raises:
            IOError: If an input error occurs when loading feature vectors, or an output error occurs when saving the
            chosen model.
            ValueError: If the specified dataset [mnist|f-mnist|celeba] or classifier type
            [svm|linear-svm|lmnn|logistic|knn|nn] is not supported.
        """

        # Single lookup for all the verbose checks below.
        verbose = self.classifier_params.get('verbose', False)

        if verbose:
            print("[*] Validating.")

        # Get feature file paths: the saved feature file is named after the
        # train split; swap in the validation split's name.
        feature_dir = os.path.dirname(self.classifier_params['feature_file'])
        feature_file = os.path.basename(self.classifier_params['feature_file'])
        feature_file = feature_file.replace('train', 'val')
        feature_file = os.path.join(feature_dir, feature_file)

        # Load feature vectors.
        try:
            with open(feature_file, 'r') as f:
                features = cPickle.load(f)
        except IOError as err:
            print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))
            # Re-raise: continuing would reference an undefined `features`,
            # and the documented contract is to raise IOError on load failure.
            raise

        if verbose:
            print('[*] Loaded feature vectors from {}.'.format(feature_file))

        # Initialize the dataset object to load ground-truth labels.
        dataset = self.classifier_params['dataset']
        if dataset == 'mnist':
            ds = Mnist()
        elif dataset == 'f-mnist':
            ds = FMnist()
        elif dataset == 'celeba':
            ds = CelebA(resize_size=self.classifier_params['output_height'],
                        attribute=self.classifier_params['attribute'])
        else:
            raise ValueError('[!] Dataset {} is not supported.'.format(dataset))

        # Load ground-truth labels from the validation split and truncate
        # labels/features to a common length.
        _, labels, _ = ds.load('val')
        num_samples = min(np.shape(features)[0], len(labels))
        labels = labels[:num_samples]
        features = features[:num_samples, :]

        if verbose:
            print('[*] Loaded ground-truth labels from {}.'.format(
                self.classifier_params['dataset']))

        # Non neural network classifiers do not require validation as no intermediate models exist.
        if self.classifier_type in ('svm', 'logistic', 'knn', 'linear-svm', 'lmnn'):
            print('[!] No validation needed.')
            return ""

        # Neural network classifiers.
        elif self.classifier_type == 'nn':
            # Call the neural network validate function on the features.
            best_acc, best_model, _ = self.estimator.validate(features, labels, session=self.session)

            # Persist the chosen checkpoint name and its validation accuracy.
            try:
                with open(os.path.join(self.get_output_dir(), self.tf_checkpoint_dir(), 'chosen_model.txt'), 'w') as fp:
                    fp.write("{} {}".format(os.path.basename(best_model), best_acc))
            except IOError as err:
                print("[!] I/O error({0}): {1}.".format(err.errno,
                                                        err.strerror))

            if verbose:
                print(
                    '[*] Chose model: {}, with validation accuracy {}.'.format(os.path.basename(best_model), best_acc))
            return best_model

        else:
            raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))
Ejemplo n.º 7
0
    def train(self, features=None, labels=None, retrain=False, num_train=-1):
        """Trains classifier using training features and ground truth training labels.

        Args:
            features: Path to training feature vectors (use None to automatically load saved features from experiment
            output directory).
            labels: Path to ground truth train labels (use None to automatically load from dataset).
            retrain: Boolean, whether or not to retrain if classifier is already saved.
            num_train: Number of training samples to use (use -1 to include all training samples).

        Raises:
            IOError: If the saved feature file cannot be read.
            ValueError: If the specified dataset [mnist|f-mnist|celeba] or classifier type
            [svm|linear-svm|lmnn|logistic|knn|nn] is not supported.
        """

        # Single lookup; also replaces the Python-2-only dict.has_key()
        # (removed in Python 3) with the `in`-style access used elsewhere
        # in this class.
        verbose = self.classifier_params.get('verbose', False)

        # If no feature vector is provided load from experiment output directory.
        if features is None:
            feature_file = self.classifier_params['feature_file']
            try:
                with open(feature_file, 'r') as f:
                    features = cPickle.load(f)
            except IOError as err:
                print("[!] I/O error({0}): {1}.".format(err.errno,
                                                        err.strerror))
                # Re-raise: proceeding with `features is None` would fail with
                # a confusing error at np.shape(features) below.
                raise
            if verbose:
                print('[*] Loaded feature file from {}.'.format(feature_file))

        # If no label vector is provided load from dataset.
        if labels is None:
            # Create dataset object based on dataset name.
            dataset = self.classifier_params['dataset']
            if dataset == 'mnist':
                ds = Mnist()
            elif dataset == 'f-mnist':
                ds = FMnist()
            elif dataset == 'celeba':
                ds = CelebA(resize_size=self.classifier_params['output_height'],
                            attribute=self.classifier_params['attribute'])
            else:
                raise ValueError('[!] Dataset {} is not supported.'.format(dataset))
            # Load labels from the train split.
            _, labels, _ = ds.load('train')
            num_samples = min(np.shape(features)[0], len(labels))

            # Restrict to the first num_train samples if num_train is not -1.
            if num_train > -1:
                num_samples = min(num_train, num_samples)

            labels = labels[:num_samples]
            features = features[:num_samples, :]

            if verbose:
                print('[*] Loaded ground truth labels from {}.'.format(
                    self.classifier_params['dataset']))

        # Train the classifier.
        if self.classifier_type in ('svm', 'logistic', 'knn', 'linear-svm'):
            self.estimator.fit(features, labels)

        # Neural network classifiers.
        elif self.classifier_type == 'nn':
            self.estimator.fit(features, labels, retrain=retrain, session=self.session)

        # For LMNN, first transform the feature vector then perform k-NN.
        elif self.classifier_type == 'lmnn':
            # Learn the metric.
            self.helper_estimator.fit(features, labels)
            # Transform feature space.
            transformed_features = self.helper_estimator.transform(features)
            # Create k-nn graph.
            self.estimator.fit(transformed_features, labels)

        else:
            raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))

        if verbose:
            print('[*] Trained classifier.')