def create_generator(dataset_name, split, batch_size, randomize, attribute=None):
    """Creates a batch generator for the dataset.

    Args:
        dataset_name: `str`. The name of the dataset (matched
            case-insensitively). One of `mnist`, `f-mnist`, `cifar-10`, or
            `celeba`.
        split: `str`. The split of data. It can be `train`, `val`, or `test`.
        batch_size: An integer. The batch size.
        randomize: `bool`. Whether to randomize the order of images before
            batching.
        attribute (optional): For CelebA, the attribute forwarded to the
            `CelebA` dataset constructor. Not used for the other datasets.

    Returns:
        get_gen: A zero-argument generator function. Every call returns a new
            generator yielding `(image_batch, label_batch)` tuples that each
            hold exactly `batch_size` samples. A trailing partial batch is
            dropped.

    Raises:
        ValueError: If `dataset_name` is not supported.
    """
    name = dataset_name.lower()
    if name == 'mnist':
        ds = Mnist()
    elif name == 'f-mnist':
        ds = FMnist()
    elif name == 'cifar-10':
        ds = Cifar10()
    elif name == 'celeba':
        ds = CelebA(attribute=attribute)
    else:
        raise ValueError("Dataset {} is not supported.".format(dataset_name))

    ds.load(split=split, randomize=randomize)

    def get_gen():
        # The "+ 1" keeps the final full batch when len(ds) is an exact
        # multiple of batch_size; without it that batch was silently skipped.
        for start in range(0, len(ds) - batch_size + 1, batch_size):
            stop = start + batch_size
            yield ds.images[start:stop], ds.labels[start:stop]

    return get_gen
import sys sys.path.append('..') sys.path.append('../..') sys.path.append('../../cutedl') ''' 使用卷积神经网络实现的手写数字识别模型 ''' from datasets.mnist import Mnist ''' 加载手写数字数据集 ''' mnist = Mnist('../datasets/mnist') ds_train, ds_test = mnist.load(64) import pdb import numpy as np from cutedl.model import Model from cutedl.session import Session from cutedl import session, losses, optimizers, utils from cutedl import nn_layers as nn from cutedl import cnn_layers as cnn import fit_tools report_path = "./pics/mnist-recoginze-" model_path = "./model/mnist-recoginze-" '''
# coding=utf-8 import sys sys.path.append('..') sys.path.append('../..') sys.path.append('../../cutedl') ''' 手写数字识别模型 ''' from datasets.mnist import Mnist ''' 加载手写数字数据集 ''' mnist = Mnist('../datasets/mnist') ds_train, ds_test = mnist.load(64, flatting=True) import pdb import numpy as np from cutedl.model import Model from cutedl.session import Session from cutedl import session, losses, optimizers, utils from cutedl import nn_layers as nn import matplotlib.pyplot as plt ''' 生成模型的拟合报告 '''
def test_classifier(self, input_split='test', save_result=False, model_name=None, labels_filename=None,
                    acc_filename=None, acc_filenames_i=None):
    """Predicts labels and compares them to ground truth labels from given split.

    Args:
        input_split: What split to test on [train|val|test].
        save_result: Optional, boolean. If True saves predicted labels and accuracy.
        model_name: For neural network classifiers, model name to load and use to predict.
        labels_filename: Optional, string. Path to save predicted labels in.
        acc_filename: Optional, string. Path to save predicted accuracy in.
        acc_filenames_i: Optional, array of strings. Paths to save the
            class-specific accuracies in (one entry per class). If it is None
            while `acc_filename` is given, no per-class files are written.

    Returns:
        predicted_labels: Predicted labels for the input split.
        accuracy: Accuracy on the input split.
        per_class_accuracies: List of per-class accuracies on the input split.

    Raises:
        IOError: If the feature file for `input_split` cannot be read. Failures
            while writing the accuracy files are reported on stdout and do not
            abort the call.
        ValueError: If the specified dataset [mnist|f-mnist|celeba] or classifier type
            [svm|linear-svm|lmnn|logistic|knn|nn] is not supported.
    """
    params = self.classifier_params
    verbose = bool(params.get('verbose'))

    # If save_result is True, but no labels_filename was specified, use default filename.
    if save_result and labels_filename is None:
        output_dir = self.get_output_dir()
        labels_filename = self.get_labels_filename(input_split)
        labels_filename = os.path.join(output_dir, labels_filename)

    # If save_result is True, but no acc_filename was specified, use default filename.
    if save_result and acc_filename is None:
        output_dir = self.get_output_dir()
        acc_filename, acc_filenames_i = self.get_acc_filename(input_split)
        acc_filename = os.path.join(output_dir, acc_filename)
        for i in range(params['num_classes']):
            acc_filenames_i[i] = os.path.join(output_dir, acc_filenames_i[i])

    # Derive the feature file of the requested split from the training feature
    # file: every occurrence of 'train' in the base name is replaced.
    feature_dir = os.path.dirname(params['feature_file'])
    feature_file = os.path.basename(params['feature_file'])
    feature_file = feature_file.replace('train', input_split)
    feature_file = os.path.join(feature_dir, feature_file)

    # Load feature vectors. Pickles are binary data, hence 'rb'.
    # NOTE: unpickling executes arbitrary code; only load trusted feature files.
    try:
        with open(feature_file, 'rb') as f:
            features = cPickle.load(f)
    except IOError as err:
        print('[!] I/O error({0}): {1}.'.format(err.errno, err.strerror))
        # Without features nothing below can work; surface the documented
        # IOError instead of failing later on an unbound variable.
        raise

    if verbose:
        print('[*] Loaded feature vectors from {}.'.format(feature_file))

    # Initiate dataset object to load ground-truth labels.
    if params['dataset'] == 'mnist':
        ds = Mnist()
    elif params['dataset'] == 'f-mnist':
        ds = FMnist()
    elif params['dataset'] == 'celeba':
        ds = CelebA(resize_size=params['output_height'], attribute=params['attribute'])
    else:
        raise ValueError('[!] Dataset {} is not supported.'.format(params['dataset']))

    # Load ground-truth labels and trim both arrays to their common length.
    _, labels, _ = ds.load(input_split)
    num_samples = min(np.shape(features)[0], len(labels))
    labels = labels[:num_samples]
    features = features[:num_samples, :]

    if verbose:
        print('[*] Loaded ground-truth labels from: {}.'.format(params['dataset']))

    # Predict labels.
    if self.classifier_type in ('svm', 'logistic', 'knn', 'linear-svm', 'lmnn'):
        predicted_labels = self.predict(features, save_result, labels_filename)
    elif self.classifier_type == 'nn':
        predicted_labels = self.predict(features, save_result, model_name, labels_filename)
    else:
        raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))

    # Compare predicted labels to ground-truth labels and calculate accuracy.
    num_classes = params['num_classes']
    num_correct = np.sum(np.equal(predicted_labels, labels))
    accuracy = num_correct / (1.0 * len(labels))

    per_class_accuracies = []
    for i in range(num_classes):
        idx = np.where(np.equal(labels, i))[0]
        num_correct = np.sum(np.equal(predicted_labels[idx], labels[idx]))
        accuracy_i = num_correct / (1.0 * len(labels[idx]))
        per_class_accuracies.append(accuracy_i)

    # Save results (best effort: write failures are reported, not raised).
    if save_result:
        try:
            with open(acc_filename, 'w') as fp:
                fp.write("{}".format(accuracy))
        except IOError as err:
            print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))

        if verbose:
            print('[*] Saved predicted labels {}.'.format(labels_filename))
            print('[*] Saved predicted accuracy {}.'.format(acc_filename))

        # Per-class files are only available when the caller supplied them or
        # the defaults were generated above.
        if acc_filenames_i is not None:
            for i in range(num_classes):
                try:
                    with open(acc_filenames_i[i], 'w') as fp:
                        fp.write("{}".format(per_class_accuracies[i]))
                except IOError as err:
                    print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))

    if verbose:
        print('[*] Testing complete. Accuracy on {} split {}.'.format(input_split, accuracy))
        for i in range(num_classes):
            print('[*] Testing complete. Accuracy on {} split, class {}: {}.'.format(
                input_split, i, per_class_accuracies[i]))

    return predicted_labels, accuracy, per_class_accuracies
def validate(self):
    """Only needed for neural networks. Validates different checkpoints by testing them on the
    validation split and retaining the one with the top accuracy.

    The validation features and labels are loaded for every classifier type,
    including those that need no validation.

    Returns:
        best_model: Name of chosen best model. An empty string is returned for
            non neural network classifiers, for which no validation is performed.

    Raises:
        IOError: If the validation feature file cannot be read. A failure while
            writing the chosen model file is reported on stdout and does not
            abort the call.
        ValueError: If the specified dataset [mnist|f-mnist|celeba] or classifier type
            [svm|linear-svm|lmnn|logistic|knn|nn] is not supported.
    """
    params = self.classifier_params
    verbose = bool(params.get('verbose'))

    if verbose:
        print("[*] Validating.")

    # Get feature file paths: every occurrence of 'train' in the base name of
    # the training feature file is replaced by 'val'.
    feature_dir = os.path.dirname(params['feature_file'])
    feature_file = os.path.basename(params['feature_file'])
    feature_file = feature_file.replace('train', 'val')
    feature_file = os.path.join(feature_dir, feature_file)

    # Load feature vectors. Pickles are binary data, hence 'rb'.
    # NOTE: unpickling executes arbitrary code; only load trusted feature files.
    try:
        with open(feature_file, 'rb') as f:
            features = cPickle.load(f)
    except IOError as err:
        print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))
        # Nothing below can run without features; raise the documented IOError
        # rather than failing later on an unbound variable.
        raise

    if verbose:
        print('[*] Loaded feature vectors from {}.'.format(feature_file))

    # Initialize the dataset object to load ground-truth labels.
    if params['dataset'] == 'mnist':
        ds = Mnist()
    elif params['dataset'] == 'f-mnist':
        ds = FMnist()
    elif params['dataset'] == 'celeba':
        ds = CelebA(resize_size=params['output_height'], attribute=params['attribute'])
    else:
        raise ValueError('[!] Dataset {} is not supported.'.format(params['dataset']))

    # Load ground-truth labels from the validation split and trim both arrays
    # to their common length.
    _, labels, _ = ds.load('val')
    num_samples = min(np.shape(features)[0], len(labels))
    labels = labels[:num_samples]
    features = features[:num_samples, :]

    if verbose:
        print('[*] Loaded ground-truth labels from {}.'.format(params['dataset']))

    # Non neural network classifiers do not require validation as no intermediate models exist.
    if self.classifier_type in ('svm', 'logistic', 'knn', 'linear-svm', 'lmnn'):
        print('[!] No validation needed.')
        return ""

    if self.classifier_type != 'nn':
        raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))

    # Neural network classifiers: call the neural network validate function on the features.
    best_acc, best_model, _ = self.estimator.validate(features, labels, session=self.session)

    # Save results (best effort: a write failure is reported, not raised).
    try:
        with open(os.path.join(self.get_output_dir(), self.tf_checkpoint_dir(), 'chosen_model.txt'), 'w') as fp:
            fp.write("{} {}".format(os.path.basename(best_model), best_acc))
    except IOError as err:
        print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))

    if verbose:
        print('[*] Chose model: {}, with validation accuracy {}.'.format(os.path.basename(best_model), best_acc))

    return best_model
def train(self, features=None, labels=None, retrain=False, num_train=-1):
    """Trains classifier using training features and ground truth training labels.

    Args:
        features: Training feature vectors, indexed along their first axis (use
            None to automatically load the saved features from
            `classifier_params['feature_file']`).
        labels: Ground truth train labels (use None to automatically load from dataset).
        retrain: Boolean, whether or not to retrain if classifier is already saved.
            Only forwarded to neural network classifiers.
        num_train: Number of training samples to use (use -1 to include all training samples).
            Applied whether the features/labels were passed in or loaded.

    Raises:
        IOError: If `features` is None and the feature file cannot be read.
        ValueError: If the specified dataset [mnist|f-mnist|celeba] or classifier type
            [svm|linear-svm|lmnn|logistic|knn|nn] is not supported.
    """
    params = self.classifier_params
    verbose = bool(params.get('verbose'))

    # If no feature vector is provided load from experiment output directory.
    if features is None:
        feature_file = params['feature_file']
        # Pickles are binary data, hence 'rb'.
        # NOTE: unpickling executes arbitrary code; only load trusted feature files.
        try:
            with open(feature_file, 'rb') as f:
                features = cPickle.load(f)
        except IOError as err:
            print("[!] I/O error({0}): {1}.".format(err.errno, err.strerror))
            # Training cannot proceed without features; raise instead of
            # continuing with features still set to None.
            raise

        if verbose:
            print('[*] Loaded feature file from {}.'.format(feature_file))

    # If no label vector is provided load from dataset.
    labels_from_dataset = labels is None
    if labels_from_dataset:
        # Create dataset object based on dataset name.
        if params['dataset'] == 'mnist':
            ds = Mnist()
        elif params['dataset'] == 'f-mnist':
            ds = FMnist()
        elif params['dataset'] == 'celeba':
            ds = CelebA(resize_size=params['output_height'], attribute=params['attribute'])
        else:
            raise ValueError('[!] Dataset {} is not supported.'.format(params['dataset']))

        # Load labels from the train split.
        _, labels, _ = ds.load('train')

    # Use the common length of features and labels, restricted to the first
    # num_train samples if num_train is not -1. This runs for caller-supplied
    # labels as well, so num_train is always honoured.
    num_samples = min(np.shape(features)[0], len(labels))
    if num_train > -1:
        num_samples = min(num_train, num_samples)

    labels = labels[:num_samples]
    features = features[:num_samples]

    if labels_from_dataset and verbose:
        print('[*] Loaded ground truth labels from {}.'.format(params['dataset']))

    # Train the classifier.
    if self.classifier_type in ('svm', 'logistic', 'knn', 'linear-svm'):
        self.estimator.fit(features, labels)

    # Neural network classifiers.
    elif self.classifier_type == 'nn':
        self.estimator.fit(features, labels, retrain=retrain, session=self.session)

    # For LMNN, first transform the feature vector then perform k-NN.
    elif self.classifier_type == 'lmnn':
        # Learn the metric.
        self.helper_estimator.fit(features, labels)
        # Transform feature space.
        transformed_features = self.helper_estimator.transform(features)
        # Create k-nn graph.
        self.estimator.fit(transformed_features, labels)

    else:
        raise ValueError('[!] Classifier type {} is not supported.'.format(self.classifier_type))

    if verbose:
        print('[*] Trained classifier.')