def main(data_dir, other_classes, weights_path, mtype, class_mode='binary',
         batch_size=16):
    """
    Tests a model.

    Args:
        data_dir: Directory containing test classes, including "real" and
            every class in `other_classes`.
        other_classes: Collection of classes other than "real" to test on.
            Any iterable of class names is accepted (list, tuple, ...).
        weights_path: Path to HDF5 weights file for the model.
        mtype: Model type. Must be a key of `MODEL_MAP`.
        class_mode: See
            `keras.preprocessing.image.ImageDataGenerator.flow_from_directory`.
        batch_size: Number of images to process at a time.

    Exits with status 2, after printing an error to stderr, when a class
    directory is missing or when `mtype` is not a valid model type.
    """
    # Imported locally so the block does not depend on the site-provided
    # `exit` builtin, which is not guaranteed to exist in every interpreter.
    import sys

    # Normalize once so tuples and other iterables work everywhere below.
    other_classes = list(other_classes)

    # Make sure classes exist.
    for c in other_classes + ['real']:
        test_dir = os.path.join(data_dir, c)
        if not os.path.exists(test_dir):
            # Report the parent directory: it is `data_dir` that lacks the
            # class, not the (non-existent) class directory itself.
            print('ERROR: "{}" has no class "{}"'.format(data_dir, c),
                  file=sys.stderr)
            sys.exit(2)

    # Make sure model is valid.
    if mtype not in MODEL_MAP:
        print('ERROR: "{}" is not a valid model type'.format(mtype),
              file=sys.stderr)
        sys.exit(2)

    # Create data generators.
    print('\nLoading testing data from "{}"...'.format(data_dir))
    test_generator, _ = create_data_generator(data_dir, other_classes,
                                              batch_size, class_mode)

    # Create model.
    if class_mode == 'categorical':
        # One output per non-"real" class plus one for "real".
        classes = len(other_classes) + 1
        model = MODEL_MAP[mtype](class_mode=class_mode, classes=classes)
    else:
        model = MODEL_MAP[mtype]()
    model.load(weights_path)

    # Test model.
    classes_str = ', '.join(other_classes)
    print('\nTesting {} model on class {}...\n'.format(mtype.upper(),
                                                       classes_str.upper()))
    if class_mode == 'categorical':
        model.set_metrics(['acc', cat_acc_pred, tpr_cat_pred, tnr_cat_pred])
        mse, acc, bacc, tpr, tnr = model.evaluate_with_generator(test_generator)
        print('mse:\t{}'.format(mse))
        print('acc:\t{}'.format(acc))    # Categorical accuracy.
        print('bacc:\t{}'.format(bacc))  # Binary accuracy.
        print('tpr:\t{}'.format(tpr))
        print('tnr:\t{}'.format(tnr))
    else:
        model.set_metrics(['acc', tpr_pred, tnr_pred])
        mse, acc, tpr, tnr = model.evaluate_with_generator(test_generator)
        print('mse:\t{}'.format(mse))
        print('acc:\t{}'.format(acc))
        print('tpr:\t{}'.format(tpr))
        print('tnr:\t{}'.format(tnr))
def test_binary(mtype, data_dir, weights_path, classes, batch_size):
    """
    Evaluates a binary classifier on a test set.

    Args:
        mtype: Architecture of the model to test (a key of `MODEL_MAP`).
        data_dir: Directory holding one sub-directory of test images per
            class.
        weights_path: Path to the HDF5 weights to load into the model.
        classes: List of classes other than "real" to load.
        batch_size: Number of images to process at a time.

    Returns:
        Accuracy of the classifier against the testing set.
    """
    # Build the requested architecture in binary mode and restore its weights.
    architecture = MODEL_MAP[mtype]
    classifier = architecture(class_mode='binary')
    classifier.load(weights_path)

    # Only accuracy is tracked, so evaluation yields exactly two values.
    classifier.set_metrics(['acc'])

    test_generator, _ = create_data_generator(
        data_dir, classes, batch_size, 'binary')
    _first_value, accuracy = classifier.evaluate_with_generator(test_generator)
    return accuracy
def test_categorical(mtype, data_dir, weights_path, classes, batch_size):
    """
    Evaluates a categorical classifier on a test set.

    Args:
        mtype: Architecture of the model to test (a key of `MODEL_MAP`).
        data_dir: Directory holding one sub-directory of test images per
            class.
        weights_path: Path to the HDF5 weights to load into the model.
        classes: List of classes other than "real" to load.
        batch_size: Number of images to process at a time.

    Returns:
        Accuracy when treated as a categorical classifier and when treated as
        a binary classifier.
    """
    # "real" is always present in addition to the requested classes.
    architecture = MODEL_MAP[mtype]
    classifier = architecture(class_mode='categorical',
                              classes=len(classes) + 1)
    classifier.load(weights_path)

    # Two metrics are tracked, so evaluation yields exactly three values.
    classifier.set_metrics(['acc', cat_acc_pred])

    test_generator, _ = create_data_generator(
        data_dir, classes, batch_size, 'categorical')
    evaluation = classifier.evaluate_with_generator(test_generator)
    _first_value, categorical_accuracy, binary_accuracy = evaluation
    return categorical_accuracy, binary_accuracy
from model import SpeechModel
import config
import os
import utils


if __name__ == '__main__':
    # Character vocabulary used to size the model's output layer.
    character_mapping = utils.create_character_mapping()

    # Dataset statistics are read from the metadata file in the data dir.
    metadata_path = os.path.join(config.preprocessing['data_dir'],
                                 'metadata.csv')
    data_details = utils.get_data_details(filename=metadata_path)

    # Derive the remaining hyper-parameters from the dataset statistics and
    # store them back into the shared config dictionaries.
    num_samples = data_details['num_samples']
    batch_size = config.training['batch_size']
    config.training['steps_per_epoch'] = int(num_samples / batch_size)
    config.model.update({
        'max_input_length': data_details['max_input_length'],
        'max_label_length': data_details['max_label_length'],
        'vocab_size': len(character_mapping),
    })

    # Batches are produced from the same data directory, sized by the limits
    # just written into the model config.
    data_generator = utils.create_data_generator(
        directory=config.preprocessing['data_dir'],
        max_input_length=config.model['max_input_length'],
        max_label_length=config.model['max_label_length'],
        batch_size=config.training['batch_size'])

    # Build the model from the completed hyper-parameters and train it.
    speech_model = SpeechModel(hparams=config.model)
    speech_model.train_generator(data_generator, config.training)
def main(data_dir, save_dir, other_classes, mtype, class_mode='binary',
         weights_path=None, epoch=1, transfer=False, batch_size=16):
    """
    Trains a model.

    Args:
        data_dir: Directory containing a "train" and "val" directory, each
            with a directory for the "real" class and every class in
            `other_classes`.
        save_dir: Directory to save checkpoints and CSV file with loss and
            accuracy. Created if it does not exist.
        other_classes: Other classes to train on (wherein the default class
            is "real"). Any iterable of class names is accepted.
        mtype: Model type. Should be "meso1", "meso4", "mesoinception4", or
            "mesoinc4frozen16".
        class_mode: See
            `keras.preprocessing.image.ImageDataGenerator.flow_from_directory`.
        weights_path: Path to HDF5 weights file to load model with. A new
            model will be created if set to None.
        epoch: Epoch to start on.
        transfer: Whether to transfer from a MesoInception4 to a
            MesoInc4Frozen16. mtype should be either "mesoinception4" or
            "mesoinc4frozen16", and a weights_path should be specified.
        batch_size: Number of images to process at a time.

    Exits with status 2, after printing an error to stderr, when the "train"
    or "val" set, a class directory, the model type, or the classification
    mode is invalid.
    """
    # Imported locally so the block does not depend on the site-provided
    # `exit` builtin, which is not guaranteed to exist in every interpreter.
    import sys

    # Normalize once so tuples and other iterables work everywhere below.
    other_classes = list(other_classes)

    # Make sure training and validation set exists.
    train_dir = os.path.join(data_dir, 'train')
    valid_dir = os.path.join(data_dir, 'val')
    if not os.path.exists(train_dir):
        print('ERROR: "{}" has no "train" set'.format(data_dir),
              file=sys.stderr)
        sys.exit(2)
    # This check must look at the validation directory; testing `train_dir`
    # a second time would make the "val" error unreachable.
    if not os.path.exists(valid_dir):
        print('ERROR: "{}" has no "val" set'.format(data_dir),
              file=sys.stderr)
        sys.exit(2)

    # Make sure classes exist.
    for c in other_classes + ['real']:
        class_train_dir = os.path.join(train_dir, c)
        class_valid_dir = os.path.join(valid_dir, c)
        if not os.path.exists(class_train_dir):
            print('ERROR: "{}" has no class "{}"'.format(train_dir, c),
                  file=sys.stderr)
            sys.exit(2)
        if not os.path.exists(class_valid_dir):
            print('ERROR: "{}" has no class "{}"'.format(valid_dir, c),
                  file=sys.stderr)
            sys.exit(2)

    # Make sure model is valid.
    if mtype not in MODEL_MAP:
        print('ERROR: "{}" is not a valid model type'.format(mtype),
              file=sys.stderr)
        sys.exit(2)

    # Make sure classification mode is valid.
    if class_mode not in CLASS_MODES:
        print('ERROR: "{}" is not a valid classification mode'.format(
            class_mode), file=sys.stderr)
        sys.exit(2)

    # Create save directory if it does not exist.
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Create data generators.
    print('\nLoading training data from "{}"...'.format(train_dir))
    train_generator, class_weight = create_data_generator(
        train_dir, other_classes, batch_size, class_mode)
    print('\nLoading validation data from "{}"...'.format(valid_dir))
    valid_generator, _ = create_data_generator(valid_dir, other_classes,
                                               batch_size, class_mode)

    # Create model.
    if class_mode == 'categorical':
        # One output per non-"real" class plus one for "real".
        classes = len(other_classes) + 1
        model = MODEL_MAP[mtype](class_mode=class_mode, classes=classes)
    else:
        model = MODEL_MAP[mtype]()

    # Optionally initialize the model from existing weights; a transfer takes
    # precedence over a plain load.
    if transfer:
        print('\nTransferring MESOINCEPTION4 model from "{}"'.format(
            weights_path))
        model.load_transfer(weights_path)
    elif weights_path is not None:
        print('\nLoading {} model from "{}"'.format(mtype.upper(),
                                                    weights_path))
        model.load(weights_path)

    # Train model.
    classes_str = ', '.join(other_classes)
    print('\nTraining {} model as a {} classifier on classes {}...\n'.format(
        mtype.upper(), class_mode, classes_str.upper()))
    callback = CustomCallback(save_dir, save_epoch=SAVE_EPOCH)
    model.fit_with_generator(train_generator,
                             len(train_generator),
                             validation_data=valid_generator,
                             validation_steps=len(valid_generator),
                             class_weight=class_weight,
                             epochs=EPOCHS,
                             initial_epoch=epoch,
                             shuffle=True,
                             callbacks=[callback])
def main(data_dir, models_dir, mtype, output_file, batch_size=16):
    """
    Tests models on every available compression level.

    For every compression level, each model directory found under
    `models_dir/<level>/<mtype>` that contains a "best.hdf5" file is loaded
    and evaluated against the test set of every compression level. One CSV
    row is appended to `output_file` per model.

    Args:
        data_dir: Directory containing test classes, including "real" and
            `other_class`.
        models_dir: Models directory as described in this script's docstring.
        mtype: Architecture of models to test.
        output_file: CSV file to output to. Rows are appended; the header
            row is written only when the file does not exist yet.
        batch_size: Number of images to process at a time.

    Exits with status 2, after printing an error to stderr, when `mtype` is
    not a valid model type.
    """
    # Imported locally so the block does not depend on the site-provided
    # `exit` builtin, which is not guaranteed to exist in every interpreter.
    import sys

    # Make sure model is valid.
    if mtype not in MODEL_MAP:
        print('ERROR: "{}" is not a valid model type'.format(mtype),
              file=sys.stderr)
        sys.exit(2)

    print('Testing compression levels for {}'.format(mtype.upper()))
    print('Loading models from "{}"'.format(models_dir))
    print('Outputting to "{}"'.format(output_file))
    print('Batch size: {}'.format(batch_size))

    comps = ('all', 'c0', 'c23', 'c40')
    metrics = ('acc', 'tpr', 'tnr')

    # Open output file. Initialize with headers if it does not exist.
    init_output = not os.path.exists(output_file)

    # The context manager closes the file even if an evaluation raises.
    # newline='' lets the csv module control line endings (no blank rows on
    # Windows).
    with open(output_file, 'a', newline='') as output:
        output_csv = csv.writer(output)
        if init_output:
            # Columns: mtype, comp, class, then acc/tpr/tnr per level.
            headers = ['mtype', 'comp', 'class']
            headers.extend('{}_{}'.format(metric, comp)
                           for comp in comps for metric in metrics)
            output_csv.writerow(headers)

        # Maps class types to maps of compression levels to data generators.
        # I.e. data generators for every class type for every compression
        # level.
        generators = {}

        for comp_level in comps:
            print('\nRunning tests for compression level "{}"'.format(
                comp_level))
            comp_dir = os.path.join(models_dir, comp_level, mtype.lower())
            if not os.path.exists(comp_dir):
                print('ERROR: "{}" does not exist. Skipping.'.format(comp_dir),
                      file=sys.stderr)
                continue

            for md in sorted(os.listdir(comp_dir)):
                weights_dir = os.path.join(comp_dir, md)

                # Ignore files.
                if not os.path.isdir(weights_dir):
                    continue

                # Make sure best weight parameters are present.
                best_path = os.path.join(weights_dir, 'best.hdf5')
                if not os.path.isfile(best_path):
                    print('ERROR: File "{}" does not exist. Skipping.'.format(
                        best_path), file=sys.stderr)
                    continue

                # Create data generators if they do not exist.
                class_type = os.path.basename(md)
                if class_type not in generators:
                    print('\nCreating generators for class "{}"...'.format(
                        class_type))
                    gens = {}
                    gen_fail = False
                    for comp_level_gen in comps:
                        test_dir = os.path.join(data_dir, comp_level_gen,
                                                'test')
                        new_gen, _ = create_data_generator(
                            test_dir, [class_type], batch_size, 'binary')
                        # Too few samples: skip this class entirely.
                        if new_gen.samples < 10:
                            print(
                                'ERROR: Only found {} samples for class '
                                '"{}" and "real" for compression level "{}" in "{}".'.format(
                                    new_gen.samples, class_type,
                                    comp_level_gen, test_dir),
                                file=sys.stderr
                            )
                            print('Skipping.', file=sys.stderr)
                            gen_fail = True
                            break
                        gens[comp_level_gen] = new_gen
                    if gen_fail:
                        continue
                    generators[class_type] = gens

                print('\nTesting class "{}" for compression level "{}"...'.format(
                    class_type, comp_level))

                # Load model.
                model = MODEL_MAP[mtype]()
                model.load(best_path)
                model.set_metrics(['acc', tpr_pred, tnr_pred])

                # Test model on every compression level.
                results = {}
                for comp_level_test in comps:
                    gen = generators[class_type][comp_level_test]
                    # Generators are shared between models, so rewind first.
                    gen.reset()
                    _, acc, tpr, tnr = model.evaluate_with_generator(gen)
                    results[comp_level_test] = {
                        'acc': acc, 'tpr': tpr, 'tnr': tnr}

                # Write data, in the same column order as the header row.
                data_line = [mtype.lower(), comp_level, class_type]
                data_line.extend(results[comp][metric]
                                 for comp in comps for metric in metrics)
                output_csv.writerow(data_line)
                # Flush per row so partial results survive an interruption.
                output.flush()