Python create_data_generator Examples, utils.create_data_generator Python Examples

Example #1

0

Show file

def main(data_dir, other_classes, weights_path, mtype,
         class_mode='binary',
         batch_size=16):
    """
    Tests a model.

    Prints an error to stderr and exits with status 2 when a class
    directory is missing or `mtype` is not a known model type.

    Args:
        data_dir: Directory containing test classes, including "real"
            and every class in `other_classes`.
        other_classes: Collection of classes other than "real" to test on.
            Any iterable of class names is accepted (list, tuple, ...).
        weights_path: Path to HDF5 weights file for the model.
        mtype: Model type.  Must be a key of `MODEL_MAP`.
        class_mode: See `keras.preprocessing.image.ImageDataGenerator.flow_from_directory`.
        batch_size: Number of images to process at a time.
    """
    # Normalize to a list so tuples and other iterables can be
    # concatenated with the implicit "real" class below.
    other_classes = list(other_classes)

    # Make sure classes exist.
    for c in other_classes + ['real']:
        test_dir = os.path.join(data_dir, c)
        if not os.path.exists(test_dir):
            # Report the parent directory: it is the one missing the class.
            print('ERROR: "{}" has no class "{}"'.format(data_dir, c),
                  file=stderr)
            exit(2)

    # Make sure model is valid.
    if mtype not in MODEL_MAP:
        print('ERROR: "{}" is not a valid model type'.format(mtype),
              file=stderr)
        exit(2)

    is_categorical = class_mode == 'categorical'

    # Create data generators.
    print('\nLoading testing data from "{}"...'.format(data_dir))
    test_generator, _ = create_data_generator(data_dir, other_classes, batch_size, class_mode)

    # Create model.  Categorical models need one output per class,
    # counting "real".
    if is_categorical:
        classes = len(other_classes) + 1
        model = MODEL_MAP[mtype](class_mode=class_mode, classes=classes)
    else:
        model = MODEL_MAP[mtype]()
    model.load(weights_path)

    # Test model.
    classes_str = ', '.join(other_classes)
    print('\nTesting {} model on class {}...\n'.format(mtype.upper(), classes_str.upper()))
    if is_categorical:
        model.set_metrics(['acc', cat_acc_pred, tpr_cat_pred, tnr_cat_pred])
        mse, acc, bacc, tpr, tnr = model.evaluate_with_generator(test_generator)
        print('mse:\t{}'.format(mse))
        print('acc:\t{}'.format(acc))  # Categorical accuracy.
        print('bacc:\t{}'.format(bacc))  # Binary accuracy.
        print('tpr:\t{}'.format(tpr))
        print('tnr:\t{}'.format(tnr))
    else:
        model.set_metrics(['acc', tpr_pred, tnr_pred])
        mse, acc, tpr, tnr = model.evaluate_with_generator(test_generator)
        print('mse:\t{}'.format(mse))
        print('acc:\t{}'.format(acc))
        print('tpr:\t{}'.format(tpr))
        print('tnr:\t{}'.format(tnr))

Example #2

0

Show file

def test_binary(mtype, data_dir, weights_path, classes, batch_size):
    """
    Evaluates a binary classifier restored from saved weights.

    Args:
        mtype: Key into `MODEL_MAP` selecting the architecture to test.
        data_dir: Directory containing directories with test images for all
            classes.
        weights_path: Path to HDF5 weights for model.
        classes: List of classes other than "real" to load.
        batch_size: Number of images to process at a time.

    Returns:
        Accuracy of classifier against the testing set.
    """
    # Build the data source first; it does not depend on the model.
    test_gen, _ = create_data_generator(data_dir, classes, batch_size, 'binary')

    build_model = MODEL_MAP[mtype]
    classifier = build_model(class_mode='binary')
    classifier.load(weights_path)
    classifier.set_metrics(['acc'])

    # The first value returned by the evaluation is not needed here.
    _, accuracy = classifier.evaluate_with_generator(test_gen)
    return accuracy

Example #3

0

Show file

def test_categorical(mtype, data_dir, weights_path, classes, batch_size):
    """
    Evaluates a categorical classifier restored from saved weights.

    Args:
        mtype: Key into `MODEL_MAP` selecting the architecture to test.
        data_dir: Directory containing directories with test images for all
            classes.
        weights_path: Path to HDF5 weights for model.
        classes: List of classes other than "real" to load.
        batch_size: Number of images to process at a time.

    Returns:
        Tuple of (accuracy when treated as a categorical classifier,
        accuracy when treated as a binary classifier).
    """
    # Build the data source first; it does not depend on the model.
    test_gen, _ = create_data_generator(
        data_dir, classes, batch_size, 'categorical')

    # One output per listed class plus the implicit "real" class.
    build_model = MODEL_MAP[mtype]
    classifier = build_model(class_mode='categorical',
                             classes=len(classes) + 1)
    classifier.load(weights_path)
    classifier.set_metrics(['acc', cat_acc_pred])

    # The first value returned by the evaluation is not needed here.
    _, categorical_acc, binary_acc = classifier.evaluate_with_generator(test_gen)
    return categorical_acc, binary_acc

Example #4

0

Show file

from model import SpeechModel
import config
import os
import utils

if __name__ == '__main__':
    # Script entry point: derive the data-dependent settings, then train.

    character_mapping = utils.create_character_mapping()
    metadata_path = os.path.join(config.preprocessing['data_dir'],
                                 'metadata.csv')
    data_details = utils.get_data_details(filename=metadata_path)

    # Number of batches per epoch, truncated to a whole number.
    batches_per_epoch = (data_details['num_samples'] /
                         config.training['batch_size'])
    config.training['steps_per_epoch'] = int(batches_per_epoch)

    # Copy dataset-derived sizes into the model hyper-parameters.
    for length_key in ('max_input_length', 'max_label_length'):
        config.model[length_key] = data_details[length_key]
    config.model['vocab_size'] = len(character_mapping)

    generator_kwargs = dict(
        directory=config.preprocessing['data_dir'],
        max_input_length=config.model['max_input_length'],
        max_label_length=config.model['max_label_length'],
        batch_size=config.training['batch_size'],
    )
    data_generator = utils.create_data_generator(**generator_kwargs)

    model = SpeechModel(hparams=config.model)
    model.train_generator(data_generator, config.training)

Example #5

0

Show file

File: train.py Project: jcbrockschmidt/face-forgery-detection

def main(data_dir,
         save_dir,
         other_classes,
         mtype,
         class_mode='binary',
         weights_path=None,
         epoch=1,
         transfer=False,
         batch_size=16):
    """
    Trains a model.

    Prints an error to stderr and exits with status 2 when the dataset
    layout, model type or classification mode is invalid.

    Args:
        data_dir: Directory containing a "train" and "val" directory,
            each with a directory for "real" and every class in
            `other_classes`.
        save_dir: Directory to save checkpoints and CSV file with loss and accuracy.
            Created if it does not exist.
        other_classes: Other classes to train on (wherein the default class is "real").
            Any iterable of class names is accepted (list, tuple, ...).
        mtype: Model type.  Should be "meso1", "meso4", "mesoinception4", or "mesoinc4frozen16"
        class_mode: See `keras.preprocessing.image.ImageDataGenerator.flow_from_directory`.
        weights_path: Path to HDF5 weights file to load model with.
            A new model will be created if set to None.
        epoch: Epoch to start on.
        transfer: Whether to transfer from a MesoInception4 to a MesoInc4Frozen16.
            mtype should be either "mesoinception4" or "mesoinc4frozen16",
            and a weights_path should be specified.
        batch_size: Number of images to process at a time.
    """
    # Normalize to a list so tuples and other iterables can be
    # concatenated with the implicit "real" class below.
    other_classes = list(other_classes)

    # Make sure training and validation set exists.
    train_dir = os.path.join(data_dir, 'train')
    valid_dir = os.path.join(data_dir, 'val')
    if not os.path.exists(train_dir):
        print('ERROR: "{}" has no "train" set'.format(data_dir), file=stderr)
        exit(2)
    if not os.path.exists(valid_dir):
        print('ERROR: "{}" has no "val" set'.format(data_dir), file=stderr)
        exit(2)

    # Make sure every class exists in both the training and validation set.
    for c in other_classes + ['real']:
        for split_dir in (train_dir, valid_dir):
            if not os.path.exists(os.path.join(split_dir, c)):
                print('ERROR: "{}" has no class "{}"'.format(split_dir, c),
                      file=stderr)
                exit(2)

    # Make sure model is valid.
    if mtype not in MODEL_MAP:
        print('ERROR: "{}" is not a valid model type'.format(mtype),
              file=stderr)
        exit(2)

    # Make sure classification mode is valid.
    if class_mode not in CLASS_MODES:
        print('ERROR: "{}" is not a valid classification mode'.format(
            class_mode),
              file=stderr)
        exit(2)

    # Create save directory if it does not exist.
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Create data generators.  Only the training generator's class
    # weights are used.
    print('\nLoading training data from "{}"...'.format(train_dir))
    train_generator, class_weight = create_data_generator(
        train_dir, other_classes, batch_size, class_mode)

    print('\nLoading validation data from "{}"...'.format(valid_dir))
    valid_generator, _ = create_data_generator(valid_dir, other_classes,
                                               batch_size, class_mode)

    # Create model.  Categorical models need one output per class,
    # counting "real".
    if class_mode == 'categorical':
        classes = len(other_classes) + 1
        model = MODEL_MAP[mtype](class_mode=class_mode, classes=classes)
    else:
        model = MODEL_MAP[mtype]()
    if transfer:
        print('\nTransferring MESOINCEPTION4 model from "{}"'.format(
            weights_path))
        model.load_transfer(weights_path)
    elif weights_path is not None:
        print('\nLoading {} model from "{}"'.format(mtype.upper(),
                                                    weights_path))
        model.load(weights_path)

    # Train model.
    classes_str = ', '.join(other_classes)
    print('\nTraining {} model as a {} classifier on classes {}...\n'.format(
        mtype.upper(), class_mode, classes_str.upper()))
    callback = CustomCallback(save_dir, save_epoch=SAVE_EPOCH)
    model.fit_with_generator(train_generator,
                             len(train_generator),
                             validation_data=valid_generator,
                             validation_steps=len(valid_generator),
                             class_weight=class_weight,
                             epochs=EPOCHS,
                             initial_epoch=epoch,
                             shuffle=True,
                             callbacks=[callback])
Example #6

0

Show file

def _create_class_generators(data_dir, class_type, comps, batch_size):
    """
    Creates one binary test generator per compression level for a class.

    Returns:
        Dict mapping compression level to data generator, or None when any
        compression level has fewer than 10 samples.
    """
    gens = {}
    for comp_level in comps:
        test_dir = os.path.join(data_dir, comp_level, 'test')
        new_gen, _ = create_data_generator(test_dir, [class_type], batch_size, 'binary')
        if new_gen.samples < 10:
            print(
                'ERROR: Only found {} samples for class '
                '"{}" and "real" for compression level "{}" in "{}".'.format(
                    new_gen.samples, class_type, comp_level, test_dir),
                file=stderr
            )
            print('Skipping.', file=stderr)
            return None
        gens[comp_level] = new_gen
    return gens


def main(data_dir, models_dir, mtype, output_file, batch_size=16):
    """
    Tests models on every available compression level.

    For each compression level directory under `models_dir`, every class
    sub-directory containing a "best.hdf5" file is loaded and evaluated
    against the test data of every compression level.  One CSV row is
    appended to `output_file` per tested model.

    Prints an error to stderr and exits with status 2 when `mtype` is not
    a known model type.

    Args:
        data_dir: Directory containing test classes, including "real"
            and `other_class`.
        models_dir: Models directory as described in this script's docstring.
        mtype: Architecture of models to test.
        output_file: CSV file to output to.
        batch_size: Number of images to process at a time.
    """
    # Make sure model is valid.
    if mtype not in MODEL_MAP:
        print('ERROR: "{}" is not a valid model type'.format(mtype),
              file=stderr)
        exit(2)

    print('Testing compression levels for {}'.format(mtype.upper()))
    print('Loading models from "{}"'.format(models_dir))
    print('Outputting to "{}"'.format(output_file))
    print('Batch size: {}'.format(batch_size))

    comps = ('all', 'c0', 'c23', 'c40')
    metrics = ('acc', 'tpr', 'tnr')

    # Maps class types to maps of compression levels to data generators.
    # I.e. data generators for every class type for every compression level.
    # A class maps to None when its generators could not be created, so the
    # failure is remembered instead of retried for every compression level.
    generators = {}

    # Open output file.  Initialize with headers if it does not exist.
    # `newline=''` is what the csv module requires of files it writes to;
    # the context manager closes the file even if a test raises.
    init_output = not os.path.exists(output_file)
    with open(output_file, 'a', newline='') as output:
        output_csv = csv.writer(output)
        if init_output:
            headers = ['mtype', 'comp', 'class']
            for comp in comps:
                headers.extend('{}_{}'.format(m, comp) for m in metrics)
            output_csv.writerow(headers)

        for comp_level in comps:
            print('\nRunning tests for compression level "{}"'.format(comp_level))
            comp_dir = os.path.join(models_dir, comp_level, mtype.lower())
            if not os.path.exists(comp_dir):
                print('ERROR: "{}" does not exist. Skipping.'.format(comp_dir),
                      file=stderr)
                continue

            for md in sorted(os.listdir(comp_dir)):
                weights_dir = os.path.join(comp_dir, md)

                # Ignore files.
                if not os.path.isdir(weights_dir):
                    continue

                # Make sure best weight parameters are present.
                best_path = os.path.join(weights_dir, 'best.hdf5')
                if not os.path.isfile(best_path):
                    print('ERROR: File "{}" does not exist. Skipping.'.format(best_path),
                          file=stderr)
                    continue

                # Create data generators if they do not exist.
                class_type = os.path.basename(md)
                if class_type not in generators:
                    print('\nCreating generators for class "{}"...'.format(class_type))
                    generators[class_type] = _create_class_generators(
                        data_dir, class_type, comps, batch_size)

                # Skip classes whose test data could not be loaded.
                class_gens = generators[class_type]
                if class_gens is None:
                    continue

                print('\nTesting class "{}" for compression level "{}"...'.format(
                    class_type, comp_level))

                # Load model.
                model = MODEL_MAP[mtype]()
                model.load(best_path)
                model.set_metrics(['acc', tpr_pred, tnr_pred])

                # Test model on every compression level.
                results = {}
                for comp_level_test in comps:
                    gen = class_gens[comp_level_test]
                    # Generators are shared between models, so rewind first.
                    gen.reset()
                    res = {}
                    _, res['acc'], res['tpr'], res['tnr'] = model.evaluate_with_generator(gen)
                    results[comp_level_test] = res

                # Write data, in the same column order as the headers.
                data_line = [mtype.lower(), comp_level, class_type]
                for comp in comps:
                    data_line.extend(results[comp][m] for m in metrics)
                output_csv.writerow(data_line)
                # Flush per row so partial results survive an interrupted run.
                output.flush()