Example #1
def main():
    """
	main path of execution
	"""

    # parse arguments
    args = parseArguments()
    args.image_size = 256

    # load model
    model, args.model = loadFromFile(args.model_path)
    plotSampleSizes(args.data_path)

    # select preprocess_input wrapper
    module = importlib.import_module('keras.applications.{}'.format(
        args.model))
    preprocess_input = module.preprocess_input

    # create test generator and compute results
    datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

    train_cm, train_wronguns = getResults(model, datagen, args, 'train')
    test_cm, test_wronguns = getResults(model, datagen, args, 'test')

    # plot confusion matrices
    plotConfusionMatrix([train_cm, test_cm])

    return
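The loadFromFile helper used above (and in most of the examples below) is not shown. A minimal sketch of what such a helper might look like, assuming the model was saved as a JSON architecture file plus an HDF5 weights file and that the backbone name is encoded in the filename; these are assumptions, not the authors' actual implementation:

import os
from keras.models import model_from_json


def loadFromFile(model_path):
    """
    Hypothetical sketch: restore a Keras model saved as <name>.json + <name>.h5
    and infer the backbone type ('vgg16', 'resnet50', 'inception_v3') from the
    file name. The real helper used by these examples may differ.
    """
    # read architecture from json
    with open(model_path + '.json', 'r') as f:
        model = model_from_json(f.read())

    # read weights from companion hdf5 file
    model.load_weights(model_path + '.h5')

    # infer backbone type from the filename - naming convention is assumed
    name = os.path.basename(model_path).lower()
    model_type = next(
        (t for t in ['vgg16', 'resnet50', 'inception_v3'] if t in name), None)

    return model, model_type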
Example #2
def main():
    """
	main path of execution
	"""

    # parse arguments
    args = parseArguments()
    args.image_size = 128

    # load pre-trained model from file
    model, model_type = loadFromFile(args.model_path)

    # select preprocess_input wrapper
    module = importlib.import_module(
        'keras.applications.{}'.format(model_type))
    preprocess_input = module.preprocess_input

    datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
    scaler = MinMaxScaler()

    # plot sample size plots and loss diagnostics
    # plotSampleSizes( args.data_path )
    # plotDiagnostics( args.model_path )

    # read dataframe and normalise target
    df_train = pd.read_csv(os.path.join(args.data_path, 'train.csv'))
    df_train['target'] = scaler.fit_transform(df_train[['target']])

    df_train = getPrediction(datagen, model, df_train,
                             os.path.join(args.data_path, 'train'))
    plotHeatMaps(df_train)

    # read dataframe and normalise target
    df_test = pd.read_csv(os.path.join(args.data_path, 'test.csv'))
    df_test['target'] = scaler.transform(df_test[['target']])

    df_test = getPrediction(datagen, model, df_test,
                            os.path.join(args.data_path, 'test'))

    # plot regression
    plotRegression([df_train, df_test])

    return
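getPrediction is another helper assumed by these examples; it evidently runs the regression model over the images listed in a dataframe and attaches the outputs. A possible sketch, assuming the dataframe has 'image' and 'target' columns as in Example #6 and that predictions are returned in a new 'prediction' column; the column names, default image size and alignment behaviour are all assumptions:

def getPrediction(datagen, model, df, directory, image_size=128, batch_size=32):
    """
    Hypothetical sketch: predict a scaled target for every image referenced in
    df['image'] and return the dataframe with a new 'prediction' column.
    Assumes no filenames are dropped by the iterator, so rows stay aligned.
    """
    # iterate the dataframe rows in a fixed order so predictions line up
    it = datagen.flow_from_dataframe(dataframe=df,
                                     directory=directory,
                                     x_col='image',
                                     y_col='target',
                                     class_mode='raw',
                                     color_mode='rgb',
                                     shuffle=False,
                                     target_size=(image_size, image_size),
                                     batch_size=batch_size)

    # run inference and attach results
    df = df.copy()
    df['prediction'] = model.predict_generator(it, steps=len(it)).flatten()
    return df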
Example #3
def main():

    """ 
    setup model based on command line input and execute training 
    """

    # parse arguments
    args = parseTrainArguments()
    print( 'epochs {} / batch size {}'.format( args.epochs, args.batch_size ) )

    # start session
    session = tf.keras.backend.get_session()
    init = tf.global_variables_initializer()
    session.run(init)

    # optional model creation
    if args.load_path is not None:

        # load model from file
        model, args.model = loadFromFile( args.load_path )

    else:

        # define topmost cnn layers plugged into pretrained model
        layers = {  'fc' : [    { 'units' : 256, 'activation' : 'tanh', 'dropout' : 0.2 },
                                { 'units' : 128, 'activation' : 'tanh', 'dropout' : 0.2 } ],
                    'out' : [   { 'units' : 1, 'activation' : 'sigmoid' } ]
        }

        # create model from argument
        cnn_library = { 'vgg16': getVgg16, 'resnet50': getResNet50, 'inception_v3': getInceptionV3 }
        model = cnn_library[ args.model ]( ( args.image_size, args.image_size, 3 ), layers )


    # proceed only if a valid model was created or loaded
    if model is not None:

        # visualise model architecture
        plot_model(model, show_shapes=True, to_file='model.png')

        # setup optimiser and compile
        #opt = SGD( lr=0.01, momentum=0.9 )
        opt = Adam( lr=1e-6 )
        model.compile(  optimizer=opt, 
                        loss='binary_crossentropy', 
                        metrics=[AUC(name='auc'),Precision(name='precision'),Recall(name='recall') ] )
        
        #opt = Adam( lr=1e-6 )
        #model.compile(  optimizer=opt, 
        #                loss=[binary_focal_loss(alpha=.90, gamma=2)], 
        #                metrics=[AUC(name='auc'),Precision(name='precision'),Recall(name='recall') ] )
        model.summary()

        # select preprocess_input wrapper
        module = importlib.import_module( 'keras.applications.{}'.format( args.model ) )
        preprocess_input = module.preprocess_input
        
        # create data generators
        train_datagen = ImageDataGenerator( preprocessing_function=preprocess_input,
                                            horizontal_flip=True, 
                                            vertical_flip=True, 
                                            rotation_range=90 )
        
        test_datagen = ImageDataGenerator(  preprocessing_function=preprocess_input )

        # get train iterator - binary classification
        path = os.path.join( args.data_path, 'train' )
        train_it = train_datagen.flow_from_directory(   path, 
                                                        class_mode='binary', 
                                                        batch_size=args.batch_size, 
                                                        classes=[ 'dry', 'wet' ],
                                                        target_size=(args.image_size, args.image_size) )

        # get test iterator - binary classification
        path = os.path.join( args.data_path, 'test' )
        test_it = test_datagen.flow_from_directory( path, 
                                                    class_mode='binary', 
                                                    batch_size=args.batch_size, 
                                                    classes=[ 'dry', 'wet' ],
                                                    target_size=(args.image_size, args.image_size) )

        # confirm the iterator works
        batchX, batchy = train_it.next()
        print('Batch shape=%s, min=%.3f, max=%.3f, mean=%.3f, std=%.3f' % (batchX.shape, batchX.min(), batchX.max(), batchX.mean(), batchX.std() ))

        # setup callbacks
        callbacks = [ CSVLogger( 'log.csv', append=True ) ]
        if args.checkpoint_path is not None:

            # create sub-directory if required
            if not os.path.exists ( args.checkpoint_path ):
                os.makedirs( args.checkpoint_path )

            # setup checkpointing callback - monitor validation AUC since accuracy is not among the compiled metrics
            path = os.path.join( args.checkpoint_path, "weights-{epoch:02d}-{val_auc:.2f}.h5" )
            checkpoint = ModelCheckpoint(   path,
                                            monitor='val_auc',
                                            verbose=1,
                                            save_best_only=True,
                                            mode='max' )
            callbacks.append( checkpoint )


        # fit model
        weights = getBinaryClassWeights( args.data_path, [ 'dry', 'wet' ] )
        history = model.fit_generator(  train_it, 
                                        steps_per_epoch=len(train_it), 
                                        class_weight=weights,
                                        validation_data=test_it, 
                                        validation_steps=len(test_it), 
                                        epochs=args.epochs, 
                                        callbacks=callbacks,
                                        verbose=1 )

        # evaluate model
        scores = model.evaluate_generator(  test_it, 
                                            steps=len(test_it), 
                                            verbose=1 )
        print('Final Metric Scores> {}'.format( scores ) )

        # optional save
        if args.save_path is not None:
            saveToFile( model, args.save_path, args.model )

        # plot learning curves
        plotHistory(history)

    return
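Example #3 passes class weights into fit_generator through a getBinaryClassWeights helper that is not shown. A plausible sketch, assuming the training images live under <data_path>/train/<class>/ and that inverse-frequency weighting is intended (both assumptions):

import os


def getBinaryClassWeights(data_path, classes):
    """
    Hypothetical sketch: weight each class by inverse frequency so the
    minority class contributes more to the loss. Keys are the integer class
    indices expected by Keras fit_generator.
    """
    # count images per class sub-directory under train/
    counts = [len(os.listdir(os.path.join(data_path, 'train', c))) for c in classes]
    total = float(sum(counts))

    # weight = total / (n_classes * class_count)
    return {idx: total / (len(classes) * n) for idx, n in enumerate(counts)}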
Example #4
def main():
    """
	main path of execution
	"""

    # parse arguments
    args = parseArguments()
    args.image_size = 256

    # load pre-trained model from file
    model, model_type = loadFromFile(args.model_path)

    # select preprocess_input wrapper
    module = importlib.import_module(
        'keras.applications.{}'.format(model_type))
    preprocess_input = module.preprocess_input

    datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
    scaler = MinMaxScaler()

    # plot sample size plots and loss diagnostics
    plotSampleSizes(args.data_path)
    plotDiagnostics(args.model_path)

    # read dataframe and normalise target
    df_train = pd.read_csv(os.path.join(args.data_path, 'train.csv'))
    df_train['target'] = scaler.fit_transform(df_train[['target']])

    #df_train = getPrediction( datagen, model, df_train, os.path.join( args.data_path, 'train' ) )

    # read dataframe and normalise target
    df_test = pd.read_csv(os.path.join(args.data_path, 'test.csv'))
    df_test['target'] = scaler.transform(df_test[['target']])

    #df_test = getPrediction( datagen, model, df_test, os.path.join( args.data_path, 'test' ) )

    # plot regression
    #plotRegression( [ df_train, df_test ] )

    # finally run model against unlabelled images - unknown capacity
    path = os.path.join(args.data_path, 'unlabelled')
    it = datagen.flow_from_directory(path,
                                     classes=['test'],
                                     color_mode='rgb',
                                     shuffle=False,
                                     batch_size=1,
                                     target_size=(args.image_size,
                                                  args.image_size))

    # evaluate probabilities
    y_pred = model.predict_generator(it)

    # compile results
    records = []
    for idx, filename in enumerate(it.filenames):

        # assign label and confidence
        records.append({
            'uid': getUniqueId(filename),
            'capacity': float(np.exp(y_pred[idx]))
        })

    # convert to dataframe
    df = pd.DataFrame.from_dict(records)

    # compute mean for each uid - drop duplicates
    df['mean'] = df.groupby(['uid']).capacity.transform('mean')
    df.drop_duplicates(subset='uid', keep='first', inplace=True)
    df = df.drop(columns=['capacity'])

    for idx, row in df.iterrows():
        print(row['uid'], row['mean'])

    # create figure
    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(14, 6))
    axes.set_title(
        'Model Predicted Capacity for Unlabelled Cement Factory Sites')
    axes.set_ylabel('Mt / year')

    axes.set_xticks(range(0, len(df)))
    axes.tick_params(axis='both', which='major', labelsize=8)

    axes.set_xticklabels(df['uid'].tolist(), rotation=90)
    axes.plot(df['mean'].tolist())

    # show figure
    fig.tight_layout(rect=[0, 0.05, 1, 0.95])
    plt.show()

    return
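Examples #4 and #5 aggregate predictions per site with a getUniqueId helper. A minimal sketch, assuming the unique identifier is simply the part of the image filename before the first underscore; the actual naming convention is not shown, so this is a guess:

import os


def getUniqueId(filename):
    """
    Hypothetical sketch: derive a site identifier from an image filename,
    e.g. 'site-042_patch3.png' -> 'site-042'.
    """
    # strip directory and extension, keep the token before the first underscore
    stem = os.path.splitext(os.path.basename(filename))[0]
    return stem.split('_')[0]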
Example #5
def main():
    """
	main path of execution
	"""

    # parse arguments
    args = parseArguments()
    args.image_size = 256

    # load model
    model, args.model = loadFromFile(args.model_path)
    plotSampleSizes(args.data_path)

    # select preprocess_input wrapper
    module = importlib.import_module('keras.applications.{}'.format(
        args.model))
    preprocess_input = module.preprocess_input

    # create test generator and compute results
    datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

    train_cm, train_wronguns = getResults(model, datagen, args, 'train')
    test_cm, test_wronguns = getResults(model, datagen, args, 'test')

    # plot confusion matrices
    plotConfusionMatrix([train_cm, test_cm])

    # finally run model against unlabelled images - unknown production type
    path = os.path.join(args.data_path, 'unlabelled')
    it = datagen.flow_from_directory(path,
                                     classes=['test'],
                                     color_mode='rgb',
                                     shuffle=False,
                                     batch_size=1,
                                     target_size=(args.image_size,
                                                  args.image_size))

    # evaluate probabilities
    y_pred = model.predict_generator(it)

    # compile results
    records = []
    for idx, filename in enumerate(it.filenames):

        # assign label and confidence
        label = 'wet' if y_pred[idx] > 0.5 else 'dry'
        confidence = 'high' if y_pred[idx] > 0.9 or y_pred[idx] < 0.1 else 'low'

        records.append({
            'uid': getUniqueId(filename),
            'label': label,
            'probability': y_pred[idx],
            'confidence': confidence
        })

    # convert to dataframe
    df = pd.DataFrame.from_dict(records)
    df = df.sort_values('label')

    print('Total number of unseen images: {}'.format(len(it.filenames)))
    print('High confidence dry predictions: {}'.format(
        len(df[(df['confidence'] == 'high') & (df['label'] == 'dry')])))
    print('High confidence wet predictions: {}'.format(
        len(df[(df['confidence'] == 'high') & (df['label'] == 'wet')])))

    for idx, row in df.iterrows():
        print(row['uid'], row['probability'], row['label'], row['confidence'])

    # create figure
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(4, 4))
    for idx, s in enumerate(['dry', 'wet']):

        df_subset = df[(df['label'] == s)]
        count = [
            len(df_subset[(df_subset['confidence'] == 'high')]),
            len(df_subset[(df_subset['confidence'] == 'low')])
        ]

        axes[idx].barh(['high', 'low'], count)
        axes[idx].set_title('Predictions: {}'.format(s))

    # show figure
    fig.tight_layout(rect=[0, 0.05, 1, 0.95])
    plt.show()

    return
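Examples #1 and #5 rely on a getResults helper to produce a confusion matrix and the misclassified filenames for a data split. A rough sketch of one way to implement it, assuming a 'dry'/'wet' directory layout and a single sigmoid output thresholded at 0.5; the signature matches the calls above, but the implementation details are assumptions:

import os
from sklearn.metrics import confusion_matrix


def getResults(model, datagen, args, split):
    """
    Hypothetical sketch: run the model over <data_path>/<split>/{dry,wet},
    threshold the sigmoid output at 0.5 and return the confusion matrix
    together with the filenames that were misclassified.
    """
    path = os.path.join(args.data_path, split)
    it = datagen.flow_from_directory(path,
                                     class_mode='binary',
                                     classes=['dry', 'wet'],
                                     color_mode='rgb',
                                     shuffle=False,
                                     batch_size=1,
                                     target_size=(args.image_size, args.image_size))

    # predicted and true labels
    y_pred = (model.predict_generator(it, steps=len(it)).flatten() > 0.5).astype(int)
    y_true = it.classes

    # confusion matrix plus the files the model got wrong
    cm = confusion_matrix(y_true, y_pred)
    wronguns = [f for f, t, p in zip(it.filenames, y_true, y_pred) if t != p]
    return cm, wronguns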
Example #6
def main():
    """ 
    setup model based on command line input and execute training 
    """

    # parse arguments
    args = parseTrainArguments()
    print('epochs {} / batch size {}'.format(args.epochs, args.batch_size))

    # create tf session with incremental gpu memory allocation and register it with keras
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)
    tf.keras.backend.set_session(session)

    # optional model creation
    if args.load_path is not None:

        # load model from file
        model, args.model = loadFromFile(args.load_path)

    else:

        # define topmost cnn layers plugged into pretrained model
        layers = {
            'fc': [{
                'units': 256,
                'activation': 'relu',
                'dropout': 0.2
            }, {
                'units': 128,
                'activation': 'relu',
                'dropout': 0.2
            }],
            'out': [{
                'units': 1,
                'activation': 'linear'
            }]
        }

        # create model from argument
        cnn_library = {
            'vgg16': getVgg16,
            'resnet50': getResNet50,
            'inception_v3': getInceptionV3
        }
        model = cnn_library[args.model]((args.image_size, args.image_size, 3),
                                        layers)

    # valid model
    # plot_model(model, show_shapes=True, to_file='model.png')
    if model is not None:

        # setup optimiser and compile
        opt = Adam(lr=1e-6)
        model.compile(loss='mean_absolute_error', optimizer=opt)

        # select preprocess_input wrapper
        module = importlib.import_module('keras.applications.{}'.format(
            args.model))
        preprocess_input = module.preprocess_input

        # create data generators
        train_datagen = ImageDataGenerator(
            preprocessing_function=preprocess_input,
            horizontal_flip=True,
            vertical_flip=True,
            rotation_range=90)

        # fit the data augmentation
        test_datagen = ImageDataGenerator(
            preprocessing_function=preprocess_input)
        scaler = MinMaxScaler()

        # load train dataframe
        df_train = pd.read_csv(os.path.join(args.data_path, 'train.csv'))
        df_train['target'] = scaler.fit_transform(df_train[['target']])

        # create train iterator
        data_path = os.path.join(args.data_path, 'train')
        train_it = train_datagen.flow_from_dataframe(
            dataframe=df_train,
            directory=data_path,
            x_col='image',
            y_col='target',
            class_mode='raw',
            color_mode='rgb',
            shuffle=True,
            target_size=(args.image_size, args.image_size),
            batch_size=args.batch_size)

        # create test iterator
        df_test = pd.read_csv(os.path.join(args.data_path, 'test.csv'))
        df_test['target'] = scaler.transform(df_test[['target']])

        # data_path = os.path.join( os.path.join( args.data_path, 'test' ), name )
        data_path = os.path.join(args.data_path, 'test')
        test_it = test_datagen.flow_from_dataframe(
            dataframe=df_test,
            directory=data_path,
            x_col='image',
            y_col='target',
            class_mode='raw',
            color_mode='rgb',
            shuffle=True,
            target_size=(args.image_size, args.image_size),
            batch_size=args.batch_size)

        # confirm the iterator works
        batchX, batchy = train_it.next()
        print('Batch shape=%s, min=%.3f, max=%.3f, mean=%.3f, std=%.3f' %
              (batchX.shape, batchX.min(), batchX.max(), batchX.mean(),
               batchX.std()))

        # setup callbacks
        callbacks = [CSVLogger('log.csv', append=True)]
        if args.checkpoint_path is not None:

            # create sub-directory if required
            if not os.path.exists(args.checkpoint_path):
                os.makedirs(args.checkpoint_path)

            # setup checkpointing callback
            path = os.path.join(args.checkpoint_path,
                                "weights-{epoch:02d}-{val_loss:.2f}.h5")
            checkpoint = ModelCheckpoint(path,
                                         monitor='val_loss',
                                         verbose=1,
                                         save_best_only=True,
                                         mode='min')
            callbacks.append(checkpoint)

        # execute fit
        history = model.fit_generator(train_it,
                                      steps_per_epoch=len(train_it),
                                      validation_data=test_it,
                                      validation_steps=len(test_it),
                                      epochs=args.epochs,
                                      callbacks=callbacks,
                                      verbose=1)

        # optional save
        if args.save_path is not None:
            saveToFile(model, args.save_path, args.model)

        # plot learning curves
        plotHistory(history)

    return
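Examples #3 and #6 both finish by calling saveToFile, the counterpart of the loadFromFile sketch shown after Example #1. A hypothetical version under the same assumed json-plus-weights layout, with the model type used as the filename stem so the backbone can be inferred again on load:

import os


def saveToFile(model, save_path, model_type):
    """
    Hypothetical sketch: persist architecture and weights side by side so
    they can be restored by the loadFromFile sketch shown earlier.
    """
    # create output directory if required
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # write architecture as json and weights as hdf5
    stem = os.path.join(save_path, model_type)
    with open(stem + '.json', 'w') as f:
        f.write(model.to_json())
    model.save_weights(stem + '.h5')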