Example #1
import time

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import CSVLogger

# Project-local helpers assumed importable from this repo:
# preprocessing_gen_model, LrRangeFinder, SGDRScheduler, lr_plots, timer


def training_BNN_gen_model(img_list_train, feat_list_new, model_func,
                           data_path, batch, dropout_rate, **model_params):
    get_model = model_func
    times = []
    lr_mins = []
    lr_maxes = []

    print('Preprocessing')
    tf.keras.backend.clear_session()
    data_vector_train = preprocessing_gen_model(data_path, img_list_train)
    perm_index = feat_list_new.index('GSW_perm')
    flood_index = feat_list_new.index('flooded')
    print(data_vector_train.shape)
    data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0
    data_vector_train = np.delete(data_vector_train, perm_index, axis=1)
    # Split features (all columns but the last) from the target (last column)
    X_train, y_train = data_vector_train[:, :-1], data_vector_train[:, -1]
    input_dims = X_train.shape[1]

    model_path = data_path / batch / 'models'
    metrics_path = data_path / batch / 'metrics' / 'training'

    lr_plots_path = metrics_path / 'lr_plots'
    lr_vals_path = metrics_path / 'lr_vals'
    # Create the output directories if they do not already exist
    metrics_path.mkdir(parents=True, exist_ok=True)
    model_path.mkdir(parents=True, exist_ok=True)
    lr_plots_path.mkdir(parents=True, exist_ok=True)
    lr_vals_path.mkdir(parents=True, exist_ok=True)

    # ---------------------------------------------------------------------------------------------------
    # Determine learning rate by finding max loss decrease during single epoch training
    lrRangeFinder = LrRangeFinder(start_lr=0.1, end_lr=2)

    lr_model_params = {
        'batch_size': model_params['batch_size'],
        'epochs': 1,
        'verbose': 2,
        'callbacks': [lrRangeFinder],
        'use_multiprocessing': True
    }

    model = model_func(input_dims, dropout_rate)

    print('Finding learning rate')
    model.fit(X_train, y_train, **lr_model_params)
    lr_min, lr_max, lr, losses = lr_plots(lrRangeFinder, lr_plots_path)
    lr_mins.append(lr_min)
    lr_maxes.append(lr_max)
    # ---------------------------------------------------------------------------------------------------
    # Training the model with cyclical learning rate scheduler
    model_path = model_path / 'gen_model.h5'
    scheduler = SGDRScheduler(min_lr=lr_min,
                              max_lr=lr_max,
                              lr_decay=0.9,
                              cycle_length=3,
                              mult_factor=1.5)

    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor='sparse_categorical_accuracy',
                                         min_delta=0.001,
                                         patience=10),
        tf.keras.callbacks.ModelCheckpoint(filepath=str(model_path),
                                           monitor='loss',
                                           save_best_only=True),
        CSVLogger(str(metrics_path / 'training_log.log')),
        scheduler
    ]

    model = get_model(input_dims, dropout_rate)

    print('Training full model with best LR')
    start_time = time.time()
    model.fit(X_train, y_train, **model_params, callbacks=callbacks)
    end_time = time.time()
    times.append(timer(start_time, end_time, False))

    metrics_path = metrics_path.parent
    times = [float(i) for i in times]
    times_df = pd.DataFrame(times, columns=['training_time'])
    times_df.to_csv(metrics_path / 'training_times.csv', index=False)

    lr_range = np.column_stack([lr_mins, lr_maxes])
    lr_avg = np.mean(lr_range, axis=1)
    lr_range = np.column_stack([lr_range, lr_avg])
    lr_range_df = pd.DataFrame(lr_range,
                               columns=['lr_min', 'lr_max', 'lr_avg'])
    # Writes lr_vals.csv alongside the lr_vals directory
    lr_range_df.to_csv(lr_vals_path.with_suffix('.csv'), index=False)

    losses_path = lr_vals_path / 'gen_model_losses.csv'
    losses_path.parent.mkdir(parents=True, exist_ok=True)
    lr_losses = np.column_stack([lr, losses])
    lr_losses = pd.DataFrame(lr_losses, columns=['lr', 'losses'])
    lr_losses.to_csv(losses_path, index=False)
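The helpers preprocessing_gen_model, LrRangeFinder, SGDRScheduler, lr_plots,
and timer are defined elsewhere in this project. For orientation, here is a
minimal sketch of what an LR range-test callback like LrRangeFinder could look
like; the class body below is an assumption, not the project's actual
implementation. It ramps the learning rate linearly across one epoch while
recording the per-batch loss, which lr_plots can then use to pick lr_min and
lr_max.

import tensorflow as tf


class LrRangeFinder(tf.keras.callbacks.Callback):
    """Sketch only: ramp the LR from start_lr to end_lr over one epoch,
    recording (lr, loss) per batch so a usable LR range can be read off."""

    def __init__(self, start_lr=0.1, end_lr=2):
        super().__init__()
        self.start_lr, self.end_lr = start_lr, end_lr
        self.lrs, self.losses = [], []

    def on_train_begin(self, logs=None):
        # Number of batches in the (single) epoch driving the sweep
        self.steps = self.params.get('steps') or 1
        self.step = 0

    def on_train_batch_begin(self, batch, logs=None):
        # Linear interpolation between start_lr and end_lr
        frac = self.step / max(self.steps - 1, 1)
        lr = self.start_lr + frac * (self.end_lr - self.start_lr)
        tf.keras.backend.set_value(self.model.optimizer.lr, lr)
        self.step += 1

    def on_train_batch_end(self, batch, logs=None):
        self.lrs.append(float(tf.keras.backend.get_value(self.model.optimizer.lr)))
        self.losses.append(logs['loss'])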
Example #2
                    rle.rle_to_string(rle.rle_encode(resized_batch_preds[i]))
                })
    if sort:
        print("Sorting CSVfile")
        df = pd.read_csv(submission_name)
        df = df.sort_values(by=['img'])
        df.to_csv(submission_name, index=False)
    print("Submission created at: %s" % submission_name)


# Load the test Dataset
test_dataset = FullCarDataset(base_dir=TEST_DIR,
                              metadata_path=METADATA_PATH,
                              img_size=IMGSIZE,
                              resize=RESIZE)
print("Test Dataset: ", len(test_dataset), " samples")
# Turn it into a generator
test_gen = pyjet.DatasetGenerator(test_dataset,
                                  batch_size=BATCH_SIZE,
                                  shuffle=False)
print("Test Steps: ", test_gen.steps_per_epoch, " steps")
test_gen = GeneratorEnqueuer(test_gen)
test_gen.start(max_q_size=3)

# Create the model
model = model_func(test_dataset.img_size, train=False)
model.load_weights(MODEL_FILE)

# Create the submission
create_submission(model, test_gen, orig_imgsize=ORIG_IMGSIZE, resize=RESIZE)
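The rle module used inside create_submission is project-local. As a reference,
the run-length encoding conventionally used for Kaggle segmentation masks
(column-major pixel order, 1-indexed run starts) can be sketched as below; the
function names mirror the calls above, but the bodies are an assumption:

import numpy as np


def rle_encode(mask):
    # Sketch only: flatten column-major and find the (start, length) runs
    # of the nonzero pixels
    pixels = mask.flatten(order='F')
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]  # convert run-end indices into run lengths
    return runs


def rle_to_string(runs):
    return ' '.join(str(x) for x in runs)

This matches the call pattern rle.rle_to_string(rle.rle_encode(mask)) seen in
the truncated snippet above.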
Example #3
File: NN_noGSW.py Project: moghimis/CPR
import time

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import CSVLogger

# Project-local helpers assumed importable: tif_stacker, cloud_generator,
# preprocessing, LrRangeFinder, SGDRScheduler, lr_plots, timer


def NN_training(img_list, pctls, model_func, feat_list_new, data_path, batch,
                **model_params):
    get_model = model_func
    for j, img in enumerate(img_list):
        print(img + ': stacking tif, generating clouds')
        times = []
        lr_mins = []
        lr_maxes = []
        tif_stacker(data_path,
                    img,
                    feat_list_new,
                    features=True,
                    overwrite=False)
        cloud_generator(img, data_path, overwrite=False)

        for i, pctl in enumerate(pctls):
            print(img, pctl, '% CLOUD COVER')
            print('Preprocessing')
            tf.keras.backend.clear_session()
            data_train, data_vector_train, data_ind_train, feat_keep = preprocessing(
                data_path, img, pctl, feat_list_new, test=False)
            perm_index = feat_keep.index('GSW_perm')
            flood_index = feat_keep.index('flooded')
            # data_vector_train[data_vector_train[:, perm_index] == 1, flood_index] = 0
            data_vector_train = np.delete(data_vector_train,
                                          perm_index,
                                          axis=1)
            # Split features (all columns but the last) from the target
            X_train, y_train = (data_vector_train[:, :-1],
                                data_vector_train[:, -1])
            INPUT_DIMS = X_train.shape[1]

            model_path = data_path / batch / 'models' / img
            metrics_path = (data_path / batch / 'metrics' / 'training' / img /
                            f'{img}_clouds_{pctl}')

            lr_plots_path = metrics_path.parents[1] / 'lr_plots'
            lr_vals_path = metrics_path.parents[1] / 'lr_vals'
            # Create the output directories if they do not already exist
            metrics_path.mkdir(parents=True, exist_ok=True)
            model_path.mkdir(parents=True, exist_ok=True)
            lr_plots_path.mkdir(parents=True, exist_ok=True)
            lr_vals_path.mkdir(parents=True, exist_ok=True)

            # ---------------------------------------------------------------------------------------------------
            # Determine learning rate by finding max loss decrease during single epoch training
            lrRangeFinder = LrRangeFinder(start_lr=0.1, end_lr=2)

            lr_model_params = {
                'batch_size': model_params['batch_size'],
                'epochs': 1,
                'verbose': 2,
                'callbacks': [lrRangeFinder],
                'use_multiprocessing': True
            }

            model = model_func(INPUT_DIMS)

            print('Finding learning rate')
            model.fit(X_train, y_train, **lr_model_params)
            lr_min, lr_max, lr, losses = lr_plots(lrRangeFinder, lr_plots_path,
                                                  img, pctl)
            lr_mins.append(lr_min)
            lr_maxes.append(lr_max)
            # ---------------------------------------------------------------------------------------------------
            # Training the model with cyclical learning rate scheduler
            model_path = model_path / f'{img}_clouds_{pctl}.h5'
            scheduler = SGDRScheduler(min_lr=lr_min,
                                      max_lr=lr_max,
                                      lr_decay=0.9,
                                      cycle_length=3,
                                      mult_factor=1.5)

            callbacks = [
                tf.keras.callbacks.EarlyStopping(
                    monitor='sparse_categorical_accuracy',
                    min_delta=0.0001,
                    patience=10),
                tf.keras.callbacks.ModelCheckpoint(filepath=str(model_path),
                                                   monitor='loss',
                                                   save_best_only=True),
                CSVLogger(str(metrics_path / 'training_log.log')),
                scheduler
            ]

            model = get_model(INPUT_DIMS)

            print('Training full model with best LR')
            start_time = time.time()
            model.fit(X_train, y_train, **model_params, callbacks=callbacks)
            end_time = time.time()
            times.append(timer(start_time, end_time, False))
            # model.save(model_path)

        metrics_path = metrics_path.parent
        times = [float(i) for i in times]
        times = np.column_stack([pctls, times])
        times_df = pd.DataFrame(times,
                                columns=['cloud_cover', 'training_time'])
        times_df.to_csv(metrics_path / 'training_times.csv', index=False)

        lr_range = np.column_stack([pctls, lr_mins, lr_maxes])
        # Average of lr_min and lr_max (columns 1 and 2 of lr_range)
        lr_avg = np.mean(lr_range[:, 1:3], axis=1)
        lr_range = np.column_stack([lr_range, lr_avg])
        lr_range_df = pd.DataFrame(
            lr_range, columns=['cloud_cover', 'lr_min', 'lr_max', 'lr_avg'])
        lr_range_df.to_csv((lr_vals_path / img).with_suffix('.csv'),
                           index=False)

        # pctl, lr, and losses hold values from the final inner-loop
        # iteration, so only the last percentile's losses are written here
        losses_path = lr_vals_path / img / f'losses_{pctl}.csv'
        losses_path.parent.mkdir(parents=True, exist_ok=True)
        lr_losses = np.column_stack([lr, losses])
        lr_losses = pd.DataFrame(lr_losses, columns=['lr', 'losses'])
        lr_losses.to_csv(losses_path, index=False)
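SGDRScheduler's min_lr/max_lr, lr_decay, cycle_length, and mult_factor
parameters follow the SGDR scheme of Loshchilov & Hutter (cosine annealing
with warm restarts). A simplified per-epoch sketch of such a callback follows;
the project's actual implementation may differ (in particular, it likely
anneals per batch rather than per epoch):

import numpy as np
import tensorflow as tf


class SGDRScheduler(tf.keras.callbacks.Callback):
    """Sketch only: cosine-anneal the LR within a cycle, then warm-restart."""

    def __init__(self, min_lr, max_lr, lr_decay=0.9, cycle_length=3,
                 mult_factor=1.5):
        super().__init__()
        self.min_lr, self.max_lr = min_lr, max_lr
        self.lr_decay = lr_decay          # shrink max_lr after each restart
        self.cycle_length = cycle_length  # epochs in the current cycle
        self.mult_factor = mult_factor    # lengthen each successive cycle
        self.epoch_in_cycle = 0

    def on_epoch_begin(self, epoch, logs=None):
        # Cosine decay from max_lr down to min_lr within the cycle
        frac = self.epoch_in_cycle / self.cycle_length
        lr = self.min_lr + 0.5 * (self.max_lr - self.min_lr) * (
            1 + np.cos(frac * np.pi))
        tf.keras.backend.set_value(self.model.optimizer.lr, lr)

    def on_epoch_end(self, epoch, logs=None):
        self.epoch_in_cycle += 1
        if self.epoch_in_cycle >= self.cycle_length:
            # Warm restart: reset the cycle, stretch it, decay the peak LR
            self.epoch_in_cycle = 0
            self.cycle_length = int(np.ceil(self.cycle_length *
                                            self.mult_factor))
            self.max_lr *= self.lr_decay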
Example #4
    # Old-Keras API: epsilon= here was renamed min_delta= in later versions
    reduce_lr = ReduceLROnPlateau(monitor='val_dice_loss',
                                  factor=0.1,
                                  patience=4,
                                  verbose=1,
                                  epsilon=1e-4,
                                  mode='max')
    callbacks = [best_model, reduce_lr]
    if args.plotter:
        from plotter_callback import Plotter
        # This will plot the losses while training
        plotter = Plotter(scale='log')
        callbacks.append(plotter)

    # Create the model and fit it
    model = model_func(car_dataset.img_size,
                       filters=args.filters,
                       optimizer=optimizer)
    # Load the initialization weights if given
    if args.load_weights:
        model.load_weights(args.load_weights)
    # Train the model
    fit = model.fit_generator(train_gen,
                              steps_per_epoch=train_gen.steps_per_epoch,
                              epochs=args.epochs,
                              verbose=args.train_verbosity,
                              callbacks=callbacks,
                              validation_data=val_gen,
                              validation_steps=val_gen.steps_per_epoch,
                              max_q_size=args.max_q_size,
                              initial_epoch=args.initial_epoch)
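val_dice_loss here is a project-defined metric; monitoring it with mode='max'
suggests it actually behaves like a dice coefficient (higher is better). A
common Keras-style definition, offered only as an assumption about what the
project computes:

from tensorflow.keras import backend as K


def dice_loss(y_true, y_pred, smooth=1.0):
    # Dice coefficient: 2*|A∩B| / (|A| + |B|); smooth guards against
    # division by zero on empty masks
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return ((2.0 * intersection + smooth) /
            (K.sum(y_true_f) + K.sum(y_pred_f) + smooth))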