Example #1
def GFM_MLC(args):

    # Parameters
    batch_size = 32
    dataset = args.dataset
    epochs = 1000  # early stopping on validation data
    verbosity = 0
    sklearn = False
    c = args.c
    print('Amount of regularization = {:.3f}'.format(c))

    features_train = np.load(
        '../data/{}/features/features_train_max.npy'.format(dataset))
    features_validation = np.load(
        '../data/{}/features/features_validation_max.npy'.format(dataset))
    features_test = np.load(
        '../data/{}/features/features_test_max.npy'.format(dataset))
    n_features = features_train.shape[1]

    # rescale
    from sklearn.preprocessing import StandardScaler
    featurescaler = StandardScaler().fit(features_train)

    features_train = featurescaler.transform(features_train)
    features_validation = featurescaler.transform(features_validation)
    features_test = featurescaler.transform(features_test)

    csv_path_train = '../data/{}/TRAIN.csv'.format(dataset)
    csv_path_validation = '../data/{}/VALIDATION.csv'.format(dataset)
    csv_path_test = '../data/{}/TEST.csv'.format(dataset)

    df_train = pd.read_csv(csv_path_train)
    df_validation = pd.read_csv(csv_path_validation)
    df_test = pd.read_csv(csv_path_test)

    train_steps = np.ceil(len(df_train) / batch_size)
    validation_steps = np.ceil(len(df_validation) / batch_size)
    test_steps = np.ceil(len(df_test) / batch_size)

    # Extract ground truth labels
    y_true_train = np.array([
        ast.literal_eval(df_train['marginal_labels'][i])
        for i in range(len(df_train))
    ])
    y_true_validation = np.array([
        ast.literal_eval(df_validation['marginal_labels'][i])
        for i in range(len(df_validation))
    ])
    y_true_test = np.array([
        ast.literal_eval(df_test['marginal_labels'][i])
        for i in range(len(df_test))
    ])

    n_labels = y_true_train.shape[1]

    y_gfm_train = np.array([
        ast.literal_eval(df_train['gfm_labels'][i])
        for i in range(len(df_train))
    ])
    y_gfm_validation = np.array([
        ast.literal_eval(df_validation['gfm_labels'][i])
        for i in range(len(df_validation))
    ])

    # Compute max_s: the maximum number of positive labels for a single instance
    max_s = np.max(
        np.array([
            np.max(np.sum(y_true_train, axis=1)),
            np.max(np.sum(y_true_validation, axis=1)),
            np.max(np.sum(y_true_test, axis=1))
        ]))

    # Containers
    GFM_train_entries = []
    GFM_validation_entries = []
    GFM_test_entries = []

    for label in range(n_labels):
        # print('Label {} of {}...'.format(label, n_labels))
        # extract one multinomial regression problem
        if sklearn:
            y_label_train = np.argmax(y_gfm_train[:, label, :], axis=1)
            y_label_validation = np.argmax(y_gfm_validation[:, label, :],
                                           axis=1)
        else:
            y_label_train = y_gfm_train[:, label, :]
            y_label_validation = y_gfm_validation[:, label, :]
        # print(y_label_train.shape)

        if sklearn:
            from sklearn.linear_model import LogisticRegression
            model = LogisticRegression(multi_class='ovr', solver='lbfgs', C=c)

        else:
            model = GFM_labelwise_classifier(n_features, max_s + 1, c).model
            optimizer = Adam()
            model.compile(loss='categorical_crossentropy', optimizer=optimizer)
            callbacks = [
                EarlyStopping(monitor='val_loss',
                              min_delta=0,
                              patience=3,
                              verbose=verbosity,
                              mode='auto'),
                ModelCheckpoint(
                    '../models/GFMMLC_labelwise_{}.h5'.format(dataset),
                    monitor='val_loss',
                    save_best_only=True,
                    verbose=verbosity)
            ]

            model.fit(x=features_train,
                      y=y_label_train,
                      batch_size=batch_size,
                      epochs=epochs,
                      verbose=verbosity,
                      callbacks=callbacks,
                      validation_data=(features_validation,
                                       y_label_validation))
            # Load best model
            model.load_weights(
                '../models/GFMMLC_labelwise_{}.h5'.format(dataset))
            model.compile(loss='categorical_crossentropy', optimizer=optimizer)

        pis_train = model.predict(features_train)
        pis_validation = model.predict(features_validation)
        pis_test = model.predict(features_test)

        if sklearn:
            from sklearn.preprocessing import OneHotEncoder
            enc = OneHotEncoder()
            enc.fit(
                np.argmax(np.argmax(y_gfm_train[:, :, :], axis=1),
                          axis=1).reshape(-1, 1))
            pis_train = enc.transform(pis_train.reshape(-1, 1)).toarray()
            pis_validation = enc.transform(pis_validation.reshape(
                -1, 1)).toarray()
            pis_test = enc.transform(pis_test.reshape(-1, 1)).toarray()

        GFM_train_entries.append(pis_train)
        GFM_validation_entries.append(pis_validation)
        GFM_test_entries.append(pis_test)

    # Combine all the predictions
    pis_train = np.stack(GFM_train_entries).transpose(1, 0, 2)
    pis_validation = np.stack(GFM_validation_entries).transpose(1, 0, 2)
    pis_test = np.stack(GFM_test_entries).transpose(1, 0, 2)

    pis_train_final = [
        complete_matrix_columns_with_zeros(mat[:, 1:], len=n_labels)
        for mat in pis_train
    ]
    pis_validation_final = [
        complete_matrix_columns_with_zeros(mat[:, 1:], len=n_labels)
        for mat in pis_validation
    ]
    pis_test_final = [
        complete_matrix_columns_with_zeros(mat[:, 1:], len=n_labels)
        for mat in pis_test
    ]

    # Compute optimal predictions for F1 and F2
    for beta in [1, 2]:
        GFM = GeneralFMaximizer(beta, n_labels)

        # Run GFM algo on this output
        (optimal_predictions_train,
         E_F_train) = GFM.get_predictions(predictions=pis_train_final)
        (optimal_predictions_validation, E_F_validation) = GFM.get_predictions(
            predictions=pis_validation_final)
        (optimal_predictions_test,
         E_F_test) = GFM.get_predictions(predictions=pis_test_final)

        # Evaluate F score
        F_train = compute_F_score(y_true_train,
                                  optimal_predictions_train,
                                  t=0.5,
                                  beta=beta)
        F_validation = compute_F_score(y_true_validation,
                                       optimal_predictions_validation,
                                       t=0.5,
                                       beta=beta)
        F_test = compute_F_score(y_true_test,
                                 optimal_predictions_test,
                                 t=0.5,
                                 beta=beta)

        print('GFM_MLC ({})'.format(dataset))
        print('-' * 50)
        # print('F{} score on training data: {:.4f}'.format(beta, F_train))
        # print('F{} score on validation data: {:.4f}'.format(beta, F_validation))
        print('F{} score on test data: {:.4f}'.format(beta, F_test))

        # Store test set predictions to submit to Kaggle
        if (dataset == 'KAGGLE_PLANET') and (beta == 2):
            # Map predictions to filenames
            def filepath_to_filename(s):
                return os.path.basename(os.path.normpath(s)).split('.')[0]

            test_filenames = [
                filepath_to_filename(f) for f in df_test['full_path']
            ]
            GFM_predictions_mapping = dict(
                zip(test_filenames, [
                    csv_helpers.decode_label_vector(f)
                    for f in optimal_predictions_test
                ]))
            # Create submission file
            csv_helpers.create_submission_file(GFM_predictions_mapping,
                                               name='Planet_GFM_MC_labelwise')
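
Note: compute_F_score, complete_matrix_columns_with_zeros, GeneralFMaximizer and csv_helpers are imported from elsewhere in this repository and are not shown here. Judging from its call sites (and from the fbeta_score usage in Example #6), compute_F_score plausibly thresholds the predicted marginals at t and averages the F-beta score over instances. A minimal sketch under that assumption, not the repository's actual implementation:

import numpy as np
from sklearn.metrics import fbeta_score

def compute_F_score_sketch(y_true, y_pred, t=0.5, beta=1):
    # Hypothetical stand-in: binarize the predicted marginals at the
    # threshold t, then average the F-beta score over instances.
    y_bin = (np.asarray(y_pred) >= t).astype(int)
    return fbeta_score(np.asarray(y_true), y_bin, beta=beta, average='samples')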
Example #2
def BR(args, logger, timestamp):
    # Parameters
    im_size = args.im_size
    batch_size = 16
    dataset = args.dataset
    pretrained = args.pretrained
    epochs = 100  # early stopping on validation data
    verbosity = 1
    c = args.c
    lr = args.lr
    opt = args.opt
    imagenet = args.imagenet
    n_hidden = args.n_hidden

    features_train = None
    features_validation = None
    features_test = None
    if (dataset == 'KAGGLE_PLANET') or (dataset == 'MS_COCO'):
        dropout_rates = [0.10, 0.5]
    elif (dataset == 'PASCAL_VOC_2007') or (dataset == 'PASCAL_VOC_2012'):
        dropout_rates = [0.25, 0.75]

    if pretrained:
        features_train = np.load(
            '../data/{}/features/features_train_max.npy'.format(dataset))
        features_validation = np.load(
            '../data/{}/features/features_validation_max.npy'.format(dataset))
        features_test = np.load(
            '../data/{}/features/features_test_max.npy'.format(dataset))
        n_features = features_train.shape[1]

        # rescale
        from sklearn.preprocessing import StandardScaler
        featurescaler = StandardScaler().fit(features_train)

        features_train = featurescaler.transform(features_train)
        features_validation = featurescaler.transform(features_validation)
        features_test = featurescaler.transform(features_test)

    csv_path_train = '../data/{}/TRAIN.csv'.format(dataset)
    csv_path_validation = '../data/{}/VALIDATION.csv'.format(dataset)
    csv_path_test = '../data/{}/TEST.csv'.format(dataset)

    df_train = pd.read_csv(csv_path_train)
    df_validation = pd.read_csv(csv_path_validation)
    df_test = pd.read_csv(csv_path_test)

    train_steps = np.ceil(len(df_train) / batch_size)
    validation_steps = np.ceil(len(df_validation) / batch_size)
    test_steps = np.ceil(len(df_test) / batch_size)

    # Extract ground truth labels
    y_true_train = np.array(
        [np.array(ast.literal_eval(l)) for l in df_train['marginal_labels']])
    y_true_validation = np.array([
        np.array(ast.literal_eval(l)) for l in df_validation['marginal_labels']
    ])
    y_true_test = np.array(
        [np.array(ast.literal_eval(l)) for l in df_test['marginal_labels']])

    n_labels = y_true_train.shape[1]

    # Data generators for training
    train_gen = gn.DataGenerator(df=df_train,
                                 n_labels=n_labels,
                                 im_size=im_size,
                                 batch_size=batch_size,
                                 shuffle=True,
                                 mode='train',
                                 pretrained=pretrained,
                                 features=features_train).generate()

    validation_gen = gn.DataGenerator(df=df_validation,
                                      n_labels=n_labels,
                                      im_size=im_size,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      mode='train',
                                      pretrained=pretrained,
                                      features=features_validation).generate()

    # Set up the model
    if pretrained:
        model = BR_classifier(n_features, n_labels, c).model
        optimizer = Adam()
    else:
        model = VGG_classifier(im_size, n_labels, n_hidden, imagenet).model
        if opt == 'sgd':
            optimizer = SGD(lr=lr)  # Use smaller lr
        else:
            optimizer = Adam(lr=lr)

    # First, freeze all layers but the final classification block
    for layer in model.layers[:-6]:
        layer.trainable = False

    # Lower the dropout rate for the pretraining
    model.layers[-2].rate = dropout_rates[0]
    model.layers[-5].rate = dropout_rates[0]

    model.compile(loss='binary_crossentropy', optimizer=optimizer)

    print(model.summary())
    print(model.layers[-2].get_config())
    callbacks = [
        EarlyStopping(monitor='val_loss',
                      min_delta=0.,
                      patience=3,
                      verbose=1,
                      mode='auto'),
        ModelCheckpoint('../models/BR_{}_{}_{}.h5'.format(
            dataset, im_size, int(pretrained)),
                        monitor='val_loss',
                        save_best_only=True,
                        verbose=1)
    ]

    if pretrained:
        history = model.fit(x=features_train,
                            y=y_true_train,
                            batch_size=batch_size,
                            epochs=epochs,
                            verbose=verbosity,
                            callbacks=callbacks,
                            validation_data=(features_validation,
                                             y_true_validation))
    else:
        history = model.fit_generator(train_gen,
                                      steps_per_epoch=train_steps,
                                      epochs=epochs,
                                      verbose=verbosity,
                                      callbacks=callbacks,
                                      validation_data=validation_gen,
                                      validation_steps=validation_steps)

        # Store history
        import pickle
        pickle.dump(
            history.history,
            open(
                '../results/learningcurves/BR_{}_{}.p'.format(
                    dataset, timestamp), 'wb'))

    # Load best model
    model.load_weights('../models/BR_{}_{}_{}.h5'.format(
        dataset, im_size, int(pretrained)))

    # Recompile the model, set all layers to trainable, finetune with small lr
    for layer in model.layers:
        layer.trainable = True

    # Increase dropout rate
    model.layers[-2].rate = dropout_rates[1]
    model.layers[-5].rate = dropout_rates[1]
    optimizer = Adam(lr=1e-5)

    model.compile(loss='binary_crossentropy', optimizer=optimizer)
    print(model.summary())
    print(model.layers[-2].get_config())
    callbacks = [
        EarlyStopping(monitor='val_loss',
                      min_delta=0,
                      patience=2,
                      verbose=1,
                      mode='auto'),
        ModelCheckpoint('../models/BR_{}_{}_{}.h5'.format(
            dataset, im_size, int(pretrained)),
                        monitor='val_loss',
                        save_best_only=True,
                        verbose=1)
    ]

    model.fit_generator(train_gen,
                        steps_per_epoch=train_steps,
                        epochs=epochs,
                        verbose=verbosity,
                        callbacks=callbacks,
                        validation_data=validation_gen,
                        validation_steps=validation_steps)

    model.load_weights('../models/BR_{}_{}_{}.h5'.format(
        dataset, im_size, int(pretrained)))
    model.compile(loss='binary_crossentropy', optimizer=optimizer)

    # Data generators for inference
    train_gen_i = gn.DataGenerator(df=df_train,
                                   n_labels=n_labels,
                                   im_size=im_size,
                                   batch_size=batch_size,
                                   shuffle=False,
                                   mode='test',
                                   pretrained=pretrained,
                                   features=features_train).generate()
    validation_gen_i = gn.DataGenerator(
        df=df_validation,
        n_labels=n_labels,
        im_size=im_size,
        batch_size=batch_size,
        shuffle=False,
        mode='test',
        pretrained=pretrained,
        features=features_validation).generate()
    test_gen_i = gn.DataGenerator(df=df_test,
                                  n_labels=n_labels,
                                  im_size=im_size,
                                  batch_size=batch_size,
                                  shuffle=False,
                                  mode='test',
                                  pretrained=pretrained,
                                  features=features_test).generate()

    # Make predictions
    if pretrained:
        BR_predictions_train = model.predict(features_train, verbose=1)
        BR_predictions_validation = model.predict(features_validation,
                                                  verbose=1)
        BR_predictions_test = model.predict(features_test, verbose=1)
    else:
        BR_predictions_train = model.predict_generator(train_gen_i,
                                                       steps=train_steps,
                                                       verbose=1)
        BR_predictions_validation = model.predict_generator(
            validation_gen_i, steps=validation_steps, verbose=1)
        BR_predictions_test = model.predict_generator(test_gen_i,
                                                      steps=test_steps,
                                                      verbose=1)

    for beta in [1, 2]:

        F_train = compute_F_score(y_true_train,
                                  BR_predictions_train,
                                  t=0.5,
                                  beta=beta)
        F_validation = compute_F_score(y_true_validation,
                                       BR_predictions_validation,
                                       t=0.5,
                                       beta=beta)
        F_test = compute_F_score(y_true_test,
                                 BR_predictions_test,
                                 t=0.5,
                                 beta=beta)

        logger.log(
            'Binary relevance with threshold 0.5 - ({})'.format(dataset))
        logger.log('-' * 50)
        logger.log('F{} score on training data: {:.4f}'.format(beta, F_train))
        logger.log('F{} score on validation data: {:.4f}'.format(
            beta, F_validation))
        logger.log('F{} score on test data: {:.4f}'.format(beta, F_test))

        # Store test set predictions for the Kaggle dataset to submit them to the website
        if (dataset == 'KAGGLE_PLANET') and (beta == 2):
            # Map predictions to filenames
            def filepath_to_filename(s):
                return os.path.basename(os.path.normpath(s)).split('.')[0]

            test_filenames = [
                filepath_to_filename(f) for f in df_test['full_path']
            ]
            GFM_predictions_mapping = dict(
                zip(test_filenames, [
                    csv_helpers.decode_label_vector(f)
                    for f in (BR_predictions_test > 0.5).astype(int)
                ]))
            # Create submission file
            csv_helpers.create_submission_file(GFM_predictions_mapping,
                                               name='Planet_BR_{}'.format(
                                                   int(pretrained)))

    # Store marginals
    np.save(
        '../results/BR_predictions_train_{}_pt{}'.format(
            dataset, int(pretrained)), BR_predictions_train)
    np.save(
        '../results/BR_predictions_validation_{}_pt{}'.format(
            dataset, int(pretrained)), BR_predictions_validation)
    np.save(
        '../results/BR_predictions_test_{}_pt{}'.format(
            dataset, int(pretrained)), BR_predictions_test)
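
Note: BR_classifier and VGG_classifier are model-builder classes defined elsewhere in the repository (each exposes a .model attribute). As a rough sketch of what a binary-relevance head on pretrained features could look like — the single sigmoid layer and the use of c as an L2 penalty are assumptions for illustration, not the repository's definition:

from keras.models import Model
from keras.layers import Input, Dense
from keras.regularizers import l2

def br_head_sketch(n_features, n_labels, c):
    # Hypothetical binary-relevance head: one sigmoid unit per label,
    # trained with binary cross-entropy on the extracted features.
    inputs = Input(shape=(n_features,))
    outputs = Dense(n_labels, activation='sigmoid',
                    kernel_regularizer=l2(c))(inputs)
    return Model(inputs=inputs, outputs=outputs)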
Example #3
def GFM_MLC(args, logger, timestamp):

    # Parameters
    im_size = args.im_size
    batch_size = 16
    dataset = args.dataset
    pretrained = args.pretrained
    epochs = 1000  # early stopping on validation data
    verbosity = 1
    c = args.c
    lr = args.lr
    opt = args.opt
    imagenet = args.imagenet
    n_hidden = args.n_hidden

    logger.log('PRETRAINED: {}'.format(pretrained))
    features_train = None
    features_validation = None
    features_test = None
    if (dataset == 'KAGGLE_PLANET') or (dataset == 'MS_COCO'):
        dropout_rates = [0.10, 0.5]
    elif (dataset == 'PASCAL_VOC_2007') or (dataset == 'PASCAL_VOC_2012'):
        dropout_rates = [0.25, 0.75]

    if pretrained:
        features_train = np.load(
            '../data/{}/features/features_train_max.npy'.format(dataset))
        features_validation = np.load(
            '../data/{}/features/features_validation_max.npy'.format(dataset))
        features_test = np.load(
            '../data/{}/features/features_test_max.npy'.format(dataset))
        n_features = features_train.shape[1]

        # rescale
        from sklearn.preprocessing import StandardScaler
        featurescaler = StandardScaler().fit(features_train)

        features_train = featurescaler.transform(features_train)
        features_validation = featurescaler.transform(features_validation)
        features_test = featurescaler.transform(features_test)

    csv_path_train = '../data/{}/TRAIN.csv'.format(dataset)
    csv_path_validation = '../data/{}/VALIDATION.csv'.format(dataset)
    csv_path_test = '../data/{}/TEST.csv'.format(dataset)

    df_train = pd.read_csv(csv_path_train)
    df_validation = pd.read_csv(csv_path_validation)
    df_test = pd.read_csv(csv_path_test)

    train_steps = np.ceil(len(df_train) / batch_size)
    validation_steps = np.ceil(len(df_validation) / batch_size)
    test_steps = np.ceil(len(df_test) / batch_size)

    # Extract ground truth labels
    y_true_train = np.array([
        ast.literal_eval(df_train['marginal_labels'][i])
        for i in range(len(df_train))
    ])
    y_true_validation = np.array([
        ast.literal_eval(df_validation['marginal_labels'][i])
        for i in range(len(df_validation))
    ])
    y_true_test = np.array([
        ast.literal_eval(df_test['marginal_labels'][i])
        for i in range(len(df_test))
    ])

    n_labels = y_true_train.shape[1]

    # extract GFM output in case of training without generator
    if pretrained:
        y_gfm_train = np.array([
            ast.literal_eval(df_train['gfm_labels'][i])
            for i in range(len(df_train))
        ])
        y_gfm_validation = np.array([
            ast.literal_eval(df_validation['gfm_labels'][i])
            for i in range(len(df_validation))
        ])

    # Compute max_s: the maximum number of positive labels for a single instance
    max_s = np.max(
        np.array([
            np.max(np.sum(y_true_train, axis=1)),
            np.max(np.sum(y_true_validation, axis=1)),
            np.max(np.sum(y_true_test, axis=1))
        ]))

    # Data generators for training
    train_gen = gn.DataGenerator_gfm_MC(df=df_train,
                                        n_labels=n_labels,
                                        im_size=im_size,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        mode='train',
                                        pretrained=False,
                                        max_s=max_s).generate()

    validation_gen = gn.DataGenerator_gfm_MC(df=df_validation,
                                             n_labels=n_labels,
                                             im_size=im_size,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             mode='train',
                                             pretrained=False,
                                             max_s=max_s).generate()

    # Set up the model
    if pretrained:
        model = GFM_classifier(n_features, n_labels, max_s, c).model
        optimizer = Adam()
    else:
        model = GFM_VGG_classifier(im_size, n_labels, n_hidden, imagenet,
                                   max_s).model
        optimizer = Adam(lr=lr)

    # Compile with specific loss function

    def GFM_loss(y_true, y_pred):
        """Custom loss function for the joint estimation of the required parameters for GFM.
        The combined loss is the row-wise sum of categorical cross-entropies
        over the rows of the matrix P, where each row corresponds to one label.

        """
        loss = K.constant(0, tf.float32)
        for i in range(n_labels):
            loss += K.categorical_crossentropy(target=y_true[:, i, :],
                                               output=y_pred[:, i, :],
                                               from_logits=True)
        return loss

    # First, freeze all layers but the final classification block
    for layer in model.layers[:-7]:
        layer.trainable = False

    print(model.summary())
    # Lower the dropout rate for the pretraining
    model.layers[-3].rate = dropout_rates[0]
    model.layers[-6].rate = dropout_rates[0]

    model.compile(loss=GFM_loss, optimizer=optimizer)
    print(model.layers[-1].get_config())
    print(model.layers[-2].get_config())
    print(model.layers[-3].get_config())
    print(model.layers[-4].get_config())
    print(model.layers[-5].get_config())
    print(model.layers[-6].get_config())
    print(model.layers[-7].get_config())

    callbacks = [
        EarlyStopping(monitor='val_loss',
                      min_delta=0,
                      patience=3,
                      verbose=1,
                      mode='auto'),
        ModelCheckpoint('../models/GFMMLC_{}_{}_{}.h5'.format(
            dataset, im_size, int(pretrained)),
                        monitor='val_loss',
                        save_best_only=True,
                        verbose=verbosity)
    ]

    if pretrained:
        model.fit(x=features_train,
                  y=y_gfm_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=verbosity,
                  callbacks=callbacks,
                  validation_data=(features_validation, y_gfm_validation))
    else:
        history = model.fit_generator(train_gen,
                                      steps_per_epoch=train_steps,
                                      epochs=epochs,
                                      verbose=verbosity,
                                      callbacks=callbacks,
                                      validation_data=validation_gen,
                                      validation_steps=validation_steps)
        # Store history
        import pickle
        pickle.dump(
            history.history,
            open(
                '../results/learningcurves/GFM_MLC_{}_{}.p'.format(
                    dataset, timestamp), 'wb'))

    # Load best model
    model.load_weights('../models/GFMMLC_{}_{}_{}.h5'.format(
        dataset, im_size, int(pretrained)))

    # Recompile the model, set all layers to trainable, finetune with small lr
    for layer in model.layers:
        layer.trainable = True

    # Increase dropout rate
    model.layers[-3].rate = dropout_rates[1]
    model.layers[-6].rate = dropout_rates[1]

    optimizer = Adam(lr=1e-5)

    model.compile(loss=GFM_loss, optimizer=optimizer)
    print(model.summary())
    callbacks = [
        EarlyStopping(monitor='val_loss',
                      min_delta=0,
                      patience=2,
                      verbose=verbosity,
                      mode='auto'),
        ModelCheckpoint('../models/GFMMLC_{}_{}_{}.h5'.format(
            dataset, im_size, int(pretrained)),
                        monitor='val_loss',
                        save_best_only=True,
                        verbose=1)
    ]

    model.fit_generator(train_gen,
                        steps_per_epoch=train_steps,
                        epochs=epochs,
                        verbose=verbosity,
                        callbacks=callbacks,
                        validation_data=validation_gen,
                        validation_steps=validation_steps)

    model.load_weights('../models/GFMMLC_{}_{}_{}.h5'.format(
        dataset, im_size, int(pretrained)))
    model.compile(loss=GFM_loss, optimizer=optimizer)
    # Data generators for inference
    train_gen_i = gn.DataGenerator_gfm_MC(df=df_train,
                                          n_labels=n_labels,
                                          im_size=im_size,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          mode='test',
                                          pretrained=False,
                                          max_s=max_s).generate()
    validation_gen_i = gn.DataGenerator_gfm_MC(df=df_validation,
                                               n_labels=n_labels,
                                               im_size=im_size,
                                               batch_size=batch_size,
                                               shuffle=False,
                                               mode='test',
                                               pretrained=False,
                                               max_s=max_s).generate()
    test_gen_i = gn.DataGenerator_gfm_MC(df=df_test,
                                         n_labels=n_labels,
                                         im_size=im_size,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         mode='test',
                                         pretrained=False,
                                         max_s=max_s).generate()

    # Make predictions
    if pretrained:
        pis_train = model.predict(features_train, verbose=1)
        pis_validation = model.predict(features_validation, verbose=1)
        pis_test = model.predict(features_test, verbose=1)
    else:
        pis_train = model.predict_generator(train_gen_i,
                                            steps=train_steps,
                                            verbose=1)
        pis_validation = model.predict_generator(validation_gen_i,
                                                 steps=validation_steps,
                                                 verbose=1)
        pis_test = model.predict_generator(test_gen_i,
                                           steps=test_steps,
                                           verbose=1)

    def softmax(v):
        """Softmax a vector.
        Adaptation for numerical stability according to
        http://python.usyiyi.cn/documents/effective-tf/12.html
        """
        exp = np.exp(v - np.max(v))
        return exp / np.sum(exp)

    print('Softmaxing...')
    pis_train = np.apply_along_axis(softmax, 2, pis_train)
    pis_validation = np.apply_along_axis(softmax, 2, pis_validation)
    pis_test = np.apply_along_axis(softmax, 2, pis_test)
    print('Filling...')
    pis_train_filled = [
        complete_matrix_columns_with_zeros(mat[:, 1:], len=n_labels)
        for mat in tqdm(pis_train)
    ]
    pis_validation_filled = [
        complete_matrix_columns_with_zeros(mat[:, 1:], len=n_labels)
        for mat in tqdm(pis_validation)
    ]
    pis_test_filled = [
        complete_matrix_columns_with_zeros(mat[:, 1:], len=n_labels)
        for mat in tqdm(pis_test)
    ]

    # (Extra: optionally post-process by constraining the rank of the output matrices before running GFM)
    # Store the output of the network to this end

    # np.save('../results/GFM_MLC_output_train_{}_pt{}'.format(dataset,
    #                                                         int(pretrained)), np.array(pis_train_filled))
    # np.save('../results/GFM_MLC_output_validation_{}_pt{}'.format(dataset, int(pretrained)),
    #        np.array(pis_validation_filled))
    # np.save('../results/GFM_MLC_output_test_{}_pt{}'.format(dataset,
    #                                                        int(pretrained)), np.array(pis_test_filled))
    # Compute optimal predictions for F1 and F2
    for beta in [1, 2]:
        GFM = GeneralFMaximizer(beta, n_labels)

        # Run GFM algo on this output
        (optimal_predictions_train,
         E_F_train) = GFM.get_predictions(predictions=pis_train_filled)
        (optimal_predictions_validation, E_F_validation) = GFM.get_predictions(
            predictions=pis_validation_filled)
        (optimal_predictions_test,
         E_F_test) = GFM.get_predictions(predictions=pis_test_filled)

        # Evaluate F score
        F_train = compute_F_score(y_true_train,
                                  optimal_predictions_train,
                                  t=0.5,
                                  beta=beta)
        F_validation = compute_F_score(y_true_validation,
                                       optimal_predictions_validation,
                                       t=0.5,
                                       beta=beta)
        F_test = compute_F_score(y_true_test,
                                 optimal_predictions_test,
                                 t=0.5,
                                 beta=beta)

        logger.log('GFM_MLC ({})'.format(dataset))
        logger.log('-' * 50)
        logger.log('F{} score on training data: {:.4f}'.format(beta, F_train))
        logger.log('F{} score on validation data: {:.4f}'.format(
            beta, F_validation))
        logger.log('F{} score on test data: {:.4f}'.format(beta, F_test))

        # Store test set predictions to submit to Kaggle
        if (dataset == 'KAGGLE_PLANET') and (beta == 2):
            # Map predictions to filenames
            def filepath_to_filename(s):
                return os.path.basename(os.path.normpath(s)).split('.')[0]

            test_filenames = [
                filepath_to_filename(f) for f in df_test['full_path']
            ]
            GFM_predictions_mapping = dict(
                zip(test_filenames, [
                    csv_helpers.decode_label_vector(f)
                    for f in optimal_predictions_test
                ]))
            # Create submission file
            csv_helpers.create_submission_file(
                GFM_predictions_mapping,
                name='Planet_GFM_MC_pt{}'.format(int(pretrained)))
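
Note: complete_matrix_columns_with_zeros is a repository helper. From its call sites — it receives the (n_labels, max_s) matrix left after dropping the first softmax column, plus a target width len=n_labels — it plausibly right-pads each matrix with zero columns. A sketch under that assumption (the keyword len mirrors the call sites and shadows the builtin):

import numpy as np

def complete_matrix_columns_with_zeros_sketch(mat, len):
    # Hypothetical version: right-pad mat with zero columns so that
    # every per-instance matrix ends up with `len` columns.
    mat = np.asarray(mat)
    padded = np.zeros((mat.shape[0], len))
    padded[:, :mat.shape[1]] = mat
    return padded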
Example #4
def thresholding(args, logger):

    # Parameters
    dataset = args.dataset
    pretrained = args.pretrained

    csv_path_train = '../data/{}/TRAIN.csv'.format(dataset)
    csv_path_validation = '../data/{}/VALIDATION.csv'.format(dataset)
    csv_path_test = '../data/{}/TEST.csv'.format(dataset)

    df_train = pd.read_csv(csv_path_train)
    df_validation = pd.read_csv(csv_path_validation)
    df_test = pd.read_csv(csv_path_test)

    # Extract ground truth labels
    y_true_train = np.array(
        [np.array(ast.literal_eval(l)) for l in df_train['marginal_labels']])
    y_true_validation = np.array([
        np.array(ast.literal_eval(l)) for l in df_validation['marginal_labels']
    ])
    y_true_test = np.array(
        [np.array(ast.literal_eval(l)) for l in df_test['marginal_labels']])

    n_labels = y_true_train.shape[1]

    # Load the predicted marginals
    y_predicted_train = np.load(
        '../results/BR_predictions_train_{}_pt{}.npy'.format(
            dataset, int(pretrained)))
    y_predicted_validation = np.load(
        '../results/BR_predictions_validation_{}_pt{}.npy'.format(
            dataset, int(pretrained)))
    y_predicted_test = np.load(
        '../results/BR_predictions_test_{}_pt{}.npy'.format(
            dataset, int(pretrained)))

    for beta in [1, 2]:
        # Evaluate F score with threshold 0.5
        F_train = compute_F_score(y_true_train,
                                  y_predicted_train,
                                  t=0.5,
                                  beta=beta)
        F_validation = compute_F_score(y_true_validation,
                                       y_predicted_validation,
                                       t=0.5,
                                       beta=beta)
        F_test = compute_F_score(y_true_test,
                                 y_predicted_test,
                                 t=0.5,
                                 beta=beta)

        logger.log('\n')
        logger.log(
            'Binary relevance with threshold 0.5 - ({}) '.format(dataset))
        logger.log('-' * 50)
        logger.log('F{} score on training data: {:.4f}'.format(beta, F_train))
        logger.log('F{} score on validation data: {:.4f}'.format(
            beta, F_validation))
        logger.log('F{} score on test data: {:.4f}'.format(beta, F_test))

        # Thresholding algorithm 1
        # Sort true and predicted row-wise on Hi
        algorithm_1 = th.OptimalMeanThreshold(beta)
        optimal_t_1 = algorithm_1.get_optimal_t(y_true_train,
                                                y_predicted_train)
        # Also get instance-wise thresholds for the validation data to use them as labels in algorithm 3
        optimal_t_1_validation = algorithm_1.get_optimal_t(
            y_true_validation, y_predicted_validation)

        # Evaluate F score
        F_train = compute_F_score(y_true_train,
                                  y_predicted_train,
                                  t=np.mean(optimal_t_1),
                                  beta=beta)
        F_validation = compute_F_score(y_true_validation,
                                       y_predicted_validation,
                                       t=np.mean(optimal_t_1),
                                       beta=beta)
        F_test = compute_F_score(y_true_test,
                                 y_predicted_test,
                                 t=np.mean(optimal_t_1),
                                 beta=beta)

        logger.log('\n')
        logger.log('Results with mean optimal threshold {:.2f} - ({})'.format(
            np.mean(optimal_t_1), dataset))
        logger.log('--' * 20)
        logger.log('Mean F{}-score with mean threshold - Train: {:.4f}'.format(
            beta, F_train))
        logger.log('Mean F{}-score with mean threshold - Val: {:.4f}'.format(
            beta, F_validation))
        logger.log('Mean F{}-score with mean threshold - Test: {:.4f}'.format(
            beta, F_test))

        # Store test set predictions to submit to Kaggle
        if (dataset == 'KAGGLE_PLANET') and (beta == 2):
            # Map predictions to filenames
            def filepath_to_filename(s):
                return os.path.basename(os.path.normpath(s)).split('.')[0]

            test_filenames = [
                filepath_to_filename(f) for f in df_test['full_path']
            ]
            GFM_predictions_mapping = dict(
                zip(test_filenames, [
                    csv_helpers.decode_label_vector(f) for f in (
                        y_predicted_test > np.mean(optimal_t_1)).astype(int)
                ]))
            # Create submission file
            csv_helpers.create_submission_file(
                GFM_predictions_mapping, name='Planet_BR_OptimalMeanThreshold')

        # Thresholding algorithm 2
        # Sort true and predicted row-wise on Hi
        algorithm_2 = th.OptimalGlobalThreshold(beta)
        optimal_t_2 = algorithm_2.get_optimal_t(y_true_train,
                                                y_predicted_train)

        # Evaluate F score
        F_train = compute_F_score(y_true_train,
                                  y_predicted_train,
                                  t=optimal_t_2,
                                  beta=beta)
        F_validation = compute_F_score(y_true_validation,
                                       y_predicted_validation,
                                       t=optimal_t_2,
                                       beta=beta)
        F_test = compute_F_score(y_true_test,
                                 y_predicted_test,
                                 t=optimal_t_2,
                                 beta=beta)

        logger.log('\n')
        logger.log(
            'Results with global optimal threshold {:.2f} - ({})'.format(
                optimal_t_2, dataset))
        logger.log('--' * 20)
        logger.log(
            'Mean F{}-score with global threshold - Train: {:.4f}'.format(
                beta, F_train))
        logger.log('Mean F{}-score with global threshold - Val: {:.4f}'.format(
            beta, F_validation))
        logger.log(
            'Mean F{}-score with global threshold - Test: {:.4f}'.format(
                beta, F_test))

        # Store test set predictions to submit to Kaggle
        if (dataset == 'KAGGLE_PLANET') and (beta == 2):
            # Map predictions to filenames
            def filepath_to_filename(s):
                return os.path.basename(os.path.normpath(s)).split('.')[0]

            test_filenames = [
                filepath_to_filename(f) for f in df_test['full_path']
            ]
            GFM_predictions_mapping = dict(
                zip(test_filenames, [
                    csv_helpers.decode_label_vector(f)
                    for f in (y_predicted_test > optimal_t_2).astype(int)
                ]))
            # Create submission file
            csv_helpers.create_submission_file(
                GFM_predictions_mapping,
                name='Planet_BR_OptimalGlobalThreshold_pt{}'.format(
                    int(pretrained)))
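
Note: th.OptimalMeanThreshold and th.OptimalGlobalThreshold are thresholding helpers from the repository. Consistent with how optimal_t_2 is used above, a global-threshold search could simply sweep candidate thresholds and keep the one that maximizes the sample-averaged F-beta on the training data; the grid below is an illustrative assumption:

import numpy as np
from sklearn.metrics import fbeta_score

def optimal_global_threshold_sketch(y_true, y_pred, beta, grid=None):
    # Hypothetical search: score a grid of thresholds and return the best one.
    if grid is None:
        grid = np.linspace(0.05, 0.95, 19)
    scores = [fbeta_score(y_true, (y_pred > t).astype(int),
                          beta=beta, average='samples') for t in grid]
    return grid[int(np.argmax(scores))]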
Example #5
def GFM_MLC(args):

    # Parameters
    im_size = args.im_size
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    augmentation = args.augmentation
    epochs = args.epochs
    earlystop = args.earlystop
    name = args.name
    n_neurons = args.n_neurons

    n_labels = 17
    dataset = 'KAGGLE_PLANET'
    csv_path_train = '../data/{}/TRAIN.csv'.format(dataset)
    csv_path_validation = '../data/{}/VALIDATION.csv'.format(dataset)
    csv_path_trainval = '../data/{}/TRAINVAL.csv'.format(dataset)
    csv_path_test = '../data/{}/TEST.csv'.format(dataset)

    df_train = pd.read_csv(csv_path_train)
    df_validation = pd.read_csv(csv_path_validation)
    df_trainval = pd.read_csv(csv_path_trainval)
    df_test = pd.read_csv(csv_path_test)

    train_steps = np.ceil(len(df_train) / batch_size)
    validation_steps = np.ceil(len(df_validation) / batch_size)
    trainval_steps = np.ceil(len(df_trainval) / batch_size)
    test_steps = np.ceil(len(df_test) / batch_size)

    # Extract ground truth labels
    y_true_train = np.array([ast.literal_eval(df_train['marginal_labels'][i])
                             for i in range(len(df_train))])
    y_true_validation = np.array([ast.literal_eval(df_validation['marginal_labels'][i])
                                  for i in range(len(df_validation))])
    y_true_train_val = np.array([ast.literal_eval(df_trainval['marginal_labels'][i])
                                 for i in range(len(df_trainval))])
    y_true_test = np.array([ast.literal_eval(df_test['marginal_labels'][i])
                            for i in range(len(df_test))])

    # Compute max_s: the maximum number of positive labels for a single instance
    max_s = np.max(np.array([np.max(np.sum(y_true_train, axis=1)),
                             np.max(np.sum(y_true_validation, axis=1)),
                             np.max(np.sum(y_true_test, axis=1))]))

    print('Maximum value of s: {}'.format(max_s))

    # Data generators for training
    train_gen = gn.DataGenerator_gfm_MC(df=df_train, n_labels=n_labels,
                                        im_size=im_size, batch_size=batch_size,
                                        shuffle=True, mode='train',
                                        pretrained=False, max_s=max_s, augmentation=augmentation).generate()

    validation_gen = gn.DataGenerator_gfm_MC(df=df_validation, n_labels=n_labels,
                                             im_size=im_size, batch_size=batch_size, shuffle=False, mode='train', pretrained=False, max_s=max_s).generate()

    trainval_gen = gn.DataGenerator_gfm_MC(df=df_trainval, n_labels=n_labels,
                                           im_size=im_size, batch_size=batch_size,
                                           shuffle=True, mode='train',
                                           pretrained=False, max_s=max_s, augmentation=augmentation).generate()

    # Set up the model
    model = GFM_VGG_classifier(im_size, n_labels, n_neurons, max_s).model
    optimizer = Adam(lr=learning_rate)

    # Compile with specific loss function
    def GFM_loss(y_true, y_pred):
        """Custom loss function for the joint estimation of the required parameters for GFM.
        The combined loss is the row-wise sum of categorical cross-entropies
        over the rows of the matrix P, where each row corresponds to one label.

        """
        loss = K.constant(0, tf.float32)
        for i in range(n_labels):
            loss += K.categorical_crossentropy(target=y_true[:, i, :],
                                               output=y_pred[:, i, :], from_logits=True)
        return loss

    model.compile(loss=GFM_loss, optimizer=optimizer)

    print(model.summary())
    callbacks = [
        EarlyStopping(monitor='val_loss', min_delta=0,
                      patience=3, verbose=1, mode='auto')
    ]

    if earlystop:
        model.fit_generator(train_gen, steps_per_epoch=train_steps, epochs=epochs, verbose=1,
                            callbacks=callbacks, validation_data=validation_gen, validation_steps=validation_steps)
    else:
        model.fit_generator(trainval_gen, steps_per_epoch=trainval_steps, epochs=epochs, verbose=1)

    # Data generators for inference
    train_gen_i = gn.DataGenerator_gfm_MC(df=df_train, n_labels=n_labels,
                                          im_size=im_size, batch_size=batch_size, shuffle=False, mode='test', pretrained=False, max_s=max_s).generate()
    validation_gen_i = gn.DataGenerator_gfm_MC(df=df_validation, n_labels=n_labels,
                                               im_size=im_size, batch_size=batch_size, shuffle=False, mode='test', pretrained=False, max_s=max_s).generate()
    trainval_gen_i = gn.DataGenerator_gfm_MC(df=df_trainval, n_labels=n_labels, im_size=im_size,
                                             batch_size=batch_size, shuffle=False, mode='test', pretrained=False, max_s=max_s).generate()
    test_gen_i = gn.DataGenerator_gfm_MC(df=df_test, n_labels=n_labels,
                                         im_size=im_size, batch_size=batch_size, shuffle=False, mode='test', pretrained=False, max_s=max_s).generate()

    # Make predictions
    if earlystop:
        pis_train = model.predict_generator(train_gen_i, steps=train_steps, verbose=1)
        pis_validation = model.predict_generator(
            validation_gen_i, steps=validation_steps, verbose=1)
    else:
        pis_trainval = model.predict_generator(trainval_gen_i, steps=trainval_steps, verbose=1)
    pis_test = model.predict_generator(test_gen_i, steps=test_steps, verbose=1)

    def softmax(v):
        """Softmax a vector.
        Adaptation for numerical stability according to
        http://python.usyiyi.cn/documents/effective-tf/12.html
        """
        exp = np.exp(v - np.max(v))
        return exp / np.sum(exp)

    print('Softmaxing...')
    if earlystop:
        pis_train = np.apply_along_axis(softmax, 2, pis_train)
        pis_validation = np.apply_along_axis(softmax, 2, pis_validation)
    else:
        pis_trainval = np.apply_along_axis(softmax, 2, pis_trainval)

    pis_test = np.apply_along_axis(softmax, 2, pis_test)

    print('Filling...')

    def fill(pis):
        return [complete_matrix_columns_with_zeros(mat[:, 1:], len=n_labels)
                for mat in tqdm(pis)]

    if earlystop:
        pis_train_filled = fill(pis_train)
        pis_validation_filled = fill(pis_validation)
    else:
        pis_trainval_filled = fill(pis_trainval)

    pis_test_filled = fill(pis_test)

    # Compute optimal predictions for F2
    beta = 2
    GFM = GeneralFMaximizer(beta, n_labels)

    # Run GFM algo on this output
    if earlystop:
        (optimal_predictions_train, E_F_train) = GFM.get_predictions(predictions=pis_train_filled)
        (optimal_predictions_validation, E_F_validation) = GFM.get_predictions(
            predictions=pis_validation_filled)
    else:
        (optimal_predictions_trainval, E_F_trainval) = GFM.get_predictions(
            predictions=pis_trainval_filled)

    (optimal_predictions_test, E_F_test) = GFM.get_predictions(predictions=pis_test_filled)

    # Evaluate F score
    if earlystop:
        F_train = compute_F_score(y_true_train, optimal_predictions_train, t=0.5, beta=beta)
        F_validation = compute_F_score(
            y_true_validation, optimal_predictions_validation, t=0.5, beta=beta)
    else:
        F_train = F_validation = compute_F_score(
            y_true_train_val, optimal_predictions_trainval, t=0.5, beta=beta)
    F_test = compute_F_score(y_true_test, optimal_predictions_test, t=0.5, beta=beta)

    print('GFM_MLC ({})'.format(dataset))
    if not earlystop:
        print('-' * 50)
        print('---- No early stopping on validation data ----')
    print('-' * 50)
    print('F{} score on training data: {:.4f}'.format(beta, F_train))
    print('F{} score on validation data: {:.4f}'.format(beta, F_validation))
    print('F{} score on test data: {:.4f}'.format(beta, F_test))

    # Map predictions to filenames
    def filepath_to_filename(s):
        return os.path.basename(os.path.normpath(s)).split('.')[0]

    test_filenames = [filepath_to_filename(f) for f in df_test['full_path']]
    GFM_predictions_mapping = dict(
        zip(test_filenames, [csv_helpers.decode_label_vector(f) for f in optimal_predictions_test]))
    # Create submission file
    csv_helpers.create_submission_file(
        GFM_predictions_mapping, name='GFM_KAGGLE_imsize{}_lr{}_ep{}_ag{}_{}_{}'.format(im_size, learning_rate, epochs, int(augmentation), n_neurons, name))
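
Note: the network emits one row of logits per label, so the predictions have shape (n_instances, n_labels, max_s + 1) and the softmax above is applied along the last axis. A toy illustration of that step, with shapes assumed from the code:

import numpy as np

def softmax(v):
    exp = np.exp(v - np.max(v))
    return exp / np.sum(exp)

# 2 instances, 3 labels, max_s = 2, i.e. 3 classes per label
logits = np.random.randn(2, 3, 3)
probs = np.apply_along_axis(softmax, 2, logits)
assert np.allclose(probs.sum(axis=2), 1.0)  # each label row is a distribution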
Example #6
def thresh_stack(args, logger):

    # Parameters
    dataset = args.dataset
    pretrained = args.pretrained
    nonlinear = args.nonlinear

    csv_path_train = '../data/{}/TRAIN.csv'.format(dataset)
    csv_path_validation = '../data/{}/VALIDATION.csv'.format(dataset)
    csv_path_test = '../data/{}/TEST.csv'.format(dataset)

    df_train = pd.read_csv(csv_path_train)
    df_validation = pd.read_csv(csv_path_validation)
    df_test = pd.read_csv(csv_path_test)

    # Extract ground truth labels
    y_true_train = np.array([np.array(ast.literal_eval(l)) for l in df_train['marginal_labels']])
    y_true_validation = np.array([np.array(ast.literal_eval(l))
                                  for l in df_validation['marginal_labels']])
    y_true_test = np.array([np.array(ast.literal_eval(l))
                            for l in df_test['marginal_labels']])

    # Load the predicted marginals
    y_predicted_train = np.load(
        '../results/BR_predictions_train_{}_pt{}.npy'.format(dataset, int(pretrained)))
    y_predicted_validation = np.load(
        '../results/BR_predictions_validation_{}_pt{}.npy'.format(dataset, int(pretrained)))
    y_predicted_test = np.load(
        '../results/BR_predictions_test_{}_pt{}.npy'.format(dataset, int(pretrained)))

    n_labels = y_true_train.shape[1]

    # Thresholding algorithm 1

    for beta in [1, 2]:
        # Sort true and predicted row-wise on Hi
        algorithm_1 = th.OptimalMeanThreshold(beta)
        optimal_t_1 = algorithm_1.get_optimal_t(y_true_train, y_predicted_train)

        # Also get instance-wise thresholds for the validation data to use them as labels in algorithm 3
        optimal_t_1_validation = algorithm_1.get_optimal_t(
            y_true_validation, y_predicted_validation)

        #t_train = np.log(optimal_t_1)
        #t_validation = np.log(optimal_t_1_validation)

        # 'temporary' hack: replace marginal labels in dataframe with optimized thresholds
        # So as to avoid having to write another generator
        df_train['marginal_labels'] = [str(t) for t in list(optimal_t_1)]
        df_validation['marginal_labels'] = [str(t) for t in list(optimal_t_1_validation)]

        from sklearn.linear_model import RidgeCV
        from sklearn.ensemble import RandomForestRegressor

        alphas = np.logspace(-5, 5, 100)
        model = RidgeCV(alphas=alphas)
        if nonlinear:
            model = RandomForestRegressor(n_estimators=100, n_jobs=-1)

        # Rescale
        y_mean = np.mean(y_predicted_train, axis=0)
        y_std = np.std(y_predicted_train, axis=0)

        m_train = (y_predicted_train - y_mean) / y_std
        m_validation = (y_predicted_validation - y_mean) / y_std
        m_test = (y_predicted_test - y_mean) / y_std

        model.fit(X=m_train, y=optimal_t_1)
        if not nonlinear:
            assert alphas[0] < model.alpha_ < alphas[-1], \
                'Increase the search range for lambda'

        # Make prediction
        predictions_train_t = model.predict(m_train)
        predictions_validation_t = model.predict(m_validation)
        predictions_test_t = model.predict(m_test)

        from sklearn.metrics import r2_score
        logger.log('R²: {:.2f}'.format(r2_score(optimal_t_1_validation, predictions_validation_t)))

        # Store the true and predicted thresholds of the validation dataset to make plots
        np.save('../results/INST_THRESH/{}_{}_{}'.format(dataset,
                                                         beta, int(pretrained)), optimal_t_1_validation)
        np.save('../results/INST_THRESH/{}_{}_{}_predicted'.format(dataset,
                                                                   beta, int(pretrained)), predictions_validation_t)

        # Print results
        # Evaluate F score with threshold 0.5
        F_train = compute_F_score(y_true_train, y_predicted_train, t=0.5, beta=beta)
        F_validation = compute_F_score(y_true_validation, y_predicted_validation, t=0.5, beta=beta)
        F_test = compute_F_score(y_true_test, y_predicted_test, t=0.5, beta=beta)

        logger.log('\n')
        logger.log('Binary relevance with threshold 0.5 - ({}) '.format(dataset))
        logger.log('-' * 50)
        logger.log('F{} score on training data: {:.4f}'.format(beta, F_train))
        logger.log('F{} score on validation data: {:.4f}'.format(beta, F_validation))
        logger.log('F{} score on test data: {:.4f}'.format(beta, F_test))

        # Evaluate F score with mean threshold
        F_train = compute_F_score(y_true_train, y_predicted_train,
                                  t=np.mean(optimal_t_1), beta=beta)
        F_validation = compute_F_score(y_true_validation, y_predicted_validation,
                                       t=np.mean(optimal_t_1), beta=beta)
        F_test = compute_F_score(y_true_test, y_predicted_test, t=np.mean(optimal_t_1), beta=beta)

        logger.log('\n')
        logger.log(
            'Results with mean optimal threshold {:.2f} - ({})'.format(np.mean(optimal_t_1), dataset))
        logger.log('--' * 20)
        logger.log('Mean F{}-score with mean threshold - Train: {:.4f}'.format(beta, F_train))
        logger.log('Mean F{}-score with mean threshold - Val: {:.4f}'.format(beta, F_validation))
        logger.log('Mean F{}-score with mean threshold - Test: {:.4f}'.format(beta, F_test))

        # Evaluate F score with predicted instance-wise threshold
        from sklearn.metrics import fbeta_score

        def compute_F_score_instancewise_threshold(y_true, predictions, t, beta):
            thresholded = np.array([predictions[i, :] > t[i]
                                    for i in range(len(y_true))]).astype(int)
            return fbeta_score(y_true, thresholded,
                               beta=beta, average='samples')

        F_train = compute_F_score_instancewise_threshold(
            y_true_train, y_predicted_train, t=predictions_train_t, beta=beta)
        F_validation = compute_F_score_instancewise_threshold(
            y_true_validation, y_predicted_validation, t=predictions_validation_t, beta=beta)
        F_test = compute_F_score_instancewise_threshold(
            y_true_test, y_predicted_test, t=predictions_test_t, beta=beta)

        logger.log('\n')
        logger.log('Results with instance-wise threshold')
        logger.log('--' * 20)
        logger.log('Mean F{}-score with instance-wise threshold - Train: {:.4f}'.format(beta, F_train))
        logger.log('Mean F{}-score with instance-wise threshold - Val: {:.4f}'.format(beta, F_validation))
        logger.log('Mean F{}-score with instance-wise threshold - Test: {:.4f}'.format(beta, F_test))

        # Store test set predictions to submit to Kaggle

        test_predictions = np.array([y_predicted_test[i, :] > predictions_test_t[i]
                                     for i in range(len(predictions_test_t))]).astype(int)

        if (dataset == 'KAGGLE_PLANET') and (beta == 2):
            # Map predictions to filenames
            def filepath_to_filename(s):
                return os.path.basename(os.path.normpath(s)).split('.')[0]

            test_filenames = [filepath_to_filename(f) for f in df_test['full_path']]
            GFM_predictions_mapping = dict(
                zip(test_filenames, [csv_helpers.decode_label_vector(f) for f in test_predictions]))
            # Create submission file
            csv_helpers.create_submission_file(
                GFM_predictions_mapping, name='Planet_BR_InstanceWiseThreshold_{}'.format(int(pretrained)))
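
Note: the instance-wise variant above gives every instance its own cut-off instead of one shared threshold. A minimal, self-contained sketch of the difference, using hypothetical y_true / y_scores / per-instance thresholds (the regressed thresholds and compute_F_score from the snippet are not reproduced):

import numpy as np
from sklearn.metrics import fbeta_score

rng = np.random.RandomState(0)
y_true = rng.binomial(1, 0.3, size=(4, 5))       # hypothetical ground truth
y_scores = rng.uniform(size=(4, 5))              # hypothetical predicted marginals
t_inst = rng.uniform(0.3, 0.7, size=4)           # one threshold per instance

y_hat_global = (y_scores > 0.5).astype(int)                  # shared cut-off
y_hat_inst = (y_scores > t_inst.reshape(-1, 1)).astype(int)  # row-wise cut-offs

print(fbeta_score(y_true, y_hat_global, beta=2, average='samples'))
print(fbeta_score(y_true, y_hat_inst, beta=2, average='samples'))
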
Example No. 7
0
def BR(args):

    # Parameters
    im_size = args.im_size
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    augmentation = args.augmentation
    epochs = args.epochs
    earlystop = args.earlystop
    name = args.name
    n_neurons = args.n_neurons

    n_labels = 17
    dataset = 'KAGGLE_PLANET'
    csv_path_train = '../data/{}/TRAIN.csv'.format(dataset)
    csv_path_validation = '../data/{}/VALIDATION.csv'.format(dataset)
    csv_path_trainval = '../data/{}/TRAINVAL.csv'.format(dataset)
    csv_path_test = '../data/{}/TEST.csv'.format(dataset)

    df_train = pd.read_csv(csv_path_train)
    df_validation = pd.read_csv(csv_path_validation)
    df_trainval = pd.read_csv(csv_path_trainval)
    df_test = pd.read_csv(csv_path_test)

    train_steps = np.ceil(len(df_train) / batch_size)
    validation_steps = np.ceil(len(df_validation) / batch_size)
    trainval_steps = np.ceil(len(df_trainval) / batch_size)
    test_steps = np.ceil(len(df_test) / batch_size)

    # Extract ground truth labels
    y_true_train = np.array([
        ast.literal_eval(df_train['marginal_labels'][i])
        for i in range(len(df_train))
    ])
    y_true_validation = np.array([
        ast.literal_eval(df_validation['marginal_labels'][i])
        for i in range(len(df_validation))
    ])
    y_true_train_val = np.array([
        ast.literal_eval(df_trainval['marginal_labels'][i])
        for i in range(len(df_trainval))
    ])
    y_true_test = np.array([
        ast.literal_eval(df_test['marginal_labels'][i])
        for i in range(len(df_test))
    ])

    # Data generators for training
    train_gen = gn.DataGenerator(df=df_train,
                                 n_labels=n_labels,
                                 im_size=im_size,
                                 batch_size=batch_size,
                                 shuffle=True,
                                 mode='train',
                                 pretrained=False,
                                 augmentation=augmentation).generate()

    validation_gen = gn.DataGenerator(df=df_validation,
                                      n_labels=n_labels,
                                      im_size=im_size,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      mode='train',
                                      pretrained=False).generate()

    trainval_gen = gn.DataGenerator(df=df_trainval,
                                    n_labels=n_labels,
                                    im_size=im_size,
                                    batch_size=batch_size,
                                    shuffle=True,
                                    mode='train',
                                    pretrained=False,
                                    augmentation=augmentation).generate()

    # Set up the model
    model = VGG_classifier(im_size, n_labels, n_neurons).model
    optimizer = Adam(lr=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=optimizer)

    callbacks = [
        EarlyStopping(monitor='val_loss',
                      min_delta=0,
                      patience=3,
                      verbose=1,
                      mode='auto')
    ]

    if earlystop:
        model.fit_generator(train_gen,
                            steps_per_epoch=train_steps,
                            epochs=epochs,
                            verbose=1,
                            callbacks=callbacks,
                            validation_data=validation_gen,
                            validation_steps=validation_steps)
    else:
        model.fit_generator(trainval_gen,
                            steps_per_epoch=trainval_steps,
                            epochs=epochs,
                            verbose=1)

    # Data generators for inference
    train_gen_i = gn.DataGenerator(df=df_train,
                                   n_labels=n_labels,
                                   im_size=im_size,
                                   batch_size=batch_size,
                                   shuffle=False,
                                   mode='test',
                                   pretrained=False).generate()
    validation_gen_i = gn.DataGenerator(df=df_validation,
                                        n_labels=n_labels,
                                        im_size=im_size,
                                        batch_size=batch_size,
                                        shuffle=False,
                                        mode='test',
                                        pretrained=False).generate()
    trainval_gen_i = gn.DataGenerator(df=df_trainval,
                                      n_labels=n_labels,
                                      im_size=im_size,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      mode='test',
                                      pretrained=False).generate()
    test_gen_i = gn.DataGenerator(df=df_test,
                                  n_labels=n_labels,
                                  im_size=im_size,
                                  batch_size=batch_size,
                                  shuffle=False,
                                  mode='test',
                                  pretrained=False).generate()

    # Make predictions
    if earlystop:
        predicted_marginals_train = model.predict_generator(train_gen_i,
                                                            steps=train_steps,
                                                            verbose=1)
        predicted_marginals_validation = model.predict_generator(
            validation_gen_i, steps=validation_steps, verbose=1)
    else:
        predicted_marginals_trainval = model.predict_generator(
            trainval_gen_i, steps=trainval_steps, verbose=1)
    predicted_marginals_test = model.predict_generator(test_gen_i,
                                                       steps=test_steps,
                                                       verbose=1)

    # Compute optimal predictions for F2
    beta = 2
    # Evaluate F score
    if earlystop:
        F_train = compute_F_score(y_true_train,
                                  predicted_marginals_train,
                                  t=0.5,
                                  beta=beta)
        F_validation = compute_F_score(y_true_validation,
                                       predicted_marginals_validation,
                                       t=0.5,
                                       beta=beta)
    else:
        F_train = F_validation = compute_F_score(y_true_train_val,
                                                 predicted_marginals_trainval,
                                                 t=0.5,
                                                 beta=beta)
    F_test = compute_F_score(y_true_test,
                             predicted_marginals_test,
                             t=0.5,
                             beta=beta)

    print('BR with threshold 0.5 - ({})'.format(dataset))
    if not earlystop:
        print('-' * 50)
        print('---- No early stopping on validation data ----')
    print('-' * 50)
    print('F{} score on training data: {:.4f}'.format(beta, F_train))
    print('F{} score on validation data: {:.4f}'.format(beta, F_validation))
    print('F{} score on test data: {:.4f}'.format(beta, F_test))

    # Map predictions to filenames
    def filepath_to_filename(s):
        return os.path.basename(os.path.normpath(s)).split('.')[0]

    test_filenames = [filepath_to_filename(f) for f in df_test['full_path']]
    GFM_predictions_mapping = dict(
        zip(test_filenames, [
            csv_helpers.decode_label_vector(f)
            for f in (predicted_marginals_test > 0.5).astype(int)
        ]))
    # Create submission file
    csv_helpers.create_submission_file(
        GFM_predictions_mapping,
        name='BR0.5_KAGGLE_imsize{}_lr{}_ep{}_ag{}_{}'.format(
            im_size, learning_rate, epochs, int(augmentation), name))

    # Compute global optimal threshold
    # Sort true and predicted row-wise on Hi
    algorithm_2 = th.OptimalGlobalThreshold(beta)
    if earlystop:
        optimal_t_2 = algorithm_2.get_optimal_t(y_true_train,
                                                predicted_marginals_train)
    else:
        optimal_t_2 = algorithm_2.get_optimal_t(y_true_train_val,
                                                predicted_marginals_trainval)

    # Evaluate F score
    if earlystop:
        F_train = compute_F_score(y_true_train,
                                  predicted_marginals_train,
                                  t=optimal_t_2,
                                  beta=beta)
        F_validation = compute_F_score(y_true_validation,
                                       predicted_marginals_validation,
                                       t=optimal_t_2,
                                       beta=beta)
    else:
        F_train = F_validation = compute_F_score(y_true_train_val,
                                                 predicted_marginals_trainval,
                                                 t=optimal_t_2,
                                                 beta=beta)
    F_test = compute_F_score(y_true_test,
                             predicted_marginals_test,
                             t=optimal_t_2,
                             beta=beta)

    print('\n')
    print('Results with global optimal threshold {:.2f} - ({})'.format(
        optimal_t_2, dataset))
    print('--' * 20)
    print('Mean F{}-score with optimal threshold - Train: {:.4f}'.format(
        beta, F_train))
    print('Mean F{}-score with optimal threshold - Val: {:.4f}'.format(
        beta, F_validation))
    print('Mean F{}-score with optimal threshold - Test: {:.4f}'.format(
        beta, F_test))

    # Store test set predictions to submit to Kaggle
    if (dataset == 'KAGGLE_PLANET') and (beta == 2):
        # Map predictions to filenames
        def filepath_to_filename(s):
            return os.path.basename(os.path.normpath(s)).split('.')[0]

        test_filenames = [
            filepath_to_filename(f) for f in df_test['full_path']
        ]
        GFM_predictions_mapping = dict(
            zip(test_filenames, [
                csv_helpers.decode_label_vector(f)
                for f in (predicted_marginals_test > optimal_t_2).astype(int)
            ]))
        # Create submission file
        csv_helpers.create_submission_file(
            GFM_predictions_mapping,
            name='BR_opt_KAGGLE_imsize{}_lr{}_ep{}_ag{}_nn_{}_{}'.format(
                im_size, learning_rate, epochs, int(augmentation), n_neurons,
                name))
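
Note: th.OptimalGlobalThreshold is project code that is not shown in this snippet. As a rough stand-in, a brute-force scan that keeps the candidate threshold with the best sample-averaged F-beta could look like this (hypothetical data; the grid resolution is an arbitrary choice):

import numpy as np
from sklearn.metrics import fbeta_score

def brute_force_global_threshold(y_true, y_scores, beta=2, grid=None):
    # Evaluate every candidate threshold and keep the best one;
    # a crude stand-in for th.OptimalGlobalThreshold
    if grid is None:
        grid = np.linspace(0.05, 0.95, 19)
    scores = [fbeta_score(y_true, (y_scores > t).astype(int),
                          beta=beta, average='samples')
              for t in grid]
    return grid[int(np.argmax(scores))]

rng = np.random.RandomState(1)
y_true = rng.binomial(1, 0.3, size=(100, 17))
y_scores = np.clip(0.6 * y_true + 0.5 * rng.uniform(size=(100, 17)), 0, 1)
print(brute_force_global_threshold(y_true, y_scores))
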
Example No. 8
0
def ye_et_al(args, logger):

    # Parameters
    dataset = args.dataset
    pretrained = args.pretrained

    csv_path_train = '../data/{}/TRAIN.csv'.format(dataset)
    csv_path_validation = '../data/{}/VALIDATION.csv'.format(dataset)
    csv_path_test = '../data/{}/TEST.csv'.format(dataset)

    df_train = pd.read_csv(csv_path_train)
    df_validation = pd.read_csv(csv_path_validation)
    df_test = pd.read_csv(csv_path_test)

    # Extract ground truth labels
    y_true_train = np.array(
        [np.array(ast.literal_eval(l)) for l in df_train['marginal_labels']])
    y_true_validation = np.array([
        np.array(ast.literal_eval(l)) for l in df_validation['marginal_labels']
    ])
    y_true_test = np.array(
        [np.array(ast.literal_eval(l)) for l in df_test['marginal_labels']])

    n_labels = y_true_train.shape[1]
    # Load the predicted marginals
    y_predicted_train = np.load(
        '../results/BR_predictions_train_{}_pt{}.npy'.format(
            dataset, int(pretrained)))
    y_predicted_validation = np.load(
        '../results/BR_predictions_validation_{}_pt{}.npy'.format(
            dataset, int(pretrained)))
    y_predicted_test = np.load(
        '../results/BR_predictions_test_{}_pt{}.npy'.format(
            dataset, int(pretrained)))

    for beta in [1, 2]:

        # Evaluate F score with threshold 0.5
        F_train = compute_F_score(y_true_train,
                                  y_predicted_train,
                                  t=0.5,
                                  beta=beta)
        F_validation = compute_F_score(y_true_validation,
                                       y_predicted_validation,
                                       t=0.5,
                                       beta=beta)
        F_test = compute_F_score(y_true_test,
                                 y_predicted_test,
                                 t=0.5,
                                 beta=beta)

        logger.log('\n')
        logger.log(
            'Binary relevance with threshold 0.5 - ({})'.format(dataset))
        logger.log('-' * 50)
        logger.log('F{} score on training data: {:.4f}'.format(beta, F_train))
        logger.log('F{} score on validation data: {:.4f}'.format(
            beta, F_validation))
        logger.log('F{} score on test data: {:.4f}'.format(beta, F_test))

        # Ye et al. (2012): plug-in rule algorithm that takes the predicted marginals as input
        algorithm = QuadraticTimeAlgorithm(beta)

        optimal_predictions_train = np.array(
            [algorithm.get_predictions(i) for i in tqdm(y_predicted_train)])
        optimal_predictions_validation = np.array([
            algorithm.get_predictions(i) for i in tqdm(y_predicted_validation)
        ])
        optimal_predictions_test = np.array(
            [algorithm.get_predictions(i) for i in tqdm(y_predicted_test)])

        F_GFM_MC_train = compute_F_score(y_true_train,
                                         optimal_predictions_train,
                                         t=0.5,
                                         beta=beta)
        F_GFM_MC_validation = compute_F_score(y_true_validation,
                                              optimal_predictions_validation,
                                              t=0.5,
                                              beta=beta)
        F_GFM_MC_test = compute_F_score(y_true_test,
                                        optimal_predictions_test,
                                        t=0.5,
                                        beta=beta)
        logger.log('\n')
        logger.log(
            'F{} scores with algorithm of Ye et al (2012) - ({})'.format(
                beta, dataset))
        logger.log('-' * 50)
        logger.log('F{} score on training data: {:.4f}'.format(
            beta, F_GFM_MC_train))
        logger.log('F{} score on validation data: {:.4f}'.format(
            beta, F_GFM_MC_validation))
        logger.log('F{} score on test data: {:.4f}'.format(
            beta, F_GFM_MC_test))

        if (dataset == 'KAGGLE_PLANET') and (beta == 2):
            # Map predictions to filenames
            def filepath_to_filename(s):
                return os.path.basename(os.path.normpath(s)).split('.')[0]

            test_filenames = [
                filepath_to_filename(f) for f in df_test['full_path']
            ]
            GFM_predictions_mapping = dict(
                zip(test_filenames, [
                    csv_helpers.decode_label_vector(f)
                    for f in optimal_predictions_test
                ]))
            # Create submission file
            csv_helpers.create_submission_file(
                GFM_predictions_mapping,
                name='Planet_Yeetal_2012_pt{}'.format(int(pretrained)))
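
Note: QuadraticTimeAlgorithm implements the exact plug-in rule of Ye et al. (2012) and is not reproduced here. Under the label-independence assumption that rule relies on, the F-beta-optimal prediction can be taken to be a top-k set of the marginals, so a crude Monte-Carlo stand-in can simply score every top-k candidate on sampled label vectors (hypothetical helper; n_samples is an arbitrary choice):

import numpy as np

def plugin_topk_monte_carlo(marginals, beta=2, n_samples=2000, seed=0):
    # Score every top-k prediction set against label vectors sampled from
    # the (assumed independent) marginals and keep the best one; a crude
    # stand-in for the exact quadratic-time algorithm of Ye et al. (2012)
    rng = np.random.RandomState(seed)
    m = np.asarray(marginals, dtype=float)
    order = np.argsort(-m)                    # most probable labels first
    samples = rng.uniform(size=(n_samples, m.size)) < m
    best_pred, best_value = np.zeros(m.size, dtype=int), -1.0
    for k in range(m.size + 1):
        pred = np.zeros(m.size, dtype=int)
        pred[order[:k]] = 1
        tp = (samples & (pred == 1)).sum(axis=1)
        denom = beta ** 2 * samples.sum(axis=1) + k
        f = np.where(denom > 0,
                     (1 + beta ** 2) * tp / np.maximum(denom, 1),
                     1.0)                     # empty vs. empty counts as F = 1
        if f.mean() > best_value:
            best_pred, best_value = pred, f.mean()
    return best_pred

print(plugin_topk_monte_carlo([0.9, 0.6, 0.2, 0.05]))
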
Example No. 9
0
def OR(args, logger):
    dataset = args.dataset
    g = args.g
    q = args.q
    pretrained = True

    csv_path_train = '../data/{}/TRAIN.csv'.format(dataset)
    csv_path_validation = '../data/{}/VALIDATION.csv'.format(dataset)
    csv_path_test = '../data/{}/TEST.csv'.format(dataset)

    df_train = pd.read_csv(csv_path_train)
    df_validation = pd.read_csv(csv_path_validation)
    df_test = pd.read_csv(csv_path_test)

    features_train = np.load(
        '../data/{}/features/features_train_max.npy'.format(dataset))
    features_validation = np.load(
        '../data/{}/features/features_validation_max.npy'.format(dataset))
    features_test = np.load(
        '../data/{}/features/features_test_max.npy'.format(dataset))

    # rescale
    from sklearn.preprocessing import StandardScaler
    featurescaler = StandardScaler().fit(features_train)

    features_train = featurescaler.transform(features_train)
    features_validation = featurescaler.transform(features_validation)
    features_test = featurescaler.transform(features_test)

    y_gfm_train = np.array([
        ast.literal_eval(df_train['gfm_labels'][i])
        for i in range(len(df_train))
    ])
    y_gfm_validation = np.array([
        ast.literal_eval(df_validation['gfm_labels'][i])
        for i in range(len(df_validation))
    ])

    # Extract ground truth labels to compute F scores
    y_true_train = np.array([
        ast.literal_eval(df_train['marginal_labels'][i])
        for i in range(len(df_train))
    ])
    y_true_validation = np.array([
        ast.literal_eval(df_validation['marginal_labels'][i])
        for i in range(len(df_validation))
    ])
    y_true_test = np.array([
        ast.literal_eval(df_test['marginal_labels'][i])
        for i in range(len(df_test))
    ])
    n_labels = y_true_train.shape[1]

    # Load the predicted marginals from BR method and replace the true test labels with them
    predicted_marginals_train = np.load(
        '../results/BR_predictions_train_{}_pt{}.npy'.format(
            dataset, int(pretrained)))
    predicted_marginals_validation = np.load(
        '../results/BR_predictions_validation_{}_pt{}.npy'.format(
            dataset, int(pretrained)))
    predicted_marginals_test = np.load(
        '../results/BR_predictions_test_{}_pt{}.npy'.format(
            dataset, int(pretrained)))

    # Containers
    GFM_train_entries = []
    GFM_validation_entries = []
    GFM_test_entries = []

    for label in range(n_labels):
        print('Label {} of {}...'.format(label + 1, n_labels))
        # Extract one ordinal regression problem
        boolean_index_train = y_gfm_train[:, label, 0] == 0
        boolean_index_valid = y_gfm_validation[:, label, 0] == 0

        # we don't need the first column (P(y=0))
        y_dummies_train = y_gfm_train[boolean_index_train, label, 1:]
        y_dummies_validation = y_gfm_validation[boolean_index_valid, label, 1:]

        # Transform the labels so that they start from zero and so that each
        # class occurs at least once in the dataset (to avoid errors when
        # minimizing the NLL); a back-transform is applied later on

        y_train = np.argmax(y_dummies_train, axis=1)
        y_validation = np.argmax(y_dummies_validation, axis=1)

        y_train_transformed = y_train - y_train.min()
        y_validation_transformed = y_validation - y_train.min()
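        # e.g. observed classes {2, 3, 5} become {0, 1, 3}; y_train.min() is
        # added back below when the missing-class columns are re-inserted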

        x_train = features_train[boolean_index_train, :]
        x_validation = features_validation[boolean_index_valid, :]

        n_classes = len(np.unique(y_train))
        n_features = x_train.shape[1]

        # register TF session with keras
        sess = tf.Session()
        K.set_session(sess)

        # fetch NLL
        proportional_odds_model = ProportionalOdds_TF(n_features, n_classes, g, q)
        features, y = proportional_odds_model.features, proportional_odds_model.y  # placeholders
        total_loss = proportional_odds_model.total_loss  # loss
        b, w, xW = (proportional_odds_model.b, proportional_odds_model.w,
                    proportional_odds_model.xW)  # weights and biases

        train_step = tf.train.AdamOptimizer().minimize(total_loss)
        validation_loss_hist = []
        train_loss_hist = []
        patience_counter = 0
        patience = 3
        min_delta = 1e-4
        epochs = 2000  # Early stopping on validation data
        batch_size = 32

        def get_batch(x, y, i):
            # Return mini-batch i; Python slicing clamps at the end of the
            # array, so the last batch simply gets the remainder
            batch_x = x[i * batch_size:(i + 1) * batch_size]
            batch_y = y[i * batch_size:(i + 1) * batch_size]
            return batch_x, batch_y

        train_steps = int(np.ceil(len(y_train) / batch_size))
        with sess.as_default():
            sess.run(tf.global_variables_initializer())
            for i in range(epochs + 1):
                # shuffle x and y at beginning of epoch
                x_train_shuffle, y_train_shuffle = shuffle(
                    x_train, y_train_transformed)
                for j in range(train_steps):

                    batch_x, batch_y = get_batch(x_train_shuffle,
                                                 y_train_shuffle, j)
                    sess.run(train_step,
                             feed_dict={
                                 features: batch_x,
                                 y: batch_y.reshape(-1, 1)
                             })

                train_loss = sess.run(total_loss,
                                      feed_dict={
                                          features: x_train,
                                          y:
                                          y_train_transformed.reshape(-1, 1)
                                      })
                validation_loss = sess.run(
                    total_loss,
                    feed_dict={
                        features: x_validation,
                        y: y_validation_transformed.reshape(-1, 1)
                    })

                train_loss_hist.append(train_loss)
                validation_loss_hist.append(validation_loss)
                # print('Epoch {} - Training loss {:.3f} - Validation loss {:.3f}'.format(i,
                #                                                                            train_loss, validation_loss))

                if np.isnan(train_loss):
                    logger.log('NaN loss!')
                    b_current = b.eval()
                    w_current = w.eval()
                    encoding_current = xW.eval(feed_dict={features: x_train})
                    print('Current biases : {}'.format(b_current))
                    print('Current weights: {}'.format(w_current))
                    break

                # Control flow for early stopping
                if validation_loss_hist[i] - np.min(
                        validation_loss_hist) > min_delta:
                    patience_counter += 1
                else:
                    patience_counter = 0

                if patience_counter == patience or i == epochs:
                    print('Early stopping... ({} epochs)'.format(i))
                    print('Optimal biases : {}'.format(b.eval()))
                    break

            # Make predictions for the subset of data that was used to train the OR model
            b_opt = b.eval()
            encoding_train = xW.eval(feed_dict={features: x_train})
            encoding_validation = xW.eval(feed_dict={features: x_validation})
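            # Ordinal prediction: count how many cut-points in b_opt lie at
            # or above the linear encoding xW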
            preds_train = np.sum(encoding_train <= b_opt, axis=1)
            preds_validation = np.sum(encoding_validation <= b_opt, axis=1)
            acc_train = metrics.accuracy_score(y_train_transformed,
                                               preds_train)
            acc_validation = metrics.accuracy_score(y_validation_transformed,
                                                    preds_validation)
            print('Training set accuracy: {:.3f}'.format(acc_train))
            print('Validation set accuracy: {:.3f}'.format(acc_validation))

            # Finally, make predictions for all instances (we don't know the exact marginals at test time)
            encoding_train_full = xW.eval(feed_dict={features: features_train})
            encoding_validation_full = xW.eval(
                feed_dict={features: features_validation})
            encoding_test_full = xW.eval(feed_dict={features: features_test})

        tf.reset_default_graph()

        # Go to probability estimates

        def sigmoid(v):
            return 1. / (1. + np.exp(-v))

        def or_to_probabilities(encoding, biases):
            # Adjacent differences of cumulative sigmoids:
            # P(y = k) = sigmoid(e + c_{k+1}) - sigmoid(e + c_k),
            # with the ordered cut-points c padded by -inf and +inf
            cuts = np.hstack([-np.inf, biases, np.inf])
            return sigmoid(encoding + cuts[1:]) - sigmoid(encoding + cuts[:-1])

        conditionals_train = or_to_probabilities(encoding_train_full, b_opt)
        conditionals_validation = or_to_probabilities(encoding_validation_full,
                                                      b_opt)
        conditionals_test = or_to_probabilities(encoding_test_full, b_opt)

        # Add columns of zeros for the classes that were not present
        index = [
            i not in np.unique(y_train_transformed + y_train.min())
            for i in np.arange(y_dummies_train.shape[1])
        ]
        missing = list(
            np.sort(np.arange(y_dummies_train.shape[1])[np.array(index)]))

        for m in missing:  # Has to be done in a loop to be correct
            conditionals_train = np.insert(conditionals_train, m, 0, axis=1)
            conditionals_validation = np.insert(conditionals_validation,
                                                m,
                                                0,
                                                axis=1)
            conditionals_test = np.insert(conditionals_test, m, 0, axis=1)

        # Multiply the conditionals with the predicted marginals
        probabilities_train = (conditionals_train *
                               predicted_marginals_train[:, label].reshape(-1, 1))
        probabilities_validation = (conditionals_validation *
                                    predicted_marginals_validation[:, label].reshape(-1, 1))
        probabilities_test = (conditionals_test *
                              predicted_marginals_test[:, label].reshape(-1, 1))

        GFM_train_entries.append(probabilities_train)
        GFM_validation_entries.append(probabilities_validation)
        GFM_test_entries.append(probabilities_test)

    GFM_train_entries = np.stack(GFM_train_entries).transpose(1, 0, 2)
    GFM_validation_entries = np.stack(GFM_validation_entries).transpose(
        1, 0, 2)
    GFM_test_entries = np.stack(GFM_test_entries).transpose(1, 0, 2)

    # Store GFM train entries for debugging
    import pickle
    pickle.dump(GFM_train_entries,
                open('../notebooks/GFM_train_entries_original.p', 'wb'))

    # Fill them
    train_predictions_filled = [
        complete_matrix_columns_with_zeros(mat[:, :], len=n_labels)
        for mat in tqdm(GFM_train_entries)
    ]
    validation_predictions_filled = [
        complete_matrix_columns_with_zeros(mat[:, :], len=n_labels)
        for mat in tqdm(GFM_validation_entries)
    ]
    test_predictions_filled = [
        complete_matrix_columns_with_zeros(mat[:, :], len=n_labels)
        for mat in tqdm(GFM_test_entries)
    ]

    # Run GFM for F1 and F2

    for beta in [1, 2]:
        GFM = GeneralFMaximizer(beta, n_labels)

        # Run GFM algo on this output
        (optimal_predictions_train,
         E_F_train) = GFM.get_predictions(predictions=train_predictions_filled)
        (optimal_predictions_validation, E_F_validation) = GFM.get_predictions(
            predictions=validation_predictions_filled)
        (optimal_predictions_test,
         E_F_test) = GFM.get_predictions(predictions=test_predictions_filled)

        # Evaluate F score
        F_train = compute_F_score(y_true_train,
                                  optimal_predictions_train,
                                  t=0.5,
                                  beta=beta)
        F_validation = compute_F_score(y_true_validation,
                                       optimal_predictions_validation,
                                       t=0.5,
                                       beta=beta)
        F_test = compute_F_score(y_true_test,
                                 optimal_predictions_test,
                                 t=0.5,
                                 beta=beta)

        logger.log('GFM_OR ({})'.format(dataset))
        logger.log('-' * 50)
        logger.log('F{} score on training data: {:.4f}'.format(beta, F_train))
        logger.log('F{} score on validation data: {:.4f}'.format(
            beta, F_validation))
        logger.log('F{} score on test data: {:.4f}'.format(beta, F_test))

        if (dataset == 'KAGGLE_PLANET') and (beta == 2):
            # Map predictions to filenames
            def filepath_to_filename(s):
                return os.path.basename(os.path.normpath(s)).split('.')[0]

            test_filenames = [
                filepath_to_filename(f) for f in df_test['full_path']
            ]
            GFM_predictions_mapping = dict(
                zip(test_filenames, [
                    csv_helpers.decode_label_vector(f)
                    for f in optimal_predictions_test
                ]))
            # Create submission file
            csv_helpers.create_submission_file(GFM_predictions_mapping,
                                               name='Planet_OR_{}'.format(
                                                   int(pretrained)))
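
Note: for intuition, the cumulative-sigmoid trick in or_to_probabilities can be checked in isolation: the adjacent differences always form a valid probability vector over the ordinal classes. A tiny standalone sketch with toy encodings and cut-points (no project code required):

import numpy as np

def sigmoid(v):
    return 1. / (1. + np.exp(-v))

def or_to_probabilities(encoding, biases):
    # P(y = k) = sigmoid(e + c_{k+1}) - sigmoid(e + c_k), with the ordered
    # cut-points padded by -inf and +inf
    cuts = np.hstack([-np.inf, biases, np.inf])
    return sigmoid(encoding + cuts[1:]) - sigmoid(encoding + cuts[:-1])

encoding = np.array([[0.3], [-1.2]])    # toy linear encodings xW
biases = np.array([-1.0, 0.5, 2.0])     # toy ordered cut-points
probs = or_to_probabilities(encoding, biases)
print(probs)                            # one probability vector per row
print(probs.sum(axis=1))                # each row sums to 1
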