def test(data_dir, results_dir, base_model, start_fold=None, end_fold=10):
    if not start_fold:
        start_fold = current_fold(results_dir, base_model.name + '.fold')

    backend = 'tf' if K.backend() == 'tensorflow' else 'th'
    target_size = (base_model.img_height, base_model.img_width)
    test_datagen = ImageDataGenerator(rescale=1. / 255)

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        print 'Testing fold {} for {}'.format(fold, base_model.name)
        weights = base_model.best_weights.format(fold)
        model = base_model.load(weights=weights)

        results = list()
        test_dir = os.path.join(data_dir, fold, 'test')
        test_images = read_fold_dir(test_dir)
        for label, img_path in test_images:
            img = load_image(test_datagen, img_path, target_size)
            predictions = model.predict(img)
            results.append((img_path, label, predictions))

        results_fname = "{}.fold_{}.{}.csv".format(base_model.name, fold, backend)
        results_filepath = os.path.join(results_dir, results_fname)
        write_results(results, results_filepath)

        del model
        if K.backend() == 'tensorflow':
            K.clear_session()
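# ---------------------------------------------------------------------------
# The helpers used above (current_fold, read_fold_dir, load_image,
# write_results) are defined elsewhere in this project.  The sketches below
# only illustrate what they are assumed to do, judging from how they are
# called here; the names carry a _sketch suffix because they are guesses, not
# the project's actual implementations.
# ---------------------------------------------------------------------------


def read_fold_dir_sketch(fold_dir):
    """Sketch: return (label, image_path) pairs, assuming one subdirectory per class."""
    images = []
    for label in sorted(os.listdir(fold_dir)):
        class_dir = os.path.join(fold_dir, label)
        if not os.path.isdir(class_dir):
            continue
        for fname in sorted(os.listdir(class_dir)):
            images.append((label, os.path.join(class_dir, fname)))
    return images


def load_image_sketch(datagen, img_path, target_size):
    """Sketch: load one image as a (1, height, width, channels) batch,
    with the generator's rescaling applied, ready for model.predict."""
    from keras.preprocessing import image as keras_image
    img = keras_image.load_img(img_path, target_size=target_size)
    x = keras_image.img_to_array(img)
    x = datagen.standardize(x)  # applies rescale=1. / 255
    return np.expand_dims(x, axis=0)
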
def test_on_cnn(data_dir, results_dir, cnn_model, rf_model, start_fold=None, end_fold=10, progress_percent=.1):
    if not start_fold:
        start_fold = current_fold(results_dir, rf_model.name + '.fold')

    backend = 'tf' if K.backend() == 'tensorflow' else 'th'
    target_size = (cnn_model.img_height, cnn_model.img_width)
    test_datagen = ImageDataGenerator(rescale=1. / 255)

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        weights = cnn_model.best_weights.format(fold)
        base_model = cnn_model.load(weights=weights)

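        # Feature extractor: reuse the trained CNN, but expose the activations
        # of the layers the random forest was fitted on as model outputs.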
        layers_by_name = {l.name: l for l in base_model.layers}
        outputs = [layers_by_name[l].output for l in rf_model.layers]
        model = Model(inputs=base_model.input, outputs=outputs)

        weights = rf_model.weights.format(fold)
        rf = load_random_forest(weights)

        results = list()
        test_dir = os.path.join(data_dir, fold, 'test')
        test_images = read_fold_dir(test_dir)

        num_test_images = len(test_images)
        test_progress_percent = max(1, int(num_test_images * progress_percent))

        print 'Testing fold {} for {} + RF on layers {}'.format(fold, cnn_model.name, ', '.join(rf_model.layers))
        for i, (label, img_path) in enumerate(test_images):

            img = load_image(test_datagen, img_path, target_size)

            predictions = model.predict(img)
            if len(rf_model.layers) == 1:
                predictions = [predictions]

            # Concatenating features
            features = np.array([])
            for p in predictions:
                features = np.append(features, p[0].copy())
            prediction = rf.predict([features])[0].astype(int)

            results.append((img_path, label, prediction))

            if progress_percent and (i + 1) % test_progress_percent == 0:
                print("Progress %3.2f%% (%d/%d)" % (100.0 * (i + 1) / num_test_images, i + 1, num_test_images))

        results_fname = "{}.fold_{}.{}.csv".format(rf_model.name, fold, backend)
        results_filepath = os.path.join(results_dir, results_fname)
        write_results(results, results_filepath)

        del model
        if K.backend() == 'tensorflow':
            K.clear_session()
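
# load_random_forest is assumed to deserialize a scikit-learn forest trained on
# the CNN activations of the layers listed in rf_model.layers.  A minimal
# sketch, assuming the forest was persisted with joblib (a guess about the
# project's storage format):


def load_random_forest_sketch(weights_path):
    """Sketch: load a joblib-dumped sklearn RandomForestClassifier."""
    from sklearn.externals import joblib  # plain `import joblib` on newer scikit-learn
    return joblib.load(weights_path)
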
def extract_castro_features(cnn_model,
                            data_dir,
                            features_dir,
                            start_fold=1,
                            end_fold=5,
                            num_categories=21,
                            num_bins=10,
                            progress_percent=.05):

    backend = 'tf' if K.backend() == 'tensorflow' else 'th'
    target_size = (cnn_model.img_height, cnn_model.img_width)
    datagen = ImageDataGenerator(rescale=1. / 255)

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        weights = cnn_model.weights.format(fold)
        model = cnn_model.load(weights=weights)

        users = IO.load_annotations(ntcir.filepaths)

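        # Index every annotated image by its user/date/filename relative path
        # so that files on disk can be matched back to their annotations.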
        ind_by_img_path = dict()
        for user_id, days in users.iteritems():
            for date, day in days.iteritems():
                for ind, image in enumerate(day.images):
                    relative_path = '/'.join(image.path.split('/')[-3:])
                    ind_by_img_path[relative_path] = ind

        test_dir = os.path.join(data_dir, fold, 'test')
        train_dir = os.path.join(data_dir, fold, 'train')
        validation_dir = os.path.join(data_dir, fold, 'validation')

        if os.path.isdir(validation_dir):
            images = (read_fold_dir(train_dir) + read_fold_dir(test_dir) +
                      read_fold_dir(validation_dir))
        else:
            images = read_fold_dir(train_dir) + read_fold_dir(test_dir)

        num_images = len(images)
        images_progress_percent = max(1, int(num_images * progress_percent))

        print 'Extracting temporal features on fold {} for {}'.format(
            fold, cnn_model.name)

        for i, (label, img_path) in enumerate(images):

            img = load_image(datagen, img_path, target_size)

            # Resolve which annotated image this file corresponds to before
            # reading its metadata (hour, minute, weekday, original path).
            rpath = os.path.realpath(img_path)
            user_id, date, filename = rpath.split('/')[-3:]
            relative_path = '/'.join([user_id, date, filename])
            img_ind = ind_by_img_path[relative_path]
            image = users[user_id][date].images[img_ind]

            # Feature vector: CNN class probabilities, then hour/minute/weekday,
            # then a colour histogram with 3 * num_bins values.
            features = np.zeros((num_categories + 3 * num_bins + 3))
            features[:num_categories] = model.predict(img)[0]
            features[num_categories] = image.hour
            features[num_categories + 1] = image.minute
            features[num_categories + 2] = image.weekday
            features[num_categories + 3:] = get_histogram(image.path, num_bins)

            image.features = features

            if progress_percent and (i + 1) % images_progress_percent == 0:
                print("Progress %3.2f%% (%d/%d)" %
                      (100.0 * (i + 1) / num_images, i + 1, num_images))

        features_filepath = "features.{}.fold_{}.{}.pkl".format(
            cnn_model.name, fold, backend)
        features_filepath = os.path.join(features_dir, features_filepath)
        with open(features_filepath, 'wb') as f:
            pickle.dump(users, f, pickle.HIGHEST_PROTOCOL)

        del model
        if K.backend() == 'tensorflow':
            K.clear_session()
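
# get_histogram is assumed to return a colour histogram with num_bins bins per
# channel, flattened to 3 * num_bins values, which is what the slice
# features[num_categories + 3:] above expects.  A sketch of that assumption:


def get_histogram_sketch(img_path, num_bins):
    """Sketch: normalized per-channel RGB histogram, 3 * num_bins values."""
    from PIL import Image
    pixels = np.asarray(Image.open(img_path).convert('RGB'), dtype=np.float32)
    hist = []
    for channel in range(3):
        counts, _ = np.histogram(pixels[..., channel], bins=num_bins, range=(0, 256))
        hist.append(counts / float(counts.sum()))
    return np.concatenate(hist)
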
def extract_rf_features(data_dir,
                        features_dir,
                        cnn_model,
                        rf_model,
                        start_fold=1,
                        end_fold=5,
                        progress_percent=.1):

    backend = 'tf' if K.backend() == 'tensorflow' else 'th'
    target_size = (cnn_model.img_height, cnn_model.img_width)
    datagen = ImageDataGenerator(rescale=1. / 255)

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        weights = cnn_model.weights.format(fold)
        base_model = cnn_model.load(weights=weights)

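        # Feature extractor: reuse the trained CNN, but expose the activations
        # of the layer the random forest was fitted on as the model output.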
        layers_by_name = {l.name: l for l in base_model.layers}
        outputs = [layers_by_name[rf_model.layer].output]
        model = Model(inputs=base_model.input, outputs=outputs)

        weights = rf_model.weights.format(fold)
        rf = load_random_forest(weights)

        users = IO.load_annotations(ntcir.filepaths)

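        # Index every annotated image by its user/date/filename relative path
        # so that files on disk can be matched back to their annotations.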
        ind_by_img_path = dict()
        for user_id, days in users.iteritems():
            for date, day in days.iteritems():
                for ind, image in enumerate(day.images):
                    relative_path = '/'.join(image.path.split('/')[-3:])
                    ind_by_img_path[relative_path] = ind

        test_dir = os.path.join(data_dir, fold, 'test')
        train_dir = os.path.join(data_dir, fold, 'train')
        validation_dir = os.path.join(data_dir, fold, 'validation')

        if os.path.isdir(validation_dir):
            images = (read_fold_dir(train_dir) + read_fold_dir(test_dir) +
                      read_fold_dir(validation_dir))
        else:
            images = read_fold_dir(train_dir) + read_fold_dir(test_dir)

        num_images = len(images)
        images_progress_percent = max(1, int(num_images * progress_percent))

        print 'Extracting temporal features on fold {} for {} + RF on layer {}'.format(
            fold, cnn_model.name, rf_model.layer)

        for i, (label, img_path) in enumerate(images):

            img = load_image(datagen, img_path, target_size)

            predictions = model.predict(img)

            # Features are the activations of the selected CNN layer
            features = predictions[0].copy()
            probability = rf.predict_proba([features])[0]

            rpath = os.path.realpath(img_path)
            user_id, date, filename = rpath.split('/')[-3:]

            relative_path = '/'.join([user_id, date, filename])

            img_ind = ind_by_img_path[relative_path]
            image = users[user_id][date].images[img_ind]
            image.features = probability.copy()

            if progress_percent and (i + 1) % images_progress_percent == 0:
                print("Progress %3.2f%% (%d/%d)" %
                      (100.0 * (i + 1) / num_images, i + 1, num_images))

        features_filepath = "features.{}.fold_{}.{}.pkl".format(
            rf_model.name, fold, backend)
        features_filepath = os.path.join(features_dir, features_filepath)
        with open(features_filepath, 'wb') as f:
            pickle.dump(users, f, pickle.HIGHEST_PROTOCOL)

        del model
        if K.backend() == 'tensorflow':
            K.clear_session()
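
# A hypothetical driver showing how these functions might be wired together.
# The paths and the `vgg16_model` / `rf_on_fc1` objects are made-up
# placeholders, not names taken from this project, so the calls are left
# commented out.
#
# if __name__ == '__main__':
#     test('data/folds', 'results', vgg16_model)
#     test_on_cnn('data/folds', 'results', vgg16_model, rf_on_fc1)
#     extract_rf_features('data/folds', 'features', vgg16_model, rf_on_fc1)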