def extract_cnn_features(cnn_model, layer, features_dir):
    """Store the output of one CNN layer on every annotated image and pickle it.

    The network is loaded with ``cnn_model.weights`` and truncated at the
    layer named ``layer``. Each image returned by
    ``IO.load_annotations(ntcir.filepaths)`` is pushed through the truncated
    network and the prediction is stored on ``image.features``. The whole
    annotation structure is then pickled to
    ``<features_dir>/features.<cnn_model.name>.pkl``.

    NOTE(review): SOURCE contains a second definition with this same name
    further down; if both live in one module the later one replaces this one.

    Args:
        cnn_model: object exposing ``load(weights=...)``, ``weights``,
            ``img_height``, ``img_width`` and ``name``.
        layer: name of the layer of the loaded network whose output is kept.
        features_dir: directory that receives the pickle file.

    Raises:
        KeyError: if the loaded network has no layer named ``layer``.
    """
    # prepare data augmentation configuration
    datagen = ImageDataGenerator(rescale=1. / 255)

    base_model = cnn_model.load(weights=cnn_model.weights)

    target_size = (cnn_model.img_height, cnn_model.img_width)

    layers_by_name = {l.name: l for l in base_model.layers}
    outputs = layers_by_name[layer].output
    model = Model(inputs=base_model.input, outputs=outputs)

    users = IO.load_annotations(ntcir.filepaths)

    for user_id, user in users.iteritems():
        for date, day in user.iteritems():
            for image in day.images:
                img = load_image(datagen, image.path, target_size)
                image.features = model.predict(img).copy()

    features_filepath = os.path.join(features_dir,
                                     "features." + cnn_model.name + ".pkl")
    # HIGHEST_PROTOCOL is a binary protocol, so the file must be opened in
    # binary mode; text mode corrupts the stream on platforms that translate
    # newlines and is rejected outright by Python 3.
    with open(features_filepath, 'wb') as f:
        pickle.dump(users, f, pickle.HIGHEST_PROTOCOL)

    del model
    if K.backend() == 'tensorflow':
        # Drop the TensorFlow graph/session state held by the Keras backend.
        K.clear_session()
def extract_cnn_features(cnn_model, layers, features_dir, start_fold=1, end_fold=10):
    """Store the outputs of several CNN layers on every image, once per fold.

    For each fold in ``start_fold..end_fold`` (inclusive, zero-padded to two
    digits) the network is loaded with ``cnn_model.weights.format(fold)`` and
    a sub-model exposing the layers named in ``layers`` is built. Every image
    returned by ``IO.load_annotations(ntcir.filepaths)`` gets
    ``image.features`` set to a dict mapping layer name to a copy of that
    layer's prediction. The annotations are pickled to
    ``<features_dir>/features.<cnn_model.name>.fold_<fold>.pkl``.

    Args:
        cnn_model: object exposing ``load(weights=...)``, ``weights`` (a
            format string taking the fold), ``img_height``, ``img_width`` and
            ``name``.
        layers: sequence of layer names; it is iterated more than once.
        features_dir: directory that receives the pickle files.
        start_fold: first fold number to process.
        end_fold: last fold number to process (inclusive).

    Raises:
        KeyError: if the loaded network lacks one of the requested layers.
    """
    # Neither of these depends on the fold, so build them once.
    # prepare data augmentation configuration
    datagen = ImageDataGenerator(rescale=1. / 255)
    target_size = (cnn_model.img_height, cnn_model.img_width)

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        init_weights = cnn_model.weights.format(fold)
        base_model = cnn_model.load(weights=init_weights)

        layers_by_name = {l.name: l for l in base_model.layers}
        outputs = [layers_by_name[l].output for l in layers]
        model = Model(inputs=base_model.input, outputs=outputs)

        # With a single output the prediction is wrapped in a list below so
        # that it can be indexed per layer like the multi-output case.
        single_output = len(model.output_layers) == 1

        users = IO.load_annotations(ntcir.filepaths)
        for user_id, user in users.iteritems():
            for date, day in user.iteritems():
                for image in day.images:
                    img = load_image(datagen, image.path, target_size)

                    predictions = model.predict(img)
                    if single_output:
                        predictions = [predictions]

                    image.features = {l: predictions[i].copy() for i, l in enumerate(layers)}

        features_filepath = os.path.join(features_dir, "features." + cnn_model.name + ".fold_" + fold + ".pkl")
        # HIGHEST_PROTOCOL is a binary protocol, so the file must be opened
        # in binary mode ('w' corrupts the stream where newlines are
        # translated and fails under Python 3).
        with open(features_filepath, 'wb') as f:
            pickle.dump(users, f, pickle.HIGHEST_PROTOCOL)

        del model
        if K.backend() == 'tensorflow':
            # Drop the backend graph/session state before the next fold.
            K.clear_session()
def test(data_dir, results_dir, base_model, start_fold=None, end_fold=10):
    """Run ``base_model`` over the test images of each fold and write results.

    When ``start_fold`` is falsy it is obtained from
    ``current_fold(results_dir, base_model.name + '.fold')``. For every fold
    up to ``end_fold`` (inclusive) the model is loaded with
    ``base_model.best_weights.format(fold)``, each image listed by
    ``read_fold_dir(<data_dir>/<fold>/test)`` is predicted, and the
    ``(img_path, label, predictions)`` tuples are handed to ``write_results``
    together with the path
    ``<results_dir>/<base_model.name>.fold_<fold>.<tf|th>.csv``.
    """
    if not start_fold:
        start_fold = current_fold(results_dir, base_model.name + '.fold')

    # Short backend tag used in the results file name.
    if K.backend() == 'tensorflow':
        backend = 'tf'
    else:
        backend = 'th'

    image_size = (base_model.img_height, base_model.img_width)
    generator = ImageDataGenerator(rescale=1. / 255)

    for fold_number in range(start_fold, end_fold + 1):
        fold = str(fold_number).zfill(2)
        print('Testing fold {} for {}'.format(fold, base_model.name))

        model = base_model.load(weights=base_model.best_weights.format(fold))

        fold_test_dir = os.path.join(data_dir, fold, 'test')
        results = []
        for label, img_path in read_fold_dir(fold_test_dir):
            img = load_image(generator, img_path, image_size)
            results.append((img_path, label, model.predict(img)))

        results_filepath = os.path.join(
            results_dir,
            "{}.fold_{}.{}.csv".format(base_model.name, fold, backend))
        write_results(results, results_filepath)

        # Release the network before loading the next fold's weights.
        del model
        if K.backend() == 'tensorflow':
            K.clear_session()
# Example #4
# 0
def test_on_cnn(data_dir, results_dir, cnn_model, rf_model, start_fold=None, end_fold=10, progress_percent=.1):
    """Classify each fold's test images with a random forest on CNN features.

    When ``start_fold`` is falsy it is obtained from
    ``current_fold(results_dir, rf_model.name + '.fold')``. For every fold up
    to ``end_fold`` (inclusive):

    * the CNN is loaded with ``cnn_model.best_weights.format(fold)`` and a
      sub-model exposing the layers named in ``rf_model.layers`` is built;
    * the forest is loaded with
      ``load_random_forest(rf_model.weights.format(fold))``;
    * for each image from ``read_fold_dir(<data_dir>/<fold>/test)`` the layer
      outputs are flattened and concatenated into one vector, which is
      passed to ``rf.predict``;
    * ``(img_path, label, prediction)`` tuples go to ``write_results`` with
      the path ``<results_dir>/<rf_model.name>.fold_<fold>.<tf|th>.csv``.

    Args:
        data_dir: directory containing one sub-directory per fold.
        results_dir: directory that receives the result files.
        cnn_model: object exposing ``load``, ``best_weights``, ``img_height``,
            ``img_width`` and ``name``.
        rf_model: object exposing ``layers``, ``weights`` and ``name``.
        start_fold: first fold number, or a falsy value to resume.
        end_fold: last fold number (inclusive).
        progress_percent: fraction of the test set between progress lines;
            a falsy value disables progress output.
    """
    if not start_fold:
        start_fold = current_fold(results_dir, rf_model.name + '.fold')

    backend = 'tf' if K.backend() == 'tensorflow' else 'th'
    target_size = (cnn_model.img_height, cnn_model.img_width)
    test_datagen = ImageDataGenerator(rescale=1. / 255)

    # With a single requested layer the prediction is wrapped in a list so
    # the concatenation below can treat both cases alike.
    single_output = len(rf_model.layers) == 1

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        weights = cnn_model.best_weights.format(fold)
        base_model = cnn_model.load(weights=weights)

        layers_by_name = {l.name: l for l in base_model.layers}
        outputs = [layers_by_name[l].output for l in rf_model.layers]
        model = Model(inputs=base_model.input, outputs=outputs)

        weights = rf_model.weights.format(fold)
        rf = load_random_forest(weights)

        results = list()
        test_dir = os.path.join(data_dir, fold, 'test')
        test_images = read_fold_dir(test_dir)

        num_test_images = len(test_images)
        # Clamp to at least 1: for small test sets the product truncates to
        # 0 and the modulo below would raise ZeroDivisionError.
        if progress_percent:
            report_every = max(1, int(num_test_images * progress_percent))
        else:
            report_every = 0

        print('Testing fold {} for {} + RF on layers {}'.format(fold, cnn_model.name, ', '.join(rf_model.layers)))
        for i, (label, img_path) in enumerate(test_images):

            img = load_image(test_datagen, img_path, target_size)

            predictions = model.predict(img)
            if single_output:
                predictions = [predictions]

            # Concatenating features: first sample of every output, flattened.
            # Cast to float64 to match what appending to an empty float array
            # produced before.
            features = np.concatenate(
                [np.ravel(p[0]) for p in predictions]).astype(np.float64)
            # np.int was only an alias of the builtin and is gone from
            # recent NumPy releases.
            prediction = rf.predict([features])[0].astype(int)

            results.append((img_path, label, prediction))

            done = i + 1
            if report_every and done % report_every == 0:
                # 100.0 forces true division; integer division printed 0.00%
                # for every line but the last under Python 2.
                print("Progress %3.2f%% (%d/%d)" % (100.0 * done / num_test_images, done, num_test_images))

        results_fname = "{}.fold_{}.{}.csv".format(rf_model.name, fold, backend)
        results_filepath = os.path.join(results_dir, results_fname)
        write_results(results, results_filepath)

        del model
        if K.backend() == 'tensorflow':
            # Drop the backend graph/session state before the next fold.
            K.clear_session()
def extract_castro_features(cnn_model,
                            data_dir,
                            features_dir,
                            start_fold=1,
                            end_fold=5,
                            num_categories=21,
                            num_bins=10,
                            progress_percent=.05):
    """Build per-image feature vectors from CNN output, time and histogram.

    For each fold in ``start_fold..end_fold`` (inclusive) the CNN is loaded
    with ``cnn_model.weights.format(fold)`` and the annotations are reloaded
    with ``IO.load_annotations(ntcir.filepaths)``. Every image listed under
    the fold's ``train``, ``test`` and (if the directory exists)
    ``validation`` directories is matched to its annotation through the last
    three components of its real path (``user_id/date/filename``). The
    annotation's ``features`` is set to a vector of length
    ``num_categories + 3 * num_bins + 3`` laid out as:

    * ``[0, num_categories)``: ``model.predict`` output;
    * next three entries: ``image.hour``, ``image.minute``,
      ``image.weekday``;
    * remainder: ``get_histogram(image.path, num_bins)`` (presumably
      ``3 * num_bins`` values; confirm against ``get_histogram``).

    The annotations are pickled to
    ``<features_dir>/features.<cnn_model.name>.fold_<fold>.<tf|th>.pkl``.

    Args:
        cnn_model: object exposing ``load``, ``weights``, ``img_height``,
            ``img_width`` and ``name``.
        data_dir: directory containing one sub-directory per fold.
        features_dir: directory that receives the pickle files.
        start_fold: first fold number.
        end_fold: last fold number (inclusive).
        num_categories: number of leading entries filled from the CNN output.
        num_bins: bin count passed to ``get_histogram``.
        progress_percent: fraction of the images between progress lines; a
            falsy value disables progress output.

    Raises:
        KeyError: if an image on disk has no matching annotation.
    """
    backend = 'tf' if K.backend() == 'tensorflow' else 'th'
    target_size = (cnn_model.img_height, cnn_model.img_width)
    datagen = ImageDataGenerator(rescale=1. / 255)

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        weights = cnn_model.weights.format(fold)
        model = cnn_model.load(weights=weights)

        users = IO.load_annotations(ntcir.filepaths)

        # Position of every annotated image inside its day, keyed by the
        # last three path components.
        ind_by_img_path = dict()
        for user_id, days in users.iteritems():
            for date, day in days.iteritems():
                for ind, image in enumerate(day.images):
                    relative_path = '/'.join(image.path.split('/')[-3:])
                    ind_by_img_path[relative_path] = ind

        test_dir = os.path.join(data_dir, fold, 'test')
        train_dir = os.path.join(data_dir, fold, 'train')
        validation_dir = os.path.join(data_dir, fold, 'validation')

        images = read_fold_dir(train_dir) + read_fold_dir(test_dir)
        if os.path.isdir(validation_dir):
            images = images + read_fold_dir(validation_dir)

        num_images = len(images)
        # Clamp to at least 1: for small image sets the product truncates
        # to 0 and the modulo below would raise ZeroDivisionError.
        if progress_percent:
            report_every = max(1, int(num_images * progress_percent))
        else:
            report_every = 0

        print('Extracting temporal features on fold {} for {}'.format(
            fold, cnn_model.name))

        for i, (label, img_path) in enumerate(images):

            # Resolve the annotation for THIS image before reading any of
            # its attributes; previously hour/minute/weekday/path were read
            # from whichever image the name happened to be bound to.
            rpath = os.path.realpath(img_path)
            user_id, date, filename = rpath.split('/')[-3:]
            relative_path = '/'.join([user_id, date, filename])
            img_ind = ind_by_img_path[relative_path]
            image = users[user_id][date].images[img_ind]

            img = load_image(datagen, img_path, target_size)

            features = np.zeros((num_categories + 3 * num_bins + 3))
            features[:num_categories] = model.predict(img)
            features[num_categories] = image.hour
            features[num_categories + 1] = image.minute
            features[num_categories + 2] = image.weekday
            features[num_categories + 3:] = get_histogram(image.path, num_bins)

            image.features = features

            done = i + 1
            if report_every and done % report_every == 0:
                # 100.0 forces true division; integer division printed 0.00%
                # for every line but the last under Python 2.
                print("Progress %3.2f%% (%d/%d)" %
                      (100.0 * done / num_images, done, num_images))

        # This function has no rf_model in scope; the CNN is the only model
        # it works with, so its name identifies the file.
        features_filepath = "features.{}.fold_{}.{}.pkl".format(
            cnn_model.name, fold, backend)
        features_filepath = os.path.join(features_dir, features_filepath)
        # HIGHEST_PROTOCOL is a binary protocol, so open in binary mode.
        with open(features_filepath, 'wb') as f:
            pickle.dump(users, f, pickle.HIGHEST_PROTOCOL)

        del model
        if K.backend() == 'tensorflow':
            # Drop the backend graph/session state before the next fold.
            K.clear_session()
def extract_rf_features(data_dir,
                        features_dir,
                        cnn_model,
                        rf_model,
                        start_fold=1,
                        end_fold=5,
                        progress_percent=.1):
    """Store random-forest class probabilities on every image, once per fold.

    For each fold in ``start_fold..end_fold`` (inclusive):

    * the CNN is loaded with ``cnn_model.weights.format(fold)`` and truncated
      at the layer named ``rf_model.layer``;
    * the forest is loaded with
      ``load_random_forest(rf_model.weights.format(fold))``;
    * the annotations are reloaded with
      ``IO.load_annotations(ntcir.filepaths)``;
    * every image listed under the fold's ``train``, ``test`` and (if the
      directory exists) ``validation`` directories is matched to its
      annotation through the last three components of its real path
      (``user_id/date/filename``), and the annotation's ``features`` is set
      to ``rf.predict_proba`` of the truncated network's output;
    * the annotations are pickled to
      ``<features_dir>/features.<rf_model.name>.fold_<fold>.<tf|th>.pkl``.

    Args:
        data_dir: directory containing one sub-directory per fold.
        features_dir: directory that receives the pickle files.
        cnn_model: object exposing ``load``, ``weights``, ``img_height``,
            ``img_width`` and ``name``.
        rf_model: object exposing ``layer``, ``weights`` and ``name``.
        start_fold: first fold number.
        end_fold: last fold number (inclusive).
        progress_percent: fraction of the images between progress lines; a
            falsy value disables progress output.

    Raises:
        KeyError: if the layer is missing from the network or an image on
            disk has no matching annotation.
    """
    backend = 'tf' if K.backend() == 'tensorflow' else 'th'
    target_size = (cnn_model.img_height, cnn_model.img_width)
    datagen = ImageDataGenerator(rescale=1. / 255)

    folds = [str(fold).zfill(2) for fold in range(start_fold, end_fold + 1)]
    for fold in folds:
        weights = cnn_model.weights.format(fold)
        base_model = cnn_model.load(weights=weights)

        layers_by_name = {l.name: l for l in base_model.layers}
        outputs = [layers_by_name[rf_model.layer].output]
        model = Model(inputs=base_model.input, outputs=outputs)

        weights = rf_model.weights.format(fold)
        rf = load_random_forest(weights)

        users = IO.load_annotations(ntcir.filepaths)

        # Position of every annotated image inside its day, keyed by the
        # last three path components.
        ind_by_img_path = dict()
        for user_id, days in users.iteritems():
            for date, day in days.iteritems():
                for ind, image in enumerate(day.images):
                    relative_path = '/'.join(image.path.split('/')[-3:])
                    ind_by_img_path[relative_path] = ind

        test_dir = os.path.join(data_dir, fold, 'test')
        train_dir = os.path.join(data_dir, fold, 'train')
        validation_dir = os.path.join(data_dir, fold, 'validation')

        images = read_fold_dir(train_dir) + read_fold_dir(test_dir)
        if os.path.isdir(validation_dir):
            images = images + read_fold_dir(validation_dir)

        num_images = len(images)
        # Clamp to at least 1: for small image sets the product truncates
        # to 0 and the modulo below would raise ZeroDivisionError.
        if progress_percent:
            report_every = max(1, int(num_images * progress_percent))
        else:
            report_every = 0

        print('Extracting temporal features on fold {} for {} + RF on layer {}'.format(
            fold, cnn_model.name, rf_model.layer))

        for i, (label, img_path) in enumerate(images):

            img = load_image(datagen, img_path, target_size)

            predictions = model.predict(img)

            # First sample of the batch is the feature vector fed to the
            # forest.
            features = predictions[0].copy()
            probability = rf.predict_proba([features])[0]

            rpath = os.path.realpath(img_path)
            user_id, date, filename = rpath.split('/')[-3:]

            relative_path = '/'.join([user_id, date, filename])

            img_ind = ind_by_img_path[relative_path]
            image = users[user_id][date].images[img_ind]
            image.features = probability.copy()

            done = i + 1
            if report_every and done % report_every == 0:
                # 100.0 forces true division; integer division printed 0.00%
                # for every line but the last under Python 2.
                print("Progress %3.2f%% (%d/%d)" %
                      (100.0 * done / num_images, done, num_images))

        features_filepath = "features.{}.fold_{}.{}.pkl".format(
            rf_model.name, fold, backend)
        features_filepath = os.path.join(features_dir, features_filepath)
        # HIGHEST_PROTOCOL is a binary protocol, so open in binary mode.
        with open(features_filepath, 'wb') as f:
            pickle.dump(users, f, pickle.HIGHEST_PROTOCOL)

        del model
        if K.backend() == 'tensorflow':
            # Drop the backend graph/session state before the next fold.
            K.clear_session()