def test(data_dir, results_dir, base_model, start_fold=None, end_fold=10):
    """Evaluate ``base_model`` fold by fold and write one results CSV per fold.

    For every fold from ``start_fold`` to ``end_fold`` (inclusive) the
    fold-specific weights are loaded, every image listed by
    ``read_fold_dir(<data_dir>/<fold>/test)`` is run through the network and
    the ``(image path, label, predictions)`` triples are handed to
    ``write_results``.

    Args:
        data_dir: Directory containing one sub-directory per fold, named
            with the two-digit zero-padded fold number.
        results_dir: Directory that receives the per-fold CSV files.
        base_model: Model descriptor. This function reads its ``name``,
            ``img_height``, ``img_width`` and ``best_weights`` (a format
            string taking the fold name) and calls its ``load`` method.
        start_fold: First fold to evaluate. When falsy it is obtained from
            ``current_fold(results_dir, base_model.name + '.fold')``.
        end_fold: Last fold to evaluate (inclusive).
    """
    first_fold = start_fold
    if not first_fold:
        first_fold = current_fold(results_dir, base_model.name + '.fold')

    backend_tag = 'tf' if K.backend() == 'tensorflow' else 'th'
    input_size = (base_model.img_height, base_model.img_width)
    rescaler = ImageDataGenerator(rescale=1. / 255)

    for fold_number in range(first_fold, end_fold + 1):
        fold = str(fold_number).zfill(2)
        print('Testing fold {} for {}'.format(fold, base_model.name))

        model = base_model.load(weights=base_model.best_weights.format(fold))

        # One (path, label, prediction) triple per test image, in the order
        # returned by read_fold_dir.
        labelled_images = read_fold_dir(os.path.join(data_dir, fold, 'test'))
        results = [
            (img_path,
             label,
             model.predict(load_image(rescaler, img_path, input_size)))
            for label, img_path in labelled_images
        ]

        csv_name = "{}.fold_{}.{}.csv".format(
            base_model.name, fold, backend_tag)
        write_results(results, os.path.join(results_dir, csv_name))

        # Drop the network before loading the next fold's weights.
        del model
        if K.backend() == 'tensorflow':
            K.clear_session()
def test_on_cnn(data_dir, results_dir, cnn_model, rf_model, start_fold=None,
                end_fold=10, progress_percent=.1):
    """Evaluate a random forest fed with CNN layer activations, fold by fold.

    For every fold the CNN is loaded with its fold-specific weights and
    wrapped in a ``Model`` that outputs the layers named in
    ``rf_model.layers``. For each test image those outputs are flattened and
    concatenated into one feature vector, which the fold's random forest
    classifies. The ``(image path, label, prediction)`` triples are handed to
    ``write_results``.

    Args:
        data_dir: Directory containing one sub-directory per fold, named
            with the two-digit zero-padded fold number.
        results_dir: Directory that receives the per-fold CSV files.
        cnn_model: CNN descriptor. Reads ``name``, ``img_height``,
            ``img_width`` and ``best_weights`` (format string taking the
            fold name) and calls ``load``.
        rf_model: Random-forest descriptor. Reads ``name``, ``layers``
            (names of CNN layers used as features) and ``weights`` (format
            string taking the fold name).
        start_fold: First fold to evaluate. When falsy it is obtained from
            ``current_fold(results_dir, rf_model.name + '.fold')``.
        end_fold: Last fold to evaluate (inclusive).
        progress_percent: Fraction of the test set between two progress
            messages. A falsy value disables progress reporting.
    """
    if not start_fold:
        start_fold = current_fold(results_dir, rf_model.name + '.fold')

    backend = 'tf' if K.backend() == 'tensorflow' else 'th'
    target_size = (cnn_model.img_height, cnn_model.img_width)
    test_datagen = ImageDataGenerator(rescale=1. / 255)
    # With a single requested layer model.predict() is treated as returning
    # one bare array, so it is wrapped in a list below.
    single_output = len(rf_model.layers) == 1

    for fold_number in range(start_fold, end_fold + 1):
        fold = str(fold_number).zfill(2)

        weights = cnn_model.best_weights.format(fold)
        base_model = cnn_model.load(weights=weights)
        layers_by_name = {layer.name: layer for layer in base_model.layers}
        outputs = [layers_by_name[name].output for name in rf_model.layers]
        model = Model(inputs=base_model.input, outputs=outputs)

        rf = load_random_forest(rf_model.weights.format(fold))

        test_dir = os.path.join(data_dir, fold, 'test')
        test_images = read_fold_dir(test_dir)
        num_test_images = len(test_images)

        # Number of images between two progress messages. Clamped to at
        # least 1 so small folds do not trigger a modulo-by-zero.
        if progress_percent:
            progress_step = max(1, int(num_test_images * progress_percent))
        else:
            progress_step = 0

        print('Testing fold {} for {} + RF on layers {}'.format(
            fold, cnn_model.name, ', '.join(rf_model.layers)))

        results = []
        for i, (label, img_path) in enumerate(test_images):
            img = load_image(test_datagen, img_path, target_size)
            predictions = model.predict(img)
            if single_output:
                predictions = [predictions]

            # Flatten the first batch item of every layer output and join
            # them in one pass. The float64 cast keeps the dtype that the
            # former np.append-onto-an-empty-array loop produced.
            features = np.concatenate(
                [np.ravel(p[0]) for p in predictions]).astype(np.float64)

            # Builtin int is what the removed np.int alias referred to.
            prediction = rf.predict([features])[0].astype(int)
            results.append((img_path, label, prediction))

            if progress_step and (i + 1) % progress_step == 0:
                # 100.0 forces true division under Python 2 as well.
                print("Progress %3.2f%% (%d/%d)" % (
                    100.0 * (i + 1) / num_test_images,
                    i + 1, num_test_images))

        results_fname = "{}.fold_{}.{}.csv".format(
            rf_model.name, fold, backend)
        results_filepath = os.path.join(results_dir, results_fname)
        write_results(results, results_filepath)

        # Drop the network before loading the next fold's weights.
        del model
        if K.backend() == 'tensorflow':
            K.clear_session()
def extract_castro_features(cnn_model, data_dir, features_dir, start_fold=1,
                            end_fold=5, num_categories=21, num_bins=10,
                            progress_percent=.05):
    """Attach CNN, time and histogram features to every annotated image.

    For every fold the annotations are reloaded, the CNN is loaded with its
    fold-specific weights, and each image of the fold's train, test and
    (if present) validation directories receives a feature vector of length
    ``num_categories + 3 + 3 * num_bins``:

    * ``[0, num_categories)``: output of ``model.predict`` for the image,
    * next three entries: the annotated image's ``hour``, ``minute`` and
      ``weekday``,
    * remaining ``3 * num_bins`` entries: the result of
      ``get_histogram(image.path, num_bins)`` (presumably ``num_bins`` per
      colour channel; confirm against ``get_histogram``).

    The annotated ``users`` structure is then pickled to
    ``features.<cnn name>.fold_<fold>.<backend>.pkl`` in ``features_dir``.

    Args:
        cnn_model: CNN descriptor. Reads ``name``, ``img_height``,
            ``img_width`` and ``weights`` (format string taking the fold
            name) and calls ``load``.
        data_dir: Directory containing one sub-directory per fold, named
            with the two-digit zero-padded fold number.
        features_dir: Directory that receives the pickled features.
        start_fold: First fold to process.
        end_fold: Last fold to process (inclusive).
        num_categories: Number of leading entries filled from the CNN output.
        num_bins: Histogram bins argument passed to ``get_histogram``.
        progress_percent: Fraction of the images between two progress
            messages. A falsy value disables progress reporting.
    """
    backend = 'tf' if K.backend() == 'tensorflow' else 'th'
    target_size = (cnn_model.img_height, cnn_model.img_width)
    datagen = ImageDataGenerator(rescale=1. / 255)
    num_features = num_categories + 3 * num_bins + 3

    for fold_number in range(start_fold, end_fold + 1):
        fold = str(fold_number).zfill(2)

        weights = cnn_model.weights.format(fold)
        model = cnn_model.load(weights=weights)

        # Fresh annotations per fold, so each pickle only carries the
        # features computed with this fold's weights.
        users = IO.load_annotations(ntcir.filepaths)

        # Map "<user>/<date>/<filename>" to the image's position within its
        # day, so files on disk can be matched to their annotation.
        ind_by_img_path = dict()
        for user_id, days in users.items():
            for date, day in days.items():
                for ind, annotated in enumerate(day.images):
                    relative_path = '/'.join(annotated.path.split('/')[-3:])
                    ind_by_img_path[relative_path] = ind

        test_dir = os.path.join(data_dir, fold, 'test')
        train_dir = os.path.join(data_dir, fold, 'train')
        validation_dir = os.path.join(data_dir, fold, 'validation')

        images = read_fold_dir(train_dir) + read_fold_dir(test_dir)
        if os.path.isdir(validation_dir):
            images = images + read_fold_dir(validation_dir)
        num_images = len(images)

        # Number of images between two progress messages. Clamped to at
        # least 1 so small folds do not trigger a modulo-by-zero.
        if progress_percent:
            progress_step = max(1, int(num_images * progress_percent))
        else:
            progress_step = 0

        print('Extracting temporal features on fold {} for {}'.format(
            fold, cnn_model.name))

        for i, (label, img_path) in enumerate(images):
            # Resolve the annotated image FIRST. Reading hour, minute,
            # weekday and path before this lookup used the image left over
            # from the previous iteration.
            rpath = os.path.realpath(img_path)
            user_id, date, filename = rpath.split('/')[-3:]
            relative_path = '/'.join([user_id, date, filename])
            img_ind = ind_by_img_path[relative_path]
            image = users[user_id][date].images[img_ind]

            img = load_image(datagen, img_path, target_size)

            features = np.zeros(num_features)
            features[:num_categories] = model.predict(img)
            features[num_categories] = image.hour
            features[num_categories + 1] = image.minute
            features[num_categories + 2] = image.weekday
            features[num_categories + 3:] = get_histogram(
                image.path, num_bins)
            image.features = features

            if progress_step and (i + 1) % progress_step == 0:
                # 100.0 forces true division under Python 2 as well.
                print("Progress %3.2f%% (%d/%d)" % (
                    100.0 * (i + 1) / num_images, i + 1, num_images))

        # NOTE(review): the original formatted this name with
        # `rf_model.name`, but this function has no `rf_model` parameter
        # or local; the CNN name is used instead. Confirm no module-level
        # `rf_model` was being relied on for the file name.
        features_filename = "features.{}.fold_{}.{}.pkl".format(
            cnn_model.name, fold, backend)
        features_filepath = os.path.join(features_dir, features_filename)

        # HIGHEST_PROTOCOL is a binary format, so the file must be opened
        # in binary mode.
        with open(features_filepath, 'wb') as f:
            pickle.dump(users, f, pickle.HIGHEST_PROTOCOL)

        # Drop the network before loading the next fold's weights.
        del model
        if K.backend() == 'tensorflow':
            K.clear_session()
def extract_rf_features(data_dir, features_dir, cnn_model, rf_model,
                        start_fold=1, end_fold=5, progress_percent=.1):
    """Attach random-forest class probabilities to every annotated image.

    For every fold the CNN is loaded with its fold-specific weights and
    wrapped in a ``Model`` that outputs the single layer ``rf_model.layer``.
    Each image of the fold's train, test and (if present) validation
    directories is run through that model; the first batch item of the
    output is passed to the fold's random forest, and the resulting
    ``predict_proba`` row is stored as ``image.features`` on the matching
    annotation. The annotated ``users`` structure is then pickled to
    ``features.<rf name>.fold_<fold>.<backend>.pkl`` in ``features_dir``.

    Args:
        data_dir: Directory containing one sub-directory per fold, named
            with the two-digit zero-padded fold number.
        features_dir: Directory that receives the pickled features.
        cnn_model: CNN descriptor. Reads ``name``, ``img_height``,
            ``img_width`` and ``weights`` (format string taking the fold
            name) and calls ``load``.
        rf_model: Random-forest descriptor. Reads ``name``, ``layer`` (name
            of the CNN layer used as features) and ``weights`` (format
            string taking the fold name).
        start_fold: First fold to process.
        end_fold: Last fold to process (inclusive).
        progress_percent: Fraction of the images between two progress
            messages. A falsy value disables progress reporting.
    """
    backend = 'tf' if K.backend() == 'tensorflow' else 'th'
    target_size = (cnn_model.img_height, cnn_model.img_width)
    datagen = ImageDataGenerator(rescale=1. / 255)

    for fold_number in range(start_fold, end_fold + 1):
        fold = str(fold_number).zfill(2)

        weights = cnn_model.weights.format(fold)
        base_model = cnn_model.load(weights=weights)
        layers_by_name = {layer.name: layer for layer in base_model.layers}
        outputs = [layers_by_name[rf_model.layer].output]
        model = Model(inputs=base_model.input, outputs=outputs)

        rf = load_random_forest(rf_model.weights.format(fold))

        # Fresh annotations per fold, so each pickle only carries the
        # features computed with this fold's weights.
        users = IO.load_annotations(ntcir.filepaths)

        # Map "<user>/<date>/<filename>" to the image's position within its
        # day, so files on disk can be matched to their annotation.
        ind_by_img_path = dict()
        for user_id, days in users.items():
            for date, day in days.items():
                for ind, annotated in enumerate(day.images):
                    relative_path = '/'.join(annotated.path.split('/')[-3:])
                    ind_by_img_path[relative_path] = ind

        test_dir = os.path.join(data_dir, fold, 'test')
        train_dir = os.path.join(data_dir, fold, 'train')
        validation_dir = os.path.join(data_dir, fold, 'validation')

        images = read_fold_dir(train_dir) + read_fold_dir(test_dir)
        if os.path.isdir(validation_dir):
            images = images + read_fold_dir(validation_dir)
        num_images = len(images)

        # Number of images between two progress messages. Clamped to at
        # least 1 so small folds do not trigger a modulo-by-zero.
        if progress_percent:
            progress_step = max(1, int(num_images * progress_percent))
        else:
            progress_step = 0

        print('Extracting temporal features on fold {} for {} + RF on layer {}'.format(
            fold, cnn_model.name, rf_model.layer))

        for i, (label, img_path) in enumerate(images):
            img = load_image(datagen, img_path, target_size)
            predictions = model.predict(img)

            # First batch item of the layer output is the RF input.
            features = predictions[0].copy()
            probability = rf.predict_proba([features])[0]

            rpath = os.path.realpath(img_path)
            user_id, date, filename = rpath.split('/')[-3:]
            relative_path = '/'.join([user_id, date, filename])
            img_ind = ind_by_img_path[relative_path]
            image = users[user_id][date].images[img_ind]
            image.features = probability.copy()

            if progress_step and (i + 1) % progress_step == 0:
                # 100.0 forces true division under Python 2 as well.
                print("Progress %3.2f%% (%d/%d)" % (
                    100.0 * (i + 1) / num_images, i + 1, num_images))

        features_filename = "features.{}.fold_{}.{}.pkl".format(
            rf_model.name, fold, backend)
        features_filepath = os.path.join(features_dir, features_filename)

        # HIGHEST_PROTOCOL is a binary format, so the file must be opened
        # in binary mode.
        with open(features_filepath, 'wb') as f:
            pickle.dump(users, f, pickle.HIGHEST_PROTOCOL)

        # Drop the network before loading the next fold's weights.
        del model
        if K.backend() == 'tensorflow':
            K.clear_session()