def update_test_files(model, multiple_image_columns=False):
    """
    Regenerate the stored test CSV and test array for one model if they are stale.

    Initializes an ImageFeaturizer for `model`, featurizes the test data, and
    compares the resulting features with the CSV and array currently stored on
    disk. Whether each stored file matches is logged, and any file that does
    not match is overwritten with the fresh features.

    Parameters
    ----------
    model : str
        The name of one of pic2vec's supported models

    multiple_image_columns : bool
        A boolean that determines whether to update the csvs and arrays for
        single or multiple image columns

    Returns
    -------
    None
    """
    # Only autosample if updating the csvs and arrays for multiple image columns
    f = ImageFeaturizer(model=model, autosample=multiple_image_columns)

    # Load and featurize the data corresponding to either the single or multiple image columns
    load_data = LOAD_DATA_ARGS_MULT if multiple_image_columns else LOAD_DATA_ARGS_SINGLE
    f.featurize(**load_data)

    # ---- Updating test CSVs ----
    features = f.features
    test_csv = CHECK_CSV_MULT if multiple_image_columns else CHECK_CSV_SINGLE
    csv_path = test_csv.format(model)

    # read_csv parses floating point columns as float64; cast them to float32 so
    # that DataFrame.equals (which also compares dtypes) can match the features
    current_csv = pd.read_csv(csv_path)
    cols = current_csv.select_dtypes(include='float64').columns
    current_csv = current_csv.astype({col: 'float32' for col in cols})

    # Check prediction consistency and update files for test CSVs if necessary.
    # NOTE: logging.INFO is the integer level constant and is not callable;
    # logging.info is the function that emits the record.
    test_csv_identical = features.equals(current_csv)
    logging.info("Test csv identical for {}?".format(model))
    logging.info(test_csv_identical)

    if not test_csv_identical:
        features.to_csv(csv_path, index=False)

    # ---- Updating test arrays ----
    features = f.features.astype(float).values
    test_array = CHECK_ARRAY_MULT if multiple_image_columns else CHECK_ARRAY_SINGLE
    array_path = test_array.format(model)

    # Check prediction consistency and update files for test arrays if necessary
    test_array_identical = np.array_equal(features, np.load(array_path))
    logging.info("Test array identical for {}?".format(model))
    logging.info(test_array_identical)

    if not test_array_identical:
        np.save(array_path, features)
def test_load_and_featurize_data_single_column_batch_overflow():
    """Featurize the single-image-column data with a default featurizer and no
    explicit batch size, then compare the result against the stored squeezenet
    array and CSV."""
    model_name = 'squeezenet'

    featurizer = ImageFeaturizer()
    featurizer.featurize(save_features=True, **LOAD_DATA_ARGS)
    expected_array = np.load(CHECK_ARRAY.format(model_name))

    try:
        compare_featurizer_class(
            featurizer,
            (227, 227),
            expected_array,
            featurized=True,
            check_csv=CHECK_CSV.format(model_name),
            **COMPARE_ARGS
        )
    finally:
        # Always clean up the csv generated by the test, even on failure
        remove_generated_paths()
        del featurizer
def test_load_and_featurize_data_multiple_columns_with_batch_processing(model, size, array_path):
    """Test featurizations and attributes for each model are correct with multiple image columns.

    The multi-column test data is featurized with ``batch_size=2`` and the
    resulting featurizer is compared against the stored array at `array_path`
    and the stored multi-column CSV for `model`.
    """
    # The keyword is spelled `autosample`, matching the other ImageFeaturizer
    # call sites in this file (previously `auto_sample` here).
    feat = ImageFeaturizer(model=model, autosample=True)
    feat.featurize(batch_size=2, save_features=True, **LOAD_DATA_ARGS_MULT)
    check_array = np.load(array_path)

    try:
        compare_featurizer_class(feat, size, check_array, featurized=True,
                                 check_csv=CHECK_CSV_MULT.format(model), **COMPARE_ARGS_MULT)
    finally:
        # Remove path to the generated csv at the end of the test
        remove_generated_paths()
        del feat
def test_load_and_featurize_single_column_no_batch_processing(model, size, array_path):
    """Featurize the single-image-column data with ``batch_size=0`` and verify
    the featurizer against the stored array and CSV for `model`."""
    featurizer = ImageFeaturizer(model=model)
    featurizer.featurize(batch_size=0, save_features=True, **LOAD_DATA_ARGS)
    expected_array = np.load(array_path)

    try:
        compare_featurizer_class(
            featurizer,
            size,
            expected_array,
            featurized=True,
            check_csv=CHECK_CSV.format(model),
            **COMPARE_ARGS
        )
    finally:
        # Always clean up the csv generated by the test, even on failure
        remove_generated_paths()
        del featurizer
def test_load_and_featurize_save_csv():
    """Check that the featurizer writes its CSVs under the expected robust names.

    Both the "_full" and the "_features_only" CSV must exist after featurizing
    with ``save_csv=True`` and calling ``save_csv`` again explicitly.
    """
    featurizer = ImageFeaturizer()

    # Expected file names: <csv stem>_<model/depth/output tag><suffix><extension>
    base, extension = os.path.splitext(CSV_NAME_MULT)
    tagged_stem = "{}_{}".format(base, 'squeezenet_depth-1_output-512')
    full_check = "{}{}{}".format(tagged_stem, '_full', extension)
    feature_check = "{}{}{}".format(tagged_stem, '_features_only', extension)

    featurizer.featurize(save_csv=True, save_features=True, omit_time=True,
                         **LOAD_DATA_ARGS_MULT)
    featurizer.save_csv(save_features=True, omit_time=True)

    try:
        assert os.path.isfile(full_check)
        assert os.path.isfile(feature_check)
    finally:
        remove_generated_paths(assert_not=False)
        # Remove whichever of the two generated CSVs is still on disk
        for leftover in (feature_check, full_check):
            if os.path.isfile(leftover):
                os.remove(leftover)
def test_clear_input():
    """Featurize the single-column data, clear the input with confirmation, and
    check the featurizer with `compare_empty_input`."""
    featurizer = ImageFeaturizer()
    featurizer.featurize(
        save_features=True,
        omit_time=True,
        omit_model=True,
        omit_depth=True,
        omit_output=True,
        **LOAD_DATA_ARGS
    )

    featurizer.clear_input(confirm=True)

    compare_empty_input(featurizer)
# Example script: featurize a directory of images with an Xception-based
# ImageFeaturizer and write the result to <path>/features.csv.
#
# Usage: python <script> --path /path/to/images [--depth N]
import argparse
import os

from pic2vec import ImageFeaturizer

parser = argparse.ArgumentParser()
# --path is required: it is both the featurizer's image_path and the directory
# the output CSV is written to, so the script cannot do anything without it.
parser.add_argument('--path', required=True, help='Path to the images')
# Let argparse convert and validate the depth, so a non-integer value produces
# a usage error instead of a ValueError traceback.
parser.add_argument('--depth', type=int, default=2, help='Depth of Xception')
args = parser.parse_args()

image_column_name = 'images'

my_featurizer = ImageFeaturizer(model='xception', depth=args.depth, autosample=True)

# The object returned by featurize() is written out as a CSV next to the images
featurized_df = my_featurizer.featurize(image_column_name, image_path=args.path)
featurized_df.to_csv(os.path.join(args.path, 'features.csv'), index=False)
def test_featurize_first():
    """Calling featurize() on a fresh featurizer, with no data arguments, must
    raise an IOError."""
    featurizer = ImageFeaturizer()

    # No data has been loaded or passed in, so featurizing has to fail
    with pytest.raises(IOError):
        featurizer.featurize()