Example #1
0
def update_test_files(model, multiple_image_columns=False):
    """
    Re-featurize the test data with `model` and refresh the stored check CSV and array.

    An ImageFeaturizer is built for the given model and run on the single- or
    multiple-image-column test data. Its features are compared against the stored
    check CSV and check array for that model; the result of each comparison is
    logged, and a stored file is overwritten only when it differs from the fresh
    features.

    Parameters
    ----------
    model : str
        The name of one of pic2vec's supported models. It is also substituted into
        the check CSV and check array path templates.

    multiple_image_columns : bool
        A boolean that determines whether to update the csvs and arrays for single or multiple
        image columns

    Returns
    -------
    None
    """
    # Only autosample if updating the csvs and arrays for multiple image columns
    featurizer = ImageFeaturizer(model=model, autosample=multiple_image_columns)

    # Load and featurize the data corresponding to either the single or multiple image columns
    load_data = LOAD_DATA_ARGS_MULT if multiple_image_columns else LOAD_DATA_ARGS_SINGLE
    featurizer.featurize(**load_data)

    # Updating test CSVs
    features = featurizer.features
    csv_template = CHECK_CSV_MULT if multiple_image_columns else CHECK_CSV_SINGLE
    csv_path = csv_template.format(model)

    # pandas reads floating point columns back as float64; cast them to float32 before
    # comparing (presumably the fresh features are float32 -- confirm against featurizer)
    current_csv = pd.read_csv(csv_path)
    float64_cols = current_csv.select_dtypes(include='float64').columns
    current_csv = current_csv.astype({col: 'float32' for col in float64_cols})

    # Check prediction consistency and update files for test CSVs if necessary.
    # NOTE: logging.INFO is the integer level constant; logging.info is the function.
    test_csv_identical = features.equals(current_csv)
    logging.info("Test csv identical for %s?", model)
    logging.info(test_csv_identical)

    if not test_csv_identical:
        features.to_csv(csv_path, index=False)

    # Updating test arrays
    feature_array = featurizer.features.astype(float).values
    array_template = CHECK_ARRAY_MULT if multiple_image_columns else CHECK_ARRAY_SINGLE
    array_path = array_template.format(model)

    # Check prediction consistency and update files for test arrays if necessary
    test_array_identical = np.array_equal(feature_array, np.load(array_path))

    logging.info("Test array identical for %s?", model)
    logging.info(test_array_identical)

    if not test_array_identical:
        np.save(array_path, feature_array)
Example #2
0
def test_load_and_featurize_data_single_column_batch_overflow():
    """Test featurizations and attributes are correct for a single image column

    A default ImageFeaturizer is run with default featurize settings (apart from
    saving features) and compared against the stored squeezenet check array and CSV.
    """
    featurizer = ImageFeaturizer()
    featurizer.featurize(save_features=True, **LOAD_DATA_ARGS)
    expected_array = np.load(CHECK_ARRAY.format('squeezenet'))
    try:
        compare_featurizer_class(
            featurizer,
            (227, 227),
            expected_array,
            featurized=True,
            check_csv=CHECK_CSV.format('squeezenet'),
            **COMPARE_ARGS
        )
    finally:
        # Always clean up generated paths, even when the comparison fails
        remove_generated_paths()
        del featurizer
Example #3
0
def test_load_and_featurize_data_multiple_columns_with_batch_processing(model, size, array_path):
    """Test featurizations and attributes are correct with multiple image columns in batches

    The featurizer for `model` is run on the multiple-column data with a batch size
    of 2 and compared against the array stored at `array_path` and the model's
    multiple-column check CSV.
    """
    # NOTE(review): other call sites in this file spell this keyword `autosample`;
    # confirm which spelling the targeted pic2vec version accepts.
    featurizer = ImageFeaturizer(model=model, auto_sample=True)
    featurizer.featurize(batch_size=2, save_features=True, **LOAD_DATA_ARGS_MULT)
    expected_array = np.load(array_path)

    try:
        compare_featurizer_class(
            featurizer,
            size,
            expected_array,
            featurized=True,
            check_csv=CHECK_CSV_MULT.format(model),
            **COMPARE_ARGS_MULT
        )
    finally:
        # Always clean up generated paths, even when the comparison fails
        remove_generated_paths()
        del featurizer
Example #4
0
def test_load_and_featurize_single_column_no_batch_processing(model, size, array_path):
    """Test featurizations and attributes for a single image column with batch_size=0

    The featurizer for `model` is compared against the array stored at `array_path`
    and the model's single-column check CSV.
    """
    featurizer = ImageFeaturizer(model=model)
    featurizer.featurize(batch_size=0, save_features=True, **LOAD_DATA_ARGS)

    expected_array = np.load(array_path)

    try:
        compare_featurizer_class(
            featurizer,
            size,
            expected_array,
            featurized=True,
            check_csv=CHECK_CSV.format(model),
            **COMPARE_ARGS
        )
    finally:
        # Always clean up generated paths, even when the comparison fails
        remove_generated_paths()
        del featurizer
Example #5
0
def test_load_and_featurize_save_csv():
    """Check that the saved CSVs carry the model/depth/output suffix in their file names

    Both the '_full' and '_features_only' files are expected to exist after saving
    with omit_time=True; any files left behind are removed afterwards.
    """
    featurizer = ImageFeaturizer()
    base, extension = os.path.splitext(CSV_NAME_MULT)
    stem = "{}_{}".format(base, 'squeezenet_depth-1_output-512')
    featurizer.featurize(save_csv=True, save_features=True, omit_time=True,
                         **LOAD_DATA_ARGS_MULT)
    full_check = "{}{}{}".format(stem, '_full', extension)
    feature_check = "{}{}{}".format(stem, '_features_only', extension)
    featurizer.save_csv(save_features=True, omit_time=True)
    try:
        assert os.path.isfile(full_check)
        assert os.path.isfile(feature_check)
    finally:
        remove_generated_paths(assert_not=False)
        # Remove whichever of the expected output files is still present
        for leftover in (feature_check, full_check):
            if os.path.isfile(leftover):
                os.remove(leftover)
Example #6
0
def test_clear_input():
    """Featurize, clear the input with confirm=True, then check the featurizer is empty"""
    featurizer = ImageFeaturizer()
    featurizer.featurize(
        save_features=True,
        omit_time=True,
        omit_model=True,
        omit_depth=True,
        omit_output=True,
        **LOAD_DATA_ARGS
    )
    featurizer.clear_input(confirm=True)
    compare_empty_input(featurizer)
Example #7
0
from pic2vec import ImageFeaturizer
import os

import argparse
# Command-line script: featurize a directory of images with Xception and write the
# resulting features to <path>/features.csv.
parser = argparse.ArgumentParser()
# --path is required: it is both the image source and the directory features.csv is
# written to, so a missing value would only fail later with an obscure TypeError.
parser.add_argument('--path', required=True, help='Path to the images')
# Let argparse do the integer conversion so bad values get a usage error, not a traceback.
parser.add_argument('--depth', type=int, default=2, help='Depth of Xception')
args = parser.parse_args()

image_column_name = 'images'

my_featurizer = ImageFeaturizer(model='xception', depth=args.depth, autosample=True)

featurized_df = my_featurizer.featurize(image_column_name, image_path=args.path)

# index=False keeps the row index out of the CSV
featurized_df.to_csv(os.path.join(args.path, 'features.csv'), index=False)
def test_featurize_first():
    """Calling featurize() with no arguments on a fresh ImageFeaturizer must raise IOError"""
    fresh_featurizer = ImageFeaturizer()
    # No data has been loaded and none is passed in, so featurize() should refuse to run
    with pytest.raises(IOError):
        fresh_featurizer.featurize()