Esempio n. 1
0
def test_load_data_single_column():
    """Test that the featurizer saves attributes correctly after loading data"""
    f = ImageFeaturizer()
    try:
        f.load_data(**LOAD_DATA_ARGS)
        compare_featurizer_class(f, (227, 227), np.zeros((1)), **COMPARE_ARGS)
    finally:
        # Run the cleanup even when loading or the comparison above fails, so a
        # failing run does not leave generated paths behind for later tests.
        remove_generated_paths()
def test_load_data_single_column():
    """Test that the featurizer saves attributes correctly after loading data"""
    f = ImageFeaturizer()
    try:
        f.load_data(**LOAD_DATA_ARGS)
        compare_featurizer_class(f, (227, 227), np.zeros((1)), **COMPARE_ARGS)
    finally:
        # Remove the generated csv directory even when loading or the comparison
        # above fails, so a failing run does not leave it behind.
        if os.path.isdir('tests/ImageFeaturizer_testing/csv_tests'):
            shutil.rmtree('tests/ImageFeaturizer_testing/csv_tests')
Esempio n. 3
0
def update_test_files(model, multiple_image_columns=False):
    """
    Re-featurize the test data with the given model and refresh the stored check files.

    The function builds an ImageFeaturizer for `model`, featurizes either the single or the
    multiple image column test data, and compares the result with the stored check CSV and
    check array. It logs whether each one is identical and overwrites the stored file when
    it is not.

    Parameters
    ----------
    model : str
        The model name passed to ImageFeaturizer and substituted into the check file paths

    multiple_image_columns : bool
        A boolean that determines whether to update the csvs and arrays for single or multiple
        image columns

    Returns
    -------
    None
    """
    # Only autosample if updating the csvs and arrays for multiple image columns
    f = ImageFeaturizer(model=model, autosample=multiple_image_columns)

    # Load and featurize the data corresponding to either the single or multiple image columns
    load_data = LOAD_DATA_ARGS_MULT if multiple_image_columns else LOAD_DATA_ARGS_SINGLE
    f.featurize(**load_data)

    # Updating test CSVs
    features = f.features
    test_csv = CHECK_CSV_MULT if multiple_image_columns else CHECK_CSV_SINGLE
    csv_path = test_csv.format(model)

    # Cast the float64 columns of the stored CSV to float32 before comparing
    # (presumably because the featurizer output is float32 -- confirm).
    current_csv = pd.read_csv(csv_path)
    cols = current_csv.select_dtypes(include='float64').columns
    current_csv = current_csv.astype({col: 'float32' for col in cols})

    # Check prediction consistency and update files for test CSVs if necessary.
    # NOTE: logging.info is the logging call; logging.INFO is the integer level
    # constant and raises TypeError when called.
    test_csv_identical = features.equals(current_csv)
    logging.info("Test csv identical for {}?".format(model))
    logging.info(test_csv_identical)

    if not test_csv_identical:
        features.to_csv(csv_path, index=False)

    # Updating test arrays
    features = f.features.astype(float).values
    test_array = CHECK_ARRAY_MULT if multiple_image_columns else CHECK_ARRAY_SINGLE
    array_path = test_array.format(model)

    # Check prediction consistency and update files for test arrays if necessary
    test_array_identical = np.array_equal(features, np.load(array_path))

    logging.info("Test array identical for {}?".format(model))
    logging.info(test_array_identical)

    if not test_array_identical:
        np.save(array_path, features)
Esempio n. 4
0
def test_load_and_featurize_data_single_column_batch_overflow():
    """Featurize LOAD_DATA_ARGS with a default featurizer and compare to the squeezenet checks"""
    featurizer = ImageFeaturizer()
    featurizer.featurize(save_features=True, **LOAD_DATA_ARGS)
    expected_array = np.load(CHECK_ARRAY.format('squeezenet'))

    try:
        expected_csv = CHECK_CSV.format('squeezenet')
        compare_featurizer_class(
            featurizer,
            (227, 227),
            expected_array,
            featurized=True,
            check_csv=expected_csv,
            **COMPARE_ARGS
        )
    finally:
        # Clean up whatever the test generated, whether or not the comparison passed
        remove_generated_paths()
        del featurizer
Esempio n. 5
0
def test_load_and_featurize_data_multiple_columns_with_batch_processing(model, size, array_path):
    """Featurize LOAD_DATA_ARGS_MULT with batch_size=2 and compare to the stored checks"""
    featurizer = ImageFeaturizer(model=model, auto_sample=True)
    featurizer.featurize(batch_size=2, save_features=True, **LOAD_DATA_ARGS_MULT)
    expected_array = np.load(array_path)

    try:
        expected_csv = CHECK_CSV_MULT.format(model)
        compare_featurizer_class(
            featurizer,
            size,
            expected_array,
            featurized=True,
            check_csv=expected_csv,
            **COMPARE_ARGS_MULT
        )
    finally:
        # Clean up whatever the test generated, whether or not the comparison passed
        remove_generated_paths()
        del featurizer
Esempio n. 6
0
def test_load_and_featurize_single_column_no_batch_processing(model, size, array_path):
    """Featurize LOAD_DATA_ARGS with batch_size=0 and compare to the stored checks"""
    featurizer = ImageFeaturizer(model=model)
    featurizer.featurize(batch_size=0, save_features=True, **LOAD_DATA_ARGS)
    expected_array = np.load(array_path)

    try:
        expected_csv = CHECK_CSV.format(model)
        compare_featurizer_class(
            featurizer,
            size,
            expected_array,
            featurized=True,
            check_csv=expected_csv,
            **COMPARE_ARGS
        )
    finally:
        # Clean up whatever the test generated, whether or not the comparison passed
        remove_generated_paths()
        del featurizer
def test_load_and_featurize_single_column(model, size, array_path):
    """Load and featurize LOAD_DATA_ARGS with `model` and compare to the stored check array"""
    generated_dir = 'tests/ImageFeaturizer_testing/csv_tests'

    featurizer = ImageFeaturizer(model=model)
    featurizer.load_and_featurize_data(
        save_features=True,
        omit_time=True,
        omit_model=True,
        omit_depth=True,
        omit_output=True,
        **LOAD_DATA_ARGS
    )
    expected_array = np.load(array_path)

    try:
        compare_featurizer_class(featurizer, size, expected_array, **COMPARE_ARGS)
    finally:
        # Drop the generated csv directory, if any, once the comparison is done
        if os.path.isdir(generated_dir):
            shutil.rmtree(generated_dir)
        del featurizer
def test_writing_features_to_csv_with_robust_naming():
    """Check that the '_full' and '_features_only' files exist under the robust csv name"""
    generated_dir = 'tests/ImageFeaturizer_testing/csv_tests'

    featurizer = ImageFeaturizer()
    featurizer.load_and_featurize_data(save_features=True,
                                       omit_time=True,
                                       **LOAD_DATA_ARGS_MULT)

    robust_name = '{}_squeezenet_depth-1_output-512'.format(CSV_NAME_MULT)
    # Order matters: the '_full' file is asserted and removed before '_features_only'
    expected_files = (
        '{}_full'.format(robust_name),
        '{}_features_only'.format(robust_name),
    )

    try:
        for expected_file in expected_files:
            assert os.path.isfile(expected_file)
    finally:
        if os.path.isdir(generated_dir):
            shutil.rmtree(generated_dir)

        for expected_file in expected_files:
            if os.path.isfile(expected_file):
                os.remove(expected_file)
def test_load_and_featurize_data_multiple_columns(model, size, array_path):
    """Load and featurize LOAD_DATA_ARGS_MULT with `model` and compare to the check array"""
    generated_dir = 'tests/ImageFeaturizer_testing/csv_tests'

    featurizer = ImageFeaturizer(model=model, auto_sample=True)
    featurizer.load_and_featurize_data(
        save_features=True,
        omit_time=True,
        omit_model=True,
        omit_depth=True,
        omit_output=True,
        **LOAD_DATA_ARGS_MULT
    )
    expected_array = np.load(array_path)

    try:
        compare_featurizer_class(featurizer, size, expected_array, **COMPARE_ARGS_MULT)
    finally:
        # Drop the generated csv directory and both saved csv files, if present
        if os.path.isdir(generated_dir):
            shutil.rmtree(generated_dir)

        for suffix in ('_full', '_features_only'):
            leftover = '{}{}'.format(CSV_NAME_MULT, suffix)
            if os.path.isfile(leftover):
                os.remove(leftover)
        del featurizer
Esempio n. 10
0
def test_load_then_featurize_data_multiple_columns():
    """Load LOAD_DATA_ARGS_MULT, featurize the preloaded data, and compare to squeezenet checks"""
    featurizer = ImageFeaturizer(auto_sample=True)
    featurizer.load_data(**LOAD_DATA_ARGS_MULT)
    featurizer.featurize_preloaded_data(save_features=True)
    expected_array = np.load(CHECK_ARRAY_MULT.format('squeezenet'))

    try:
        expected_csv = CHECK_CSV_MULT.format('squeezenet')
        compare_featurizer_class(
            featurizer,
            (227, 227),
            expected_array,
            featurized=True,
            check_csv=expected_csv,
            **COMPARE_ARGS_MULT
        )
    finally:
        # Clean up whatever the test generated, whether or not the comparison passed
        remove_generated_paths()
        del featurizer
Esempio n. 11
0
def test_load_and_featurize_save_csv():
    """Check that featurize plus save_csv leave the '_full' and '_features_only' csv files"""
    featurizer = ImageFeaturizer()

    # Split the extension off so the robust-name suffixes can be inserted before it
    stem, extension = os.path.splitext(CSV_NAME_MULT)
    robust_stem = "{}_{}".format(stem, 'squeezenet_depth-1_output-512')

    featurizer.featurize(save_csv=True, save_features=True, omit_time=True,
                         **LOAD_DATA_ARGS_MULT)

    full_check = "{}_full{}".format(robust_stem, extension)
    feature_check = "{}_features_only{}".format(robust_stem, extension)

    featurizer.save_csv(save_features=True, omit_time=True)

    try:
        assert os.path.isfile(full_check)
        assert os.path.isfile(feature_check)
    finally:
        remove_generated_paths(assert_not=False)
        # Remove the features-only file first, then the full file
        for leftover in (feature_check, full_check):
            if os.path.isfile(leftover):
                os.remove(leftover)
Esempio n. 12
0
def create_numpy_arrays(model):
    """Featurize LOAD_DATA_ARGS_MULT with `model`, save the result as the check array, and
    return the featurizer"""
    featurizer = ImageFeaturizer(model=model, auto_sample=True)
    featurizer.load_and_featurize_data(**LOAD_DATA_ARGS_MULT)

    # The destination path is the CHECK_ARRAY_MULT template filled in with the model name
    destination = CHECK_ARRAY_MULT.format(model)
    np.save(destination, featurizer.featurized_data)

    return featurizer
Esempio n. 13
0
def test_clear_input_no_confirm():
    """Expect a ValueError when clear_input is called without arguments"""
    featurizer = ImageFeaturizer()

    with pytest.raises(ValueError):
        featurizer.clear_input()
Esempio n. 14
0
def test_clear_input():
    """Featurize LOAD_DATA_ARGS, clear the input with confirm=True, and check it is empty"""
    featurizer = ImageFeaturizer()
    featurizer.featurize(
        save_features=True,
        omit_time=True,
        omit_model=True,
        omit_depth=True,
        omit_output=True,
        **LOAD_DATA_ARGS
    )

    featurizer.clear_input(confirm=True)

    compare_empty_input(featurizer)
Esempio n. 15
0
def test_load_data_multiple_columns():
    """Load LOAD_DATA_ARGS_MULT with auto_sample=True and compare the featurizer attributes"""
    featurizer = ImageFeaturizer(auto_sample=True)
    featurizer.load_data(**LOAD_DATA_ARGS_MULT)

    # Nothing has been featurized at this point; a one-element zero array is the expected data
    placeholder = np.zeros(1)
    compare_featurizer_class(featurizer, (227, 227), placeholder, **COMPARE_ARGS_MULT)
Esempio n. 16
0
def test_load_data_multiple_columns_no_csv():
    """Expect a ValueError when load_data is called with LOAD_DATA_ARGS_MULT_ERROR"""
    featurizer = ImageFeaturizer()

    with pytest.raises(ValueError):
        featurizer.load_data(**LOAD_DATA_ARGS_MULT_ERROR)
Esempio n. 17
0
def testing_featurizer_build():
    """Compare a freshly built featurizer against the expected empty attribute values"""
    featurizer = ImageFeaturizer()

    # Expected values for an untouched featurizer, passed positionally to the comparison helper
    empty_size = (0, 0)
    empty_data = np.zeros(1)
    compare_featurizer_class(featurizer, empty_size, empty_data, 0, '', False, '', {}, 1)
Esempio n. 18
0
def test_featurize_first():
    """Expect an IOError when featurize_preloaded_data is called before any data is loaded"""
    featurizer = ImageFeaturizer()

    # No load_data call has been made on this featurizer
    with pytest.raises(IOError):
        featurizer.featurize_preloaded_data()
Esempio n. 19
0
from pic2vec import ImageFeaturizer
import os

import argparse
# Command-line interface. --path is mandatory: it is both the image folder handed
# to the featurizer and the folder features.csv is written into, so a missing
# value is rejected by argparse up front instead of failing after the model is built.
parser = argparse.ArgumentParser()
parser.add_argument('--path', required=True, help='Path to the images')
# Let argparse do the integer conversion so bad values produce a usage error.
parser.add_argument('--depth', type=int, default=2, help='Depth of Xception')
args = parser.parse_args()

image_column_name = 'images'

my_featurizer = ImageFeaturizer(model='xception', depth=args.depth, autosample=True)

featurized_df = my_featurizer.featurize(image_column_name, image_path=args.path)

# Write the featurized frame next to the images, without the index column
featurized_df.to_csv(os.path.join(args.path, 'features.csv'), index=False)
Esempio n. 20
0
def main():
    """Featurize a folder of images and save a 2-D embedding plot of them.

    Parses the command-line options, extracts feature vectors with
    ImageFeaturizer, writes them to a tab-separated csv, reduces them with
    TruncatedSVD (32 components) followed by TSNE (2 components), and saves a
    scatter plot whose points are annotated with the result of getImage for
    each image path.

    Returns
    -------
    None
    """
    # Local import so this function does not depend on the module importing os
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument('--infolder',
                        default='../../original/Suomi/Kunta/',
                        help='folder containing the source images')
    parser.add_argument('--coatfolder',
                        default=None,
                        help='folder containing the output images')
    parser.add_argument('--outimage',
                        default='embedding.png',
                        help='Name of the output image')
    parser.add_argument('--outcsv',
                        default='vectors.csv',
                        help='Name of the csv output')
    parser.add_argument('--dpi',
                        default=300, type=int,
                        help='Resolution of the output image')
    parser.add_argument('--zoom',
                        default=0.1, type=float,
                        help='size of a coat in output image')
    parser.add_argument('--depth',
                        default=1, type=int,
                        help='Network depth')
    parser.add_argument('--model',
                        default='squeezenet',
                        help='Network type')

    args = parser.parse_args()

    image_path = args.infolder
    outimage = args.outimage
    # Fall back to the input folder when no separate coat folder is given
    coatfolder = image_path if args.coatfolder is None else args.coatfolder

    #    Extract vectors
    featurizer = ImageFeaturizer(depth=args.depth, autosample=False, model=args.model)
    featurizer.load_data('images', image_path=image_path)
    featurize_preloaded_df = featurizer.featurize_preloaded_data(save_features=True)

    #    Output to csv file
    featurize_preloaded_df.to_csv(path_or_buf=args.outcsv, sep='\t')
    print('Vector data saved to {}'.format(args.outcsv))

    # The first column is used as the image file name
    images = featurize_preloaded_df.values[:, 0]

    # os.path.join also works when the folder is given without a trailing separator
    paths = [os.path.join(coatfolder, x) for x in images]

    # Columns from index 2 onward are used as the feature matrix
    # (presumably the first two columns are not features -- confirm)
    X = featurize_preloaded_df.values[:, 2:]

    #    First dimensionality reduction to 32 dimensions using truncated singular value decomposition
    Y = TruncatedSVD(32).fit_transform(X)

    #    For plotting dimensionality reduction to 2D with TSNE
    Y = TSNE(n_components=2, perplexity=30.0).fit_transform(Y)

    #    Plot the output image
    fig, ax = plt.subplots()
    # Apply the tight layout to the figure that is saved; calling
    # plt.tight_layout() before the figure exists only affects a stray empty one.
    fig.tight_layout()
    ax.set_axis_off()
    ax.scatter(Y[:, 0], Y[:, 1])

    missing = 0
    for x0, y0, path in zip(Y[:, 0], Y[:, 1], paths):
        try:
            ab = AnnotationBbox(getImage(path, zoom=args.zoom), (x0, y0), frameon=False)
            ax.add_artist(ab)
        except FileNotFoundError:
            # Best effort: the point is still drawn by the scatter call above
            missing += 1

    if missing:
        print('{} image(s) not found under {}; left as plain points.'.format(missing, coatfolder))

    fig.savefig(outimage, dpi=args.dpi, bbox_inches='tight')
    print("{} saved.".format(outimage))