Esempio n. 1
0
    def test_train_test_split(self):
        # Exercises `TrainingSelector.train_test_split` on a selector built
        # from the fixture's explicit list of image file paths.
        selector = dtr.TrainingSelector(img_filepaths=self.img_filepaths)

        # the descriptor feature matrix must have the same length as the list
        # of images it was computed from
        self.assertEqual(len(selector.descr_feature_matrix),
                         len(self.img_filepaths))

        # `method` argument: the default call must yield both the 'train' and
        # the 'img_cluster' keys...
        default_split = selector.train_test_split()
        for key in ('train', 'img_cluster'):
            self.assertIn(key, default_split)
        # ...whereas with 'cluster-I' only 'train' must be present
        cluster_i_split = selector.train_test_split(method='cluster-I')
        self.assertIn('train', cluster_i_split)
        self.assertNotIn('img_cluster', cluster_i_split)

        # `return_evr` argument: the call must then return a pair made of the
        # split data frame and a float (`evr`, presumably the explained
        # variance ratio -- the former comment read "expected")
        evr_split, evr = selector.train_test_split(return_evr=True)
        self.assertIsInstance(evr_split, pd.DataFrame)
        self.assertIsInstance(evr, float)
Esempio n. 2
0
    def test_init(self):
        # Checks how `TrainingSelector.__init__` resolves its arguments.

        # when `img_filepaths` is given, `img_dir` and `img_filename_pattern`
        # must have no effect on the resulting `img_filepaths` attribute
        reference = dtr.TrainingSelector(img_filepaths=self.img_filepaths)
        ignored_kws_combinations = (
            {'img_dir': 'foo'},
            {'img_filename_pattern': 'foo'},
            {'img_dir': 'foo', 'img_filename_pattern': 'foo'},
        )
        for ignored_kws in ignored_kws_combinations:
            self.assertEqual(
                reference.img_filepaths,
                dtr.TrainingSelector(img_filepaths=self.img_filepaths,
                                     **ignored_kws).img_filepaths)

        # without `img_filepaths`, an inexistent `img_dir` must give an empty
        # `img_filepaths` attribute...
        missing_dir_selector = dtr.TrainingSelector(img_dir='foo')
        self.assertEqual(len(missing_dir_selector.img_filepaths), 0)
        # ...and so must an `img_filename_pattern` that matches no image of
        # an existing directory
        unmatched_pattern_selector = dtr.TrainingSelector(
            img_dir=self.img_dir, img_filename_pattern='foo')
        self.assertEqual(len(unmatched_pattern_selector.img_filepaths), 0)
        # with the existing directory and the default pattern, at least one
        # image must be found
        valid_dir_selector = dtr.TrainingSelector(img_dir=self.img_dir)
        self.assertGreater(len(valid_dir_selector.img_filepaths), 0)

        # an integer `gabor_num_orientations` argument must be stored as a
        # tuple once `__init__` has run
        int_orientations_selector = dtr.TrainingSelector(
            self.img_filepaths, gabor_num_orientations=8)
        self.assertIsInstance(
            int_orientations_selector.gabor_num_orientations, tuple)
Esempio n. 3
0
def train_test_split(ctx, img_filepaths, img_dir, img_filename_pattern,
                     gabor_frequencies, gabor_num_orientations,
                     response_bins_per_axis, num_color_bins, method,
                     num_components, num_img_clusters, train_prop,
                     output_filepath):
    # Builds a `TrainingSelector` from the image/descriptor arguments, runs
    # its train/test split and dumps the resulting data frame to a CSV file
    # ('split.csv' when `output_filepath` is None).  Progress is reported
    # through the logger stored under the 'LOGGER' key of `ctx.obj`.
    # NOTE(review): documented with comments rather than a docstring, since
    # a docstring could be surfaced as help text if this function is wrapped
    # by a CLI decorator outside of this view -- confirm before converting.
    log = ctx.obj['LOGGER']

    selector = dtr.TrainingSelector(
        img_filepaths=img_filepaths,
        img_dir=img_dir,
        img_filename_pattern=img_filename_pattern,
        gabor_frequencies=gabor_frequencies,
        gabor_num_orientations=gabor_num_orientations,
        response_bins_per_axis=response_bins_per_axis,
        num_color_bins=num_color_bins)
    num_loaded = len(selector.img_filepaths)
    if img_dir is None:
        log.info("Loaded %d images", num_loaded)
    else:
        log.info("Loaded %d images from %s", num_loaded, img_dir)

    # forward only the split options that were actually provided, so that the
    # defaults of `TrainingSelector.train_test_split` apply to the others
    optional_split_args = (
        ('method', method),
        ('num_components', num_components),
        ('num_img_clusters', num_img_clusters),
        ('train_prop', train_prop),
    )
    split_kws = {
        arg_name: arg_value
        for arg_name, arg_value in optional_split_args
        if arg_value is not None
    }
    split_df, explained_ratio = selector.train_test_split(return_evr=True,
                                                          **split_kws)
    log.info("Variance ratio explained by PCA: %f", explained_ratio)

    csv_filepath = 'split.csv' if output_filepath is None else output_filepath
    split_df.to_csv(csv_filepath)
    log.info("Dumped train/test split to %s", csv_filepath)