    def process_sceneset_results(self, training_results, validation_results,
                                 tmp_dir):
        """After all scenes have been processed, process the result set.

        This writes a zip file for a group of scenes at {chip_uri}/{uuid}.zip
        containing:
        train/{scene_id}-{ind}.png
        train/{scene_id}-labels.json
        valid/{scene_id}-{ind}.png
        valid/{scene_id}-labels.json

        Args:
            training_results: dependent on the ml_backend's process_scene_data
            validation_results: dependent on the ml_backend's
                process_scene_data
        """
        self.log_options()

        group = str(uuid.uuid4())
        group_uri = join(self.backend_opts.chip_uri, '{}.zip'.format(group))
        group_path = get_local_path(group_uri, tmp_dir)
        make_dir(group_path, use_dirname=True)

        with zipfile.ZipFile(group_path, 'w', zipfile.ZIP_DEFLATED) as zipf:

            def _write_zip(results, split):
                for scene_dir in results:
                    scene_paths = glob.glob(join(scene_dir, '*'))
                    for p in scene_paths:
                        zipf.write(p, join(split, basename(p)))

            _write_zip(training_results, 'train')
            _write_zip(validation_results, 'valid')

        upload_or_copy(group_path, group_uri)
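
Across these examples, upload_or_copy(src, dst) writes a local file to dst: it uploads when dst is a remote URI (e.g. s3://...) and falls back to a plain filesystem copy when dst is local. A minimal usage sketch, assuming the Raster Vision 0.x module layout; the URIs below are illustrative, not taken from the examples:

from rastervision.utils.files import upload_or_copy

# Hypothetical paths, for illustration only.
local_zip = '/tmp/chips/group.zip'
upload_or_copy(local_zip, 's3://my-bucket/chips/group.zip')  # remote scheme -> upload
upload_or_copy(local_zip, '/data/chips/group.zip')           # local path -> copy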
Example #2
    def _make_debug_chips(split):
        debug_chips_dir = join(tmp_dir, '{}-debug-chips'.format(split))
        zip_path = join(tmp_dir, '{}-debug-chips.zip'.format(split))
        zip_uri = join(train_uri, '{}-debug-chips.zip'.format(split))
        make_dir(debug_chips_dir)
        dl = data.train_dl if split == 'train' else data.valid_dl
        i = 0
        for x_batch, y_batch in dl:
            for x, y in zip(x_batch, y_batch):
                x = x.squeeze()
                y = y.squeeze()

                # fastai has an x.show(y=y) method, but we need to plot the
                # debug chips ourselves in order to use
                # a custom color map that matches the colors in the class_map.
                # This could be a good thing to contribute upstream to fastai.
                plt.axis('off')
                plt.imshow(x.data.permute((1, 2, 0)).numpy())
                plt.imshow(y.data.squeeze().numpy(), alpha=0.4, vmin=0,
                            vmax=len(colors), cmap=cmap)
                plt.savefig(join(debug_chips_dir, '{}.png'.format(i)),
                            figsize=(3, 3))
                plt.close()
                i += 1

                if i > max_count:
                    break
            if i > max_count:
                break

        zipdir(debug_chips_dir, zip_path)
        upload_or_copy(zip_path, zip_uri)
Example #3
    def test_download_if_needed_local(self):
        with self.assertRaises(NotReadableError):
            download_if_needed(self.local_path, self.temp_dir.name)

        str_to_file(self.content_str, self.local_path)
        upload_or_copy(self.local_path, self.local_path)
        local_path = download_if_needed(self.local_path, self.temp_dir.name)
        self.assertEqual(local_path, self.local_path)
Example #4
 def _copy_train_chips(img_or_labels):
     all_uri = join(chip_dir, 'train-{}'.format(img_or_labels))
     sample_dir = 'train-{}-{}'.format(str(sample_size), img_or_labels)
     sample_dir_uri = join(chip_dir, sample_dir)
     make_dir(sample_dir_uri)
     for s in sample_images:
         upload_or_copy(join(all_uri, s), join(sample_dir_uri, s))
     return sample_dir
Example #5
 def _upload(data_dir, zip_uri, split):
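     # os.scandir yields nothing for an empty directory, so this detects a split with no chips to upload.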
     if not any(os.scandir(data_dir)):
         log.warning(
             'No data to write for split {} in partition {}...'.format(
                 split, self.partition_id))
     else:
         shutil.make_archive(data_dir, 'zip', data_dir)
         upload_or_copy(data_dir + '.zip', zip_uri)
Example #6
 def save_debug_predict_image(self, scene, debug_dir_uri):
     img = draw_debug_predict_image(scene, self.config.class_map)
     # Saving to a jpg leads to segfault for unknown reasons.
     debug_image_uri = join(debug_dir_uri, scene.id + '.png')
     with RVConfig.get_tmp_dir() as temp_dir:
         debug_image_path = get_local_path(debug_image_uri, temp_dir)
         make_dir(debug_image_path, use_dirname=True)
         img.save(debug_image_path)
         upload_or_copy(debug_image_path, debug_image_uri)
Example #7
    def upload_or_copy(self, uri):
        """Upload file if it's remote.

        This knows how to generate the path to the local copy of the file.

        Args:
            uri: (string) URI of file, possibly remote
        """
        upload_or_copy(self.get_local_path(uri), uri)
Example #8
def split_image(image_uri, split_dir):
    with rasterio.open(image_uri) as src:
        width = src.width
        height = src.height

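    # Window edge length in pixels; 8704 is 17 * 512, presumably chosen to align with the 512-px blocks used by gdal_translate below.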
    win_size = 8704
    wins = []
    print("Splitting image", image_uri)
    for c in range(0, width, win_size):
        if (c + win_size) > width:
            win_width = width - c - 1
        else:
            win_width = win_size
        for r in range(0, height, win_size):
            if (r + win_size) > height:
                win_height = height - r - 1
            else:
                win_height = win_size
            wins.append(Window(c, r, win_width, win_height))

    i = 0
    for win in wins:
        print("Doing window", i, " params:   ", win)
        with rasterio.open(image_uri) as src:
            img = src.read(window=win)
            win_transform = src.window_transform(win)
            kwargs = src.meta.copy()

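        # Only keep windows whose last band (presumably an alpha/valid-data mask) has data.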
        if np.max(img[-1]) == 255:
            area = image_uri.split("/")[-3]
            image_id = image_uri.split("/")[-2]
            output_uri = join(split_dir, area, image_id, f"{image_id}_{i}.tif")
            kwargs.update({
                "height": win.height,
                "width": win.width,
                "transform": win_transform
            })

            tmp_uri = join("/tmp/", basename(output_uri))
            with rasterio.open(tmp_uri, "w", **kwargs) as dst:
                dst.write(img)
            tmp_cmpr_file = tmp_uri.replace(".tif", "_jpg.tif")
            gdal_command = (
                f"gdal_translate {tmp_uri} {tmp_cmpr_file} "
                f"-co COMPRESS=JPEG -co JPEG_QUALITY=100 -co TILED=YES "
                f"-co COPY_SRC_OVERVIEWS=YES -co BLOCKXSIZE=512 -co BLOCKYSIZE=512 "
                f"--config COMPRESS_OVERVIEW JPEG")
            call(gdal_command, shell=True)
            upload_or_copy(tmp_cmpr_file, output_uri)
            for t in (tmp_uri, tmp_cmpr_file):
                os.remove(t)
            i += 1
Example #9
    def test_file_exists_s3_true(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)

        s3_path = 's3://{}/lorem.txt'.format(self.bucket_name)
        upload_or_copy(path, s3_path)

        self.assertTrue(file_exists(s3_path))
Example #10
    def process_sceneset_results(self, training_results: List[str],
                                 validation_results: List[str],
                                 tmp_dir: str) -> None:
        """Merge TFRecord files from individual scenes into two at-large files
        (one for training data and one for validation data).

        Args:
             training_results: A list of paths to TFRecords containing
                  training data.
             validation_results: A list of paths to TFRecords
                  containing validation data.
             tmp_dir: (str) temporary directory to use
        Returns:
             None

        """
        base_uri = self.backend_config.training_data_uri
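        # Short random suffix so parallel chip commands write distinct record files.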
        chip_suffix = str(uuid.uuid4()).split('-')[0]
        training_record_path = get_record_uri(base_uri, TRAIN, chip_suffix)
        training_record_path_local = get_local_path(training_record_path,
                                                    tmp_dir)
        validation_record_path = get_record_uri(base_uri, VALIDATION,
                                                chip_suffix)
        validation_record_path_local = get_local_path(validation_record_path,
                                                      tmp_dir)

        make_dir(training_record_path_local, use_dirname=True)
        make_dir(validation_record_path_local, use_dirname=True)
        merge_tf_records(training_record_path_local, training_results)
        merge_tf_records(validation_record_path_local, validation_results)
        upload_or_copy(training_record_path_local, training_record_path)
        upload_or_copy(validation_record_path_local, validation_record_path)

        if self.backend_config.debug:

            def _make_debug_chips(split, record_path_local):
                zip_path = join(base_uri, '{}-debug'.format(split),
                                chip_suffix)
                zip_path_local = get_local_path(zip_path, tmp_dir)

                debug_dir = join(tmp_dir, '{}-debug'.format(split),
                                 chip_suffix)
                make_debug_images(
                    record_path_local, debug_dir, self.class_map,
                    self.task_config.chip_options.debug_chip_probability)
                shutil.make_archive(zip_path_local, 'zip', debug_dir)

                upload_or_copy('{}.zip'.format(zip_path_local),
                               '{}.zip'.format(zip_path))

            if training_results:
                _make_debug_chips(TRAIN, training_record_path_local)
            if validation_results:
                _make_debug_chips(VALIDATION, validation_record_path_local)
Example #11
    def test_copy_to_local(self):
        path1 = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        path2 = os.path.join(self.temp_dir.name, 'yyy', 'ipsum.txt')
        dir1 = os.path.dirname(path1)
        dir2 = os.path.dirname(path2)
        make_dir(dir1, check_empty=False)
        make_dir(dir2, check_empty=False)

        str_to_file(self.lorem, path1)

        upload_or_copy(path1, path2)
        self.assertEqual(len(list_paths(dir2)), 1)
Example #12
    def run(self, tmp_dir=None):
        if not tmp_dir:
            tmp_dir = self.get_tmp_dir()

        cc = self.command_config

        if not cc.task.predict_package_uri:
            msg = 'Skipping bundling of prediction package, no URI is set...'
            click.echo(click.style(msg, fg='yellow'))
            return

        msg = 'Bundling prediction package to {}...'.format(
            cc.task.predict_package_uri)
        log.info(msg)

        bundle_dir = os.path.join(tmp_dir, 'bundle')
        make_dir(bundle_dir)
        package_path = os.path.join(tmp_dir, 'predict_package.zip')
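        # Each component saves the files it depends on into bundle_dir and returns an updated config pointing at them.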
        bundle_files = []
        new_task, task_files = cc.task.save_bundle_files(bundle_dir)
        bundle_files.extend(task_files)
        new_backend, backend_files = cc.backend.save_bundle_files(bundle_dir)
        bundle_files.extend(backend_files)
        new_scene, scene_files = cc.scene.save_bundle_files(bundle_dir)
        bundle_files.extend(scene_files)
        new_analyzers = []
        for analyzer in cc.analyzers:
            new_analyzer, analyzer_files = analyzer.save_bundle_files(
                bundle_dir)
            new_analyzers.append(new_analyzer)
            bundle_files.extend(analyzer_files)

        new_bundle_config = cc.to_builder() \
                              .with_task(new_task) \
                              .with_backend(new_backend) \
                              .with_scene(new_scene) \
                              .with_analyzers(new_analyzers) \
                              .build()

        # Save bundle command config
        bundle_config_path = os.path.join(tmp_dir, 'bundle_config.json')
        bundle_json = json_format.MessageToJson(new_bundle_config.to_proto())
        with open(bundle_config_path, 'w') as f:
            f.write(bundle_json)

        with zipfile.ZipFile(package_path, 'w') as package_zip:
            for path in bundle_files:
                package_zip.write(path, arcname=os.path.basename(path))
            package_zip.write(bundle_config_path,
                              arcname=os.path.basename(bundle_config_path))

        upload_or_copy(package_path, cc.task.predict_package_uri)
Example #13
    def test_list_paths_s3(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
        s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)

        self.assertEqual(len(list_paths(s3_directory)), 1)
Example #14
    def test_last_modified_s3(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum1.txt')
        s3_path = 's3://{}/lorem1.txt'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        fs = FileSystem.get_file_system(s3_path, 'r')

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)
        stamp = fs.last_modified(s3_path)

        self.assertTrue(isinstance(stamp, datetime.datetime))
Example #15
    def test_download_if_needed_s3(self):
        with self.assertRaises(NotReadableError):
            download_if_needed(self.s3_path, self.temp_dir.name)

        str_to_file(self.content_str, self.local_path)
        upload_or_copy(self.local_path, self.s3_path)
        local_path = download_if_needed(self.s3_path, self.temp_dir.name)
        content_str = file_to_str(local_path)
        self.assertEqual(self.content_str, content_str)

        wrong_path = 's3://wrongpath/x.txt'
        with self.assertRaises(NotWritableError):
            upload_or_copy(local_path, wrong_path)
Example #16
 def _make_debug_chips(split):
     debug_chips_dir = join(tmp_dir, '{}-debug-chips'.format(split))
     zip_path = join(tmp_dir, '{}-debug-chips.zip'.format(split))
     zip_uri = join(train_uri, '{}-debug-chips.zip'.format(split))
     make_dir(debug_chips_dir)
     ds = data.train_ds if split == 'train' else data.valid_ds
     for i, (x, y) in enumerate(ds):
         if random.uniform(0, 1) < debug_prob:
             x.show(y=y)
             plt.savefig(join(debug_chips_dir, '{}.png'.format(i)),
                         figsize=(3, 3))
             plt.close()
     zipdir(debug_chips_dir, zip_path)
     upload_or_copy(zip_path, zip_uri)
Example #17
            def _make_debug_chips(split, record_path_local):
                zip_path = join(base_uri, '{}-debug'.format(split),
                                chip_suffix)
                zip_path_local = get_local_path(zip_path, tmp_dir)

                debug_dir = join(tmp_dir, '{}-debug'.format(split),
                                 chip_suffix)
                make_debug_images(
                    record_path_local, debug_dir, self.class_map,
                    self.task_config.chip_options.debug_chip_probability)
                shutil.make_archive(zip_path_local, 'zip', debug_dir)

                upload_or_copy('{}.zip'.format(zip_path_local),
                               '{}.zip'.format(zip_path))
Example #18
    def test_file_exists(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
        s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)

        self.assertTrue(file_exists(s3_directory, include_dir=True))
        self.assertTrue(file_exists(s3_path, include_dir=False))
        self.assertFalse(file_exists(s3_directory, include_dir=False))
        self.assertFalse(
            file_exists(s3_directory + 'NOTPOSSIBLE', include_dir=False))
Example #19
    def process_sceneset_results(self, training_results, validation_results,
                                 tmp_dir):
        """After all scenes have been processed, process the result set.

        This writes a zip file for a group of scenes at {chip_uri}/{uuid}.zip
        containing:
        train-img/{scene_id}-{ind}.png
        train-labels/{scene_id}-{ind}.png
        val-img/{scene_id}-{ind}.png
        val-labels/{scene_id}-{ind}.png

        Args:
            training_results: dependent on the ml_backend's process_scene_data
            validation_results: dependent on the ml_backend's
                process_scene_data
        """
        # This is responsible for aggregating the results of chipping several Scenes into a zip
        # file. Takes in the results from process_scene and makes a zip file out of them. Can do
        # whatever it needs to in order for the training process to be able to access the data.
        # Can probably avoid touching these, unless I decide to use 8-band data. In that case I
        # would need to write out TIFFs or some other multiband format. Compressed numpy arrays
        # work pretty well.
        # ? Overall, what's the role of this function in the pipeline?
        # ? Can you give examples of what training_results and validation_results might look like?
        # Does this mean rasters or vectors or accuracy metrics? What calls this?
        if self.train_opts.debug:
            self.print_options()

        group = str(uuid.uuid4())
        group_uri = join(self.backend_opts.chip_uri, '{}.zip'.format(group))
        group_path = get_local_path(group_uri, tmp_dir)
        make_dir(group_path, use_dirname=True)

        with zipfile.ZipFile(group_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            def _write_zip(results, split):
                for scene_dir in results:
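                    # Note: without recursive=True, glob treats '**' like '*', so this matches scene_dir/<subdir>/*.tif one level down.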
                    scene_paths = glob.glob(join(scene_dir, '**/*.tif'))
                    for p in scene_paths:
                        zipf.write(p, join(
                            '{}-{}'.format(
                                split,
                                dirname(p).split('/')[-1]),
                            basename(p)))
            _write_zip(training_results, 'train')
            _write_zip(validation_results, 'val')

        upload_or_copy(group_path, group_uri)
Example #20
def _postprocess(pred_uri, experiment_id, root_uri):
    tmp_pred_uri = download_if_needed(pred_uri, "/opt/data/predict/")
    tmp_postprocess_uri = tmp_pred_uri.replace("/predict/", "/postprocess/")

    os.makedirs(dirname(tmp_postprocess_uri), exist_ok=True)
    out_uri = join(root_uri, "postprocess", experiment_id, basename(pred_uri))

    with rasterio.open(tmp_pred_uri) as src:
        img = src.read()
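        # Remap class value 2 to 0 (presumably collapsing one predicted class into background).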
        img = np.where(img == 2, 0, img)
        profile = src.profile
    with rasterio.open(tmp_postprocess_uri, "w", **profile) as dst:
        dst.write(img)

    upload_or_copy(tmp_postprocess_uri, out_uri)
    for t in (tmp_pred_uri, tmp_postprocess_uri):
        os.remove(t)
Example #21
 def run(self):
     experiment_id = self.command_config.get("experiment_id")
     root_uri = self.command_config.get("root_uri")
     tmp_dir = "/opt/data/tmp/"
     os.makedirs(tmp_dir)
     print("made tmp_dir")
     s3_command = (
         f"aws s3 sync s3://carderne-rv/postprocess/{experiment_id}/ {tmp_dir}"
     )
     call(s3_command, shell=True)
     print("downloaded files")
     zip_file = "/opt/data/zipped.zip"
     zipdir(tmp_dir, zip_file)
     print("zipped")
     upload_or_copy(zip_file, join(root_uri, "final",
                                   f"{experiment_id}.zip"))
     print("uploaded")
Example #22
    def process_sceneset_results(self, training_results, validation_results,
                                 tmp_dir):
        """Write zip file with chips for a set of scenes.

        This writes a zip file for a group of scenes at {chip_uri}/{uuid}.zip containing:
        train/img/{scene_id}-{ind}.png
        train/labels/{scene_id}-{ind}.png
        valid/img/{scene_id}-{ind}.png
        valid/labels/{scene_id}-{ind}.png

        This method is called once per instance of the chip command.
        A number of instances of the chip command can run simultaneously to
        process chips in parallel. The uuid in the path above is what allows
        separate instances to avoid overwriting each others' output.

        Args:
            training_results: list of directories generated by process_scene_data
                that all hold training chips
            validation_results: list of directories generated by process_scene_data
                that all hold validation chips
        """
        self.log_options()

        group = str(uuid.uuid4())
        group_uri = join(self.backend_opts.chip_uri, '{}.zip'.format(group))
        group_path = get_local_path(group_uri, tmp_dir)
        make_dir(group_path, use_dirname=True)

        with zipfile.ZipFile(group_path, 'w', zipfile.ZIP_DEFLATED) as zipf:

            def _write_zip(results, split):
                for scene_dir in results:
                    scene_paths = glob.glob(join(scene_dir, '**/*.png'))
                    for p in scene_paths:
                        zipf.write(
                            p,
                            join(
                                '{}/{}'.format(split,
                                               dirname(p).split('/')[-1]),
                                basename(p)))

            _write_zip(training_results, 'train')
            _write_zip(validation_results, 'valid')

        upload_or_copy(group_path, group_uri)
Example #23
    def _make_debug_chips(split):
        debug_chips_dir = join(tmp_dir, '{}-debug-chips'.format(split))
        zip_path = join(tmp_dir, '{}-debug-chips.zip'.format(split))
        zip_uri = join(train_uri, '{}-debug-chips.zip'.format(split))
        make_dir(debug_chips_dir)
        ds = databunch.train_ds if split == 'train' else databunch.valid_ds
        for i, (x, y) in enumerate(ds):
            if i >= max_count:
                break

            fig, ax = plt.subplots(1)
            plot_xy(ax, x, class_map, y=y)
            plt.savefig(join(debug_chips_dir, '{}.png'.format(i)),
                        figsize=(6, 6))
            plt.close()

        zipdir(debug_chips_dir, zip_path)
        upload_or_copy(zip_path, zip_uri)
Example #24
    def upload(self, debug=False):
        """Upload training and validation data, and class map files.

        Args:
            debug: (bool) if True, also upload the corresponding debug chip
                zip files
        """
        if os.path.exists(self.training_local_record_path):
            upload_or_copy(self.training_local_record_path,
                           self.training_record_uri)
        if os.path.exists(self.validation_local_record_path):
            upload_or_copy(self.validation_local_record_path,
                           self.validation_record_uri)
        if debug:
            if os.path.exists(self.get_debug_chips_uri(TRAIN)):
                self.upload_or_copy(self.get_debug_chips_uri(TRAIN))
            if os.path.exists(self.get_debug_chips_uri(VALIDATION)):
                self.upload_or_copy(self.get_debug_chips_uri(VALIDATION))
Example #25
    def _make_debug_chips(split):
        debug_chips_dir = join(tmp_dir, '{}-debug-chips'.format(split))
        zip_path = join(tmp_dir, '{}-debug-chips.zip'.format(split))
        zip_uri = join(train_uri, '{}-debug-chips.zip'.format(split))
        make_dir(debug_chips_dir)
        ds = data.train_ds if split == 'train' else data.valid_ds
        for i, (x, y) in enumerate(ds):
            if i >= max_count:
                break

            x.show(y=y)
            plt.savefig(join(debug_chips_dir, '{}.png'.format(i)),
                        figsize=(5, 5))
            plt.close()

        zipdir(debug_chips_dir, zip_path)
        upload_or_copy(zip_path, zip_uri)
Example #26
def create_cog(source_uri,
               dest_uri,
               local_dir,
               block_size=DEFAULT_BLOCK_SIZE,
               resample_method=DEFAULT_RESAMPLE_METHOD,
               compression=DEFAULT_COMPRESSION,
               overviews=None):
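    # Fetch the source locally (a no-op for local paths), run the GDAL COG conversion commands, then push the result to dest_uri.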
    local_path = download_or_copy(source_uri, local_dir)

    commands, output_path = gdal_cog_commands(local_path,
                                              local_dir,
                                              block_size=block_size,
                                              resample_method=resample_method,
                                              compression=compression,
                                              overviews=overviews)
    for command in commands:
        run_cmd(command)

    upload_or_copy(output_path, dest_uri)
Example #27
def crop_image(image_uri, window, crop_uri):
    im_dataset = rasterio.open(image_uri)
    rasterio_window = window.rasterio_format()
    im = im_dataset.read(window=rasterio_window)

    with tempfile.TemporaryDirectory() as tmp_dir:
        crop_path = get_local_path(crop_uri, tmp_dir)
        make_dir(crop_path, use_dirname=True)

        meta = im_dataset.meta
        meta['width'], meta['height'] = window.get_width(), window.get_height()
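        # Recompute the affine transform so the cropped raster stays georeferenced to its window.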
        meta['transform'] = rasterio.windows.transform(
            rasterio_window, im_dataset.transform)

        with rasterio.open(crop_path, 'w', **meta) as dst:
            dst.colorinterp = im_dataset.colorinterp
            dst.write(im)

        upload_or_copy(crop_path, crop_uri)
Example #28
    def test_copy_to_http(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        dst = 'http://localhost/'
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)

        self.assertRaises(NotWritableError, lambda: upload_or_copy(path, dst))
        os.remove(path)
Example #29
 def _make_debug_chips(split):
     debug_chips_dir = join(tmp_dir, '{}-debug-chips'.format(split))
     zip_path = join(tmp_dir, '{}-debug-chips.zip'.format(split))
     zip_uri = join(train_uri, '{}-debug-chips.zip'.format(split))
     make_dir(debug_chips_dir)
     ds = data.train_ds if split == 'train' else data.valid_ds
     for i, (x, y) in enumerate(ds):
         if random.uniform(0, 1) < debug_prob:
             plt.axis('off')
             plt.imshow(x.data.permute((1, 2, 0)).numpy())
             plt.imshow(y.data.squeeze().numpy(),
                        alpha=0.4,
                        vmin=0,
                        vmax=len(colors),
                        cmap=cmap)
             plt.savefig(join(debug_chips_dir, '{}.png'.format(i)),
                         figsize=(3, 3))
             plt.close()
     zipdir(debug_chips_dir, zip_path)
     upload_or_copy(zip_path, zip_uri)
Example #30
    def train(self, tmp_dir):
        """Find dataset and model locations for use in training script. Then,
           update model files with trained model.

        Args:
            tmp_dir: temporary directory

        Returns:
            None
        """
        dataset_files = DatasetFiles(self.config.training_data_uri, tmp_dir)
        dataset_files.download()

        model_files = ModelFiles(self.config.training_output_uri, tmp_dir)
        model_paths = model_files.get_backend_config(
            self.config.pretrained_model_uri, self.config.gcv_config,
            dataset_files, self.class_map)

        backend_config_dict, pretrained_model_path = model_paths
        self.model_path = backend_config_dict['model']['model_path']

        self.model = gluoncv_train(backend_config_dict, self.model_path)

        upload_or_copy(self.model_path, model_files.model_uri)