def collect_experiment(key, root_uri, output_dir, get_pred_package=False):
    print('\nCollecting experiment {}...\n'.format(key))

    if root_uri.startswith('s3://'):
        predict_package_uris = list_paths(join(root_uri, key, 'bundle'), ext='predict_package.zip')
        eval_json_uris = list_paths(join(root_uri, key, 'eval'), ext='eval.json')
    else:
        predict_package_uris = glob.glob(join(root_uri, key, 'bundle', '*', 'predict_package.zip'))
        eval_json_uris = glob.glob(join(root_uri, key, 'eval', '*', 'eval.json'))

    if len(predict_package_uris) > 1 or len(eval_json_uris) > 1:
        print('Cannot collect from key with multiple experiments!!!')
        return

    if len(predict_package_uris) == 0 or len(eval_json_uris) == 0:
        print('Missing output!!!')
        return

    predict_package_uri = predict_package_uris[0]
    eval_json_uri = eval_json_uris[0]
    make_dir(join(output_dir, key))
    if get_pred_package:
        download_or_copy(predict_package_uri, join(output_dir, key))

    download_or_copy(eval_json_uri, join(output_dir, key))

    eval_json = file_to_json(join(output_dir, key, 'eval.json'))
    pprint.pprint(eval_json['overall'], indent=4)
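
A minimal usage sketch for the collector above; the bucket name, experiment keys, and output directory are hypothetical:

# Collect eval results for two (hypothetical) experiment keys from an S3
# root into a local report directory.
for key in ['resnet50-baseline', 'resnet50-augmented']:
    collect_experiment(key, 's3://my-bucket/experiments', '/tmp/report')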
Example #2
    def process_sceneset_results(self, training_results, validation_results,
                                 tmp_dir):
        """After all scenes have been processed, process the result set.

        This writes a zip file for a group of scenes at {chip_uri}/{uuid}.zip
        containing:
        train/{scene_id}-{ind}.png
        train/{scene_id}-labels.json
        valid/{scene_id}-{ind}.png
        valid/{scene_id}-labels.json

        Args:
            training_results: dependent on the ml_backend's process_scene_data
            validation_results: dependent on the ml_backend's
                process_scene_data
        """
        self.log_options()

        group = str(uuid.uuid4())
        group_uri = join(self.backend_opts.chip_uri, '{}.zip'.format(group))
        group_path = get_local_path(group_uri, tmp_dir)
        make_dir(group_path, use_dirname=True)

        with zipfile.ZipFile(group_path, 'w', zipfile.ZIP_DEFLATED) as zipf:

            def _write_zip(results, split):
                for scene_dir in results:
                    scene_paths = glob.glob(join(scene_dir, '*'))
                    for p in scene_paths:
                        zipf.write(p, join(split, basename(p)))

            _write_zip(training_results, 'train')
            _write_zip(validation_results, 'valid')

        upload_or_copy(group_path, group_uri)
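
A quick standard-library sketch for sanity-checking an archive produced this way; the local zip path is assumed:

import zipfile

# List the entries of a chip zip to confirm the train/ and valid/ layout
# described in the docstring above. The path is hypothetical.
with zipfile.ZipFile('/tmp/chips/3fa85f64.zip') as zf:
    for name in zf.namelist():
        print(name)  # e.g. train/scene1-0.png, valid/scene1-labels.json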
Example #3
def get_local_path(uri, working_dir):
    """
    This method will simply pass along the URI if it is local.
    If the URI is on S3, it will download the data to the working directory,
    in a structure that matches s3, and return the local path.
    If the local path already exists, and the timestamp of the S3 object is at or before
    the local path, the download will be skipped
    """

    fs = FileSystem.get_file_system(uri)
    if fs is LocalFileSystem:
        return uri

    local_path = fs.local_path(uri, working_dir)
    do_copy = True
    if os.path.exists(local_path):
        last_modified = fs.last_modified(uri)
        if last_modified:
            # Skip the download if the local copy is newer than the remote file.
            local_last_modified = datetime.utcfromtimestamp(
                os.path.getmtime(local_path))
            if local_last_modified.replace(
                    tzinfo=timezone.utc) > last_modified:
                do_copy = False
        else:
            # This FileSystem doesn't support last modified.
            # By default, don't download a new version.
            do_copy = False

    if do_copy:
        dir_name = os.path.dirname(local_path)
        make_dir(dir_name)
        fs.copy_from(uri, local_path)

    return local_path
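
A behavior sketch with hypothetical URIs:

# A local URI passes straight through.
p1 = get_local_path('/data/scene.tif', '/tmp/work')
assert p1 == '/data/scene.tif'

# An S3 URI is mirrored under the working directory (the exact layout is
# determined by fs.local_path) and is only re-downloaded when the remote
# object is newer than the cached copy.
p2 = get_local_path('s3://my-bucket/scenes/scene.tif', '/tmp/work')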
Example #4
    def process_scene_data(self, scene, data, tmp_dir):
        """Process each scene's training data

        Args:
            scene: Scene
            data: TrainingData

        Returns:
            dictionary of Scene's classes and corresponding local directory
                path
        """
        scratch_dir = join(tmp_dir, 'scratch-{}'.format(uuid.uuid4()))
        # Ensure the directory is unique, since scene ids could be shared
        # between training and test sets.
        scene_dir = join(scratch_dir, '{}-{}'.format(scene.id, uuid.uuid4()))
        class_dirs = {}

        for chip_idx, (chip, window, labels) in enumerate(data):
            class_id = labels.get_cell_class_id(window)
            # If a chip is not associated with a class, don't
            # use it in training data.
            if class_id is None:
                continue
            class_name = self.class_map.get_by_id(class_id).name
            class_dir = join(scene_dir, class_name)
            make_dir(class_dir)
            class_dirs[class_name] = class_dir
            chip_name = '{}.png'.format(chip_idx)
            chip_path = join(class_dir, chip_name)
            save_img(chip, chip_path)

        return class_dirs
Example #5
    def download_pretrained_model(self, pretrained_model_zip_uri):
        """Download pretrained model and unzip it.

        This is used before training a model.

        Args:
            pretrained_model_zip_uri: (string) URI of .tar.gz file containing
                pretrained model. This file is of the form that comes from the
                Model Zoo at https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md # noqa
                It contains a directory at the top level with the same name as
                root of the zip file (if zip file is x.tar.gz, the directory
                is x), and a set of files of the form model.ckpt.*. This file
                can be located anywhere, and is not expected to be in the
                directory encapsulated by this class that is generated by the
                make_chips command. That is why it is passed in
                separately.

        Returns:
            (string) path to pretrained model file (which is model.ckpt in
                the zip file)
        """
        pretrained_model_zip_path = self.download_if_needed(
            pretrained_model_zip_uri)
        pretrained_model_dir = join(self.temp_dir, 'pretrained_model')
        make_dir(pretrained_model_dir)
        with tarfile.open(pretrained_model_zip_path, 'r:gz') as tar:
            tar.extractall(pretrained_model_dir)
        model_name = os.path.splitext(
            os.path.splitext(os.path.basename(pretrained_model_zip_uri))[0])[0]
        # The extracted archive is assumed to contain a single top-level
        # directory named after the model, derived from the file name.
        pretrained_model_path = join(pretrained_model_dir, model_name,
                                     'model.ckpt')
        return pretrained_model_path
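
The double splitext strips both extensions of a .tar.gz name. A worked example (the URI is hypothetical):

import os

uri = 'http://example.com/ssd_mobilenet_v1_coco_2018_01_28.tar.gz'
name = os.path.splitext(os.path.splitext(os.path.basename(uri))[0])[0]
print(name)  # ssd_mobilenet_v1_coco_2018_01_28
# The checkpoint is then expected at
# <pretrained_model_dir>/ssd_mobilenet_v1_coco_2018_01_28/model.ckpt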
Example #6
    def process_scene_data(self, scene, data, tmp_dir):
        """Process each scene's training data.

        This writes {scene_id}/img/{scene_id}-{ind}.png and
        {scene_id}/labels/{scene_id}-{ind}.png

        Args:
            scene: Scene
            data: TrainingData

        Returns:
            backend-specific data-structures consumed by backend's
            process_sceneset_results
        """
        scene_dir = join(tmp_dir, str(scene.id))
        img_dir = join(scene_dir, 'img')
        labels_dir = join(scene_dir, 'labels')

        make_dir(img_dir)
        make_dir(labels_dir)

        for ind, (chip, window, labels) in enumerate(data):
            chip_path = join(img_dir, '{}-{}.png'.format(scene.id, ind))
            label_path = join(labels_dir, '{}-{}.png'.format(scene.id, ind))
            save_img(chip, chip_path)
            label_im = labels.get_label_arr(window).astype(np.uint8)
            save_img(label_im, label_path)

        return scene_dir
Example #7
    def process_scene_data(self, scene: Scene, data: TrainingData,
                           tmp_dir: str) -> str:
        """Process the given scene and data into a TFRecord file specifically
        associated with that file.

        Args:
             scene: The scene data (labels stores, the raster sources,
                  and so on).
             data: The training data.
             tmp_dir: (str) temporary directory to use
        Returns:
            The local path to the generated file.
        """
        # Currently TF Deeplab can only handle uint8
        if scene.raster_source.get_dtype() != np.uint8:
            raise Exception('Cannot use {} backend for imagery that does '
                            'not have data type uint8. '
                            'Use the StatsAnalyzer and StatsTransformer '
                            'to turn the raster data into uint8 data'.format(
                                rv.TF_DEEPLAB))

        tf_examples = make_tf_examples(data, self.class_map)

        base_uri = self.backend_config.training_data_uri
        split = '{}-{}'.format(scene.id, uuid.uuid4())
        record_path = join(base_uri, '{}.record'.format(split))
        record_path = get_local_path(record_path, tmp_dir)

        make_dir(record_path, use_dirname=True)
        write_tf_record(tf_examples, record_path)

        return record_path
Example #8
    def process_sceneset_results(self, training_results: List[str],
                                 validation_results: List[str],
                                 tmp_dir: str) -> None:
        """Merge TFRecord files from individual scenes into two at-large files
        (one for training data and one for validation data).

        Args:
             training_results: A list of paths to TFRecords containing
                  training data.
             validation_results: A list of paths to TFRecords
                  containing validation data.
             tmp_dir: (str) temporary directory to use
        Returns:
             None

        """
        base_uri = self.backend_config.training_data_uri
        chip_suffix = str(uuid.uuid4()).split('-')[0]
        training_record_path = get_record_uri(base_uri, TRAIN, chip_suffix)
        training_record_path_local = get_local_path(training_record_path,
                                                    tmp_dir)
        validation_record_path = get_record_uri(base_uri, VALIDATION,
                                                chip_suffix)
        validation_record_path_local = get_local_path(validation_record_path,
                                                      tmp_dir)

        make_dir(training_record_path_local, use_dirname=True)
        make_dir(validation_record_path_local, use_dirname=True)
        merge_tf_records(training_record_path_local, training_results)
        merge_tf_records(validation_record_path_local, validation_results)
        upload_or_copy(training_record_path_local, training_record_path)
        upload_or_copy(validation_record_path_local, validation_record_path)

        if self.backend_config.debug:
            training_zip_path = join(base_uri, '{}'.format(TRAIN))
            training_zip_path_local = get_local_path(training_zip_path,
                                                     tmp_dir)
            validation_zip_path = join(base_uri, '{}'.format(VALIDATION))
            validation_zip_path_local = get_local_path(validation_zip_path,
                                                       tmp_dir)

            training_debug_dir = join(tmp_dir, 'training-debug')
            make_debug_images(
                training_record_path_local, training_debug_dir, self.class_map,
                self.task_config.chip_options.debug_chip_probability)
            shutil.make_archive(training_zip_path_local, 'zip',
                                training_debug_dir)

            validation_debug_dir = join(tmp_dir, 'validation-debug')
            make_debug_images(
                validation_record_path_local, validation_debug_dir,
                self.class_map,
                self.task_config.chip_options.debug_chip_probability)
            shutil.make_archive(validation_zip_path_local, 'zip',
                                validation_debug_dir)

            upload_or_copy('{}.zip'.format(training_zip_path_local),
                           '{}.zip'.format(training_zip_path))
            upload_or_copy('{}.zip'.format(validation_zip_path_local),
                           '{}.zip'.format(validation_zip_path))
Example #9
    def process_scene_data(self, scene, data, tmp_dir):
        """Make training chips for a scene.

        This writes a set of image chips to {scene_id}/{class_name}/{scene_id}-{ind}.png

        Args:
            scene: (rv.data.Scene)
            data: (rv.data.Dataset)
            tmp_dir: (str) path to temp directory

        Returns:
            (str) path to directory with scene chips {tmp_dir}/{scene_id}
        """
        scene_dir = join(tmp_dir, str(scene.id))

        for ind, (chip, window, labels) in enumerate(data):
            class_id = labels.get_cell_class_id(window)
            # If a chip is not associated with a class, don't
            # use it in training data.
            if class_id is None:
                continue

            class_name = self.task_config.class_map.get_by_id(class_id).name
            class_dir = join(scene_dir, class_name)
            make_dir(class_dir)
            chip_path = join(class_dir, '{}-{}.png'.format(scene.id, ind))
            save_img(chip, chip_path)

        return scene_dir
Example #10
    def process_scene_data(self, scene, data, tmp_dir):
        """Process each scene's training data.

        Args:
            scene: Scene
            data: TrainingData

        Returns:
            dictionary of Scene's classes and corresponding local directory
                path
        """
        dataset_files = DatasetFiles(self.config.training_data_uri, tmp_dir)
        scratch_dir = dataset_files.get_local_path(dataset_files.scratch_uri)
        scene_dir = join(scratch_dir, '{}-{}'.format(scene.id, uuid.uuid4()))
        class_dirs = {}

        for chip_idx, (chip, window, labels) in enumerate(data):
            class_id = labels.get_cell_class_id(window)
            if class_id is None:
                continue
            class_name = self.class_map.get_by_id(class_id).name
            class_dir = join(scene_dir, class_name)
            make_dir(class_dir)
            class_dirs[class_name] = class_dir
            chip_name = '{}.png'.format(chip_idx)
            chip_path = join(class_dir, chip_name)
            save_img(chip, chip_path)

        return class_dirs
Example #11
    def _make_debug_chips(split):
        debug_chips_dir = join(tmp_dir, '{}-debug-chips'.format(split))
        zip_path = join(tmp_dir, '{}-debug-chips.zip'.format(split))
        zip_uri = join(train_uri, '{}-debug-chips.zip'.format(split))
        make_dir(debug_chips_dir)
        dl = data.train_dl if split == 'train' else data.valid_dl
        i = 0
        for _, (x_batch, y_batch) in enumerate(dl):
            for x, y in zip(x_batch, y_batch):
                x = x.squeeze()
                y = y.squeeze()

                # fastai has an x.show(y=y) method, but we need to plot the
                # debug chips ourselves in order to use
                # a custom color map that matches the colors in the class_map.
                # This could be a good thing to contribute upstream to fastai.
                plt.axis('off')
                plt.imshow(x.data.permute((1, 2, 0)).numpy())
                plt.imshow(y.data.squeeze().numpy(), alpha=0.4, vmin=0,
                            vmax=len(colors), cmap=cmap)
                plt.savefig(join(debug_chips_dir, '{}.png'.format(i)))
                plt.close()
                i += 1

                if i > max_count:
                    break
            if i > max_count:
                break

        zipdir(debug_chips_dir, zip_path)
        upload_or_copy(zip_path, zip_uri)
Example #12
    def process_scene_data(self, scene, data, tmp_dir):
        """Make training chips for a scene.

        This writes a set of image chips to {scene_id}/img/{scene_id}-{ind}.png
        and corresponding label chips to {scene_id}/labels/{scene_id}-{ind}.png.

        Args:
            scene: (rv.data.Scene)
            data: (rv.data.Dataset)
            tmp_dir: (str) path to temp directory

        Returns:
            (str) path to directory with scene chips {tmp_dir}/{scene_id}
        """
        scene_dir = join(tmp_dir, str(scene.id))
        img_dir = join(scene_dir, 'img')
        labels_dir = join(scene_dir, 'labels')

        make_dir(img_dir)
        make_dir(labels_dir)

        for ind, (chip, window, labels) in enumerate(data):
            chip_path = join(img_dir, '{}-{}.png'.format(scene.id, ind))
            label_path = join(labels_dir, '{}-{}.png'.format(scene.id, ind))

            label_im = labels.get_label_arr(window).astype(np.uint8)
            save_img(label_im, label_path)
            save_img(chip, chip_path)

        return scene_dir
Example #13
    def _copy_train_chips(img_or_labels):
        all_uri = join(chip_dir, 'train-{}'.format(img_or_labels))
        sample_dir = 'train-{}-{}'.format(str(sample_size), img_or_labels)
        sample_dir_uri = join(chip_dir, sample_dir)
        make_dir(sample_dir_uri)
        for s in sample_images:
            upload_or_copy(join(all_uri, s), join(sample_dir_uri, s))
        return sample_dir
Example #14
    def test_check_empty(self):
        path = os.path.join(self.temp_dir.name, 'hello', 'hello.txt')
        dir = os.path.dirname(path)
        str_to_file('hello', path)

        make_dir(dir, check_empty=False)
        with self.assertRaises(Exception):
            make_dir(dir, check_empty=True)
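
For orientation, a minimal sketch of the make_dir semantics that this test and test_force_empty below exercise; this is an assumption about the helper's behavior, not the project's actual implementation:

import os
import shutil

def make_dir_sketch(path, check_empty=False, force_empty=False,
                    use_dirname=False):
    # When use_dirname is set, create the parent of path instead.
    directory = os.path.dirname(path) if use_dirname else path
    # force_empty wipes any existing contents first.
    if force_empty and os.path.isdir(directory):
        shutil.rmtree(directory)
    os.makedirs(directory, exist_ok=True)
    # check_empty raises if the directory is non-empty.
    if check_empty and os.listdir(directory):
        raise Exception('{} is not empty.'.format(directory))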
Example #15
    def save_model_bundle(self):
        model_bundle_dir = join(self.tmp_dir, 'model-bundle')
        make_dir(model_bundle_dir)
        shutil.copyfile(self.last_model_path,
                        join(model_bundle_dir, 'model.pth'))
        shutil.copyfile(self.config_path,
                        join(model_bundle_dir, 'config.json'))
        zipdir(model_bundle_dir, self.model_bundle_path)
Example #16
    def test_file_exists_local_true(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)

        self.assertTrue(file_exists(path))
Example #17
    def process_scene_data(self, scene, data, tmp_dir):
        """Process each scene's training data.

        This writes {scene_id}/{scene_id}-{ind}.png and
        {scene_id}/{scene_id}-labels.json in COCO format.

        Args:
            scene: Scene
            data: TrainingData

        Returns:
            backend-specific data-structures consumed by backend's
            process_sceneset_results
        """
        scene_dir = join(tmp_dir, str(scene.id))
        labels_path = join(scene_dir, '{}-labels.json'.format(scene.id))

        make_dir(scene_dir)
        images = []
        annotations = []
        categories = [{
            'id': item.id,
            'name': item.name
        } for item in self.task_config.class_map.get_items()]

        for im_ind, (chip, window, labels) in enumerate(data):
            im_id = '{}-{}'.format(scene.id, im_ind)
            fn = '{}.png'.format(im_id)
            chip_path = join(scene_dir, fn)
            save_img(chip, chip_path)
            images.append({
                'file_name': fn,
                'id': im_id,
                'height': chip.shape[0],
                'width': chip.shape[1]
            })

            npboxes = labels.get_npboxes()
            npboxes = ObjectDetectionLabels.global_to_local(npboxes, window)
            for box_ind, (box, class_id) in enumerate(
                    zip(npboxes, labels.get_class_ids())):
                bbox = [box[1], box[0], box[3] - box[1], box[2] - box[0]]
                bbox = [int(i) for i in bbox]
                annotations.append({
                    'id': '{}-{}'.format(im_id, box_ind),
                    'image_id': im_id,
                    'bbox': bbox,
                    'category_id': int(class_id)
                })

        coco_dict = {
            'images': images,
            'annotations': annotations,
            'categories': categories
        }
        json_to_file(coco_dict, labels_path)

        return scene_dir
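
The bbox conversion above maps an npboxes row in [ymin, xmin, ymax, xmax] order to COCO's [x, y, width, height]. A worked example of the resulting labels file (scene id, chip size, and class name are hypothetical):

# A window-local box [ymin, xmin, ymax, xmax] = [10, 20, 50, 100]
# becomes bbox = [20, 10, 80, 40], so a one-chip scene yields roughly:
{
    'images': [{'file_name': 'scene1-0.png', 'id': 'scene1-0',
                'height': 300, 'width': 300}],
    'annotations': [{'id': 'scene1-0-0', 'image_id': 'scene1-0',
                     'bbox': [20, 10, 80, 40], 'category_id': 1}],
    'categories': [{'id': 1, 'name': 'building'}]
}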
Example #18
    def save_debug_predict_image(self, scene, debug_dir_uri):
        img = draw_debug_predict_image(scene, self.config.class_map)
        # Saving to a jpg leads to segfault for unknown reasons.
        debug_image_uri = join(debug_dir_uri, scene.id + '.png')
        with RVConfig.get_tmp_dir() as temp_dir:
            debug_image_path = get_local_path(debug_image_uri, temp_dir)
            make_dir(debug_image_path, use_dirname=True)
            img.save(debug_image_path)
            upload_or_copy(debug_image_path, debug_image_uri)
Example #19
    def _make_debug_chips(split):
        debug_chips_dir = join(train_uri,
                               '{}-debug-chips'.format(split))
        make_dir(debug_chips_dir)
        ds = data.train_ds if split == 'train' else data.valid_ds
        for i, (x, y) in enumerate(ds):
            x.show(y=y)
            plt.savefig(join(debug_chips_dir, '{}.png'.format(i)))
            plt.close()
Example #20
    def test_force_empty(self):
        path = os.path.join(self.temp_dir.name, 'hello', 'hello.txt')
        dir = os.path.dirname(path)
        str_to_file('hello', path)

        make_dir(dir, force_empty=False)
        self.assertTrue(os.path.isfile(path))
        make_dir(dir, force_empty=True)
        is_empty = len(os.listdir(dir)) == 0
        self.assertTrue(is_empty)
Example #21
    def test_sync_from_dir_noop_local(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        src = os.path.join(self.temp_dir.name, 'lorem')
        make_dir(src, check_empty=False)

        fs = FileSystem.get_file_system(src, 'r')
        fs.write_bytes(path, bytes([0x00, 0x01]))
        sync_from_dir(src, src, delete=True)

        self.assertEqual(len(list_paths(src)), 1)
Example #22
    def test_copy_to_http(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        dst = 'http://localhost/'
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)

        self.assertRaises(NotWritableError, lambda: upload_or_copy(path, dst))
        os.remove(path)
Example #23
    def test_file_exists_s3_true(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)

        s3_path = 's3://{}/lorem.txt'.format(self.bucket_name)
        upload_or_copy(path, s3_path)

        self.assertTrue(file_exists(s3_path))
Example #24
    def test_last_modified(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum1.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        fs = FileSystem.get_file_system(path, 'r')

        str_to_file(self.lorem, path)
        stamp = fs.last_modified(path)

        self.assertTrue(isinstance(stamp, datetime.datetime))
Example #25
    def test_bundle_od_command(self):
        def get_task(tmp_dir):
            predict_package_uri = os.path.join(tmp_dir, 'predict_package.zip')
            t = rv.TaskConfig.builder(rv.OBJECT_DETECTION) \
                             .with_predict_package_uri(predict_package_uri) \
                             .with_classes(['class1']) \
                             .build()
            return t

        def get_backend(task, tmp_dir):
            model_uri = os.path.join(tmp_dir, 'model')
            template_uri = data_file_path(
                'tf_object_detection/embedded_ssd_mobilenet_v1_coco.config')
            with open(model_uri, 'w') as f:
                f.write('DUMMY')
            b = rv.BackendConfig.builder(rv.TF_OBJECT_DETECTION) \
                                .with_task(task) \
                                .with_template(template_uri) \
                                .with_model_uri(model_uri) \
                                .build()
            return b

        with RVConfig.get_tmp_dir() as tmp_dir:
            task = get_task(tmp_dir)
            backend = get_backend(task, tmp_dir)
            analyzer = self.get_analyzer(tmp_dir)
            scene = self.get_scene(tmp_dir)
            cmd = rv.CommandConfig.builder(rv.BUNDLE) \
                                  .with_task(task) \
                                  .with_root_uri(tmp_dir) \
                                  .with_backend(backend) \
                                  .with_analyzers([analyzer]) \
                                  .with_scene(scene) \
                                  .build() \
                                  .create_command()

            cmd.run(tmp_dir)

            package_dir = os.path.join(tmp_dir, 'package')
            make_dir(package_dir)
            with zipfile.ZipFile(task.predict_package_uri, 'r') as package_zip:
                package_zip.extractall(path=package_dir)

            bundle_config_path = os.path.join(package_dir,
                                              'bundle_config.json')
            bundle_config = load_json_config(bundle_config_path,
                                             CommandConfigMsg())

            self.assertEqual(bundle_config.command_type, rv.BUNDLE)

            actual = set(os.listdir(package_dir))
            expected = set(['stats.json', 'model', 'bundle_config.json'])

            self.assertEqual(actual, expected)
Example #26
    def run(self, tmp_dir=None):
        if not tmp_dir:
            tmp_dir = self.get_tmp_dir()

        cc = self.command_config

        if not cc.task.predict_package_uri:
            msg = 'Skipping bundling of prediction package, no URI is set...'
            click.echo(click.style(msg, fg='yellow'))
            return

        msg = 'Bundling prediction package to {}...'.format(
            cc.task.predict_package_uri)
        log.info(msg)

        bundle_dir = os.path.join(tmp_dir, 'bundle')
        make_dir(bundle_dir)
        package_path = os.path.join(tmp_dir, 'predict_package.zip')
        bundle_files = []
        new_task, task_files = cc.task.save_bundle_files(bundle_dir)
        bundle_files.extend(task_files)
        new_backend, backend_files = cc.backend.save_bundle_files(bundle_dir)
        bundle_files.extend(backend_files)
        new_scene, scene_files = cc.scene.save_bundle_files(bundle_dir)
        bundle_files.extend(scene_files)
        new_analyzers = []
        for analyzer in cc.analyzers:
            new_analyzer, analyzer_files = analyzer.save_bundle_files(
                bundle_dir)
            new_analyzers.append(new_analyzer)
            bundle_files.extend(analyzer_files)

        new_bundle_config = cc.to_builder() \
                              .with_task(new_task) \
                              .with_backend(new_backend) \
                              .with_scene(new_scene) \
                              .with_analyzers(new_analyzers) \
                              .build()

        # Save bundle command config
        bundle_config_path = os.path.join(tmp_dir, 'bundle_config.json')
        bundle_json = json_format.MessageToJson(new_bundle_config.to_proto())
        with open(bundle_config_path, 'w') as f:
            f.write(bundle_json)

        with zipfile.ZipFile(package_path, 'w') as package_zip:
            for path in bundle_files:
                package_zip.write(path, arcname=os.path.basename(path))
            package_zip.write(bundle_config_path,
                              arcname=os.path.basename(bundle_config_path))

        upload_or_copy(package_path, cc.task.predict_package_uri)
Example #27
    def test_list_paths_s3(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
        s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)

        list_paths(s3_directory)
        self.assertEqual(len(list_paths(s3_directory)), 1)
Example #28
    def test_bytes_local(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        expected = bytes([0x00, 0x01, 0x02])
        fs = FileSystem.get_file_system(path, 'r')

        fs.write_bytes(path, expected)
        actual = fs.read_bytes(path)

        self.assertEqual(actual, expected)
Example #29
    def test_sync_to_dir_local(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        src = os.path.dirname(path)
        dst = os.path.join(self.temp_dir.name, 'xxx')
        make_dir(src, check_empty=False)
        make_dir(dst, check_empty=False)

        fs = FileSystem.get_file_system(path, 'r')
        fs.write_bytes(path, bytes([0x00, 0x01]))
        sync_to_dir(src, dst, delete=True)

        self.assertEqual(len(list_paths(dst)), 1)
Example #30
    def test_copy_to_local(self):
        path1 = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        path2 = os.path.join(self.temp_dir.name, 'yyy', 'ipsum.txt')
        dir1 = os.path.dirname(path1)
        dir2 = os.path.dirname(path2)
        make_dir(dir1, check_empty=False)
        make_dir(dir2, check_empty=False)

        str_to_file(self.lorem, path1)

        upload_or_copy(path1, path2)
        self.assertEqual(len(list_paths(dir2)), 1)