Example 1
def collect_experiment(key, root_uri, output_dir, get_pred_package=False):
    print('\nCollecting experiment {}...\n'.format(key))

    if root_uri.startswith('s3://'):
        predict_package_uris = list_paths(join(root_uri, key, 'bundle'), ext='predict_package.zip')
        eval_json_uris = list_paths(join(root_uri, key, 'eval'), ext='eval.json')
    else:
        predict_package_uris = glob.glob(join(root_uri, key, 'bundle', '*', 'predict_package.zip'))
        eval_json_uris = glob.glob(join(root_uri, key, 'eval', '*', 'eval.json'))

    if len(predict_package_uris) > 1 or len(eval_json_uris) > 1:
        print('Cannot collect from key with multiple experiments!!!')
        return

    if len(predict_package_uris) == 0 or len(eval_json_uris) == 0:
        print('Missing output!!!')
        return

    predict_package_uri = predict_package_uris[0]
    eval_json_uri = eval_json_uris[0]
    make_dir(join(output_dir, key))
    if get_pred_package:
        download_or_copy(predict_package_uri, join(output_dir, key))

    download_or_copy(eval_json_uri, join(output_dir, key))

    eval_json = file_to_json(join(output_dir, key, 'eval.json'))
    pprint.pprint(eval_json['overall'], indent=4)
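
All of these examples center on list_paths. As a rough mental model, here is a hypothetical local-filesystem stand-in (not the real Raster Vision implementation, which also handles s3:// URIs, as the branch above shows):

import glob
from os.path import join

def list_paths(uri, ext=''):
    # Recursively list files under uri whose names end with ext.
    return [p for p in glob.glob(join(uri, '**', '*'), recursive=True)
            if p.endswith(ext)]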
Example 2
    def download_config(self, class_map):
        """Download a model and backend config and update its fields.

        This is used before training a model. This downloads and unzips a bunch
        of files that are needed to train a model, and then downloads and
        updates the backend config file with local paths to these files. These
        files include the pretrained model, the class map, and the training and
        validation datasets.

        Args:
            class_map: (ClassMap) map of class ids to class names, used to
                generate the TF label map saved alongside the config.

        The config file is a config for the TF Object Detection API. Examples
        can be found here
        https://github.com/tensorflow/models/tree/master/research/object_detection/samples/configs  # noqa
        """
        from rastervision.protos.tf_object_detection.pipeline_pb2 \
            import TrainEvalPipelineConfig

        # Parse configuration.
        # We must remove 'nulls' that appear when translating between text-
        # and JSON-based protobuf messages while using the
        # google.protobuf.Struct type to store the JSON; TFOD sometimes uses
        # empty message types as an enum, and these translate to nulls.
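        # e.g. (assumed behavior): replace_nones_in_dict({'a': None}, {})
        # returns {'a': {}}, so empty messages survive the round trip.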
        config = json_format.ParseDict(
            replace_nones_in_dict(self.config.tfod_config, {}),
            TrainEvalPipelineConfig())

        # Update config using local paths.
        if config.train_config.fine_tune_checkpoint:
            pretrained_model_path = self.download_pretrained_model(
                config.train_config.fine_tune_checkpoint)
            config.train_config.fine_tune_checkpoint = pretrained_model_path

        # Save TF label map based on class_map.
        class_map_path = os.path.join(self.temp_dir, 'label-map.pbtxt')
        tf_class_map = make_tf_class_map(class_map)
        save_tf_class_map(tf_class_map, class_map_path)

        train_record_uris = list_paths(self.training_download_uri, 'record')
        config.train_input_reader.tf_record_input_reader.input_path[:] = train_record_uris
        config.train_input_reader.label_map_path = class_map_path

        eval_record_uris = list_paths(self.validation_download_uri, 'record')
        config.eval_input_reader[
            0].tf_record_input_reader.input_path[:] = eval_record_uris
        config.eval_input_reader[0].label_map_path = class_map_path

        # Save an updated copy of the config file.
        config_path = join(self.temp_dir, 'ml.config')
        config_str = text_format.MessageToString(config)
        with open(config_path, 'w') as config_file:
            config_file.write(config_str)
        return config_path
Example 3
    def test_list_paths_s3(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
        s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
        directory = os.path.dirname(path)
        make_dir(directory, check_empty=False)

        str_to_file(self.lorem, path)
        upload_or_copy(path, s3_path)

        self.assertEqual(len(list_paths(s3_directory)), 1)
Example 4
def collect_eval_dir(root_uri):
    eval_json_uris = list_paths(join(root_uri, 'eval'), ext='eval.json')
    for eval_json_uri in eval_json_uris:
        eval_json = file_to_json(eval_json_uri)
        print(basename(dirname(eval_json_uri)))
        print(eval_json['overall'][-1]['f1'])
        print()
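
The snippet above assumes an eval.json of roughly this shape (illustrative values, not the exact Raster Vision schema):

eval_json = {
    'overall': [
        {'class_name': 'Building', 'precision': 0.84, 'recall': 0.78, 'f1': 0.81},
        {'class_name': 'average', 'precision': 0.82, 'recall': 0.77, 'f1': 0.79},
    ]
}
eval_json['overall'][-1]['f1']  # -> 0.79, the aggregate score printed per run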
Example 5
    def get_scene_ids(self):
        label_dir = os.path.join(self.base_uri, self.label_dir)
        label_paths = list_paths(label_dir, ext='.geojson')
        label_re = re.compile(r'.*{}(\d+)\.geojson'.format(self.label_fn_prefix))
        scene_ids = [
            label_re.match(label_path).group(1)
            for label_path in label_paths]
        return scene_ids
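
A quick worked example of the id-extracting regex (the prefix is hypothetical, borrowed from the Spacenet examples further down):

import re

label_fn_prefix = 'SN2_buildings_train_AOI_2_Vegas_geojson_buildings_img'
label_re = re.compile(r'.*{}(\d+)\.geojson'.format(label_fn_prefix))
path = '/data/labels/' + label_fn_prefix + '123.geojson'
print(label_re.match(path).group(1))  # -> '123'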
Example 6
    def _download(split, output_dir):
        for uri in list_paths(self.base_uri, 'record'):
            base_name = os.path.basename(uri)
            if base_name.startswith(split):
                record_path = self.download_if_needed(uri)
                target_record_path = os.path.join(
                    output_dir, os.path.basename(record_path))
                shutil.move(record_path, target_record_path)
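
For illustration (hypothetical URIs), with split='train' the loop above would
download and move s3://bucket/data/train-00000.record but skip
s3://bucket/data/eval-00000.record, since only base names beginning with the
split prefix match.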
Example 7
    def test_sync_from_dir_noop_local(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        src = os.path.join(self.temp_dir.name, 'lorem')
        make_dir(src, check_empty=False)

        fs = FileSystem.get_file_system(src, 'r')
        fs.write_bytes(path, bytes([0x00, 0x01]))
        sync_from_dir(src, src, delete=True)

        self.assertEqual(len(list_paths(src)), 1)
Example 8
    def test_sync_to_dir_local(self):
        path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        src = os.path.dirname(path)
        dst = os.path.join(self.temp_dir.name, 'xxx')
        make_dir(src, check_empty=False)
        make_dir(dst, check_empty=False)

        fs = FileSystem.get_file_system(path, 'r')
        fs.write_bytes(path, bytes([0x00, 0x01]))
        sync_to_dir(src, dst, delete=True)

        self.assertEqual(len(list_paths(dst)), 1)
Example 9
    def test_copy_to_local(self):
        path1 = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
        path2 = os.path.join(self.temp_dir.name, 'yyy', 'ipsum.txt')
        dir1 = os.path.dirname(path1)
        dir2 = os.path.dirname(path2)
        make_dir(dir1, check_empty=False)
        make_dir(dir2, check_empty=False)

        str_to_file(self.lorem, path1)

        upload_or_copy(path1, path2)
        self.assertEqual(len(list_paths(dir2)), 1)
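Example 10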
def fitness(data_img_dir, truth_img_dir, compiler, individual):
    """
    Return a score representing the fitness of a particular program.

    Params individual: The individual to be evaluated.  train_files: A list of tuples of the
    form (raster, geojson), where raster is a filename of a GeoTIFF containing multiband raster
    data, and geojson is the name of a GeoJSON file representing ground-truth.
    """
    # Sort both listings so that input and truth files pair up
    # deterministically, instead of assuming list_paths() returns
    # paths in a consistent order.
    eval_data = zip(sorted(list_paths(data_img_dir)),
                    sorted(list_paths(truth_img_dir)))

    total_error = 0
    func = compiler(expr=individual)
    for input_file, truth_file in eval_data:
        # Load truth data
        input_pixels, truth_pixels = read_input_truth(input_file, truth_file)
        output = apply_to_raster(func, input_pixels, truth_pixels.shape)
        errors = output - truth_pixels
        # Accumulate the mean squared error for this raster.
        total_error += np.mean(np.square(errors))
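    # Return a 1-tuple; evolutionary frameworks such as DEAP expect one
    # value per fitness objective.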
    return (total_error,)
Example 11
        def _download(split, output_dir):
            scene_class_dirs = []
            for uri in list_paths(self.base_uri, 'zip'):
                base_name = os.path.basename(uri)
                if base_name.startswith(split):
                    data_zip_path = self.download_if_needed(uri)
                    data_dir = os.path.splitext(data_zip_path)[0]
                    shutil.unpack_archive(data_zip_path, data_dir)

                    # Append each of the directories containing this
                    # partition's labeled images, keyed by class directory.
                    data_dir_subdirectories = next(os.walk(data_dir))[1]
                    scene_class_dirs.append(
                        dict([(class_name, os.path.join(data_dir, class_name))
                              for class_name in data_dir_subdirectories]))
            merge_class_dirs(scene_class_dirs, output_dir)
Example 12
    def build_data(self):
        cfg = self.cfg
        batch_sz = cfg.solver.batch_sz
        num_workers = cfg.data.num_workers
        label_names = cfg.data.labels

        # download and unzip data
        data_dirs = []
        if cfg.data.data_format == 'image_folder':
            if cfg.data.uri.startswith('s3://') or cfg.data.uri.startswith(
                    '/'):
                data_uri = cfg.data.uri
            else:
                data_uri = join(cfg.base_uri, cfg.data.uri)
            zip_uris = [data_uri] if data_uri.endswith('.zip') else list_paths(
                data_uri, 'zip')
            for zip_ind, zip_uri in enumerate(zip_uris):
                zip_path = get_local_path(zip_uri, self.data_cache_dir)
                if not isfile(zip_path):
                    zip_path = download_if_needed(zip_uri, self.data_cache_dir)
                with zipfile.ZipFile(zip_path, 'r') as zipf:
                    data_dir = join(self.tmp_dir, 'data', str(zip_ind))
                    data_dirs.append(data_dir)
                    zipf.extractall(data_dir)

        train_ds, valid_ds, test_ds = [], [], []
        for data_dir in data_dirs:
            train_dir = join(data_dir, 'train')
            valid_dir = join(data_dir, 'valid')

            # build datasets
            transform = Compose(
                [Resize((cfg.data.img_sz, cfg.data.img_sz)),
                 ToTensor()])
            aug_transform = Compose([
                RandomHorizontalFlip(),
                RandomVerticalFlip(),
                ColorJitter(0.1, 0.1, 0.1, 0.1),
                Resize((cfg.data.img_sz, cfg.data.img_sz)),
                ToTensor()
            ])

            if isdir(train_dir):
                if cfg.overfit_mode:
                    train_ds.append(
                        ImageFolder(
                            train_dir,
                            transform=transform,
                            classes=label_names))
                else:
                    train_ds.append(
                        ImageFolder(
                            train_dir,
                            transform=aug_transform,
                            classes=label_names))

            if isdir(valid_dir):
                valid_ds.append(
                    ImageFolder(
                        valid_dir, transform=transform, classes=label_names))
                test_ds.append(
                    ImageFolder(
                        valid_dir, transform=transform, classes=label_names))

        train_ds, valid_ds, test_ds = \
            ConcatDataset(train_ds), ConcatDataset(valid_ds), ConcatDataset(test_ds)

        if cfg.overfit_mode:
            train_ds = Subset(train_ds, range(batch_sz))
            valid_ds = train_ds
            test_ds = train_ds
        elif cfg.test_mode:
            train_ds = Subset(train_ds, range(batch_sz))
            valid_ds = Subset(valid_ds, range(batch_sz))
            test_ds = Subset(test_ds, range(batch_sz))

        train_dl = DataLoader(
            train_ds,
            shuffle=True,
            batch_size=batch_sz,
            num_workers=num_workers,
            pin_memory=True)
        valid_dl = DataLoader(
            valid_ds,
            shuffle=True,
            batch_size=batch_sz,
            num_workers=num_workers,
            pin_memory=True)
        test_dl = DataLoader(
            test_ds,
            shuffle=True,
            batch_size=batch_sz,
            num_workers=num_workers,
            pin_memory=True)

        self.train_ds, self.valid_ds, self.test_ds = (train_ds, valid_ds,
                                                      test_ds)
        self.train_dl, self.valid_dl, self.test_dl = (train_dl, valid_dl,
                                                      test_dl)
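
The zip handling above uses a download-or-reuse pattern worth calling out on its own; a minimal sketch with the same helpers the example relies on:

from os.path import isfile

def cached_download(uri, cache_dir):
    # Reuse a previously downloaded copy when present; otherwise fetch it.
    path = get_local_path(uri, cache_dir)
    if not isfile(path):
        path = download_if_needed(uri, cache_dir)
    return path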
Example 13
    def train(self, tmp_dir):
        """Train a model."""
        self.print_options()

        # Sync output of previous training run from cloud.
        train_uri = self.backend_opts.train_uri
        train_dir = get_local_path(train_uri, tmp_dir)
        make_dir(train_dir)
        sync_from_dir(train_uri, train_dir)

        # Get zip file for each group, and unzip them into chip_dir.
        chip_dir = join(tmp_dir, 'chips')
        make_dir(chip_dir)
        for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
            zip_path = download_if_needed(zip_uri, tmp_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(chip_dir)

        # Setup data loader.
        def get_label_path(im_path):
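            # e.g. .../chips/train-img/0.png -> .../chips/train-labels/0.png,
            # since [:-4] strips the '-img' suffix from the parent directory.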
            return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

        size = self.task_config.chip_size
        class_map = self.task_config.class_map
        classes = class_map.get_class_names()
        if 0 not in class_map.get_keys():
            classes = ['nodata'] + classes
        num_workers = 0 if self.train_opts.debug else 4

        train_img_dir = self.subset_training_data(chip_dir)

        def get_data(train_sampler=None):
            data = (SegmentationItemList.from_folder(chip_dir).split_by_folder(
                train=train_img_dir, valid='val-img').label_from_func(
                    get_label_path, classes=classes).transform(
                        get_transforms(flip_vert=self.train_opts.flip_vert),
                        size=size,
                        tfm_y=True).databunch(bs=self.train_opts.batch_sz,
                                              num_workers=num_workers,
                                              train_sampler=train_sampler))
            return data

        data = get_data()
        oversample = self.train_opts.oversample
        if oversample:
            sampler = get_weighted_sampler(data.train_ds,
                                           oversample['rare_class_ids'],
                                           oversample['rare_target_prop'])
            data = get_data(train_sampler=sampler)

        if self.train_opts.debug:
            make_debug_chips(data, class_map, tmp_dir, train_uri)

        # Setup learner.
        ignore_idx = 0
        metrics = [
            Precision(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
            Recall(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
            FBeta(average='weighted',
                  clas_idx=1,
                  beta=1,
                  ignore_idx=ignore_idx)
        ]
        model_arch = getattr(models, self.train_opts.model_arch)
        learn = unet_learner(data,
                             model_arch,
                             metrics=metrics,
                             wd=self.train_opts.weight_decay,
                             bottle=True,
                             path=train_dir)
        learn.unfreeze()

        if self.train_opts.fp16 and torch.cuda.is_available():
            # This loss_scale works for Resnet 34 and 50. You might need to adjust this
            # for other models.
            learn = learn.to_fp16(loss_scale=256)

        # Setup callbacks and train model.
        model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

        pretrained_uri = self.backend_opts.pretrained_uri
        if pretrained_uri:
            print('Loading weights from pretrained_uri: {}'.format(
                pretrained_uri))
            pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
            learn.model.load_state_dict(torch.load(
                pretrained_path, map_location=learn.data.device),
                                        strict=False)

        # Save every epoch so that resume functionality provided by
        # TrackEpochCallback will work.
        callbacks = [
            TrackEpochCallback(learn),
            MySaveModelCallback(learn, every='epoch'),
            MyCSVLogger(learn, filename='log'),
            ExportCallback(learn, model_path, monitor='f_beta'),
            SyncCallback(train_dir, self.backend_opts.train_uri,
                         self.train_opts.sync_interval)
        ]

        lr = self.train_opts.lr
        num_epochs = self.train_opts.num_epochs
        if self.train_opts.one_cycle:
            if lr is None:
                learn.lr_find()
                learn.recorder.plot(suggestion=True, return_fig=True)
                lr = learn.recorder.min_grad_lr
                print('lr_find() found lr: {}'.format(lr))
            learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
        else:
            learn.fit(num_epochs, lr, callbacks=callbacks)

        # Since model is exported every epoch, we need some other way to
        # show that training is finished.
        str_to_file('done!', self.backend_opts.train_done_uri)

        # Sync output to cloud.
        sync_to_dir(train_dir, self.backend_opts.train_uri)
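Example 14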
    def train(self, tmp_dir):
        """Train a model."""
        # Setup hyperparams.
        bs = int(self.config.get('bs', 8))
        wd = self.config.get('wd', 1e-2)
        lr = self.config.get('lr', 2e-3)
        num_epochs = int(self.config.get('num_epochs', 10))
        model_arch = self.config.get('model_arch', 'resnet50')
        model_arch = getattr(models, model_arch)
        fp16 = self.config.get('fp16', False)
        sync_interval = self.config.get('sync_interval', 1)
        debug = self.config.get('debug', False)

        chip_uri = self.config['chip_uri']
        train_uri = self.config['train_uri']

        # Sync output of previous training run from cloud.
        train_dir = get_local_path(train_uri, tmp_dir)
        make_dir(train_dir)
        sync_from_dir(train_uri, train_dir)

        # Get zip file for each group, and unzip them into chip_dir.
        chip_dir = join(tmp_dir, 'chips')
        make_dir(chip_dir)
        for zip_uri in list_paths(chip_uri, 'zip'):
            zip_path = download_if_needed(zip_uri, tmp_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(chip_dir)

        # Setup data loader.
        def get_label_path(im_path):
            return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

        size = self.task_config.chip_size
        classes = ['nodata'] + self.task_config.class_map.get_class_names()
        data = (SegmentationItemList.from_folder(chip_dir).split_by_folder(
            train='train-img', valid='val-img').label_from_func(
                get_label_path,
                classes=classes).transform(get_transforms(),
                                           size=size,
                                           tfm_y=True).databunch(bs=bs))
        print(data)

        if debug:
            # We make debug chips during the run-time of the train command
            # rather than the chip command
            # because this is a better test (see "visualize just before the net"
            # in https://karpathy.github.io/2019/04/25/recipe/), and because
            # it's more convenient since we have the databunch here.
            # TODO make color map based on colors in class_map
            # TODO get rid of white frame
            # TODO zip them
            def _make_debug_chips(split):
                debug_chips_dir = join(train_uri,
                                       '{}-debug-chips'.format(split))
                make_dir(debug_chips_dir)
                ds = data.train_ds if split == 'train' else data.valid_ds
                for i, (x, y) in enumerate(ds):
                    x.show(y=y)
                    plt.savefig(join(debug_chips_dir, '{}.png'.format(i)))
                    plt.close()

            _make_debug_chips('train')
            _make_debug_chips('val')

        # Setup learner.
        metrics = [semseg_acc]
        learn = unet_learner(data,
                             model_arch,
                             metrics=metrics,
                             wd=wd,
                             bottle=True)
        learn.unfreeze()

        if fp16 and torch.cuda.is_available():
            # This loss_scale works for Resnet 34 and 50. You might need to adjust this
            # for other models.
            learn = learn.to_fp16(loss_scale=256)

        # Setup ability to resume training if model exists.
        # This hack won't properly set the learning rate as a function of
        # epochs when resuming.
        learner_path = join(train_dir, 'learner.pth')
        log_path = join(train_dir, 'log')

        start_epoch = 0
        if isfile(learner_path):
            print('Loading saved model...')
            start_epoch = get_last_epoch(str(log_path) + '.csv') + 1
            if start_epoch >= num_epochs:
                print('Training is already done. If you would like to re-train'
                      ', delete the previous results of training in '
                      '{}.'.format(train_uri))
                return

            learn.load(learner_path[:-4])
            print('Resuming from epoch {}'.format(start_epoch))
            print(
                'Note: fastai does not support a start_epoch, so epoch 0 below '
                'corresponds to {}'.format(start_epoch))
        epochs_left = num_epochs - start_epoch

        # Setup callbacks and train model.
        callbacks = [
            SaveModelCallback(learn, name=learner_path[:-4]),
            MyCSVLogger(learn, filename=log_path, start_epoch=start_epoch),
            SyncCallback(train_dir, train_uri, sync_interval)
        ]
        learn.fit(epochs_left, lr, callbacks=callbacks)

        # Export model for inference
        model_uri = self.config['model_uri']
        model_path = get_local_path(model_uri, tmp_dir)
        learn.export(model_path)

        # Sync output to cloud.
        sync_to_dir(train_dir, train_uri)
Example 15
    def train(self, tmp_dir):
        """Train a model.

        This downloads any previous output saved to the train_uri,
        starts training (or resumes from a checkpoint), periodically
        syncs contents of train_dir to train_uri, and syncs once more
        after training finishes.

        Args:
            tmp_dir: (str) path to temp directory
        """
        self.log_options()

        # Sync output of previous training run from cloud.
        train_uri = self.backend_opts.train_uri
        train_dir = get_local_path(train_uri, tmp_dir)
        make_dir(train_dir)
        sync_from_dir(train_uri, train_dir)

        # Get zip file for each group, and unzip them into chip_dir.
        chip_dir = join(tmp_dir, 'chips')
        make_dir(chip_dir)
        for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
            zip_path = download_if_needed(zip_uri, tmp_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(chip_dir)

        # Setup data loader.
        def get_label_path(im_path):
            return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

        size = self.task_config.chip_size
        class_map = self.task_config.class_map
        classes = class_map.get_class_names()
        if 0 not in class_map.get_keys():
            classes = ['nodata'] + classes
        num_workers = 0 if self.train_opts.debug else 4

        data = (SegmentationItemList.from_folder(chip_dir)
                .split_by_folder(train='train-img', valid='val-img'))
        train_count = None
        if self.train_opts.train_count is not None:
            train_count = min(len(data.train), self.train_opts.train_count)
        elif self.train_opts.train_prop != 1.0:
            train_count = int(round(self.train_opts.train_prop * len(data.train)))
        train_items = data.train.items
        if train_count is not None:
            train_inds = np.random.permutation(np.arange(len(data.train)))[0:train_count]
            train_items = train_items[train_inds]
        items = np.concatenate([train_items, data.valid.items])

        data = (SegmentationItemList(items, chip_dir)
                .split_by_folder(train='train-img', valid='val-img')
                .label_from_func(get_label_path, classes=classes)
                .transform(get_transforms(flip_vert=self.train_opts.flip_vert),
                           size=size, tfm_y=True)
                .databunch(bs=self.train_opts.batch_sz,
                           num_workers=num_workers))
        print(data)

        # Setup learner.
        ignore_idx = 0
        metrics = [
            Precision(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
            Recall(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
            FBeta(average='weighted', clas_idx=1, beta=1, ignore_idx=ignore_idx)]
        model_arch = getattr(models, self.train_opts.model_arch)
        learn = unet_learner(
            data, model_arch, metrics=metrics, wd=self.train_opts.weight_decay,
            bottle=True, path=train_dir)
        learn.unfreeze()

        if self.train_opts.mixed_prec and torch.cuda.is_available():
            # This loss_scale works for Resnet 34 and 50. You might need to adjust this
            # for other models.
            learn = learn.to_fp16(loss_scale=256)

        # Setup callbacks and train model.
        model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

        pretrained_uri = self.backend_opts.pretrained_uri
        if pretrained_uri:
            print('Loading weights from pretrained_uri: {}'.format(
                pretrained_uri))
            pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
            learn.model.load_state_dict(
                torch.load(pretrained_path, map_location=learn.data.device),
                strict=False)

        # Save every epoch so that resume functionality provided by
        # TrackEpochCallback will work.
        callbacks = [
            TrackEpochCallback(learn),
            MySaveModelCallback(learn, every='epoch'),
            MyCSVLogger(learn, filename='log'),
            ExportCallback(learn, model_path, monitor='f_beta'),
            SyncCallback(train_dir, self.backend_opts.train_uri,
                         self.train_opts.sync_interval)
        ]

        oversample = self.train_opts.oversample
        if oversample:
            weights = get_oversampling_weights(
                data.train_ds, oversample['rare_class_ids'],
                oversample['rare_target_prop'])
            oversample_callback = OverSamplingCallback(learn, weights=weights)
            callbacks.append(oversample_callback)

        if self.train_opts.debug:
            if oversample:
                oversample_callback.on_train_begin()
            make_debug_chips(data, class_map, tmp_dir, train_uri)

        if self.train_opts.log_tensorboard:
            callbacks.append(TensorboardLogger(learn, 'run'))

        if self.train_opts.run_tensorboard:
            log.info('Starting tensorboard process')
            log_dir = join(train_dir, 'logs', 'run')
            tensorboard_process = Popen(
                ['tensorboard', '--logdir={}'.format(log_dir)])
            terminate_at_exit(tensorboard_process)

        lr = self.train_opts.lr
        num_epochs = self.train_opts.num_epochs
        if self.train_opts.one_cycle:
            if lr is None:
                learn.lr_find()
                learn.recorder.plot(suggestion=True, return_fig=True)
                lr = learn.recorder.min_grad_lr
                print('lr_find() found lr: {}'.format(lr))
            learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
        else:
            learn.fit(num_epochs, lr, callbacks=callbacks)

        if self.train_opts.run_tensorboard:
            tensorboard_process.terminate()

        # Since model is exported every epoch, we need some other way to
        # show that training is finished.
        str_to_file('done!', self.backend_opts.train_done_uri)

        # Sync output to cloud.
        sync_to_dir(train_dir, self.backend_opts.train_uri)
Example 16
    def train(self, tmp_dir):
        """Train a model."""
        self.print_options()

        # Sync output of previous training run from cloud.
        train_uri = self.backend_opts.train_uri
        train_dir = get_local_path(train_uri, tmp_dir)
        make_dir(train_dir)
        sync_from_dir(train_uri, train_dir)
        '''
            Get zip file for each group, and unzip them into chip_dir in a
            way that works well with FastAI.

            The resulting directory structure would be:
            <chip_dir>/
                train/
                    training-<uuid1>/
                        <class1>/
                            ...
                        <class2>/
                            ...
                        ...
                    training-<uuid2>/
                        <class1>/
                            ...
                        <class2>/
                            ...
                        ...
                    ...
                val/
                    validation-<uuid1>/
                        <class1>/
                            ...
                        <class2>/
                            ...
                        ...
                    validation-<uuid2>/
                        <class1>/
                            ...
                        <class2>/
                            ...
                        ...
                    ...

        '''
        chip_dir = join(tmp_dir, 'chips/')
        make_dir(chip_dir)
        for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
            zip_name = Path(zip_uri).name
            if zip_name.startswith('train'):
                extract_dir = chip_dir + 'train/'
            elif zip_name.startswith('val'):
                extract_dir = chip_dir + 'val/'
            else:
                continue
            zip_path = download_if_needed(zip_uri, tmp_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(extract_dir)

        # Setup data loader.
        def get_label_path(im_path):
            return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

        size = self.task_config.chip_size
        class_map = self.task_config.class_map
        classes = class_map.get_class_names()
        num_workers = 0 if self.train_opts.debug else 4
        tfms = get_transforms(flip_vert=self.train_opts.flip_vert)

        def get_data(train_sampler=None):
            data = (ImageList.from_folder(chip_dir).split_by_folder(
                train='train', valid='val').label_from_folder().transform(
                    tfms, size=size).databunch(
                        bs=self.train_opts.batch_sz,
                        num_workers=num_workers,
                    ))
            return data

        data = get_data()

        if self.train_opts.debug:
            make_debug_chips(data, class_map, tmp_dir, train_uri)

        # Setup learner.
        ignore_idx = -1
        metrics = [
            Precision(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
            Recall(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
            FBeta(average='weighted',
                  clas_idx=1,
                  beta=1,
                  ignore_idx=ignore_idx)
        ]
        model_arch = getattr(models, self.train_opts.model_arch)
        learn = cnn_learner(data,
                            model_arch,
                            metrics=metrics,
                            wd=self.train_opts.weight_decay,
                            path=train_dir)

        learn.unfreeze()

        if self.train_opts.fp16 and torch.cuda.is_available():
            # This loss_scale works for Resnet 34 and 50. You might need to adjust this
            # for other models.
            learn = learn.to_fp16(loss_scale=256)

        # Setup callbacks and train model.
        model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

        pretrained_uri = self.backend_opts.pretrained_uri
        if pretrained_uri:
            print('Loading weights from pretrained_uri: {}'.format(
                pretrained_uri))
            pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
            learn.model.load_state_dict(torch.load(
                pretrained_path, map_location=learn.data.device),
                                        strict=False)

        # Save every epoch so that resume functionality provided by
        # TrackEpochCallback will work.
        callbacks = [
            TrackEpochCallback(learn),
            MySaveModelCallback(learn, every='epoch'),
            MyCSVLogger(learn, filename='log'),
            ExportCallback(learn, model_path, monitor='f_beta'),
            SyncCallback(train_dir, self.backend_opts.train_uri,
                         self.train_opts.sync_interval)
        ]

        lr = self.train_opts.lr
        num_epochs = self.train_opts.num_epochs
        if self.train_opts.one_cycle:
            if lr is None:
                learn.lr_find()
                learn.recorder.plot(suggestion=True, return_fig=True)
                lr = learn.recorder.min_grad_lr
                print('lr_find() found lr: {}'.format(lr))
            learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
        else:
            learn.fit(num_epochs, lr, callbacks=callbacks)

        # Since model is exported every epoch, we need some other way to
        # show that training is finished.
        str_to_file('done!', self.backend_opts.train_done_uri)

        # Sync output to cloud.
        sync_to_dir(train_dir, self.backend_opts.train_uri)
Example 17
    def train(self, tmp_dir: str) -> None:
        """Train a DeepLab model the task and backend config.

        Args:
            tmp_dir: (str) temporary directory to use

        Returns:
             None
        """
        train_py = self.backend_config.script_locations.train_py
        eval_py = self.backend_config.script_locations.eval_py
        export_py = self.backend_config.script_locations.export_py

        # Setup local input and output directories
        log.info('Setting up local input and output directories')
        train_logdir = self.backend_config.training_output_uri
        train_logdir_local = get_local_path(train_logdir, tmp_dir)
        dataset_dir = get_record_dir(self.backend_config.training_data_uri,
                                     TRAIN)
        dataset_dir_local = get_local_path(dataset_dir, tmp_dir)
        make_dir(tmp_dir)
        make_dir(train_logdir_local)
        make_dir(dataset_dir_local)

        # Download training data
        log.info('Downloading training data')
        for record_file in list_paths(dataset_dir):
            download_if_needed(record_file, tmp_dir)

        # Download and untar initial checkpoint.
        log.info('Downloading and untarring initial checkpoint')
        tf_initial_checkpoints_uri = self.backend_config.pretrained_model_uri
        download_if_needed(tf_initial_checkpoints_uri, tmp_dir)
        tfic_tarball = get_local_path(tf_initial_checkpoints_uri, tmp_dir)
        tfic_dir = os.path.dirname(tfic_tarball)
        with tarfile.open(tfic_tarball, 'r:gz') as tar:
            tar.extractall(tfic_dir)
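        # TF checkpoints are referenced by a path prefix; find the .index file
        # and strip its extension to recover e.g. '<dir>/model.ckpt'.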
        tfic_ckpt = glob.glob('{}/*/*.index'.format(tfic_dir))[0]
        tfic_ckpt = tfic_ckpt[0:-len('.index')]

        # Restart support
        train_restart_dir = self.backend_config.train_options.train_restart_dir
        if not isinstance(train_restart_dir, str) or len(train_restart_dir) == 0:
            train_restart_dir = train_logdir

        # Get output from potential previous run so we can resume training,
        # unless the model is being replaced.
        if not self.backend_config.train_options.replace_model:
            sync_from_dir(train_restart_dir, train_logdir_local)
        else:
            if os.path.exists(train_logdir_local):
                shutil.rmtree(train_logdir_local)
            make_dir(train_logdir_local)

        # Periodically synchronize with remote
        sync = start_sync(
            train_logdir_local,
            train_logdir,
            sync_interval=self.backend_config.train_options.sync_interval)

        with sync:
            # Setup TFDL config
            tfdl_config = json_format.ParseDict(
                self.backend_config.tfdl_config, TrainingParametersMsg())
            log.info('tfdl_config={}'.format(tfdl_config))
            log.info('Training steps={}'.format(
                tfdl_config.training_number_of_steps))

            # Additional training options
            max_class = max(
                list(map(lambda c: c.id, self.class_map.get_items())))
            num_classes = len(self.class_map.get_items())
            num_classes = max(max_class, num_classes) + 1
            (train_args, train_env) = get_training_args(
                train_py, train_logdir_local, tfic_ckpt, dataset_dir_local,
                num_classes, tfdl_config)

            # Start training
            log.info('Starting training process')
            log.info(' '.join(train_args))
            train_process = Popen(train_args, env=train_env)
            terminate_at_exit(train_process)

            if self.backend_config.train_options.do_monitoring:
                # Start tensorboard
                log.info('Starting tensorboard process')
                tensorboard_process = Popen(
                    ['tensorboard', '--logdir={}'.format(train_logdir_local)])
                terminate_at_exit(tensorboard_process)

            if self.backend_config.train_options.do_eval:
                # Start eval script
                log.info('Starting eval script')
                eval_logdir = train_logdir_local
                eval_args = get_evaluation_args(eval_py, train_logdir_local,
                                                dataset_dir_local, eval_logdir,
                                                tfdl_config)
                eval_process = Popen(eval_args, env=train_env)
                terminate_at_exit(eval_process)

            # Wait for training and tensorboard
            log.info('Waiting for training and tensorboard processes')
            train_process.wait()
            if self.backend_config.train_options.do_monitoring:
                tensorboard_process.terminate()

            # Export frozen graph
            log.info(
                'Exporting frozen graph ({}/model)'.format(train_logdir_local))
            export_args = get_export_args(export_py, train_logdir_local,
                                          num_classes, tfdl_config)
            export_process = Popen(export_args)
            terminate_at_exit(export_process)
            export_process.wait()

            # Package up the model files for usage as fine tuning checkpoints
            fine_tune_checkpoint_name = self.backend_config.fine_tune_checkpoint_name
            latest_checkpoints = get_latest_checkpoint(train_logdir_local)
            model_checkpoint_files = glob.glob(
                '{}*'.format(latest_checkpoints))
            inference_graph_path = os.path.join(train_logdir_local, 'model')

            # Use a fresh temp directory to avoid shadowing the tmp_dir argument.
            with RVConfig.get_tmp_dir() as model_tmp_dir:
                model_dir = os.path.join(model_tmp_dir,
                                         fine_tune_checkpoint_name)
                make_dir(model_dir)
                model_tar = os.path.join(
                    train_logdir_local,
                    '{}.tar.gz'.format(fine_tune_checkpoint_name))
                shutil.copy(inference_graph_path,
                            '{}/frozen_inference_graph.pb'.format(model_dir))
                for path in model_checkpoint_files:
                    shutil.copy(path, model_dir)
                with tarfile.open(model_tar, 'w:gz') as tar:
                    tar.add(model_dir, arcname=os.path.basename(model_dir))

        # Perform final sync
        sync_to_dir(train_logdir_local, train_logdir, delete=False)
Example 18
    def exp_main(self, raw_uri, root_uri, test=False):
        """Run an experiment on the Spacenet Vegas building dataset.

        This is a simple example of how to do semantic segmentation on data that
        doesn't require any pre-processing or special permission to access.

        Args:
            raw_uri: (str) directory of raw data (the root of the Spacenet dataset)
            root_uri: (str) root directory for experiment output
            test: (bool) if True, run a very small experiment as a test and generate
                debug output
        """
        # Specify the location of the raw data
        base_uri = join(raw_uri, 'spacenet/SN2_buildings/train/AOI_2_Vegas')
        # The images and labels are in two separate directories within the base_uri
        raster_uri = join(base_uri, 'PS-RGB')
        label_uri = join(base_uri, 'geojson_buildings')
        # The tiff (raster) and geojson (label) files follow a naming convention
        # of '[prefix]_[image id]'. The prefix indicates the type of data and the
        # image id indicates which scene each is associated with.
        raster_fn_prefix = 'SN2_buildings_train_AOI_2_Vegas_PS-RGB_img'
        label_fn_prefix = 'SN2_buildings_train_AOI_2_Vegas_geojson_buildings_img'
        # Find all of the image ids that have associated images and labels. Collect
        # these values to use as our scene ids.
        label_paths = list_paths(label_uri, ext='.geojson')
        label_re = re.compile(r'.*{}(\d+)\.geojson'.format(label_fn_prefix))
        scene_ids = [
            label_re.match(label_path).group(1) for label_path in label_paths
        ]

        # Set some training parameters:
        # The exp_id will be the label associated with this experiment; it will
        # be used to name the experiment config json.
        exp_id = 'spacenet-simple-seg'
        # Number of times to go through the entire dataset during training.
        num_epochs = 2
        # Number of images in each batch
        batch_size = 8
        # Specify whether or not to make debug chips (a zipped sample of png chips
        # that you can examine to help debug the chipping process)
        debug = False

        # This experiment includes an option to run a small test experiment before
        # running the whole thing. You can set this using the 'test' parameter. If
        # this parameter is set to True it will run a tiny test example with a new
        # experiment id. This will be small enough to run locally. It is recommended
        # to run a test example locally before submitting the whole experiment
        # to AWS Batch.
        test = str_to_bool(test)
        if test:
            exp_id += '-test'
            num_epochs = 1
            batch_size = 2
            debug = True
            scene_ids = scene_ids[0:10]

        # Split the data into training and validation sets:
        # Randomize the order of all scene ids
        random.seed(5678)
        scene_ids = sorted(scene_ids)
        random.shuffle(scene_ids)
        # Workaround to handle scene 1000 missing on S3.
        if '1000' in scene_ids:
            scene_ids.remove('1000')
        # Figure out how many scenes make up 80% of the whole set
        num_train_ids = round(len(scene_ids) * 0.8)
        # Split the scene ids into training and validation lists
        train_ids = scene_ids[0:num_train_ids]
        val_ids = scene_ids[num_train_ids:]

        # The TaskConfigBuilder constructs a child class of TaskConfig that
        # corresponds to the type of computer vision task you are taking on.
        # This experiment includes a semantic segmentation task but Raster
        # Vision also has backends for object detection and chip classification.
        # Before building the task config you can also set parameters using
        # 'with_' methods. In the example below we set the chip size, the
        # pixel class names and colors, and additional chip options.
        task = rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION) \
                            .with_chip_size(300) \
                            .with_classes({
                                'Building': (1, 'orange'),
                                'Background': (2, 'black')
                            }) \
                            .with_chip_options(
                                chips_per_scene=9,
                                debug_chip_probability=0.25,
                                negative_survival_probability=1.0,
                                target_classes=[1],
                                target_count_threshold=1000) \
                            .build()

        # Next we will create a backend that is built on top of a third-party
        # deep learning library. In this case we will construct the
        # BackendConfig for the pytorch semantic segmentation backend.
        backend = rv.BackendConfig.builder(rv.PYTORCH_SEMANTIC_SEGMENTATION) \
            .with_task(task) \
            .with_train_options(
                lr=1e-4,
                batch_size=batch_size,
                num_epochs=num_epochs,
                model_arch='resnet50',
                debug=debug) \
            .build()

        # We will use this function to create a list of scenes that we will pass
        # to the DataSetConfig builder.
        def make_scene(id):
            """Make a SceneConfig object for each image/label pair

            Args:
                id (str): The id that corresponds to both the .tiff image source
                    and .geojson label source for a given scene

            Returns:
                rv.data.SceneConfig: a SceneConfig object which is composed of
                    images, labels and optionally AOIs
            """
            # Find the uri for the image associated with this id.
            train_image_uri = os.path.join(
                raster_uri, '{}{}.tif'.format(raster_fn_prefix, id))

            # Construct a raster source from an image uri that can be handled by Rasterio.
            # We also specify the order of image channels by their indices and add a
            # stats transformer which normalizes pixel values into uint8.
            raster_source = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE) \
                .with_uri(train_image_uri) \
                .with_channel_order([0, 1, 2]) \
                .with_stats_transformer() \
                .build()

            # Next create a label source config to pair with the raster source:
            # define the geojson label source uri
            vector_source = os.path.join(
                label_uri, '{}{}.geojson'.format(label_fn_prefix, id))

            # Since this is a semantic segmentation experiment and the labels
            # are distributed in a vector-based GeoJSON format, we need to rasterize
            # the labels. We create a RasterSourceConfig builder using
            # `rv.RASTERIZED_SOURCE`
            # indicating that it will come from a vector source. We then specify the uri
            # of the vector source and (in the 'with_rasterizer_options' method) the id
            # of the pixel class we would like to use as background.
            label_raster_source = rv.RasterSourceConfig.builder(rv.RASTERIZED_SOURCE) \
                .with_vector_source(vector_source) \
                .with_rasterizer_options(2) \
                .build()

            # Create a semantic segmentation label source from the rasterized
            # source config that we built above.
            label_source = rv.LabelSourceConfig.builder(rv.SEMANTIC_SEGMENTATION) \
                .with_raster_source(label_raster_source) \
                .build()

            # Finally we can build a scene config object using the scene id and the
            # configs we just defined
            scene = rv.SceneConfig.builder() \
                .with_task(task) \
                .with_id(id) \
                .with_raster_source(raster_source) \
                .with_label_source(label_source) \
                .build()

            return scene

        # Create lists of train and test scene configs
        train_scenes = [make_scene(id) for id in train_ids]
        val_scenes = [make_scene(id) for id in val_ids]

        # Construct a DataSet config using the lists of train and
        # validation scenes
        dataset = rv.DatasetConfig.builder() \
            .with_train_scenes(train_scenes) \
            .with_validation_scenes(val_scenes) \
            .build()

        # We will need to convert this imagery from uint16 to uint8
        # in order to use it. We specified that this conversion should take place
        # when we built the train raster source but that process will require
        # dataset-level statistics. To get these stats we need to create an
        # analyzer.
        analyzer = rv.AnalyzerConfig.builder(rv.STATS_ANALYZER) \
                                    .build()

        # We use the previously-constructed configs to create the constituent
        # parts of the experiment. We also give the builder strings that define
        # the experiment id and root uri. The root uri indicates where all
        # of the output will be written.
        experiment = rv.ExperimentConfig.builder() \
                                        .with_id(exp_id) \
                                        .with_task(task) \
                                        .with_backend(backend) \
                                        .with_analyzer(analyzer) \
                                        .with_dataset(dataset) \
                                        .with_root_uri(root_uri) \
                                        .build()

        # Return one or more experiment configs to run the experiment(s)
        return experiment
Example 19
    def exp_main(self, test=False):
        # docker filepath mounted to my data directory
        base_uri = '/opt/data/labels2'

        raster_uri = base_uri  # rasters and labels in same directory for now
        label_uri = base_uri

        # Find all of the image ids that have associated images and labels. Collect
        # these values to use as our scene ids.
        # TODO use PV Array dataframe to select these
        label_paths = list_paths(label_uri, ext='.geojson')
        scene_ids = [x.split('.')[-2].split('/')[-1] for x in label_paths]

        scene2_ids = [
            'so9051_rgb_250_04', 'so9265_rgb_250_05', 'sp3590_rgb_250_04',
            'sj7304_rgb_250_04', 'su1385_rgb_250_06', 'st0709_rgb_250_05',
            'sj9004_rgb_250_05', 'st8022_rgb_250_05', 'st8303_rgb_250_05',
            'sj9402_rgb_250_05', 'so9078_rgb_250_06', 'sj9003_rgb_250_05',
            'sk0003_rgb_250_05', 'st8468_rgb_250_04', 'st6980_rgb_250_04',
            'su0883_rgb_250_05', 'su0983_rgb_250_05', 'so9249_rgb_250_05',
            'su1478_rgb_250_04', 'su1377_rgb_250_04', 'sj9002_rgb_250_06',
            'sj8903_rgb_250_04', 'sj9902_rgb_250_05', 'sj9602_rgb_250_05',
            'tg2827_rgb_250_04', 'sj9702_rgb_250_05', 'sj9803_rgb_250_04',
            'sj9802_rgb_250_05', 'sk0504_rgb_250_04', 'sk0302_rgb_250_05',
            'sk0306_rgb_250_04', 'sk0206_rgb_250_04', 'sk0207_rgb_250_04',
            'sk0503_rgb_250_04', 'sj9903_rgb_250_04', 'sk0202_rgb_250_06',
            'sk0309_rgb_250_03', 'sk0605_rgb_250_04', 'sk0405_rgb_250_04',
            'sk0404_rgb_250_04', 'sk0502_rgb_250_05', 'st5071_rgb_250_05',
            'sp3293_rgb_250_03', 'sy7691_rgb_250_05', 'sp3294_rgb_250_03',
            'sp3892_rgb_250_05', 'sp3690_rgb_250_04', 'st9979_rgb_250_05',
            'se6154_rgb_250_03', 'so8476_rgb_250_06', 'so8072_rgb_250_04',
            'so7972_rgb_250_04', 'sp3491_rgb_250_03', 'sp3490_rgb_250_03',
            'sp3291_rgb_250_03', 'sp3292_rgb_250_03', 'sp3492_rgb_250_03',
            'sk0212_rgb_250_03', 'so7878_rgb_250_06', 'tl1239_rgb_250_03',
            'su0972_rgb_250_03', 'st1532_rgb_250_04', 'so7556_rgb_250_05',
            'st7091_rgb_250_07', 'sn2040_rgb_250_04', 'so7371_rgb_250_04',
            'tl6064_rgb_250_05', 'so9255_rgb_250_05', 'st1826_rgb_250_04',
            'st1528_rgb_250_04', 'st1629_rgb_250_04', 'st0727_rgb_250_04',
            'st0827_rgb_250_04', 'st0928_rgb_250_04', 'st0930_rgb_250_04',
            'st0929_rgb_250_04', 'st0832_rgb_250_05', 'tl1750_rgb_250_03',
            'st2322_rgb_250_05', 'st1623_rgb_250_04', 'st1523_rgb_250_04',
            'st1624_rgb_250_04', 'st1424_rgb_250_04', 'st1421_rgb_250_05',
            'sp3793_rgb_250_04', 'sp3792_rgb_250_04', 'sj9912_rgb_250_03',
            'sk2347_rgb_250_05', 'sp3391_rgb_250_03', 'tl1846_rgb_250_03',
            'sp5177_rgb_250_03', 'sn3251_rgb_250_04', 'sp3693_rgb_250_04',
            'st2014_rgb_250_06', 'st2015_rgb_250_06', 'st2115_rgb_250_05',
            'st2114_rgb_250_05', 'sn4257_rgb_250_04', 'su4223_rgb_250_04',
            'su4323_rgb_250_04', 'tl3068_rgb_250_04', 'sp5178_rgb_250_03',
            'sp3791_rgb_250_04', 'st3689_rgb_250_03', 'st3789_rgb_250_03',
            'st0411_rgb_250_04', 'st0212_rgb_250_04', 'st0112_rgb_250_04',
            'st0211_rgb_250_04', 'st0111_rgb_250_04', 'st0209_rgb_250_05',
            'st0210_rgb_250_05', 'sj6714_rgb_250_04', 'sp3893_rgb_250_05',
            'su6712_rgb_250_04', 'su6713_rgb_250_04', 'st9363_rgb_250_04',
            'st9463_rgb_250_04', 'nr3059_rgb_250_03', 'st8576_rgb_250_03',
            'sp7948_rgb_250_04', 'sp6138_rgb_250_07', 'tl2276_rgb_250_04',
            'sm9817_rgb_250_04', 'sm9816_rgb_250_04', 'sm9716_rgb_250_04',
            'sm9616_rgb_250_04', 'sm9818_rgb_250_04', 'sm9009_rgb_250_04',
            'sm9721_rgb_250_05', 'sm9720_rgb_250_05', 'sm9101_rgb_250_04',
            'sm9201_rgb_250_04', 'sm9010_rgb_250_04', 'sm9109_rgb_250_04',
            'sn6502_rgb_250_04', 'sn6601_rgb_250_04', 'sn6201_rgb_250_04',
            'sn6202_rgb_250_04', 'st6788_rgb_250_05', 'st6688_rgb_250_05',
            'st6689_rgb_250_06', 'su0807_rgb_250_05', 'su0806_rgb_250_05',
            'sz0998_rgb_250_05', 'sz1099_rgb_250_05', 'su3743_rgb_250_04',
            'su3744_rgb_250_04', 'su6509_rgb_250_04', 'su6409_rgb_250_04',
            'su6410_rgb_250_04', 'su5413_rgb_250_04', 'su2088_rgb_250_04',
            'su5703_rgb_250_04', 'su5603_rgb_250_04', 'su5604_rgb_250_04',
            'st7642_rgb_250_06', 'st7744_rgb_250_05', 'st6728_rgb_250_05',
            'st8558_rgb_250_04', 'st2735_rgb_250_04', 'tl4990_rgb_250_05',
            'sm7209_rgb_250_04', 'st8864_rgb_250_04', 'tg5013_rgb_250_04',
            'st1198_rgb_250_04', 'st1298_rgb_250_04', 'st1722_rgb_250_04',
            'tq1078_rgb_250_05', 'su6401_rgb_250_04', 'st8753_rgb_250_04',
            'st8455_rgb_250_05', 'st8660_rgb_250_04', 'st8760_rgb_250_04',
            'st8765_rgb_250_04', 'sp7638_rgb_250_05', 'tl6332_rgb_250_04',
            'st8705_rgb_250_05', 'sy3297_rgb_250_06', 'sy3498_rgb_250_06',
            'se3636_rgb_250_01', 'st6578_rgb_250_05', 'st6478_rgb_250_05',
            'st5479_rgb_250_06', 'se2931_rgb_250_02', 'sd6835_rgb_250_01',
            'st2228_rgb_250_05', 'st2227_rgb_250_05'
        ]

        # Experiment label, used to label config files
        exp_id = 'pv-detection-2'

        # Number of times passing a batch of images through the model
        num_steps = 1e4  # 1e5 takes too long
        # Number of images in each batch
        batch_size = 8
        # Specify whether or not to make debug chips (a zipped sample of png chips
        # that you can examine to help debug the chipping process)
        debug = True

        # This experiment includes an option to run a small test experiment
        # before running the whole thing. You can set this using the 'test'
        # parameter. If this parameter is set to True, it will run a tiny test
        # example with a new experiment id that is small enough to run
        # locally. It is recommended to run a test locally before submitting
        # the whole experiment to AWS Batch.
        test = str_to_bool(test)
        if test:
            print("***************** TEST MODE *****************")
            exp_id += '-test'
            num_steps = 100
            batch_size = 4
            debug = True
            scene_ids = scene_ids[0:5]

        # Split the data into training and validation sets:
        # Randomize the order of all scene ids
        random.seed(5678)
        scene_ids = sorted(scene_ids)
        random.shuffle(scene_ids)
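        # Sorting before shuffling with a fixed seed makes the 80/20 split
        # reproducible, independent of the order the ids were listed in.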

        # Figure out how many scenes make up 80% of the whole set
        num_train_ids = round(len(scene_ids) * 0.8)

        # Split the scene ids into training and validation lists
        train_ids = scene_ids[0:num_train_ids]
        val_ids = scene_ids[num_train_ids:]
        # train_ids = scene_ids
        # val_ids = scene_ids

        # ------------- TASK -------------

        task = rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION) \
            .with_chip_size(300) \
            .with_classes({
                'pv': (1, 'yellow'),
                'background': (2, 'black')
            }) \
            .with_chip_options(
                chips_per_scene=50,
                window_method='random_sample',
                debug_chip_probability=1,
                negative_survival_probability=0.01,
                target_classes=[1],
                target_count_threshold=1000) \
            .build()
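
        # Chip option semantics (assumed, per Raster Vision 0.9): with
        # 'random_sample', chips_per_scene windows are drawn per scene;
        # windows containing at least target_count_threshold pixels of the
        # target classes count as positives, while background-only windows
        # are kept with probability negative_survival_probability.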

        # ------------- BACKEND -------------
        # Configuration options for different models and tasks:
        # https://github.com/azavea/raster-vision/blob/60f741e30a016f25d2643a9b32916adb22e42d50/rastervision/backend/model_defaults.json

        backend = rv.BackendConfig.builder(rv.TF_DEEPLAB) \
            .with_task(task) \
            .with_debug(debug) \
            .with_batch_size(batch_size) \
            .with_num_steps(num_steps) \
            .with_model_defaults(rv.MOBILENET_V2) \
            .with_train_options(replace_model=False,
                                sync_interval=5) \
            .build()

        # ------------- Make Scenes -------------
        # We will use this function to create a list of scenes that we will pass
        # to the DataSetConfig builder.
        def make_scene(id):
            """Make a SceneConfig object for each image/label pair
            Args:
                id (str): The id that corresponds to both the .jpg image source
                    and .geojson label source for a given scene
            Returns:
                rv.data.SceneConfig: a SceneConfig object which is composed of
                    images, labels and optionally AOIs
            """
            # Find the uri for the image associated with this id
            train_image_uri = os.path.join(raster_uri, '{}.jpg'.format(id))
            # Construct a raster source from an image uri that can be handled by Rasterio.
            # We also specify the order of image channels by their indices and add a
            # stats transformer which normalizes pixel values into uint8.
            raster_source = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE) \
                .with_uri(train_image_uri) \
                .with_channel_order([0, 1, 2]) \
                .with_stats_transformer() \
                .build()

            # Next create a label source config to pair with the raster source:
            # define the geojson label source uri
            vector_source = os.path.join(label_uri, '{}.geojson'.format(id))

            # Since this is a semantic segmentation experiment and the labels
            # are distributed in a vector-based GeoJSON format, we need to
            # rasterize the labels. We create a RasterSourceConfig builder
            # using `rv.RASTERIZED_SOURCE`, indicating that the raster will
            # come from a vector source. We then specify the uri of the vector
            # source and (in the 'with_rasterizer_options' method) the id of
            # the pixel class we would like to use as background.
            label_raster_source = rv.RasterSourceConfig.builder(rv.RASTERIZED_SOURCE) \
                .with_vector_source(vector_source) \
                .with_rasterizer_options(2) \
                .build()

            # Create a semantic segmentation label source from the rasterized
            # source config that we just built.
            label_source = rv.LabelSourceConfig.builder(rv.SEMANTIC_SEGMENTATION) \
                .with_raster_source(label_raster_source) \
                .build()

            # Finally we can build a scene config object using the scene id and the
            # configs we just defined
            scene = rv.SceneConfig.builder() \
                .with_task(task) \
                .with_id(id) \
                .with_raster_source(raster_source) \
                .with_label_source(label_source) \
                .build()

            return scene

        # Create lists of train and validation scene configs
        train_scenes = [make_scene(id) for id in train_ids]
        val_scenes = [make_scene(id) for id in val_ids]

        # ------------- DATASET -------------
        # Construct a DataSet config using the lists of train and
        # validation scenes
        dataset = rv.DatasetConfig.builder() \
            .with_train_scenes(train_scenes) \
            .with_validation_scenes(val_scenes) \
            .build()

        # ------------- ANALYZE -------------
        # The stats transformer we attached to each raster source normalizes
        # pixel values into uint8, and that process requires dataset-level
        # statistics. To compute these stats we need to create an analyzer.
        # Use a small sample prob so this step doesn't take ages.
        analyzer = rv.AnalyzerConfig.builder(rv.STATS_ANALYZER) \
            .with_sample_prob(0.05) \
            .build()

        # ------------- EXPERIMENT -------------
        experiment = rv.ExperimentConfig.builder() \
            .with_id(exp_id) \
            .with_task(task) \
            .with_backend(backend) \
            .with_analyzer(analyzer) \
            .with_dataset(dataset) \
            .with_root_uri('/opt/data/rv/test3') \
            .build()

        return experiment
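
    # Usage sketch (assumed; not part of the original example): experiment
    # methods like this are typically launched with the Raster Vision 0.9
    # CLI, e.g.
    #
    #   rastervision run local -e <module_containing_this_experiment> \
    #       -a test True
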
    def train(self, tmp_dir):
        """Train a model."""
        self.print_options()

        # Sync output of previous training run from cloud.
        train_uri = self.backend_opts.train_uri
        train_dir = get_local_path(train_uri, tmp_dir)
        make_dir(train_dir)
        sync_from_dir(train_uri, train_dir)

        # Get zip file for each group, and unzip them into chip_dir.
        chip_dir = join(tmp_dir, 'chips')
        make_dir(chip_dir)
        for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
            zip_path = download_if_needed(zip_uri, tmp_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(chip_dir)

        # Setup data loader.
        train_images = []
        train_lbl_bbox = []
        for annotation_path in glob.glob(join(chip_dir, 'train/*.json')):
            images, lbl_bbox = get_annotations(annotation_path)
            train_images += images
            train_lbl_bbox += lbl_bbox

        val_images = []
        val_lbl_bbox = []
        for annotation_path in glob.glob(join(chip_dir, 'valid/*.json')):
            images, lbl_bbox = get_annotations(annotation_path)
            val_images += images
            val_lbl_bbox += lbl_bbox

        images = train_images + val_images
        lbl_bbox = train_lbl_bbox + val_lbl_bbox

        img2bbox = dict(zip(images, lbl_bbox))
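        # fastai calls get_y_func with an item whose .name is the chip
        # filename, so look up that file's boxes and labels in img2bbox.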
        get_y_func = lambda o: img2bbox[o.name]
        num_workers = 0 if self.train_opts.debug else 4
        data = ObjectItemList.from_folder(chip_dir)
        data = data.split_by_folder()
        data = data.label_from_func(get_y_func)
        data = data.transform(
            get_transforms(), size=self.task_config.chip_size, tfm_y=True)
        data = data.databunch(
            bs=self.train_opts.batch_sz, collate_fn=bb_pad_collate,
            num_workers=num_workers)
        print(data)

        if self.train_opts.debug:
            make_debug_chips(
                data, self.task_config.class_map, tmp_dir, train_uri)

        # Setup callbacks and train model.
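        # Anchor-box shapes for RetinaNet: each feature-map cell gets one
        # anchor per (ratio, scale) pair, i.e. 9 anchors here.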
        ratios = [1/2, 1, 2]
        scales = [1, 2**(-1/3), 2**(-2/3)]
        model_arch = getattr(models, self.train_opts.model_arch)
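        # Drop the classifier head (the last two layers) so the CNN serves as
        # a feature-extracting encoder for RetinaNet.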
        encoder = create_body(model_arch, cut=-2)
        model = RetinaNet(encoder, data.c, final_bias=-4)
        crit = RetinaNetFocalLoss(scales=scales, ratios=ratios)
        learn = Learner(data, model, loss_func=crit, path=train_dir)
        learn = learn.split(retina_net_split)

        model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

        pretrained_uri = self.backend_opts.pretrained_uri
        if pretrained_uri:
            print('Loading weights from pretrained_uri: {}'.format(
                pretrained_uri))
            pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
            learn.load(pretrained_path[:-4])

        callbacks = [
            TrackEpochCallback(learn),
            SaveModelCallback(learn, every='epoch'),
            MyCSVLogger(learn, filename='log'),
            ExportCallback(learn, model_path),
            SyncCallback(train_dir, self.backend_opts.train_uri,
                         self.train_opts.sync_interval)
        ]
        learn.unfreeze()
        learn.fit(self.train_opts.num_epochs, self.train_opts.lr,
                  callbacks=callbacks)

        # Since model is exported every epoch, we need some other way to
        # show that training is finished.
        str_to_file('done!', self.backend_opts.train_done_uri)

        # Sync output to cloud.
        sync_to_dir(train_dir, self.backend_opts.train_uri)
Example No. 21
    def exp_main(self, raw_uri, root_uri):
        """Run an experiment on the Spacenet Vegas building dataset.

        This is a simple example of how to do semantic segmentation on data that
        doesn't require any pre-processing or special permission to access.

        Args:
            raw_uri: (str) directory of raw data (the root of the Spacenet dataset)
            root_uri: (str) root directory for experiment output
        """
        raster_uri = join(raw_uri, 'MUL')
        label_uri = join(raw_uri, 'geojson/buildings')
        raster_fn_prefix = 'MUL_AOI_2_Vegas_img'
        label_fn_prefix = 'buildings_AOI_2_Vegas_img'
        label_paths = list_paths(label_uri, ext='.geojson')
        label_re = re.compile(r'.*{}(\d+)\.geojson'.format(label_fn_prefix))
        scene_ids = [
            label_re.match(label_path).group(1) for label_path in label_paths
        ]

        random.seed(5678)
        scene_ids = sorted(scene_ids)
        random.shuffle(scene_ids)
        # Workaround to handle scene 1000 missing on S3.
        if '1000' in scene_ids:
            scene_ids.remove('1000')
        num_train_ids = int(len(scene_ids) * 0.8)
        train_ids = scene_ids[0:num_train_ids]
        val_ids = scene_ids[num_train_ids:]

        exp_id = 'spacenet-simple-seg'
        chip_size = 162

        task = rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION) \
                            .with_chip_size(chip_size) \
                            .with_classes({
                                'Building': (1, 'orange'),
                                'Background': (2, 'black')
                            }) \
                            .with_chip_options(
                                chips_per_scene=1,
                                debug_chip_probability=0.25,
                                negative_survival_probability=1.0,
                                target_classes=[1],
                                target_count_threshold=1000) \
                            .build()

        config = {
            'band_count': 8,
            'num_generations': 50,
            'pop_size': 250,
            'num_individuals': 125,
            'num_offspring': 125,
            'mutation_rate': 0.3,
            'crossover_rate': 0.5,
            'debug': True
        }
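        # These train_opts are evolutionary-search hyperparameters for the
        # genetic-programming backend: num_individuals is mu and num_offspring
        # is lambda in a (mu + lambda) evolution strategy.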

        backend = rv.BackendConfig.builder(GP_SEMANTIC_SEGMENTATION) \
                                  .with_task(task) \
                                  .with_train_options(**config) \
                                  .build()

        def make_scene(id):
            train_image_uri = os.path.join(
                raster_uri, '{}{}.tif'.format(raster_fn_prefix, id))

            raster_source = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE) \
                .with_uri(train_image_uri) \
                .with_stats_transformer() \
                .build()

            vector_source = os.path.join(
                label_uri, '{}{}.geojson'.format(label_fn_prefix, id))
            label_raster_source = rv.RasterSourceConfig.builder(rv.RASTERIZED_SOURCE) \
                .with_vector_source(vector_source) \
                .with_rasterizer_options(2) \
                .build()

            label_source = rv.LabelSourceConfig.builder(rv.SEMANTIC_SEGMENTATION) \
                .with_raster_source(label_raster_source) \
                .build()

            scene = rv.SceneConfig.builder() \
                .with_task(task) \
                .with_id(id) \
                .with_raster_source(raster_source) \
                .with_label_source(label_source) \
                .build()

            return scene

        train_scenes = [make_scene(id) for id in train_ids]
        val_scenes = [make_scene(id) for id in val_ids]

        dataset = rv.DatasetConfig.builder() \
            .with_train_scenes(train_scenes) \
            .with_validation_scenes(val_scenes) \
            .build()

        # Need to use stats_analyzer because imagery is uint16.
        analyzer = rv.AnalyzerConfig.builder(rv.STATS_ANALYZER) \
                                    .build()

        experiment = rv.ExperimentConfig.builder() \
                                        .with_id(exp_id) \
                                        .with_task(task) \
                                        .with_backend(backend) \
                                        .with_analyzer(analyzer) \
                                        .with_dataset(dataset) \
                                        .with_root_uri(root_uri) \
                                        .build()

        return experiment
Example No. 22
    def train(self, tmp_dir):
        """Train a model.

        This downloads any previous output saved to the train_uri,
        starts training (or resumes from a checkpoint), periodically
        syncs the contents of train_dir to train_uri, and syncs once
        more after training finishes.

        Args:
            tmp_dir: (str) path to temp directory
        """
        self.log_options()

        # Sync output of previous training run from cloud.
        train_uri = self.backend_opts.train_uri
        train_dir = get_local_path(train_uri, tmp_dir)
        make_dir(train_dir)
        sync_from_dir(train_uri, train_dir)

        # Get zip file for each group, and unzip them into chip_dir.
        chip_dir = join(tmp_dir, 'chips')
        make_dir(chip_dir)
        for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
            zip_path = download_if_needed(zip_uri, tmp_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(chip_dir)

        # Setup data loader.
        batch_size = self.train_opts.batch_size
        chip_size = self.task_config.chip_size
        class_names = self.class_map.get_class_names()
        databunch = build_databunch(chip_dir, chip_size, batch_size,
                                    class_names)
        log.info(databunch)
        if self.train_opts.debug:
            make_debug_chips(databunch, self.class_map, tmp_dir, train_uri)

        # Setup model
        num_labels = len(databunch.label_names)
        model = get_model(self.train_opts.model_arch,
                          num_labels,
                          pretrained=True)
        model = model.to(self.device)
        model_path = join(train_dir, 'model')

        # Load weights from a pretrained model.
        pretrained_uri = self.backend_opts.pretrained_uri
        if pretrained_uri:
            log.info('Loading weights from pretrained_uri: {}'.format(
                pretrained_uri))
            pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
            model.load_state_dict(
                torch.load(pretrained_path, map_location=self.device))

        # Possibly resume training from checkpoint.
        start_epoch = 0
        train_state_path = join(train_dir, 'train_state.json')
        if isfile(train_state_path):
            log.info('Resuming from checkpoint: {}\n'.format(model_path))
            train_state = file_to_json(train_state_path)
            start_epoch = train_state['epoch'] + 1
            model.load_state_dict(
                torch.load(model_path, map_location=self.device))

        # Write header of log CSV file.
        metric_names = ['precision', 'recall', 'f1']
        log_path = join(train_dir, 'log.csv')
        if not isfile(log_path):
            with open(log_path, 'w') as log_file:
                log_writer = csv.writer(log_file)
                row = ['epoch', 'time', 'train_loss'] + metric_names
                log_writer.writerow(row)

        # Setup Tensorboard logging.
        if self.train_opts.log_tensorboard:
            log_dir = join(train_dir, 'tb-logs')
            make_dir(log_dir)
            tb_writer = SummaryWriter(log_dir=log_dir)
            if self.train_opts.run_tensorboard:
                log.info('Starting tensorboard process')
                tensorboard_process = Popen(
                    ['tensorboard', '--logdir={}'.format(log_dir)])
                terminate_at_exit(tensorboard_process)

        # Setup optimizer, loss, and LR scheduler.
        loss_fn = torch.nn.CrossEntropyLoss()
        lr = self.train_opts.lr
        opt = optim.Adam(model.parameters(), lr=lr)
        step_scheduler, epoch_scheduler = None, None
        num_epochs = self.train_opts.num_epochs

        if self.train_opts.one_cycle and num_epochs > 1:
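            # Approximate fastai's one-cycle policy with a single triangular
            # cycle: the LR ramps up for the first half of training, then
            # back down for the second half.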
            steps_per_epoch = len(databunch.train_ds) // batch_size
            total_steps = num_epochs * steps_per_epoch
            step_size_up = (num_epochs // 2) * steps_per_epoch
            step_size_down = total_steps - step_size_up
            step_scheduler = CyclicLR(opt,
                                      base_lr=lr / 10,
                                      max_lr=lr,
                                      step_size_up=step_size_up,
                                      step_size_down=step_size_down,
                                      cycle_momentum=False)
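            # When resuming from a checkpoint, fast-forward the scheduler so
            # the learning rate picks up where the interrupted run left off.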
            for _ in range(start_epoch * steps_per_epoch):
                step_scheduler.step()

        # Training loop.
        for epoch in range(start_epoch, num_epochs):
            # Train one epoch.
            log.info('-----------------------------------------------------')
            log.info('epoch: {}'.format(epoch))
            start = time.time()
            train_loss = train_epoch(model, self.device, databunch.train_dl,
                                     opt, loss_fn, step_scheduler)
            if epoch_scheduler:
                epoch_scheduler.step()
            log.info('train loss: {}'.format(train_loss))

            # Validate one epoch.
            metrics = validate_epoch(model, self.device, databunch.valid_dl,
                                     num_labels)
            log.info('validation metrics: {}'.format(metrics))

            # Print elapsed time for epoch.
            end = time.time()
            epoch_time = datetime.timedelta(seconds=end - start)
            log.info('epoch elapsed time: {}'.format(epoch_time))

            # Save model and state.
            torch.save(model.state_dict(), model_path)
            train_state = {'epoch': epoch}
            json_to_file(train_state, train_state_path)

            # Append to log CSV file.
            with open(log_path, 'a') as log_file:
                log_writer = csv.writer(log_file)
                row = [epoch, epoch_time, train_loss]
                row += [metrics[k] for k in metric_names]
                log_writer.writerow(row)

            # Write to Tensorboard log.
            if self.train_opts.log_tensorboard:
                for key, val in metrics.items():
                    tb_writer.add_scalar(key, val, epoch)
                tb_writer.add_scalar('train_loss', train_loss, epoch)
                for name, param in model.named_parameters():
                    tb_writer.add_histogram(name, param, epoch)

            if (train_uri.startswith('s3://')
                    and (((epoch + 1) % self.train_opts.sync_interval) == 0)):
                sync_to_dir(train_dir, train_uri)

        # Close Tensorboard.
        if self.train_opts.log_tensorboard:
            tb_writer.close()
            if self.train_opts.run_tensorboard:
                tensorboard_process.terminate()

        # Since model is exported every epoch, we need some other way to
        # show that training is finished.
        str_to_file('done!', self.backend_opts.train_done_uri)

        # Sync output to cloud.
        sync_to_dir(train_dir, self.backend_opts.train_uri)

    def train(self, tmp_dir):
        """Train a model."""
        self.print_options()

        # Sync output of previous training run from cloud.
        # This will either be local or S3. This allows restarting the job if it has been shut down.
        train_uri = self.backend_opts.train_uri
        train_dir = get_local_path(train_uri, tmp_dir)
        make_dir(train_dir)
        sync_from_dir(train_uri, train_dir)

        # Get zip file for each group, and unzip them into chip_dir.
        self.chip_dir = join(tmp_dir, 'chips')
        make_dir(self.chip_dir)

        train_chip_dir = self.chip_dir + '/train-img'
        train_truth_dir = self.chip_dir + '/train-labels'
        fitness_func = partial(fitness, train_chip_dir, train_truth_dir, self._toolbox.compile)
        self._toolbox.register("evaluate", fitness_func)
        # This is the key part -- this is how it knows where to get the chips from.
        # backend_opts comes from RV, and train_opts is where you can define backend-specific stuff.
        for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
            zip_path = download_if_needed(zip_uri, tmp_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(self.chip_dir)

        # Setup data loader.
        def get_label_path(im_path):
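            # Map '<group>-img/<file>' to '<group>-labels/<file>' by stripping
            # the 4-character '-img' suffix from the parent directory name.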
            return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

        class_map = self.task_config.class_map
        classes = class_map.get_class_names()
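        # If raster value 0 is not mapped to a class, prepend a 'nodata'
        # class so that class indices line up with raster values.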
        if 0 not in class_map.get_keys():
            classes = ['nodata'] + classes

        # Evolve
        # Set up hall of fame to track the best individual
        hof = tools.HallOfFame(1)

        # Set up debugging
        mstats = None
        if self.train_opts.debug:
            stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
            stats_size = tools.Statistics(len)
            mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
            mstats.register("averageaverage", np.mean)
            mstats.register("stdeviation", np.std)
            mstats.register("minimumstat", np.min)
            mstats.register("maximumstat", np.max)

        pop = self._toolbox.population(n=self.train_opts.pop_size)
        pop, log = algorithms.eaMuPlusLambda(
            pop,
            self._toolbox,
            self.train_opts.num_individuals,
            self.train_opts.num_offspring,
            self.train_opts.crossover_rate,
            self.train_opts.mutation_rate,
            self.train_opts.num_generations,
            stats=mstats,
            halloffame=hof,
            verbose=self.train_opts.debug
        )
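        # eaMuPlusLambda produces num_offspring children per generation via
        # crossover/mutation, then keeps the best num_individuals from the
        # combined pool of parents and children.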

        # Open question: what should the model output be, given that the
        # model here is just a string? Should it be a text file?
        # RV uses file-presence based caching to figure out whether a stage
        # has completed (kind of like Makefiles). Since a file is written
        # every epoch, something else must signal that training is finished,
        # so we write the best individual to both the done and model URIs.
        if self.train_opts.debug:
            print(str(hof[0]))
        str_to_file(str(hof[0]), self.backend_opts.train_done_uri)
        str_to_file(str(hof[0]), self.backend_opts.model_uri)

        # Sync output to cloud.
        sync_to_dir(train_dir, self.backend_opts.train_uri)
Example No. 24
    def exp_main(self, raw_uri, root_uri, test=False):
        """Run an experiment on the Spacenet Vegas building dataset.

        This is a simple example of how to do semantic segmentation on data that
        doesn't require any pre-processing or special permission to access.

        Args:
            raw_uri: (str) directory of raw data (the root of the Spacenet dataset)
            root_uri: (str) root directory for experiment output
            test: (bool) if True, run a very small experiment as a test and generate
                debug output
        """
        base_uri = join(
            raw_uri, 'SpaceNet_Buildings_Dataset_Round2/spacenetV2_Train/AOI_2_Vegas')
        raster_uri = join(base_uri, 'RGB-PanSharpen')
        label_uri = join(base_uri, 'geojson/buildings')
        raster_fn_prefix = 'RGB-PanSharpen_AOI_2_Vegas_img'
        label_fn_prefix = 'buildings_AOI_2_Vegas_img'
        label_paths = list_paths(label_uri, ext='.geojson')
        label_re = re.compile(r'.*{}(\d+)\.geojson'.format(label_fn_prefix))
        scene_ids = [
            label_re.match(label_path).group(1)
            for label_path in label_paths]

        random.seed(5678)
        scene_ids = sorted(scene_ids)
        random.shuffle(scene_ids)
        # Workaround to handle scene 1000 missing on S3.
        if '1000' in scene_ids:
            scene_ids.remove('1000')
        num_train_ids = int(len(scene_ids) * 0.8)
        train_ids = scene_ids[0:num_train_ids]
        val_ids = scene_ids[num_train_ids:]

        test = str_to_bool(test)
        exp_id = 'spacenet-simple-seg'
        num_epochs = 5
        batch_sz = 8
        debug = False
        chip_size = 300
        if test:
            exp_id += '-test'
            num_epochs = 2
            batch_sz = 1
            debug = True
            train_ids = ['12']
            val_ids = ['13']

        task = rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION) \
                            .with_chip_size(chip_size) \
                            .with_classes({
                                'Building': (1, 'orange'),
                                'Background': (2, 'black')
                            }) \
                            .with_chip_options(
                                chips_per_scene=9,
                                debug_chip_probability=0.25,
                                negative_survival_probability=1.0,
                                target_classes=[1],
                                target_count_threshold=1000) \
                            .build()

        config = {
            'bs': batch_sz,
            'num_epochs': num_epochs,
            'debug': debug,
            'lr': 1e-4
        }

        backend = rv.BackendConfig.builder(FASTAI_SEMANTIC_SEGMENTATION) \
                                  .with_task(task) \
                                  .with_config(config) \
                                  .build()

        def make_scene(id):
            train_image_uri = os.path.join(raster_uri,
                                           '{}{}.tif'.format(raster_fn_prefix, id))

            raster_source = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE) \
                .with_uri(train_image_uri) \
                .with_channel_order([0, 1, 2]) \
                .with_stats_transformer() \
                .build()

            vector_source = os.path.join(
                label_uri, '{}{}.geojson'.format(label_fn_prefix, id))
            label_raster_source = rv.RasterSourceConfig.builder(rv.RASTERIZED_SOURCE) \
                .with_vector_source(vector_source) \
                .with_rasterizer_options(2) \
                .build()

            label_source = rv.LabelSourceConfig.builder(rv.SEMANTIC_SEGMENTATION) \
                .with_raster_source(label_raster_source) \
                .build()

            scene = rv.SceneConfig.builder() \
                .with_task(task) \
                .with_id(id) \
                .with_raster_source(raster_source) \
                .with_label_source(label_source) \
                .build()

            return scene

        train_scenes = [make_scene(id) for id in train_ids]
        val_scenes = [make_scene(id) for id in val_ids]

        dataset = rv.DatasetConfig.builder() \
            .with_train_scenes(train_scenes) \
            .with_validation_scenes(val_scenes) \
            .build()

        # Need to use stats_analyzer because imagery is uint16.
        analyzer = rv.AnalyzerConfig.builder(rv.STATS_ANALYZER) \
                                    .build()

        experiment = rv.ExperimentConfig.builder() \
                                        .with_id(exp_id) \
                                        .with_task(task) \
                                        .with_backend(backend) \
                                        .with_analyzer(analyzer) \
                                        .with_dataset(dataset) \
                                        .with_root_uri(root_uri) \
                                        .build()

        return experiment
Example No. 25
    def exp_main(self, test=False):
        # Docker filepath mounted to my data directory
        base_uri = '/opt/data/labels2'

        raster_uri = base_uri # rasters and labels in same directory for now
        label_uri = base_uri

        # Find all of the image ids that have associated images and labels. Collect
        # these values to use as our scene ids.
        # TODO use PV Array dataframe to select these
        label_paths = list_paths(label_uri, ext='.geojson')
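        # Each path looks like '.../<scene_id>.geojson'; strip the directory
        # and extension to recover the scene id.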
        scene_ids = [x.split('.')[-2].split('/')[-1] for x in label_paths]

        # NOTE: this hand-curated list is defined but never used below;
        # the scene_ids parsed from label_paths drive the experiment.
        scene2_ids = [
         'so9051_rgb_250_04', 'so9265_rgb_250_05', 'sp3590_rgb_250_04',
         'sj7304_rgb_250_04', 'su1385_rgb_250_06', 'st0709_rgb_250_05',
         'sj9004_rgb_250_05', 'st8022_rgb_250_05', 'st8303_rgb_250_05',
         'sj9402_rgb_250_05', 'so9078_rgb_250_06', 'sj9003_rgb_250_05',
         'sk0003_rgb_250_05', 'st8468_rgb_250_04', 'st6980_rgb_250_04',
         'su0883_rgb_250_05', 'su0983_rgb_250_05', 'so9249_rgb_250_05',
         'su1478_rgb_250_04', 'su1377_rgb_250_04', 'sj9002_rgb_250_06',
         'sj8903_rgb_250_04', 'sj9902_rgb_250_05', 'sj9602_rgb_250_05',
         'tg2827_rgb_250_04', 'sj9702_rgb_250_05', 'sj9803_rgb_250_04',
         'sj9802_rgb_250_05', 'sk0504_rgb_250_04', 'sk0302_rgb_250_05',
         'sk0306_rgb_250_04', 'sk0206_rgb_250_04', 'sk0207_rgb_250_04',
         'sk0503_rgb_250_04', 'sj9903_rgb_250_04', 'sk0202_rgb_250_06',
         'sk0309_rgb_250_03', 'sk0605_rgb_250_04', 'sk0405_rgb_250_04',
         'sk0404_rgb_250_04', 'sk0502_rgb_250_05', 'st5071_rgb_250_05',
         'sp3293_rgb_250_03', 'sy7691_rgb_250_05', 'sp3294_rgb_250_03',
         'sp3892_rgb_250_05', 'sp3690_rgb_250_04', 'st9979_rgb_250_05',
         'se6154_rgb_250_03', 'so8476_rgb_250_06', 'so8072_rgb_250_04',
         'so7972_rgb_250_04', 'sp3491_rgb_250_03', 'sp3490_rgb_250_03',
         'sp3291_rgb_250_03', 'sp3292_rgb_250_03', 'sp3492_rgb_250_03',
         'sk0212_rgb_250_03', 'so7878_rgb_250_06', 'tl1239_rgb_250_03',
         'su0972_rgb_250_03', 'st1532_rgb_250_04', 'so7556_rgb_250_05',
         'st7091_rgb_250_07', 'sn2040_rgb_250_04', 'so7371_rgb_250_04',
         'tl6064_rgb_250_05', 'so9255_rgb_250_05', 'st1826_rgb_250_04',
         'st1528_rgb_250_04', 'st1629_rgb_250_04', 'st0727_rgb_250_04',
         'st0827_rgb_250_04', 'st0928_rgb_250_04', 'st0930_rgb_250_04',
         'st0929_rgb_250_04', 'st0832_rgb_250_05', 'tl1750_rgb_250_03',
         'st2322_rgb_250_05', 'st1623_rgb_250_04', 'st1523_rgb_250_04',
         'st1624_rgb_250_04', 'st1424_rgb_250_04', 'st1421_rgb_250_05',
         'sp3793_rgb_250_04', 'sp3792_rgb_250_04', 'sj9912_rgb_250_03',
         'sk2347_rgb_250_05', 'sp3391_rgb_250_03', 'tl1846_rgb_250_03',
         'sp5177_rgb_250_03', 'sn3251_rgb_250_04', 'sp3693_rgb_250_04',
         'st2014_rgb_250_06', 'st2015_rgb_250_06', 'st2115_rgb_250_05',
         'st2114_rgb_250_05', 'sn4257_rgb_250_04', 'su4223_rgb_250_04',
         'su4323_rgb_250_04', 'tl3068_rgb_250_04', 'sp5178_rgb_250_03',
         'sp3791_rgb_250_04', 'st3689_rgb_250_03', 'st3789_rgb_250_03',
         'st0411_rgb_250_04', 'st0212_rgb_250_04', 'st0112_rgb_250_04',
         'st0211_rgb_250_04', 'st0111_rgb_250_04', 'st0209_rgb_250_05',
         'st0210_rgb_250_05', 'sj6714_rgb_250_04', 'sp3893_rgb_250_05',
         'su6712_rgb_250_04', 'su6713_rgb_250_04', 'st9363_rgb_250_04',
         'st9463_rgb_250_04', 'nr3059_rgb_250_03', 'st8576_rgb_250_03',
         'sp7948_rgb_250_04', 'sp6138_rgb_250_07', 'tl2276_rgb_250_04',
         'sm9817_rgb_250_04', 'sm9816_rgb_250_04', 'sm9716_rgb_250_04',
         'sm9616_rgb_250_04', 'sm9818_rgb_250_04', 'sm9009_rgb_250_04',
         'sm9721_rgb_250_05', 'sm9720_rgb_250_05', 'sm9101_rgb_250_04',
         'sm9201_rgb_250_04', 'sm9010_rgb_250_04', 'sm9109_rgb_250_04',
         'sn6502_rgb_250_04', 'sn6601_rgb_250_04', 'sn6201_rgb_250_04',
         'sn6202_rgb_250_04', 'st6788_rgb_250_05', 'st6688_rgb_250_05',
         'st6689_rgb_250_06', 'su0807_rgb_250_05', 'su0806_rgb_250_05',
         'sz0998_rgb_250_05', 'sz1099_rgb_250_05', 'su3743_rgb_250_04',
         'su3744_rgb_250_04', 'su6509_rgb_250_04', 'su6409_rgb_250_04',
         'su6410_rgb_250_04', 'su5413_rgb_250_04', 'su2088_rgb_250_04',
         'su5703_rgb_250_04', 'su5603_rgb_250_04', 'su5604_rgb_250_04',
         'st7642_rgb_250_06', 'st7744_rgb_250_05', 'st6728_rgb_250_05',
         'st8558_rgb_250_04', 'st2735_rgb_250_04', 'tl4990_rgb_250_05',
         'sm7209_rgb_250_04', 'st8864_rgb_250_04', 'tg5013_rgb_250_04',
         'st1198_rgb_250_04', 'st1298_rgb_250_04', 'st1722_rgb_250_04',
         'tq1078_rgb_250_05', 'su6401_rgb_250_04', 'st8753_rgb_250_04',
         'st8455_rgb_250_05', 'st8660_rgb_250_04', 'st8760_rgb_250_04',
         'st8765_rgb_250_04', 'sp7638_rgb_250_05', 'tl6332_rgb_250_04',
         'st8705_rgb_250_05', 'sy3297_rgb_250_06', 'sy3498_rgb_250_06',
         'se3636_rgb_250_01', 'st6578_rgb_250_05', 'st6478_rgb_250_05',
         'st5479_rgb_250_06', 'se2931_rgb_250_02', 'sd6835_rgb_250_01',
         'st2228_rgb_250_05', 'st2227_rgb_250_05']

        # Experiment label and root directory for output 
        exp_id = 'pv-classification'
        root_uri = '/opt/data/rv/test3'

        # num_steps = 1e4 # 1e5 takes too long
        num_epochs = 20
        batch_size = 16 
        debug = True

        test = str_to_bool(test)
        if test:
            print("***************** TEST MODE *****************")
            exp_id += '-test'
            # num_steps = 100
            num_epochs = 1
            batch_size = 1
            debug = True
            # Keep only a handful of scenes; train_ids and val_ids are
            # derived from this truncated list below.
            scene_ids = scene_ids[0:5]

        # Split the data into training and validation sets:
        # Randomize the order of all scene ids
        random.seed(5678)
        scene_ids = sorted(scene_ids)
        random.shuffle(scene_ids)

        # Set scenes
        num_train_ids = round(len(scene_ids) * 0.8)
        train_ids = scene_ids[0:num_train_ids]
        val_ids = scene_ids[num_train_ids:]
        # train_ids = scene_ids
        # val_ids = scene_ids

        # ------------- TASK -------------

        task = rv.TaskConfig.builder(rv.CHIP_CLASSIFICATION) \
            .with_chip_size(200) \
            .with_classes({
                'pv': (1, 'yellow'),
                'background': (2, 'black')
            }) \
            .build()
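        # Chip classification assigns one label to each 200x200 cell rather
        # than predicting a per-pixel mask.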


        # ------------- BACKEND -------------
        # Configuration options for different models and tasks:
        # https://github.com/azavea/raster-vision/blob/60f741e30a016f25d2643a9b32916adb22e42d50/rastervision/backend/model_defaults.json

        backend = rv.BackendConfig.builder(rv.KERAS_CLASSIFICATION) \
            .with_task(task) \
            .with_debug(debug) \
            .with_batch_size(batch_size) \
            .with_num_epochs(num_epochs) \
            .with_model_defaults(rv.RESNET50_IMAGENET) \
            .with_config({
                'trainer': {
                    'options': {
                        'saveBest': True,
                        'lrSchedule': [
                            {
                              'epoch': 0,
                              'lr': 0.0005
                            },
                            {
                              'epoch': 10,
                              'lr': 0.0001
                            },
                            {
                              'epoch': 15,
                              'lr': 0.00001
                            }
                        ]
                    }
                 }
            }, set_missing_keys=True) \
            .build()
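        # The lrSchedule above steps the learning rate down: 5e-4 for epochs
        # 0-9, 1e-4 for epochs 10-14, and 1e-5 from epoch 15 on.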

        # ------------- Make Scenes -------------
        # We will use this function to create a list of scenes that we will pass
        # to the DataSetConfig builder.
        def make_scene(id):
            """Make a SceneConfig object for each image/label pair
            Args:
                id (str): The id that corresponds to both the .jpg image source
                    and .geojson label source for a given scene
            Returns:
                rv.data.SceneConfig: a SceneConfig object which is composed of
                    images, labels and optionally AOIs
            """
            # Find the uri for the image associated with this id
            image_uri = os.path.join(raster_uri, '{}.jpg'.format(id))
            # Construct a raster source from an image uri that can be handled by Rasterio.
            # We also specify the order of image channels by their indices and add a
            # stats transformer which normalizes pixel values into uint8.
            raster_source = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE) \
                .with_uri(image_uri) \
                .with_channel_order([0, 1, 2]) \
                .with_stats_transformer() \
                .build()

            label = os.path.join(label_uri, '{}.geojson'.format(id))
            
            # Build our classification labels.
            # The IOA threshold is the minimum intersection-over-area between
            # a polygon and a cell for the cell to receive that label.
            # infer_cells means the label source infers cells and their labels
            # from the vector source.
            # with_pick_min_class_id(True) means PV is always picked when
            # present, since it has the smaller class id.
            label_source = rv.LabelSourceConfig.builder(rv.CHIP_CLASSIFICATION) \
                .with_uri(label) \
                .with_ioa_thresh(0.01) \
                .with_use_intersection_over_cell(False) \
                .with_pick_min_class_id(True) \
                .with_background_class_id(2) \
                .with_infer_cells(True) \
                .build()

            # Finally we can build a scene config object using the scene id and the
            # configs we just defined
            scene = rv.SceneConfig.builder() \
                .with_task(task) \
                .with_id(id) \
                .with_raster_source(raster_source) \
                .with_label_source(label_source) \
                .build()

            return scene

        # Create lists of train and validation scene configs
        train_scenes = [make_scene(id) for id in train_ids]
        val_scenes = [make_scene(id) for id in val_ids]

        # ------------- DATASET -------------
        # Construct a DataSet config using the lists of train and
        # validation scenes
        # The augmentor assignment was left blank in the original. As a
        # placeholder (assuming the nodata augmentor available in this
        # Raster Vision version as rv.NODATA_AUGMENTOR) we build one
        # explicitly; swap in whichever augmentor was actually intended.
        augmentor = rv.AugmentorConfig.builder(rv.NODATA_AUGMENTOR) \
            .build()

        dataset = rv.DatasetConfig.builder() \
            .with_train_scenes(train_scenes) \
            .with_validation_scenes(val_scenes) \
            .with_augmentor(augmentor) \
            .build()