Example #1
    def _construct_data_dicts(self):
        logger.info('Will collect samples (img/ann pairs).')

        name_to_tag = self.config['dataset_tags']
        project_fs = sly.ProjectFS.from_disk_dir_project(self.helper.paths.project_dir)
        logger.info('Project structure has been read. Samples: {}.'.format(project_fs.pr_structure.image_cnt))

        samples_dct = sly.samples_by_tags(
            tags=list(name_to_tag.values()), project_fs=project_fs, project_meta=self.helper.in_project_meta
        )

        self.tf_data_dicts = {}
        self.iters_cnt = {}
        for the_name, the_tag in name_to_tag.items():
            samples_lst = samples_dct[the_tag]
            if len(samples_lst) < 1:
                raise RuntimeError('Dataset %s should contain at least 1 element.' % the_name)
            dataset_dict = {
                "samples": samples_lst,
                "classes_mapping": self.class_title_to_idx,
                "project_meta": self.helper.in_project_meta,
                "sample_cnt": len(samples_lst)
            }
            self.tf_data_dicts[the_name] = dataset_dict
            # Iterations per epoch: ceil(samples / (per-device batch * GPU count)).
            self.iters_cnt[the_name] = np.ceil(
                float(len(samples_lst)) /
                (self.config['batch_size'][the_name] * len(self.config['gpu_devices']))
            ).astype('int')
            logger.info('Prepared dataset.', extra={
                'dataset_purpose': the_name, 'dataset_tag': the_tag, 'sample_cnt': len(samples_lst)
            })
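
The per-split iteration count computed above is just the number of full multi-GPU batches needed to cover the split once. As a minimal standalone sketch of the same arithmetic (the function name and sample numbers are illustrative, not part of the Supervisely API):

import numpy as np

def iters_per_epoch(sample_cnt, batch_size, gpu_count):
    # One iteration consumes batch_size samples on each of gpu_count devices,
    # so the effective batch is batch_size * gpu_count; a final partial batch
    # still counts as one iteration, hence the ceiling.
    return int(np.ceil(float(sample_cnt) / (batch_size * gpu_count)))

# e.g. 1000 samples with a per-device batch of 8 on 2 GPUs:
assert iters_per_epoch(1000, 8, 2) == 63  # ceil(1000 / 16)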
Example #2
    def _construct_data_dicts(self):
        logger.info('Will collect samples (img/ann pairs).')

        name_to_tag = self.config['dataset_tags']
        project_fs = sly.ProjectFS.from_disk_dir_project(self.helper.paths.project_dir)
        logger.info('Project structure has been read. Samples: {}.'.format(project_fs.pr_structure.image_cnt))

        samples_dct = sly.samples_by_tags(
            tags=list(name_to_tag.values()), project_fs=project_fs, project_meta=self.helper.in_project_meta
        )

        self.data_dicts = {}
        self.iters_cnt = {}
        for the_name, the_tag in name_to_tag.items():
            samples_lst = samples_dct[the_tag]
            sly.ensure_samples_nonempty(samples_lst, the_tag)

            img_paths, labels, num_boxes = load_dataset(samples_lst, self.class_title_to_idx, self.helper.in_project_meta)
            dataset_dict = {
                'img_paths': img_paths,
                'labels': labels,
                'num_boxes': num_boxes,
                'sample_cnt': len(samples_lst)
            }
            self.data_dicts[the_name] = dataset_dict
            self.iters_cnt[the_name] = np.ceil(
                float(len(samples_lst)) /
                (self.config['batch_size'][the_name] * len(self.config['gpu_devices']))
            ).astype('int')
            logger.info('Prepared dataset.', extra={
                'dataset_purpose': the_name, 'dataset_tag': the_tag, 'sample_cnt': len(samples_lst)
            })
Example #3
    def _explore_input_project(self):
        logger.info('Will collect samples (img/ann pairs).')

        name_to_tag = self.config['dataset_tags']
        project_fs = sly.ProjectFS.from_disk_dir_project(
            self.helper.paths.project_dir)
        logger.info('Project structure has been read. Samples: {}.'.format(
            project_fs.pr_structure.image_cnt))

        self.samples_dct = sly.samples_by_tags(
            tags=list(name_to_tag.values()),
            project_fs=project_fs,
            project_meta=self.helper.in_project_meta)
        logger.info('Annotations are split by tags.')

        self.samples_cnt = {
            k: len(self.samples_dct[v])
            for k, v in name_to_tag.items()
        }
        gpu_count = len(self.device_ids)
        self.iters_cnt = {
            k: int(
                np.ceil(
                    float(self.samples_cnt[k]) /
                    (self.config['batch_size'][k] * gpu_count)))
            for k in name_to_tag.keys()
        }  # internal cnt, per epoch or per validation

        self.epochs = self.config['epochs']
        self.total_train_iters = self.iters_cnt['train'] * self.epochs
        self.eval_planner = EvalPlanner(epochs=self.epochs,
                                        val_every=self.config['val_every'])
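
Example #3 additionally derives the total number of training iterations and hands the validation schedule to `EvalPlanner`, Supervisely's own helper. A hypothetical minimal stand-in for the scheduling arithmetic could look like this (illustration only, not the actual API):

class SimpleEvalPlanner:
    # Hypothetical stand-in: decides after which epochs validation runs.
    def __init__(self, epochs, val_every):
        self.epochs = epochs
        self.val_every = val_every

    def validation_epochs(self):
        # Validate every val_every epochs; the real helper may also support
        # fractional val_every (mid-epoch validation), omitted here.
        return [e for e in range(1, self.epochs + 1) if e % self.val_every == 0]

planner = SimpleEvalPlanner(epochs=10, val_every=2)
print(planner.validation_epochs())  # [2, 4, 6, 8, 10]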
Example #4
    def _construct_data_loaders(self):
        logger.info('Will collect samples (img/ann pairs).')

        name_to_tag = self.config['dataset_tags']
        project_fs = sly.ProjectFS.from_disk_dir_project(
            self.helper.paths.project_dir)
        logger.info('Project structure has been read. Samples: {}.'.format(
            project_fs.pr_structure.image_cnt))

        samples_dct = sly.samples_by_tags(
            tags=list(name_to_tag.values()),
            project_fs=project_fs,
            project_meta=self.helper.in_project_meta)

        src_size = self.config['input_size']
        input_size_wh = (src_size['width'], src_size['height'])

        self.pytorch_datasets = {}
        for the_name, the_tag in name_to_tag.items():
            samples_lst = samples_dct[the_tag]
            the_ds = PytorchSlyDataset(
                project_meta=self.helper.in_project_meta,
                samples=samples_lst,
                out_size_wh=input_size_wh,
                class_mapping=self.class_title_to_idx,
                bkg_color=self.bkg_input_idx,
                allow_corrupted_cnt=self.config['allow_corrupted_samples'][the_name])
            self.pytorch_datasets[the_name] = the_ds
            logger.info('Prepared dataset.',
                        extra={
                            'dataset_purpose': the_name,
                            'dataset_tag': the_tag,
                            'sample_cnt': len(samples_lst)
                        })

        self.data_loaders = {}
        for name, need_shuffle in [
            ('train', True),
            ('val', False),
        ]:
            # note that now batch_size from config determines batch for single device
            batch_sz = self.config['batch_size'][name]
            batch_sz_full = batch_sz * len(self.device_ids)
            n_workers = self.config['data_workers'][name]
            self.data_loaders[name] = DataLoader(
                dataset=self.pytorch_datasets[name],
                batch_size=batch_sz_full,  # it looks like multi-gpu validation works
                num_workers=n_workers,
                shuffle=need_shuffle,
            )
        logger.info('DataLoaders are constructed.')

        self.train_iters = len(self.data_loaders['train'])
        self.val_iters = len(self.data_loaders['val'])
        self.epochs = self.config['epochs']
        self.eval_planner = EvalPlanner(epochs=self.epochs,
                                        val_every=self.config['val_every'])
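
Note the design choice flagged in the inline comments: `batch_size` in the config is per device, so the `DataLoader` is built with `batch_sz * len(self.device_ids)` and each fetched batch is presumably split across devices later (e.g. by `nn.DataParallel`). A minimal sketch of the same pattern with stock PyTorch (dataset and sizes made up):

import torch
from torch.utils.data import DataLoader, TensorDataset

per_device_batch = 8
device_ids = [0, 1]  # two GPUs

dataset = TensorDataset(torch.randn(100, 3, 32, 32))
loader = DataLoader(
    dataset,
    batch_size=per_device_batch * len(device_ids),  # full batch spans all devices
    shuffle=True,
)
print(len(loader))  # ceil(100 / 16) == 7, matching the iters_cnt arithmetic above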
Example #5
    def _construct_data_dicts(self):
        logger.info('Will collect samples (img/ann pairs).')
        src_size = self.config['input_size']
        self.input_size_wh = (src_size['width'], src_size['height'])
        name_to_tag = self.config['dataset_tags']
        project_fs = sly.ProjectFS.from_disk_dir_project(
            self.helper.paths.project_dir)
        logger.info('Project structure has been read. Samples: {}.'.format(
            project_fs.pr_structure.image_cnt))

        samples_dct = sly.samples_by_tags(
            tags=list(name_to_tag.values()),
            project_fs=project_fs,
            project_meta=self.helper.in_project_meta)

        self.tf_data_dicts = {}
        self.iters_cnt = {}
        for the_name, the_tag in name_to_tag.items():
            samples_lst = samples_dct[the_tag]
            sly.ensure_samples_nonempty(samples_lst, the_tag)
            dataset_dict = {
                "samples": samples_lst,
                "classes_mapping": self.class_title_to_idx,
                "project_meta": self.helper.in_project_meta,
                "sample_cnt": len(samples_lst),
                "batch_size": self.config['batch_size'][the_name]
            }
            self.tf_data_dicts[the_name] = dataset_dict
            self.iters_cnt[the_name] = np.ceil(
                float(len(samples_lst)) /
                (self.config['batch_size'][the_name] *
                 len(self.config['gpu_devices']))).astype('int')
            logger.info('Prepared dataset.',
                        extra={
                            'dataset_purpose': the_name,
                            'dataset_tag': the_tag,
                            'sample_cnt': len(samples_lst)
                        })
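
Each variant above pulls a handful of keys straight from `self.config`. A small illustrative guard could fail fast before any of these methods run (the helper is hypothetical; the key list is taken from the snippets, and the exact set varies per variant, e.g. `device_ids` instead of `gpu_devices`):

REQUIRED_KEYS = ('dataset_tags', 'batch_size', 'gpu_devices', 'input_size')

def check_config(config):
    # Fail fast with a clear message instead of a KeyError deep inside training.
    missing = [k for k in REQUIRED_KEYS if k not in config]
    if missing:
        raise ValueError('Config is missing required keys: {}'.format(missing))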