def _construct_data_dicts(self):
    logger.info('Will collect samples (img/ann pairs).')
    name_to_tag = self.config['dataset_tags']
    project_fs = sly.ProjectFS.from_disk_dir_project(self.helper.paths.project_dir)
    logger.info('Project structure has been read. Samples: {}.'.format(project_fs.pr_structure.image_cnt))

    samples_dct = sly.samples_by_tags(
        tags=list(name_to_tag.values()), project_fs=project_fs, project_meta=self.helper.in_project_meta)

    self.data_dicts = {}
    self.iters_cnt = {}
    for the_name, the_tag in name_to_tag.items():
        samples_lst = samples_dct[the_tag]
        sly.ensure_samples_nonempty(samples_lst, the_tag)
        # Eagerly load the whole dataset for this tag into memory.
        img_paths, labels, num_boxes = load_dataset(samples_lst, self.class_title_to_idx,
                                                    self.helper.in_project_meta)
        dataset_dict = {
            'img_paths': img_paths,
            'labels': labels,
            'num_boxes': num_boxes,
            'sample_cnt': len(samples_lst),
        }
        self.data_dicts[the_name] = dataset_dict
        # Iterations per epoch: each step consumes batch_size samples on every GPU.
        self.iters_cnt[the_name] = np.ceil(
            float(len(samples_lst)) /
            (self.config['batch_size'][the_name] * len(self.config['gpu_devices']))).astype('int')
        logger.info('Prepared dataset.', extra={
            'dataset_purpose': the_name, 'dataset_tag': the_tag, 'sample_cnt': len(samples_lst)
        })
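
# A minimal standalone sketch (not part of the plugin) of the iteration-count
# arithmetic used above: one epoch needs ceil(sample_cnt / (batch_size * gpu_count))
# steps, since each step consumes batch_size samples on every GPU. The numbers
# below are hypothetical.
import numpy as np

def _demo_iters_per_epoch(sample_cnt=103, batch_size=8, gpu_count=2):
    # 103 samples / (8 * 2) consumed per step -> ceil(6.4375) == 7 steps
    return int(np.ceil(float(sample_cnt) / (batch_size * gpu_count)))

assert _demo_iters_per_epoch() == 7
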
def _explore_input_project(self):
    logger.info('Will collect samples (img/ann pairs).')
    name_to_tag = self.config['dataset_tags']
    project_fs = sly.ProjectFS.from_disk_dir_project(self.helper.paths.project_dir)
    logger.info('Project structure has been read. Samples: {}.'.format(project_fs.pr_structure.image_cnt))

    self.samples_dct = sly.samples_by_tags(
        tags=list(name_to_tag.values()), project_fs=project_fs, project_meta=self.helper.in_project_meta)

    for the_name, the_tag in name_to_tag.items():
        samples_lst = self.samples_dct[the_tag]
        sly.ensure_samples_nonempty(samples_lst, the_tag)
        logger.info('Prepared dataset.', extra={
            'dataset_purpose': the_name, 'dataset_tag': the_tag, 'sample_cnt': len(samples_lst)
        })
    logger.info('Annotations are split by tags.')

    self.samples_cnt = {k: len(self.samples_dct[v]) for k, v in name_to_tag.items()}

    # Internal iteration counts, per epoch (train) or per validation run.
    gpu_count = len(self.device_ids)
    self.iters_cnt = {
        k: int(np.ceil(float(self.samples_cnt[k]) / (self.config['batch_size'][k] * gpu_count)))
        for k in name_to_tag.keys()
    }

    self.epochs = self.config['epochs']
    self.total_train_iters = self.iters_cnt['train'] * self.epochs
    self.eval_planner = EvalPlanner(epochs=self.epochs, val_every=self.config['val_every'])
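
# For orientation, a hypothetical config fragment with the keys
# _explore_input_project() reads ('dataset_tags', 'batch_size', 'epochs',
# 'val_every'); the tag names and values here are illustrative assumptions,
# not taken from a real training run.
_example_config = {
    'dataset_tags': {'train': 'train', 'val': 'val'},  # purpose name -> project tag
    'batch_size': {'train': 8, 'val': 4},
    'epochs': 10,
    'val_every': 0.5,  # validate every half epoch
}
# With 2 GPUs and, say, 103 'train' samples this would give
# iters_cnt['train'] == ceil(103 / (8 * 2)) == 7 and total_train_iters == 70.
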
def _construct_data_dicts(self):
    logger.info('Will collect samples (img/ann pairs).')
    src_size = self.config['input_size']
    self.input_size_wh = (src_size['width'], src_size['height'])
    name_to_tag = self.config['dataset_tags']
    project_fs = sly.ProjectFS.from_disk_dir_project(self.helper.paths.project_dir)
    logger.info('Project structure has been read. Samples: {}.'.format(project_fs.pr_structure.image_cnt))

    samples_dct = sly.samples_by_tags(
        tags=list(name_to_tag.values()), project_fs=project_fs, project_meta=self.helper.in_project_meta)

    self.tf_data_dicts = {}
    self.iters_cnt = {}
    for the_name, the_tag in name_to_tag.items():
        samples_lst = samples_dct[the_tag]
        sly.ensure_samples_nonempty(samples_lst, the_tag)
        # Unlike the eager variant above, keep the raw samples and metadata so
        # the TF input pipeline can load and decode images lazily, batch by batch.
        dataset_dict = {
            'samples': samples_lst,
            'classes_mapping': self.class_title_to_idx,
            'project_meta': self.helper.in_project_meta,
            'sample_cnt': len(samples_lst),
            'batch_size': self.config['batch_size'][the_name],
        }
        self.tf_data_dicts[the_name] = dataset_dict
        self.iters_cnt[the_name] = np.ceil(
            float(len(samples_lst)) /
            (self.config['batch_size'][the_name] * len(self.config['gpu_devices']))).astype('int')
        logger.info('Prepared dataset.', extra={
            'dataset_purpose': the_name, 'dataset_tag': the_tag, 'sample_cnt': len(samples_lst)
        })
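
# A small sanity-check sketch (not part of the plugin): after the loop above,
# every entry of self.tf_data_dicts should carry exactly these keys, with
# iters_cnt consistent with sample_cnt, batch_size and the configured GPU count.
def _check_tf_data_dicts(tf_data_dicts, iters_cnt, gpu_count):
    import numpy as np
    expected = {'samples', 'classes_mapping', 'project_meta', 'sample_cnt', 'batch_size'}
    for name, dct in tf_data_dicts.items():
        assert set(dct) == expected
        assert iters_cnt[name] == int(np.ceil(float(dct['sample_cnt']) /
                                              (dct['batch_size'] * gpu_count)))
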