def collect_and_add_project_meta_info(project_dir: str):
    """
    Collect the meta information of the project, e.g. the run type and the
    git hashes of imported objects, and add it to the existing meta
    information file inside the project directory

    Parameters
    ----------
    project_dir
        project directory
    """
    meta_fname = os.path.join(project_dir, _PROJECT_META_FILE_NAME)
    if not os.path.isfile(meta_fname):
        meta_all = {}
    else:
        meta_all = io_utils.load_json(meta_fname)
    project_name = get_project_name_from_directory(project_dir)
    meta_all.setdefault(_PROJECT_NAME_KEY, project_name)
    meta_run = collect_project_meta_info()
    for k in meta_run:
        if k in meta_all:
            meta_all[k] = meta_all[k] + [meta_run[k]]
        else:
            meta_all[k] = [meta_run[k]]
    with open(meta_fname, 'w') as file:
        json.dump(meta_all, file, indent=4, sort_keys=True)
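# Hedged usage sketch for collect_and_add_project_meta_info (not part of the
# original module): the path, key names and values below are illustrative
# assumptions only. Run-specific values accumulate as lists across calls,
# while the project name is only set once via setdefault.
#
# collect_and_add_project_meta_info("/tmp/my_project")
# collect_and_add_project_meta_info("/tmp/my_project")
# # nucleus7_project.json now holds, schematically:
# # {"<project name key>": "my_project",
# #  "<run meta key>": ["<value from run 1>", "<value from run 2>"]}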
def _serving_input_receiver_fn(self) -> callable:
    """Build a ServingInputReceiver from placeholders that match the dataset
    output shapes and types"""
    # pylint: disable=not-context-manager
    # bug of pylint and/or tensorflow, since it is a context manager
    with tf.Graph().as_default():
        data = self.dataset_fn()
    # pylint: enable=not-context-manager
    input_shapes, input_types = data.output_shapes, data.output_types
    shape_fn = (_get_undefined_shapes_except_last
                if self.inference_inputs_have_variable_shape
                else _get_defined_shapes_with_batch)
    input_output_fname = os.path.join(
        self.save_dir_inputs_outputs_mapping,
        coord_configs.INPUT_OUTPUT_NAMES_FILE_NAME)
    self._maybe_create_inference_graph_and_save_input_output(
        input_output_fname, input_shapes, input_types, shape_fn)
    input_names = io_utils.load_json(input_output_fname)['inputs']
    inputs = {k: tf.placeholder(dtype=input_types[k],
                                shape=shape_fn(input_shapes[k]),
                                name="input_{}".format(k))
              for k in input_names}
    inputs_from_default_placeholders = {
        each_item.full_name: each_item.placeholder
        for each_item in self.model.default_placeholders}
    inputs.update(inputs_from_default_placeholders)
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)
def panoptic_categories_to_rgb_hash_fn(fname_panoptic_annotations: str,
                                       remove_unused_classes: bool = False):
    """
    Read the panoptic annotation file and create a mapping in the form
    {file_name: {id: class label}}

    Inside of panoptic_annotations['segments_info'] every segment has its
    category (coco class) and its id, where id = r + g*256 + b*256**2 is used
    as a hash to map rgb values of the panoptic rgb images to the segment
    label.

    Parameters
    ----------
    fname_panoptic_annotations
        file name of panoptic annotations json file
    remove_unused_classes
        if the unused classes must be removed, so that at the end there are
        133 class ids instead of 200

    Returns
    -------
    panoptic_image_fname_to_class_id_hash_fn
        mapping of type {file_name: {id: class label}}
    """
    panoptic_coco = io_utils.load_json(fname_panoptic_annotations)
    panoptic_coco_categories = panoptic_coco['categories']
    class_ids_to_name_coco = {
        p['id']: p['name'] for p in panoptic_coco_categories
    }
    if remove_unused_classes:
        _, class_ids_mapping, _ = _remove_empty_classes(class_ids_to_name_coco)
    else:
        class_ids_mapping = dict(zip(*[class_ids_to_name_coco] * 2))
    panoptic_image_fname_to_class_id_hash_fn = {}
    for ann in panoptic_coco['annotations']:
        file_name = ann['file_name']
        hash_to_class = {
            a['id']: class_ids_mapping[a['category_id']]
            for a in ann['segments_info']
        }
        hash_to_class[0] = 0
        panoptic_image_fname_to_class_id_hash_fn[file_name] = hash_to_class
    return panoptic_image_fname_to_class_id_hash_fn
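# Hedged usage sketch for panoptic_categories_to_rgb_hash_fn (not part of the
# original module): the annotation file name, image file name and pixel
# values are assumptions made up for illustration. It shows how a pixel of a
# panoptic RGB image is hashed into the segment id described in the docstring
# and then resolved to its class label.
#
# hash_fn = panoptic_categories_to_rgb_hash_fn("panoptic_val2017.json")
# r, g, b = 12, 1, 0                        # pixel of the panoptic RGB image
# segment_id = r + g * 256 + b * 256 ** 2   # same hash as id in segments_info
# class_label = hash_fn["000000000139.png"][segment_id]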
def add_runtype_to_project_meta_info(project_dir: str):
    """
    Add the current run type to the project meta information json

    Parameters
    ----------
    project_dir
        project directory
    """
    meta_fname = os.path.join(project_dir, _PROJECT_META_FILE_NAME)
    if not os.path.isfile(meta_fname):
        meta = {}
    else:
        meta = io_utils.load_json(meta_fname)
    used_runs = meta.get('runs', [])
    runtype = _get_or_increase_runtype(used_runs)
    meta['runs'] = used_runs + [runtype]
    with open(meta_fname, 'w') as file:
        json.dump(meta, file, indent=4, sort_keys=True)
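# Hedged usage sketch for add_runtype_to_project_meta_info (illustrative only;
# the path is an assumption): repeated calls for the same run type append
# suffixed entries to 'runs', e.g. ["chief:0", "chief:0/1", "chief:0/2"], as
# exercised by the continue-training test further below.
#
# add_runtype_to_project_meta_info("/tmp/my_project")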
def serialize_to_file(self, configs_to_log: dict):
    """Serialize the run configs to the save file, merging them into an
    already existing config file if one is present"""
    logger = logging.getLogger(__name__)
    if not self.save_dir:
        logger.warning("No save_dir provided to config serializer! "
                       "Set the project if you want to save the configs")
        return
    save_file_name = self._get_save_file_name()
    logger.info("Save run config to %s", save_file_name)
    configs_to_log = self.format_configs_to_log(configs_to_log)
    if os.path.exists(save_file_name):
        configs_to_log_loaded = io_utils.load_json(save_file_name)
        configs_to_log = project_utils.update_config_with_other_config(
            configs_to_log_loaded, configs_to_log)
    with open(save_file_name, "w", encoding='utf8') as file:
        json.dump(configs_to_log, file, indent=4, sort_keys=True,
                  default=lambda o: RunConfigSerializer.NOT_SERIALIZABLE)
def _validate_training_project(project_dir, continue_training):
    """Validate that the training project directory can be used for the
    current run type and raise FileExistsError otherwise"""
    project_meta_fname = os.path.join(project_dir, 'nucleus7_project.json')
    runtype = run_utils.get_run_type()
    if not os.path.isfile(project_meta_fname):
        used_runs = []
    else:
        used_runs = io_utils.load_json(project_meta_fname).get('runs', [])
    msg_project_exist = ("Project inside {} already exists! "
                         "Clean the folder, set --continue flag or "
                         "select new project folder!".format(project_dir))
    if (run_utils.is_chief() and runtype in used_runs
            and not continue_training):
        raise FileExistsError(msg_project_exist)
    if (run_utils.is_chief() and not continue_training
            and os.path.exists(project_dir)):
        training_dir = os.path.join(project_dir, _TRAINING_DIR)
        allowed_training_content = ["configs", 'global_config.json']
        if _get_not_allowed_content(training_dir, allowed_training_content):
            raise FileExistsError(msg_project_exist)
def get_project_name_from_directory(project_dir: str) -> str:
    """
    Return the project name given the project directory

    Parameters
    ----------
    project_dir
        project directory

    Returns
    -------
    project_name
        project name
    """
    project_name = None
    project_meta_file_name = os.path.join(project_dir,
                                          _PROJECT_META_FILE_NAME)
    if os.path.exists(project_meta_file_name):
        project_meta = io_utils.load_json(project_meta_file_name)
        project_name = project_meta.get(_PROJECT_NAME_KEY)
    return project_name or os.path.split(project_dir.rstrip('/'))[-1]
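# Hedged usage sketch for get_project_name_from_directory (the path is an
# assumption): when no meta file or project name key is present, the function
# falls back to the directory basename.
#
# get_project_name_from_directory("/data/projects/my_project")  # "my_project"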
def test_create_trainer_project_dirs_local(self):
    if "TF_CONFIG" in os.environ:
        del os.environ["TF_CONFIG"]
    project_dir = self.get_temp_dir()
    project_file = os.path.join(project_dir, 'nucleus7_project.json')
    trainer_dirs = create_trainer_project_dirs(project_dir)
    self.assertIsInstance(trainer_dirs, tuple)
    self.assertTupleEqual(trainer_dirs._fields,
                          ProjectDirs.TRAINER._fields)
    self.assertTrue(os.path.isfile(project_file))
    for k in trainer_dirs._fields:
        dir_must = os.path.join(project_dir,
                                getattr(ProjectDirs.TRAINER, k))
        self.assertTrue(os.path.isdir(dir_must))
    self.assertTrue(os.path.isfile(project_file))
    project_meta_info = load_json(project_file)
    self.assertListEqual(['chief:0'], project_meta_info['runs'])
    with self.assertRaises(FileExistsError):
        _ = create_trainer_project_dirs(project_dir)
def _read_objects_json(fname, with_scores=False):
    """
    Read objects from a json file

    Objects inside the file should be formatted in the following way:
    `[{'bbox': {'xmin': , 'ymin': , 'w': , 'h': }, 'class_label': , 'id': ,
    'score': }, ...]` or bbox can be a list in the format
    [ymin, xmin, ymax, xmax]

    Parameters
    ----------
    fname
        file name
    with_scores
        if scores should be read from the file

    Returns
    -------
    class_labels
        class ids with shape [num_objects]
    instance_ids
        instance ids with shape [num_objects]
    bboxes
        bounding boxes with shape [num_objects, 4] and format
        [ymin, xmin, ymax, xmax]
    scores
        only if with_scores == True; if no scores were found inside the
        labels, they are returned as 1
    """
    try:
        data = io_utils.load_json(fname)
    except:  # pylint: disable=bare-except
        data = []
    instance_ids, class_labels, bboxes, scores = _combine_object_labels(
        data, with_scores=with_scores)
    return instance_ids, class_labels, bboxes, scores
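# Hedged example of an objects json file accepted by _read_objects_json (the
# file name and all values are made up for illustration):
#
# objects.json:
# [
#     {"bbox": {"xmin": 10, "ymin": 20, "w": 30, "h": 40},
#      "class_label": 1, "id": 7, "score": 0.9}
# ]
#
# instance_ids, class_labels, bboxes, scores = _read_objects_json(
#     "objects.json", with_scores=True)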
def test_create_trainer_project_dirs_distributed(self):
    tasks = [{"type": "chief", "index": 0},
             {"type": "worker", "index": 0},
             {"type": "worker", "index": 1},
             {"type": "ps", "index": 0},
             {"type": "ps", "index": 1},
             {"type": "evaluator", "index": 0}]
    cluster = {"chief": ["localhost:1111"],
               "worker": ["localhost:2222", "localhost:3333"],
               "ps": ["localhost:4444", "localhost:5555"]}
    project_dir = self.get_temp_dir()
    project_file = os.path.join(project_dir, 'nucleus7_project.json')
    for task in tasks:
        tf_config = {'cluster': cluster, "task": task}
        os.environ["TF_CONFIG"] = json.dumps(tf_config)
        _ = create_trainer_project_dirs(project_dir)
    config_dir = os.path.join(project_dir, ProjectDirs.TRAINER.configs)
    self.assertTrue(os.path.isdir(config_dir))
    self.assertTrue(os.path.isfile(project_file))
    project_meta_info = load_json(project_file)
    runs_must = ['{}:{}'.format(task['type'], task['index'])
                 for task in tasks]
    self.assertListEqual(runs_must, project_meta_info['runs'])
    for task in tasks:
        tf_config = {'cluster': cluster, "task": task}
        os.environ["TF_CONFIG"] = json.dumps(tf_config)
        if task['type'] == 'chief':
            with self.assertRaises(FileExistsError):
                _ = create_trainer_project_dirs(project_dir)
        else:
            _ = create_trainer_project_dirs(project_dir)
    del os.environ['TF_CONFIG']
    with self.assertRaises(FileExistsError):
        _ = create_trainer_project_dirs(project_dir)
def test_create_trainer_project_dirs_continue_training(self):
    if "TF_CONFIG" in os.environ:
        del os.environ["TF_CONFIG"]
    project_dir = self.get_temp_dir()
    project_file = os.path.join(project_dir, 'nucleus7_project.json')
    create_trainer_project_dirs(project_dir, continue_training=True)
    trainer_dirs = None
    for _ in range(2):
        trainer_dirs = create_trainer_project_dirs(project_dir,
                                                   continue_training=True)
    for k in trainer_dirs._fields:
        dir_must = os.path.join(project_dir,
                                getattr(ProjectDirs.TRAINER, k))
        self.assertTrue(os.path.isdir(dir_must))
    self.assertTrue(os.path.isfile(project_file))
    project_meta_info = load_json(project_file)
    runs_must = ['chief:0', 'chief:0/1', 'chief:0/2']
    self.assertListEqual(runs_must, project_meta_info['runs'])
    with self.assertRaises(FileExistsError):
        _ = create_trainer_project_dirs(project_dir)
def test_on_iteration_end(self, monitor_mode):
    callback = EarlyStoppingCallback(inbound_nodes=[],
                                     monitor_mode=monitor_mode,
                                     min_delta=self.min_delta,
                                     patience=self.patience,
                                     name="early_stopping").build()
    callback.log_dir = self.log_dir
    with self.test_session() as sess:
        callback.begin()
        sess.run(tf.global_variables_initializer())
        for epoch_number in range(1, self.max_epochs + 1):
            inputs, iteration_info, should_stop = self._get_inputs(
                epoch_number, monitor_mode, sess)
            callback.iteration_info = iteration_info
            callback.on_iteration_end(**inputs)
            run_context = callback.iteration_info.session_run_context
            result_stop_var = sess.run(callback._stop_var)
            if should_stop:
                run_context.request_stop.assert_called_once_with()
                self.assertTrue(run_context.stop_requested)
                self.assertTrue(result_stop_var)
            else:
                run_context.request_stop.assert_not_called()
                self.assertFalse(run_context.stop_requested)
                self.assertFalse(result_stop_var)
            if run_context.stop_requested:
                break
    self.assertTrue(os.path.isfile(self.final_stat_fname))
    best_iter_info = load_json(self.final_stat_fname)
    best_iter_info_must = {
        "epoch": self.best_epoch,
        "iteration": self.best_epoch * 100
    }
    self.assertDictEqual(best_iter_info_must, best_iter_info)
def get_class_descriptions_mapping(fname_annotations: str,
                                   remove_unused_classes: bool = False,
                                   sort_objects_by_class_and_id: bool = True,
                                   with_keypoints: bool = False):
    """
    Create a mapping of type {image_fname: [{class_label: , id: ,
    bbox: [ymin, xmin, ymax, xmax]}, {...}]}

    Parameters
    ----------
    fname_annotations
        file name of instances.json
    remove_unused_classes
        if the unused classes must be removed, so that at the end there are
        80 class ids instead of 90
    sort_objects_by_class_and_id
        if the resulting objects should be sorted first by class id and then
        by instance id
    with_keypoints
        if the keypoints data should be read from the annotations; must be
        used only if fname_annotations points to a person keypoints
        annotation file

    Returns
    -------
    image_fname_to_objects
        mapping of type {image_fname: [{class_label: , id: ,
        bbox: [ymin, xmin, ymax, xmax]}, {...}]} with bounding box
        coordinates normalized to the particular image shape
    """
    instances_coco = io_utils.load_json(fname_annotations)
    class_ids_coco = {p['id']: p['name']
                      for p in instances_coco['categories']}
    if remove_unused_classes:
        _, class_ids_mapping, _ = _remove_empty_classes(class_ids_coco)
    else:
        class_ids_mapping = dict(zip(*[class_ids_coco] * 2))
    image_fname_to_image_size = {
        p['file_name']: {
            'width': p['width'],
            'height': p['height']
        } for p in instances_coco['images']
    }
    image_id_to_fname = {
        p['id']: p['file_name'] for p in instances_coco['images']
    }
    image_fname_to_objects = {}
    for ann in instances_coco['annotations']:
        image_fname = image_id_to_fname[ann['image_id']]
        image_size = image_fname_to_image_size[image_fname]
        object_data = _read_object_from_annotation(
            ann, image_size, class_ids_mapping,
            with_keypoints=with_keypoints)
        image_fname_to_objects.setdefault(image_fname, [])
        image_fname_to_objects[image_fname].append(object_data)
    if sort_objects_by_class_and_id:
        image_fname_to_objects = _sort_instances_by_attributes(
            image_fname_to_objects)
    return image_fname_to_objects
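# Hedged usage sketch for get_class_descriptions_mapping (file names and
# numbers are illustrative assumptions): the returned bounding boxes follow
# the docstring format [ymin, xmin, ymax, xmax], normalized to the image size.
#
# image_fname_to_objects = get_class_descriptions_mapping(
#     "instances_val2017.json")
# # {"000000000139.jpg": [
# #      {"class_label": 62, "id": 1, "bbox": [0.30, 0.38, 0.63, 0.56]},
# #      ...]}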
def _read_config_main(config_dir: str) -> dict:
    """Read the main config file from config_dir if it exists, otherwise
    return an empty dict"""
    config_main = {}
    config_main_path = os.path.join(config_dir, _CONFIG_MAIN_FILE_NAME)
    if os.path.exists(config_main_path):
        config_main = io_utils.load_json(config_main_path)
    return config_main