Example #1
def collect_and_add_project_meta_info(project_dir: str):
    """
    Collect the meta information of the project including run type, git hashes
    of imported objects etc. and add it to existing meta information file inside
    of project directory

    Parameters
    ----------
    project_dir
        project directory
    """
    meta_fname = os.path.join(project_dir, _PROJECT_META_FILE_NAME)
    if not os.path.isfile(meta_fname):
        meta_all = {}
    else:
        meta_all = io_utils.load_json(meta_fname)
    project_name = get_project_name_from_directory(project_dir)
    meta_all.setdefault(_PROJECT_NAME_KEY, project_name)
    meta_run = collect_project_meta_info()
    for k in meta_run:
        if k in meta_all:
            meta_all[k] = meta_all[k] + [meta_run[k]]
        else:
            meta_all[k] = [meta_run[k]]
    with open(meta_fname, 'w') as file:
        json.dump(meta_all, file, indent=4, sort_keys=True)
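A minimal sketch of the merge semantics above, assuming the values in meta_run are scalars: each run appends its value to a per-key history list. The values here are synthetic.

meta_all = {"git_hash": ["abc123"]}
meta_run = {"git_hash": "def456", "run_type": "train"}
for k in meta_run:
    if k in meta_all:
        meta_all[k] = meta_all[k] + [meta_run[k]]
    else:
        meta_all[k] = [meta_run[k]]
assert meta_all == {"git_hash": ["abc123", "def456"], "run_type": ["train"]}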
Example #2
    def _serving_input_receiver_fn(self) -> callable:
        # pylint: disable=not-context-manager
        # bug of pylint and/or tensorflow, since it is a context manager
        with tf.Graph().as_default():
            data = self.dataset_fn()
        # pylint: enable=not-context-manager
        input_shapes, input_types = data.output_shapes, data.output_types

        shape_fn = (_get_undefined_shapes_except_last
                    if self.inference_inputs_have_variable_shape
                    else _get_defined_shapes_with_batch)

        input_output_fname = os.path.join(
            self.save_dir_inputs_outputs_mapping,
            coord_configs.INPUT_OUTPUT_NAMES_FILE_NAME)

        self._maybe_create_inference_graph_and_save_input_output(
            input_output_fname, input_shapes, input_types, shape_fn)

        input_names = io_utils.load_json(input_output_fname)['inputs']
        inputs = {k: tf.placeholder(dtype=input_types[k],
                                    shape=shape_fn(input_shapes[k]),
                                    name="input_{}".format(k))
                  for k in input_names}
        inputs_from_default_placeholders = {
            each_item.full_name: each_item.placeholder
            for each_item in self.model.default_placeholders}
        inputs.update(inputs_from_default_placeholders)
        return tf.estimator.export.ServingInputReceiver(inputs, inputs)
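The pattern above reduced to its core, as a TF1-style sketch: build placeholders from known dtypes and shapes, then pass the same dict as both the features and the receiver tensors. The input name and shape are illustrative assumptions, not the actual model inputs.

import tensorflow as tf

def serving_input_receiver_fn():
    # one placeholder per model input; here a single NHWC image input
    inputs = {"image": tf.placeholder(dtype=tf.float32,
                                      shape=[None, None, None, 3],
                                      name="input_image")}
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)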
Example #3
def panoptic_categories_to_rgb_hash_fn(fname_panoptic_annotations: str,
                                       remove_unused_classes: bool = False):
    """
    Read panoptic annotation file and create mapping in the form:
        {file_name: {id: class label}}

    Inside of panoptic_annotations['segments_info'] all of segments have its
    category (coco class) and its id, where id = r + g*256 + b*256**2 uses
    as a hash to map rgb values on panoptic rgb images to its segment label.

    Parameters
    ----------
    fname_panoptic_annotations
        file name of panoptic annotations json file
    remove_unused_classes
        if the unused classes must be removed and so at the end there
        are 133 class ids instead of 200

    Returns
    -------
    panoptic_image_fname_to_class_id_hash_fn
        mapping of type {file_name: {id: class label}}
    """
    panoptic_coco = io_utils.load_json(fname_panoptic_annotations)
    panoptic_coco_categories = panoptic_coco['categories']
    class_ids_to_name_coco = {
        p['id']: p['name']
        for p in panoptic_coco_categories
    }
    if remove_unused_classes:
        _, class_ids_mapping, _ = _remove_empty_classes(class_ids_to_name_coco)
    else:
        # identity mapping: each class id maps to itself
        class_ids_mapping = dict(zip(*[class_ids_to_name_coco] * 2))

    panoptic_image_fname_to_class_id_hash_fn = {}
    for ann in panoptic_coco['annotations']:
        file_name = ann['file_name']
        hash_to_class = {
            a['id']: class_ids_mapping[a['category_id']]
            for a in ann['segments_info']
        }
        hash_to_class[0] = 0
        panoptic_image_fname_to_class_id_hash_fn[file_name] = hash_to_class

    return panoptic_image_fname_to_class_id_hash_fn
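The id hash from the docstring, written out in both directions; the helper names and the sample value are illustrative only.

def rgb_to_panoptic_id(r, g, b):
    return r + g * 256 + b * 256 ** 2

def panoptic_id_to_rgb(segment_id):
    # invert the base-256 encoding channel by channel
    r = segment_id % 256
    g = (segment_id // 256) % 256
    b = segment_id // 256 ** 2
    return r, g, b

assert rgb_to_panoptic_id(*panoptic_id_to_rgb(123456)) == 123456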
Example #4
def add_runtype_to_project_meta_info(project_dir: str):
    """
    Add the current runtype to the project meta information json

    Parameters
    ----------
    project_dir
        project directory
    """
    meta_fname = os.path.join(project_dir, _PROJECT_META_FILE_NAME)
    if not os.path.isfile(meta_fname):
        meta = {}
    else:
        meta = io_utils.load_json(meta_fname)
    used_runs = meta.get('runs', [])
    runtype = _get_or_increase_runtype(used_runs)
    meta['runs'] = used_runs + [runtype]
    with open(meta_fname, 'w') as file:
        json.dump(meta, file, indent=4, sort_keys=True)
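A sketch of what _get_or_increase_runtype plausibly does, inferred only from the expected runs in Example #11 ('chief:0', 'chief:0/1', 'chief:0/2'); this is an assumption, not the actual implementation.

def get_or_increase_runtype(used_runs, base="chief:0"):
    # repeated runs of the same type get an increasing /N suffix
    count = sum(1 for run in used_runs
                if run == base or run.startswith(base + "/"))
    return base if count == 0 else "{}/{}".format(base, count)

runs = []
for _ in range(3):
    runs.append(get_or_increase_runtype(runs))
assert runs == ["chief:0", "chief:0/1", "chief:0/2"]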
Example #5
    def serialize_to_file(self, configs_to_log: dict):
        logger = logging.getLogger(__name__)
        if not self.save_dir:
            logger.warning("No save_dir provided to config serializer! "
                           "Set the project if you want to save the configs")
            return
        save_file_name = self._get_save_file_name()
        logger.info("Save run config to %s", save_file_name)
        configs_to_log = self.format_configs_to_log(configs_to_log)
        if os.path.exists(save_file_name):
            configs_to_log_loaded = io_utils.load_json(save_file_name)
            configs_to_log = project_utils.update_config_with_other_config(
                configs_to_log_loaded, configs_to_log)
        with open(save_file_name, "w", encoding='utf8') as file:
            json.dump(configs_to_log,
                      file,
                      indent=4,
                      sort_keys=True,
                      default=lambda o: RunConfigSerializer.NOT_SERIALIZABLE)
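The default= hook above in isolation: objects json cannot serialize are replaced by a sentinel string instead of raising TypeError. A minimal, self-contained sketch with an assumed sentinel value:

import json

NOT_SERIALIZABLE = "NOT_SERIALIZABLE"
config = {"lr": 0.1, "callback": object()}
# object() is not JSON-serializable, so default= substitutes the sentinel
serialized = json.dumps(config, indent=4, sort_keys=True,
                        default=lambda o: NOT_SERIALIZABLE)
assert json.loads(serialized)["callback"] == NOT_SERIALIZABLE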
Example #6
def _validate_training_project(project_dir, continue_training):
    project_meta_fname = os.path.join(project_dir, 'nucleus7_project.json')
    runtype = run_utils.get_run_type()
    if not os.path.isfile(project_meta_fname):
        used_runs = []
    else:
        used_runs = io_utils.load_json(project_meta_fname).get('runs', [])
    msg_project_exist = ("Project inside {} already exists! "
                         "Clean the folder, set --continue flag or "
                         "select new project folder!".format(project_dir))
    if (run_utils.is_chief() and runtype in used_runs
            and not continue_training):
        raise FileExistsError(msg_project_exist)
    if (run_utils.is_chief() and not continue_training
            and os.path.exists(project_dir)):
        training_dir = os.path.join(project_dir, _TRAINING_DIR)
        allowed_training_content = ["configs", 'global_config.json']
        if _get_not_allowed_content(training_dir, allowed_training_content):
            raise FileExistsError(msg_project_exist)
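A plausible sketch of the _get_not_allowed_content helper used above, kept only to make the validation readable; this implementation is a guess: it would return directory entries outside the allowed list.

import os

def get_not_allowed_content(directory, allowed_content):
    # hypothetical: nothing to complain about if the directory is absent
    if not os.path.isdir(directory):
        return []
    return [each for each in os.listdir(directory)
            if each not in allowed_content]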
Example #7
def get_project_name_from_directory(project_dir: str) -> str:
    """
    Returns the project name given the project directory

    Parameters
    ----------
    project_dir
        project directory

    Returns
    -------
    project_name
        project name
    """
    project_name = None
    project_meta_file_name = os.path.join(project_dir, _PROJECT_META_FILE_NAME)
    if os.path.exists(project_meta_file_name):
        project_meta = io_utils.load_json(project_meta_file_name)
        project_name = project_meta.get(_PROJECT_NAME_KEY)
    return project_name or os.path.split(project_dir.rstrip('/'))[-1]
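The fallback branch in isolation: without a meta file, the project name is the last path component, with trailing slashes stripped first. The path is illustrative.

import os

assert os.path.split("/data/my_project/".rstrip('/'))[-1] == "my_project"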
Example #8
    def test_create_trainer_project_dirs_local(self):
        if "TF_CONFIG" in os.environ:
            del os.environ["TF_CONFIG"]
        project_dir = self.get_temp_dir()
        project_file = os.path.join(project_dir, 'nucleus7_project.json')
        trainer_dirs = create_trainer_project_dirs(project_dir)
        self.assertIsInstance(trainer_dirs, tuple)
        self.assertTupleEqual(trainer_dirs._fields, ProjectDirs.TRAINER._fields)
        self.assertTrue(os.path.isfile(project_file))
        for k in trainer_dirs._fields:
            dir_must = os.path.join(project_dir,
                                    getattr(ProjectDirs.TRAINER, k))
            self.assertTrue(os.path.isdir(dir_must))

        self.assertTrue(os.path.isfile(project_file))
        project_meta_info = load_json(project_file)
        self.assertListEqual(['chief:0'], project_meta_info['runs'])

        with self.assertRaises(FileExistsError):
            _ = create_trainer_project_dirs(project_dir)
Example #9
def _read_objects_json(fname, with_scores=False):
    """
    Read objects from json file

    Objects inside of file should be formatted in following way:

    `[{'bbox': {'xmin': , 'ymin': , 'w': , 'h': }, 'class_label': ,
    'id': , 'score': }, ...]` or bbox can be a list in format
    [ymin, xmin, ymax, xmax]

    Parameters
    ----------
    fname
        file name
    with_scores
        if scores should be read from file

    Returns
    -------
    class_labels
        class ids with shape [num_objects]
    instance_ids
        instance ids with shape [num_objects]
    bboxes
        bounding boxes with shape [num_objects, 4] and format
        [ymin, xmin, ymax, xmax]
    scores
        only if return_scores == True; if no scores inside of labels was found,
        then it will be returned as 1

    """
    try:
        data = io_utils.load_json(fname)
    except:  # pylint: disable=bare-except
        data = []
    instance_ids, class_labels, bboxes, scores = (_combine_object_labels(
        data, with_scores=with_scores))
    return instance_ids, class_labels, bboxes, scores
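The two accepted label layouts from the docstring, spelled out with synthetic values:

objects_dict_bbox = [{"bbox": {"xmin": 10, "ymin": 20, "w": 30, "h": 40},
                      "class_label": 1, "id": 0, "score": 0.9}]
objects_list_bbox = [{"bbox": [20, 10, 60, 40],  # [ymin, xmin, ymax, xmax]
                      "class_label": 1, "id": 0, "score": 0.9}]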
Example #10
    def test_create_trainer_project_dirs_distributed(self):
        tasks = [{"type": "chief", "index": 0},
                 {"type": "worker", "index": 0},
                 {"type": "worker", "index": 1},
                 {"type": "ps", "index": 0},
                 {"type": "ps", "index": 1},
                 {"type": "evaluator", "index": 0}]
        cluster = {"chief": ["localhost:1111"],
                   "worker": ["localhost:2222", "localhost:3333"],
                   "ps": ["localhost:4444", "localhost:5555"]}

        project_dir = self.get_temp_dir()
        project_file = os.path.join(project_dir, 'nucleus7_project.json')
        for task in tasks:
            tf_config = {'cluster': cluster, "task": task}
            os.environ["TF_CONFIG"] = json.dumps(tf_config)
            _ = create_trainer_project_dirs(project_dir)
            config_dir = os.path.join(project_dir, ProjectDirs.TRAINER.configs)
            self.assertTrue(os.path.isdir(config_dir))

        self.assertTrue(os.path.isfile(project_file))
        project_meta_info = load_json(project_file)
        runs_must = ['{}:{}'.format(task['type'], task['index'])
                     for task in tasks]
        self.assertListEqual(runs_must, project_meta_info['runs'])

        for task in tasks:
            tf_config = {'cluster': cluster, "task": task}
            os.environ["TF_CONFIG"] = json.dumps(tf_config)
            if task['type'] == 'chief':
                with self.assertRaises(FileExistsError):
                    _ = create_trainer_project_dirs(project_dir)
            else:
                _ = create_trainer_project_dirs(project_dir)

        del os.environ['TF_CONFIG']
        with self.assertRaises(FileExistsError):
            _ = create_trainer_project_dirs(project_dir)
Example #11
    def test_create_trainer_project_dirs_continue_training(self):
        if "TF_CONFIG" in os.environ:
            del os.environ["TF_CONFIG"]
        project_dir = self.get_temp_dir()
        project_file = os.path.join(project_dir, 'nucleus7_project.json')
        create_trainer_project_dirs(project_dir, continue_training=True)
        trainer_dirs = None
        for i in range(2):
            trainer_dirs = create_trainer_project_dirs(project_dir,
                                                       continue_training=True)

        for k in trainer_dirs._fields:
            dir_must = os.path.join(project_dir,
                                    getattr(ProjectDirs.TRAINER, k))
            self.assertTrue(os.path.isdir(dir_must))

        self.assertTrue(os.path.isfile(project_file))
        project_meta_info = load_json(project_file)
        runs_must = ['chief:0', 'chief:0/1', 'chief:0/2']
        self.assertListEqual(runs_must, project_meta_info['runs'])

        with self.assertRaises(FileExistsError):
            _ = create_trainer_project_dirs(project_dir)
Example #12
    def test_on_iteration_end(self, monitor_mode):
        callback = EarlyStoppingCallback(inbound_nodes=[],
                                         monitor_mode=monitor_mode,
                                         min_delta=self.min_delta,
                                         patience=self.patience,
                                         name="early_stopping").build()
        callback.log_dir = self.log_dir

        with self.test_session() as sess:
            callback.begin()
            sess.run(tf.global_variables_initializer())
            for epoch_number in range(1, self.max_epochs + 1):
                inputs, iteration_info, should_stop = self._get_inputs(
                    epoch_number, monitor_mode, sess)
                callback.iteration_info = iteration_info
                callback.on_iteration_end(**inputs)
                run_context = callback.iteration_info.session_run_context
                result_stop_var = sess.run(callback._stop_var)
                if should_stop:
                    run_context.request_stop.assert_called_once_with()
                    self.assertTrue(run_context.stop_requested)
                    self.assertTrue(result_stop_var)
                else:
                    run_context.request_stop.assert_not_called()
                    self.assertFalse(run_context.stop_requested)
                    self.assertFalse(result_stop_var)
                if run_context.stop_requested:
                    break
        self.assertTrue(os.path.isfile(self.final_stat_fname))

        best_iter_info = load_json(self.final_stat_fname)
        best_iter_info_must = {
            "epoch": self.best_epoch,
            "iteration": self.best_epoch * 100
        }
        self.assertDictEqual(best_iter_info_must, best_iter_info)
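For reference, the generic early-stopping rule the test exercises, sketched independently of the callback's internals (which are not shown here): stop once the monitored value has failed to improve by at least min_delta for patience consecutive evaluations; monitor_mode follows the usual 'min' for losses / 'max' for metrics convention.

def should_stop(values, min_delta, patience, monitor_mode):
    # flip the sign for 'max' mode so we always minimize
    sign = 1.0 if monitor_mode == "min" else -1.0
    best = float("inf")
    waited = 0
    for value in values:
        if sign * value < best - min_delta:
            best = sign * value
            waited = 0
        else:
            waited += 1
            if waited >= patience:
                return True
    return False

assert should_stop([1.0, 0.9, 0.9, 0.9, 0.9], 0.01, 3, "min")
assert not should_stop([1.0, 0.9, 0.8, 0.7], 0.01, 3, "min")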
Example #13
def get_class_descriptions_mapping(fname_annotations: str,
                                   remove_unused_classes: bool = False,
                                   sort_objects_by_class_and_id: bool = True,
                                   with_keypoints: bool = False):
    """
    Create mapping of type
    {image_fname:
    [{class_label: , id: , bbox: [ymin, xmin, ymax, xmax]}, {...}]}

    Parameters
    ----------
    fname_annotations
        file name of instances.json
    remove_unused_classes
        if the unused classes must be removed and so at the end there
        are 80 class ids instead of 90
    sort_objects_by_class_and_id
        if the result objects should be sorted first by class id and then by
        instance id
    with_keypoints
        if the keypoints data should be read from annotations; must be used
        only if fname_annotations points to person keypoints annotation file

    Returns
    -------
    image_fname_to_objects
        mapping of type
         {image_fname:
         [{class_label: , id: , bbox: [ymin, xmin, ymax, xmax]}, {...}]}
         with normalized to particular image shape bounding box coordinates
    """
    instances_coco = io_utils.load_json(fname_annotations)
    class_ids_coco = {p['id']: p['name'] for p in instances_coco['categories']}
    if remove_unused_classes:
        _, class_ids_mapping, _ = _remove_empty_classes(class_ids_coco)
    else:
        # identity mapping: each class id maps to itself
        class_ids_mapping = dict(zip(*[class_ids_coco] * 2))

    image_fname_to_image_size = {
        p['file_name']: {
            'width': p['width'],
            'height': p['height']
        }
        for p in instances_coco['images']
    }
    image_id_to_fname = {
        p['id']: p['file_name']
        for p in instances_coco['images']
    }
    image_fname_to_objects = {}
    for ann in instances_coco['annotations']:
        image_fname = image_id_to_fname[ann['image_id']]
        image_size = image_fname_to_image_size[image_fname]
        object_data = _read_object_from_annotation(
            ann, image_size, class_ids_mapping, with_keypoints=with_keypoints)
        image_fname_to_objects.setdefault(image_fname, [])
        image_fname_to_objects[image_fname].append(object_data)
    if sort_objects_by_class_and_id:
        image_fname_to_objects = _sort_instances_by_attributes(
            image_fname_to_objects)
    return image_fname_to_objects
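A minimal sketch of the bbox normalization the docstring mentions, assuming COCO-style [xmin, ymin, w, h] input and normalized [ymin, xmin, ymax, xmax] output; the helper name is hypothetical, and _read_object_from_annotation may do more than this.

def normalize_coco_bbox(bbox, image_size):
    # divide pixel coordinates by image height/width to normalize
    xmin, ymin, width, height = bbox
    return [ymin / image_size['height'],
            xmin / image_size['width'],
            (ymin + height) / image_size['height'],
            (xmin + width) / image_size['width']]

assert normalize_coco_bbox([10, 20, 30, 40],
                           {'width': 100, 'height': 200}) == [0.1, 0.1, 0.3, 0.4]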
Example #14
def _read_config_main(config_dir: str) -> dict:
    config_main = {}
    config_main_path = os.path.join(config_dir, _CONFIG_MAIN_FILE_NAME)
    if os.path.exists(config_main_path):
        config_main = io_utils.load_json(config_main_path)
    return config_main