Code Example #1
File: builder.py Project: ivy-dl/builder
def json_spec_from_fpath(json_spec_path, json_fname, store_duplicates=False):
    base_dir = json_spec_path
    if not os.path.isdir(base_dir):
        raise Exception('base_dir {} does not exist.'.format(base_dir))
    json_spec = ivy.Container()
    while True:
        fpath = os.path.normpath(os.path.join(base_dir, json_fname))
        if os.path.isfile(fpath):
            if store_duplicates:
                parsed_json_cont = ivy.Container(parse_json_to_cont(fpath))
                duplicate_key_chains = list()

                def map_fn(x, kc):
                    if kc in json_spec:
                        duplicate_key_chains.append(kc)
                        return ivy.Container(duplicated={'parent_dir': json_spec[kc], 'this_dir': x})
                    else:
                        return x

                parsed_json_cont = parsed_json_cont.map(map_fn)
                json_spec = ivy.Container.combine(parsed_json_cont,
                                                  json_spec.prune_key_chains(duplicate_key_chains))
            else:
                json_spec = ivy.Container.combine(ivy.Container(parse_json_to_cont(fpath)), json_spec)
        if base_dir.split('/')[-1] == 'json_args':
            return json_spec
        base_dir = os.path.normpath(os.path.join(base_dir, '..'))
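
Usage note: the function above walks upwards from json_spec_path, merging every json_fname it finds, until it reaches a directory named json_args; entries found deeper in the tree take priority over those inherited from parent directories. Below is a minimal sketch of the same walk-and-merge pattern using only the standard library; the shallow dict.update merge is a simplification of ivy.Container.combine, which merges nested key chains.

import json
import os

def json_spec_from_fpath_sketch(json_spec_path, json_fname):
    # walk from json_spec_path up to a 'json_args' root, merging json files;
    # files found deeper in the tree override values from parent directories
    base_dir = json_spec_path
    json_spec = {}
    while True:
        fpath = os.path.normpath(os.path.join(base_dir, json_fname))
        if os.path.isfile(fpath):
            with open(fpath) as f:
                parsed = json.load(f)
            parsed.update(json_spec)  # deeper (already merged) entries win
            json_spec = parsed
        if os.path.basename(base_dir) == 'json_args':
            return json_spec
        base_dir = os.path.normpath(os.path.join(base_dir, '..'))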
Code Example #2
File: builder.py Project: ivy-dl/builder
def build_network_specification(dataset_dirs_args=None,
                                dataset_dirs_class=None,
                                dataset_dirs=None,
                                dataset_spec_args=None,
                                dataset_spec_class=None,
                                dataset_spec=None,
                                network_spec_args=None,
                                network_spec_class=None,
                                json_spec_path=None,
                                spec_cont=None,
                                class_priority=False):
    """
    build network specification
    """

    # build dataset specification
    dataset_spec = ivy.default(
        dataset_spec,
        build_dataset_spec(
            dataset_dirs_args=dataset_dirs_args,
            dataset_dirs_class=dataset_dirs_class,
            dataset_dirs=dataset_dirs,
            dataset_spec_args=dataset_spec_args,
            dataset_spec_class=dataset_spec_class,
            json_spec_path=json_spec_path,
            spec_cont=spec_cont))

    # define network specification arguments
    if network_spec_args is None:
        network_spec_args = dict()
    network_spec_args = ivy.Container(network_spec_args)
    network_spec_args = ivy.Container.combine(network_spec_args, ivy.Container(dataset_spec=dataset_spec))

    # load json file
    if isinstance(json_spec_path, str):
        json_spec = json_spec_from_fpath(json_spec_path, 'network_args.json')
    else:
        json_spec = ivy.Container()

    # load from spec dict
    this_spec_cont =\
        ivy.Container(spec_cont['network']) if isinstance(spec_cont, dict) and 'network' in spec_cont \
            else ivy.Container()

    # combine args
    network_spec_args = ivy.Container.combine(json_spec, this_spec_cont, network_spec_args)

    # override network_spec_class if specified in network_spec_args
    network_spec_class = ivy.default(ivy.default(
        _import_arg_specified_class_if_present(network_spec_args, 'network_spec_class'),
        network_spec_class, rev=class_priority),
        NetworkSpec)

    # return network
    return network_spec_class(**network_spec_args)
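
The three-way merge above, ivy.Container.combine(json_spec, this_spec_cont, network_spec_args), layers the argument sources; assuming combine gives priority to the rightmost container (consistent with the duplicate handling in Code Example #1), programmatic args override the spec_cont dict, which overrides values loaded from json. A plain-dict sketch of that layering, with hypothetical values:

json_spec = {'batch_size': 8, 'lr': 1e-3}    # loaded from network_args.json
this_spec_cont = {'batch_size': 16}          # from the spec_cont dict
network_spec_args = {'lr': 1e-4}             # passed programmatically

merged = {**json_spec, **this_spec_cont, **network_spec_args}
assert merged == {'batch_size': 16, 'lr': 1e-4}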
Code Example #3
File: builder.py Project: ivy-dl/builder
def build_data_loader_spec(dataset_dirs_args=None,
                           dataset_dirs_class=None,
                           dataset_dirs=None,
                           dataset_spec_args=None,
                           dataset_spec_class=None,
                           dataset_spec=None,
                           data_loader_spec_args=None,
                           data_loader_spec_class=None,
                           json_spec_path=None,
                           spec_cont=None,
                           class_priority=False):
    """
    build data loader specification
    """

    # build dataset specification
    dataset_spec = ivy.default(
        dataset_spec,
        build_dataset_spec(
            dataset_dirs_args=dataset_dirs_args,
            dataset_dirs_class=dataset_dirs_class,
            dataset_dirs=dataset_dirs,
            dataset_spec_args=dataset_spec_args,
            dataset_spec_class=dataset_spec_class,
            json_spec_path=json_spec_path,
            spec_cont=spec_cont))

    # define data loader specification arguments
    if data_loader_spec_args is None:
        data_loader_spec_args = dict()
    data_loader_spec_args = ivy.Container(data_loader_spec_args)
    data_loader_spec_args = ivy.Container.combine(data_loader_spec_args, ivy.Container(dataset_spec=dataset_spec))

    # load json file
    if isinstance(json_spec_path, str):
        json_spec = json_spec_from_fpath(json_spec_path, 'data_loader_args.json')
    else:
        json_spec = ivy.Container()

    # load from spec dict
    this_spec_cont =\
        ivy.Container(spec_cont['data_loader']) if isinstance(spec_cont, dict) and 'data_loader' in spec_cont \
            else ivy.Container()

    # combine args
    data_loader_spec_args = ivy.Container.combine(json_spec, this_spec_cont, data_loader_spec_args)

    # override data_loader_spec_class if specified in data_loader_spec_args
    data_loader_spec_class = ivy.default(ivy.default(
        _import_arg_specified_class_if_present(data_loader_spec_args, 'data_loader_spec_class'),
        data_loader_spec_class, rev=class_priority),
        DataLoaderSpec)

    # return data loader
    return data_loader_spec_class(**data_loader_spec_args)
Code Example #4
File: builder.py Project: ivy-dl/builder
def command_line_str_to_spec_cont(spec_str):
    """
    parse a command line spec string into an ivy Container, filling in empty sub-containers for any missing spec keys
    """
    if spec_str is not None:
        spec_cont = ivy.Container(json.loads(spec_str.replace("'", '"')))
    else:
        spec_cont = ivy.Container()
    all_keys = ['dataset_dirs', 'dataset', 'data_loader', 'network', 'trainer', 'tuner']
    for key in spec_cont.keys():
        if key not in all_keys:
            raise Exception('spec dict keys must all be one of {}, but found {}'.format(all_keys, key))
    for key in all_keys:
        if key not in spec_cont:
            spec_cont[key] = ivy.Container()
    return spec_cont
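
A usage sketch for the parser above: the command-line value is a Python-style dict literal whose single quotes are swapped for double quotes before json.loads, and every top-level key must be one of the six spec names. The string value here is hypothetical; note that the quote substitution would break values containing apostrophes.

import json

spec_str = "{'trainer': {'total_iterations': 100}}"  # hypothetical CLI value
spec_dict = json.loads(spec_str.replace("'", '"'))
assert spec_dict == {'trainer': {'total_iterations': 100}}
# command_line_str_to_spec_cont(spec_str) additionally wraps this in an
# ivy.Container and fills in empty containers for the five missing spec keys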
Code Example #5
File: test_scripts.py Project: ivy-dl/builder
def test_format_dataset_containers(dev_str, call):
    this_dir = os.path.dirname(os.path.realpath(__file__))
    orig_cont_dir = os.path.join(this_dir, 'dataset/containers')
    cont_to_format_dir = os.path.join(this_dir, 'dataset/containers_to_format')
    shutil.rmtree(cont_to_format_dir)
    shutil.copytree(orig_cont_dir, cont_to_format_dir)

    # from format file
    cont_format_fpath = os.path.join(this_dir,
                                     'dataset/new_container_format.json')
    main(cont_to_format_dir, cont_format_fpath=cont_format_fpath)
    new_cont_format = ivy.Container.from_disk_as_json(cont_format_fpath)
    new_cont_fnames = os.listdir(cont_to_format_dir)
    for new_cont_fname in new_cont_fnames:
        new_cont_fpath = os.path.join(cont_to_format_dir, new_cont_fname)
        new_cont = ivy.Container.from_disk_as_json(new_cont_fpath)
        assert ivy.Container.identical([new_cont, new_cont_format],
                                       check_types=False)
    shutil.rmtree(cont_to_format_dir)
    shutil.copytree(orig_cont_dir, cont_to_format_dir)

    # from format string
    cont_format_as_str = '{"discounts": true, "rewards": true, "step_types": true, "array": true}'
    main(cont_to_format_dir, cont_format_as_str)
    new_cont_format = ivy.Container(json.loads(cont_format_as_str))
    new_cont_fnames = os.listdir(cont_to_format_dir)
    for new_cont_fname in new_cont_fnames:
        new_cont_fpath = os.path.join(cont_to_format_dir, new_cont_fname)
        new_cont = ivy.Container.from_disk_as_json(new_cont_fpath)
        assert ivy.Container.identical([new_cont, new_cont_format],
                                       check_types=False)
    shutil.rmtree(cont_to_format_dir)
    shutil.copytree(orig_cont_dir, cont_to_format_dir)
Code Example #6
File: builder.py Project: ivy-dl/builder
def get_json_args(json_spec_path, keys_to_ignore, keychains_to_ignore, keychain_to_show, defaults=False,
                  store_duplicates=False, current_dir_only=False, spec_names=None):
    spec_names = ivy.default(spec_names,
                             [item.split('.json')[0] for item in os.listdir(json_spec_path) if '.json' in item])
    if defaults:
        defaults = '.defaults'
    else:
        defaults = ''
    cont = ivy.Container()
    if current_dir_only:
        for spec_name in spec_names:
            fpath = os.path.join(json_spec_path, spec_name + '.json' + defaults)
            if os.path.isfile(fpath):
                cont[spec_name] = parse_json_to_cont(fpath)
    else:
        for spec_name in spec_names:
            cont[spec_name] = \
                json_spec_from_fpath(json_spec_path, spec_name + '.json' + defaults, store_duplicates)
    for keychain_to_ignore in keychains_to_ignore:
        if keychain_to_ignore in cont:
            cont[keychain_to_ignore] = 'not_shown'
    cont = cont.set_at_keys(dict(zip(keys_to_ignore, ['not_shown']*len(keys_to_ignore))))
    if ivy.exists(keychain_to_show):
        cont = cont[keychain_to_show]
    return cont
Code Example #7
File: network_group.py Project: ivy-dl/builder
 def __init__(self, spec: NetworkSpec, v=None) -> None:
     """
     base class for a group of networks
     """
     self._v_in = v
     self._spec = spec
     self._subnets = ivy.Container()
     super(NetworkGroup, self).__init__(spec, v=v)
Code Example #8
 def get_next_batch(self, dataset_key='training'):
     data = self._data[dataset_key]
     if self._spec.shuffle:
         self._i = np.random.randint(0, self._num_examples)
     else:
         self._i = (self._i + 1) % self._num_examples
     return ivy.Container(input=data.input[self._i:self._i + 1],
                          target=data.targets[self._i:self._i + 1])
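
The loader above returns single-example batches: a random index when shuffling, otherwise a cyclic (i + 1) % num_examples walk. A tiny sketch of the sequential case, with a hypothetical dataset size:

num_examples = 3  # hypothetical
i, visited = 0, []
for _ in range(4):
    i = (i + 1) % num_examples
    visited.append(i)
assert visited == [1, 2, 0, 1]  # wraps back to the start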
Code Example #9
File: builder.py Project: ivy-dl/builder
def trainer_to_spec_args_dict(trainer):
    args_dict = dict()
    args_dict['data_loader_class'] = _obj_to_class_str(trainer.spec.data_loader)
    args_dict['network_class'] = _obj_to_class_str(trainer.spec.network)
    args_dict['trainer_class'] = _obj_to_class_str(trainer)
    args_dict['dataset_dirs_args'] = ivy.Container(trainer.spec.data_loader.spec.dataset_spec.dirs.kwargs).to_dict()
    args_dict['dataset_dirs_class'] = _obj_to_class_str(trainer.spec.data_loader.spec.dataset_spec.dirs)
    args_dict['dataset_spec_args'] = ivy.Container(trainer.spec.data_loader.spec.dataset_spec.kwargs).to_dict()
    args_dict['dataset_spec_class'] = _obj_to_class_str(trainer.spec.data_loader.spec.dataset_spec)
    args_dict['data_loader_spec_args'] = ivy.Container(trainer.spec.data_loader.spec.kwargs).to_dict()
    args_dict['data_loader_spec_class'] = _obj_to_class_str(trainer.spec.data_loader.spec)
    args_dict['network_spec_args'] = ivy.Container(trainer.spec.network.spec.kwargs).to_dict()
    args_dict['network_spec_class'] = _obj_to_class_str(trainer.spec.network.spec)
    args_dict['trainer_spec_args'] = ivy.Container(trainer.spec.kwargs).prune_key_chains(
        ['data_loader', 'network']).to_dict()
    args_dict['trainer_spec_class'] = _obj_to_class_str(trainer.spec)
    return args_dict
Code Example #10
 def save(self, checkpoint_path: str) -> None:
     """
     save the network weights and optimizer state to a checkpoint file
     :param checkpoint_path: path of the checkpoint file for saving the weights and optimizer state
     """
     checkpoint = ivy.Container({'network': self._network.v,
                                 'optimizer': self._optimizer.state})
     os.makedirs('/'.join(checkpoint_path.split('/')[:-1]), exist_ok=True)
     checkpoint.to_disk_as_hdf5(checkpoint_path)
Code Example #11
 def save(self, step):
     checkpoint = ivy.Container({
         'network':
         self._checkpoint.net.v,
         'optimizer':
         self._checkpoint.optimizer.state
     })
     self._latest_checkpoint_fpath = os.path.join(
         self._directory, 'chkpt-{}.hdf5'.format(step))
     checkpoint.to_disk_as_hdf5(self._latest_checkpoint_fpath)
Code Example #12
File: seq_data_loader.py Project: ivy-dl/builder
 def _load_json_files(containers):
     read_files = list()
     for j_fpath in containers.fpaths:
         if j_fpath != '':
             with open(j_fpath, 'r') as file:
                 read_str = file.read()
         else:
             read_str = ''
         read_files.append(read_str)
     return ivy.Container({'json_str': read_files})
Code Example #13
File: test_dataset.py Project: ivy-dl/builder
 def _init(self, num_processes):
     self._x = [ivy.array([0, 1]), ivy.array([2, 3, 4, 5, 6, 7, 8, 9])]
     dataset_container = ivy.Container({'x': self._x})
     dataset = Dataset(dataset_container,
                       'base',
                       dataset_container.shape[0],
                       num_processes=num_processes)
     dataset = dataset.unbatch('unbatched', num_processes=num_processes)
     self._dataset = dataset.batch('batched',
                                   3,
                                   num_processes=num_processes)
Code Example #14
File: network_group.py Project: ivy-dl/builder
 def _build_subnets(self, *args, **kwargs) -> ivy.Container:
     """
     Build the network subnets.
     """
     built_rets = list()
     for k, subnet_spec in self._spec.subnets.items():
         subnet = subnet_spec.network_class(subnet_spec,
                                            v=ivy.default(
                                                lambda: self._v_in[k], None,
                                                True))
         built_rets.append(
             subnet.build(*args, dev_str=self._dev_str, **kwargs))
         self._subnets[k] = subnet
     return ivy.Container(dict(zip(self._spec.subnets.keys(), built_rets)))
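
The ivy.default(lambda: self._v_in[k], None, True) call above appears to pass a callable with exception catching enabled: if indexing self._v_in raises (for instance because no variables were passed in), the default None is used instead. A plain-Python sketch of that fallback pattern; the helper name is hypothetical, not part of ivy:

def default_sketch(fn, default):
    # return fn()'s result, falling back to default if fn raises
    try:
        return fn()
    except Exception:
        return default

v_in = None  # hypothetical: no variables passed to the network group
assert default_sketch(lambda: v_in['subnet_a'], None) is None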
Code Example #15
def main(compile_mode=False):
    current_dir = os.path.dirname(os.path.abspath(__file__))

    # dataset dirs specification
    dataset_dirs_args = dict()

    # dataset specification
    dataset_spec_filepath = os.path.join(current_dir, 'json_specs',
                                         'dataset_spec.json.example')
    dataset_spec_args = builder.parse_json_to_cont(dataset_spec_filepath)

    # data loader specification
    data_loader_spec_filepath = os.path.join(current_dir, 'json_specs',
                                             'data_loader_spec.json.example')
    data_loader_spec_args = builder.parse_json_to_cont(
        data_loader_spec_filepath)

    # network specification
    network_spec_filepath = os.path.join(current_dir, 'json_specs',
                                         'network_spec.json.example')
    network_spec_args = builder.parse_json_to_cont(network_spec_filepath)

    # trainer specification
    trainer_spec_filepath = os.path.join(current_dir, 'json_specs',
                                         'trainer_spec.json.example')
    trainer_spec_args = builder.parse_json_to_cont(trainer_spec_filepath)

    # In all of the above cases, the user could override the loaded json dicts with command-line args if desired,
    # before passing them into the TrainingJob for specification class construction; the specs are then read-only

    trainer = builder.build_trainer(
        ExampleDataLoader,
        ExampleNetwork,
        ExampleTrainer,
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=ExampleDatasetDirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=ExampleDatasetSpec,
        data_loader_spec_args=data_loader_spec_args,
        data_loader_spec_class=ExampleDataLoaderSpec,
        network_spec_args=network_spec_args,
        network_spec_class=ExampleNetworkSpec,
        trainer_spec_args=trainer_spec_args,
        spec_cont=ivy.Container({'trainer': {
            'compile_mode': compile_mode
        }}))
    trainer.setup()
    trainer.train()
    trainer.close()
    print("Finished complete example!")
Code Example #16
File: test_dataset.py Project: ivy-dl/builder
 def _init(self, array_shape, num_processes):
     x = [
         ivy.array([[0], [1], [2]]),
         ivy.array([[3], [4], [5]]),
         ivy.array([[6], [7], [8]])
     ]
     self._x = [ivy.reshape(item, array_shape) for item in x]
     dataset_container = ivy.Container({'x': x})
     dataset = Dataset(dataset_container,
                       'base',
                       dataset_container.shape[0],
                       num_processes=num_processes)
     self._dataset = dataset.unbatch('unbatched',
                                     num_processes=num_processes)
Code Example #17
    def __init__(self, data_loader_spec):
        super().__init__(data_loader_spec)

        # dataset size
        self._num_examples = self._spec.dataset_spec.num_examples

        # counter
        self._i = 0

        # load vector data
        vector_dim = self._spec.dataset_spec.vector_dim
        self._targets = ivy.zeros((self._num_examples, vector_dim, 1))

        # load image data
        image_dims = self._spec.dataset_spec.image_dims
        self._input = ivy.ones(
            (self._num_examples, image_dims[0], image_dims[1], 3))

        self._training_data = ivy.Container(targets=self._targets,
                                            input=self._input)
        self._validation_data = ivy.Container(targets=self._targets,
                                              input=self._input)
        self._data = ivy.Container(training=self._training_data,
                                   validation=self._validation_data)
Code Example #18
File: test_dataset.py Project: ivy-dl/builder
 def _init(self, array_shape, num_processes):
     x = [
         ivy.array(0),
         ivy.array(1),
         ivy.array(2),
         ivy.array(3),
         ivy.array(4),
         ivy.array(5),
         ivy.array(6),
         ivy.array(7),
         ivy.array(8)
     ]
     self._x = [ivy.reshape(item, array_shape) for item in x]
     dataset_container = ivy.Container({'x': self._x})
     self._dataset = Dataset(dataset_container,
                             'base',
                             dataset_container.shape[0],
                             num_processes=num_processes)
Code Example #19
File: seq_data_loader.py Project: ivy-dl/builder
    def _compute_num_workers(self):

        # init
        num_workers = self._total_num_workers
        self._num_workers = ivy.Container()

        # prefetch
        self._num_workers.prefetch = int(self._spec.with_prefetching) + 1
        num_workers = math.ceil(num_workers / self._num_workers.prefetch)

        # post processed
        self._num_workers.post_processed = 1

        # from numpy
        self._num_workers.from_np = 1

        # batched
        self._num_workers.batched = 1

        # loaded data
        self._num_workers.loaded_data = min(num_workers, self._batch_size)

        # ToDo: add multi-processing support for these lower level datasets

        # shuffled
        self._num_workers.shuffled = 1

        # unbatch
        self._num_workers.unbatched = 1

        # windowed
        self._num_workers.windowed = 1

        # valid first frames
        self._num_workers.valid_first_frames = 1

        # keychain pruned
        self._num_workers.keychain_pruned = 1

        # parsed json
        self._num_workers.parsed_json = 1

        # loaded json
        self._num_workers.loaded_json = 1
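
The allocation above serves the prefetch stage first and hands the remainder to the data-loading stage, capped by the batch size; every other pipeline stage runs single-worker. A worked sketch of the arithmetic, with hypothetical worker and batch-size numbers:

import math

total_num_workers = 8    # hypothetical
with_prefetching = True  # hypothetical
batch_size = 3           # hypothetical

prefetch_workers = int(with_prefetching) + 1                 # 2
remaining = math.ceil(total_num_workers / prefetch_workers)  # 4
loaded_data_workers = min(remaining, batch_size)             # 3
assert (prefetch_workers, loaded_data_workers) == (2, 3)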
Code Example #20
File: test_dataset.py Project: ivy-dl/builder
    def _init(self, array_shape, num_processes):
        x = [
            ivy.array(0),
            ivy.array(1),
            ivy.array(2),
            ivy.array(3),
            ivy.array(4),
            ivy.array(5),
            ivy.array(6),
            ivy.array(7),
            ivy.array(8),
            ivy.array(9)
        ]
        self._x = [ivy.reshape(item, array_shape) for item in x]

        def sleep_fn(cont):
            start_time = time.perf_counter()
            while True:
                if time.perf_counter() - start_time > 0.011:
                    return cont

        dataset_container = ivy.Container({'x': self._x})

        # without pre-fetch
        dataset_wo_prefetch = Dataset(copy.deepcopy(dataset_container),
                                      'base',
                                      dataset_container.shape[0],
                                      with_caching=False,
                                      cache_size=0,
                                      num_processes=num_processes)
        self._dataset_wo_prefetch = dataset_wo_prefetch.map('sleep', sleep_fn)

        # with pre-fetch
        dataset_w_prefetch = Dataset(copy.deepcopy(dataset_container),
                                     'base',
                                     dataset_container.shape[0],
                                     with_caching=False,
                                     cache_size=0,
                                     num_processes=num_processes)
        dataset_w_prefetch = dataset_w_prefetch.map('sleep', sleep_fn)
        self._dataset_w_prefetch = dataset_w_prefetch.prefetch('prefetch', 1)
Code Example #21
File: builder.py Project: ivy-dl/builder
def parse_json_to_cont(json_filepath):
    """
    return the data from the json file as an ivy Container, recursively combining any parent json files listed under the 'parents' key
    """
    return_cont = ivy.Container()
    with open(json_filepath) as json_data_file:
        loaded_dict = json.load(json_data_file)
    for k, v in loaded_dict.items():
        if k == 'parents':
            rel_fpaths = v
            for rel_fpath in rel_fpaths:
                if rel_fpath[-5:] == '.json':
                    parent_json_fname = rel_fpath.split('/')[-1]
                else:
                    parent_json_fname = json_filepath.split('/')[-1]
                    rel_fpath = os.path.join(rel_fpath, parent_json_fname)
                rel_fpath = os.path.normpath(rel_fpath)
                fpath = os.path.normpath(os.path.join('/'.join(json_filepath.split('/')[:-1]), rel_fpath))
                fdir = '/'.join(fpath.split('/')[:-1])
                return_cont = ivy.Container.combine(return_cont, json_spec_from_fpath(fdir, parent_json_fname))
    return ivy.Container.combine(return_cont, loaded_dict)
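
The 'parents' mechanism above lets a json file inherit from others: each relative path under the 'parents' key is resolved against the file's own directory, loaded (via the directory-walking json_spec_from_fpath), and combined so that the child file's own entries win. A minimal flat-dict sketch of the inheritance step, ignoring the directory walk; dict.update again stands in for the deep merge of ivy.Container.combine:

import json
import os

def parse_json_with_parents_sketch(json_filepath):
    # load a json file, merging in any files listed under its 'parents' key;
    # entries in the child file override entries inherited from its parents
    with open(json_filepath) as f:
        loaded = json.load(f)
    merged = {}
    for rel_fpath in loaded.pop('parents', []):
        parent_fpath = os.path.normpath(
            os.path.join(os.path.dirname(json_filepath), rel_fpath))
        merged.update(parse_json_with_parents_sketch(parent_fpath))
    merged.update(loaded)
    return merged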
Code Example #22
def format_containers(cont_dir, cont_format, cont_format_file):

    if cont_format:
        key_chains = ivy.Container(json.loads(cont_format))
    else:
        key_chains = ivy.Container.from_disk_as_json(cont_format_file)

    cont_fnames = os.listdir(cont_dir)
    cont_fnames.sort()
    num_conts = len(cont_fnames)
    num_logs = 100
    log_freq = max((num_conts / num_logs), 1)

    for i, cont_fname in enumerate(cont_fnames):
        if i % log_freq == 0:
            logging.info('reformatting container {} of {}...'.format(
                i, num_conts))
        cont_fpath = os.path.join(cont_dir, cont_fname)
        cont = ivy.Container.from_disk_as_json(cont_fpath)
        cont = cont.at_key_chains(key_chains)
        cont.to_disk_as_json(cont_fpath)
Code Example #23
File: builder.py Project: ivy-dl/builder
def build_tuner_spec(data_loader_class=None,
                     network_class=None,
                     trainer_class=None,
                     dataset_dirs_args=None,
                     dataset_dirs_class=None,
                     dataset_dirs=None,
                     dataset_spec_args=None,
                     dataset_spec_class=None,
                     dataset_spec=None,
                     data_loader_spec_args=None,
                     data_loader_spec_class=None,
                     data_loader_spec=None,
                     data_loader=None,
                     network_spec_args=None,
                     network_spec_class=None,
                     network_spec=None,
                     network=None,
                     trainer_spec_args=None,
                     trainer_spec_class=None,
                     trainer_spec=None,
                     trainer=None,
                     tuner_spec_args=None,
                     tuner_spec_class=None,
                     json_spec_path=None,
                     spec_cont=None,
                     class_priority=False):
    """
    build tuner specification
    """

    # define dataset directories specification arguments
    if tuner_spec_args is None:
        tuner_spec_args = dict()
    tuner_spec_args = ivy.Container(tuner_spec_args)

    # load json file
    if isinstance(json_spec_path, str):
        json_spec = json_spec_from_fpath(json_spec_path, 'tuner_args.json')
    else:
        json_spec = ivy.Container()

    # load from spec dict
    this_spec_cont =\
        ivy.Container(spec_cont['tuner']) if isinstance(spec_cont, dict) and 'tuner' in spec_cont else ivy.Container()

    # combine args
    tuner_spec_args = ivy.Container.combine(json_spec, this_spec_cont, tuner_spec_args)

    # override tuner_spec_class if specified in tuner_spec_args
    tuner_spec_class = ivy.default(ivy.default(
        _import_arg_specified_class_if_present(tuner_spec_args, 'tuner_spec_class'),
        tuner_spec_class, rev=class_priority),
        TunerSpec)

    # set framework
    ivy.set_framework(tuner_spec_class(None, **tuner_spec_args).framework)

    # build trainer
    trainer = ivy.default(
        trainer,
        build_trainer(
            data_loader_class=data_loader_class,
            network_class=network_class,
            trainer_class=trainer_class,
            dataset_dirs_args=dataset_dirs_args,
            dataset_dirs_class=dataset_dirs_class,
            dataset_dirs=dataset_dirs,
            dataset_spec_args=dataset_spec_args,
            dataset_spec_class=dataset_spec_class,
            dataset_spec=dataset_spec,
            data_loader_spec_args=data_loader_spec_args,
            data_loader_spec_class=data_loader_spec_class,
            data_loader_spec=data_loader_spec,
            data_loader=data_loader,
            network_spec_args=network_spec_args,
            network_spec_class=network_spec_class,
            network_spec=network_spec,
            network=network,
            trainer_spec_args=trainer_spec_args,
            trainer_spec_class=trainer_spec_class,
            trainer_spec=trainer_spec,
            json_spec_path=json_spec_path,
            spec_cont=spec_cont))

    # return tuner specification
    return tuner_spec_class(trainer,
                            **tuner_spec_args)
Code Example #24
File: dataset.py Project: ivy-dl/builder
 def __getitem__(self, slice_obj):
     if not self._workers_initialized:
         self._initialize_all_workers()
     if self._numpy_loading:
         ivy.set_framework('numpy')
     if self._num_processes < 2 or isinstance(slice_obj, numbers.Number):
         ret = self._get_item(slice_obj)
         if self._numpy_loading:
             ivy.unset_framework()
         self._first_pass = False
         return ret
     slice_size = int(round(slice_obj.stop - slice_obj.start))
     num_sub_slices = min(slice_size, self._num_processes)
     slice_points = np.linspace(slice_obj.start, slice_obj.stop,
                                num_sub_slices + 1)
     slice_sizes = np.round(slice_points[1:] - slice_points[:-1]).astype(
         np.int32)
     if Dataset._is_int(slice_obj.start) and Dataset._is_int(
             slice_obj.stop):
         slice_points = np.round(slice_points)
     sub_slices = [
         slice(slice_points[i], slice_points[i + 1], 1.)
         for i in range(num_sub_slices)
     ]
     if self._prefetching:
         self._queue_offset = int(not self._queue_offset)
     else:
         self._queue_offset = np.random.randint(0, self._num_processes)
     q_idxs = [
         int((i + self._queue_offset) % self._num_processes)
         for i in range(len(sub_slices))
     ]
     slice_queues = [self._slice_queues[qi] for qi in q_idxs]
     output_queues = [self._output_queues[qi] for qi in q_idxs]
     if self._prefetching:
         if self._first_pass:
             [
                 slice_queue.put(sub_slice)
                 for slice_queue, sub_slice in zip(slice_queues, sub_slices)
             ]
         else:
             slice_queues[-1].put(sub_slices[-1])
         if self._numpy_loading:
             ivy.unset_framework()
         self._first_pass = False
         return ivy.Container(queues=output_queues,
                              queue_load_sizes=slice_sizes,
                              queue_timeout=self._queue_timeout)
     else:
         [
             slice_queue.put(sub_slice)
             for slice_queue, sub_slice in zip(slice_queues, sub_slices)
         ]
         if ivy.wrapped_mode():
             items_as_lists = [
                 ivy.Container(output_queue.get(
                     timeout=self._queue_timeout)).to_ivy()
                 for output_queue in output_queues
             ]
         else:
             items_as_lists = [
                 ivy.Container(
                     output_queue.get(timeout=self._queue_timeout))
                 for output_queue in output_queues
             ]
         if self._numpy_loading:
             ivy.unset_framework()
         self._first_pass = False
         return ivy.Container.list_join(items_as_lists)
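
When several worker processes are available, __getitem__ above splits the requested slice into roughly equal sub-slices via np.linspace, distributes them across the worker queues, and joins the results. A standalone sketch of just the slice-splitting step, with hypothetical bounds:

import numpy as np

slice_obj = slice(0, 10)  # hypothetical query
num_processes = 4

slice_size = int(round(slice_obj.stop - slice_obj.start))
num_sub_slices = min(slice_size, num_processes)
slice_points = np.round(np.linspace(slice_obj.start, slice_obj.stop,
                                    num_sub_slices + 1))
sub_slices = [slice(slice_points[i], slice_points[i + 1], 1.)
              for i in range(num_sub_slices)]
# slice_points is [0, 2, 5, 8, 10], giving four sub-slices covering 0..10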
Code Example #25
 def get_first_batch(self, dataset_key=None):
     return ivy.Container(x=ivy.array([[1.]] * self._spec.batch_size,
                                      dev_str=self._spec.dev_strs[0]),
                          target=ivy.array([[0.]] * self._spec.batch_size,
                                           dev_str=self._spec.dev_strs[0]))
Code Example #26
File: seq_data_loader.py Project: ivy-dl/builder
    def _get_dataset(self, starting_example, ending_example):
        class ContainerIdxMap:
            def __init__(self,
                         sizes,
                         fpath_template=None,
                         seq_idxs=None,
                         start=None,
                         end=None,
                         max_seq_len=None,
                         conts_to_skip=None,
                         pruned_sizes=None):
                if isinstance(sizes, (tuple, list)):
                    pruned_sizes = ivy.default(pruned_sizes, [
                        SeqDataLoader._compute_seq_len(i, sl, conts_to_skip)
                        for i, sl in enumerate(sizes)
                    ])
                    num_empty = sum([ps == 0 for ps in pruned_sizes])
                    self._raw_sizes = dict(
                        zip(range(start, end + 1 + num_empty),
                            sizes[start:end + 1 + num_empty]))
                    self._pruned_sizes = dict(
                        zip(range(start, end + 1 + num_empty),
                            pruned_sizes[start:end + 1 + num_empty]))
                elif isinstance(sizes, (int, dict)):
                    self._raw_sizes = sizes
                    self._pruned_sizes = ivy.default(pruned_sizes, sizes)
                    if isinstance(self._pruned_sizes, int):
                        pruned_dict = dict()
                        for seq_idx, win_idx in conts_to_skip:
                            if seq_idx not in pruned_dict:
                                pruned_dict[seq_idx] = list()
                            pruned_dict[seq_idx].append(win_idx)
                        pruned_dict = {
                            k: len(set(v))
                            for k, v in pruned_dict.items()
                        }
                        pruned_sizes_dict = {
                            k: self._pruned_sizes - num_pruned
                            for k, num_pruned in pruned_dict.items()
                        }
                        num_empty = sum(
                            [size == 0 for size in pruned_sizes_dict.values()])
                        pruned_sizes = collections.defaultdict(
                            lambda: self._pruned_sizes, pruned_sizes_dict)
                    else:
                        num_empty = sum([ps == 0 for ps in self._pruned_sizes])
                else:
                    raise Exception(
                        'Invalid type for sizes, expected one of int, dict, tuple or list, '
                        'but found {} of type {}'.format(sizes, type(sizes)))
                self._constant_size = isinstance(self._raw_sizes, int)
                if max_seq_len:
                    self._max_seq_len = max_seq_len
                else:
                    self._max_seq_len = self._pruned_sizes if self._constant_size else max(
                        self._pruned_sizes.values())
                self._fpath_template = fpath_template
                self._conts_to_skip = conts_to_skip
                if seq_idxs:
                    self._seq_idxs = seq_idxs
                else:
                    vals = [
                        v
                        for i, v in enumerate(range(start, end + 1 +
                                                    num_empty))
                        if pruned_sizes[i] > 0
                    ]
                    keys = range(0, min(end - start + 1 + num_empty,
                                        len(vals)))
                    self._seq_idxs = dict(zip(keys, vals))

            def __getitem__(self, slice_obj):
                if isinstance(slice_obj, slice):
                    seq_idxs = collections.OrderedDict([
                        (i, self._seq_idxs[idx]) for i, idx in enumerate(
                            range(slice_obj.start, slice_obj.stop,
                                  ivy.default(slice_obj.step, 1)))
                    ])
                elif isinstance(slice_obj, int):
                    seq_idxs = collections.OrderedDict(
                        {0: self._seq_idxs[slice_obj]})
                else:
                    raise Exception(
                        'Invalid type for slice_obj, expected either slice or int, '
                        'but found {} of type {}'.format(
                            slice_obj, type(slice_obj)))
                if self._constant_size:
                    sizes = self._raw_sizes
                else:
                    sizes = collections.OrderedDict({
                        seq_idx: self._raw_sizes[seq_idx]
                        for seq_idx in seq_idxs.values()
                    })
                return ContainerIdxMap(sizes,
                                       self._fpath_template,
                                       seq_idxs,
                                       max_seq_len=self._max_seq_len,
                                       conts_to_skip=self._conts_to_skip,
                                       pruned_sizes=self._pruned_sizes)

            def __len__(self):
                return len(self._seq_idxs)

            def shuffle(self):
                mapped_idxs = list(self._seq_idxs.values())
                np.random.shuffle(mapped_idxs)
                self._seq_idxs = collections.OrderedDict(
                    zip(self._seq_idxs.keys(), mapped_idxs))

            def to_idxs(self):
                seq_idxs = self._seq_idxs.values()
                sizes = [
                    self._raw_sizes
                    if self._constant_size else self._raw_sizes[seq_idx]
                    for seq_idx in seq_idxs
                ]
                rets = [[(seq_idx, win_idx) for win_idx in range(size)
                         if not SeqDataLoader._skip_cont(
                             seq_idx, win_idx, self._conts_to_skip)]
                        for seq_idx, size in zip(seq_idxs, sizes)]
                return [
                    r + [(None, None)] * (self._max_seq_len - len(r))
                    for r in rets if list(set(r)) != [None]
                ]

            def to_filepaths(self):
                if not ivy.exists(self._fpath_template):
                    raise Exception(
                        'to_filepaths method is not valid if fpath_template has not been specified '
                        'in the constructor.')
                seq_idxs = self._seq_idxs.values()
                sizes = [
                    self._raw_sizes
                    if self._constant_size else self._raw_sizes[seq_idx]
                    for seq_idx in seq_idxs
                ]
                rets = [[
                    self._fpath_template % (seq_idx, win_idx)
                    for win_idx in range(size) if not SeqDataLoader._skip_cont(
                        seq_idx, win_idx, self._conts_to_skip)
                ] for seq_idx, size in zip(seq_idxs, sizes)]
                return [
                    r + [''] * (self._max_seq_len - len(r)) for r in rets
                    if ''.join(r) != ''
                ]

            @property
            def sizes(self):
                return self._pruned_sizes

        # container filepaths
        if self._spec.container_load_mode in ['preload', 'dynamic']:
            fpath_template = os.path.join(
                self._container_data_dir,
                self._spec.dataset_spec.cont_fname_template)
        else:
            fpath_template = None
        container_idx_map = ContainerIdxMap(
            self._spec.dataset_spec.unpruned_sequence_lengths,
            fpath_template,
            start=starting_example,
            end=ending_example,
            conts_to_skip=self._spec.containers_to_skip)

        if self._spec.num_sequences != -1:
            container_idx_map = container_idx_map[0:self._spec.num_sequences]

        # shuffle sequences
        if self._spec.preshuffle_data:
            container_idx_map.shuffle()

        # extract sequence lengths
        if self._fixed_sequence_length:
            self._sequence_lengths =\
                collections.OrderedDict(zip(range(len(container_idx_map)),
                                            [self._spec.dataset_spec.sequence_lengths] * len(container_idx_map)))
            self._windows_per_seq = self._sequence_lengths[0] - self._window_size + 1
            # windowing values
            window_idxs_per_seq = ivy.reshape(
                ivy.arange(self._windows_per_seq, 0, 1),
                (self._windows_per_seq, 1))
            gather_idxs_list = list()
            for x in window_idxs_per_seq:
                gather_idxs_list.append(
                    ivy.expand_dims(
                        ivy.arange(x[0] + self._window_size, x[0], 1), 0))
            gather_idxs = ivy.concatenate(gather_idxs_list, 0)
            self._gather_idxs = \
                ivy.to_numpy(ivy.reshape(gather_idxs, (self._windows_per_seq * self._window_size, 1))).tolist()
        else:
            self._sequence_lengths = container_idx_map.sizes

        # maybe pre-load containers
        if self._spec.container_load_mode == 'preload':
            # load containers with vector data and image filepath entries
            container_slices = self._get_containers_w_filepath_img_entries_as_tensor_slices(
                container_idx_map.to_filepaths())
            if self._first_frame_validity_fn is not None:
                container_slices =\
                    self._first_frame_validity_fn(container_slices, [ending_example - starting_example + 1])

            # prune unwanted chains of keys
            if 'unused_key_chains' in self._spec:
                container_slices = self._prune_unused_key_chains(
                    container_slices)

            dataset = Dataset(ivy.Container.list_stack([
                c[0]
                for c in container_slices.unstack(0, container_slices.shape[0])
            ], 0),
                              'base',
                              container_slices.shape[0],
                              numpy_loading=True,
                              cache_size=self._base_cache_size,
                              queue_timeout=self._spec.queue_timeout)
        else:
            if self._spec.container_load_mode == 'dynamic':
                # load containers with filepath entries
                dataset = Dataset(ivy.Container({'fpaths': container_idx_map}),
                                  'base',
                                  len(container_idx_map),
                                  trans_fn=lambda cont: cont.map(
                                      lambda x_, kc: x_.to_filepaths()),
                                  elementwise_query_fn=False,
                                  numpy_loading=True,
                                  cache_size=self._base_cache_size,
                                  queue_timeout=self._spec.queue_timeout)
                dataset = dataset.map('loaded_json', self._load_json_files,
                                      self._num_workers.loaded_json)
                dataset = dataset.map('parsed_json', self._parse_json_strings,
                                      self._num_workers.parsed_json)
            else:
                dataset = Dataset(ivy.Container({'idx_map':
                                                 container_idx_map}),
                                  'base',
                                  len(container_idx_map),
                                  trans_fn=lambda cont: self._spec.custom_container_load_fn(self, cont),
                                  elementwise_query_fn=False,
                                  numpy_loading=True,
                                  cache_size=self._base_cache_size,
                                  queue_timeout=self._spec.queue_timeout)
            if 'unused_key_chains' in self._spec:
                dataset = dataset.map('keychain_pruned',
                                      self._prune_unused_key_chains,
                                      self._num_workers.keychain_pruned)
            if self._first_frame_validity_fn is not None:
                dataset = dataset.map(
                    'valid_first_frames',
                    lambda x_: self._first_frame_validity_fn(x_, None),
                    self._num_workers.valid_first_frames)
        if not (self._spec.dataset_spec.sequence_lengths == 1
                and self._window_size == 1):
            # ToDo: add other conditionals which make the loading more efficient if only one of the
            #  above two conditions is True
            dataset = dataset.map(
                'windowed', self._group_container_into_windowed_container,
                self._num_workers.windowed)
            dataset = dataset.unbatch(
                'unbatched',
                self._num_workers.unbatched,
                batch_sizes=[
                    max(seq_len, self._window_size) - self._window_size + 1
                    for seq_len in self._sequence_lengths.values()
                    if seq_len > 0
                ])
        if self._spec.shuffle_buffer_size > 0:
            dataset = dataset.shuffle('shuffled',
                                      self._spec.shuffle_buffer_size,
                                      self._num_workers.shuffled)
        dataset = dataset.map('loaded_data',
                              self._load_data_from_filepath_tensors,
                              self._num_workers.loaded_data)
        dataset = dataset.batch('batched', self._batch_size,
                                self._num_workers.batched)
        dataset = dataset.map(
            'from_np',
            lambda cont: cont.map(lambda x_, kc: ivy.array(x_, dev_str='cpu')),
            self._num_workers.from_np,
            numpy_loading=False)
        if ivy.exists(self._spec.post_proc_fn):
            dataset = dataset.map('post_processed', self._spec.post_proc_fn,
                                  self._num_workers.post_processed)
        if self._spec.with_prefetching:
            dataset = dataset.prefetch('prefetch')
        # ToDo: find way to make pre-fetching to GPU actually pre-fetch, ideally using multi-processing.
        #  For example, swapping prefetch and to_gpu ops around would work if to_gpu could accept self._num_workers.
        if self._spec.prefetch_to_devs:
            if isinstance(self._spec.prefetch_to_devs, str):
                dataset = dataset.to_dev('to_dev', self._spec.prefetch_to_devs)
            elif len(self._spec.prefetch_to_devs) == 1:
                dataset = dataset.to_dev('to_dev',
                                         self._spec.prefetch_to_devs[0])
            else:
                dataset = dataset.to_devs('to_devs',
                                          self._spec.prefetch_to_devs)
        return dataset
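
The method above assembles the loader as a chain of named Dataset stages: windowing, unbatching, shuffling, loading data from filepaths, batching, numpy conversion, optional post-processing, prefetching and device placement. A minimal generator-based sketch of the same staged-pipeline idea using only the standard library; the real Dataset class additionally handles caching, multiprocessing and queues:

import random

def shuffled(stream, buffer_size):
    # stand-in for the 'shuffled' stage: keep a small buffer and
    # emit random elements from it
    buf = []
    for item in stream:
        buf.append(item)
        if len(buf) >= buffer_size:
            yield buf.pop(random.randrange(len(buf)))
    while buf:
        yield buf.pop(random.randrange(len(buf)))

def batched(stream, batch_size):
    # stand-in for the 'batched' stage: group consecutive items
    batch = []
    for item in stream:
        batch.append(item)
        if len(batch) == batch_size:
            yield batch
            batch = []

pipeline = batched(shuffled(range(10), buffer_size=4), batch_size=2)
print(list(pipeline))  # five randomly-ordered pairs drawn from 0..9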
Code Example #27
File: builder.py Project: ivy-dl/builder
 def map_fn(x, kc):
     if kc in json_spec:
         duplicate_key_chains.append(kc)
         return ivy.Container(duplicated={'parent_dir': json_spec[kc], 'this_dir': x})
     else:
         return x
Code Example #28
File: builder.py Project: ivy-dl/builder
def build_trainer_spec(data_loader_class=None,
                       network_class=None,
                       dataset_dirs_args=None,
                       dataset_dirs_class=None,
                       dataset_dirs=None,
                       dataset_spec_args=None,
                       dataset_spec_class=None,
                       dataset_spec=None,
                       data_loader_spec_args=None,
                       data_loader_spec_class=None,
                       data_loader_spec=None,
                       data_loader=None,
                       network_spec_args=None,
                       network_spec_class=None,
                       network_spec=None,
                       network=None,
                       trainer_spec_args=None,
                       trainer_spec_class=None,
                       json_spec_path=None,
                       spec_cont=None,
                       class_priority=False):
    """
    build trainer specification
    """

    # build data loader
    data_loader = ivy.default(
        data_loader,
        build_data_loader(
            data_loader_class=data_loader_class,
            dataset_dirs_args=dataset_dirs_args,
            dataset_dirs_class=dataset_dirs_class,
            dataset_dirs=dataset_dirs,
            dataset_spec_args=dataset_spec_args,
            dataset_spec_class=dataset_spec_class,
            dataset_spec=dataset_spec,
            data_loader_spec_args=data_loader_spec_args,
            data_loader_spec_class=data_loader_spec_class,
            data_loader_spec=data_loader_spec,
            json_spec_path=json_spec_path,
            spec_cont=spec_cont))

    # build network
    network = ivy.default(
        network,
        build_network(
            network_class=network_class,
            dataset_dirs_args=dataset_dirs_args,
            dataset_dirs_class=dataset_dirs_class,
            dataset_spec_args=dataset_spec_args,
            dataset_spec_class=dataset_spec_class,
            network_spec_args=network_spec_args,
            network_spec_class=network_spec_class,
            network_spec=network_spec,
            json_spec_path=json_spec_path,
            spec_cont=spec_cont))

    # define trainer specification arguments
    if trainer_spec_args is None:
        trainer_spec_args = dict()
    trainer_spec_args = ivy.Container(trainer_spec_args)
    trainer_spec_args = ivy.Container.combine(trainer_spec_args,
                                              ivy.Container(data_loader=data_loader,
                                                            network=network))

    # load json file
    if isinstance(json_spec_path, str):
        json_spec = json_spec_from_fpath(json_spec_path, 'trainer_args.json')
    else:
        json_spec = ivy.Container()

    # load from spec dict
    this_spec_cont =\
        ivy.Container(spec_cont['trainer']) if isinstance(spec_cont, dict) and 'trainer' in spec_cont \
            else ivy.Container()

    # combine args
    trainer_spec_args = ivy.Container.combine(json_spec, this_spec_cont, trainer_spec_args)

    # override trainer_spec_class if specified in trainer_spec_args
    trainer_spec_class = ivy.default(ivy.default(
        _import_arg_specified_class_if_present(trainer_spec_args, 'trainer_spec_class'),
        trainer_spec_class, rev=class_priority),
        TrainerSpec)

    # return trainer specification
    return trainer_spec_class(**trainer_spec_args)
Code Example #29
 def get_first_batch(self, dataset_key='training'):
     data = self._data[dataset_key]
     return ivy.Container(input=data.input[0:1], target=data.targets[0:1])
Code Example #30
File: builder.py Project: ivy-dl/builder
def print_json_args(base_dir=None, keys_to_ignore=None, keychains_to_ignore=None):
    if not ivy.exists(base_dir):
        base_dir = os.getcwd()
    ivy.set_framework('numpy')
    parser = argparse.ArgumentParser()
    parser.add_argument('-sd', '--sub_directory', type=str,
                        help='A sub-directory to print the json args for, default is base_dir passed in.')
    parser.add_argument('-dd', '--diff_directory', type=str,
                        help='The directory from which to compare the difference in specifications.')
    parser.add_argument('-kti', '--keys_to_ignore', type=str, default=keys_to_ignore,
                        help='Keys to ignore when printing the specification.')
    parser.add_argument('-kcti', '--keychains_to_ignore', type=str, default=keychains_to_ignore,
                        help='Key-chains to ignore when printing the specification.')
    parser.add_argument('-kcts', '--keychain_to_show', type=str,
                        help='The key-chain to show. Default is None, in which case all key-chains are shown.')
    parser.add_argument('-sn', '--spec_names', type=str,
                        help='The specification names for the json files. Default is ivy_builder defaults of '
                             '[ dataset_dirs | dataset | data_loader | network | trainer ]')
    parser.add_argument('-d', '--show_defaults', action='store_true',
                        help='Whether to show the default json arguments. '
                             'If unset then the current arguments are shown, not the default values.')
    parser.add_argument('-c', '--current_dir_only', action='store_true',
                        help='Whether to only show the json arguments for the current directory, '
                             'without searching through parent directories also.')
    parser.add_argument('-sdo', '--show_diff_only', action='store_true',
                        help='Whether to only show the difference between the current directory '
                             'and the diff directory.')
    parser.add_argument('-sso', '--show_same_only', action='store_true',
                        help='Whether to only show the same entries between the current directory '
                             'and the diff directory.')
    parsed_args = parser.parse_args()
    if (parsed_args.show_diff_only or parsed_args.show_same_only) and not parsed_args.diff_directory:
        raise Exception('show_diff_only and show_same_only flags are only applicable if diff_directory is set.')
    if parsed_args.show_diff_only and parsed_args.show_same_only:
        raise Exception('show_diff_only and show_same_only cannot both be set, please choose one to set.')
    if ivy.exists(parsed_args.spec_names):
        spec_names = [kc[1:-1] for kc in ''.join(parsed_args.spec_names[1:-1]).split(', ')]
    else:
        spec_names = None
    if ivy.exists(parsed_args.sub_directory):
        sub_dir = os.path.normpath(os.path.join(base_dir, parsed_args.sub_directory))
    else:
        sub_dir = base_dir
    if ivy.exists(parsed_args.keys_to_ignore):
        keys_to_ignore = [kc[1:-1] for kc in ''.join(parsed_args.keys_to_ignore[1:-1]).split(', ')]
    else:
        keys_to_ignore = list()
    if ivy.exists(parsed_args.keychains_to_ignore):
        keychains_to_ignore = [kc[1:-1] for kc in ''.join(parsed_args.keychains_to_ignore[1:-1]).split(',')]
    else:
        keychains_to_ignore = list()
    these_json_args = get_json_args(
        sub_dir, keys_to_ignore, keychains_to_ignore, parsed_args.keychain_to_show, parsed_args.show_defaults,
        store_duplicates=True, current_dir_only=parsed_args.current_dir_only, spec_names=spec_names)
    if ivy.exists(parsed_args.diff_directory):
        other_sub_dir = os.path.normpath(os.path.join(base_dir, parsed_args.diff_directory))
        if other_sub_dir == sub_dir:
            raise Exception('Invalid diff_directory {} selected, it is the same as the sub_directory {}.'.format(
                other_sub_dir, sub_dir))
        other_json_args = get_json_args(
            other_sub_dir, keys_to_ignore, keychains_to_ignore, parsed_args.keychain_to_show, parsed_args.show_defaults,
            store_duplicates=True, current_dir_only=parsed_args.current_dir_only, spec_names=spec_names)
        diff_keys = 'diff'
        for sub_folder, other_sub_folder in zip(sub_dir.split('/'), other_sub_dir.split('/')):
            if sub_folder != other_sub_folder:
                diff_keys = [sub_folder, other_sub_folder]
                break
        if parsed_args.show_diff_only:
            mode = 'diff_only'
        elif parsed_args.show_same_only:
            mode = 'same_only'
        else:
            mode = 'all'
        diff_json_args = ivy.Container.diff(these_json_args, other_json_args, mode=mode, diff_keys=diff_keys)
        keyword_color_dict = {'duplicated': 'magenta'}
        if isinstance(diff_keys, list):
            diff_keys_dict = dict(zip(diff_keys, ['red'] * 2))
            keyword_color_dict = {**keyword_color_dict, **diff_keys_dict}
        print(ivy.Container(diff_json_args, keyword_color_dict=keyword_color_dict))
    else:
        print(ivy.Container(these_json_args, keyword_color_dict={'duplicated': 'magenta'}))
    ivy.unset_framework()
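
The argument parsing above expects list-valued flags in a bracketed, quoted form: the outer [1:-1] strips the brackets and each kc[1:-1] strips the quotes. A sketch of the expected format, with a hypothetical flag value:

spec_names_arg = "['dataset', 'network']"  # hypothetical --spec_names value
spec_names = [kc[1:-1] for kc in ''.join(spec_names_arg[1:-1]).split(', ')]
assert spec_names == ['dataset', 'network']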