Ejemplo n.º 1
0
    def __init__(self, dataset_spec, batch_size, starting_idx, num_sequences, window_size=1,
                 num_workers=1, cache_size=0, unused_key_chains=None, custom_init_fn=None,
                 container_load_mode='dynamic', custom_container_load_fn=None, preshuffle_data=True,
                 shuffle_buffer_size=0, with_prefetching=True, queue_timeout=None, post_proc_fn=None,
                 prefetch_to_devs='gpu:0', single_pass=False, array_strs=None, float_strs=None, uint8_strs=None,
                 custom_img_strs=None, custom_img_fns=None, custom_strs=None, custom_fns=None, array_mode='pickled',
                 load_gray_as_rgb=True, containers_to_skip=None, **kwargs):
        """
        Specification for a sequence data loader.

        Fills in per-call defaults, validates the container loading
        configuration, and forwards all arguments to the parent spec
        constructor. ``queue_timeout`` is stored directly on the instance
        rather than forwarded (see note at the bottom).
        """

        # capture the call arguments before any locals are rebound, so the
        # spec can later be reconstructed from self._kwargs
        kw = locals_to_kwargs(locals())

        # replace None defaults with fresh (per-call) empty lists
        unused_key_chains = ivy.default(unused_key_chains, [])
        array_strs = ivy.default(array_strs, [])
        float_strs = ivy.default(float_strs, [])
        uint8_strs = ivy.default(uint8_strs, [])
        # NOTE(review): the custom *_strs default to a nested empty list
        # ([[]]) while the *_fns default to a flat one ([]) — presumably one
        # group of key-chains per custom fn; confirm against the loader
        custom_img_strs = ivy.default(custom_img_strs, [[]])
        custom_img_fns = ivy.default(custom_img_fns, [])
        custom_strs = ivy.default(custom_strs, [[]])
        custom_fns = ivy.default(custom_fns, [])
        containers_to_skip = ivy.default(containers_to_skip, [])
        # disable device prefetching unless a GPU is available or the caller
        # explicitly supplied a list of devices
        prefetch_to_devs = prefetch_to_devs if ivy.gpu_is_available() or isinstance(prefetch_to_devs, list) else False
        assert container_load_mode in ['preload', 'dynamic', 'custom']
        if container_load_mode == 'custom':
            # custom loading requires a user-supplied load function
            assert ivy.exists(custom_container_load_fn)
        else:
            # preload/dynamic modes load containers from a filename template
            assert ivy.exists(dataset_spec.cont_fname_template)

        super(SeqDataLoaderSpec, self).__init__(dataset_spec,
                                                batch_size=batch_size,
                                                window_size=window_size,
                                                starting_idx=starting_idx,
                                                num_sequences=num_sequences,
                                                num_workers=num_workers,
                                                cache_size=cache_size,
                                                unused_key_chains=unused_key_chains,
                                                custom_init_fn=custom_init_fn,
                                                container_load_mode=container_load_mode,
                                                custom_container_load_fn=custom_container_load_fn,
                                                preshuffle_data=preshuffle_data,
                                                shuffle_buffer_size=shuffle_buffer_size,
                                                with_prefetching=with_prefetching,
                                                post_proc_fn=post_proc_fn,
                                                prefetch_to_devs=prefetch_to_devs,
                                                single_pass=single_pass,
                                                array_strs=array_strs,
                                                float_strs=float_strs,
                                                uint8_strs=uint8_strs,
                                                custom_img_strs=custom_img_strs,
                                                custom_img_fns=custom_img_fns,
                                                custom_strs=custom_strs,
                                                custom_fns=custom_fns,
                                                array_mode=array_mode,
                                                load_gray_as_rgb=load_gray_as_rgb,
                                                containers_to_skip=containers_to_skip,
                                                **kwargs)
        # stored on the instance rather than passed to the parent constructor
        self.queue_timeout = ivy.default(queue_timeout, ivy.queue_timeout())  # conflicts with ivy.Container argument

        self._kwargs = kw
Ejemplo n.º 2
0
    def _train(self, vis_mode=False, starting_iteration=None, total_iterations=None):
        """
        Run the main training loop and return the final global step.

        :param vis_mode: when True, visualise every step and wait for user
                         input between steps instead of logging/saving
        :param starting_iteration: overrides self._starting_iteration if given
        :param total_iterations: overrides the spec's total_iterations if given
        """

        self._starting_iteration = ivy.default(starting_iteration, self._starting_iteration)
        self._total_iterations = ivy.default(total_iterations, self._spec.total_iterations)

        self._global_step = self._starting_iteration
        self._learning_rate = self._learning_rate_func(self._global_step)

        # nothing to do when training already reached the requested end
        if self._starting_iteration == self._total_iterations:
            return self._starting_iteration

        if vis_mode:
            # visualise on every iteration in vis mode
            vis_freq = 1
        else:
            vis_freq = self._spec.vis_freq

        # counts steps taken in this call only (used for the profiling window)
        local_counter = 0

        # a total_iterations of -1 means train indefinitely
        while self._global_step < self._total_iterations or self._total_iterations == -1:

            if self._profiling and local_counter == self._spec.profile_start_step:
                self._profiler.start()

            final_step = self._global_step == self._total_iterations - 1
            # decide what to log/save this step; each action can also be
            # forced on the final step via the corresponding *_at_end flag
            log_scalars = (self._global_step % self._spec.log_freq == 0 or (final_step and self._spec.log_at_end)) \
                          and self._spec.log_freq > 0 and not vis_mode
            log_viz = (self._global_step % vis_freq == 0 or (final_step and self._spec.vis_at_end)) \
                      and self._spec.vis_freq > 0
            save = (self._global_step % self._spec.save_freq == 0 or (final_step and self._spec.save_at_end)) \
                   and self._spec.save_freq > 0 and not vis_mode

            self._data_load_and_train_step(vis_mode, log_scalars, log_viz)

            if log_scalars:
                # compiled execution is disabled around logging, presumably
                # because the logging path is not compilable — TODO confirm
                ivy.try_use_compiled = False
                self._log_scalars()
                ivy.try_use_compiled = True
            if log_viz or vis_mode:
                ivy.try_use_compiled = False
                self._write_image_summaries(self._spec.data_loader, self._network, self._training_batch,
                                            self._global_step)
                ivy.try_use_compiled = True
            if save:
                self._save()

            self._global_step += 1
            local_counter += 1
            self._learning_rate = self._learning_rate_func(self._global_step)

            if vis_mode:
                input('press enter to visualise another example')

            if self._profiling and local_counter == self._spec.profile_start_step + self._spec.steps_to_profile:
                self._profiler.stop()

        return self._global_step
Ejemplo n.º 3
0
def build_network_specification(dataset_dirs_args=None,
                                dataset_dirs_class=None,
                                dataset_dirs=None,
                                dataset_spec_args=None,
                                dataset_spec_class=None,
                                dataset_spec=None,
                                network_spec_args=None,
                                network_spec_class=None,
                                json_spec_path=None,
                                spec_cont=None,
                                class_priority=False):
    """
    Build a network specification, constructing the dataset specification
    first when one is not supplied.
    """

    # fall back to a freshly built dataset specification
    built_dataset_spec = build_dataset_spec(
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=dataset_dirs_class,
        dataset_dirs=dataset_dirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=dataset_spec_class,
        json_spec_path=json_spec_path,
        spec_cont=spec_cont)
    dataset_spec = ivy.default(dataset_spec, built_dataset_spec)

    # normalise the explicit arguments into a container carrying the dataset spec
    if network_spec_args is None:
        network_spec_args = {}
    network_spec_args = ivy.Container.combine(
        ivy.Container(network_spec_args),
        ivy.Container(dataset_spec=dataset_spec))

    # arguments from the json specification file, when a path was given
    if isinstance(json_spec_path, str):
        json_spec = json_spec_from_fpath(json_spec_path, 'network_args.json')
    else:
        json_spec = ivy.Container()

    # arguments from the explicit spec dict, when one was given
    if isinstance(spec_cont, dict) and 'network' in spec_cont:
        this_spec_cont = ivy.Container(spec_cont['network'])
    else:
        this_spec_cont = ivy.Container()

    # later sources take priority over earlier ones
    network_spec_args = ivy.Container.combine(json_spec, this_spec_cont, network_spec_args)

    # the args themselves may name the spec class, optionally taking priority
    network_spec_class = ivy.default(ivy.default(
        _import_arg_specified_class_if_present(network_spec_args, 'network_spec_class'),
        network_spec_class, rev=class_priority),
        NetworkSpec)

    # instantiate and return the network specification
    return network_spec_class(**network_spec_args)
Ejemplo n.º 4
0
def build_data_loader_spec(dataset_dirs_args=None,
                           dataset_dirs_class=None,
                           dataset_dirs=None,
                           dataset_spec_args=None,
                           dataset_spec_class=None,
                           dataset_spec=None,
                           data_loader_spec_args=None,
                           data_loader_spec_class=None,
                           json_spec_path=None,
                           spec_cont=None,
                           class_priority=False):
    """
    Build a data loader specification, constructing the dataset
    specification first when one is not supplied.
    """

    # fall back to a freshly built dataset specification
    built_dataset_spec = build_dataset_spec(
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=dataset_dirs_class,
        dataset_dirs=dataset_dirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=dataset_spec_class,
        json_spec_path=json_spec_path,
        spec_cont=spec_cont)
    dataset_spec = ivy.default(dataset_spec, built_dataset_spec)

    # normalise the explicit arguments into a container carrying the dataset spec
    if data_loader_spec_args is None:
        data_loader_spec_args = {}
    data_loader_spec_args = ivy.Container.combine(
        ivy.Container(data_loader_spec_args),
        ivy.Container(dataset_spec=dataset_spec))

    # arguments from the json specification file, when a path was given
    if isinstance(json_spec_path, str):
        json_spec = json_spec_from_fpath(json_spec_path, 'data_loader_args.json')
    else:
        json_spec = ivy.Container()

    # arguments from the explicit spec dict, when one was given
    if isinstance(spec_cont, dict) and 'data_loader' in spec_cont:
        this_spec_cont = ivy.Container(spec_cont['data_loader'])
    else:
        this_spec_cont = ivy.Container()

    # later sources take priority over earlier ones
    data_loader_spec_args = ivy.Container.combine(json_spec, this_spec_cont, data_loader_spec_args)

    # the args themselves may name the spec class, optionally taking priority
    data_loader_spec_class = ivy.default(ivy.default(
        _import_arg_specified_class_if_present(data_loader_spec_args, 'data_loader_spec_class'),
        data_loader_spec_class, rev=class_priority),
        DataLoaderSpec)

    # instantiate and return the data loader specification
    return data_loader_spec_class(**data_loader_spec_args)
Ejemplo n.º 5
0
def main():
    """
    Parse command line arguments and prune checkpoint files accordingly.

    :raises Exception: when neither a cutoff value nor one of the
                       last_only / remove_all modes is specified.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-bd',
                        '--base_dir',
                        type=str,
                        help='The directory containing the checkpoint files.')
    parser.add_argument(
        '-c',
        '--cutoff',
        type=int,
        help=
        'The cutoff value for the checkpoints, above which existing checkpoints will be removed.'
    )
    parser.add_argument(
        '-lo',
        '--last_only',
        action='store_true',
        help=
        'Whether to keep only the most recent checkpoint in each checkpoint directory.'
    )
    parser.add_argument(
        '-ra',
        '--remove_all',
        action='store_true',
        help='Whether to remove all checkpoints in each checkpoint directory.')
    parsed_args = parser.parse_args()
    # compare against None explicitly: a cutoff of 0 is falsy, yet is still a
    # valid, user-specified value (the previous truthiness test rejected it)
    if parsed_args.cutoff is None and not parsed_args.last_only and not parsed_args.remove_all:
        raise Exception(
            'Either cutoff value must be specified or last_only mode or remove_all mode must be set.'
        )
    # default to the current working directory when no base_dir is given
    base_dir = ivy.default(parsed_args.base_dir, os.getcwd())
    prune_checkpoints(base_dir, parsed_args.cutoff, parsed_args.last_only,
                      parsed_args.remove_all)
Ejemplo n.º 6
0
def get_json_args(json_spec_path, keys_to_ignore, keychains_to_ignore, keychain_to_show, defaults=False,
                  store_duplicates=False, current_dir_only=False, spec_names=None):
    """
    Collect the json spec arguments found under json_spec_path into a single
    container, hiding ignored keys and key-chains, and optionally narrowing
    to a single key-chain.
    """
    # discover spec names from the json files in the directory when none given
    json_files = [item for item in os.listdir(json_spec_path) if '.json' in item]
    spec_names = ivy.default(spec_names, [item.split('.json')[0] for item in json_files])
    # '.defaults' suffix selects the defaults variant of each spec file
    suffix = '.defaults' if defaults else ''
    cont = ivy.Container()
    for spec_name in spec_names:
        fname = spec_name + '.json' + suffix
        if current_dir_only:
            # only read files directly in this directory, skipping absentees
            fpath = os.path.join(json_spec_path, fname)
            if os.path.isfile(fpath):
                cont[spec_name] = parse_json_to_cont(fpath)
        else:
            cont[spec_name] = json_spec_from_fpath(json_spec_path, fname, store_duplicates)
    # blank out ignored key-chains and keys
    for keychain_to_ignore in keychains_to_ignore:
        if keychain_to_ignore in cont:
            cont[keychain_to_ignore] = 'not_shown'
    cont = cont.set_at_keys({key: 'not_shown' for key in keys_to_ignore})
    # optionally narrow the result to one key-chain
    if ivy.exists(keychain_to_show):
        cont = cont[keychain_to_show]
    return cont
Ejemplo n.º 7
0
 def __getitem__(self, slice_obj):
     """
     Return a new ContainerIdxMap for the requested slice or index.

     :param slice_obj: a slice or a single integer index into the sequence
                       indices of this map
     :raises Exception: if slice_obj is neither a slice nor an int
     """
     if isinstance(slice_obj, slice):
         # re-index the selected sequence indices from zero
         seq_idxs = collections.OrderedDict([
             (i, self._seq_idxs[idx]) for i, idx in enumerate(
                 range(slice_obj.start, slice_obj.stop,
                       ivy.default(slice_obj.step, 1)))
         ])
     elif isinstance(slice_obj, int):
         seq_idxs = collections.OrderedDict(
             {0: self._seq_idxs[slice_obj]})
     else:
         # fix: the two message fragments previously concatenated without a
         # separating space, producing "int,but found"
         raise Exception(
             'Invalid type for slice_obj, expected either slice or int, '
             'but found {} of type {}'.format(
                 slice_obj, type(slice_obj)))
     if self._constant_size:
         # all sequences share a single size, so it can be reused directly
         sizes = self._raw_sizes
     else:
         # keep only the sizes of the selected sequences
         sizes = collections.OrderedDict({
             seq_idx: self._raw_sizes[seq_idx]
             for seq_idx in seq_idxs.values()
         })
     return ContainerIdxMap(sizes,
                            self._fpath_template,
                            seq_idxs,
                            max_seq_len=self._max_seq_len,
                            conts_to_skip=self._conts_to_skip,
                            pruned_sizes=self._pruned_sizes)
Ejemplo n.º 8
0
 def __init__(self,
              dataset_spec: DatasetSpec = None,
              dev_strs: List[str] = None,
              v_keychains=None,
              keep_v_keychains=False,
              build_mode='explicit',
              **kwargs) -> None:
     """
     base class for storing general specifications of the neural network

     Any 'subnets' entry in the resulting spec is normalised: spec and
     network classes given as import strings are resolved to classes, and
     per-subnet defaults are filled in from this parent spec.
     """
     # capture the call arguments before any rebinding, for self._kwargs
     kw = locals_to_kwargs(locals())
     super().__init__(dataset_spec=dataset_spec,
                      dev_strs=dev_strs,
                      v_keychains=v_keychains,
                      keep_v_keychains=keep_v_keychains,
                      build_mode=build_mode,
                      **kwargs)
     if 'subnets' in self:
         for k, subet_spec in self.subnets.items():
             if 'network_spec_class' in subet_spec:
                 # resolve the spec class, given either as an import string
                 # or as the class itself
                 if isinstance(subet_spec.network_spec_class, str):
                     spec_class = load_class_from_str(
                         subet_spec.network_spec_class)
                 else:
                     spec_class = subet_spec.network_spec_class
                 if isinstance(kwargs['subnets'][k], spec_class):
                     # the caller already passed a constructed spec instance
                     subet_spec = kwargs['subnets'][k]
                 else:
                     # construct the subnet spec from the raw arguments,
                     # sharing this spec's dataset_spec and dev_strs
                     subet_spec = spec_class(
                         **{
                             **kwargs['subnets'][k],
                             **dict(dataset_spec=dataset_spec,
                                    dev_strs=dev_strs)
                         })
                 self.subnets[k] = subet_spec
             # resolve the network class in the same way
             if isinstance(subet_spec.network_class, str):
                 self.subnets[k].network_class = load_class_from_str(
                     subet_spec.network_class)
             else:
                 self.subnets[k].network_class = subet_spec.network_class
             # fill per-subnet defaults from this parent spec
             self.subnets[k].store_vars = ivy.default(
                 self.subnets[k].if_exists('store_vars'), True)
             self.subnets[k].build_mode = ivy.default(
                 self.subnets[k].if_exists('build_mode'), self.build_mode)
             self.subnets[k].dataset_spec = dataset_spec
             self.subnets[k].dev_strs = dev_strs
     self._kwargs = kw
Ejemplo n.º 9
0
 def __init__(self, network_spec: NetworkSpec, v=None) -> None:
     """
     Base class for any network.

     Initialises the underlying ivy.Module, pulling optional settings from
     the spec and falling back to sensible defaults when absent.
     """
     self._spec = network_spec
     spec = self._spec
     # optional settings, with defaults applied when missing from the spec
     build_mode = ivy.default(spec.if_exists('build_mode'), 'explicit')
     store_vars = ivy.default(spec.if_exists('store_vars'), True)
     stateful = ivy.default(spec.if_exists('stateful'), [])
     arg_stateful_idxs = ivy.default(spec.if_exists('arg_stateful_idxs'), [])
     kwarg_stateful_idxs = ivy.default(spec.if_exists('kwarg_stateful_idxs'), [])
     ivy.Module.__init__(self,
                         v=v,
                         dev_strs=spec.dev_strs,
                         build_mode=build_mode,
                         store_vars=store_vars,
                         stateful=stateful,
                         arg_stateful_idxs=arg_stateful_idxs,
                         kwarg_stateful_idxs=kwarg_stateful_idxs)
Ejemplo n.º 10
0
def main(container_dir=None, cont_format=None, cont_format_fpath=None):
    """
    Format the containers found in container_dir (defaulting to the current
    working directory) using either an explicit format or a format file —
    exactly one of the two must be given.
    """
    have_format = bool(cont_format)
    have_format_file = bool(cont_format_fpath)
    if have_format == have_format_file:
        # neither or both were supplied
        raise Exception(
            'Exactly one of format or format_file must be specified, but found {} and {}'
            .format(cont_format, cont_format_fpath))
    container_dir = ivy.default(container_dir, os.getcwd())
    format_containers(container_dir, cont_format, cont_format_fpath)
Ejemplo n.º 11
0
def build_network(network_class=None,
                  dataset_dirs_args=None,
                  dataset_dirs_class=None,
                  dataset_dirs=None,
                  dataset_spec_args=None,
                  dataset_spec_class=None,
                  dataset_spec=None,
                  network_spec_args=None,
                  network_spec_class=None,
                  network_spec=None,
                  json_spec_path=None,
                  spec_cont=None,
                  class_priority=False):
    """
    Build a network, constructing its specification first when one is not
    supplied.
    """

    # fall back to a freshly built network specification
    built_spec = build_network_specification(
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=dataset_dirs_class,
        dataset_dirs=dataset_dirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=dataset_spec_class,
        dataset_spec=dataset_spec,
        network_spec_args=network_spec_args,
        network_spec_class=network_spec_class,
        json_spec_path=json_spec_path,
        spec_cont=spec_cont)
    network_spec = ivy.default(network_spec, built_spec)

    # the spec itself may name the network class, optionally taking priority
    network_class = ivy.default(ivy.default(
        _import_arg_specified_class_if_present(network_spec, 'network_class'),
        network_class, rev=class_priority),
        None)

    # a network class must come from one of the two sources above
    if not ivy.exists(network_class):
        raise Exception('network_class must either be specified in this build_network() method,'
                        'or network_class attribute must be specified in the network_spec instance')

    # instantiate the network from its specification
    return network_class(network_spec)
Ejemplo n.º 12
0
def build_data_loader(data_loader_class=None,
                      dataset_dirs_args=None,
                      dataset_dirs_class=None,
                      dataset_dirs=None,
                      dataset_spec_args=None,
                      dataset_spec_class=None,
                      dataset_spec=None,
                      data_loader_spec_args=None,
                      data_loader_spec_class=None,
                      data_loader_spec=None,
                      json_spec_path=None,
                      spec_cont=None,
                      class_priority=False):
    """
    Build a data loader, constructing its specification first when one is
    not supplied.
    """

    # fall back to a freshly built data loader specification
    built_spec = build_data_loader_spec(
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=dataset_dirs_class,
        dataset_dirs=dataset_dirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=dataset_spec_class,
        dataset_spec=dataset_spec,
        data_loader_spec_args=data_loader_spec_args,
        data_loader_spec_class=data_loader_spec_class,
        json_spec_path=json_spec_path,
        spec_cont=spec_cont)
    data_loader_spec = ivy.default(data_loader_spec, built_spec)

    # the spec itself may name the data loader class, optionally taking priority
    data_loader_class = ivy.default(ivy.default(
        _import_arg_specified_class_if_present(data_loader_spec, 'data_loader_class'),
        data_loader_class, rev=class_priority),
        None)

    # a data loader class must come from one of the two sources above
    if not ivy.exists(data_loader_class):
        raise Exception('data_loader_class must either be specified in this build_data_loader() method,'
                        'or data_loader_class attribute must be specified in the data_loader_spec instance')

    # instantiate the data loader from its specification
    return data_loader_class(data_loader_spec)
Ejemplo n.º 13
0
    def __init__(self,
                 base_dataset,
                 name,
                 size,
                 base_slice_fn=None,
                 trans_fn=None,
                 slice_fn=None,
                 elementwise_query_fn=True,
                 with_caching=True,
                 cache_size=1,
                 num_processes=1,
                 numpy_loading=False,
                 prefetching=False,
                 queue_timeout=None,
                 subprocess_depth=0):
        """
        Dataset wrapper constructor.

        Stores the slicing/transform configuration, sets up the cache and
        worker settings, and — when numpy loading is enabled — eagerly
        converts any arrays in a container base dataset to numpy.
        """
        self._name = name
        self._size = size
        # keep the raw argument so _deep_copy can reproduce the default logic
        self._base_slice_fn_arg = base_slice_fn
        if base_slice_fn is None:
            self._base_slice_fn = self._default_base_slice_fn
        else:
            self._base_slice_fn = base_slice_fn
        self._trans_fn = trans_fn
        self._slice_fn = slice_fn
        if slice_fn is None:
            self._slice_dataset = self._default_slice_fn
        else:
            self._slice_dataset = slice_fn
        self._elementwise_query_fn = elementwise_query_fn
        self._with_caching = with_caching
        self._cache_size = cache_size
        self._cache = Cache(cache_size)
        # fix: cpu_count lives on the multiprocessing module itself; the
        # previous multiprocessing() call raised TypeError whenever
        # num_processes was None
        self._num_processes = multiprocessing.cpu_count(
        ) if num_processes is None else num_processes
        self._numpy_loading = numpy_loading
        self._prefetching = prefetching
        self._queue_timeout = ivy.default(queue_timeout, ivy.queue_timeout())
        self._subprocess_depth = subprocess_depth
        self._is_subprocess = bool(subprocess_depth)
        self._first_pass = True
        self._queue_offset = 1
        if numpy_loading and isinstance(base_dataset, ivy.Container):

            # convert arrays (and arrays inside lists) to numpy; the kc
            # (key-chain) argument is required by Container.map but unused
            def to_numpy(x, kc):
                if ivy.is_array(x):
                    return ivy.to_numpy(x)
                elif isinstance(x, list):
                    return [
                        ivy.to_numpy(v) if ivy.is_array(v) else v for v in x
                    ]
                else:
                    return x

            base_dataset = base_dataset.map(to_numpy)
        self._base_dataset = base_dataset
        self._workers_initialized = False
        self._has_workers = False
Ejemplo n.º 14
0
 def __init__(self,
              dataset_spec: DatasetSpec,
              dev_strs: Union[str, List[str]] = None,
              **kwargs) -> None:
     """
     base class for storing general parameters which define the way in which the data loader loads the dataset

     :param dataset_spec: specification of the dataset being loaded
     :param dev_strs: device(s) to load data onto; defaults to ['gpu:0']
                      when a GPU is available, otherwise ['cpu']
     """
     # capture the call arguments before any rebinding, for self._kwargs
     kw = locals_to_kwargs(locals())
     super().__init__(dataset_spec=dataset_spec,
                      dev_strs=ivy.default(dev_strs, ['gpu:0'] if ivy.gpu_is_available() else ['cpu']),
                      **kwargs)
     self._kwargs = kw
Ejemplo n.º 15
0
 def _build_subnets(self, *args, **kwargs) -> ivy.Container:
     """
     Build the network subnets.

     Constructs each subnet from its spec, builds it on this network's
     device, and returns a container mapping each subnet key to its build
     result. (Return annotation corrected from bool: the final statement
     returns an ivy.Container.)
     """
     built_rets = list()
     for k, subnet_spec in self._spec.subnets.items():
         # NOTE(review): the trailing (None, True) arguments presumably tell
         # ivy.default to call the lambda and fall back to None when it
         # raises (e.g. k missing from self._v_in) — confirm against the
         # ivy.default signature
         subnet = subnet_spec.network_class(subnet_spec,
                                            v=ivy.default(
                                                lambda: self._v_in[k], None,
                                                True))
         built_rets.append(
             subnet.build(*args, dev_str=self._dev_str, **kwargs))
         self._subnets[k] = subnet
     return ivy.Container(dict(zip(self._spec.subnets.keys(), built_rets)))
Ejemplo n.º 16
0
 def _deep_copy(self, num_processes=None):
     """
     Return a copy of this dataset one subprocess level deeper, recursively
     deep-copying the base dataset unless it is a raw container.
     """
     if isinstance(self._base_dataset, ivy.Container):
         base = self._base_dataset
     else:
         # noinspection PyProtectedMember
         base = self._base_dataset._deep_copy()
     return Dataset(base_dataset=base,
                    name=self._name,
                    size=self._size,
                    base_slice_fn=self._base_slice_fn_arg,
                    trans_fn=self._trans_fn,
                    slice_fn=self._slice_fn,
                    elementwise_query_fn=self._elementwise_query_fn,
                    with_caching=self._with_caching,
                    cache_size=self._cache_size,
                    num_processes=ivy.default(num_processes, self._num_processes),
                    numpy_loading=self._numpy_loading,
                    prefetching=self._prefetching,
                    queue_timeout=self._queue_timeout,
                    subprocess_depth=self._subprocess_depth + 1)
Ejemplo n.º 17
0
 def _initialize_model(self, checkpoint_path=None):
     """
     Build the network, optionally flag it for compilation, and restore
     from a checkpoint when configured to do so.

     :param checkpoint_path: explicit checkpoint file to restore from;
                             defaults to the latest checkpoint on disk
     :return: the iteration training should start from
     """
     self._pre_init()
     # explicit build mode: construct the network up-front
     if self._net_spec.build_mode == 'explicit':
         self._network.build()
     first_batch = self._spec.data_loader.get_first_batch()
     if ivy.exists(self._dev_manager):
         self._dev_manager.dim_size = first_batch.shape[0]
     # for on_call builds
     self._compute_cost(self._network, first_batch[0:1], self._spec.dev_strs[0])
     # compile
     if self._spec.compile_graph:
         valid_modes = ['network', 'optimizer', 'all']
         assert self._spec.compile_graph in ['network', 'optimizer', 'all'], 'invalid value for compile_graph, ' \
                                                                             'must be one of {}'.format(valid_modes)
         # actual compilation is deferred until after tuning
         if self._spec.compile_graph in ['network', 'all']:
             self._compile_network_once_tuned = True
         if self._spec.compile_graph in ['optimizer', 'all']:
             self._compile_optimizer_once_tuned = True
     if self._spec.save_spec:
         self._save_spec_to_disk()
     self._save_info_to_disk()
     self._init_checkpoint_manager()
     if not checkpoint_path:
         checkpoint_path = self._chkpt_manager.latest_checkpoint_fpath
     # ld_chkpt True demands a checkpoint; 'try' restores only when one exists
     if self._spec.ld_chkpt is True and not ivy.exists(checkpoint_path):
         raise Exception('Unable to load checkpoint, no checkpoint files found.')
     elif self._spec.ld_chkpt in [True, 'try'] and ivy.exists(checkpoint_path):
         self._chkpt.restore(checkpoint_path)
         logging.info('loaded checkpoints from {}'.format(checkpoint_path))
         # the starting iteration is encoded in the checkpoint filename
         starting_iteration = int(checkpoint_path.split('-')[-1].split('.')[0])
         logging.info('#--------------#\n# MODEL LOADED #\n#--------------#')
         self._post_init()
         if ivy.exists(self._spec.starting_iteration):
             assert starting_iteration == self._spec.starting_iteration
         return starting_iteration
     else:
         logging.info('#-------------#\n# MODEL BUILT #\n#-------------#')
     self._global_step = self._spec.starting_iteration
     self._post_init()
     return ivy.default(self._spec.starting_iteration, 0)
Ejemplo n.º 18
0
def main(seed=0, compile_mode=False, dev_strs=None):
    """
    Train the example network for ten iterations and shut the trainer down.
    """
    ivy.seed(seed)
    # take the first requested device, falling back to None when no device
    # strings were supplied
    first_dev = ivy.default(lambda: dev_strs[0], None, True)
    data_loader_spec_args = dict(batch_size=2, dev_strs=[first_dev])
    trainer_spec_args = dict(total_iterations=10,
                             ld_chkpt=False,
                             log_freq=1,
                             initial_learning_rate=0.1,
                             compile_mode=compile_mode,
                             dev_strs=dev_strs)
    trainer = trainer_builder.build_trainer(
        ExampleDataLoader,
        ExampleNetwork,
        ExampleTrainer,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    trainer.train()
    trainer.close()
Ejemplo n.º 19
0
def build_tuner_spec(data_loader_class=None,
                     network_class=None,
                     trainer_class=None,
                     dataset_dirs_args=None,
                     dataset_dirs_class=None,
                     dataset_dirs=None,
                     dataset_spec_args=None,
                     dataset_spec_class=None,
                     dataset_spec=None,
                     data_loader_spec_args=None,
                     data_loader_spec_class=None,
                     data_loader_spec=None,
                     data_loader=None,
                     network_spec_args=None,
                     network_spec_class=None,
                     network_spec=None,
                     network=None,
                     trainer_spec_args=None,
                     trainer_spec_class=None,
                     trainer_spec=None,
                     trainer=None,
                     tuner_spec_args=None,
                     tuner_spec_class=None,
                     json_spec_path=None,
                     spec_cont=None,
                     class_priority=False):
    """
    Build a tuner specification.

    Tuner args are merged from three sources in increasing priority:
    a json file (when ``json_spec_path`` is given), the ``'tuner'`` entry of
    ``spec_cont``, and the explicitly passed ``tuner_spec_args``. The spec
    class may be overridden from within the merged args, the framework is
    set from the spec, a trainer is built, and the spec is returned.
    """
    # normalise the explicitly passed args into a container
    tuner_spec_args = ivy.Container(
        dict() if tuner_spec_args is None else tuner_spec_args)

    # args from the json file, when a path was given
    if isinstance(json_spec_path, str):
        json_spec = json_spec_from_fpath(json_spec_path, 'tuner_args.json')
    else:
        json_spec = ivy.Container()

    # args from the 'tuner' entry of the spec container, when present
    if isinstance(spec_cont, dict) and 'tuner' in spec_cont:
        this_spec_cont = ivy.Container(spec_cont['tuner'])
    else:
        this_spec_cont = ivy.Container()

    # merge all sources, later ones taking priority
    tuner_spec_args = ivy.Container.combine(
        json_spec, this_spec_cont, tuner_spec_args)

    # the merged args may themselves name the tuner spec class to use
    tuner_spec_class = ivy.default(ivy.default(
        _import_arg_specified_class_if_present(tuner_spec_args, 'tuner_spec_class'),
        tuner_spec_class, rev=class_priority),
        TunerSpec)

    # set the global framework from a trainer-less throwaway spec
    ivy.set_framework(tuner_spec_class(None, **tuner_spec_args).framework)

    # build the trainer; note this is evaluated eagerly even when a trainer
    # was passed in, matching the original ivy.default call semantics
    built_trainer = build_trainer(
        data_loader_class=data_loader_class,
        network_class=network_class,
        trainer_class=trainer_class,
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=dataset_dirs_class,
        dataset_dirs=dataset_dirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=dataset_spec_class,
        dataset_spec=dataset_spec,
        data_loader_spec_args=data_loader_spec_args,
        data_loader_spec_class=data_loader_spec_class,
        data_loader_spec=data_loader_spec,
        data_loader=data_loader,
        network_spec_args=network_spec_args,
        network_spec_class=network_spec_class,
        network_spec=network_spec,
        network=network,
        trainer_spec_args=trainer_spec_args,
        trainer_spec_class=trainer_spec_class,
        trainer_spec=trainer_spec,
        json_spec_path=json_spec_path,
        spec_cont=spec_cont)
    trainer = ivy.default(trainer, built_trainer)

    # construct and return the tuner specification
    return tuner_spec_class(trainer, **tuner_spec_args)
Ejemplo n.º 20
0
 def __init__(self, data_loader_spec: DataLoaderSpec):
     """
     Base class for loading data from disk for training.

     :param data_loader_spec: the full data loader specification.
     """
     # keep the spec, then resolve the device to load onto: the first
     # device in the spec when given, otherwise the framework default
     # (the trailing True asks ivy.default to call the lambda and catch
     # failures, matching usage elsewhere in this file)
     self._spec = data_loader_spec
     self._dev_str = ivy.default(
         lambda: data_loader_spec.dev_strs[0], ivy.default_device(), True)
Ejemplo n.º 21
0
    def __init__(
            self,
            data_loader_class,
            network_class,
            trainer_class,
            dataset_dirs_args: dict = None,
            dataset_dirs_class: DatasetDirs.__base__ = DatasetDirs,
            dataset_dirs: DatasetDirs = None,
            dataset_spec_args: dict = None,
            dataset_spec_class: DatasetSpec.__base__ = DatasetSpec,
            dataset_spec: DatasetSpec = None,
            data_loader_spec_args: dict = None,
            data_loader_spec_class: DataLoaderSpec.__base__ = DataLoaderSpec,
            data_loader_spec: DataLoaderSpec = None,
            data_loader=None,
            network_spec_args: dict = None,
            network_spec_class: NetworkSpec.__base__ = NetworkSpec,
            network_spec: NetworkSpec = None,
            network=None,
            trainer_spec_args: dict = None,
            trainer_spec_class: TrainerSpec.__base__ = TrainerSpec,
            trainer_spec: TrainerSpec = None,
            trainer=None,
            tuner_spec_args: dict = None,
            tuner_spec_class: TunerSpec.__base__ = TunerSpec,
            tuner_spec: TunerSpec = None,
            json_spec_path: str = None,
            spec_cont: dict = None):
        """
        Base class for any tune trainers.

        Stores each (class, args, optional pre-built instance) triple needed
        to construct the data loader, network and trainer, then immediately
        builds the tuner specification via the builder module.

        :raises Exception: when ray[tune] is not installed.
        """
        # ray[tune] is an optional dependency; fail fast with install advice
        if not ivy.exists(tune):
            raise Exception(
                'ray[tune] is needed in order to use the Tuner class, but it is not installed.'
                'Please install via pip install ray[tune]')
        # store all constructor arguments; unspecified *_args default to
        # empty dicts so they can be combined/unpacked downstream
        self._data_loader_class = data_loader_class
        self._network_class = network_class
        self._trainer_class = trainer_class
        self._dataset_dirs_args = ivy.default(dataset_dirs_args, dict())
        self._dataset_dirs_class = dataset_dirs_class
        self._dataset_dirs = dataset_dirs
        self._dataset_spec_args = ivy.default(dataset_spec_args, dict())
        self._dataset_spec_class = dataset_spec_class
        self._dataset_spec = dataset_spec
        self._data_loader_spec_args = ivy.default(data_loader_spec_args,
                                                  dict())
        self._data_loader_spec_class = data_loader_spec_class
        self._data_loader_spec = data_loader_spec
        self._data_loader = data_loader
        self._network_spec_args = ivy.default(network_spec_args, dict())
        self._network_spec_class = network_spec_class
        self._network_spec = network_spec
        self._network = network
        self._trainer_spec_args = ivy.default(trainer_spec_args, dict())
        self._trainer_spec_class = trainer_spec_class
        self._trainer_spec = trainer_spec
        self._trainer = trainer
        self._tuner_spec_args = ivy.default(tuner_spec_args, dict())
        self._tuner_spec_class = tuner_spec_class
        self._tuner_spec = tuner_spec
        self._json_spec_path = json_spec_path
        self._spec_cont = spec_cont

        # initialized on _setup
        # NOTE(review): this overwrites the `trainer` argument stored just
        # above, so `trainer=self._trainer` below always forwards None and
        # any trainer passed to this constructor is discarded — confirm
        # this is intentional.
        self._trainer = None

        # builder requires an empty framework stack, so unwind it first
        while len(ivy.framework_stack) > 0:
            logging.info(
                'unsetting framework {}, framework stack must be empty when'
                'initializing tuner class.'.format(ivy.framework_stack[-1]))
            ivy.unset_framework()
        self._builder = builder_module

        # build the tuner spec from everything stored above
        self._spec = self._builder.build_tuner_spec(
            data_loader_class=self._data_loader_class,
            network_class=self._network_class,
            trainer_class=self._trainer_class,
            dataset_dirs_args=self._dataset_dirs_args,
            dataset_dirs_class=self._dataset_dirs_class,
            dataset_dirs=self._dataset_dirs,
            dataset_spec_args=self._dataset_spec_args,
            dataset_spec_class=self._dataset_spec_class,
            dataset_spec=self._dataset_spec,
            data_loader_spec_args=self._data_loader_spec_args,
            data_loader_spec_class=self._data_loader_spec_class,
            data_loader_spec=self._data_loader_spec,
            data_loader=self._data_loader,
            network_spec_args=self._network_spec_args,
            network_spec_class=self._network_spec_class,
            network_spec=self._network_spec,
            network=self._network,
            trainer_spec_args=self._trainer_spec_args,
            trainer_spec_class=self._trainer_spec_class,
            trainer_spec=self._trainer_spec,
            trainer=self._trainer,
            tuner_spec_args=self._tuner_spec_args,
            tuner_spec_class=self._tuner_spec_class,
            json_spec_path=self._json_spec_path,
            spec_cont=self._spec_cont)
        self._spec = _convert_tuner_spec(self._spec)
Ejemplo n.º 22
0
        dataset_dirs_class=ExampleDatasetDirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=ExampleDatasetSpec,
        data_loader_spec_args=data_loader_spec_args,
        data_loader_spec_class=ExampleDataLoaderSpec,
        network_spec_args=network_spec_args,
        network_spec_class=ExampleNetworkSpec,
        trainer_spec_args=trainer_spec_args,
        spec_cont=ivy.Container({'trainer': {
            'compile_mode': compile_mode
        }}))
    trainer.setup()
    print("Finished complete example!")
    trainer.train()
    trainer.close()


if __name__ == '__main__':
    # parse the command line; a random framework is chosen when none given
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--framework',
        type=str,
        default=None,
        help=
        'which framework to use. Chooses a random framework if unspecified.')
    cmd_args = arg_parser.parse_args()
    # note: choose_random_framework() is evaluated eagerly either way,
    # matching the ivy.default call semantics
    framework = ivy.default(cmd_args.framework, ivy.choose_random_framework())
    ivy.set_framework(framework)
    main()
    ivy.unset_framework()
Ejemplo n.º 23
0
def build_trainer(data_loader_class=None,
                  network_class=None,
                  trainer_class=None,
                  dataset_dirs_args=None,
                  dataset_dirs_class=None,
                  dataset_dirs=None,
                  dataset_spec_args=None,
                  dataset_spec_class=None,
                  dataset_spec=None,
                  data_loader_spec_args=None,
                  data_loader_spec_class=None,
                  data_loader_spec=None,
                  data_loader=None,
                  network_spec_args=None,
                  network_spec_class=None,
                  network_spec=None,
                  network=None,
                  trainer_spec_args=None,
                  trainer_spec_class=None,
                  trainer_spec=None,
                  json_spec_path=None,
                  spec_cont=None,
                  class_priority=False):
    """
    Build a trainer.

    A trainer spec is constructed from the remaining arguments (used only
    when no ``trainer_spec`` was passed), the trainer class is resolved —
    optionally from the spec itself — and the trainer is instantiated.
    """
    # build the trainer spec; evaluated eagerly even when a spec was
    # passed in, matching the original ivy.default call semantics
    built_spec = build_trainer_spec(
        data_loader_class=data_loader_class,
        network_class=network_class,
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=dataset_dirs_class,
        dataset_dirs=dataset_dirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=dataset_spec_class,
        dataset_spec=dataset_spec,
        data_loader_spec_args=data_loader_spec_args,
        data_loader_spec_class=data_loader_spec_class,
        data_loader_spec=data_loader_spec,
        data_loader=data_loader,
        network_spec_args=network_spec_args,
        network_spec_class=network_spec_class,
        network_spec=network_spec,
        network=network,
        trainer_spec_args=trainer_spec_args,
        trainer_spec_class=trainer_spec_class,
        json_spec_path=json_spec_path,
        spec_cont=spec_cont)
    trainer_spec = ivy.default(trainer_spec, built_spec)

    # the spec itself may name the trainer class, which may take priority
    # (the original wrapped this in ivy.default(..., None), a no-op)
    trainer_class = ivy.default(
        _import_arg_specified_class_if_present(trainer_spec, 'trainer_class'),
        trainer_class, rev=class_priority)

    # a trainer class must have come from one of the two sources
    if not ivy.exists(trainer_class):
        raise Exception('trainer_class must either be specified in this build_trainer() method,'
                        'or trainer_class attribute must be specified in the trainer_spec instance')

    # instantiate and return the trainer
    return trainer_class(trainer_spec)
Ejemplo n.º 24
0
def build_trainer_spec(data_loader_class=None,
                       network_class=None,
                       dataset_dirs_args=None,
                       dataset_dirs_class=None,
                       dataset_dirs=None,
                       dataset_spec_args=None,
                       dataset_spec_class=None,
                       dataset_spec=None,
                       data_loader_spec_args=None,
                       data_loader_spec_class=None,
                       data_loader_spec=None,
                       data_loader=None,
                       network_spec_args=None,
                       network_spec_class=None,
                       network_spec=None,
                       network=None,
                       trainer_spec_args=None,
                       trainer_spec_class=None,
                       json_spec_path=None,
                       spec_cont=None,
                       class_priority=False):
    """
    Build a trainer specification.

    The data loader and network are built (unless pre-built instances were
    passed), folded into the trainer args, which are then merged with json
    and spec-container sources before the spec class is instantiated.
    """
    # build the data loader; evaluated eagerly even when one was passed,
    # matching the original ivy.default call semantics
    built_loader = build_data_loader(
        data_loader_class=data_loader_class,
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=dataset_dirs_class,
        dataset_dirs=dataset_dirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=dataset_spec_class,
        dataset_spec=dataset_spec,
        data_loader_spec_args=data_loader_spec_args,
        data_loader_spec_class=data_loader_spec_class,
        data_loader_spec=data_loader_spec,
        json_spec_path=json_spec_path,
        spec_cont=spec_cont)
    data_loader = ivy.default(data_loader, built_loader)

    # build the network likewise
    built_network = build_network(
        network_class=network_class,
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=dataset_dirs_class,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=dataset_spec_class,
        network_spec_args=network_spec_args,
        network_spec_class=network_spec_class,
        network_spec=network_spec,
        json_spec_path=json_spec_path,
        spec_cont=spec_cont)
    network = ivy.default(network, built_network)

    # fold the built objects into the explicitly passed trainer args
    trainer_spec_args = ivy.Container.combine(
        ivy.Container(dict() if trainer_spec_args is None else trainer_spec_args),
        ivy.Container(data_loader=data_loader,
                      network=network))

    # args from the json file, when a path was given
    if isinstance(json_spec_path, str):
        json_spec = json_spec_from_fpath(json_spec_path, 'trainer_args.json')
    else:
        json_spec = ivy.Container()

    # args from the 'trainer' entry of the spec container, when present
    if isinstance(spec_cont, dict) and 'trainer' in spec_cont:
        this_spec_cont = ivy.Container(spec_cont['trainer'])
    else:
        this_spec_cont = ivy.Container()

    # merge all sources, later ones taking priority
    trainer_spec_args = ivy.Container.combine(
        json_spec, this_spec_cont, trainer_spec_args)

    # the merged args may themselves name the trainer spec class to use
    trainer_spec_class = ivy.default(ivy.default(
        _import_arg_specified_class_if_present(trainer_spec_args, 'trainer_spec_class'),
        trainer_spec_class, rev=class_priority),
        TrainerSpec)

    # construct and return the trainer specification
    return trainer_spec_class(**trainer_spec_args)
Ejemplo n.º 25
0
 def __init__(self,
              data_loader,
              network: Network,
              log_dir: str = 'log',
              overwrite_log_dir: bool = False,
              seed: int = 0,
              ld_chkpt: bool = False,
              save_freq: int = 1000,
              save_at_end: bool = True,
              log_freq: int = 100,
              log_at_end: bool = True,
              vis_freq: int = 500,
              vis_at_end: bool = True,
              log_validation: bool = True,
              log_time: bool = True,
              log_learning_rate: bool = True,
              starting_iteration: int = None,
              total_iterations: (int, float) = 1e6,
              initial_learning_rate: float = 1e-4,
              save_spec: bool = True,
              custom_train_step: bool = False,
              auto_detect_weights: bool = True,
              log_gradients: (tuple, str) = 'all',
              log_variables: (tuple, str) = 'all',
              log_optimizer_state: (tuple, str) = 'all',
              profile_start_step: int = 5,
              steps_to_profile: int = 0,
              compile_graph: (bool, str) = 'all',
              dev_strs: Union[str, List[str]] = None,
              dev_map_fn: str = '_split_execute_with_grads',
              tune_device_allocation: bool = True,
              tune_splitting: bool = True,
              **kwargs) -> None:
      """
      Parameters which define the training procedure.

      The three ``log_*`` arguments accept either a collection of statistic
      names or ``'all'`` (or a collection containing ``'all'``), which
      expands to the full list of tracked statistics.  ``dev_strs`` defaults
      to ``['gpu:0']`` when a GPU is available, otherwise ``['cpu']``.
      """
      # capture the raw constructor arguments before any locals are added
      kw = locals_to_kwargs(locals())
      # single source of truth for the 'all' statistics expansion
      # (previously triplicated verbatim); each target receives its own
      # copy so later in-place edits to one cannot alias the others
      all_stats = ['mean', 'abs_mean', 'var', 'abs_var', 'min', 'abs_min',
                   'max', 'abs_max', 'vector_norm', 'global_vector_norm']
      if log_gradients == 'all' or 'all' in log_gradients:
          log_gradients = list(all_stats)
      if log_variables == 'all' or 'all' in log_variables:
          log_variables = list(all_stats)
      if log_optimizer_state == 'all' or 'all' in log_optimizer_state:
          log_optimizer_state = list(all_stats)
      super().__init__(data_loader=data_loader,
                       network=network,
                       log_dir=log_dir,
                       overwrite_log_dir=overwrite_log_dir,
                       seed=seed,
                       ld_chkpt=ld_chkpt,
                       save_freq=save_freq,
                       save_at_end=save_at_end,
                       log_freq=log_freq,
                       log_at_end=log_at_end,
                       vis_freq=vis_freq,
                       vis_at_end=vis_at_end,
                       log_validation=log_validation,
                       log_time=log_time,
                       log_learning_rate=log_learning_rate,
                       starting_iteration=starting_iteration,
                       total_iterations=total_iterations,
                       initial_learning_rate=initial_learning_rate,
                       save_spec=save_spec,
                       custom_train_step=custom_train_step,
                       auto_detect_weights=auto_detect_weights,
                       log_gradients=log_gradients,
                       log_variables=log_variables,
                       log_optimizer_state=log_optimizer_state,
                       profile_start_step=profile_start_step,
                       steps_to_profile=steps_to_profile,
                       compile_graph=compile_graph,
                       dev_strs=ivy.default(dev_strs, ['gpu:0'] if ivy.gpu_is_available() else ['cpu']),
                       dev_map_fn=dev_map_fn,
                       tune_device_allocation=tune_device_allocation,
                       tune_splitting=tune_splitting,
                       **kwargs)
      self._kwargs = kw
Ejemplo n.º 26
0
    def __init__(self, spec: TrainerSpec) -> None:
        """
        Base trainer constructor: wires up seeding, checkpoint slots, the
        optional tensorboard writer, the profiler, the network and the
        multi-device machinery from the given trainer specification.
        """

        # specification
        self._spec = spec

        # starting iteration: honour an explicit value in the spec,
        # otherwise begin from zero
        if spec.starting_iteration is not None:
            self._starting_iteration = spec.starting_iteration
        else:
            self._starting_iteration = 0
        self._total_iterations = None

        # trainer variables
        self._global_step = 0
        self._moving_average_loss = 0

        # set seed for reproducibility, for both numpy and the ivy backend
        np.random.seed(self._spec.seed)
        ivy.seed(self._spec.seed)

        # checkpoint handles; populated later by checkpoint-manager init
        self._chkpt = None
        self._chkpt_manager = None

        # summary writer: tensorboard is optional — when
        # torch.utils.tensorboard is unavailable, no summaries are written
        try:
            from torch.utils.tensorboard import SummaryWriter
        except ModuleNotFoundError:
            SummaryWriter = None
        if SummaryWriter is not None:
            self._writer = SummaryWriter(os.path.join(self._spec.log_dir, 'tnsrbrd'))
        else:
            self._writer = None

        # profiler, only created when some steps are configured for profiling
        self._profiling = self._spec.steps_to_profile > 0
        if self._profiling:
            self._profiler = ivy.Profiler(self._spec.log_dir)
        else:
            self._profiler = None

        # timing
        self._start_time = time.perf_counter()

        # batch
        self._training_batch = None

        # network
        self._network = self._spec.network
        self._net_spec = self._network.spec
        # partial updates apply when only selected variable keychains train
        self._partial_grad_updates = bool(self._net_spec.v_keychains)

        # multi-dev: primary device is the first spec device when given,
        # otherwise the framework default device
        self._dev_str = ivy.default(lambda: self._spec.dev_strs[0], ivy.default_device(), True)
        if len(self._spec.dev_strs) > 1:
            # multi-device training needs a not-yet-built network, so each
            # worker process can build on its own device
            if self._network.built:
                raise Exception('Network must use either explicit or on_call build modes if training on multiple'
                                'devices, but the network was already built using on_init method.')
            # per-device results are unified onto the first device by mean
            ret_fn = lambda ret: ivy.unify_iter(ret, self._spec.dev_strs[0], 'mean', transpose=True)
            dev_mapper = ivy.DevMapperMultiProc(
                self.__getattribute__(self._spec.dev_map_fn), ret_fn, self._spec.dev_strs,
                constant={'network': self._network})
            self._multi_dev = True
        else:
            dev_mapper = None
            self._multi_dev = False

        # device manager, only when allocation or splitting is to be tuned
        # (dev_mapper may be None here when only tune_splitting is enabled)
        if (self._multi_dev and self._spec.tune_device_allocation) or self._spec.tune_splitting:
            self._dev_manager = ivy.DevManager(
                dev_mapper, self._spec.dev_strs, tune_dev_alloc=(self._multi_dev and self._spec.tune_device_allocation),
                tune_dev_splits=self._spec.tune_splitting)
        else:
            self._dev_manager = None

        # compilation flags, flipped once device tuning has settled
        self._compile_network_once_tuned = False
        self._compile_optimizer_once_tuned = False
Ejemplo n.º 27
0
def build_tuner(data_loader_class=None,
                network_class=None,
                trainer_class=None,
                dataset_dirs_args=None,
                dataset_dirs_class=None,
                dataset_dirs=None,
                dataset_spec_args=None,
                dataset_spec_class=None,
                dataset_spec=None,
                data_loader_spec_args=None,
                data_loader_spec_class=None,
                data_loader_spec=None,
                data_loader=None,
                network_spec_args=None,
                network_spec_class=None,
                network_spec=None,
                network=None,
                trainer_spec_args=None,
                trainer_spec_class=None,
                trainer_spec=None,
                trainer=None,
                tuner_spec_args=None,
                tuner_spec_class=None,
                tuner_spec=None,
                tuner_class=None,
                json_spec_path=None,
                spec_cont=None,
                class_priority=False):
    """
    Build a tuner.

    A tuner spec is constructed (used only when no ``tuner_spec`` was
    passed), the tuner class is resolved — optionally from the spec
    itself — and the tuner is instantiated with the original arguments.
    """
    # build the tuner spec; evaluated eagerly even when a spec was
    # passed in, matching the original ivy.default call semantics
    built_spec = build_tuner_spec(
        data_loader_class=data_loader_class,
        network_class=network_class,
        trainer_class=trainer_class,
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=dataset_dirs_class,
        dataset_dirs=dataset_dirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=dataset_spec_class,
        dataset_spec=dataset_spec,
        data_loader_spec_args=data_loader_spec_args,
        data_loader_spec_class=data_loader_spec_class,
        data_loader_spec=data_loader_spec,
        data_loader=data_loader,
        network_spec_args=network_spec_args,
        network_spec_class=network_spec_class,
        network_spec=network_spec,
        network=network,
        trainer_spec_args=trainer_spec_args,
        trainer_spec_class=trainer_spec_class,
        trainer_spec=trainer_spec,
        trainer=trainer,
        tuner_spec_args=tuner_spec_args,
        tuner_spec_class=tuner_spec_class,
        json_spec_path=json_spec_path,
        spec_cont=spec_cont)
    tuner_spec = ivy.default(tuner_spec, built_spec)

    # the spec itself may name the tuner class, which may take priority
    tuner_class = ivy.default(ivy.default(
        _import_arg_specified_class_if_present(tuner_spec, 'tuner_class'),
        tuner_class, rev=class_priority),
        Tuner)

    # instantiate the tuner with the full original argument set
    return tuner_class(
        data_loader_class=data_loader_class,
        network_class=network_class,
        trainer_class=trainer_class,
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=dataset_dirs_class,
        dataset_dirs=dataset_dirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=dataset_spec_class,
        dataset_spec=dataset_spec,
        data_loader_spec_args=data_loader_spec_args,
        data_loader_spec_class=data_loader_spec_class,
        data_loader_spec=data_loader_spec,
        data_loader=data_loader,
        network_spec_args=network_spec_args,
        network_spec_class=network_spec_class,
        network_spec=network_spec,
        network=network,
        trainer_spec_args=trainer_spec_args,
        trainer_spec_class=trainer_spec_class,
        trainer_spec=trainer_spec,
        trainer=trainer,
        tuner_spec_args=tuner_spec_args,
        tuner_spec_class=tuner_spec_class,
        tuner_spec=tuner_spec,
        json_spec_path=json_spec_path,
        spec_cont=spec_cont)
Ejemplo n.º 28
0
 def __init__(self,
              sizes,
              fpath_template=None,
              seq_idxs=None,
              start=None,
              end=None,
              max_seq_len=None,
              conts_to_skip=None,
              pruned_sizes=None):
     """
     Book-keeping for per-sequence sizes before and after pruning.

     :param sizes: raw sequence lengths; a tuple/list (one entry per
                   sequence), a dict, or a single int (constant length —
                   TODO confirm dict keys are sequence indices).
     :param fpath_template: container file path template, stored as-is.
     :param seq_idxs: explicit mapping of new idx -> original seq idx;
                      computed from start/end when not given.
     :param start: first sequence index (used with tuple/list sizes and
                   when seq_idxs must be computed).
     :param end: last sequence index, inclusive.
     :param max_seq_len: maximum sequence length; derived from the pruned
                         sizes when not given.
     :param conts_to_skip: (seq_idx, win_idx) pairs of containers to skip.
     :param pruned_sizes: pre-computed pruned lengths; derived from sizes
                          and conts_to_skip when not given.
     """
     if isinstance(sizes, (tuple, list)):
         # per-sequence lengths after removing skipped containers
         pruned_sizes = ivy.default(pruned_sizes, [
             SeqDataLoader._compute_seq_len(i, sl, conts_to_skip)
             for i, sl in enumerate(sizes)
         ])
         # sequences pruned down to nothing; the index range below is
         # widened by this amount to keep the usable sequence count
         num_empty = sum([ps == 0 for ps in pruned_sizes])
         self._raw_sizes = dict(
             zip(range(start, end + 1 + num_empty),
                 sizes[start:end + 1 + num_empty]))
         self._pruned_sizes = dict(
             zip(range(start, end + 1 + num_empty),
                 pruned_sizes[start:end + 1 + num_empty]))
     elif isinstance(sizes, (int, dict)):
         self._raw_sizes = sizes
         self._pruned_sizes = ivy.default(pruned_sizes, sizes)
         if isinstance(self._pruned_sizes, int):
             # constant raw size: derive per-sequence pruned sizes from the
             # skip list.  NOTE(review): assumes conts_to_skip is an
             # iterable of pairs here — confirm callers never pass None
             # together with int sizes.
             pruned_dict = dict()
             for seq_idx, win_idx in conts_to_skip:
                 if seq_idx not in pruned_dict:
                     pruned_dict[seq_idx] = list()
                 pruned_dict[seq_idx].append(win_idx)
             # number of distinct windows skipped per sequence
             pruned_dict = {
                 k: len(set(v))
                 for k, v in pruned_dict.items()
             }
             pruned_sizes_dict = {
                 k: self._pruned_sizes - num_pruned
                 for k, num_pruned in pruned_dict.items()
             }
             num_empty = sum(
                 [size == 0 for size in pruned_sizes_dict.values()])
             # sequences without any skips keep the constant pruned size
             pruned_sizes = collections.defaultdict(
                 lambda: self._pruned_sizes, pruned_sizes_dict)
         else:
             # NOTE(review): when self._pruned_sizes is a dict this
             # iterates its KEYS, not its values — looks like it should be
             # self._pruned_sizes.values(); confirm intended.
             num_empty = sum([ps == 0 for ps in self._pruned_sizes])
     else:
         raise Exception(
             'Invalid type for sizes, expected one of int, dict, tuple or list,'
             'but found {} or type {}'.format(sizes, type(sizes)))
     # a single int means every sequence shares the same raw length
     self._constant_size = isinstance(self._raw_sizes, int)
     if max_seq_len:
         self._max_seq_len = max_seq_len
     else:
         self._max_seq_len = self._pruned_sizes if self._constant_size else max(
             self._pruned_sizes.values())
     self._fpath_template = fpath_template
     self._conts_to_skip = conts_to_skip
     if seq_idxs:
         self._seq_idxs = seq_idxs
     else:
         # map contiguous new indices onto original sequence indices that
         # still have a non-empty pruned size.
         # NOTE(review): `pruned_sizes[i]` indexes by position, while the
         # defaultdict built above is keyed by sequence index — these only
         # agree when start == 0; also pruned_sizes may still be None on
         # the dict-sizes path.  Confirm against callers.
         vals = [
             v
             for i, v in enumerate(range(start, end + 1 +
                                         num_empty))
             if pruned_sizes[i] > 0
         ]
         keys = range(0, min(end - start + 1 + num_empty,
                             len(vals)))
         self._seq_idxs = dict(zip(keys, vals))