Example #1
    def run(self, ops, batch=False, **kwargs):
        # ensure variables are initialized
        uninit_vars = []
        for var in self.global_variables():
            if var not in self.initialized_variables:
                uninit_vars.append(var)
        if uninit_vars:
            desc = '\n    '.join(v.op.name for v in uninit_vars)
            log.warn('Variables are not initialized:\n    {}'.format(desc))
            self.raw_run(tf.variables_initializer(uninit_vars))
            self.initialized_variables += uninit_vars

        # assign overrider hyperparameters
        self._overrider_assign_parameters()

        # session run
        if batch:
            results, statistics = self.raw_run(
                (ops, self.estimator.operations), **kwargs)
            # update statistics
            self.estimator.append(statistics)
            text = self.estimator.format(batch_size=self.batch_size)
            log.info(text, update=True)
            if log.is_enabled('debug'):
                self.estimator.debug()
        else:
            results = self.raw_run(ops, **kwargs)
        return results
Example #2
 def subtract_channel_means(self, i):
     means = self.moment.get('mean')
     if not means:
         log.warn('Channel means not supplied, defaulting '
                  'to 0.5 for each channel.')
         means = [0.5] * i.shape[-1]
     shape = [1, 1, len(means)]
     means = tf.constant(means, shape=shape, name='image_means')
     return i - means
Example #3
 def _warn_ties(ties, num_ties, thresholds):
     iterer = enumerate(zip(ties, num_ties, thresholds))
     for i, (each_ties, each_num_ties, each_threshold) in iterer:
         if each_num_ties == 1:
             continue
         log.warn(
             'Top-k of batch index {} has {} tie values {}.'
             .format(i, int(each_num_ties), int(each_threshold)),
             once='ties')
     return num_ties
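For context, the ties and thresholds that this helper receives can be computed from a batch of logits in a few lines of NumPy. The sketch below only illustrates how such ties can be detected; the helper name is hypothetical and this is not the original implementation.

import numpy as np

def count_topk_ties(logits, k):
    # The top-k threshold of each row is its k-th largest value; any
    # additional entries equal to that threshold tie for the last slot.
    thresholds = np.sort(logits, axis=-1)[:, -k]
    num_ties = np.sum(logits == thresholds[:, None], axis=-1)
    return num_ties, thresholds

For example, count_topk_ties(np.array([[1., 2., 2., 3.]]), k=2) reports two entries tied at the threshold 2.0, which the warning loop above would then log.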
Example #4
 def _estimate_layer(self, node, in_info):
     out_info = super()._estimate_layer(node, in_info)
     log.debug('Estimated statistics for {!r}: {}.'.format(
         node.formatted_name(), out_info))
     for k, o in self.overriders.get(node, {}).items():
         if k in ('gradient', 'normalization'):
             log.warn('Normalization/gradient estimation not supported.')
             continue
         out_info = o.estimate(out_info, in_info)
         log.debug('Overrider {!r} modified statistics: {}.'.format(
             o, out_info))
     return out_info
Example #5
 def save(self, key):
     cp_path = self._path(key, True)
     if isinstance(key, int):
         log.info('Saving checkpoint at epoch {} to {!r}...'.format(
             key, cp_path))
     else:
         log.info('Saving checkpoint to {!r}...'.format(cp_path))
     try:
         saver = tf.train.Saver(self._global_variables())
         saver.save(self.tf_session, cp_path, write_meta_graph=False)
     except tf.errors.ResourceExhaustedError:
         log.warn('Unable to save a checkpoint because we have '
                  'no space left on device.')
Example #6
 def normalize_channels(self, i):
     # FIXME we pin this augmentation to the GPU because it performs
     # poorly on the CPU.
     with tf.device('/gpu:0'):
         i = self.subtract_channel_means(i)
         stds = self.moment.get('std')
         if not stds:
             log.warn('Channel std value not supplied, defaulting '
                      'to 1.0 for each channel.')
             return i
         shape = [1, 1, len(stds)]
         stds = tf.constant(stds, shape=shape, name='image_stds')
         return i / stds
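Combined with subtract_channel_means from Example #2, this standardises each channel of an HWC image. Ignoring the TensorFlow ops and the GPU pinning, the arithmetic reduces to the NumPy sketch below, which reuses the same 0.5 and 1.0 fallbacks; the function name is illustrative.

import numpy as np

def normalize_channels_np(image, means=None, stds=None):
    # Fall back to the same defaults as the snippets above: per-channel
    # means of 0.5 and standard deviations of 1.0.
    channels = image.shape[-1]
    means = np.reshape(means if means else [0.5] * channels, (1, 1, -1))
    stds = np.reshape(stds if stds else [1.0] * channels, (1, 1, -1))
    # Broadcast the per-channel statistics over the height and width axes.
    return (image - means) / stds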
Example #7
 def _update_policy(self, tensor):
     """ simple brute-force, optimal result.  """
     w = self.eval(self.width)
     for p in range(-w, w + 1):
         rate = self._quantize(tensor,
                               point=p,
                               width=w,
                               compute_overflow_rate=True)
         if rate <= self.overflow_rate:
             return p
     log.warn('Cannot find a binary point position that satisfies the '
              'overflow_rate budget, using integer (point at the right '
              'of LSB) instead.')
     return w
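The search above sweeps candidate binary-point positions and keeps the first one whose overflow rate fits the budget. As a self-contained illustration of the same idea, the NumPy sketch below assumes a plain signed fixed-point convention in which the point counts fractional bits; the names find_point and overflow_budget are made up, and the point convention very likely differs from the quantizer above.

import numpy as np

def find_point(values, width, overflow_budget):
    # Sweep from most to fewest fractional bits and return the first
    # (i.e. most precise) point whose overflow rate fits the budget.
    for point in range(width, -width - 1, -1):
        # With `point` fractional bits and a sign bit, the largest
        # representable magnitude is 2 ** (width - point - 1).
        max_value = 2.0 ** (width - point - 1)
        overflow_rate = np.mean(np.abs(values) >= max_value)
        if overflow_rate <= overflow_budget:
            return point
    # No candidate satisfies the budget; fall back to zero fractional bits.
    return 0

For instance, find_point(np.random.randn(10000), width=8, overflow_budget=0.01) keeps as many fractional bits as possible while letting at most 1% of the samples overflow.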
Example #8
def _auto_select_gpus(num_gpus, memory_bound):
    try:
        info = subprocess.check_output('nvidia-smi',
                                       shell=True,
                                       stderr=subprocess.STDOUT)
        info = re.findall(r'(\d+)MiB\s/', info.decode('utf-8'))
        log.debug('GPU memory usages (MB): {}'.format(', '.join(info)))
        info = [int(m) for m in info]
        gpus = [i for i in range(len(info)) if info[i] <= memory_bound]
    except subprocess.CalledProcessError:
        gpus = []
    if len(gpus) < num_gpus:
        log.warn('Number of GPUs available {} is less than the number of '
                 'GPUs requested {}.'.format(len(gpus), num_gpus))
    return ','.join(str(g) for g in gpus[:num_gpus])
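The helper returns a comma-separated string of device indices; nothing above shows how it is consumed, but a typical pattern (purely illustrative, with placeholder argument values) is to export it before any TensorFlow session is created:

import os

gpus = _auto_select_gpus(num_gpus=2, memory_bound=1000)
if gpus:
    # Restrict TensorFlow to the selected devices.
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus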
Example #9
 def search(self, params):
     max_bound = params.get('max')
     if max_bound is None:
         raise ValueError(
             'A max value is required to search for {}.'.format(
                 self.__name__))
     targets = params.get('targets')
     if targets is None or 'point' not in targets:
         raise ValueError('Required targets are not specified')
     w = self.eval(self.width)
     max_value = 2**(w - 1)
     for p in range(-2 * w, w + 1):
         shift = 2.0**(p)
         if max_bound <= max_value * shift:
             return {'point': w + p}
     log.warn('Cannot find a binary point position that covers the max '
              'value, using integer (point at the right of LSB) instead.')
     return {'point': w}
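Read concretely, the loop finds the smallest p (starting from -2w) for which 2**(w - 1) * 2**p reaches max_bound, and reports the binary point as w + p. With w = 8 and max_bound = 20, for example, the first shift to satisfy the bound is 2**-2 (since 128 * 0.25 = 32 >= 20), so the result would be {'point': 6}.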
Example #10
    def _add_var_scope(self, node, params, scope_list):
        path = '/'.join(node.module)
        if not path:
            raise ValueError('Module path is empty.')

        forward_overriders = params.pop('overrider', None) or {}
        gradient_overriders = forward_overriders.pop('gradient', {})
        for key, overrider in gradient_overriders.items():
            if params.pop('{}_regularizer'.format(key), None):
                log.warn(
                    'Regularizer for \'{}/{}\' is disabled for now, as we '
                    'override its gradient with {!r}.'
                    .format(node.formatted_name(), key, overrider))

        def custom_getter(getter, name, *args, **kwargs):
            v = getter(name, *args, **kwargs)
            log.debug('Variable {} created.'.format(v))
            key = name.replace('{}/'.format(node.formatted_name()), '')
            overrider = forward_overriders.get(key)
            if overrider:
                log.debug(
                    'Overriding {!r} with {!r}.'.format(name, overrider))
                v = overrider.apply(node, name, getter, v)
            # gradient overrider
            overrider = gradient_overriders.get(key)
            if overrider and self.is_training:
                v = self._apply_gradient_overrider(node, name, overrider, v)
            self.variables.setdefault(node, {})[key] = v
            return v

        @contextlib.contextmanager
        def custom_scope():
            # we do not have direct access to variable creation,
            # so scope must be used.
            # FIXME there is currently no possible workaround for
            # auto-generated `name_scope` from `variable_scope` with names that
            # are being uniquified.  See #39.
            var_scope = tf.variable_scope(
                path, reuse=self.reuse, custom_getter=custom_getter)
            with var_scope as scope:
                yield scope

        scope_list.append(custom_scope())
Example #11
 def _update(self):
     # update positives mask and mean values
     value = self.session.run(self.before)
     # divide them into two groups
     # mean = util.mean(value)
     mean = 0.0
     # find two central points
     positives = value > mean
     self.positives = positives
     self.positives_mean = util.mean(value[util.where(positives)])
     negatives = util.logical_and(util.logical_not(positives), value != 0)
     self.negatives_mean = util.mean(value[util.where(negatives)])
     if self.positives_mean.eval() == 0 or self.negatives_mean.eval() == 0:
         log.warn(
             'Means are skewed: positive mean is {} and '
             'negative mean is {}.'.format(
                 self.positives_mean.eval(), self.negatives_mean.eval()))
     # update internal quantizer
     self.quantizer.update()
     for quantizer in self.parameter_quantizers.values():
         quantizer.update()
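Stripped of the session and the util wrappers, the positive/negative split above reduces to the NumPy sketch below; the function name is illustrative. Values exactly at zero are excluded from both groups, matching the value != 0 mask above.

import numpy as np

def split_means(value):
    # Partition non-zero entries around zero and average each side.
    positives = value > 0
    negatives = np.logical_and(~positives, value != 0)
    positives_mean = value[positives].mean() if positives.any() else 0.0
    negatives_mean = value[negatives].mean() if negatives.any() else 0.0
    return positives_mean, negatives_mean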
Example #12
 def create_overrider(overriders):
     # note: `self` and `layer_node` are not parameters here; they are
     # presumably captured from the enclosing scope in the original code.
     for name, p in overriders.items():
         if p.get('type'):
             continue
         raise TypeError(
             'We expect a mapping of name-overrider pairs, overrider '
             'named {!r} does not have a type.'.format(name))
     if all(not p.get('_priority') for p in overriders.values()):
         log.warn(
             'Priority not specified for a sequence of overriders '
             'in layer {!r}, which may result in unexpected ordering.'
             .format(layer_node.formatted_name()))
     overriders = list(reversed(sorted(
         overriders.values(), key=lambda p: p.get('_priority', 0))))
     overriders = [
         cls(session=self.session, **p)
         for cls, p in multi_objects_from_params(overriders)]
     if len(overriders) == 1:
         return overriders[0]
     return ChainOverrider(session=self.session, overriders=overriders)
Example #13
 def load(self, key=_checkpoint_latest):
     if key is False or (key != 0 and not key):
         log.debug('Checkpoint loading disabled.')
         return []
     try:
         path = self._path(key, False)
     except CheckpointManifestNotFoundError as e:
         log.warn('{} Abort load.'.format(e))
         return []
     reader = tf.train.NewCheckpointReader(path)
     var_shape_map = reader.get_variable_to_shape_map()
     var_dtype_map = reader.get_variable_to_dtype_map()
     restore_vars = []
     missing_vars = []
     for v in self._global_variables():
         base_name, _ = v.name.split(':')
         shape = var_shape_map.get(base_name, None)
         if shape is None:
             missing_vars.append(base_name)
             continue
         v_shape = v.shape.as_list()
         if shape != v_shape:
             v_shape = format_shape(v_shape)
             shape = format_shape(shape)
             log.warn(
                 'Variable named {!r} has shape ({}) mismatching '
                 'the shape ({}) in checkpoint, not loading it.'.format(
                     base_name, v_shape, shape))
             continue
         dtype = var_dtype_map.get(base_name, None).base_dtype
         v_dtype = v.dtype.base_dtype
         if dtype != v_dtype:
             log.warn(
                 'Variable named {!r} has dtype {!r} mismatching '
                 'the dtype {!r} in checkpoint, not loading it.'.format(
                     base_name, v_dtype.name, dtype.name))
             continue
         restore_vars.append(v)
     # variable not restored
     not_restore_vars = []
     restore_var_names = [v.name.split(':')[0] for v in restore_vars]
     for v in var_shape_map:
         if v not in restore_var_names:
             not_restore_vars.append(v)
     desc = 'Variables in checkpoint but not restored'
     print_variables(desc, not_restore_vars, 'warn')
     # variables missing
     desc = 'Variables to be restored but missing in checkpoint'
     print_variables(desc, missing_vars, 'warn')
     # variables to restore
     desc = 'Checkpoint variables to restore'
     print_variables(desc, (v.name for v in restore_vars), 'debug')
     # restore
     restorer = tf.train.Saver(restore_vars)
     restorer.restore(self.tf_session, path)
     log.debug('Checkpoint restored.')
     return restore_vars
Example #14
 def load(self, key=_checkpoint_latest):
     if key is False or (key != 0 and not key):
         log.debug('Checkpoint loading disabled.')
         return []
     try:
         path = self._path(key, False)
     except CheckpointManifestNotFoundError as e:
         log.warn('{} Abort load.'.format(e))
         return []
     reader = tf.train.NewCheckpointReader(path)
     var_shape_map = reader.get_variable_to_shape_map()
     restore_vars = []
     missing_vars = []
     for v in self._global_variables():
         base_name, _ = v.name.split(':')
         shape = var_shape_map.get(base_name, None)
         if shape is None:
             missing_vars.append(base_name)
             continue
         v_shape = v.shape.as_list()
         if shape != v_shape:
             msg = ('Variable named {!r} has shape ({}) mismatching the '
                    'shape ({}) in the checkpoint; not loading it.')
             msg = msg.format(base_name, format_shape(v_shape),
                              format_shape(shape))
             log.warn(msg)
             continue
         restore_vars.append(v)
     # variable not restored
     not_restore_vars = []
     restore_var_names = [v.name.split(':')[0] for v in restore_vars]
     for v in var_shape_map:
         if v not in restore_var_names:
             not_restore_vars.append(v)
     if not_restore_vars:
         log.debug(
             'Variables in checkpoint but not restored:\n    {}'.format(
                 '\n    '.join(not_restore_vars)))
     # variables missing
     if missing_vars:
         log.warn('Variables missing in checkpoint:\n    {}'.format(
             '\n    '.join(missing_vars)))
     log.debug('Checkpoint variables to restore:\n    {}'.format(
         '\n    '.join(v.name for v in restore_vars)))
     restorer = tf.train.Saver(restore_vars)
     restorer.restore(self.tf_session, path)
     log.debug('Checkpoint restored.')
     return restore_vars
Example #15
 def _init_system_config(self):
     root = os.path.dirname(__file__)
     self.yaml_update(os.path.join(root, 'system.yaml'))
     if os.environ.pop('CUDA_VISIBLE_DEVICES', None):
         log.warn('Ignoring "CUDA_VISIBLE_DEVICES", as it is overridden '
                  'by "system.visible_gpus".')