class RandomBinary(Initializer):
    _label = INITIALIZER.RANDOM_BINARY_LABEL
    """
    Initialize an array of the given shape with random binary values.
    """
    @MType(seed=OneOfType(int, None))
    def __init__(self, *, seed=None):
        self._rbinary_t = None
        if seed is not None and seed < 0:
            warnings.warn('Seed must be > 0. Reset to None.', UserWarning)
            seed = None  # discard the invalid seed instead of keeping it
        self._seed = seed
        self._rng = np.random.RandomState(seed=self._seed)
        super().__init__()

    @MType((int, ), pzero=float, dtype=type(np.dtype), reuse=bool)
    def __call__(self, shape, *, pzero=0.5, dtype=np.int8, reuse=False):
        (row_size, col_size) = shape
        if pzero >= 1 or pzero <= 0:
            warnings.warn('Probability of zeros must be > 0 and < 1. Reset to 0.5.', UserWarning)
            pzero = 0.5
        # warn before regenerating so a shape mismatch under reuse is actually reported
        if reuse and self._rbinary_t is not None and self._rbinary_t.shape != shape:
            warnings.warn('Unable to reuse last random binary because the shape is different.', UserWarning)
        if self._rbinary_t is None or self._rbinary_t.shape != shape or not reuse:
            # honor the requested dtype; np.random.binomial returns int64 by default
            self._rbinary_t = self._rng.binomial(size=shape, n=1, p=1 - pzero).astype(dtype)
        return self._rbinary_t.copy()

    # ------------------------------------------------------------------------
    @MType(as_json=bool, beautify_json=bool)
    def snapshot(self, *, as_json=False, beautify_json=True):
        """
        Return initializer state snapshot as a dict.
        Arguments:
            as_json: set to True to convert and return dict as JSON
            beautify_json: set to True to beautify JSON
        Returns:
            dict
        """
        snapshot = super().snapshot(as_json=False, beautify_json=False)
        snapshot.update({
            'seed': self._seed,
            'dtype': str(self._rbinary_t.dtype) if self._rbinary_t is not None else None
            # 'values': self._rbinary_t.tolist() if self._rbinary_t is not None else None
        })
        if as_json:
            if beautify_json:
                return json.dumps(snapshot, indent=4, sort_keys=False)
            else:
                return json.dumps(snapshot)
        else:
            return snapshot.copy()
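# A minimal usage sketch for RandomBinary (hypothetical helper name, assuming this module's
# numpy import): draw a 3x4 binary mask with roughly 70% zeros and confirm that calling again
# with reuse=True and the same shape returns the cached draw.
def _demo_random_binary():
    init = RandomBinary(seed=42)
    mask_t = init((3, 4), pzero=0.7)               # ~70% of entries are 0, the rest are 1
    same_t = init((3, 4), pzero=0.7, reuse=True)   # reuses the cached draw for the same shape
    assert (mask_t == same_t).all()
    print(init.snapshot(as_json=True))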
class Diagonal(Initializer): _label = INITIALIZER.DIAGONAL_LABEL """ Initialize an array with shape with a diagonal """ @MType(OneOfType(int, float)) def __init__(self, value): self._value = value self._diagonal_t = None super().__init__() @MType((int, ), dtype=type(np.dtype)) def __call__(self, shape, *, dtype=np.float32): if self._diagonal_t is None or self._diagonal_t.shape != shape: self._diagonal_t = np.zeros(shape=shape, dtype=dtype) np.fill_diagonal(self._diagonal_t, self._value, wrap=False) return self._diagonal_t.copy() # ------------------------------------------------------------------------ @MType(as_json=bool, beautify_json=bool) def snapshot(self, *, as_json=False, beautify_json=True): """ Return initializer state snapshot as a dict. Arguments: as_json: set to True to convert and return dict as JSON beautify_json: set to True to beautify JSON Returns: dict """ snapshot = super().snapshot(as_json=False, beautify_json=False) snapshot.update({ 'dtype': str(self._diagonal_t.dtype) if self._diagonal_t is not None else None, 'value': self._value }) if as_json: if beautify_json: return json.dumps(snapshot, indent=4, sort_keys=False) else: return json.dumps(snapshot) else: return snapshot.copy()
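# A small usage sketch for Diagonal (hypothetical helper name): fill the main diagonal of a
# 3x3 array with 2.5 and leave every other entry at zero.
def _demo_diagonal():
    init = Diagonal(2.5)
    d_t = init((3, 3))
    # expected:
    # [[2.5 0.  0. ]
    #  [0.  2.5 0. ]
    #  [0.  0.  2.5]]
    print(d_t)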
class SigmoidCrossentropyLoss(Objective):
    _label = OBJECTIVE.SIGMOID_CROSSENTROPY_LOSS
    """
    Objective using sigmoid (binary) cross-entropy as the loss function.
    Arguments:
        size: objective size
        name: objective name
        metric: loss and accuracy metrics
    """
    @MType(size=int, name=str, metric=(str,))
    def __init__(self, *, size=1, name='', metric=('loss', 'accuracy')):
        super().__init__(size=size, name=name)
        self.reconfig(metric=metric)

    # ------------------------------------------------------------------------
    @MType(shape=OneOfType((int,), None), metric=OneOfType((str,), None))
    def reconfig(self, *, shape=None, metric=None):
        """
        Reconfig objective.
        Arguments:
            shape: objective layer shape
            metric: loss metric
        """
        if metric is not None:
            known_metrics = ('loss', 'accuracy', 'acc', 'recall', 'rc', 'precision', 'prec', 'f1_score', 'f1')
            if any(m in known_metrics for m in metric):
                if 'loss' in metric:
                    self._evaluation['metric']['loss'] = 0
                if 'accuracy' in metric or 'acc' in metric:
                    self._evaluation['metric']['accuracy'] = 0
                if 'recall' in metric or 'rc' in metric:
                    self._evaluation['metric']['recall'] = 0
                if 'precision' in metric or 'prec' in metric:
                    self._evaluation['metric']['precision'] = 0
                if 'f1_score' in metric or 'f1' in metric:
                    self._evaluation['metric']['f1_score'] = 0
            else:
                raise TypeError(f'Unknown metric {metric} for objective {self.name}.')
        if shape is not None:
            super().reconfig(shape=shape)
        self.reset()

    @MType(dict, np.ndarray, residue=dict)
    @MShape(axis=1)
    def forward(self, stage, a_t, *, residue={}):
        """
        Do forward pass method.
        Arguments:
            stage: forward stage
            a_t: post-nonlinearity (a) tensor
            residue:
        Returns:
            layer
        """
        # numerically stable sigmoid: exp(-logaddexp(0, -a)) = 1 / (1 + exp(-a))
        sigmoid_of_a_t = np.exp(-np.logaddexp(0, -a_t + 1e-12))
        return super().forward(stage, sigmoid_of_a_t, residue=residue)

    @MType(np.ndarray, np.ndarray, dict)
    def compute_loss(self, y_t, y_prime_t, *, residue={}):
        """
        Compute the loss.
        Arguments:
            y_t: output (y) tensor
            y_prime_t: expected output (y) tensor
            residue:
        Returns:
            tuple
        """
        y_prime_t = y_prime_t.astype(np.float32)
        ly_t = -(y_prime_t * np.log(y_t + 1e-12) + (1 - y_prime_t) * np.log((1 - y_t) + 1e-12))
        return (ly_t, residue)

    @MType(np.ndarray, np.ndarray, dict)
    def compute_loss_grad(self, y_t, y_prime_t, *, residue={}):
        """
        Compute the loss gradient tensor for gradient descent update.
        Arguments:
            y_t: output (y) tensor
            y_prime_t: expected output (y) tensor
            residue:
        Returns:
            tuple
        """
        ey_t = y_t - y_prime_t
        eyg_t = ey_t
        return (eyg_t, residue)

    @MType(np.ndarray, np.ndarray, np.ndarray, dict)
    def compute_evaluation_metric(self, y_t, y_prime_t, ly_t, evaluation_metric):
        """
        Compute the evaluation metric.
        Arguments:
            y_t: output (y) tensor
            y_prime_t: expected output (y) tensor
            ly_t: loss tensor
        Returns:
            metric
        """
        if 'loss' in evaluation_metric:
            evaluation_metric['loss'] += ly_t.mean()
        if 'accuracy' in evaluation_metric:
            evaluation_metric['accuracy'] += np.equal(y_prime_t, y_t.round()).astype(np.int8).mean()
        if 'recall' in evaluation_metric or 'precision' in evaluation_metric or 'f1_score' in evaluation_metric:
            y_t = np.round(y_t)
            true_pos = np.sum(np.multiply(y_t, y_prime_t), axis=0).astype(np.float64)
            # true_neg = np.sum(np.multiply((1 - y_t), (1 - y_prime_t)), axis=0).astype(np.float64)
            false_pos = np.sum(np.multiply(y_t, (1 - y_prime_t)), axis=0).astype(np.float64)
            false_neg = np.sum(np.multiply((1 - y_t), y_prime_t), axis=0).astype(np.float64)
            recall = true_pos / (true_pos + false_neg + 1e-12)
            precision = true_pos / (true_pos + false_pos + 1e-12)
            # accumulate like the other metrics; the evaluation_metric property averages by count
            if 'recall' in evaluation_metric:
                evaluation_metric['recall'] += recall.mean()
            if 'precision' in evaluation_metric:
                evaluation_metric['precision'] += precision.mean()
            if 'f1_score' in evaluation_metric:
                evaluation_metric['f1_score'] += (2 * np.multiply(precision, recall) / (precision + recall + 1e-12)).mean()
        return evaluation_metric
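# A standalone worked example of the recall/precision/f1 arithmetic used above on a tiny batch
# (hypothetical helper name, plain numpy): predictions round to [1, 0, 1, 0] against targets
# [1, 0, 0, 1], so tp = fp = fn = 1 and recall = precision = f1 = 0.5.
def _demo_binary_metrics():
    import numpy as np
    y_t = np.array([[0.9], [0.2], [0.8], [0.4]])        # predictions
    y_prime_t = np.array([[1.0], [0.0], [0.0], [1.0]])  # expected outputs
    y_r = np.round(y_t)
    true_pos = np.sum(y_r * y_prime_t, axis=0)           # -> 1
    false_pos = np.sum(y_r * (1 - y_prime_t), axis=0)    # -> 1
    false_neg = np.sum((1 - y_r) * y_prime_t, axis=0)    # -> 1
    recall = true_pos / (true_pos + false_neg + 1e-12)       # 0.5
    precision = true_pos / (true_pos + false_pos + 1e-12)    # 0.5
    f1 = 2 * precision * recall / (precision + recall + 1e-12)  # 0.5
    print(recall.mean(), precision.mean(), f1.mean())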
class AlgebraicLoss(Objective): _label = OBJECTIVE.ALGEBRAIC_LOSS_LABEL """ Arguments: size: objective size name: objective name metric: loss metric """ @MType(size=int, name=str, metric=(str,)) def __init__(self, *, size=1, name='', metric=('loss',)): self._cache = None super().__init__(size=size, name=name) self.reconfig(metric=metric) # ------------------------------------------------------------------------ @MType(shape=OneOfType((int,), None), metric=OneOfType((str,), None)) def reconfig(self, *, shape=None, metric=None): """ Reconfig objective Arguments: shape: objective layer shape metric: loss metric """ if metric is not None: if 'loss' in metric or ('accuracy' or 'acc') in metric: if 'loss' in metric: self._evaluation['metric']['loss'] = 0 if ('accuracy' or 'acc') in metric or \ ('recall' or 'rc') in metric or \ ('precision' or 'prec') in metric or \ ('f1_score' or 'f1') in metric: warnings.warn(f'Algebraic loss objective only have loss metric. Ignoring metrics {metric}', UserWarning) else: raise TypeError(f'Unknown metric {metric} for objective {self.name}.') if shape is not None: super().reconfig(shape=shape) self.reset() @MType(np.ndarray, np.ndarray, dict) def compute_loss(self, y_t, y_prime_t, *, residue={}): """ Compute the loss. Arguments: y_t: output (y) tensor y_prime_t: expected output (y) tensor residue: Returns: tuple """ ey_t = y_t - y_prime_t sqr_of_ey_t = np.square(ey_t) inv_of_ey_t = 1 / (1 + sqr_of_ey_t) inv_sqrt_of_ey_t = np.sqrt(inv_of_ey_t) ly_t = np.multiply(sqr_of_ey_t, inv_sqrt_of_ey_t) self._cache = (sqr_of_ey_t, inv_of_ey_t, inv_sqrt_of_ey_t) return (ly_t, residue) @MType(np.ndarray, np.ndarray, dict) def compute_loss_grad(self, y_t, y_prime_t, *, residue={}): """ Compute the loss gradient tensor for gradient descent update. Arguments: y_t: output (y) tensor y_prime_t: expected output (y) tensor residue: Returns: tuple """ ey_t = y_t - y_prime_t (sqr_of_ey_t, inv_of_ey_t, inv_sqrt_of_ey_t) = self._cache eyg_t = np.multiply(2 * ey_t + np.multiply(ey_t, sqr_of_ey_t), np.multiply(inv_of_ey_t, inv_sqrt_of_ey_t)) return (eyg_t, residue) @MType(np.ndarray, np.ndarray, np.ndarray, dict) def compute_evaluation_metric(self, y_t, y_prime_t, ly_t, evaluation_metric): """ Compute the evaluation metric. Arguments: y_t: output (y) tensor y_prime_t: expected output (y) tensor ly_t: loss tensor Returns: metric """ if 'loss' in evaluation_metric: evaluation_metric['loss'] += ly_t.mean() return evaluation_metric
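# compute_loss above evaluates L(e) = e**2 / sqrt(1 + e**2) with e = y - y', and
# compute_loss_grad returns (2*e + e**3) / (1 + e**2)**1.5. A standalone finite-difference
# check of that closed-form gradient (hypothetical helper name):
def _demo_algebraic_loss_grad():
    import numpy as np
    e = np.linspace(-2.0, 2.0, 9)
    loss = lambda v: np.square(v) / np.sqrt(1.0 + np.square(v))
    grad = (2 * e + e ** 3) / np.power(1.0 + np.square(e), 1.5)  # closed form used above
    h = 1e-6
    num_grad = (loss(e + h) - loss(e - h)) / (2 * h)             # central difference
    assert np.allclose(grad, num_grad, atol=1e-5)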
class Objective(Layer): _label = OBJECTIVE.LABEL _arrangement = OBJECTIVE.ARRANGEMENT """ Abtraction of a base objective layer. Manages objective loss. Arguments: size: objective size name: objective name metric: loss metric """ @MType(size=int, name=str, metric=(str,)) def __init__(self, *, size=1, name='', metric=('loss',)): self._y_t = None self._y_prime_t = None self._evaluation = { 'count': 0, 'metric': {} } self._residue = {} self._monitor = None super().__init__(shape=(1, size), name=name) self.reconfig(metric=metric) def __str__(self): return super().__str__() + '_' + OBJECTIVE.LABEL # ------------------------------------------------------------------------ @property def inputs(self): """ Get objective forward pass input tensor. Returns: tensor """ if self.has_prev: return self.prev.outputs else: return None @property def outputs(self): """ Get objective forward pass output tensor Returns: tensor """ if self._y_t is not None: return self._y_t.copy() else: return None @property def evaluation_metric(self): """ Get objective evaluation metric """ evaluation_count = self._evaluation['count'] evaluation_metric = copy.deepcopy(self._evaluation['metric']) if evaluation_count > 1: for key in evaluation_metric.keys(): evaluation_metric[key] /= evaluation_count return evaluation_metric def unassign_hooks(self): """ Unassign all callback functions """ self._monitor = None @MType(monitor=OneOfType(callable, None)) def assign_hook(self, *, monitor=None): """ Assign callback functions Arguments: monitor: callback function to do probing during forward/backward pass """ if monitor is not None: self._monitor = monitor def reset(self): """ Reset internal states. """ self._y_t = None self._y_prime_t = None self._residue = {} self._evaluation['count'] = 0 for key in self._evaluation['metric'].keys(): self._evaluation['metric'][key] = 0 @MType(shape=OneOfType((int,), None), metric=OneOfType((str,), None)) def reconfig(self, *, shape=None, metric=None): """ Reconfig objective Arguments: shape: objective layer shape metric: loss metric """ if metric is not None: if 'loss' in metric: self._evaluation['metric']['loss'] = 0 else: raise TypeError(f'Unknown metric {metric} for objective {self.name}.') if shape is not None: super().reconfig(shape=shape) self.reset() @MType(as_json=bool, beautify_json=bool) def snapshot(self, *, as_json=False, beautify_json=True): """ Return objective as a snapshot dict data Arguments: as_json: beautify_json: Returns: snapshot """ snapshot = super().snapshot(as_json=False, beautify_json=False) snapshot.update({ 'base_label': Objective.label + '_' + snapshot['base_label'], 'metric': tuple(self._evaluation['metric'].keys()) }) if as_json: if beautify_json: return json.dumps(snapshot, indent=4, sort_keys=False) else: return json.dumps(snapshot) else: return snapshot.copy() @MType(dict, np.ndarray, residue=dict) @MShape(axis=1) def forward(self, stage, a_t, *, residue={}): """ Do forward pass method. Arguments: stage: forward stage a_t: post-nonlinearity (a) tensor residue: Returns: layer """ self._y_t = a_t # a_t.copy() self._residue = residue if self._monitor is not None: report = { 'pass': '******', 'stage': stage, 'inputs': self.inputs, 'outputs': self.outputs, 'residue': residue } self._monitor(report) if self.has_next: warnings.warn(f'Objective {self.name} layer must be the last in connection. 
There should be no connection to next layer.', UserWarning) return self @MType(np.ndarray) @MShape(axis=1) def evaluate(self, y_prime_t): """ Get evaluation metric given the expected truth. Arguments: y_prime_t: expected output (y) tensor Returns: self """ self._evaluation['count'] += 1 self._y_prime_t = y_prime_t # y_prime_t.copy() evaluation_metric = self._evaluation['metric'] (ly_t, residue) = self.compute_loss(self._y_t, self._y_prime_t, residue=self._residue) metric = self.compute_evaluation_metric(self._y_t, self._y_prime_t, ly_t, evaluation_metric) self._evaluation['metric'] = metric self._residue = residue return self @MType(dict) def backward(self, stage): """ Do backward pass by passing the loss gradient tensor back to the prev link. Arguments: stage: backward stage Returns: layer """ if self._y_t is None: warnings.warn(f'Objective {self.name} cannot do backward pass. Need to run forward pass first.', UserWarning) return self elif self._y_prime_t is None: warnings.warn(f'Objective {self.name} cannot do backward pass. Need to run evaluation first.', UserWarning) return self else: hparam = stage['hparam'] batch_size = hparam['batch_size'] (eyg_t, residue) = self.compute_loss_grad(self._y_t, self._y_prime_t, residue=self._residue) eyg_t = eyg_t / batch_size if batch_size > 1 else eyg_t if self._monitor is not None: report = { 'pass': '******', 'stage': stage, 'error': self._ey_t, 'grad': { 'error': eyg_t }, 'evaluation': self._evaluation, 'residue': residue } self._monitor(report) if self.has_prev: return self.prev.backward(stage, eyg_t, residue=residue) else: warnings.warn(f'Objective {self.name} connection is incomplete. Missing connection to previous layer.', UserWarning) return self @abc.abstractmethod def compute_evaluation_metric(self): """ Compute the evaluation metric. """ pass @abc.abstractmethod def compute_loss(self): """ Compute the loss tensor. Not implemented """ pass @abc.abstractmethod def compute_loss_grad(self): """ Compute the loss gradient tensor for backpropagation. Not implemented """ pass
class LogCoshLoss(Objective): _label = OBJECTIVE.LOG_COSH_LOSS_LABEL """ Objective using log-cosh loss for loss functionself. `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly like the l2 loss, but will not be so strongly affected by the occasional wildly incorrect prediction. """ # ------------------------------------------------------------------------ @MType(shape=OneOfType((int,), None), metric=OneOfType((str,), None)) def reconfig(self, *, shape=None, metric=None): """ Reconfig objective Arguments: shape: objective layer shape metric: loss metric """ if metric is not None: if 'loss' in metric or ('accuracy' or 'acc') in metric: if 'loss' in metric: self._evaluation['metric']['loss'] = 0 if ('accuracy' or 'acc') in metric or \ ('recall' or 'rc') in metric or \ ('precision' or 'prec') in metric or \ ('f1_score' or 'f1') in metric: warnings.warn(f'Log-cosh loss objective only have loss metric. Ignoring metrics {metric}', UserWarning) else: raise TypeError(f'Unknown metric {metric} for objective {self.name}.') if shape is not None: super().reconfig(shape=shape) self.reset() @MType(np.ndarray, np.ndarray, dict) def compute_loss(self, y_t, y_prime_t, *, residue={}): """ Compute the loss. Arguments: y_t: output (y) tensor y_prime_t: expected output (y) tensor residue: Returns: tuple """ ey_t = y_t - y_prime_t ly_t = np.log(np.cosh(ey_t) + 1e-12) return (ly_t, residue) @MType(np.ndarray, np.ndarray, dict) def compute_loss_grad(self, y_t, y_prime_t, *, residue={}): """ Compute the loss gradient tensor for gradient descent update. Arguments: y_t: output (y) tensor y_prime_t: expected output (y) tensor residue: Returns: tuple """ ey_t = y_t - y_prime_t eyg_t = np.tanh(ey_t) return (eyg_t, residue) @MType(np.ndarray, np.ndarray, np.ndarray, dict) def compute_evaluation_metric(self, y_t, y_prime_t, ly_t, evaluation_metric): """ Compute the evaluation metric. Arguments: y_t: output (y) tensor y_prime_t: expected output (y) tensor ly_t: loss tensor Returns: metric """ if 'loss' in evaluation_metric: evaluation_metric['loss'] += ly_t.mean() return evaluation_metric
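# A quick numerical illustration of the docstring's claim above (hypothetical helper name):
# log(cosh(e)) tracks e**2 / 2 for small errors and abs(e) - log(2) for large ones.
def _demo_log_cosh_bounds():
    import numpy as np
    e = np.array([0.01, 0.1, 2.0, 10.0])
    logcosh = np.log(np.cosh(e))
    print(logcosh - 0.5 * e ** 2)              # ~0 for small e
    print(logcosh - (np.abs(e) - np.log(2)))   # ~0 for large e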
class FeedForward(object, metaclass=FeedForward): _label = FEED_FORWARD.LABEL """ A feed forward class. Arguments: name: """ @MType(name=str) def __init__(self, *, name=''): self._name = name self._sequencer = None self._hparam = { 'eta': OPTIMIZER.DEFAULT_ETA, 'eta_decay': OPTIMIZER.DEFAULT_ETA_DECAY, 'beta_decay1': OPTIMIZER.DEFAULT_BETA_DECAY1, 'beta_decay2': OPTIMIZER.DEFAULT_BETA_DECAY2, 'momentum': OPTIMIZER.DEFAULT_MOMENTUM, 'l1_lambda': REGULARIZER.DEFAULT_L1_LAMBDA, 'l2_lambda': REGULARIZER.DEFAULT_L2_LAMBDA } self._setup_completed = False self._eta_scheduler = None self._monitor = None self._checkpoint = None def __str__(self): if self.name != '': return self.name + '_' + self.label else: return self.label # ------------------------------------------------------------------------ @property def label(self): """ Get feed forward label. Returns: str """ return type(self).label @property def name(self): """ Get feed forward name. Returns: """ return self._name @name.setter @MType(str) def name(self, name): """ Set feed forward name. Arguments: name: feed forward name """ self._name = name @property def sequence(self): """ Get feed forward sequence. Returns: """ if self.is_valid: return self._sequencer.sequence else: return None @property def is_valid(self): """ Check if feed forward has a valid sequence. Returns: bool """ return self._sequencer is not None and self._sequencer.is_valid @property def is_complete(self): """ Check if feed forward has a valid and complete sequence. Returns: bool """ return self.is_valid and self._sequencer.is_complete and self._setup_completed @MType(as_json=bool, beautify_json=bool) def snapshot(self, *, as_json=False, beautify_json=True): """ Return feed forward as a snapshot dict data. Arguments: as_json: beautify_json: Returns: dict """ model_snapshot = { 'name': self.name, 'label': self.label, 'base_label': FeedForward.label, 'hparam': self._hparam, 'sequencer': self._sequencer.snapshot(as_json=False, beautify_json=False) if self.is_complete else None } if as_json: if beautify_json: return json.dumps(model_snapshot, indent=4, sort_keys=False) else: return json.dumps(model_snapshot) else: return model_snapshot.copy() def unassign_hooks(self): """ Unassign all callback functions. """ self._eta_scheduler = None self._monitor = None self._checkpoint = None @MType(eta_scheduler=OneOfType(callable, None), checkpoint=OneOfType(callable, None), monitor=OneOfType(callable, None)) def assign_hook(self, *, eta_scheduler=None, checkpoint=None, monitor=None): """ Assign callback functions. Arguments: eta_scheduler: checkpoint: monitor: callback function to retreive and monitor report/summary """ if eta_scheduler is not None: self._eta_scheduler = eta_scheduler if checkpoint is not None: self._checkpoint = checkpoint if monitor is not None: self._monitor = monitor @MType(int) def on_epoch_begin(self, epoch): """ Arguments: epoch: """ pass @MType(int) def on_epoch_end(self, epoch): """ Arguments: epoch: """ pass def on_setup_completed(self): """ """ pass @abc.abstractmethod def construct(self): """ """ pass @property def summary(self): """ Get feed forward summary. 
Returns: """ nonlinear_gate_count = 0 linear_gate_count = 0 link_count = 0 total_param_count = 0 link_optim = '' layer_label = '' layer_shape = '' hdr = 'Layer\tIndex\tOptim\tType\t\tShape\t\tParams\n' div1 = '====================================================================================\n' div2 = ' ----------------------------------------------------------------------------\n' summary = f'### Feed forward {self.name} Summary ###\n' summary += hdr + div1 if self.is_valid: if self.sequence.head.name != '': summary += f'{self.sequence.head.name}:\n' for layer in self.sequence.head: param_count = 0 if isinstance(layer, Nonlinear): nonlinear_gate_count += 1 param_count += layer.size if isinstance(layer, Linear): linear_gate_count += 1 if isinstance(layer, BatchNorm): param_count += 2 * layer.shape[1] if isinstance(layer, Link): link_count += 1 param_count += (layer.shape[0] * layer.shape[1]) + layer.shape[0] if isinstance(layer, Link): link_optim = f'{layer.optim.label}' if layer.is_frozen: layer_label = f'{layer.label} (frozen)' else: layer_label = f'{layer.label}' elif isinstance(layer, Nonlinear): link_optim = '' layer_label = f'{layer.label}' else: link_optim = '' layer_label = f'{layer.label}' if isinstance(layer, Link): layer_shape = str(layer.shape) else: layer_shape = f'(*, {layer.size})' if layer.has_next: summary += div2 summary += f'\t{layer.index:<8d}{link_optim:<8s}{layer_label:<16s}{layer_shape:<16s}{param_count:<8d}\n' if layer.next.name != '' and layer.next.name != layer.name and layer.next.has_next: summary += f'{layer.next.name}:\n' else: summary += div1 total_param_count += param_count if self.is_complete: summary += f'Objective : {self.sequence.tail.label}\n' summary += f'Total number of params: {total_param_count}\n' summary += f'Total number of layers: {linear_gate_count + nonlinear_gate_count + link_count}\n' summary += f' {nonlinear_gate_count} nonlinear gate layers\n' summary += f' {link_count} link layers\n' return summary @MType(objective=OneOfType(str, Objective), metric=(str, ), optim=OneOfType(str, Optimizer, None), hparam=OneOfType(dict, None)) def setup(self, *, objective='mse', metric=('loss', ), optim=None, hparam=None): """ Setup the objective layer where the loss & loss gradient is calculated. Arguments: objective: objective layer metric: optim: hparam: Returns: self """ if self.is_complete: warnings.warn( f'Feed forward {self.name} sequence is completed and already setup. Setup skipped.', UserWarning) else: if not self.is_valid: sequencer = self.construct() if not sequencer.is_valid: raise RuntimeError( f'Constructed sequence from sequencer {sequencer.name} is invalid.' 
) self._sequencer = sequencer if 'linear' != self.sequence.tail.label: warnings.warn( f'Output sequence of sequencer {sequencer.name} is not linear.', UserWarning) size = self.sequence.tail.size if isinstance(objective, str): name = self._sequencer.name + '_' + Objective.label objective_label = objective if MAELoss.label == objective_label: self.sequence.tail.connect( MAELoss(size=size, name=name, metric=metric)).lock() elif MSELoss.label == objective_label: self.sequence.tail.connect( MSELoss(size=size, name=name, metric=metric)).lock() elif LogCoshLoss.label == objective_label: self.sequence.tail.connect( LogCoshLoss(size=size, name=name, metric=metric)).lock() elif XTanhLoss.label == objective_label: self.sequence.tail.connect( XTanhLoss(size=size, name=name, metric=metric)).lock() elif AlgebraicLoss.label == objective_label: self.sequence.tail.connect( AlgebraicLoss(size=size, name=name, metric=metric)).lock() elif SigmoidCrossentropyLoss.label == objective_label: self.sequence.tail.connect( SigmoidCrossentropyLoss(size=size, name=name, metric=metric)).lock() elif SoftmaxCrossentropyLoss.label == objective_label: self.sequence.tail.connect( SoftmaxCrossentropyLoss(size=size, name=name, metric=metric)).lock() else: raise TypeError( f'Unknown objective {objective_label} for objective layer.' ) else: if size != objective.size: objective.reconfig(shape=(1, size)) if metric is not None and metric != tuple( objective.evaluation_metric.keys()): objective.reconfig(metric=metric) warnings.warn( f'Overiding custom objective layer {objective.name} metric. Using metric {metric}.', UserWarning) self.sequence.tail.connect(objective).lock() self.sequence.tail.name = self._sequencer.name + objective.name self._setup_completed = True self.reconfig(optim=optim, hparam=hparam) self.on_setup_completed() return self @MType(optim=OneOfType(str, Optimizer, None), hparam=OneOfType(dict, None)) def reconfig(self, *, optim=None, hparam=None): """ Arguments: optim: hparam: Returns: self """ if not self.is_complete: raise RuntimeError( f'Feed forward {self.name} sequence is incomplete. Need to complete setup.' ) if hparam is not None: if 'eta' in hparam: if hparam['eta'] <= 0: warnings.warn( f'Learning rate eta cannot be <= 0. Reset to {OPTIMIZER.DEFAULT_ETA}.', UserWarning) hparam['eta'] = OPTIMIZER.DEFAULT_ETA if 'eta_decay' in hparam: if hparam['eta_decay'] < 0: warnings.warn( f'Learning rate eta decay cannot be < 0. Reset to {OPTIMIZER.DEFAULT_ETA_DECAY}.', UserWarning) hparam['eta_decay'] = OPTIMIZER.DEFAULT_ETA_DECAY if 'beta_decay1' in hparam: if hparam['beta_decay1'] < 0: warnings.warn( f'Optimization beta decay cannot be < 0. Reset to {OPTIMIZER.DEFAULT_BETA_DECAY1}.', UserWarning) hparam['beta_decay1'] = OPTIMIZER.DEFAULT_BETA_DECAY1 if 'beta_decay2' in hparam: if hparam['beta_decay2'] < 0: warnings.warn( f'Optimization beta decay cannot be < 0. Reset to {OPTIMIZER.DEFAULT_BETA_DECAY2}.', UserWarning) hparam['beta_decay2'] = OPTIMIZER.DEFAULT_BETA_DECAY1 if 'momentum' in hparam: if hparam['momentum'] < 0: warnings.warn( f'Optimization momentum cannot be < 0. Reset to {OPTIMIZER.DEFAULT_MOMENTUM}.', UserWarning) hparam['momentum'] = OPTIMIZER.DEFAULT_MOMENTUM if 'l1_lambda' in hparam: if hparam['l1_lambda'] < 0: warnings.warn( f'Regularization lambda cannot be < 0. Reset to {OPTIMIZER.DEFAULT_L1_LAMBDA}.', UserWarning) hparam['l1_lambda'] = OPTIMIZER.DEFAULT_L1_LAMBDA if 'l2_lambda' in hparam: if hparam['l2_lambda'] < 0: warnings.warn( f'Regularization lambda cannot be < 0. 
Reset to {OPTIMIZER.DEFAULT_L2_LAMBDA}.', UserWarning) hparam['l2_lambda'] = OPTIMIZER.DEFAULT_L2_LAMBDA self._hparam.update(hparam) if optim is not None: self._sequencer.reconfig_all(optim=optim) @MType(int, int) def compute_eta(self, epoch, epoch_limit): """ Get current learning rate Arguments: epoch: epoch_limit: Returns: eta: learning rate """ eta = self._hparam['eta'] if self._eta_scheduler is not None: eta = self._eta_scheduler(epoch, epoch_limit, eta) if not isinstance(eta, float) or eta < 0: raise TypeError( 'Learning rate value must be a positive floating point number.' ) else: eta_decay = self._hparam['eta_decay'] if eta_decay > 0: eta *= math.pow(eta_decay, epoch / epoch_limit) return eta @MType(np.ndarray) def predict(self, x_t): """ Do forward prediction with a given input tensor. Arguments: x_t: input tensor Returns: y_t: output prediction tensor """ if not self.is_complete: raise RuntimeError( f'Feed forward {self.name} sequence is incomplete. Need to complete setup.' ) if len(x_t.shape) != 2: raise RuntimeError( 'Input tensor shape size is invalid. Input tensor shape must have a length of 2.' ) (input_sample_size, input_feature_size) = x_t.shape if input_feature_size != self.sequence.head.size: raise ValueError( f'Input tensor feature size does not match the size of input layer of {self.sequence.head.size}.' ) # x_t = x_t.copy() stage = { 'epoch': 0, 'mode': 'predicting', 'hparam': copy.deepcopy(self._hparam) } # tstart_ns = time.process_time_ns() tstart_us = time.process_time() self.sequence.head.forward(stage, x_t) # tend_ns = time.process_time_ns() tend_us = time.process_time() # elapse_per_epoch_ms = int(round((tend_ns - tstart_ns) * 0.000001)) elapse_per_epoch_ms = int(round((tend_us - tstart_us) * 1000)) if self._monitor is not None: report = { 'name': self.name, 'stage': stage, 'elapse': { 'per_epoch_ms': elapse_per_epoch_ms, 'total_ms': elapse_per_epoch_ms, } } self._monitor(report) return self.sequence.tail.outputs @MType(np.ndarray, np.ndarray, epoch_limit=int, batch_size=int, tl_split=float, tl_shuffle=bool, verbose=bool) def learn(self, x_t, y_prime_t, *, epoch_limit=FEED_FORWARD.DEFAULT_EPOCH_LIMIT, batch_size=1, tl_split=0, tl_shuffle=False, verbose=True): """ Arguments: x_t: y_prime_t: epoch_limit: batch_size: tl_split: tl_shuffle: verbose: """ if not self.is_complete: raise RuntimeError( f'Feed forward {self.name} sequence is incomplete. Need to complete setup.' ) if len(x_t.shape) != 2: raise RuntimeError( 'Input tensor shape size is invalid. Input tensor shape must have a length of 2.' ) elif len(y_prime_t.shape) != 2: raise RuntimeError( 'Expected output tensor shape size is invalid. Output tensor shape must have a length of 2.' ) (input_sample_size, input_feature_size) = x_t.shape (expected_output_sample_size, expected_output_prediction_size) = y_prime_t.shape if input_feature_size != self.sequence.head.size: raise ValueError( f'Input tensor feature size does not match the size of input layer of {self.sequence.head.size}.' ) if expected_output_prediction_size != self.sequence.tail.size: raise ValueError( f'Expected output tensor prediction size does not match the size of output layer of {self.sequence.tail.size}.' 
) if expected_output_sample_size != input_sample_size: raise ValueError( 'Input and output tensor sample sizes do not matched.') if tl_shuffle: shuffler = np.random.permutation(input_sample_size) x_t = x_t[shuffler] # .copy() y_prime_t = y_prime_t[shuffler] # .copy() # else: # x_t = x_t.copy() # y_prime_t = y_prime_t.copy() if tl_split < 0 or tl_split > 0.5: tl_split = 0 warnings.warn( 'Testing and learning split ratio must be >= 0 and <= 0.5. Reset testing and learning split ratio to 0.', UserWarning) enable_testing = tl_split > 0 if enable_testing: if input_sample_size == 1: learning_sample_size = input_sample_size enable_testing = False warnings.warn( 'Input sample size = 1. Reset testing and learning split ratio to 0.', UserWarning) else: learning_sample_size = int(input_sample_size * (1 - tl_split)) learning_sample_size = learning_sample_size - learning_sample_size % batch_size testing_sample_size = input_sample_size - learning_sample_size else: learning_sample_size = input_sample_size if batch_size < 1 or batch_size > learning_sample_size: batch_size = learning_sample_size warnings.warn( f'Batch size must be >= 1 and <= learning sample size {learning_sample_size}. Set batch size = learning sample size.', UserWarning) stop_learning = False stage = {'epoch': 0, 'mode': '', 'hparam': copy.deepcopy(self._hparam)} stage['hparam']['batch_size'] = batch_size elapse_total_ms = 0 for layer in self.sequence.head: if isinstance(layer, Link) or isinstance(layer, BatchNorm): layer.optim.reset() for epoch in range(epoch_limit): self.on_epoch_begin(epoch) tstart_us = time.process_time() # tstart_ns = time.process_time_ns() self.sequence.tail.reset() stage['epoch'] = epoch stage['mode'] = 'learning' stage['hparam']['eta'] = self.compute_eta(epoch, epoch_limit) if batch_size == learning_sample_size: batched_x_t = x_t[:learning_sample_size] batched_y_prime_t = y_prime_t[:learning_sample_size] self.sequence.head.forward( stage, batched_x_t).evaluate(batched_y_prime_t).backward(stage) else: for i in range(learning_sample_size): if (i + batch_size) < learning_sample_size: batched_x_t = x_t[i:i + batch_size] batched_y_prime_t = y_prime_t[i:i + batch_size] # else: # batched_x_t = x_t[i: learning_sample_size] # batched_y_prime_t = y_prime_t[i: learning_sample_size] self.sequence.head.forward(stage, batched_x_t).evaluate( batched_y_prime_t).backward(stage) learning_evaluation_metric = self.sequence.tail.evaluation_metric if enable_testing: stage['mode'] = 'learning_and_testing' self.sequence.tail.reset() self.sequence.head.forward( stage, x_t[learning_sample_size:]).evaluate( y_prime_t[learning_sample_size:]) testing_evaluation_metric = self.sequence.tail.evaluation_metric if self._checkpoint is not None: stop_learning = self._checkpoint( epoch, learning_evaluation_metric, testing_evaluation_metric) else: stop_learning = False else: if self._checkpoint is not None: stop_learning = self._checkpoint( epoch, learning_evaluation_metric, None) else: stop_learning = False tend_us = time.process_time() elapse_per_epoch_ms = int(round((tend_us - tstart_us) * 1000)) # tend_ns = time.process_time_ns() # elapse_per_epoch_ms = int(round((tend_ns - tstart_ns) * 0.000001)) elapse_total_ms += elapse_per_epoch_ms self.on_epoch_end(epoch) if self._monitor is not None: stage['mode'] = 'learning' self.sequence.tail.reset() self.sequence.head.forward(stage, x_t[:learning_sample_size]) snapshot_learning_output_t = self.sequence.tail.outputs report = { 'name': self.name, 'stage': stage, 'epoch_limit': epoch_limit, 
'learning_sample_size': learning_sample_size, 'snapshot': { 'learning': { 'inputs': x_t[:learning_sample_size], 'expected_outputs': y_prime_t[:learning_sample_size], 'outputs': snapshot_learning_output_t } }, 'elapse': { 'per_epoch_ms': elapse_per_epoch_ms, 'total_ms': elapse_total_ms, }, 'evaluation_metric': { 'learning': learning_evaluation_metric } } if enable_testing: stage['mode'] = 'learning_and_testing' self.sequence.tail.reset() self.sequence.head.forward(stage, x_t[learning_sample_size:]) snapshot_testing_output_t = self.sequence.tail.outputs report['test_sample_size'] = testing_sample_size report['snapshot']['testing'] = { 'inputs': x_t[learning_sample_size:], 'expected_outputs': y_prime_t[learning_sample_size:], 'outputs': snapshot_testing_output_t } report['evaluation_metric'][ 'testing'] = testing_evaluation_metric self._monitor(report) if verbose: learning_rate = stage['hparam']['eta'] print( f'Epoch: {epoch + 1}/{epoch_limit} - Elapse/Epoch: {elapse_per_epoch_ms} ms - Elapse: {round(elapse_total_ms * 1e-3)} s', end='\n', flush=True) print(f'\tLearning rate: {learning_rate:.9f}', end='\n', flush=True) if enable_testing: learning_metric_summary = '' testing_metric_summary = '' for (metric_name, metric_value) in learning_evaluation_metric.items(): learning_metric_summary += f'{metric_name}: {metric_value:.9f} ' for (metric_name, metric_value) in testing_evaluation_metric.items(): testing_metric_summary += f'{metric_name}: {metric_value:.9f} ' print(f'\tLearning {learning_metric_summary}', end='\n', flush=True) print(f'\tTesting {testing_metric_summary}', end='\n', flush=True) else: learning_metric_summary = '' for (metric_name, metric_value) in learning_evaluation_metric.items(): learning_metric_summary += f'{metric_name}: {metric_value:.9f} ' print(f'\tLearning {learning_metric_summary}', end='\n', flush=True) if epoch == epoch_limit - 1: print('\n') if stop_learning: break @MType(str, save_as=OneOfType(str, None)) def save_snapshot(self, filepath, *, save_as=None): """ Save model snapshot to file. Arguments: filepath: save_as: """ if not self.is_complete: raise RuntimeError( f'Feed forward {self.name} sequence is incomplete. Need to complete setup.' ) if save_as is not None and save_as != '': filename = os.path.join(filepath, save_as + '.json') else: if self.name != '': filename = os.path.join(filepath, self.name + '.json') else: filename = os.path.join(filepath, 'untitled.json') with open(filename, 'w') as file: model_snapshot = self.snapshot(as_json=False, beautify_json=True) json.dump(model_snapshot, file, ensure_ascii=False) @MType(str, overwrite=bool) def load_snapshot(self, filename, *, overwrite=False): """ Load model snapshot from file. Arguments: filename: overwrite: Returns: self """ if self.is_valid and not overwrite: raise RuntimeError( f'Feed forward {self.name} sequence is valid. Cannot overwrite sequence.' ) with open(filename, 'r') as file: model_snapshot = json.load(file) hparam = model_snapshot['hparam'] sequencer_snapshot = model_snapshot['sequencer'] self._setup_completed = False self._sequencer = Sequencer().load_snapshot(sequencer_snapshot, overwrite=overwrite) sequence_snapshot = sequencer_snapshot['sequences'][-1] objective_label = sequence_snapshot['base_label'] if Objective.label in objective_label: objective = sequence_snapshot['label'] metric = tuple(sequence_snapshot['metric']) self.setup(objective=objective, metric=metric, hparam=hparam) self.name = model_snapshot['name'] return self
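# compute_eta above falls back to exponential decay, eta * eta_decay ** (epoch / epoch_limit),
# when no eta scheduler is assigned. A tiny standalone sketch of that schedule
# (hypothetical helper name, not part of the class):
def _demo_eta_decay():
    import math
    eta, eta_decay, epoch_limit = 0.01, 0.5, 100
    for epoch in (0, 25, 50, 100):
        print(epoch, eta * math.pow(eta_decay, epoch / epoch_limit))
    # epoch 0 -> 0.01, 25 -> ~0.00841, 50 -> ~0.00707, 100 -> 0.005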
class RandomUniform(Initializer):
    _label = INITIALIZER.RANDOM_UNIFORM_LABEL
    """
    Initialize an array of the given shape with random uniform values between min and max.
    """
    @MType(min=float, max=float, seed=OneOfType(int, None))
    def __init__(self, *, min=INITIALIZER.DEFAULT_RANDOM_UNIFORM_MIN, max=INITIALIZER.DEFAULT_RANDOM_UNIFORM_MAX, seed=None):
        self._runiform_t = None
        if min >= max:
            warnings.warn(
                f'Min must be < max. Reset to {INITIALIZER.DEFAULT_RANDOM_UNIFORM_MIN}, {INITIALIZER.DEFAULT_RANDOM_UNIFORM_MAX}.',
                UserWarning)
            # actually apply the reset the warning promises
            min = INITIALIZER.DEFAULT_RANDOM_UNIFORM_MIN
            max = INITIALIZER.DEFAULT_RANDOM_UNIFORM_MAX
        self._min = min
        self._max = max
        if seed is not None and seed < 0:
            warnings.warn('Seed must be > 0. Reset to None.', UserWarning)
            seed = None  # discard the invalid seed instead of keeping it
        self._seed = seed
        self._rng = np.random.RandomState(seed=self._seed)
        super().__init__()

    @MType((int, ), dtype=type(np.dtype), reuse=bool)
    def __call__(self, shape, *, dtype=np.float32, reuse=False):
        # warn before regenerating so a shape mismatch under reuse is actually reported
        if reuse and self._runiform_t is not None and self._runiform_t.shape != shape:
            warnings.warn('Unable to reuse last random uniform because the shape is different.', UserWarning)
        if self._runiform_t is None or self._runiform_t.shape != shape or not reuse:
            self._runiform_t = self._rng.uniform(low=self._min, high=self._max, size=shape).astype(dtype)
        return self._runiform_t.copy()

    # ------------------------------------------------------------------------
    @MType(as_json=bool, beautify_json=bool)
    def snapshot(self, *, as_json=False, beautify_json=True):
        """
        Return initializer state snapshot as a dict.
        Arguments:
            as_json: set to True to convert and return dict as JSON
            beautify_json: set to True to beautify JSON
        Returns:
            dict
        """
        snapshot = super().snapshot(as_json=False, beautify_json=False)
        snapshot.update({
            'seed': self._seed,
            'dtype': str(self._runiform_t.dtype) if self._runiform_t is not None else None,
            'min': self._min,
            'max': self._max,
            'spread': (self._max - self._min) / 2
            # 'values': self._runiform_t.tolist() if self._runiform_t is not None else None
        })
        if as_json:
            if beautify_json:
                return json.dumps(snapshot, indent=4, sort_keys=False)
            else:
                return json.dumps(snapshot)
        else:
            return snapshot.copy()
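# A small sanity check for RandomUniform (hypothetical helper name): every drawn value should
# fall inside the [min, max] bounds (up to float32 rounding).
def _demo_random_uniform_bounds():
    init = RandomUniform(min=-0.5, max=0.5, seed=7)
    u_t = init((4, 4))
    assert ((u_t >= -0.5) & (u_t <= 0.5)).all()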
class RandomOrthonormal(Initializer):
    _label = INITIALIZER.RANDOM_ORTHONORMAL_LABEL
    """
    Initialize an array of the given shape with a random orthonormal matrix.
    """
    @MType(seed=OneOfType(int, None))
    def __init__(self, *, seed=None):
        self._rorthonormal_m = None
        if seed is not None and seed < 0:
            warnings.warn('Seed must be > 0. Reset to None.', UserWarning)
            seed = None  # discard the invalid seed instead of keeping it
        self._seed = seed
        self._rng = np.random.RandomState(seed=self._seed)
        super().__init__()

    @MType((int, ), dtype=type(np.dtype), reuse=bool)
    def __call__(self, shape, *, dtype=np.float32, reuse=False):
        (row_size, col_size) = shape
        if row_size != col_size:
            raise ValueError('RandomOrthonormal initializer requires shape to be square with rows = cols.')
        # warn before regenerating so a shape mismatch under reuse is actually reported
        if reuse and self._rorthonormal_m is not None and self._rorthonormal_m.shape != shape:
            warnings.warn('Unable to reuse last random orthonormal because the shape is different.', UserWarning)
        if self._rorthonormal_m is None or self._rorthonormal_m.shape != shape or not reuse:
            i_m = np.identity(n=row_size, dtype=dtype)
            one_v = np.ones(shape=(row_size, ), dtype=dtype)
            for i in range(1, row_size):
                x_v = self._rng.normal(size=(row_size - i + 1, ))
                one_v[i - 1] = np.sign(x_v[0])
                x_v[0] -= one_v[i - 1] * np.sqrt((np.square(x_v)).sum())
                # householder transformation: H = I - 2 * outer(x, x) / (x . x)
                h_m = np.identity(n=(row_size - i + 1), dtype=dtype) - 2 * np.outer(x_v, x_v) / (np.square(x_v)).sum()
                mat = np.identity(n=row_size, dtype=dtype)
                mat[i - 1:, i - 1:] = h_m
                i_m = np.dot(i_m, mat)
            # fix the last sign such that the determinant is 1
            one_v[-1] = math.pow(-1, 1 - (row_size % 2)) * one_v.prod()
            # equivalent to np.dot(np.diag(one_v), i_m)
            i_m = np.multiply(one_v, i_m.transpose()).transpose()
            self._rorthonormal_m = i_m
        return self._rorthonormal_m.copy()

    # ------------------------------------------------------------------------
    @MType(as_json=bool, beautify_json=bool)
    def snapshot(self, *, as_json=False, beautify_json=True):
        """
        Return initializer state snapshot as a dict.
        Arguments:
            as_json: set to True to convert and return dict as JSON
            beautify_json: set to True to beautify JSON
        Returns:
            dict
        """
        snapshot = super().snapshot(as_json=False, beautify_json=False)
        snapshot.update({
            'seed': self._seed,
            'dtype': str(self._rorthonormal_m.dtype) if self._rorthonormal_m is not None else None
            # 'values': self._rorthonormal_m.tolist() if self._rorthonormal_m is not None else None
        })
        if as_json:
            if beautify_json:
                return json.dumps(snapshot, indent=4, sort_keys=False)
            else:
                return json.dumps(snapshot)
        else:
            return snapshot.copy()
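# A quick sanity check for RandomOrthonormal (hypothetical helper name), assuming the
# Householder construction above yields a special orthogonal matrix: Q @ Q.T should be the
# identity and det(Q) should be 1 (to float32 precision).
def _demo_random_orthonormal():
    import numpy as np
    q_m = RandomOrthonormal(seed=3)((5, 5))
    assert np.allclose(q_m @ q_m.T, np.identity(5), atol=1e-4)
    assert np.isclose(np.linalg.det(q_m), 1.0, atol=1e-4)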
def create(cls, layer, *, size=None, shape=None, name=''): """ Create a sequencer with layers. Arguments: size: shape: layer: name: Returns: callable """ @FType(OneOfType(callable, Sequencer, None)) def connect(preceded_sequencer): """ Connect new layer to preceded sequencer sequence. Arguments: preceded_sequencer: Returns: sequencer """ nonlocal layer nonlocal size if preceded_sequencer is None: preceded_sequencer = cls(name=name) elif callable(preceded_sequencer): preceded_sequencer = preceded_sequencer(None) sequence = None if isinstance(layer, str): layer_label = layer if size is None: if preceded_sequencer.is_valid: prev_layer_size = preceded_sequencer.sequence.tail.size size = prev_layer_size layer.reconfig(shape=(1, size)) else: warnings.warn( 'Gate layer size is not specified. Using size = 1.', UserWarning) size = 1 if Linear.label == layer_label: layer = Linear(size=size, name=name) elif ReLU.label == layer_label: layer = ReLU(size=size, name=name) elif LeakyReLU.label == layer_label: layer = LeakyReLU(size=size, name=name) elif ELU.label == layer_label: layer = ELU(size=size, name=name) elif SoftPlus.label == layer_label: layer = SoftPlus(size=size, name=name) elif Swish.label == layer_label: layer = Swish(size=size, name=name) elif Sigmoid.label == layer_label: layer = Sigmoid(size=size, name=name) elif Tanh.label == layer_label: layer = Tanh(size=size, name=name) elif Algebraic.label == layer_label: layer = Algebraic(size=size, name=name) else: raise TypeError(f'Unknown gate layer label {layer_label}.') if preceded_sequencer.is_valid: prev_layer_label = preceded_sequencer.sequence.tail.label prev_layer_size = preceded_sequencer.sequence.tail.size shape = (prev_layer_size, size) sequence = Link( shape=shape, name=name, weight_init='random_normal', weight_reg='not_use', bias_init='zeros' if BatchNorm.label != prev_layer_label else 'not_use', optim='sgd').connect(layer) else: sequence = layer elif isinstance(layer, Gate): if size is None: if preceded_sequencer.is_valid: prev_layer_size = preceded_sequencer.sequence.tail.size size = prev_layer_size layer.reconfig(shape=(1, size)) else: if size != layer.size: layer.reconfig(shape=(1, size)) if name != '': layer.name = name if preceded_sequencer.is_valid: prev_layer_label = preceded_sequencer.sequence.tail.label prev_layer_size = preceded_sequencer.sequence.tail.size shape = (prev_layer_size, size) sequence = Link( shape=shape, name=name, weight_init='random_normal', weight_reg='not_use', bias_init='zeros' if BatchNorm.label != prev_layer_label else 'not_use', optim='sgd').connect(layer) else: sequence = layer elif isinstance(layer, Socket): if not preceded_sequencer.is_valid: raise RuntimeError( f'Socket layer {layer_label} cannot be the first layer in sequence.' ) if size is None: if preceded_sequencer.is_valid: prev_layer_size = preceded_sequencer.sequence.tail.size size = prev_layer_size layer.reconfig(shape=(1, size)) else: if size != layer.size: layer.reconfig(shape=(1, size)) if name != '': layer.name = name sequence = layer if preceded_sequencer.is_valid: preceded_sequencer.sequence.tail.connect(sequence.head) else: preceded_sequencer._sequence = sequence if preceded_sequencer.sequence.is_singular: preceded_sequencer._valid_sequence = False else: if Gate.label in str(preceded_sequencer.sequence.head) and \ Gate.label in str(preceded_sequencer.sequence.tail) and \ Link.label in str(preceded_sequencer.sequence.tail.prev): preceded_sequencer._valid_sequence = True return preceded_sequencer return connect
# # Author Tuan Le ([email protected]) # # ------------------------------------------------------------------------ from __future__ import absolute_import from __future__ import division from __future__ import print_function import env import unittest from util.validation import (MShape, MType, FType, OneOfType) # ------------------------------------------------------------------------ @FType(str, b1=int, c1=str, d1=OneOfType((int, ), str)) def test_a(a1, *, b1=0, c1='c', d1=(1, 2)): print(a1) print(b1) print(c1) print(d1) # class Test(object): # @property # def shape(self): # return (3, 4) # # @MType(str, OneOfType(int, str)) # def test_a(self, a, b): # print(a)
class Socket(Layer): _label = SOCKET.LABEL _arrangement = SOCKET.ARRANGEMENT """ Abtraction of a base socket layer. Arguments: shape: socket shape name: socket name """ @MType(shape=(int, ), name=str) def __init__(self, *, shape=(1, 1), name=''): self._a_t = None self._monitor = None super().__init__(shape=shape, name=name) def __str__(self): return super().__str__() + '_' + SOCKET.LABEL # ------------------------------------------------------------------------ @property def inputs(self): """ Get socket forward pass input tensor Returns: """ if self.has_prev: return self.prev.outputs else: return None @property def outputs(self): """ Get socket forward pass output tensor """ if self._a_t is not None: return self._a_t.copy() else: return None def reset(self): """ Reset internal evaluation states """ self._a_t = None def unassign_hooks(self): """ Unassign all callback functions """ self._monitor = None @MType(monitor=OneOfType(callable, None)) def assign_hook(self, *, monitor=None): """ Assign callback functions Arguments: monitor: callback function to do probing during forward/backward pass """ if monitor is not None: self._monitor = monitor @MType(as_json=bool, beautify_json=bool) def snapshot(self, *, as_json=False, beautify_json=True): """ Return socket as a snapshot dict data Arguments: as_json: beautify_json: Returns: snapshot """ snapshot = super().snapshot(as_json=False, beautify_json=False) snapshot.update( {'base_label': Socket.label + '_' + snapshot['base_label']}) if as_json: if beautify_json: return json.dumps(snapshot, indent=4, sort_keys=False) else: return json.dumps(snapshot) else: return snapshot.copy() @MType(dict, np.ndarray, residue=dict) @MShape(axis=1, transpose=False) def forward(self, stage, a_t, *, residue={}): """ Do forward pass by passing through the input (a) tensor Arguments: stage: forward stage a_t: post-nonlinearity (a) tensor residue: Returns: tail """ (a_t, residue) = self.compute_forward_ops(stage, a_t, residue=residue) self._a_t = a_t if self._monitor is not None: report = { 'pass': '******', 'stage': stage, 'inputs': self.inputs, 'outputs': self.outputs, 'residue': residue } self._monitor(report) if self.has_next: return self.next.forward(stage, self._a_t, residue=residue) else: warnings.warn( f'Socket {self.name} connection is incomplete. Missing connection to next layer.', UserWarning) return self @MType(dict, np.ndarray, residue=dict) @MShape(axis=1, transpose=False) def backward(self, stage, eag_t, *, residue={}): """ Do backward backward pass by passing through the error gradient tensor w.r.t. nonlinearity Arguments: stage: backward stage eag_t: gradient error tensor w.r.t. post-nonlinearity (a) tensor residue: Returns: head """ (eag_t, residue) = self.compute_backward_ops(stage, eag_t, residue=residue) if self._monitor is not None: report = { 'pass': '******', 'stage': stage, 'grad': { 'ea': eag_t }, 'residue': residue } self._monitor(report) if self.has_prev: return self.prev.backward(stage, eag_t, residue=residue) else: warnings.warn( f'Socket {self.name} connection is incomplete. Missing connection to previous layer.', UserWarning) return self @abc.abstractmethod def compute_forward_ops(self): """ Compute the forwarded operation function. Not implemented. """ pass @abc.abstractmethod def compute_backward_ops(self): """ Compute the backwarded operation function. Not implemented. """ pass
class BatchNorm(Socket): _label = SOCKET.BATCH_NORMALIZER_LABEL """ Arguments: size: normalizer size name: normalizer name moving_mean_init: moving_variance_init: gamma_init: beta_init: optim: """ @MType(size=int, name=str, moving_mean_init=OneOfType(str, float, Initializer), moving_variance_init=OneOfType(str, float, Initializer), gamma_init=OneOfType(str, float, Initializer), beta_init=OneOfType(str, float, Initializer), optim=OneOfType(str, Optimizer)) def __init__(self, *, size=1, name='', moving_mean_init='zeros', moving_variance_init='ones', gamma_init='ones', beta_init='zeros', optim='sgdm'): self._frozen = False self._optim = None self._a_hat_t = None self._a_offset_t = None self._mean_v = None self._variance_v = None self._moving_mean_init = None self._moving_variance_init = None self._moving_mean_v = None self._moving_variance_v = None self._gamma_v = None self._beta_v = None self._gamma_init = None self._beta_init = None super().__init__(shape=(1, size), name=name) self.reconfig(moving_mean_init=moving_mean_init, moving_variance_init=moving_variance_init, gamma_init=gamma_init, beta_init=beta_init, optim=optim) # ------------------------------------------------------------------------ @property def is_frozen(self): """ Check if layer is frozen Returns: is frozen flag """ return self._frozen def freeze(self): """ Freeze layer """ self._frozen = True def unfreeze(self): """ Unfreeze layer """ self._frozen = False @property def optim(self): """ Get normalizer optimizer Returns: optimizer """ return self._optim @property def moving_means(self): """ Get normalizer moving mean vector Returns: moving mean vector """ if self._moving_mean_v is not None: return self._moving_mean_v.copy() else: return None @moving_means.setter @MType(np.ndarray) @MShape(axis=-1) def moving_means(self, moving_mean_v): """ Set normalizer moving mean vector """ if self.is_frozen: warnings.warn( f'Cannot set moving means to a frozen normalizer {self.name}.', UserWarning) else: np.copyto(self._moving_mean_v, moving_mean_v, casting='same_kind') @property def moving_variances(self): """ Get normalizer moving variance vector Returns: moving variance vector """ if self._moving_variance_v is not None: return self._moving_variance_v.copy() else: return None @moving_variances.setter @MType(np.ndarray) @MShape(axis=-1) def moving_variances(self, moving_variance_v): """ Set normalizer moving variance vector """ if self.is_frozen: warnings.warn( f'Cannot set moving variances to a frozen normalizer {self.name}.', UserWarning) else: np.copyto(self._moving_variance_v, moving_variance_v, casting='same_kind') @property def gammas(self): """ Get normalizer gamma vector Returns: gamma vector """ if self._gamma_v is not None: return self._gamma_v.copy() else: return None @gammas.setter @MType(np.ndarray) @MShape(axis=-1) def gammas(self, gamma_v): """ Set normalizer gamma vector """ if self.is_frozen: warnings.warn( f'Cannot set gammas to a frozen normalizer {self.name}.', UserWarning) else: np.copyto(self._gamma_v, gamma_v, casting='same_kind') @property def betas(self): """ Get normalizer beta vector Returns: beta vector """ if self._beta_v is not None: return self._beta_v.copy() else: return None @betas.setter @MType(np.ndarray) @MShape(axis=-1) def betas(self, beta_v): """ Set normalizer beta vector """ if self.is_frozen: warnings.warn( f'Cannot set betas to a frozen normalizer {self.name}.', UserWarning) else: np.copyto(self._beta_v, beta_v, casting='same_kind') def unassign_hooks(self): """ Unassign all callback functions 
""" super().unassign_hooks() @MType(monitor=OneOfType(callable, None)) def assign_hook(self, *, monitor=None): """ Assign callback functions Arguments: monitor: callback function to do probing during forward/backward pass """ super().assign_hook(monitor=monitor) def reset(self): """ Reset params to initial values """ super().reset() self._a_hat_t = None self._a_offset_t = None self._mean_v = None self._variance_v = None if self._moving_mean_init is not None: self._moving_mean_v = self._moving_mean_init(self.shape) if self._moving_variance_init is not None: self._moving_variance_v = self._moving_variance_init(self.shape) if self._gamma_init is not None: self._gamma_v = self._gamma_init(self.shape) if self._beta_init is not None: self._beta_v = self._beta_init(self.shape) if self._optim is not None: self._optim.reset() @MType(shape=OneOfType((int, ), None), moving_mean_init=OneOfType(str, float, Initializer, None), moving_variance_init=OneOfType(str, float, Initializer, None), gamma_init=OneOfType(str, float, Initializer, None), beta_init=OneOfType(str, float, Initializer, None), optim=OneOfType(str, Optimizer, None)) def reconfig(self, *, shape=None, moving_mean_init=None, moving_variance_init=None, gamma_init=None, beta_init=None, optim=None): """ Reconfig batch normalizer Arguments: shape: moving_mean_init: moving_variance_init: gamma_init: beta_init: optim: """ if moving_mean_init is not None: if isinstance(moving_mean_init, str): moving_mean_init_label = moving_mean_init if self._moving_mean_init is not None and moving_mean_init_label == self._moving_mean_init.label: warnings.warn( 'No change made to normalizer gamma. Re-initializing gamma skipped.', UserWarning) else: if Zeros.label == moving_mean_init_label: self._moving_mean_init = Zeros() elif Ones.label == moving_mean_init_label: self._moving_mean_init = Ones() elif RandomNormal.label == moving_mean_init_label: self._moving_mean_init = RandomNormal() elif RandomUniform.label == moving_mean_init_label: self._moving_mean_init = RandomUniform() elif GlorotRandomNormal.label == moving_mean_init_label: self._moving_mean_init = GlorotRandomNormal() elif GlorotRandomUniform.label == moving_mean_init_label: self._moving_mean_init = GlorotRandomUniform() else: raise TypeError( f'Unknown moving mean initializer {moving_mean_init_label} for normalizer {self.name}.' ) self._moving_mean_v = self._moving_mean_init(self.shape) elif isinstance(moving_mean_init, float): self._moving_mean_init = Constant(moving_mean_init) self._moving_mean_v = self._moving_mean_init(self.shape) else: if self._moving_mean_init is not None and moving_mean_init.label == self._moving_mean_init.label: warnings.warn( 'No change made to normalizer moving mean initializer. Re-initializing moving means skipped.', UserWarning) else: self._moving_mean_init = moving_mean_init self._moving_mean_v = self._moving_mean_init(self.shape) if moving_variance_init is not None: if isinstance(moving_variance_init, str): moving_variance_init_label = moving_variance_init if self._moving_variance_init is not None and moving_variance_init_label == self._moving_variance_init.label: warnings.warn( 'No change made to normalizer gamma. 
Re-initializing gamma skipped.', UserWarning) else: if Zeros.label == moving_variance_init_label: self._moving_variance_init = Zeros() elif Ones.label == moving_variance_init_label: self._moving_variance_init = Ones() elif RandomNormal.label == moving_variance_init_label: self._moving_variance_init = RandomNormal() elif RandomUniform.label == moving_variance_init_label: self._moving_variance_init = RandomUniform() elif GlorotRandomNormal.label == moving_variance_init_label: self._moving_variance_init = GlorotRandomNormal() elif GlorotRandomUniform.label == moving_variance_init_label: self._moving_variance_init = GlorotRandomUniform() else: raise TypeError( f'Unknown moving variance initializer {moving_variance_init_label} for normalizer {self.name}.' ) self._moving_variance_v = self._moving_variance_init( self.shape) elif isinstance(moving_variance_init, float): self._moving_variance_init = Constant(moving_variance_init) self._moving_variance_v = self._moving_variance_init( self.shape) else: if self._moving_variance_init is not None and moving_variance_init.label == self._moving_variance_init.label: warnings.warn( f'No change made to normalizer moving variance initializer. Re-initializing moving variances skipped.', UserWarning) else: self._moving_variance_init = moving_variance_init self._moving_variance_v = self._moving_variance_init( self.shape) if gamma_init is not None: if isinstance(gamma_init, str): gamma_init_label = gamma_init if self._gamma_init is not None and gamma_init_label == self._gamma_init.label: warnings.warn( f'No change made to normalizer gamma initializer. Re-initializing gammas skipped.', UserWarning) else: if Zeros.label == gamma_init_label: self._gamma_init = Zeros() elif Ones.label == gamma_init_label: self._gamma_init = Ones() elif RandomNormal.label == gamma_init_label: self._gamma_init = RandomNormal() elif RandomUniform.label == gamma_init_label: self._gamma_init = RandomUniform() elif GlorotRandomNormal.label == gamma_init_label: self._gamma_init = GlorotRandomNormal() elif GlorotRandomUniform.label == gamma_init_label: self._gamma_init = GlorotRandomUniform() else: raise TypeError( f'Unknown gamma initializer {gamma_init_label} for normalizer {self.name}.' ) self._gamma_v = self._gamma_init(self.shape) elif isinstance(gamma_init, float): self._gamma_init = Constant(gamma_init) self._gamma_v = self._gamma_init(self.shape) else: if self._gamma_init is not None and gamma_init.label == self._gamma_init.label: warnings.warn( 'No change made to normalizer gamma initializer. Re-initializing gammas skipped.', UserWarning) else: self._gamma_init = gamma_init self._gamma_v = self._gamma_init(self.shape) if beta_init is not None: if isinstance(beta_init, str): beta_init_label = beta_init if self._beta_init is not None and beta_init_label == self._beta_init.label: warnings.warn( 'No change made to normalizer beta initializer. Re-initializing betas skipped.', UserWarning) else: if Zeros.label == beta_init_label: self._beta_init = Zeros() elif Ones.label == beta_init_label: self._beta_init = Ones() elif RandomNormal.label == beta_init_label: self._beta_init = RandomNormal() elif RandomUniform.label == beta_init_label: self._beta_init = RandomUniform() elif GlorotRandomNormal.label == beta_init_label: self._beta_init = GlorotRandomNormal() elif GlorotRandomUniform.label == beta_init_label: self._beta_init = GlorotRandomUniform() else: raise TypeError( f'Unknown beta initializer {beta_init_label} for normalizer {self.name}.' 
) self._beta_v = self._beta_init(self.shape) elif isinstance(beta_init, float): self._beta_init = Constant(beta_init) self._beta_v = self._beta_init(self.shape) else: if self._beta_init is not None and beta_init.label == self._beta_init.label: warnings.warn( 'No change made to normalizer beta initializer. Re-initializing betas skipped.', UserWarning) else: self._beta_init = beta_init self._beta_v = self._beta_init(self.shape) if optim is not None: if isinstance(optim, str): optim_label = optim if self._optim is not None and optim_label == self._optim.label: warnings.warn( 'No change made to normalizer optimizer. Reconfig normalizer optimization skipped.', UserWarning) else: if SGD.label == optim_label: self._optim = SGD() elif SGDM.label == optim_label: self._optim = SGDM() elif RMSprop.label == optim_label: self._optim = RMSprop() elif Adam.label == optim_label: self._optim = Adam() else: raise TypeError( f'Unknown optimizer {optim_label} for normalizer {self.name}.' ) else: if self._optim is not None and optim.label == self._optim.label: warnings.warn( 'No change made to normalizer. Reconfig normalizer optimization skipped.', UserWarning) else: self._optim = optim if shape is not None: super().reconfig(shape=shape) self.reset() @MType(as_json=bool, beautify_json=bool) def snapshot(self, *, as_json=False, beautify_json=True): """ Return normalizer as a snapshot dict data Arguments: as_json: beautify_json: Returns: snapshot """ snapshot = super().snapshot(as_json=False, beautify_json=False) snapshot.update({ 'moving_mean': { 'dtype': str(self.moving_means.dtype), 'values': self.moving_means.tolist() } if self.moving_means is not None else None, 'moving_variance': { 'dtype': str(self.moving_variances.dtype), 'values': self.moving_variances.tolist() } if self.moving_variances is not None else None, 'gamma': { 'dtype': str(self.gammas.dtype), 'values': self.gammas.tolist() } if self.gammas is not None else None, 'beta': { 'dtype': str(self.betas.dtype), 'values': self.betas.tolist() } if self.betas is not None else None, 'moving_mean_init': self._moving_mean_init.snapshot(as_json=False, beautify_json=False), 'moving_variance_init': self._moving_variance_init.snapshot(as_json=False, beautify_json=False), 'gamma_init': self._gamma_init.snapshot(as_json=False, beautify_json=False), 'beta_init': self._beta_init.snapshot(as_json=False, beautify_json=False), 'optim': self._optim.snapshot(as_json=False, beautify_json=False) }) if as_json: if beautify_json: return json.dumps(snapshot, indent=4, sort_keys=False) else: return json.dumps(snapshot) else: return snapshot.copy() @MType(dict, np.ndarray, residue=dict) @MShape(axis=1) def compute_forward_ops(self, stage, a_t, *, residue={}): """ Do a dropout forwarded operation function on the post-nonlinear (a) tensor and residue. 
Arguments: stage: forward stage a_t: post-nonlinearity (a) tensor residue: Returns: tensor """ mode = stage['mode'] if mode == 'learning' or mode == 'infering': self._mean_v = np.mean(a_t, axis=0) self._variance_v = np.mean(np.square(a_t - self._mean_v), axis=0) self._a_hat_t = (a_t - self._mean_v) / np.sqrt(self._variance_v + 1e-12) self._a_offset_t = a_t - self._mean_v a_t = (self._gamma_v * self._a_hat_t) + self._beta_v self._moving_mean_v = SOCKET.DEFAULT_BATCH_NORMALIZER_MOVING_MOMENTUM * self._moving_mean_v + ( 1 - SOCKET.DEFAULT_BATCH_NORMALIZER_MOVING_MOMENTUM) * self._mean_v self._moving_variance_v = SOCKET.DEFAULT_BATCH_NORMALIZER_MOVING_MOMENTUM * self._moving_variance_v + ( 1 - SOCKET.DEFAULT_BATCH_NORMALIZER_MOVING_MOMENTUM ) * self._variance_v self._moving_mean_v = self._moving_mean_v.astype(np.float32) self._moving_variance_v = self._moving_variance_v.astype( np.float32) else: self._a_hat_t = (a_t - self._moving_mean_v ) / np.sqrt(self._moving_variance_v + 1e-12) a_t = (self._gamma_v * self._a_hat_t) + self._beta_v return (a_t, residue) @MType(dict, np.ndarray, dict, residue=dict) @MShape(axis=1) def compute_backward_ops(self, stage, eag_t, *, residue={}): """ Do a dropout backwarded operation function on gradient post-nonlinear (a) tensor and residue. Arguments: stage: backward stage eag_t: gradient error tensor w.r.t. post-nonlinearity (a) tensor residue: Returns: tensor """ epoch = stage['epoch'] mode = stage['mode'] hparam = stage['hparam'] batch_size = hparam['batch_size'] if mode == 'learning' or mode == 'infering': gammag_v = np.sum(self._a_offset_t * (self._variance_v + 1e-12)**(-0.5) * eag_t, axis=0) betag_v = np.sum(eag_t, axis=0) [gamma_delta_v, beta_delta_v ] = self._optim.compute_grad_descent_step(epoch, [gammag_v, betag_v], hparam) self._gamma_v -= gamma_delta_v self._beta_v -= beta_delta_v if batch_size == 1: eag_t = self._gamma_v * ( eag_t - np.sum(eag_t, axis=0) - (self._a_offset_t * np.sum(eag_t * self._a_offset_t, axis=0)) / (self._variance_v + 1e-12)) eag_t = eag_t / np.sqrt(self._variance_v + 1e-12) else: eag_t = self._gamma_v * ( batch_size * eag_t - np.sum(eag_t, axis=0) - (self._a_offset_t * np.sum(eag_t * self._a_offset_t, axis=0)) / (self._variance_v + 1e-12)) eag_t = eag_t / (batch_size * np.sqrt(self._variance_v + 1e-12)) return (eag_t, residue)
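# Example (illustration only, not part of the library): a standalone NumPy sketch of
# the learning-mode normalization that compute_forward_ops performs above: standardize
# the batch, scale/shift with gamma and beta, then update the exponential moving
# averages used for inference. The momentum value here is illustrative; the class reads
# its own constant from SOCKET.DEFAULT_BATCH_NORMALIZER_MOVING_MOMENTUM.
import numpy as np

def batch_norm_forward_sketch(a_t, gamma_v, beta_v, moving_mean_v, moving_variance_v, momentum=0.99):
    mean_v = np.mean(a_t, axis=0)
    variance_v = np.mean(np.square(a_t - mean_v), axis=0)
    a_hat_t = (a_t - mean_v) / np.sqrt(variance_v + 1e-12)      # standardized activations
    out_t = gamma_v * a_hat_t + beta_v                          # learned scale and shift
    moving_mean_v = momentum * moving_mean_v + (1 - momentum) * mean_v
    moving_variance_v = momentum * moving_variance_v + (1 - momentum) * variance_v
    return out_t, moving_mean_v, moving_variance_v

_a_t = np.random.randn(8, 4).astype(np.float32)                 # batch of 8, layer size 4
_out_t, _mm_v, _mv_v = batch_norm_forward_sketch(
    _a_t, gamma_v=np.ones(4), beta_v=np.zeros(4),
    moving_mean_v=np.zeros(4), moving_variance_v=np.ones(4))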
class Dropout(Socket): _label = SOCKET.DROPOUT_LABEL """ A dropout socket class. Arguments: size: name: pzero: dropping probability """ @MType(size=int, name=str, pzero=float) def __init__(self, *, size=1, name='', pzero=SOCKET.DEFAULT_DROPOUT_PZERO): self._pzero = SOCKET.DEFAULT_DROPOUT_PZERO self._mask_init = RandomBinary() self._mask_t = None self._pzero_scheduler = None super().__init__(shape=(1, size), name=name) self.reconfig(pzero=pzero) # ------------------------------------------------------------------------ def reset(self): """ Reset internal evaluation states """ super().reset() self._mask_t = None def unassign_hooks(self): """ Unassign all callback functions """ super().unassign_hooks() self._pzero_scheduler = None @MType(monitor=OneOfType(callable, None), pzero_scheduler=OneOfType(callable, None)) def assign_hook(self, *, monitor=None, pzero_scheduler=None): """ Assign callback functions Arguments: monitor: pzero_scheduler: callback function to schedule the pzero """ super().assign_hook(monitor=monitor) if pzero_scheduler is not None: self._pzero_scheduler = pzero_scheduler @MType(shape=OneOfType((int, ), None), pzero=OneOfType(float, None)) def reconfig(self, *, shape=None, pzero=None): """ Reconfig dropout. Arguments: shape: pzero: """ if pzero is not None: if pzero < 0 or pzero >= 1: warnings.warn( f'Dropout probability cannot be < 0 or >= 1. Reset to {SOCKET.DEFAULT_DROPOUT_PZERO}.', UserWarning) pzero = SOCKET.DEFAULT_DROPOUT_PZERO self._pzero = pzero if shape is not None: super().reconfig(shape=shape) self.reset() @MType(as_json=bool, beautify_json=bool) def snapshot(self, *, as_json=False, beautify_json=True): """ Return dropout as a snapshot dict data Arguments: as_json - beautify_json - Returns: dict """ snapshot = super().snapshot(as_json=False, beautify_json=False) snapshot.update({ 'pzero': self._pzero # 'mask': { # 'dtype': str(self._mask_t.dtype), # 'values': self._mask_t.tolist() # } if self._mask_t is not None else None, # 'mask_init': self._mask_init.snapshot(as_json=False, beautify_json=False), }) if as_json: if beautify_json: return json.dumps(snapshot, indent=4, sort_keys=False) else: return json.dumps(snapshot) else: return snapshot.copy() @MType(int) def compute_pzero(self, epoch): """ Get current regularization rate. Arguments: epoch: Returns: float """ pzero = self._pzero if self._pzero_scheduler is not None: pzero = self._pzero_scheduler(epoch, self._pzero) if not isinstance(pzero, float) or pzero < 0: raise TypeError( 'Dropout propability value must be a positive floating point number.' ) return pzero @MType(dict, np.ndarray, residue=dict) @MShape(axis=1) def compute_forward_ops(self, stage, a_t, *, residue={}): """ Do a dropout forwarded operation function on the post-nonlinear (a) tensor and residue. Arguments: stage: forward stage: a_t: post-nonlinearity (a) tensor residue: Returns: tensor """ if 'epoch' in stage: epoch = stage['epoch'] else: raise ValueError( 'Input stage is missing the required epoch number.') pzero = self.compute_pzero(epoch) if pzero > 0: self._mask_t = self._mask_init((1, self.size), pzero=pzero, dtype=np.int8) if self._mask_t is not None: a_t = np.multiply(a_t, self._mask_t) return (a_t, residue) @MType(dict, np.ndarray, residue=dict) @MShape(axis=1) def compute_backward_ops(self, stage, eag_t, *, residue={}): """ Do a dropout backwarded operation function on gradient post-nonlinear (a) tensor and residue. Arguments: stage: backward stage eag_t: gradient error tensor w.r.t. 
post-nonlinearity (a) tensor residue: Returns: tensor """ if self._mask_t is not None: eag_t = np.multiply(eag_t, self._mask_t) return (eag_t, residue)
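# Example (illustration only, not part of the library): a standalone NumPy sketch of
# what the Dropout socket does. A binary mask with P(zero) = pzero is drawn on the
# forward pass and the same mask multiplies the incoming gradient on the backward pass.
# Shapes and values are illustrative.
import numpy as np

_pzero = 0.25
_rng = np.random.RandomState(seed=0)
_a_t = _rng.randn(1, 8).astype(np.float32)                  # post-nonlinearity activations
_mask_t = _rng.binomial(size=(1, 8), n=1, p=1 - _pzero)     # 1 = keep, 0 = drop
_dropped_a_t = np.multiply(_a_t, _mask_t)                   # forward op
_eag_t = np.ones((1, 8), dtype=np.float32)                  # incoming gradient
_dropped_eag_t = np.multiply(_eag_t, _mask_t)               # backward op reuses the mask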
class Gate(Layer): _label = GATE.LABEL _arrangement = GATE.ARRANGEMENT """ A base gate layer that applies a linear or nonlinear function on the input (z) tensor to get an output (a) tensor. Arguments: size: gate size name: gate name """ @MType(size=int, name=str) def __init__(self, *, size=1, name=''): self._z_t = None self._a_t = None self._monitor = None super().__init__(shape=(1, size), name=name) def __str__(self): return super().__str__() + '_' + GATE.LABEL # ------------------------------------------------------------------------ @property def inputs(self): """ Get gate forward pass input (z) tensor. Returns: tensor """ if self._z_t is not None: return self._z_t.copy() else: return None @property def outputs(self): """ Get gate forward pass output (a) tensor. Returns: tensor """ if self._a_t is not None: return self._a_t.copy() else: return None def reset(self): """ Reset internal states. """ self._z_t = None self._a_t = None def unassign_hooks(self): """ Unassign all callback or hook functions. """ self._monitor = None @MType(monitor=OneOfType(callable, None)) def assign_hook(self, *, monitor=None): """ Assign callback or hook functions. Arguments: monitor: callback function to do probing during forward/backward pass """ if monitor is not None: self._monitor = monitor @MType(as_json=bool, beautify_json=bool) def snapshot(self, *, as_json=False, beautify_json=True): """ Return gate as a snapshot dict data Arguments: as_json: beautify_json: Returns: snapshot """ snapshot = super().snapshot(as_json=False, beautify_json=False) snapshot.update({ 'base_label': Gate.label + '_' + snapshot['base_label'] }) if as_json: if beautify_json: return json.dumps(snapshot, indent=4, sort_keys=False) else: return json.dumps(snapshot) else: return snapshot.copy()
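# Example (hypothetical, illustration only): a minimal Gate subclass. The library's real
# gates (ReLU, Sigmoid, Tanh, ...) are defined elsewhere and also carry label
# registration and MType/MShape decorators that are omitted here. The sketch only shows
# the pattern the base class supports: cache the incoming pre-nonlinearity (z) tensor,
# apply a nonlinearity, cache the outgoing (a) tensor, and pass the result on when a
# next layer is connected.
class IdentityGateSketch(Gate):
    def forward(self, stage, z_t, *, residue={}):
        self._z_t = z_t
        self._a_t = z_t.copy()                      # identity "nonlinearity"
        if self._monitor is not None:
            self._monitor({'stage': stage, 'inputs': self.inputs, 'outputs': self.outputs})
        if self.has_next:
            return self.next.forward(stage, self._a_t, residue=residue)
        return self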
class Sequencer(object, metaclass=Sequencer): _label = SEQUENCER.LABEL """ A sequencer class. Arguments: name: """ @MType(name=str) def __init__(self, *, name=''): self._name = name self._valid_sequence = False self._sequence = None # self._registry = {} def __str__(self): if self.name != '': return self.name + '_' + self.label else: return self.label # ------------------------------------------------------------------------ @property def label(self): """ Get layer label. Returns: str """ return type(self).label @property def name(self): """ Get sequencer name Returns: """ return self._name @name.setter @MType(str) def name(self, name): """ Set sequencer name Arguments: name: sequencer name """ self._name = name @property def sequence(self): """ Get sequencer sequence Returns: """ if self.is_valid: return self._sequence else: return None @property def is_valid(self): """ Check that sequence is valid Returns: """ return self._sequence is not None @property def is_complete(self): """ Check that sequence is complete Returns: """ return self.is_valid and self._valid_sequence @MType(as_json=bool, beautify_json=bool) def snapshot(self, *, as_json=False, beautify_json=True): """ Return sequencer as a snapshot dict data Arguments: as_json: beautify_json: Returns: dict """ sequencer_snapshot = { 'name': self.name, 'label': self.label, 'base_label': Sequencer.label, 'sequences': [] } if self.is_complete: for layer in self.sequence.head: sequencer_snapshot['sequences'].append( layer.snapshot(as_json=False, beautify_json=False)) if as_json: if beautify_json: return json.dumps(sequencer_snapshot, indent=4, sort_keys=False) else: return json.dumps(sequencer_snapshot) else: return sequencer_snapshot.copy() @MType(dict, overwrite=bool) def load_snapshot(self, sequencer_snapshot, *, overwrite=False): """ Load sequence from file Arguments: sequencer_snapshot: overwrite: Returns: self """ if self.is_valid and not overwrite: raise RuntimeError( f'Sequencer {self.name} sequence is valid. Cannot overwrite sequence.' ) sequence = None for sequence_snapshot in sequencer_snapshot['sequences'][:-1]: layer_label = sequence_snapshot['label'] name = sequence_snapshot['name'] shape = tuple(sequence_snapshot['shape']) size = shape[1] if Linear.label == layer_label: layer = Linear(size=size, name=name) elif ReLU.label == layer_label: layer = ReLU(size=size, name=name) elif LeakyReLU.label == layer_label: layer = LeakyReLU(size=size, name=name) elif ELU.label == layer_label: layer = ELU(size=size, name=name) elif SoftPlus.label == layer_label: layer = SoftPlus(size=size, name=name) elif Swish.label == layer_label: layer = Swish(size=size, name=name) elif Sigmoid.label == layer_label: layer = Sigmoid(size=size, name=name) elif Tanh.label == layer_label: layer = Tanh(size=size, name=name) elif Algebraic.label == layer_label: layer = Algebraic(size=size, name=name) elif Dropout.label == layer_label: pzero = sequence_snapshot['pzero'] layer = Dropout(size=size, name=name, pzero=pzero) elif BatchNorm.label == layer_label: optim = 'sgdm' optim_label = sequence_snapshot['optim']['label'] if SGD.label == optim_label: optim = SGD() elif SGDM.label == optim_label: optim = SGDM() elif RMSprop.label == optim_label: optim = RMSprop() elif Adam.label == optim_label: optim = Adam() else: raise TypeError( f'Unknown optimizer {optim_label} for normalizer {name}.' 
) moving_mean_init = 'zeros' moving_mean_init_label = sequence_snapshot['moving_mean_init'][ 'label'] if Zeros.label == moving_mean_init_label: moving_mean_init = Zeros() elif Ones.label == moving_mean_init_label: moving_mean_init = Ones() elif RandomNormal.label == moving_mean_init_label: moving_mean_init = RandomNormal() elif RandomUniform.label == moving_mean_init_label: moving_mean_init = RandomUniform() elif GlorotRandomNormal.label == moving_mean_init_label: moving_mean_init = GlorotRandomNormal() elif GlorotRandomUniform.label == moving_mean_init_label: moving_mean_init = GlorotRandomUniform() else: raise TypeError( f'Unknown moving mean initializer {moving_mean_init_label} for normalizer {name}.' ) moving_variance_init = 'ones' moving_variance_init_label = sequence_snapshot[ 'moving_variance_init']['label'] if Zeros.label == moving_variance_init_label: moving_variance_init = Zeros() elif Ones.label == moving_variance_init_label: moving_variance_init = Ones() elif RandomNormal.label == moving_variance_init_label: moving_variance_init = RandomNormal() elif RandomUniform.label == moving_variance_init_label: moving_variance_init = RandomUniform() elif GlorotRandomNormal.label == moving_variance_init_label: moving_variance_init = GlorotRandomNormal() elif GlorotRandomUniform.label == moving_variance_init_label: moving_variance_init = GlorotRandomUniform() else: raise TypeError( f'Unknown moving variance initializer {moving_variance_init_label} for normalizer {name}.' ) gamma_init = 'ones' gamma_init_label = sequence_snapshot['gamma_init']['label'] if Zeros.label == gamma_init_label: gamma_init = Zeros() elif Ones.label == gamma_init_label: gamma_init = Ones() elif RandomNormal.label == gamma_init_label: gamma_init = RandomNormal() elif RandomUniform.label == gamma_init_label: gamma_init = RandomUniform() elif GlorotRandomNormal.label == gamma_init_label: gamma_init = GlorotRandomNormal() elif GlorotRandomUniform.label == gamma_init_label: gamma_init = GlorotRandomUniform() else: raise TypeError( f'Unknown gamma initializer {gamma_init_label} for normalizer {name}.' ) beta_init = 'zeros' beta_init_label = sequence_snapshot['beta_init']['label'] if Zeros.label == beta_init_label: beta_init = Zeros() elif Ones.label == beta_init_label: beta_init = Ones() elif RandomNormal.label == beta_init_label: beta_init = RandomNormal() elif RandomUniform.label == beta_init_label: beta_init = RandomUniform() elif GlorotRandomNormal.label == beta_init_label: beta_init = GlorotRandomNormal() elif GlorotRandomUniform.label == beta_init_label: beta_init = GlorotRandomUniform() else: raise TypeError( f'Unknown beta initializer {beta_init_label} for normalizer {name}.' 
) layer = BatchNorm(size=size, name=name, moving_mean_init=moving_mean_init, moving_variance_init=moving_variance_init, gamma_init=gamma_init, beta_init=beta_init, optim=optim) layer.moving_means = np.array( sequence_snapshot['moving_mean']['values'], dtype=sequence_snapshot['moving_mean']['dtype']) layer.moving_variances = np.array( sequence_snapshot['moving_variance']['values'], dtype=sequence_snapshot['moving_variance']['dtype']) layer.gammas = np.array( sequence_snapshot['gamma']['values'], dtype=sequence_snapshot['gamma']['dtype']) layer.betas = np.array( sequence_snapshot['beta']['values'], dtype=sequence_snapshot['beta']['dtype']) elif Link.label == layer_label: frozen = sequence_snapshot['frozen'] weight_init = 'random_normal' weight_init_label = sequence_snapshot['weight_init']['label'] if Zeros.label == weight_init_label: weight_init = Zeros() elif Ones.label == weight_init_label: weight_init = Ones() elif Identity.label == weight_init_label: weight_init = Identity() elif Diagonal.label == weight_init_label: value = sequence_snapshot['weight_init']['value'] weight_init = Diagonal(value) elif RandomNormal.label == weight_init_label: seed = sequence_snapshot['weight_init']['seed'] mean = sequence_snapshot['weight_init']['mean'] variance = sequence_snapshot['weight_init']['variance'] weight_init = RandomNormal(seed=seed, mean=mean, variance=variance) elif RandomUniform.label == weight_init_label: seed = sequence_snapshot['weight_init']['seed'] min = sequence_snapshot['weight_init']['min'] max = sequence_snapshot['weight_init']['max'] weight_init = RandomUniform(seed=seed, min=min, max=max) elif GlorotRandomNormal.label == weight_init_label: seed = sequence_snapshot['weight_init']['seed'] weight_init = GlorotRandomNormal(seed=seed) elif GlorotRandomUniform.label == weight_init_label: seed = sequence_snapshot['weight_init']['seed'] weight_init = GlorotRandomUniform(seed=seed) else: raise TypeError( f'Unknown weight initializer {weight_init_label} for link {name}.' ) weight_reg = 'not_use' if sequence_snapshot['weight_reg'] is not None: weight_reg_label = sequence_snapshot['weight_reg']['label'] if L1Lasso.label == weight_reg_label: weight_reg = L1Lasso() elif L2Ridge.label == weight_reg_label: weight_reg = L2Ridge() elif L1L2ElasticNet.label == weight_reg_label: weight_reg = L1L2ElasticNet() else: raise TypeError( f'Unknown weight regularizer {weight_reg_label} for link {name}.' ) bias_init = 'not_use' if BatchNorm.label == sequence.tail.label and bias_init != 'not_use': warnings.warn( f'Link biases is not needed with batch normalization in the previous layer enabled. Link biases initialization skipped.', UserWarning) else: if sequence_snapshot['bias_init'] is not None: bias_init_label = sequence_snapshot['bias_init'][ 'label'] if Zeros.label == bias_init_label: bias_init = Zeros() elif Ones.label == bias_init_label: bias_init = Ones() elif Constant.label == bias_init_label: value = sequence_snapshot['bias_init']['value'] bias_init = Constant(value) else: raise TypeError( f'Unknown bias initializer {bias_init_label} for link {name}.' 
) optim = 'sgd' optim_label = sequence_snapshot['optim']['label'] if SGD.label == optim_label: optim = SGD() elif SGDM.label == optim_label: optim = SGDM() elif RMSprop.label == optim_label: optim = RMSprop() elif Adam.label == optim_label: optim = Adam() else: raise TypeError( f'Unknown optimizer {optim_label} for link {name}.') layer = Link(shape=shape, name=name, weight_init=weight_init, weight_reg=weight_reg, bias_init=bias_init, optim=optim) layer.weights = np.array( sequence_snapshot['weight']['values'], dtype=sequence_snapshot['weight']['dtype']) if sequence_snapshot['bias'] is not None: layer.biases = np.array( sequence_snapshot['bias']['values'], dtype=sequence_snapshot['bias']['dtype']) if frozen: layer.freeze() if sequence is None: sequence = layer else: sequence.tail.connect(layer) self._name = sequencer_snapshot['name'] self._sequence = sequence self._valid_sequence = True return self @classmethod @MType(OneOfType(str, Gate, Socket), size=OneOfType(int, None), shape=OneOfType((int, ), None), name=str) def create(cls, layer, *, size=None, shape=None, name=''): """ Create a sequencer with layers. Arguments: size: shape: layer: name: Returns: callable """ @FType(OneOfType(callable, Sequencer, None)) def connect(preceded_sequencer): """ Connect new layer to preceded sequencer sequence. Arguments: preceded_sequencer: Returns: sequencer """ nonlocal layer nonlocal size if preceded_sequencer is None: preceded_sequencer = cls(name=name) elif callable(preceded_sequencer): preceded_sequencer = preceded_sequencer(None) sequence = None if isinstance(layer, str): layer_label = layer if size is None: if preceded_sequencer.is_valid: prev_layer_size = preceded_sequencer.sequence.tail.size size = prev_layer_size layer.reconfig(shape=(1, size)) else: warnings.warn( 'Gate layer size is not specified. 
Using size = 1.', UserWarning) size = 1 if Linear.label == layer_label: layer = Linear(size=size, name=name) elif ReLU.label == layer_label: layer = ReLU(size=size, name=name) elif LeakyReLU.label == layer_label: layer = LeakyReLU(size=size, name=name) elif ELU.label == layer_label: layer = ELU(size=size, name=name) elif SoftPlus.label == layer_label: layer = SoftPlus(size=size, name=name) elif Swish.label == layer_label: layer = Swish(size=size, name=name) elif Sigmoid.label == layer_label: layer = Sigmoid(size=size, name=name) elif Tanh.label == layer_label: layer = Tanh(size=size, name=name) elif Algebraic.label == layer_label: layer = Algebraic(size=size, name=name) else: raise TypeError(f'Unknown gate layer label {layer_label}.') if preceded_sequencer.is_valid: prev_layer_label = preceded_sequencer.sequence.tail.label prev_layer_size = preceded_sequencer.sequence.tail.size shape = (prev_layer_size, size) sequence = Link( shape=shape, name=name, weight_init='random_normal', weight_reg='not_use', bias_init='zeros' if BatchNorm.label != prev_layer_label else 'not_use', optim='sgd').connect(layer) else: sequence = layer elif isinstance(layer, Gate): if size is None: if preceded_sequencer.is_valid: prev_layer_size = preceded_sequencer.sequence.tail.size size = prev_layer_size layer.reconfig(shape=(1, size)) else: if size != layer.size: layer.reconfig(shape=(1, size)) if name != '': layer.name = name if preceded_sequencer.is_valid: prev_layer_label = preceded_sequencer.sequence.tail.label prev_layer_size = preceded_sequencer.sequence.tail.size shape = (prev_layer_size, size) sequence = Link( shape=shape, name=name, weight_init='random_normal', weight_reg='not_use', bias_init='zeros' if BatchNorm.label != prev_layer_label else 'not_use', optim='sgd').connect(layer) else: sequence = layer elif isinstance(layer, Socket): if not preceded_sequencer.is_valid: raise RuntimeError( f'Socket layer {layer_label} cannot be the first layer in sequence.' 
) if size is None: if preceded_sequencer.is_valid: prev_layer_size = preceded_sequencer.sequence.tail.size size = prev_layer_size layer.reconfig(shape=(1, size)) else: if size != layer.size: layer.reconfig(shape=(1, size)) if name != '': layer.name = name sequence = layer if preceded_sequencer.is_valid: preceded_sequencer.sequence.tail.connect(sequence.head) else: preceded_sequencer._sequence = sequence if preceded_sequencer.sequence.is_singular: preceded_sequencer._valid_sequence = False else: if Gate.label in str(preceded_sequencer.sequence.head) and \ Gate.label in str(preceded_sequencer.sequence.tail) and \ Link.label in str(preceded_sequencer.sequence.tail.prev): preceded_sequencer._valid_sequence = True return preceded_sequencer return connect @MType(OneOfType(str, Gate, Socket), size=OneOfType(int, None), name=str) def add(self, layer, *, size=None, name=''): """ Add new sequence layer Arguments: size: layer: name Returns: self """ sequencer = self.create(layer, size=size, name=name)(self) self._sequence = sequencer.sequence self._valid_sequence = sequencer._valid_sequence return self @MType(pzero=OneOfType(float, None), weight_init=OneOfType(str, Initializer, None), weight_reg=OneOfType(str, Regularizer, None), bias_init=OneOfType(str, float, Initializer, None), moving_mean_init=OneOfType(str, float, Initializer, None), moving_variance_init=OneOfType(str, float, Initializer, None), gamma_init=OneOfType(str, float, Initializer, None), beta_init=OneOfType(str, float, Initializer, None), optim=OneOfType(str, Optimizer, None)) def reconfig(self, *, pzero=None, weight_init=None, weight_reg=None, bias_init=None, moving_mean_init=None, moving_variance_init=None, gamma_init=None, beta_init=None, optim=None): """ Reconfig the previous layer in sequence. Arguments: pzero: weight_init: weight_reg: bias_init: moving_mean_init: moving_variance_init: gamma_init: beta_init: optim: Returns: self """ if not self.is_valid: raise RuntimeError(f'Sequencer {self.name} sequence is valid.') layer = self.sequence.tail if Gate.label in str(layer): if layer.has_prev: if weight_init is None and weight_reg is None and \ bias_init is None and optim is None: warnings.warn( f'No reconfiguration was applied to layer {layer.label}.', UserWarning) else: layer.prev.reconfig(weight_init=weight_init, weight_reg=weight_reg, bias_init=bias_init, optim=optim) else: warnings.warn( f'Reconfiguration was applied. 
Layer {layer.label} reconfiguration skipped.', UserWarning) elif Dropout.label == layer.label: if pzero is None: warnings.warn( f'No reconfiguration was applied to layer {layer.label}.', UserWarning) else: layer.reconfig(pzero=pzero) elif BatchNorm.label == layer.label: if moving_mean_init is None and moving_variance_init is None and \ gamma_init is None and beta_init is None and optim is None: warnings.warn( f'No reconfiguration was applied to layer {layer.label}.', UserWarning) else: layer.reconfig(moving_mean_init=moving_mean_init, moving_variance_init=moving_variance_init, gamma_init=gamma_init, beta_init=beta_init, optim=optim) return self @MType(pzero=OneOfType(float, None), weight_init=OneOfType(str, Initializer, None), weight_reg=OneOfType(str, Regularizer, None), bias_init=OneOfType(str, float, Initializer, None), moving_mean_init=OneOfType(str, float, Initializer, None), moving_variance_init=OneOfType(str, float, Initializer, None), gamma_init=OneOfType(str, float, Initializer, None), beta_init=OneOfType(str, float, Initializer, None), optim=OneOfType(str, Optimizer, None)) def reconfig_all(self, *, pzero=None, weight_init=None, weight_reg=None, bias_init=None, moving_mean_init=None, moving_variance_init=None, gamma_init=None, beta_init=None, optim=None): """ Reconfig all previous layers in sequence. Arguments: pzero: weight_init: weight_reg: bias_init: moving_mean_init: moving_variance_init: gamma_init: beta_init: optim: Returns: self """ if not self.is_valid: raise RuntimeError(f'Sequencer {self.name} sequence is valid.') for layer in self.sequence.head: if Link.label == layer.label: layer.reconfig(weight_init=weight_init, weight_reg=weight_reg, bias_init=bias_init, optim=optim) elif Dropout.label == layer.label: layer.reconfig(pzero=pzero) elif BatchNorm.label == layer.label: layer.reconfig(moving_mean_init=moving_mean_init, moving_variance_init=moving_variance_init, gamma_init=gamma_init, beta_init=beta_init, optim=optim) return self
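# Example (usage sketch, not part of the library): build a small fully connected
# sequence and reconfigure the link that feeds the tail gate. The label strings
# ('linear', 'relu', 'sigmoid', 'glorot_random_uniform', 'adam') are assumptions about
# the registered labels; substitute the actual label constants used by the library.
seq = Sequencer(name='demo')
seq.add('linear', size=4)                 # input gate
seq.add('relu', size=8)                   # a Link is inserted in front of each new gate
seq.add(Dropout(pzero=0.2), name='drop1') # socket layers take the preceding layer's size
seq.add('sigmoid', size=1)                # output gate
seq.reconfig(weight_init='glorot_random_uniform', optim='adam')
print(seq.snapshot(as_json=True))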
class Layer(object, metaclass=Layer): _label = LAYER.LABEL _arrangement = LAYER.ARRANGEMENT """ Layer base class. Arguments: shape: layer shape name: layer name """ @MType(shape=(int, ), name=str) def __init__(self, *, shape=(1, 1), name=''): self._name = name self._next = None self._prev = None self._shape = None self._locked = False self.reconfig(shape=shape) def __str__(self): if self.name != '': return self.name + '_' + self.label else: return self.label def __iter__(self): """ Set layer to be an iterator to allows iteration over all connected layers. """ layer = self.head while layer is not None: yield layer layer = layer.next # ------------------------------------------------------------------------ @property def label(self): """ Get layer label. Returns: str """ return type(self).label @property def arrangement(self): """ Get layer arrangement. Returns: tuple """ return type(self).arrangement @property def shape(self): """ Get layer shape. Returns: tuple """ return self._shape @property def size(self): """ Get layer size. Returns: int """ return self.shape[1] @property def index(self): """ Get layer index. Returns: int """ if self.has_prev: return self.prev.index + 1 else: return 0 @property def name(self): """ Get layer name. Returns: str """ return self._name @name.setter @MType(str) def name(self, name): """ Set layer name. Arguments: name: string name """ self._name = name @property def next(self): """ Get next layer. Returns: layer """ return self._next @property def prev(self): """ Get previous layer. Returns: layer """ return self._prev @property def head(self): """ Get head layer. Returns: layer """ if self.has_prev: if self.is_head: return self else: return self.prev.head else: return self @property def tail(self): """ Get tail layer. Returns: layer """ if self.has_next: if self.is_tail: return self else: return self.next.tail else: return self @property def has_prev(self): """ Check if there is a connection to previous layer. Returns: bool """ return self.prev is not None @property def has_next(self): """ Check if there is a connection to next layer. Returns: bool """ return self.next is not None @property def is_head(self): """ Check if layer is head. Returns: bool """ return self.is_singular or (self.next is not None and self.prev is None) @property def is_tail(self): """ Check if layer is tail. Returns: bool """ return self.is_singular or (self.next is None and self.prev is not None) @property def is_body(self): """ Check if layer is center body. Returns: bool """ return self.next is not None and self.prev is not None @property def is_singular(self): """ Check if layer is a singular layer with no connection. Returns: bool """ return self.next is None and self.prev is None @MType(Layer) def is_connected_to(self, layer): """ Check if layer is already connected. Arguments: layer: layer to be check for connectivity Returns: bool """ connected = False for connected_layer in self.head: connected = connected_layer is layer if connected: break return connected @property def is_locked(self): """ Check if layer is locked. Returns: bool """ return self._locked @property @abc.abstractmethod def inputs(self): """ Get layer forward pass input. Not implemented. """ pass @property @abc.abstractmethod def outputs(self): """ Get layer forward pass output. Not implemented. """ pass def lock(self): """ Finalize by locking this layer and connecting layers in connection. 
""" if not self.is_locked: self._locked = True if self.has_next: self.next.lock() if self.has_prev: self.prev.lock() def unlock(self): """ Unlock this layer and connecting layers in connection. """ if self.is_locked: self._locked = False if self.has_next: self.next.unlock() if self.has_prev: self.prev.unlock() @MType(shape=OneOfType((int, ), None)) def reconfig(self, *, shape=None): """ Reconfig layer. Arguments: shape: """ if shape is not None: if not all(axis >= 1 for axis in shape): raise ValueError(f'Shape {shape} has axis < 1.') if len(shape) < 2: raise ValueError('Shape must have atleast 2 axes.') if self.is_locked: warnings.warn( f'Layer {self.name} is locked. Reconfig layer shape skipped.', UserWarning) if not self.is_singular: warnings.warn( f'Layer {self.name} has connection to other layers. Reconfig layer shape skipped.', UserWarning) self._shape = shape self.reset() @MType(as_json=bool, beautify_json=bool) def snapshot(self, *, as_json=False, beautify_json=True): """ Return layer state snapshot as a dict. Arguments: as_json: set to True to convert and return dict as JSON beautify_json: set to True to beautify JSON Returns: dict """ snapshot = { 'index': self.index, 'name': self.name, 'label': self.label, 'base_label': Layer.label, 'shape': self.shape, 'locked': self.is_locked } if as_json: if beautify_json: return json.dumps(snapshot, indent=4, sort_keys=False) else: return json.dumps(snapshot) else: return snapshot.copy() @MType(int) def from_index(self, index): """ Goto layer at index. Arguments: index: layer index Returns: layer """ layer = self.head target_index = 0 while layer is not None: if target_index == index: break if target_index > index: layer = None break if target_index < index: target_index += 1 layer = layer.next if layer is None: warnings.warn(f'No layer is found at index {index}.', UserWarning) return layer @MType(Layer, position=str) def connect(self, layer, *, position='ahead'): """ Add a new layer ahead or behind this layer. Arguments: layer: next layer to make connection to position: connection position, ahead or behind Returns: layer """ if self.is_locked: warnings.warn( f'Cannot make connection from locked layer {self.name} to layer {layer.name}. Connecting layer skipped.', UserWarning) return self elif layer.is_connected_to(self): warnings.warn( f'Layer {layer.name} is already connected to {self.name}.', UserWarning) return self else: if position == 'ahead': if not self.is_singular and (self.is_head or self.is_body): if not layer.is_singular: raise RuntimeError( f'Cannot make connection from layer {self.name} to a non-singular layer {layer.name}.' ) if layer.arrangement[0] not in self.arrangement[1]: raise RuntimeError( f'Cannot make connection from layer {self.name} to layer {layer.name}. Mismatched arrangement.' ) if layer.arrangement[1] not in self._next.arrangement[0]: raise RuntimeError( f'Cannot make connection from layer {layer.name} to layer {self._next.name}. Mismatched arrangement.' ) self._next._prev = layer layer._next = self._next layer._prev = self self._next = layer return layer elif self.is_singular or self.is_tail: if layer.is_body or (layer.is_tail and layer.has_prev): raise RuntimeError( f'Cannot make connection from layer {self.name} to a non-signular layer {layer.name} that is either a body or tail.' ) if layer.arrangement[0] not in self.arrangement[1]: raise RuntimeError( f'Cannot make connection from layer {self.name} to layer {layer.name}. Mismatched arrangement.' 
) self._next = layer layer._prev = self return layer elif position == 'behind': if not layer.is_singular: raise RuntimeError( f'Cannot make connection from layer {self.name} to a non-singular layer {layer.name}.' ) if self.is_singular or self.is_head: if self.arrangement[0] not in layer.arrangement[1]: raise RuntimeError( f'Cannot make connection from layer {layer.name} to layer{self.name}. Mismatched arrangement.' ) self._prev = layer layer._next = self return layer elif self.is_body or (not self.is_singular and self.is_tail): if self.arrangement[0] not in layer.arrangement[1]: raise RuntimeError( f'Cannot make connection from layer {layer.name} to layer {self.name}. Mismatched arrangement.' ) if layer.arrangement[0] not in self._prev.arrangement[1]: raise RuntimeError( f'Cannot make connection from layer {self._prev.name} to layer {layer.name}. Mismatched arrangement.' ) self._prev._next = layer layer._prev = self._prev layer._next = self self._prev = layer return layer else: raise TypeError(f'Unknown position type {position}.') @MType(Layer) def replace_with(self, layer): """ Replace this layer with a different layer. Arguments: layer: layer to replace with Returns: layer """ if self.is_locked: warnings.warn( f'Cannot replace locked layer {self.name} with layer {layer.name}. Replace layer skipped.', UserWarning) return self elif layer.is_connected_to(self): warnings.warn( f'Layer {layer.name} is already connected to {self.name}.', UserWarning) return self else: if not layer.is_singular: raise RuntimeError( f'Cannot make connection from layer {self.name} to non-singular layer {layer.name}.' ) if self.is_singular: raise RuntimeError( f'Cannot replace a non-connecting layer {self.name} with layer {layer.name}.' ) if self.is_head: if self._next.arrangement[0] not in layer.arrangement[1]: raise RuntimeError( f'Cannot make connection from layer {layer.name} to layer {self._next.name}. Mismatched arrangement.' ) self._next._prev = layer layer._next = self._next self._next = None elif self.is_body: if self._next.arrangement[0] not in layer.arrangement[1]: raise RuntimeError( f'Cannot make connection from layer {self._next.name} to layer {layer.name}. Mismatched arrangement.' ) if layer.arrangement[0] not in self._prev.arrangement[1]: raise RuntimeError( f'Cannot make connection from layer {layer.name} to layer {self._prev.name}. Mismatched arrangement.' ) self._next._prev = layer self._prev._next = layer layer._next = self._next layer._prev = self._prev self._next = None self._prev = None elif self.is_tail: if layer.arrangement[0] not in self._prev.arrangement[1]: raise RuntimeError( f'Cannot make connection from layer {self._prev.name} to layer {layer.name}. Mismatched arrangement.' ) self._prev._next = layer layer._prev = self._prev self._prev = None return layer def disconnect(self): """ Remove self from connection. Returns: layer """ if self.is_locked: warnings.warn( f'Cannot remove locked layer {self.name}. Remove layer skipped.', UserWarning) return self else: if self.is_head: self._next._prev = None elif self.is_body: if self._next.arrangement[0] not in self._prev.arrangement[1]: raise RuntimeError( f'Cannot make connection from layer {self._prev.name} to layer {self._next.name}. Mismatched arrangement.' 
) self._next._prev = self._prev self._prev._next = self._next elif self.is_tail: self._prev._next = None else: raise RuntimeError( f'Cannot remove a non-connecting layer {self.name}.') self._next = None self._prev = None return self @abc.abstractmethod def unassign_hooks(self): """ Unassign all callback functions. Not implemented. """ pass @abc.abstractmethod def assign_hook(self): """ Assign callback functions. Not implemented. """ pass @abc.abstractmethod def forward(self): """ Layer forward pass method. Not implemented. """ pass @abc.abstractmethod def backward(self): """ Layer backward pass method. Not implemented. """ pass
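# Example (usage sketch, not part of the library): the doubly linked list behaviour the
# Layer base class provides, using concrete layers defined elsewhere in this module
# (Linear, Sigmoid, Link); their constructor signatures are assumed to match the usage
# below. connect() returns the newly attached layer, iteration always walks from the
# head layer, and from_index() does positional lookup.
gate_in = Linear(size=4, name='in')
hidden_link = Link(shape=(4, 2), name='hidden')
gate_out = Sigmoid(size=2, name='out')

gate_in.connect(hidden_link).connect(gate_out)
for layer in gate_in:                      # __iter__ walks head -> tail
    print(layer.index, str(layer), layer.shape)
assert gate_in.tail is gate_out
assert gate_out.from_index(1) is hidden_link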
class RandomNormal(Initializer):
    _label = INITIALIZER.RANDOM_NORMAL_LABEL
    """
    Initialize an array of the given shape with values drawn from a random normal
    distribution at mean +/- variance.
    """
    @MType(mean=float, variance=float, seed=OneOfType(int, None))
    def __init__(self, *,
                 mean=INITIALIZER.DEFAULT_RANDOM_NORMAL_MEAN,
                 variance=INITIALIZER.DEFAULT_RANDOM_NORMAL_VARIANCE,
                 seed=None):
        self._rnormal_t = None
        self._mean = mean
        self._variance = variance
        if seed is not None and seed < 0:
            warnings.warn('Seed must be a non-negative integer. Reset to None.', UserWarning)
            seed = None
        self._seed = seed
        self._rng = np.random.RandomState(seed=self._seed)
        super().__init__()

    @MType((int, ), dtype=type(np.dtype), reuse=bool)
    def __call__(self, shape, *, dtype=np.float32, reuse=False):
        if self._rnormal_t is None or self._rnormal_t.shape != shape or not reuse:
            if self._rnormal_t is not None and self._rnormal_t.shape != shape and reuse:
                warnings.warn(
                    'Unable to reuse last random normal because the shape is different.',
                    UserWarning)
            self._rnormal_t = self._rng.normal(loc=self._mean,
                                               scale=self._variance,
                                               size=shape).astype(dtype)
        return self._rnormal_t.copy()

    # ------------------------------------------------------------------------

    @MType(as_json=bool, beautify_json=bool)
    def snapshot(self, *, as_json=False, beautify_json=True):
        """
        Return initializer state snapshot as a dict.
        Arguments:
            as_json: set to True to convert and return dict as JSON
            beautify_json: set to True to beautify JSON
        Returns:
            dict
        """
        snapshot = super().snapshot(as_json=False, beautify_json=False)
        snapshot.update({
            'seed': self._seed,
            'dtype': str(self._rnormal_t.dtype) if self._rnormal_t is not None else None,
            'mean': self._mean,
            'variance': self._variance
            # 'values': self._rnormal_t.tolist() if self._rnormal_t is not None else None
        })
        if as_json:
            if beautify_json:
                return json.dumps(snapshot, indent=4, sort_keys=False)
            else:
                return json.dumps(snapshot)
        else:
            return snapshot.copy()
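# Example (usage sketch, not part of the library): draw a seeded (2, 3) float32 array
# from the configured normal distribution; calling again with reuse=True and the same
# shape returns a copy of the previously drawn values instead of resampling.
rnormal_init = RandomNormal(mean=0.0, variance=0.1, seed=7)
rn_m = rnormal_init((2, 3))
rn_m_again = rnormal_init((2, 3), reuse=True)
assert (rn_m == rn_m_again).all()
print(rnormal_init.snapshot(as_json=False))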
class MSELoss(Objective):
    _label = OBJECTIVE.MSE_LOSS_LABEL
    """
    Objective using mean square error for loss function.
    """
    # ------------------------------------------------------------------------

    @MType(shape=OneOfType((int,), None), metric=OneOfType((str,), None))
    def reconfig(self, *, shape=None, metric=None):
        """
        Reconfig objective
        Arguments:
            shape: objective layer shape
            metric: loss metric
        """
        if metric is not None:
            if 'loss' in metric:
                self._evaluation['metric']['loss'] = 0
                if any(label in metric for label in ('accuracy', 'acc', 'recall', 'rc',
                                                     'precision', 'prec', 'f1_score', 'f1')):
                    warnings.warn(
                        f'Mean square error objective only has a loss metric. Ignoring metrics {metric}.',
                        UserWarning)
            else:
                raise TypeError(f'Unknown metric {metric} for objective {self.name}.')
        if shape is not None:
            super().reconfig(shape=shape)
            self.reset()

    @MType(np.ndarray, np.ndarray, dict)
    def compute_loss(self, y_t, y_prime_t, *, residue={}):
        """
        Compute the loss.
        Arguments:
            y_t: output (y) tensor
            y_prime_t: expected output (y) tensor
            residue:
        Returns:
            tuple
        """
        ey_t = y_t - y_prime_t
        ly_t = np.square(ey_t)
        return (ly_t, residue)

    @MType(np.ndarray, np.ndarray, dict)
    def compute_loss_grad(self, y_t, y_prime_t, *, residue={}):
        """
        Compute the loss gradient tensor for gradient descent update.
        Arguments:
            y_t: output (y) tensor
            y_prime_t: expected output (y) tensor
            residue:
        Returns:
            tuple
        """
        ey_t = y_t - y_prime_t
        eyg_t = 2 * ey_t
        return (eyg_t, residue)

    @MType(np.ndarray, np.ndarray, np.ndarray, dict)
    def compute_evaluation_metric(self, y_t, y_prime_t, ly_t, evaluation_metric):
        """
        Compute the evaluation metric.
        Arguments:
            y_t: output (y) tensor
            y_prime_t: expected output (y) tensor
            ly_t: loss tensor
        Returns:
            metric
        """
        if 'loss' in evaluation_metric:
            evaluation_metric['loss'] += ly_t.mean()
        return evaluation_metric
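# Example (illustration only, not part of the library): a small numerical check of the
# loss and gradient defined above, ly = (y - y')^2 and d(ly)/dy = 2 (y - y').
# Construction assumes the Objective base class accepts size/name keywords, as the
# other objectives do.
import numpy as np

mse = MSELoss(size=2, name='mse_demo')
y_t = np.array([[0.2, 0.9]], dtype=np.float32)
y_prime_t = np.array([[0.0, 1.0]], dtype=np.float32)
ly_t, _ = mse.compute_loss(y_t, y_prime_t)
eyg_t, _ = mse.compute_loss_grad(y_t, y_prime_t)
assert np.allclose(ly_t, np.square(y_t - y_prime_t))
assert np.allclose(eyg_t, 2.0 * (y_t - y_prime_t))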
class GlorotRandomUniform(Initializer):
    _label = INITIALIZER.GLOROT_RANDOM_UNIFORM_LABEL
    """
    Initialize an array of the given shape with values drawn uniformly from
    0 +/- sqrt(2 / sum(shape)) / 2 (Glorot random uniform).
    """
    @MType(seed=OneOfType(int, None))
    def __init__(self, *, seed=None):
        self._gruniform_t = None
        if seed is not None and seed < 0:
            warnings.warn('Seed must be a non-negative integer. Reset to None.', UserWarning)
            seed = None
        self._seed = seed
        self._rng = np.random.RandomState(seed=self._seed)
        super().__init__()

    @MType((int, ), dtype=type(np.dtype), reuse=bool)
    def __call__(self, shape, *, dtype=np.float32, reuse=False):
        if self._gruniform_t is None or self._gruniform_t.shape != shape or not reuse:
            if self._gruniform_t is not None and self._gruniform_t.shape != shape and reuse:
                warnings.warn(
                    'Unable to reuse last glorot uniform because the shape is different.',
                    UserWarning)
            spread = math.sqrt(2 / sum(shape)) / 2
            self._gruniform_t = self._rng.uniform(low=-spread,
                                                  high=spread,
                                                  size=shape).astype(dtype)
        return self._gruniform_t.copy()

    # ------------------------------------------------------------------------

    @MType(as_json=bool, beautify_json=bool)
    def snapshot(self, *, as_json=False, beautify_json=True):
        """
        Return initializer state snapshot as a dict.
        Arguments:
            as_json: set to True to convert and return dict as JSON
            beautify_json: set to True to beautify JSON
        Returns:
            dict
        """
        snapshot = super().snapshot(as_json=False, beautify_json=False)
        snapshot.update({'seed': self._seed})
        if self._gruniform_t is not None:
            spread = math.sqrt(2 / sum(self._gruniform_t.shape)) / 2
            snapshot.update({
                'dtype': str(self._gruniform_t.dtype),
                'min': -spread,
                'max': spread,
                'spread': spread
                # 'values': self._gruniform_t.tolist() if self._gruniform_t is not None else None
            })
        if as_json:
            if beautify_json:
                return json.dumps(snapshot, indent=4, sort_keys=False)
            else:
                return json.dumps(snapshot)
        else:
            return snapshot.copy()
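# Example (usage sketch, not part of the library): for a (fan_in, fan_out) shape the
# values are drawn uniformly from [-s, s] with s = sqrt(2 / (fan_in + fan_out)) / 2,
# matching the spread computed in __call__.
import math

gruniform_init = GlorotRandomUniform(seed=42)
gw_m = gruniform_init((4, 6))
gspread = math.sqrt(2 / (4 + 6)) / 2
assert gw_m.shape == (4, 6)
assert (abs(gw_m) <= gspread).all()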
class Link(Layer): _label = LINK.LABEL _arrangement = LINK.ARRANGEMENT """ A fully connected link that connect two layers together consited of a weight matrix and bias vector. Arguments: shape: name: weight_init: weight matrix initializer weight_reg: weight matrix regularization bias_init: bias vector initializer optim: """ @MType(shape=(int,), name=str, weight_init=OneOfType(str, Initializer), weight_reg=OneOfType(str, Regularizer, None), bias_init=OneOfType(str, float, Initializer), optim=OneOfType(str, Optimizer)) def __init__(self, *, shape=(1, 1), name='', weight_init='random_normal', weight_reg='not_use', bias_init='zeros', optim='sgd'): self._frozen = False self._weight_init = None self._weight_reg = None self._bias_init = None self._w_m = None self._b_v = None self._optim = None self._monitor = None super().__init__(shape=shape, name=name) self.reconfig(weight_init=weight_init, weight_reg=weight_reg, bias_init=bias_init, optim=optim) def __str__(self): return super().__str__() + '_' + LINK.LABEL # ------------------------------------------------------------------------ @property def is_frozen(self): """ Check if layer is frozen. Returns: is frozen flag """ return self._frozen def freeze(self): """ Freeze layer """ self._frozen = True def unfreeze(self): """ Unfreeze layer """ self._frozen = False @property def inputs(self): """ Get link forward pass input tensor. Returns: tensor """ if self.has_prev: return self.prev.outputs else: return None @property def outputs(self): """ Get link forward pass output tensor. Returns: tensor """ if self.has_next: return self.next.inputs else: return None @property def weights(self): """ Get link weight matrix. Returns: matrix """ if self._w_m is not None: return self._w_m.copy() else: return None @weights.setter @MType(np.ndarray) @MShape(axis=-1) def weights(self, w_m): """ Set link weight matrix. """ if self.is_frozen: warnings.warn(f'Cannot set weights to a frozen link {self.name}.', UserWarning) else: np.copyto(self._w_m, w_m, casting='same_kind') @property def biases(self): """ Get link bias vector. Returns: vector """ if self._b_v is not None: return self._b_v.copy() else: return None @biases.setter @MType(np.ndarray) @MShape(axis=1) def biases(self, b_v): """ Set link bias vector. """ if self.is_frozen: warnings.warn(f'Cannot set biases to a frozen link {self.name}.', UserWarning) else: np.copyto(self._b_v, b_v, casting='same_kind') @property def optim(self): """ Get link optimizer. Returns: optimizer """ return self._optim def reset(self): """ Reset internal evaluation states. """ if self._weight_init is not None: self._w_m = self._weight_init(self.shape) if self._bias_init is not None: self._b_v = self._bias_init((1, self.size)) if self._optim is not None: self._optim.reset() @MType(shape=OneOfType((int,), None), weight_init=OneOfType(str, Initializer, None), weight_reg=OneOfType(str, Regularizer, None), bias_init=OneOfType(str, float, Initializer, None), optim=OneOfType(str, Optimizer, None)) def reconfig(self, *, shape=None, weight_init=None, weight_reg=None, bias_init=None, optim=None): """ Reconfig link Arguments: shape ; weight_init: weight_reg: bias_init: optim: """ if self.is_frozen: warnings.warn(f'Link {self.name} is frozen. Reconfig link skipped.', UserWarning) else: if weight_init is not None: if isinstance(weight_init, str): weight_init_label = weight_init if self._weight_init is not None and weight_init_label == self._weight_init.label: warnings.warn( 'No change made to link weight initializer. 
Re-initializing link weights skipped.', UserWarning) else: if Zeros.label == weight_init_label: self._weight_init = Zeros() elif Ones.label == weight_init_label: self._weight_init = Ones() elif Identity.label == weight_init_label: self._weight_init = Identity() elif RandomNormal.label == weight_init_label: self._weight_init = RandomNormal() elif RandomUniform.label == weight_init_label: self._weight_init = RandomUniform() elif GlorotRandomNormal.label == weight_init_label: self._weight_init = GlorotRandomNormal() elif GlorotRandomUniform.label == weight_init_label: self._weight_init = GlorotRandomUniform() else: raise TypeError(f'Unknown weight initializer {weight_init_label} for link {self.name}.') self._w_m = self._weight_init(self.shape) else: if self._weight_init is not None and weight_init.label == self._weight_init.label: warnings.warn( 'No change made to link weight initializer. Re-initializing link weights skipped.', UserWarning) else: self._weight_init = weight_init self._w_m = self._weight_init(self.shape) if weight_reg is not None: if isinstance(weight_reg, str): weight_reg_label = weight_reg if self._weight_reg is not None and weight_reg_label == self._weight_reg.label: warnings.warn( 'No change made to link weight regularizer. Reconfig link weight regularizer skipped.', UserWarning) else: if weight_reg_label == 'not_use': self._weight_reg = None elif L1Lasso.label == weight_reg_label: self._weight_reg = L1Lasso() elif L2Ridge.label == weight_reg_label: self._weight_reg = L2Ridge() elif L1L2ElasticNet.label == weight_reg_label: self._weight_reg = L1L2ElasticNet() else: raise TypeError(f'Unknown weight regularizer {weight_reg_label} for link {self.name}.') else: if self._weight_reg is not None and weight_reg.label == self._weight_reg.label: warnings.warn( 'No change made to link weight initializer. Reconfig link weight regularizer skipped.', UserWarning) else: self._weight_reg = weight_reg if bias_init is not None: if isinstance(bias_init, str): bias_init_label = bias_init if self._bias_init is not None and bias_init_label == self._bias_init.label: warnings.warn( 'No change made to link bias initializer. Re-initializing link biases skipped.', UserWarning) else: if bias_init_label == 'not_use': self._bias_init = None elif Zeros.label == bias_init_label: self._bias_init = Zeros() elif Ones.label == bias_init_label: self._bias_init = Ones() else: raise TypeError(f'Unknown bias initializer {bias_init_label} for link {self.name}.') if self._bias_init is not None: self._b_v = self._bias_init((1, self.size)) else: self._b_v = None elif isinstance(bias_init, float): self._bias_init = Constant(bias_init) self._b_v = self._bias_init((1, self.size)) else: if self._bias_init is not None and bias_init.label == self._bias_init.label: warnings.warn( 'No change made to link bias initializer. Re-initializing link biases skipped.', UserWarning) else: self._bias_init = bias_init self._b_v = self._bias_init((1, self.size)) if optim is not None: if isinstance(optim, str): optim_label = optim if self._optim is not None and optim_label == self._optim.label: warnings.warn( 'No change made to link optimizer. 
Reconfig link optimization skipped.', UserWarning) else: if SGD.label == optim_label: self._optim = SGD() elif SGDM.label == optim_label: self._optim = SGDM() elif RMSprop.label == optim_label: self._optim = RMSprop() elif Adam.label == optim_label: self._optim = Adam() else: raise TypeError(f'Unknown optimizer {optim_label} for link {self.name}.') else: if self._optim is not None and optim.label == self._optim.label: warnings.warn( 'No change made to link optimizer. Reconfig link optimization skipped.', UserWarning) else: self._optim = optim if shape is not None: super().reconfig(shape=shape) self.reset() @MType(as_json=bool, beautify_json=bool) def snapshot(self, *, as_json=False, beautify_json=True): """ Return link as a snapshot dict data. Arguments: as_json: beautify_json: Returns: dict """ snapshot = super().snapshot(as_json=False, beautify_json=False) snapshot.update({ 'base_label': Link.label + '_' + snapshot['base_label'], 'frozen': self.is_frozen, 'weight': { 'dtype': str(self.weights.dtype), 'values': self.weights.tolist() } if self.weights is not None else None, 'bias': { 'dtype': str(self.biases.dtype), 'values': self.biases.tolist() } if self.biases is not None else None, 'weight_init': self._weight_init.snapshot(as_json=False, beautify_json=False), 'weight_reg': self._weight_reg.snapshot(as_json=False, beautify_json=False) if self._weight_reg is not None else None, 'bias_init': self._bias_init.snapshot(as_json=False, beautify_json=False) if self._bias_init is not None else None, 'optim': self._optim.snapshot(as_json=False, beautify_json=False) }) if as_json: if beautify_json: return json.dumps(snapshot, indent=4, sort_keys=False) else: return json.dumps(snapshot) else: return snapshot.copy() def unassign_hooks(self): """ Unassign all callback functions. """ self._monitor = None @MType(monitor=OneOfType(callable, None)) def assign_hook(self, *, monitor=None): """ Assign callback functions. Arguments: monitor: callback function to do probing during forward/backward pass """ if monitor is not None: self._monitor = monitor @MType(dict, np.ndarray, residue=dict) @MShape(axis=1, transpose=True) def forward(self, stage, a_t, *, residue={}): """ Do forward forward pass by calculating the weight sum of the pre-nonlinearity (z) tensor. Arguments: stage: forward stage a_t: post-nonlinearity (a) tensor residue: Returns: layer """ if self.has_next: if self._bias_init is not None: z_t = np.inner(a_t, self._w_m.transpose()) + self._b_v else: z_t = np.inner(a_t, self._w_m.transpose()) if self._monitor is not None: report = { 'pass': '******', 'stage': stage, 'inputs': self.inputs, 'outputs': self.outputs, 'weights': self.weights, 'biases': self.biases, 'residue': residue } self._monitor(report) return self.next.forward(stage, z_t, residue=residue) else: warnings.warn(f'Dense link {self.name} connection is incomplete. Missing connection to next layer.', UserWarning) return self @MType(dict, np.ndarray, np.ndarray, residue=dict) @MShape(axis=1, transpose=False) def backward(self, stage, azg_t, eag_t, *, residue={}): """ Do backward backward pass by calculate error gradient tensor w.r.t. nonlinearity. Arguments: stage: backward stage azg_t: gradient post-nonlinearity (a) tensor w.r.t. pre-nonlinearity (z) tensor eag_t: gradient error tensor w.r.t. 
post-nonlinearity (a) tensor residue: Returns: layer """ if self.has_prev: delta_t = np.multiply(eag_t, azg_t) if not self.is_frozen: if 'epoch' in stage: epoch = stage['epoch'] else: raise ValueError('Input stage is missing the required epoch number.') hparam = stage['hparam'] batch_size = hparam['batch_size'] zwg_t = self.inputs if self._bias_init is not None: if batch_size == 1: ewg_m = np.multiply(zwg_t.transpose(), delta_t) ebg_v = delta_t else: ewg_m = np.inner(zwg_t.transpose(), delta_t.transpose()) ebg_v = delta_t.mean(axis=0) [w_delta_m, b_delta_v] = self._optim.compute_grad_descent_step(epoch, [ewg_m, ebg_v], hparam) if self._weight_reg is not None: w_reg_m = self._weight_reg.compute_regularization(epoch, self._w_m, hparam) self._w_m -= w_delta_m + w_reg_m else: self._w_m -= w_delta_m self._b_v -= b_delta_v else: if batch_size == 1: ewg_m = np.multiply(zwg_t.transpose(), delta_t) else: ewg_m = np.inner(zwg_t.transpose(), delta_t.transpose()) [w_delta_m] = self._optim.compute_grad_descent_step(epoch, [ewg_m], hparam) if self._weight_reg is not None: w_reg_m = self._weight_reg.compute_regularization(epoch, self._w_m, hparam) self._w_m -= w_delta_m + w_reg_m else: self._w_m -= w_delta_m eag_t = np.inner(self._w_m, delta_t).transpose() if self._monitor is not None: report = { 'pass': '******', 'stage': stage, 'weights': self.weights, 'biases': self.biases, 'grad': { 'delta': delta_t, 'az': azg_t, 'ea': eag_t }, 'residue': residue } self._monitor(report) return self.prev.backward(stage, eag_t, residue=residue) else: warnings.warn(f'Dense link {self.name} connection is incomplete. Missing connection to previous layer.', UserWarning) return self
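# Example (usage sketch, not part of the library): a 3 -> 2 fully connected link with a
# constant bias of 0.1 and frozen weights. The 'glorot_random_uniform' and 'adam' label
# strings are assumptions about the registered initializer/optimizer labels.
import numpy as np

fc_link = Link(shape=(3, 2), name='fc1',
               weight_init='glorot_random_uniform',
               weight_reg='not_use',
               bias_init=0.1,
               optim='adam')
print(fc_link.weights.shape, fc_link.biases.shape)       # (3, 2) and (1, 2)
fc_link.freeze()
fc_link.weights = np.zeros((3, 2), dtype=np.float32)     # warns: cannot set weights on a frozen link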