def _init_params(self):
    self._dv = {}

    # dilated causal convolution over the input sequence
    self.conv_dilation = Conv1D(
        stride=1,
        pad="causal",
        init=self.init,
        kernel_width=2,
        dilation=self.dilation,
        out_ch=self.ch_dilation,
        optimizer=self.optimizer,
        act_fn=Affine(slope=1, intercept=0),
    )

    # components of the gated activation unit
    self.tanh = Tanh()
    self.sigm = Sigmoid()
    self.multiply_gate = Multiply(act_fn=Affine(slope=1, intercept=0))

    # 1x1 convolution projecting the gated output back to `ch_residual` channels
    self.conv_1x1 = Conv1D(
        stride=1,
        pad="same",
        dilation=0,
        init=self.init,
        kernel_width=1,
        out_ch=self.ch_residual,
        optimizer=self.optimizer,
        act_fn=Affine(slope=1, intercept=0),
    )

    self.add_residual = Add(act_fn=Affine(slope=1, intercept=0))
    self.add_skip = Add(act_fn=Affine(slope=1, intercept=0))
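# The Tanh/Sigmoid pair plus the Multiply layer above implement WaveNet's gated
# activation unit, z = tanh(a) * sigmoid(a), applied here to the output of the
# single dilated convolution (both gates share `conv_dilation` in this setup).
# A minimal NumPy sketch of the gating, for illustration only; `a` is a
# stand-in for `conv_dilation`'s output, not part of this module's API:
import numpy as np

a = np.random.randn(4, 16, 32)               # hypothetical (n_ex, l_out, ch_dilation)
z = np.tanh(a) * (1.0 / (1.0 + np.exp(-a)))  # "filter" gate times "gate" sigmoid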
def set_params(self, summary_dict):
    cids = self.hyperparameters["component_ids"]

    for k, v in summary_dict["parameters"].items():
        if k == "components":
            for c, cd in summary_dict["parameters"][k].items():
                if c in cids:
                    getattr(self, c).set_params(cd)
        elif k in self.parameters:
            self.parameters[k] = v

    for k, v in summary_dict["hyperparameters"].items():
        if k == "components":
            for c, cd in summary_dict["hyperparameters"][k].items():
                if c in cids:
                    getattr(self, c).set_params(cd)

        if k in self.hyperparameters:
            # rehydrate serialized activation functions from their string
            # representations; all other hyperparameters are copied as-is
            if k == "act_fn" and v == "ReLU":
                self.hyperparameters[k] = ReLU()
            elif k == "act_fn" and v == "Sigmoid":
                self.hyperparameters[k] = Sigmoid()
            elif k == "act_fn" and v == "Tanh":
                self.hyperparameters[k] = Tanh()
            elif k == "act_fn" and "Affine" in v:
                r = r"Affine\(slope=(.*), intercept=(.*)\)"
                slope, intercept = re.match(r, v).groups()
                self.hyperparameters[k] = Affine(float(slope), float(intercept))
            elif k == "act_fn" and "Leaky ReLU" in v:
                r = r"Leaky ReLU\(alpha=(.*)\)"
                alpha = re.match(r, v).groups()[0]
                self.hyperparameters[k] = LeakyReLU(float(alpha))
            else:
                self.hyperparameters[k] = v
def _init_params(self, X=None):
    self._dv = {}

    self.conv1 = Conv2D(
        pad=self.pad1,
        init=self.init,
        act_fn=self.act_fn,
        out_ch=self.out_ch1,
        stride=self.stride1,
        optimizer=self.optimizer,
        kernel_shape=self.kernel_shape1,
    )
    self.conv2 = Conv2D(
        pad=self.pad2,
        init=self.init,
        out_ch=self.out_ch2,
        stride=self.stride2,
        optimizer=self.optimizer,
        kernel_shape=self.kernel_shape2,
        act_fn=Affine(slope=1, intercept=0),
    )
    # we can't initialize `conv_skip` without X's dimensions; see `forward`
    # for further details
    self.batchnorm1 = BatchNorm2D(epsilon=self.epsilon, momentum=self.momentum)
    self.batchnorm2 = BatchNorm2D(epsilon=self.epsilon, momentum=self.momentum)
    self.batchnorm_skip = BatchNorm2D(epsilon=self.epsilon, momentum=self.momentum)
    self.add3 = Add(self.act_fn)
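# A hedged sketch of the deferred-initialization pattern the comment above
# refers to: the skip-path convolution depends on the input's dimensions, so it
# can only be constructed on the first call to `forward`. The body below is
# illustrative only, not the module's actual `forward` implementation.
def forward(self, X):
    if not hasattr(self, "conv_skip"):
        self._init_conv_skip(X)  # build the skip conv once X's dims are known
    ...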
def _init_conv2(self):
    self.conv2 = Conv2D(
        pad="same",
        init=self.init,
        out_ch=self.in_ch,
        stride=self.stride2,
        optimizer=self.optimizer,
        kernel_shape=self.kernel_shape2,
        act_fn=Affine(slope=1, intercept=0),
    )
def _init_conv_skip(self, X):
    self._calc_skip_padding(X)
    self.conv_skip = Conv2D(
        init=self.init,
        pad=self.pad_skip,
        out_ch=self.out_ch2,
        stride=self.stride_skip,
        kernel_shape=self.kernel_shape_skip,
        act_fn=Affine(slope=1, intercept=0),
        optimizer=self.optimizer,
    )
def __call__(self):
    param = self.param
    if param is None:
        act = Affine(slope=1, intercept=0)
    elif isinstance(param, ActivationBase):
        act = param
    elif isinstance(param, str):
        act = self.init_from_str(param)
    else:
        raise ValueError("Unknown activation: {}".format(param))
    return act
def _build_encoder(self):
    """
    CNN encoder

    Conv1 -> ReLU -> MaxPool1 -> Conv2 -> ReLU -> MaxPool2 ->
        Flatten -> FC1 -> ReLU -> FC2
    """
    self.encoder = OrderedDict()
    self.encoder["Conv1"] = Conv2D(
        act_fn=ReLU(),
        init=self.init,
        pad=self.enc_conv1_pad,
        optimizer=self.optimizer,
        out_ch=self.enc_conv1_out_ch,
        stride=self.enc_conv1_stride,
        kernel_shape=self.enc_conv1_kernel_shape,
    )
    self.encoder["Pool1"] = Pool2D(
        mode="max",
        optimizer=self.optimizer,
        stride=self.enc_pool1_stride,
        kernel_shape=self.enc_pool1_kernel_shape,
    )
    self.encoder["Conv2"] = Conv2D(
        act_fn=ReLU(),
        init=self.init,
        pad=self.enc_conv2_pad,
        optimizer=self.optimizer,
        out_ch=self.enc_conv2_out_ch,
        stride=self.enc_conv2_stride,
        kernel_shape=self.enc_conv2_kernel_shape,
    )
    self.encoder["Pool2"] = Pool2D(
        mode="max",
        optimizer=self.optimizer,
        stride=self.enc_pool2_stride,
        kernel_shape=self.enc_pool2_kernel_shape,
    )
    self.encoder["Flatten3"] = Flatten(optimizer=self.optimizer)
    self.encoder["FC4"] = FullyConnected(
        n_out=self.latent_dim, act_fn=ReLU(), optimizer=self.optimizer
    )
    self.encoder["FC5"] = FullyConnected(
        n_out=self.T * 2,
        optimizer=self.optimizer,
        act_fn=Affine(slope=1, intercept=0),
        init=self.init,
    )
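# Because the encoder is stored as an OrderedDict, a forward pass is just a
# fold over its layers. A minimal usage sketch (assumes the surrounding layer
# API, where each layer exposes a `forward` method, and that `_build_encoder`
# has already run; the names `vae` and `X` are hypothetical):
out = X
for name, layer in vae.encoder.items():
    out = layer.forward(out)
# `out` now has 2 * T columns per example; splitting them into variational
# means and log-variances is the usual VAE convention, handled elsewhere.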
def init_from_str(self, act_str):
    act_str = act_str.lower()
    if act_str == "relu":
        act_fn = ReLU()
    elif act_str == "tanh":
        act_fn = Tanh()
    elif act_str == "sigmoid":
        act_fn = Sigmoid()
    elif "affine" in act_str:
        r = r"affine\(slope=(.*), intercept=(.*)\)"
        slope, intercept = re.match(r, act_str).groups()
        act_fn = Affine(float(slope), float(intercept))
    elif "leaky relu" in act_str:
        r = r"leaky relu\(alpha=(.*)\)"
        alpha = re.match(r, act_str).groups()[0]
        act_fn = LeakyReLU(float(alpha))
    else:
        raise ValueError("Unknown activation: {}".format(act_str))
    return act_fn
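# Usage sketch for the string parser above. The class name
# `ActivationInitializer` is assumed from context and may differ in the actual
# source; note that the affine / leaky-relu regexes expect the exact
# repr-style formats shown:
acts = ActivationInitializer()
acts.init_from_str("relu")                             # -> ReLU()
acts.init_from_str("affine(slope=2, intercept=0.5)")   # -> Affine(2.0, 0.5)
acts.init_from_str("leaky relu(alpha=0.3)")            # -> LeakyReLU(0.3)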
def plot_activations():
    fig, axes = plt.subplots(2, 3, sharex=True, sharey=True)
    fns = [Affine(), Tanh(), Sigmoid(), ReLU(), LeakyReLU(), ELU()]

    for ax, fn in zip(axes.flatten(), fns):
        X = np.linspace(-3, 3, 100).astype(float).reshape(100, 1)
        ax.plot(X, fn(X), label=r"$y$", alpha=0.7)
        ax.plot(X, fn.grad(X), label=r"$\frac{dy}{dx}$", alpha=0.7)
        ax.plot(X, fn.grad2(X), label=r"$\frac{d^2 y}{dx^2}$", alpha=0.7)
        ax.hlines(0, -3, 3, lw=1, linestyles="dashed", color="k")
        ax.vlines(0, -1.2, 1.2, lw=1, linestyles="dashed", color="k")
        ax.set_ylim(-1.1, 1.1)
        ax.set_xlim(-3, 3)
        ax.set_xticks([])
        ax.set_yticks([-1, 0, 1])
        ax.xaxis.set_visible(False)
        # ax.yaxis.set_visible(False)
        ax.set_title("{}".format(fn))
        ax.legend(frameon=False)
        sns.despine(left=True, bottom=True)

    fig.set_size_inches(8, 5)
    plt.tight_layout()
    plt.savefig("plot.png", dpi=300)
    plt.close("all")
def __init__(
    self,
    out_ch1,
    out_ch2,
    kernel_shape1,
    kernel_shape2,
    kernel_shape_skip,
    pad1=0,
    pad2=0,
    stride1=1,
    stride2=1,
    act_fn=None,
    epsilon=1e-5,
    momentum=0.9,
    stride_skip=1,
    optimizer=None,
    init="glorot_uniform",
):
    """
    A ResNet-like "convolution" shortcut module. The additional `conv2d_skip`
    and `batchnorm_skip` layers in the shortcut path allow adjusting the
    dimensions of X to match the output of the main set of convolutions.

    X -> Conv2D -> Act_fn -> BatchNorm2D -> Conv2D -> BatchNorm2D -> + -> Act_fn
     \_____________________ Conv2D -> Batchnorm2D __________________/

    See He et al. (2015) at https://arxiv.org/pdf/1512.03385.pdf for further
    details.

    Parameters
    ----------
    out_ch1 : int
        The number of filters/kernels to compute in the first convolutional layer
    out_ch2 : int
        The number of filters/kernels to compute in the second convolutional layer
    kernel_shape1 : 2-tuple
        The dimension of a single 2D filter/kernel in the first convolutional layer
    kernel_shape2 : 2-tuple
        The dimension of a single 2D filter/kernel in the second convolutional layer
    kernel_shape_skip : 2-tuple
        The dimension of a single 2D filter/kernel in the "skip" convolutional layer
    stride1 : int (default: 1)
        The stride/hop of the convolution kernels in the first convolutional layer
    stride2 : int (default: 1)
        The stride/hop of the convolution kernels in the second convolutional layer
    stride_skip : int (default: 1)
        The stride/hop of the convolution kernels in the "skip" convolutional layer
    pad1 : int, tuple, or 'same' (default: 0)
        The number of rows/columns of 0's to pad the input to the first
        convolutional layer with
    pad2 : int, tuple, or 'same' (default: 0)
        The number of rows/columns of 0's to pad the input to the second
        convolutional layer with
    act_fn : `activations.Activation` instance (default: None)
        The activation function for computing Y[t]. If `None`, use the
        identity f(x) = x by default
    epsilon : float (default: 1e-5)
        A small smoothing constant to use during BatchNorm2D computation to
        avoid divide-by-zero errors.
    momentum : float (default: 0.9)
        The momentum term for the running mean/running std calculations in the
        BatchNorm2D layers. The closer this is to 1, the less weight will be
        given to the mean/std of the current batch (i.e., higher smoothing)
    init : str (default: 'glorot_uniform')
        The weight initialization strategy. Valid entries are
        {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'}
    optimizer : str or `OptimizerBase` instance (default: None)
        The optimization strategy to use when performing gradient updates
        within the `update` method. If `None`, use the `SGD` optimizer with
        default parameters.
    """
    super().__init__()

    self.init = init
    self.pad1 = pad1
    self.pad2 = pad2
    self.in_ch = None
    self.out_ch1 = out_ch1
    self.out_ch2 = out_ch2
    self.epsilon = epsilon
    self.stride1 = stride1
    self.stride2 = stride2
    self.momentum = momentum
    self.optimizer = optimizer
    self.stride_skip = stride_skip
    self.kernel_shape1 = kernel_shape1
    self.kernel_shape2 = kernel_shape2
    self.kernel_shape_skip = kernel_shape_skip
    self.act_fn = Affine(slope=1, intercept=0) if act_fn is None else act_fn

    self._init_params()
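# A hedged construction example for this module. The class name
# `SkipConnectionConvModule` is assumed from context and may differ in the
# actual source; hyperparameter values are arbitrary:
module = SkipConnectionConvModule(
    out_ch1=16,
    out_ch2=32,
    kernel_shape1=(3, 3),
    kernel_shape2=(3, 3),
    kernel_shape_skip=(1, 1),
    stride1=2,        # downsampling in the main path ...
    stride_skip=2,    # ... matched in the skip path so the Add dims agree
    act_fn=ReLU(),
)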
def __init__(
    self,
    out_ch,
    kernel_shape1,
    kernel_shape2,
    stride1=1,
    stride2=1,
    act_fn=None,
    epsilon=1e-5,
    momentum=0.9,
    optimizer=None,
    init="glorot_uniform",
):
    """
    A ResNet-like "identity" shortcut module. Enforces `same` padding during
    each convolution to ensure module output has same dims as its input.

    X -> Conv2D -> Act_fn -> BatchNorm2D -> Conv2D -> BatchNorm2D -> + -> Act_fn
     \______________________________________________________________/

    See He et al. (2015) at https://arxiv.org/pdf/1512.03385.pdf for further
    details.

    Parameters
    ----------
    out_ch : int
        The number of filters/kernels to compute in the first convolutional layer
    kernel_shape1 : 2-tuple
        The dimension of a single 2D filter/kernel in the first convolutional layer
    kernel_shape2 : 2-tuple
        The dimension of a single 2D filter/kernel in the second convolutional layer
    stride1 : int (default: 1)
        The stride/hop of the convolution kernels in the first convolutional layer
    stride2 : int (default: 1)
        The stride/hop of the convolution kernels in the second convolutional layer
    act_fn : `activations.Activation` instance (default: None)
        The activation function for computing Y[t]. If `None`, use the
        identity f(x) = x by default
    epsilon : float (default: 1e-5)
        A small smoothing constant to use during BatchNorm2D computation to
        avoid divide-by-zero errors.
    momentum : float (default: 0.9)
        The momentum term for the running mean/running std calculations in the
        BatchNorm2D layers. The closer this is to 1, the less weight will be
        given to the mean/std of the current batch (i.e., higher smoothing)
    optimizer : str or `OptimizerBase` instance (default: None)
        The optimization strategy to use when performing gradient updates
        within the `update` method. If `None`, use the `SGD` optimizer with
        default parameters.
    init : str (default: 'glorot_uniform')
        The weight initialization strategy. Valid entries are
        {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'}
    """
    super().__init__()

    self.init = init
    self.in_ch = None
    self.out_ch = out_ch
    self.epsilon = epsilon
    self.stride1 = stride1
    self.stride2 = stride2
    self.optimizer = optimizer
    self.momentum = momentum
    self.kernel_shape1 = kernel_shape1
    self.kernel_shape2 = kernel_shape2
    self.act_fn = Affine(slope=1, intercept=0) if act_fn is None else act_fn

    self._init_params()
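# A hedged construction example for the identity-shortcut variant. The class
# name `SkipConnectionIdentityModule` is assumed from context and may differ in
# the actual source; values are arbitrary. With `same` padding and unit strides
# the module's output shape matches its input, so no skip-path convolution is
# needed:
module = SkipConnectionIdentityModule(
    out_ch=16,
    kernel_shape1=(3, 3),
    kernel_shape2=(3, 3),
    act_fn=ReLU(),
)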