def diff(self, da, X, l=None, Z=None):
    """Backprop through time for the vanilla RNN layer.

    Accumulates weight/bias gradients (ddws_hy, ddbs_y, ddws_xh,
    ddws_hh, ddbs_h) over every time step, clips them by norm, and
    returns the per-step hidden gradients.

    Parameters
    ----------
    da : sequence of per-time-step upstream gradients (one entry per step).
    X : the input sequence that was fed to act().
    l, Z : unused here; presumably kept for a shared layer interface —
        TODO confirm against the other layers' diff() signatures.

    Returns
    -------
    list of hidden-gradient arrays, in REVERSE time order (appended while
    iterating t from last to first).

    NOTE(review): act() appends each new hidden state, so the step-t state
    lives at Hs[t + 1]; here Hs[t] is used for the output-weight gradient
    and Hs[t - 1] as the "previous" state — this looks off by one (and at
    t == 0, Hs[-1] wraps to the final state). Confirm the intended
    indexing convention before relying on these gradients.
    """
    self.init_ddws_ddbs()
    # Gradient flowing back through the recurrent connection; zero at the
    # final step.
    dda_h = np.zeros(self.Hs[0].shape)
    da_s = []
    for t in reversed(range(len(X))):
        da_t = util.mindim(da[t].copy())
        X_t = util.mindim(X[t])
        H_t = util.mindim(self.Hs[t].copy())
        H_t_prev = util.mindim(self.Hs[t - 1].copy())
        # Output projection gradients.
        self.ddws_hy += np.dot(H_t.T, da_t)
        self.ddbs_y += da_t
        # Total gradient at the hidden state: recurrent + output paths.
        da_h = dda_h + np.dot(da_t, self.ws_hy.T)
        # Through the hidden nonlinearity (diff evaluated on the
        # activated value H_t).
        da_f = da_h * self.ntype.diff(H_t)
        da_s.append(da_f)
        self.ddws_xh += np.dot(X_t.T, da_f)
        self.ddws_hh += np.dot(H_t_prev.T, da_f)
        self.ddbs_h += da_f
        # Carry the gradient to the previous time step.
        dda_h = np.dot(da_f, self.ws_hh.T)
    self.clip_ddws_ddbs('norm', gmax=0.25)
    return da_s
def act(self, X, train):
    """Forward pass of the LSTM layer.

    Train mode: resets the per-step caches (Zs, Fs, Is, Gs, Os, Cs, Hs),
    unrolls over the sequence X, and returns the output computed from the
    FINAL hidden state only. Eval mode: recursively runs each sequence in
    X[0] through the train path and stacks the results.

    Returns
    -------
    (0, Y) — 0 stands in for the pre-activation slot of the shared
    (Z, Y) layer interface.
    """
    if train:
        self.Zs = []
        self.Fs = []
        self.Is = []
        self.Gs = []
        self.Os = []
        # Initial cell/hidden state are zeros shaped like the forget bias;
        # index t in Cs/Hs therefore holds the state ENTERING step t.
        self.Cs = [np.zeros(self.bs_f.shape)]
        self.Hs = [np.zeros(self.bs_f.shape)]
        for t in range(len(X)):
            X_t = util.mindim(X[t])
            H_t = util.mindim(self.Hs[t])
            C_t = util.mindim(self.Cs[t])
            # Gate input: previous hidden state concatenated with x_t.
            Z = np.hstack((H_t, X_t))
            self.Zs.append(Z)
            F = self.ntype_gate.act(np.dot(Z, self.ws_f) + self.bs_f)
            self.Fs.append(F)
            I = self.ntype_gate.act(np.dot(Z, self.ws_i) + self.bs_i)
            self.Is.append(I)
            # NOTE(review): the candidate G uses ntype_gate here, while
            # diff() differentiates it with ntype.diff — confirm which
            # activation is intended for the candidate.
            G = self.ntype_gate.act(np.dot(Z, self.ws_g) + self.bs_g)
            self.Gs.append(G)
            # Cell update: forget old state, add gated candidate.
            C_t = F * C_t + I * G
            self.Cs.append(C_t)
            O = self.ntype_gate.act(np.dot(Z, self.ws_o) + self.bs_o)
            self.Os.append(O)
            H_t = O * self.ntype.act(C_t)
            self.Hs.append(H_t)
        # Output projection from the last hidden state.
        V = np.dot(H_t, self.ws_v) + self.bs_v
        Y = self.ntype_out.act(V)
    else:
        # Eval: each element of X[0] is treated as a full sequence;
        # state is reset per sequence by the recursive train call.
        Y = []
        for x in X[0]:
            _, y = self.act(x, train=True)
            Y.append(y)
        Y = np.array(Y)
    return 0, Y
def act(self, X, train):
    """Apply inverted dropout to X.

    Fix: the original branches were inverted — it applied the identity
    mask during training and the random binomial mask at inference,
    which disables dropout as a regularizer and corrupts predictions.
    Standard inverted dropout masks ONLY during training (scaling
    survivors by 1/p) and is the identity at inference.

    Parameters
    ----------
    X : input activations (coerced via util.mindim).
    train : True for a training pass (random mask), False for inference.

    Returns
    -------
    (Z, Y) : the (unmasked) input and the masked output.

    Raises
    ------
    ValueError : in train mode when self.ntype is not 'binomial'.
    """
    X = util.mindim(X)
    if not train:
        # Inference: identity mask; no rescaling needed because the
        # training mask already divides by p (inverted dropout).
        self.mask = np.ones(1)
    elif self.ntype == 'binomial':
        # Keep each unit with probability p, scale survivors by 1/p so
        # the expected activation is unchanged.
        self.mask = np.random.binomial(1, self.p, size=X.shape) / self.p
    else:
        raise ValueError("Dropout ntype must be: 'binomial', ")
    Z = X
    Y = self.mask * Z
    return Z, Y
def act(self, X, _):
    """Forward pass of the convolutional layer.

    Fix: `== None` replaced with `is None` (PEP 8); identity comparison
    is the correct None test and avoids surprises if the attribute ever
    holds an object overloading __eq__ (e.g. a numpy array).

    Infers square image/kernel shapes lazily on the first call, expands
    the flat input to (..., H, W), runs im2col + a single matmul, and
    returns (Z, Y) where Z is the flattened pre-activation.
    """
    # Lazily derive square shapes when they were not configured.
    # Assumes the flat feature length is a perfect square — TODO confirm.
    if self.imshape is None:
        self.imshape = (util.int_sqrt(X.shape[-1]), ) * 2
    if self.kshape is None:
        self.kshape = (util.int_sqrt(self.ws.shape[0]), ) * 2
    X = util.mindim(X)
    X = X.reshape(X.shape[:-1] + self.imshape)
    # im2col lowers convolution to one dense matmul.
    X = self.im2col(X, self.padding, self.kshape, self.roll)
    X = np.dot(X, self.ws) + self.bs
    Z = X.reshape(X.shape[0], -1)
    Y = self.ntype.act(Z)
    return Z, Y
def act(self, X, train):
    """Forward pass of the vanilla RNN layer.

    Train mode: resets the hidden/output caches and unrolls over X,
    returning the FINAL step's output. Eval mode: runs each sequence in
    X[0] through the train path and stacks the final outputs.

    Returns (0, Y); 0 fills the pre-activation slot of the shared
    (Z, Y) layer interface.
    """
    if not train:
        # Evaluation: one recursive train-mode pass per sequence
        # (state is reset inside each call).
        outputs = [self.act(seq, train=True)[1] for seq in X[0]]
        return 0, np.array(outputs)
    self.Hs = [np.zeros((1, self.ws_hh.shape[0]))]
    self.Ys = []
    for step in range(len(X)):
        prev_h = util.mindim(self.Hs[step].copy())
        x_step = util.mindim(X[step])
        pre = np.dot(x_step, self.ws_xh) + np.dot(prev_h, self.ws_hh) + self.bs_h
        hidden = self.ntype.act(pre)
        logits = np.dot(hidden, self.ws_hy) + self.bs_y
        Y = self.ntype_out.act(logits)
        self.Hs.append(hidden)
        self.Ys.append(Y)
    return 0, Y
def act(self, X, train):
    """Forward pass of the GRU layer.

    Train mode: resets the per-step caches (Xs, Zs, Rs, Gs, Hs),
    unrolls over X, and returns the output computed from the FINAL
    hidden state. Eval mode: recursively runs each sequence in X[0]
    through the train path and stacks the results.

    Returns
    -------
    (0, Y) — 0 stands in for the pre-activation slot of the shared
    (Z, Y) layer interface.
    """
    if train:
        self.Xs = []
        self.Zs = []
        self.Rs = []
        self.Gs = []
        # Initial hidden state is zeros shaped like the update-gate bias.
        self.Hs = [np.zeros(self.bs_z.shape)]
        for t in range(len(X)):
            X_ti = util.mindim(X[t])
            H_t = util.mindim(self.Hs[t])
            # Gate input: previous hidden state concatenated with x_t.
            X_t = np.hstack((H_t, X_ti))
            # NOTE(review): the cached X_t is the un-reset concatenation,
            # but the candidate G below is computed from the R-gated
            # concatenation — diff() uses Xs for ddws_g, so confirm
            # which one the backward pass should see.
            self.Xs.append(X_t)
            # Update gate.
            Z = self.ntype_gate.act(np.dot(X_t, self.ws_z) + self.bs_z)
            self.Zs.append(Z)
            # Reset gate.
            R = self.ntype_gate.act(np.dot(X_t, self.ws_r) + self.bs_r)
            self.Rs.append(R)
            # Candidate input uses the reset-gated hidden state.
            X_t = np.hstack((H_t * R, X_ti))
            G = self.ntype.act(np.dot(X_t, self.ws_g) + self.bs_g)
            self.Gs.append(G)
            # Convex blend of old state and candidate.
            H_t = (1 - Z) * H_t + Z * G
            self.Hs.append(H_t)
        # Output projection from the last hidden state.
        V = np.dot(H_t, self.ws_v) + self.bs_v
        Y = self.ntype_out.act(V)
    else:
        # Eval: each element of X[0] is treated as a full sequence.
        Y = []
        for x in X[0]:
            x = util.mindim(x)
            _, y = self.act(x, train=True)
            Y.append(y)
        Y = np.array(Y)
    return 0, Y
def diff(self, da, X, l=None, Z=None):
    """Backprop through time for the LSTM layer.

    Accumulates gradients for the output projection and the four gate
    weight/bias sets, clips them by norm, and returns the per-step
    cell-state gradients in REVERSE time order.

    Fix: the input-gate and forget-gate gradients previously multiplied
    by `ntype_gate.act(...)` instead of `ntype_gate.diff(...)`. The
    chain rule requires the gate activation's DERIVATIVE here, exactly
    as the output-gate path already does.

    NOTE(review): C_t_next and H_t_next are initialized but never
    updated inside the loop, so no gradient flows between time steps
    (each step is effectively truncated BPTT of length 1) — confirm
    whether that is intended. Also note act() stores step-t state at
    index t + 1, while this loop reads Cs[t]/Hs[t]; confirm the
    indexing convention.
    """
    self.init_ddws_ddbs()
    C_t_next = np.zeros_like(self.Cs[0])
    H_t_next = np.zeros_like(self.Hs[0])
    da_s = []
    for t in reversed(range(len(X))):
        O_t = self.Os[t]
        I_t = self.Is[t]
        G_t = self.Gs[t]
        F_t = self.Fs[t]
        Z_t = self.Zs[t]
        H_t = self.Hs[t]
        C_t = self.Cs[t]
        C_t_prev = self.Cs[t - 1]
        da_t = util.mindim(da[t])
        # Output projection gradients.
        self.ddws_v += np.dot(H_t.T, da_t)
        self.ddbs_v += da_t
        # Gradient at the hidden state (recurrent term + output path).
        da_Hs = H_t_next.copy()
        da_Hs += np.dot(da_t, self.ws_v.T)
        # Output gate: H = O * tanh(C).
        da_Os = da_Hs * self.ntype.act(C_t)
        da_Os = da_Os * self.ntype_gate.diff(O_t)
        self.ddws_o += np.dot(Z_t.T, da_Os)
        self.ddbs_o += da_Os
        # Cell state gradient.
        da_Cs = C_t_next.copy()
        da_Cs += da_Hs * O_t * self.ntype.diff(C_t)
        # Candidate: C += I * G.
        da_Gs = da_Cs * I_t
        da_Gs = self.ntype.diff(G_t) * da_Gs
        self.ddws_g += np.dot(Z_t.T, da_Gs)
        self.ddbs_g += da_Gs
        da_s.append(da_Cs)
        # Input gate (fixed: was ntype_gate.act).
        da_Is = da_Cs * G_t
        da_Is = self.ntype_gate.diff(I_t) * da_Is
        self.ddws_i += np.dot(Z_t.T, da_Is)
        self.ddbs_i += da_Is
        # Forget gate (fixed: was ntype_gate.act).
        da_Fs = da_Cs * C_t_prev
        da_Fs = self.ntype_gate.diff(F_t) * da_Fs
        self.ddws_f += np.dot(Z_t.T, da_Fs)
        self.ddbs_f += da_Fs
    self.clip_ddws_ddbs('norm', gmax=0.25)
    return da_s
def act(self, X, _):
    """Forward pass of the pooling layer.

    Reshapes the flat input to (..., H, W), splits it into pooling
    windows, and reduces each window by max or mean (per self.ntype).
    Stores in self.pre_act the data the backward pass needs: an argmax
    mask for 'max', the broadcast window means for 'mean'.

    Returns (Z, Y): the windowed tensor and the flattened pooled output.
    Raises ValueError for any other self.ntype.
    """
    X = util.mindim(X)
    # Resolve a deferred (-1) spatial size into a square H x W.
    if self.imshape[-1] == -1:
        side = util.int_sqrt(X.shape[-1] / self.imshape[0])
        self.imshape = (self.imshape[0], side, side)
    windows = self.im_split(X.reshape(X.shape[:-1] + self.imshape), self.kshape)
    pool_axes = (-2, -1)
    if self.ntype == 'max':
        pooled = np.amax(windows, axis=pool_axes)
        # Boolean mask marking each window's maxima (used by diff).
        self.pre_act = np.equal(windows, np.amax(windows, axis=pool_axes, keepdims=True))
    elif self.ntype == 'mean':
        pooled = np.mean(windows, axis=pool_axes)
        # Broadcast every window's mean back over the window positions.
        self.pre_act = np.multiply(
            np.ones(windows.shape), np.mean(windows, axis=pool_axes, keepdims=True))
    else:
        raise ValueError("Pool ntype must be: 'max', 'mean', ")
    return windows, pooled.reshape(pooled.shape[0], -1)
def diff(self, da, X, l=None, Z=None):
    """Backprop for the GRU layer.

    Accumulates gradients for the output projection and the z/r/g
    weight/bias sets, clips them by norm, and returns the per-step
    hidden-state gradients in REVERSE time order.

    Fixes:
    - ddbs_r accumulated da_Gs (copy-paste error) instead of da_Rs.
    - da_Zs ignored the upstream gradient entirely. Since the state
      update is H_t = (1 - Z) * H_prev + Z * G, the gradient w.r.t. the
      update-gate pre-activation is da_H * (G - H_prev) * gate'(Z).

    NOTE(review): H_t_next is never updated inside the loop, so no
    gradient flows between time steps — confirm whether truncated BPTT
    is intended. The Hs[t]/Hs[t-1] indexing follows the file-wide
    convention; confirm against act(), which stores step-t state at
    index t + 1.
    """
    self.init_ddws_ddbs()
    H_t_next = np.zeros_like(self.Hs[0])
    da_s = []
    for t in reversed(range(len(X))):
        X_t = self.Xs[t]
        Z_t = self.Zs[t]
        R_t = self.Rs[t]
        G_t = self.Gs[t]
        H_t = self.Hs[t]
        H_t_prev = self.Hs[t - 1]
        da_t = util.mindim(da[t])
        # Output projection gradients.
        self.ddws_v += np.dot(H_t.T, da_t)
        self.ddbs_v += da_t
        # Gradient at the hidden state (recurrent term + output path).
        da_Hs = H_t_next.copy()
        da_Hs += np.dot(da_t, self.ws_v.T)
        # Candidate: H_t depends on G through the update gate Z.
        da_Gs = da_Hs * Z_t
        da_Gs = da_Gs * self.ntype.diff(G_t)
        self.ddws_g += np.dot(X_t.T, da_Gs)
        self.ddbs_g += da_Gs
        # Reset gate.
        da_Rs = H_t_prev * da_Gs
        da_Rs = self.ntype_gate.diff(R_t) * da_Rs
        self.ddws_r += np.dot(X_t.T, da_Rs)
        self.ddbs_r += da_Rs  # fixed: accumulated da_Gs before
        # Update gate: dH/dZ = G - H_prev (fixed: upstream term was missing).
        da_Zs = da_Hs * (G_t - H_t_prev) * self.ntype_gate.diff(Z_t)
        self.ddws_z += np.dot(X_t.T, da_Zs)
        self.ddbs_z += da_Zs
        da_s.append(da_Hs)
    self.clip_ddws_ddbs('norm', gmax=0.25)
    return da_s
def act(self, X, train):
    """Run X through every layer in order.

    Caches each layer's pre-activation under 'Z' and output under 'Y'
    in self.cache; the raw input is cached first with Z = None.
    Returns None — results are consumed via self.cache.
    """
    activation = util.mindim(X)
    self.cache = [{'Z': None, 'Y': activation}]
    for layer in self.layers:
        pre_act, activation = layer.act(activation, train)
        self.cache.append({'Z': pre_act, 'Y': activation})
def act(self, X, _):
    """Fully-connected forward pass.

    Returns (Z, Y): the pre-activation Z = X @ ws + bs and its
    activation Y. The second parameter (train flag) is unused.
    """
    inputs = util.mindim(X)
    pre_activation = np.dot(inputs, self.ws) + self.bs
    return pre_activation, self.ntype.act(pre_activation)
def act(self, X, _):
    """Forward pass through the internal layer stack plus a final
    linear map.

    Returns (Z, Y): Z keeps its trailing singleton axis; Y is the
    activation of Z with that axis squeezed out. The second parameter
    (train flag) is unused.
    """
    features = self.act_layers(util.mindim(X))
    pre_act = np.dot(features, self.ws) + self.bs
    squeezed = np.squeeze(pre_act, axis=-1)
    return pre_act, self.ntype.act(squeezed)
def act(self, X, train=False):
    """One GAN forward pass.

    Discriminates the real batch X and an equally-sized generated
    batch, stashing the results on the discriminator as cache_true and
    cache_fake. Returns None — callers read the caches.
    """
    real_batch = util.mindim(X)
    fake_batch = self.generate(len(real_batch))
    self.dis.cache_true = self.discriminate(real_batch, train)
    self.dis.cache_fake = self.discriminate(fake_batch, train)