def update_lstm(input, hiddens, cells):
    change  = np.tanh(concat_and_multiply(params['change'],  input, hiddens))
    forget  = sigmoid(concat_and_multiply(params['forget'],  input, hiddens))
    ingate  = sigmoid(concat_and_multiply(params['ingate'],  input, hiddens))
    outgate = sigmoid(concat_and_multiply(params['outgate'], input, hiddens))
    cells   = cells * forget + ingate * change
    hiddens = outgate * np.tanh(cells)
    return hiddens, cells
def update_lstm(input, hiddens, cells, forget_weights, change_weights,
                ingate_weights, outgate_weights):
    """One iteration of an LSTM layer."""
    change  = np.tanh(activations(change_weights, input, hiddens))
    forget  = sigmoid(activations(forget_weights, input, cells, hiddens))
    ingate  = sigmoid(activations(ingate_weights, input, cells, hiddens))
    cells   = cells * forget + ingate * change
    outgate = sigmoid(activations(outgate_weights, input, cells, hiddens))
    hiddens = outgate * np.tanh(cells)
    return hiddens, cells
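# For context, a minimal driver for the update_lstm variant above, run over a toy sequence.
# The sigmoid/activations helpers, the weight shapes, and all dimensions here are
# illustrative assumptions, not part of the original code.
import numpy as np

def sigmoid(x):
    return 0.5 * (np.tanh(x) + 1.0)

def activations(weights, *args):
    # concatenate the arguments plus a constant-1 bias column, then multiply by the weights
    cat = np.concatenate(args + (np.ones((args[0].shape[0], 1)),), axis=1)
    return np.dot(cat, weights)

batch, n_in, n_hid, seq_len = 4, 3, 5, 6
rs = np.random.RandomState(0)
gate_w = lambda d: 0.1 * rs.randn(d, n_hid)
change_w = gate_w(n_in + n_hid + 1)                                   # change gate sees (input, hiddens)
forget_w, ingate_w, outgate_w = (gate_w(n_in + 2 * n_hid + 1) for _ in range(3))

hiddens = np.zeros((batch, n_hid))
cells = np.zeros((batch, n_hid))
for t in range(seq_len):
    x_t = rs.randn(batch, n_in)
    hiddens, cells = update_lstm(x_t, hiddens, cells,
                                 forget_w, change_w, ingate_w, outgate_w)
print(hiddens.shape, cells.shape)   # (4, 5) (4, 5)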
def __init__(self, **kwargs):
    # set default values for layer sizes, activation, and scale
    activation = 'relu'

    # decide on these parameters via user input
    if 'activation' in kwargs:
        activation = kwargs['activation']

    # switches
    if activation == 'linear':
        self.activation = lambda data: data
    elif activation == 'tanh':
        self.activation = lambda data: np.tanh(data)
    elif activation == 'relu':
        self.activation = lambda data: np.maximum(0, data)
    elif activation == 'sinc':
        self.activation = lambda data: np.sinc(data)
    elif activation == 'sin':
        self.activation = lambda data: np.sin(data)
    elif activation == 'maxout':
        self.activation = lambda data1, data2: np.maximum(data1, data2)

    # select layer sizes and scale
    self.layer_sizes = kwargs['layer_sizes']
    self.scale = 0.1
    if 'scale' in kwargs:
        self.scale = kwargs['scale']

    # assign initializer / feature transforms function
    if activation in ('linear', 'tanh', 'relu', 'sinc', 'sin'):
        self.initializer = self.standard_initializer
        self.feature_transforms = self.feature_transforms
    elif activation == 'maxout':
        self.initializer = self.maxout_initializer
        self.feature_transforms = self.maxout_feature_transforms
def tanh_predict(self, pt, w):
    # linear combination of tanh basis features
    val = w[0] + sum([w[i] * np.tanh(self.R[i - 1, 0] + self.R[i - 1, 1] * pt)
                      for i in range(1, self.D + 1)])
    return val
def __init__(self, **kwargs):
    if 'layer_sizes' in kwargs:
        self.layer_sizes = kwargs['layer_sizes']  # set layer sizes
    else:
        # else create default setup
        N = 1   # input dimensions
        M = 1   # output dimensions
        U = 10  # 10-unit hidden layer
        self.layer_sizes = [N, U, M]  # layer sizes used to generate the weight matrices

    if 'scale' in kwargs:
        self.scale = kwargs['scale']  # set scale
    else:
        self.scale = 0.1

    a = 'relu'  # set default activation to ReLU
    if 'activation' in kwargs:
        a = kwargs['activation']  # manually set activation if present
    self.activation_name = a

    if a == 'relu':
        self.activation = lambda data: np.maximum(0, data)
    elif a == 'tanh':
        self.activation = lambda data: np.tanh(data)
    elif a == 'maxout':
        self.activation = lambda data1, data2: np.maximum(data1, data2)
        self.weight_matrix = self.maxout_init_weights
        self.transforms = self.maxout_feature_transforms
    else:
        # user-supplied activation callable
        self.activation = kwargs['activation']

    if a in ['relu', 'tanh']:
        self.weight_matrix = self.init_weights
        self.transforms = self.feature_transforms
def variational_log_density(params, samples):
    '''
    samples: [n_samples, D]
    u: [D,1]
    w: [D,1]
    b: [1]
    Returns: [n_samples]
    '''
    n_samples = len(samples)
    mean = params[0]
    log_std = params[1]
    u = params[2]
    w = params[3]
    b = params[4]

    z_k = normalizing_flows(samples, u, w, b)

    logp_zk = logprob(z_k)
    logp_zk = np.reshape(logp_zk, [n_samples, 1])

    logq_z0 = diag_gaussian_log_density(samples, mean, log_std)
    logq_z0 = np.reshape(logq_z0, [n_samples, 1])

    # [n_samples, D]
    phi = np.dot((1. - np.tanh(np.dot(samples, w) + b)**2), w.T)
    # [n_samples, 1]
    sum_nf = np.log(np.abs(1 + np.dot(phi, u)))

    return np.reshape(logq_z0 - sum_nf, [n_samples])
def planar_flow(z: np.ndarray, w: np.ndarray, u: np.ndarray,
                b: Union[int, float], h=np.tanh) -> np.ndarray:
    """Apply a planar flow to each element of `samples`

    :param z: numpy array, samples to be transformed. Shape: (n_samples, n_dim)
    :param u: numpy array, parameter of flow (N, D)
    :param w: numpy array, parameter of flow (N, D)
    :param b: numeric, parameter of flow (N,)
    :param h: callable, non-linear function (default tanh)
    :returns: numpy array, transformed samples

    Transforms given samples according to the planar flow
    :math:`f(z) = z + u h(w^T z + b)`
    """
    assert np.all(np.array([w.shape[0], u.shape[0], b.shape[0]]) == z.shape[0]), \
        'Incorrect first dimension'
    assert np.all(np.array([w.shape[1], u.shape[1]]) == z.shape[1]), \
        'Incorrect second dimension'
    u = _get_uhat(u, w)
    assert np.all(np.sum(u * w, axis=1) >= -1), \
        f'Flow is not guaranteed to be invertible (u^Tw < -1: {w._value, u._value})'
    res = z + u * h(np.sum(z * w, axis=1) + b).reshape(-1, 1)
    assert res.shape == z.shape, f'Incorrect output shape: {res.shape}'
    return res
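# A minimal, self-contained sketch of the same planar-flow step together with the
# log|det Jacobian| term used in the flow's density update:
#   f(z) = z + u * tanh(w^T z + b),  log|det df/dz| = log|1 + u^T psi(z)|,
#   psi(z) = (1 - tanh(w^T z + b)^2) * w.
# The shared (D,)-shaped parameterization and all values below are illustrative assumptions;
# u is assumed to be already constrained (the _get_uhat reparameterization is not reproduced).
import numpy as np

def planar_flow_with_logdet(z, u, w, b):
    # z: (n_samples, D); u, w: (D,); b: scalar
    lin = z @ w + b                                  # (n_samples,)
    f_z = z + np.outer(np.tanh(lin), u)              # (n_samples, D)
    psi = np.outer(1.0 - np.tanh(lin) ** 2, w)       # (n_samples, D)
    log_det = np.log(np.abs(1.0 + psi @ u))          # (n_samples,)
    return f_z, log_det

rng = np.random.default_rng(0)
z = rng.normal(size=(5, 2))
f_z, log_det = planar_flow_with_logdet(z, u=np.array([0.5, -0.3]),
                                       w=np.array([1.0, 0.2]), b=0.1)
print(f_z.shape, log_det.shape)                      # (5, 2) (5,)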
def neural_net_predict(params, inputs, dropout, test_time=False):
    for W, b in params:
        outputs = np.dot(inputs, W) + b
        inputs = np.tanh(outputs)
        if dropout and not test_time:
            # inverted dropout: zero out units with probability `dropout`
            # and rescale so the expected activation is unchanged at test time
            inputs *= np.random.binomial([np.ones_like(inputs)],
                                         (1 - dropout))[0] / (1 - dropout)
    return outputs
def normalizing_flows(z_0, norm_flow_params):
    '''
    z_0: [n_samples, D]
    u: [D,1]
    w: [D,1]
    b: [1]
    '''
    current_z = z_0
    all_zs = [z_0]
    for params_k in norm_flow_params:
        u = params_k[0]
        w = params_k[1]
        b = params_k[2]

        # Appendix equations: reparameterize u so the flow stays invertible
        m_x = -1. + np.log(1. + np.exp(np.dot(w.T, u)))
        u_k = u + (m_x - np.dot(w.T, u)) * (w / np.linalg.norm(w))

        # [n_samples, 1]
        term1 = np.tanh(np.dot(current_z, w) + b)
        # [n_samples, D]
        term1 = np.dot(term1, u_k.T)
        current_z = current_z + term1
        all_zs.append(current_z)
    return current_z, all_zs
def update_rnn(input, hiddens):
    """
    Calculate the hidden states, given the inputs and previous hidden states
    at this time step.

    @input: The stacked inputs at this time step.
    @hiddens: The hidden states at this time step.
    """
    return np.tanh(concat_and_multiply(params['change'], input, hiddens))
def predictions(W_vect, inputs, alpha):
    outputs = 0
    for W, b in unpack_layers(W_vect):
        prev_outputs = outputs
        outputs = np.dot(np.array(inputs), W) + b
        inputs = np.tanh(outputs)
    return sigmoid(alpha) * outputs + (1 - sigmoid(alpha)) * prev_outputs
def test_function_overloading():
    a = pe.pseudo_Obs(17, 2.9, 'e1')
    b = pe.pseudo_Obs(4, 0.8, 'e1')

    fs = [lambda x: x[0] + x[1], lambda x: x[1] + x[0],
          lambda x: x[0] - x[1], lambda x: x[1] - x[0],
          lambda x: x[0] * x[1], lambda x: x[1] * x[0],
          lambda x: x[0] / x[1], lambda x: x[1] / x[0],
          lambda x: np.exp(x[0]), lambda x: np.sin(x[0]),
          lambda x: np.cos(x[0]), lambda x: np.tan(x[0]),
          lambda x: np.log(x[0]), lambda x: np.sqrt(np.abs(x[0])),
          lambda x: np.sinh(x[0]), lambda x: np.cosh(x[0]),
          lambda x: np.tanh(x[0])]

    for i, f in enumerate(fs):
        t1 = f([a, b])
        t2 = pe.derived_observable(f, [a, b])
        c = t2 - t1
        assert c.is_zero()

    assert np.log(np.exp(b)) == b
    assert np.exp(np.log(b)) == b
    assert np.sqrt(b ** 2) == b
    assert np.sqrt(b) ** 2 == b

    np.arcsin(1 / b)
    np.arccos(1 / b)
    np.arctan(1 / b)
    np.arctanh(1 / b)
    np.sinc(1 / b)

    b ** b
    0.5 ** b
    b ** 0.5
def __init__(self, **kwargs):
    # set default values for layer sizes, activation, and scale
    activation = 'relu'

    # decide on these parameters via user input
    if 'activation' in kwargs:
        activation = kwargs['activation']

    # switches
    if activation == 'linear':
        self.activation = lambda data: data
    elif activation == 'tanh':
        self.activation = lambda data: np.tanh(data)
    elif activation == 'relu':
        self.activation = lambda data: np.maximum(0, data)
    elif activation == 'sinc':
        self.activation = lambda data: np.sinc(data)
    elif activation == 'sin':
        self.activation = lambda data: np.sin(data)
    else:
        # user-defined activation
        self.activation = kwargs['activation']

    # select layer sizes and scale
    N = 1
    M = 1
    U = 10
    self.layer_sizes = [N, U, M]
    self.scale = 0.1
    if 'layer_sizes' in kwargs:
        self.layer_sizes = kwargs['layer_sizes']
    if 'scale' in kwargs:
        self.scale = kwargs['scale']
def plot_fit(self, plotting_weights, **kwargs):
    # construct figure
    fig, axs = plt.subplots(1, 3, figsize=(9, 4))

    # create a gridspec with three panels; the outer two are blank spacers
    gs = gridspec.GridSpec(1, 3, width_ratios=[1, 5, 1])
    ax1 = plt.subplot(gs[0]); ax1.axis('off')
    ax = plt.subplot(gs[1])
    ax3 = plt.subplot(gs[2]); ax3.axis('off')

    # set plotting limits
    xmax = copy.deepcopy(max(self.x))
    xmin = copy.deepcopy(min(self.x))
    xgap = (xmax - xmin) * 0.25
    xmin -= xgap
    xmax += xgap
    ymax = max(self.y)
    ymin = min(self.y)
    ygap = (ymax - ymin) * 0.25
    ymin -= ygap
    ymax += ygap

    # initialize points
    ax.scatter(self.x, self.y, color='k', edgecolor='w',
               linewidth=0.9, s=80, zorder=3)

    # clean up panel
    ax.set_xlim([xmin, xmax])
    ax.set_ylim([ymin, ymax])

    # label axes
    ax.set_xlabel(r'$x$', fontsize=12)
    ax.set_ylabel(r'$y$', rotation=0, fontsize=12)

    # create fit
    s = np.linspace(xmin, xmax, 300)
    colors = ['k', 'magenta']
    if 'colors' in kwargs:
        colors = kwargs['colors']
    transformers = [lambda a: a for i in range(len(plotting_weights))]
    if 'transformers' in kwargs:
        transformers = kwargs['transformers']

    for i in range(len(plotting_weights)):
        weights = plotting_weights[i]
        transformer = transformers[i]

        # plot approximation
        l = weights[0] + weights[1] * transformer(s)
        t = np.tanh(l).flatten()
        ax.plot(s, t, linewidth=2, color=colors[i], zorder=2)
def update_rnn(input, hiddens):
    """
    Calculate the hidden states, given the batch inputs and previous hidden
    states at this time step.

    @input: The stacked inputs at this time step, shape (batchSize x numChannels);
            "numExamples" aka batchSize.
    @hiddens: The hidden states at this time step, shape (batchSize x hdim).
    """
    return np.tanh(concat_and_multiply(params['change'], input, hiddens))
def tanh_feats(self, D):
    F = [np.ones((len(self.x)))]
    for deg in range(D):
        F.append(np.tanh(self.R[deg, 0] + self.R[deg, 1] * self.x))
    F = np.asarray(F)
    F.shape = (D + 1, len(self.x))
    return F.T
def plot_fit(self, w, model, **kwargs):
    # construct figure
    fig = plt.figure(figsize=(9, 4))

    # create a gridspec with three panels; the outer two are blank spacers
    gs = gridspec.GridSpec(1, 3, width_ratios=[1, 5, 1])
    ax = plt.subplot(gs[0]); ax.axis('off')
    ax2 = plt.subplot(gs[2]); ax2.axis('off')
    ax1 = plt.subplot(gs[1])

    view = [20, 20]
    if 'view' in kwargs:
        view = kwargs['view']

    ##### plot left panel in original space ####
    # scatter points
    xmin, xmax, ymin, ymax = self.scatter_pts_2d(self.x, ax1)

    # clean up panel
    ax1.set_xlim([xmin, xmax])
    ax1.set_ylim([ymin, ymax])

    # label axes
    ax1.set_xlabel(r'$x$', fontsize=16)
    ax1.set_ylabel(r'$y$', rotation=0, fontsize=16, labelpad=10)

    # create fit
    s = np.linspace(xmin, xmax, 300)[np.newaxis, :]
    normalizer = lambda a: a
    if 'normalizer' in kwargs:
        normalizer = kwargs['normalizer']
    t = np.tanh(model(normalizer(s), w))
    ax1.plot(s.flatten(), t.flatten(), linewidth=2, c='lime')
def tanh_least_squares(self, w):
    cost = 0
    for p in range(0, len(self.y)):
        x_p = self.x[p, :]
        y_p = self.y[p]
        a_p = w[0] + np.sum([u * v for (u, v) in zip(x_p, w[1:])])
        cost += (np.tanh(a_p) - y_p) ** 2
    return cost
def act(self, ob):
    ob = self.observation_filter(ob, update=self.update_filter)
    inputs = ob.copy()
    for W, b in self.weights:
        outputs = anp.dot(inputs, W) + b
        inputs = anp.tanh(outputs)
    return outputs
def update_hidden(self, weights, input, hidden, cells):
    concated_input = agnp.concatenate((input, hidden), axis=2)

    W_change, b_change = self.unpack_change_params(weights)
    change = agnp.tanh(
        agnp.einsum('pdh,pnd->pnh', W_change, concated_input) + b_change)

    W_forget, b_forget = self.unpack_forget_params(weights)
    forget = self.hidden_nonlinearity(
        agnp.einsum('pdh,pnd->pnh', W_forget, concated_input) + b_forget)

    W_ingate, b_ingate = self.unpack_ingate_params(weights)
    ingate = self.hidden_nonlinearity(
        agnp.einsum('pdh,pnd->pnh', W_ingate, concated_input) + b_ingate)

    W_outgate, b_outgate = self.unpack_outgate_params(weights)
    outgate = self.hidden_nonlinearity(
        agnp.einsum('pdh,pnd->pnh', W_outgate, concated_input) + b_outgate)

    cells = cells * forget + ingate * change
    hidden = outgate * agnp.tanh(cells)
    return hidden, cells
def neural_net_predict(params, inputs, dropout=True):
    """Implements a deep neural network for classification.
    params is a list of (weights, bias) tuples.
    inputs is an (N x D) matrix.
    returns the final-layer outputs."""
    for W, b in params:
        outputs = np.dot(inputs, W) + b
        inputs = np.tanh(outputs)
        if dropout:
            # inverted dropout: keep each unit with probability 0.8 and rescale
            inputs = inputs * np.random.binomial([np.ones_like(inputs)], 0.8)[0] / 0.8
    return outputs
def a_fb(sqrtshalf, gf):
    MZ = 90
    GFNom = 1.0
    sqrts = sqrtshalf * 2.
    A_FB_EN = np.tanh((sqrts - MZ) / MZ * 10)
    A_FB_GF = gf / GFNom
    return 2 * A_FB_EN * A_FB_GF
def lastlayer(params, inputs):
    # run the network forward, stopping at the penultimate (W, b) pair and
    # returning its pre-activation output; the final pair is never applied
    i = 0
    for W, b in params:
        outputs = np.dot(inputs, W) + b
        i = i + 1
        if i == len(params) - 1:
            break
        inputs = np.tanh(outputs)
    return outputs
def neural_net_predict(params, inputs):
    """Implements a deep neural network for binary classification.
    params is a list of (weights, bias) tuples.
    inputs is an (N x D) matrix.
    returns sigmoid probabilities of the final-layer outputs."""
    for W, b in params:
        outputs = np.dot(inputs, W) + b
        inputs = np.tanh(outputs)
    return sigmoid(outputs)
def prediction_loss_check(x, y, W, V, b, c):
    """
    Compute the loss for the given forward propagation.
    """
    l1_out = np.dot(W, x) + b
    l1_act = np.tanh(l1_out)
    final_out = np.dot(V, l1_act) + c
    # negative log-likelihood of class y under a softmax over final_out
    L = -final_out[y][0] + np.log(np.sum(np.exp(final_out)))
    return L
def tanh_feats(self, D):
    F = [np.ones((len(self.y), 1))]
    for deg in range(D):
        f = np.tanh(self.R[deg, 0] + self.R[deg, 1] * self.x[:, 0]
                    + self.R[deg, 2] * self.x[:, 1])
        f.shape = (len(f), 1)
        F.append(f)
    F = np.asarray(F)
    F = F[:, :, 0]
    return F.T
def neural_net_predict(params, inputs):
    """Implements a deep neural network for classification.
    params is a list of (weights, bias) tuples.
    inputs is an (N x D) matrix.
    returns normalized class log-probabilities."""
    for W, b in params:
        outputs = np.dot(inputs, W) + b
        inputs = np.tanh(outputs)
    return outputs - logsumexp(outputs, axis=1, keepdims=True)
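# A minimal, hypothetical driver for neural_net_predict above: random (W, b) layers and the
# mean cross-entropy computed from the returned log-probabilities. The init_params helper,
# layer sizes, and one-hot targets are illustrative assumptions only.
import numpy as np
from scipy.special import logsumexp   # neural_net_predict above needs a logsumexp in scope

def init_params(layer_sizes, scale=0.1, rs=np.random.RandomState(0)):
    return [(scale * rs.randn(m, n), scale * rs.randn(n))
            for m, n in zip(layer_sizes[:-1], layer_sizes[1:])]

params = init_params([4, 16, 3])
inputs = np.random.RandomState(1).randn(8, 4)                          # (N, D)
targets = np.eye(3)[np.random.RandomState(2).randint(0, 3, size=8)]    # one-hot, (N, C)
log_probs = neural_net_predict(params, inputs)                         # (N, C)
loss = -np.mean(np.sum(log_probs * targets, axis=1))                   # mean cross-entropy
print(log_probs.shape, loss)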
def autograd_rnn(params, x, label, n):
    W, b, Wout, bout = params
    h1 = x
    for i in range(n):
        h1 = np.tanh(np.dot(h1, W) + b)
    logit = np.dot(h1, Wout) + bout
    loss = -np.sum(label * logit - (logit + np.log(1 + np.exp(-logit))))
    return loss
def _update_hidden_state_batched(self, inputs, h, weights):
    W_hh, W_xh, b_h, W_hy, b_y = weights
    for t in range(self.number_of_steps):
        x = np.array([char_to_one_hot(c) for c in inputs[:, t]])
        x = x.reshape((self.batch_size, -1, 1))
        h = h.reshape((self.batch_size, self.hidden_size, 1))
        h = np.squeeze(
            np.tanh(W_hh @ h + W_xh @ x + np.reshape(b_h, (-1, 1))))
    return h
def test_make_ggnvp_broadcasting():
    A = npr.randn(4, 5)
    x = npr.randn(10, 4)
    v = npr.randn(10, 4)

    fun = lambda x: np.tanh(np.dot(x, A))
    res1 = np.stack([_make_explicit_ggnvp(fun)(xi)(vi) for xi, vi in zip(x, v)])
    res2 = make_ggnvp(fun)(x)(v)
    check_equivalent(res1, res2)
def test_make_ggnvp():
    A = npr.randn(5, 4)
    x = npr.randn(4)
    v = npr.randn(4)

    fun = lambda x: np.dot(A, x)
    check_equivalent(make_ggnvp(fun)(x)(v), _make_explicit_ggnvp(fun)(x)(v))

    fun2 = lambda x: np.tanh(np.dot(A, x))
    check_equivalent(make_ggnvp(fun2)(x)(v), _make_explicit_ggnvp(fun2)(x)(v))
def test_make_jvp():
    A = npr.randn(3, 5)
    x = npr.randn(5)
    v = npr.randn(5)
    fun = lambda x: np.tanh(np.dot(A, x))

    jvp_explicit = lambda x: lambda v: np.dot(jacobian(fun)(x), v)
    jvp = make_jvp(fun)

    check_equivalent(jvp_explicit(x)(v), jvp(x)(v))
def test_make_ggnvp_broadcasting():
    A = npr.randn(4, 5)
    x = npr.randn(10, 4)
    v = npr.randn(10, 4)

    fun = lambda x: np.tanh(np.dot(x, A))
    res1 = np.stack(
        [_make_explicit_ggnvp(fun)(xi)(vi) for xi, vi in zip(x, v)])
    res2 = make_ggnvp(fun)(x)(v)
    check_equivalent(res1, res2)
def build_velocity_fun(self, input):
    self._get_projections(input)
    z_fun = lambda x: self.sigmoid(x @ self.U_z + self.z_projection_b)
    r_fun = lambda x: self.sigmoid(x @ self.U_r + self.r_projection_b)
    g_fun = lambda x: np.tanh(r_fun(x) * (x @ self.U_h) + self.g_projection_b)
    fun = lambda x: 1 / self.n_hidden * np.sum(
        (-x + z_fun(x) * x + (1 - z_fun(x)) * g_fun(x)) ** 2, axis=1)
    return fun
def test_jacobian_against_wrapper():
    A = npr.randn(3, 3, 3)
    fun = lambda x: np.einsum(
        'ijk,jkl->il', A, np.sin(x[..., None] * np.tanh(x[None, ...])))

    B = npr.randn(3, 3)
    jac1 = jacobian(fun)(B)
    jac2 = old_jacobian(fun)(B)

    assert np.allclose(jac1, jac2)
def predicted_class_logprobs(self, W_vect, inputs):
    for W, b in self.unpack_layers(W_vect):
        outputs = np.dot(inputs, W) + b
        if self.activation_type == 'tanh':
            inputs = np.tanh(outputs)
        elif self.activation_type == 'relu':
            inputs = relu(outputs)
        else:
            raise ValueError('unknown activation_type {}'.format(self.activation_type))
    return outputs - logsumexp(outputs, axis=1, keepdims=True)
def test_make_jvp():
    A = npr.randn(3, 5)
    x = npr.randn(5)
    v = npr.randn(5)
    fun = lambda x: np.tanh(np.dot(A, x))

    jvp_explicit = lambda x: lambda v: np.dot(jacobian(fun)(x), v)
    jvp = make_jvp(fun)

    check_equivalent(jvp_explicit(x)(v), jvp(x)(v)[1])
def test_jacobian_against_wrapper():
    A = npr.randn(3, 3, 3)
    fun = lambda x: np.einsum(
        'ijk,jkl->il', A, np.sin(x[..., None] * np.tanh(x[None, ...])))

    B = npr.randn(3, 3)
    jac1 = jacobian(fun)(B)
    jac2 = old_jacobian(fun)(B)

    assert np.allclose(jac1, jac2)
def SimpleRNN(Params, u, x):
    """Implements a first-order recurrent neural network with tanh activation.
    Params is a list of (weights, bias) tuples; u is the input at this step
    and x is the previous hidden state. Returns the output y and the new
    hidden state x."""
    x = np.dot(x, Params[1][0]) + np.dot(u, Params[0][0]) + Params[1][1]
    x = np.tanh(x)
    y = np.dot(x, Params[-1][0]) + Params[-1][1]
    return y, x
def _apply_nonlinearity(self, nonlin, res):
    if nonlin == 'none' or nonlin == 'linear':
        return res
    if nonlin == 'relu':
        return 0.5 * (res + np.abs(res))
    if nonlin == 'tanh':
        return np.tanh(res)
    if nonlin == 'softmax':
        res -= res.max()
        res = np.exp(res)
        return res / res.sum()
    raise Exception('unknown nonlinearity: "%s"' % nonlin)
def test_make_ggnvp_nondefault_g():
    A = npr.randn(5, 4)
    x = npr.randn(4)
    v = npr.randn(4)

    g = lambda y: np.sum(2. * y**2 + y**4)

    fun = lambda x: np.dot(A, x)
    check_equivalent(make_ggnvp(fun, g)(x)(v), _make_explicit_ggnvp(fun, g)(x)(v))

    fun2 = lambda x: np.tanh(np.dot(A, x))
    check_equivalent(make_ggnvp(fun2, g)(x)(v), _make_explicit_ggnvp(fun2, g)(x)(v))
def predictions(W_vect, X):
    """Outputs normalized log-probabilities."""
    cur_units = X
    N_iter = len(layer_sizes) - 1
    for i in range(len(layer_sizes) - 1):
        cur_W = parser.get(W_vect, ('weights', i))
        cur_B = parser.get(W_vect, ('biases', i))
        cur_units = np.dot(cur_units, cur_W) + cur_B
        if i == (N_iter - 1):
            # Normalize last layer.
            cur_units = cur_units - logsumexp(cur_units, axis=1)
        else:
            cur_units = np.tanh(cur_units)
    return cur_units
def mlp_decode(z, phi, tanh_scale=10., sigmoid_output=True):
    nnet_params, ((W_mu, b_mu), (W_sigma, b_sigma)) = phi[:-2], phi[-2:]
    z = z if z.ndim == 3 else z[:, None, :]  # ensure z.shape == (T, K, n)

    nnet = compose(tanh_layer(W, b) for W, b in nnet_params)
    mu = linear_layer(W_mu, b_mu)
    log_sigmasq = linear_layer(W_sigma, b_sigma)

    nnet_outputs = nnet(np.reshape(z, (-1, z.shape[-1])))
    mu = sigmoid(mu(nnet_outputs)) if sigmoid_output else mu(nnet_outputs)
    # soft-clip the log-variance to the range [-tanh_scale, tanh_scale]
    log_sigmasq = tanh_scale * np.tanh(log_sigmasq(nnet_outputs) / tanh_scale)

    shape = z.shape[:-1] + (-1,)
    return mu.reshape(shape), log_sigmasq.reshape(shape)
def test_jacobian_against_stacked_grads():
    scalar_funs = [
        lambda x: np.sum(x ** 3),
        lambda x: np.prod(np.sin(x) + np.sin(x)),
        lambda x: grad(lambda y: np.exp(y) * np.tanh(x[0]))(x[1]),
    ]

    vector_fun = lambda x: np.array([f(x) for f in scalar_funs])

    x = npr.randn(5)
    jac = jacobian(vector_fun)(x)
    grads = [grad(f)(x) for f in scalar_funs]

    assert np.allclose(jac, np.vstack(grads))
def normalizing_flows(z_0, u, w, b):
    '''
    z_0: [n_samples, D]
    u: [D,1]
    w: [D,1]
    b: [1]
    '''
    # [n_samples, 1]
    term1 = np.tanh(np.dot(z_0, w) + b)
    # [n_samples, D]
    term1 = np.dot(term1, u.T)
    z_1 = z_0 + term1
    return z_1
def forward_pass(self, X):
    self.last_input = X
    n_samples, n_timesteps, input_shape = X.shape
    states = np.zeros((n_samples, n_timesteps + 1, self.hidden_dim))
    states[:, -1, :] = self.hprev.copy()
    p = self._params

    for i in range(n_timesteps):
        # at i == 0, states[:, i - 1, :] wraps to states[:, -1, :], the previous hidden state
        states[:, i, :] = np.tanh(np.dot(X[:, i, :], p['W'])
                                  + np.dot(states[:, i - 1, :], p['U']) + p['b'])

    self.states = states
    self.hprev = states[:, n_timesteps - 1, :].copy()

    if self.return_sequences:
        return states[:, 0:-1, :]
    else:
        return states[:, -2, :]
def variational_log_density(params, samples):
    '''
    samples: [n_samples, D]
    u: [D,1]
    w: [D,1]
    b: [1]
    Returns: [n_samples]
    '''
    n_samples = len(samples)
    mean = params[0]
    log_std = params[1]
    norm_flow_params = params[2]

    z_k, all_zs = normalizing_flows(samples, norm_flow_params)

    logp_zk = logprob(z_k)
    logp_zk = np.reshape(logp_zk, [n_samples, 1])

    logq_z0 = diag_gaussian_log_density(samples, mean, log_std)
    logq_z0 = np.reshape(logq_z0, [n_samples, 1])

    sum_nf = np.zeros([n_samples, 1])
    for params_k in range(len(norm_flow_params)):
        u = norm_flow_params[params_k][0]
        w = norm_flow_params[params_k][1]
        b = norm_flow_params[params_k][2]

        # Appendix equations: reparameterize u so the flow stays invertible
        m_x = -1. + np.log(1. + np.exp(np.dot(w.T, u)))
        u_k = u + (m_x - np.dot(w.T, u)) * (w / np.linalg.norm(w))

        # [n_samples, D]
        phi = np.dot((1. - np.tanh(np.dot(all_zs[params_k], w) + b)**2), w.T)
        # [n_samples, 1] -- accumulate the log-det-Jacobian of each flow step
        sum_nf += np.log(np.abs(1 + np.dot(phi, u_k)))

    log_qz = np.reshape(logq_z0 - sum_nf, [n_samples])
    return log_qz
def test_tanh():
    fun = lambda x: 3.0 * np.tanh(x)
    d_fun = grad(fun)
    check_grads(fun, npr.randn())
    check_grads(d_fun, npr.randn())
def nonlinearity(self, x): return np.tanh(x)
def predictions(W_vect, inputs):
    outputs = 0
    for W, b in unpack_layers(W_vect):
        outputs = np.dot(np.array(inputs), W) + b
        inputs = np.tanh(outputs)
    return outputs
def tanh(A, *args): return np.tanh(dots(A, *args))
def predicted_regression(self, W_vect, inputs):
    for W, b in self.unpack_layers(W_vect):
        outputs = np.dot(inputs, W) + b
        inputs = np.tanh(outputs)
    return outputs
def update(input, hiddens, change_weights): return np.tanh(activations(change_weights, input, hiddens))
def update_rnn(input, hiddens): return np.tanh(concat_and_multiply(params['change'], input, hiddens))
def sigmoid(x):
    return 0.5 * (np.tanh(x) + 1.0)

def concat_and_multiply(weights, *args):
    # concatenate the arguments plus a constant-1 bias column, then
    # multiply by the weight matrix
    cat_state = np.concatenate(args + (np.ones((args[0].shape[0], 1)),), axis=1)
    return np.dot(cat_state, weights)
def sigmoid(x): return 0.5*(np.tanh(x) + 1.0) # Output ranges from 0 to 1.
def sigmoid(x):
    return 0.5 * (np.tanh(x) + 1.0)

def logsigmoid(x):
    return x - np.logaddexp(0, x)
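# A quick numerical check of what these helpers compute (plain-numpy sketch):
# 0.5*(tanh(x) + 1) is the logistic function evaluated at 2x (a steeper sigmoid),
# while x - logaddexp(0, x) equals log(logistic(x)), the numerically stable log-sigmoid.
import numpy as np

x = np.linspace(-5.0, 5.0, 11)
logistic = lambda t: 1.0 / (1.0 + np.exp(-t))
assert np.allclose(0.5 * (np.tanh(x) + 1.0), logistic(2 * x))
assert np.allclose(x - np.logaddexp(0, x), np.log(logistic(x)))
print("identities hold")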
def neural_network(x, theta):
    w1, b1, w2, b2 = theta
    return np.tanh(np.dot(np.tanh(np.dot(x, w1) + b1), w2) + b2)