def loss(self, inputs, targets):
    """Return the summed binary cross-entropy (in bits) of the model output.

    ``self`` is called on ``inputs`` to obtain the predictions, which are then
    compared element-wise against ``targets``.  A tiny constant is added
    inside each logarithm so that an argument of exactly zero does not
    produce ``-inf``.
    """
    inputs = np.array(inputs)
    targets = np.array(targets)
    predictions = self(inputs)
    offset = 2e-23
    hit_term = targets * np.log2(predictions + offset)
    miss_term = (1 - targets) * np.log2(1 - predictions + offset)
    return -np.sum(hit_term + miss_term)
def lossfun(params):
    """Load ``params`` into ``machine`` and return its cross-entropy in bits.

    ``machine``, ``inputs`` and ``targets`` are taken from the enclosing
    scope.  Each row of ``inputs`` is pushed through ``machine.forward`` and
    scored against the row of ``targets`` with the same index.
    """
    machine.set_params(params)
    total = 0
    for step in range(inputs.shape[0]):
        prediction = machine.forward(inputs[step])
        expected = targets[step]
        bits = expected * np.log2(prediction) + (1 - expected) * np.log2(1 - prediction)
        total = total - np.sum(bits)
    return total
def plotDistrs(ds, mus_, sigmas_):
    """Draw, for every tree node, a histogram with two normal pdf overlays.

    One subplot is used per node (row = tree level, column = position within
    the level).  Each panel shows the histogram of ``ds.treeAlphaHats`` for
    that node, a green pdf built from ``ds.mu`` / ``ds.sigma`` ("final"), a
    red pdf built from ``mus_[0]`` / ``sigmas_[0]`` ("og"), and vertical
    lines at the weighted mean of ``ds.mu`` and of ``ds.trueAlphas``.

    Returns the matplotlib figure.
    """
    n_levels = int(np.ceil(np.log2(ds.N))) + 1
    fig, axes = plt.subplots(nrows=n_levels, ncols=ds.N,
                             figsize=(5 * ds.N, 5 * ds.N))
    for level in range(n_levels):
        # Index of the first node on this level (0 for the root level).
        first_node = 2 ** level - 1
        for col in range(2 ** level):
            node = first_node + col
            panel = axes[level, col]
            samples = ds.treeAlphaHats[node]
            panel.hist(samples, density=True)

            leaves = getChildren(node, ds.N - 1).astype(int) - (ds.N - 1)
            weights = ds.numU[leaves]
            grid = np.arange(0, samples.max(), .01)

            # Final
            final_mu = np.dot(ds.mu[leaves], weights) / np.sum(weights)
            final_pdf = ss.norm.pdf(grid, loc=final_mu, scale=ds.sigma[node])
            panel.plot(grid, final_pdf, color="green", alpha=.5, label="final")
            panel.vlines(final_mu, 0, 1, color="green", label="alpha hat")

            # Original
            start_mu = np.dot(mus_[0][leaves], weights) / np.sum(weights)
            start_pdf = ss.norm.pdf(grid, loc=start_mu, scale=sigmas_[0][node])
            panel.plot(grid, start_pdf, color="red", alpha=.5, label="og")

            truth = np.dot(ds.trueAlphas[leaves].flatten(), weights) / np.sum(weights)
            panel.vlines(truth, 0, 1, color="black", label="alpha")
            panel.legend()
    return fig
def get_starlet_shape(shape, lvl=None):
    """Return the number of starlet decomposition levels to use.

    Parameters
    ----------
    shape : sequence of int
        Shape of the image; only the last two entries are considered.
    lvl : int or None
        Requested number of levels.  ``None``, or any value above the
        maximum supported by the image size, selects the maximum.

    Returns
    -------
    int
        The number of levels, never more than
        ``floor(log2(min(shape[-2:])))``.
    """
    # Number of levels for the Starlet decomposition.  The builtin ``int`` is
    # used because the ``np.int`` alias no longer exists in current NumPy.
    lvl_max = int(np.log2(np.min(shape[-2:])))
    if lvl is None or lvl > lvl_max:
        lvl = lvl_max
    return int(lvl)
def shannon_rate(L):
    """Evaluate the rate expression for the factor matrix ``L``.

    ``L @ L.T`` is normalised to unit trace, mapped through ``B`` and handed
    to ``dlyap_iterative`` together with ``Ad.T``.  The result is combined
    with ``Wo`` into two determinants whose log2 ratio, negated and divided
    by ``2 * T``, is returned.  ``B``, ``Ad``, ``Wo``, ``sigma2``, ``I`` and
    ``T`` come from the enclosing scope.
    """
    covariance = np.dot(L, L.transpose())
    unit_trace_cov = covariance / np.trace(covariance)
    input_cov = np.dot(np.dot(B, unit_trace_cov), B.transpose())
    Wcd = dlyap_iterative(Ad.transpose(), input_cov)

    numerator = np.linalg.det(np.dot(Wo, Wcd) / sigma2 + I)
    denominator = np.linalg.det(np.dot(Wo, Wcd - input_cov) / sigma2 + I)
    return -np.log2(numerator / denominator) / (2. * T)
def shannon_rate(L):
    """Evaluate the rate expression for the factor matrix ``L``.

    ``L @ L.T`` is normalised to unit trace and mapped through ``B``.  The
    matrix ``Wcd`` is then obtained from the vectorised linear system
    ``lyap_tmp @ vec(Wcd) = vec(B Sigma B^T)``.  The return value is
    ``-log2(det(Wo Wcd + sigma2 I) / det(Wo (Wcd - B Sigma B^T) + sigma2 I))``
    divided by ``2 * T``.

    ``B``, ``n``, ``lyap_tmp``, ``Wo``, ``sigma2``, ``I`` and ``T`` come from
    the enclosing scope.
    """
    Sigma = np.dot(L, L.transpose())
    Sigma_norm = Sigma / np.trace(Sigma)
    Sigma_norm_B = np.dot(np.dot(B, Sigma_norm), B.transpose())
    Sigma_norm_vec = np.reshape(Sigma_norm_B, (n * n, 1))
    # Solve the linear system directly instead of forming the explicit
    # inverse of lyap_tmp: same solution, fewer operations, better
    # conditioned.
    Wcd_vec = np.linalg.solve(lyap_tmp, Sigma_norm_vec)
    Wcd = np.reshape(Wcd_vec, (n, n))
    Wnum = np.dot(Wo, Wcd)
    Wden = np.dot(Wo, Wcd - Sigma_norm_B)
    num = np.linalg.det(Wnum + sigma2 * I)
    den = np.linalg.det(Wden + sigma2 * I)
    return -np.log2(num / den) / (2. * T)
def make_entropy_panel(cp_dfs, entropies, entropy_sds, labels, ax=None):
    """Draw one gray bar with an error bar per entropy value.

    A new 4x4 inch figure is created when ``ax`` is not supplied.  Tick
    positions are ``range(len(cp_dfs))`` and are labelled with ``labels``.
    The y axis is limited to ``[2, log2(N + 1)]``, where ``N`` comes from
    the enclosing scope.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(4, 4))

    bar_count = len(cp_dfs)
    triples = zip(entropies, entropy_sds, labels)
    for position, (height, spread, name) in enumerate(triples):
        ax.bar(position, height, width=0.6, label=name, color="gray")
        ax.errorbar(position, height, yerr=spread, color="k", elinewidth=2)

    ax.set_xticks(range(bar_count))
    ax.set_xticklabels(labels)
    ax.set_ylabel("Entropy", fontsize=18)
    ax.set_ylim([2, np.log2(N + 1)])
def shannon_rate(L, T):
    """Evaluate the rate expression for factor ``L`` over horizon ``T``.

    Steps:

    1. ``Ad = expm(A * T)``.
    2. ``Wo`` is the state of ``gramian_ode`` integrated from 0 to ``T``
       (zero initial condition), reshaped to a square matrix.
    3. ``Wcd`` solves the vectorised system
       ``(I - kron(Ad, Ad)) vec(Wcd) = vec(B Sigma B^T)``, where ``Sigma``
       is ``L @ L.T`` normalised to unit trace.
    4. The result is
       ``-log2(det(Wo Wcd + sigma2 I) / det(Wo (Wcd - B Sigma B^T) + sigma2 I))``
       divided by ``2 * T``.

    ``A``, ``B``, ``C``, ``n``, ``I``, ``sigma2`` and ``gramian_ode`` come
    from the enclosing scope.
    NOTE(review): ``n`` is presumably ``A.shape[0]`` -- confirm against the
    surrounding module.
    """
    Ad = scipy.linalg.expm(A * T)

    state_dim = A.shape[0]
    y0 = np.zeros(state_dim ** 2)
    out = scipy.integrate.odeint(gramian_ode, y0, [0, T], args=(A, C))
    # Row 1 of the ODE output is the state at time T.
    Wo = out[1, :].reshape([state_dim, state_dim])

    lyap_tmp = -np.kron(Ad, Ad) + np.identity(n * n)

    Sigma = np.dot(L, L.transpose())
    Sigma_norm = Sigma / np.trace(Sigma)
    Sigma_norm_B = np.dot(np.dot(B, Sigma_norm), B.transpose())
    Sigma_norm_vec = np.reshape(Sigma_norm_B, (n * n, 1))
    # Solve the linear system directly rather than multiplying by an
    # explicit inverse.
    Wcd_vec = np.linalg.solve(lyap_tmp, Sigma_norm_vec)
    Wcd = np.reshape(Wcd_vec, (n, n))

    Wnum = np.dot(Wo, Wcd)
    Wden = np.dot(Wo, Wcd - Sigma_norm_B)
    num = np.linalg.det(Wnum + sigma2 * I)
    den = np.linalg.det(Wden + sigma2 * I)
    return -np.log2(num / den) / (2. * T)
def _compute_likelihood_ratio_test(self):
    """
    Likelihood ratio test of the fitted model against the trivial model.

    The fitted model (all covariates) is compared with the model that has no
    covariates, using the stored null and fitted log-likelihoods.

    Returns a tuple ``(test statistic, degrees of freedom, -log2(p-value))``.
    """
    from lifelines.statistics import chisq_test

    null_ll = self._ll_null
    fitted_ll = self._log_likelihood
    statistic = 2 * fitted_ll - 2 * null_ll
    # delta in number of parameters between models
    dof = self.params_.shape[0] - 2
    p_value = chisq_test(statistic, degrees_freedom=dof)

    # A p-value of zero (or NaN) must not raise floating point warnings.
    with np.errstate(invalid="ignore", divide="ignore"):
        neg_log2_p = -np.log2(p_value)
    return statistic, dof, neg_log2_p
def LL(leafMeans, bagSigma):
    """Return the negated, level-weighted log-likelihood summed over the tree.

    Every node contributes ``rlambda**level * logLikelihood(...)``.  The
    node mean is the ``leafN``-weighted average of ``leafMeans`` over the
    leaves returned by ``getChildren``.  ``getChildren``, ``leafN``,
    ``rlambda``, ``logLikelihood``, ``x`` and ``normalize`` come from the
    enclosing scope.
    """
    n_internal = np.floor(len(bagSigma) / 2)
    n_levels = int(np.ceil(np.log2(len(leafMeans)))) + 1

    total = 0
    for level in range(n_levels):
        level_weight = rlambda ** level
        # Index of the first node on this level (0 for the root level).
        first_node = 2 ** level - 1
        for node in range(first_node, first_node + 2 ** level):
            leaves = (getChildren(node, n_internal) - n_internal).astype(int)
            counts = leafN[leaves]
            node_mean = np.dot(leafMeans[leaves], counts) / np.sum(counts)
            node_sigma = bagSigma[node]
            total = total + level_weight * logLikelihood(
                x[node], node_mean, node_sigma, normalize)
    return -1 * total
def _compute_likelihood_ratio_test(self):
    """
    Likelihood ratio test for the Weibull model.

    The fitted model (all covariates) is compared with a covariate-free
    ``WeibullFitter`` fitted on the same durations and event indicators.

    Returns a tuple ``(test statistic, degrees of freedom, -log2(p-value))``.
    """
    baseline = WeibullFitter().fit(self.durations, self.event_observed)
    null_ll = baseline._log_likelihood
    fitted_ll = self._log_likelihood
    statistic = 2 * fitted_ll - 2 * null_ll
    # diff in number of parameters between models
    dof = self.params_.shape[0] - 2
    p_value = chisq_test(statistic, degrees_freedom=dof)

    # A p-value of zero (or NaN) must not raise floating point warnings.
    with np.errstate(invalid="ignore", divide="ignore"):
        neg_log2_p = -np.log2(p_value)
    return statistic, dof, neg_log2_p
def summary(self):
    """Summary statistics describing the fit.

    Returns
    -------
    df : DataFrame
        Indexed like ``params_``, with columns coef, exp(coef), se(coef), z,
        p, -log2(p), and the lower / upper confidence bounds labelled with
        the confidence level ``1 - alpha``."""
    level = 1 - self.alpha
    with np.errstate(invalid="ignore", divide="ignore"):
        table = pd.DataFrame(index=self.params_.index)
        leading_columns = (
            ("coef", self.params_),
            ("exp(coef)", np.exp(self.params_)),
            ("se(coef)", self.standard_errors_),
            ("z", self._compute_z_values()),
            ("p", self._compute_p_values()),
        )
        for column, values in leading_columns:
            table[column] = values
        table["-log2(p)"] = -np.log2(table["p"])
        table["lower %g" % level] = self.confidence_intervals_["lower-bound"]
        table["upper %g" % level] = self.confidence_intervals_["upper-bound"]
    return table
def expm_pade(a):
    """
    Compute the matrix exponential via pade approximation.

    References:
    [0] http://eprints.ma.man.ac.uk/634/1/high05e.pdf
    [1] https://github.com/scipy/scipy/blob/v0.14.0/scipy/linalg/_expm_frechet.py#L10

    Arguments:
    a :: ndarray(N x N) - The matrix to exponentiate.

    Returns:
    expm_a :: ndarray(N x N) - The exponential of a.
    """
    dim = a.shape[0]
    norm_a = one_norm(a)

    # If the one norm is sufficiently small,
    # pade orders up to 13 are well behaved.
    # NOTE(review): the scan has no early exit, so the LAST entry of
    # PADE_ORDERS whose threshold exceeds the norm is the one used.
    order = None
    for candidate in PADE_ORDERS:
        if norm_a < THETA[candidate]:
            order = candidate

    # If the one norm is large, scaling and squaring
    # is required.
    squarings = 0
    if order is None:
        order = 13
        squarings = max(0, int(anp.ceil(anp.log2(norm_a / THETA[13]))))
        a = a * (2 ** -squarings)

    # Execute pade approximant.
    identity = anp.eye(dim)
    u, v = PADE[order](a, identity)
    result = anp.linalg.solve(-u + v, u + v)

    # Do squaring if necessary.
    for _ in range(squarings):
        result = anp.matmul(result, result)

    return result
def _ell(A, m): """ A helper function for expm_2009. Parameters ---------- A : linear operator A linear operator whose norm of power we care about. m : int The power of the linear operator Returns ------- value : int A value related to a bound. """ if len(A.shape) != 2 or A.shape[0] != A.shape[1]: raise ValueError('expected A to be like a square matrix') p = 2 * m + 1 # The c_i are explained in (2.2) and (2.6) of the 2005 expm paper. # They are coefficients of terms of a generating function series expansion. choose_2p_p = scipy.special.comb(2 * p, p, exact=True) abs_c_recip = float(choose_2p_p * math.factorial(2 * p + 1)) # This is explained after Eq. (1.2) of the 2009 expm paper. # It is the "unit roundoff" of IEEE double precision arithmetic. u = 2**-53 # Compute the one-norm of matrix power p of abs(A). A_abs_onenorm = _onenorm_matrix_power_nnm(abs(A), p) # Treat zero norm as a special case. if not A_abs_onenorm: return 0 alpha = A_abs_onenorm / (_onenorm(A) * abs_c_recip) log2_alpha_div_u = np.log2(alpha / u) value = int(np.ceil(log2_alpha_div_u / (2 * m))) return max(value, 0)
def test_log2():
    """Check the gradients of a scaled ``log2`` and of its derivative.

    Each check is evaluated at the absolute value of a fresh random draw.
    """
    def scaled_log2(x):
        return 3.0 * np.log2(x)

    first_derivative = grad(scaled_log2)
    check_grads(scaled_log2, abs(npr.randn()))
    check_grads(first_derivative, abs(npr.randn()))
def autocorrelation(iterate_chains, warmup=0.5, param_idx=0, lag_max=100):
    """
    Compute the autocorrelation and ESS for a variational parameter using FFT.

    ESS is the effective sample size computed following eq. (10) and (11) of
    https://arxiv.org/pdf/1903.08008.pdf.  All statistics are computed on the
    iterates that remain after the warmup iterates have been discarded.

    Parameters
    ----------
    iterate_chains : multi-dimensional array, shape=(n_chains, n_iters, n_var_params)

    warmup : number of warmup iterates to discard; a value below 1 is
        interpreted as a fraction of the iterates.  One extra iterate is
        discarded when needed to keep an even number of iterates.

    param_idx : index of the variational parameter

    lag_max : number of lags to evaluate (lags ``0 .. lag_max - 1``).  It is
        capped at the number of retained iterates.

    Returns
    -------
    neff : effective sample size

    rho_t : sum of the non-negative autocorrelation estimates over the
        evaluated lags

    autocov : auto covariance using FFT, shape=(n_chains, n_retained_iters)

    a : array of the autocorrelation estimates for the evaluated lags

    Raises
    ------
    ValueError
        If fewer than two iterates would remain after warmup.
    """
    n_chains = iterate_chains.shape[0]
    n_total = iterate_chains.shape[1]
    if warmup < 1:
        warmup = int(warmup * n_total)

    if warmup > n_total - 2:
        raise ValueError('Warmup should be less than number of iterates ..')

    # Keep an even number of post-warmup iterates.
    if (n_total - warmup) % 2:
        warmup = int(warmup + 1)

    chains = iterate_chains[:, warmup:, param_idx]
    # Everything below must use the number of iterates actually retained,
    # not the pre-warmup count; otherwise the variances and the per-lag
    # normalisation of the autocovariance are scaled incorrectly.
    n_iters = chains.shape[1]

    means = np.mean(chains, axis=1)
    variances = np.var(chains, ddof=1, axis=1)
    if n_chains == 1:
        var_between = 0
    else:
        var_between = n_iters * np.var(means, ddof=1)

    var_chains = np.mean(variances, axis=0)
    var_pooled = ((n_iters - 1.) * var_chains + var_between) / n_iters

    # Zero-pad to a power of two of at least twice the chain length so the
    # circular FFT correlation does not wrap around.
    n_pad = int(2**np.ceil(1. + np.log2(n_iters)))
    freqs = np.fft.rfft(chains - np.expand_dims(means, axis=1), n_pad)
    autocov = np.fft.irfft(np.abs(freqs)**2)[:, :n_iters].real
    # Lag k is a sum over (n_iters - k) products.
    autocov = autocov / np.arange(n_iters, 0, -1)

    # Only n_iters lags exist; asking for more used to raise IndexError.
    n_lags = min(lag_max, n_iters)

    # NOTE(review): the sum below starts at lag 0 and is then used as
    # 1 + 2 * rho_t; confirm this against eq. (11) of the cited paper.
    rho_t = 0
    a = []
    for lag in range(n_lags):
        val = 1. - (var_chains - np.mean(autocov[:, lag])) / var_pooled
        a.append(val)
        # Negative estimates are recorded in ``a`` but not accumulated.
        if val >= 0:
            rho_t = rho_t + val

    neff = n_iters * n_chains / (1 + 2 * rho_t)
    return neff, rho_t, autocov, np.asarray(a)
def _expm(A, use_exact_onenorm): # Core of expm, separated to allow testing exact and approximate # algorithms. # Avoid indiscriminate asarray() to allow sparse or other strange arrays. if isinstance(A, (list, tuple)): A = np.asarray(A) if len(A.shape) != 2 or A.shape[0] != A.shape[1]: raise ValueError('expected a square matrix') # Trivial case if A.shape == (1, 1): out = [[np.exp(A[0, 0])]] # Avoid indiscriminate casting to ndarray to # allow for sparse or other strange arrays if isspmatrix(A): return A.__class__(out) return np.array(out) # Ensure input is of float type, to avoid integer overflows etc. if ((isinstance(A, np.ndarray) or isspmatrix(A)) and not np.issubdtype(A.dtype, np.inexact)): A = A.astype(float) # Detect upper triangularity. structure = UPPER_TRIANGULAR if _is_upper_triangular(A) else None if use_exact_onenorm == "auto": # Hardcode a matrix order threshold for exact vs. estimated one-norms. use_exact_onenorm = A.shape[0] < 200 # Track functions of A to help compute the matrix exponential. h = _ExpmPadeHelper(A, structure=structure, use_exact_onenorm=use_exact_onenorm) # Try Pade order 3. eta_1 = max(h.d4_loose, h.d6_loose) if eta_1 < 1.495585217958292e-002 and _ell(h.A, 3) == 0: U, V = h.pade3() return _solve_P_Q(U, V, structure=structure) # Try Pade order 5. eta_2 = max(h.d4_tight, h.d6_loose) if eta_2 < 2.539398330063230e-001 and _ell(h.A, 5) == 0: U, V = h.pade5() return _solve_P_Q(U, V, structure=structure) # Try Pade orders 7 and 9. eta_3 = max(h.d6_tight, h.d8_loose) if eta_3 < 9.504178996162932e-001 and _ell(h.A, 7) == 0: U, V = h.pade7() return _solve_P_Q(U, V, structure=structure) if eta_3 < 2.097847961257068e+000 and _ell(h.A, 9) == 0: U, V = h.pade9() return _solve_P_Q(U, V, structure=structure) # Use Pade order 13. 
eta_4 = max(h.d8_loose, h.d10_loose) eta_5 = min(eta_3, eta_4) theta_13 = 4.25 # Choose smallest s>=0 such that 2**(-s) eta_5 <= theta_13 if eta_5 == 0: # Nilpotent special case s = 0 else: s = max(int(np.ceil(np.log2(eta_5 / theta_13))), 0) s = s + _ell(2**-s * h.A, 13) U, V = h.pade13_scaled(s) X = _solve_P_Q(U, V, structure=structure) if structure == UPPER_TRIANGULAR: # Invoke Code Fragment 2.1. X = _fragment_2_1(X, h.A, s) else: # X = r_13(A)^(2^s) by repeated squaring. for i in range(s): X = np.dot(X, X) return X
def log(ps):
    """Return ``log2(ps)`` with every infinite result replaced by 0.

    In particular ``log2(0) = -inf`` becomes 0.  Accepts scalars as well as
    arrays (or anything ``np.log2`` accepts).  For array input a new array
    of logarithms is returned; for scalar input a NumPy scalar is returned.
    """
    # Zeros are an expected input here, so the divide-by-zero warning that
    # log2(0) would emit is suppressed.
    with np.errstate(divide="ignore"):
        logs = np.log2(ps)

    if np.ndim(logs) == 0:
        # NumPy scalars do not support item assignment.
        return logs.dtype.type(0) if np.isinf(logs) else logs

    logs[np.isinf(logs)] = 0
    return logs
# Angles a_orig = np.array([x[5] for x in myplot]) a_new = np.array([x[6] for x in myplot]) plt.scatter(a_orig, a_new) plt.title('Angle differences') plt.xlabel('Original') plt.ylabel('New') plt.show() sys.exit() # Plot sizes and octaves z = np.array([0 for x in myplot]) sorig = np.array([x[1] for x in myplot]) snew = np.array([x[0] for x in myplot]) c_octaves = np.array([x[2] for x in myplot]) c_layers = np.array([x[3] for x in myplot]) c_scale = np.array([np.log2(x[4]) for x in myplot]) # c_fscale = np.array([ x[4] for x in myplot ]) # print(np.unique(list(zip(c_octaves, c_fscale)), axis=0)) # Build inverse octave dict print("Octave and log2 scale sum:", np.linalg.norm(c_octaves + c_scale)) octave_layer = sorted( [tuple(x) for x in np.unique(list(zip(c_octaves, c_layers)), axis=0)]) ol_dict = {k: [] for k in octave_layer} o_dict = {k: [] for k in c_octaves} false_formula = False # Test Octave encoding formula for j in range(len(x_[0])): s = keypoints[0][j].size octave, layer, scale = unpackSIFTOctave(keypoints[0][j].octave) ol_dict[(octave, layer)].append(s) o_dict[octave].append(s)
def test_log2():
    """Gradient-check a scaled ``log2`` at the absolute value of a random draw."""
    def scaled_log2(x):
        return 3.0 * np.log2(x)

    checker = check_grads(scaled_log2)
    checker(abs(npr.randn()))
# ) < 1e-6 return U def channel_project(n_qubits, proj_qubit): assert proj_qubit == 0 # TODO currently only this is supported # |0><0| x Id bra = np.array([[1., 0.], [0., 0.]]) mat = np.kron(np.identity(2**(n_qubits - 1)), bra) return Kraus([mat]) if __name__ == "__main__": ch = random_quantum_channel(2, 2) unitary = Stinespring(ch).data num_anc = int(np.log2(unitary.shape[0])) - 1 exp = channel_expand(1, num_anc) qc = QuantumCircuit(1 + num_anc) qc.unitary(complete_unitary(unitary), list(range(1 + num_anc))) uni = SuperOp(qc) tr = channel_trace(1, num_anc) # test 1: check that expansion and tracing out cancel assert np.sum( np.abs( Choi(Kraus([np.identity(2)])).data - Choi(exp.compose(tr)).data)) < 1e-10 # test 2: check full stinespring dilation ch2 = exp.compose(uni.compose(tr))
Sz_list = [np.array([[1, 0.], [0., -1.]]) for i in range(L)] H_list = qTEBD.get_H(Hamiltonian, L, J, g, h) A_list = [np.array([1., 0.]).reshape([2, 1, 1]) for i in range(L)] t_list = [0] E_list = [np.sum(mps_func.expectation_values(A_list, H_list))] Sz_array = np.zeros([int(total_t // dt) + 1, L], dtype=np.complex) Sz_array[0, :] = mps_func.expectation_values_1_site(A_list, Sz_list) ent_array = np.zeros([int(total_t // dt) + 1, L - 1], dtype=np.double) ent_array[0, :] = mps_func.get_entanglement(A_list) update_error_list = [0.] U_list = qTEBD.make_U(H_list, 1j * dt) U_half_list = qTEBD.make_U(H_list, 0.5j * dt) exact_steps = int(np.log2(chi)) for idx in range(exact_steps): A_list, _ = mps_func.right_canonicalize(A_list, no_trunc=True) A_list, trunc_error = qTEBD.apply_U_all(A_list, U_list, 0, no_trunc=True) A_list, _ = mps_func.left_canonicalize(A_list, no_trunc=True) ## [ToDo] here assume no truncation fidelity_reached = 1. print("fidelity reached : ", fidelity_reached) update_error_list.append(1. - fidelity_reached) current_energy = np.sum(mps_func.expectation_values(A_list, H_list)) E_list.append(current_energy) Sz_array[1 + idx, :] = mps_func.expectation_values_1_site(
def test_log2():
    """Run ``check_grads`` on ``3 * log2(x)`` at ``|randn()|``."""
    def three_log2(value):
        return 3.0 * np.log2(value)

    run_check = check_grads(three_log2)
    sample_point = abs(npr.randn())
    run_check(sample_point)
def fprop(params):
    """
    Forward pass of the NTM.

    Runs the controller and every read/write head over each timestep of
    ``inputs`` and accumulates the binary cross-entropy (in bits) between
    the sigmoid outputs and ``targets``.  ``self``, ``inputs`` and
    ``targets`` come from the enclosing scope.  Intermediate quantities are
    stored in ``self.stats``.

    Returns the loss summed over all timesteps and output units.
    """

    W = params  # aliasing for brevity

    xs, zhs, hs, ys, ps, ts, zos, os = {}, {}, {}, {}, {}, {}, {}, {}

    def per_head():
        """
        Silly utility function that should be called in init.
        Returns one empty, timestep-indexed dict per head.
        """
        # ``range`` instead of the Python-2-only ``xrange``.
        return [{} for _ in range(self.heads)]

    rs = per_head()
    zk_rs = per_head()
    k_rs, beta_rs, g_rs, s_rs, gamma_rs = (
        per_head(), per_head(), per_head(), per_head(), per_head())
    k_ws, beta_ws, g_ws, s_ws, gamma_ws = (
        per_head(), per_head(), per_head(), per_head(), per_head())
    adds, erases = per_head(), per_head()
    w_ws, w_rs = per_head(), per_head()  # read weights and write weights

    # Index -1 holds the initial state consumed by timestep 0.
    for idx in range(self.heads):
        rs[idx][-1] = self.W['rsInit' + str(idx)]  # stores values read from memory
        w_ws[idx][-1] = softmax(self.W['w_wsInit' + str(idx)])
        w_rs[idx][-1] = softmax(self.W['w_rsInit' + str(idx)])

    mems = {}  # the state of the memory at every timestep
    mems[-1] = self.W['memsInit']
    loss = 0

    for t in range(len(inputs)):

        xs[t] = np.reshape(np.array(inputs[t]), inputs[t].shape[::-1])

        # Contribution of the previous reads of all heads to the hidden layer.
        rsum = 0
        for idx in range(self.heads):
            rsum = rsum + np.dot(W['rh' + str(idx)],
                                 np.reshape(rs[idx][t-1], (self.M, 1)))
        zhs[t] = np.dot(W['xh'], xs[t]) + rsum + W['bh']
        hs[t] = np.tanh(zhs[t])

        zos[t] = np.dot(W['ho'], hs[t]) + W['bo']
        os[t] = np.tanh(zos[t])

        for idx in range(self.heads):
            tag = str(idx)

            # parameters to the read head
            zk_rs[idx][t] = np.dot(W['ok_r' + tag], os[t]) + W['bk_r' + tag]
            k_rs[idx][t] = np.tanh(zk_rs[idx][t])
            beta_rs[idx][t] = softplus(np.dot(W['obeta_r' + tag], os[t])
                                       + W['bbeta_r' + tag])
            g_rs[idx][t] = sigmoid(np.dot(W['og_r' + tag], os[t]) + W['bg_r' + tag])
            s_rs[idx][t] = softmax(np.dot(W['os_r' + tag], os[t]) + W['bs_r' + tag])
            gamma_rs[idx][t] = 1 + sigmoid(np.dot(W['ogamma_r' + tag], os[t])
                                           + W['bgamma_r' + tag])

            # parameters to the write head
            k_ws[idx][t] = np.tanh(np.dot(W['ok_w' + tag], os[t]) + W['bk_w' + tag])
            beta_ws[idx][t] = softplus(np.dot(W['obeta_w' + tag], os[t])
                                       + W['bbeta_w' + tag])
            g_ws[idx][t] = sigmoid(np.dot(W['og_w' + tag], os[t]) + W['bg_w' + tag])
            s_ws[idx][t] = softmax(np.dot(W['os_w' + tag], os[t]) + W['bs_w' + tag])
            gamma_ws[idx][t] = 1 + sigmoid(np.dot(W['ogamma_w' + tag], os[t])
                                           + W['bgamma_w' + tag])

            # the erase and add vectors
            # these are also parameters to the write head
            # but they describe "what" is to be written rather than "where"
            adds[idx][t] = np.tanh(np.dot(W['oadds' + tag], os[t]) + W['badds' + tag])
            erases[idx][t] = sigmoid(np.dot(W['oerases' + tag], os[t])
                                     + W['erases' + tag])

            w_ws[idx][t] = addressing.create_weights(k_ws[idx][t],
                                                     beta_ws[idx][t],
                                                     g_ws[idx][t],
                                                     s_ws[idx][t],
                                                     gamma_ws[idx][t],
                                                     w_ws[idx][t-1],
                                                     mems[t-1])

            w_rs[idx][t] = addressing.create_weights(k_rs[idx][t],
                                                     beta_rs[idx][t],
                                                     g_rs[idx][t],
                                                     s_rs[idx][t],
                                                     gamma_rs[idx][t],
                                                     w_rs[idx][t-1],
                                                     mems[t-1])

        ys[t] = np.dot(W['oy'], os[t]) + W['by']
        ps[t] = sigmoid(ys[t])

        one = np.ones(ps[t].shape)
        ts[t] = np.reshape(np.array(targets[t]), (self.out_size, 1))

        epsilon = 2**-23  # to prevent log(0)
        a = np.multiply(ts[t], np.log2(ps[t] + epsilon))
        b = np.multiply(one - ts[t], np.log2(one - ps[t] + epsilon))
        loss = loss - (a + b)

        for idx in range(self.heads):
            # read from the memory
            rs[idx][t] = memory.read(mems[t-1], w_rs[idx][t])

            # write into the memory
            # NOTE(review): every head writes starting from mems[t-1], so the
            # value kept in mems[t] is the one produced by the last head.
            mems[t] = memory.write(mems[t-1], w_ws[idx][t], erases[idx][t], adds[idx][t])

    self.stats = [loss, mems, ps, ys, os, zos, hs, zhs, xs, rs, w_rs, w_ws, adds, erases]
    return np.sum(loss)
def plot_interp_contrast_tuning(ams=None,
                                data=None,
                                theta=None,
                                these_sizes=[0, 2, 4],
                                ninterp=101,
                                usize=np.array((5, 8, 13, 22, 36, 60)),
                                ucontrast=np.array(
                                    (0, 6, 12, 25, 50, 100)) / 100,
                                colors=None,
                                deriv=False,
                                deriv_axis=2):
    """Plot data and model curves against contrast for selected sizes.

    The data are passed to ``ut.plot_bootstrapped_errorbars_hillel`` and the
    model is evaluated on ``ninterp`` contrasts evenly spaced in [0, 1],
    one curve per entry of ``these_sizes``.  The x coordinate of the curves
    is ``5 + log2(contrast)``, starting from the third interpolated contrast.

    Parameters
    ----------
    ams : model object providing ``usize``, ``theta``, ``fn`` and ``data``.
        When ``None`` it is built with ``ayaz_model`` from ``data``,
        ``usize``, ``ucontrast`` and ``theta``.
    data : required when ``ams`` is ``None``.
    theta : forwarded to ``ayaz_model`` when ``ams`` is ``None``.
    these_sizes : indices into ``ams.usize`` (and axis 1 of the data) to
        plot.  The list default is never mutated here.
    ninterp : number of interpolated contrasts.
    usize, ucontrast : size and contrast grids; ``ucontrast`` also provides
        the tick labels (as integer percentages).
    colors : indexed once per plotted size, so it must be supplied
        (``None`` would fail at ``colors[isize]``).
    deriv : when true, plot slopes instead of values: the data go through
        ``sca.compute_slope_avg`` and the model through ``grad``.
    deriv_axis : 1 to differentiate with respect to size, 2 with respect to
        contrast.

    NOTE(review): the nesting of the ``if deriv`` / ``else`` branches was
    reconstructed from context; confirm it against the original file.
    """
    ucontrast_interp = np.linspace(0, 1, ninterp)
    this_nsize = len(these_sizes)
    this_ucontrast = ucontrast_interp
    if ams is None:
        #assert(not data is None and not theta is None)
        assert (not data is None)
        ams = ayaz_model(data, usize=usize, ucontrast=ucontrast, theta=theta)
    this_usize = np.array([ams.usize[i] for i in these_sizes])
    # this_theta and fn are not used further down in this function.
    this_theta = ams.theta
    fn = ams.fn
    this_data = ams.data
    # First interpolated contrast that is drawn; index 0 is contrast 0, whose
    # log2 is -inf.
    ind0 = 2
    # x positions for the data: the first point sits at the x of the first
    # drawn model sample, followed by positions 1..5.
    cinds = np.concatenate(
        ((5 + np.log2(ucontrast_interp[ind0]), ), np.arange(1, 6)))
    if deriv:
        if deriv_axis == 1:
            this_data = sca.compute_slope_avg(usize,
                                              this_data,
                                              axis=deriv_axis)
        elif deriv_axis == 2:
            this_data = sca.compute_slope_avg(ucontrast,
                                              this_data,
                                              axis=deriv_axis)
        this_modeled = np.zeros((this_nsize, ninterp))
        for isize in range(this_nsize):
            if deriv_axis == 2:
                # Model response as a function of contrast at fixed size.
                crf = lambda c: ams.fn(np.array(
                    (c, )), np.array((this_usize[isize], )))[0, 0]
                cslope = np.array([grad(crf)(cc) for cc in this_ucontrast])
                this_modeled[isize] = cslope
                #print(cslope)
            elif deriv_axis == 1:
                for icontrast, cc in enumerate(this_ucontrast):
                    # Model response as a function of size at fixed contrast.
                    srf = lambda d: ams.fn(np.array((cc, )), np.array(
                        (d, )))[0, 0]
                    sslope = grad(srf)(this_usize[isize])
                    this_modeled[isize, icontrast] = sslope
    else:
        #this_modeled = nub_utils.ayaz_like_theta(this_ucontrast,this_usize,this_theta,fn=fn)
        this_modeled = ams.fn(this_ucontrast, this_usize)
    # transpose((0, 1, 2)) keeps the axis order unchanged.
    ut.plot_bootstrapped_errorbars_hillel(cinds,
                                          this_data[:, these_sizes, :].transpose(
                                              (0, 1, 2)),
                                          linewidth=0,
                                          colors=colors)
    for isize in range(this_nsize):
        plt.plot(5 + np.log2(ucontrast_interp[ind0:]),
                 this_modeled[isize, ind0:],
                 c=colors[isize])
    plt.xticks(cinds, (100 * ucontrast).astype('int'))
    ut.erase_top_right()
    plt.xlabel('contrast (%)')
    plt.ylabel('event rate/mean')
    if not deriv:
        # Only the non-derivative plot has its y axis floored at zero.
        plt.gca().set_ylim(bottom=0)
    plt.tight_layout()
def size_to_octave_desc(s):
    """Pack the octave and layer derived from size ``s`` into one integer.

    With ``r = log2(s / 3.6)``, the octave is ``floor(r)`` and the layer is
    ``floor(3 * r) - 3 * octave + 1``.  Octaves below -1 are replaced by
    octave -1, layer 1.  The octave occupies the low byte of the result and
    the layer the next byte.
    """
    log_ratio = np.log2(s / 3.6)
    octave = int(np.floor(log_ratio))
    layer = int(np.floor(3 * log_ratio) - 3 * octave) + 1
    if octave < -1:
        # This is apparently the minimum
        octave = -1
        layer = 1
    low_byte = octave & 255
    high_byte = (layer & 255) << 8
    return low_byte | high_byte
def fprop(params):
    """
    Forward pass of the NTM.

    Runs the controller and every read/write head over each timestep of
    ``inputs`` and accumulates the binary cross-entropy (in bits) between
    the sigmoid outputs and ``targets``.  ``self``, ``inputs`` and
    ``targets`` come from the enclosing scope.  A selection of intermediate
    quantities is stored in ``self.stats``.

    Returns the loss summed over all timesteps and output units.
    """

    W = params  # aliasing for brevity

    xs, hs, ys, ps, ts, os = {}, {}, {}, {}, {}, {}

    def per_head():
        """
        Silly utility function that should be called in init.
        Returns one empty, timestep-indexed dict per head.
        """
        # ``range`` instead of the Python-2-only ``xrange``.
        return [{} for _ in range(self.heads)]

    rs = per_head()
    k_rs, beta_rs, g_rs, s_rs, gamma_rs = (
        per_head(), per_head(), per_head(), per_head(), per_head())
    k_ws, beta_ws, g_ws, s_ws, gamma_ws = (
        per_head(), per_head(), per_head(), per_head(), per_head())
    adds, erases = per_head(), per_head()
    w_ws, w_rs = per_head(), per_head()  # read weights and write weights

    # Index -1 holds the initial state consumed by timestep 0.
    for idx in range(self.heads):
        rs[idx][-1] = self.W['rsInit' + str(idx)]  # stores values read from memory
        w_ws[idx][-1] = softmax(self.W['w_wsInit' + str(idx)])
        w_rs[idx][-1] = softmax(self.W['w_rsInit' + str(idx)])

    mems = {}  # the state of the memory at every timestep
    mems[-1] = self.W['memsInit']
    loss = 0

    for t in range(len(inputs)):

        xs[t] = np.reshape(np.array(inputs[t]), inputs[t].shape[::-1])

        # Contribution of the previous reads of all heads to the hidden layer.
        rsum = 0
        for idx in range(self.heads):
            rsum = rsum + np.dot(W['rh' + str(idx)],
                                 np.reshape(rs[idx][t-1], (self.M, 1)))
        hs[t] = np.tanh(np.dot(W['xh'], xs[t]) + rsum + W['bh'])

        os[t] = np.tanh(np.dot(W['ho'], hs[t]) + W['bo'])

        for idx in range(self.heads):
            tag = str(idx)

            # parameters to the read head
            k_rs[idx][t] = np.tanh(np.dot(W['ok_r' + tag], os[t]) + W['bk_r' + tag])
            beta_rs[idx][t] = softplus(np.dot(W['obeta_r' + tag], os[t])
                                       + W['bbeta_r' + tag])
            g_rs[idx][t] = sigmoid(np.dot(W['og_r' + tag], os[t]) + W['bg_r' + tag])
            s_rs[idx][t] = softmax(np.dot(W['os_r' + tag], os[t]) + W['bs_r' + tag])
            gamma_rs[idx][t] = 1 + sigmoid(np.dot(W['ogamma_r' + tag], os[t])
                                           + W['bgamma_r' + tag])

            # parameters to the write head
            k_ws[idx][t] = np.tanh(np.dot(W['ok_w' + tag], os[t]) + W['bk_w' + tag])
            beta_ws[idx][t] = softplus(np.dot(W['obeta_w' + tag], os[t])
                                       + W['bbeta_w' + tag])
            g_ws[idx][t] = sigmoid(np.dot(W['og_w' + tag], os[t]) + W['bg_w' + tag])
            s_ws[idx][t] = softmax(np.dot(W['os_w' + tag], os[t]) + W['bs_w' + tag])
            gamma_ws[idx][t] = 1 + sigmoid(np.dot(W['ogamma_w' + tag], os[t])
                                           + W['bgamma_w' + tag])

            # the erase and add vectors
            # these are also parameters to the write head
            # but they describe "what" is to be written rather than "where"
            adds[idx][t] = np.tanh(np.dot(W['oadds' + tag], os[t]) + W['badds' + tag])
            erases[idx][t] = sigmoid(np.dot(W['oerases' + tag], os[t])
                                     + W['erases' + tag])

            w_ws[idx][t] = addressing.create_weights(k_ws[idx][t],
                                                     beta_ws[idx][t],
                                                     g_ws[idx][t],
                                                     s_ws[idx][t],
                                                     gamma_ws[idx][t],
                                                     w_ws[idx][t-1],
                                                     mems[t-1])

            w_rs[idx][t] = addressing.create_weights(k_rs[idx][t],
                                                     beta_rs[idx][t],
                                                     g_rs[idx][t],
                                                     s_rs[idx][t],
                                                     gamma_rs[idx][t],
                                                     w_rs[idx][t-1],
                                                     mems[t-1])

        ys[t] = np.dot(W['oy'], os[t]) + W['by']
        ps[t] = sigmoid(ys[t])

        one = np.ones(ps[t].shape)
        ts[t] = np.reshape(np.array(targets[t]), (self.out_size, 1))

        epsilon = 2**-23  # to prevent log(0)
        a = np.multiply(ts[t], np.log2(ps[t] + epsilon))
        b = np.multiply(one - ts[t], np.log2(one - ps[t] + epsilon))
        loss = loss - (a + b)

        for idx in range(self.heads):
            # read from the memory
            rs[idx][t] = memory.read(mems[t-1], w_rs[idx][t])

            # write into the memory
            # NOTE(review): every head writes starting from mems[t-1], so the
            # value kept in mems[t] is the one produced by the last head.
            mems[t] = memory.write(mems[t-1], w_ws[idx][t], erases[idx][t], adds[idx][t])

    self.stats = [loss, ps, w_rs, w_ws, adds, erases]
    return np.sum(loss)
def reconstruct_penalty(gain, rate, gain_penalty, rate_penalty):
    """Return the weighted penalty on gain magnitude and on log2-rate magnitude.

    The result is ``gain_penalty * mean(|gain|) + rate_penalty *
    mean(|log2(rate)|)``.
    """
    mean_abs_gain = np.abs(gain).mean()
    mean_abs_log_rate = np.abs(np.log2(rate)).mean()
    return gain_penalty * mean_abs_gain + rate_penalty * mean_abs_log_rate