def energy(self, x):
    r"""
    Calculates the energy of a pattern ``x`` according to the Hopfield network.

    The energy of a pattern ``x`` is computed as:

    .. math::

        E(x) = -\frac{1}{2} x^T \cdot [J - \text{diag}(J)] \cdot x + \theta \cdot x

    Parameters
    ----------
    x : numpy array
        (M, N)-dim array of binary input patterns of length N,
        where N is the number of nodes in the network

    Returns
    -------
    energy : float
        Energy of input pattern according to the Hopfield network.
    """
    X = np.atleast_2d(x)
    energies = []
    for x in X:
        energies.append(-.5 * np.dot(x, np.dot(self._J - np.diag(self._J.diagonal()), x))
                        + np.dot(self._theta, x))
    if len(energies) == 1:
        return np.array(energies[0], dtype=np.double)
    else:
        return np.array(energies, dtype=np.double)
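# Hedged usage sketch (not part of the original class): the same energy formula
# evaluated directly for one binary pattern; J and theta here are toy assumptions.
import numpy as np

J = np.array([[0., 1.], [1., 0.]])   # assumed symmetric coupling matrix
theta = np.zeros(2)                   # assumed zero thresholds
x = np.array([1, 1])
E = -.5 * np.dot(x, np.dot(J - np.diag(J.diagonal()), x)) + np.dot(theta, x)
print(E)  # -1.0 for this toy example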
def policy_backward(eph, epdlogp):
    """ backward pass. (eph is array of intermediate hidden states) """
    dW2 = np.dot(eph.T, epdlogp).ravel()
    dh = np.outer(epdlogp, model['W2'])
    dh[eph <= 0] = 0  # backprop relu
    dW1 = np.dot(dh.T, epx)
    return {'W1': dW1, 'W2': dW2}
def H(s, tres, QAA, QFF, QAF, QFA, kF):
    """
    Evaluate the H(s) function (Eq. 54, HJC92).
    HAA(s) = QAA + QAF * (s*I - QFF)^(-1) * (I - exp(-(s*I - QFF) * tau)) * QFA
    To evaluate HFF(s) exchange A by F and F by A in the function call.

    Parameters
    ----------
    s : float
        Laplace transform argument.
    tres : float
        Time resolution (dead time).
    QAA : array_like, shape (kA, kA)
    QFF : array_like, shape (kF, kF)
    QAF : array_like, shape (kA, kF)
    QFA : array_like, shape (kF, kA)
        QAA, QFF, QAF, QFA - submatrices of Q.
    kF : int
        A number of shut states in kinetic scheme.

    Returns
    -------
    H : ndarray, shape (kA, kA)
    """
    IF = np.eye(kF)
    XFF = s * IF - QFF
    invXFF = nplin.inv(XFF)
    expXFF = expQt(-XFF, tres)
    H = QAA + np.dot(np.dot(np.dot(QAF, invXFF), IF - expXFF), QFA)
    return H
def objective_gradient(self, X, J=None, return_K=False):
    """
    Computes the MPF objective gradient on input data X given coupling strengths J.

    Parameters
    ----------
    X : numpy array
        (M, N)-dim array of binary input patterns of length N,
        where N is the number of nodes in the network
    J : numpy array, optional
        Coupling matrix of size N x N, where N denotes the number
        of nodes in the network (default None)
    return_K : bool, optional
        Flag whether to return K (default False)

    Returns
    -------
    dJ [, K] : numpy array [, numpy array]
        Update to coupling matrix J [and K if return_K is True]
    """
    if J is None:
        J = self._J
        J[np.eye(self._N, dtype=bool)] = -2 * self._theta

    X = np.atleast_2d(X)
    M, N = X.shape

    S = 2 * X - 1
    Kfull = np.exp(-S * np.dot(X, J.T) + .5 * np.diag(J)[None, :])

    dJ = -np.dot(X.T, Kfull * S) + .5 * np.diag(Kfull.sum(0))
    if self._symmetric is True:
        dJ = .5 * (dJ + dJ.T)

    if return_K:
        return Kfull.sum() / M, dJ / M
    else:
        return dJ / M
def CHSvec(roots, tres, tcrit, QFA, kA, expQAA, phiF, R):
    """
    Calculate initial and final CHS vectors for the HJC likelihood function
    (Eqs. 5.5 or 5.7, CHS96).

    Parameters
    ----------
    roots : array_like, shape (1, kA)
        Roots of the asymptotic pdf.
    tres : float
        Time resolution (dead time).
    tcrit : float
        Critical time.
    QFA : array_like, shape (kF, kA)
    kA : int
    expQAA : array_like, shape (kA, kA)
    phiF : array_like, shape (1, kF)
    R : array_like, shape (kF, kF, kF)

    Returns
    -------
    start : ndarray, shape (1, kA)
        CHS start vector (Eq. 5.11, CHS96).
    end : ndarray, shape (kF, 1)
        CHS end vector (Eq. 5.8, CHS96).
    """
    H = HAF(roots, tres, tcrit, QFA, expQAA, R)
    u = np.ones((kA, 1))
    start = np.dot(phiF, H) / np.dot(np.dot(phiF, H), u)
    end = np.dot(H, u)
    return start, end
def phiSub(Q, k1, k2):
    """
    Calculate initial vector for any subset.

    Parameters
    ----------
    Q : array_like, shape (k, k)
    k1, k2 : int
        Indices delimiting the subset of states.

    Returns
    -------
    phi : ndarray, shape (k2 - k1 + 1)
    Q22c : ndarray
        Copy of the subset submatrix of Q.
    """
    u = np.ones((k2 - k1 + 1, 1))
    p = pinf(Q)
    p1, p2, p3 = np.hsplit(p, (k1, k2 + 1))
    p1c = np.hstack((p1, p3))

    #Q = Q.copy()
    Q1, Q2, Q3 = np.hsplit(Q, (k1, k2 + 1))
    Q21, Q22, Q23 = np.hsplit(Q2.transpose(), (k1, k2 + 1))
    Q22c = Q22.copy()
    Q12 = np.vstack((Q21.transpose(), Q23.transpose()))

    nom = np.dot(p1c, Q12)
    denom = np.dot(nom, u)
    phi = nom / denom
    return phi, Q22c
def phiHJC(eGAF, eGFA, kA):
    """
    Calculate initial HJC vector for openings by solving
    phi * (I - eGAF * eGFA) = 0 (Eq. 10, HJC92).
    For shuttings exchange A by F and F by A in the function call.

    Parameters
    ----------
    eGAF : array_like, shape (kA, kF)
    eGFA : array_like, shape (kF, kA)
    kA : int
        A number of open states in kinetic scheme.
    kF : int
        A number of shut states in kinetic scheme.

    Returns
    -------
    phi : array_like, shape (kA)
    """
    if kA == 1:
        phi = np.array([1])
    else:
        Qsub = np.eye(kA) - np.dot(eGAF, eGFA)
        u = np.ones((kA, 1))
        S = np.concatenate((Qsub, u), 1)
        phi = np.dot(u.transpose(), nplin.inv(np.dot(S, S.transpose())))[0]
    return phi
def iGs(Q, kA, kB):
    r"""
    Calculate GBA and GAB matrices (Eq. 1.25, CH82).
    Calculate also GFA and GAF if kF is given instead of kB.

    .. math::

       \bs{G}_\cl{BA} &= -\bs{Q}_\cl{BB}^{-1} \bs{Q}_\cl{BA} \\
       \bs{G}_\cl{AB} &= -\bs{Q}_\cl{AA}^{-1} \bs{Q}_\cl{AB}

    Parameters
    ----------
    Q : array_like, shape (k, k)
    kA : int
        A number of open states in kinetic scheme.
    kB : int
        A number of short lived shut states in kinetic scheme.

    Returns
    -------
    GAB : ndarray, shape (kA, kB)
    GBA : ndarray, shape (kB, kA)
    """
    kE = kA + kB
    QBB = Q[kA:kE, kA:kE]
    QBA = Q[kA:kE, 0:kA]
    QAA = Q[0:kA, 0:kA]
    QAB = Q[0:kA, kA:kE]
    GAB = np.dot(nplin.inv(-1 * QAA), QAB)
    GBA = np.dot(nplin.inv(-1 * QBB), QBA)
    return GAB, GBA
def eGs(GAF, GFA, kA, kF, expQFF):
    """
    Calculate eGAF, the probabilities of transitions from apparently open to
    shut states regardless of when the transition occurs. These are the
    Laplace transforms of eGAF(t) at s=0. Used to calculate initial HJC
    vectors (HJC92).
    eGAF*(s=0) = (I - GAF * (I - expQFF) * GFA)^-1 * GAF * expQFF
    To calculate eGFA exchange A by F and F by A in the function call.

    Parameters
    ----------
    GAF : array_like, shape (kA, kF)
    GFA : array_like, shape (kF, kA)
    kA : int
        A number of open states in kinetic scheme.
    kF : int
        A number of shut states in kinetic scheme.

    Returns
    -------
    eGAF : array_like, shape (kA, kF)
    """
    temp = np.eye(kA) - np.dot(np.dot(GAF, np.eye(kF) - expQFF), GFA)
    eGAF = np.dot(np.dot(nplin.inv(temp), GAF), expQFF)
    return eGAF
def backprop(self, x, y):
    activation = x
    activations = [x]
    zs = []
    for weight, bias in zip(self.weights, self.biases):
        z = np.dot(activation, weight) + bias
        zs.append(z)
        activation = sigmoid(z)
        activations.append(activation)

    delta = (activation - y) * sigmoid_prime(zs[-1])
    nabla_weights = [np.zeros(w.shape) for w in self.weights]
    nabla_biases = [np.zeros(b.shape) for b in self.biases]
    nabla_weights[-1] = np.dot(activations[-2].transpose(), delta)
    nabla_biases[-1] = delta
    for l in range(2, len(self.layers)):
        delta = np.dot(delta, self.weights[-l + 1].transpose()) * sigmoid_prime(zs[-l])
        nabla_weights[-l] = np.dot(activations[-l - 1].transpose(), delta)
        nabla_biases[-l] = delta
    return (nabla_weights, nabla_biases)
def read_abinit(filename):
    with open(filename) as f:
        abinit_in = AbinitIn(f.readlines())
    tags = abinit_in.get_variables()
    acell = tags['acell']
    rprim = tags['rprim'].T
    scalecart = tags['scalecart']
    lattice = rprim * acell
    if scalecart is not None:
        for i in range(3):
            lattice[i] *= scalecart[i]

    if tags['xcart'] is not None:
        pos_bohr = np.transpose(tags['xcart'])
        positions = np.dot(np.linalg.inv(lattice), pos_bohr).T
    elif tags['xangst'] is not None:
        pos_bohr = np.transpose(tags['xangst']) / Bohr
        positions = np.dot(np.linalg.inv(lattice), pos_bohr).T
    elif tags['xred'] is not None:
        positions = tags['xred']

    numbers = [tags['znucl'][x - 1] for x in tags['typat']]

    return Atoms(numbers=numbers,
                 cell=lattice.T,
                 scaled_positions=positions)
def vertex_transform1(vertex):
    """
    This transform was applied on the original surface.
    """
    return np.dot(rotation_matrix(np.array([0.0, 0.0, 1.0]), math.pi),
                  np.dot(rotation_matrix(np.array([1.0, 0.0, 0.0]), -math.pi / 1.6),
                         np.array([float(x) / 1.5 for x in vertex[:3]])
                         + np.array([0.0, -40.0, 20.0])))
def test_grid_search_precomputed_kernel():
    """Test that grid search works when the input features are given in the
    form of a precomputed kernel matrix"""
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)

    # compute the training kernel matrix corresponding to the linear kernel
    K_train = np.dot(X_[:180], X_[:180].T)
    y_train = y_[:180]

    clf = SVC(kernel='precomputed')
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
    cv.fit(K_train, y_train)

    assert_true(cv.best_score_ >= 0)

    # compute the test kernel matrix
    K_test = np.dot(X_[180:], X_[:180].T)
    y_test = y_[180:]

    y_pred = cv.predict(K_test)

    assert_true(np.mean(y_pred == y_test) >= 0)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cv.fit, K_train.tolist(), y_train)
def updateParameters(self, articlePicked, click, userID):
    self.counter += 1
    self.Wlong = vectorize(self.W)
    featureDimension = len(articlePicked.featureVector)
    T_X = vectorize(np.outer(articlePicked.featureVector, self.W.T[userID]))
    self.A += np.outer(T_X, T_X)
    self.b += click * T_X
    self.AInv = np.linalg.inv(self.A)
    self.UserTheta = matrixize(np.dot(self.AInv, self.b), len(articlePicked.featureVector))

    Xi_Matrix = np.zeros(shape=(featureDimension, self.userNum))
    Xi_Matrix.T[userID] = articlePicked.featureVector
    W_X = vectorize(np.dot(np.transpose(self.UserTheta), Xi_Matrix))
    self.batchGradient += evaluateGradient(W_X, click, self.Wlong, self.lambda_, self.regu)

    if self.counter % self.windowSize == 0:
        self.Wlong -= 1 / (float(self.counter / self.windowSize) + 1) * self.batchGradient
        self.W = matrixize(self.Wlong, self.userNum)
        self.W = normalize(self.W, axis=0, norm='l1')
        #print('SVD', self.W)
        self.batchGradient = np.zeros(self.userNum * self.userNum)
        # Use Ridge regression to fit W
        '''
        plt.pcolor(self.W_b)
        plt.colorbar
        plt.show()
        '''

    if (self.W.T[userID] < 0).any() or (self.W.T[userID] > 1).any():
        print(self.W.T[userID])

    self.CoTheta = np.dot(self.UserTheta, self.W)
    self.BigW = np.kron(np.transpose(self.W), np.identity(n=len(articlePicked.featureVector)))
    self.CCA = np.dot(np.dot(self.BigW, self.AInv), np.transpose(self.BigW))
    self.BigTheta = np.kron(np.identity(n=self.userNum), self.UserTheta)
def fitToData(self, data):
    '''
    param data: numpy array where [:, 0] is x and [:, 1] is y
    '''
    x = data[:, 0][:, np.newaxis]
    y = data[:, 1][:, np.newaxis]
    D = np.hstack((x * x, x * y, y * y, x, y, np.ones_like(x)))
    S = np.dot(D.T, D)
    C = np.zeros([6, 6])
    C[0, 2] = C[2, 0] = 2
    C[1, 1] = -1
    E, V = eig(np.dot(inv(S), C))
    n = np.argmax(np.abs(E))
    self.parameters = V[:, n]

    axes = self.ellipse_axis_length()
    self.a = axes[0]
    self.b = axes[1]
    self.angle = self.ellipse_angle_of_rotation()

    if not self.a or not self.b or self.parameters is None or np.iscomplexobj(self.parameters) or \
            math.isnan(self.a) or math.isnan(self.b) or math.isnan(self.ellipse_center()[0]) or \
            np.iscomplex(self.ellipse_center()[0]) or np.iscomplex(self.a) or np.iscomplex(self.b) or \
            np.iscomplexobj(self.angle):
        self.a = 0
        self.b = 0
        self.parameters = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        self.angle = 0
        self.error = True
def test_primal_dual_relationship():
    y = y_diabetes.reshape(-1, 1)
    coef = _solve_cholesky(X_diabetes, y, alpha=[1e-2])
    K = np.dot(X_diabetes, X_diabetes.T)
    dual_coef = _solve_cholesky_kernel(K, y, alpha=[1e-2])
    coef2 = np.dot(X_diabetes.T, dual_coef).T
    assert_array_almost_equal(coef, coef2)
def EvaluatePolicy(s, w_pi, useRBFKernel=False):
    # the value of the improved policy
    value = np.zeros((len(s), 1))

    # the new policy
    policy = [False] * len(s)

    # iterate through every state
    for idx in range(len(s)):
        # State-Action value function for actions 0.0 and 1.0
        if useRBFKernel:
            q0 = np.dot(computePhiRBF(s[idx], 0.0).T, w_pi)
            q1 = np.dot(computePhiRBF(s[idx], 1.0).T, w_pi)
        else:
            q0 = np.dot(np.append(s[idx, 0], 0.0), w_pi)
            q1 = np.dot(np.append(s[idx, 0], 1.0), w_pi)

        # update the value
        value[idx] = max(q0, q1)

        # update the policy
        policy[idx] = True if q1 > q0 else False

    return (policy, value)
def Haffine_from_points(fp, tp):
    '''Compute the homography H of an affine transform such that tp is obtained from fp.'''
    if fp.shape != tp.shape:
        raise RuntimeError('number of points do not match')

    # normalize the points
    # condition the source points
    m = numpy.mean(fp[:2], axis=1)
    maxstd = numpy.max(numpy.std(fp[:2], axis=1)) + 1e-9
    C1 = numpy.diag([1 / maxstd, 1 / maxstd, 1])
    C1[0, 2] = -m[0] / maxstd
    C1[1, 2] = -m[1] / maxstd
    fp_cond = numpy.dot(C1, fp)

    # condition the corresponding target points
    m = numpy.mean(tp[:2], axis=1)
    maxstd = numpy.max(numpy.std(tp[:2], axis=1)) + 1e-9
    C2 = numpy.diag([1 / maxstd, 1 / maxstd, 1])
    C2[0, 2] = -m[0] / maxstd
    C2[1, 2] = -m[1] / maxstd
    tp_cond = numpy.dot(C2, tp)

    # after normalization the points have zero mean, so the translation is zero
    A = numpy.concatenate((fp_cond[:2], tp_cond[:2]), axis=0)
    U, S, V = numpy.linalg.svd(A.T)

    # build matrices B and C
    tmp = V[:2].T
    B = tmp[:2]
    C = tmp[2:4]

    tmp2 = numpy.concatenate((numpy.dot(C, numpy.linalg.pinv(B)), numpy.zeros((2, 1))), axis=1)
    H = numpy.vstack((tmp2, [0, 0, 1]))

    # de-condition
    H = numpy.dot(numpy.linalg.inv(C2), numpy.dot(H, C1))

    return H / H[2, 2]  # normalize and return
def predict(self, samples):
    # this function returns the output layer activations (estimates)
    z2 = np.dot(samples, self.W1) + np.array([self.b1])
    a2 = sigmoid(z2)
    z3 = np.dot(a2, self.W2) + np.array([self.b2])
    a3 = sigmoid(z3)
    return a3
def runLabeling(file_path, gps_filename, output_name, frames_to_skip, final_frame, lp, rp, pickle_loc):
    video_reader = WarpedVideoReader(file_path)
    #video_reader.setSubsample(True)
    video_reader.setPerspectives(pickle_loc)
    gps_reader = GPSReader(gps_filename)
    gps_dat = gps_reader.getNumericData()

    cam = getCameraParams()
    cam_to_use = cam[int(output_name[-1]) - 1]

    lp = pixelTo3d(lp, cam_to_use)
    rp = pixelTo3d(rp, cam_to_use)
    tr = GPSTransforms(gps_dat, cam_to_use)
    pitch = -cam_to_use['rot_x']
    height = 1.106
    R_camera_pitch = euler_matrix(cam_to_use['rot_x'], cam_to_use['rot_y'], cam_to_use['rot_z'], 'sxyz')[0:3, 0:3]
    Tc = np.eye(4)
    Tc[0:3, 0:3] = R_camera_pitch.transpose()
    Tc[0:3, 3] = [-0.2, -height, -0.5]
    lpts = np.zeros((lp.shape[0], 4))
    rpts = np.zeros((rp.shape[0], 4))
    for t in range(min(tr.shape[0], lp.shape[0])):
        lpts[t, :] = np.dot(tr[t, :, :], np.linalg.solve(Tc, np.array([lp[t, 0], lp[t, 1], lp[t, 2], 1])))
        rpts[t, :] = np.dot(tr[t, :, :], np.linalg.solve(Tc, np.array([rp[t, 0], rp[t, 1], rp[t, 2], 1])))

    ldist = np.apply_along_axis(np.linalg.norm, 1, np.concatenate((np.array([[0, 0, 0, 0]]), lpts[1:] - lpts[0:-1])))
    rdist = np.apply_along_axis(np.linalg.norm, 1, np.concatenate((np.array([[0, 0, 0, 0]]), rpts[1:] - rpts[0:-1])))
    start_frame = frames_to_skip
    runBatch(video_reader, gps_dat, cam_to_use, output_name, start_frame, final_frame, lpts, rpts, ldist, rdist, tr)

    print("Done with %s" % output_name)
def backprop(self, x, y):
    """Return a tuple ``(nabla_b, nabla_w)`` representing the
    gradient for the cost function C_x.  ``nabla_b`` and
    ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
    to ``self.biases`` and ``self.weights``."""
    nabla_b = [np.zeros(b.shape) for b in self.biases]
    nabla_w = [np.zeros(w.shape) for w in self.weights]
    # feedforward
    activation = x
    activations = [x]  # list to store all the activations, layer by layer
    zs = []  # list to store all the z vectors, layer by layer
    for b, w in zip(self.biases, self.weights):
        z = np.dot(w, activation) + b
        zs.append(z)
        activation = sigmoid(z)
        activations.append(activation)
    # backward pass
    delta = self.cost_derivative(activations[-1], y) * \
        sigmoid_prime(zs[-1])
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())
    # Note that the variable l in the loop below is used a little
    # differently to the notation in Chapter 2 of the book.  Here,
    # l = 1 means the last layer of neurons, l = 2 is the
    # second-last layer, and so on.  It's a renumbering of the
    # scheme in the book, used here to take advantage of the fact
    # that Python can use negative indices in lists.
    for l in range(2, self.num_layers):
        z = zs[-l]
        sp = sigmoid_prime(z)
        delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
    return (nabla_b, nabla_w)
def forward(self, X):
    # Propagate inputs through the network
    self.z2 = np.dot(X, self.W1)
    self.a2 = self.sigmoid(self.z2)
    self.z3 = np.dot(self.a2, self.W2)
    yHat = self.sigmoid(self.z3)
    return yHat
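# Standalone sketch of the same two-layer forward pass; the shapes and random
# weights below are illustrative assumptions, not part of the original class.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)
X = rng.standard_normal((5, 3))    # 5 samples, 3 input features
W1 = rng.standard_normal((3, 4))   # input -> hidden weights
W2 = rng.standard_normal((4, 1))   # hidden -> output weights
yHat = sigmoid(np.dot(sigmoid(np.dot(X, W1)), W2))
print(yHat.shape)  # (5, 1)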
def smooth_objective(self, x, mode='both', check_feasibility=False):
    """
    Evaluate a smooth function and/or its gradient

    if mode == 'both', return both function value and gradient
    if mode == 'grad', return only the gradient
    if mode == 'func', return only the function value
    """
    x = self.apply_offset(x)
    exp_x = np.exp(x)

    #TODO: Using transposes to scale the rows of a 2d array - should we use an affine_transform to do this?
    #JT: should be able to do this with np.newaxis

    if mode == 'both':
        ratio = ((self.trials / (1. + np.sum(exp_x, axis=1))) * exp_x.T).T
        f, g = -2. * self.scale(np.sum(self.firstcounts * x) -
                                np.dot(self.trials, np.log(1. + np.sum(exp_x, axis=1)))), \
            -2 * self.scale(self.firstcounts - ratio)
        return f, g
    elif mode == 'grad':
        ratio = ((self.trials / (1. + np.sum(exp_x, axis=1))) * exp_x.T).T
        f, g = None, -2 * self.scale(self.firstcounts - ratio)
        return g
    elif mode == 'func':
        f, g = -2. * self.scale(np.sum(self.firstcounts * x) -
                                np.dot(self.trials, np.log(1. + np.sum(exp_x, axis=1)))), None
        return f
    else:
        raise ValueError("mode incorrectly specified")
def InitialAlignment(self, scale=0.15):
    """ Compute SVD and align object to be in a certain coordinate frame.

    Usage: model.InitialAlignment(scale)

    Input:
        scale - Desired scale for object. Scale is defined as the length
            along the leading eigenvector, in meters.
    """
    pts3D = self.pts3D

    # Compute eigenvecs and rotate according to them
    pc, evals, mean = utils.pca(pts3D, remove_mean=True)
    pts3D_rot = np.dot(pc.T, pts3D)

    # Find length according to max eigenvector
    mins = np.min(pts3D_rot, axis=1)
    maxs = np.max(pts3D_rot, axis=1)
    max_length = maxs[0] - mins[0]

    # Rotation matrix is the covariance matrix, but we want Z as the leading
    # eigenvector:
    rot = np.c_[-pc[2], pc[1], pc[0]]

    # Transform model to have zero mean, reasonable scale and rotation.
    self.transform(rot, np.dot(rot, -mean), float(scale) / max_length)
def get_corr_pred(self, sctx, u, du, tn, tn1,
                  u_avg=None,
                  B_mtx_grid=None,
                  J_det_grid=None,
                  ip_coords=None,
                  ip_weights=None):
    '''
    Corrector and predictor evaluation.

    @param u current element displacement vector
    '''
    if J_det_grid is None or B_mtx_grid is None:
        X_mtx = sctx.X

    show_comparison = True
    if ip_coords is None:
        ip_coords = self.ip_coords
        show_comparison = False
    if ip_weights is None:
        ip_weights = self.ip_weights

    ### Use for Jacobi Transformation
    n_e_dofs = self.n_e_dofs
    K = zeros((n_e_dofs, n_e_dofs))
    F = zeros(n_e_dofs)
    sctx.fets_eval = self
    ip = 0

    for r_pnt, wt in zip(ip_coords, ip_weights):
        #r_pnt = gp[0]
        sctx.r_pnt = r_pnt
        # caching cannot be switched off at the moment
        # if J_det_grid is None:
        #     J_det = self._get_J_det(r_pnt, X_mtx)
        # else:
        #     J_det = J_det_grid[ip, ...]
        # if B_mtx_grid is None:
        #     B_mtx = self.get_B_mtx(r_pnt, X_mtx)
        # else:
        #     B_mtx = B_mtx_grid[ip, ...]
        J_det = J_det_grid[ip, ...]
        B_mtx = B_mtx_grid[ip, ...]

        eps_mtx = dot(B_mtx, u)
        d_eps_mtx = dot(B_mtx, du)
        sctx.mats_state_array = sctx.elem_state_array[ip * self.m_arr_size: (ip + 1) * self.m_arr_size]
        #print 'elem state ', sctx.elem_state_array
        #print 'mats state ', sctx.mats_state_array
        sctx.r_ls = sctx.ls_val[ip]
        sig_mtx, D_mtx = self.get_mtrl_corr_pred(sctx, eps_mtx, d_eps_mtx, tn, tn1)
        k = dot(B_mtx.T, dot(D_mtx, B_mtx))
        k *= (wt * J_det)
        K += k
        f = dot(B_mtx.T, sig_mtx)
        f *= (wt * J_det)
        F += f
        ip += 1

    return F, K
def train(self, inp, out, training_weight=1.):
    inp = np.mat(inp).T
    out = np.mat(out).T
    deriv = []
    val = inp
    vals = [val]
    # forward calculation of activations and derivatives
    for weight, bias in self.__weights:
        val = weight * val
        val += bias
        deriv.append(self.__derivative(val))
        vals.append(self.__activation(val))
    deriv = iter(reversed(deriv))
    weights = iter(reversed(self.__weights))
    errs = []
    errs.append(np.multiply(vals[-1] - out, next(deriv)))
    # backwards propagation of errors
    for (w, b), d in zip(weights, deriv):
        errs.append(np.multiply(np.dot(w.T, errs[-1]), d))
    weights = iter(self.__weights)
    for (w, b), v, e in zip(
            self.__weights,
            vals, reversed(errs)):
        e *= self.__learning_rate * training_weight
        w -= e * v.T
        b -= e
    tmp = vals[-1] - out
    return np.dot(tmp[0].T, tmp[0]) * .5 * training_weight
def top_eigenvector(A, niter=1000, force_iteration=False):
    '''
    assuming the LEFT invariant subspace of A corresponding to the LEFT
    eigenvalue of largest modulus has geometric multiplicity of 1 (trivial
    Jordan block), returns the vector at the intersection of that eigenspace
    and the simplex

    A should probably be a ROW-stochastic matrix

    probably uses power iteration
    '''
    n = A.shape[0]
    np.seterr(invalid='raise', divide='raise')
    if n <= 25 and not force_iteration:
        x = np.repeat(1. / n, n)
        x = np.linalg.matrix_power(A.T, niter).dot(x)
        x /= x.sum()
        return x
    else:
        x1 = np.repeat(1. / n, n)
        x2 = x1.copy()
        for itr in range(niter):
            np.dot(A.T, x1, out=x2)
            x2 /= x2.sum()
            x1, x2 = x2, x1
            if np.linalg.norm(x1 - x2) < 1e-8:
                break
        return x1
def test_dot():
    # Test normal dot
    a = np.random.uniform(-3, 3, (3, 4))
    b = np.random.uniform(-3, 3, (4, 5))
    c = np.dot(a, b)
    A = mx.nd.array(a)
    B = mx.nd.array(b)
    C = mx.nd.dot(A, B)
    assert reldiff(c, C.asnumpy()) < 1e-5
    # Test dot with transpose kargs
    a = np.random.uniform(-3, 3, (3, 4))
    b = np.random.uniform(-3, 3, (3, 5))
    c = np.dot(a.T, b)
    A = mx.nd.array(a)
    B = mx.nd.array(b)
    C = mx.nd.dot(A, B, transpose_a=True)
    assert reldiff(c, C.asnumpy()) < 1e-5
    # Test dot with transpose kargs
    a = np.random.uniform(-3, 3, (3, 4))
    b = np.random.uniform(-3, 3, (5, 4))
    c = np.dot(a, b.T)
    A = mx.nd.array(a)
    B = mx.nd.array(b)
    C = mx.nd.dot(A, B, transpose_b=True)
    assert reldiff(c, C.asnumpy()) < 1e-5
    # Test dot with transpose kargs
    a = np.random.uniform(-3, 3, (4, 3))
    b = np.random.uniform(-3, 3, (5, 4))
    c = np.dot(a.T, b.T)
    A = mx.nd.array(a)
    B = mx.nd.array(b)
    C = mx.nd.dot(A, B, transpose_a=True, transpose_b=True)
    assert reldiff(c, C.asnumpy()) < 1e-5
def test_grtm():
    l = language(1000)
    n_iter = 1000
    KL_thresh = 0.3

    mu = 0.
    nu2 = 1.
    np.random.seed(l['seed'])
    H = np.random.normal(loc=mu, scale=nu2, size=(l['K'], l['K']))
    zeta = pd.DataFrame([(i, j, np.dot(np.dot(l['thetas'][i], H), l['thetas'][j]))
                         for i, j in product(range(l['D']), repeat=2)],
                        columns=('tail', 'head', 'zeta'))
    zeta['y'] = (zeta.zeta >= 0).astype(int)
    y = zeta[['tail', 'head', 'y']].values
    skf = StratifiedKFold(y[:, 2], n_folds=100)
    _, train_idx = next(iter(skf))

    _K = l['K']
    _alpha = l['alpha'][:_K]
    _beta = np.repeat(0.01, l['V'])
    _b = 1.
    grtm = GRTM(_K, _alpha, _beta, mu, nu2, _b, n_iter, seed=l['seed'],
                n_report_iter=l['n_report_iters'])

    grtm.fit(l['doc_term_matrix'], y[train_idx])

    assert_probablity_distribution(grtm.phi)
    check_KL_divergence(l['topics'], grtm.phi, KL_thresh)
def ehist_equalize_melhist(d, sr, refMelHist, edges):
    """ Modify a signal in the Mel domain by equalizing the Mel-subband
        histograms to match the passed-in ones """
    # Calculate the (Mel) spectrograms, and histogram, and axes
    melHist, edges, D, DmeldB, melmx, freqs = mel_hist(d, sr, edges=edges)
    # Build mapping & modify mel spectrogram
    histmaps = make_hist_maps(melHist, refMelHist, edges)
    # for some reason, extrapolating madly below bottom edge - clip it
    DmeldBmapped = np.maximum(edges[0],
                              np.minimum(edges[-1],
                                         apply_hist_maps(DmeldB, histmaps)))
    # Reconstruct audio based on mapped envelope
    # We map both original and modified Mel envelopes to FFT domain
    # then scale original STFT magnitudes by their ratio
    DmelInFFT = np.dot(melmx.T, idB(DmeldB))
    DmappedInFFT = np.dot(melmx.T, idB(DmeldBmapped))
    # Zero values in denominator will match to zeros in numerator,
    # so it's OK to drop them
    Dmask = DmappedInFFT / (DmelInFFT + (DmelInFFT == 0))
    # Median filter to remove short blips in gain
    medfiltwin = 7
    DmaskF = median_filter(Dmask, size=(1, medfiltwin))
    # Now scale each FFT val by their ratio
    Dmod = D * DmaskF
    # and resynthesize
    nfft = 2 * (np.size(D, axis=0) - 1)
    win = nfft
    hop = win / 4
    dout = istft(Dmod.T, win, hop)
    return dout
def tz(M):
    mz = matrix_power(M, -1)
    assert_almost_equal(identity(M.shape[0]), dot(mz, M))
def hinge_loss_gradient(w, x, y):
    # gradient of the hinge loss max(0, 1 - y * <w, x>) with respect to w
    if np.dot(w, x) * y >= 1:
        return 0
    else:
        return -y * x
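# Hedged usage sketch (values are illustrative): one gradient-descent step
# using the hinge (sub)gradient defined above.
import numpy as np

w = np.zeros(3)
x = np.array([1.0, 2.0, -1.0])
y = 1
w -= 0.1 * hinge_loss_gradient(w, x, y)   # step against the (sub)gradient
print(w)  # [0.1, 0.2, -0.1]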
def propdown(self, h):
    pre_activation = numpy.dot(h, self.W.T) + self.vbias
    return pre_activation
def step_length(f, g, xk, alpha, pk, c2):
    return interpolation(f, g,
                         lambda alpha: f(xk + alpha * pk),
                         lambda alpha: np.dot(g(xk + alpha * pk), pk),
                         alpha, c2,
                         lambda f, g, alpha, c2: wolfe(f, g, xk, alpha, pk))
def gold_stein(f, g, xk, alpha, pk, c):
    return (f(xk) + (1 - c) * alpha * np.dot(g(xk), pk) <= f(xk + alpha * pk)) and (
        f(xk + alpha * pk) <= f(xk) + c * alpha * np.dot(g(xk), pk))
def strong_wolfe(f, g, xk, alpha, pk, c2):
    # typically, c2 = 0.9 when using Newton or quasi-Newton's method.
    # c2 = 0.1 when using the non-linear conjugate gradient method.
    return wolfe(f, g, xk, alpha, pk) and abs(np.dot(
        g(xk + alpha * pk), pk)) <= c2 * abs(np.dot(g(xk), pk))
def wolfe(f, g, xk, alpha, pk):
    c1 = 1e-4
    return f(xk + alpha * pk) <= f(xk) + c1 * alpha * np.dot(g(xk), pk)
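# Illustrative check of the line-search conditions above on a simple quadratic;
# the function, gradient, point and step size are assumptions for the example.
import numpy as np

f = lambda x: float(np.dot(x, x))            # f(x) = ||x||^2
g = lambda x: 2.0 * np.asarray(x, float)     # its gradient
xk = np.array([1.0, 1.0])
pk = -g(xk)                                  # steepest-descent direction
print(wolfe(f, g, xk, 0.25, pk))             # True: sufficient decrease holds
print(strong_wolfe(f, g, xk, 0.25, pk, 0.9)) # True: curvature condition also holds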
def do(self, a, b):
    a_ginv = linalg.pinv(a)
    assert_almost_equal(dot(a, a_ginv), identity(asarray(a).shape[0]))
    assert imply(isinstance(a, matrix), isinstance(a_ginv, matrix))
def do(self, a, b):
    evalues, evectors = linalg.eig(a)
    assert_almost_equal(dot(a, evectors), multiply(evectors, evalues))
    assert imply(isinstance(a, matrix), isinstance(evectors, matrix))
def tz(M):
    mz = matrix_power(M, 2)
    assert_equal(mz, dot(M, M))
    assert_equal(mz.dtype, M.dtype)
# QR decomposition ensures that the columns of B are orthogonal
B, R = np.linalg.qr(B)
L = B.shape[1]
theta_old = theta[:nroot]
print("EOMCCSD: Iter # {:>6} L = {}".format(EOMCCSD_iter, L))

# Build up the matrix S, holding the products Hbar*B, aka sigma vectors
S = np.zeros_like(B)
for i in range(L):
    B1 = B[:nov, i].reshape(ndocc, nvir).copy()
    B2 = B[nov:, i].reshape(ndocc, ndocc, nvir, nvir).copy()
    S1 = cceom.build_sigma1(B1, B2)
    S2 = cceom.build_sigma2(B1, B2)
    S[:nov, i] += S1.flatten()
    S[nov:, i] += S2.flatten()

# Build the subspace Hamiltonian
G = np.dot(B.T, S)
# Diagonalize it, and sort the eigenvector/eigenvalue pairs
theta, alpha = np.linalg.eig(G)
idx = theta.argsort()[:nroot]
theta = theta[idx]
alpha = alpha[:, idx]

# This vector will hold the new guess vectors to add to our space
add_B = []
for j in range(nroot):
    # Compute a residual vector "w" for each root we seek
    # Note: for a more robust convergence criteria you can also check
    # that the norm of the residual vector is below some threshold.
    w = np.dot(S, alpha[:, j]) - theta[j] * np.dot(B, alpha[:, j])
    # Precondition the residual vector to form a correction vector
    q = w / (theta[j] - D[j])
def do(self, a, b):
    u, s, vt = linalg.svd(a, 0)
    assert_almost_equal(a, dot(multiply(u, s), vt))
    assert imply(isinstance(a, matrix), isinstance(u, matrix))
    assert imply(isinstance(a, matrix), isinstance(vt, matrix))
def _rotate_points(self, points):
    _points = np.pad(points, [(0, 0), (0, 1)], 'constant')
    _points[:, 2] = 1
    _points = np.dot(self._mat, _points.T)
    return _points.T.astype(points.dtype)
def do(self, a, b):
    x = linalg.solve(a, b)
    assert_almost_equal(b, dot(a, x))
    assert imply(isinstance(b, matrix), isinstance(x, matrix))
def hessian(self, x=None, mean_output=False, mc_num=1, denormalize=False, method='exact'):
    """
    | Calculate the hessian of output to input
    |
    | Please notice that the de-normalize (if True) assumes the output depends on the input data to first order,
    | in which case the hessians do not depend on input scaling and only depend on output scaling
    |
    | The hessians can be all zeros and the common cause is you did not use any activation or
    | an activation that is still too linear in some sense like ReLU.

    :param x: Input Data
    :type x: ndarray
    :param mean_output: False to get all hessians, True to get the mean
    :type mean_output: boolean
    :param mc_num: Number of monte carlo integration
    :type mc_num: int
    :param denormalize: De-normalize diagonal part of Hessian
    :type denormalize: bool
    :param method: Either 'exact' to calculate numerical Hessian or 'approx' to approximate Hessian from Jacobian
    :type method: str
    :return: An array of Hessian
    :rtype: ndarray
    :History: 2018-Jun-14 - Written - Henry Leung (University of Toronto)
    """
    if not mean_output:
        print('only mean output is supported at this moment')
        mean_output = True

    if method == 'approx':
        all_args = locals()
        # remove unnecessary arguments
        all_args.pop('self')
        all_args.pop('method')
        jacobian = self.jacobian(**all_args)
        hessians_master = np.stack([np.dot(jacobian[x_shape:x_shape + 1].T, jacobian[x_shape:x_shape + 1])
                                    for x_shape in range(jacobian.shape[0])], axis=0)
        return hessians_master

    elif method == 'exact':
        self.has_model_check()
        if x is None:
            raise ValueError('Please provide data to calculate the jacobian')
        if mc_num < 1 or isinstance(mc_num, float):
            raise ValueError('mc_num must be a positive integer')

        if self.input_normalizer is not None:
            x_data = self.input_normalizer.normalize(x, calc=False)
        else:
            # Prevent shallow copy issue
            x_data = np.array(x)
            x_data -= self.input_mean
            x_data /= self.input_std

        try:
            input_tens = self.keras_model_predict.get_layer("input").input
            output_tens = self.keras_model_predict.get_layer("output").output
            input_shape_expectation = self.keras_model_predict.get_layer("input").input_shape
            output_shape_expectation = self.keras_model_predict.get_layer("output").output_shape
        except AttributeError:
            input_tens = self.keras_model.get_layer("input").input
            output_tens = self.keras_model.get_layer("output").output
            input_shape_expectation = self.keras_model.get_layer("input").input_shape
            output_shape_expectation = self.keras_model.get_layer("output").output_shape
        except ValueError:
            raise ValueError(
                "astroNN expects input layer is named as 'input' and output layer is named as 'output', "
                "but None is found.")

        if len(input_shape_expectation) == 1:
            input_shape_expectation = input_shape_expectation[0]

        # just in case only 1 data point is provided and mess up the shape issue
        if len(input_shape_expectation) == 3:
            x_data = np.atleast_3d(x_data)
        elif len(input_shape_expectation) == 4:
            if len(x_data.shape) < 4:
                x_data = x_data[:, :, :, np.newaxis]
        else:
            raise ValueError('Input data shape do not match neural network expectation')

        total_num = x_data.shape[0]

        hessians_list = []
        for j in range(self._labels_shape):
            hessians_list.append(tf.hessians(output_tens[:, j], input_tens))
        final_stack = tf.stack(tf.squeeze(hessians_list))

        # Looping variables for tensorflow setup
        i = tf.constant(0)
        mc_num_tf = tf.constant(mc_num)
        # To store final result
        l = tf.TensorArray(dtype=tf.float32, infer_shape=False, size=1, dynamic_size=True)

        def body(i, l):
            l = l.write(i, final_stack)
            return i + 1, l

        tf_index, loop = tf.while_loop(lambda i, *_: tf.less(i, mc_num_tf), body, [i, l])

        loops = tf.cond(tf.greater(mc_num_tf, 1),
                        lambda: tf.reduce_mean(loop.stack(), axis=0),
                        lambda: loop.stack())

        start_time = time.time()

        hessians = np.concatenate(
            [get_session().run(loops, feed_dict={input_tens: x_data[i:i + 1],
                                                 tfk.backend.learning_phase(): 0})
             for i in range(0, total_num)], axis=0)

        if np.all(hessians == 0.):
            # warn user that a not-so-linear activation like ReLU will give all zeros
            warnings.warn(
                'The hessians is detected to be all zeros. The common cause is you did not use any activation or '
                'activation that is still too linear in some sense like ReLU.', UserWarning)

        if mean_output is True:
            hessians_master = np.mean(hessians, axis=0)
        else:
            hessians_master = np.array(hessians)

        hessians_master = np.squeeze(hessians_master)

        if denormalize:
            # no need to denorm input scaling because we assume first order dependence
            if self.labels_std is not None:
                try:
                    hessians_master = hessians_master * self.labels_std
                except ValueError:
                    hessians_master = hessians_master * self.labels_std.reshape(-1, 1)

        print(f'Finished hessian ({method}) calculation, {(time.time() - start_time):.{2}f} seconds elapsed')
        return hessians_master
    else:
        raise ValueError(f'Unknown method -> {method}')
def label_softmax_grad(X, dY):
    dX = Y * 0.0
    for i in range(n):
        d = np.dot(Y[i, :], dY[i, :])
        dX[i, :] = Y[i, :] * (dY[i, :] - d)
    return [dX]
def calculate_nll(y, tx, w):
    """compute the cost by negative log likelihood."""
    txw = np.dot(tx, w)
    return np.sum(np.log(1 + np.exp(txw)) - y * txw)
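# Toy usage sketch for the negative log-likelihood above; the data values are
# illustrative assumptions only.
import numpy as np

tx = np.array([[1.0, 0.5], [1.0, -1.0], [1.0, 2.0]])  # design matrix with a bias column
y = np.array([1.0, 0.0, 1.0])                          # binary labels
w = np.zeros(2)
print(calculate_nll(y, tx, w))  # 3 * log(2) ~= 2.079 at w = 0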
def dot(a, b):
    return np.dot(a, b)
def conjugate_gradient(
        func,
        x0,
        args=(),
        fprime=None,
        alpha=0.5,
        scaling_factor=0.8,
        numIter=1e5,
        norm_lim=1e-7,
        epsilon=1e-10,
        order=2,
        disp=False,
        period=10000):
    """
    Conjugate descent algorithm to optimize the cost function using the
    Fletcher-Reeves update

    Usage:
        func : function
            function to be optimized
        x0 : list
            initial guess
        args :
            other arguments to be passed to func
        fprime : function
            derivative or jacobian of func; if not passed then it will be
            generated automatically
        alpha : float
            learning rate
        scaling_factor : float
            factor to multiply alpha with when doing line search
        numIter : int
            number of iterations
        norm_lim : float
            minimum value of norm
        epsilon : float
            delta x for calculating fprime
        order : int
            order of norm, max value = Inf

    Example:
        >>> def func(x): return pow(x[0]-2,6.0)+pow(x[1]-3,6.0)
        >>> x0 = [1,2]
        >>> point_of_optima = conjugate_gradient(func,x0)
    """
    if fprime is None:
        fprime = hlp.compute_numerical_grad(func, len(x0), epsilon)

    iters = 0
    func_value = func(x0, *args)
    gradient_prev = np.array(fprime(x0, *args))
    norm_gradient = hlp.vecnorm(gradient_prev, order)
    xPrev = np.array(x0)
    pPrev = -gradient_prev

    while norm_gradient > norm_lim and iters < numIter:
        iters += 1
        if disp and (iters % period == 0 or iters == 1):
            print("Iter : %d | Function value : %f" % (iters, func_value))

        alp = alpha
        while func(xPrev + alp * pPrev, *args) > func(xPrev, *args):
            alp *= scaling_factor

        xUpdated = xPrev + alp * pPrev
        gradient_updated = np.array(fprime(xUpdated, *args))
        betaUpdated = np.dot(gradient_updated, gradient_updated) / \
            np.dot(gradient_prev, gradient_prev)
        pUpdated = -gradient_updated + betaUpdated * pPrev

        func_value = func(xUpdated, *args)
        norm_gradient = hlp.vecnorm(gradient_updated, order)
        pPrev = pUpdated
        gradient_prev = gradient_updated
        xPrev = xUpdated

    if disp and iters % period != 0:
        print("Iter : %d | Function value : %f" % (iters, func_value))
    return xUpdated
def compute_error(y, tx, w):
    return y - np.dot(tx, w)
def simulate_eazy_sed(fieldidx='GOODS-S.21740', eazydata=None,
                      returnfluxunit='AB', returnwaveunit='nm',
                      limitwaverange=True, savetofile='',
                      headerstring='# wave flux\n'):
    """
    Pull the best-fit SED from eazy-py output files.

    NB: Requires the eazy-py package to apply the IGM absorption!
    (https://github.com/gbrammer/eazy-py)

    Optional Args:
        returnfluxunit: ['AB', 'flambda'] TODO: add Jy
        returnwaveunit: ['A' or 'nm']
        limitwaverange: limit the output wavelengths to the range covered by PFS
        savetofile: filename for saving the output spectrum as a two-column
            ascii data file (suitable for use with the SubaruPFS ETC from
            C. Hirata.)

    Returns
    -------
        templz : observed-frame wavelength, Angstroms or nm
        tempflux : flux density of best-fit template, erg/s/cm2/A or AB mag
    """
    fieldstr, idxstr = fieldidx.split('.')
    field = fieldstr.lower().replace('-', '')
    idx = int(idxstr)

    # TODO : this is a kludge. Should not assume only one eazypy.data.fits file per field
    if eazydata is None:
        fitsfilename = glob(
            '3DHST/{0}_3dhst.*.eazypy.data.fits'.format(field))[0]
        eazydata = EazyData(fitsfilename)

    imatch = eazydata.ID == idx
    if imatch.sum() == 0:
        print('ID {0} not found.'.format(idx))
        return None, None

    ix = np.arange(len(imatch))[imatch][0]
    z = eazydata.ZBEST[ix]

    # the input data units are Angstroms for wavelength
    # and cgs for flux: erg/cm2/s/Ang
    templz = eazydata.TEMPL * (1 + z)
    templf = np.dot(eazydata.COEFFS[ix, :], eazydata.TEMPF)
    fnu_factor = 10 ** (-0.4 * (25 + 48.6))
    flam_spec = 1. / (1 + z) ** 2
    tempflux = templf * fnu_factor * flam_spec

    try:
        import eazy.igm
        igmz = eazy.igm.Inoue14().full_IGM(z, templz)
        tempflux *= igmz
    except:
        pass

    if limitwaverange:
        # to simplify things, we only write out the data over the Subaru PFS
        # wavelength range, from 300 to 1300 nm (3000 to 13000 Angstroms)
        ipfs = np.where((templz > 2000) & (templz < 25000))[0]
        templz = templz[ipfs]
        tempflux = tempflux[ipfs]

    if returnfluxunit == 'AB':
        # convert from flux density f_lambda into AB mag:
        mAB_from_flambda = lambda f_lambda, wave: -2.5 * np.log10(
            3.34e4 * wave * wave * f_lambda / 3631)
        tempflux = mAB_from_flambda(tempflux, templz)
    if returnwaveunit == 'nm':
        templz = templz / 10.

    if savetofile:
        fout = open(savetofile, 'w')
        fout.write(headerstring)
        for i in range(len(templz)):
            fout.write('{wave:.3e} {flux:.3e}\n'.format(
                wave=templz[i], flux=tempflux[i]))
        fout.close()
    else:
        return templz, tempflux
import pickle
import numpy as np

# db = pickle.load(open('bert_fine_tune.p', 'rb'))
from utils import Config, safe_pickle_dump, strip_version

db = pickle.load(open(Config.db_path, 'rb'))
orig = pickle.load(open('elmo_embed.p', 'rb'))
# db = pickle.load(open('bert_out.p', 'rb'))
# print(len(db))
# X = np.array(list(db.values()))

# normalization
X = orig / np.linalg.norm(orig, axis=1, keepdims=1)
# print(X.shape)
pids = list(db.keys())

# B = N
ds = -np.asarray(np.dot(X, X.T))  # NxD * DxB => NxB
# print(ds[0][0])
IX = np.argsort(ds, axis=0)  # NxB

# pid = '1407.2515'
# pid = '1904.05856'
# pid = '1904.07460'
# ID = pids.index(pid)
# print(IX.shape)
ARXIV_PATH = 'https://arxiv.org/abs/'
# print(ARXIV_PATH + pids[ID])
# print(orig[ID])
# for i in range(0, 6):
#     # print(IX[ID][i])
#     # print(orig[IX[i][ID]])
#     # print(1+ds[ID][IX[i][ID]], end=' ')
#     sim_pid = pids[IX[i][ID]]
#     print(ARXIV_PATH + sim_pid)
def rgb2gray(self, rgb):
    '''
    take a numpy rgb image return a new single channel image converted to greyscale
    '''
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])
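# Hedged usage sketch: the same luminance weighting applied to a toy random
# RGB image (the image here is an assumption for illustration).
import numpy as np

rgb = np.random.rand(4, 4, 3)          # toy 4x4 RGB image with values in [0, 1]
gray = np.dot(rgb[..., :3], [0.299, 0.587, 0.114])
print(gray.shape)                      # (4, 4)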
def simulate_eazy_sed_from_coeffs(eazycoeffs, eazytemplatedata, z,
                                  returnfluxunit='', returnwaveunit='A',
                                  limitwaverange=True, savetofile='',
                                  **outfile_kwargs):
    """
    Generate a simulated SED from a given set of input eazy-py coefficients
    and eazy-py templates.

    NB: Requires the eazy-py package to apply the IGM absorption!
    (https://github.com/gbrammer/eazy-py)

    Optional Args:
        returnfluxunit: ['AB', 'flambda'] TODO: add Jy
            'AB' = return log(flux) as AB magnitudes
            'flambda' = return flux density in erg/s/cm2/A
        returnwaveunit: ['A' or 'nm']
        limitwaverange: limit the output wavelengths to the range covered by PFS
        savetofile: filename for saving the output spectrum as a two-column
            ascii data file (suitable for use with the SubaruPFS ETC from
            C. Hirata.)

    Returns
    -------
        obswave : observed-frame wavelength, Angstroms or nm
        obsflux : flux density of best-fit template, erg/s/cm2/A or AB mag
    """
    # the input data units are Angstroms for wavelength
    # and cgs for flux: erg/cm2/s/Ang
    obswave = eazytemplatedata[0] * (1 + z)
    obsfluxmatrix = eazytemplatedata[1:]
    sedsimflux = np.dot(eazycoeffs, obsfluxmatrix)
    fnu_factor = 10 ** (-0.4 * (25 + 48.6))
    flam_spec = 1. / (1 + z) ** 2
    obsflux = sedsimflux * fnu_factor * flam_spec

    try:
        import eazy.igm
        igmz = eazy.igm.Inoue14().full_IGM(z, obswave)
        obsflux *= igmz
    except:
        pass

    if limitwaverange:
        # to simplify things, we only write out the data over the Subaru PFS
        # + WFIRST prism wavelength range, from 200 to 2500 nm
        # (3000 to 25000 Angstroms)
        iuvoir = np.where((obswave > 2000) & (obswave < 25000))[0]
        obswave = obswave[iuvoir]
        obsflux = obsflux[iuvoir]

    if returnfluxunit == 'AB':
        # convert from flux density f_lambda into AB mag:
        mAB_from_flambda = lambda f_lambda, wave: -2.5 * np.log10(
            3.34e4 * wave * wave * f_lambda / 3631)
        obsflux = mAB_from_flambda(obsflux, obswave)
    if returnwaveunit == 'nm':
        obswave = obswave / 10.

    if savetofile:
        out_table = Table()
        outcol1 = Column(data=obswave, name='wave')
        outcol2 = Column(data=obsflux, name='flux')
        out_table.add_columns([outcol1, outcol2])
        out_table.write(savetofile, **outfile_kwargs)

    return obswave, obsflux
def gv(w):
    return av + np.dot(lv, w - cg_shift)
import numpy as np

A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
B = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

newmatrix = []
i = 0
while i < len(A):
    eachrow = [np.sum(A[i] * B[:, j]) for j in range(len(A))]
    i += 1
    newmatrix.append(eachrow)

print(np.array(newmatrix))
print(np.dot(A, B))
def _beta_divergence(X, W, H, beta, square_root=False):
    """Compute the beta-divergence of X and dot(W, H).

    Parameters
    ----------
    X : float or array-like of shape (n_samples, n_features)

    W : float or array-like of shape (n_samples, n_components)

    H : float or array-like of shape (n_components, n_features)

    beta : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}
        Parameter of the beta-divergence.
        If beta == 2, this is half the Frobenius *squared* norm.
        If beta == 1, this is the generalized Kullback-Leibler divergence.
        If beta == 0, this is the Itakura-Saito divergence.
        Else, this is the general beta-divergence.

    square_root : bool, default=False
        If True, return np.sqrt(2 * res)
        For beta == 2, it corresponds to the Frobenius norm.

    Returns
    -------
        res : float
            Beta divergence of X and np.dot(W, H).
    """
    beta = _beta_loss_to_float(beta)

    # The method can be called with scalars
    if not sp.issparse(X):
        X = np.atleast_2d(X)
    W = np.atleast_2d(W)
    H = np.atleast_2d(H)

    # Frobenius norm
    if beta == 2:
        # Avoid the creation of the dense np.dot(W, H) if X is sparse.
        if sp.issparse(X):
            norm_X = np.dot(X.data, X.data)
            norm_WH = trace_dot(np.linalg.multi_dot([W.T, W, H]), H)
            cross_prod = trace_dot((X * H.T), W)
            res = (norm_X + norm_WH - 2. * cross_prod) / 2.
        else:
            res = squared_norm(X - np.dot(W, H)) / 2.

        if square_root:
            return np.sqrt(res * 2)
        else:
            return res

    if sp.issparse(X):
        # compute np.dot(W, H) only where X is nonzero
        WH_data = _special_sparse_dot(W, H, X).data
        X_data = X.data
    else:
        WH = np.dot(W, H)
        WH_data = WH.ravel()
        X_data = X.ravel()

    # do not affect the zeros: here 0 ** (-1) = 0 and not infinity
    indices = X_data > EPSILON
    WH_data = WH_data[indices]
    X_data = X_data[indices]

    # used to avoid division by zero
    WH_data[WH_data == 0] = EPSILON

    # generalized Kullback-Leibler divergence
    if beta == 1:
        # fast and memory efficient computation of np.sum(np.dot(W, H))
        sum_WH = np.dot(np.sum(W, axis=0), np.sum(H, axis=1))
        # computes np.sum(X * log(X / WH)) only where X is nonzero
        div = X_data / WH_data
        res = np.dot(X_data, np.log(div))
        # add full np.sum(np.dot(W, H)) - np.sum(X)
        res += sum_WH - X_data.sum()

    # Itakura-Saito divergence
    elif beta == 0:
        div = X_data / WH_data
        res = np.sum(div) - np.product(X.shape) - np.sum(np.log(div))

    # beta-divergence, beta not in (0, 1, 2)
    else:
        if sp.issparse(X):
            # slow loop, but memory efficient computation of :
            # np.sum(np.dot(W, H) ** beta)
            sum_WH_beta = 0
            for i in range(X.shape[1]):
                sum_WH_beta += np.sum(np.dot(W, H[:, i]) ** beta)
        else:
            sum_WH_beta = np.sum(WH ** beta)

        sum_X_WH = np.dot(X_data, WH_data ** (beta - 1))
        res = (X_data ** beta).sum() - beta * sum_X_WH
        res += sum_WH_beta * (beta - 1)
        res /= beta * (beta - 1)

    if square_root:
        return np.sqrt(2 * res)
    else:
        return res
def f0(x):
    ang = [x[0], x[1:a_in + 1], x[1 + a_in:]]
    scr = angles2Score(device, ang, eta, vis)
    return av_curr + np.dot(lv_curr, scr - cg_shift)
def _multiplicative_update_w(X, W, H, beta_loss, l1_reg_W, l2_reg_W, gamma,
                             H_sum=None, HHt=None, XHt=None, update_H=True):
    """update W in Multiplicative Update NMF"""
    if beta_loss == 2:
        # Numerator
        if XHt is None:
            XHt = safe_sparse_dot(X, H.T)
        if update_H:
            # avoid a copy of XHt, which will be re-computed (update_H=True)
            numerator = XHt
        else:
            # preserve the XHt, which is not re-computed (update_H=False)
            numerator = XHt.copy()

        # Denominator
        if HHt is None:
            HHt = np.dot(H, H.T)
        denominator = np.dot(W, HHt)

    else:
        # Numerator
        # if X is sparse, compute WH only where X is non zero
        WH_safe_X = _special_sparse_dot(W, H, X)
        if sp.issparse(X):
            WH_safe_X_data = WH_safe_X.data
            X_data = X.data
        else:
            WH_safe_X_data = WH_safe_X
            X_data = X
            # copy used in the Denominator
            WH = WH_safe_X.copy()
            if beta_loss - 1. < 0:
                WH[WH == 0] = EPSILON

        # to avoid taking a negative power of zero
        if beta_loss - 2. < 0:
            WH_safe_X_data[WH_safe_X_data == 0] = EPSILON

        if beta_loss == 1:
            np.divide(X_data, WH_safe_X_data, out=WH_safe_X_data)
        elif beta_loss == 0:
            # speeds up computation time
            # refer to /numpy/numpy/issues/9363
            WH_safe_X_data **= -1
            WH_safe_X_data **= 2
            # element-wise multiplication
            WH_safe_X_data *= X_data
        else:
            WH_safe_X_data **= beta_loss - 2
            # element-wise multiplication
            WH_safe_X_data *= X_data

        # here numerator = dot(X * (dot(W, H) ** (beta_loss - 2)), H.T)
        numerator = safe_sparse_dot(WH_safe_X, H.T)

        # Denominator
        if beta_loss == 1:
            if H_sum is None:
                H_sum = np.sum(H, axis=1)  # shape(n_components, )
            denominator = H_sum[np.newaxis, :]

        else:
            # computation of WHHt = dot(dot(W, H) ** beta_loss - 1, H.T)
            if sp.issparse(X):
                # memory efficient computation
                # (compute row by row, avoiding the dense matrix WH)
                WHHt = np.empty(W.shape)
                for i in range(X.shape[0]):
                    WHi = np.dot(W[i, :], H)
                    if beta_loss - 1 < 0:
                        WHi[WHi == 0] = EPSILON
                    WHi **= beta_loss - 1
                    WHHt[i, :] = np.dot(WHi, H.T)
            else:
                WH **= beta_loss - 1
                WHHt = np.dot(WH, H.T)
            denominator = WHHt

    # Add L1 and L2 regularization
    if l1_reg_W > 0:
        denominator += l1_reg_W
    if l2_reg_W > 0:
        denominator = denominator + l2_reg_W * W
    denominator[denominator == 0] = EPSILON

    numerator /= denominator
    delta_W = numerator

    # gamma is in ]0, 1]
    if gamma != 1:
        delta_W **= gamma

    return delta_W, H_sum, HHt, XHt
def _multiplicative_update_h(X, W, H, beta_loss, l1_reg_H, l2_reg_H, gamma):
    """update H in Multiplicative Update NMF"""
    if beta_loss == 2:
        numerator = safe_sparse_dot(W.T, X)
        denominator = np.linalg.multi_dot([W.T, W, H])

    else:
        # Numerator
        WH_safe_X = _special_sparse_dot(W, H, X)
        if sp.issparse(X):
            WH_safe_X_data = WH_safe_X.data
            X_data = X.data
        else:
            WH_safe_X_data = WH_safe_X
            X_data = X
            # copy used in the Denominator
            WH = WH_safe_X.copy()
            if beta_loss - 1. < 0:
                WH[WH == 0] = EPSILON

        # to avoid division by zero
        if beta_loss - 2. < 0:
            WH_safe_X_data[WH_safe_X_data == 0] = EPSILON

        if beta_loss == 1:
            np.divide(X_data, WH_safe_X_data, out=WH_safe_X_data)
        elif beta_loss == 0:
            # speeds up computation time
            # refer to /numpy/numpy/issues/9363
            WH_safe_X_data **= -1
            WH_safe_X_data **= 2
            # element-wise multiplication
            WH_safe_X_data *= X_data
        else:
            WH_safe_X_data **= beta_loss - 2
            # element-wise multiplication
            WH_safe_X_data *= X_data

        # here numerator = dot(W.T, (dot(W, H) ** (beta_loss - 2)) * X)
        numerator = safe_sparse_dot(W.T, WH_safe_X)

        # Denominator
        if beta_loss == 1:
            W_sum = np.sum(W, axis=0)  # shape(n_components, )
            W_sum[W_sum == 0] = 1.
            denominator = W_sum[:, np.newaxis]

        # beta_loss not in (1, 2)
        else:
            # computation of WtWH = dot(W.T, dot(W, H) ** beta_loss - 1)
            if sp.issparse(X):
                # memory efficient computation
                # (compute column by column, avoiding the dense matrix WH)
                WtWH = np.empty(H.shape)
                for i in range(X.shape[1]):
                    WHi = np.dot(W, H[:, i])
                    if beta_loss - 1 < 0:
                        WHi[WHi == 0] = EPSILON
                    WHi **= beta_loss - 1
                    WtWH[:, i] = np.dot(W.T, WHi)
            else:
                WH **= beta_loss - 1
                WtWH = np.dot(W.T, WH)
            denominator = WtWH

    # Add L1 and L2 regularization
    if l1_reg_H > 0:
        denominator += l1_reg_H
    if l2_reg_H > 0:
        denominator = denominator + l2_reg_H * H
    denominator[denominator == 0] = EPSILON

    numerator /= denominator
    delta_H = numerator

    # gamma is in ]0, 1]
    if gamma != 1:
        delta_H **= gamma

    return delta_H