def matrix_multiply(a, b):
    """Multiplies two matrices.

    1-D inputs are promoted to 2-D so the shapes line up.  Under PyPy
    (where np.dot may be unavailable or slow) the product is computed with
    an explicit loop; otherwise it defers to np.dot.
    """
    if len(a.shape) == 1:
        # promote 1-D a to a column matrix, keeping its data
        a = a.reshape((a.shape[0], 1))
    if len(b.shape) == 1:
        bc, = b.shape
        if bc == a.shape[1]:
            # treat b as a column vector so the inner dimensions match
            b = b.reshape((bc, 1))
        else:
            # otherwise treat b as a row vector
            b = b.reshape((1, bc))
    nrows, ac = a.shape
    bc, ncols = b.shape
    assert ac == bc
    if ispypy():
        n = np.zeros((nrows, ncols))
        for i in xrange(nrows):
            for j in xrange(ncols):
                n[i, j] = np.sum(a[i] * b[:, j])
        return n
    else:
        return np.dot(a, b)
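# A minimal sanity-check sketch for matrix_multiply, assuming numpy is
# imported as np and xrange is available, as in the surrounding module.
# The explicit-loop path (written the same way as the PyPy branch above)
# and np.dot should agree on a small made-up example.
def _check_matrix_multiply():
    a = np.arange(6.0).reshape(2, 3)
    b = np.arange(12.0).reshape(3, 4)
    expected = np.dot(a, b)
    n = np.zeros((2, 4))
    for i in xrange(2):
        for j in xrange(4):
            n[i, j] = np.sum(a[i] * b[:, j])
    assert np.allclose(n, expected)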
def partial_slda_recalculate_eta_sigma(eta, y, phi):
    """Same as slda_recalculate_eta_sigma, but also supports partial updates
    if len(eta) < phi.shape[1].  Will only update based on the first Ks
    topics of phi.
    """
    D = len(phi)
    ensure(D >= 1)
    N, K = phi[0].shape
    Ks = len(eta)

    print 'e_a...'
    E_A = np.empty((D, Ks))
    for d in xrange(D):
        E_A[d,:] = calculate_EZ(phi[d][:,:Ks])
    E_ATA_inverse = calculate_E_ATA_inverse([p[:,:Ks] for p in phi])

    print 'new eta...'
    new_eta = np.dot(np.dot(E_ATA_inverse, E_A.T), y)
    eta[:] = new_eta

    print 'new sigma squared...'
    new_sigma_squared = (1.0 / D) * (np.dot(y, y) - np.dot(np.dot(y, E_A), eta))
    return new_sigma_squared
def test_dot(self):
    from numpypy import array, dot
    a = array(range(5))
    assert a.dot(a) == 30.0
    a = array(range(5))
    assert a.dot(range(5)) == 30
    assert dot(range(5), range(5)) == 30
    assert (dot(5, [1, 2, 3]) == [5, 10, 15]).all()
def test_dot(self):
    from numpypy import array, dot, arange
    a = array(range(5))
    assert dot(a, a) == 30.0
    a = array(range(5))
    assert a.dot(range(5)) == 30
    assert dot(range(5), range(5)) == 30
    assert (dot(5, [1, 2, 3]) == [5, 10, 15]).all()

    a = arange(12).reshape(3, 4)
    b = arange(12).reshape(4, 3)
    c = a.dot(b)
    assert (c == [[ 42,  48,  54],
                  [114, 136, 158],
                  [186, 224, 262]]).all()

    a = arange(24).reshape(2, 3, 4)
    raises(ValueError, "a.dot(a)")
    b = a[0, :, :].T
    # Superfluous shape test makes the intention of the test clearer
    assert a.shape == (2, 3, 4)
    assert b.shape == (4, 3)
    c = dot(a, b)
    assert (c == [[[ 14,  38,  62],
                   [ 38, 126, 214],
                   [ 62, 214, 366]],
                  [[ 86, 302, 518],
                   [110, 390, 670],
                   [134, 478, 822]]]).all()
    c = dot(a, b[:, 2])
    assert (c == [[ 62, 214, 366],
                  [518, 670, 822]]).all()

    a = arange(3 * 2 * 6).reshape((3, 2, 6))
    b = arange(3 * 2 * 6)[::-1].reshape((2, 6, 3))
    assert dot(a, b)[2, 0, 1, 2] == 1140
    assert (dot([[1, 2], [3, 4]], [5, 6]) == [17, 39]).all()
def lm_recalculate_eta_sigma(eta, y, phi1, phi2):
    """Recalculates eta and the sigma-squared update for the linked model.

    Accepts eta, a (K+J)-size vector; y, a D-size vector of reals; and two
    D-size lists of phi matrices (Nd x K and Nc x J).
    Returns the new sigma squared update (a double).

        η_new  ← (E[AᵀA])⁻¹ E[A]ᵀ y
        σ²_new ← (1/D) { yᵀy − yᵀ E[A] η_new }

    (A is the D x (K+J) matrix whose rows are the vectors Zdᵀ for document
    and comment concatenated.)
    (Also note that the dth row of E[A] is φ̄_d, and E[AᵀA] = Σ_d E[Zd Zdᵀ].)
    (Also, E[Z] = φ̄ := (1/N) Σ_n φ_n, and
     E[Zd Zdᵀ] = (1/N²)(Σ_n Σ_{m≠n} φ_{d,n} φ_{d,m}ᵀ + Σ_n diag{φ_{d,n}}).)
    """
    ensure(len(phi1) == len(phi2))
    D = len(phi1)
    Nd, K = phi1[0].shape
    Nc, J = phi2[0].shape
    Ndc, KJ = (Nd + Nc, K + J)

    #print 'e_a...'
    E_A = np.zeros((D, KJ))
    for d in xrange(D):
        E_A[d,:] = calculate_EZ_from_small_phis(phi1[d], phi2[d])

    #print 'inverse...'
    E_ATA_inverse = calculate_E_ATA_inverse_from_small_phis(phi1, phi2)

    #print 'new eta...'
    #new_eta = matrix_multiply(matrix_multiply(E_ATA_inverse, E_A.T), y)
    new_eta = np.dot(np.dot(E_ATA_inverse, E_A.T), y)
    if np.sum(np.abs(new_eta)) > (KJ * KJ * 5):
        print 'ETA is GOING CRAZY {0}'.format(eta)
        print 'aborting the update!!!'
    else:
        eta[:] = new_eta

    # todo: don't do this later
    # keep sigma squared fixed for now
    #import pdb; pdb.set_trace()
    #new_sigma_squared = (1.0 / D) * (np.dot(y, y) - np.dot(np.dot(np.dot(np.dot(y, E_A), E_ATA_inverse), E_A.T), y))
    new_sigma_squared = 1.0
    return new_sigma_squared
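# A minimal, self-contained sketch of the eta update above, assuming only
# numpy.  It treats E[AᵀA] as E_A.T·E_A (a simplification: the model's real
# E[AᵀA] also carries per-document diagonal corrections), so the update
# η_new = (E[AᵀA])⁻¹ E[A]ᵀ y reduces to the normal-equations least-squares
# solution.  E_A and y here are made-up numbers, not model output.
def _eta_update_sketch():
    E_A = np.array([[0.2, 0.8],
                    [0.5, 0.5],
                    [0.9, 0.1]])           # D=3 documents, K+J=2 topics
    y = np.array([1.0, 2.0, 3.0])          # per-document responses
    E_ATA_inverse = np.linalg.inv(np.dot(E_A.T, E_A))
    new_eta = np.dot(np.dot(E_ATA_inverse, E_A.T), y)
    expected = np.linalg.solve(np.dot(E_A.T, E_A), np.dot(E_A.T, y))
    assert np.allclose(new_eta, expected)
    return new_eta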
def partial_slda_update_phi(text, phi, gamma, beta, y_d, eta, sigma_squared):
    """Same as slda_update_phi, but eta may be smaller than the total number
    of topics, so only some of the topics contribute to y.
    """
    (N, K) = phi.shape
    Ks = len(eta)
    phi_sum = np.sum(phi[:,:Ks], axis=0)
    Ns = (N * sigma_squared)

    ElogTheta = graphlib.dirichlet_expectation(gamma)

    front = (-1.0 / (2 * N * Ns))
    eta_dot_eta = front * (eta * eta)
    pC = ((1.0 * y_d / Ns) * eta) + eta_dot_eta
    right_eta_times_const = (front * 2 * eta)

    if isinstance(text, np.ndarray):
        # if text is in array form, do an approximate fast matrix update
        phi_minus_n = -(phi[:,:Ks] - phi_sum)
        phi[:,:] = ElogTheta + np.log(beta[:,text].T)
        phi[:,:Ks] += pC
        phi[:,:Ks] += np.dot(np.matrix(np.dot(phi_minus_n, eta)).T,
                             np.matrix(right_eta_times_const))
        graphlib.log_row_normalize(phi)
        phi[:,:] = np.exp(phi[:,:])
    else:
        # otherwise, iterate through each word
        for n, word, count in iterwords(text):
            phi_sum -= phi[n,:Ks]

            pB = np.log(beta[:,word])
            pD = (np.dot(eta, phi_sum) * right_eta_times_const)

            # must exponentiate and normalize immediately!
            phi[n,:] = ElogTheta + pB
            # only the first Ks topics receive the response terms
            phi[n,:Ks] += pC + pD
            phi[n,:] -= graphlib.logsumexp(phi[n,:])   # normalize in logspace
            phi[n,:] = np.exp(phi[n,:])

            # add this back into the sum
            # unlike in LDA, this cannot be computed in parallel
            phi_sum += phi[n,:Ks]
    return phi
def calculate_EZZT_from_small_phis(phi1, phi2):
    """Accepts two small phi matrices (phi1 is Nd x K, phi2 is Nc x J),
    which together stand for one big ((Nd+Nc) x (K+J)) phi.
    Calculates E[ZdZdᵀ].  Returns the final matrix ((K+J) x (K+J)).

    (Also, E[ZdZdᵀ] = (1/N²)(Σ_n Σ_{m≠n} φ_{d,n} φ_{d,m}ᵀ + Σ_n diag{φ_{d,n}}).)
    """
    Nd, K = phi1.shape
    Nc, J = phi2.shape
    (Ndc, KJ) = (Nd + Nc, K + J)
    inner_sum = np.zeros((KJ, KJ))

    p1 = np.matrix(phi1)
    p2 = np.matrix(phi2)
    # document-document block: all pairs, minus the n == m terms
    for i in xrange(K):
        for j in xrange(K):
            m = np.dot(np.matrix(p1[:,i]), np.matrix(p1[:,j]).T)
            inner_sum[i,j] = np.sum(m) - np.sum(np.diagonal(m))
    # comment-comment block
    for i in xrange(J):
        for j in xrange(J):
            m = np.dot(np.matrix(p2[:,i]), np.matrix(p2[:,j]).T)
            inner_sum[K+i,K+j] = np.sum(m) - np.sum(np.diagonal(m))
    # document-comment cross blocks (no diagonal to subtract: the row indices
    # never coincide across the two blocks)
    for i in xrange(K):
        for j in xrange(J):
            m = np.dot(np.matrix(p1[:,i]), np.matrix(p2[:,j]).T)
            inner_sum[i,K+j] = np.sum(m)
    for i in xrange(J):
        for j in xrange(K):
            m = np.dot(np.matrix(p2[:,i]), np.matrix(p1[:,j]).T)
            inner_sum[K+i,j] = np.sum(m)

    big_phi_sum = np.concatenate((np.sum(phi1, axis=0), np.sum(phi2, axis=0)))
    ensure(big_phi_sum.shape == (KJ,))
    # the Σ_n diag{φ_{d,n}} term: a diagonal matrix, not the diagonal vector
    inner_sum += np.diag(big_phi_sum)

    inner_sum /= (Ndc * Ndc)
    return inner_sum
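# A minimal consistency-check sketch for the function above, assuming numpy
# as np, xrange, and the ensure() helper used in this module.  The two small
# phis stand for one big phi laid out block-diagonally ("catercorner"), so the
# brute-force double sum over that big phi should agree with the blocked
# computation.  All values here are made up for the check.
def _check_EZZT_from_small_phis():
    phi1 = np.random.dirichlet(np.ones(2), size=3)   # Nd=3, K=2
    phi2 = np.random.dirichlet(np.ones(2), size=2)   # Nc=2, J=2
    Ndc, KJ = 5, 4
    big_phi = np.zeros((Ndc, KJ))
    big_phi[:3, :2] = phi1
    big_phi[3:, 2:] = phi2
    expected = np.zeros((KJ, KJ))
    for n in xrange(Ndc):
        for m in xrange(Ndc):
            if n != m:
                expected += np.outer(big_phi[n], big_phi[m])
    expected += np.diag(np.sum(big_phi, axis=0))
    expected /= (Ndc * Ndc)
    assert np.allclose(expected, calculate_EZZT_from_small_phis(phi1, phi2))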
def test_dot_constant(self):
    from numpypy import array, dot
    a = array(range(5))
    b = a.dot(2.5)
    for i in xrange(5):
        assert b[i] == 2.5 * a[i]
    c = dot(4, 3.0)
    assert c == 12.0
    c = array(3.0).dot(array(4))
    assert c == 12.0
def lm_elbo_y_from_small_phis(y, eta, phiD, phiC, sigma_squared):
    """Calculates some terms in the ELBO for a document.  Same as in sLDA.

        E[log p(y | Z_{1:N}, η, σ²)]
            = (−1/2) log 2πσ² − (1/(2σ²))[y² − 2yηᵀE[Z] + ηᵀE[ZZᵀ]η]

    Test: should be the same as slda_elbo_y when phiD and phiC are
    catercorner concatenated.
    """
    elbo = 0.0
    ss = sigma_squared
    elbo += (-0.5) * np.log(2 * np.pi * ss)

    ez = calculate_EZ_from_small_phis(phiD, phiC)
    ezzt = calculate_EZZT_from_small_phis(phiD, phiC)
    nEZZTn = np.dot(np.dot(eta, ezzt), eta)

    elbo += (-0.5 / ss) * (y*y - (2 * y * np.dot(eta, ez)) + nEZZTn)
    return elbo
def slda_elbo_y(y, eta, phi, sigma_squared):
    """Calculates some terms in the ELBO for a document.  Same as in sLDA.

        E[log p(y | Z_{1:N}, η, σ²)]
            = (−1/2) log 2πσ² − (1/(2σ²))[y² − 2yηᵀE[Z] + ηᵀE[ZZᵀ]η]
    """
    elbo = 0.0
    ss = sigma_squared
    elbo += (-0.5) * np.log(2 * np.pi * ss)

    #print 'will calculate ez...'
    ez = calculate_EZ(phi)
    #print 'will calculate ezzt...'
    ezzt = calculate_EZZT(phi)
    #print 'will calculate nEZZTn...'
    nEZZTn = np.dot(np.dot(eta, ezzt), eta)

    #print 'will sum up elbo...'
    elbo += (-0.5 / ss) * (y*y - (2 * y * np.dot(eta, ez)) + nEZZTn)
    return elbo
def update(self, input_values, trace=False):
    # This is a forward operation in the network. This is how we
    # calculate the network output from a set of input signals.
    output = input_values

    if trace:
        tracelist = [output]

    for i, weight_layer in enumerate(self.weights):
        # Loop over the network layers and calculate the output.
        # Row 0 of each weight layer is the implicit bias; the remaining
        # rows multiply the incoming signals.
        output = np.dot(output, weight_layer[1:,:]) + weight_layer[0:1,:]
        output = self.activation_functions[i](output)
        if trace:
            tracelist.append(output)

    if trace:
        return tracelist
    return output
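# A minimal, self-contained sketch of the forward pass above, assuming only
# numpy.  It hard-codes one 2-3-1 network with made-up weights to show the
# convention that row 0 of each weight matrix is the bias row and the
# remaining rows multiply the inputs.  (The sigmoid here is an assumption;
# the class takes its activations from self.activation_functions.)
def _forward_pass_sketch():
    sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))
    weights = [
        np.array([[ 0.1, -0.2,  0.05],   # bias row for the hidden layer
                  [ 0.4,  0.3, -0.5 ],
                  [-0.6,  0.2,  0.7 ]]), # 2 inputs -> 3 hidden units
        np.array([[ 0.0 ],               # bias row for the output layer
                  [ 0.5 ],
                  [-0.25],
                  [ 0.75]]),             # 3 hidden units -> 1 output
    ]
    output = np.array([[1.0, 2.0]])      # one sample with 2 features
    for weight_layer in weights:
        output = np.dot(output, weight_layer[1:, :]) + weight_layer[0:1, :]
        output = sigmoid(output)
    return output                        # shape (1, 1)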
def test_dot_out(self):
    from numpypy import arange, dot
    a = arange(12).reshape(3, 4)
    b = arange(12).reshape(4, 3)
    out = arange(9).reshape(3, 3)
    c = dot(a, b, out=out)
    assert (c == out).all()
    assert (c == [[42, 48, 54], [114, 136, 158], [186, 224, 262]]).all()
    out = arange(9, dtype=float).reshape(3, 3)
    exc = raises(ValueError, dot, a, b, out)
    assert exc.value[0] == ('output array is not acceptable (must have the '
                            'right type, nr dimensions, and be a C-Array)')
def _unoptimized_slda_update_phi(text, phi, gamma, beta, y_d, eta, sigma_squared):
    """Update phi in sLDA.  phi is an N x K matrix; gamma is a K-size vector.

    Update for φd:
        φ_{d,n} ∝ exp{ E[log θ|γ] + E[log p(w_n|β_{1:K})] + (y / (Nσ²)) η
                       − [2(ηᵀφ_{d,−n})η + (η∘η)] / (2N²σ²) }

    Note that E[log p(w_n|β_{1:K})] = log β_{·,w_n}.
    """
    (N, K) = phi.shape
    #assert len(eta) == K
    #assert len(gamma) == K
    #assert beta.shape[0] == K
    phi_sum = np.sum(phi, axis=0)
    Ns = (N * sigma_squared)

    ElogTheta = graphlib.dirichlet_expectation(gamma)
    ensure(len(ElogTheta) == K)

    pC = (1.0 * y_d / Ns * eta)
    eta_dot_eta = (eta * eta)
    front = (-1.0 / (2 * N * Ns))

    for n, word, count in iterwords(text):
        phi_sum -= phi[n]
        ensure(len(phi_sum) == K)

        pB = np.log(beta[:,word])
        pD = (front * ((2 * np.dot(eta, phi_sum) * eta) + eta_dot_eta))

        ensure(len(pB) == K)
        ensure(len(pC) == K)
        ensure(len(pD) == K)

        # must exponentiate and sum immediately!
        #phi[n,:] = np.exp(ElogTheta + pB + pC + pD)
        #phi[n,:] /= np.sum(phi[n,:])

        # log-normalize before exp for numerical stability
        phi[n,:] = ElogTheta + pB + pC + pD
        phi[n,:] -= graphlib.logsumexp(phi[n,:])
        phi[n,:] = np.exp(phi[n,:])

        # add this back into the sum
        # unlike in LDA, this cannot be computed in parallel
        phi_sum += phi[n]
    return phi
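# A small, self-contained illustration of the log-space normalization used
# above, assuming only numpy (graphlib.logsumexp is assumed to behave like
# the max-shifted version sketched here).  Subtracting the log-sum-exp before
# exponentiating gives the same normalized row as exp-then-divide, but
# without overflowing when the log weights are large.
def _log_normalize_sketch():
    log_phi_n = np.array([1000.0, 1001.0, 999.0])   # made-up log weights
    # naive np.exp(log_phi_n) overflows to inf, so normalize in log space
    shift = np.max(log_phi_n)
    logsumexp = shift + np.log(np.sum(np.exp(log_phi_n - shift)))
    phi_n = np.exp(log_phi_n - logsumexp)
    assert np.isclose(np.sum(phi_n), 1.0)
    return phi_n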
def lm_E_step_for_doc(global_iteration, d, document, comment,
                      alphaD, alphaC, betaD, betaC,
                      gammaD, gammaC, phiD, phiC,
                      y, eta, sigma_squared):
    """Given phi and gamma matrices and the text of the document,
    recalculates phi and gamma iteratively.
    Uses the local ELBO calculation to check for convergence.
    """
    print "starting E step on doc {0}".format(d)
    graphlib.initialize_random(phiD)
    graphlib.initialize_random(phiC)

    i = 0
    last_local_elbo, local_elbo = graphlib.INITIAL_ELBO - 100, graphlib.INITIAL_ELBO
    while graphlib.elbo_did_not_converge(local_elbo, last_local_elbo, i,
                                         criterion=0.1, max_iter=20):
        print 'will update gamma...'
        # update gammas
        lda_update_gamma(alphaD, phiD, gammaD)
        lda_update_gamma(alphaC, phiC, gammaC)

        Nd, Kd = phiD.shape

        print 'will update phis...'
        # update phis (note we have to pass the right part of eta!)
        slda_update_phi(document, phiD, gammaD, betaD, y[d], eta[:Kd], sigma_squared)
        slda_update_phi(comment, phiC, gammaC, betaC, y[d], eta[Kd:], sigma_squared)

        print 'will calculate y...'
        # update the response variable
        # y = ηᵀE[Z] = ηᵀφ̄   [ where φ̄ = (1/N) Σ_n φ_n ]
        y[d] = np.dot(eta, calculate_EZ_from_small_phis(phiD, phiC))

        if i % 2 == 0:
            print 'will calculate elbo...'
            # calculate new ELBO
            last_local_elbo = local_elbo
            local_elbo = lm_local_elbo(document, comment, alphaD, alphaC,
                                       betaD, betaC, gammaD, gammaC,
                                       phiD, phiC, y[d], eta, sigma_squared)
        i += 1

        #print {'beta': (betaD, betaC), 'gamma': (gammaD, gammaC), 'phi': (phiD, phiC), 'y': y, 'eta': eta}
        print "{2}: e-step iteration {0} ELBO: {1}".format(i, local_elbo, global_iteration)

    print "{2}: done e-step on doc {3}: {0} iterations ELBO: {1}".format(i, local_elbo, global_iteration, d)
    return i
def calculate_EZZT(big_phi):
    """Accepts a big phi matrix (N x K).
    Calculates E[ZdZdᵀ].  Returns the final matrix (K x K).

    (Also, E[ZdZdᵀ] = (1/N²)(Σ_n Σ_{m≠n} φ_{d,n} φ_{d,m}ᵀ + Σ_n diag{φ_{d,n}}).)
    """
    (N, K) = big_phi.shape
    inner_sum = np.empty((K, K))
    for i in xrange(K):
        for j in xrange(K):
            # sum over all pairs (n, m), then subtract the n == m terms
            inner_sum[i,j] = (np.sum(np.multiply.outer(big_phi[:,i], big_phi[:,j]))
                              - np.dot(big_phi[:,i], big_phi[:,j]))
    inner_sum += np.diag(np.sum(big_phi, axis=0))
    inner_sum /= (N * N)
    return inner_sum
def __run(score_task_knapsack, m_tasks, tasks, mac):
    n = len(tasks)
    Tu = []
    Td = range(0, n)
    Pu = numpy.zeros(2, float)
    Z = 0
    X = numpy.zeros(n, int)
    Tc = []
    B = numpy.ones(2, float)
    P = numpy.zeros((n, 2), float)
    G = numpy.zeros(n, float)
    U = numpy.zeros(n, float)

    # normalized resource demand of each task on this machine
    for x in range(0, n):
        P[x][0] = m_tasks[tasks[x]].CPU_usage / mac.free_CPU()
        P[x][1] = m_tasks[tasks[x]].mem_usage / mac.free_mem()

    keep_going = True
    cnt = math.sqrt(2)
    #w_cpu = 0.6
    #w_mem = 1 - w_cpu
    #print "MAC = %d, ntasks = %d" % (mac.machine_ID, n)
    while keep_going:
        # step 2: candidates that still fit in the remaining capacity
        del Tc
        Tc = []
        for i in Td:
            if P[i][0] <= (1. - Pu[0]) and P[i][1] <= (1. - Pu[1]):
                Tc.append(i)

        # step 3: terminate if Tc is empty
        if len(Tc) == 0:
            keep_going = False
        else:
            # step 4
            # (a) nothing packed yet: weight the score by total demand
            if (numpy.dot(Pu, Pu) == 0.):
                for i in Tc:
                    d = sum(P[i])
                    G[i] = (score_task_knapsack(m_tasks[tasks[i]], mac) * cnt) / d
            # (b) otherwise: weight by the projection onto the used-capacity direction
            else:
                mod_Pu = math.sqrt(numpy.dot(Pu, Pu))
                E = numpy.array(Pu * (1. / mod_Pu))
                for i in Tc:
                    d = numpy.dot(P[i], E)
                    G[i] = score_task_knapsack(m_tasks[tasks[i]], mac) / d

            # step 5: pick the candidate with the highest adjusted score
            v_max = -1
            i_max = 0
            for i in Tc:
                if G[i] > v_max:
                    v_max = G[i]
                    i_max = i

            # step 6: commit the chosen task and update the used capacity
            Tu.append(i_max)
            Td.remove(i_max)
            Pu = Pu + P[i_max]
            Z = Z + m_tasks[tasks[i_max]].CPU_usage
            #print "(%f, %f)" % (mac.capacity_CPU, mac.capacity_memory)
            #print Pu
    return Tu
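# A tiny numeric sketch of step 4(b) above, assuming numpy and math are
# imported as in the function (all numbers are made up).  The used-capacity
# vector Pu is normalized to a unit direction E, and a candidate's demand
# P[i] is scored by its projection numpy.dot(P[i], E), so tasks that push
# hard along the already-loaded dimension get a larger divisor.
def _projection_sketch():
    Pu = numpy.array([0.6, 0.2])                # fraction of CPU / memory already used
    E = Pu / math.sqrt(numpy.dot(Pu, Pu))       # unit vector along the used capacity
    P_i = numpy.array([0.3, 0.1])               # a candidate task's normalized demand
    d = numpy.dot(P_i, E)                       # projection of the demand onto E
    return d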
def backpropagation(self, trainingset, ERROR_LIMIT=1e-3, learning_rate=0.3, momentum_factor=0.9):
    assert trainingset[0].features.shape[0] == self.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == self.n_outputs, \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    MSE = ()   # an empty tuple compares greater than any float in Python 2, so this acts as +inf
    neterror = None
    momentum = collections.defaultdict(int)

    batch_size = self.batch_size if self.batch_size != 0 else training_data.shape[0]

    epoch = 0
    while MSE > ERROR_LIMIT:
        epoch += 1

        for start in xrange(0, len(training_data), batch_size):
            batch = training_data[start:start + batch_size]
            input_layers = self.update(training_data, trace=True)
            out = input_layers[-1]

            error = out - training_targets
            delta = error
            MSE = np.mean(np.power(error, 2))

            loop = itertools.izip(
                xrange(len(self.weights) - 1, -1, -1),
                reversed(self.weights),
                reversed(input_layers[:-1]),
            )

            for i, weight_layer, input_signals in loop:
                # Loop over the weight layers in reversed order to calculate the deltas
                if i == 0:
                    dropped = dropout(add_bias(input_signals).T, self.input_layer_dropout)
                else:
                    dropped = dropout(add_bias(input_signals).T, self.hidden_layer_dropout)

                # Calculate weight change
                dW = learning_rate * np.dot(dropped, delta) + momentum_factor * momentum[i]

                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skipping the bias weight during calculation.
                    weight_delta = np.dot(delta, weight_layer[1:,:].T)

                    # Calculate the delta for the subsequent layer
                    delta = np.multiply(weight_delta,
                                        self.activation_functions[i-1](input_signals, derivative=True))

                # Store the momentum
                momentum[i] = dW

                # Update the weights
                self.weights[i] -= dW

        if epoch % 1000 == 0:
            # Show the current training status
            print "* current network error (MSE):", MSE

    print "* Converged to error bound (%.4g) with MSE = %.4g." % (ERROR_LIMIT, MSE)
    print "* Trained for %d epochs." % epoch
expectedValueFunction = np.zeros((nGridCapital, nGridProductivity), dtype=float)

# 4. We pre-build output for each point in the grid
for nProductivity in range(nGridProductivity):
    mOutput[:, nProductivity] = vProductivity[nProductivity] * (vGridCapital**aalpha)

## 5. Main iteration
maxDifference = 10.0
tolerance = 0.0000001
iteration = 0

while (maxDifference > tolerance):

    expectedValueFunction = np.dot(mValueFunction, mTransition.T)

    for nProductivity in range(nGridProductivity):

        # We start from previous choice (monotonicity of policy function)
        gridCapitalNextPeriod = 0

        for nCapital in range(nGridCapital):

            valueHighSoFar = -100000.0
            capitalChoice = vGridCapital[0]

            for nCapitalNextPeriod in range(gridCapitalNextPeriod, nGridCapital):

                consumption = mOutput[nCapital, nProductivity] - vGridCapital[nCapitalNextPeriod]
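# A small, self-contained illustration (numpy only, made-up numbers) of the
# expectation step above: np.dot(mValueFunction, mTransition.T) gives, for
# each capital point and current productivity z, the sum over next-period
# productivities z' of mTransition[z, z'] * mValueFunction[:, z'].
def _expected_value_sketch():
    mValueFunction = np.array([[1.0, 2.0],
                               [3.0, 4.0],
                               [5.0, 6.0]])      # nGridCapital=3, nGridProductivity=2
    mTransition = np.array([[0.9, 0.1],
                            [0.2, 0.8]])         # rows sum to 1
    expected = np.dot(mValueFunction, mTransition.T)
    # explicit double loop computing the same quantity
    check = np.zeros_like(expected)
    for z in range(2):
        for zprime in range(2):
            check[:, z] += mTransition[z, zprime] * mValueFunction[:, zprime]
    assert np.allclose(expected, check)
    return expected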
def test_flatiter_array_conv(self):
    from numpypy import array, dot
    a = array([1, 2, 3])
    assert dot(a.flat, a.flat) == 14