def evaluate_cv(self, data=None, verbose=0):
    """Evaluate the model with cross-validation.

    Runs `self.cv` evaluations: random train/test splits when
    `self.random_splits` is set, otherwise shuffled K-fold CV.

    data: list of examples to split; defaults to `self.data`.
    verbose: verbosity level passed through to `self.evaluate`.
    Returns the list of per-split results from `self.evaluate`.
    """
    # Bug fix: the original generated split indices over `data` but then
    # indexed `self.data`, which breaks whenever a caller passes a dataset
    # other than self.data (and crashed on the data=None default).
    if data is None:
        data = self.data
    all_results = []
    if self.random_splits:
        # NOTE(review): `random_state=random` looks like it passes the
        # `random` module; sklearn expects an int/RandomState/None — confirm.
        splits = ShuffleSplit(n=len(data), n_iter=self.cv,
                              test_size=1 - self.train_percentage,
                              random_state=random)
    else:
        splits = KFold(n=len(data), n_folds=self.cv, shuffle=True,
                       random_state=random)
    if verbose <= 1:
        # Quiet mode: only report progress every 5 minutes.
        timing.set_resolution(datetime.timedelta(minutes=5))
    timing.start_task('Train split' if self.random_splits else 'CV fold',
                      self.cv)
    for eval_num, (train_indices, test_indices) in enumerate(splits):
        timing.progress(eval_num)
        # Index into `data` (not self.data): the split indices above were
        # generated over `data`.
        train = [data[i] for i in train_indices]
        test = [data[i] for i in test_indices]
        all_results.append(
            self.evaluate(train=train, test=test, eval_num=eval_num,
                          verbose=verbose))
    timing.end_task()
    return all_results
def jacobi_eigensolver(Ain, N):
    """Return the sorted eigenvalues of the symmetric matrix `Ain`,
    computed with at most `N` Jacobi rotation steps.

    Each step zeroes the largest off-diagonal element via a Givens
    similarity transform; iteration stops early once that element
    falls below the tolerance.
    """
    A = np.asarray(Ain)
    tol = 0.000001
    for i in progress(range(N)):
        ind = offdiag_max(A)
        if abs(A[ind]) < tol:
            # Converged: largest off-diagonal entry is negligible.
            # (The original had an unreachable `return A` after this break.)
            break
        given = given_gen(A, ind)
        # Similarity transform G^T A G. Bug fix: the original used `*`,
        # which is ELEMENTWISE on ndarrays (A is np.asarray'd above);
        # np.dot performs the required matrix product and is also correct
        # if given_gen happens to return np.matrix.
        A = np.dot(np.dot(np.transpose(given), A), given)
    return np.sort(np.diag(A))
def powersolve(A, TOL):
    '''
    e, w = powersolve(A, TOL)

    Compute eigenvalues e and corresponding eigenvectors w of the
    k x k matrix A by power iteration with deflation (eigenvector n is
    stored in row n of w). TOL is the convergence tolerance on the
    change in direction between successive iterates.
    '''
    k = A.shape[0]
    v = np.random.rand(k)  # random starting vector
    eig = np.zeros(k)
    vectors = np.empty((k, k))
    # Hoisted: the original evaluated dot(A, v) twice here.
    Av = dot(A, v)
    u = Av / norm(Av)
    B = A
    for n in progress(xrange(k)):
        # Iterate until the direction stabilizes; abs() makes the test
        # insensitive to sign flips between successive iterates.
        # NOTE(review): no iteration cap — a non-converging matrix loops
        # forever; confirm callers only pass symmetric matrices.
        while norm(abs(u) - abs(v)) > TOL:
            u = v
            v = dot(B, u)
            v = v / norm(v)
        # Rayleigh quotient gives the dominant eigenvalue of the current
        # (deflated) matrix B.
        eig[n] = dot(transpose(v), dot(B, v))
        vectors[n] = v
        # Deflate: project the found eigenvector out of B so the next
        # pass converges to the next eigenpair. Hoisted: the original
        # built the projector matrix twice.
        P = np.identity(k) - np.outer(v, v)
        B = dot(dot(P, B), P)
        v = np.random.rand(k)  # restart from a fresh random vector
    return eig, vectors
def SGD(self, D=None, l2_coeff=None, verbose=0):
    """Implements stochatic (sub)gradient descent.

    `D` should be an iterable of `(id, x, y, domain, attrs)` tuples,
    where domain is a list of possible outputs (`y in domain` should be
    `True`) and attrs is the list of object properties expressed by `x`.
    `messages` should be a list of possible inputs.

    Runs up to `self.T` passes over (shuffled) `D`, accumulating
    max-margin error and updating `weights` per example, with optional
    AdaGrad scaling and L2 regularization. Stops early when the total
    error or the update magnitude drops to `self.epsilon`.

    Returns `(weights, error, iteration, messages)`.
    """
    if verbose >= 1:
        print 'Training with eta=%f, l2_coeff=%f, use_adagrad=%s' % \
            (self.eta, self.l2_coeff, self.use_adagrad)
    if self.only_relevant_alts:
        # In this mode each training tuple carries its own alternatives.
        D = inst.add_relevant_alts(D)
    elif not self.only_local_alts:
        # messages is the set of utterances observed in training, as a proxy for
        # the set of all possible utterances.
        messages = [d[1] for d in D]
    # Explicit l2_coeff argument overrides the instance-level setting.
    l2_coeff = self.l2_coeff if l2_coeff == None else l2_coeff
    self.vectorizer = FeatureVectorizer(phi=self.phi, verbose=verbose)
    # Sparse weight vector: missing features are implicitly 0.
    weights = defaultdict(float)
    # Per-feature sum of squared gradients for AdaGrad scaling.
    adagrad = defaultdict(lambda: 0.0)
    timing.start_task('Iteration', self.T)
    for iteration in range(self.T):
        timing.progress(iteration)
        # Fresh example order each pass (in-place; assumes D is a list).
        random.shuffle(D)
        error = 0.0
        update_mag = 0.0
        timing.start_task('Example', len(D))
        for i, d in enumerate(D):
            timing.progress(i)
            if self.only_relevant_alts or self.only_local_alts:
                # Per-example alternatives ride along as a 6th tuple element.
                (id_, x, y, domain, attrs_, messages) = d
            else:
                (id_, x, y, domain, attrs_) = d[:5]
            # Get all (score, y') pairs:
            scores = [score(x, y_alt, self.phi, weights) + cost(y, y_alt)
                      for y_alt in domain]
            # Get the maximal score:
            max_score = sorted(scores)[-1]
            # Structured hinge loss contribution for this example.
            error += max_score - score(x, y, self.phi, weights)
            # Compute the gradient of the objective function:
            grad = self.gradient(x_actual=x, x_alts=messages, y_actual=y,
                                 y_alts=domain, w=weights, verbose=verbose)
            # L2 regularization: subtract constant multiple of weight values
            if l2_coeff:
                for f in set(weights.keys()):
                    grad[f] -= l2_coeff * weights[f]
            # Weight-update (a bit cumbersome because of the dict-based
            # implementation):
            if self.use_adagrad:
                for f in set(grad.keys()):
                    adagrad[f] += grad[f] ** 2
                    # Guard against division by zero for zero gradients.
                    if adagrad[f] != 0.0:
                        dw = self.eta * grad[f] / np.sqrt(adagrad[f])
                        weights[f] += dw
                        update_mag += dw ** 2
            else:
                for f in set(grad.keys()):
                    dw = self.eta * grad[f]
                    weights[f] += dw
                    update_mag += dw ** 2
        timing.end_task()
        if verbose:
            print 'Error: %f' % error
            print 'Weight update magnitude: %f' % update_mag
        # Early termination: objective minimized ...
        if error <= self.epsilon:
            if verbose:
                print "Terminating after %s iterations; error is minimized." % iteration
            break
        # ... or updates have become negligible (local minimum).
        if update_mag <= self.epsilon:
            if verbose:
                print "Terminating after %s iterations; reached local minimum." % iteration
            break
    timing.end_task()
    return (weights, error, iteration, messages)