# Householder reduction of a real symmetric matrix [a] to tridiagonal
# form.  householder(a) overwrites [a] and returns (d, c), the diagonal
# and off-diagonal of the tridiagonal matrix; computeP(a), called
# afterwards, returns the accumulated transformation matrix [p].
from numarray import dot, matrixmultiply, outerproduct, \
     diagonal, identity
from math import sqrt

def householder(a):
    n = len(a)
    for k in range(n - 2):
        u = a[k+1:n, k]
        uMag = sqrt(dot(u, u))
        if u[0] < 0.0: uMag = -uMag      # sign chosen to avoid cancellation
        u[0] = u[0] + uMag               # u = x + sign(x[0])*|x|*e1
        h = dot(u, u)/2.0                # h = |u|^2 / 2
        v = matrixmultiply(a[k+1:n, k+1:n], u)/h
        g = dot(u, v)/(2.0*h)
        v = v - g*u
        a[k+1:n, k+1:n] = a[k+1:n, k+1:n] - outerproduct(v, u) \
                          - outerproduct(u, v)
        a[k, k+1] = -uMag
    return diagonal(a), diagonal(a, 1)
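# Each pass of the loop above applies one Householder reflection.  A
# minimal NumPy sketch of a single reflection on a made-up 3-vector,
# building the reflector Q = I - u*u^T/h explicitly (the in-place update
# above applies the same reflection without ever forming Q):
import numpy as np

x = np.array([3.0, 4.0, 12.0])      # stands in for the column a[k+1:n, k]
uMag = np.sqrt(x @ x)
if x[0] < 0.0: uMag = -uMag         # same sign convention as above
u = x.copy()
u[0] = u[0] + uMag                  # u = x + sign(x[0])*|x|*e1
h = np.dot(u, u)/2.0                # h = |u|^2 / 2
Q = np.eye(3) - np.outer(u, u)/h    # symmetric, orthogonal reflector
print(Q @ x)                        # ~ [-uMag, 0, 0]: zeroed below the first entry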
def computeP(a):
    n = len(a)
    p = identity(n)*1.0
    for k in range(n - 2):
        u = a[k+1:n, k]              # u-vector stored by householder()
        h = dot(u, u)/2.0
        v = matrixmultiply(p[1:n, k+1:n], u)/h
        # Row 0 of p never changes, so only rows 1:n need updating.
        p[1:n, k+1:n] = p[1:n, k+1:n] - outerproduct(v, u)
    return p
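# A minimal usage sketch for the two functions above, assuming the
# numarray imports are replaced by these NumPy equivalents; the
# symmetric test matrix is made up.
import numpy as np
from numpy import dot, diagonal, identity
from numpy import outer as outerproduct, matmul as matrixmultiply
from math import sqrt

a = np.array([[ 7.0,  2.0,  3.0, -1.0],
              [ 2.0,  8.0,  5.0,  1.0],
              [ 3.0,  5.0, 12.0,  9.0],
              [-1.0,  1.0,  9.0,  7.0]])
a0 = a.copy()                    # householder() overwrites a in place
d, c = householder(a)            # diagonal and off-diagonal of tridiag form
p = computeP(a)                  # uses the u-vectors householder() left in a
t = np.diag(d) + np.diag(c, 1) + np.diag(c, -1)
print(np.allclose(p.T @ a0 @ p, t))   # should print True: P^T A P is tridiagonal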
def _deltas(self, train_toks, #fd_list, labeled_tokens, labels,
            classifier, unattested, ffreq_emperical, nfmap,
            nfarray, nftranspose):
    """
    Calculate the update values for the classifier weights for
    this iteration of IIS.  These update weights are the value of
    C{delta} that solves the equation::

        ffreq_emperical[i] = SUM[t,l] (classifier.prob(LabeledText(t,l)) *
                                       fd_list.detect(LabeledText(t,l))[i] *
                                       exp(delta[i] * nf(LabeledText(t,l))))

    Where:
        - M{t} is a text from C{labeled_tokens}
        - M{l} is an element of C{labels}
        - M{nf(ltext)} = SUM[M{j}] C{fd_list.detect}(M{ltext})[M{j}]

    This method uses Newton's method to solve this equation for
    M{delta[i]}.  In particular, it starts with a guess of
    C{delta[i]}=1; and iteratively updates C{delta} with::

        delta[i] -= (ffreq_emperical[i] - sum1[i])/(-sum2[i])

    until convergence, where M{sum1} and M{sum2} are defined as::

        sum1 = SUM[t,l] (classifier.prob(LabeledText(t,l)) *
                         fd_list.detect(LabeledText(t,l))[i] *
                         exp(delta[i] * nf(LabeledText(t,l))))
        sum2 = SUM[t,l] (classifier.prob(LabeledText(t,l)) *
                         fd_list.detect(LabeledText(t,l))[i] *
                         nf(LabeledText(t,l)) *
                         exp(delta[i] * nf(LabeledText(t,l))))

    Note that M{sum1} and M{sum2} depend on C{delta}; so they need
    to be re-computed each iteration.

    The variables C{nfmap}, C{nfarray}, and C{nftranspose} are
    used to generate a dense encoding for M{nf(ltext)}.  This
    allows C{_deltas} to calculate M{sum1} and M{sum2} using
    matrices, which yields a significant performance improvement.

    @param fd_list: The feature detector list for the classifier
        that this C{IISMaxentClassifierTrainer} is training.
    @type fd_list: C{FeatureDetectorListI}
    @param labeled_tokens: The set of training tokens.
    @type labeled_tokens: C{list} of C{Token} with C{LabeledText}
        type
    @param labels: The set of labels that should be considered by
        the classifier constructed by this
        C{IISMaxentClassifierTrainer}.
    @type labels: C{list} of (immutable)
    @param classifier: The current classifier.
    @type classifier: C{ClassifierI}
    @param ffreq_emperical: An array containing the empirical
        frequency for each feature.  The M{i}th element of this
        array is the empirical frequency for feature M{i}.
    @type ffreq_emperical: C{sequence} of C{float}
    @param unattested: An array that is 1 for features that are
        not attested in the training data; and 0 for features that
        are attested.  In other words, C{unattested[i]==0} iff
        C{ffreq_emperical[i]==0}.
    @type unattested: C{sequence} of C{int}
    @param nfmap: A map that can be used to compress C{nf} to a
        dense vector.
    @type nfmap: C{dictionary} from C{int} to C{int}
    @param nfarray: An array that can be used to uncompress C{nf}
        from a dense vector.
    @type nfarray: C{array} of C{float}
    @param nftranspose: The transpose of C{nfarray}.
    @type nftranspose: C{array} of C{float}
    """
    # These parameters control when we decide that we've
    # converged.  It probably should be possible to set these
    # manually, via keyword arguments to train.
    NEWTON_CONVERGE = 1e-12
    MAX_NEWTON = 30

    deltas = numarray.ones(self._weight_vector_len, 'd')

    # Precompute the A matrix:
    # A[nf][id] = sum ( p(text) * p(label|text) * f(text,label) )
    # over all label,text s.t. num_features[label,text]=nf
    A = numarray.zeros((len(nfmap), self._weight_vector_len), 'd')

    for i, tok in enumerate(train_toks):
        dist = classifier.get_class_probs(tok)

        # Find the number of active features.
        feature_vector = tok['FEATURE_VECTOR']
        assignments = feature_vector.assignments()
        nf = sum([val for (id, val) in assignments])

        # Update the A matrix
        for cls, offset in self._offsets.items():
            for (id, val) in assignments:
                A[nfmap[nf], id+offset] += dist.prob(cls) * val
    A /= len(train_toks)

    # Iteratively solve for delta.  Use the following variables:
    #   - nf_delta[x][y] = nf[x] * delta[y]
    #   - exp_nf_delta[x][y] = exp(nf[x] * delta[y])
    #   - nf_exp_nf_delta[x][y] = nf[x] * exp(nf[x] * delta[y])
    #   - sum1[i][nf] = sum p(text)p(label|text)f[i](label,text)
    #                   exp(delta[i]nf)
    #   - sum2[i][nf] = sum p(text)p(label|text)f[i](label,text)
    #                   nf exp(delta[i]nf)
    for rangenum in range(MAX_NEWTON):
        nf_delta = numarray.outerproduct(nfarray, deltas)
        exp_nf_delta = numarray.exp(nf_delta)
        nf_exp_nf_delta = nftranspose * exp_nf_delta
        sum1 = numarray.sum(exp_nf_delta * A)
        sum2 = numarray.sum(nf_exp_nf_delta * A)

        # Avoid division by zero.
        sum2 += unattested

        # Update the deltas.
        deltas -= (ffreq_emperical - sum1) / -sum2

        # We can stop once we converge.
        n_error = (numarray.sum(abs(ffreq_emperical - sum1)) /
                   numarray.sum(abs(deltas)))
        if n_error < NEWTON_CONVERGE:
            return deltas
    return deltas
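# A minimal NumPy sketch of the inner Newton loop above, with made-up toy
# values standing in for the precomputed quantities (A, nfarray,
# ffreq_emperical, unattested).  Each feature i's update solves
# ffreq_emperical[i] = SUM[x] A[x][i] * exp(nf[x] * delta[i]), exactly as
# described in the docstring.  Note numarray.sum() summed over axis 0 by
# default, so axis=0 is written out explicitly here.
import numpy as np

NEWTON_CONVERGE = 1e-12
MAX_NEWTON = 30

nfarray = np.array([1.0, 2.0, 3.0])       # the distinct values of nf
nftranspose = nfarray.reshape(-1, 1)      # column form, as in the code above
A = np.array([[0.20, 0.05, 0.10],         # A[nf_index][feature_id], made up
              [0.10, 0.15, 0.05],
              [0.05, 0.10, 0.20]])
ffreq_emperical = np.array([0.30, 0.25, 0.35])
unattested = (ffreq_emperical == 0).astype('d')

deltas = np.ones(3, 'd')
for rangenum in range(MAX_NEWTON):
    nf_delta = np.outer(nfarray, deltas)
    exp_nf_delta = np.exp(nf_delta)
    nf_exp_nf_delta = nftranspose * exp_nf_delta
    sum1 = np.sum(exp_nf_delta * A, axis=0)       # f(delta)
    sum2 = np.sum(nf_exp_nf_delta * A, axis=0)    # f'(delta)
    sum2 += unattested                            # avoid division by zero
    deltas -= (ffreq_emperical - sum1) / -sum2    # Newton step
    n_error = np.sum(np.abs(ffreq_emperical - sum1)) / np.sum(np.abs(deltas))
    if n_error < NEWTON_CONVERGE:
        break
print(deltas)   # converges in a handful of iterations on this toy problem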