def significance_test(size, bag, samplespace, factors, E_k):
    """Fit a maximum-entropy model to observed counts and measure the misfit.

    For every pattern in ``samplespace`` a count is obtained from
    ``incexc(pattern, bag, size)`` and divided by ``bag[()]`` to give the
    reference distribution ``q_ref``.  ``E_k(q_ref)`` supplies the target
    vector handed to ``maxentropy.model(factors, samplespace).fit`` (using
    the 'CG' optimiser), and the fitted distribution ``p`` is then compared
    with ``q_ref``.

    Args:
        size: Forwarded unchanged to ``incexc``.
        bag: Container of counts indexed by tuples; the entry stored under
            the empty tuple ``()`` is used as the normalising total.
        samplespace: Sequence of patterns; also used as the model's sample
            space.
        factors: Feature functions passed to ``maxentropy.model``.
        E_k: Callable that takes a list of probabilities and returns the
            constraint vector ``K`` given to ``model.fit``.

    Returns:
        Tuple ``(KLdiv, p[-1], q_ref[-1])``: the divergence of ``q_ref`` from
        the fitted distribution (computed with ``log2``), followed by the
        fitted and the reference probability of the last pattern in
        ``samplespace``.

    Raises:
        ValueError: If any count returned by ``incexc`` is negative.
    """
    counts = [incexc(pattern, bag, size) for pattern in samplespace]
    if any(count < 0 for count in counts):
        # ValueError is a subclass of Exception, so callers that caught the
        # previous generic Exception keep working.
        raise ValueError("Value Error: -ve %s" % counts)

    total = float(bag[()])
    q_ref = [count / total for count in counts]

    K = E_k(q_ref)
    model = maxentropy.model(factors, samplespace)
    # The algorithm can be 'CG', 'BFGS', 'LBFGSB', 'Powell', or 'Nelder-Mead'.
    model.fit(K, algorithm="CG")
    p = model.probdist()

    # D(q_ref || p) = sum_i q_i * (log2 q_i - log2 p_i).
    # A pattern that was never observed has q_i == 0 and contributes nothing
    # (0 * log 0 is taken as 0); evaluating log2(0) for it would either raise
    # or turn the whole sum into NaN, so those terms are skipped.
    KLdiv = sum(
        ref_i * (log2(ref_i) - log2(fit_i))
        for ref_i, fit_i in zip(q_ref, p)
        if ref_i != 0
    )
    return (KLdiv, p[-1], q_ref[-1])
def f0(x):
    """Return True when x is a member of the module-level samplespace.

    Every sample point satisfies this, so the feature's expectation under
    any distribution over samplespace is the total probability mass.
    """
    return x in samplespace


def f1(x):
    """Return True when x is 'dans' or 'en'."""
    return x == 'dans' or x == 'en'


def f2(x):
    """Return True when x is 'dans' or the value bound to a_grave."""
    return x == 'dans' or x == a_grave


# Feature list; the order here fixes the order of the targets in K below.
f = [f0, f1, f2]

model = maxentropy.model(f, samplespace)

# Now set the desired feature expectations
# K[0] = 1.0 is the target for f0 (all probability mass on the sample space),
# K[1] = 0.3 the target for f1 and K[2] = 0.5 the target for f2.
K = [1.0, 0.3, 0.5]

model.verbose = True

# Fit the model
model.fit(K)

# Output the distribution
print "\nFitted model parameters are:\n" + str(model.params)

print "\nFitted distribution is:"
p = model.probdist()
# Index-based walk over the model's sample space; x is the j-th sample point.
for j in range(len(model.samplespace)):
    x = model.samplespace[j]
""" from scipy import maxentropy import numpy as np samplespace = [1., 2., 3., 4., 5., 6.] def sump(x): return x in samplespace def meanp(x): return np.mean(x) # Set the constraints # 1) We have a proper probability # 2) The mean is equal to... F = [sump, meanp] model = maxentropy.model(F, samplespace) # set the desired feature expectations K = np.ones((5,2)) K[:,1] = [2.,3.,3.5,4.,5.] model.verbose = False for i in range(K.shape[0]): model.fit(K[i]) # Output the distribution print("\nFitted model parameters are:\n" + str(model.params)) print("\nFitted distribution is:") p = model.probdist() for j in range(len(model.samplespace)):
bag[(0,1,2)] = val tt = [incexc(i, bag, size) for i in samplespace] if any([count < 0 for count in tt]): print val, '-ve' continue if debug: for pattern, count in zip(samplespace, tt): print pattern, count den = float(bag[()])#float(sum(tt)) q_ref = [v/den for v in tt] K = E_k(q_ref) model = maxentropy.model(factors, samplespace) t1 = time.clock() model.fit(K, algorithm="CG") # The algorithm can be 'CG', 'BFGS', 'LBFGSB', 'Powell', or 'Nelder-Mead'. t2 = time.clock() if debug: print "Training time = %.4f sec" % (t2 - t1) print "\nFitted model parameters are:\n" + str(model.params) p = model.probdist() K_fit = E_k(p) KLdiv = sum([ p_i * (log2(p_i) - log2(q_i)) for p_i, q_i in zip(q_ref, p)]) if debug: print "Comparison of distribution: fit (ref)" for i in samplespace:
xs, logprobs = sampler.sample(n, return_probs=2) F = maxentropy.sparsefeaturematrix(f, xs, SPARSEFORMAT) yield F, logprobs print "Generating an initial sample ..." model.setsampleFgen(sampleFgen(sampler, f, n)) model.verbose = True # Fit the model model.avegtol = 1e-4 model.fit(K, algorithm=algorithm) # Output the true distribution print "\nFitted model parameters are:\n" + str(model.params) smallmodel = maxentropy.model(f, samplespace) smallmodel.setparams(model.params) print "\nFitted distribution is:" p = smallmodel.probdist() for j in range(len(smallmodel.samplespace)): x = smallmodel.samplespace[j] print ("\tx = %-15s" %(x + ":",) + " p(x) = "+str(p[j])).encode('utf-8') # Now show how well the constraints are satisfied: print print "Desired constraints:" print "\tp['dans'] + p['en'] = 0.3" print ("\tp['dans'] + p['" + a_grave + "'] = 0.5").encode('utf-8') print print "Actual expectations under the fitted model:"