Example 1
def significance_test(size, bag, samplespace, factors, E_k):
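    # incexc, bag and E_k are assumed to be defined elsewhere in the source:
    # incexc presumably derives an inclusion-exclusion count for pattern i
    # from the raw counts in bag, and E_k maps a distribution to its vector
    # of feature expectations K.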
    tt = [incexc(i, bag, size) for i in samplespace]
    if any(count < 0 for count in tt):
        raise ValueError("negative count(s) in %s" % tt)
        
    n = float(bag[()])
    q_ref = [v/n for v in tt]
    K = E_k(q_ref)
    
    model = maxentropy.model(factors, samplespace)
    model.fit(K, algorithm="CG") #  The algorithm can be 'CG', 'BFGS', 'LBFGSB', 'Powell', or 'Nelder-Mead'.
    
    p = model.probdist()
    KLdiv = sum(p_i * (log2(p_i) - log2(q_i)) for p_i, q_i in zip(p, q_ref))  # KL(p || q_ref) in bits
    
    return (KLdiv, p[-1], q_ref[-1])
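The helpers incexc, E_k, and the counts bag are defined elsewhere in the source file. A minimal self-contained sketch of the same fit-then-compare pattern, assuming the legacy scipy.maxentropy module (removed in SciPy 0.11) and an entirely hypothetical sample space and reference distribution:

from math import log
from scipy import maxentropy

samplespace = ['a', 'b', 'c']                # hypothetical sample space
features = [lambda x: x in samplespace,      # normalisation: E[f0] = 1.0
            lambda x: x == 'a']              # target probability of 'a'
K = [1.0, 0.5]                               # desired feature expectations

model = maxentropy.model(features, samplespace)
model.fit(K, algorithm="CG")
p = model.probdist()

q_ref = [0.4, 0.3, 0.3]                      # hypothetical reference distribution
KLdiv = sum(p_i * (log(p_i, 2) - log(q_i, 2)) for p_i, q_i in zip(p, q_ref))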
Example 2
def f0(x):
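    # Indicator that x lies in the sample space; constraining its
    # expectation to 1.0 (K[0]) makes p a proper probability distribution.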
    return x in samplespace


def f1(x):
    return x == 'dans' or x == 'en'


def f2(x):
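    # a_grave is assumed to be defined earlier (presumably u'\u00e0',
    # i.e. the French word "à").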
    return x == 'dans' or x == a_grave


f = [f0, f1, f2]

model = maxentropy.model(f, samplespace)

# Now set the desired feature expectations
K = [1.0, 0.3, 0.5]

model.verbose = True

# Fit the model
model.fit(K)

# Output the distribution
print "\nFitted model parameters are:\n" + str(model.params)
print "\nFitted distribution is:"
p = model.probdist()
for j in range(len(model.samplespace)):
    x = model.samplespace[j]
    print ("\tx = %-15s" % (x + ":",) + " p(x) = " + str(p[j])).encode('utf-8')
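To see how well the fit honours the constraints, one can compare the achieved feature expectations against K. A sketch, using only the names defined above:

for f_i, k_i in zip(f, K):
    achieved = sum(p[j] for j, x in enumerate(model.samplespace) if f_i(x))
    print "E[f] = %.4f (target %.4f)" % (achieved, k_i)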
Example 3
"""

from scipy import maxentropy
import numpy as np

samplespace = [1., 2., 3., 4., 5., 6.]
def sump(x):
    return x in samplespace

def meanp(x):
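    # For a scalar x, np.mean(x) is just x, so constraining the
    # expectation of meanp fixes the mean of the distribution.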
    return np.mean(x)
# Set the constraints
# 1) We have a proper probability
# 2) The mean is equal to...
F = [sump, meanp]
model = maxentropy.model(F, samplespace)

# set the desired feature expectations
K = np.ones((5,2))
K[:,1] = [2.,3.,3.5,4.,5.]
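# Column 0 (all ones) is the normalisation target for sump; column 1 holds
# the successive mean targets, so each row K[i] defines one model to fit.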

model.verbose = False

for i in range(K.shape[0]):
    model.fit(K[i])

    # Output the distribution
    print("\nFitted model parameters are:\n" + str(model.params))
    print("\nFitted distribution is:")
    p = model.probdist()
    for j in range(len(model.samplespace)):
        x = model.samplespace[j]
        print("\tx = %-15s p(x) = %s" % (x, p[j]))
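    # (Sketch) check that the fitted mean matches this iteration's target:
    mean_fit = sum(x * p_j for x, p_j in zip(samplespace, p))
    print("Fitted mean: %.4f (target %.4f)" % (mean_fit, K[i][1]))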
Example 4
     bag[(0,1,2)] = val
     tt = [incexc(i, bag, size) for i in samplespace]
     if any(count < 0 for count in tt):
         print val, '-ve counts, skipping'
         continue
     
     if debug:
         for pattern, count in zip(samplespace, tt):
             print pattern, count
 
     den = float(bag[()])  # alternatively: float(sum(tt))
     q_ref = [v/den for v in tt]
         
     
     K = E_k(q_ref)
     model = maxentropy.model(factors, samplespace)
     
     t1 = time.clock()
     model.fit(K, algorithm="CG") #  The algorithm can be 'CG', 'BFGS', 'LBFGSB', 'Powell', or 'Nelder-Mead'.
     t2 = time.clock()
     if debug:
         print "Training time = %.4f sec" % (t2 - t1)
         print "\nFitted model parameters are:\n" + str(model.params)
     
     p = model.probdist()
     K_fit = E_k(p)
     KLdiv = sum(p_i * (log2(p_i) - log2(q_i)) for p_i, q_i in zip(p, q_ref))  # KL(p || q_ref) in bits
     
     if debug:
         print "Comparison of distribution:  fit (ref)"
         for i, p_i, q_i in zip(samplespace, p, q_ref):
             print i, p_i, q_i

# ... (the snippet is truncated here and resumes in a later part of the file.
# The generator below feeds sampled feature matrices to the model for Monte
# Carlo fitting; its "def" line and the "while True" loop are reconstructed
# from the call model.setsampleFgen(sampleFgen(sampler, f, n)) further down.)

def sampleFgen(sampler, f, n):
    while True:
        xs, logprobs = sampler.sample(n, return_probs=2)
        F = maxentropy.sparsefeaturematrix(f, xs, SPARSEFORMAT)
        yield F, logprobs

print "Generating an initial sample ..."
model.setsampleFgen(sampleFgen(sampler, f, n))

model.verbose = True

# Fit the model
model.avegtol = 1e-4
model.fit(K, algorithm=algorithm)

# Output the fitted model parameters
print "\nFitted model parameters are:\n" + str(model.params)
smallmodel = maxentropy.model(f, samplespace)
smallmodel.setparams(model.params)
print "\nFitted distribution is:"
p = smallmodel.probdist()
for j in range(len(smallmodel.samplespace)):
    x = smallmodel.samplespace[j]
    print ("\tx = %-15s" %(x + ":",) + " p(x) = "+str(p[j])).encode('utf-8')


# Now show how well the constraints are satisfied:
print
print "Desired constraints:"
print "\tp['dans'] + p['en'] = 0.3"
print ("\tp['dans'] + p['" + a_grave + "']  = 0.5").encode('utf-8')
print
print "Actual expectations under the fitted model:"