Esempio n. 1
0
def getCombinedFactor(index, assignment, w1, w2):
    """Return the summed log-factor over all characters for the variable at
    `index`, conditioned on the current `assignment` of every other variable.

    `index` addresses the concatenation of the two words: positions
    0..len(w1)-1 belong to w1, the rest to w2.  The result is a dict mapping
    each character to its combined (unary + neighbouring-factor) score, used
    by the Gibbs samplers below to draw a new value for this variable.
    """
    global factorLookUp

    # Rebuild the factor lookup table for this word pair.
    skips1, skips2, pairSkips = basics.findingSkips(w1, w2)
    factorLookUp = basics.getFactor(len(w1), len(w2), skips1, skips2, pairSkips)

    n1 = len(w1)
    # Seed with a copy of the OCR (unary) factor for this position's image.
    image = w1[index] if index < n1 else w2[index - n1]
    combinedFactor = basics.ocrDat[image].copy()

    # Fold in every pairwise factor that touches this variable.
    for kind, scope in factorLookUp[index]:
        if kind == 't':
            # Transition factor between adjacent positions: the candidate
            # character sits at whichever end of the edge `index` occupies.
            for ch in basics.characterArray:
                if index == scope[0]:
                    combinedFactor[ch] += basics.transDat[ch][assignment[index + 1]]
                else:
                    combinedFactor[ch] += basics.transDat[assignment[index - 1]][ch]
        elif kind in ('s', 'p'):
            # Skip / pair-skip factor: score against the assignment at the
            # other end of the pair (invariant across candidates, so hoisted).
            otherVar = assignment[scope[1]] if index == scope[0] else assignment[scope[0]]
            for ch in basics.characterArray:
                combinedFactor[ch] += basics.getPairSkipFactor(ch, otherVar)
    return combinedFactor
Esempio n. 2
0
def GibbsSamplerRandomisedConvergence(w1, w2, actualWord1, actualWord2):
    """Gibbs sampler with uniformly random variable selection, instrumented
    for convergence analysis.

    w1, w2 are the two observed words (sequences of image ids); actualWord1
    and actualWord2 are their ground-truth strings.  After a burn-in phase,
    runs 20000 single-variable sampling steps and, every 10 samples, decodes
    the current most-likely assignment (MLA) from the accumulated marginals.

    Returns a list of (bool, bool) tuples: whether the decoded word1/word2
    matched the ground truth at each checkpoint.
    """
    global factorLookUp

    n1 = len(w1)
    n2 = len(w2)

    #Create a factor look-up list
    sk1, sk2, ps = basics.findingSkips(w1, w2)
    factorLookUp = basics.getFactor(n1, n2, sk1, sk2, ps)

    #Generate initial assignment
    seed(time())
    assignment = [choice(basics.characterArray) for i in xrange(n1 + n2)]
    # Per-variable sample counts collected after burn-in (empirical marginals).
    samples = [{t:0 for t in basics.characterArray} for i in xrange(n1 + n2)]

    # Per-variable sample counts used only to detect burn-in convergence.
    burnInSamples = [{t:0 for t in basics.characterArray} for i in xrange(n1 + n2)]
    # Sentinel far above any reachable value so the first comparison never
    # triggers the convergence break.
    prevLogLikelihoodOfMLA = 500
    burninInfo = []
    count = 0

    # Burn-in: resample one uniformly chosen variable per iteration until the
    # convergence statistic stabilises.
    while(True):
        varSampled = choice(xrange(n1 + n2))
        sample = getRandomSample(varSampled, assignment, w1, w2)
        assignment[varSampled] = sample
        #burnInSamples[varSampled][sample] += 1
        # Count the full current assignment, not just the resampled variable.
        for x in xrange(len(assignment)):
            burnInSamples[x][assignment[x]] += 1
        count += 1
        if count>100:
            # NOTE(review): despite the name, this is the sum over variables of
            # the empirical frequency of each variable's modal character, not a
            # log-likelihood.
            logLikelihoodOfMLA = 0
            for j in xrange(n1 + n2):
                logLikelihoodOfMLA += float(max(burnInSamples[j].values()))/count
            # Stop burn-in once the statistic changes by less than 2e-4.
            if abs(logLikelihoodOfMLA - prevLogLikelihoodOfMLA)<0.0002:
                break
            else:
                prevLogLikelihoodOfMLA = logLikelihoodOfMLA
                burninInfo.append(logLikelihoodOfMLA)

    #print 'Burn-in ', len(burninInfo)

    iterationInfo = []

    # Main sampling phase: 20000 random-scan steps, checkpoint every 10.
    count = 0
    for i in xrange(20000):
        varSampled = choice(xrange(n1 + n2))
        count += 1
        sample = getRandomSample(varSampled, assignment, w1, w2)
        assignment[varSampled] = sample
        if count%10==0:
            # Accumulate the whole assignment into the marginal counts.
            for x in xrange(n1 + n2):
                samples[x][assignment[x]] += 1
            # Decode the MLA: per variable, the first character (in dict
            # iteration order) attaining the maximal count.
            MLA = ''
            for k in samples:
                maxMarginal = max(k.values())
                for j in k.keys():
                    if k[j]==maxMarginal:
                        MLA += j
                        break
            word1 = MLA[:len(w1)]
            word2 = MLA[len(w1):]
            iterationInfo.append((word1==actualWord1, word2==actualWord2))

    return iterationInfo


#print GibbsSamplerRandomisedConvergence([542,949,830], [742,981,543,625,830,758], 'ade', 'atoner')
Esempio n. 3
0
def GibbsSamplerConvergence(w1, w2, actualWord1, actualWord2):
    """Systematic-scan Gibbs sampler (variables resampled in index order),
    instrumented for convergence analysis.

    Counterpart of GibbsSamplerRandomisedConvergence: same burn-in criterion
    and checkpointing, but each sweep resamples every variable 0..n1+n2-1 in
    turn, and the main phase stops after 20000 individual samples.

    Returns a list of (bool, bool) tuples: whether the decoded word1/word2
    matched actualWord1/actualWord2 at each checkpoint.
    """
    global factorLookUp

    n1 = len(w1)
    n2 = len(w2)

    #Create a factor look-up list
    sk1, sk2, ps = basics.findingSkips(w1, w2)
    factorLookUp = basics.getFactor(n1, n2, sk1, sk2, ps)

    #Generate initial assignment
    seed(time())
    assignment = [choice(basics.characterArray) for i in xrange(n1 + n2)]
    # Post-burn-in per-variable sample counts (empirical marginals).
    samples = [{t:0 for t in basics.characterArray} for i in xrange(n1 + n2)]

    # Counts used only to detect burn-in convergence.
    burnInSamples = [{t:0 for t in basics.characterArray} for i in xrange(n1 + n2)]
    # Sentinel far above any reachable value of the statistic.
    prevLogLikelihoodOfMLA = 500
    burninInfo = []
    count = 0

    # Burn-in: full sweeps over all variables until the statistic stabilises.
    while(True):
        flag = False
        for j in xrange(n1 + n2):
            sample = getRandomSample(j, assignment, w1, w2)
            assignment[j] = sample
            #burnInSamples[j][sample] += 1
            # Count the full current assignment after each single resample.
            for x in xrange(len(assignment)):
                burnInSamples[x][assignment[x]] += 1
            count += 1
            if count>100:
                # NOTE(review): sum of modal-character frequencies per
                # variable, not an actual log-likelihood.
                logLikelihoodOfMLA = 0
                for k in xrange(n1 + n2):
                    logLikelihoodOfMLA += float(max(burnInSamples[k].values()))/count
                if abs(logLikelihoodOfMLA - prevLogLikelihoodOfMLA)<0.0002:
                    # Converged mid-sweep; flag so the outer loop exits too.
                    flag = True
                    break
                else:
                    prevLogLikelihoodOfMLA = logLikelihoodOfMLA
                    burninInfo.append(logLikelihoodOfMLA)

        if flag:
            break

    iterationInfo = []

    # Main phase: sweeps until 20000 samples, checkpointing every 10 samples.
    count = 0
    for i in xrange(10000):
        for j in xrange(n1 + n2):
            count += 1
            sample = getRandomSample(j, assignment, w1, w2)
            assignment[j] = sample
            if count%10==0:
                for x in xrange(n1 + n2):
                    samples[x][assignment[x]] += 1
                # Decode the most-likely assignment from current marginals.
                MLA = ''
                for k in samples:
                    maxMarginal = max(k.values())
                    for l in k.keys():
                        if k[l]==maxMarginal:
                            MLA += l
                            break

                word1 = MLA[:len(w1)]
                word2 = MLA[len(w1):]
                iterationInfo.append((word1==actualWord1, word2==actualWord2))

        # Cap total work at 20000 individual samples.
        if count >= 20000:
            break

    return iterationInfo
Esempio n. 4
0
def GibbsSamplerRandomisedConvergence(w1, w2, actualWord1, actualWord2):
    """Gibbs sampler with uniformly random variable selection, instrumented
    for convergence analysis (autoformatted duplicate of the version above).

    After burn-in, runs 20000 single-variable sampling steps; every 10
    samples decodes the most-likely assignment from accumulated marginals.

    Returns a list of (bool, bool): per checkpoint, whether the decoded
    word1/word2 matched actualWord1/actualWord2.
    """
    global factorLookUp

    n1 = len(w1)
    n2 = len(w2)

    #Create a factor look-up list
    sk1, sk2, ps = basics.findingSkips(w1, w2)
    factorLookUp = basics.getFactor(n1, n2, sk1, sk2, ps)

    #Generate initial assignment
    seed(time())
    assignment = [choice(basics.characterArray) for i in xrange(n1 + n2)]
    # Post-burn-in marginal counts, one dict per variable.
    samples = [{t: 0 for t in basics.characterArray} for i in xrange(n1 + n2)]

    # Counts used only for burn-in convergence detection.
    burnInSamples = [{t: 0
                      for t in basics.characterArray} for i in xrange(n1 + n2)]
    # Sentinel far above any reachable value of the statistic.
    prevLogLikelihoodOfMLA = 500
    burninInfo = []
    count = 0

    # Burn-in: resample one random variable per iteration until stable.
    while (True):
        varSampled = choice(xrange(n1 + n2))
        sample = getRandomSample(varSampled, assignment, w1, w2)
        assignment[varSampled] = sample
        #burnInSamples[varSampled][sample] += 1
        # Count the full current assignment, not just the resampled variable.
        for x in xrange(len(assignment)):
            burnInSamples[x][assignment[x]] += 1
        count += 1
        if count > 100:
            # NOTE(review): sum of per-variable modal frequencies, not a
            # true log-likelihood.
            logLikelihoodOfMLA = 0
            for j in xrange(n1 + n2):
                logLikelihoodOfMLA += float(max(
                    burnInSamples[j].values())) / count
            if abs(logLikelihoodOfMLA - prevLogLikelihoodOfMLA) < 0.0002:
                break
            else:
                prevLogLikelihoodOfMLA = logLikelihoodOfMLA
                burninInfo.append(logLikelihoodOfMLA)

    #print 'Burn-in ', len(burninInfo)

    iterationInfo = []

    # Main phase: 20000 random-scan steps, checkpoint every 10 samples.
    count = 0
    for i in xrange(20000):
        varSampled = choice(xrange(n1 + n2))
        count += 1
        sample = getRandomSample(varSampled, assignment, w1, w2)
        assignment[varSampled] = sample
        if count % 10 == 0:
            for x in xrange(n1 + n2):
                samples[x][assignment[x]] += 1
            # Decode the most-likely assignment from current marginals.
            MLA = ''
            for k in samples:
                maxMarginal = max(k.values())
                for j in k.keys():
                    if k[j] == maxMarginal:
                        MLA += j
                        break
            word1 = MLA[:len(w1)]
            word2 = MLA[len(w1):]
            iterationInfo.append((word1 == actualWord1, word2 == actualWord2))

    return iterationInfo


#print GibbsSamplerRandomisedConvergence([542,949,830], [742,981,543,625,830,758], 'ade', 'atoner')
Esempio n. 5
0
def GibbsSamplerConvergence(w1, w2, actualWord1, actualWord2):
    """Systematic-scan Gibbs sampler instrumented for convergence analysis
    (autoformatted duplicate of the version above).

    Sweeps over all variables in index order; burn-in ends when the
    convergence statistic stabilises, and the main phase stops after 20000
    individual samples, checkpointing every 10.

    Returns a list of (bool, bool): per checkpoint, whether the decoded
    word1/word2 matched actualWord1/actualWord2.
    """
    global factorLookUp

    n1 = len(w1)
    n2 = len(w2)

    #Create a factor look-up list
    sk1, sk2, ps = basics.findingSkips(w1, w2)
    factorLookUp = basics.getFactor(n1, n2, sk1, sk2, ps)

    #Generate initial assignment
    seed(time())
    assignment = [choice(basics.characterArray) for i in xrange(n1 + n2)]
    # Post-burn-in marginal counts, one dict per variable.
    samples = [{t: 0 for t in basics.characterArray} for i in xrange(n1 + n2)]

    # Counts used only for burn-in convergence detection.
    burnInSamples = [{t: 0
                      for t in basics.characterArray} for i in xrange(n1 + n2)]
    # Sentinel far above any reachable value of the statistic.
    prevLogLikelihoodOfMLA = 500
    burninInfo = []
    count = 0

    # Burn-in: full sweeps until the statistic changes by < 2e-4.
    while (True):
        flag = False
        for j in xrange(n1 + n2):
            sample = getRandomSample(j, assignment, w1, w2)
            assignment[j] = sample
            #burnInSamples[j][sample] += 1
            # Count the full current assignment after each single resample.
            for x in xrange(len(assignment)):
                burnInSamples[x][assignment[x]] += 1
            count += 1
            if count > 100:
                # NOTE(review): sum of per-variable modal frequencies, not a
                # true log-likelihood.
                logLikelihoodOfMLA = 0
                for k in xrange(n1 + n2):
                    logLikelihoodOfMLA += float(max(
                        burnInSamples[k].values())) / count
                if abs(logLikelihoodOfMLA - prevLogLikelihoodOfMLA) < 0.0002:
                    # Converged mid-sweep; flag so the outer loop exits too.
                    flag = True
                    break
                else:
                    prevLogLikelihoodOfMLA = logLikelihoodOfMLA
                    burninInfo.append(logLikelihoodOfMLA)

        if flag:
            break

    iterationInfo = []

    # Main phase: sweeps until 20000 samples, checkpoint every 10 samples.
    count = 0
    for i in xrange(10000):
        for j in xrange(n1 + n2):
            count += 1
            sample = getRandomSample(j, assignment, w1, w2)
            assignment[j] = sample
            if count % 10 == 0:
                for x in xrange(n1 + n2):
                    samples[x][assignment[x]] += 1
                # Decode the most-likely assignment from current marginals.
                MLA = ''
                for k in samples:
                    maxMarginal = max(k.values())
                    for l in k.keys():
                        if k[l] == maxMarginal:
                            MLA += l
                            break

                word1 = MLA[:len(w1)]
                word2 = MLA[len(w1):]
                iterationInfo.append(
                    (word1 == actualWord1, word2 == actualWord2))

        # Cap total work at 20000 individual samples.
        if count >= 20000:
            break

    return iterationInfo
Esempio n. 6
0
def GibbsSampler(w1, w2):
    """Systematic-scan Gibbs sampler that decodes the word pair (w1, w2).

    Runs a burn-in phase until the convergence statistic stabilises, then
    10000 full sweeps collecting marginal counts every 10th sample, and
    finally decodes the most-likely assignment (MLA).

    Returns (MLA, samples, totalTime): the decoded string over both words
    concatenated, the per-variable marginal count dicts, and the CPU time of
    the sampling phases as measured by clock().
    """
    global factorLookUp

    n1 = len(w1)
    n2 = len(w2)

    #Create a factor look-up list
    sk1, sk2, ps = basics.findingSkips(w1, w2)
    factorLookUp = basics.getFactor(n1, n2, sk1, sk2, ps)

    #Generate initial assignment
    seed(time())
    assignment = [choice(basics.characterArray) for i in xrange(n1 + n2)]
    # Post-burn-in marginal counts, one dict per variable.
    samples = [{t:0 for t in basics.characterArray} for i in xrange(n1 + n2)]

    # Counts used only for burn-in convergence detection.
    burnInSamples = [{t:0 for t in basics.characterArray} for i in xrange(n1 + n2)]
    # Sentinel far above any reachable value of the statistic.
    prevLogLikelihoodOfMLA = 500
    burninInfo = []
    count = 0

    # Seed the burn-in counts with the initial random assignment.
    for i in xrange(len(assignment)):
        burnInSamples[i][assignment[i]] += 1

    #If burn-in graph is to be generated, uncomment this
    '''
    holyStop = -1
    flag = True
    for i in xrange(300):
        for j in xrange(n1 + n2):
            count += 1
            sample = getRandomSample(j, assignment, w1, w2)
            assignment[j] = sample
            for x in xrange(len(assignment)):
                burnInSamples[x][assignment[x]] += 1
            #burnInSamples[j][sample] += 1
            logLikelihoodOfMLA = 0

            for k in xrange(n1 + n2):
                maxProbAssignment = float(max(burnInSamples[k].values()))/count
                if maxProbAssignment==0:
                    maxProbAssignment = 0.001
                #print maxProbAssignment
                logLikelihoodOfMLA += log(maxProbAssignment)
                #print logLikelihoodOfMLA


            if abs(prevLogLikelihoodOfMLA - logLikelihoodOfMLA) < 0.0002 and count>500 and flag:
                holyStop = count
                flag = False
                print holyStop

            prevLogLikelihoodOfMLA = logLikelihoodOfMLA

            burninInfo.append(logLikelihoodOfMLA)

    set1 = burninInfo[:holyStop]
    set2 = burninInfo[holyStop:]

    plt.plot(set1)
    plt.plot(range(holyStop, len(burninInfo)), set2, '--', color = 'r')
    plt.ylabel('Log-Likelihood of Most Likely Assignment')
    plt.xlabel('Number of Iterations')
    plt.axes([0, len(burninInfo), 0, max(burninInfo) + 2])
    plt.plot([holyStop], [burninInfo[holyStop]], 'r^')
    plt.savefig('burnin.eps')
    '''

    # Burn-in: full sweeps until the statistic changes by < 2e-4 (timed).
    start = clock()
    while(True):
        flag = False
        for j in xrange(n1 + n2):
            sample = getRandomSample(j, assignment, w1, w2)
            assignment[j] = sample
            #burnInSamples[j][sample] += 1
            # Count the full current assignment after each single resample.
            for x in xrange(len(assignment)):
                burnInSamples[x][assignment[x]] += 1
            count += 1
            if count>100:
                # NOTE(review): sum of per-variable modal frequencies, not a
                # true log-likelihood.
                logLikelihoodOfMLA = 0
                for k in xrange(n1 + n2):
                    logLikelihoodOfMLA += float(max(burnInSamples[k].values()))/count
                if abs(logLikelihoodOfMLA - prevLogLikelihoodOfMLA)<0.0002:
                    # Converged mid-sweep; flag so the outer loop exits too.
                    flag = True
                    break
                else:
                    prevLogLikelihoodOfMLA = logLikelihoodOfMLA
                    burninInfo.append(logLikelihoodOfMLA)
        if flag:
            break
    #print len(burninInfo)


    # Main phase: 10000 sweeps, recording every 10th sample into `samples`.
    count = 0
    for i in xrange(10000):
        for j in xrange(n1 + n2):
            count += 1
            sample = getRandomSample(j, assignment, w1, w2)
            assignment[j] = sample
            if count%10==0:
                for x in xrange(n1 + n2):
                    samples[x][assignment[x]] += 1

    # Decode the MLA: per variable, the first character (in dict iteration
    # order) attaining the maximal count.
    MLA = ''
    for i in samples:
        maxMarginal = max(i.values())
        for j in i.keys():
            if i[j]==maxMarginal:
                MLA += j
                break
    end = clock()
    totalTime = end - start

    return MLA, samples,totalTime
Esempio n. 7
0
def GibbsSamplerRandomized(w1, w2):
    """Random-scan Gibbs sampler that decodes the word pair (w1, w2).

    Counterpart of GibbsSampler: each step resamples one uniformly chosen
    variable instead of sweeping in order.  After burn-in, takes 10000
    single-variable samples, recording every 10th into the marginal counts.

    Returns (MLA, samples, totalTime): the decoded string over both words
    concatenated, the per-variable marginal count dicts, and the CPU time of
    the sampling phases as measured by clock().
    """
    global factorLookUp

    n1 = len(w1)
    n2 = len(w2)

    #Create a factor look-up list
    sk1, sk2, ps = basics.findingSkips(w1, w2)
    factorLookUp = basics.getFactor(n1, n2, sk1, sk2, ps)

    #Generate initial assignment
    seed(time())
    assignment = [choice(basics.characterArray) for i in xrange(n1 + n2)]
    # Post-burn-in marginal counts, one dict per variable.
    samples = [{t:0 for t in basics.characterArray} for i in xrange(n1 + n2)]

    # Counts used only for burn-in convergence detection.
    burnInSamples = [{t:0 for t in basics.characterArray} for i in xrange(n1 + n2)]
    # Sentinel far above any reachable value of the statistic.
    prevLogLikelihoodOfMLA = 500
    burninInfo = []
    count = 0

    # Seed the burn-in counts with the initial random assignment.
    for i in xrange(len(assignment)):
        burnInSamples[i][assignment[i]] += 1

    # Burn-in: one random variable per step until the statistic stabilises.
    start = clock()
    while(True):
        varSampled = choice(xrange(n1 + n2))
        sample = getRandomSample(varSampled, assignment, w1, w2)
        assignment[varSampled] = sample
        # Count the full current assignment, not just the resampled variable.
        for x in xrange(len(assignment)):
            burnInSamples[x][assignment[x]] += 1
        count += 1
        if count>100:
            # NOTE(review): sum of per-variable modal frequencies, not a
            # true log-likelihood.
            logLikelihoodOfMLA = 0
            for j in xrange(n1 + n2):
                logLikelihoodOfMLA += float(max(burnInSamples[j].values()))/count
            if abs(logLikelihoodOfMLA - prevLogLikelihoodOfMLA)<0.0002:
                break
            else:
                prevLogLikelihoodOfMLA = logLikelihoodOfMLA
                burninInfo.append(logLikelihoodOfMLA)


    # Main phase: 10000 random-scan steps, recording every 10th sample.
    count = 0
    for i in xrange(10000):
        varSampled = choice(xrange(n1 + n2))
        count += 1
        sample = getRandomSample(varSampled, assignment, w1, w2)
        assignment[varSampled] = sample
        if count%10==0:
            for x in xrange(n1 + n2):
                samples[x][assignment[x]] += 1

    # Decode the MLA: per variable, the first character (in dict iteration
    # order) attaining the maximal count.
    MLA = ''
    for i in samples:
        maxMarginal = max(i.values())
        for j in i.keys():
            if i[j]==maxMarginal:
                MLA += j
                break
    end = clock()
    totalTime = end - start

    return MLA, samples,totalTime



#GibbsSampler([82,338,293,484,505,211], [776,477,10,82,338])
#GibbsSampler([542,949,830], [742,981,543,625,830,758])
#GibbsSamplerRandomized([542,949,830], [742,981,543,625,830,758])[0]