Esempio n. 1
0
import sys
import math
from nlm import NLM

# Average per-symbol cross-entropy, in bits, of the text on stdin under the
# character-level NLM.
fin = sys.stdin
fout = sys.stdout

# Read the whole input up front; surrounding whitespace is stripped per line.
lines = [raw.strip() for raw in fin.readlines()]

NLM.load('base')
# NLM.load('large')
entropy = 0
seqlen = 0
for line in lines:
    h = NLM()
    # Spaces are presented to the model as '_' and each sentence is closed
    # with the '</s>' end marker, which is scored like any other symbol.
    symbols = ['_' if ch == ' ' else ch for ch in line] + ['</s>']
    for sym in symbols:
        entropy -= math.log(h.next_prob(sym), 2)
        h += sym
    seqlen += len(symbols)
print('p: {}'.format(entropy / seqlen))
Esempio n. 2
0
import sys
import math
from nlm import NLM

# sents = ['therestcanbeatotalmessandyoucanstillreaditwithoutaproblem', 'thisisbecausethehumanminddoesnotreadeveryletterbyitselfbutthewordasawhole']
# Beam search that re-inserts word boundaries ('_') into sentences whose
# spaces were removed. Scores are accumulated negative log-probabilities,
# so a smaller score means a more likely hypothesis.
fin = sys.stdin
fout = sys.stdout

sents = [line.strip() for line in fin.readlines()]
sents = [sent.replace(' ', '') for sent in sents]

# Maximum number of hypotheses kept after each input symbol.
bw = 20

NLM.load('base')
for sent in sents:
    h = NLM()
    prevbeam = [(0, h)]
    for c in list(sent) + ["</s>"]:
        currbeam = []
        for prob, state in prevbeam:
            # Both candidates consume `c`; compute its cost and the
            # resulting model state once and share them.
            cost = prob - math.log(state.next_prob(c))
            advanced = state + c
            # Candidate 1: no word boundary after `c`.
            currbeam.append((cost, advanced))
            # Candidate 2: a word boundary ('_') follows `c`.
            # NOTE(review): this is also generated when c is '</s>', i.e. a
            # boundary after the end marker - confirm that is intended.
            currbeam.append(
                (cost - math.log(advanced.next_prob('_')), advanced + '_'))
        if len(currbeam) > bw:
            # Rank by score only. Sorting the raw tuples would fall back to
            # comparing the NLM objects whenever two scores tie.
            currbeam = sorted(currbeam, key=lambda cand: cand[0])[:bw]
        prevbeam = currbeam
    # NOTE(review): nothing visible here writes the best hypothesis in
    # `prevbeam` to `fout` - the excerpt appears to end before the output step.
import sys
sys.path.insert(1, '../')
from nlm import NLM
from math import log

# Input file read by this script (one sentence per line).
path = './test.txt.novowels'

if __name__ == "__main__":

    NLM.load("base")

    # NOTE(review): the file is opened without a context manager and no close
    # is visible in this excerpt.
    f = open(path, 'r')

    # Not referenced in the visible part of the script.
    vowels = ['a', 'e', 'i', 'o', 'u']
    # Drives the `range(repeat_times + 1)` loop below.
    repeat_times = 2

    # Initial model state; reused as the beam seed for every line.
    h = NLM()

    for line in f.readlines():
        # Spaces are presented to the model as '_'.
        line = line.replace(' ', '_')
        # Beam of (accumulated log-probability, model state) pairs.
        beam = [(0, h)]
        # Presumably the beam width; unused in the visible portion - TODO confirm.
        b = 40
        # line[:-1] drops the last character (presumably the trailing
        # newline); '</s>' is appended as the end-of-sentence symbol.
        for c in list(line[:-1]) + ["</s>"]:
            new_beam = []
            prev = [beam]
            for i in range(repeat_times + 1):
                # NOTE(review): `tmp` is never filled and `prev` never grows in
                # the visible code, so each pass re-scores the same hypotheses;
                # the excerpt appears to be cut off before that logic.
                tmp = []
                for score, state in prev[-1]:
                    # Extend the hypothesis with `c`, adding its log-probability.
                    new_score = score + log(state.next_prob(c))
                    new_state = state + c
                    new_beam.append((new_score, new_state))
Esempio n. 4
0
import math
from nlm import NLM
import sys

# Input/output streams for the script.
fin = sys.stdin
fout = sys.stdout

# All input lines with surrounding whitespace stripped.
lines = [line.strip() for line in fin.readlines()]
# Not referenced in the visible part of the script.
newlines = []
# Characters removed from each input line and tried as insertions by the
# search loop that follows.
vowels = ['a', 'e', 'i', 'o', 'u']
# Presumably the beam width; not used in the visible portion - TODO confirm.
bw = 40

# Load the 'base' model before any NLM() state is created.
NLM.load('base')
for k, line in enumerate(lines):
    newline = ''.join([c for c in line if c not in vowels])
    newline = [c if c != ' ' else '_' for c in newline]
    h = NLM()
    prevbeam = [(0, h)]
    for i, c in enumerate(newline + ['</s>']):
        currbeam = []
        for prob, h in prevbeam:
            currbeam.append((prob - math.log(h.next_prob(c)), h + c))
            for v1 in vowels:
                currbeam.append((prob - math.log(
                    (h + v1).next_prob(c)) - math.log(h.next_prob(v1)),
                                 h + v1 + c))
                for v2 in vowels:
                    currbeam.append(
                        (prob - math.log(
                            (h + v1 + v2).next_prob(c)) - math.log(
                                (h + v1).next_prob(v2)) -
import sys
import math
from nlm import NLM
from random import choices
import random

# Draw 10 random sentences from the 'large' model, one symbol at a time, and
# write them to stdout separated by blank lines.
fout = sys.stdout

NLM.load('large')
sentences = []
for _ in range(10):
    h = NLM()
    pieces = []
    while True:
        # Query the model once per step so that the symbols and their weights
        # come from the same distribution object.
        dist = h.next_prob()
        choicechar = choices(population=list(dist.keys()),
                             weights=list(dist.values()))[0]
        if choicechar == '</s>':
            # End-of-sentence marker: stop without emitting it.
            break
        # '_' is the model's symbol for a space.
        pieces.append(' ' if choicechar == '_' else choicechar)
        h += choicechar
    sentences.append(''.join(pieces))

for s in sentences:
    fout.write(s + '\n\n')
from nlm import NLM
import math
import sys

if __name__ == "__main__":
    # Print the average number of bits per symbol that the 'base' model
    # assigns to the text on stdin; every line also pays for its '</s>'.
    NLM.load('base')
    total_bits = 0
    symbol_count = 0

    for raw in sys.stdin:
        # Spaces are presented to the model as '_'.
        text = raw.strip().replace(" ", "_")
        state = NLM()
        for symbol in text:
            total_bits -= math.log(state.next_prob(symbol), 2)
            state += symbol
        total_bits -= math.log(state.next_prob("</s>"), 2)
        symbol_count += len(text) + 1

    print('Entropy:', total_bits / symbol_count)
from nlm import NLM
import math
import sys
import random

if __name__ == "__main__":
    # Sample 10 continuations of a fixed prefix using temperature-scaled
    # probabilities, printing each symbol as it is drawn.
    NLM.load('base')

    # Temperature: each probability is raised to 1/t before sampling, so
    # t < 1 sharpens the distribution towards the likeliest symbols.
    t = 0.5

    for _ in range(10):
        # Presumably seeds the history with these space-separated symbols -
        # TODO confirm against the NLM constructor.
        h = NLM("t o m _ a n d _ j e r r y")

        while True:
            prob_dict = h.next_prob()
            symbols = list(prob_dict.keys())
            # Build all weights in one pass. Rebinding the dict per item
            # would keep only the last symbol and break the lookup.
            weights = [prob_dict[c] ** (1 / t) for c in symbols]

            # random.choices normalises the weights itself.
            [choice] = random.choices(symbols, weights)

            if choice == "</s>":
                # The end marker finishes this sample: close the line and
                # move on to the next one.
                print(choice)
                break

            print(choice, end=' ')
            h += choice
Esempio n. 8
0
import sys

sys.path.insert(1, '../')
from nlm import NLM
from math import log

# Input file read by this script (one sentence per line).
path = './test.txt.nospaces'

if __name__ == "__main__":

    NLM.load("huge")

    # NOTE(review): the file is opened without a context manager and no close
    # is visible in this excerpt.
    f = open(path, 'r')

    # Initial model state; reused as the beam seed for every line.
    h = NLM()

    for line in f.readlines():
        # Beam of (accumulated log-probability, model state) pairs.
        beam = [(0, h)]
        # Presumably the beam width; unused in the visible portion - TODO confirm.
        b = 20
        # line[:-1] drops the last character (presumably the trailing
        # newline); '</s>' is appended as the end-of-sentence symbol.
        for c in list(line[:-1]) + ["</s>"]:
            newbeam = []
            for score, state in beam:
                # Candidate 1: `c` directly follows the current history.
                newscore = score + log(state.next_prob(c))
                newstate = state + c
                newbeam.append((newscore, newstate))

                # Candidate 2: a '_' (word boundary) is inserted before `c`;
                # its log-probability is added to that of `c` given the '_'.
                newscore = score + log(state.next_prob("_")) + log(
                    (state + "_").next_prob(c))
                newstate = state + '_' + c
                newbeam.append((newscore, newstate))
            # NOTE(review): the visible code never prunes `newbeam` or assigns
            # it back to `beam`; the excerpt appears to end before that step.
Esempio n. 9
0
from nlm import NLM
from math import log
import sys

if __name__ == "__main__":
    NLM.load('base')

    def surprisal(prob):
        """Return the information content of *prob* in bits."""
        return -log(prob, 2)

    # Running totals: bits spent so far and symbols scored so far
    # (each line counts one extra symbol for its '</s>' marker).
    bits = 0
    count = 0
    for raw in sys.stdin:
        # Spaces are presented to the model as '_'.
        text = raw.strip().replace(" ", "_")
        model = NLM()
        for ch in text:
            bits += surprisal(model.next_prob(ch))
            model += ch
        bits += surprisal(model.next_prob("</s>"))
        count += len(text) + 1
    print(bits / count)