import sys
import math
from nlm import NLM

fin = sys.stdin
fout = sys.stdout
lines = [line.strip() for line in fin.readlines()]

NLM.load('base')
# NLM.load('large')

# accumulate total cross-entropy (in bits) and total sequence length
entropy, seqlen = 0, 0
for line in lines:
    h = NLM()
    for c in line:
        c = c if c != ' ' else '_'
        entropy += -math.log(h.next_prob(c), 2)
        h += c
    # account for the end-of-sentence symbol
    entropy += -math.log(h.next_prob('</s>'), 2)
    h += '</s>'
    seqlen += len(line) + 1
# report bits per character
print('p: {}'.format(entropy / seqlen))
import sys
import math
from nlm import NLM

# sents = ['therestcanbeatotalmessandyoucanstillreaditwithoutaproblem', 'thisisbecausethehumanminddoesnotreadeveryletterbyitselfbutthewordasawhole']
fin = sys.stdin
fout = sys.stdout
sents = [line.strip() for line in fin.readlines()]
sents = [sent.replace(' ', '') for sent in sents]
bw = 20

NLM.load('base')
for sent in sents:
    h = NLM()
    prevbeam = [(0, h)]
    for i, c in enumerate(list(sent) + ["</s>"]):
        currbeam = []
        for prob, h in prevbeam:
            currbeam.append((prob - math.log(h.next_prob(c)), h + c))
            currbeam.append((prob - math.log(h.next_prob(c))
                             - math.log((h + c).next_prob('_')), h + c + '_'))
        # print(currbeam)
        if len(currbeam) > bw:
            # print(currbeam)
            currbeam = sorted(currbeam)[:bw]
            # print(currbeam)
            # exit()
        prevbeam = currbeam
        # print(prevbeam)
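# The script above builds the segmentation beam but never prints a result (the
# final print is commented out). Below is a self-contained sketch of the same
# idea that also carries the segmented text in each beam entry so the best
# hypothesis can be returned; the `text` field and the `segment` helper are
# assumptions for illustration, not part of the original script. It assumes
# NLM.load('base') has already been called.
import math
from nlm import NLM

def segment(sent, bw=20):
    prevbeam = [(0.0, NLM(), '')]  # (neg log prob, LM state, segmented text)
    for c in list(sent) + ['</s>']:
        currbeam = []
        for prob, h, text in prevbeam:
            nxt = text + c if c != '</s>' else text
            # continue the current word with c
            currbeam.append((prob - math.log(h.next_prob(c)), h + c, nxt))
            # or end the word after c by also emitting '_' (a space)
            currbeam.append((prob - math.log(h.next_prob(c))
                             - math.log((h + c).next_prob('_')),
                             h + c + '_', nxt + ' '))
        prevbeam = sorted(currbeam, key=lambda x: x[0])[:bw]
    # smallest negative log probability = most probable segmentation
    return min(prevbeam, key=lambda x: x[0])[2]

# usage sketch: print(segment('acatinthehat'))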
import sys
sys.path.insert(1, '../')
from nlm import NLM
from math import log

path = './test.txt.novowels'

if __name__ == "__main__":
    NLM.load("base")
    f = open(path, 'r')
    vowels = ['a', 'e', 'i', 'o', 'u']
    repeat_times = 2
    h = NLM()
    for line in f.readlines():
        line = line.replace(' ', '_')
        beam = [(0, h)]
        b = 40
        for c in list(line[:-1]) + ["</s>"]:
            new_beam = []
            prev = [beam]
            for i in range(repeat_times + 1):
                tmp = []
                for score, state in prev[-1]:
                    new_score = score + log(state.next_prob(c))
                    new_state = state + c
                    new_beam.append((new_score, new_state))
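# The script above is cut off before the vowel-insertion branch, the beam
# pruning, and the output step. The sketch below is one self-contained way to
# finish the same idea: before scoring each observed character, allow up to
# repeat_times vowels to be inserted, then keep only the b best hypotheses.
# The `text` field, the function name, and the final selection are assumptions
# for illustration, not part of the original file; NLM.load("base") is assumed
# to have been called already.
from math import log
from nlm import NLM

def restore_vowels(line, vowels=('a', 'e', 'i', 'o', 'u'),
                   repeat_times=2, b=40):
    # `line` should already have spaces replaced by '_' and no trailing newline
    beam = [(0.0, NLM(), '')]  # (log prob, LM state, restored text)
    for c in list(line) + ['</s>']:
        candidates = []
        prev_level = beam
        for _ in range(repeat_times + 1):
            next_level = []
            for score, state, text in prev_level:
                # emit the observed character from this hypothesis
                out = text + c if c != '</s>' else text
                candidates.append((score + log(state.next_prob(c)),
                                   state + c, out))
                # or insert one more vowel before emitting c
                for v in vowels:
                    next_level.append((score + log(state.next_prob(v)),
                                       state + v, text + v))
            prev_level = next_level
        # keep the b most probable hypotheses (scores are log probabilities)
        beam = sorted(candidates, key=lambda x: x[0], reverse=True)[:b]
    return max(beam, key=lambda x: x[0])[2]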
import math
from nlm import NLM
import sys

fin = sys.stdin
fout = sys.stdout
lines = [line.strip() for line in fin.readlines()]
newlines = []
vowels = ['a', 'e', 'i', 'o', 'u']
bw = 40

NLM.load('base')
for k, line in enumerate(lines):
    newline = ''.join([c for c in line if c not in vowels])
    newline = [c if c != ' ' else '_' for c in newline]
    h = NLM()
    prevbeam = [(0, h)]
    for i, c in enumerate(newline + ['</s>']):
        currbeam = []
        for prob, h in prevbeam:
            # emit c directly
            currbeam.append((prob - math.log(h.next_prob(c)), h + c))
            for v1 in vowels:
                # insert one vowel v1 before c
                currbeam.append((prob
                                 - math.log((h + v1).next_prob(c))
                                 - math.log(h.next_prob(v1)),
                                 h + v1 + c))
                for v2 in vowels:
                    # insert two vowels (v1, v2) before c
                    currbeam.append((prob
                                     - math.log((h + v1 + v2).next_prob(c))
                                     - math.log((h + v1).next_prob(v2))
                                     - math.log(h.next_prob(v1)),
                                     h + v1 + v2 + c))
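        # -- assumed continuation: the original script is cut off at this point --
        # prune to the bw lowest-cost hypotheses (costs are negative log probs)
        if len(currbeam) > bw:
            currbeam = sorted(currbeam, key=lambda x: x[0])[:bw]
        prevbeam = currbeam
    # printing the restored line would additionally require carrying the decoded
    # text in each beam tuple, e.g. (cost, state, text); that third field is not
    # in the original script:
    # best_cost, best_state, best_text = min(prevbeam, key=lambda x: x[0])
    # fout.write(best_text.replace('_', ' ') + '\n')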
import sys
import math
from nlm import NLM
from random import choices
import random

fout = sys.stdout

NLM.load('large')
sentences = []
for _ in range(10):
    h = NLM()
    sent = ''
    choicekeys = list(h.next_prob().keys())
    choicechar = choices(population=choicekeys,
                         weights=h.next_prob().values())[0]
    while choicechar != '</s>':
        if choicechar != '_':
            sent += choicechar
        else:
            sent += ' '
        h += choicechar
        choicekeys = list(h.next_prob().keys())
        choicechar = choices(population=choicekeys,
                             weights=h.next_prob().values())[0]
    sentences.append(sent)

for s in sentences:
    fout.write(s + '\n\n')
from nlm import NLM
import math
import sys

if __name__ == "__main__":
    NLM.load('base')
    p = 0
    l = 0
    for line in sys.stdin:
        line = line.strip().replace(" ", "_")
        h = NLM()
        for char in line:
            p += -math.log(h.next_prob(char), 2)
            h += char
        p += -math.log(h.next_prob("</s>"), 2)
        l += len(line) + 1
    entropy = p / l
    print('Entropy:', entropy)
from nlm import NLM
import math
import sys
import random

if __name__ == "__main__":
    NLM.load('base')
    t = 0.5  # sampling temperature (t < 1 sharpens the distribution)
    for _ in range(10):
        # restart generation from the prefix "tom and jerry"
        h = NLM("t o m _ a n d _ j e r r y")
        s = list(h.next_prob().keys())
        while True:
            # rescale every probability by the temperature before sampling
            prob_dict = {c: p ** (1 / t) for c, p in h.next_prob().items()}
            [choice] = random.choices(s, [prob_dict[c] for c in s])
            if choice != "</s>":
                print(choice, end=' ')
                h += choice
            else:
                print(choice)
                break
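# Quick illustration of what t = 0.5 does to the sampling distribution in the
# script above: raising each probability to 1/t (here, squaring) makes the most
# likely characters even more likely before random.choices draws from the
# unnormalised weights. The probabilities below are made up for the example.
probs = {'a': 0.6, 'b': 0.3, 'c': 0.1}
t = 0.5
sharpened = {c: p ** (1 / t) for c, p in probs.items()}
total = sum(sharpened.values())
print({c: round(w / total, 3) for c, w in sharpened.items()})
# -> {'a': 0.783, 'b': 0.196, 'c': 0.022}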
import sys
sys.path.insert(1, '../')
from nlm import NLM
from math import log

path = './test.txt.nospaces'

if __name__ == "__main__":
    NLM.load("huge")
    f = open(path, 'r')
    h = NLM()
    for line in f.readlines():
        beam = [(0, h)]
        b = 20
        for c in list(line[:-1]) + ["</s>"]:
            newbeam = []
            for score, state in beam:
                # hypothesis 1: emit c directly
                newscore = score + log(state.next_prob(c))
                newstate = state + c
                newbeam.append((newscore, newstate))
                # hypothesis 2: insert a space ('_') before c
                newscore = score + log(state.next_prob("_")) + log(
                    (state + "_").next_prob(c))
                newstate = state + '_' + c
                newbeam.append((newscore, newstate))
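            # -- assumed continuation: the original script is cut off here --
            # keep only the b highest-scoring hypotheses (scores are log probs)
            beam = sorted(newbeam, key=lambda x: x[0], reverse=True)[:b]
        # printing the re-spaced line would additionally require carrying the
        # decoded text in each beam tuple, e.g. (score, state, text); that third
        # field is not in the original script:
        # best_score, best_state, best_text = max(beam, key=lambda x: x[0])
        # print(best_text)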
from nlm import NLM
from math import log
import sys

if __name__ == "__main__":
    NLM.load('base')
    # entropy
    p = 0
    l = 0
    for line in sys.stdin:
        # for line in open('test.txt', 'r').readlines():
        line = line.strip().replace(" ", "_")
        h = NLM()
        for c in line:
            p += -log(h.next_prob(c), 2)
            h += c
        p += -log(h.next_prob("</s>"), 2)
        l += len(line) + 1
    print(p / l)