Code Example #1
import pytorch_lightning as pl
import torch
from nltk.tag.hmm import HiddenMarkovModelTrainer
from tqdm import trange


def inference_dset(model: pl.LightningModule, dset: SignSequenceDataset):
    # list of sequences with elements (pred_class, true_class)
    labeled = []
    model.eval()
    with torch.no_grad():
        for i in trange(len(dset)):
            images, targets, _ = dset[i]
            logits = model(images)

            # top-1 predicted class for each frame in the sequence
            _, pred = logits.topk(1, dim=1)
            predicted_signs = pred[:, 0].tolist()

            target_ints = [int(targ) for targ in targets.tolist()]

            labeled.append(list(zip(predicted_signs, target_ints)))

    return labeled


# Run the trained classifier over both splits to get sequences of
# (predicted_class, true_class) pairs.
labeled_train = inference_dset(trained, dset_train)
labeled_val = inference_dset(trained, dset_val)

# NLTK's HMM trains on sequences of (symbol, tag) tuples: here the symbol is
# the classifier's prediction and the tag is the true class, so the HMM learns
# to smooth the per-frame predictions.
hmm_trainer = HiddenMarkovModelTrainer()
hmm_tagger = hmm_trainer.train(labeled_sequences=labeled_train)

# Reports per-frame tagging accuracy on the validation sequences.
hmm_tagger.test(labeled_val, verbose=False)
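
To then apply the tagger to new predictions, here is a minimal sketch, assuming the hmm_tagger and labeled_val objects above; the variable names below are illustrative only:

# A minimal sketch, assuming hmm_tagger and labeled_val from above.
# tag() takes a sequence of observations (here: per-frame predicted classes)
# and returns (observation, state) pairs, where the state is the
# HMM-smoothed class for each frame.
example_preds = [pred for pred, _ in labeled_val[0]]
smoothed = hmm_tagger.tag(example_preds)
smoothed_classes = [state for _, state in smoothed]
print(smoothed_classes)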
Code Example #2
import glob

from nltk.tag.hmm import HiddenMarkovModelTrainer

# Collect sentences of [word, tag] pairs from the twitie-tagger "agree"
# corpora, where each token is stored as word_TAG.
tokenized_docs = []
for file in glob.glob("twitie-tagger/corpora/*agree"):
    print(file)
    with open(file) as f:
        lines = [line.strip().split() for line in f]

    tokenized_docs += [
        [word.split("_")[-2:] for word in line if len(word) > 1]
        for line in lines
    ]

tokenized_docs_tuples = [[tuple(word) for word in line] for line in tokenized_docs]

# Sanity check: every token should have split into exactly (word, tag).
for sent in tokenized_docs_tuples:
    for word in sent:
        if len(word) != 2:
            print(word)

words = [word[0] for line in tokenized_docs for word in line]
wordsVocab = list(set(words))
states = [word[1] for line in tokenized_docs for word in line if len(word) > 1]
statesVocab = list(set(states))

# HMMtrainer = HiddenMarkovModelTrainer(states=statesVocab, symbols=wordsVocab)

HMMtrainer = HiddenMarkovModelTrainer()
hmmmodel = HMMtrainer.train(tokenized_docs_tuples)


# Example usage on unseen text:
# print(hmmmodel.tag("wtf did u do ?".split()))
# sentence = "my home is burning".split()
# print(hmmmodel.tag(sentence))
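
As in example #1, the tagger can also be evaluated on held-out sentences. A minimal sketch, assuming tokenized_docs_tuples from above; the 90/10 split is illustrative and not part of the original code:

# A minimal sketch, assuming tokenized_docs_tuples from above.
# Hold out the last 10% of sentences for evaluation (illustrative split).
split = int(0.9 * len(tokenized_docs_tuples))
train_sents = tokenized_docs_tuples[:split]
test_sents = tokenized_docs_tuples[split:]

eval_trainer = HiddenMarkovModelTrainer()
eval_model = eval_trainer.train(train_sents)

# test() prints per-token tagging accuracy against the gold tags.
eval_model.test(test_sents, verbose=False)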