Code Example #1
                words = line.rstrip("\n")
                # skip blank separator lines and duplicates
                if words != "" and words not in data_set:
                    data_set.append(words)
        return data_set


def check(filename, word):
    # Collect, for every tag, the value recorded for `word`; each data line is
    # assumed to look like "<tag> <word> <value>", e.g. an emission probability.
    result = {}
    with open(filename) as file:
        data = file.readlines()
        for line in data:
            words = line.rstrip("\n").split(" ")
            if word == words[1]:
                result[words[0]] = float(words[2])
    return result


def argmax(alist):
    # Return the key with the largest value (None if the dict is empty)
    return max(alist, key=alist.get) if alist else None


testing = "POS/Part 5/test.in"
emission_count = "POS/Part 5/emission_train_count.txt"
state_count = "POS/Part 5/emission_count.txt"
filetest = testing_splitter(testing, mode="unique")
bjos, predicts = gen_bjos(state_count, filetest, emission_count)
output_to_file(bjos, "POS/Part 5/emission_testing.txt")
tagger(predicts, "POS/Part 5/emission_testing_tags.txt")
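The check and argmax helpers above amount to an emission-only lookup: gather every tag's recorded value for a word, then keep the tag with the highest one. Below is a minimal sketch of that idea, reusing check and argmax from this example; the predict_tag name, the example path, and the "<tag> <word> <probability>" file layout are assumptions for illustration, since gen_bjos and output_to_file are not shown here.

def predict_tag(emission_file, word):
    # Look up the value stored for `word` under every tag,
    # then return the tag with the highest value (None if the word is unseen).
    candidates = check(emission_file, word)   # {tag: value}
    return argmax(candidates) if candidates else None

# Illustrative call only; the path mirrors the files written in this project.
# print(predict_tag("POS/Part 5/emission_training.txt", "the"))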
Code Example #2
File: emission_part5.py  Project: glencbz/MLproject
		data_set = []
		with open(filename) as file:
			data = file.readlines()
			for line in data:
				words = line.strip()
				# skip blank separator lines and duplicates
				if words != "" and words not in data_set:
					data_set.append(words)
		return data_set

def check(filename, word):
	# Collect, for every tag, the value recorded for `word`; each data line is
	# assumed to look like "<tag> <word> <value>", e.g. an emission probability.
	result = {}
	with open(filename) as file:
		data = file.readlines()
		for line in data:
			words = line.strip().split(" ")
			if word == words[1]:
				result[words[0]] = float(words[2])
	return result

def argmax(alist):
	# Return the key with the largest value (None if the dict is empty)
	return max(alist, key=alist.get) if alist else None

testing = "../dev.in"
emission_count = "../Part 3/emission_train_count.txt"
state_count = "../Part 3/emission_count.txt"
filetest = testing_splitter(testing, mode="unique")
bjos, predicts = gen_bjos(state_count, filetest, emission_count)
output_to_file(bjos, "part5_emission_testing.txt")
Code Example #3
File: training.py  Project: DarrenAscione/MLproject
				dict[words[1]][words[0]] += 1
	return dict

def count_all_states(word_count):
	states = {}
	for key in word_count.keys():
		states[key] = sum(word_count[key].values())
	return states

# Computes the emission probabilities b_j(o) = Count(tag j emits word o) / Count(tag j)
def gen_bjo(word_count):
	bjo = {}
	state_count = count_all_states(word_count)
	for key in word_count.keys():
		bjo[key] = {}
		for words in word_count[key].keys():
			bjo[key][words] = word_count[key][words] * 1.0 / state_count[key]
	return bjo

training = "POS/train"
word_count = count_word(training)
bjo = gen_bjo(word_count)
# Outputs human-readable format
output_to_file(bjo, "POS/Part 5/emission_trainingReadable.txt", mode="readable")
# Outputs normal format (emission probabilities)
output_to_file(bjo, "POS/Part 5/emission_training.txt")
# Outputs normal format (word count for each tag)
output_to_file(count_word(training), "POS/Part 5/emission_train_count.txt")
# output_to_file(count_all_states(word_count), "POS/state_count.txt")
output_to_file(count_all_states(word_count), "POS/Part 5/emission_count.txt", mode="state")
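gen_bjo above is the maximum-likelihood estimate of the emission probability, b_j(o) = Count(tag j emits word o) / Count(tag j). A minimal sketch with a toy word_count dictionary; the tags and counts are made up purely for illustration.

# Toy nested counts {tag: {word: count}} -- illustrative values only.
toy_word_count = {"DT": {"the": 3, "a": 1}, "NN": {"dog": 2}}

toy_bjo = gen_bjo(toy_word_count)
# count_all_states(toy_word_count) -> {"DT": 4, "NN": 2}
# toy_bjo -> {"DT": {"the": 0.75, "a": 0.25}, "NN": {"dog": 1.0}}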
Code Example #4
File: testing.py  Project: glencbz/MLproject
			data = file.readlines()
			for line in data:
				words = line.rstrip("\n")
				# skip blank separator lines and duplicates
				if words != "" and words not in data_set:
					data_set.append(words)
		return data_set

def check(filename, word):
	# Collect, for every tag, the value recorded for `word`; each data line is
	# assumed to look like "<tag> <word> <value>", e.g. an emission probability.
	result = {}
	with open(filename) as file:
		data = file.readlines()
		for line in data:
			words = line.rstrip("\n").split(" ")
			if word == words[1]:
				result[words[0]] = float(words[2])
	return result

def argmax(alist):
	# Return the key with the largest value (None if the dict is empty)
	return max(alist, key=alist.get) if alist else None

testing = "POS/Part 5/test.in"
emission_count = "POS/Part 5/emission_train_count.txt"
state_count = "POS/Part 5/emission_count.txt"
filetest = testing_splitter(testing, mode="unique")
bjos, predicts = gen_bjos(state_count, filetest, emission_count)
output_to_file(bjos, "POS/Part 5/emission_testing.txt")
tagger(predicts, "POS/Part 5/emission_testing_tags.txt")

Code Example #5
            for line in data:
                words = line.strip()
                # skip blank separator lines and duplicates
                if words != "" and words not in data_set:
                    data_set.append(words)
        return data_set


def check(filename, word):
    # Collect, for every tag, the value recorded for `word`; each data line is
    # assumed to look like "<tag> <word> <value>", e.g. an emission probability.
    result = {}
    with open(filename) as file:
        data = file.readlines()
        for line in data:
            words = line.strip().split(" ")
            if word == words[1]:
                result[words[0]] = float(words[2])
    return result


def argmax(alist):
    # Return the key with the largest value (None if the dict is empty)
    return max(alist, key=alist.get) if alist else None


testing = "../dev.in"
emission_count = "../Part 3/emission_train_count.txt"
state_count = "../Part 3/emission_count.txt"
filetest = testing_splitter(testing, mode="unique")
bjos, predicts = gen_bjos(state_count, filetest, emission_count)
output_to_file(bjos, "part5_emission_testing.txt")