def infer_prepare_params(basic_or_complex, fileToInfer):
    """Run Viterbi inference on ``fileToInfer`` once per saved weight vector.

    The feature builder and the weight-file prefix are selected by
    ``basic_or_complex``.  Every file in the current directory whose name
    starts with that prefix is loaded with ``np.loadtxt`` and used as the
    weight vector for one ``infer_aux`` run; the value each run returns is
    collected and the whole list is handed to ``infer_aux_results``.

    Args:
        basic_or_complex: ``'basic'`` or ``'complex'``.
        fileToInfer: path of the tagged file to run inference on.

    Raises:
        AssertionError: if ``basic_or_complex`` is neither accepted value.
    """
    train_parser = MyParser("../train.wtag")
    seenWordsToTagsDict = train_parser.getSeenWordsToTagsDict()

    if basic_or_complex == 'basic':
        fb = BasicFeatureVectorBuilder(train_parser, 0)
        filePrefix = 'finish_basic_opt_v_'
    elif basic_or_complex == 'complex':
        fb = ComplexFeatureVectorBuilder(train_parser, False)
        filePrefix = 'finish_complex_opt_v_'
    else:
        # Raised explicitly instead of ``assert False`` so the check is not
        # stripped under ``python -O``; the exception type stays the same.
        raise AssertionError(
            "basic_or_complex must be 'basic' or 'complex', got %r"
            % (basic_or_complex,))

    # Output file names embed the input path with '.' and '/' removed.
    fn = str(fileToInfer).replace('.', '').replace('/', '')
    parser = MyParser(fileToInfer)
    splitted = parser.splitted
    mle = MLE(train_parser.getUniqueTags(), splitted, fb)

    prefixed = sorted(
        filename for filename in os.listdir('.')
        if filename.startswith(filePrefix)
    )
    print(prefixed)

    results = []
    for v_file in prefixed:
        v = np.loadtxt(v_file)
        vit = Viterbi(mle, mle.allTags, v, seenWordsToTagsDict)
        # ``with`` guarantees both files are closed even if inference fails.
        with open(fn + "_results_" + v_file, 'w') as res_file, \
                open(fn + "_expected_" + v_file, 'w') as exp_file:
            accuracy = infer_aux(exp_file, res_file, v_file, splitted, vit)
        results.append(accuracy)

    infer_aux_results(prefixed, results, fileToInfer, fn)
def main():
    """Tokenize and parse a hard-coded sample program and print the outcome.

    Each token produced by the lexer gets an ``index`` attribute computed by
    ``find_column`` before parsing.  Prints ``False`` when the parser returns
    ``None``; otherwise prints element 1 of the parser's result.
    """
    # The program text is embedded here; this function does not read from
    # the command line or stdin.
    string = """ int main(){ int x; x = 5; print(0, x, false); return 0; } """

    lexer = MyLexer()
    token_list = []
    for token in lexer.tokenize(string):
        token.index = find_column(string, token)
        token_list.append(token)

    # The parser receives the token list at construction and again, as an
    # iterator, when parsing.
    parser = MyParser(token_list)
    out = parser.parse(iter(token_list))
    if out is None:
        print(False)
    else:
        print(out[1])
def realDataTest():
    """Time the MLE steps on the training corpus and check the gradient.

    Prints how long MLE construction, ``mle.calculate`` and
    ``mle.calculateGradient`` take, saves the gradient to
    ``train_gradient2.txt``, prints the norm of its difference from the
    contents of ``train_gradient.txt``, and finally prints the result of
    ``mle.findBestV()``.
    """
    train_parser = MyParser("../train.wtag")
    sentences = train_parser.splitted
    builder = BasicFeatureVectorBuilder(train_parser)
    unique_tags = train_parser.getUniqueTags()

    began = time.time()
    mle = MLE(unique_tags, sentences, builder)
    finished = time.time()
    print("End of preprocessing, took: ", finished - began)

    weights = np.ones(builder.size)

    began = time.time()
    print(mle.calculate(weights))
    finished = time.time()
    # Elapsed seconds divided by 60, i.e. reported in minutes.
    print("calcV took: " + str((finished - began) / 60))

    began = time.time()
    gradient = mle.calculateGradient(weights)
    np.savetxt('train_gradient2.txt', gradient)
    finished = time.time()
    print("calcGrad took: " + str((finished - began) / 60))

    # Compare the freshly written gradient with the stored one.
    reference = np.loadtxt("train_gradient.txt")
    reloaded = np.loadtxt("train_gradient2.txt")
    print(np.linalg.norm(reference - reloaded))

    print(mle.findBestV())
def basicTest():
    """Print the MLE gradient at the all-ones vector for ``MLE_db.wtag``."""
    toy_parser = MyParser("MLE_db.wtag")
    sentences = toy_parser.splitted
    builder = BasicFeatureVectorBuilder(toy_parser)
    # Tag set is given explicitly rather than taken from the parser.
    estimator = MLE(["t1", "t2", "t3", "t5"], sentences, builder)
    all_ones = np.ones(builder.size)
    print(estimator.calculateGradient(all_ones))
def realData():
    """Check ``F104Builder`` feature-vector sizes on the training corpus.

    Uses the words of the first training sentence with a fixed history and
    asserts the vector is empty for the tag ``"bla"`` and has one entry for
    ``"RP"``.
    """
    corpus = MyParser('../train.wtag')
    pair_tags = corpus.getAllPairTagsCombinations()
    builder = F104Builder(pair_tags)
    first_sentence_words = [word for word, _tag in corpus.splitted[0]]
    hist = History("RB", "VBG", first_sentence_words, 3)

    for candidate_tag, expected_size in (("bla", 0), ("RP", 1)):
        assert builder.getFeatureVector(hist, candidate_tag).size == expected_size
def realData():
    """Check ``F100Builder`` feature-vector sizes on the training corpus.

    Uses the words of the first training sentence with a fixed history and
    asserts the vector is empty for the tag ``"bla"`` and has one entry for
    ``"RB"``.
    """
    corpus = MyParser('../train.wtag')
    word_tag_data = corpus.getWordsWithTag()
    builder = F100Builder(word_tag_data)
    first_sentence_words = [word for word, _tag in corpus.splitted[0]]
    hist = History("t5", "t2", first_sentence_words, 3)

    for candidate_tag, expected_size in (("bla", 0), ("RB", 1)):
        assert builder.getFeatureVector(hist, candidate_tag).size == expected_size
def basicTest():
    """Print ``mle.p`` for one fixed history and tag on ``MLE_db.wtag``.

    The weight vector is all ones and the queried tag is ``"t3"``.
    """
    toy_parser = MyParser("MLE_db.wtag")
    sentences = toy_parser.splitted
    builder = BasicFeatureVectorBuilder(toy_parser)
    estimator = MLE(["t1", "t2", "t3", "t5"], sentences, builder)
    all_ones = np.ones(builder.size)
    hist = History("t1", "t2", ["w1", "w2", "w3", "w2"], 2)
    print(estimator.p(hist, "t3", all_ones))
def TRAIN():
    """Run ``findBestV`` on the training corpus, seeded from ``opt_v.txt``.

    Prints a banner first and the resulting vector at the end.
    """
    print("Training: ")
    train_parser = MyParser("../train.wtag")
    sentences = train_parser.splitted
    builder = BasicFeatureVectorBuilder(train_parser)
    unique_tags = train_parser.getUniqueTags()
    estimator = MLE(unique_tags, sentences, builder)
    # The vector stored in opt_v.txt is passed to findBestV as its argument.
    seed_vector = np.loadtxt("opt_v.txt")
    print(estimator.findBestV(seed_vector))
def train():
    """Tag every sentence of ``../comp748.wtag`` and report running accuracy.

    Weights are loaded from ``opt_v_3.txt``.  For each sentence the tags
    returned by ``Viterbi.inference`` are appended to
    ``test_wtag748_results.txt`` and the expected tags to
    ``test_wtag748_expected.txt`` (one space-separated line per sentence).
    Per-sentence timing, per-sentence matches and cumulative totals are
    printed after every sentence.
    """
    train_parser = MyParser("../train.wtag")
    # NOTE(review): this dictionary is replaced a few lines below by the one
    # built from the comp748 parser, so the training-set dictionary is never
    # used -- confirm which of the two Viterbi is meant to receive.
    seenSentencesToTagsDict = train_parser.getSeenWordsToTagsDict()

    parser = MyParser("../comp748.wtag")
    splitted = parser.splitted
    fb = BasicFeatureVectorBuilder(parser, 0)
    mle = MLE(parser.getUniqueTags(), splitted, fb)
    v = np.loadtxt("opt_v_3.txt")

    # Each sentence is a sequence of (word, tag) items; split them apart.
    sentences = [[t[0] for t in tuples] for tuples in splitted]
    expected_tags = [[t[1] for t in tuples] for tuples in splitted]

    seenSentencesToTagsDict = parser.getSeenWordsToTagsDict()
    vit = Viterbi(mle, mle.allTags, v, seenSentencesToTagsDict)

    total_res = 0
    words_count = 0
    total_time = 0
    for idx, (s, expected) in enumerate(zip(sentences, expected_tags)):
        curr_word_len = len(s)
        words_count += curr_word_len

        start = time.time()
        tags = vit.inference(s)
        # Files are reopened in append mode per sentence, so partial results
        # survive an interrupted run; ``with`` closes them even on error.
        with open("test_wtag748_results.txt", 'a') as res_file:
            for item in tags:
                res_file.write("%s " % item)
            res_file.write("\n")
        with open("test_wtag748_expected.txt", 'a') as exp_file:
            for item in expected:
                exp_file.write("%s " % item)
            exp_file.write("\n")
        stop = time.time()  # the measured span includes the file writes

        # Tags are compared element-wise through their hashes.
        e = np.array([hash(x) for x in expected])
        t = np.array([hash(x) for x in tags])
        current_correct = np.sum(e == t)

        print("---------------------")
        print("Inference for sentence# ", idx, " took: ", stop - start, " seconds")
        total_time += stop - start
        print("Current sentence accuracy: ", current_correct, " of: ", curr_word_len)
        total_res += current_correct
        print("Total sentence accuracy: ", total_res, " of: ", words_count, "=",
              (100 * total_res) / words_count, "%")
        print("Total time for ", idx, " sentences: ", (total_time / 60), " minutes")
def fit_basic_model(continueTraining):
    """Fit the basic-feature model on the training corpus.

    Args:
        continueTraining: when truthy, the vector stored in
            ``finish_basic_opt_v_lambda_0_007.txt`` is loaded and passed to
            ``fit_model_aux``; otherwise ``None`` is passed.
    """
    initial_v = (
        np.loadtxt("finish_basic_opt_v_lambda_0_007.txt")
        if continueTraining
        else None
    )
    lambda_values = [0.007]

    train_parser = MyParser("../train.wtag")
    sentences = train_parser.splitted
    builder = BasicFeatureVectorBuilder(train_parser, 0)
    unique_tags = train_parser.getUniqueTags()
    estimator = MLE(unique_tags, sentences, builder)

    fit_model_aux(estimator, "basic", lambda_values, 550, initial_v)
def fit_complex_model(continueTraining):
    """Fit the complex-feature model on the training corpus.

    Args:
        continueTraining: when truthy, the vector stored in
            ``finish_complex_opt_v_lambda_0_007.txt`` is loaded and passed to
            ``fit_model_aux``; otherwise ``None`` is passed.
    """
    initial_v = (
        np.loadtxt("finish_complex_opt_v_lambda_0_007.txt")
        if continueTraining
        else None
    )
    lambda_values = [0.007]

    train_parser = MyParser("../train.wtag")
    sentences = train_parser.splitted
    builder = ComplexFeatureVectorBuilder(train_parser, False)
    unique_tags = train_parser.getUniqueTags()
    estimator = MLE(unique_tags, sentences, builder)

    fit_model_aux(estimator, "complex", lambda_values, 300, initial_v)
def calcTupleTestBasic():
    """Print ``calcTuple`` at the zero vector and at the ``findBestV`` result.

    Runs on ``MLE_db.wtag`` with a fixed four-tag set and prints, in order:
    ``calcTuple`` of the zero vector, the vector returned by ``findBestV``,
    and ``calcTuple`` of that vector.
    """
    toy_parser = MyParser("MLE_db.wtag")
    sentences = toy_parser.splitted
    builder = BasicFeatureVectorBuilder(toy_parser)
    estimator = MLE(["t1", "t2", "t3", "t5"], sentences, builder)

    zero_vector = np.zeros(builder.size)
    print(estimator.calcTuple(zero_vector))

    optimum = estimator.findBestV()
    print(optimum)
    print(estimator.calcTuple(optimum))
def toJson(data):
    """Parse ``data`` and return it as an indented JSON string.

    The parse result is passed through ``normalize`` and serialised with
    ``json.dumps`` (4-space indent, non-ASCII characters kept as-is).

    Returns:
        The JSON text, or ``'Syntax error!'`` if a ``TypeError`` is raised
        anywhere during parsing, normalising or dumping, or ``'EOF error!'``
        if an ``EOFError`` is raised.
    """
    lexer = MyLexer()
    parser = MyParser()
    try:
        tree = normalize(parser.parse(lexer.tokenize(data)))
        dumped = json.dumps(tree, indent=4, ensure_ascii=False)
        # Collapse quote-backslash-quote and backslash-quote-quote sequences
        # in the dump into a single double quote.
        return dumped.replace('"\\\"', '"').replace('\\""', '"')
    except EOFError:
        return 'EOF error!'
    except TypeError:
        return 'Syntax error!'
def info(self): torrent_name = self.recv() try: rc = MyParser(torrent_name) self.send(rc.get_state() + " " + rc.get_progress()) except: self.send(" ") print "---" print rc.get_progress() if "100%" in rc.get_progress(): if torrent_name in TorrentDict.keys( ) and TorrentDict[torrent_name] != "100%": print "full" mymutex.acquire() endServerDownloadFullTorrent(torrent_name) mymutex.release() TorrentDict[torrent_name] = rc.get_progress().split()[0]
def basicConfusion():
    """Append a confusion-matrix table to ``basicConfusionMatrix_141217.txt``.

    The matrix comes from ``ConfusionMatrix.calculateMatrixForLowestNTags``
    applied to the expected/actual tag files of the basic model, with
    ``10`` as its last argument.  The output starts with a header line listing
    every unique training tag, followed by one line per returned tag holding
    that tag and its matrix row.
    """
    mp = MyParser("../train.wtag")
    tags = mp.getUniqueTags()
    cm = ConfusionMatrix(tags)

    # ``with`` closes the inputs even if the matrix computation raises.
    with open('testwtag_expected_finish_basic_opt_v_lambda_0_007.txt') as expected, \
            open('testwtag_results_finish_basic_opt_v_lambda_0_007.txt') as actual:
        mat, res = cm.calculateMatrixForLowestNTags(expected, actual, 10)

    # The output handle was previously never closed.
    with open('basicConfusionMatrix_141217.txt', 'a') as output:
        for tag in tags:
            output.write(" {}".format(tag))
        output.write('\n')
        for idx, tag in enumerate(res):
            output.write("{} ".format(tag))
            for j in range(0, mat[idx].size):
                output.write("{} ".format(mat[idx][j]))
            output.write('\n')
def calcTupleTestRealData():
    """Time ``MLE.calcTuple`` on the training corpus with complex features.

    Prints the preprocessing time, the two values returned by ``calcTuple``
    at the all-ones vector (labelled ``L(V)`` and the gradient), saves the
    gradient to ``train_gradientTuple.txt`` and prints the elapsed seconds.
    """
    parser = MyParser("../train.wtag")
    splitted = parser.splitted
    fb = ComplexFeatureVectorBuilder(parser)
    tags = parser.getUniqueTags()

    start = time.time()
    # NOTE(review): the meaning of the trailing ``0`` and "tmp1234.txt"
    # arguments is defined by MLE and not visible here.
    mle = MLE(tags, splitted, fb, 0, "tmp1234.txt")
    end = time.time()
    print("End of preprocessing, took: ", end - start)

    v = np.ones(fb.size)
    start = time.time()
    # np.savetxt opens and closes the output file itself; the extra handle
    # that used to be opened on the same path here was never used or closed.
    lv, grad = mle.calcTuple(v)
    print("L(V) = ", lv)
    print(grad)
    np.savetxt('train_gradientTuple.txt', grad)
    end = time.time()
    print("calcTuple took: ", end - start, " seconds")
def basicTest():
    """Check ``BasicFeatureVectorBuilder`` vector sizes on ``small.wtag``.

    Three histories that differ in one previous tag and/or one word are
    expected to yield vectors of size 3, 2 and 1 for the tag ``"t50"``; the
    tag ``"noTag"`` is expected to yield an empty vector.
    """
    small_parser = MyParser('small.wtag')
    builder = BasicFeatureVectorBuilder(small_parser)

    first_history = History(
        "t3", "t8", ["w2", "w2", "w5", "w3", "w13", "w31", "w33"], 2)
    first_vec = builder.getFeatureVector(first_history, "t50")
    assert first_vec.size == 3
    print(first_vec)

    second_history = History(
        "t4", "t8", ["w2", "w2", "w5", "w3", "w13", "w31", "w33"], 2)
    second_vec = builder.getFeatureVector(second_history, "t50")
    assert second_vec.size == 2
    print(second_vec)

    third_history = History(
        "t4", "t8", ["w2", "w2", "w4", "w3", "w13", "w31", "w33"], 2)
    third_vec = builder.getFeatureVector(third_history, "t50")
    assert third_vec.size == 1
    print(third_vec)

    # Same history, different tag: no feature is expected.
    empty_vec = builder.getFeatureVector(third_history, "noTag")
    assert empty_vec.size == 0
def torrentdetails(self): torrent_name = self.recv() try: rc = MyParser(torrent_name) self.send("ACK") self.recv() except: self.send("NAK") self.recv() return (seeds, peers, avail) = rc.get_info() self.send(seeds) self.recv() self.send(peers) self.recv() self.send(avail) self.recv() ID = rc.get_ID() self.send(ID) print "OK1" self.recv() print "OK2" eta = rc.get_eta() self.send(eta + " ") print eta print "OK3" self.recv() # print "OK4" self.send(rc.get_downspeed()) print "OK5" self.recv() self.send(rc.get_upspeed()) self.recv() mymutex.acquire() count = getClientCountForTorrent(torrent_name) mymutex.release() self.send(str(count)) self.recv() print "done"
def trainwtagTest():
    """Check ``BasicFeatureVectorBuilder`` vector sizes on a training sentence.

    Uses the words of sentence 2829 of ``../train.wtag`` at position 11 and
    expects vectors of size 3, 2, 1 and 0 for the four history/tag
    combinations below; each vector is printed after its check.
    """
    train_parser = MyParser('../train.wtag')
    builder = BasicFeatureVectorBuilder(train_parser)
    tagged_sentence = train_parser.splitted[2829]
    words = [item[0] for item in tagged_sentence]

    full_history = History("IN", "DT", words, 11)
    full_vec = builder.getFeatureVector(full_history, "NN")
    assert full_vec.size == 3
    print(full_vec)

    partial_history = History("NoTag", "DT", words, 11)
    partial_vec = builder.getFeatureVector(partial_history, "NN")
    assert partial_vec.size == 2
    print(partial_vec)

    other_history = History("NoTag", "IN", words, 11)
    other_vec = builder.getFeatureVector(other_history, "DT")
    assert other_vec.size == 1
    print(other_vec)

    # Same history, different tag: no feature is expected.
    empty_vec = builder.getFeatureVector(other_history, "noTag")
    assert empty_vec.size == 0
    print(empty_vec)
i = input(); output(factorial(i)); return 0; } """ # 词法分析器获得输入 lexer.input(source_str) # 标记化 # for tok in lexer: # print(tok) # 语法分析 # 构建语法分析器 parser = MyParser("AST") # 语法分析器分析输入 root_node = parser.parse(source_str, lexer=lexer) # 语义分析器构建符号表和错误检查 my_semantic_analyzer = MySemanticAnalyzer() my_semantic_analyzer.build_semantic_analyzer(root_node) if not my_semantic_analyzer.error: # 代码生成初始化 build_code_generator(root_node) print(emit_util.result) # 打印语法树 # root_node.print()
# Ad-hoc script: parse the training corpus and print its unique tags.
from MyParser import MyParser

p = MyParser("../train.wtag")
# words, tag3 and tag2 are computed but not used by any active statement
# below; only the unique-tag result is printed.
words = p.getWordsWithTag()
tag3 = p.getAllThreeTagsCombinations()
tag2 = p.getAllPairTagsCombinations()
tag = p.getUniqueTags()
# print(tag3)
# print(tag2)
print(tag)
def feature_num_print():
    """Build a ``ComplexFeatureVectorBuilder`` over the training corpus.

    The builder is constructed and immediately discarded, so any output comes
    from the constructor itself (presumably feature counts, enabled by the
    ``True`` flag -- confirm against the builder).
    """
    train_parser = MyParser('../train.wtag')
    # NOTE(review): the parser is passed twice here, while other call sites
    # pass it once -- confirm this matches the builder's signature.
    ComplexFeatureVectorBuilder(train_parser, train_parser, True)
def parse(token_list: list) -> tuple:
    """Parse ``token_list`` with a fresh ``MyParser`` and return its result.

    The list is given to the parser's constructor and, as an iterator, to its
    ``parse`` method.
    """
    token_stream = iter(token_list)
    return MyParser(token_list).parse(token_stream)