def main(argv):
    input_stream = FileStream(argv[1])  # renamed from 'input' to avoid shadowing the builtin
    lexer = MyLexer(input_stream)
    stream = CommonTokenStream(lexer)
    parser = MyParser(stream)
    tree = parser.message()
    print(tree.toStringTree(recog=parser))
def realDataTest():
    parser = MyParser("../train.wtag")
    splitted = parser.splitted
    fb = BasicFeatureVectorBuilder(parser)
    tags = parser.getUniqueTags()

    start = time.time()
    mle = MLE(tags, splitted, fb)
    end = time.time()
    print("End of preprocessing, took: ", end - start)

    v = np.ones(fb.size)
    start = time.time()
    print(mle.calculate(v))
    end = time.time()
    print("calcV took: " + str((end - start) / 60))

    start = time.time()
    array = mle.calculateGradient(v)
    np.savetxt('train_gradient2.txt', array)
    end = time.time()
    print("calcGrad took: " + str((end - start) / 60))

    truth = np.loadtxt("train_gradient.txt")
    current = np.loadtxt("train_gradient2.txt")
    dist = np.linalg.norm(truth - current)
    print(dist)

    best_v = mle.findBestV()
    print(best_v)
def main(argv):
    input_stream = FileStream(argv[1])  # renamed from 'input' to avoid shadowing the builtin
    lexer = MyLexer(input_stream)
    stream = CommonTokenStream(lexer)
    parser = MyParser(stream)
    tree = parser.file()  # Replace "file" with the name of the start rule
    print(tree.toStringTree(recog=parser))
def infer_prepare_params(basic_or_complex, fileToInfer):
    train_parser = MyParser("../train.wtag")
    seenWordsToTagsDict = train_parser.getSeenWordsToTagsDict()
    fb, filePrefix = None, None
    if basic_or_complex == 'basic':
        fb = BasicFeatureVectorBuilder(train_parser, 0)
        filePrefix = 'finish_basic_opt_v_'
    elif basic_or_complex == 'complex':
        fb = ComplexFeatureVectorBuilder(train_parser, False)
        filePrefix = 'finish_complex_opt_v_'
    else:
        assert False, "basic_or_complex must be 'basic' or 'complex'"
    fn = str(fileToInfer).replace('.', '').replace('/', '')
    parser = MyParser(fileToInfer)
    splitted = parser.splitted
    mle = MLE(train_parser.getUniqueTags(), splitted, fb)
    prefixed = [
        filename for filename in os.listdir('.')
        if filename.startswith(filePrefix)
    ]
    prefixed.sort()
    print(prefixed)
    results = []
    for v_file in prefixed:
        v = np.loadtxt(v_file)
        vit = Viterbi(mle, mle.allTags, v, seenWordsToTagsDict)
        res_file = open(fn + "_results_" + v_file, 'w')
        exp_file = open(fn + "_expected_" + v_file, 'w')
        accuracy = infer_aux(exp_file, res_file, v_file, splitted, vit)
        res_file.close()
        exp_file.close()
        results.append(accuracy)
    infer_aux_results(prefixed, results, fileToInfer, fn)
def main():
    '''if len(sys.argv) == 1:
        print(f"Usage: {sys.argv[0]} [-t | <file_name>]")
        return
    string = None
    if sys.argv[1] == "-t":
        string = sys.stdin.read()
    else:
        with open(sys.argv[1], "r") as f:
            string = f.read()
    if string is None:
        print("Unknown Error, try again")
        return'''
    # string = sys.stdin.read()
    string = """
    int main(){
        int x;
        x = 5;
        print(0, x, false);
        return 0;
    }
    """
    lexer = MyLexer()
    token_list = []
    for token in lexer.tokenize(string):
        token.index = find_column(string, token)
        token_list.append(token)
    parser = MyParser(token_list)
    out = parser.parse(iter(token_list))
    if out is None:
        print(False)
    else:
        print(out[1])
def realData():
    p = MyParser('../train.wtag')
    words = p.getWordsWithTag()
    f100 = F100Builder(words)
    firstSent = [w for (w, t) in p.splitted[0]]
    history = History("t5", "t2", firstSent, 3)
    assert f100.getFeatureVector(history, "bla").size == 0
    assert f100.getFeatureVector(history, "RB").size == 1
def realData():
    p = MyParser('../train.wtag')
    words = p.getAllPairTagsCombinations()
    f104 = F104Builder(words)
    firstSent = [w for (w, t) in p.splitted[0]]
    history = History("RB", "VBG", firstSent, 3)
    assert f104.getFeatureVector(history, "bla").size == 0
    assert f104.getFeatureVector(history, "RP").size == 1
def getImageUrlList(url):
    stream = getStream(url)
    data = getCodingContent(stream)
    parser = MyParser()
    parser.feed(data)
    alist = parser.scriptList
    longestStr = longestString(alist)
    var_img = cutTo(longestStr, ';')
    return getImageUrlFromScript(var_img)
def __init__(self, parser: MyParser, offset) -> None:
    self.parser = parser
    # each sub-builder is offset by the combined size of the builders
    # before it, so the three feature families get disjoint index ranges
    self.f100 = F100Builder(parser.getWordsWithTag(), 0)
    self.f103 = F103Builder(parser.getAllThreeTagsCombinations(), self.f100.size)
    self.f104 = F104Builder(parser.getAllPairTagsCombinations(),
                            self.f100.size + self.f103.size)
    super().__init__(self.f100.size + self.f103.size + self.f104.size, offset)
def TRAIN():
    print("Training: ")
    parser = MyParser("../train.wtag")
    splitted = parser.splitted
    fb = BasicFeatureVectorBuilder(parser)
    tags = parser.getUniqueTags()
    mle = MLE(tags, splitted, fb)
    best_v = mle.findBestV(np.loadtxt("opt_v.txt"))
    print(best_v)
def fit_basic_model(continueTraining):
    v = None
    if continueTraining:
        v = np.loadtxt("finish_basic_opt_v_lambda_0_007.txt")
    lambdas = [0.007]
    parser = MyParser("../train.wtag")
    splitted = parser.splitted
    basicFeatureBuilder = BasicFeatureVectorBuilder(parser, 0)
    tags = parser.getUniqueTags()
    mle = MLE(tags, splitted, basicFeatureBuilder)
    fit_model_aux(mle, "basic", lambdas, 550, v)
def fit_complex_model(continueTraining):
    v = None
    if continueTraining:
        v = np.loadtxt("finish_complex_opt_v_lambda_0_007.txt")
    lambdas = [0.007]
    parser = MyParser("../train.wtag")
    splitted = parser.splitted
    cfb = ComplexFeatureVectorBuilder(parser, False)
    tags = parser.getUniqueTags()
    mle = MLE(tags, splitted, cfb)
    fit_model_aux(mle, "complex", lambdas, 300, v)
def searchResult(url):
    parser = MyParser()
    parser.feed(getCodingContent(getStream(url)))
    block = longestString(parser.scriptList)
    parser.close()
    # raw string avoids the invalid-escape warning for \d
    pattern = re.compile(r'(?<="listNum":)\d*(?=,)')
    count = pattern.findall(block)
    if count:
        return int(count[0])
    return 0
def main(argv):
    istream = FileStream(argv[1])
    lexer = MyLexer(istream)
    stream = CommonTokenStream(lexer)
    parser = MyParser(stream)
    tree = parser.intfile()
    print(tree.toStringTree(recog=parser))
    walker = ParseTreeWalker()
    walker.walk(RewriteListener(), tree)
    print("Done")
def main(argv):
    input_stream = FileStream(argv[1])  # renamed from 'input' to avoid shadowing the builtin
    lexer = MyLexer(input_stream)
    stream = CommonTokenStream(lexer)
    parser = MyParser(stream)
    tree = parser.intfile()  # changed parser.StartRule() to parser.intfile()
    print(tree.toStringTree(recog=parser))
class Adapter(IParserType):
    def __init__(self):
        self.parser = None

    def parseFromFile(self, core, path):
        if self.parser is None:
            self.parser = MyParser(core)
        return self.parser.parseFile(path)

    def parseFromConsole(self, core, command):
        if self.parser is None:
            self.parser = MyParser(core)
        return self.parser.parseConsole(command)
def toJson(data):
    lexer = MyLexer()
    parser = MyParser()
    try:
        result = parser.parse(lexer.tokenize(data))
        new_result = normalize(result)
        json_str = json.dumps(new_result, indent=4, ensure_ascii=False)
        json_str = json_str.replace('"\\\"', '"').replace('\\""', '"')
        return json_str
    except TypeError:
        return 'Syntax error!'
    except EOFError:
        return 'EOF error!'
def basicTest(): parser = MyParser("MLE_db.wtag") splitted = parser.splitted fb = BasicFeatureVectorBuilder(parser) mle = MLE(["t1", "t2", "t3", "t5"], splitted, fb) v = np.ones(fb.size) res = mle.calculateGradient(v) print(res)
def basicConfusion():
    mp = MyParser("../train.wtag")
    tags = mp.getUniqueTags()
    cm = ConfusionMatrix(tags)
    expected = open('testwtag_expected_finish_basic_opt_v_lambda_0_007.txt')
    actual = open('testwtag_results_finish_basic_opt_v_lambda_0_007.txt')
    mat, res = cm.calculateMatrixForLowestNTags(expected, actual, 10)
    expected.close()
    actual.close()
    output = open('basicConfusionMatrix_141217.txt', 'a')
    # header row: one column per tag
    for tag in tags:
        output.write(" {}".format(tag))
    output.write('\n')
    # one row per low-accuracy tag, followed by its confusion counts
    for idx, tag in enumerate(res):
        output.write("{} ".format(tag))
        for j in range(0, mat[idx].size):
            output.write("{} ".format(mat[idx][j]))
        output.write('\n')
    output.close()  # the original never closed the output file
def basicTest(): parser = MyParser("MLE_db.wtag") splitted = parser.splitted fb = BasicFeatureVectorBuilder(parser) mle = MLE(["t1", "t2", "t3", "t5"], splitted, fb) v = np.ones(fb.size) history = History("t1", "t2", ["w1", "w2", "w3", "w2"], 2) res = mle.p(history, "t3", v) print(res)
def calcTupleTestRealData():
    parser = MyParser("../train.wtag")
    splitted = parser.splitted
    # fb = BasicFeatureVectorBuilder(parser, 0)
    fb = ComplexFeatureVectorBuilder(parser)
    tags = parser.getUniqueTags()
    start = time.time()
    mle = MLE(tags, splitted, fb, 0, "tmp1234.txt")
    end = time.time()
    print("End of preprocessing, took: ", end - start)
    v = np.ones(fb.size)
    start = time.time()
    lv, grad = mle.calcTuple(v)
    print("L(V) = ", lv)
    print(grad)
    # np.savetxt opens the file itself; the original also opened it
    # with open(..., "w") and never closed that handle
    np.savetxt('train_gradientTuple.txt', grad)
    end = time.time()
    print("calcTuple took: ", end - start, " seconds")
def calcTupleTestBasic():
    parser = MyParser("MLE_db.wtag")
    splitted = parser.splitted
    fb = BasicFeatureVectorBuilder(parser)
    mle = MLE(["t1", "t2", "t3", "t5"], splitted, fb)
    v = np.zeros(fb.size)
    res = mle.calcTuple(v)
    print(res)
    best_v = mle.findBestV()
    print(best_v)
    res1 = mle.calcTuple(best_v)
    print(res1)
def __init__(self, parser: MyParser, offset) -> None:
    self.d_train = {}
    self.d_inference = {}
    digit_letters_tuple = parser.getAllTagsForDigitLetters()
    # the original zipped with range() for an unused index; iterating
    # directly is equivalent
    for (w, t) in digit_letters_tuple:
        if t not in self.d_inference:
            self.d_inference[t] = len(self.d_inference) + offset
        self.d_train[(w, t)] = self.d_inference[t]
    super().__init__(len(self.d_inference), offset)
def info(self):
    torrent_name = self.recv()
    try:
        rc = MyParser(torrent_name)
        self.send(rc.get_state() + " " + rc.get_progress())
    except Exception:
        self.send(" ")
        return  # rc is undefined past this point, so bail out
    print("---")
    print(rc.get_progress())
    if "100%" in rc.get_progress():
        if torrent_name in TorrentDict and TorrentDict[torrent_name] != "100%":
            print("full")
            mymutex.acquire()
            endServerDownloadFullTorrent(torrent_name)
            mymutex.release()
    TorrentDict[torrent_name] = rc.get_progress().split()[0]
def __init__(self, parser: MyParser, offset) -> None:
    self.d_train = {}
    self.d_inference = {}
    self.digits = [
        'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven',
        'eight', 'nine', 'ten'
    ]
    digit_triplet = parser.getAllTagsForLettersNumbers(self.digits)
    # caps_tuple = parser.getAllTagsForCaps()
    # digit_letters_tuple = parser.getAllTagsForLettersNumbers(self.digits)
    for (w, x, t) in digit_triplet:
        if (x, t) not in self.d_inference:
            self.d_inference[(x, t)] = len(self.d_inference) + offset
        self.d_train[(w, t)] = self.d_inference[(x, t)]
    super().__init__(len(self.d_inference), offset)
def __init__(self, parser: MyParser, offset) -> None:
    self.d_train = {}
    self.d_inference = {}
    # duplicate entries dropped from the literal; the set() below
    # deduplicated them anyway
    self.suffixes = [
        'ful', 'ive', 'ic', 'al', 'able', 'ed', 'ible', 'ing', 'ous',
        'ish', 'like', 'some', 'worthy', 'ly', 'ily', 'ely', 'ingly',
        'ment', 'ness', 'sion', 'tion', 'ty', 'ance', 'hood', 'dom',
        'ght', 'er', 'age', 'sis', 'ism', 'ity', 'ant', 'ssion', 'ship',
        'th', 'cess', 'ize', 'ise', 'yse', 'ate', 'ent', 'en', 'ify',
        'fy', 'ct', 'fine'
    ]
    self.suffixes = sorted(set(self.suffixes))
    suf_triplet = parser.getAllTagsForSuffix(self.suffixes)
    self.suffixes = []
    for (w, x, t) in suf_triplet:
        if x not in self.suffixes:
            self.suffixes.append(x)
        if (x, t) not in self.d_inference:
            self.d_inference[(x, t)] = len(self.d_inference) + offset
        self.d_train[(w, t)] = self.d_inference[(x, t)]
    super().__init__(len(self.d_inference), offset)
def __init__(self, train_parser: MyParser, isTraining) -> None:
    self.parser = train_parser
    self.isTraining = isTraining
    # each builder is offset by the running total, so every feature
    # family occupies a disjoint range of one shared index space
    vecSize = 0
    self.f100 = F100Builder(train_parser.getWordsWithTag(), vecSize)
    vecSize = self.f100.size
    print("F100 size", self.f100.size)
    self.f103 = F103Builder(train_parser.getAllThreeTagsCombinations(), vecSize)
    vecSize = vecSize + self.f103.size
    print("F103 size", self.f103.size)
    self.f104 = F104Builder(train_parser.getAllPairTagsCombinations(), vecSize)
    vecSize = vecSize + self.f104.size
    print("F104 size", self.f104.size)
    self.f106 = F106Builder(train_parser.getUniqueTags(), vecSize)
    vecSize = vecSize + self.f106.size
    print("F106 size", self.f106.size)
    self.fSuf = SuffixFeatureBuilder(train_parser, vecSize)
    vecSize = vecSize + self.fSuf.size
    print("Suffix size", self.fSuf.size)
    self.fPref = PrefixFeatureBuilder(train_parser, vecSize)
    vecSize = vecSize + self.fPref.size
    print("Prefix size", self.fPref.size)
    self.fDigNum = DigitNumberFeatureBuilder(train_parser, vecSize)
    vecSize = vecSize + self.fDigNum.size
    print("DigitNum size", self.fDigNum.size)
    self.fLetNum = DigitWordFeatureBuilder(train_parser, vecSize)
    vecSize = vecSize + self.fLetNum.size
    print("DigitLetter size", self.fLetNum.size)
    self.fCaps = CapsFeatureBuilder(train_parser, vecSize)
    vecSize = vecSize + self.fCaps.size
    print("Caps size", self.fCaps.size)
    self.fPrevNext = PrevNextWordFeatureBuilder(
        train_parser.getAllPrevWordTagCombinations(),
        train_parser.getAllNextWordTagCombinations(), vecSize)
    vecSize = vecSize + self.fPrevNext.size
    print("PrevNext size", self.fPrevNext.size)
    super().__init__(vecSize, 0)
def train():
    train_parser = MyParser("../train.wtag")
    seenSentencesToTagsDict = train_parser.getSeenWordsToTagsDict()
    parser = MyParser("../comp748.wtag")
    splitted = parser.splitted
    fb = BasicFeatureVectorBuilder(parser, 0)
    mle = MLE(parser.getUniqueTags(), splitted, fb)
    v = np.loadtxt("opt_v_3.txt")
    sentences = list(map(lambda tuples: [t[0] for t in tuples], splitted))
    expected_tags = list(map(lambda tuples: [t[1] for t in tuples], splitted))
    # note: this overwrites the dict built from train_parser above,
    # making that earlier assignment dead
    seenSentencesToTagsDict = parser.getSeenWordsToTagsDict()
    vit = Viterbi(mle, mle.allTags, v, seenSentencesToTagsDict)
    total_res = 0
    words_count = 0
    total_time = 0
    for s, expected, idx in zip(sentences, expected_tags, range(0, len(splitted))):
        curr_word_len = len(s)
        words_count = words_count + curr_word_len
        start = time.time()
        tags = vit.inference(s)
        res_file = open("test_wtag748_results.txt", 'a')
        for item in tags:
            res_file.write("%s " % item)
        res_file.write("\n")
        res_file.close()
        exp_file = open("test_wtag748_expected.txt", 'a')
        for item in expected:
            exp_file.write("%s " % item)
        exp_file.write("\n")
        exp_file.close()
        stop = time.time()
        e = np.array([hash(x) for x in expected])
        t = np.array([hash(x) for x in tags])
        current_correct = np.sum(e == t)
        print("---------------------")
        print("Inference for sentence# ", idx, " took: ", stop - start, " seconds")
        total_time = total_time + (stop - start)
        print("Current sentence accuracy: ", current_correct, " of: ", curr_word_len)
        total_res = total_res + current_correct
        print("Total sentence accuracy: ", total_res, " of: ", words_count, "=",
              (100 * total_res) / words_count, "%")
        print("Total time for ", idx, " sentences: ", (total_time / 60), " minutes")
def basicTest():
    parser = MyParser('small.wtag')
    basic = BasicFeatureVectorBuilder(parser)
    history1 = History("t3", "t8", ["w2", "w2", "w5", "w3", "w13", "w31", "w33"], 2)
    vec1 = basic.getFeatureVector(history1, "t50")
    assert vec1.size == 3
    print(vec1)
    history2 = History("t4", "t8", ["w2", "w2", "w5", "w3", "w13", "w31", "w33"], 2)
    vec2 = basic.getFeatureVector(history2, "t50")
    assert vec2.size == 2
    print(vec2)
    history3 = History("t4", "t8", ["w2", "w2", "w4", "w3", "w13", "w31", "w33"], 2)
    vec3 = basic.getFeatureVector(history3, "t50")
    assert vec3.size == 1
    print(vec3)
    vec4 = basic.getFeatureVector(history3, "noTag")
    assert vec4.size == 0
def torrentdetails(self):
    torrent_name = self.recv()
    try:
        rc = MyParser(torrent_name)
        self.send("ACK")
        self.recv()
    except Exception:
        self.send("NAK")
        self.recv()
        return
    (seeds, peers, avail) = rc.get_info()
    self.send(seeds)
    self.recv()
    self.send(peers)
    self.recv()
    self.send(avail)
    self.recv()
    ID = rc.get_ID()
    self.send(ID)
    print("OK1")
    self.recv()
    print("OK2")
    eta = rc.get_eta()
    self.send(eta + " ")
    print(eta)
    print("OK3")
    self.recv()
    # print("OK4")
    self.send(rc.get_downspeed())
    print("OK5")
    self.recv()
    self.send(rc.get_upspeed())
    self.recv()
    mymutex.acquire()
    count = getClientCountForTorrent(torrent_name)
    mymutex.release()
    self.send(str(count))
    self.recv()
    print("done")
def __init__(self, parser: MyParser, offset) -> None:
    self.d_train = {}
    self.d_inference = {}
    self.prefixes = [
        'a', 'ante', 'anti', 'arch', 'auto', 'bi', 'circum', 'co', 'col',
        'com', 'con', 'contra', 'counter', 'de', 'dia', 'dis', 'dys', 'e',
        'eco', 'en', 'em', 'equi', 'ex', 'extra', 'fore', 'hyper', 'il',
        'im', 'in', 'ir', 'inter', 'intra',  # fixed typo: was 'inrta'
        'kilo', 'macro', 'mal', 'micro', 'mid', 'mis', 'mono', 'multi',
        'neo', 'non', 'out', 'over', 'post', 'pre', 'pro', 'pseudo', 're',
        'retro', 'semi', 'sub', 'super', 'trans', 'ultra', 'un', 'under',
        'well'
    ]
    self.prefixes = sorted(set(self.prefixes))
    pref_triplet = parser.getAllTagsForPrefix(self.prefixes)
    self.prefixes = []
    for (w, x, t) in pref_triplet:
        if x not in self.prefixes:
            self.prefixes.append(x)
        if (x, t) not in self.d_inference:
            self.d_inference[(x, t)] = len(self.d_inference) + offset
        self.d_train[(w, t)] = self.d_inference[(x, t)]
    super().__init__(len(self.d_inference), offset)
def trainwtagTest():
    parser = MyParser('../train.wtag')
    basic = BasicFeatureVectorBuilder(parser)
    splitted = parser.splitted[2829]
    sentence = [l[0] for l in splitted]
    history1 = History("IN", "DT", sentence, 11)
    vec1 = basic.getFeatureVector(history1, "NN")
    assert vec1.size == 3
    print(vec1)
    history2 = History("NoTag", "DT", sentence, 11)
    vec2 = basic.getFeatureVector(history2, "NN")
    assert vec2.size == 2
    print(vec2)
    history3 = History("NoTag", "IN", sentence, 11)
    vec3 = basic.getFeatureVector(history3, "DT")
    assert vec3.size == 1
    print(vec3)
    vec4 = basic.getFeatureVector(history3, "noTag")
    assert vec4.size == 0
    print(vec4)
def feature_num_print():
    parser = MyParser('../train.wtag')
    # ComplexFeatureVectorBuilder takes (train_parser, isTraining); the
    # duplicated parser argument was dropped. Construction prints each
    # feature-family size as a side effect.
    ComplexFeatureVectorBuilder(parser, True)
#!/usr/bin/env python
from MyParser import MyParser

if __name__ == '__main__':
    parser = MyParser("Books.xml")
    parser.parse()
    parser.print_books()
def test_PathToFileIsSetAndFoundFileThatIsNotEmpty(self):
    parser = MyParser("Books.xml")
    parser.parse()
    self.assertNotEqual(0, len(parser._books_list))
    i = input();
    output(factorial(i));
    return 0;
}
"""

# Feed the source to the lexer
lexer.input(source_str)

# Tokenize
# for tok in lexer:
#     print(tok)

# Syntax analysis: build the parser
parser = MyParser("AST")
# Parse the input
root_node = parser.parse(source_str, lexer=lexer)

# Semantic analysis: build the symbol table and check for errors
my_semantic_analyzer = MySemanticAnalyzer()
my_semantic_analyzer.build_semantic_analyzer(root_node)

if not my_semantic_analyzer.error:
    # Initialize code generation
    build_code_generator(root_node)
    print(emit_util.result)

# Print the syntax tree
# root_node.print()
def parse(token_list: list) -> tuple:
    parser = MyParser(token_list)
    return parser.parse(iter(token_list))
from MyParser import MyParser

p = MyParser("../train.wtag")
words = p.getWordsWithTag()
tag3 = p.getAllThreeTagsCombinations()
tag2 = p.getAllPairTagsCombinations()
tag = p.getUniqueTags()
# print(tag3)
# print(tag2)
print(tag)