Example #1
def infer_prepare_params(basic_or_complex, fileToInfer):
    train_parser = MyParser("../train.wtag")
    seenWordsToTagsDict = train_parser.getSeenWordsToTagsDict()
    fb, filePrefix = None, None
    if basic_or_complex == 'basic':
        fb = BasicFeatureVectorBuilder(train_parser, 0)
        filePrefix = 'finish_basic_opt_v_'
    elif basic_or_complex == 'complex':
        fb = ComplexFeatureVectorBuilder(train_parser, False)
        filePrefix = 'finish_complex_opt_v_'
    else:
        assert False, "basic_or_complex must be 'basic' or 'complex'"
    fn = str(fileToInfer).replace('.', '').replace('/', '')
    parser = MyParser(fileToInfer)
    splitted = parser.splitted
    mle = MLE(train_parser.getUniqueTags(), splitted, fb)

    prefixed = [
        filename for filename in os.listdir('.')
        if filename.startswith(filePrefix)
    ]
    prefixed.sort()
    print(prefixed)
    results = []

    for v_file in prefixed:
        v = np.loadtxt(v_file)
        vit = Viterbi(mle, mle.allTags, v, seenWordsToTagsDict)
        with open(fn + "_results_" + v_file, 'w') as res_file, \
             open(fn + "_expected_" + v_file, 'w') as exp_file:
            accuracy = infer_aux(exp_file, res_file, v_file, splitted, vit)
        results.append(accuracy)
    infer_aux_results(prefixed, results, fileToInfer, fn)
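A hypothetical driver for the function above; the held-out file path is illustrative:

# Illustrative call: score every checkpointed basic-model weight file
# against a held-out tagged file (any .wtag file MyParser accepts).
infer_prepare_params('basic', '../test.wtag')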
Example #2
def main():
    '''if len(sys.argv) == 1:
        print(f"Usage; {sys.argv[0]} [-t | <file_name>]")
        return
    string = None
    if sys.argv[1] == "-t":
        string = sys.stdin.read()
    else:
        with open(sys.argv[1], "r") as f:
            string = f.read()
    if string is None:
        print("Unknown Error, try again")
        return'''
    #string = sys.stdin.read()
    string ="""
    int main(){
        int x;
        x = 5;
        print(0, x, false);
        return 0;
    }
    """
    lexer = MyLexer()
    token_list = []
    for token in lexer.tokenize(string):
        token.index = find_column(string, token)
        token_list.append(token)
    parser = MyParser(token_list)
    out = parser.parse(iter(token_list))
    if out is None:
        print(False)
    else:
        print(out[1])
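main() relies on a find_column helper the snippet does not define. A minimal sketch in the spirit of the SLY documentation, assuming token.index initially holds the token's absolute character offset into the input:

def find_column(text, token):
    # Column number = distance from the last newline before the token (1-based).
    last_cr = text.rfind('\n', 0, token.index)
    if last_cr < 0:
        last_cr = 0
    return (token.index - last_cr) + 1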
Example #3
def realDataTest():
    parser = MyParser("../train.wtag")
    splitted = parser.splitted
    fb = BasicFeatureVectorBuilder(parser)
    tags = parser.getUniqueTags()
    start = time.time()
    mle = MLE(tags, splitted, fb)
    end = time.time()
    print("End of preprocessing, took: ", end - start)
    v = np.ones(fb.size)
    start = time.time()
    print(mle.calculate(v))
    end = time.time()
    print("calcV took: " + str((end - start) / 60))
    start = time.time()
    array = mle.calculateGradient(v)
    np.savetxt('train_gradient2.txt', array)
    end = time.time()
    print("calcGrad took: " + str((end - start) / 60))
    truth = np.loadtxt("train_gradient.txt")
    current = np.loadtxt("train_gradient2.txt")
    dist = np.linalg.norm(truth - current)
    print(dist)
    best_v = mle.findBestV()
    print(best_v)
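realDataTest validates the analytic gradient against a previously saved run; a cheaper spot check is a finite-difference comparison. A sketch, assuming calculate and calculateGradient take the same weight vector:

from scipy.optimize import check_grad

# Hypothetical sanity check: compare the analytic gradient of the MLE
# objective against a finite-difference estimate at a random point.
v0 = np.random.rand(fb.size)
print("finite-difference gradient error:",
      check_grad(mle.calculate, mle.calculateGradient, v0))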
Example #4
def basicTest():
    parser = MyParser("MLE_db.wtag")
    splitted = parser.splitted
    fb = BasicFeatureVectorBuilder(parser)
    mle = MLE(["t1", "t2", "t3", "t5"], splitted, fb)
    v = np.ones(fb.size)
    res = mle.calculateGradient(v)
    print(res)
Example #5
def realData():
    p = MyParser('../train.wtag')
    words = p.getAllPairTagsCombinations()
    f104 = F104Builder(words)
    firstSent = [w for (w, t) in p.splitted[0]]
    history = History("RB", "VBG", firstSent, 3)
    assert f104.getFeatureVector(history, "bla").size == 0
    assert f104.getFeatureVector(history, "RP").size == 1
Example #6
def realData():
    p = MyParser('../train.wtag')
    words = p.getWordsWithTag()
    f100 = F100Builder(words)
    firstSent = [w for (w, t) in p.splitted[0]]
    history = History("t5", "t2", firstSent, 3)
    assert f100.getFeatureVector(history, "bla").size == 0
    assert f100.getFeatureVector(history, "RB").size == 1
Example #7
def basicTest():
    parser = MyParser("MLE_db.wtag")
    splitted = parser.splitted
    fb = BasicFeatureVectorBuilder(parser)
    mle = MLE(["t1", "t2", "t3", "t5"], splitted, fb)
    v = np.ones(fb.size)
    history = History("t1", "t2", ["w1", "w2", "w3", "w2"], 2)
    res = mle.p(history, "t3", v)
    print(res)
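mle.p(history, tag, v) is presumably the log-linear conditional p(tag | history); a minimal sketch of that softmax, assuming getFeatureVector returns the indices of the active features:

import numpy as np

def p_sketch(history, tag, v, all_tags, fb):
    # Hypothetical: score each candidate tag by summing the weights of
    # its active features, then normalize with a stabilized softmax.
    scores = np.array([v[fb.getFeatureVector(history, t)].sum()
                       for t in all_tags])
    exp_scores = np.exp(scores - scores.max())
    return exp_scores[all_tags.index(tag)] / exp_scores.sum()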
Example #8
def TRAIN():
    print("Training: ")
    parser = MyParser("../train.wtag")
    splitted = parser.splitted
    fb = BasicFeatureVectorBuilder(parser)
    tags = parser.getUniqueTags()
    mle = MLE(tags, splitted, fb)
    best_v = mle.findBestV(np.loadtxt("opt_v.txt"))
    print(best_v)
Example #9
def train():
    train_parser = MyParser("../train.wtag")
    seenSentencesToTagsDict = train_parser.getSeenWordsToTagsDict()
    parser = MyParser("../comp748.wtag")
    splitted = parser.splitted
    fb = BasicFeatureVectorBuilder(parser, 0)
    mle = MLE(parser.getUniqueTags(), splitted, fb)
    v = np.loadtxt("opt_v_3.txt")
    sentences = [[w for (w, t) in tuples] for tuples in splitted]
    expected_tags = [[t for (w, t) in tuples] for tuples in splitted]
    vit = Viterbi(mle, mle.allTags, v, seenSentencesToTagsDict)
    total_res = 0
    words_count = 0
    total_time = 0
    for idx, (s, expected) in enumerate(zip(sentences, expected_tags)):
        curr_sent_len = len(s)
        words_count = words_count + curr_sent_len
        start = time.time()
        tags = vit.inference(s)

        with open("test_wtag748_results.txt", 'a') as res_file:
            for item in tags:
                res_file.write("%s " % item)
            res_file.write("\n")

        with open("test_wtag748_expected.txt", 'a') as exp_file:
            for item in expected:
                exp_file.write("%s " % item)
            exp_file.write("\n")

        stop = time.time()
        e = np.array([hash(x) for x in expected])
        t = np.array([hash(x) for x in tags])
        current_correct = np.sum(e == t)
        print("---------------------")
        print("Inference for sentence#", idx, "took:", stop - start, "seconds")
        total_time = total_time + (stop - start)
        print("Current sentence accuracy:", current_correct, "of:", curr_sent_len)
        total_res = total_res + current_correct
        print("Cumulative accuracy:", total_res, "of:", words_count, "=", (100 * total_res) / words_count, "%")
        print("Total time for", idx + 1, "sentences:", (total_time / 60), "minutes")
Example #10
def fit_basic_model(continueTraining):
    v = None
    if continueTraining:
        v = np.loadtxt("finish_basic_opt_v_lambda_0_007.txt")
    lambdas = [0.007]
    parser = MyParser("../train.wtag")
    splitted = parser.splitted
    basicFeatureBuilder = BasicFeatureVectorBuilder(parser, 0)
    tags = parser.getUniqueTags()
    mle = MLE(tags, splitted, basicFeatureBuilder)
    fit_model_aux(mle, "basic", lambdas, 550, v)
Example #11
def fit_complex_model(continueTraining):
    v = None
    if continueTraining:
        v = np.loadtxt("finish_complex_opt_v_lambda_0_007.txt")
    lambdas = [0.007]
    parser = MyParser("../train.wtag")
    splitted = parser.splitted
    cfb = ComplexFeatureVectorBuilder(parser, False)
    tags = parser.getUniqueTags()
    mle = MLE(tags, splitted, cfb)
    fit_model_aux(mle, "complex", lambdas, 300, v)
Example #12
def calcTupleTestBasic():
    parser = MyParser("MLE_db.wtag")
    splitted = parser.splitted
    fb = BasicFeatureVectorBuilder(parser)
    mle = MLE(["t1", "t2", "t3", "t5"], splitted, fb)
    v = np.zeros(fb.size)
    res = mle.calcTuple(v)
    print(res)
    best_v = mle.findBestV()
    print(best_v)
    res1 = mle.calcTuple(best_v)
    print(res1)
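calcTuple returns the objective value and its gradient as a pair (see Example #16), which is exactly the shape scipy.optimize.minimize consumes when jac=True. findBestV plausibly wraps it along these lines; a sketch, assuming calcTuple yields the negated log-likelihood so that minimization maximizes likelihood:

import numpy as np
from scipy.optimize import minimize

def find_best_v_sketch(mle, size, v0=None):
    # Hypothetical wrapper: with jac=True, L-BFGS-B accepts an objective
    # returning (value, gradient) tuples, which matches calcTuple's shape.
    if v0 is None:
        v0 = np.zeros(size)
    result = minimize(mle.calcTuple, v0, jac=True, method='L-BFGS-B')
    return result.x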
Example #13
def toJson(data):
    lexer = MyLexer()
    parser = MyParser()

    try:
        result = parser.parse(lexer.tokenize(data))
        new_result = normalize(result)
        json_str = json.dumps(new_result, indent=4, ensure_ascii=False)
        json_str = json_str.replace('"\\\"', '"').replace('\\""', '"')
        return json_str
    except TypeError:
        return 'Syntax error!'
    except EOFError:
        return 'EOF error!'
Example #14
def info(self):
    torrent_name = self.recv()
    try:
        rc = MyParser(torrent_name)
        self.send(rc.get_state() + " " + rc.get_progress())
    except Exception:
        # If the torrent cannot be parsed, rc is unbound below, so bail out.
        self.send(" ")
        return
    print("---")
    print(rc.get_progress())
    if "100%" in rc.get_progress():
        if torrent_name in TorrentDict and TorrentDict[torrent_name] != "100%":
            print("full")
            mymutex.acquire()
            endServerDownloadFullTorrent(torrent_name)
            mymutex.release()
    TorrentDict[torrent_name] = rc.get_progress().split()[0]
Example #15
def basicConfusion():
    mp = MyParser("../train.wtag")
    tags = mp.getUniqueTags()
    cm = ConfusionMatrix(tags)
    with open('testwtag_expected_finish_basic_opt_v_lambda_0_007.txt') as expected, \
         open('testwtag_results_finish_basic_opt_v_lambda_0_007.txt') as actual:
        mat, res = cm.calculateMatrixForLowestNTags(expected, actual, 10)
    with open('basicConfusionMatrix_141217.txt', 'a') as output:
        for tag in tags:
            output.write(" {}".format(tag))
        output.write('\n')
        for idx, tag in enumerate(res):
            output.write("{} ".format(tag))
            for j in range(mat[idx].size):
                output.write("{} ".format(mat[idx][j]))
            output.write('\n')
Example #16
def calcTupleTestRealData():
    parser = MyParser("../train.wtag")
    splitted = parser.splitted
    # fb = BasicFeatureVectorBuilder(parser,0)
    fb = ComplexFeatureVectorBuilder(parser)
    tags = parser.getUniqueTags()
    start = time.time()
    mle = MLE(tags, splitted, fb, 0, "tmp1234.txt")
    end = time.time()
    print("End of preprocessing, took: ", end - start)
    v = np.ones(fb.size)
    start = time.time()
    f = open("train_gradientTuple.txt", "w")
    lv, grad = mle.calcTuple(v)
    print("L(V) = ", lv)
    print(grad)
    np.savetxt('train_gradientTuple.txt', grad)
    end = time.time()
    print("calcTuple took: ", end - start, " seconds")
Example #17
def basicTest():
    parser = MyParser('small.wtag')
    basic = BasicFeatureVectorBuilder(parser)

    history1 = History("t3", "t8", ["w2", "w2", "w5", "w3", "w13", "w31", "w33"], 2)
    vec1 = basic.getFeatureVector(history1, "t50")
    assert vec1.size == 3
    print(vec1)

    history2 = History("t4", "t8", ["w2", "w2", "w5", "w3", "w13", "w31", "w33"], 2)
    vec2 = basic.getFeatureVector(history2, "t50")
    assert vec2.size == 2
    print(vec2)

    history3 = History("t4", "t8", ["w2", "w2", "w4", "w3", "w13", "w31", "w33"], 2)
    vec3 = basic.getFeatureVector(history3, "t50")
    assert vec3.size == 1
    print(vec3)

    vec4 = basic.getFeatureVector(history3, "noTag")
    assert vec4.size == 0
Example #18
def torrentdetails(self):
    torrent_name = self.recv()
    try:
        rc = MyParser(torrent_name)
        self.send("ACK")
        self.recv()
    except Exception:
        self.send("NAK")
        self.recv()
        return
    (seeds, peers, avail) = rc.get_info()
    self.send(seeds)
    self.recv()
    self.send(peers)
    self.recv()
    self.send(avail)
    self.recv()
    ID = rc.get_ID()
    self.send(ID)
    print("OK1")
    self.recv()
    print("OK2")
    eta = rc.get_eta()
    self.send(eta + " ")
    print(eta)
    print("OK3")
    self.recv()
    print("OK4")
    self.send(rc.get_downspeed())
    print("OK5")
    self.recv()
    self.send(rc.get_upspeed())
    self.recv()
    mymutex.acquire()
    count = getClientCountForTorrent(torrent_name)
    mymutex.release()
    self.send(str(count))
    self.recv()
    print("done")
Example #19
def trainwtagTest():
    parser = MyParser('../train.wtag')
    basic = BasicFeatureVectorBuilder(parser)
    splitted = parser.splitted[2829]
    sentence = [l[0] for l in splitted]

    history1 = History("IN", "DT", sentence, 11)
    vec1 = basic.getFeatureVector(history1, "NN")
    assert vec1.size == 3
    print(vec1)

    history2 = History("NoTag", "DT", sentence, 11)
    vec2 = basic.getFeatureVector(history2, "NN")
    assert vec2.size == 2
    print(vec2)

    history3 = History("NoTag", "IN", sentence, 11)
    vec3 = basic.getFeatureVector(history3, "DT")
    assert vec3.size == 1
    print(vec3)

    vec4 = basic.getFeatureVector(history3, "noTag")
    assert vec4.size == 0
    print(vec4)
Example #20
    i = input();
    output(factorial(i));
    return 0;
}
"""

    # Feed the input to the lexer
    lexer.input(source_str)

    # Tokenization
    # for tok in lexer:
    #     print(tok)

    # Syntax analysis
    # Build the parser
    parser = MyParser("AST")

    # The parser analyzes the input
    root_node = parser.parse(source_str, lexer=lexer)

    # The semantic analyzer builds the symbol table and checks for errors
    my_semantic_analyzer = MySemanticAnalyzer()
    my_semantic_analyzer.build_semantic_analyzer(root_node)
    if not my_semantic_analyzer.error:
        # Initialize code generation
        build_code_generator(root_node)
        print(emit_util.result)

    # Print the syntax tree
    # root_node.print()
Example #21
from MyParser import MyParser

p = MyParser("../train.wtag")
words = p.getWordsWithTag()
tag3 = p.getAllThreeTagsCombinations()
tag2 = p.getAllPairTagsCombinations()
tag = p.getUniqueTags()
# print(tag3)
# print(tag2)
print(tag)
Example #22
def feature_num_print():
    parser = MyParser('../train.wtag')
    ComplexFeatureVectorBuilder(parser, parser, True)

def parse(token_list: list) -> tuple:
    parser = MyParser(token_list)
    return parser.parse(iter(token_list))