Example #1
0
def main():
    """Score every code unit of the test corpus under all model settings.

    Walks ``../Java/Test/``, skips files whose name contains a blacklisted
    hash, extracts code units with ``e.ExtractCode`` and, for every
    combination of model (``sm``), ``vp`` value and fill strategy, calls
    ``LM.getLL`` on a deep copy of each unit.  Up to 20 results per call are
    printed, and one ``;``-separated line per call is appended to
    ``results.csv``::

        <file name minus last 5 chars>;<smod>;<mp>;<v>;<fill>;<unk>;<ll>

    ``unk`` is True when the file name ends in ``.java``.  ``ll`` is the
    score of the last printed result, or ``None`` if ``LM.getLL`` returned
    nothing.
    """
    par = plyj.parser.Parser()
    corpus_path = "../Java/Test/"
    # NOTE(review): the first hash is 31 hex digits, the second 32 - confirm
    # the first one was not truncated.
    blacklist = ["5a8beeae20366b5094d0db8148e0563", "3cd87ee90872cfcb72b3cb3b773d8efa"]
    sm = [["levels", 3], ["levels", 2], ["levels", 1], ["levels", "MEMM"], ["cfs", 3], ["cfs", 2], ["cfs", 1]]
    vp = [3, 2, 1]
    fill = ["max", "random"]
    # `with` guarantees results.csv is flushed and closed even when parsing
    # or scoring raises half-way through the corpus.
    with open("results.csv", 'w') as fout:
        for subdir, dirs, files in os.walk(corpus_path):
            for f in files:
                if any(h in f for h in blacklist):
                    continue
                p = os.path.join(subdir, f)
                cus = e.ExtractCode(par, p)
                # Depends only on the file name, so compute it once per file.
                unk = f.endswith(".java")
                # NOTE(review): f[:-5] drops five characters even when the
                # name does not end in ".java" - confirm this is intended.
                stem = f[:-5]
                for smod, mp in sm:
                    for v in vp:
                        for fi in fill:
                            for i, cu in cus:
                                # Work on a copy so one configuration cannot
                                # affect the unit seen by the next one.
                                cu = copy.deepcopy(cu)
                                ans = LM.getLL(cu, i, smod, mp, v, "pot", fi)
                                # Single-argument print() behaves the same
                                # under Python 2 and Python 3.
                                print(' '.join(str(x) for x in (smod, mp, v, "pot", fi)))
                                # Reset per call: with an empty result the
                                # row must not reuse the score of an earlier
                                # configuration (or hit an unbound name).
                                ll = None
                                for call, ll in ans[:20]:
                                    print(str(ll) + ': ' + e.nstr(call))
                                fout.write(stem + ';' + smod + ';' + str(mp) + ';' + str(v) + ';' + fi + ';' + str(unk) + ';' + str(ll) + '\n')
Example #2
0
def main():
    """Collect and print vocabulary statistics for the Java corpus.

    Walks ``../Java/Corpus/``, parses every non-blacklisted ``.java`` file
    with ``e.ExtractCode`` and feeds each extracted unit to
    ``seq.getSents(cu, i, "levels")``.  For every returned sentence it

    * counts the sentence under its length in ``sentlens``;
    * calls ``getSig`` on each statement and stores the tail of the result
      in ``vocab`` under the result's first element.

    At the end it prints every vocab key with the output of ``resolveSigs``
    for it, then the vocab size, the number of distinct values in the two
    extra lists returned by ``seq.getSents``, and the length histogram.
    """
    par = plyj.parser.Parser()
    file_path = "../Java/Corpus/"
    vocab = {}
    sentlens = {}
    sf = []
    fields = []
    ctr = 1
    # NOTE(review): the first hash is 31 hex digits, the second 32 - confirm
    # the first one was not truncated.
    blacklist = [
        "5a8beeae20366b5094d0db8148e0563", "3cd87ee90872cfcb72b3cb3b773d8efa"
    ]
    for subdir, dirs, files in os.walk(file_path):
        for f in files:
            # Skipped files must not fall through to the processing loop:
            # `cus` would still hold the previous file's units (counting
            # them twice) or be unbound if the very first file is skipped.
            if not f.endswith(".java") or any(h in f for h in blacklist):
                continue
            p = os.path.join(subdir, f)
            cus = e.ExtractCode(par, p)
            for i, cu in cus:
                sf2, fi, sents = seq.getSents(cu, i, "levels")
                sf.extend(sf2)
                fields.extend(fi)
                # Progress line: running unit number and its sentence count.
                print(str(ctr) + ": " + str(len(sents)))
                ctr += 1
                for sent, vl in sents:
                    sentlens[len(sent)] = sentlens.get(len(sent), 0) + 1
                    for stat in sent:
                        s = getSig(stat, vl)
                        vocab.setdefault(s[0], []).append(s[1:])
    for s in vocab:
        print(s)
        for sig in resolveSigs(vocab[s]):
            print('\t' + e.nstr(sig))
    print(len(vocab))
    print(len(set(sf)))
    print(len(set(fields)))
    print(sentlens)
Example #3
0
def main():
    """Parse the ``*test.java`` files under ``../Java/ParseTests/`` and dump them.

    Every file name encountered is printed.  For files whose name ends in
    ``test.java`` each unit returned by ``e.ExtractCode`` has its
    ``getUNR()`` items collected, its variables renamed via ``renameVars()``,
    and its ``getStr()`` text plus every ``dumpVars()`` entry printed.  The
    collected ``getUNR()`` items are printed last, one per line.
    """
    java_parser = plyj.parser.Parser()
    root = "../Java/ParseTests/"
    collected = []
    for folder, _subdirs, names in os.walk(root):
        for name in names:
            print(name)
            if not name.endswith("test.java"):
                continue
            source_path = os.path.join(folder, name)
            units = e.ExtractCode(java_parser, source_path)
            for _index, unit in units:
                # getUNR() is read before renameVars() rewrites the unit.
                collected.extend(unit.getUNR())
                unit.renameVars()
                print(unit.getStr())
                for var in unit.dumpVars():
                    print(e.nstr(var))
    for item in collected:
        print(e.nstr(item))
Example #4
0
def main():
    """Collect and print vocabulary statistics for the Java corpus.

    Walks ``../Java/Corpus/``, parses every non-blacklisted ``.java`` file
    with ``e.ExtractCode`` and feeds each extracted unit to
    ``seq.getSents(cu, i, "levels")``.  For every returned sentence it

    * counts the sentence under its length in ``sentlens``;
    * calls ``getSig`` on each statement and stores the tail of the result
      in ``vocab`` under the result's first element.

    At the end it prints every vocab key with the output of ``resolveSigs``
    for it, then the vocab size, the number of distinct values in the two
    extra lists returned by ``seq.getSents``, and the length histogram.
    """
    par = plyj.parser.Parser()
    file_path = "../Java/Corpus/"
    vocab = {}
    sentlens = {}
    sf = []
    fields = []
    ctr = 1
    # NOTE(review): the first hash is 31 hex digits, the second 32 - confirm
    # the first one was not truncated.
    blacklist = ["5a8beeae20366b5094d0db8148e0563", "3cd87ee90872cfcb72b3cb3b773d8efa"]
    for subdir, dirs, files in os.walk(file_path):
        for f in files:
            # Skipped files must not fall through to the processing loop:
            # `cus` would still hold the previous file's units (counting
            # them twice) or be unbound if the very first file is skipped.
            if not f.endswith(".java") or any(h in f for h in blacklist):
                continue
            p = os.path.join(subdir, f)
            cus = e.ExtractCode(par, p)
            for i, cu in cus:
                sf2, fi, sents = seq.getSents(cu, i, "levels")
                sf.extend(sf2)
                fields.extend(fi)
                # Progress line: running unit number and its sentence count.
                print(str(ctr) + ": " + str(len(sents)))
                ctr += 1
                for sent, vl in sents:
                    sentlens[len(sent)] = sentlens.get(len(sent), 0) + 1
                    for stat in sent:
                        s = getSig(stat, vl)
                        vocab.setdefault(s[0], []).append(s[1:])
    for s in vocab:
        print(s)
        for sig in resolveSigs(vocab[s]):
            print('\t' + e.nstr(sig))
    print(len(vocab))
    print(len(set(sf)))
    print(len(set(fields)))
    print(sentlens)
Example #5
0
def main():
    """Export method sentences, variable sentences and the vocabulary.

    The sentence mode is taken from ``sys.argv[1]``; it falls back to
    ``"levels"`` when absent or not one of ``"cfs"`` / ``"levels"``.

    Walks ``../Java/Corpus/``, parses every non-blacklisted ``.java`` file
    with ``e.ExtractCode`` and writes three files into ``../Data/Raw``:

    * ``method_sentences_<mode>.txt`` - per sentence from ``seq.getSents``:
      ``<S2>``, ``<S1>``, one ``<signature> # <context>`` line per
      statement, then ``<END>``;
    * ``variable_sentences_<mode>.txt`` - same framing for the sentences
      returned by ``seq.getVarSents``, one statement per line;
    * ``vocab_<mode>.txt`` - every vocab key followed by tab-indented
      ``t.resolveSigs`` entries, written once after the whole corpus has
      been read.
    """
    par = plyj.parser.Parser()
    modes = ["cfs", "levels"]
    mode = sys.argv[1] if len(sys.argv) > 1 else "levels"
    if mode not in modes:
        mode = "levels"
    corpus_path = "../Java/Corpus/"
    data_path = "../Data/Raw"
    ####
    meth_name = "method_sentences_" + mode + ".txt"
    var_name = "variable_sentences_" + mode + ".txt"
    vocab_name = "vocab_" + mode + ".txt"
    ####
    vocab = {}
    ctr = 1
    # NOTE(review): the first hash is 31 hex digits, the second 32 - confirm
    # the first one was not truncated.
    blacklist = ["5a8beeae20366b5094d0db8148e0563", "3cd87ee90872cfcb72b3cb3b773d8efa"]
    # All three outputs are opened up front (as before) and closed by the
    # `with` block even if parsing raises part-way through the corpus.
    with open(os.path.join(data_path, meth_name), 'w') as meth_file, \
            open(os.path.join(data_path, var_name), 'w') as var_file, \
            open(os.path.join(data_path, vocab_name), 'w') as vocab_file:
        for subdir, dirs, files in os.walk(corpus_path):
            for f in files:
                if not f.endswith(".java") or any(h in f for h in blacklist):
                    continue
                p = os.path.join(subdir, f)
                cus = e.ExtractCode(par, p)
                for i, cu in cus:
                    # Only the sentences are used here; the first two
                    # return values are not needed for the export.
                    _sf, _fields, sents = seq.getSents(cu, i, mode)
                    # Progress line: running unit number and sentence count.
                    print(str(ctr) + ": " + str(len(sents)))
                    ctr += 1
                    for sent, vl in sents:
                        meth_file.write("<S2>\n")
                        meth_file.write("<S1>\n")
                        for stat, ctx in sent:
                            meth_file.write(e.nstr(t.getSig(stat, vl, False)) + ' # ' + e.nstr(ctx) + '\n')
                            s = t.getSig(stat, vl)
                            vocab.setdefault(s[0], []).append(s[1:])
                        meth_file.write('<END>\n')
                    for vsent in seq.getVarSents(sents):
                        var_file.write("<S2>\n")
                        var_file.write("<S1>\n")
                        for stat, _ctx in vsent:
                            var_file.write(e.nstr(stat) + '\n')
                        var_file.write('<END>\n')
        # Dump the vocabulary once, after the walk.  Doing it inside the
        # os.walk loop re-writes the cumulative vocabulary for every
        # directory and fills the file with duplicates.
        for s in vocab:
            vocab_file.write(s + '\n')
            for sig in t.resolveSigs(vocab[s]):
                vocab_file.write('\t' + e.nstr(sig) + '\n')
Example #6
0
def main():
    """Export method sentences, variable sentences and the vocabulary.

    The sentence mode is taken from ``sys.argv[1]``; it falls back to
    ``"levels"`` when absent or not one of ``"cfs"`` / ``"levels"``.

    Walks ``../Java/Corpus/``, parses every non-blacklisted ``.java`` file
    with ``e.ExtractCode`` and writes three files into ``../Data/Raw``:

    * ``method_sentences_<mode>.txt`` - per sentence from ``seq.getSents``:
      ``<S2>``, ``<S1>``, one ``<signature> # <context>`` line per
      statement, then ``<END>``;
    * ``variable_sentences_<mode>.txt`` - same framing for the sentences
      returned by ``seq.getVarSents``, one statement per line;
    * ``vocab_<mode>.txt`` - every vocab key followed by tab-indented
      ``t.resolveSigs`` entries, written once after the whole corpus has
      been read.
    """
    par = plyj.parser.Parser()
    modes = ["cfs", "levels"]
    mode = sys.argv[1] if len(sys.argv) > 1 else "levels"
    if mode not in modes:
        mode = "levels"
    corpus_path = "../Java/Corpus/"
    data_path = "../Data/Raw"
    ####
    meth_name = "method_sentences_" + mode + ".txt"
    var_name = "variable_sentences_" + mode + ".txt"
    vocab_name = "vocab_" + mode + ".txt"
    ####
    vocab = {}
    ctr = 1
    # NOTE(review): the first hash is 31 hex digits, the second 32 - confirm
    # the first one was not truncated.
    blacklist = [
        "5a8beeae20366b5094d0db8148e0563", "3cd87ee90872cfcb72b3cb3b773d8efa"
    ]
    # All three outputs are opened up front (as before) and closed by the
    # `with` block even if parsing raises part-way through the corpus.
    with open(os.path.join(data_path, meth_name), 'w') as meth_file, \
            open(os.path.join(data_path, var_name), 'w') as var_file, \
            open(os.path.join(data_path, vocab_name), 'w') as vocab_file:
        for subdir, dirs, files in os.walk(corpus_path):
            for f in files:
                if not f.endswith(".java") or any(h in f for h in blacklist):
                    continue
                p = os.path.join(subdir, f)
                cus = e.ExtractCode(par, p)
                for i, cu in cus:
                    # Only the sentences are used here; the first two
                    # return values are not needed for the export.
                    _sf, _fields, sents = seq.getSents(cu, i, mode)
                    # Progress line: running unit number and sentence count.
                    print(str(ctr) + ": " + str(len(sents)))
                    ctr += 1
                    for sent, vl in sents:
                        meth_file.write("<S2>\n")
                        meth_file.write("<S1>\n")
                        for stat, ctx in sent:
                            meth_file.write(
                                e.nstr(t.getSig(stat, vl, False)) + ' # ' +
                                e.nstr(ctx) + '\n')
                            s = t.getSig(stat, vl)
                            vocab.setdefault(s[0], []).append(s[1:])
                        meth_file.write('<END>\n')
                    for vsent in seq.getVarSents(sents):
                        var_file.write("<S2>\n")
                        var_file.write("<S1>\n")
                        for stat, _ctx in vsent:
                            var_file.write(e.nstr(stat) + '\n')
                        var_file.write('<END>\n')
        # Dump the vocabulary once, after the walk.  Doing it inside the
        # os.walk loop re-writes the cumulative vocabulary for every
        # directory and fills the file with duplicates.
        for s in vocab:
            vocab_file.write(s + '\n')
            for sig in t.resolveSigs(vocab[s]):
                vocab_file.write('\t' + e.nstr(sig) + '\n')