Ejemplo n.º 1
0
def getVarSents2(sents):
    """Build one compact occurrence sentence per variable.

    ``sents`` is iterated as ``(sentence, variable_list)`` pairs, and each
    sentence as ``(statement, context)`` pairs. For every variable, each
    statement containing at least one element equal to ``variable[2]``
    contributes a tuple ``(f, n, positions)``:

    * ``f`` is the first item of ``t.getSig(statement, variable_list)``,
    * ``n`` is the length of that signature minus one,
    * ``positions`` is a tuple of the matching indices, each reduced by 3.

    Returns a list holding one (possibly empty) list per variable, in the
    order the variables are encountered.
    """
    result = []
    for sentence, variables in sents:
        for variable in variables:
            entries = []
            for statement, _context in sentence:
                # Index of every element naming this variable, shifted by -3.
                positions = tuple(
                    index - 3
                    for index, token in enumerate(statement)
                    if token == variable[2]
                )
                if not positions:
                    # The statement never mentions the variable: skip it.
                    continue
                signature = t.getSig(statement, variables)
                entries.append(
                    (signature[0], len(signature) - 1, positions))
            result.append(entries)
    return result
Ejemplo n.º 2
0
def getVarSents(sents):
    """Collect, per variable, the signatures of the statements that use it.

    ``sents`` is iterated as ``(sentence, variable_list)`` pairs, and each
    sentence as ``(statement, context)`` pairs. For every variable, each
    statement containing at least one element equal to ``variable[2]``
    contributes a pair ``(signature, context)``. The signature is the
    object returned by ``t.getSig(statement, variable_list)``, extended in
    place with a ``'|'`` marker followed by the matching indices, each
    reduced by 3.

    Returns a list holding one (possibly empty) list per variable, in the
    order the variables are encountered.
    """
    collected = []
    for sentence, variables in sents:
        for variable in variables:
            per_variable = []
            for statement, context in sentence:
                hits = [
                    pos - 3
                    for pos, item in enumerate(statement)
                    if item == variable[2]
                ]
                if hits:
                    sig = t.getSig(statement, variables)
                    # The marker separates the signature proper from the
                    # occurrence positions appended after it.
                    sig.append('|')
                    sig.extend(hits)
                    per_variable.append((sig, context))
            collected.append(per_variable)
    return collected
Ejemplo n.º 3
0
def getVarSents2(sents):
    """Summarise, for each variable, the statements in which it appears.

    Each element of ``sents`` is unpacked as ``(sentence, var_list)`` and
    each element of a sentence as ``(statement, context)``; the context is
    not used. A statement is kept for a variable when at least one of its
    elements equals ``variable[2]``. A kept statement is recorded as
    ``(sig[0], len(sig) - 1, hits)``, where ``sig`` is
    ``t.getSig(statement, var_list)`` and ``hits`` is the tuple of matching
    indices minus 3.

    Returns a list with one list of records per variable.
    """

    def occurrences(statement, variable):
        # Positions (offset by -3) at which the statement names the variable.
        return tuple(
            idx - 3
            for idx in range(len(statement))
            if statement[idx] == variable[2]
        )

    out = []
    for sentence, var_list in sents:
        for variable in var_list:
            rows = []
            for statement, _ in sentence:
                hits = occurrences(statement, variable)
                if hits:
                    sig = t.getSig(statement, var_list)
                    rows.append((sig[0], len(sig) - 1, hits))
            out.append(rows)
    return out
Ejemplo n.º 4
0
def getVarSents(sents):
    """Return, for each variable, the annotated statements that mention it.

    Each element of ``sents`` is unpacked as ``(sent, v_list)`` and each
    element of ``sent`` as ``(stat, ctx)``. A statement is kept for a
    variable when at least one of its elements equals ``var[2]``. For a
    kept statement, the result of ``t.getSig(stat, v_list)`` is extended in
    place with ``"|"`` and then with every matching index minus 3, and is
    stored together with ``ctx``.

    Returns a list with one list of ``(signature, ctx)`` pairs per variable.
    """
    vsents = []
    for sent, v_list in sents:
        for var in v_list:
            tagged = []
            for stat, ctx in sent:
                offsets = []
                for pos, element in enumerate(stat):
                    if element == var[2]:
                        offsets.append(pos - 3)
                if not offsets:
                    # Variable absent from this statement.
                    continue
                sig = t.getSig(stat, v_list)
                sig.append("|")
                sig.extend(offsets)
                tagged.append((sig, ctx))
            vsents.append(tagged)
    return vsents
Ejemplo n.º 5
0
def main():
    """Generate the method, variable and vocabulary sentence files.

    The optional first command-line argument selects the mode (``"cfs"`` or
    ``"levels"``); a missing or unrecognised value falls back to
    ``"levels"``. Every ``.java`` file under ``../Java/Corpus/`` whose name
    contains none of the blacklisted hashes is parsed with
    ``e.ExtractCode`` and converted to sentences with ``seq.getSents``.

    Three files are written to ``../Data/Raw``:

    * ``method_sentences_<mode>.txt`` -- one block per sentence, delimited
      by ``<S2>``/``<S1>`` and ``<END>`` lines, with one
      ``signature # context`` line per statement;
    * ``variable_sentences_<mode>.txt`` -- one block per entry returned by
      ``seq.getVarSents``, with the same delimiters;
    * ``vocab_<mode>.txt`` -- every signature head seen, followed by its
      tab-indented signatures as returned by ``t.resolveSigs``.

    A progress line ``<counter>: <number of sentences>`` is printed for
    every compilation unit processed.
    """
    par = plyj.parser.Parser()
    modes = ["cfs", "levels"]
    mode = sys.argv[1] if len(sys.argv) > 1 else "levels"
    if mode not in modes:
        mode = "levels"
    corpus_path = "../Java/Corpus/"
    data_path = "../Data/Raw"
    ####
    meth_name = "method_sentences_" + mode + ".txt"
    var_name = "variable_sentences_" + mode + ".txt"
    vocab_name = "vocab_" + mode + ".txt"
    ####
    vocab = {}
    ctr = 1
    blacklist = ["5a8beeae20366b5094d0db8148e0563", "3cd87ee90872cfcb72b3cb3b773d8efa"]
    # The context manager closes all three files even if parsing or
    # sentence extraction raises part-way through the corpus.
    with open(os.path.join(data_path, meth_name), 'w') as meth_file, \
            open(os.path.join(data_path, var_name), 'w') as var_file, \
            open(os.path.join(data_path, vocab_name), 'w') as vocab_file:
        for subdir, _dirs, files in os.walk(corpus_path):
            for f in files:
                if not f.endswith(".java"):
                    continue
                if any(h in f for h in blacklist):
                    continue
                p = os.path.join(subdir, f)
                for i, cu in e.ExtractCode(par, p):
                    # The first two values returned by getSents are not
                    # needed here.
                    _sf, _fi, sents = seq.getSents(cu, i, mode)
                    # Single-argument call form prints the same text under
                    # both Python 2 and Python 3.
                    print(str(ctr) + ": " + str(len(sents)))
                    ctr += 1
                    for sent, vl in sents:
                        meth_file.write("<S2>\n")
                        meth_file.write("<S1>\n")
                        for stat, ctx in sent:
                            meth_file.write(e.nstr(t.getSig(stat, vl, False)) + ' # ' + e.nstr(ctx) + '\n')
                            s = t.getSig(stat, vl)
                            vocab.setdefault(s[0], []).append(s[1:])
                        meth_file.write('<END>\n')
                    vsents = seq.getVarSents(sents)
                    for vsent in vsents:
                        var_file.write("<S2>\n")
                        var_file.write("<S1>\n")
                        for stat, ctx in vsent:
                            var_file.write(e.nstr(stat) + '\n')
                        var_file.write('<END>\n')
        # Dump the vocabulary once, after the whole corpus has been walked,
        # so that each head is written a single time with all its signatures.
        for s in vocab:
            vocab_file.write(s + '\n')
            for sig in t.resolveSigs(vocab[s]):
                vocab_file.write('\t' + e.nstr(sig) + '\n')
Ejemplo n.º 6
0
def _write_method_sentences(meth_file, sents, vocab):
    # Write one delimited block per sentence and record every signature
    # under its head (first element) in ``vocab``.
    for sent, vl in sents:
        meth_file.write("<S2>\n")
        meth_file.write("<S1>\n")
        for stat, ctx in sent:
            meth_file.write(
                e.nstr(t.getSig(stat, vl, False)) + ' # ' +
                e.nstr(ctx) + '\n')
            s = t.getSig(stat, vl)
            vocab.setdefault(s[0], []).append(s[1:])
        meth_file.write('<END>\n')


def _write_variable_sentences(var_file, vsents):
    # Write one delimited block per variable sentence; contexts are ignored.
    for vsent in vsents:
        var_file.write("<S2>\n")
        var_file.write("<S1>\n")
        for stat, ctx in vsent:
            var_file.write(e.nstr(stat) + '\n')
        var_file.write('<END>\n')


def _write_vocab(vocab_file, vocab):
    # Write each head followed by its tab-indented resolved signatures.
    for s in vocab:
        vocab_file.write(s + '\n')
        for sig in t.resolveSigs(vocab[s]):
            vocab_file.write('\t' + e.nstr(sig) + '\n')


def main():
    """Walk the Java corpus and write the sentence and vocabulary files.

    The mode is taken from the first command-line argument when it is
    ``"cfs"`` or ``"levels"``; otherwise ``"levels"`` is used. Each
    ``.java`` file below ``../Java/Corpus/`` whose name contains no
    blacklisted hash is parsed with ``e.ExtractCode``; every compilation
    unit is converted with ``seq.getSents`` and ``seq.getVarSents``.

    Output goes to ``../Data/Raw``, in ``method_sentences_<mode>.txt``,
    ``variable_sentences_<mode>.txt`` and ``vocab_<mode>.txt``. A progress
    line ``<counter>: <number of sentences>`` is printed per compilation
    unit.
    """
    par = plyj.parser.Parser()
    modes = ["cfs", "levels"]
    mode = sys.argv[1] if len(sys.argv) > 1 else "levels"
    if mode not in modes:
        mode = "levels"
    corpus_path = "../Java/Corpus/"
    data_path = "../Data/Raw"
    ####
    meth_name = "method_sentences_" + mode + ".txt"
    var_name = "variable_sentences_" + mode + ".txt"
    vocab_name = "vocab_" + mode + ".txt"
    ####
    vocab = {}
    ctr = 1
    blacklist = [
        "5a8beeae20366b5094d0db8148e0563", "3cd87ee90872cfcb72b3cb3b773d8efa"
    ]
    # Opening the files in a ``with`` block guarantees they are closed even
    # when parsing or extraction fails for some corpus file.
    with open(os.path.join(data_path, meth_name), 'w') as meth_file, \
            open(os.path.join(data_path, var_name), 'w') as var_file, \
            open(os.path.join(data_path, vocab_name), 'w') as vocab_file:
        for subdir, _dirs, files in os.walk(corpus_path):
            for f in files:
                blacklisted = any(h in f for h in blacklist)
                if blacklisted or not f.endswith(".java"):
                    continue
                p = os.path.join(subdir, f)
                for i, cu in e.ExtractCode(par, p):
                    # Only the sentences are used; the two leading return
                    # values of getSents are discarded.
                    _sf, _fi, sents = seq.getSents(cu, i, mode)
                    # Parenthesised single argument: identical output on
                    # Python 2 and Python 3.
                    print(str(ctr) + ": " + str(len(sents)))
                    ctr += 1
                    _write_method_sentences(meth_file, sents, vocab)
                    _write_variable_sentences(var_file,
                                              seq.getVarSents(sents))
        # The vocabulary is complete only once every directory has been
        # visited, so it is written a single time here rather than after
        # each directory.
        _write_vocab(vocab_file, vocab)