Exemple #1
0
def main():
    """Evaluate every corpus file under each model configuration; log to CSV.

    Walks ``../Java/Test/`` and skips any file whose name contains one of
    the blacklisted hashes. For each remaining file, the compilation units
    returned by ``e.ExtractCode`` are passed to ``LM.getLL`` once per
    combination of ``sm`` entry, ``vp`` value and ``fill`` strategy. The
    first 20 ``(call, ll)`` pairs of each result are printed, and one row
    ``name;smod;mp;v;fill;unk;ll`` is appended to ``results.csv``.

    The ``ll`` written to the CSV is the value from the last printed pair,
    or ``None`` when ``LM.getLL`` returned nothing.
    """
    par = plyj.parser.Parser()
    corpus_path = "../Java/Test/"
    blacklist = ["5a8beeae20366b5094d0db8148e0563", "3cd87ee90872cfcb72b3cb3b773d8efa"]
    sm = [["levels", 3], ["levels", 2], ["levels", 1], ["levels", "MEMM"], ["cfs", 3], ["cfs", 2], ["cfs", 1]]
    vp = [3, 2, 1]
    fill = ["max", "random"]
    # The context manager closes results.csv even when parsing or scoring
    # raises part-way through the corpus.
    with open("results.csv", 'w') as fout:
        for subdir, _, files in os.walk(corpus_path):
            for f in files:
                if any(h in f for h in blacklist):
                    continue
                p = os.path.join(subdir, f)
                cus = e.ExtractCode(par, p)
                # Per-file values, hoisted out of the configuration loops.
                unk = f.endswith(".java")
                # Drops the last five characters (the length of ".java").
                stem = f[:-5]
                for smod, mp in sm:
                    for v in vp:
                        for fi in fill:
                            for i, cu in cus:
                                # Score a private copy of the unit.
                                cu = copy.deepcopy(cu)
                                ans = LM.getLL(cu, i, smod, mp, v, "pot", fi)
                                # Single-argument print calls produce the same
                                # output on Python 2 and Python 3.
                                print(' '.join(str(x) for x in (smod, mp, v, "pot", fi)))
                                # Reset so an empty result cannot reuse the
                                # score of a previous configuration (or raise
                                # NameError on the very first one).
                                ll = None
                                for call, ll in ans[:20]:
                                    print(str(ll) + ': ' + e.nstr(call))
                                fout.write(';'.join([stem, smod, str(mp), str(v), fi, str(unk), str(ll)]) + '\n')
    def preprocess(self, path, split_token=' '):
        """Read a labelled text file and return encoded texts with labels.

        Every line is split once on ``split_token`` into an integer label
        and the remaining text. The text is passed through
        ``util.normalizeString``, ``nltk.word_tokenize`` and
        ``util.removeStop`` and re-joined with single spaces. Lines are
        shuffled with NumPy's global RNG seeded to 0, so the order is
        reproducible.

        Args:
            path: Path of the file to read (decoded as UTF-8, undecodable
                bytes ignored).
            split_token: Separator between the label and the text.

        Returns:
            A tuple ``(LM.ONE_hot(texts).load_LM(), labels)`` where
            ``labels`` is a list of ints in shuffled line order.

        Raises:
            ValueError: If a line does not contain ``split_token`` or its
                label is not an integer.
        """
        # Context manager so the file handle is released deterministically.
        with open(path, encoding='utf-8', errors='ignore') as fh:
            data = fh.readlines()
        np.random.seed(0)
        np.random.shuffle(data)

        x = []
        y = []

        for line in data:
            y_, x_ = line.split(split_token, 1)

            tokens = nltk.word_tokenize(util.normalizeString(x_))
            tokens = util.removeStop(tokens)

            x.append(' '.join(tokens))
            y.append(int(y_))

        return LM.ONE_hot(x).load_LM(), y
    def __init__(self, path='../data/rt-polarity.all'):
        """Load, embed, split and pad the dataset stored at ``path``.

        The file's lines are shuffled with NumPy's global RNG seeded to 0,
        parsed by ``load_data``, tokenized, stripped of stop words and
        converted by ``LM.pretrained_2().preprocess``. The result is split
        90/10 into train/test, and the train part again 90/10 into
        train/dev. Every split's inputs are padded via ``util.padding``
        with ``max_len=256``.

        Sets:
            self.data: ``((train_x, train_y), (dev_x, dev_y),
                (test_x, test_y))``.
            self.data_type: For each of the three splits, the tensor types
                ``[torch.FloatTensor, torch.LongTensor]``.

        Args:
            path: Path of the data file (decoded as UTF-8, undecodable
                bytes ignored).
        """
        # Context manager so the file handle is released deterministically.
        with open(path, encoding='utf-8', errors='ignore') as fh:
            data = fh.readlines()

        np.random.seed(0)
        np.random.shuffle(data)

        wv = LM.pretrained_2()

        x, y = load_data(data)
        x = map(tokenizer.tokenize, x)
        x = map(tokenizer.remove_stopwords, x)

        x = wv.preprocess(list(x))

        train_x, test_x, train_y, test_y = train_test_split(x,
                                                            y,
                                                            test_size=0.1)
        train_x, dev_x, train_y, dev_y = train_test_split(train_x,
                                                          train_y,
                                                          test_size=0.1)

        max_len = 256
        # One all-zero vector of length 300, wrapped in a list; presumably
        # matches the embedding width of ``wv`` -- TODO confirm.
        padding_item = [300 * [0]]

        train_x = util.padding(train_x,
                               max_len=max_len,
                               padding_item=padding_item)
        dev_x = util.padding(dev_x, max_len=max_len, padding_item=padding_item)
        test_x = util.padding(test_x,
                              max_len=max_len,
                              padding_item=padding_item)
        print(np.array(train_x).shape)
        print(np.array(dev_x).shape)
        print(np.array(test_x).shape)

        self.data = (train_x, train_y), (dev_x, dev_y), (test_x, test_y)

        # (input type, label type) for each split, in train/dev/test order.
        train_type = [torch.FloatTensor, torch.LongTensor]
        val_type = [torch.FloatTensor, torch.LongTensor]
        test_type = [torch.FloatTensor, torch.LongTensor]
        self.data_type = [train_type, val_type, test_type]
Exemple #4
0
import LM
import pickle as pkl
import Visual_tools as vt
import matplotlib.pyplot as plot

if __name__ == '__main__':
    # Build one language model per chat from the message dictionary, then
    # visualise how dissimilar the models are from each other.
    fb_dict = LM.fb_file_to_dict('../../Data/FB/JN_msgDict.txt')
    new_dict = LM.filter_outlier(fb_dict, threshold=200)
    all_LMs = LM.build_all_chat_LM(new_dict, max_len=2)

    dissimilarity, ID_map = LM.export_dissimilarity(all_LMs)
    plot.matshow(dissimilarity)
    plot.colorbar()
    plot.show()
    # Pickle output is binary: text mode 'w' fails on Python 3, and 'wb' is
    # also valid on Python 2. The context manager closes the file so the
    # dump is fully flushed to disk.
    with open('./ID_map.pkl', 'wb') as id_map_file:
        pkl.dump(ID_map, id_map_file)

    pos, plt = vt.visual_mds(dissimilarity)
    plt.show()

Exemple #5
0
import LM
import pickle as pkl
import Visual_tools as vt
import matplotlib.pyplot as plot

if __name__ == '__main__':
    # Build one language model per chat from the message dictionary, then
    # visualise how dissimilar the models are from each other.
    fb_dict = LM.fb_file_to_dict('../../Data/FB/JN_msgDict.txt')
    new_dict = LM.filter_outlier(fb_dict, threshold=200)
    all_LMs = LM.build_all_chat_LM(new_dict, max_len=2)

    dissimilarity, ID_map = LM.export_dissimilarity(all_LMs)
    plot.matshow(dissimilarity)
    plot.colorbar()
    plot.show()
    # Pickle output is binary: text mode 'w' fails on Python 3, and 'wb' is
    # also valid on Python 2. The context manager closes the file so the
    # dump is fully flushed to disk.
    with open('./ID_map.pkl', 'wb') as id_map_file:
        pkl.dump(ID_map, id_map_file)

    pos, plt = vt.visual_mds(dissimilarity)
    plt.show()
Exemple #6
0
@author: Guy
"""

import LM
while True:
    print("Choose a sample knowledge base:")
    print(
        "The options are the Tweety (T), Flying Fish (F), the Paper (P), Make your own (O) or quit(x)"
    )
    choice = input("Please Type either T, F, P, O or x: ")
    if (choice == 'x'):
        break
    elif (choice == "T"):
        KnowB = ["(p)>(b)", "(*b)>(f)", "(*p)>(-f)"]
        RM = LM.createRankedModel(KnowB)
        print("")
        print("The Knowledge Base you have chosen is as follows:")
        print(KnowB)
        print()
        print(
            "Would you like to Check Entailment (E) of a sentence, see the LM-Minimal Model (LM) of the Knowledge Base or both (B)"
        )
        choice = input("Please Type either E, LM, or B: ")
        if (choice == "LM"):
            print("")
            print("The LM-Minimal Model is as follows:")
            print()
            check = False
            for i in RM:
                if (len(RM[i]) == 0):
Exemple #7
0
import LM

# Minimal end-to-end run: construct an LM.NN from the size list [1, 5, 1],
# train it on three samples whose targets equal their inputs, then print
# its prediction for the input [[1]].
nn = LM.NN([1, 5, 1])
inputs = [[1], [2], [3]]
targets = [[1], [2], [3]]
nn.train_lm(inputs, targets)
prediction = nn.predict([[1]])
print(prediction)
    def __init__(self,
                 seq_max_len,
                 embed_w,
                 vocab_size,
                 n_layer,
                 n_hidden,
                 keep_prob,
                 lr,
                 n_gpu,
                 grad_clip,
                 init_idx=0,
                 punkt_idx=None,
                 is_training=True,
                 scope_name=""):
        """Build a forward and a backward language model, each with a saver.

        Two ``LM.LanguageModel`` instances are created with identical
        hyper-parameters, one inside the ``"forward"`` variable scope and
        one inside ``"backward"``. After each model is built, its trainable
        variables are collected and handed to a ``tf.train.Saver`` that
        keeps a single checkpoint.

        Variable selection depends on ``scope_name``:

        * empty: a list of the trainable variables whose name starts with
          the direction name;
        * non-empty: a dict of the trainable variables whose name starts
          with ``scope_name`` and, once the leading ``scope_name + "/"``
          prefix is removed, contains the direction name. Keys are the
          stripped names without the ``":<index>"`` suffix.

        Args:
            seq_max_len: Stored on the instance and forwarded to both models.
            embed_w, n_layer, n_hidden, keep_prob, lr, n_gpu, grad_clip:
                Forwarded unchanged to both ``LM.LanguageModel`` instances.
            vocab_size: Forwarded to both models. The pad index is set to
                ``vocab_size - 1`` and the unknown index to
                ``vocab_size - 2``.
            init_idx: Stored as the initial vocabulary index.
            punkt_idx: Stored as the punctuation indices; ``None`` (the
                default) means a fresh empty list.
            is_training: Forwarded to both models.
            scope_name: Enclosing scope prefix; see variable selection above.
        """
        self.__seq_max_len = seq_max_len
        self.__vocab_size = vocab_size
        self.__vocab_init = init_idx
        self.__vocab_pad = self.__vocab_size - 1
        self.__vocab_unk = self.__vocab_size - 2

        # A ``[]`` default would be one list shared by every instance built
        # without ``punkt_idx``; create a new list per instance instead.
        self.__vocab_punkt = [] if punkt_idx is None else punkt_idx

        def build_direction(direction):
            """Build one directional model; return (model, variables, saver)."""
            with tf.variable_scope(direction, reuse=None):
                model = LM.LanguageModel(seq_max_len=seq_max_len,
                                         embed_w=embed_w,
                                         vocab_size=vocab_size,
                                         n_layer=n_layer,
                                         n_hidden=n_hidden,
                                         keep_prob=keep_prob,
                                         lr=lr,
                                         n_gpu=n_gpu,
                                         grad_clip=grad_clip,
                                         scope=scope_name + "/" + direction,
                                         is_training=is_training)

            if scope_name == "":
                variables = [
                    var for var in tf.trainable_variables()
                    if var.name.startswith(direction)
                ]
            else:
                prefix = scope_name + "/"
                variables = {}
                for var in tf.trainable_variables():
                    # NOTE(review): this also matches sibling scopes such as
                    # "<scope_name>_1"; kept as in the original selection.
                    if not var.name.startswith(scope_name):
                        continue
                    relative_name = var.name
                    # Remove the leading "<scope_name>/" prefix (repeated
                    # if nested). ``str.strip`` must not be used here: it
                    # strips a *set of characters* from both ends, which can
                    # eat the start of "forward"/"backward", and looping on
                    # ``scope_name in name`` never terminates when the scope
                    # name recurs later in the variable name.
                    while relative_name.startswith(prefix):
                        relative_name = relative_name[len(prefix):]
                    if direction in relative_name:
                        variables[relative_name.split(":")[0]] = var

            saver = tf.train.Saver(variables, max_to_keep=1)
            return model, variables, saver

        # Forward is built and collected before backward, as before.
        (self.__forward_tr,
         self.__forward_var,
         self.__forward_saver) = build_direction("forward")

        (self.__backward_tr,
         self.__backward_var,
         self.__backward_saver) = build_direction("backward")
Exemple #9
0
 def umbralbinary(self, beta):
     """Run ``f.umbralbinary`` over the pixel list and refresh ``self.pixels``.

     ``self.listpixels`` is replaced by the result of
     ``f.umbralbinary(self.listpixels, beta)``; ``self.pixels`` is then
     rebuilt from it with ``lm.listTonumpy2d`` using ``(self.w, self.h)``
     and ``np.uint8``. Presumably a binary threshold with cut-off ``beta``
     ("umbral" is Spanish for threshold) -- confirm against ``f``.
     """
     thresholded = f.umbralbinary(self.listpixels, beta)
     self.listpixels = thresholded
     size = (self.w, self.h)
     self.pixels = lm.listTonumpy2d(self.listpixels, size, np.uint8)
Exemple #10
0
def test_app():
    """Smoke test: building and training an ``LM.NN`` must not raise."""
    network = LM.NN([1, 5, 1])
    inputs = [[1], [2], [3]]
    targets = [[1], [2], [3]]
    network.train_lm(inputs, targets)
    # Reaching this line without an exception is the whole check.
    assert True
Exemple #11
0
    import Tkinter as tk
except ImportError:
    import tkinter as tk

try:
    import ttk
    py3 = False
except ImportError:
    import tkinter.ttk as ttk
    py3 = True


def init(top, gui, *args, **kwargs):
    """Record the window and GUI objects in module globals.

    Stores ``gui`` as ``w`` and ``top`` as both ``top_level`` and ``root``,
    then calls ``root.resizable(0, 0)``. Extra positional and keyword
    arguments are accepted and ignored.
    """
    global w, top_level, root
    w, top_level, root = gui, top, top
    root.resizable(0, 0)


def destroy_window():
    """Destroy the window held in ``top_level`` and reset it to ``None``."""
    global top_level
    window = top_level
    window.destroy()
    # Cleared only after destroy() has returned.
    top_level = None


if __name__ == '__main__':
    # Running this module directly hands control to LM.vp_start_gui().
    # LM is imported here, inside the guard, rather than at module top --
    # presumably to avoid a circular import between the two modules
    # (TODO confirm).
    import LM
    LM.vp_start_gui()