def main():
    """Score every non-blacklisted corpus file and log the results to CSV.

    Walks ``../Java/Test/``, extracts code units from each file with
    ``e.ExtractCode`` and, for every combination of model setting, ``vp``
    value and fill strategy, calls ``LM.getLL``. The first 20 ``(call, ll)``
    pairs of each answer are printed and appended to ``results.csv`` as
    ``;``-separated rows:
    ``name;smod;mp;v;fill;unk;ll``.
    """
    par = plyj.parser.Parser()
    corpus_path = "../Java/Test/"
    blacklist = ["5a8beeae20366b5094d0db8148e0563",
                 "3cd87ee90872cfcb72b3cb3b773d8efa"]
    # (model family, parameter) pairs forwarded to LM.getLL.
    sm = [["levels", 3], ["levels", 2], ["levels", 1], ["levels", "MEMM"],
          ["cfs", 3], ["cfs", 2], ["cfs", 1]]
    vp = [3, 2, 1]
    fill = ["max", "random"]

    # The context manager guarantees the CSV is flushed and closed even when
    # parsing or scoring raises part-way through the corpus.
    with open("results.csv", 'w') as fout:
        for subdir, dirs, files in os.walk(corpus_path):
            for f in files:
                # Skip any file whose name contains a blacklisted hash.
                if any(h in f for h in blacklist):
                    continue

                p = os.path.join(subdir, f)
                cus = e.ExtractCode(par, p)

                # Both values depend only on the file name, so compute once.
                unk = f.endswith(".java")
                # Drops the last five characters (".java") unconditionally.
                stem = f[:-5]

                for smod, mp in sm:
                    for v in vp:
                        for fi in fill:
                            for i, cu in cus:
                                # getLL receives a private copy of the unit.
                                cu = copy.deepcopy(cu)
                                ans = LM.getLL(cu, i, smod, mp, v, "pot", fi)
                                # Single-argument print: same space-separated
                                # output on Python 2 and Python 3.
                                print(" ".join(
                                    str(t) for t in (smod, mp, v, "pot", fi)))
                                # NOTE(review): one CSV row per listed
                                # candidate is assumed here - confirm against
                                # the expected results.csv layout.
                                for call, ll in ans[:20]:
                                    print(str(ll) + ': ' + e.nstr(call))
                                    fout.write(';'.join([
                                        stem, smod, str(mp), str(v), fi,
                                        str(unk), str(ll),
                                    ]) + '\n')
def preprocess(self, path, split_token=' '):
    """Read a labelled text file and return encoded texts plus labels.

    Each line is split once on ``split_token`` into ``<label>`` and
    ``<text>``. The text is passed through ``util.normalizeString``,
    ``nltk.word_tokenize`` and ``util.removeStop``, then re-joined with
    single spaces. Lines are shuffled with a fixed NumPy seed (0) before
    processing, so the order is reproducible.

    Args:
        path: Path of the UTF-8 text file; undecodable bytes are ignored.
        split_token: Separator between the label and the text.

    Returns:
        A tuple ``(LM.ONE_hot(texts).load_LM(), labels)`` where ``labels``
        is a list of ints in the shuffled line order.

    Raises:
        ValueError: If a line lacks ``split_token`` or its label is not an
            integer.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(path, encoding='utf-8', errors='ignore') as handle:
        data = handle.readlines()

    np.random.seed(0)
    np.random.shuffle(data)

    x = []
    y = []
    for line in data:
        label, text = line.split(split_token, 1)
        tokens = nltk.word_tokenize(util.normalizeString(text))
        x.append(' '.join(util.removeStop(tokens)))
        y.append(int(label))

    return LM.ONE_hot(x).load_LM(), y
def __init__(self, path='../data/rt-polarity.all'):
    """Load, vectorise, split and pad the dataset stored at ``path``.

    The file's lines are shuffled with a fixed NumPy seed (0), parsed by
    ``load_data``, tokenised, stripped of stop words and converted by
    ``LM.pretrained_2().preprocess``. Two successive 90/10
    ``train_test_split`` calls produce train/dev/test partitions, and each
    partition is padded with ``util.padding``.

    Sets:
        self.data: ``((train_x, train_y), (dev_x, dev_y), (test_x, test_y))``.
        self.data_type: One ``[torch.FloatTensor, torch.LongTensor]`` pair
            per partition, in train/dev/test order.

    Args:
        path: Path of the UTF-8 text file; undecodable bytes are ignored.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(path, encoding='utf-8', errors='ignore') as handle:
        data = handle.readlines()
    np.random.seed(0)
    np.random.shuffle(data)

    wv = LM.pretrained_2()
    x, y = load_data(data)
    x = wv.preprocess(
        [tokenizer.remove_stopwords(tokenizer.tokenize(text)) for text in x])

    train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.1)
    train_x, dev_x, train_y, dev_y = train_test_split(
        train_x, train_y, test_size=0.1)

    max_len = 256
    # NOTE(review): presumably 300 is the embedding width - confirm.
    padding_item = [300 * [0]]
    padded = [
        util.padding(part, max_len=max_len, padding_item=padding_item)
        for part in (train_x, dev_x, test_x)
    ]
    train_x, dev_x, test_x = padded

    # Report the padded shapes in train/dev/test order.
    for part in padded:
        print(np.array(part).shape)

    self.data = (train_x, train_y), (dev_x, dev_y), (test_x, test_y)

    # One independent [feature type, label type] list per partition.
    self.data_type = [
        [torch.FloatTensor, torch.LongTensor] for _ in range(3)
    ]
import LM
import pickle as pkl
import Visual_tools as vt
import matplotlib.pyplot as plot

if __name__ == '__main__':
    # Build the per-chat models and their pairwise dissimilarity matrix.
    fb_dict = LM.fb_file_to_dict('../../Data/FB/JN_msgDict.txt')
    new_dict = LM.filter_outlier(fb_dict, threshold=200)
    all_LMs = LM.build_all_chat_LM(new_dict, max_len=2)
    dissimilarity, ID_map = LM.export_dissimilarity(all_LMs)

    # Show the matrix with a colour scale.
    plot.matshow(dissimilarity)
    plot.colorbar()
    plot.show()

    # pickle emits bytes, so the file must be opened in binary mode (text
    # mode raises TypeError on Python 3); the with-block also closes it.
    with open('./ID_map.pkl', 'wb') as id_map_file:
        pkl.dump(ID_map, id_map_file)

    # visual_mds returns a position result and a plotting handle.
    pos, plt = vt.visual_mds(dissimilarity)
    plt.show()
@author: Guy """ import LM while True: print("Choose a sample knowledge base:") print( "The options are the Tweety (T), Flying Fish (F), the Paper (P), Make your own (O) or quit(x)" ) choice = input("Please Type either T, F, P, O or x: ") if (choice == 'x'): break elif (choice == "T"): KnowB = ["(p)>(b)", "(*b)>(f)", "(*p)>(-f)"] RM = LM.createRankedModel(KnowB) print("") print("The Knowledge Base you have chosen is as follows:") print(KnowB) print() print( "Would you like to Check Entailment (E) of a sentence, see the LM-Minimal Model (LM) of the Knowledge Base or both (B)" ) choice = input("Please Type either E, LM, or B: ") if (choice == "LM"): print("") print("The LM-Minimal Model is as follows:") print() check = False for i in RM: if (len(RM[i]) == 0):
import LM

# Training pairs: every input is paired with an identical target.
samples = [[1], [2], [3]]
targets = [[1], [2], [3]]

# Construct the network from the [1, 5, 1] specification and fit it.
nn = LM.NN([1, 5, 1])
nn.train_lm(samples, targets)

# Print the prediction for a single input of 1.
prediction = nn.predict([[1]])
print(prediction)
def __init__(self,
             seq_max_len,
             embed_w,
             vocab_size,
             n_layer,
             n_hidden,
             keep_prob,
             lr,
             n_gpu,
             grad_clip,
             init_idx=0,
             punkt_idx=[],
             is_training=True,
             scope_name=""):
    """Build a forward and a backward language model, each with its saver.

    Both models are ``LM.LanguageModel`` instances constructed with the
    same hyper-parameters, inside the ``"forward"`` and ``"backward"``
    variable scopes respectively. For each direction the matching
    trainable variables are collected and wrapped in a
    ``tf.train.Saver(max_to_keep=1)``.

    When ``scope_name`` is empty, the saver receives a list of variables
    whose names start with the direction. Otherwise it receives a dict
    mapping each variable's name, with the leading ``scope_name + "/"``
    prefix(es) and the ``":<n>"`` suffix removed, to the variable.

    Args:
        seq_max_len, embed_w, vocab_size, n_layer, n_hidden, keep_prob,
        lr, n_gpu, grad_clip, is_training: Forwarded unchanged to
            ``LM.LanguageModel``.
        init_idx: Stored as the initial vocabulary index.
        punkt_idx: Stored as the punctuation indices.
        scope_name: Outer scope under which the variables live; ``""``
            means no outer scope.
    """
    self.__seq_max_len = seq_max_len
    self.__vocab_size = vocab_size
    self.__vocab_init = init_idx
    # The two highest ids are used for padding and unknown tokens.
    self.__vocab_pad = self.__vocab_size - 1
    self.__vocab_unk = self.__vocab_size - 2
    # NOTE(review): the default list object is shared between instances
    # that omit punkt_idx - do not mutate it in place.
    self.__vocab_punkt = punkt_idx

    def build_model(direction):
        # Create one LanguageModel inside its own variable scope.
        with tf.variable_scope(direction, reuse=None):
            return LM.LanguageModel(seq_max_len=seq_max_len,
                                    embed_w=embed_w,
                                    vocab_size=vocab_size,
                                    n_layer=n_layer,
                                    n_hidden=n_hidden,
                                    keep_prob=keep_prob,
                                    lr=lr,
                                    n_gpu=n_gpu,
                                    grad_clip=grad_clip,
                                    scope=scope_name + "/" + direction,
                                    is_training=is_training)

    def collect_variables(direction):
        # Select the trainable variables that belong to `direction`.
        if scope_name == "":
            return [
                x for x in tf.trainable_variables()
                if x.name.startswith(direction)
            ]

        prefix = scope_name + "/"
        selected = {}
        for v in tf.trainable_variables():
            if not v.name.startswith(scope_name):
                continue
            tmp_name = v.name
            # Remove the scope prefix as a prefix. str.strip() would treat
            # it as a set of characters and could eat into the rest of
            # the name, or loop forever when the scope recurs mid-name.
            while tmp_name.startswith(prefix):
                tmp_name = tmp_name[len(prefix):]
            if direction in tmp_name:
                selected[tmp_name.split(":")[0]] = v
        return selected

    self.__forward_tr = build_model("forward")
    self.__forward_var = collect_variables("forward")
    self.__forward_saver = tf.train.Saver(self.__forward_var,
                                          max_to_keep=1)

    self.__backward_tr = build_model("backward")
    self.__backward_var = collect_variables("backward")
    self.__backward_saver = tf.train.Saver(self.__backward_var,
                                           max_to_keep=1)
def umbralbinary(self, beta):
    """Run ``f.umbralbinary`` on the pixel list and rebuild the 2-D array.

    The stored ``listpixels`` is replaced by the result of
    ``f.umbralbinary(listpixels, beta)``, and ``pixels`` is regenerated
    from it as a ``np.uint8`` array via ``lm.listTonumpy2d``.

    NOTE(review): presumably a binary threshold ("umbral" is Spanish for
    threshold) with ``beta`` as the cut-off - confirm in ``f``.
    """
    processed = f.umbralbinary(self.listpixels, beta)
    self.listpixels = processed

    dimensions = (self.w, self.h)
    self.pixels = lm.listTonumpy2d(processed, dimensions, np.uint8)
def test_app():
    """Smoke test: constructing and training an ``LM.NN`` must not raise."""
    network = LM.NN([1, 5, 1])

    # Every input is paired with an identical target.
    samples = [[1], [2], [3]]
    targets = [[1], [2], [3]]
    network.train_lm(samples, targets)

    # Reaching this point without an exception is the whole check.
    assert True
import Tkinter as tk except ImportError: import tkinter as tk try: import ttk py3 = False except ImportError: import tkinter.ttk as ttk py3 = True def init(top, gui, *args, **kwargs): global w, top_level, root w = gui top_level = top root = top root.resizable(0, 0) def destroy_window(): # Function which closes the window. global top_level top_level.destroy() top_level = None if __name__ == '__main__': import LM LM.vp_start_gui()