def __init__(self, filename='word.vec', dict=None):
    """Set up the container's column count, dictionary and data list.

    ``filename`` is accepted but not used in this method. ``dict`` is kept
    as the backing dictionary when it is truthy; otherwise a fresh
    ``dictionary.Dictionary()`` is created.
    """
    # A word converts to a list having 10 items.
    self.ncols = 10
    self.dict = dict if dict else dictionary.Dictionary()
    self.data = []
def buildDictionary(self):
    """
    Populate the dictionary mapping and statistics.

    Every document listed in ``self.documents`` is fetched from its source,
    tokenized and normalized, and the resulting tokens are fed to the
    dictionary so that new token ids are created as necessary.
    """
    total_docs = len(self.documents)
    logging.info("creating dictionary from %i articles" % total_docs)
    self.dictionary = dictionary.Dictionary()
    position_count = 0
    for index, (source_id, doc_uri) in enumerate(self.documents):
        if index % 1000 == 0:
            logging.info("PROGRESS: at document #%i/%i (%s, %s)" %
                         (index, total_docs, source_id, doc_uri))
        src = self.config.sources[source_id]
        raw_tokens = src.tokenize(src.getContent(doc_uri))
        normalized = [src.normalizeWord(token) for token in raw_tokens]
        position_count += len(normalized)

        # Only the side effect matters here (updating token ids); the
        # bag-of-words result itself is discarded.
        self.dictionary.doc2bow(normalized, allowUpdate=True)
    logging.info("built %s from %i documents (total %i corpus positions)" %
                 (self.dictionary, total_docs, position_count))
def create_dictionary(direction):
    """Build a ``Dictionary`` from the tokenizer vocabulary and save it.

    The SentencePiece pieces of the module-level ``tok`` are added in id
    order, after dropping the three leading pieces ("<pad>", "</s>",
    "<unk>"), followed by the tokenizer's additional special tokens sorted
    by id. Every resulting index is then checked to be exactly one greater
    than the tokenizer id before the dictionary is finalized and written to
    ``dict.<direction>.txt`` under the module-level ``path``.

    Args:
        direction: Suffix used in the output file name.

    Raises:
        AssertionError: If the leading pieces are not the expected ones or
            an index does not line up with the tokenizer id.
    """
    dict_ = dictionary.Dictionary()
    sp_model = tok.sp_model
    t5_vocab = [
        [sp_model.id_to_piece(piece_id), piece_id]
        for piece_id in range(sp_model.get_piece_size())
    ]

    # The pops are required side effects, so they must not live inside an
    # ``assert`` statement: with ``python -O`` asserts are stripped and the
    # three leading pieces would silently stay in the vocabulary.
    for expected in ("<pad>", "</s>", "<unk>"):
        piece = t5_vocab.pop(0)[0]
        if piece != expected:
            raise AssertionError(
                "expected leading piece %r, got %r" % (expected, piece))

    for word, _ in t5_vocab:
        dict_.add_symbol(word)

    special_tokens = sorted(
        zip(tok.additional_special_tokens, tok.additional_special_tokens_ids),
        key=lambda pair: pair[1],
    )
    for word, _ in special_tokens:
        dict_.add_symbol(word)

    t5_vocab_dict = {word: piece_id for word, piece_id in t5_vocab}
    t5_vocab_dict.update(
        zip(tok.additional_special_tokens, tok.additional_special_tokens_ids)
    )

    # These symbols are excluded from the alignment check below.
    unchecked = {"<Lua heritage>", "<pad>", "</s>", "<unk>"}
    for word, index in dict_.indices.items():
        if word in unchecked:
            continue
        if index != t5_vocab_dict[word] + 1:
            raise AssertionError(
                "index mismatch for %r: dictionary has %r, tokenizer has %r"
                % (word, index, t5_vocab_dict[word]))

    dict_.finalize()
    dict_.save(os.path.join(path, f"dict.{direction}.txt"))
def main():
    """Run an interactive prompt over a ``dictionary.Dictionary``.

    Each input line is split on single spaces. One-word lines are treated
    as bare commands ("display" toggles the display flag, "print" prints the
    words, "exit" ends the loop); two-word lines are ``command argument``
    pairs dispatched to the dictionary. Lines with more than two parts are
    ignored.
    """
    word_dict = dictionary.Dictionary()
    display = False
    print(w)
    mutating_commands = ['insert', 'delete', 'batch_load', 'batch_delete']
    command, args = "", ""
    while command != "exit":
        parts = input(">>> ").split(' ')
        if not parts or len(parts) > 2:
            continue

        if len(parts) == 1:
            command = parts[0]
            if command == 'display':
                display = not display
            elif command == 'print':
                word_dict.print_words()
            # Bare commands never reach the argument dispatch below.
            continue

        command, args = parts
        if command in mutating_commands:
            # Show the dictionary info before and after the mutation.
            word_dict.print_info()
            print(getattr(word_dict, command)(args))
            word_dict.print_info()
        elif command == 'batch_lookup':
            print("Found: ", word_dict.batch_lookup(args, display))
        elif command == 'lookup':
            found, _ = word_dict.lookup(args)
            if found:
                print("Word is in the Dictionary")
            else:
                print("Word is not in the Dictionary")
def calculate(self):
    """Validate the three entry fields and display the computed result.

    Reads both operands (``ent1``, ``ent2``) and the numeral-system base
    (``ent3``). Each operand is run through a ``translator.Translator``
    built from the dictionary for that base; if either yields no
    collection, or the base is not an integer, a "recheck the input"
    message is shown. If any field is empty, a "not all fields are filled"
    message is shown. Otherwise the text widget shows
    ``count(self.choose.get())``.

    After a successful run ``self.translator`` holds the translator built
    for the second operand, as before.
    """
    def show(message):
        # Replace the whole content of the output widget.
        self.txt.delete(0.0, END)
        self.txt.insert(0.0, message)

    first = self.ent1.get()
    second = self.ent2.get()
    base = self.ent3.get()
    if not (first and second and base):
        show("Не все поля заполнены!")
        return

    try:
        count_system = int(base)
    except ValueError:
        # A non-numeric base used to escape as an uncaught ValueError from
        # the callback; report it like any other invalid input instead.
        show("Перепроверьте введенные данные!")
        return

    # Create the dictionary used for translating.
    self.dictionary = dictionary.Dictionary("base.csv", "r", ",")

    # Validate the operands in order, stopping at the first invalid one.
    for operand in (str(first), str(second)):
        self.translator = translator.Translator(
            self.dictionary.get_dictionary(count_system), operand)
        if self.translator.get_collection() is None:
            show("Перепроверьте введенные данные!")
            return

    self.txt.delete(0.0, END)
    self.txt.insert(0.0, count(self.choose.get()))
def initFromFile(self):
    """Load the disk image stored in ``disk.txt`` into this object.

    The file is consumed strictly in order: one super-block line, 128
    block-bitmap lines, 4 FCB-bitmap lines, 512 FCB records of two lines
    each, and finally 2048 data lines appended to ``self.data``.
    """
    setup.init()
    with open('disk.txt') as disk:
        self.super_block = super_block.SuperBlock(disk.readline())

        block_bits = "".join(disk.readline().strip() for _ in range(128))
        self.fat = fat.FAT(block_bits)

        fcb_bits = "".join(disk.readline().strip() for _ in range(4))

        fcbs = []
        for index in range(512):
            first_row = disk.readline()
            second_row = disk.readline()
            # Field order across the two rows:
            #   name file_type pos create_time update_time
            #   size first_block write_user read_user delete_able is_able
            fields = first_row.split(" ") + second_row.split(" ")

            if second_row == "-1\n":
                # Placeholder record.
                fcbs.append(FCB('-1', '-1', -1, '-1', '-1', -1,
                                '-1', '-1', '-1', -1, -1))
                continue

            fcbs.append(
                FCB(fields[0], fields[1], evalPro(fields[2]),
                    fields[3], fields[4], evalPro(fields[5]),
                    fields[6], fields[7], fields[8],
                    evalPro(fields[9]), evalPro(fields[10])))
            # Register this record in the child list of the FCB its third
            # field points at, when the last field is 1 and the index is
            # non-negative.
            if evalPro(fields[10]) == 1 and evalPro(fields[2]) >= 0:
                fcbs[evalPro(fields[2])].child_list.append(index)

        self.dictionary = dictionary.Dictionary(fcb_bits, fcbs)

        for _ in range(2048):
            self.data.append(disk.readline().strip())
def initialize(self, length_count, length_to_word):
    """Prepare a new round: blank game state, secret word and solver.

    Args:
        length_count: Mapping indexed by word length; its value is compared
            against 50 to decide how many candidate words to print.
        length_to_word: Mapping indexed by word length giving the candidate
            words of that length.

    In multiplayer mode the word is typed in and must be one of the
    candidates; otherwise a level is asked for and an ``AI.AI_Computer``
    picks the word.
    """
    self.guessed_letters = set()

    # Start with one placeholder per letter of the word.
    self.game_state.extend(['_'] * self.word_length)

    if self.multiplayer.lower() == 'y':
        # Show the candidate words of this length (at most 50 when there
        # are 50 or more of them).
        print("Here are possible words of such length: ")
        show_prefix_only = length_count[self.word_length] >= 50
        candidates = length_to_word[self.word_length]
        print(candidates[:50] if show_prefix_only else candidates)

        word = str(
            input("Please choose a word of length " +
                  str(self.word_length) + ": "))
        while not (len(word) == self.word_length and word in candidates):
            word = str(
                input("Invalid choice. Please choose a word of length " +
                      str(self.word_length) + ":"))
    else:
        # Let an AI pick a word of this length according to the level.
        self.level = str(
            input("Please choose a level (easy,intermediate,medium,hard): "))
        while self.level not in self.set_levels:
            self.level = str(
                input(
                    "Please choose a proper level (easy,intermediate,medium,hard): "
                ))
        self.computer = AI.AI_Computer(
            self.level,
            dictionary.Dictionary(length_to_word[self.word_length]))
        word = self.computer.choose_word()

    # The word may be a bag of words for the hard level.
    self.word = word

    if self.level == "hard":
        self.solver = AI.AI_hard_solver()
    else:
        self.solver = AI.AI_solver()

    # Give the solver its own dictionary over the candidate words.
    self.solver.dic = dictionary.Dictionary(
        length_to_word[self.word_length])
def main(solve, board):
    """Return the words yielded by ``solve`` that are long enough.

    ``solve`` is called with the board, a ``dictionary.Dictionary`` built
    from ``DICTS`` and ``SIZE``; only words of at least ``MIN_LENGTH``
    characters are kept, in the order they were produced.
    """
    lexicon = dictionary.Dictionary(DICTS)
    return [
        candidate
        for candidate in solve(board, lexicon, SIZE)
        if len(candidate) >= MIN_LENGTH
    ]
def test_dictionary():
    """A Dictionary built from a list should equal an index -> value map."""
    # Arrange
    values = [6, 2, 84, 9]
    expected = {0: 6, 1: 2, 2: 84, 3: 9}

    # Act
    actual = dictionary.Dictionary(values)

    # Assert
    assert expected == actual
def __init__(self, node_id, port, num_dc):
    """Initialise the state of one node.

    Args:
        node_id: Identifier of this node; also passed to the time table
            and the dictionary.
        port: Port number stored on the instance.
        num_dc: Count passed to ``timeTable.TimeTable``.
    """
    self.node_id = node_id

    # Networking: a socket object, not yet bound or connected here.
    self.s = socket.socket()

    self.timeTable = timeTable.TimeTable(num_dc, node_id)

    self.hostname = socket.gethostname()  # name of the local machine
    self.port = port
    self.addr = ''
    self.c = None

    # Per-node log and dictionary.
    self.log = log.Log()
    self.dictionary = dictionary.Dictionary(node_id)

    self.threads = []
def test_add_first_word_to_board(self):
    """
    Adding the first word away from the center must raise ValueError
    """
    board = sbd.Board(5)
    lexicon = sdi.Dictionary(['aa'])
    start, direction, word = [0, 0], 'down', 'aa'
    self.assertRaises(ValueError, board.add_word,
                      start, direction, word, lexicon)
def test_wrong_direction(self):
    """
    An unrecognised direction string must raise ValueError
    """
    board = sbd.Board(5)
    lexicon = sdi.Dictionary(['aa'])
    start, direction, word = [0, 0], 'asdf', 'aa'
    self.assertRaises(ValueError, board.add_word,
                      start, direction, word, lexicon)
def test_add_word_out_of_board(self):
    """
    A start position outside the board must raise ValueError
    """
    board = sbd.Board(5)
    lexicon = sdi.Dictionary(['aa'])
    start, direction, word = [-1, 2], 'down', 'aa'
    self.assertRaises(ValueError, board.add_word,
                      start, direction, word, lexicon)
def test_add_too_large_word(self):
    """
    A word that cannot fit on the board must raise ValueError
    """
    board = sbd.Board(5)
    lexicon = sdi.Dictionary(['aa'])
    start, direction, word = [0, 2], 'down', '123456789'
    self.assertRaises(ValueError, board.add_word,
                      start, direction, word, lexicon)
def init_dictionary():
    """Create a dictionary with a unigram table and a segmentation-label table.

    The unigram table gets ``constants.UNK_SYMBOL`` as its unknown symbol;
    the segmentation-label table is pre-filled with every label in
    ``constants.SEG_LABELS``.

    Returns:
        The populated ``dictionary.Dictionary``.
    """
    dic = dictionary.Dictionary()

    # Unigram table.
    dic.create_table(constants.UNIGRAM)
    unigram_table = dic.tables[constants.UNIGRAM]
    unigram_table.set_unk(constants.UNK_SYMBOL)

    # Segmentation-label table: register each known label.
    dic.create_table(constants.SEG_LABEL)
    for seg_label in constants.SEG_LABELS:
        dic.tables[constants.SEG_LABEL].get_id(seg_label, update=True)

    return dic
def init_dictionary(num_attrs=0):
    """Create a dictionary with a unigram table and attribute-label tables.

    Args:
        num_attrs: Number of attribute tables to create; table ``i`` is
            named ``constants.ATTR_LABEL(i)``.

    Returns:
        The populated ``dictionary.Dictionary``.
    """
    dic = dictionary.Dictionary()

    # Unigram table with its unknown symbol.
    dic.create_table(constants.UNIGRAM)
    unigram_table = dic.tables[constants.UNIGRAM]
    unigram_table.set_unk(constants.UNK_SYMBOL)

    # One table per attribute; no unknown symbol is set on these.
    for attr_index in range(num_attrs):
        dic.create_table(constants.ATTR_LABEL(attr_index))

    return dic
def main():
    """Look up completion terms and write them to ``tabterms.txt``.

    Command-line arguments: one or more search terms, ``-l/--lang`` (a key
    of the module-level ``dictionaries``) and ``-t/--trans`` (0: from
    Danish, 1: to Danish, 2: both ways).

    The output file contains the search terms on the first line, the prefix
    returned by ``tab`` on the second, and then one term per line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('search_terms', nargs="+")
    parser.add_argument('-l', '--lang', nargs='?', default='en', dest='lang',
                        choices=dictionaries.keys(),
                        help=u'language (enda, daen, de, fr)')
    parser.add_argument('-t', '--trans', nargs='?', default=0,
                        dest="translate", choices=['0', '1', '2'],
                        help='0: from Danish, 1: to Danish, 2: both ways')
    args = parser.parse_args()

    translate = int(args.translate)
    language = args.lang
    search_terms = [term.strip(' ') for term in args.search_terms]

    language_name = dictionaries[language]['name']
    directions = [('fromDanish', 'Dansk-%s' % language_name),
                  ('toDanish', '%s-Dansk' % language_name)]
    # Keep only the requested direction(s); dictionaries whose 'doubflag'
    # is below 2 only keep the from-Danish direction.
    if dictionaries[language]['doubflag'] < 2 or translate == 0:
        del directions[1]
    elif translate == 1:
        del directions[0]

    dic = dictionary.Dictionary(dictionaries)
    tables = [('lookup', 'Artikler')]
    if len(search_terms) > 1:
        tables.append(('collocation_lookup', 'Ordforbindelser'))
    prefix, tab_terms = tab(dic, search_terms, directions, tables, language)

    # Fall back to collocations when a single term produced nothing.
    if not tab_terms and len(search_terms) == 1:
        tables = [('collocation_lookup', 'Ordforbindelser')]
        prefix, tab_terms = tab(dic, search_terms, directions, tables,
                                language)

    # The context manager closes the file even if a write fails.
    with open('tabterms.txt', 'w') as ofile:
        ofile.write("%s\n" % " ".join(search_terms))
        ofile.write("%s\n" % prefix)
        for term in tab_terms:
            ofile.write("%s\n" % (term))
def __init__(self):
    """Set the instance's constants and create its collaborators."""
    # Numeric limits.
    self.LIMIT_COUNT = 5
    self.SPLIT_COUNT = 1000
    self.LIMIT_YEAR_SEPERATOR = 5
    self.INTERVAL_YEAR_SEPERATOR = 73

    # Field / key names.
    self.WORD_NAME = 'word'
    self.PLUS_NAME = 'plus'
    self.MINUS_NAME = 'minus'
    self.START_NAME = 'start'
    self.FINAL_NAME = 'final'
    self.DATE_NAME = 'date'
    self.FINANCE_NAME = 'finance'

    # Collaborators: database manager first, then the dictionary.
    self.dbm = dbmanager.DBManager()
    self.dic = dictionary.Dictionary()

    self.THREAD_LIMIT_COUNT = 4
def testRadiusServerStart(self):
    """Start the RADIUS server, authenticate against it, then stop it.

    Steps:
      1. Start a ``radserver.Radserver`` and register this host as a user.
      2. Send an Access-Request through a ``client.Client`` and require an
         Access-Accept reply.
      3. Stop the server and check that a further request now fails.

    Failures are reported with ``self.fail``; ``except Exception`` is used
    instead of a bare ``except`` so that ``KeyboardInterrupt`` and
    ``SystemExit`` are not swallowed.
    """
    ipaddress = socket.gethostbyname(socket.gethostname())
    ipaddress_port = (ipaddress, 6000)

    # Start the RADIUS server.
    try:
        RadiusSvr = radserver.Radserver('10.0.28.131')
        RadiusSvr.AddUser(ipaddress, 'passwd')
        RadiusSvr.Start()
    except Exception:
        self.fail("Failed to Start RADIUS server")

    # Test whether the RADIUS server responds by sending an
    # authentication request.
    srv = client.Client(
        server="radiusprimary",
        secret="passwd",
        dict=dictionary.Dictionary(
            "/usr/lib/python2.3/site-packages/pyrad/dictionary"))
    try:
        srv.bind(ipaddress_port)
        req = srv.CreateAuthPacket(
            code=pyrad.packet.AccessRequest,
            User_Name="test",
            NAS_Identifier=socket.gethostbyname(socket.gethostname()))
        req["User-Password"] = req.PwCrypt("passwd")
        reply = srv.SendPacket(req)
    except Exception:
        self.fail("Failed to create and send Authentication request packet")

    print("Authentication Request packet sent")
    if reply.code == pyrad.packet.AccessAccept:
        print(
            "RADIUS server is running and has Accepted the AccessRequest packet"
        )
    else:
        self.fail("RADIUS server failed to Accept the request")

    try:
        RadiusSvr.Stop()
    except Exception:
        self.fail("Failed to STOP the RADIUS server")

    # With the server stopped, sending the same request must fail.
    try:
        reply = srv.SendPacket(req)
    except Exception:
        # Expected: RADIUS server Started/Stopped successfully.
        pass
    else:
        self.fail("Failed to Stop RADIUS server")
def test_add_second_word_not_in_di(self):
    """
    Adding a second word that forms a word missing from the dictionary
    must raise ValueError
    """
    board = sbd.Board(5)
    lexicon = sdi.Dictionary(['aa', 'ab'])

    # First word is accepted.
    board.add_word([2, 2], 'd', 'aa', lexicon)

    # Second word is placed next to it and must be rejected.
    self.assertRaises(ValueError, board.add_word,
                      [2, 1], 'd', 'ab', lexicon)
def test_add_second_word_not_ol(self):
    """
    Adding a second word that does not overlap the words already on the
    board must raise ValueError
    """
    board = sbd.Board(5)
    lexicon = sdi.Dictionary(['aa'])

    # First word is accepted.
    board.add_word([2, 2], 'd', 'aa', lexicon)

    # Second word is placed at [0, 0] and must be rejected.
    self.assertRaises(ValueError, board.add_word,
                      [0, 0], 'd', 'aa', lexicon)
def test_add_first_word(self):
    """
    Adding the first word correctly writes it onto the board
    """
    board = sbd.Board(5)
    lexicon = sdi.Dictionary(['aa'])
    board.add_word([2, 2], 'r', 'aa', lexicon)

    empty_row = ['.'] * 5
    expected_grid = [
        list(empty_row),
        list(empty_row),
        ['.', '.', 'a', 'a', '.'],
        list(empty_row),
        list(empty_row),
    ]
    self.assertEqual(board.board, expected_grid)
def translated_data():
    """Return the vectorized validation reviews and their polarities.

    A dictionary is built from the preprocessed training reviews found in
    the hard-coded directory. The validation reviews are then read from a
    text file, preprocessed, vectorized with that dictionary (using the
    value 300) and concatenated; the polarities are read as one integer
    per line from a second file.

    Returns:
        ``[untranslated_reviews, polarities]`` where the second element is
        a NumPy array.
    """
    directory = 'C:\\Users\\olive\\Desktop\\Datasets_for_thesis\\Prisjakt\\training_data'
    extracted = extractor.json_extract(directory)
    training_reviews = extracted[0]
    polarities = extracted[1]  # only needed by the disabled translation step
    cleaned_reviews = preprocessor.preprocess(training_reviews)
    vocabulary = dict.Dictionary(cleaned_reviews).dictionary

    with open('untranslated_reviews validation combined.txt', 'r') as handle:
        cleaned_lines = preprocessor.preprocess(handle.readlines())
        vectors = vectorizer.vectorize_data(cleaned_lines, vocabulary, 300)
        untranslated_reviews = np.concatenate(vectors)

    with open('translated_polarities validation combined.txt', 'r') as handle:
        translated_polarities = [int(row) for row in handle]

    return [untranslated_reviews, np.array(translated_polarities)]
def __init__(self, path=None, raw=None):
    """Create the store, either empty or loaded from a JSON file.

    Args:
        path: JSON file to load. When given, its ``'logs'`` entry becomes
            the log list and its ``'dictionary'`` entry is handed to the
            internal dictionary.
        raw: Only used when ``path`` is None: a JSON file whose content is
            passed to ``self.convert``.
    """
    self._path = path
    self._dict = dic.Dictionary()

    if self._path is not None:
        # Load previously saved data.
        with codecs.open(self._path, 'r', encoding='utf-8') as handle:
            self._data = json.load(handle)
        self._logs = self._data['logs']
        self._dict.set(self._data['dictionary'])
        return

    # No saved data: start empty, sharing the log list with the data dict.
    self._logs = []
    self._data = {'logs': self._logs, 'dictionary': None}
    if raw is not None:
        with codecs.open(raw, 'r', encoding='utf-8') as handle:
            rawdata = json.load(handle)
        self.convert(rawdata)
def main():
    """Start the dictionary GUI if every language's files are present.

    For each entry of the module-level ``dictionaries`` both its
    ``'gddfile'`` and ``'datfile'`` must exist; every missing language is
    reported, and the GUI is not started if any is missing.
    """
    # Check whether the languages are available.
    filenotfound = False
    for lang in dictionaries.values():
        gdd_present = os.path.exists(lang['gddfile'])
        if gdd_present and os.path.exists(lang['datfile']):
            continue
        folder = os.path.dirname(os.path.abspath(lang['gddfile']))
        # Single-argument print(...) behaves the same as the print statement.
        print('Ordbogsfiler for %s kan ikke findes i mappen %s' % (
            lang['name'], folder))
        filenotfound = True

    if filenotfound:
        return

    dic = dictionary.Dictionary(dictionaries)
    gui.DictionaryGUI(dic).run()
def __init__(self):
    """Create the game state, load the word dictionary and start a game."""
    # Input / loop control.
    self.prevKey = ''
    self.should_keep_going = True
    self.paused = False
    self.hp = 30

    # Collaborators; the screen is built on top of the stats object.
    self.wordDictionary = dictionary.Dictionary("dictionary_testfile")
    self.stat = stats.Stats()
    self.scrn = screen.Screen(self.stat)
    self.scrn.scr.nodelay(1)

    # Word bookkeeping.
    self.words = []
    self.wordsOnScreen = 10
    self.pos_in_word = 0
    self.last_time = datetime.datetime.now()
    self.probableWords = []

    # Candidate values derived from the screen height: 1 .. height - 11.
    upper_bound = self.scrn.height - 10
    self.poss_vals = list(range(1, upper_bound))

    debug.init()
    self.new_game()
def initLanguage():
    """Build the translator from the deinflection rules and dictionary DB.

    The data directory depends on where the interpreter lives: when the
    executable's directory is named "Python27" the directory of this source
    file is used; otherwise (the --onefile case) it is the
    "yomi_base/japanese" folder next to the executable.

    Returns:
        A ``translate.Translator`` wired to ``deinflect.json`` and
        ``dictionary.db`` from that directory.
    """
    exe_dir = os.path.dirname(sys.executable)
    if os.path.basename(exe_dir) == "Python27":
        directory = os.path.dirname(__file__)
    else:
        directory = exe_dir + "/yomi_base/japanese"

    deinflector = deinflect.Deinflector(
        os.path.join(directory, 'deinflect.json'))
    lexicon = dictionary.Dictionary(os.path.join(directory, 'dictionary.db'))
    return translate.Translator(deinflector, lexicon)
def get_text_utts(indir, compilexpath):
    """Load the text in ``indir`` and turn it into utterances.

    Args:
        indir: Directory handed to ``load_txt_dir``.
        compilexpath: Path handed to ``dictionary.Dictionary``.

    Returns:
        The result of ``get_utts`` for the loaded text.

    Raises:
        SiReError: If any out-of-vocabulary word is found; the words are
            printed first.

    Side effect: sets ``dictionary`` and ``intype`` on the module-level
    ``args`` object.
    """
    txt = load_txt_dir(indir)
    dct = dictionary.Dictionary(compilexpath)

    missing_words = get_oov_words(txt, dct)
    if len(missing_words) != 0:
        # Single-argument print(...) behaves the same as the print statement.
        print("Please remove all OOV word containing sents or add the words to dictonary before proceeding.")
        for missing in missing_words:
            print(missing)
        raise SiReError("OOV words present, cannot continue.")

    args.dictionary = dct
    args.intype = "txt"
    return get_utts(txt, args)
def __init__(self):
    """Load the word list, build the quiz window and enter the main loop.

    Note that this call does not return until the Tk main loop ends.
    """
    self.dictionary = dictionary.Dictionary()
    self.dictionary.load_dictionary('japonais.csv')
    self.show_furigana = True
    self.show_roumaji = True

    # Main window; pressing Return triggers the check.
    self.root = tkinter.Tk()
    self.root.title('Words Quiz')
    self.root.config(padx=20, pady=20)
    self.root.bind('<Return>', self.check)

    # "Select" menu with the two display commands.
    self.menubar = tkinter.Menu(self.root)
    self.menu = tkinter.Menu(self.menubar, tearoff=0)
    self.menu.add_command(label="Furigana", command=self.set_furigana)
    self.menu.add_command(label="Roumaji", command=self.set_roumaji)
    self.menubar.add_cascade(label="Select", menu=self.menu)
    self.root.config(menu=self.menubar)

    # Three stacked labels: one large, two small.
    self.native = tkinter.Label(self.root, font=('Arial', 20))
    self.native.pack()
    self.alternate = tkinter.Label(self.root, font=('Arial', 10))
    self.alternate.pack()
    self.latin = tkinter.Label(self.root, font=('Arial', 10))
    self.latin.pack()

    # Answer entry and the button that checks it.
    self.entry = tkinter.Entry(self.root, justify='center')
    self.entry.pack()
    self.button = tkinter.Button(self.root, text='Check')
    self.button.pack_configure(pady=10)
    self.button.pack()
    self.button.config(command=self.check)

    self.new_random_word()
    self.root.mainloop()
def test_add_second_word_neigh(self):
    """
    Adding a word that neighbors the first one without overlapping it
    """
    board = sbd.Board(5)
    lexicon = sdi.Dictionary(['aa', 'aaa'])

    # Same word and direction, placed at two different positions.
    for start in ([2, 2], [2, 3]):
        board.add_word(start, 'd', 'aa', lexicon)

    empty_row = ['.'] * 5
    expected_grid = [
        list(empty_row),
        list(empty_row),
        ['.', '.', 'a', 'a', '.'],
        ['.', '.', 'a', 'a', '.'],
        list(empty_row),
    ]
    self.assertEqual(board.board, expected_grid)