예제 #1
0
 def __init__(self, filename='word.vec', dict=None):
     """Set up the converter state.

     Args:
         filename: Accepted by the signature but not read in this
             constructor.
         dict: Dictionary object to reuse. Any falsy value (including the
             default ``None``) causes a fresh ``dictionary.Dictionary()``
             to be created instead.
     """
     # Each word is converted to a list having 10 items.
     self.ncols = 10
     self.dict = dict if dict else dictionary.Dictionary()
     self.data = []
예제 #2
0
    def buildDictionary(self):
        """
        Build ``self.dictionary`` from every document in ``self.documents``.

        Each document's content is fetched from its source, tokenized and
        normalized word by word, and the resulting word list is handed to
        the dictionary with ``allowUpdate=True``. Progress is logged every
        1000 documents, and a summary is logged at the end.
        """
        logging.info("creating dictionary from %i articles" %
                     len(self.documents))
        self.dictionary = dictionary.Dictionary()
        positionsSeen = 0
        for index, (srcId, uri) in enumerate(self.documents):
            if not index % 1000:
                logging.info("PROGRESS: at document #%i/%i (%s, %s)" %
                             (index, len(self.documents), srcId, uri))
            src = self.config.sources[srcId]
            text = src.getContent(uri)
            tokens = [src.normalizeWord(raw) for raw in src.tokenize(text)]
            positionsSeen += len(tokens)

            # The bag-of-words result is discarded on purpose; the call is
            # made only for its effect of updating the token ids.
            self.dictionary.doc2bow(tokens, allowUpdate=True)
        logging.info("built %s from %i documents (total %i corpus positions)" %
                     (self.dictionary, len(self.documents), positionsSeen))
예제 #3
0
        def create_dictionary(direction):
            """Build a dictionary mirroring the tokenizer vocabulary and save it.

            Every sentencepiece entry after the three leading special pieces
            is added in id order, followed by the tokenizer's additional
            special tokens sorted by id. The resulting indices are then
            checked against the tokenizer ids (shifted by one) before the
            dictionary is finalized and written to ``dict.<direction>.txt``
            under ``path``.

            ``tok`` and ``path`` are taken from the enclosing scope.

            Args:
                direction: Suffix used in the saved file name.

            Raises:
                AssertionError: If the first three pieces are not ``<pad>``,
                    ``</s>``, ``<unk>`` (in that order) or if a dictionary
                    index does not line up with the tokenizer id.
            """
            dict_ = dictionary.Dictionary()
            sp_model = tok.sp_model
            t5_vocab = [
                [sp_model.id_to_piece(piece_id), piece_id]
                for piece_id in range(sp_model.get_piece_size())
            ]
            # Pop the leading special pieces *outside* the assert statements:
            # asserts are stripped under ``python -O``, which would otherwise
            # skip the pops and leave these three entries in the vocabulary.
            for expected in ("<pad>", "</s>", "<unk>"):
                piece = t5_vocab.pop(0)[0]
                assert piece == expected, (
                    "expected %r at the start of the vocabulary, got %r"
                    % (expected, piece))

            for word, _ in t5_vocab:
                dict_.add_symbol(word)

            special_tokens = list(
                zip(tok.additional_special_tokens,
                    tok.additional_special_tokens_ids))
            for word, _ in sorted(special_tokens, key=lambda pair: pair[1]):
                dict_.add_symbol(word)

            t5_vocab_dict = {word: piece_id for word, piece_id in t5_vocab}
            t5_vocab_dict.update(special_tokens)

            # Sanity check: apart from the skipped special symbols, every
            # dictionary index must equal the tokenizer id plus one.
            for word, index in dict_.indices.items():
                if word in {"<Lua heritage>", "<pad>", "</s>", "<unk>"}:
                    continue
                assert index == t5_vocab_dict[word] + 1

            dict_.finalize()
            dict_.save(os.path.join(path, f"dict.{direction}.txt"))
예제 #4
0
def main():
    """Run an interactive command loop over a ``dictionary.Dictionary``.

    Each prompt line is split on whitespace into a command and at most one
    argument; empty lines and lines with more than two words are ignored.

    One-word commands:
        display -- toggle the flag passed to ``batch_lookup``
        print   -- print the stored words
        exit    -- leave the loop

    Two-word commands:
        insert / delete / batch_load / batch_delete -- call the method of
            the same name with the argument, printing the dictionary info
            before and after and the method's result in between
        batch_lookup -- print the result of ``batch_lookup``
        lookup       -- report whether the word is in the dictionary
    """
    words = dictionary.Dictionary()
    display = False
    # NOTE(review): `w` is not defined in this function; presumably a
    # module-level name -- confirm.
    print(w)
    command, args = "", ""
    mutating_commands = ('insert', 'delete', 'batch_load', 'batch_delete')
    while command != "exit":
        # split() without an argument tolerates leading, trailing and
        # repeated whitespace, and yields [] for a blank line.
        parts = input(">>> ").split()
        if not parts or len(parts) > 2:
            continue
        if len(parts) == 1:
            command = parts[0]
            if command == 'display':
                display = not display
            elif command == 'print':
                words.print_words()
            continue

        command, args = parts
        if command in mutating_commands:
            words.print_info()
            print(getattr(words, command)(args))
            words.print_info()
        elif command == 'batch_lookup':
            print("Found: ", words.batch_lookup(args, display))
        elif command == 'lookup':
            found, _ = words.lookup(args)
            print("Word is in the Dictionary"
                  if found else "Word is not in the Dictionary")
예제 #5
0
 def calculate(self):
     """Validate the three entry fields and show the result in ``self.txt``.

     The first two entries are the operands and the third is the base
     ("count system") used to pick the translation dictionary from
     ``base.csv``. Each operand is run through a ``translator.Translator``;
     if either yields no collection, an error message is shown. Otherwise
     the text widget is filled with ``count(self.choose.get())``.

     Side effects: ``self.dictionary`` and ``self.translator`` are
     replaced; ``self.translator`` ends up as the translator for the last
     operand that was checked.
     """
     def show(message):
         # Replace the whole content of the output widget.
         self.txt.delete(0.0, END)
         self.txt.insert(0.0, message)

     if not (self.ent1.get() and self.ent2.get() and self.ent3.get()):
         show("Не все поля заполнены!")
         return

     first = str(self.ent1.get())
     second = str(self.ent2.get())
     try:
         count_system = int(self.ent3.get())
     except ValueError:
         # A non-numeric base is invalid user input, not a crash.
         show("Перепроверьте введенные данные!")
         return

     # Create the dictionary used for translating.
     self.dictionary = dictionary.Dictionary("base.csv", "r", ",")
     # Check the operands in order; stop at the first invalid one.
     for operand in (first, second):
         self.translator = translator.Translator(
             self.dictionary.get_dictionary(count_system), operand)
         if self.translator.get_collection() is None:
             show("Перепроверьте введенные данные!")
             return

     # Clear first, then compute, matching the original statement order.
     self.txt.delete(0.0, END)
     self.txt.insert(0.0, count(self.choose.get()))
예제 #6
0
 def initFromFile(self):
     """Load the disk image stored in ``disk.txt`` into this object.

     The file is consumed strictly in order:
       * 1 line    -> ``self.super_block``
       * 128 lines -> block bitmap, concatenated, used to build ``self.fat``
       * 4 lines   -> FCB bitmap, concatenated
       * 512 two-line records -> FCB entries
       * 2048 lines -> appended (stripped) to ``self.data``

     A record whose second line is exactly ``"-1\\n"`` becomes a
     placeholder FCB. For any other record, the entry index is added to
     the ``child_list`` of the FCB at position ``pos`` when the record's
     last field evaluates to 1 and ``pos`` is non-negative.
     """
     setup.init()
     with open('disk.txt') as disk:
         self.super_block = super_block.SuperBlock(disk.readline())

         block_bitmap = "".join(disk.readline().strip() for _ in range(128))
         self.fat = fat.FAT(block_bitmap)

         fcb_bitmap = "".join(disk.readline().strip() for _ in range(4))

         fcb_list = []
         for index in range(512):
             first_line = disk.readline()
             second_line = disk.readline()
             if second_line == "-1\n":
                 fcb_list.append(FCB('-1', '-1', -1, '-1', '-1', -1, '-1', '-1', '-1', -1, -1))
                 continue

             # Field order across the two lines:
             # name file_type pos create_time update_time
             # size first_block write_user read_user delete_able is_able
             fields = first_line.split(" ") + second_line.split(" ")
             pos = evalPro(fields[2])
             size = evalPro(fields[5])
             delete_able = evalPro(fields[9])
             is_able = evalPro(fields[10])
             fcb_list.append(
                 FCB(fields[0], fields[1], pos, fields[3], fields[4],
                     size, fields[6], fields[7], fields[8], delete_able,
                     is_able))
             if is_able == 1 and pos >= 0:
                 fcb_list[pos].child_list.append(index)

         self.dictionary = dictionary.Dictionary(fcb_bitmap, fcb_list)

         for _ in range(2048):
             self.data.append(disk.readline().strip())
예제 #7
0
    def initialize(self, length_count, length_to_word):
        """Reset the round state, obtain the secret word and build the solver.

        In multiplayer mode (``self.multiplayer`` is ``'y'``, any case) the
        user is shown up to 50 candidate words and prompted until they enter
        a word of the right length that is in the candidate list. Otherwise
        the user picks a level and an ``AI.AI_Computer`` chooses the word.

        Args:
            length_count: Mapping from word length to a number; a value of
                50 or more truncates the displayed candidate list.
            length_to_word: Mapping from word length to the words of that
                length.

        NOTE(review): ``self.level`` is only assigned on the non-multiplayer
        path but is read afterwards on both paths -- presumably it is
        initialised elsewhere; confirm.
        """
        self.guessed_letters = set()
        # One placeholder per letter of the word.
        self.game_state.extend('_' for _ in range(self.word_length))

        if self.multiplayer.lower() == 'y':
            # Show the words of this length to choose from (at most 50).
            print("Here are possible words of such length: ")
            shown = length_to_word[self.word_length]
            if length_count[self.word_length] >= 50:
                shown = shown[:50]
            print(shown)

            word = str(
                input("Please choose a word of length " +
                      str(self.word_length) + ": "))
            while not (len(word) == self.word_length
                       and word in length_to_word[self.word_length]):
                word = str(
                    input("Invalid choice. Please choose a word of length " +
                          str(self.word_length) + ":"))
        else:
            # Let an AI pick a word of this length according to the level.
            self.level = str(
                input(
                    "Please choose a level (easy,intermediate,medium,hard): "))
            while self.level not in self.set_levels:
                self.level = str(
                    input(
                        "Please choose a proper level (easy,intermediate,medium,hard): "
                    ))
            level_dictionary = dictionary.Dictionary(
                length_to_word[self.word_length])
            self.computer = AI.AI_Computer(self.level, level_dictionary)
            word = self.computer.choose_word()

        # Per the original note, this may be a bag of words at hard level.
        self.word = word
        self.solver = (AI.AI_hard_solver() if self.level == "hard"
                       else AI.AI_solver())
        # The solver gets its own dictionary of the candidate words.
        self.solver.dic = dictionary.Dictionary(
            length_to_word[self.word_length])
예제 #8
0
def main(solve, board):
	"""Return the words produced by ``solve`` that are long enough.

	``solve`` is called with the board, a ``dictionary.Dictionary`` built
	from ``DICTS`` and ``SIZE``; only words with at least ``MIN_LENGTH``
	characters are kept, in the order they were produced.
	"""
	lexicon = dictionary.Dictionary(DICTS)
	return [
		candidate
		for candidate in solve(board, lexicon, SIZE)
		if len(candidate) >= MIN_LENGTH
	]
예제 #9
0
def test_dictionary():
    """A Dictionary built from a list equals the index -> value mapping."""
    # Arrange
    values = [6, 2, 84, 9]
    expected = dict(enumerate(values))

    # Act
    actual = dictionary.Dictionary(values)

    # Assert
    assert expected == actual
예제 #10
0
 def __init__(self, node_id, port, num_dc):
     """Initialise the node's identity, socket and replicated structures.

     Args:
         node_id: Identifier of this node; also passed to the time table
             and the dictionary.
         port: Port number stored on the instance.
         num_dc: Passed to ``timeTable.TimeTable`` together with
             ``node_id``.
     """
     # Plain values.
     self.node_id = node_id
     self.port = port
     self.addr = ''
     self.c = None
     self.threads = []

     # Objects, created in the same relative order as before.
     self.s = socket.socket()
     self.timeTable = timeTable.TimeTable(num_dc, node_id)
     self.hostname = socket.gethostname()  # local machine name
     self.log = log.Log()
     self.dictionary = dictionary.Dictionary(node_id)
예제 #11
0
 def test_add_first_word_to_board(self):
     """
     Adding the first word at [0, 0] (not in the center)
     must raise ValueError.
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa'])
     with self.assertRaises(ValueError):
         board.add_word([0, 0], 'down', 'aa', lexicon)
예제 #12
0
 def test_wrong_direction(self):
     """
     Adding a word with the direction string 'asdf'
     must raise ValueError.
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa'])
     with self.assertRaises(ValueError):
         board.add_word([0, 0], 'asdf', 'aa', lexicon)
예제 #13
0
 def test_add_word_out_of_board(self):
     """
     Adding a word at a position with a negative coordinate
     must raise ValueError.
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa'])
     with self.assertRaises(ValueError):
         board.add_word([-1, 2], 'down', 'aa', lexicon)
예제 #14
0
 def test_add_too_large_word(self):
     """
     Adding a nine-character word to a Board(5)
     must raise ValueError.
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa'])
     with self.assertRaises(ValueError):
         board.add_word([0, 2], 'down', '123456789', lexicon)
def init_dictionary():
    """Create a Dictionary with a unigram table and a segmentation-label table.

    The unigram table gets ``constants.UNK_SYMBOL`` as its unknown symbol;
    the segmentation-label table is pre-populated with every label in
    ``constants.SEG_LABELS``.

    Returns:
        The newly created ``dictionary.Dictionary``.
    """
    dic = dictionary.Dictionary()

    # Unigram table.
    dic.create_table(constants.UNIGRAM)
    unigram_table = dic.tables[constants.UNIGRAM]
    unigram_table.set_unk(constants.UNK_SYMBOL)

    # Segmentation-label table.
    dic.create_table(constants.SEG_LABEL)
    seg_table = dic.tables[constants.SEG_LABEL]
    for seg_label in constants.SEG_LABELS:
        seg_table.get_id(seg_label, update=True)

    return dic
예제 #16
0
def init_dictionary(num_attrs=0):
    """Create a Dictionary with a unigram table and ``num_attrs`` attribute tables.

    Args:
        num_attrs: Number of attribute tables to create; table ``i`` is
            named ``constants.ATTR_LABEL(i)``.

    Returns:
        The newly created ``dictionary.Dictionary``.
    """
    dic = dictionary.Dictionary()

    # Unigram table, with an unknown symbol registered.
    dic.create_table(constants.UNIGRAM)
    unigram_table = dic.tables[constants.UNIGRAM]
    unigram_table.set_unk(constants.UNK_SYMBOL)

    # Attribute tables; no unknown symbol is registered for these.
    for attr_index in range(num_attrs):
        dic.create_table(constants.ATTR_LABEL(attr_index))

    return dic
예제 #17
0
def main():
    """Look up the command-line search terms and write the hits to ``tabterms.txt``.

    Command line:
        search_terms   one or more terms to look up
        -l / --lang    key into ``dictionaries`` (default ``en``)
        -t / --trans   0: from Danish, 1: to Danish, 2: both ways

    Article lookup is tried first (plus collocation lookup when several
    terms are given). If that finds nothing for a single term, collocation
    lookup alone is tried. The output file holds the joined search terms,
    the prefix returned by ``tab`` and then one hit per line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('search_terms', nargs="+")
    # `const` mirrors `default` so that a bare flag (no value) yields the
    # default instead of None, which would crash further down.
    parser.add_argument('-l',
                        '--lang',
                        nargs='?',
                        const='en',
                        default='en',
                        dest='lang',
                        choices=dictionaries.keys(),
                        help=u'language (enda, daen, de, fr)')
    parser.add_argument('-t',
                        '--trans',
                        nargs='?',
                        const='0',
                        default='0',
                        dest="translate",
                        choices=['0', '1', '2'],
                        help='0: from Danish, 1: to Danish, 2: both ways')
    args = parser.parse_args()

    translate = int(args.translate)
    language = args.lang
    search_terms = [term.strip(' ') for term in args.search_terms]
    language_name = dictionaries[language]['name']
    directions = [('fromDanish', 'Dansk-%s' % language_name),
                  ('toDanish', '%s-Dansk' % language_name)]
    # Keep only the requested direction(s).
    if dictionaries[language]['doubflag'] < 2 or translate == 0:
        del directions[1]
    elif translate == 1:
        del directions[0]

    dic = dictionary.Dictionary(dictionaries)
    tables = [('lookup', 'Artikler')]
    if len(search_terms) > 1:
        tables.append(('collocation_lookup', 'Ordforbindelser'))
    prefix, tab_terms = tab(dic, search_terms, directions, tables, language)

    # Fall back to collocations when a single term has no article hits.
    if not tab_terms and len(search_terms) == 1:
        tables = [('collocation_lookup', 'Ordforbindelser')]
        prefix, tab_terms = tab(dic, search_terms, directions, tables,
                                language)

    # The context manager closes the file even if a write fails.
    with open('tabterms.txt', 'w') as ofile:
        ofile.write("%s\n" % " ".join(search_terms))
        ofile.write("%s\n" % prefix)
        for term in tab_terms:
            ofile.write("%s\n" % (term))
예제 #18
0
파일: miner.py 프로젝트: ymJung/scrap
 def __init__(self):
     """Set the miner's constants and create its database manager and dictionary."""
     # Numeric limits and intervals.
     self.LIMIT_COUNT = 5
     self.SPLIT_COUNT = 1000
     self.LIMIT_YEAR_SEPERATOR = 5
     self.INTERVAL_YEAR_SEPERATOR = 73
     self.THREAD_LIMIT_COUNT = 4

     # String constants.
     self.WORD_NAME = 'word'
     self.PLUS_NAME = 'plus'
     self.MINUS_NAME = 'minus'
     self.START_NAME = 'start'
     self.FINAL_NAME = 'final'
     self.DATE_NAME = 'date'
     self.FINANCE_NAME = 'finance'

     # Collaborators, created in the same relative order as before.
     self.dbm = dbmanager.DBManager()
     self.dic = dictionary.Dictionary()
예제 #19
0
    def testRadiusServerStart(self):
        """Start a RADIUS server, authenticate against it, then stop it.

        Steps:
          1. Start the server and register this host as a user.
          2. Send an Access-Request and expect an Access-Accept reply.
          3. Stop the server.
          4. Send the request again and expect it to fail, which shows the
             server is no longer answering.
        """
        ipaddress = socket.gethostbyname(socket.gethostname())
        ipaddress_port = (ipaddress, 6000)
        try:
            # Start the RADIUS server.
            RadiusSvr = radserver.Radserver('10.0.28.131')
            RadiusSvr.AddUser(ipaddress, 'passwd')
            RadiusSvr.Start()
        except Exception:
            # `except Exception` (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate instead of reporting a test failure.
            self.fail("Failed to Start RADIUS server")

        # Check that the server responds by sending an authentication request.
        srv=client.Client(server="radiusprimary", secret="passwd", dict=dictionary.Dictionary(\
        "/usr/lib/python2.3/site-packages/pyrad/dictionary"))
        try:
            srv.bind(ipaddress_port)

            req = srv.CreateAuthPacket(code=pyrad.packet.AccessRequest,
                                       User_Name="test",
                                       NAS_Identifier=socket.gethostbyname(
                                           socket.gethostname()))
            req["User-Password"] = req.PwCrypt("passwd")

            reply = srv.SendPacket(req)

        except Exception:
            self.fail(
                "Failed to create and send Authentication request packet")
        print("Authentication Request packet sent")
        if reply.code == pyrad.packet.AccessAccept:
            print(
                "RADIUS server is running and has Accepted the AccessRequest packet"
            )
        else:
            self.fail("RADIUS server failed to Accept the request")

        try:
            RadiusSvr.Stop()
        except Exception:
            self.fail("Failed to STOP the RADIUS server")

        try:
            srv.SendPacket(req)
        except Exception:
            # Expected: the stopped server no longer answers.
            pass
        else:
            self.fail("Failed to Stop RADIUS server")
예제 #20
0
 def test_add_second_word_not_in_di(self):
     """
     Adding a second word that forms a word not in the
     dict/lexic must raise ValueError.
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa', 'ab'])
     board.add_word([2, 2], 'd', 'aa', lexicon)
     with self.assertRaises(ValueError):
         board.add_word([2, 1], 'd', 'ab', lexicon)
예제 #21
0
 def test_add_second_word_not_ol(self):
     """
     Adding a second word that does not overlap the words
     already on the board must raise ValueError.
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa'])
     board.add_word([2, 2], 'd', 'aa', lexicon)
     with self.assertRaises(ValueError):
         board.add_word([0, 0], 'd', 'aa', lexicon)
예제 #22
0
 def test_add_first_word(self):
     """
     Adding 'aa' at [2, 2] with direction 'r' fills
     columns 2 and 3 of row 2.
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa'])
     board.add_word([2, 2], 'r', 'aa', lexicon)

     blank_row = ['.'] * 5
     expected = [
         list(blank_row),
         list(blank_row),
         ['.', '.', 'a', 'a', '.'],
         list(blank_row),
         list(blank_row),
     ]
     self.assertEqual(board.board, expected)
예제 #23
0
def translated_data():
    """Return the vectorized validation reviews and their polarities.

    A dictionary is built from the preprocessed training reviews found in
    the hard-coded training directory. The validation reviews are then read
    from a text file, preprocessed, vectorized against that dictionary
    (with the value 300 as the last argument) and concatenated; the
    polarities are read as one integer per line from a second file.

    Returns:
        ``[reviews, polarities]`` where the first item is the concatenated
        array and the second a NumPy array of ints.
    """
    directory = 'C:\\Users\\olive\\Desktop\\Datasets_for_thesis\\Prisjakt\\training_data'
    extracted_data = extractor.json_extract(directory)
    extracted_reviews, polarities = extracted_data[0], extracted_data[1]
    # `polarities` is only used by the translation step, which is disabled.
    training_reviews = preprocessor.preprocess(extracted_reviews)
    vocabulary = dict.Dictionary(training_reviews).dictionary

    with open('untranslated_reviews validation combined.txt', 'r') as reviews_file:
        validation_reviews = preprocessor.preprocess(reviews_file.readlines())
        untranslated_reviews = np.concatenate(
            vectorizer.vectorize_data(validation_reviews, vocabulary, 300))

    with open('translated_polarities validation combined.txt', 'r') as polarity_file:
        translated_polarities = [int(row) for row in polarity_file]

    return [untranslated_reviews, np.array(translated_polarities)]
예제 #24
0
 def __init__(self, path=None, raw=None):
     """Load an existing log file, or start empty and optionally import raw data.

     Args:
         path: UTF-8 JSON file to load. When given, its ``'logs'`` entry
             becomes the log list and its ``'dictionary'`` entry is handed
             to the internal dictionary; ``raw`` is ignored.
         raw: UTF-8 JSON file to convert via ``self.convert``. Only used
             when ``path`` is ``None``.
     """
     self._path = path
     self._dict = dic.Dictionary()

     if self._path is not None:
         with codecs.open(self._path, 'r', encoding='utf-8') as handle:
             self._data = json.load(handle)
         self._logs = self._data['logs']
         self._dict.set(self._data['dictionary'])
         return

     # No stored file: start empty; the data shares the log list object.
     self._logs = []
     self._data = {'logs': self._logs, 'dictionary': None}
     if raw is not None:
         with codecs.open(raw, 'r', encoding='utf-8') as handle:
             rawdata = json.load(handle)
         self.convert(rawdata)
예제 #25
0
파일: main.py 프로젝트: ejvindh/spt-gro
def main():
    """Start the dictionary GUI once all dictionary files are present.

    For every entry in ``dictionaries`` both its ``gddfile`` and its
    ``datfile`` must exist. Each language with a missing file is reported
    on stdout, and the GUI is not started if any language is incomplete.
    """
    # Check that the files for every language are available.
    filenotfound = False
    for lang in dictionaries.values():
        if not (os.path.exists(lang['gddfile'])
                and os.path.exists(lang['datfile'])):
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print('Ordbogsfiler for %s kan ikke findes i mappen %s' % (
                lang['name'], os.path.dirname(os.path.abspath(
                    lang['gddfile']))))
            filenotfound = True
    if filenotfound:
        return
    dic = dictionary.Dictionary(dictionaries)
    dictionaryGUI = gui.DictionaryGUI(dic)
    dictionaryGUI.run()
예제 #26
0
 def __init__(self):
     """Initialise the game state, create the helper objects and start a game."""
     # Plain state values.
     self.prevKey = ''
     self.should_keep_going = True
     self.paused = False
     self.hp = 30
     self.words = []
     self.wordsOnScreen = 10
     self.pos_in_word = 0
     self.probableWords = []

     # Helper objects, created in the same relative order as before.
     self.wordDictionary = dictionary.Dictionary("dictionary_testfile")
     self.stat = stats.Stats()
     self.scrn = screen.Screen(self.stat)
     self.scrn.scr.nodelay(1)
     self.last_time = datetime.datetime.now()
     # Integers from 1 up to, but excluding, screen height minus 10.
     self.poss_vals = list(range(1, self.scrn.height - 10))

     debug.init()
     self.new_game()
예제 #27
0
def initLanguage():
    """Create the Translator from the deinflection rules and dictionary database.

    The data directory depends on where the interpreter lives (a fix for
    ``--onefile`` builds): if the executable's directory is named
    ``Python27`` the directory of this source file is used, otherwise
    ``yomi_base/japanese`` below the executable's directory.

    Returns:
        A ``translate.Translator`` built from ``deinflect.json`` and
        ``dictionary.db`` in that directory.
    """
    exe_dir = os.path.dirname(sys.executable)
    if os.path.basename(exe_dir) == "Python27":
        directory = os.path.dirname(__file__)
    else:
        directory = exe_dir + "/yomi_base/japanese"

    deinflector = deinflect.Deinflector(
        os.path.join(directory, 'deinflect.json'))
    lexicon = dictionary.Dictionary(os.path.join(directory, 'dictionary.db'))
    return translate.Translator(deinflector, lexicon)
예제 #28
0
def get_text_utts(indir, compilexpath):
    """Load the texts in ``indir`` and turn them into utterances.

    All words must be covered by the dictionary loaded from
    ``compilexpath``; otherwise the out-of-vocabulary words are printed
    and an error is raised.

    NOTE(review): ``args`` is not a parameter -- presumably a module-level
    object; this function sets its ``dictionary`` and ``intype``
    attributes. Confirm against the rest of the file.

    Args:
        indir: Directory passed to ``load_txt_dir``.
        compilexpath: Path passed to ``dictionary.Dictionary``.

    Returns:
        The result of ``get_utts(txt, args)``.

    Raises:
        SiReError: If any out-of-vocabulary word is found.
    """
    txt = load_txt_dir(indir)

    dct = dictionary.Dictionary(compilexpath)

    oov = get_oov_words(txt, dct)

    if len(oov) != 0:
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print("Please remove all OOV word containing sents or add the words to dictonary before proceeding.")
        for w in oov:
            print(w)
        raise SiReError("OOV words present, cannot continue.")

    args.dictionary = dct
    args.intype = "txt"
    return get_utts(txt, args)
    def __init__(self):
        """Load the word list, build the quiz window and enter the Tk main loop.

        The call does not return until the main loop ends.
        """
        self.dictionary = dictionary.Dictionary()
        self.dictionary.load_dictionary('japonais.csv')

        self.show_furigana = True
        self.show_roumaji = True

        # Main window; the Return key triggers the same check as the button.
        self.root = tkinter.Tk()
        self.root.title('Words Quiz')
        self.root.config(padx=20, pady=20)
        self.root.bind('<Return>', self.check)

        # "Select" menu with one command per display option.
        self.menubar = tkinter.Menu(self.root)
        self.menu = tkinter.Menu(self.menubar, tearoff=0)
        for label, handler in (("Furigana", self.set_furigana),
                               ("Roumaji", self.set_roumaji)):
            self.menu.add_command(label=label, command=handler)
        self.menubar.add_cascade(label="Select", menu=self.menu)
        self.root.config(menu=self.menubar)

        # Three labels, packed top to bottom.
        self.native = tkinter.Label(self.root, font=('Arial', 20))
        self.native.pack()

        self.alternate = tkinter.Label(self.root, font=('Arial', 10))
        self.alternate.pack()

        self.latin = tkinter.Label(self.root, font=('Arial', 10))
        self.latin.pack()

        # Answer entry and the button that checks it.
        self.entry = tkinter.Entry(self.root, justify='center')
        self.entry.pack()

        self.button = tkinter.Button(self.root, text='Check')
        self.button.pack_configure(pady=10)
        self.button.pack()
        self.button.config(command=self.check)

        self.new_random_word()

        self.root.mainloop()
예제 #30
0
 def test_add_second_word_neigh(self):
     """
     Adding a second word that neighbors the first
     without overlapping it succeeds.
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa', 'aaa'])
     for start in ([2, 2], [2, 3]):
         board.add_word(start, 'd', 'aa', lexicon)

     blank_row = ['.'] * 5
     filled_row = ['.', '.', 'a', 'a', '.']
     expected = [
         list(blank_row),
         list(blank_row),
         list(filled_row),
         list(filled_row),
         list(blank_row),
     ]
     self.assertEqual(board.board, expected)