def setUp(self):
    """Open the example quick English-Russian dictionary for each test.

    ``self.dict`` is opened with the default arguments and
    ``self.in_memory_dict`` with ``in_memory=True``; both use the same path.
    """
    here = os.path.dirname(__file__)
    dict_prefix = os.path.join(
        here, '..', 'examples',
        'stardict-quick_eng-rus-2.4.2/quick_english-russian')
    self.dict = Dictionary(dict_prefix)
    self.in_memory_dict = Dictionary(dict_prefix, in_memory=True)
class DictionaryTest(unittest.TestCase):
    """Exercise the mapping-style API of ``Dictionary`` against the example
    quick English-Russian dictionary shipped under ``examples``."""

    @staticmethod
    def _dict_path():
        """Return the path prefix of the example dictionary files."""
        return os.path.join(
            os.path.dirname(__file__), '..', 'examples',
            'stardict-quick_eng-rus-2.4.2/quick_english-russian')

    def setUp(self):
        """Open the example dictionary with default and in-memory settings."""
        self.dict = Dictionary(self._dict_path())
        self.in_memory_dict = Dictionary(self._dict_path(), in_memory=True)

    def test001Idx(self):
        self.assertEqual(self.dict.idx['test'], (581161, 16,))

    def test002Dict(self):
        self.assertEqual(self.dict.dict['test'], 'проверка')

    def test003Dictionary(self):
        self.assertEqual(self.dict['test'], 'проверка')

    def test004Contains(self):
        # assertIn/assertNotIn report the container on failure.
        self.assertIn('test', self.dict)
        self.assertNotIn('testtt', self.dict)

    def test005Delitem(self):
        # Look the word up first so there is something to delete.
        self.dict['test']
        del self.dict['test']
        self.assertNotIn('test', self.dict._dict_cache)

    def test006Len(self):
        self.assertEqual(len(self.dict), 31705)

    def test007Eq(self):
        y = Dictionary(self._dict_path())
        # Spelled with ``==`` on purpose: this test targets ``__eq__``.
        self.assertTrue(self.dict == y)

    def test008Ne(self):
        y = Dictionary(self._dict_path())
        # Spelled with ``!=`` on purpose: this test targets ``__ne__``.
        self.assertFalse(self.dict != y)

    def test009Repr(self):
        self.assertEqual(
            repr(self.dict),
            '''<class 'pystardict.Dictionary'> quick_english-russian''')

    def test010Clear(self):
        self.dict['test']
        self.dict.clear()
        self.assertEqual(len(self.dict._dict_cache), 0)

    def test011Get(self):
        self.assertEqual(self.dict.get('test', 't'), 'проверка')
        self.assertEqual(self.dict.get('testtt', 't'), 't')

    def test012InMemoryDict(self):
        self.assertEqual(self.in_memory_dict['test'], 'проверка')

    def test013IterateDict(self):
        for k, v in self.in_memory_dict.items():
            self.assertEqual(self.in_memory_dict[k], v)
class TranslatorConfig(AppConfig):
    """App config whose class body loads the dataset, the two dictionaries,
    the encoder/decoder weights, the spaCy pipeline and both word lists.

    Everything below runs once, when this class statement is executed.
    """

    name = 'translator'

    # File path of the dataset as a tuple. If the translated sentences are
    # stored in two files, the tuple has two elements.
    raw_data_file_path = (DATASET_DIR + en_dataset, DATASET_DIR + th_dataset)

    input_lang, output_lang, train_pairs, test_pairs = prepareData(
        input_lang_name,
        output_lang_name,
        raw_data_file_path,
        max_vocab_size=max_vocab_size,
        reverse=reverse,
        trim=trim,
        start_filter=start_filter,
        perc_train_set=perc_train_set,
        print_to=None,
        tagging=tagging,
    )

    en_th_dict = Dictionary(en_th_dict_path)
    th_en_dict = Dictionary(th_en_dict_path)

    # Build the encoder and the attention decoder with matching settings.
    encoder = EncoderRNN(
        input_lang.vocab_size, hidden_size,
        layers=layers, dropout=dropout, bidirectional=bidirectional)
    decoder = DecoderAttn(
        hidden_size, output_lang.vocab_size,
        layers=layers, dropout=dropout, bidirectional=bidirectional)

    # Without CUDA the checkpoints are remapped onto the CPU while loading.
    if not use_cuda:
        encoder.load_state_dict(torch.load(encoder_file, map_location='cpu'))
        decoder.load_state_dict(torch.load(decoder_file, map_location='cpu'))
    else:
        encoder.load_state_dict(torch.load(encoder_file))
        decoder.load_state_dict(torch.load(decoder_file))

    encoder.eval()
    decoder.eval()

    if use_cuda:
        print('Cuda being used')
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    en_nlp = spacy.load("en_core_web_sm")

    # De-duplicated, sorted word lists for both languages.
    en_words_list = sorted(set(words.words()))
    with open(TH_VOCABULARY_PATH, encoding='utf-8') as f:
        th_words_list = sorted(set(f.read().splitlines()))
def getrans(dicpath, rsf):
    """Dump dictionary entries whose key contains Chinese characters.

    Opens the dictionary at *dicpath* and writes one UTF-8 encoded line per
    matching entry to the file *rsf*, formatted as
    ``key|||resolveword(value)``. The directory of this module is printed
    first.
    """
    han_pattern = re.compile(u'[\u4e00-\u9fa5]+')
    dicts_dir = os.path.join(os.path.dirname(__file__))
    print(dicts_dir)
    stardict = Dictionary(dicpath, True)
    with open(rsf, "wb") as out:
        for key, value in stardict.iteritems():
            if not han_pattern.search(key):
                continue
            line = key + "|||" + resolveword(value) + "\n"
            out.write(line.encode("utf-8", "ignore"))
def fixture_dict(request):
    """Return the example dictionary variant named by ``request.param``.

    Args:
        request: object whose ``param`` attribute is ``'on_disk'`` or
            ``'in_memory'``.

    Returns:
        A ``Dictionary`` opened on the example quick English-Russian files,
        with ``in_memory=True`` for the ``'in_memory'`` variant.

    Raises:
        KeyError: if ``request.param`` is not one of the two variants.
    """
    dict_path = os.path.join(
        os.path.dirname(__file__), os.pardir, 'examples',
        'stardict-quick_eng-rus-2.4.2', 'quick_english-russian')
    # Pick the constructor options first so that only the requested variant
    # is opened, instead of building both and throwing one away.
    options = {
        'on_disk': {},
        'in_memory': {'in_memory': True},
    }[request.param]
    return Dictionary(dict_path, **options)
def import_definitions(self, dictionary):
    """Fill in blank definitions of ``self.words`` from a dictionary.

    A word whose ``definition`` is empty or whitespace-only receives the
    entry stored under ``word.word`` in ``Dictionary(dictionary)``. Words
    that already have a definition, or whose lookup raises ``KeyError``,
    are left unchanged.
    """
    source = Dictionary(dictionary)
    blanks = (entry for entry in self.words if not entry.definition.strip())
    for entry in blanks:
        try:
            entry.definition = source[entry.word]
        except KeyError:
            continue
def demo(): milestone1 = datetime.datetime.today() dicts_dir = os.path.join(os.path.dirname(__file__)) dict1 = Dictionary( os.path.join(dicts_dir, 'stardict-quick_eng-rus-2.4.2', 'quick_english-russian')) dict2 = Dictionary( os.path.join(dicts_dir, 'stardict-quick_rus-eng-2.4.2', 'quick_russian-english')) milestone2 = datetime.datetime.today() print '2 dicts load:', milestone2 - milestone1 print dict1.idx['test'] print dict2.idx['проверка'] milestone3 = datetime.datetime.today() print '2 cords getters:', milestone3 - milestone2 print dict1.dict['test'] print dict2.dict['проверка'] milestone4 = datetime.datetime.today() print '2 direct data getters (w\'out cache):', milestone4 - milestone3 print dict1['test'] print dict2['проверка'] milestone5 = datetime.datetime.today() print '2 high level data getters (not cached):', milestone5 - milestone4 print dict1['test'] print dict2['проверка'] milestone6 = datetime.datetime.today() print '2 high level data getters (cached):', milestone6 - milestone5 # list dictionary keys and dictionary content according to the key for key in dict1.ids.keys(): print dict1.dict[key]
def handle(self, *args, **options):
    """Replace each word's phrases with examples from a dictionary.

    Uses the first ``sources/*.dict`` file found. For every ``Word``, the
    dictionary entry for ``word.content`` is parsed as HTML, the word's
    existing phrases are deleted, and up to ten non-empty ``<ex>`` elements
    are saved as new ``Phrase`` rows. Words with no dictionary entry are
    skipped and keep their current phrases.
    """
    for fname in glob.glob('sources/*.dict'):
        dict_name = Path(fname).stem
        dict1 = Dictionary(os.path.join('sources', dict_name))
        print('Importing the %s dictionary...' % dict_name)
        for word in Word.objects.all():
            word_data = dict1.get(word.content)
            if word_data is None:
                # No entry for this word: there is nothing to parse, and
                # handing None to BeautifulSoup would abort the whole run.
                continue
            soup = BeautifulSoup(word_data, "html.parser")
            word.phrase_set.all().delete()
            saved = 0
            for ex in soup.find_all('ex'):
                if not ex.text:
                    continue
                Phrase(word=word, content=ex.text).save()
                saved += 1
                if saved >= 10:
                    break
        # One dictionary is enough for now
        break
def __init__(self):
    """Create the custom, Wiktionary and StarDict-quick word translators.

    The custom Russian-English mapping is read from
    ``dictionaries/custom/rusEngDictionary.json`` next to this module; the
    other two translators wrap ``Dictionary`` objects opened from the
    ``dictionaries`` directory.
    """
    dictionariesDir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "dictionaries")

    self.__pathToCustomDictionary = os.path.join(
        dictionariesDir, "custom", "rusEngDictionary.json")
    with open(self.__pathToCustomDictionary, "r") as fp:
        rusToEngDict = json.load(fp)
    # Stored on the instance like the other two translators; as a plain
    # local it would be thrown away as soon as __init__ returns.
    self.__customTranslator = OfflineWordTranslator(rusToEngDict)

    pathToDictionary = os.path.join(
        dictionariesDir, "Wiktionary Russian-English",
        "Wiktionary Russian-English")
    self.__wiktionaryTranslator = OfflineWordTranslatorForWiktionary(
        Dictionary(pathToDictionary))

    pathToDictionary = os.path.join(
        dictionariesDir, "stardict-quick_rus-eng-2.4.2",
        "quick_russian-english")
    self.__starDictQuickTranslator = OfflineWordTranslatorForStarDictQuick(
        Dictionary(pathToDictionary))
def handle(self, *args, **options):
    """Update each word's transcription and phrase from a dictionary.

    Uses the first ``sources/*.dict`` file found. For every ``Word``, the
    dictionary entry for ``word.content`` is parsed as HTML. The first
    ``<c c="teal">`` element whose text starts with ``[`` becomes the
    transcription (first and last character stripped) and the word is
    saved. The word's phrases are then deleted and the first non-empty
    ``<ex>`` element is saved as a new ``Phrase``. Words with no dictionary
    entry are skipped and left unchanged.
    """
    for fname in glob.glob('sources/*.dict'):
        dict_name = Path(fname).stem
        dict1 = Dictionary(os.path.join('sources', dict_name))
        print('Updating transcription from %s dictionary...' % dict_name)
        for word in Word.objects.all():
            word_data = dict1.get(word.content)
            if word_data is None:
                # No entry for this word: there is nothing to parse, and
                # handing None to BeautifulSoup would abort the whole run.
                continue
            soup = BeautifulSoup(word_data, "html.parser")

            for trans_var in soup.find_all('c', attrs={'c': 'teal'}):
                if trans_var.text.startswith('['):
                    word.transcription = trans_var.text[1:-1]
                    word.save()
                    break

            word.phrase_set.all().delete()
            for ex in soup.find_all('ex'):
                if ex.text:
                    Phrase(word=word, content=ex.text).save()
                    break
        # Only the first dictionary found is processed.
        break
def open_dict(self):
    """Load the dictionary found in the currently selected directory.

    Sets ``self.dict_dir`` from ``self.dir`` and ``self.current_dict_name``,
    then derives the dictionary base name from the directory's files,
    ignoring files whose extension is in ``SUFFIX``. The previous
    ``self.dict`` is dropped and garbage-collected before the new one is
    opened.

    Returns:
        False if the remaining files disagree on the base name; otherwise
        nothing is returned explicitly.
    """
    self.dict_dir = os.path.join(self.dir, self.current_dict_name)

    base_name = ''
    for entry in os.listdir(self.dict_dir):
        stem, ext = os.path.splitext(entry)
        if ext not in SUFFIX:
            if base_name and stem != base_name:
                logger.error("dict broken!")
                return False
            # The directory name may differ from the dictionary file name.
            base_name = stem

    # Release the old dictionary before opening the next one.
    del self.dict
    gc.collect()

    dict_path = os.path.join(self.dict_dir, base_name)
    self.dict = Dictionary(dict_path)
    logger.info("载入%s", dict_path)
class stardict(import_dictionary):
    """Importer that copies the entries of one ``Dictionary`` into the
    database handled by ``import_dictionary``."""

    def __init__(self, path, stardict_db_folder, stardict_db_name):
        """Open the dictionary at ``path/stardict_db_folder/stardict_db_name``."""
        import_dictionary.__init__(self)
        dict_prefix = os.path.join(path, stardict_db_folder, stardict_db_name)
        self.dict1 = Dictionary(dict_prefix)

    def add_words_to_dictionary(self):
        """Write a definition for every indexed word and commit.

        Returns:
            The id returned by ``check_dictionary`` for this dictionary.

        Any exception rolls the database connection back and is re-raised.
        """
        try:
            ifo = self.dict1.ifo
            dictionary_id = self.check_dictionary(
                ifo.bookname, ifo.wordcount, abstractive=1)
            print("\n'Add all new words to Dictionary '{0}':\n".format(
                ifo.bookname))
            for raw_key in tqdm(self.dict1.idx._idx):
                # Index keys are decoded before being used for lookups.
                word = raw_key.decode('Utf-8')
                self.db.write_definition(
                    dictionary_id, self.check_word(word), self.dict1.get(word))
            self.db._commit()
            return dictionary_id
        except BaseException:
            self.db.db_connection.rollback()
            raise
def on_dictionary_found(self, event, dispatcher):
    """Register the dictionary carried by *event* unless it is disabled.

    ``event.data`` supplies the source's ``name`` and ``path``. A source
    whose name is in ``self.disabled`` is ignored.
    """
    source = event.data
    if source.name in self.disabled:
        return
    # NOTE(review): the object is kept in ``self._dictionaries`` after the
    # ``with`` block has exited — confirm it is still usable at that point.
    with Dictionary(source.path) as dictionary:
        self._dictionaries.append(dictionary)
    garbage.collect()
from pystardict import Dictionary import os import sys import codecs if __name__ == "__main__": if len(sys.argv) <= 1: print("Usage: ./stardict.py start_dict_ifo_file_path") else: start_dictionary = Dictionary(sys.argv[1].split(".ifo")[0]) f = codecs.open("company-english-helper-data.el", "w", "utf-8") f.write(";; -*- mode: fundamental; -*-\n") f.write(";;\n") f.write( ";; This file is generate by stardict.py, please don't edit this file.\n" ) f.write(";;\n\n") f.write("(defconst english-helper-completions\n '(\n") index = 0 for word in start_dictionary.keys(): is_english_word = all(ord(char) < 128 for char in word) if is_english_word: first_line_translation = start_dictionary.dict[word].split()[0] no_phonetic_translation = first_line_translation.split(">")[-1] candiate_word = word.lower().replace('\"', ' ') candiate_translateion = no_phonetic_translation.strip( ).replace('\"', ' ') f.write(' #(\"' + candiate_word + '\" ' + '0 1' + '\n' +
def test008Ne(self):
    """A second dictionary opened from the same files is not ``!=``."""
    dict_prefix = os.path.join(
        os.path.dirname(__file__), '..', 'examples',
        'stardict-quick_eng-rus-2.4.2/quick_english-russian')
    y = Dictionary(dict_prefix)
    self.assertFalse(self.dict != y)
def init_dicts():
    """Bind the module-level ``e_r_dict`` to a freshly opened dictionary.

    The dictionary path is ``STARDICT_DIR`` joined with ``STARDICT_NAME``.
    """
    global e_r_dict
    dict_prefix = os.path.join(STARDICT_DIR, STARDICT_NAME)
    e_r_dict = Dictionary(dict_prefix)
class DictionaryTest(unittest.TestCase):
    """Exercise the mapping-style API of ``Dictionary`` against the example
    quick English-Russian dictionary shipped under ``examples``."""

    @staticmethod
    def _dict_path():
        """Return the path prefix of the example dictionary files."""
        return os.path.join(
            os.path.dirname(__file__), '..', 'examples',
            'stardict-quick_eng-rus-2.4.2/quick_english-russian')

    def setUp(self):
        """Open the example dictionary with default and in-memory settings."""
        self.dict = Dictionary(self._dict_path())
        self.in_memory_dict = Dictionary(self._dict_path(), in_memory=True)

    def test001Idx(self):
        self.assertEqual(self.dict.idx['test'], (
            581161,
            16,
        ))

    def test002Dict(self):
        self.assertEqual(self.dict.dict['test'], 'проверка')

    def test003Dictionary(self):
        self.assertEqual(self.dict['test'], 'проверка')

    def test004Contains(self):
        # assertIn/assertNotIn report the container on failure.
        self.assertIn('test', self.dict)
        self.assertNotIn('testtt', self.dict)

    def test005Delitem(self):
        # Look the word up first so there is something to delete.
        self.dict['test']
        del self.dict['test']
        self.assertNotIn('test', self.dict._dict_cache)

    def test006Len(self):
        self.assertEqual(len(self.dict), 31705)

    def test007Eq(self):
        y = Dictionary(self._dict_path())
        # Spelled with ``==`` on purpose: this test targets ``__eq__``.
        self.assertTrue(self.dict == y)

    def test008Ne(self):
        y = Dictionary(self._dict_path())
        # Spelled with ``!=`` on purpose: this test targets ``__ne__``.
        self.assertFalse(self.dict != y)

    def test009Repr(self):
        self.assertEqual(
            repr(self.dict),
            '''<class 'pystardict.Dictionary'> quick_english-russian''')

    def test010Clear(self):
        self.dict['test']
        self.dict.clear()
        self.assertEqual(len(self.dict._dict_cache), 0)

    def test011Get(self):
        self.assertEqual(self.dict.get('test', 't'), 'проверка')
        self.assertEqual(self.dict.get('testtt', 't'), 't')

    def test012InMemoryDict(self):
        self.assertEqual(self.in_memory_dict['test'], 'проверка')

    def test013IterateDict(self):
        for k, v in self.in_memory_dict.items():
            self.assertEqual(self.in_memory_dict[k], v)
def test008Ne(fixture_dict):
    """A dictionary opened from the same files must not compare ``!=``."""
    dict_prefix = os.path.join(
        os.path.dirname(__file__), os.pardir, 'examples',
        'stardict-quick_eng-rus-2.4.2', 'quick_english-russian')
    y = Dictionary(dict_prefix)
    differs = fixture_dict != y
    assert differs is False
def __init__(self, dictPath, dictPrefix):
    """Open the dictionary at ``dictPath`` joined with ``dictPrefix``.

    The opened ``Dictionary`` is stored as ``self.dictionary``.
    """
    fullPrefix = os.path.join(dictPath, dictPrefix)
    self.dictionary = Dictionary(fullPrefix)
def __init__(self):
    """Open the two dictionaries named by the instance's path attributes.

    ``self.lazyworm_path`` is opened as ``self.dict_lazyworm`` and
    ``self.quick_path`` as ``self.dict_quick``, in that order.
    """
    lazyworm = Dictionary(self.lazyworm_path)
    self.dict_lazyworm = lazyworm
    quick = Dictionary(self.quick_path)
    self.dict_quick = quick
def __init__(self, path, stardict_db_folder, stardict_db_name):
    """Initialise the base importer and open the dictionary.

    The dictionary path is ``path/stardict_db_folder/stardict_db_name``;
    the opened ``Dictionary`` is stored as ``self.dict1``.
    """
    import_dictionary.__init__(self)
    dict_prefix = os.path.join(path, stardict_db_folder, stardict_db_name)
    self.dict1 = Dictionary(dict_prefix)
def setUp(self):
    """Create the default and the in-memory dictionary used by the tests.

    Both are opened on the example quick English-Russian dictionary located
    relative to this file.
    """
    path_parts = (
        os.path.dirname(__file__), '..', 'examples',
        'stardict-quick_eng-rus-2.4.2/quick_english-russian')
    self.dict = Dictionary(os.path.join(*path_parts))
    self.in_memory_dict = Dictionary(os.path.join(*path_parts),
                                     in_memory=True)
# N.xml - file with words from grammar db # BelRusVorvul/BelRusVorvul - name of belarusian-russian dict in StarDict format (script was tested only with A. Vorvul's dict) # Output: # uwords.txt - file with words that have no translations in format: # <word_in_bel> # pwords.txt - file with words that were translated in format: # <word_in_bel> | <gender_in_bel> | <word/words_in_rus> import sys, xml import xml.etree.ElementTree as ET from pystardict import Dictionary f = ET.parse(sys.argv[1]) unknown_words_f = open('uwords.txt', 'w') words_f = open('pwords.txt', 'w') dict_bel_rus = Dictionary(sys.argv[2]) words = set() path = './/Paradigm' for paradigm in f.findall(path): lem = paradigm.attrib['Lemma'].replace('´', '') words.add(lem) print("Parsed: {} words".format(len(words))) translated = 0 for element in words: if dict_bel_rus.has_key(element): translated += 1 rus_eq = dict_bel_rus.get(element).split("\n")[1]