class DictionaryTest(unittest.TestCase):
    """Checks ``Dictionary`` against the bundled quick English-Russian example.

    Every test gets two freshly opened instances from ``setUp``: ``self.dict``
    (default options) and ``self.in_memory_dict`` (``in_memory=True``).
    """

    @staticmethod
    def _example_path():
        # Path prefix of the example dictionary, resolved relative to this file.
        here = os.path.dirname(__file__)
        return os.path.join(
            here, '..', 'examples',
            'stardict-quick_eng-rus-2.4.2/quick_english-russian')

    def setUp(self):
        self.dict = Dictionary(self._example_path())
        self.in_memory_dict = Dictionary(self._example_path(), in_memory=True)

    def test001Idx(self):
        # Index entry stored for the word 'test'.
        expected_entry = (581161, 16)
        self.assertEqual(self.dict.idx['test'], expected_entry)

    def test002Dict(self):
        # Lookup through the .dict component.
        definition = self.dict.dict['test']
        self.assertEqual(definition, 'проверка')

    def test003Dictionary(self):
        # Lookup through the Dictionary object itself.
        definition = self.dict['test']
        self.assertEqual(definition, 'проверка')

    def test004Contains(self):
        self.assertIn('test', self.dict)
        self.assertNotIn('testtt', self.dict)

    def test005Delitem(self):
        # Look the word up first, then delete it and make sure it is no longer
        # present in the private cache.
        _ = self.dict['test']
        del self.dict['test']
        self.assertNotIn('test', self.dict._dict_cache)

    def test006Len(self):
        word_count = len(self.dict)
        self.assertEqual(word_count, 31705)

    def test007Eq(self):
        # A second instance opened from the same files must compare equal.
        twin = Dictionary(self._example_path())
        self.assertTrue(self.dict == twin)

    def test008Ne(self):
        # ... and must not compare unequal.
        twin = Dictionary(self._example_path())
        self.assertFalse(self.dict != twin)

    def test009Repr(self):
        expected = "<class 'pystardict.Dictionary'> quick_english-russian"
        self.assertEqual(repr(self.dict), expected)

    def test010Clear(self):
        # Look a word up, clear, and expect an empty private cache.
        _ = self.dict['test']
        self.dict.clear()
        self.assertEqual(len(self.dict._dict_cache), 0)

    def test011Get(self):
        # Known word -> its definition; unknown word -> the supplied default.
        found = self.dict.get('test', 't')
        missing = self.dict.get('testtt', 't')
        self.assertEqual(found, 'проверка')
        self.assertEqual(missing, 't')

    def test012InMemoryDict(self):
        definition = self.in_memory_dict['test']
        self.assertEqual(definition, 'проверка')

    def test013IterateDict(self):
        # Every (key, value) pair yielded by items() must agree with indexing.
        for headword, definition in self.in_memory_dict.items():
            self.assertEqual(self.in_memory_dict[headword], definition)
def handle(self, *args, **options):
    """Replace each word's phrases with examples taken from a dictionary.

    Looks for ``sources/*.dict`` files and opens the first match as a
    ``Dictionary``. For every ``Word`` it deletes the word's existing phrases
    and saves up to ten ``Phrase`` rows, one per non-empty ``<ex>`` element
    found in the word's dictionary entry. ``args`` and ``options`` are unused.
    Presumably the entry point of a Django management command.
    """
    for dict_file in glob.glob('sources/*.dict'):
        dict_name = Path(dict_file).stem
        stardict = Dictionary(os.path.join('sources', dict_name))
        print('Importing the %s dictionary...' % dict_name)

        for word in Word.objects.all():
            entry = stardict.get(word.content)
            markup = BeautifulSoup(entry, "html.parser")
            # Existing phrases are always dropped before the new ones are added.
            word.phrase_set.all().delete()

            saved = 0
            for example in markup.find_all('ex'):
                if not example.text:
                    continue
                Phrase(word=word, content=example.text).save()
                saved += 1
                if saved >= 10:
                    break

        # One dictionary is enough for now
        break
def handle(self, *args, **options):
    """Refresh each word's transcription and example phrase from a dictionary.

    Looks for ``sources/*.dict`` files and opens the first match as a
    ``Dictionary``. For every ``Word``:

    * the first ``<c c="teal">`` element whose text starts with ``[`` supplies
      the transcription (its text without the first and last character), and
      the word is saved;
    * the word's existing phrases are deleted and a single ``Phrase`` is
      saved from the first ``<ex>`` element that has non-empty text.

    ``args`` and ``options`` are unused. Presumably the entry point of a
    Django management command.
    """
    for dict_file in glob.glob('sources/*.dict'):
        dict_name = Path(dict_file).stem
        stardict = Dictionary(os.path.join('sources', dict_name))
        print('Updating transcription from %s dictionary...' % dict_name)

        for word in Word.objects.all():
            entry = stardict.get(word.content)
            markup = BeautifulSoup(entry, "html.parser")

            teal_tags = markup.find_all('c', attrs={'c': 'teal'})
            bracketed = next(
                (tag for tag in teal_tags if tag.text.startswith('[')), None)
            if bracketed is not None:
                # Drop the leading '[' and the last character.
                word.transcription = bracketed.text[1:-1]
                word.save()

            # Phrases are cleared even when no transcription was found.
            word.phrase_set.all().delete()
            for example in markup.find_all('ex'):
                if not example.text:
                    continue
                Phrase(word=word, content=example.text).save()
                break

        # Only the first matching dictionary file is processed.
        break
class stardict(import_dictionary):
    """Importer that copies the entries of a StarDict dictionary into the database.

    Relies on ``self.db``, ``check_dictionary`` and ``check_word``, which are
    defined outside this class (presumably by ``import_dictionary``).
    """

    def __init__(self, path, stardict_db_folder, stardict_db_name):
        """Open the dictionary at ``path/stardict_db_folder/stardict_db_name``."""
        import_dictionary.__init__(self)
        location = os.path.join(path, stardict_db_folder, stardict_db_name)
        self.dict1 = Dictionary(location)

    def add_words_to_dictionary(self):
        """Write a definition for every indexed word and commit.

        Returns the dictionary id obtained from ``check_dictionary``. On any
        exception the database connection is rolled back and the exception is
        re-raised unchanged.
        """
        try:
            info = self.dict1.ifo
            dictionary_id = self.check_dictionary(
                info.bookname, info.wordcount, abstractive=1)
            print("\n'Add all new words to Dictionary '{0}':\n".format(
                info.bookname))

            # The private index yields byte strings, so decode each one
            # before using it as a word.
            for raw_word in tqdm(self.dict1.idx._idx):
                word = raw_word.decode('Utf-8')
                word_id = self.check_word(word)
                self.db.write_definition(
                    dictionary_id, word_id, self.dict1.get(word))

            self.db._commit()
            return dictionary_id
        except BaseException:
            # Same reach as a bare ``except``: undo the partial import, then
            # let the original exception propagate.
            self.db.db_connection.rollback()
            raise
class DictionaryTest(unittest.TestCase):
    """Checks ``Dictionary`` against the bundled quick English-Russian example.

    Every test gets two freshly opened instances from ``setUp``: ``self.dict``
    (default options) and ``self.in_memory_dict`` (``in_memory=True``).
    """

    @staticmethod
    def _example_path():
        # Path prefix of the example dictionary, resolved relative to this file.
        here = os.path.dirname(__file__)
        return os.path.join(
            here, '..', 'examples',
            'stardict-quick_eng-rus-2.4.2/quick_english-russian')

    def setUp(self):
        self.dict = Dictionary(self._example_path())
        self.in_memory_dict = Dictionary(self._example_path(), in_memory=True)

    def test001Idx(self):
        # Index entry stored for the word 'test'.
        expected_entry = (581161, 16)
        self.assertEqual(self.dict.idx['test'], expected_entry)

    def test002Dict(self):
        # Lookup through the .dict component.
        definition = self.dict.dict['test']
        self.assertEqual(definition, 'проверка')

    def test003Dictionary(self):
        # Lookup through the Dictionary object itself.
        definition = self.dict['test']
        self.assertEqual(definition, 'проверка')

    def test004Contains(self):
        self.assertIn('test', self.dict)
        self.assertNotIn('testtt', self.dict)

    def test005Delitem(self):
        # Look the word up first, then delete it and make sure it is no longer
        # present in the private cache.
        _ = self.dict['test']
        del self.dict['test']
        self.assertNotIn('test', self.dict._dict_cache)

    def test006Len(self):
        word_count = len(self.dict)
        self.assertEqual(word_count, 31705)

    def test007Eq(self):
        # A second instance opened from the same files must compare equal.
        twin = Dictionary(self._example_path())
        self.assertTrue(self.dict == twin)

    def test008Ne(self):
        # ... and must not compare unequal.
        twin = Dictionary(self._example_path())
        self.assertFalse(self.dict != twin)

    def test009Repr(self):
        expected = "<class 'pystardict.Dictionary'> quick_english-russian"
        self.assertEqual(repr(self.dict), expected)

    def test010Clear(self):
        # Look a word up, clear, and expect an empty private cache.
        _ = self.dict['test']
        self.dict.clear()
        self.assertEqual(len(self.dict._dict_cache), 0)

    def test011Get(self):
        # Known word -> its definition; unknown word -> the supplied default.
        found = self.dict.get('test', 't')
        missing = self.dict.get('testtt', 't')
        self.assertEqual(found, 'проверка')
        self.assertEqual(missing, 't')

    def test012InMemoryDict(self):
        definition = self.in_memory_dict['test']
        self.assertEqual(definition, 'проверка')

    def test013IterateDict(self):
        # Every (key, value) pair yielded by items() must agree with indexing.
        for headword, definition in self.in_memory_dict.items():
            self.assertEqual(self.in_memory_dict[headword], definition)
import sys
import xml.etree.ElementTree as ET

from pystardict import Dictionary

# Script: collect the lemmas of an XML paradigm file and look each one up in a
# StarDict dictionary.
#
#   argv[1] - XML file containing <Paradigm Lemma="..."> elements
#   argv[2] - path passed to pystardict's Dictionary
#
# Output files (written to the current directory):
#   pwords.txt - "<lemma> | <second line of the dictionary entry>" per hit
#   uwords.txt - one lemma per line for words missing from the dictionary

f = ET.parse(sys.argv[1])
dict_bel_rus = Dictionary(sys.argv[2])

# Unique lemmas with the '´' character removed (presumably a stress mark).
path = './/Paradigm'
words = {
    paradigm.attrib['Lemma'].replace('´', '')
    for paradigm in f.findall(path)
}
print("Parsed: {} words".format(len(words)))

translated = 0
# Context managers guarantee both files are flushed and closed even if a
# lookup below raises part-way through the loop.
with open('uwords.txt', 'w') as unknown_words_f, \
        open('pwords.txt', 'w') as words_f:
    for element in words:
        if element in dict_bel_rus:
            translated += 1
            # The second line of the entry is written as the translation.
            # NOTE(review): a single-line entry raises IndexError here --
            # confirm every entry in the dictionary has at least two lines.
            rus_eq = dict_bel_rus.get(element).split("\n")[1]
            words_f.write("{} | {}\n".format(element, rus_eq))
        else:
            unknown_words_f.write(element + "\n")

print("{} were translated".format(translated))