コード例 #1
0
 def setUp(self):
     """Open the sample dictionary twice before each test.

     ``self.dict`` is created with the constructor defaults and
     ``self.in_memory_dict`` with ``in_memory=True``; both point at the
     same sample path under ``../examples``.
     """
     sample_path = os.path.join(
         os.path.dirname(__file__), '..', 'examples',
         'stardict-quick_eng-rus-2.4.2/quick_english-russian')
     self.dict = Dictionary(sample_path)
     self.in_memory_dict = Dictionary(sample_path, in_memory=True)
コード例 #2
0
ファイル: __init__.py プロジェクト: zhang-z/pystardict
class DictionaryTest(unittest.TestCase):
    """Exercise ``Dictionary`` against the sample dictionary under ``../examples``.

    Every test gets two fresh instances from :meth:`setUp`: ``self.dict``
    (constructor defaults) and ``self.in_memory_dict`` (``in_memory=True``).
    """

    # Path handed to ``Dictionary``; built once so every test opens exactly
    # the same sample instead of repeating the join expression.
    _SAMPLE_PATH = os.path.join(
        os.path.dirname(__file__), '..', 'examples',
        'stardict-quick_eng-rus-2.4.2/quick_english-russian')

    def setUp(self):
        """Open the sample dictionary in default and in-memory mode."""
        self.dict = Dictionary(self._SAMPLE_PATH)
        self.in_memory_dict = Dictionary(self._SAMPLE_PATH, in_memory=True)

    def test001Idx(self):
        """The ``idx`` entry for 'test' is the expected 2-tuple."""
        self.assertEqual(self.dict.idx['test'], (581161, 16,))

    def test002Dict(self):
        """Lookup through the ``dict`` attribute returns the translation."""
        self.assertEqual(self.dict.dict['test'], 'проверка')

    def test003Dictionary(self):
        """Subscripting the dictionary itself returns the translation."""
        self.assertEqual(self.dict['test'], 'проверка')

    def test004Contains(self):
        """``in`` is true for a known word and false for an unknown one."""
        # assertIn/assertNotIn report the member and container on failure,
        # unlike assertTrue/assertFalse on a pre-evaluated boolean.
        self.assertIn('test', self.dict)
        self.assertNotIn('testtt', self.dict)

    def test005Delitem(self):
        """``del`` removes a previously looked-up word from ``_dict_cache``."""
        self.dict['test']  # look the word up first so there is something to delete
        del self.dict['test']
        self.assertNotIn('test', self.dict._dict_cache)

    def test006Len(self):
        """``len`` reports the expected number of entries."""
        self.assertEqual(len(self.dict), 31705)

    def test007Eq(self):
        """Two instances opened from the same path compare equal."""
        other = Dictionary(self._SAMPLE_PATH)
        # Spelled with ``==`` on purpose: the operator itself is under test.
        self.assertTrue(self.dict == other)

    def test008Ne(self):
        """``!=`` is false for two instances opened from the same path."""
        other = Dictionary(self._SAMPLE_PATH)
        # Spelled with ``!=`` on purpose: the operator itself is under test.
        self.assertFalse(self.dict != other)

    def test009Repr(self):
        """``repr`` consists of the class and the dictionary name."""
        self.assertEqual(
            repr(self.dict),
            '''<class 'pystardict.Dictionary'> quick_english-russian''')

    def test010Clear(self):
        """``clear`` empties ``_dict_cache``."""
        self.dict['test']  # look the word up first so the cache is exercised
        self.dict.clear()
        self.assertEqual(len(self.dict._dict_cache), 0)

    def test011Get(self):
        """``get`` returns the translation, or the default for unknown words."""
        self.assertEqual(self.dict.get('test', 't'), 'проверка')
        self.assertEqual(self.dict.get('testtt', 't'), 't')

    def test012InMemoryDict(self):
        """The in-memory instance returns the same translation."""
        self.assertEqual(self.in_memory_dict['test'], 'проверка')

    def test013IterateDict(self):
        """Every ``(key, value)`` from ``items()`` matches a direct lookup."""
        for k, v in self.in_memory_dict.items():
            self.assertEqual(self.in_memory_dict[k], v)
コード例 #3
0
class TranslatorConfig(AppConfig):
    """Application config whose class body builds the translator resources.

    All statements below run once, when the class statement is executed:
    the dataset is prepared, both dictionaries are opened, the encoder and
    decoder are created, loaded from their saved state and switched to
    evaluation mode, and the English and Thai word lists are loaded.
    Every name bound here becomes a class attribute.
    """

    # Dataset file path(s) as a tuple. If translated sentences are stored in
    # two files, the tuple has two elements.
    raw_data_file_path = (DATASET_DIR + en_dataset, DATASET_DIR + th_dataset)

    input_lang, output_lang, train_pairs, test_pairs = prepareData(
        input_lang_name, output_lang_name, raw_data_file_path,
        max_vocab_size=max_vocab_size, reverse=reverse, trim=trim,
        start_filter=start_filter, perc_train_set=perc_train_set,
        print_to=None, tagging=tagging)

    name = 'translator'
    en_th_dict = Dictionary(en_th_dict_path)
    th_en_dict = Dictionary(th_en_dict_path)

    # Create the encoder.
    encoder = EncoderRNN(
        input_lang.vocab_size, hidden_size,
        layers=layers, dropout=dropout, bidirectional=bidirectional)

    # Create the decoder.
    decoder = DecoderAttn(
        hidden_size, output_lang.vocab_size,
        layers=layers, dropout=dropout, bidirectional=bidirectional)

    # Without CUDA the saved tensors are remapped to the CPU; with CUDA
    # ``map_location`` stays at its default of None.
    encoder.load_state_dict(
        torch.load(encoder_file, map_location=None if use_cuda else 'cpu'))
    decoder.load_state_dict(
        torch.load(decoder_file, map_location=None if use_cuda else 'cpu'))

    encoder.eval()
    decoder.eval()
    if use_cuda:
        print('Cuda being used')
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    en_nlp = spacy.load("en_core_web_sm")

    # De-duplicated, sorted English word list.
    en_words_list = sorted(set(words.words()))

    # De-duplicated, sorted Thai word list, one word per line in the file.
    with open(TH_VOCABULARY_PATH, encoding='utf-8') as f:
        th_words_list = sorted(set(f.read().splitlines()))
コード例 #4
0
ファイル: get.py プロジェクト: hfxunlp/readstardict
def getrans(dicpath,rsf):
	"""Dump dictionary entries whose key contains U+4E00-U+9FA5 characters.

	Opens the dictionary at *dicpath* and writes one UTF-8 encoded line per
	matching entry to the file *rsf*, in the form
	``<key>|||<resolveword(value)>``. Also prints this module's directory.
	"""
	# Matches one or more characters in the range U+4E00..U+9FA5.
	han_run = re.compile(u'[\u4e00-\u9fa5]+')
	module_dir = os.path.join(os.path.dirname(__file__))
	print(module_dir)
	# NOTE(review): the positional True is presumably the in-memory flag -
	# confirm against the Dictionary constructor signature.
	stardict = Dictionary(dicpath,True)

	with open(rsf,"wb") as sink:
		for key,value in stardict.iteritems():
			if not han_run.search(key):
				continue
			line = key+"|||"+resolveword(value)+"\n"
			sink.write(line.encode("utf-8","ignore"))
コード例 #5
0
ファイル: test_pystardict.py プロジェクト: zdict/pystardict
def fixture_dict(request):
    """Return the sample dictionary variant named by ``request.param``.

    Both variants are constructed on every call: ``'on_disk'`` with the
    constructor defaults and ``'in_memory'`` with ``in_memory=True``.
    Any other ``request.param`` raises ``KeyError``.
    """
    sample_path = os.path.join(
        os.path.dirname(__file__), os.pardir, 'examples',
        'stardict-quick_eng-rus-2.4.2', 'quick_english-russian')
    variants = {
        'on_disk': Dictionary(sample_path),
        'in_memory': Dictionary(sample_path, in_memory=True),
    }
    return variants[request.param]
コード例 #6
0
 def import_definitions(self, dictionary):
     """Fill in blank definitions of ``self.words`` from a dictionary.

     Opens ``Dictionary(dictionary)`` and, for every word whose
     ``definition`` is empty or whitespace-only, assigns the entry found
     under ``word.word``. Words missing from the dictionary (``KeyError``)
     are left untouched.
     """
     lookup = Dictionary(dictionary)
     for entry in self.words:
         if entry.definition.strip():
             # Already has a definition; leave it alone.
             continue
         try:
             entry.definition = lookup[entry.word]
         except KeyError:
             pass
コード例 #7
0
ファイル: demo.py プロジェクト: wyh/pystardict
def demo():
    """Time a series of lookups against two sample dictionaries.

    Opens an English-Russian and a Russian-English dictionary located next
    to this module, then performs the same lookups through ``idx``,
    ``dict`` and plain subscripting, printing each result and the time
    elapsed between consecutive milestones. Finally prints the ``dict``
    entry for every key of the first dictionary.

    The statement order matters: each ``milestoneN`` timestamp brackets
    exactly the operations named in the message printed after it.
    """

    milestone1 = datetime.datetime.today()

    # Both sample dictionaries are expected in this module's own directory.
    dicts_dir = os.path.join(os.path.dirname(__file__))
    dict1 = Dictionary(
        os.path.join(dicts_dir, 'stardict-quick_eng-rus-2.4.2',
                     'quick_english-russian'))
    dict2 = Dictionary(
        os.path.join(dicts_dir, 'stardict-quick_rus-eng-2.4.2',
                     'quick_russian-english'))

    milestone2 = datetime.datetime.today()
    print '2 dicts load:', milestone2 - milestone1

    # Lookups through the ``idx`` attribute.
    print dict1.idx['test']
    print dict2.idx['проверка']

    milestone3 = datetime.datetime.today()
    print '2 cords getters:', milestone3 - milestone2

    # Lookups through the ``dict`` attribute.
    print dict1.dict['test']
    print dict2.dict['проверка']

    milestone4 = datetime.datetime.today()
    print '2 direct data getters (w\'out cache):', milestone4 - milestone3

    # First subscript lookup of each word (reported as "not cached").
    print dict1['test']
    print dict2['проверка']

    milestone5 = datetime.datetime.today()
    print '2 high level data getters (not cached):', milestone5 - milestone4

    # Same subscript lookups repeated (reported as "cached").
    print dict1['test']
    print dict2['проверка']

    milestone6 = datetime.datetime.today()
    print '2 high level data getters (cached):', milestone6 - milestone5

    # List every key of the first dictionary and print the content stored
    # under it.
    # NOTE(review): every other access in this function uses ``dict1.idx``;
    # ``ids`` may be a typo - confirm the attribute exists.
    for key in dict1.ids.keys():
        print dict1.dict[key]
コード例 #8
0
ファイル: import_sentences.py プロジェクト: eugenzor/ddict
    def handle(self, *args, **options):
        """Replace each word's phrases with examples from one dictionary.

        Takes the first ``sources/*.dict`` match, opens the dictionary of
        the same stem, and for every ``Word`` parses the dictionary entry
        as HTML, deletes the word's existing phrases and saves up to ten
        non-empty ``<ex>`` texts as new ``Phrase`` rows.
        """
        # One dictionary is enough for now, so only the first match is used.
        for fname in glob.glob('sources/*.dict')[:1]:
            dict_name = Path(fname).stem
            stardict = Dictionary(os.path.join('sources', dict_name))
            print('Importing the %s dictionary...' % dict_name)
            for word in Word.objects.all():
                # NOTE(review): if ``get`` yields None for a word missing
                # from the dictionary, the parser may reject it - confirm.
                soup = BeautifulSoup(stardict.get(word.content), "html.parser")

                word.phrase_set.all().delete()
                saved = 0
                for example in soup.find_all('ex'):
                    text = example.text
                    if not text:
                        continue
                    Phrase(word=word, content=text).save()
                    saved += 1
                    if saved >= 10:
                        break
コード例 #9
0
 def __init__(self):
     """Load the custom JSON dictionary and open both StarDict dictionaries.

     All resources are looked up in the ``dictionaries`` directory next to
     this module. Three translators are created and stored on the
     instance: one for the custom JSON dictionary, one for the Wiktionary
     dictionary and one for the quick Russian-English dictionary.

     Raises whatever ``open``/``json.load`` raise if the custom JSON file
     is missing or malformed.
     """
     dictionariesDir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                    "dictionaries")

     self.__pathToCustomDictionary = os.path.join(dictionariesDir,
                                                  "custom",
                                                  "rusEngDictionary.json")
     # NOTE(review): the file is opened with the platform default encoding;
     # confirm that matches how rusEngDictionary.json is stored.
     with open(self.__pathToCustomDictionary, "r") as fp:
         rusToEngDict = json.load(fp)
     # Keep the translator on the instance: it used to be bound to a local
     # name only, so it was discarded as soon as __init__ returned.
     self.__customTranslator = OfflineWordTranslator(rusToEngDict)

     pathToDictionary = os.path.join(dictionariesDir,
                                     "Wiktionary Russian-English",
                                     "Wiktionary Russian-English")
     self.__wiktionaryTranslator = OfflineWordTranslatorForWiktionary(Dictionary(pathToDictionary))

     pathToDictionary = os.path.join(dictionariesDir,
                                     "stardict-quick_rus-eng-2.4.2",
                                     "quick_russian-english")
     self.__starDictQuickTranslator = OfflineWordTranslatorForStarDictQuick(Dictionary(pathToDictionary))
コード例 #10
0
    def handle(self, *args, **options):
        """Refresh each word's transcription and single example phrase.

        Takes the first ``sources/*.dict`` match, opens the dictionary of
        the same stem, and for every ``Word`` parses the dictionary entry
        as HTML. The first ``<c c="teal">`` text starting with ``[`` is
        saved as the transcription with its first and last characters
        stripped. The word's phrases are then deleted and the first
        non-empty ``<ex>`` text is saved as a new ``Phrase``.
        """
        # Only the first matching dictionary file is processed.
        for fname in glob.glob('sources/*.dict')[:1]:
            dict_name = Path(fname).stem
            stardict = Dictionary(os.path.join('sources', dict_name))
            print('Updating transcription from %s dictionary...' % dict_name)
            for word in Word.objects.all():
                soup = BeautifulSoup(stardict.get(word.content), "html.parser")

                for candidate in soup.find_all('c', attrs={'c': 'teal'}):
                    if not candidate.text.startswith('['):
                        continue
                    # Drop the first and last characters (the brackets).
                    word.transcription = candidate.text[1:-1]
                    word.save()
                    break

                word.phrase_set.all().delete()
                for example in soup.find_all('ex'):
                    if not example.text:
                        continue
                    Phrase(word=word, content=example.text).save()
                    break
コード例 #11
0
 def open_dict(self):
     """Open the dictionary stored in the currently selected directory.

     Scans ``self.dir/self.current_dict_name``, skipping files whose
     extension is in ``SUFFIX``; the remaining files must all share one
     base name. If they do not, logs an error and returns ``False``.
     Otherwise drops the previous ``self.dict``, forces a garbage
     collection and opens the new dictionary (returning ``None``).
     """
     self.dict_dir = os.path.join(self.dir, self.current_dict_name)
     base_name = ''
     for entry in os.listdir(self.dict_dir):
         stem, extension = os.path.splitext(entry)
         if extension in SUFFIX:
             continue
         if base_name and base_name != stem:
             logger.error("dict broken!")
             return False
         base_name = stem
     # The directory name may differ from the dictionary file name, so the
     # base name discovered above is what gets opened.
     dict_path = os.path.join(self.dict_dir, base_name)
     del self.dict
     gc.collect()
     self.dict = Dictionary(dict_path)
     logger.info("载入%s", dict_path)
コード例 #12
0
ファイル: add_dictionary.py プロジェクト: rewisch/ReDict
class stardict(import_dictionary):
    """Importer that copies the entries of one StarDict dictionary into the DB."""

    def __init__(self, path, stardict_db_folder, stardict_db_name):
        """Initialise the base importer and open the dictionary.

        The dictionary is opened from
        ``path/stardict_db_folder/stardict_db_name``.
        """
        import_dictionary.__init__(self)
        dictionary_path = os.path.join(path, stardict_db_folder,
                                       stardict_db_name)
        self.dict1 = Dictionary(dictionary_path)

    def add_words_to_dictionary(self):
        """Write every word and its definition to the database.

        Returns the dictionary id obtained from ``check_dictionary``.
        On any exception the database connection is rolled back and the
        exception is re-raised.
        """
        try:
            info = self.dict1.ifo
            dictionary_id = self.check_dictionary(
                info.bookname, info.wordcount, abstractive=1)
            print("\n'Add all new words to Dictionary '{0}':\n".format(
                self.dict1.ifo.bookname))
            for raw_word in tqdm(self.dict1.idx._idx):
                # Index keys are decoded before use.
                word = raw_word.decode('Utf-8')
                word_id = self.check_word(word)
                self.db.write_definition(dictionary_id, word_id,
                                         self.dict1.get(word))
            self.db._commit()
        except BaseException:
            # Same reach as a bare ``except``: undo partial writes, then
            # let the original exception propagate.
            self.db.db_connection.rollback()
            raise
        return dictionary_id
コード例 #13
0
 def on_dictionary_found(self, event, dispatcher):
     """Open a newly found dictionary unless its name is disabled.

     ``event.data`` is the dictionary source; when its ``name`` is not in
     ``self.disabled`` the dictionary at its ``path`` is opened, appended
     to ``self._dictionaries`` and a garbage collection is triggered.
     """
     source = event.data
     if source.name in self.disabled:
         return
     # NOTE(review): the dictionary is kept in the list after the ``with``
     # block exits - confirm it stays usable once its context has ended.
     with Dictionary(source.path) as opened:
         self._dictionaries.append(opened)
         garbage.collect()
コード例 #14
0
ファイル: stardict.py プロジェクト: dnxbjyj/.emacs.d
from pystardict import Dictionary
import os
import sys
import codecs

if __name__ == "__main__":
    if len(sys.argv) <= 1:
        print("Usage: ./stardict.py start_dict_ifo_file_path")
    else:
        start_dictionary = Dictionary(sys.argv[1].split(".ifo")[0])

        f = codecs.open("company-english-helper-data.el", "w", "utf-8")
        f.write(";; -*- mode: fundamental; -*-\n")
        f.write(";;\n")
        f.write(
            ";; This file is generate by stardict.py, please don't edit this file.\n"
        )
        f.write(";;\n\n")
        f.write("(defconst english-helper-completions\n  '(\n")

        index = 0
        for word in start_dictionary.keys():
            is_english_word = all(ord(char) < 128 for char in word)
            if is_english_word:
                first_line_translation = start_dictionary.dict[word].split()[0]
                no_phonetic_translation = first_line_translation.split(">")[-1]

                candiate_word = word.lower().replace('\"', ' ')
                candiate_translateion = no_phonetic_translation.strip(
                ).replace('\"', ' ')
                f.write('    #(\"' + candiate_word + '\" ' + '0 1' + '\n' +
コード例 #15
0
 def test008Ne(self):
     """``!=`` is false for a second instance opened from the same path."""
     sample_path = os.path.join(
         os.path.dirname(__file__), '..', 'examples',
         'stardict-quick_eng-rus-2.4.2/quick_english-russian')
     other = Dictionary(sample_path)
     self.assertFalse(self.dict != other)
コード例 #16
0
ファイル: main.py プロジェクト: SergeyLp/pyCheckList
def init_dicts():
    """Open the dictionary and publish it as the module global ``e_r_dict``.

    The dictionary is opened from ``STARDICT_DIR/STARDICT_NAME``.
    """
    global e_r_dict
    dictionary_path = os.path.join(STARDICT_DIR, STARDICT_NAME)
    e_r_dict = Dictionary(dictionary_path)
コード例 #17
0
class DictionaryTest(unittest.TestCase):
    """Exercise ``Dictionary`` against the sample dictionary under ``../examples``.

    Every test gets two fresh instances from :meth:`setUp`: ``self.dict``
    (constructor defaults) and ``self.in_memory_dict`` (``in_memory=True``).
    """

    # Path handed to ``Dictionary``; built once so every test opens exactly
    # the same sample instead of repeating the join expression.
    _SAMPLE_PATH = os.path.join(
        os.path.dirname(__file__), '..', 'examples',
        'stardict-quick_eng-rus-2.4.2/quick_english-russian')

    def setUp(self):
        """Open the sample dictionary in default and in-memory mode."""
        self.dict = Dictionary(self._SAMPLE_PATH)
        self.in_memory_dict = Dictionary(self._SAMPLE_PATH, in_memory=True)

    def test001Idx(self):
        """The ``idx`` entry for 'test' is the expected 2-tuple."""
        self.assertEqual(self.dict.idx['test'], (
            581161,
            16,
        ))

    def test002Dict(self):
        """Lookup through the ``dict`` attribute returns the translation."""
        self.assertEqual(self.dict.dict['test'], 'проверка')

    def test003Dictionary(self):
        """Subscripting the dictionary itself returns the translation."""
        self.assertEqual(self.dict['test'], 'проверка')

    def test004Contains(self):
        """``in`` is true for a known word and false for an unknown one."""
        # assertIn/assertNotIn report the member and container on failure,
        # unlike assertTrue/assertFalse on a pre-evaluated boolean.
        self.assertIn('test', self.dict)
        self.assertNotIn('testtt', self.dict)

    def test005Delitem(self):
        """``del`` removes a previously looked-up word from ``_dict_cache``."""
        self.dict['test']  # look the word up first so there is something to delete
        del self.dict['test']
        self.assertNotIn('test', self.dict._dict_cache)

    def test006Len(self):
        """``len`` reports the expected number of entries."""
        self.assertEqual(len(self.dict), 31705)

    def test007Eq(self):
        """Two instances opened from the same path compare equal."""
        other = Dictionary(self._SAMPLE_PATH)
        # Spelled with ``==`` on purpose: the operator itself is under test.
        self.assertTrue(self.dict == other)

    def test008Ne(self):
        """``!=`` is false for two instances opened from the same path."""
        other = Dictionary(self._SAMPLE_PATH)
        # Spelled with ``!=`` on purpose: the operator itself is under test.
        self.assertFalse(self.dict != other)

    def test009Repr(self):
        """``repr`` consists of the class and the dictionary name."""
        self.assertEqual(
            repr(self.dict),
            '''<class 'pystardict.Dictionary'> quick_english-russian''')

    def test010Clear(self):
        """``clear`` empties ``_dict_cache``."""
        self.dict['test']  # look the word up first so the cache is exercised
        self.dict.clear()
        self.assertEqual(len(self.dict._dict_cache), 0)

    def test011Get(self):
        """``get`` returns the translation, or the default for unknown words."""
        self.assertEqual(self.dict.get('test', 't'), 'проверка')
        self.assertEqual(self.dict.get('testtt', 't'), 't')

    def test012InMemoryDict(self):
        """The in-memory instance returns the same translation."""
        self.assertEqual(self.in_memory_dict['test'], 'проверка')

    def test013IterateDict(self):
        """Every ``(key, value)`` from ``items()`` matches a direct lookup."""
        for k, v in self.in_memory_dict.items():
            self.assertEqual(self.in_memory_dict[k], v)
コード例 #18
0
ファイル: test_pystardict.py プロジェクト: zdict/pystardict
def test008Ne(fixture_dict):
    """``!=`` is false between the fixture and a fresh instance of the same path."""
    sample_path = os.path.join(
        os.path.dirname(__file__), os.pardir, 'examples',
        'stardict-quick_eng-rus-2.4.2', 'quick_english-russian')
    other = Dictionary(sample_path)
    differs = fixture_dict != other
    assert differs is False
コード例 #19
0
 def __init__(self, dictPath, dictPrefix):
     """Open the dictionary found at ``dictPath/dictPrefix``."""
     fullPath = os.path.join(dictPath, dictPrefix)
     self.dictionary = Dictionary(fullPath)
コード例 #20
0
 def __init__(self):
     """Open the two dictionaries named by ``lazyworm_path`` and ``quick_path``.

     Both paths are read from attributes already available on ``self``.
     """
     lazyworm_source = self.lazyworm_path
     self.dict_lazyworm = Dictionary(lazyworm_source)
     quick_source = self.quick_path
     self.dict_quick = Dictionary(quick_source)
コード例 #21
0
ファイル: add_dictionary.py プロジェクト: rewisch/ReDict
 def __init__(self, path, stardict_db_folder, stardict_db_name):
     """Initialise the base importer and open the dictionary.

     The dictionary is opened from
     ``path/stardict_db_folder/stardict_db_name``.
     """
     import_dictionary.__init__(self)
     dictionary_path = os.path.join(path, stardict_db_folder,
                                    stardict_db_name)
     self.dict1 = Dictionary(dictionary_path)
コード例 #22
0
ファイル: __init__.py プロジェクト: zhang-z/pystardict
 def setUp(self):
     """Open the sample dictionary twice before each test.

     ``self.dict`` uses the constructor defaults and
     ``self.in_memory_dict`` is opened with ``in_memory=True``.
     """
     sample_path = os.path.join(
         os.path.dirname(__file__), '..', 'examples',
         'stardict-quick_eng-rus-2.4.2/quick_english-russian')
     self.dict = Dictionary(sample_path)
     self.in_memory_dict = Dictionary(sample_path, in_memory=True)
コード例 #23
0
#     N.xml - file with words from grammar db
#     BelRusVorvul/BelRusVorvul - name of belarusian-russian dict in StarDict format (script was tested only with A. Vorvul's dict)
# Output:
#     uwords.txt - file with words that have no translations in format:
#           <word_in_bel>
#     pwords.txt - file with words that were translated in format:
#           <word_in_bel> | <gender_in_bel> | <word/words_in_rus>

import sys, xml
import xml.etree.ElementTree as ET
from pystardict import Dictionary

# Parse the XML file given as the first command-line argument.
f = ET.parse(sys.argv[1])
# Output files, opened for writing in the current working directory.
unknown_words_f = open('uwords.txt', 'w')
words_f = open('pwords.txt', 'w')
# Dictionary opened from the path given as the second command-line argument.
dict_bel_rus = Dictionary(sys.argv[2])

# Unique lemmas collected from the XML file.
words = set()

# Collect the 'Lemma' attribute of every Paradigm element, with the '´'
# character removed.
path = './/Paradigm'
for paradigm in f.findall(path):
    lem = paradigm.attrib['Lemma'].replace('´', '')
    words.add(lem)

print("Parsed: {} words".format(len(words)))

translated = 0
for element in words:
    if dict_bel_rus.has_key(element):
        translated += 1
        rus_eq = dict_bel_rus.get(element).split("\n")[1]