def importOldLists(ln='en'):
    """Merge the legacy dictionary file into the current one for language *ln*.

    Reads ``oldDict-<ln>.json`` and ``dict-<ln>.json``, builds a merged
    dictionary and hands it to ``saveDictionary`` under the name
    ``dict-<ln>.json``.

    Merge rules:
      * A legacy entry is kept only when its key is unchanged by ``trimmer``
        (``trimmer(key, mode) == key``); its ``parsedUrl`` and ``href``
        fields are dropped.
      * A current entry whose key is not among the kept legacy entries is
        copied as is.
      * When a key exists in both, the legacy entry is kept. For ``'en'``
        only, its translations are passed through ``trimmer``, results that
        come back falsy are discarded, and the rest is appended after the
        current entry's translations.

    Args:
        ln: Language code used in the file names. ``'ru'`` selects trimmer
            mode 1, anything else mode 0.
    """
    trimMode = 1 if ln == 'ru' else 0
    curPath = 'dict-' + ln + '.json'

    with open('oldDict-' + ln + '.json', 'r') as f:
        oldDict = json.load(f)
    with open(curPath, 'r') as f:
        curDict = json.load(f)

    newDict = {}
    for word, entry in oldDict.items():
        if word != trimmer(word, trimMode):
            continue
        # pop() instead of del: a legacy entry that lacks one of these
        # fields must not abort the whole import.
        entry.pop('parsedUrl', None)
        entry.pop('href', None)
        newDict[word] = entry

    for word, curEntry in curDict.items():
        if word not in newDict:
            newDict[word] = curEntry
            continue
        if ln != 'en':
            continue
        merged = newDict[word]
        # Call trimmer once per translation and reuse the result for both
        # the filter and the stored value.
        trimmed = (trimmer(x, trimMode) for x in merged.get('translations', []))
        oldTranslations = [t for t in trimmed if t]
        # Either side may have no 'translations' key yet; treat that as an
        # empty list rather than raising KeyError.
        merged['translations'] = curEntry.get('translations', []) + oldTranslations

    saveDictionary(newDict, curPath)
def parsWiki(enDict='dict-en.json', ruDict='dict-ru.json'):
    """Run the wiki parsers over the Russian and English dictionaries and save them.

    Each argument may be either an already loaded dictionary or a path to a
    JSON file, which is then loaded. The Russian dictionary is processed
    with ``parsWikiRu`` and the English one with ``parsWikiEn``, both via
    ``parsDict``. Both calls share a ``parsWiki-<unix time>`` prefix with a
    ``-ru`` / ``-en`` suffix.

    The results are always written to ``dict-ru.json`` and ``dict-en.json``,
    regardless of the paths the dictionaries were loaded from.

    Args:
        enDict: English dictionary, or path to its JSON file.
        ruDict: Russian dictionary, or path to its JSON file.
    """
    if isinstance(ruDict, str):
        with open(ruDict, 'r') as f:
            ruDict = json.load(f)
    if isinstance(enDict, str):
        with open(enDict, 'r') as f:
            enDict = json.load(f)

    prefix = 'parsWiki-' + str(int(time.time()))

    # NOTE(review): parsDict's return value is ignored, so it presumably
    # updates the dictionary in place -- confirm against its definition.
    parsDict(ruDict, parsWikiRu, prefix + '-ru')
    saveDictionary(ruDict, 'dict-ru.json')

    parsDict(enDict, parsWikiEn, prefix + '-en')
    # Save the English dictionary here; the previous code wrote ruDict to
    # dict-en.json, clobbering the English file with Russian data.
    saveDictionary(enDict, 'dict-en.json')
def _stripTranscription(line):
    """Return *line* with its ``<tr>...</tr>`` span removed.

    If ``<tr>`` is opened but not closed on the same line, everything from
    ``<tr>`` onwards is dropped. Lines without ``<tr>`` come back unchanged.
    """
    trStart = line.find('<tr>')
    if trStart == -1:
        return line
    trEnd = line.find('</tr>')
    if trEnd == -1:
        return line[:trStart]
    # 5 == len('</tr>')
    return line[:trStart] + line[trEnd + 5:]


def _commitTranslations(en, trsStr, ruDict, limit=10):
    """Attach the translations collected in *trsStr* to the entry *en*.

    Every translation returned by ``split(trsStr, 1)`` that is not yet a key
    of *ruDict* is added there as ``{'text': translation}``. The entry's
    ``translations`` list is extended and then capped at *limit* items.
    """
    trs = split(trsStr, 1)
    for tr in trs:
        if tr not in ruDict:
            ruDict[tr] = {'text': tr}
    if 'translations' not in en:
        en['translations'] = []
    en['translations'] += trs
    if len(en['translations']) > limit:
        # Keep exactly `limit` items. The old slice [0:9] cut an 11-item
        # list down to 9 while leaving a 10-item list untouched.
        en['translations'] = en['translations'][:limit]


def translateWords(filename='dict.xdxf'):
    """Fill the dictionaries with translations read from an XDXF file.

    Loads ``dict-en.json`` and ``dict-ru.json``, then scans *filename* line
    by line. A line starting with ``<ar><k>`` begins a new article; its
    headword is ``trimmer(line)``. The following lines, with any
    ``<tr>...</tr>`` span removed, are accumulated as that article's
    translation text. When an article ends, the text is split into
    translations, which are added to the English entry and registered in
    the Russian dictionary. Both dictionaries are then saved back through
    ``saveDictionary``.

    If *filename* does not exist, a download hint is printed and nothing
    else happens.

    Args:
        filename: Path to the extracted XDXF dictionary.
    """
    if not os.path.isfile(filename):
        print('file ' + filename + ' not found!')
        print('Please download and extract dictionary by url: ' + dnlHref)
        return

    with open('dict-en.json', 'r') as f:
        enDict = json.load(f)
    with open('dict-ru.json', 'r') as f:
        ruDict = json.load(f)

    en = None       # English entry of the article being read, if any
    trParts = []    # body lines of the current article

    with open(filename, 'r') as xdxf:
        for line in xdxf:
            if line.startswith('<ar><k>'):
                # A new article begins: commit the previous one first.
                if en:
                    _commitTranslations(en, ''.join(trParts), ruDict)
                en = None
                trParts = []
                text = trimmer(line)
                if text:
                    en = enDict.setdefault(text, {'text': text})
            elif en:
                # NOTE(review): a <tr> left open on one line is not tracked
                # across lines; later lines are stripped independently.
                trParts.append(_stripTranscription(line))

    # The last article has no following <ar><k> header to trigger a commit,
    # so flush it explicitly; otherwise its translations are lost.
    if en:
        _commitTranslations(en, ''.join(trParts), ruDict)

    saveDictionary(enDict, 'dict-en.json')
    saveDictionary(ruDict, 'dict-ru.json')