Esempio n. 1
0
def importOldLists(ln='en'):
	"""Merge the legacy dictionary for language ``ln`` into the current one.

	Reads ``oldDict-<ln>.json`` and ``dict-<ln>.json``, builds a merged
	dictionary and writes it back to ``dict-<ln>.json`` via saveDictionary().

	Merge rules:
	  * A legacy entry is kept only if its key is already in trimmed form
	    (``key == trimmer(key, mode)``); its ``parsedUrl`` and ``href``
	    fields are dropped.
	  * A current entry whose key is not among the kept legacy entries is
	    copied over unchanged.
	  * For keys present in both, the legacy entry wins. Only when ``ln`` is
	    ``'en'`` are the translations combined: the current translations
	    come first, followed by the trimmed, non-empty legacy translations.

	:param ln: language code used to build the file names; ``'ru'`` selects
		trimmer mode 1, any other value selects mode 0.
	"""
	# Second argument handed to trimmer(): 1 for Russian, 0 otherwise.
	trimMode = 1 if ln == 'ru' else 0

	with open('oldDict-'+ln+'.json', 'r') as f:
		oldDict = json.load(f)

	with open('dict-'+ln+'.json', 'r') as f:
		curDict = json.load(f)

	newDict = {}

	for word, entry in oldDict.items():
		if word != trimmer(word, trimMode):
			continue
		# pop() with a default: legacy entries are not guaranteed to carry
		# both fields, and a missing one must not abort the whole import.
		entry.pop('parsedUrl', None)
		entry.pop('href', None)
		newDict[word] = entry

	for word, curEntry in curDict.items():
		if word not in newDict:
			newDict[word] = curEntry
			continue
		if ln != 'en':
			continue

		mergedEntry = newDict[word]
		# Trim every legacy translation once and drop those that trim to
		# an empty value.
		cleaned = []
		for tr in mergedEntry.get('translations', []):
			trimmed = trimmer(tr, trimMode)
			if trimmed:
				cleaned.append(trimmed)
		# The current entry may have no translations at all.
		mergedEntry['translations'] = curEntry.get('translations', []) + cleaned

	saveDictionary(newDict, 'dict-'+ln+'.json')
Esempio n. 2
0
def parsWiki(enDict='dict-en.json', ruDict='dict-ru.json'):
	"""Run the wiki parsers over both dictionaries and save the results.

	Each argument may be either an already loaded dictionary or a path to a
	JSON file, which is then loaded. The Russian dictionary is processed
	with parsWikiRu and saved to ``dict-ru.json``; the English dictionary is
	processed with parsWikiEn and saved to ``dict-en.json``.

	:param enDict: English dictionary object, or path to its JSON file.
	:param ruDict: Russian dictionary object, or path to its JSON file.
	"""
	if isinstance(ruDict, str):
		with open(ruDict, 'r') as f:
			ruDict = json.load(f)
	if isinstance(enDict, str):
		with open(enDict, 'r') as f:
			enDict = json.load(f)

	# One timestamped prefix shared by both runs so their outputs can be
	# matched up.
	prefix = 'parsWiki-' + str(int(time.time()))

	# NOTE(review): parsDict's return value is ignored, so it presumably
	# updates the dictionary in place - confirm against its definition.
	parsDict(ruDict, parsWikiRu, prefix + '-ru')
	saveDictionary(ruDict, 'dict-ru.json')

	parsDict(enDict, parsWikiEn, prefix + '-en')
	# Save the English dictionary here; writing ruDict to this file would
	# overwrite the English data with the Russian dictionary.
	saveDictionary(enDict, 'dict-en.json')
Esempio n. 3
0
def _commitTranslations(en, trsStr, ruDict, limit=10):
	"""Attach the translations collected in ``trsStr`` to the entry ``en``.

	The raw text is broken up with split(trsStr, 1); every resulting
	translation that is not yet a key of ``ruDict`` is registered there as
	``{'text': tr}``. The translations are appended to
	``en['translations']``, which is then capped at ``limit`` items.
	"""
	trs = split(trsStr, 1)
	for tr in trs:
		if tr not in ruDict:
			ruDict[tr] = {'text': tr}
	if 'translations' not in en:
		en['translations'] = []
	en['translations'] += trs
	if len(en['translations']) > limit:
		# Keep exactly `limit` items, so a list that overflows is not left
		# shorter than one that was already at the limit.
		en['translations'] = en['translations'][:limit]


def translateWords(filename='dict.xdxf'):
	"""Fill the English dictionary with translations read from an XDXF file.

	Loads ``dict-en.json`` and ``dict-ru.json``, scans ``filename`` line by
	line and saves both dictionaries back when done. A line starting with
	``<ar><k>`` opens a new article: its trimmed text is the English key
	(created in the English dictionary if missing). The lines that follow,
	with any ``<tr>...</tr>`` span removed, are accumulated as that
	article's translation text and committed when the next article starts
	or the file ends.

	If ``filename`` does not exist, a hint is printed and nothing is changed.

	:param filename: path to the extracted XDXF dictionary file.
	"""
	if not os.path.isfile(filename):
		print('file ' + filename + ' not found!')
		print('Please download and extract dictionary by url: ' + dnlHref)
		return
	with open('dict-en.json', 'r') as f:
		enDict = json.load(f)
	with open('dict-ru.json', 'r') as f:
		ruDict = json.load(f)

	en = None  # English entry of the article being read, if any
	trsStr = ''  # raw translation text accumulated for that article
	with open(filename, 'r') as xdxf:
		for line in xdxf:
			if line[:7] == '<ar><k>':
				# A new article starts: commit the previous one first.
				if en:
					_commitTranslations(en, trsStr, ruDict)
				en = None
				trsStr = ''
				text = trimmer(line)
				if text:
					if text in enDict:
						en = enDict[text]
					else:
						en = enDict[text] = {'text': text}
			elif en:
				# Cut the <tr>...</tr> span out of the line. Only a span that
				# opens on this line is handled: if it is not closed here, the
				# rest of the line is dropped, but following lines are not
				# treated as part of it.
				trStart = line.find('<tr>')
				if trStart != -1:
					trEnd = line.find('</tr>')
					if trEnd == -1:
						line = line[0:trStart]
					else:
						line = line[0:trStart] + line[trEnd+5:]
				trsStr += line

	# The last article has no following header to trigger its commit.
	if en:
		_commitTranslations(en, trsStr, ruDict)

	saveDictionary(enDict, 'dict-en.json')
	saveDictionary(ruDict, 'dict-ru.json')