Example #1
0
	def test4():
		"""Merge the lemma table 'lemma.en.txt' into 'bnc-lemma.txt'.

		Both files are loaded into separate ``stardict.LemmaDB`` objects.  For
		every stem dumped from the second table:

		* stems absent from the first table are counted (``count1``);
		* for stems already present, each word form missing from the first
		  table's entry is printed as ``stem -> word`` and counted
		  (``count2``);
		* in either case every (stem, lower-cased word form) pair is added
		  to the first table.

		The two counters are printed and the merged table is written to
		'lemma-bnc.txt'.

		Returns:
			0, always.
		"""
		import stardict
		lm1 = stardict.LemmaDB()
		lm2 = stardict.LemmaDB()
		lm1.load('bnc-lemma.txt')
		lm2.load('lemma.en.txt')
		count1 = 0
		count2 = 0
		for raw_stem in lm2.dump('stem'):
			# Look the entry up with the original spelling before normalising.
			words = [word.lower() for word in lm2.get(raw_stem)]
			# Skip very short all-upper-case stems.  This must be tested on
			# the original spelling: the previous code lower-cased the stem
			# first, which made isupper() always False and the guard dead.
			if len(raw_stem) <= 2 and raw_stem.isupper():
				continue
			stem = raw_stem.lower()
			if stem not in lm1:
				count1 += 1
			else:
				known = lm1.get(stem)
				for word in words:
					if word not in known:
						# Single-argument print(...) emits the same text on
						# Python 2 and Python 3.
						print('%s -> %s' % (stem, word))
						count2 += 1
			for word in words:
				lm1.add(stem, word)
		print('count %s %s' % (count1, count2))
		lm1.save('lemma-bnc.txt')
		return 0
Example #2
0
 def __init__(self, filepath, threshold=0):
     """Set up dictionary resources and load the personal vocabulary list.

     Opens the StarDict database 'stardict.db', loads the lemma table from
     'lemma.en.txt' and reads 'myvocab.txt' from the current working
     directory.

     Args:
         filepath: stored unchanged on ``self.filepath``; not used further
             in this method.
         threshold: stored unchanged on ``self.threshold`` (default 0);
             not used further in this method.
     """
     self.dbname = "stardict.db"
     self.filepath = filepath
     self.sd = stardict.StarDict(self.dbname)
     self.lemma = stardict.LemmaDB()
     self.lemma.load('lemma.en.txt')
     self.worddict = OrderedDict()
     self.threshold = threshold
     self.find_uncapword_sent = False
     # Read the vocabulary file inside a context manager so the handle is
     # closed deterministically instead of being left open.
     with open("myvocab.txt", 'r') as vocab_file:
         vocab_lines = vocab_file.read().split('\n')
     # Keep the first tab-separated field of every line that is not a
     # '#' comment line.
     self.myvocab = [line.split("\t")[0] for line in vocab_lines
                     if not line.startswith("#")]
Example #3
0
 def lemma(self):
     """Return the lemma database, loading it on first use.

     When ``self._lemma`` is still None, a ``stardict.LemmaDB`` is created,
     stored on ``self._lemma`` and loaded from the path returned by
     ``ccinit.path_home('share/dict/lemma.en.txt')``.  Later calls return
     the stored object without loading again.
     """
     if self._lemma is not None:
         return self._lemma
     lemma_path = ccinit.path_home('share/dict/lemma.en.txt')
     # Store the instance before loading, as the original code did.
     self._lemma = stardict.LemmaDB()
     self._lemma.load(lemma_path)
     return self._lemma