def addItemWithExample(self, key, example):
    """Create an item for ``key`` plus a fact that references one example.

    The item is obtained from ``self.addItem(key)``. When that call returns
    a falsy value, nothing else is created.

    :param key: value handed to ``self.addItem``.
    :param example: example text, either a UTF-8 encoded byte string or an
        already decoded ``unicode`` object.
    :returns: ``(item, fact, example_document)`` on success, ``None`` when
        ``self.addItem`` returned nothing usable.
    """
    item = self.addItem(key)
    if not item:
        return None

    fact = Fact(key=item)
    # TODO: add fact to item itself!

    # unicode(text, 'utf-8') raises TypeError for text that is already
    # unicode, so only byte strings are decoded.
    if not isinstance(example, unicode):
        example = unicode(example, 'utf-8')

    # Keep our own handle on the document instead of depending on the
    # return value of save().
    example_doc = Example(example=example)
    example_doc.save()

    fact.examples.append(example_doc)
    fact.save()
    return item, fact, example_doc
def addExampleWithItems(self, example, keys):
    """Save one example and reference it from a new fact for every key.

    The example document is saved first (even when ``keys`` is empty).
    For each key, ``self.addItem(key)`` is called; keys for which it
    returns a falsy value are skipped.

    :param example: example text, either a UTF-8 encoded byte string or an
        already decoded ``unicode`` object.
    :param keys: iterable of values handed to ``self.addItem``.
    :returns: list of ``(item, fact)`` tuples, one per key that produced
        an item.
    """
    # unicode(text, 'utf-8') raises TypeError for text that is already
    # unicode, so only byte strings are decoded.
    if not isinstance(example, unicode):
        example = unicode(example, 'utf-8')

    # Keep our own handle on the document instead of depending on the
    # return value of save().
    example_doc = Example(example=example)
    example_doc.save()

    results = []
    for key in keys:
        item = self.addItem(key)
        if not item:
            continue
        fact = Fact(key=item)
        # TODO: add fact to item itself!
        fact.examples.append(example_doc)
        fact.save()
        results.append((item, fact))
    return results
def process(self, category='kanji', limit=100):
    """Process new (unprocessed) keys and link example words as usages.

    For each key with status ``'new'`` in ``category`` (at most ``limit``):

    * a gloss holding the MeCab reading of the key is saved and a fact is
      built for the key;
    * up to 7 WordNet completions that have both a reading and at least
      one definition are linked to that fact as usages;
    * when fewer than two usages were linked, Jisho definitions are used
      as a fallback source;
    * the fact and the key are saved; the key is marked ``'processed'``
      only when at least one usage was linked.

    ``OperationFailure`` raised while querying is caught and reported with
    ``print``; nothing is returned.

    NOTE(review): the nesting below was reconstructed from a collapsed
    source line; in particular ``key.save()`` is assumed to run whether or
    not usages were found -- confirm against the original layout.

    :param category: key category to select (default ``'kanji'``).
    :param limit: maximum number of keys fetched from the query.
    """
    wn = Wordnet()
    mc = MeCab()
    ji = Jisho()

    try:
        # 0. Find unprocessed keys
        pending = Key.objects(
            category=category, status='new'
        ).timeout(False).limit(limit)

        for key in pending:
            # Single-argument parenthesised print emits the same text as
            # the former two-item print statement.
            print('Processing  %s' % key.value)

            # 0a. Get reading for the key itself
            key_gloss = Gloss()
            key_gloss.readings.update({'default': mc.reading(key.value)})
            key_gloss.save()

            # 0b. Initialize corresponding Fact
            key_fact = Fact(key=key, gloss=key_gloss)

            # Words already handled for this key, so that the Jisho
            # fallback cannot link (or re-create) the same word twice.
            linked_words = set()

            # 1. Get usages from WordNet
            words = wn.complete(key.value)
            for word in (words[:7] if words else ()):
                if word in linked_words:
                    continue
                # 2. Check, if reading is found
                reading = mc.reading(word)
                if not reading:
                    continue
                # 3. Check, if definition is found
                definitions = wn.lookup(word)
                if not definitions:
                    continue
                # 4. Reuse the stored key's fact or create key and fact.
                # NOTE(review): when several keys share this value a
                # further key is created, as before -- confirm intended.
                try:
                    fact = Key.objects.get(value=word).fact
                except (DoesNotExist, MultipleObjectsReturned):
                    fact = self._create_usage_fact(
                        word,
                        # No more than 2-4 definitions!
                        [entry['gloss'] for entry in definitions[:3]],
                        reading,
                    )
                    # TODO: add synonyms based on 'words'?
                    # TODO: parse components?
                    # TODO: find advanced examples?

                # 6. Link fact to key-fact as usage. A stored key without
                # a fact yields None, which must not become a usage.
                linked_words.add(word)
                if fact is not None:
                    key_fact.usages.append(fact)

            # 1a. If still no usages found (or not enough)
            if len(key_fact.usages) < 2:
                # Tolerate a falsy result, like the WordNet branch does.
                for word, info in (ji.define(key.value, 7) or ()):
                    if word in linked_words:
                        continue
                    # 4. Reuse the stored key's fact or create key and fact
                    try:
                        fact = Key.objects.get(value=word).fact
                    except (DoesNotExist, MultipleObjectsReturned):
                        fact = self._create_usage_fact(
                            word, [info['meaning']], info['kana']
                        )

                    # 6. Link fact to key-fact as usage
                    linked_words.add(word)
                    if fact is not None:
                        key_fact.usages.append(fact)

            # 7. Save key fact and corresponding key (bi-directional link)
            key_fact.save()
            key.fact = key_fact
            if key_fact.usages:
                # todo: if still nothing found -> lookup in names
                # dictionary (jisho)
                key.status = 'processed'
            key.save()

            print('Total usages:  %s' % len(key.usages()))
            print('----------------')
    except OperationFailure as e:
        print('There was an error querying mongo db: %s' % e)


def _create_usage_fact(self, word, translations, reading):
    """Store a new minor ``'word'`` key for ``word`` and return its fact.

    :param word: value of the key to create.
    :param translations: iterable of values appended to the gloss'
        ``translations``.
    :param reading: value stored under ``'default'`` in the gloss'
        ``readings``.
    :returns: the saved ``Fact`` linked to the new key.
    """
    # 5a. Create Gloss entity holding the translations and the reading
    gloss = Gloss()
    for translation in translations:
        gloss.translations.append(translation)
    gloss.readings.update({'default': reading})
    gloss.save()

    # 5b. Create corresponding key & fact. Handles are kept explicitly
    # instead of depending on the return value of save().
    new_key = Key(value=word, category='word', tags=['minor'])
    new_key.save()
    fact = Fact(key=new_key, gloss=gloss)
    fact.save()

    # The key is saved a second time to store the back-reference and the
    # final status.
    new_key.fact = fact
    new_key.status = 'processed'
    new_key.save()
    return fact