Beispiel #1
0
 def random(self, category='kanji'):
     """Return a random key of the given category.

     Args:
         category: Value of the ``category`` field used to filter keys.

     Returns:
         A randomly chosen ``Key`` of that category, or an unsaved
         placeholder ``Key`` with an empty value when the category
         contains no keys.
     """
     try:
         return choice(Key.objects(category=category))
     except IndexError:
         # choice() fails with IndexError on an empty sequence, i.e. there
         # is no item in this category. Anything else (interrupts, database
         # errors) is deliberately left to propagate instead of being
         # silently turned into an empty key.
         return Key(value=u'')
Beispiel #2
0
def del_item(key):
    """Delete every stored item whose value equals ``key``.

    Returns ``{key: {'result': 'success'}}`` when at least one item was
    deleted, and an empty dictionary when nothing matched.
    """
    removed_any = False
    for match in Key.objects(value=key):
        match.delete()
        removed_any = True

    if not removed_any:
        return {}
    return {key: {'result': 'success'}}
Beispiel #3
0
    def random_with_usages(self, category='kanji'):
        """
        Get a random item of the category that has more than one usage.

        NB: items are drawn at random (with repetition), so this may take
        some time and may miss a suitable item; the number of draws is capped
        at the number of items stored in the category.

        Args:
            category: Value of the ``category`` field used to filter keys.

        Returns:
            A ``Key`` whose ``usages()`` holds more than one entry, or an
            unsaved placeholder ``Key`` with an empty value when the category
            is empty or no suitable item was drawn within the attempt budget.
        """
        # Build the query once instead of once per draw
        candidates = Key.objects(category=category)

        # One draw per stored item; an empty category means zero draws, so
        # choice() is never called on an empty sequence.
        for _ in range(len(candidates)):
            key = choice(candidates)
            if len(key.usages()) > 1:
                # A hit on the very last draw is still a valid result
                return key

        # No suitable item found in this category
        return Key(value=u'')
Beispiel #4
0
def list_all():
    """ Return all stored keys as a JSON array (AJAX)

    Each entry carries the key's value, formatted creation time, part of
    speech, status, category and language; 'id' is the position of the key
    in the query result, not a database identifier.
    """
    rows = [
        {
            'id': index,
            'value': entry.value,
            'added': entry.added.strftime("%Y-%m-%d %H:%M:%S"),
            'pos': entry.pos,
            'status': entry.status,
            'category': entry.category,
            'lang': entry.lang,
        }
        for index, entry in enumerate(Key.objects())
    ]

    # NB: bottle serializes only dictionaries on its own, so the list is
    # dumped to JSON by hand and the content type is set explicitly
    response.content_type = 'application/json'
    return dumps(rows)
Beispiel #5
0
def list_all():
    """ Dump every stored key as a JSON array (AJAX)

    The 'id' field of each entry is its position in the query result,
    not a database identifier.
    """
    def describe(position, key):
        # Plain-dict view of a single key, ready for JSON serialization
        return {
            'id': position,
            'value': key.value,
            'added': key.added.strftime("%Y-%m-%d %H:%M:%S"),
            'pos': key.pos,
            'status': key.status,
            'category': key.category,
            'lang': key.lang,
        }

    payload = [
        describe(position, key)
        for position, key in enumerate(Key.objects())
    ]

    # NB: bottle allows only dictionary serialization, hence the manual
    # dumps() call and the explicit content type
    response.content_type = 'application/json'
    return dumps(payload)
Beispiel #6
0
    def addItem(self, key):
        """Add a pending item to the DB.

        Args:
            key: Item text, either a UTF-8 encoded byte string or an
                already decoded unicode string.

        Returns:
            The saved ``Key`` (status 'new'), or ``None`` when an item with
            this value is already stored or when none of the supported
            languages was detected for the text.
        """
        # unicode(text, 'utf-8') raises TypeError for text that is already
        # unicode, so decode byte strings only
        if not isinstance(key, unicode):
            key = unicode(key, 'utf-8')

        # Check, if key already exists (one or more stored copies)
        if Key.objects(value=key).first() is not None:
            return None

        # Detect language
        detected = set(Language().detect(key))
        supported = [
            lang for lang in languages
            if detected.intersection(languages.get(lang))
        ]
        # Unsupported language
        if not supported:
            return None

        # Supported language detected
        # NB: should deinflect to baseform (not so simple, actually!)
        item = Key(value=key, lang=supported.pop())
        if item.lang == 'Japanese':
            # Set tags
            item.tags = ['testing']
            # Detect part of speech
            # NB: should probably do this in POST-PROCESSING
            item.pos = MeCab().partOfSpeech(item.value)
            # TODO: a disabled draft also unpacked a base form from
            # partOfSpeech() and replaced item.value with it when different
            # TODO: get type (somehow, based on pos)
            # TODO: if noun & 1 symbol == kanji, if two and more = word...
            if len(item.value) == 1:
                item.category = 'kanji'
            elif item.pos == '':
                item.category = 'compound'
            else:
                item.category = 'word'

        # Unprocessed item
        item.status = 'new'
        # Save item
        item.save()
        return item
Beispiel #7
0
 def get_item(self, value, category='kanji'):
     """Get item by its value and category.

     Args:
         value: Value of the key to look up.
         category: Category the key must belong to.

     Returns:
         The first matching ``Key``; per the MongoEngine ``first()``
         contract this is ``None`` when nothing matches.
     """
     # Honour the requested category instead of always querying 'kanji'
     return Key.objects(value=value, category=category).first()
Beispiel #8
0
 def get(self, id, category='kanji'):
     """Get item by its id and category.

     Args:
         id: Identifier of the key to look up.
         category: Category the key must belong to.

     Returns:
         The first matching ``Key``; per the MongoEngine ``first()``
         contract this is ``None`` when nothing matches.
     """
     # TODO: use objects.get() instead of first
     # Honour the requested category instead of always querying 'kanji'
     return Key.objects(id=id, category=category).first()
Beispiel #9
0
 def export(self, category):
     """Fetch the query result holding every key of the given category"""
     keys_in_category = Key.objects(category=category)
     return keys_in_category
Beispiel #10
0
 def count(self, category='kanji', status='new'):
     """Tell how many keys have the given category and status"""
     matching = Key.objects(category=category, status=status)
     return len(matching)
Beispiel #11
0
    def process(self, category='kanji', limit=100):
        """Process new & unprocessed keys of a category, collecting usages.

        For every key of ``category`` with status 'new' (at most ``limit``
        keys per call):

        * a Gloss holding the key's own reading and a Fact linking key and
          gloss are created;
        * up to 7 candidate words from WordNet are turned into usages; each
          needs both a reading and at least one definition;
        * if fewer than 2 usages were collected, Jisho is queried as a
          fallback source;
        * the fact is saved and linked back to the key; the key is marked
          'processed' only when at least one usage was found.

        Progress is printed to stdout. A mongo ``OperationFailure`` aborts
        the run and is reported via print instead of being raised.

        Args:
            category: Category of the keys to process.
            limit: Maximum number of keys fetched by the query.
        """
        wn = Wordnet()
        mc = MeCab()
        ji = Jisho()
        # 0. Find unprocessed kanji key
        # NOTE(review): timeout(False) presumably keeps the cursor alive
        # during the slow per-key processing -- confirm against the driver
        try:
            for key in Key.objects(
                category=category, status='new'
            ).timeout(False).limit(limit):

                print 'Processing ', key.value

                # 0a. Get reading for kanji itself
                key_reading = mc.reading(key.value)
                key_gloss = Gloss()
                key_gloss.readings.update({'default': key_reading})
                key_gloss.save()

                # 0b. Initialize corresponding Fact
                # (not saved until step 7, after usages are collected)
                key_fact = Fact(key=key, gloss=key_gloss)

                # 1. Get usages from WordNet
                words = wn.complete(key.value)
                if words:
                    # Only the first 7 candidate words are considered
                    for word in words[:7]:
                        # 2. Check, if reading is found
                        reading = mc.reading(word)
                        if(not reading):
                            continue

                        # 3. Check, if definition is found
                        definitions = wn.lookup(word)
                        if(not definitions):
                            continue

                        # 4. Create new Key and corresponding Fact entities
                        try:
                            # Check if such item already exists
                            existing_key = Key.objects.get(value=word)
                            fact = existing_key.fact
                        except (DoesNotExist, MultipleObjectsReturned):
                            # Reached both when the word is unknown and when
                            # it is stored more than once; either way a new
                            # key is created below
                            # 5a. Create Gloss entity for most common definitions
                            gloss = Gloss()
                            # No more than 2-4 definitions!
                            for definition in definitions[:3]:
                                gloss.translations.append(definition['gloss'])
                            gloss.readings.update({'default': reading})
                            gloss.save()

                            # 5b. Create corresponding key & fact
                            # The key is saved first so the fact can
                            # reference it, then saved again with the
                            # back-link and final status
                            new_key = Key(
                                value=word,
                                category='word',
                                tags=['minor']
                            ).save()
                            fact = Fact(key=new_key, gloss=gloss).save()
                            new_key.fact = fact
                            new_key.status = 'processed'
                            new_key.save()

                        # TODO: add synonyms based on 'words'?
                        # TODO: parse components?
                        # TODO: find advanced examples?

                        #6. Link fact to key-fact as usages
                        # (done for existing and newly created keys alike)
                        key_fact.usages.append(fact)

                # 1a. If still no usages found (or not enough)
                if len(key_fact.usages) < 2:
                    # Fallback source; the loop below unpacks each result as
                    # a (word, info) pair and reads info['meaning'] and
                    # info['kana']
                    words = ji.define(key.value, 7)
                    for word, info in words:
                        # 4. Create new Key and corresponding Fact entities
                        try:
                            # Check if such item already exists
                            existing_key = Key.objects.get(value=word)
                            fact = existing_key.fact
                        except (DoesNotExist, MultipleObjectsReturned):
                            # 5a. Create Gloss entity for most common definitions
                            gloss = Gloss()
                            gloss.translations.append(info['meaning'])
                            gloss.readings.update({'default': info['kana']})
                            gloss.save()

                            # 5b. Create corresponding key & fact
                            new_key = Key(
                                value=word,
                                category='word',
                                tags=['minor']
                            ).save()
                            fact = Fact(key=new_key, gloss=gloss).save()
                            new_key.fact = fact
                            new_key.status = 'processed'
                            new_key.save()

                            #6. Link fact to key-fact as usages
                            # NOTE(review): unlike the WordNet branch, the
                            # link is added only for newly created keys; the
                            # fact of an existing key is looked up but not
                            # appended -- confirm whether this is intended
                            # (e.g. to avoid duplicate usages)
                            key_fact.usages.append(fact)

                #7. Save key fact and corresponding key (bi-directional link)
                key_fact.save()
                key.fact = key_fact
                # Without any usages the status is left unchanged, so the
                # key still matches the status='new' query on a later run
                if len(key_fact.usages) > 0:
                    # todo: if still nothing found -> lookup in names
                    # dictionary (jisho)
                    key.status = 'processed'
                key.save()

                print 'Total usages: ', len(key.usages())
                print '----------------'
        except OperationFailure as e:
            # A failed mongo operation ends the whole run; keys already
            # handled in earlier iterations stay saved
            print 'There was an error querying mongo db: %s' % e
Beispiel #12
0
def get_item(key):
    """ Look up stored items by value

    ``key`` is decoded from UTF-8 before the lookup; the values of all
    matching items are returned under the 'found' entry.
    """
    decoded = unicode(key, 'utf-8')
    matches = Key.objects(value=decoded)
    return {'found': [match.value for match in matches]}