Esempio n. 1
0
    def _do_new_item(self, desc, initial_tag, create_time=None, other=None):
        """Build an item document and insert it; backs :meth:`new_item`.

        :param desc: item description; must be an :class:`ItemDescBase`
            instance. A deep copy is placed in the document, so the object
            handed to ``prefilter.apply`` is not the caller's own.
        :param initial_tag: list of strings; every entry is converted with
            ``unicode`` before being declared and stored.
        :param create_time: time tuple accepted by ``time.mktime``; the
            current local time is used when it is None.
        :param other: optional dict, stored unchanged under the ``other``
            key of the document.
        :return: the id obtained from ``global_counter('item')``, which is
            also the ``_id`` of the inserted document.
        """
        assert isinstance(desc, ItemDescBase), (
            'bad desc: {!r}'.format(type(desc)))
        assert (isinstance(initial_tag, list) and
                all(isinstance(t, basestring) for t in initial_tag)), (
            'bad initial_tag: {!r}'.format(initial_tag))
        assert other is None or isinstance(other, dict), (
            'bad other arg: {!r}'.format(other))

        tag_list = [unicode(t) for t in initial_tag]
        declare_tag(tag_list)

        if create_time is None:
            create_time = time.localtime()

        collection = get_mongo('item')
        item_id = global_counter('item')
        # Request an index on each of these fields before inserting.
        for field in ('fetcher_type', 'fetcher_name', 'tag', 'creation_time'):
            collection.ensure_index(field)

        doc = {'_id': item_id}
        doc['fetcher_type'] = self.fetcher_type
        doc['fetcher_name'] = self.fetcher_name
        doc['desc'] = deepcopy(desc)
        doc['tag'] = tag_list
        doc['other'] = other
        # Time tuple -> seconds since the epoch -> datetime.
        created_ts = time.mktime(create_time)
        doc['creation_time'] = datetime.fromtimestamp(created_ts)

        prefilter.apply(self, doc)

        # Serialize only after prefilter.apply, which receives the
        # unserialized desc object inside the document.
        serialized = doc['desc'].serialize()
        doc['desc'] = Binary(serialized)
        collection.insert(doc)

        return item_id
Esempio n. 2
0
def auto_tagging(ctx, doc):
    """Add automatically predicted tags to an item document.

    The tagger model is loaded from ``ukconfig.tagger_path`` the first time
    it is needed and kept in the module-level ``_tagger`` afterwards. If
    loading raises ``IOError`` the failure is logged and the function
    returns, leaving ``doc`` untouched.

    Otherwise the tags predicted from ``doc['desc'].render_content()`` are
    declared and merged into ``doc['tag']``. The merge goes through a
    ``set``, so duplicates are dropped and the resulting order is not
    guaranteed.

    :param ctx: not used by this function.
    :param doc: item document providing ``desc`` and ``tag``; ``tag`` is
        replaced with the merged tag list.
    """
    global _tagger
    if _tagger is None:
        try:
            log_info('loading tagger ...')
            loaded = TextTagger.load(ukconfig.tagger_path)
        except IOError:
            log_info('tagger model not found.')
            return
        _tagger = loaded

    predicted = _tagger.predict_one(doc['desc'].render_content())
    declare_tag(predicted)
    log_info('original tag: ' + str(doc['tag']))
    log_info('autotagging: ' + str(predicted))

    merged = set(doc['tag'] + predicted)
    doc['tag'] = list(merged)