Example #1
0
def load_sentence():
    """Register every 'sentence' annotation of every document with Solr.

    For each document id yielded by ``datastore.get_all_ids(limit=-1)`` the
    document's ``content`` and JSON-encoded ``meta_info`` are read, and one
    index record is built per sentence annotation. All records are sent in a
    single ``indexer.load('anno', ...)`` call at the end.

    Returns:
        None. The only effect is the call to ``indexer.load``.
    """
    kind = 'sentence'
    records = []
    for doc_id in datastore.get_all_ids(limit=-1):
        row = datastore.get(doc_id, ['content', 'meta_info'])
        body = row['content']
        meta = json.loads(row['meta_info'])
        spans = datastore.get_annotation(doc_id, kind)
        # Convert each sentence span into the record layout loaded into Solr.
        # The id is "<doc id>.<annotation name>.<index within the document>".
        records.extend(
            {
                'id': '{0:d}.{1:s}.{2:d}'.format(doc_id, kind, idx),
                'doc_id_i': doc_id,
                'anno_id_i': idx,
                'name_s': kind,
                'sentence_txt_ja': body[span['begin']:span['end']],
                'title_txt_ja': meta['title'],
                'url_s': meta['url'],
            }
            for idx, span in enumerate(spans)
        )
    # Push everything to the 'anno' target in one batch.
    indexer.load('anno', records)
Example #2
0
def load_affiliation():
    """Register every 'affiliation' annotation of every document with Solr.

    For each document id yielded by ``datastore.get_all_ids(limit=-1)`` the
    document text, its JSON-encoded ``meta_info`` and its sentence
    annotations are read. Each affiliation annotation becomes one record
    holding the annotated text plus the text of the sentence returned by
    ``find_x_including_y`` for it. All records are sent in a single
    ``indexer.load('anno', ...)`` call at the end.

    Returns:
        None. The only effect is the call to ``indexer.load``.
    """
    anno_name = 'affiliation'
    anno_field = anno_name + '_txt_ja'
    records = []
    for doc_id in datastore.get_all_ids(limit=-1):
        row = datastore.get(doc_id, ['content', 'meta_info'])
        body = row['content']
        meta = json.loads(row['meta_info'])
        sentences = datastore.get_annotation(doc_id, 'sentence')
        annotations = datastore.get_annotation(doc_id, anno_name)
        for idx, anno in enumerate(annotations):
            # Convert the annotation into the record layout loaded into Solr.
            # NOTE(review): find_x_including_y is defined elsewhere; presumably
            # it returns the sentence span enclosing `anno` - confirm what it
            # returns when no sentence matches.
            enclosing = find_x_including_y(sentences, anno)
            # The id is "<doc id>.<annotation name>.<index within the document>".
            record_id = '{0:d}.{1:s}.{2:d}'.format(doc_id, anno_name, idx)
            sentence_text = body[enclosing['begin']:enclosing['end']]
            anno_text = body[anno['begin']:anno['end']]
            records.append({
                'id': record_id,
                'doc_id_i': doc_id,
                'anno_id_i': idx,
                'name_s': anno_name,
                'sentence_txt_ja': sentence_text,
                anno_field: anno_text,
                'title_txt_ja': meta['title'],
                'url_s': meta['url'],
            })
    # Push everything to the 'anno' target in one batch.
    indexer.load('anno', records)
Example #3
0
        'doc_id_i': doc_id,
        'anno_id_i': i,
        'name_s': anno_name,
        'sentence_txt_ja': text[sent['begin']:sent['end']],
        anno_name + '_txt_ja': text[anno['begin']:anno['end']],
        ref_anno_name + '_txt_ja': text[ref_anno['begin']:ref_anno['end']],
        'title_txt_ja': meta_info['title'],
        'url_s': meta_info['url'],
    }
    return data


if __name__ == '__main__':
    # Script entry point: build one index record per 'cause' annotation of
    # every stored document and load them all into the 'anno' target.
    datastore.connect()
    try:
        anno_name = 'cause'
        data = []
        for doc_id in datastore.get_all_ids(limit=-1):
            row = datastore.get(doc_id, fl=['content', 'meta_info'])
            text = row['content']
            # meta_info is stored as a JSON string.
            meta_info = json.loads(row['meta_info'])
            sents = datastore.get_annotation(doc_id, 'sentence')
            for i, anno in enumerate(
                    datastore.get_annotation(doc_id, anno_name)):
                # Pair the annotation with the sentence returned for it by
                # find_x_including_y (defined elsewhere in this file).
                sent = find_x_including_y(sents, anno)
                data.append(
                    create_index_data(doc_id, meta_info, anno_name, anno, i,
                                      sent, text))

        # Register the collected records with Solr.
        indexer.load('anno', data)
    finally:
        # Close the datastore even if reading, JSON parsing or indexing
        # raised; previously an exception skipped close() entirely.
        datastore.close()
#!/usr/bin/env python

import json

import sqlitedatastore as datastore
import solrindexer as indexer

if __name__ == '__main__':
    # Script entry point: build one index record per stored document (full
    # content, title and URL) and load them all into the 'doc' target.
    datastore.connect()
    try:
        data = []

        for doc_id in datastore.get_all_ids(limit=-1):
            row = datastore.get(doc_id, ['id', 'content', 'meta_info'])
            # meta_info is stored as a JSON string.
            meta_info = json.loads(row['meta_info'])
            data.append({
                # The record id is the document id rendered as a string; the
                # same id is also stored unconverted in doc_id_i.
                'id': str(row['id']),
                'doc_id_i': row['id'],
                'content_txt_ja': row['content'],
                'title_txt_ja': meta_info['title'],
                'url_s': meta_info['url'],
            })

        # load to Solr
        indexer.load('doc', data)
    finally:
        # Close the datastore even if reading, JSON parsing or indexing
        # raised; previously an exception skipped close() entirely.
        datastore.close()