コード例 #1
0
# -*- coding: utf-8 -*-
import constants
import pdb
from pickle_file_saver_for_original import PickleFileSaverForOriginal
from page_data_loader import PageDataLoader
from web_page import WebPage

if __name__ == '__main__':
    # For every query in constants.QUERIES_4, look up the stored page ids,
    # wrap each stored record in a WebPage and hand the resulting list to
    # the saver, one save call per query.
    target_queries = constants.QUERIES_4
    pickle_saver = PickleFileSaverForOriginal()
    with PageDataLoader() as loader:
        for current_query in target_queries:
            collected_pages = []
            for pid in loader.page_ids_with_query(current_query):
                record = loader.pagedata_with_id(pid)
                # NOTE(review): an earlier comment described this record as
                # (id, url, snippet, body, rank), which does not line up
                # with the indices used below (0=url, 1=query, 2=snippet,
                # 3=rank). Confirm the actual layout against PageDataLoader.
                collected_pages.append(
                    WebPage(id=pid,
                            url=record[0],
                            query=record[1],
                            snippet=record[2],
                            rank=record[3]))
            pickle_saver.save_pages_with_query(pages=collected_pages,
                                               query=current_query)
コード例 #2
0
# -*- coding: utf-8 -*-
import constants
import pdb
from bing_searcher import BingSearcher
from pickle_file_saver_for_original import PickleFileSaverForOriginal

if __name__ == '__main__':
    # Run a Bing search for each query in constants.QUERIES_4 and save the
    # result pages; queries the saver already reports as present are only
    # announced and otherwise skipped.
    search_queries = constants.QUERIES_4
    pickle_saver = PickleFileSaverForOriginal()
    for current_query in search_queries:
        already_saved = pickle_saver.can_find_page_with_query(current_query)
        if not already_saved:
            searcher = BingSearcher(current_query)
            # 1000 pages are requested, but fewer may come back.
            fetched_pages = searcher.result_pages(page_num=1000)
            pickle_saver.save_pages_with_query(pages=fetched_pages,
                                               query=current_query)
        else:
            print('%sはもうあります' % current_query)

コード例 #3
0
# -*- coding: utf-8 -*-
from pickle_file_loader_for_original import PickleFileLoaderForOriginal
from pickle_file_saver_for_original import PickleFileSaverForOriginal
from page_data_loader import PageDataLoader
import constants
from sentence import Sentence
import pdb

if __name__ == '__main__':
    # For every query in constants.QUERIES_4: load the previously fetched
    # pages, attach Sentence objects built from the stored sentences of each
    # page, let the page derive its tasks, then save the pages again.
    target_queries = constants.QUERIES_4
    for current_query in target_queries:
        pickle_loader = PickleFileLoaderForOriginal()
        fetched_pages = pickle_loader.load_fetched_pages_with_query(
            current_query)
        for index, page in enumerate(fetched_pages):
            # A fresh PageDataLoader context is opened for each page.
            with PageDataLoader() as data_loader:
                raw_sentences = data_loader.sentences_with_id(page.id)
                page.sentences = [Sentence(raw, page.query)
                                  for raw in raw_sentences]
            page.set_tasks_from_sentences()
            print('%s の %i 番目のページにtasksをセットしました!'
                  % (page.query, index))

        pickle_saver = PickleFileSaverForOriginal()
        pickle_saver.save_pages_with_query(pages=fetched_pages,
                                           query=current_query)