# -*- coding: utf-8 -*-
"""Fetch the HTML of previously saved search-result pages and pickle them.

For a fixed query, the pages are loaded through ``PickleFileLoader``; each
page that has no sentences yet is fetched, converted to text and sentences
via its own methods, and saved one file per page through ``PickleFileSaver``.
"""
import constants
from pickle_file_loader import PickleFileLoader
from pickle_file_saver import PickleFileSaver
from path_mover import PathMover
import pdb


if __name__ == '__main__':
    query = 'ネコ 預ける'
    dirname = constants.FETCHED_PAGES_O_DIR_NAME
    pfl = PickleFileLoader()
    saver = PickleFileSaver()
    # Load before moving: the statement order of the original is preserved
    # because the loader may depend on the current location.
    pages = pfl.load_fetched_pages_with_query(query)
    pm = PathMover()
    # Presumably these calls enter (creating if needed) <dirname>/<query>,
    # where the per-page files are then written -- verify against PathMover.
    pm.go_or_create_and_go_to(dirname)
    pm.go_or_create_and_go_to(query)
    # When using query expansion, go one level deeper here.
    for i, page in enumerate(pages):
        # Skip pages that already carry a non-empty ``sentences`` attribute.
        if getattr(page, 'sentences', None):
            print('%sはもうsentencesがあります' % page.title)
            continue
        try:
            page.fetch_html()
            print('%sのフェッチ完了!' % page.title)
            page.set_text_from_html_body()
            page.set_sentences_from_text()
            filename = '%s_%i.pkl' % (query, i)
            saver.save_file(obj=page, filename=filename)
            print('%sの保存完了!' % page.title)
            # NOTE: saving with query expansion is not wired up in this script.
        except (ValueError, IndexError) as err:
            # NOTE(review): the original handler body was not visible in the
            # source; reporting the failure and moving on to the next page is
            # a best-effort completion -- confirm the intended behavior.
            print('%sの処理に失敗しました: %s' % (page.title, err))
            continue
# -*- coding: utf-8 -*-
"""Run a Bing search for a fixed query and persist the resulting pages."""
import constants
from bing_searcher import BingSearcher
from pickle_file_saver import PickleFileSaver


def main():
    """Search for the hard-coded query and save the returned pages.

    The pages come from ``BingSearcher.result_pages`` (called with
    ``page_num=1000``) and are handed, together with the query, to
    ``PickleFileSaver.save_pages_with_query``.
    """
    search_query = 'ネコ 預ける'
    searcher = BingSearcher(search_query)
    found_pages = searcher.result_pages(page_num=1000)
    PickleFileSaver().save_pages_with_query(
        pages=found_pages,
        query=search_query,
    )


if __name__ == '__main__':
    main()