def search_with_expansion_words(self):
    """Fetch result pages for every query, splitting the page budget evenly.

    The total budget ``constants.NUM_OF_TOTAL_FETCHED_PAGES`` is divided
    (rounding down) by the number of queries returned by ``self.queries()``,
    and each query is searched through ``BingSearcher`` with that share.

    Returns:
        dict: Maps each query string to the value returned by
        ``BingSearcher.result_pages`` for it, e.g.
        ``{'query a': [Page, Page, ...], 'query a b': [Page, ...]}``.
        An empty dict is returned when there are no queries.
    """
    # Evaluate the query list once so the divisor and the loop always agree.
    queries = self.queries()
    num_queries = len(queries)
    if num_queries == 0:
        # Nothing to search; this also avoids a ZeroDivisionError below.
        return {}

    total_page_num = constants.NUM_OF_TOTAL_FETCHED_PAGES
    page_num_per_query = math.floor(total_page_num / num_queries)

    pages = {}
    for query in queries:
        pages[query] = BingSearcher(query).result_pages(page_num_per_query)
    return pages
# -*- coding: utf-8 -*-
import constants
import pdb
from bing_searcher import BingSearcher
from pickle_file_saver_for_original import PickleFileSaverForOriginal

# Script entry point: for every query in constants.QUERIES_4 that the saver
# cannot already find, fetch pages through BingSearcher and save them.
if __name__ == '__main__':
    target_queries = constants.QUERIES_4
    page_store = PickleFileSaverForOriginal()
    for target in target_queries:
        if not page_store.can_find_page_with_query(target):
            searcher = BingSearcher(target)
            # 1000 pages are requested, but len() of the result may be
            # smaller than that.
            fetched = searcher.result_pages(page_num=1000)
            page_store.save_pages_with_query(pages=fetched, query=target)
        else:
            # Already saved for this query: report it and move on.
            print('%sはもうあります' % target)
def result_pages(self, term, context):
    """Search for ``term`` in a quoted "という<term>" phrase plus ``context``.

    The query is the double-quoted string ``という`` immediately followed by
    ``term``, then a space, then ``context``.

    Args:
        term: Value interpolated inside the quoted phrase.
        context: Value appended after the quoted phrase.

    Returns:
        Whatever ``BingSearcher.result_pages()`` returns for that query.
    """
    searcher = BingSearcher('"という%s" %s' % (term, context))
    return searcher.result_pages()
# -*- coding: utf-8 -*-
import constants
from bing_searcher import BingSearcher
from pickle_file_saver import PickleFileSaver

# Script entry point: request pages for one fixed query through BingSearcher
# and hand them to PickleFileSaver under that same query.
if __name__ == '__main__':
    search_query = 'ネコ 預ける'
    fetched_pages = BingSearcher(search_query).result_pages(page_num=1000)
    PickleFileSaver().save_pages_with_query(pages=fetched_pages, query=search_query)