# -*- coding: utf-8 -*- import constants from pickle_file_loader import PickleFileLoader from pickle_file_saver import PickleFileSaver from path_mover import PathMover import pdb if __name__ == '__main__': query = 'ネコ 預ける' dirname = constants.FETCHED_PAGES_O_DIR_NAME pfl = PickleFileLoader() saver = PickleFileSaver() pages = pfl.load_fetched_pages_with_query(query) pm = PathMover() pm.go_or_create_and_go_to(dirname) pm.go_or_create_and_go_to(query) # クエリ拡張するのならもう一度深くへ for i, page in enumerate(pages): if hasattr(page, 'sentences'): if page.sentences: print('%sはもうsentencesがあります' % page.title) continue try: page.fetch_html() print('%sのフェッチ完了!' % page.title) page.set_text_from_html_body() page.set_sentences_from_text() filename = '%s_%i.pkl' % (query, i) saver.save_file(obj=page, filename=filename) print('%sの保存完了!' % page.title) #pfs.save_pages_with_query_expansion() except (ValueError, IndexError):
# -*- coding: utf-8 -*-
"""Print the distinct tasks found for constants.QUERY, then open pdb.

The object returned by
PickleFileLoader.load_simple_task_search_result_with_query is iterated by
key, and each value is treated as an iterable of tasks; all tasks are pooled
into a single set.
"""
import pdb

import constants
from pickle_file_loader import PickleFileLoader

if __name__ == '__main__':
    pfl = PickleFileLoader()
    results_dic = pfl.load_simple_task_search_result_with_query(
        constants.QUERY)

    # Merge every per-key collection of tasks into one de-duplicated set.
    results_set = set()
    for broader_word in results_dic:
        narrower_tasks = results_dic[broader_word]
        results_set.update(narrower_tasks)

    print(results_set)
    # Stop here so the collected values can be inspected interactively.
    pdb.set_trace()
# -*- coding: utf-8 -*-
"""Print ObjectPredicateToiuSearcher.concrete_terms for the loaded result.

The input is whatever PickleFileLoader.load_simple_task_search_result
returns; it is passed to the searcher unchanged.
"""
import pdb
from pickle_file_loader import PickleFileLoader
from object_predicate_toiu_searcher import ObjectPredicateToiuSearcher

if __name__ == '__main__':
    # Load first, then build the searcher, matching the original call order.
    search_result = PickleFileLoader().load_simple_task_search_result()
    searcher = ObjectPredicateToiuSearcher()
    print(searcher.concrete_terms(search_result))
# -*- coding: utf-8 -*-
"""Print predicate substitutions found in the entailment dictionaries.

For every dictionary type / dictionary name pair in constants, each task of
each page fetched for constants.QUERY is checked: when the task's
predicate_term is a key of the "<name>_<type>" dictionary, one line is
printed per term stored under that key.
"""
import pdb
import constants
from pickle_file_loader import PickleFileLoader

if __name__ == '__main__':
    loader = PickleFileLoader()
    dictionaries = loader.load_entailment_dictionaries()
    pages = loader.load_fetched_pages_with_query(constants.QUERY)

    # NOTE(review): the second triple is printed as object, term, cmp while
    # the first is object, cmp, predicate -- confirm that order is intended.
    line_format = '%s %s %s changes into %s %s %s %s'

    for dictionary_type in constants.ENTAILMENT_DICTIONARY_TYPES:
        for filename in constants.ENTAILMENT_DICTIONARY_NAMES:
            dictionary_key = filename + '_' + dictionary_type
            for page in pages:
                for task in page.tasks:
                    # Looked up per task so a missing dictionary only fails
                    # once there is actually a task to check.
                    entries = dictionaries[dictionary_key]
                    if task.predicate_term not in entries:
                        continue
                    for term in entries[task.predicate_term]:
                        print(line_format % (
                            task.object_term.name,
                            task.cmp,
                            task.predicate_term,
                            task.object_term.name,
                            term,
                            task.cmp,
                            filename,
                        ))
# -*- coding: utf-8 -*-
"""Report how task predicates map to terms in the entailment dictionaries.

Walks every combination of constants.ENTAILMENT_DICTIONARY_TYPES and
constants.ENTAILMENT_DICTIONARY_NAMES, and for each task on each page loaded
for constants.QUERY prints one line per term listed under the task's
predicate_term in the dictionary keyed "<name>_<type>".
"""
import pdb
import constants
from pickle_file_loader import PickleFileLoader

if __name__ == '__main__':
    pickle_loader = PickleFileLoader()
    dictionaries = pickle_loader.load_entailment_dictionaries()
    pages = pickle_loader.load_fetched_pages_with_query(constants.QUERY)

    for dictionary_type in constants.ENTAILMENT_DICTIONARY_TYPES:
        for filename in constants.ENTAILMENT_DICTIONARY_NAMES:
            lookup_key = filename + '_' + dictionary_type
            for page in pages:
                for task in page.tasks:
                    # The dictionary is fetched inside the task loop, so a
                    # missing key raises only when a task is being checked.
                    candidates = dictionaries[lookup_key]
                    if task.predicate_term in candidates:
                        for term in candidates[task.predicate_term]:
                            # NOTE(review): the "after" part is ordered
                            # object, term, cmp -- verify this is intended.
                            fields = (task.object_term.name, task.cmp,
                                      task.predicate_term,
                                      task.object_term.name, term, task.cmp,
                                      filename)
                            print('%s %s %s changes into %s %s %s %s'
                                  % fields)
# -*- coding: utf-8 -*-
"""Load the queries via PickleFileLoader.load_queries and open pdb.

Nothing is printed; the script exists to leave ``queries`` (and the loader,
``pfl``) available for interactive inspection at the breakpoint.
"""
import pdb

from pickle_file_loader import PickleFileLoader

if __name__ == '__main__':
    pfl = PickleFileLoader()
    queries = pfl.load_queries()

    # Pause so the loaded value can be examined by hand.
    pdb.set_trace()