# -*- coding: utf-8 -*-
import constants
from pickle_file_loader_for_original import PickleFileLoaderForOriginal
from pickle_file_saver_for_original import PickleFileSaverForOriginal
from path_mover import PathMover
from page_data_inserter import PageDataInserter
import pdb

if __name__ == '__main__':
    # Fetch the HTML of every previously-saved page for each query in
    # QUERIES_4, skipping PDF links and pages whose body is already stored.
    # NOTE: this chunk is truncated -- the try block continues past this excerpt.
    queries = constants.QUERIES_4
    pfl = PickleFileLoaderForOriginal()
    saver = PickleFileSaverForOriginal()
    pm = PathMover()
    di = PageDataInserter()
    for i, query in enumerate(queries):
        pages = pfl.load_fetched_pages_with_query(query)
        # Work inside FETCHED_PAGES_O_DIR_NAME/<query>/ (created on demand).
        pm.go_or_create_and_go_to(constants.FETCHED_PAGES_O_DIR_NAME)
        pm.go_or_create_and_go_to(query)
        # NOTE(review): this inner loop shadows the outer loop index `i`.
        for i, page in enumerate(pages):
            if '.pdf' in page.url:
                # Skip PDF links; only HTML pages are fetched.
                continue
            if di.has_body(page.query, page.url):
                # Body already stored -- report the page index and skip.
                print(str(i))
                continue
            try:
                # Message (Japanese): 'fetching page #%i for query %s'.
                print('%i番目の%sのページをフェッチします' % (i, query))
                page.fetch_html()
                # Message (Japanese): 'finished fetching %s!'.
                print('%sのフェッチ完了!' % page.title)
                page.set_text_from_html_body()
                #page.set_sentences_from_text()
                #filename = '%s_%i.pkl' % (query, i)
Exemple #2
0
        for subtype in page.subtypes:
            self.graph.add_edge(subtype, page.query_task(), relation='subtype-of')
            for task in page.tasks:
                pdb.set_trace()
                self.graph.add_edge(task.task_name(), subtype)

    def _add_non_subtype_page(self, page):
        """Link every task on *page* to the page's query task in the graph.

        Adds one ``task_name -> query_task`` edge per task.  Unlike the
        subtype edges added elsewhere, these edges carry no ``relation``
        attribute.
        """
        # Bug fix: removed a leftover `pdb.set_trace()` debug breakpoint
        # that suspended execution on every single task.
        for task in page.tasks:
            self.graph.add_edge(task.task_name(), page.query_task())

    def show_graph(self):
        """Render the task graph with matplotlib and save it to path.png.

        Bug fix: the figure is now written to disk *before* ``plt.show()``.
        ``show()`` blocks until the window is closed and the current figure
        is cleared, so calling ``savefig`` afterwards produced a blank image.
        """
        nx.draw(self.graph)
        plt.savefig("path.png")
        plt.show()


if __name__ == '__main__':
    import constants
    from pickle_file_loader_for_original import PickleFileLoaderForOriginal

    # Build and display one task graph per query, using at most the first
    # 101 fetched pages of each query.
    for current_query in constants.QUERIES_4:
        graph_gen = TaskGraphGenerator()
        loader = PickleFileLoaderForOriginal()
        fetched = loader.load_fetched_pages_with_query(current_query)
        for idx, fetched_page in enumerate(fetched):
            if idx > 100:
                break
            graph_gen.add_page(fetched_page)
        graph_gen.show_graph()
Exemple #3
0
#coding: utf-8
import constants
from sentence import Sentence
from task_data_inserter import TaskDataInserter
from pickle_file_loader_for_original import PickleFileLoaderForOriginal
from sentence_data_loader import SentenceDataLoader
import sqlite3
import pdb

if __name__ == '__main__':
    # Scan sentence rows by id, parse each body, and persist every sentence
    # that forms a well-formed task triple (noun, cmp, verb).
    pfl = PickleFileLoaderForOriginal()
    inserter = TaskDataInserter()
    sentence_loader = SentenceDataLoader()
    for idx in range(450000):
        sentence_id = idx + 1
        try:
            body = sentence_loader.body_with_id(sentence_id)
        except EOFError:
            # Truncated/corrupt row -- skip it and keep scanning.
            continue
        if not body:
            continue
        sentence = Sentence(body, 'a')
        if not sentence.set_noun_verb_if_good_task():
            continue
        inserter.insert(noun=sentence.noun,
                        cmp=sentence.cmp,
                        verb=sentence.verb,
                        sentence_id=sentence_id)
Exemple #4
0
# -*- coding: utf-8 -*-
from pickle_file_loader_for_ex import PickleFileLoaderForExpandedQuery
from pickle_file_loader_for_original import PickleFileLoaderForOriginal
from task_graph_zero_answerer import TaskGraphZeroAnswerer
from answer_printer import AnswererPrinter
from path_mover import PathMover
import constants
import pdb

if __name__ == '__main__':
    # For each query, load its pickled task graph and run the "zero"
    # answerer over it, collecting the united results.
    # NOTE: this chunk is truncated -- the loop over united_results
    # continues past this excerpt.
    queries = constants.QUERIES_4
    for query in queries:
        pfl = PickleFileLoaderForOriginal()
        g = pfl.load_graph_with_query(query)
        # Queries look like 'noun particle verb'; join with '_' to build
        # the task-node name used inside the graph.
        noun, cmp, verb = query.split(' ')
        query_task = '_'.join([noun, cmp, verb])

        pm = PathMover()

        # Message (Japanese): 'results of zero'.
        print('zeroの結果です')

        answerer = TaskGraphZeroAnswerer(graph=g, query_task=query_task)
        # Message (Japanese): 'instantiated the zero answerer'.
        print('zero_answererをinstance化しました')
        answerer.set_result_tasks()
        print('set_result_tasks')
        answerer.set_task_scores()
        answerer.remove_generalized_tasks()
        print('set_task_scores')
        answerer.set_united_results()
        simple_results = []
        for united_result in answerer.united_results:
Exemple #5
0
# -*- coding: utf-8 -*-
from pickle_file_loader_for_ex import PickleFileLoaderForExpandedQuery
from pickle_file_loader_for_original import PickleFileLoaderForOriginal
from task_graph_first_answerer import TaskGraphFirstAnswerer
from answer_printer import AnswererPrinter
from path_mover import PathMover
import constants
import pdb

if __name__ == '__main__':
    # For each query, load its pickled task graph and (in the lines that
    # follow this truncated excerpt) run the "first" answerer over it.
    queries = constants.QUERIES_4
    for query in queries:
        #pfl = PickleFileLoaderForExpandedQuery()
        pfl = PickleFileLoaderForOriginal()
        g = pfl.load_graph_with_query(query)
        # Message (Japanese): 'loaded'.
        print('ロードしました')
        # Query strings look like 'noun particle verb'.
        noun, cmp, verb = query.split(' ')
        query_task = '_'.join([noun, cmp, verb])

        if not g:
            # No pickled graph for this query.  Message (Japanese):
            # "the graph for %s does not exist!".
            # NOTE(review): pdb.set_trace() left in -- blocks unattended runs.
            print('%sのグラフが存在しません!' % query)
            pdb.set_trace()
            continue

        if not g.nodes():
            # Message (Japanese): "the graph for %s is malformed".
            print('%sのグラフに異常があります' % query)
            continue
        # NOTE(review): recomputes the same value already assigned above.
        query_task = '_'.join(query.split(' '))


        # NOTE: the answerer is expected to have already removed the unneeded nodes.
Exemple #6
0
# -*- coding: utf-8 -*-
import pdb
from pickle_file_loader_for_original import PickleFileLoaderForOriginal

if __name__ == '__main__':
    # Count the distinct tasks found across all pages fetched for the
    # hard-coded query below.
    loader = PickleFileLoaderForOriginal()
    fetched_pages = loader.load_fetched_pages_with_query('花粉症 を 対策する')
    unique_tasks = {task for page in fetched_pages for task in page.tasks}
    print(len(unique_tasks))