Exemple #1
0
        for subtype in page.subtypes:
            self.graph.add_edge(subtype, page.query_task(), relation='subtype-of')
            for task in page.tasks:
                pdb.set_trace()
                self.graph.add_edge(task.task_name(), subtype)

    def _add_non_subtype_page(self, page):
        """Connect every task of *page* to the page's query task.

        For each task in ``page.tasks`` this calls
        ``self.graph.add_edge(task.task_name(), page.query_task())``.

        Args:
            page: Object exposing ``tasks`` (an iterable whose items have a
                ``task_name()`` method) and a ``query_task()`` method.
        """
        # The leftover ``pdb.set_trace()`` breakpoint was removed: it stopped
        # the program at an interactive debugger prompt once per task.
        for task in page.tasks:
            self.graph.add_edge(task.task_name(), page.query_task())

    def show_graph(self):
        """Draw ``self.graph``, save the figure to ``path.png``, then show it.

        The figure is written to disk *before* ``plt.show()`` is called.
        With interactive backends the current figure is typically discarded
        once the shown window is closed, so saving afterwards can write an
        empty image.
        """
        nx.draw(self.graph)
        plt.savefig("path.png")
        plt.show()


if __name__ == '__main__':
    import constants
    from pickle_file_loader_for_original import PickleFileLoaderForOriginal

    # For every query in QUERIES_4: build a fresh graph from the loaded
    # pages at indices 0..100 and display it.
    target_queries = constants.QUERIES_4
    for query in target_queries:
        generator = TaskGraphGenerator()
        loader = PickleFileLoaderForOriginal()
        fetched_pages = loader.load_fetched_pages_with_query(query)
        for index, page in enumerate(fetched_pages):
            if index <= 100:
                generator.add_page(page)
            else:
                # Only the first 101 pages contribute to the graph.
                break
        generator.show_graph()
# -*- coding: utf-8 -*-
import constants
from pickle_file_loader_for_original import PickleFileLoaderForOriginal
from pickle_file_saver_for_original import PickleFileSaverForOriginal
from path_mover import PathMover
from page_data_inserter import PageDataInserter
import pdb

if __name__ == '__main__':
    # For each query in constants.QUERIES_4, load its stored pages and, for
    # every page that is not skipped, call page.fetch_html() followed by
    # page.set_text_from_html_body().
    queries = constants.QUERIES_4
    pfl = PickleFileLoaderForOriginal()
    saver = PickleFileSaverForOriginal()
    pm = PathMover()
    di = PageDataInserter()
    for i, query in enumerate(queries):
        pages = pfl.load_fetched_pages_with_query(query)
        # presumably enters (creating if needed) the per-query directory under
        # FETCHED_PAGES_O_DIR_NAME -- inferred from the method name; confirm
        # against PathMover.
        pm.go_or_create_and_go_to(constants.FETCHED_PAGES_O_DIR_NAME)
        pm.go_or_create_and_go_to(query)
        # NOTE(review): this loop rebinds `i`, shadowing the outer query
        # index from enumerate(queries).
        for i, page in enumerate(pages):
            # Skip any page whose URL contains '.pdf'.
            if '.pdf' in page.url:
                continue
            # Skip pages for which has_body() is true, printing only their
            # index; presumably these were already processed -- confirm
            # against PageDataInserter.
            if di.has_body(page.query, page.url):
                print(str(i))
                continue
            try:
                # Message reads: "Fetching page number <i> of <query>".
                print('%i番目の%sのページをフェッチします' % (i, query))
                page.fetch_html()
                # Message reads: "Fetch of <title> complete!".
                print('%sのフェッチ完了!' % page.title)
                page.set_text_from_html_body()
                #page.set_sentences_from_text()
                #filename = '%s_%i.pkl' % (query, i)
Exemple #3
0
# -*- coding: utf-8 -*-
import pdb
from pickle_file_loader_for_original import PickleFileLoaderForOriginal

if __name__ == '__main__':
    # Print how many distinct tasks appear across all pages loaded for one
    # fixed query.
    page_loader = PickleFileLoaderForOriginal()
    fetched = page_loader.load_fetched_pages_with_query('花粉症 を 対策する')
    unique_tasks = {task for page in fetched for task in page.tasks}
    print(len(unique_tasks))