# -*- coding: utf-8 -*-
import constants
import pdb
from bing_searcher import BingSearcher
from pickle_file_saver_for_original import PickleFileSaverForOriginal

if __name__ == '__main__':
    # Fetch Bing search results for every configured query and persist each
    # result set as a pickle file, skipping queries already saved.
    saver = PickleFileSaverForOriginal()
    for query in constants.QUERIES_4:
        # A previously-saved query is reported and skipped.
        if saver.can_find_page_with_query(query):
            print('%sはもうあります' % query)
            continue
        searcher = BingSearcher(query)
        # May return fewer than the requested 1000 pages.
        result_pages = searcher.result_pages(page_num=1000)
        saver.save_pages_with_query(pages=result_pages, query=query)

# ---- Example #2 ----
# -*- coding: utf-8 -*-
import pdb
from posinega_graph_mapper import PosinegaGraphMapper
from pickle_file_saver_for_original import PickleFileSaverForOriginal
from pickle_file_loader_for_original import PickleFileLoaderForOriginal
import networkx as nx
import matplotlib.pyplot as plt
import constants

if __name__ == '__main__':
    # For each query, load its fetched pages and draw a posinega graph built
    # from the tasks found on the top-ranked pages.
    pfs = PickleFileSaverForOriginal()
    pfl = PickleFileLoaderForOriginal()
    for query in constants.QUERIES_4:
        fetched_pages = pfl.load_fetched_pages_with_query(query)
        mapper = PosinegaGraphMapper()

        for idx, page in enumerate(fetched_pages):
            # Only pages ranked 0..4 that actually carry tasks contribute edges.
            if -1 < page.rank < 5 and page.tasks:
                mapper.add_edges_with_page(page)
                print('%i 番目のページのタスクをグラフに追加しました' % idx)
        nx.draw(mapper.graph)
        plt.show()
        print('added all edges!')
# ---- Example #3 ----
# -*- coding: utf-8 -*-
import pdb
from graph_task_mapper import GraphTaskMapper
from pickle_file_saver_for_original import PickleFileSaverForOriginal
from pickle_file_loader_for_original import PickleFileLoaderForOriginal
from task_data_selector import TaskDataSelector
from task_subtype_data_loader import TaskSubtypeDataLoader
import constants
from task import Task


if __name__ == '__main__':
    # For each query, pull task ids from the task database, rebuild a
    # subtype-distance mapping per task, and construct Task objects to feed
    # into GraphTaskMapper.
    original_queries = constants.QUERIES_4
    pfs = PickleFileSaverForOriginal()
    pfl = PickleFileLoaderForOriginal()
    gtm = GraphTaskMapper()
    with TaskDataSelector() as selector:
        for query in original_queries:
            task_ids = selector.task_ids_with_query(query)
            for task_id in task_ids:
                with TaskSubtypeDataLoader() as task_subtype_loader:
                    # Rows are (subtype, distance) pairs; fold them into a dict
                    # keyed by subtype.
                    distance_subtype_pairs = task_subtype_loader.distance_from_subtype_with_task_id(task_id)
                    distance_between_subtypes = {}
                    for pair in distance_subtype_pairs:
                        distance_between_subtypes[pair[0]] = pair[1]

                    # task_data positions: [0] object term, [1] cmp (per the
                    # keyword args below).
                    task_data = selector.taskdata_with_task_id(task_id)
                    try:
                        task = Task(distance_between_subtypes=distance_between_subtypes,
                                    object_term=task_data[0],
                                    cmp=task_data[1],
                        # NOTE(review): this example is truncated here in the
                        # source dump — the rest of the Task(...) call and the
                        # try/except body are missing from this view.
# ---- Example #4 ----
# -*- coding: utf-8 -*-
import constants
import pdb
from pickle_file_saver_for_original import PickleFileSaverForOriginal
from page_data_loader import PageDataLoader
from web_page import WebPage

if __name__ == '__main__':
    # Rebuild WebPage objects from rows in the page database and save them
    # as one pickle per query via PickleFileSaverForOriginal.
    queries = constants.QUERIES_4
    saver = PickleFileSaverForOriginal()
    with PageDataLoader() as page_loader:
        for query in queries:
            pages = []
            page_ids = page_loader.page_ids_with_query(query)
            for page_id in page_ids:
                pagedata = page_loader.pagedata_with_id(
                    page_id)  # (id, url, snippet, body, rank)
                # NOTE(review): the tuple comment above says index 1 is
                # `snippet`, yet it is passed as `query=` below — confirm the
                # actual column order returned by pagedata_with_id.
                page = WebPage(id=page_id,
                               url=pagedata[0],
                               query=pagedata[1],
                               snippet=pagedata[2],
                               rank=pagedata[3])
                pages.append(page)
            saver.save_pages_with_query(pages=pages, query=query)
# ---- Example #5 ----
# -*- coding: utf-8 -*-
from pickle_file_loader_for_original import PickleFileLoaderForOriginal
from pickle_file_saver_for_original import PickleFileSaverForOriginal
from page_data_loader import PageDataLoader
import constants
from sentence import Sentence
import pdb

if __name__ == '__main__':
    # For each query: load its pickled pages, rebuild each page's sentence
    # list from the database, derive tasks from those sentences, and re-save.
    for query in constants.QUERIES_4:
        loader = PickleFileLoaderForOriginal()
        fetched_pages = loader.load_fetched_pages_with_query(query)
        for idx, page in enumerate(fetched_pages):
            with PageDataLoader() as page_loader:
                raw_sentences = page_loader.sentences_with_id(page.id)
                # Replace any stale sentence list with freshly-built Sentence objects.
                page.sentences = [Sentence(s, page.query) for s in raw_sentences]
            page.set_tasks_from_sentences()
            print('%s の %i 番目のページにtasksをセットしました!' % (page.query, idx))

        saver = PickleFileSaverForOriginal()
        saver.save_pages_with_query(pages=fetched_pages, query=query)
# -*- coding: utf-8 -*-
import constants
import pdb
from pickle_file_saver_for_original import PickleFileSaverForOriginal
from page_data_loader import PageDataLoader
from web_page import WebPage

if __name__ == '__main__':
    # Reconstruct WebPage objects from the page database and pickle them,
    # one file per query.
    saver = PickleFileSaverForOriginal()
    with PageDataLoader() as page_loader:
        for query in constants.QUERIES_4:
            pages = []
            for page_id in page_loader.page_ids_with_query(query):
                # NOTE(review): the original comment claimed the row layout is
                # (id, url, snippet, body, rank), which conflicts with the
                # keyword args below (url, query, snippet, rank) — verify
                # against PageDataLoader before trusting either.
                row = page_loader.pagedata_with_id(page_id)
                pages.append(WebPage(id=page_id,
                                     url=row[0],
                                     query=row[1],
                                     snippet=row[2],
                                     rank=row[3]))
            saver.save_pages_with_query(pages=pages, query=query)