def process_last(self, user_jid, our_jid, sub, last):
    if last is not None:
        # We already know the feed's last item: store it, register the
        # subscription and ask the user to authorize our presence.
        sub["last"] = last
        yield UserSubscriptions(user_jid).subscribe(sub["url"])
        yield Subscription.create(sub)
        self._xmpp.send_presence(
            to=user_jid, from_=sub["jid"], type_="subscribe")
    else:
        # No last item yet: fetch the page, parse it and retry with the result.
        yield wait_for_host(sub["host"], level=2)
        try:
            page = yield get_page(sub["url"])
        except Exception:
            self._xmpp.send_message(
                to=user_jid, from_=get_full_jid(our_jid),
                body=u"Url check failed, subscription aborted. "
                     "Seems like not existing url.")
        else:
            parsed = yield self._worker.parse(sub, page)
            if "last" in parsed and parsed["last"] is not None:
                # The recursive call is a coroutine itself, so it must be yielded.
                yield self.process_last(user_jid, our_jid, sub, parsed["last"])
            else:
                self._xmpp.send_message(
                    to=user_jid, from_=get_full_jid(our_jid),
                    body=u"Page parsing failed, subscription aborted. "
                         "Seems like not existing url.")
def get_data(url, handler):
    # Generator-style coroutine (presumably decorated in the original module):
    # serve from cache unless debugging, otherwise fetch the page, run the
    # handler, cache the JSON-encoded result and return it.
    key = parser.convertUrl(url)
    if not options.DEBUG:
        cached = yield fetcher.get_data(key)
        if cached:
            return cached
    result = yield fetcher.get_page(url)
    ret = yield maybe_future(handler(result))
    ret = json.dumps(ret)
    yield fetcher.write_data(key, ret, options.CACHE_TIME)
    return ret
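# Hypothetical usage sketch, not from the original project: get_data() reads
# like a Tornado coroutine (yield for async calls, plain return for the
# result), so a request handler could look roughly like this, assuming
# get_data() is decorated with @gen.coroutine in its module. DataHandler and
# the inline handler callback are made-up names for illustration only.
from tornado import gen, web

class DataHandler(web.RequestHandler):
    @gen.coroutine
    def get(self):
        url = self.get_argument("url")
        # The handler callback gets the fetched page body and must return
        # something JSON-serializable; get_data() caches the dumped result.
        data = yield get_data(url, lambda page: {"length": len(page)})
        self.set_header("Content-Type", "application/json")
        self.write(data)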
def get_link_data(valid_link, link_file_prename):
    # Write the URL list to disk, then fetch each page and save it to
    # a numbered file next to the list.
    with open('%surl_list.html' % link_file_prename, 'wb+') as f:
        for url in valid_link:
            f.write(url + '\n')
    for num, url in enumerate(valid_link, start=1):
        print url
        html = fetcher.get_page(url)
        with open('%s%d.html' % (link_file_prename, num), 'wb+') as f:
            f.write(html)
def process_page(self, sub, last_modified):
    yield utils.wait_for_host(sub["host"])
    self.debug("HOST OK: %s (page)" % sub["url"])
    try:
        page = yield get_page(sub["url"])
    except NotFound:
        self.dead_url(sub)
    except Exception:
        err = traceback.format_exc()[:-1]
        self.bad_url(sub, err)
    else:
        parsed = yield self._worker.parse(sub, page)
        self.process_parsed(sub, parsed, last_modified)
    # We're done, decrement the open-connections count.
    self._conn_count -= 1
def crawl_web(environment, index, graph, seed, max_pages, max_depth):
    tocrawl = ['https://wikipedia.org/wiki/' + seed]
    next_depth = []
    crawled = []
    count = 0
    depth = 0
    while tocrawl and count < max_pages and depth <= max_depth:
        count += 1
        page = tocrawl.pop()
        if page not in crawled:
            links, page_content = get_page(environment, page, max_pages)
            add_page_to_index(index, page, page_content)
            graph[page] = links
            mergeLists(next_depth, links)
            crawled.append(page)
        if not tocrawl:
            tocrawl, next_depth = next_depth, []
            depth += 1
    return index, graph
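# The helpers below are NOT part of the snippet above: they are minimal
# sketches of what crawl_web() appears to expect. add_page_to_index() maps
# each word of a page to the URLs containing it, and mergeLists() extends the
# queue in place while skipping duplicates; the real project may implement
# both differently.
def add_page_to_index(index, url, content):
    # index: dict mapping keyword -> list of URLs whose content contains it.
    for word in content.split():
        index.setdefault(word, [])
        if url not in index[word]:
            index[word].append(url)


def mergeLists(target, links):
    # Append only links that are not already queued; mutates target in place.
    for link in links:
        if link not in target:
            target.append(link)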
import configparser
import datetime
import sys

import URLManager  # project-local module providing URLManager.URLManager
import fetcher     # project-local module providing fetcher.Fetcher


if __name__ == '__main__':
    config = configparser.ConfigParser()
    config.read('crawler.config')
    print("Master started. Initial page {}".format(
        config["SITE"]["initial_page"]))

    # init
    start_time = datetime.datetime.now()
    url_manager = URLManager.URLManager()
    url_manager.insert_url(config["SITE"]["initial_page"], 0, 0, -1)
    fetcher = fetcher.Fetcher(url_manager)
    end_time = datetime.datetime.now()
    delta = end_time - start_time
    print("Init time", delta)

    # start crawling
    while url_manager.has_next_url():  # TODO: change if parallel
        print("queue size", url_manager.get_size())
        next_url = url_manager.get_next_url()
        print("fetching", next_url)
        fetcher.get_page(next_url)
        print("done")
        sys.stdout.flush()
        sys.stderr.flush()