Example #1
    def start_requests(self):
        # Build the first 100 page URLs and yield a request for each.
        start_urls = [self.url + str(i) for i in range(100)]

        for url in start_urls:
            yield Request(url, callback=self._parse)
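This method only runs inside a spider class that provides self.url and a _parse callback. A minimal sketch of such a host class, where the class name, base URL, and callback body are assumptions rather than the project's real code:

class PageSpider(object):
    # Hypothetical host class: the start_requests() above only assumes
    # a `url` attribute and a `_parse` callback on the instance.
    url = "http://example.com/list/page/"

    def _parse(self, response):
        # Placeholder callback for each downloaded page.
        print(response.url)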
Example #2
def paper_page_parser(search_exp):
    """Build one Request per result page for the given search expression."""
    search_exp = urllib2.quote(search_exp.encode('utf-8'))
    # Fetch the first result page to read the total number of hits.
    page_content = requests.get(
        PAPER_SEARCH_URL.format(search_exp=search_exp, page=1)).text
    # 20 hits per page; Python 2 integer division, plus one page for the remainder.
    page_count = int(NUM_RE.search(page_content).group(1)) / 20 + 2
    result = []
    for i in xrange(1, page_count):
        result.append(
            Request(arg=PAPER_SEARCH_URL.format(search_exp=search_exp, page=i),
                    parser=paper_parser))
    return result
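PAPER_SEARCH_URL, NUM_RE, and paper_parser are module-level names this function relies on. A rough sketch of the shapes they need, where the URL template, the regular expression, and the callback are placeholders rather than the project's real values:

import re

# Hypothetical search endpoint and hit-count pattern; only the
# {search_exp}/{page} placeholders and one captured digit group matter.
PAPER_SEARCH_URL = "http://example.com/papers?q={search_exp}&page={page}"
NUM_RE = re.compile(r'found (\d+) results')

def paper_parser(text):
    # Placeholder callback; what it receives depends on the framework.
    pass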
Example #3
    def start_requests(self):
        # Build page URLs 180-199 and yield a request for each.
        start_urls = [self.url + str(i) for i in range(180, 200)]
        self.num = len(start_urls)

        for url in start_urls:
            yield Request(url, callback=self._parse)
Example #4
    def __init__(self, url, html=None):
        self._html = html
        self._url = url

        # Boundaries of the detected content block and its paragraphs,
        # filled in later by the extraction methods.
        self._content_start_pos = ""
        self._content_end_pos = ""
        self._content_center_pos = ""
        self._paragraphs = ""

        # Download the page if the caller did not supply the HTML.
        if not html:
            resp = Request(url).get_response()
            self._html = resp.text

        self._text = self.__del_html_tag(self._html, save_useful_tag=True)
Example #5
    def parser_desc(self, text):
        """add description pages to the queue"""
        find_desc_url = DESC_P.findall(text)
        for desc_url in self.store.record_url(find_desc_url):
            static_desc_url = self.get_static_url(desc_url)
            self.spider.queue.put(Request(static_desc_url))
Example #6
    def parser_detail(self, text):
        """add detail page to queue"""
        find_item_url = ITEM_P.findall(text)
        for item_url in self.store.record_url(find_item_url):
            static_item_url = self.get_static_url(item_url)
            self.spider.queue.put(Request(static_item_url))
Example #7
    def parser_list(self, text):
        """add next list page to queue"""
        find_list_url = LIST_PAGE_P.findall(text)
        for list_url in self.store.record_url(find_list_url):
            static_list_url = self.get_list_url(list_url)
            self.spider.queue.put(Request(static_list_url))
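Examples #5 through #7 all filter the URLs they find through self.store.record_url, which evidently yields only URLs that have not been queued before. A minimal sketch of such a store, assuming a plain in-memory set (the real project may persist seen URLs elsewhere):

class UrlStore(object):
    """Hypothetical deduplicating store: yield only URLs not seen before."""

    def __init__(self):
        self._seen = set()

    def record_url(self, urls):
        for url in urls:
            if url not in self._seen:
                self._seen.add(url)
                yield url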
Example #8
def request(**kwargs):
    """Fetch one URL and drop into an interactive shell to inspect the response."""
    kwargs.setdefault("proxies", None)
    response = Request(**kwargs).get_response()
    print(response)

    IPython.embed(header="now you can use response")
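A likely way to call this debugging helper, assuming the framework's Request accepts a url keyword and a requests-style proxies mapping (the target URL and proxy address are placeholders):

request(url="https://httpbin.org/get")
request(url="https://httpbin.org/get",
        proxies={"https": "http://127.0.0.1:8888"})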
Example #9
    def get_release_time(self):
        # Look for a timestamp near the start of the content block first,
        # then fall back to the middle of it.
        release_time = get_release_time_in_paragraph(self._content_start_pos)
        if not release_time:
            release_time = get_release_time_in_paragraph(
                self._content_center_pos)

        return release_time


if __name__ == "__main__":
    urls = [
        "http://news.cctv.com/2020/06/27/ARTIWaUMWOEtQNxyLiVqrH0Q200627.shtml",
        "http://column.caijing.com.cn/20200724/4684426.shtml",
    ]
    for url in urls:
        resp = Request(url).get_response()
        html = resp.text

        article_extractor = ArticleExtractor(url, html)
        content = article_extractor.get_content()
        title = article_extractor.get_title()
        release_time = article_extractor.get_release_time()
        author = article_extractor.get_author()
        print("---------------------------")
        print(url)
        print("title : ", title)
        print("release_time: ", release_time)
        print("author", author)
        print("content : ", content)
        print("---------------------------")
Example #10
                           user=config.db_user,
                           passwd=config.db_password,
                           db=config.db_database,
                           charset='utf8')
    cursor = conn.cursor()
    # Look up the configured seeds (search expressions or seed URLs)
    # for the spider type given on the command line.
    cursor.execute(
        'select configValue from t_spider_config where configKey=%s',
        (arg_config.get(sys.argv[1]), ))
    config_values = [row[0] for row in cursor.fetchall()]
    if sys.argv[1] == 'paper':
        spider_paper = Spider('paper')
        for search_exp in config_values:
            # Cap each search expression at its first 500 page requests.
            reqs = parser.paper_page_parser(search_exp)[:500]
            for req in reqs:
                spider_paper.add_request(req)
        spider_paper.crawl()

    if sys.argv[1] == 'news':
        spider_news = Spider('news')
        for seed_url in config_values:
            spider_news.add_request(
                Request(arg=seed_url, parser=parser.news_parser))
        spider_news.crawl()

    if sys.argv[1] == 'patent':
        spider_patent = Spider('patent')
        for search_exp in config_values:
            spider_patent.add_request(
                Request(arg=search_exp, parser=parser.patent_parser))
        spider_patent.crawl()
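Example #10 opens mid-call: the lines that create the MySQLdb connection are cut off. A minimal sketch of the preamble it presumably follows, where the import block and the db_host attribute name are assumptions rather than the project's actual code:

import sys

import MySQLdb

import config
import parser  # the project's module holding paper_page_parser, news_parser, ...

# Hypothetical reconstruction of the truncated connection setup.
conn = MySQLdb.connect(host=config.db_host,
                       user=config.db_user,
                       passwd=config.db_password,
                       db=config.db_database,
                       charset='utf8')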