def get_qsyk_and_insert(self, docid):
    """Download the full article identified by *docid* and insert it into MySQL.

    Parameters
    ----------
    docid : dict
        Article descriptor carrying the article id under ``'docid'`` and
        the cover image URL under ``'cover_img'``.

    Articles already present in the database (``self.db_has_exist``) are
    skipped, as are failed downloads and empty payloads.
    """
    cover_img = MySQLdb.escape_string(docid['cover_img'])
    docid = docid['docid']
    if self.db_has_exist(docid):
        return
    url = "http://c.3g.163.com/nc/article/%s/full.html" % str(docid)
    data = utils.download_page(url, True)
    if not data:
        return
    data = data[docid]
    if not data:
        return
    ptime = data['ptime']
    today = ptime.split(' ')[0]
    imgs = data['img']
    body = data['body'].encode('utf-8')
    # Strip spaces and normalize ASCII/full-width parentheses in the title.
    title = data['title'].replace(' ', '').replace('(', '-').replace(
        '(', '-').replace(')', '').replace(')', '')
    # Swap each in-body image placeholder for a real <img> tag.
    for img in imgs:
        body = body.replace(img['ref'],
                            "<img src=\"" + img['src'] + "\"/><hr>")
    # Double '%' so a later %%-style formatting pass (presumably inside
    # utils.insert_mysql or the DB driver) does not misread literal
    # percent signs -- TODO confirm against utils.insert_mysql.
    body = body.replace('%', '%%')
    body = MySQLdb.escape_string(body)
    # BUGFIX: the title was interpolated unescaped, so a single quote in
    # an article title broke the statement (and opened SQL injection);
    # escape it like body/cover_img.  NOTE(review): the remaining fields
    # are still string-interpolated; a parameterized query would be the
    # proper fix, but utils.insert_mysql only accepts a raw SQL string.
    title = MySQLdb.escape_string(title)
    sql = ("insert into wangyi(item_type, title, url, docid, cover_img, "
           "ptime, today, body) values('%s', '%s', '%s', '%s', '%s', "
           "'%s', '%s', '%s')") % (self._item_type, title, url, docid,
                                   cover_img, ptime, today, body)
    utils.insert_mysql(sql)
def get_qsyk_and_insert(self, docid):
    """Fetch one article by its descriptor dict and store it in MySQL.

    ``docid`` is a dict holding the article id under ``'docid'`` and the
    cover image URL under ``'cover_img'``.  Articles already recorded in
    the database are skipped.
    """
    escaped_cover = MySQLdb.escape_string(docid['cover_img'])
    article_id = docid['docid']
    if self.db_has_exist(article_id):
        return
    article_url = "http://c.3g.163.com/nc/article/%s/full.html" % str(article_id)
    page = utils.download_page(article_url, True)
    if page:
        detail = page[article_id]
        if detail:
            publish_time = detail['ptime']
            day = publish_time.split(' ')[0]
            raw_title = detail['title']
            # Drop spaces and normalize ASCII/full-width parentheses,
            # in the same order as sequential .replace() calls.
            for old, new in ((' ', ''), ('(', '-'), ('(', '-'),
                             (')', ''), (')', '')):
                raw_title = raw_title.replace(old, new)
            html_body = detail['body'].encode('utf-8')
            # Swap each image placeholder for an <img> tag.
            for image in detail['img']:
                tag = "<img src=\"" + image['src'] + "\"/><hr>"
                html_body = html_body.replace(image['ref'], tag)
            # Double '%' before escaping, then escape for MySQL.
            html_body = MySQLdb.escape_string(html_body.replace('%', '%%'))
            insert_stmt = (
                "insert into wangyi(item_type, title, url, docid, cover_img,"
                " ptime, today, body) values('%s', '%s', '%s', '%s', '%s',"
                " '%s', '%s', '%s')"
            ) % (self._item_type, raw_title, article_url, article_id,
                 escaped_cover, publish_time, day, html_body)
            utils.insert_mysql(insert_stmt)
def get_docid_from_json(self):
    """Fetch the article-list JSON for the configured start/end range and
    extract the docid of every daily "QingSongYiKe" entry in it.

    Downloads ``self._list_url + "<start>-<end>.html"``, parses the
    response as JSON, stores the list found under ``self._list_docid``
    in ``self._data``, and delegates to ``self.extract_docid()``.  Does
    nothing when the download fails or the expected key is absent.
    """
    url = self._list_url + str(self._start) + "-" + str(self._end) + ".html"
    self._data = utils.download_page(url)
    if self._data:
        self._data = json.loads(self._data)
        # BUGFIX: dict.has_key() is Python-2 only and was removed in
        # Python 3; the ``in`` operator is behavior-identical on both.
        if self._list_docid in self._data:
            self._data = self._data[self._list_docid]
            self.extract_docid()
def get_docid_from_json(self):
    """Download the list page for the configured start/end slice and pull
    out each daily entry's docid via ``extract_docid``.
    """
    list_page = "%s%s-%s.html" % (self._list_url, str(self._start),
                                  str(self._end))
    self._data = utils.download_page(list_page)
    if not self._data:
        return
    self._data = json.loads(self._data)
    if not self._data.has_key(self._list_docid):
        return
    self._data = self._data[self._list_docid]
    self.extract_docid()