Beispiel #1
0
    def json_parser_detail(self, response):
        """Store one product detail record in the ``jyhgoods`` table.

        The fields listed below are copied unchanged from ``response.save``.
        The entries of ``response.json['propertyInfoList']`` are flattened
        into a single ``key:value;key:value;`` string stored as ``content``.

        Returns ``response.save['name']``.  Lookup errors for missing keys
        are not caught here and propagate to the caller.
        """
        saved = response.save

        # Build the description in one join instead of growing a string
        # inside the loop.
        # NOTE(review): 'propertykey' (lowercase k) is spelled differently
        # from 'propertyValue'; kept exactly as before -- confirm against the
        # real payload.
        desc = "".join(
            prop['propertykey'] + ":" + prop['propertyValue'] + ";"
            for prop in response.json['propertyInfoList']
        )

        copied_fields = (
            "code",
            "name",
            "img",
            "skuAdv",
            "sellPrice",
            "marketPrice",
            "stockNum",
            "skuPropertyInfo",
            "salesCount",
            "points",
            "brandName",
            "sysDateTime",
        )
        result = {field: saved[field] for field in copied_fields}
        result["content"] = desc

        # The row is fully assembled before the SQL helper is created, so a
        # malformed response fails before any database object is built.
        sql = SQL()
        sql.replace('jyhgoods', **result)
        return saved['name']
Beispiel #2
0
 def on_result(self, result):
     """Write a crawled result into the ``jobs`` table.

     The result is printed first (debug output).  Results that are empty,
     or whose ``title`` is missing or empty, are skipped without touching
     the database.  Otherwise the result's items are passed to
     ``SQL.replace`` as keyword arguments.
     """
     # Parenthesised form prints the same on Python 2 and is valid syntax on
     # Python 3, where the bare ``print result`` statement does not compile.
     print(result)
     # ``.get`` treats a missing title like an empty one instead of raising
     # KeyError.
     if not result or not result.get('title'):
         return
     sql = SQL()
     sql.replace('jobs', **result)
Beispiel #3
0
 def on_result(self, result):
     """Insert crawled entries into the table selected by their ``type``.

     ``result`` may be a single dict or a list of dicts.  Each entry's
     ``type`` value picks the table (``domain`` -> ``Domains``,
     ``host`` -> ``Hosts``, ``sample`` -> ``Samples``) and the remaining
     items are passed to ``SQL.insert`` as keyword arguments.

     Empty results, results that are neither a dict nor a list, and entries
     whose ``type`` is missing or unrecognised are skipped.
     """
     if not result:
         return

     # Normalise both accepted shapes to a list so the dispatch logic exists
     # only once.
     if isinstance(result, dict):
         entries = [result]
     elif isinstance(result, list):
         entries = result
     else:
         # Nothing would be stored for other types, so do not build the SQL
         # helper at all.
         return

     tables = {
         'domain': 'Domains',
         'host': 'Hosts',
         'sample': 'Samples',
     }

     sql = SQL()
     for entry in entries:
         # Work on a copy so the caller's dict keeps its 'type' key.
         row = dict(entry)
         tablename = tables.get(row.pop('type', None))
         if tablename is not None:
             sql.insert(tablename=tablename, **row)
Beispiel #4
0
 def on_result(self, result):
     """Insert a crawled result into the ``article`` table.

     Results that are empty, or whose ``original_id`` is missing or empty,
     are skipped without touching the database.  Otherwise the result's
     items are passed to ``SQL.insert`` as keyword arguments.
     """
     # ``.get`` treats a missing original_id like an empty one instead of
     # raising KeyError.
     if not result or not result.get('original_id'):
         return
     sql = SQL()
     sql.insert('article', **result)
Beispiel #5
0
 def __init__(self):
     """Call ``SQL.connect`` on this instance when ``self.connect`` is truthy."""
     if not self.connect:
         return
     SQL.connect(self)
Beispiel #6
0
 def on_result(self, result):
     """Stamp a non-empty result with the current time and insert it.

     An ``insert_time`` entry (``YYYY-MM-DD HH:MM:SS`` as produced by
     ``time.strftime``) is written into ``result`` itself, then the result's
     items are passed to ``SQL.insert`` for the ``reeoo_detail`` table.
     Empty results are ignored.
     """
     if result:
         writer = SQL()
         # The timestamp is added in place, so the caller's dict gains the key.
         result['insert_time'] = time.strftime('%Y-%m-%d %H:%M:%S')
         writer.insert('reeoo_detail', **result)
Beispiel #7
0
 def on_result(self, result):
     """Write a crawled result into the ``wish`` table.

     The result is printed first (debug output).  Results that are empty,
     or whose ``spu`` is missing or empty, are skipped without touching the
     database.  Otherwise the result's items are passed to ``SQL.replace``
     as keyword arguments.
     """
     # Parenthesised form prints the same on Python 2 and is valid syntax on
     # Python 3, where the bare ``print result`` statement does not compile.
     print(result)
     # ``.get`` treats a missing spu like an empty one instead of raising
     # KeyError.
     if not result or not result.get('spu'):
         return
     sql = SQL()
     sql.replace('wish', **result)
 def on_result(self, result):
     """Write a crawled result into the ``novel`` table.

     Results that are empty, or whose ``booktitle`` is missing or empty, are
     skipped without touching the database.  Otherwise the result's items
     are passed to ``SQL.replace`` as keyword arguments.
     """
     # ``.get`` treats a missing booktitle like an empty one instead of
     # raising KeyError.
     if not result or not result.get('booktitle'):
         return
     sql = SQL()
     sql.replace('novel', **result)
Beispiel #9
0
 def on_result(self, result):
     """Write a crawled result into the ``info`` table.

     The result is printed first (debug output).  Results that are empty,
     or whose ``title`` is missing or empty, are skipped without touching
     the database.  Otherwise the result's items are passed to
     ``SQL.replace`` as keyword arguments.
     """
     print(result)
     # ``.get`` treats a missing title like an empty one instead of raising
     # KeyError.
     if not result or not result.get('title'):
         return
     sql = SQL()
     sql.replace('info', **result)
Beispiel #10
0
from pyspider.libs.base_handler import *
from pyspider.database.mysql.mysqldb import SQL
import time

# Site root and URL templates.  ``answers_url_fmt`` takes ``name``;
# ``followers_url_fmt`` takes ``user_id`` and ``offset`` and asks for 20
# entries per request.
baseUrl = "https://www.zhihu.com/"
answers_url_fmt = "https://www.zhihu.com/people/{name}/answers"
followers_url_fmt = "https://www.zhihu.com/api/v4/members/{user_id}/followers?include=data%5B*%5D.answer_count%2Carticles_count%2Cgender%2Cfollower_count%2Cis_followed%2Cis_following%2Cbadge%5B%3F(type%3Dbest_answerer)%5D.topics&offset={offset}&limit=20"

# Connection settings handed to the SQL helper below.
# NOTE(review): the database credentials are hard-coded in source; consider
# loading them from configuration or the environment and rotating this
# password.
kwargs = dict(
    host='localhost',
    user='******',
    passwd='7222992dong',
    db='zhihu',
    charset='utf8',
)

# Module-level SQL helper, created once when this script is loaded.
sql = SQL(**kwargs)


class Handler(BaseHandler):

    headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Host": "www.zhihu.com",
        "User-Agent":
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.89 Safari/537.36",
        "authorization": "oauth c3cef7c66a1843f8b3a9e6a1e3160e20"
    }

    crawl_config = {"headers": headers, "timeout": 5000}