def json_parser_detail(self, response):
    """Store one product row in the ``jyhgoods`` table and return its name.

    The row is made of the fields carried over in ``response.save`` plus a
    ``content`` string assembled from the ``propertyInfoList`` entries of the
    JSON body, each rendered as ``<propertykey>:<propertyValue>;``.

    Args:
        response: Object exposing ``save`` (a mapping holding every field
            named in ``saved_fields`` below) and ``json`` (a mapping with a
            ``propertyInfoList`` sequence of mappings that each provide
            ``propertykey`` and ``propertyValue``).

    Returns:
        The value of ``response.save['name']``.

    Raises:
        KeyError: If a required key is absent from ``response.save`` or from
            the JSON body.
    """
    saved = response.save
    saved_fields = (
        "code",
        "name",
        "img",
        "skuAdv",
        "sellPrice",
        "marketPrice",
        "stockNum",
        "skuPropertyInfo",
        "salesCount",
        "points",
        "brandName",
        "sysDateTime",
    )
    result = {field: saved[field] for field in saved_fields}
    # join() builds the description in one pass instead of re-copying the
    # accumulated string on every loop iteration.
    result["content"] = "".join(
        prop['propertykey'] + ":" + prop['propertyValue'] + ";"
        for prop in response.json['propertyInfoList']
    )

    # The SQL object is created last so that malformed input raises before
    # any SQL object exists.
    sql = SQL()
    sql.replace('jyhgoods', **result)
    return saved['name']
def on_result(self, result):
    """Store a crawled record in the ``jobs`` table.

    Results that are empty, or whose ``title`` is missing or falsy, are
    skipped without creating an SQL object.

    Args:
        result: Mapping of column names to values, or a falsy value.
    """
    # A parenthesised single argument prints the same text under the
    # Python 2 print statement and is also valid Python 3.
    print(result)
    # .get() treats a missing 'title' like an empty one instead of raising
    # KeyError.
    if not result or not result.get('title'):
        return
    sql = SQL()
    sql.replace('jobs', **result)
def on_result(self, result):
    """Insert typed entries into the ``Domains``, ``Hosts`` or ``Samples`` table.

    ``result`` is either a single entry (a dict) or a list of entries. Each
    entry carries a ``type`` key (``'domain'``, ``'host'`` or ``'sample'``)
    that selects the destination table; the remaining keys are passed to
    ``SQL.insert`` as keyword arguments. Entries with any other type are
    ignored, as are payloads that are neither a dict nor a list.

    Args:
        result: A dict, a list of dicts, or a falsy value.

    Raises:
        KeyError: If an entry has no ``type`` key.
    """
    if not result:
        return
    if isinstance(result, dict):
        entries = [result]
    elif isinstance(result, list):
        entries = result
    else:
        # Nothing would be stored for other payloads, so return before an
        # SQL object is created.
        return

    tables = {'domain': 'Domains', 'host': 'Hosts', 'sample': 'Samples'}
    sql = SQL()
    for entry in entries:
        # Pop 'type' from a copy so the caller's dict is left untouched.
        fields = dict(entry)
        tablename = tables.get(fields.pop('type'))
        if tablename is not None:
            sql.insert(tablename=tablename, **fields)
def on_result(self, result):
    """Insert a crawled record into the ``article`` table.

    Results that are empty, or whose ``original_id`` is missing or falsy,
    are skipped without creating an SQL object.

    Args:
        result: Mapping of column names to values, or a falsy value.
    """
    # .get() treats a missing 'original_id' like an empty one instead of
    # raising KeyError.
    if not result or not result.get('original_id'):
        return
    sql = SQL()
    sql.insert('article', **result)
def __init__(self):
    """Call ``SQL.connect`` on this instance when ``self.connect`` is truthy."""
    # NOTE(review): if ``connect`` resolves to a method rather than a flag,
    # this test is always true - confirm against the class definition.
    if not self.connect:
        return
    SQL.connect(self)
def on_result(self, result):
    """Stamp a non-empty result with the current local time and insert it.

    The timestamp is written into ``result['insert_time']`` using the format
    ``%Y-%m-%d %H:%M:%S``; the whole mapping is then passed to ``SQL.insert``
    for the ``reeoo_detail`` table. Falsy results are ignored.

    Args:
        result: Mapping of column names to values, or a falsy value. It is
            modified in place.
    """
    if result:
        store = SQL()
        stamp = time.strftime('%Y-%m-%d %H:%M:%S')
        result['insert_time'] = stamp
        store.insert('reeoo_detail', **result)
def on_result(self, result):
    """Store a crawled record in the ``wish`` table.

    Results that are empty, or whose ``spu`` is missing or falsy, are
    skipped without creating an SQL object.

    Args:
        result: Mapping of column names to values, or a falsy value.
    """
    # A parenthesised single argument prints the same text under the
    # Python 2 print statement and is also valid Python 3.
    print(result)
    # .get() treats a missing 'spu' like an empty one instead of raising
    # KeyError.
    if not result or not result.get('spu'):
        return
    sql = SQL()
    sql.replace('wish', **result)
def on_result(self, result):
    """Store a crawled record in the ``novel`` table.

    Results that are empty, or whose ``booktitle`` is missing or falsy, are
    skipped without creating an SQL object.

    Args:
        result: Mapping of column names to values, or a falsy value.
    """
    # .get() treats a missing 'booktitle' like an empty one instead of
    # raising KeyError.
    if not result or not result.get('booktitle'):
        return
    sql = SQL()
    sql.replace('novel', **result)
def on_result(self, result):
    """Print a crawled record and store it in the ``info`` table.

    Every result is printed. Results that are empty, or whose ``title`` is
    missing or falsy, are then skipped without creating an SQL object.

    Args:
        result: Mapping of column names to values, or a falsy value.
    """
    print(result)
    # .get() treats a missing 'title' like an empty one instead of raising
    # KeyError.
    if not result or not result.get('title'):
        return
    sql = SQL()
    sql.replace('info', **result)
from pyspider.libs.base_handler import *
from pyspider.database.mysql.mysqldb import SQL
import time

# Site root and URL templates; the templates are filled in with str.format
# style placeholders ({name}, {user_id}, {offset}).
baseUrl = "https://www.zhihu.com/"
answers_url_fmt = "https://www.zhihu.com/people/{name}/answers"
followers_url_fmt = "https://www.zhihu.com/api/v4/members/{user_id}/followers?include=data%5B*%5D.answer_count%2Carticles_count%2Cgender%2Cfollower_count%2Cis_followed%2Cis_following%2Cbadge%5B%3F(type%3Dbest_answerer)%5D.topics&offset={offset}&limit=20"

# Keyword arguments handed to SQL() below.
# NOTE(review): the database password is hard-coded in source; consider
# loading it from the environment or a config file kept out of version
# control.
kwargs = {
    'host': 'localhost',
    'user': '******',
    'passwd': '7222992dong',
    'db': 'zhihu',
    'charset': 'utf8'
}

# Module-level SQL object, created at import time from the settings above.
sql = SQL(**kwargs)


class Handler(BaseHandler):
    # HTTP headers referenced by crawl_config below.
    # NOTE(review): the "authorization" value is a hard-coded token; confirm
    # it is meant to be committed.
    headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Host": "www.zhihu.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.89 Safari/537.36",
        "authorization": "oauth c3cef7c66a1843f8b3a9e6a1e3160e20"
    }
    # Crawl settings carrying the headers above and a timeout of 5000
    # (unit not visible here - TODO confirm against pyspider's crawl_config).
    crawl_config = {"headers": headers, "timeout": 5000}