def segement_first(self, response):
    """Collect detail URLs from a JSON listing response (level 1).

    Reads ``Index_Url``, ``segement`` and ``First`` from ``response.meta``.
    When ``First`` contains a ``'json'`` key, the response body is parsed as
    JSON, every key of ``segement['index']`` except the last is followed
    with ``.get``, and the last key is then read from each entry of the
    collection found there. The collected values are handed to ``R_2_A``
    along with the index URL, the spider name, ``level=1`` and ``is_sege=1``.

    Missing or empty configuration in ``response.meta`` is treated as
    "nothing to do" instead of raising.

    :param response: response object exposing ``meta`` (dict-like) and
        ``body_as_unicode()``.
    :returns: ``None``; the list produced by ``R_2_A`` is not used further
        inside this method.
    """
    meta = response.meta
    index_url = meta.get('Index_Url')
    segement = meta.get('segement')
    first = meta.get('First')
    level = 1
    is_sege = 1

    # NOTE(review): the 'json' flag is looked up on ``First`` while the key
    # path is taken from ``segement`` -- confirm this pairing is intended.
    if not first or 'json' not in first or not segement:
        return

    key_path = segement.get('index')
    if not key_path:
        return

    # Descend through every key but the last: the innermost container has to
    # be iterated entry by entry, so stop one level above the leaf key.
    node = json.loads(response.body_as_unicode())
    for key in key_path[:-1]:
        node = node.get(key)

    leaf_key = key_path[-1]
    detail_url = [entry.get(leaf_key) for entry in node]

    # Best effort, as in the original: any failure in R_2_A is printed and
    # swallowed. R_2_A is defined elsewhere; presumably it turns the raw
    # values into absolute URLs -- TODO confirm.
    try:
        detail_url = R_2_A(index_url, detail_url, self.name, level, is_sege)
    except Exception as e:
        print("%s : %s" % (Exception, e))
def segement_second(self, response):
    """Collect detail URLs from a JSON listing response (level 0).

    Reads ``Index_Url`` and ``segement`` from ``response.meta``. When
    ``segement`` contains a ``'json'`` key, the response body is parsed as
    JSON, every key of ``segement['index']`` except the last is followed
    with ``.get``, and the last key is then read from each entry of the
    collection found there. The collected values are handed to ``R_2_A``
    along with the index URL, the spider name, ``level=0`` and ``is_sege=1``.

    Meta values are accepted whether or not they are present and are checked
    at the point of use; a missing or empty ``segement`` means there is
    nothing to do here. (The original author's note says the flow should
    then go straight to ``final_parse``; that hand-off is not part of this
    method.)

    :param response: response object exposing ``meta`` (dict-like) and
        ``body_as_unicode()``.
    :returns: ``None``; the list produced by ``R_2_A`` is not used further
        inside this method.
    """
    meta = response.meta
    index_url = meta.get('Index_Url')
    segement = meta.get('segement')
    level = 0
    is_sege = 1

    if not segement or 'json' not in segement:
        return

    key_path = segement.get('index')
    if not key_path:
        return

    # Descend through every key but the last: the innermost container has to
    # be iterated entry by entry, so stop one level above the leaf key.
    node = json.loads(response.body_as_unicode())
    for key in key_path[:-1]:
        node = node.get(key)

    leaf_key = key_path[-1]
    detail_url = [entry.get(leaf_key) for entry in node]

    # Best effort, as in the original: any failure in R_2_A is printed and
    # swallowed. R_2_A is defined elsewhere; presumably it turns the raw
    # values into absolute URLs -- TODO confirm.
    try:
        detail_url = R_2_A(index_url, detail_url, self.name, level, is_sege)
    except Exception as e:
        print("%s : %s" % (Exception, e))
i = T_T_P(i,self.name,level) url = urls.format(page=str(i)) if C_U_V(url): request = Request(url,callback = self.parse_first,dont_filter=True) request.meta['Index_Url'] = url yield request else: continue else: detail_url = [] if not Zero.has_key('json'): for xpath in Zero['xpath']: for url in R_2_A(Index_Url,response.xpath(xpath).extract(),self.name,level,is_sege): detail_url.append(url) else: res_json = json.loads(response.body_as_unicode()) #递归读取最底层的key对应的value值,我去,想出来了~~[这里是要for一遍最底层的list,所以要读到len-1处,然后在得到detail_url] depth = 0 length = len(Zero['index']) while depth < length - 1: res_json = res_json.get(Zero['index'][depth]) depth += 1 #print "now the res_json is %s"%res_json for i in res_json: detail_url.append(i.get(Zero['index'][length-1])) try: detail_url = R_2_A(Index_Url,detail_url,self.name,level,is_sege) except Exception,e: