def parse1(self, response):
    """Parse a detail page and yield the item carried in ``response.meta``.

    The page body is parsed with BeautifulSoup and handed to
    ``spider_func.df_output`` together with the spider name and the item's
    ``parcel_status``; the two values it returns are stored on the item as
    ``content_detail`` and ``monitor_extra``.

    Parsing is best-effort: if anything in that step raises, the traceback
    is reported through ``log_obj.error`` and the item is still yielded
    (without the fields that could not be filled in).

    :param response: response object exposing ``text``, ``url`` and a
        ``meta`` dict that contains the key ``'item'``.
    :return: generator yielding exactly one item.
    """
    bs_obj = bs4.BeautifulSoup(response.text, 'html.parser')
    item = response.meta['item']
    try:
        item['content_detail'], item['monitor_extra'] = spider_func.df_output(
            bs_obj, self.name, item['parcel_status'])
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit / GeneratorExit are not swallowed.
        # The message text means "could not be parsed in <spider>(<url>)".
        log_obj.error(
            item['monitor_url'],
            "%s(%s)中无法解析\n%s" % (self.name, response.url,
                                 traceback.format_exc()))
    # The yield lives outside the try block: closing the generator while it
    # is suspended here must not be mistaken for a parse failure, and the
    # item is emitted exactly once on both the success and failure paths.
    yield item
def parse2(self, response): bs_obj = bs4.BeautifulSoup(response.text, 'html.parser') item = response.meta['item'] try: item['content_detail'],item['monitor_extra'] = spider_func.df_output(bs_obj,self.name,item['parcel_status']) pd.DataFrame([item['monitor_title'],]).to_csv(r'C:\Users\Administrator\Desktop\data.csv', mode='a', encoding='utf_8_sig') ser = item['content_detail'][1] ser.index = [re.sub(ur':','',s) for s in item['content_detail'][0]] addition = ser[u'建设情况'].replace('\n',u',')
def parse0(self, response):
    """Parse a detail page, keep the previous ``monitor_extra`` as ``city0``.

    Steps:

    1. Deep-copy the item's current ``monitor_extra`` value (``city0``)
       before it is overwritten.
    2. Parse the page with BeautifulSoup and call ``spider_func.df_output``
       with the soup, the spider name and the item's ``parcel_status``; its
       two return values replace ``content_detail`` and ``monitor_extra``.
    3. If the new ``monitor_extra`` is a pandas DataFrame, add ``city0`` to
       it as a column; otherwise replace it with a one-row DataFrame whose
       only column is ``city0``.

    Parsing is best-effort: if any step raises, the traceback is reported
    through ``log_obj.error`` and the item is still yielded in whatever
    state it reached.

    :param response: response object exposing ``text``, ``url`` and a
        ``meta`` dict that contains the key ``'item'``.
    :return: generator yielding exactly one item.
    """
    bs_obj = bs4.BeautifulSoup(response.text, 'html.parser')
    item = response.meta['item']
    try:
        # Snapshot the incoming value; df_output's result overwrites it below.
        city0 = copy.deepcopy(item['monitor_extra'])
        item['content_detail'], item['monitor_extra'] = spider_func.df_output(
            bs_obj, self.name, item['parcel_status'])
        if isinstance(item['monitor_extra'], pd.DataFrame):
            item['monitor_extra']['city0'] = city0
        else:
            item['monitor_extra'] = pd.DataFrame({"city0": city0}, index=[0])
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit / GeneratorExit are not swallowed.
        # The message text means "could not be parsed in <spider>(<url>)".
        log_obj.error(
            item['monitor_url'],
            "%s(%s)中无法解析\n%s" % (self.name, response.url,
                                 traceback.format_exc()))
    # The yield lives outside the try block: closing the generator while it
    # is suspended here must not be mistaken for a parse failure, and the
    # item is emitted exactly once on both the success and failure paths.
    yield item