def parse_detail(self, url, specify=0, **kwargs):
    """Download a detail page and yield one populated item per indicator.

    The page at ``url`` is fetched through ``self.Request`` and parsed with
    lxml. Time labels, indicator names and units are read with the XPath
    expressions in ``xpathRules``; each selected indicator's values are
    paired with the time labels and stored on the item before it is yielded.

    :param url: address of the detail page to fetch.
    :param specify: 1 to extract only the indicator named by
        ``kwargs['objname']`` (also replaces ``self.headers`` with a cookie
        header first); 0 to extract every indicator found on the page.
    :param kwargs: ``objname`` is required when ``specify`` is 1 (only the
        part after the last ``":"`` is matched against the page);
        ``channelname`` is required when ``specify`` is 0.
    :return: generator yielding the result of ``item()`` once per
        extracted indicator.
    :raises ValueError: if ``specify`` is neither 0 nor 1. Because this is
        a generator, the error surfaces when it is first advanced.
    :raises KeyError: if a required keyword argument is missing or the
        requested indicator name is not present on the page.
    """
    # Reject bad arguments before importing, allocating or hitting the network.
    if specify not in (1, 0):
        raise ValueError("no such specify values! It must be 1 or 0")

    from lxml import html

    item = CommonItem.commonItem()
    targeted = specify == 1
    if targeted:
        self.headers = {
            'cookie': giveCookie(method='set', code=self.channelname[1]),
            'user-agent': userAgent.user_agent,
        }

    response = self.Request(url=url, method='GET', callback=None)
    tree = html.fromstring(response)

    # Real lists (not lazy ``map`` objects): these are indexed, measured with
    # ``len`` and re-zipped on every loop iteration below.
    time_list = [
        EasyMethod.fuckMonthEnd(label)
        for label in tree.xpath(xpathRules.xtime)
    ]
    # The first matched node is skipped -- presumably a header cell;
    # NOTE(review): confirm against xpathRules.xobj.
    objname = [
        node.xpath("string()")
        for node in tree.xpath(xpathRules.xobj)[1:]
    ]
    # Strip the square brackets that surround each unit label.
    unit = [
        re.sub(r"[\[\]]", "", raw_unit)
        for raw_unit in tree.xpath(xpathRules.xunit)
    ]

    if targeted:
        wanted = kwargs['objname'].split(":")[-1]
        # Decode only byte strings. Calling ``decode`` on text triggers an
        # implicit ASCII encode on Python 2 (failing for non-ASCII names)
        # and does not exist on Python 3.
        if isinstance(wanted, bytes):
            wanted = wanted.decode("utf8")
        positions = {name: pos for pos, name in enumerate(objname)}
        nums = [positions[wanted]]
    else:
        nums = range(len(objname))

    for n in nums:
        # NOTE(review): the "+ 3" offset presumably maps the indicator index
        # to its data column in the page table -- confirm with xpathRules.xdata.
        value = tree.xpath(xpathRules.xdata.format(n + 3))
        item.data = EasyMethod.KeepNum(dict(zip(time_list, value)))
        if targeted:
            item.objname = kwargs['objname']
        else:
            item.objname = (
                "中国投资:" + kwargs['channelname'] + ":" + objname[n]
            )
        item.unit = unit[n]
        # NOTE(review): 6 and 4 are platform / frequency codes whose meaning
        # is defined outside this block.
        item.plat = 6
        item.freq = 4
        item.mode = {
            "mode": "Z",
            "url": url,
            "code": self.channelname[1],
            "name": self.channelname[0]
        }
        # The same item object is refilled on every pass; ``item()`` is what
        # callers receive.
        yield item()
def Tubes(self, taskinfo):
    """Execute one task description and return it enriched with results.

    Reads ``plat_id``, ``obj_ext`` and ``obj_name`` from ``taskinfo``,
    dispatches to ``self.ModeOption`` using the ``mode`` found in the
    evaluated ``obj_ext``, then writes ``report_time``, ``data`` and
    ``process_code`` back into ``taskinfo``.

    :param taskinfo: mapping describing the task; mutated in place.
    :return: the same ``taskinfo`` mapping on success, ``None`` if any
        step raised (the error is printed and sent to ``self.Logger``).
    """
    import datetime

    print(taskinfo)
    try:
        self.plat_id = taskinfo["plat_id"]
        # SECURITY: eval() executes arbitrary Python from the task payload.
        # If obj_ext is always a plain literal, ast.literal_eval would be
        # the safe replacement -- left unchanged here because the stored
        # format is not visible from this block.
        code = eval(taskinfo["obj_ext"])
        dataflow = self.ModeOption(
            mode=code['mode'],
            objname=taskinfo['obj_name'],
        )
        taskinfo['report_time'] = datetime.datetime.now().strftime(
            '%Y-%m-%d %H:%M:%S')
        taskinfo["data"] = EasyMethod.KeepNum(dataflow["data"])
        taskinfo['process_code'] = os.getpid()
        return taskinfo
    except Exception as e:
        # Best-effort boundary: report the failure and let the caller see
        # None rather than propagating the exception.
        print(e)
        self.Logger.error(["TubesError[%d]" % os.getpid(), e])
        return None