Beispiel #1
0
    def parse_detail(self, url, specify=0, **kwargs):
        """Fetch a detail page and yield one populated item per indicator.

        :param url: address of the detail page to request.
        :param specify: 1 to crawl only the indicator named by
            ``kwargs['objname']``; 0 to crawl every indicator on the page.
        :param kwargs: ``objname`` is read when ``specify`` is 1,
            ``channelname`` is read when ``specify`` is 0.
        :return: generator yielding the result of calling the populated item.
        :raises ValueError: if ``specify`` is neither 1 nor 0. Because this is
            a generator, the error surfaces on the first iteration.
        :raises KeyError: if a required keyword argument is missing, or the
            requested indicator name is not present on the page.
        """
        # Validate before importing or building anything so that a bad
        # argument fails fast without side effects.
        if specify not in (1, 0):
            raise ValueError("no such specify values! It must be 1 or 0")

        from lxml import html

        item = CommonItem.commonItem()

        if specify == 1:
            # Targeted crawls replace the request headers with a cookie
            # obtained for this channel's code.
            self.headers = {
                'cookie': giveCookie(method='set', code=self.channelname[1]),
                'user-agent': userAgent.user_agent,
            }

        response = self.Request(url=url, method='GET', callback=None)
        tree = html.fromstring(response)

        # Real lists (not ``map`` objects): they are indexed, measured with
        # ``len`` and reused on every loop iteration below.
        time_list = [EasyMethod.fuckMonthEnd(raw)
                     for raw in tree.xpath(xpathRules.xtime)]

        # The first match is skipped -- presumably a header cell; confirm
        # against xpathRules.xobj.
        objname = [node.xpath("string()")
                   for node in tree.xpath(xpathRules.xobj)[1:]]

        # Strip the square brackets surrounding each unit label.
        unit = [re.sub(r"[\[\]]", "", raw)
                for raw in tree.xpath(xpathRules.xunit)]

        if specify:
            # Map each indicator name to its position (last one wins on
            # duplicates), then pick the single requested position.
            positions = {name: idx for idx, name in enumerate(objname)}
            obj = kwargs['objname'].split(":")[-1]
            # Decode only byte strings: text input is already comparable with
            # the names extracted by lxml, and decoding it again would fail.
            if isinstance(obj, bytes):
                obj = obj.decode("utf8")
            nums = [positions[obj]]
        else:
            nums = range(len(objname))

        for n in nums:
            # NOTE(review): the +3 offset presumably skips leading non-data
            # columns in the page's table -- confirm against xpathRules.xdata.
            value = tree.xpath(xpathRules.xdata.format(n + 3))
            item.data = EasyMethod.KeepNum(dict(zip(time_list, value)))

            if specify == 1:
                item.objname = kwargs['objname']
            else:
                item.objname = ("中国投资:" + kwargs['channelname'] + ":" +
                                objname[n])

            item.unit = unit[n]
            item.plat = 6
            item.freq = 4
            item.mode = {
                "mode": "Z",
                "url": url,
                "code": self.channelname[1],
                "name": self.channelname[0]
            }

            yield item()
Beispiel #2
0
    def Tubes(self, taskinfo):
        """Run one task and return its record enriched with the results.

        :param taskinfo: mapping describing the task. The keys ``plat_id``,
            ``obj_ext`` and ``obj_name`` are read; ``report_time``, ``data``
            and ``process_code`` are written into the same mapping.
        :return: the same ``taskinfo`` mapping on success, or ``None`` when
            any step raised (the error is printed and logged, not re-raised).
        """
        import datetime

        # Parenthesised form: prints the same under Python 2 and stays valid
        # syntax on Python 3, matching the ``print(e)`` call below.
        print(taskinfo)
        try:
            self.plat_id = taskinfo["plat_id"]
            # SECURITY(review): eval() executes arbitrary expressions. If
            # ``obj_ext`` can come from an untrusted source, switch to
            # ast.literal_eval or json.loads once the stored format is
            # confirmed to be a plain literal.
            code = eval(taskinfo["obj_ext"])
            mode = code['mode']
            dataflow = self.ModeOption(mode=mode, objname=taskinfo['obj_name'])
            taskinfo['report_time'] = datetime.datetime.now().strftime(
                '%Y-%m-%d %H:%M:%S')
            taskinfo["data"] = EasyMethod.KeepNum(dataflow["data"])
            taskinfo['process_code'] = os.getpid()
            return taskinfo

        except Exception as e:
            # Deliberate best-effort boundary: a failing task is reported and
            # logged with the worker's pid, and the caller receives None.
            print(e)
            self.Logger.error(["TubesError[%d]" % os.getpid(), e])
            return None