Beispiel #1
0
 def retry(self, url, retries, func, code=None):
     """Request ``url`` until the payload reports a non-negative code.

     The response body is decoded as UTF-8 JSON and the first ``code`` field
     found anywhere in it decides the outcome. A negative code triggers a
     wait (plus a fresh login and cookie for -403) followed by another
     request, until the attempt counter reaches 3.

     Retrying is done in a loop inside this method. Re-entering ``func``
     would discard its result and hand the failed payload back to the
     caller as if it had succeeded.

     :param url: address passed to ``self.startRequest``.
     :param retries: number of attempts already made; callers start at 0.
     :param func: kept for backward compatibility; no longer invoked.
     :param code: kept for backward compatibility; unused.
     :return: the decoded JSON payload whose code is non-negative.
     :raises ValueError: if the code is still negative once ``retries`` has
         reached 3.
     """
     while True:
         raw = self.startRequest(url=url, retries=retries)
         payload = json.loads(raw.content.decode("utf8", "ignore"))
         webcode = jsonpath.jsonpath(payload, "$..code")[0]
         self.Logger.info([url, "msg %d" % webcode])

         if webcode >= 0:
             print("SuccessCode %d " % webcode)
             return payload

         # Out of attempts: never report a failed payload as a success.
         if retries >= 3:
             raise ValueError("HTTPERROR OVER MAX RETRY TIME")

         if webcode == -403:
             print(webcode, "Need login. Wait 5 sec")
             time.sleep(5)
             Ey.RoboEasyLogin(self.key)
             print("Retry Login...%d " % retries)
             time.sleep(5)
             # Pick up the cookie produced by the login above.
             self.cookie = Ey.getCookie(self.key)
         else:
             pause = random.randint(5, 15)
             self.Logger.error(
                 ("ErrorCode %d :Sleep %d sec ..." % (webcode, pause)))
             time.sleep(pause)

         retries += 1
Beispiel #2
0
    def parse_detail(self, url, specify=0, **kwargs):
        """Fetch a detail page and yield one item per indicator column.

        :param url: page requested through ``self.Request``.
        :param specify: 1 to extract only the indicator named in
            ``kwargs['objname']``; 0 to extract every indicator.
        :param kwargs: ``objname`` is required when ``specify`` is 1 (the
            text after its last ":" selects the column); ``channelname`` is
            required when ``specify`` is 0 (used to build the item name).
        :return: generator yielding the result of calling the item object
            once per selected column.
        :raises ValueError: if ``specify`` is neither 1 nor 0. Because this
            is a generator, it is raised when iteration starts.
        """
        # Validate before importing, creating the item or touching the network.
        if specify not in (1, 0):
            raise ValueError("no such specify values! It must be 1 or 0")

        from lxml import html
        item = CommonItem.commonItem()

        if specify == 1:
            self.headers = {
                'cookie': giveCookie(method='set', code=self.channelname[1]),
                'user-agent': userAgent.user_agent,
            }

        response = self.Request(url=url, method='GET', callback=None)
        tree = html.fromstring(response)

        # Real lists, not map(): they are measured, indexed and zipped once
        # per column below, which a lazy map object cannot support.
        time_list = [
            EasyMethod.fuckMonthEnd(raw) for raw in tree.xpath(xpathRules.xtime)
        ]
        objname = [
            node.xpath("string()") for node in tree.xpath(xpathRules.xobj)[1:]
        ]
        unit = [
            re.sub(r"[\[\]]", "", raw) for raw in tree.xpath(xpathRules.xunit)
        ]

        if specify:
            # Later duplicates win, as with dict(zip(names, indexes)).
            position = {label: idx for idx, label in enumerate(objname)}
            obj = kwargs['objname'].split(":")[-1]
            # Only byte strings need decoding; text is used as it is.
            if isinstance(obj, bytes):
                obj = obj.decode("utf8")
            nums = [position[obj]]
        else:
            nums = range(len(objname))

        for n in nums:
            value = tree.xpath(xpathRules.xdata.format(n + 3))
            item.data = EasyMethod.KeepNum(dict(zip(time_list, value)))
            if specify == 1:
                item.objname = kwargs['objname']
            else:
                item.objname = "中国投资:" + kwargs['channelname'] + ":" + objname[
                    n]

            item.unit = unit[n]
            item.plat = 6
            item.freq = 4
            item.mode = {
                "mode": "Z",
                "url": url,
                "code": self.channelname[1],
                "name": self.channelname[0]
            }

            yield item()
Beispiel #3
0
    def parseMarketSeason(self, url_suffix, mod=2, **kwargs):
        """Collect market-share series into temporary Redis hashes, then yield them.

        For the years 2018 and 2017 the endpoint is queried window by window.
        Each non-empty value is stored with ``hset`` under
        ``tmpyiche:<suffix>:<name>``, keyed by the window's ``toTime``.
        Afterwards every matching hash is yielded and deleted.

        :param url_suffix: path appended to ``self.allow_domains[0]``; its
            last "/" segment becomes ``<suffix>`` in the temporary keys.
        :param mod: integer 0..4. Below 2 gives step 3 / freq 5, exactly 2
            gives step 1 / freq 4, above 2 gives step 6 / freq 5.
        :param kwargs: ``type`` selects the response layout. 1 reads
            ``series[*].data[*].name`` with ``symbolSize`` (falling back to
            ``value``); 0 reads the first ``yAxis[*].data`` and the first
            ``series[*].data``.
        :return: generator of ``(record, {"param": param})`` tuples. ``param``
            is one shared dict, so it holds the last window requested.
        :raises ValueError: if ``mod`` is not an integer in 0..4 (raised when
            iteration starts, since this is a generator).
        """
        # Type check first so a non-numeric mod is reported as ValueError.
        if not isinstance(mod, int) or mod < 0 or mod > 4:
            raise ValueError("mod取值只可以是0到4的整数值")
        if mod < 2:
            step, freq = 3, 5
        elif mod == 2:
            step, freq = 1, 4
        else:
            step, freq = 6, 5

        this = self.allow_domains[0] + url_suffix
        objprefix = "易车指数##市场大盘##份额趋势(近%d个月均值)" % step + "##%s"
        suffix = url_suffix.split("/")[-1]
        tmp = "tmpyiche:" + suffix + ":%s"
        param = {"timeType": "month"}

        for year in (2018, 2017):
            # NOTE(review): month starts at 0, so fromTime can read
            # "<year>-00-01" -- confirm the endpoint expects that.
            for month in range(0, 13, step):
                if month + step > 12:
                    continue
                try:
                    param['fromTime'] = "%d-%02d-01" % (year, month)
                    param['toTime'] = EasyMethod.fuckMonthEnd(year=year,
                                                              month=month +
                                                              step)
                    response = Yiche.startRequest(url=this, data=param)
                    # "type" selects which response layout to read.
                    if kwargs["type"] == 1:
                        objname = jsonpath(response,
                                           "$..series[*].data[*].name")
                        objdata = jsonpath(response,
                                           "$..series[*].data[*].symbolSize")
                        # jsonpath signals "no match" with False.
                        if objdata is False:
                            objdata = jsonpath(response,
                                               "$..series[*].data[*].value")

                    elif kwargs["type"] == 0:
                        objname = jsonpath(response, "$..yAxis[*].data")[0]
                        objdata = jsonpath(response, "$..series[*].data")[0]

                    print(objdata)
                    # Explicit loop: the writes are the whole point, and a
                    # lazy map() would never perform them.
                    for label, reading in zip(objname, objdata):
                        if reading:
                            self._Rconn.hset(tmp % label, param['toTime'],
                                             reading.decode("utf8"))

                except Exception as e:
                    # Best effort: one failed window must not stop the rest.
                    print(e)

        for k in self._Rconn.keys("tmpyiche:%s*" % suffix):
            data = self._Rconn.hgetall(k)
            objname = objprefix % k.split(":")[-1]
            yield {
                "objname": objname,
                "data": data,
                "unit": "%",
                "freq": freq
            }, {
                "param": param
            }
            print("delete %s" % k)
            self._Rconn.delete(k)
def initAccount():
    """Log in every account listed under the ``robo:uname`` SSDB key.

    The stored value is evaluated into an iterable and each element is
    passed to ``EasyMethod.RoboEasyLogin`` together with the constant 2.
    """
    from middles.middleAssist import ssdbAssist
    from middles.middleWare import EasyMethod

    store = ssdbAssist.SshSSDB().connect()
    # NOTE(review): eval() runs the stored text as Python code. The value is
    # presumably a list literal of account names -- confirm the store is
    # trusted before relying on this.
    accounts = eval(store.get("robo:uname"))

    for account in accounts:
        EasyMethod.RoboEasyLogin(account, 2)
Beispiel #5
0
    def __init__(self, hkey="Robo"):
        """Store the endpoints, the account cookie and the logger.

        :param hkey: account key kept as ``self.key`` and used to fetch the
            cookie through ``Ey.getCookie``.
        :raises IndexError: if the process has no first command-line
            argument, since ``sys.argv[1]`` is passed to the logger factory.
        """
        self.key = hkey

        # Entry point of the catalogue tree.
        self.start_urls = 'https://gw.datayes.com/rrp_adventure/web/supervisor/macro/level/0'
        # URL templates; each takes one value through "%s".
        self.urlContainer = [
            'https://gw.datayes.com/rrp_adventure/web/supervisor/macro/%s',
            'https://gw.datayes.com/rrp_adventure/web/dataCenter/indic/%s?compare=false'
        ]

        self.cookie = Ey.getCookie(self.key)
        self.Logger = logAsisst.imLog(sys.argv[1])()
Beispiel #6
0
    def Tubes(self, taskinfo):
        """Run one task through ``ModeOption`` and fill in its result fields.

        :param taskinfo: mapping with at least ``plat_id``, ``obj_name`` and
            ``obj_ext`` (a string that ``eval`` turns into a mapping with a
            ``mode`` key). It is updated in place.
        :return: the same ``taskinfo`` with ``report_time``, ``data`` and
            ``process_code`` set, or ``None`` if any step raised. The error
            is printed and logged, not propagated.
        """
        import datetime
        # Function-call form: same output for a single argument, and
        # consistent with the other print calls in this file.
        print(taskinfo)
        try:
            self.plat_id = taskinfo["plat_id"]
            # NOTE(review): eval() executes whatever text the task carries;
            # confirm that obj_ext only ever comes from a trusted producer.
            code = eval(taskinfo["obj_ext"])
            mode = code['mode']
            dataflow = self.ModeOption(mode=mode, objname=taskinfo['obj_name'])
            taskinfo['report_time'] = datetime.datetime.now().strftime(
                '%Y-%m-%d %H:%M:%S')
            taskinfo["data"] = EasyMethod.KeepNum(dataflow["data"])
            taskinfo['process_code'] = os.getpid()
            return taskinfo

        except Exception as e:
            print(e)
            self.Logger.error(["TubesError[%d]" % os.getpid(), e])
            return None
Beispiel #7
0
    def parse(self, code, **kwargs):
        """
        Download one indicator and turn the response into an item.

        :param code: node code, substituted into ``self.urlContainer[1]``
        :param kwargs: must contain ``retries``, which is forwarded to
            ``self.retry`` (callers are expected to start at 0)
        :return: the result of calling the populated ``RoboItem``
        """
        target = self.urlContainer[1] % code
        payload = self.retry(url=target,
                             retries=kwargs["retries"],
                             code=code,
                             func=self.parse)

        items = RoboItem()
        try:
            periods = jsonpath.jsonpath(payload, "$..periodDate")
            readings = jsonpath.jsonpath(payload, "$..dataValue")
            items.data = dict(zip(periods, readings))
        except Exception as e:
            # Any failure while pairing dates with values leaves the series
            # empty.
            print(e)
            items.data = {}

        indic = jsonpath.jsonpath(payload, "$..indic")[0]
        del payload

        items.update_time = indic["updateTime"]
        items.unit = indic["unit"]
        if indic["isUpdate"] == False:
            items.is_end = 1
        else:
            items.is_end = 0

        items.start_time = indic["beginDate"]
        items.end_time = indic["periodDate"]
        items.source = indic["dataSource"]
        items.frequency = Ey.frequency2id(indic["frequency"])
        # Keep the raw frequency text only when it mapped to id 100.
        if items.frequency == 100:
            items.value = indic["frequency"]
        else:
            items.value = ""
        items.ext = {
            "region": indic["region"],
            "country": indic["country"],
            "name": indic["indicName"]
        }
        # 'pcode' is equivalent to the 'note' field.
        items.pcode = indic["statType"]
        return items()
Beispiel #8
0
    def parseSales(self, code, name, **kwargs):
        """Yield sales records for every pid from ``kwargs['pid']`` up to 6.

        Each pid is requested in turn inside a loop. A recursive call at the
        end would only create a generator that nobody iterates, so later
        pids would never be fetched.

        :param code: value sent as ``value`` in the request parameters.
        :param name: prefix of every yielded ``objname``.
        :param kwargs: must contain ``pid``, the first id to request.
        :return: generator of ``(record, {"param": params})`` tuples. If the
            starting ``pid`` is above 6, a single ``0`` is yielded and no
            request is made.
        """
        pid = kwargs['pid']
        if pid > 6:
            # Keep the 0 marker for an out-of-range start, but actually stop.
            yield 0
            return

        url = self.allow_domains[0] + self.obj_urls[9]
        while pid <= 6:
            param9 = {"id": pid, "value": code}
            response = Yiche.startRequest(url, data=param9)

            objdetail = jsonpath(response, "$..thead[*].name")[0]
            objname = objdetail[2]
            # Keep only the digits of the first header entry.
            objtime = EasyMethod.fuckMonthEnd(
                re.sub("[^0-9]", "", objdetail[0]))

            for obj in jsonpath(response, "$..tbody")[0]:
                yield {
                    "objname": "%s##%s##%s" % (name, objname, obj['name']),
                    "data": {
                        objtime: obj['index']
                    },
                    "unit": "辆",
                    "freq": 4
                }, {
                    "param": param9
                }

            pid += 1
Beispiel #9
0
# Module start-up. Statement order matters: the Slice file is loaded before
# GMQ is imported, and the import path is extended before the util import.
# Presumably loadSlice is what makes GMQ importable -- TODO confirm.
Ice.loadSlice("../util/gmqi.ice")
import GMQ
sys.path.append("../")
from util.gmqutil import sendMessagetoQueue
import signal
import json

from gevent.queue import Queue, Empty
import gevent.monkey
# NOTE(review): the socket patch runs after the imports above; confirm that
# none of them needs to see the patched socket module.
gevent.monkey.patch_socket()

from middles.middleAssist import logAsisst
from tubes import RoboTubes
from middles.middleWare import EasyMethod

# Runs at import time, before the constants below are defined.
EasyMethod.RoboEasyLogin("Robo")
QUEUE_IP = '10.0.0.6'
QUEUE_PORT = 22345
READ_QUEUE_NAME = 'iMqDataSnatch_luobo_d'
DATA_COLLECT_QUEUE_NAME = 'iMqIMDataCollect'
# Repeats the host and port literals of QUEUE_IP / QUEUE_PORT; keep them in
# sync when either changes.
IMMQ_PROXY = 'gmqObjectId:tcp -h 10.0.0.6  -p 22345'

LOGGER_NAME = "RoboCrawlGetTask"
# imLog(...) returns a callable; calling it yields the object used as `lg`.
lg = logAsisst.imLog(LOGGER_NAME)()


def signal_handler(signal, frame):
    """Announce the interrupt, destroy ``ic`` and exit with status 0.

    Has the two-argument shape of a signal callback; neither argument is
    used. ``ic`` is a module-level name defined outside this function.
    """
    print('You pressed Ctrl+C!')
    ic.destroy()
    # Same effect as sys.exit(0), which raises SystemExit itself.
    raise SystemExit(0)