Example #1
def main(host, user, password):
        host = host
        user = user
        password = password
        email = []
        vip_url = 'http://' + host + '/nitro/v1/config/lbvserver?view=summary'
        stats = session.get(vip_url,  auth=('xx', 'xx'))
        list = json.loads(stats.content)
        vip = jsonpath.jsonpath(list, "$..lbvserver[?(@.curstate=='UP')]")
        down_vip = jsonpath.jsonpath(list, "$..lbvserver[?(@.curstate=='DOWN')]")
        if down_vip:
            for down in down_vip:
                down_vip_name = down['name']
                email.append(down_vip_name + "vip is down.")
        for line in vip:
            vip_name = line['name']
            url = 'http://' + host + '/nitro/v1/config/lbvserver_service_binding/' + vip_name
            binding = session.get(url,  auth=('xxx', 'xxx'))
            servers = json.loads(binding.content)
            for obj in servers["lbvserver_service_binding"]:
                svrhost = obj['ipv46']
                server = commands.getoutput("psql -t --host localhost --port xxx --dbname xx --user www -c \"select xxx from xxx.xx where address = '"+ svrhost +"'\"")
                if obj["curstate"] == "DOWN":
                    email.append(svrhost + " in the " + vip_name + " is down")
                if server == "f":
                    email.append(svrhost + " in the " + vip_name + " vip is set to FALSE in opsdb but is in the LB")
                if server == "":
                    email.append(svrhost + " in the " + vip_name + " vip is NOT in opsdb")
        if email:
            text = "\n".join(email)
            message = 'Subject: %s\n\n%s' % (SUBJECT, text)

            server = smtplib.SMTP('localhost')
            server.sendmail(FROM, TO,  message)
            server.quit()
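Example #1 leans on jsonpath filter expressions of the form [?(@.field=='VALUE')] to split the load balancer's virtual servers by state. The following is a small, self-contained sketch of that filter idiom; the miniature lbvserver payload is invented for illustration and is not part of the original script.

import jsonpath

payload = {"lbvserver": [
    {"name": "vip-a", "curstate": "UP"},
    {"name": "vip-b", "curstate": "DOWN"},
]}

# Filter the lbvserver entries by their curstate field.
up_vips = jsonpath.jsonpath(payload, "$..lbvserver[?(@.curstate=='UP')]")
down_vips = jsonpath.jsonpath(payload, "$..lbvserver[?(@.curstate=='DOWN')]")
print(up_vips)    # expected: [{'name': 'vip-a', 'curstate': 'UP'}]
print(down_vips)  # expected: [{'name': 'vip-b', 'curstate': 'DOWN'}]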
Example #2
    def get_content(self):
        jsonobj = json.loads(self.get_html())
        # Product name
        namelist = jsonpath.jsonpath(jsonobj, '$..title')
        # Product price
        pricelist = jsonpath.jsonpath(jsonobj, '$..promotionPrice')
        # Product image
        imglist = jsonpath.jsonpath(jsonobj, '$..img')
        listdata = zip(imglist,namelist,pricelist)



        for item in listdata:
            # print(item[1])
            try:
                result = self.cursor.execute(
                    "insert into myduodian_aiduodian (image,goodName,price) VALUES (%s,%s,%s)",[item[0],item[1],item[2]])
                self.db.commit()
                print(result)
            except Exception as e:
                self.db.rollback()
                print('失败')

        # Close the connection and release resources
        self.db.close()
Example #3
def x_jsonpath(selector):
    """select elements with jsonpath in dict stream.
    """

    from jsonpath import jsonpath
    for val in vals_from_json():
        print jsonpath(val, selector)
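The snippet above just prints whatever jsonpath() yields for each value in the stream. For reference, here is a minimal sketch of the return convention these snippets depend on: a list of matched values on success, and False (not an empty list) when nothing matches. The sample document is invented for illustration.

import jsonpath

doc = {"store": {"book": [{"title": "Sayings of the Century"}, {"title": "Moby Dick"}]}}

titles = jsonpath.jsonpath(doc, "$..title")   # list of matched values
missing = jsonpath.jsonpath(doc, "$..price")  # no match, so False rather than []
print(titles)   # ['Sayings of the Century', 'Moby Dick']
print(missing)  # False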
Example #4
 def getresponse(self):
     url = "https://api.myjson.com/bins/1leee"
     response = urlopen(url)
     data = response.read()
     somejson = json.loads(data)
     match = jsonpath.jsonpath(somejson, '$..title')
     print match
def getStat(name):
    global last_update, result, url

    # If time delta is > 20 seconds, then update the JSON results
    now = time.time()
    diff = now - last_update
    if diff > 20:
        print "[elasticsearch] " + str(diff) + " seconds passed - Fetching " + url
        result = json.load(urllib.urlopen(url))
        last_update = now

    JsonPathName = keyToPath[name]
    tmp = jsonpath.jsonpath(result, JsonPathName)

    # Check to make sure we have a valid result
    # JsonPath returns False if no match found
    if not tmp:
        return None

    # Convert List to String
    try:
        val = " ".join(["%s" % el for el in tmp])
    except TypeError:
        return None

    # Check for integer only result
    if val.isdigit():
        val = int(val)
    print "********** " + name + ": " + str(val)
    return val
 def returnpath(self,json,query):
     """ returnpath('topping[*].type')
         Returns data structure from json, else false """
     try:
         return jsonpath.jsonpath(json,query)
     except TypeError:  # Return None if the path is missing.
         return None
Example #7
  def custom_parse(self, data): 
    data = json.loads(data)
    messages = jsonpath(data, self.paths[0])

    if not messages: 
      raise JSONFailedException(self.paths[0])

    return  [Comment('Not Implemented', 'Not Implemented', text) for text in messages]
Example #8
    def assertGet(entity, jpath, dctAssert=None):
        err = 0; message = '(%s)' % jpath

        ret = jsonpath.jsonpath(entity, jpath)
        if not ret: err = 1

        if dctAssert:
            dctAssert['errnum'] += err
            dctAssert['asserts'].append('%s:%s' % (message, err==0))

        return ret and ret[0]
Example #9
    def parse(self, data):

        qs = self.inf.get('query')
        t = self.inf.get('type', 'object')
        if t=='object':
            lr = '{}'
        else:
            lr = '[]'
        l,r = data.find(lr[0]),data.rfind(lr[-1])
        data = data[l:r+1]
        return jsonpath(json.loads(data), qs)
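Example #9 (like Example #10 below, and the callback-stripping near the end of the collection) copes with responses that wrap JSON in extra text, such as a JSONP callback, by slicing from the first opening brace or bracket to the last closing one before parsing. A short sketch of that trick on an invented wrapped response:

import json
import jsonpath

raw = 'callback({"items": [{"id": 1}, {"id": 2}]});'
l, r = raw.find('{'), raw.rfind('}')
data = json.loads(raw[l:r + 1])          # keep only the JSON object itself
print(jsonpath.jsonpath(data, '$..id'))  # [1, 2]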
Example #10
    def parse_json_item(self, response, loop, fields):

        meta = response.meta
        enc = getattr(self, 'json_enc', 'utf-8')
        txt = unicode(response.body, encoding=enc, errors='ignore')

        if hasattr(self, 'json_type') and self.json_type=='list':
            l, r = txt.find('['), txt.rfind(']')
        else:
            l, r = txt.find('{'), txt.rfind('}')
        obj = json.loads(txt[l:r+1])
        self.macro.update({'URL':response.url, 'keyword':meta.get('keyword', '')})

        for e in jsonpath.jsonpath(obj, loop or '$[]') or []:

            item = Item()

            for k,v in fields.iteritems():
                if 'value' in v:
                    v_x = self.macro.expand(v.get('value'))
                elif 'jpath' in v:
                    v_x = jsonpath.jsonpath(e, self.macro.expand(v.get('jpath')))
                    v_x = None if v_x==False else v_x
                else:
                    log.msg(u'field [{}] should contain "value" or "jpath"'.format(k), level=log.WARNING)
                    continue

                val = parser.make_parser(v.get('parse', {}))(v_x)

                if not val and 'default' in v:
                    val = self.macro.expand(v.get('default'))

                if not (val or v.get('multi') or v.get('opt')):
                    log.msg(u'field [{}] is empty:\n{}'.format(k, item), level=log.WARNING)
                    break

                item[k] = arg_to_iter(val)

            else:

                yield item
    def create_ckan_data(self):
       ''' Create NRCAN datasets in CKAN format and store in a text file '''
       self.config.read('nrcan.config')
       infile = open('/Users/peder/dev/goc/nrcan.dat', "r")
       outfile = open('/Users/peder/dev/goc/nrcan-ckan.dat', "w")
       outfile = open('log.jl', "w")
       for line in infile:
           en,fr = str(line).strip().split('|')    
           data_en= eval(en)
           package_dict = {'extras': {}, 'resources': [], 'tags': []}

           # start with English Package Fields
           lookups =[]
           for ckan, nrcan in config.items('package'):
               if nrcan=='LOOKUP':
                  lookups.append(ckan)
               elif "$." in nrcan:
                   print jsonpath(data_en, nrcan)
                   if schema_description.dataset_field_by_id[ckan].get('type') == 'keywords':
                       package_dict[ckan] = ','.join(jsonpath(data_en, nrcan))
                   else:
                       (package_dict[ckan],) = jsonpath(data_en, nrcan)
               elif nrcan:
                   package_dict[ckan] = data_en[nrcan]
                   
                   
               # After both languages are done, we can deal with lookups.    
               for ckan in lookups:
                  if ckan == 'catalog_type':
                       package_dict[ckan] = self.get_choice_for_english_value(ckan,'Geo Data')

                  elif ckan =='maintenance_and_update_frequency':
                      package_dict[ckan] = self.get_choice_for_english_value(ckan,'Unknown')
                
        
           
           pprint(package_dict)
           sys.exit()
           pass
Example #12
 def render(self):
     for logrecord in self.queryset:
         row = [logrecord.app, logrecord.loggerName, logrecord.get_level_display(), logrecord.timestamp,
                logrecord.message, logrecord.fileName, logrecord.lineNumber, logrecord.thread,
                logrecord.exception_message]
         for json_path in CSV_EXPORT_EXTRA_JSON_PATHS:
             jsonpath_result = jsonpath.jsonpath(logrecord.extra, json_path)
             row.append(jsonpath_result[0] if jsonpath_result is not False else u'')
         self.writer.writerow(row)
     csv_data = self.buffer.getvalue()
     self.buffer.close()
     return csv_data
Example #13
def main(DCity,ACity,Date,maxprice):
    headers = {
        'Host' : 'flights.ctrip.com',
        'Cache-Control' : 'max-age=0',
        'Upgrade-Insecure-Requests' : '1',
        'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding' : 'gzip, deflate, sdch',
        'Accept-Language' : 'zh-CN,zh;q=0.8',
        'Cookie' : '_abtest_userid=d8acf40b-bd99-4d4c-a32a-fc629f1c7551; GUID=09031168410855693377; HotelCityID=2split%E4%B8%8A%E6%B5%B7splitShanghaisplit2018-1-26split2018-01-27split0; appFloatCnt=3; StartCity_Pkg=PkgStartCity=32; traceExt=campaign=CHNbaidu81&adid=index; adscityen=Guangzhou; Session=SmartLinkCode=U153507&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost=&SmartLinkLanguage=zh; Union=OUID=title&AllianceID=5376&SID=153507&SourceID=&Expires=1518230709419; manualclose=1; DomesticUserHostCity=CAN|%b9%e3%d6%dd; __zpspc=9.14.1517629443.1517629443.1%233%7Cwww.so.com%7C%7C%7C%7C%23; _jzqco=%7C%7C%7C%7C1517625909536%7C1.1773865467.1516933714186.1517625928012.1517629443498.1517625928012.1517629443498.undefined.0.0.44.44; FD_SearchHistorty={"type":"S","data":"S%24%u5E7F%u5DDE%28CAN%29%24CAN%242018-02-15%24%u4E0A%u6D77%28SHA%29%24SHA"}; _RF1=163.177.136.73; _RSG=lqaqXMEO6B5ogqNCNdMo0A; _RDG=283d86d5305517270a304ebac2d5b88495; _RGUID=52c660e5-a1d2-4d2f-a032-5a75cc5f4ab2; Mkt_UnionRecord=%5B%7B%22aid%22%3A%22761445%22%2C%22timestamp%22%3A1517188667747%7D%2C%7B%22aid%22%3A%224897%22%2C%22timestamp%22%3A1517471270297%7D%2C%7B%22aid%22%3A%225376%22%2C%22timestamp%22%3A1517641193850%7D%5D; _ga=GA1.2.2009874240.1516933714; _gid=GA1.2.318181679.1517625909; MKT_Pagesource=PC; _bfa=1.1516933711238.47ahnf.1.1517625906849.1517641190746.14.81.212093; _bfs=1.2; _bfi=p1%3D101027%26p2%3D101027%26v1%3D81%26v2%3D80'
        }

    #url = 'http://flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=CAN&ACity1=SHA&SearchType=S&DDate1=2018-02-11'  # test URL
    url = 'http://flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=' + DCity + "&ACity1=" + ACity + '&SearchType=S&DDate1=' + Date
    response = requests.get(url,headers = headers)
    if response.status_code == 200:
        print('链接正常', response.status_code)
    else:
        print('链接有误', response.status_code)
    print('日期:',Date)
    demo = json.loads(response.text)
    #print(response.status_code)
    aim = jsonpath.jsonpath(demo,"$..acn")  # destination
    fn = jsonpath.jsonpath(demo,'$..fn')  # flight number
    times = jsonpath.jsonpath(demo,"$..dt")  # departure time
    time = []
    for i in times:  # filter the entries in times
        if len(i) > 1:
            time.append(i)
    #price = jsonpath.jsonpath(demo,"$..scs[0].p")  # pin down the single price element
    price = jsonpath.jsonpath(demo,"$..fis.[:100].lp")  # an alternative way to get the price
    rata = jsonpath.jsonpath(demo,"$..scs[0]..rate")  # discount
    #rt = jsonpath.jsonpath(demo,"$..scs[0]..rt")  # discount
    #print(len(aim),len(fn),len(time),len(price),len(rata))  # list lengths, to sanity-check the data
    code = map(list,zip(aim,fn,time,price,rata))  # combine the lists into one
    code = filter(lambda x:x[3]<=int(maxprice),code)  # filter by price
    #code = filter(lambda x:x[4]<=0.8,code)  # filter the list by discount
    for i in code:
        print('航班',i)
def main(query):
    results = []
    json_ = json.load(file(BOOKMARK_PATH))
    uid = uid_generator()
    for entry in jsonpath.jsonpath(json_, '''$..?(@.url and @.type=='url')'''):
        if not query in entry['name'].lower() and not query in entry['url']:
            continue
        results.append(alfred.Item(
            attributes = { 'uid': uid.next(), 'arg': entry['url'] }, 
            title = entry['name'], 
            subtitle = entry['url'], 
            icon = 'icon.png'))
    alfred.write(alfred.xml(results))
Example #15
    def list(self, **kwargs):
        _logger.debug(self.RESOURCE)
        _logger.debug(type(self))

        # Pop jsonpath if exists and pass the rest of arguments to API
        # for some API calls home center handles additional parameters

        json_path = kwargs.pop('jsonpath', None)

        # Home center ignores unknown parameters so there is no need to
        # remove them from the REST request.
        try:
            items = self.http_client.get(self.RESOURCE, params=kwargs).json()
        except exceptions.ConnectionError:
            return

        # if there is no explicitly defined json_path parameter
        if json_path is None:
            for value in self.API_PARAMS:
                kwargs.pop(value, None)

            condition_expression = ""
            for k, v in six.iteritems(kwargs):
                if k.startswith('p_'):  # search for properties
                    k = "properties." + k[2:]
                condition_expression += self.JSON_CONDITION_BASE.format(
                    k,
                    quote_if_string(v))
                condition_expression += " and "

            if condition_expression != "":
                # filter the results with json implicit built from
                # remaining parameters

                json_path = "$[?({})]".format(condition_expression[:-5])
                _logger.debug("Implicit JSON Path: {}".format(json_path))

        if json_path:
            _logger.debug("JSON Path: {}".format(json_path))
            filtered_items = jsonpath.jsonpath(items, json_path)
            if filtered_items:
                items = filtered_items
            else:
                return

        for item in items:
            item_obj = self.model(**item)
            if item_obj:
                yield item_obj
            else:
                continue
Example #16
    def _read_json(self, content, config):
        data = json.loads(content)
        responses = []
        if not "extract" in config:
            return data
        extract_config = config["extract"]

        if type(extract_config) is unicode:
            return jsonpath.jsonpath(data, extract_config)
        else:
            tmp_responses = {}
            for key, expression in extract_config.items():
                tmp_responses[key] = jsonpath.jsonpath(data, extract_config[key])

            keys = tmp_responses.keys()

            if len(keys) > 0 and len(tmp_responses[keys[0]]):
                for i in range(0, len(tmp_responses[keys[0]])):
                    response = dict()
                    for key in keys:
                        response[key] = tmp_responses[key][i]
                    responses.append(response)
            return responses
    def parse_item(self, response, loop, fields):
        try:
            txt = utils.to_unicode(response.body)
            if hasattr(self, 'json_type') and self.json_type=='list':
                l, r = txt.find('['), txt.rfind(']')
            else:
                l, r = txt.find('{'), txt.rfind('}')
            obj = json.loads(txt[l:r+1])
            self.macro.update({'URL':response.url})

            for e in jsonpath.jsonpath(obj, loop or '$[]') or []:
                item = Item()

                for k,v in fields.iteritems():
                    if 'value' in v:
                        v_x = v.get('value')
                    elif 'jpath' in v:
                        v_x = jsonpath.jsonpath(e, self.macro.expand(v.get('jpath')))
                    else:
                        log.msg(u'field [{}] should contain "value" or "jpath"'.format(k), level=log.WARNING)
                        continue

                    val = utils.convert_type(v.get('parse', {}))(self.macro.expand(v_x))

                    if not val and 'default' in v:
                        val = self.macro.expand(v.get('default'))

                    qry = v.get('filter', {})
                    if utils.filter_data(qry, val):
                        item[k] = arg_to_iter(val)
                    else:
                        break
                else:
                    yield item

        except Exception as ex:
            log.msg(u'{}\n{}'.format(response.url, traceback.format_exc()))
Example #18
        def _convert(data):
            if t not in ['join', 'list'] and isinstance(data, list):
                data = TakeFirst()(data)
                if type(data) in [str, unicode]:
                    data = data.strip()
                elif type(data) in [int, float, datetime]:
                    data = str(data)
                else:
                    return data

            if t=='join':
                sep = inf.get('sep', u' ')
                return Join(sep)(data)
            elif t=='list':
                sep = inf.get('sep', u' ')
                return remove_tags(Join(sep)(data)).strip()
            elif t=='text':
                return remove_tags(data).strip()
            elif t=='clean':
                cleaner = Cleaner(style=True, scripts=True, javascript=True, links=True, meta=True)
                return cleaner.clean_html(data)
            elif t=='unesc':
                return HTMLParser().unescape(data)
            elif t=='base64':
                return base64.decodestring(data)
            elif t=='sub':
                frm = inf.get('from')
                to = inf.get('to')
                return re.sub(frm, to, data)
            elif t=='jpath':
                qs = inf.get('query')
                return jsonpath.jsonpath(json.loads(data), qs)
            elif t=='map':
                m = inf.get('map')
                d = inf.get('default')
                return m.get(data, d)
            elif t=='int':
                return int(float(data))
            elif t=='float':
                return float(data)
            elif t=='date':
                fmt = inf.get('fmt', 'auto')
                tz = inf.get('tz', '+00:00')
                return parse_date(data, fmt, tz)
            elif t=='cst':
                fmt = inf.get('fmt', 'auto')
                return parse_date(data, fmt, '+08:00')
            else:
                return data
Example #19
def github(profile, token):
  url = 'https://api.github.com/orgs/%s/repos' % profile
  logger.debug("Requesting '%s' with '%s' token..." % (url, token))
  api_request  = requests.get(url, headers={'Authorization' : 'token %s' % token})
  logger.debug("Loading JSON content...")
  repositories = json.loads(api_request.content)
  if isinstance(repositories, dict) and ('message' in repositories):
    logger.error("Could not retrieve the expected JSON content from GitHub! '%s'" % repositories['message'])
    return [ ]
  logger.debug("Searching for 'clone_url' keys...")
  urls = jsonpath.jsonpath(repositories, '$[*].ssh_url')
  logger.debug("%d URLs found:" % len(urls))
  logger.debug("URLs %s:" % urls)
  map(logger.debug, urls)
  return urls
    def pathexists(self,json,path,value=None):
        """ Checks if JSON path exists with an explicit value.
            Returns true or false.

            If arguement 'value' == None; just returns True if path exists.
            """
        if value == None: # If not searching for value, your verifying truth of a path... so
            try:
                results = jsonpath.jsonpath(json,path)
                if results:
                    return True # Path exists! Results exist.
                else:
                    return False
            except TypeError: # False if the path is nonexistent.
                return False
        else: # You must have a value == so we will see if the value matches a value in the list.
            try:
                results = jsonpath.jsonpath(json,path)
                if value in results: # If value is contained within LIST of results; it exists
                    return True
                else: # It's not in the list, so it doesn't exist
                    return False
            except TypeError: #  False if the path is nonexistent.
                return False
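pathexists() above boils down to two behaviours of jsonpath(): a path that exists yields a truthy list of matched values, and a missing path yields False. Below is a hedged, standalone sketch of the same checks against an invented sample document, rather than through the class this method belongs to.

import jsonpath

doc = {"topping": [{"type": "glazed"}, {"type": "maple"}]}

matches = jsonpath.jsonpath(doc, "$.topping[*].type")
print(bool(matches))               # True: the path exists
print("maple" in (matches or []))  # True: the path holds that value
print(bool(jsonpath.jsonpath(doc, "$.frosting[*].type")))  # False: no such path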
Example #21
 def reserved_price(self, region, ins, arch, reserved):
     region = str(region)
     if region == 'us-east-1':
         region = 'us-east' # naming inconsistency!
     i1, i2 = ins.split('.')
     r1 = TYPES_AZ[i1].replace('OD', 'Res')
     assert arch in ('linux', 'windows')
     if arch == 'linux':
         data = self.heavylinux
     else:
         data = self.heavywindows
     jpath = 'config.regions[?(@.region==%r)].instanceTypes[?(@.type==%r)].sizes[?(@.size==%r)]' % (
         region,
         r1,
         SUBTYPES_AZ[i2],
     )
     jpath1 = jpath+'.valueColumns[?(@.name==%r)].prices.USD' % 'yrTerm%dHourly' % reserved
     jpath2 = jpath+'.valueColumns[?(@.name==%r)].prices.USD' % 'yrTerm%d' % reserved
     hourly = jsonpath(data, jpath1)
     upfront = jsonpath(data, jpath2)
     if not hourly or not upfront:
         logging.error('Price not found for: %r %r %r' % (region, ins, arch))
         return 0.0
     return float(hourly[0]) + float(upfront[0])/YEARHOURS/reserved
Example #22
 def ondemand_price(self, region, ins, arch):
     i1, i2 = ins.split('.')
     if arch == 'windows':
         arch = 'mswin'
     jpath = 'config.regions[?(@.region==%r)].instanceTypes[?(@.type==%r)].sizes[?(@.size==%r)].valueColumns[?(@.name==%r)].prices.USD' % (
         REGIONS_AZ[region],
         TYPES_AZ[i1],
         SUBTYPES_AZ[i2],
         arch,
     )
     r = jsonpath(self.ondemand, jpath)
     if not r:
         logging.error('Price not found for: %r %r %r' % (region, ins, arch))
         return 0.0
     return float(r[0])
Example #23
    def assertRex(entity, jpath, pat, mode='match', dctAssert=None):
        err = 0; message = 're.%s(%s, %s)' % (mode, pat, jpath); m = None

        ret = jsonpath.jsonpath(entity, jpath)
        if not ret: err = 1

        if not err:
            refunc = eval('re.%s' % mode); text = ret[0]
            m = refunc(pat, text)
            if not m: err = 1

        if dctAssert:
            dctAssert['errnum'] += err
            dctAssert['asserts'].append('%s:%s' % (message, err==0))

        return m
Example #24
    def get_json(self):
        headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0)'}
        request = urllib2.Request(self.url,headers=headers)
        response = urllib2.urlopen(request)
        html = response.read()
        jsonobj = json.loads(html)
        # Get the city names
        namelist = jsonpath.jsonpath(jsonobj,'$..name')
        for name in namelist:
            print(name)

        # Serialize the list to a string
        nametext = json.dumps(namelist,ensure_ascii=False)
        with open('name.txt','a') as file:
            file.write(nametext.encode("utf-8"))
Example #25
 def _complete_href_links(self, parent_collection, current):
     """Resolves self hyperlinks (JSONPath and JSONPointers."""
     if isinstance(current, HyperLink) or \
        (isinstance(current, dict) and "href" in current):
         if isinstance(current["href"], (unicode, str)):
             resource = None
             if current["href"] in self._cache:
                 resource = self._cache[current["href"]]
             elif current["href"].startswith("#"):
                 resource = jsonpointer.resolve_pointer(parent_collection,
                           current["href"][1:])
                 if not resource:
                     resource = "Unresolved"
             elif current["href"].startswith("$"):
                 path = jsonpath(parent_collection,
                     current["href"], result_type="PATH")
                 if path:
                     resource = eval("parent_collection%s" % path[0].lstrip("$"))
                 else:
                     resource = "Unresolved"
             self._cache[current["href"]] = resource
             if resource and resource != "Unresolved":
                 if "selfRef" not in resource:
                     ret = self.set_self_ref(resource)
                     if ret < 0:
                         return ret
                 current["href"] = resource["selfRef"]
         return 0
     elif isinstance(current, list):
         keys = range(len(current))
     elif isinstance(current, dict):
         keys = current.keys()
     else:
        return 0
     
     for key in keys:
         value = current[key]
         if isinstance(value, (NetworkResource, Topology)) and \
             "selfRef" not in value:
             ret = self.set_self_ref(value)
             if ret < 0:
                 return ret
         if isinstance(value, list) or isinstance(value, dict):
             ret = self._complete_href_links(parent_collection, value)
             if ret < 0:
                 return ret
     return 0
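_complete_href_links() uses the less common result_type="PATH" mode: instead of the matched values, jsonpath() returns the location of each match as an index expression rooted at $, which the code above evaluates against the parent collection after stripping the leading $. A small sketch of that mode on an invented document (the exact path formatting may vary between library versions):

import jsonpath

doc = {"nodes": [{"id": "n1"}, {"id": "n2"}]}
paths = jsonpath.jsonpath(doc, "$..id", result_type="PATH")
print(paths)  # e.g. ["$['nodes'][0]['id']", "$['nodes'][1]['id']"]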
Example #26
 def _extract_section_data(self, section_source):
     """
     Given a section, extract the relevant json for it
     :param section_source: the json source of the elasticsearch section document
     :return: the json representation of the section's content
     """
     page_title = section_source['page_title']
     section_title = section_source['section_title']
     page_json = self._page_from_title(page_title)
     try:
         return jsonpath(
             page_json,
             "$..wikitext-json.."
             "[?(@.__type==\"section\" and @.title==\"{}\")]".format(section_title)
         )
     except UnicodeEncodeError:
         return False
Example #27
    def get_parks(self):
        for document, dom in self.sections_with_content(self.query):
            least_once = False
            if not dom:
                self.processed(False)
                continue

            content = jsonpath(dom, "$..[?(@.__type==\"list_item\")].content")
            if not content:
                self.processed(False)
                continue

            for name in self._get_park_names(content):
                yield document['page_title'], name
                least_once = True

            self.processed(least_once)
Example #28
    def __call__(self, request_context, env):

        global dsl_debug
 
        root = None

        p = request_context
        l = len(self.access_path)

        for i in range(l):
            n = self.access_path[i]
            if n in p:
                p = p[n]
            else:
                p = None
                break

        if self.use_env and self.env_param in env and self.field_name in env[self.env_param]:
            p = env[self.env_param][self.field_name]
        elif self.field_name is not None and p is not None and self.field_name in p:
            p = p[self.field_name]

        root = p

        if self.is_json:
            if dsl_debug:
                print "jsonpath context: ", root
                print "\tpath: ", self.value

            root = jsonpath.jsonpath(root, self.value)

            if dsl_debug:
                print "\tjsonpath result: ", root
                print "\tpath: ", self.value

            if not root:
                root = None
            else:
                root = root[0]

        if dsl_debug:
            print "Call returns: " , root

        return root
Example #29
    def __parse_expression(self, expression, container=None):
        """
        Parses the jsonpath expression in {self.output_results} and return its result
        """
        if container is None:
            container = self.output_results

        expression = expression.strip()
        as_list = False

        if expression.endswith("as list"):
            as_list = True
            expression = expression.replace("as list", "").strip()

        try:
            results = jsonpath(container, expression)
        except Exception, e:
            print e
            raise RuntimeError("Error when parsing the expression {}".format(expression))
Example #30
    def assertFunc(entity, jpath, func_name, val, dctAssert=None):
        err = 0; message = '(%s %s %s)' % (jpath, func_name, val)

        ret = jsonpath.jsonpath(entity, jpath)
        if not ret: err = 1

        if not err:
            ret = ret[0]
            if isinstance(val, basestring):
                val = val.replace('"', '\\"')
                good = eval('ret %s "%s"' % (func_name, val))
            else: good = eval('ret %s %s' % (func_name, val))
            err = not good and 1 or 0

        if dctAssert:
            dctAssert['errnum'] += err
            dctAssert['asserts'].append('%s:%s' % (message, err==0))

        return ret
Example #31
    def parse(self, response):

        # Open the database connection
        db = pymysql.Connect(host='192.168.1.22',
                             port=7306,
                             user='******',
                             passwd='123456',
                             db='joom',
                             charset='utf8')

        # Get a cursor with the cursor() method
        cursor = db.cursor()

        if response.status == 404:
            order_id = str(
                re.compile(r"(?<==)(.+?)\b").search(
                    response.request.url).group(0))

            sql = "INSERT INTO shipping(order_id, tracking_true) \
                               VALUES ('%s', %s)"                                                  % \
                  (order_id, 0)

        else:

            shipping_re = json.loads(response.text)
            # print(shipping_re)
            # if shipping_re['code'] == 1000 :
            #     sql = "INSERT INTO shipping(order_id, tracking_true, delivered, depth, tracking_id, trackingNumber, arrived, passedCustoms) \
            #                    VALUES ('%s',%s,%s,'%s','%s','%s',%s,%s)" % \
            #           (str(re.compile(r"(?<==)(.+?)\b").search(response.request.url).group(0)), True, False, 10, '0', '0', False, '0')
            # else:
            # print(shipping_re)
            # Process the data
            order_id = str(
                re.compile(r"(?<==)(.+?)\b").search(
                    response.request.url).group(0))
            # If a result is returned its type is list, otherwise it is False
            tracking_true = 'True' if type(
                jsonpath.jsonpath(
                    shipping_re,
                    '$..checkpoints[0].depth')) == list else 'False'
            # tracking_ture = type(jsonpath.jsonpath(re,'$..checkpoints[0].depth'))
            depth = jsonpath.jsonpath(
                shipping_re, '$..checkpoints[-1:].depth'
            )[0] if tracking_true == 'True' else 'False'
            delivered = shipping_re['data']['delivered']
            tracking_id = str(shipping_re['data']['id'])
            trackingNumber = str(shipping_re['data']['trackingNumber'])
            arrived = shipping_re['data']['arrived']
            passedCustoms = shipping_re['data']['passedCustoms']

            # print(order_id, tracking_true, delivered, depth, tracking_id, trackingNumber, arrived, passedCustoms)

            # SQL insert statement
            sql = "INSERT INTO shipping(order_id, tracking_true, delivered, depth, tracking_id, trackingNumber, arrived, passedCustoms) \
                   VALUES ('%s',%s,%s,'%s','%s','%s',%s,%s)" % \
                  (order_id, tracking_true, delivered, depth, tracking_id, trackingNumber, arrived, passedCustoms)

        # print(sql)
        try:
            # Execute the SQL statement
            cursor.execute(sql)
            # Commit the transaction
            db.commit()
            print(order_id + "订单物流信息已插入")
        except pymysql.Error as e:
            # Roll back on error
            print("错误代码 %d: %s" % (e.args[0], e.args[1]))
            # print('Got error {!r}, errno is {}'.format(e, e.args[0]))
            print(sql)
            db.rollback()
            print(order_id + "出现错误")

        # Close the database connection
        db.close()
listDict = [{"city": "北京"}, {"name": "小明"}]
strlist = json.dumps(listDict, ensure_ascii=False)
print type(strlist)  # <type 'str'>
# Write the data to a file
with open("listDict.json", 'w') as file:
    file.write(strlist)

# 2. A second way to store dicts and lists
listStr = [{"city": "北京"}, {"name": "大刘"}]
json.dump(listStr, open("listStr.json", "w"), ensure_ascii=False)

dictStr = {"city": "北京", "name": "大刘"}
json.dump(dictStr, open("dictStr.json", "w"), ensure_ascii=False)
time.sleep(1)

# ------------ Read the data back from the files ---------

dictList = json.load(open("listDict.json", 'r'))
# Prints 北京 (Beijing)
print dictList[0]["city"]
# ------------ Parse a dict with loads ----------
strDict = '{"city": "北京", "name": "大猫"}'
# <type 'dict'>
print type(json.loads(strDict))

jsonobj = json.loads(strDict)

# Starting from the root, match the name nodes
citylist = jsonpath.jsonpath(jsonobj, '$..name')

print citylist[0].encode('utf-8')
Example #33
import requests
import json
import jsonpath

url = "https://reqres.in/api/users/2"

# Read Json input file
file = open(
    '/Users/ivoitkiv/PycharmProjects/APIAutomation/CreatingNewUser.json', 'r')
json_input = file.read()
request_json = json.loads(json_input)
print(request_json)

#Send Put Request
response = requests.put(url, request_json)
#Response validation
assert response.status_code == 200

#print(response.headers.get('Content-Length'))
json_response = json.loads(response.text)
#id = jsonpath.jsonpath(json_response,'id')
#print(id[0])
updated_li = jsonpath.jsonpath(json_response, 'updatedAt')
print(updated_li[0])
        data = {
            'offset': '{}'.format(num),
            'limit': '20',
            'is_refresh': '1',
            'city_name': '{}'.format(a)
        }
        try:
            resp_json = srequest.post(url=url,
                                      data=data,
                                      headers=headers,
                                      verify=False).json()
        except:
            continue
        # resp_json = srequest.post(url=url, data=data, headers=headers).json()
        time.sleep(1.5)
        series_id_list = jsonpath.jsonpath(resp_json, '$..id')
        for series_id in series_id_list:
            # print(series_id)
            spec_link = 'https://www.dcdapp.com/motor/car_page/m/v1/series_all_json/?series_id={}&city_name={}&show_city_price=1&m_station_dealer_price_v=1'.format(
                series_id, a)

            print('spec_link', spec_link)
            try:
                spec_data = srequest.get(
                    url=spec_link, headers=headers,
                    verify=False).json(strict=False)['data']
                time.sleep(1.5)

                # Series name
                series_name = jsonpath.jsonpath(spec_data,
                                                '$[online]..series_name')
Example #35
from pyecharts.charts import Map  # map charts (matplotlib would only give static images)
from pyecharts import options as opts  # chart options
from demo1 import nameMap    # import from our own helper module


# Scrape global epidemic data
# Data source
url='https://api.inews.qq.com/newsqa/v1/automation/foreign/country/ranklist'
resp = requests.post(url).text  # send a POST request to the URL
# print(resp)     # a string

data = json.loads(resp)   # string -> dict, easier to extract from later
# print(type(data))

# 1. Extract the country names and the fatality figures
name=jsonpath.jsonpath(data,"$..name")   # extract the names from the page source
print(name)


# fatality figures
confirm=jsonpath.jsonpath(data,"$..confirm")  # extract the data
print(confirm)


# Combine the data with zip
a = list(zip(name,confirm))
print(a)

#  Visualize the analysis on a map
map_ = Map(opts.InitOpts(width='1200px',height='600px')).add(series_name="世界各国的病死率",
                                                          data_pair = a,  # input data
Example #36
    def parse(self, response):

        data_dict = json.loads(response.body)
        try:
            flight_list = jsonpath(data_dict, '$..departures')[0]
        except:
            # self.token_flag = True
            logging.info('# invalid airport')
            print '# flight_list ', response.text
            return

        self.proxy_flag = False
        datas = response.meta.get('invalid')
        if len(flight_list) == 0:
            datas = response.meta.get('invalid')
            logging.info('# no flight: ' + datas.get('depAirport') +
                         datas.get('arrAirport') + datas.get('date'))
            self.task.append(response.meta.get('invalid'))

        for data in flight_list:

            # connecting flight
            if len(data.get('legs')) > 1:
                logging.info('is change')
                continue
            flight_info = data.get('legs')[0]
            flight_number = str(flight_info.get('flightNumber')).replace(
                ' ', '')
            carrier = flight_number[0:2]

            if carrier != 'TR':
                print '# other airline'
                continue

            # 2018-12-31T00: 55: 00
            dep_time = time.mktime(
                time.strptime(flight_info.get('departureDateTime'),
                              '%Y-%m-%dT%H:%M:%S'))
            arr_time = time.mktime(
                time.strptime(flight_info.get('arrivalDateTime'),
                              '%Y-%m-%dT%H:%M:%S'))
            dep_airport = flight_info.get('departure')
            arr_airport = flight_info.get('arrival')

            price_info_list = data.get('fareClasses')
            price_info = price_info_list[0]

            try:
                adult_price = float(price_info.get('price').get('amount'))
            except:
                print '### price error: ' + datas.get(
                    'depAirport') + datas.get('arrAirport') + datas.get('date')
                continue

            currency = price_info.get('price').get('currency')
            net_fare = adult_price
            cabin = price_info.get('productCode')
            max_seats = int(data.get('journeyInfo').get('seatLeft'))
            adult_tax = 0
            is_change = 1

            segments_data = ''
            for i in price_info_list:
                if i.get('name') == 'FlyBag':
                    segments_data = i
                    break
            segments = []
            try:
                if segments_data != '':
                    segments.append(
                        [segments_data.get('price').get('amount'), max_seats])
                else:
                    segments = [[0, 0]]
            except:
                print '### segments price error: ' + datas.get(
                    'depAirport') + datas.get('arrAirport') + datas.get('date')
                continue

            item = SpidersHynItem()
            item.update(
                dict(f=flight_number,
                     d=dep_time,
                     a=arr_time,
                     fc=self.port_city.get(dep_airport, dep_airport),
                     tc=self.port_city.get(arr_airport, arr_airport),
                     c=currency,
                     ap=adult_price,
                     at=adult_tax,
                     n=net_fare,
                     m=max_seats,
                     cb=cabin,
                     cr=carrier,
                     i=is_change,
                     s=json.dumps(segments),
                     g=time.time(),
                     da=dep_airport,
                     aa=arr_airport))

            yield item
Example #37
    def treating_data(self, is_token, file_var, file_path, parameters,
                      dependent, data, save_response_dict):
        # Decide which header to use
        if is_token == '':
            header = self.no_token_header
        else:
            header = self.token_header

            #if file_var != '' and file_path != '':     # used when calling the file-upload API
            #    self.token_header['Content-Type'] = "multipart/form-data"
            #else:
            #    del self.token_header['Content-Type']
        logger.info(f'处理依赖前data的数据:{data}')
        # Resolve the dependency data
        if dependent != '':
            if dependent.find('={') != -1:
                dependent_key = dependent.split('=')[0]
                dependent_value = dependent.split('=')[1]
                #dependent_data = {dependent_key:save_response_dict.read_depend_data(dependent_value)}
                dependent_data = json.loads(dependent_value)
            else:
                dependent_data = save_response_dict.read_depend_data(dependent)
            logger.debug(f'依赖数据解析获取的字典{dependent_data}')
            if parameters != '' and data != '':
                data = json.loads(data)
                exists_key = False
                parameters_list = parameters.split('/')
                for dk, dv in dependent_data.items():
                    for pl in parameters_list:
                        if pl == dk:
                            if isinstance(dv, int):
                                dv = str(dv)
                            parameters = parameters.replace(pl, dv)
                logger.info(f'parameters有数据,依赖有数据时{parameters}')
                # Handle keys that exist in both data and the dependency; list, dict and plain forms
                # have been verified, and the key should be declared as a list in data.
                # Example: {"id":[1],"user":{"username":"******"}}
                for k, v in data.items():
                    for dk, dv in dependent_data.items():
                        if k == dk:
                            if isinstance(data[k], list):
                                data[k].append(dv)
                            if isinstance(data[k], dict):
                                data[k].update(dv)
                            if isinstance(data[k], int):
                                data[k] = dv
                            if isinstance(data[k], str):
                                data[k] = dv
                            exists_key = True
                    if exists_key is False:
                        # Merge into a new data dict
                        dependent_data.update(data)
                        data = dependent_data
                        logger.info(f'data有数据,依赖有数据时{data}')
            elif parameters != '' and data == '':
                # Example: /id/name/num
                parameters_list = parameters.split('/')
                for dk, dv in dependent_data.items():
                    for pl in parameters_list:
                        if pl == dk:
                            if isinstance(dv, int):
                                dv = str(dv)
                            parameters = parameters.replace(pl, dv)
                logger.info(f'parameters有数据,依赖有数据时{parameters}')
            elif data != '' and parameters == '':
                data = json.loads(data)
                exists_key = False
                # Handle keys that exist in both data and the dependency; list, dict and plain forms
                # have been verified, and the key should be declared as a list in data.
                # Example: {"id":[1],"user":{"username":"******"}}
                for k, v in data.items():
                    for dk, dv in dependent_data.items():
                        if k == dk:
                            print(type(data[k]))
                            if isinstance(data[k], list):
                                data[k].append(dv)
                            if isinstance(data[k], dict):
                                data[k].update(dv)
                            if isinstance(data[k], int):  # increment by 1
                                data[k] = dv + 1
                            if isinstance(data[k], str):  # added for the test case (composite primary key)
                                data[k] = dv + str(random.randint(0, 10000))
                            exists_key = True
                    if exists_key is False:
                        # Merge into a new data dict
                        dependent_data.update(data)
                        data = dependent_data
                        logger.info(f'data有数据,依赖有数据时{data}')
            else:
                # Assign the dependency data to data
                data = dependent_data
                logger.info(f'data无数据,依赖有数据时{data}')
        else:
            if data == '':
                data = None
                logger.info(f'data无数据,依赖无数据{data}')
            else:
                try:
                    data = json.loads(data)
                    logger.info(f'data有数据,依赖无数据{data}')
                except JSONDecodeError as e:
                    logger.error(f'data格式有误,请检查数据格式{e}')
        # Resolve dependencies in the Path parameters.
        # The incoming value looks like {"case_002":"$.data.id"}/item/{"case_002":"$.meta.status"}; split it into a list.

        path_list = parameters.split('/')
        # Iterate over the list by index
        for i in range(len(path_list)):
            try:
                # Try to parse as a dict; note that json.loads('2') happily returns 2
                path_dict = json.loads(path_list[i])
            except JSONDecodeError as e:
                # Parsing failed, so path_list[i] keeps its value
                logger.error(f'无法转换字典,进入下一个检查,本轮值不发生变化:{path_list},{e}')
                # Skip to the next iteration
                continue
            else:
                # Parse the dict to get the case number and the jsonpath expression
                logger.info(f'获得字典信息:{path_dict}')
                # Handle the AttributeError caused when json.loads() of a bare number parses successfully
                try:
                    for k, v in path_dict.items():
                        try:
                            # Try to extract a field from the actual response of the referenced case
                            #path_list[i] = jsonpath.jsonpath(json.dumps(save_response_dict.actual_response[k]),v)[0]
                            path_list[i] = jsonpath.jsonpath(
                                save_response_dict.actual_response[k], v)
                            if isinstance(path_list[i], list):
                                path_list[i] = path_list[i][0]
                        except TypeError as e:
                            logger.error(f'无法提取,请检查响应字典中是否支持该表达式.{e}')
                except AttributeError as e:
                    logger.error(
                        f'类型错误:{type(path_list[i])},本次将不转换值{path_list[i]},{e}')

        # Some elements are not str: use map to convert to an all-string list
        path_list = list(map(str, path_list))
        # Join the string list back into a path string, e.g. 500/item/200
        parameters_path_url = "/".join(path_list)
        logger.info(f'path路径参数解析依赖后的路径为{parameters_path_url}')
        return data, header, parameters_path_url
def existing_bookings():
    url = 'https://restful-booker.herokuapp.com/booking'
    response = (requests.get(url)).json()
    for booking in response:
        bookingids = jsonpath.jsonpath(response, '$.[bookingid]')
    return list(bookingids)
Example #39
def extract_json_field(resp, json_field):
    value = jsonpath.jsonpath(resp.json(), json_field)
    return value[0]
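extract_json_field() is a thin convenience wrapper: run a jsonpath expression against resp.json() and return the first hit. A hedged usage sketch follows; the FakeResponse class is invented here so the helper can be exercised without a live API, and jsonpath is assumed to be imported as in the surrounding snippets.

class FakeResponse:
    # Stands in for a requests.Response; only the .json() method is needed here.
    def __init__(self, payload):
        self._payload = payload

    def json(self):
        return self._payload

resp = FakeResponse({"bookingid": 42, "booking": {"firstname": "Sally"}})
print(extract_json_field(resp, "$..firstname"))  # Sally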
# Provides the list of currently existing bookings
def existing_bookings():
    url = 'https://restful-booker.herokuapp.com/booking'
    response = (requests.get(url)).json()
    for booking in response:
        bookingids = jsonpath.jsonpath(response, '$.[bookingid]')
    return list(bookingids)


BOOKING = random.choice(existing_bookings())
UPDATE = 'Edited'

# GET Pre-request: Takes booking and its firstname
get_response = requests.get(URL.format(BOOKING))
get_firstname = jsonpath.jsonpath(get_response.json(), '$.firstname')[0]


def test_update_booking(token):
    # PUT: Updates booking
    headers = {
        'Content-Type': 'application/json',
        'Cookie': 'token=' + str(token)
    }
    put_data = json.dumps({
        "firstname": "{}".format(UPDATE),
        "lastname": "Brown",
        "totalprice": 111,
        "depositpaid": True,
        "bookingdates": {
            "checkin": "2018-01-01",
Example #41
def covid_19_163(indicator: str = "实时") -> pd.DataFrame:
    """
    NetEase - novel coronavirus (COVID-19) data
    https://news.163.com/special/epidemic/?spssid=93326430940df93a37229666dfbc4b96&spsw=4&spss=other&#map_block
    https://news.163.com/special/epidemic/?spssid=93326430940df93a37229666dfbc4b96&spsw=4&spss=other&
    :return: data for the specified indicator
    :rtype: pandas.DataFrame
    """
    url = "https://c.m.163.com/ug/api/wuhan/app/data/list-total"
    headers = {
        "user-agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36",
    }
    payload = {
        "t": int(time.time() * 1000),
    }
    r = requests.get(url, params=payload, headers=headers)
    data_json = r.json()
    # data info
    url = "https://news.163.com/special/epidemic/"
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "lxml")
    data_info_df = pd.DataFrame([
        item.text.strip().split(".")[1]
        for item in soup.find("div", attrs={
            "class": "data_tip_pop_text"
        }).find_all("p")
    ])
    data_info_df.columns = ["info"]

    # China: historical daily (point-in-time) data
    hist_today_df = pd.DataFrame(
        [item["today"] for item in data_json["data"]["chinaDayList"]],
        index=[item["date"] for item in data_json["data"]["chinaDayList"]])

    # China: historical cumulative data
    hist_total_df = pd.DataFrame(
        [item["total"] for item in data_json["data"]["chinaDayList"]],
        index=[item["date"] for item in data_json["data"]["chinaDayList"]])

    # China: real-time data
    current_df = pd.DataFrame.from_dict(data_json["data"]["chinaTotal"])

    # World: historical daily data
    outside_today_df = pd.DataFrame(
        [item["today"] for item in data_json["data"]["areaTree"]],
        index=[item["name"] for item in data_json["data"]["areaTree"]])

    # World: historical cumulative data
    outside_total_df = pd.DataFrame(
        [item["total"] for item in data_json["data"]["areaTree"]],
        index=[item["name"] for item in data_json["data"]["areaTree"]])

    # All countries and regions: daily data
    all_world_today_df = pd.DataFrame(
        jsonpath.jsonpath(data_json["data"]["areaTree"], '$..today'),
        index=jsonpath.jsonpath(data_json["data"]["areaTree"], '$..name'))

    # All countries and regions: cumulative data
    all_world_total_df = pd.DataFrame(
        jsonpath.jsonpath(data_json["data"]["areaTree"], '$..total'),
        index=jsonpath.jsonpath(data_json["data"]["areaTree"], '$..name'))

    # Chinese regions: cumulative data
    area_total_df = pd.DataFrame(
        [
            item["total"]
            for item in data_json["data"]["areaTree"][0]["children"]
        ],
        index=[
            item["name"]
            for item in data_json["data"]["areaTree"][0]["children"]
        ])

    # Chinese regions: daily data
    area_today_df = pd.DataFrame(
        [
            item["today"]
            for item in data_json["data"]["areaTree"][0]["children"]
        ],
        index=[
            item["name"]
            for item in data_json["data"]["areaTree"][0]["children"]
        ])

    # Academic research progress on the epidemic
    url_article = "https://vip.open.163.com/api/cms/topic/list"
    payload_article = {
        "topicid": "00019NGQ",
        "listnum": "1000",
        "liststart": "0",
        "pointstart": "0",
        "pointend": "255",
        "useproperty": "true"
    }
    r_article = requests.get(url_article, params=payload_article)
    article_df = pd.DataFrame(r_article.json()["data"]).iloc[:, 1:]

    # News and information
    url_info = "https://ent.163.com/special/00035080/virus_report_data.js"
    payload_info = {
        "_": int(time.time() * 1000),
        "callback": "callback",
    }
    r_info = requests.get(url_info, params=payload_info, headers=headers)
    data_info_text = r_info.text
    data_info_json = demjson.decode(data_info_text.strip(" callback(")[:-1])

    if indicator == "数据说明":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return data_info_df

    if indicator == "中国实时数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return current_df

    if indicator == "中国历史时点数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return hist_today_df

    if indicator == "中国历史累计数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return hist_total_df

    if indicator == "世界历史时点数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return outside_today_df

    if indicator == "世界历史累计数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return outside_total_df

    if indicator == "全球所有国家及地区时点数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return all_world_today_df

    elif indicator == "全球所有国家及地区累计数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return all_world_total_df

    elif indicator == "中国各地区时点数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return area_today_df

    elif indicator == "中国各地区累计数据":
        print(f"数据更新时间: {data_json['data']['lastUpdateTime']}")
        return area_total_df

    elif indicator == "疫情学术进展":
        return article_df

    elif indicator == "实时资讯新闻播报":
        return pd.DataFrame(data_info_json["list"])

    elif indicator == "实时医院新闻播报":
        return pd.DataFrame(data_info_json["hospital"])

    elif indicator == "前沿知识":
        return pd.DataFrame(data_info_json["papers"])

    elif indicator == "权威发布":
        return pd.DataFrame(data_info_json["power"])

    elif indicator == "滚动新闻":
        return pd.DataFrame(data_info_json["scrollNews"])
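covid_19_163() builds the per-country frames by running two jsonpath queries over the same areaTree and pairing the results up as values and index. The miniature, invented areaTree below shows that pattern in isolation.

import pandas as pd
import jsonpath

area_tree = [
    {"name": "CountryA", "today": {"confirm": 1}, "total": {"confirm": 10}},
    {"name": "CountryB", "today": {"confirm": 2}, "total": {"confirm": 20}},
]

df = pd.DataFrame(
    jsonpath.jsonpath(area_tree, "$..today"),       # row values: the today dicts
    index=jsonpath.jsonpath(area_tree, "$..name"),  # row labels: the names
)
print(df)
# Roughly:
#           confirm
# CountryA        1
# CountryB        2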
Example #42
# Reading data from the file
file = open('E:\\API_automation\\GET_Request\\CreateUser.json', 'r')
json_input = file.read()

#Converting into Json format
requests_json = json.loads(json_input)

#For POST request we need to pass JSON as input
#print(requests_json)

#Make POST request with Json Input body
response = requests.post(url, requests_json)
print(response.content)

#Validating response code.
assert response.status_code == 201, "Actual status code is different"

# Fetch header from response
#print(response.headers) # This prints all the header details present.

print(
    response.headers.get("Content-Length")
)  # This gives information about the particular header, here content-length

# Parse response to Json Format
response_json = json.loads(response.text)

#Pick Id using Json Path
id = jsonpath.jsonpath(response_json, 'id')  # This will return the list.
print(id[0])  # Print(id)
Example #43
    def parse_page(self, response):
        url = response.url
        keywords = response.meta['keywords']
        r = requests.get(url, headers=DEFAULT_REQUEST_HEADERS)
        sel = etree.HTML(r.text)
        content = sel.xpath('/html/body/script[1]/text()')
        content = re.sub('window._sharedData = ', '', content[0]).rstrip(';')
        data = json.loads(content)
        end_cursor = jsonpath(
            data,
            '$.entry_data.TagPage[0].graphql.hashtag.edge_hashtag_to_media.page_info.end_cursor'
        )[0]
        has_next_page = jsonpath(
            data,
            '$.entry_data.TagPage[0].graphql.hashtag.edge_hashtag_to_media.page_info.has_next_page'
        )[0]
        print(has_next_page)
        posts = jsonpath(
            data,
            '$.entry_data.TagPage[0].graphql.hashtag.[edge_hashtag_to_media,edge_hashtag_to_top_posts].edges[*]'
        )
        for post in posts:
            self.item['keywords'] = keywords
            self.item['type'] = jsonpath(post, '$.node.__typename')[0]
            try:
                self.item['img_description'] = ''.join(
                    jsonpath(post, '$.node.accessibility_caption'))
            except TypeError as e:
                self.item['img_description'] = ''
            self.item['cover_height'] = jsonpath(post,
                                                 '$.node.dimensions.height')[0]
            self.item['cover_width'] = jsonpath(post,
                                                '$.node.dimensions.width')[0]
            self.item['cover_link'] = jsonpath(post, '$.node.display_url')[0]
            self.item['liked_count'] = jsonpath(
                post, '$.node.edge_liked_by.count')[0]
            try:
                self.item['content'] = ''.join(
                    jsonpath(
                        post,
                        '$.node.edge_media_to_caption.edges[*].node.text'))
            except Exception as e:
                pass
            self.item['comment_count'] = jsonpath(
                post, '$.node.edge_media_to_comment.count')[0]
            self.item['post_id'] = jsonpath(post, '$.node.id')[0]
            self.item['post_url'] = jsonpath(post, '$.node.shortcode')[0]
            self.item['post_link'] = self.start_urls[0] + 'p/' + self.item[
                'post_url'] + '/'
            self.item['user_id'] = jsonpath(post, '$.node.owner.id')[0]
            self.item['is_video'] = jsonpath(post, '$.node.is_video')[0]
            timestamp = jsonpath(post, '$.node.taken_at_timestamp')[0]
            self.item['pub_time'] = time.strftime("%Y-%m-%d",
                                                  time.localtime(timestamp))
            yield self.item

        if has_next_page:
            variable_dict = {
                "tag_name": keywords,
                "first": 12,
                "after": end_cursor
            }
            variable_json = json.dumps(variable_dict)
            print(variable_dict)
            params = {
                'query_hash': '174a5243287c5f3a7de741089750ab3b',
                'variables': variable_json
            }
            url = 'https://www.instagram.com/graphql/query/?{}'.format(
                urlencode(params))
            yield Request(url=url,
                          meta={
                              'keywords': keywords,
                              'break_count': 1
                          },
                          callback=self.parse_nextpage)
Beispiel #44
0
def check(Case, Response):
    '''
    Verify the response against the expectations defined in the test case
    :param Case: test case definition; the expected output lives in Case['Out']
    :param Response: parsed response to verify
    :return:
    '''
    if Case['Out']['type'] == 'key':
        '''
        Verify the values of the specified keys in the response
        '''
        isPass = True
        for keyOut in Case['Out']:
            if keyOut == 'type':
                pass
            else:
                if '$' in keyOut:
                    Log.print_info(2, "use Jsonpath")
                    res = jsonpath(Response, keyOut)
                    if res:
                        Error = numpy.where(
                            Case['Out'][keyOut] in res, 'Pass',
                            'expect:{0},Actual:{1},Res:{2}'.format(
                                Case['Out'][keyOut], res, Response))
                    else:
                        Error = 'Failed: no matching key found for {0}'.format(keyOut)
                else:
                    Error = numpy.where(
                        Case['Out'][keyOut] == Response[keyOut], 'Pass',
                        'expect:{0},Actual:{1},Res:{2}'.format(
                            Case['Out'][keyOut], Response[keyOut], Response))
                if 'Pass' != Error:
                    isPass = False
                    Log.print_info(1, 'INSTRUMENTATION_STATUS: result=Failed')
                    Log.print_info(
                        1, 'INSTRUMENTATION_STATUS: log={0}'.format(Error))
                    break

        if isPass:
            Log.print_info(1, 'INSTRUMENTATION_STATUS: result=Pass')
            Log.print_info(1, 'INSTRUMENTATION_STATUS: log={0}'.format('null'))
    elif Case['Out']['type'] == 'type':
        '''
        Verify the data types of the specified keys in the response
        '''
        isPass = True
        for keyOut in Case['Out']:
            if keyOut == 'type':
                pass
            else:
                if Case['Out'][keyOut] == 'int':
                    expect_type = int
                elif Case['Out'][keyOut] == 'str':
                    expect_type = str
                elif Case['Out'][keyOut] == 'dict':
                    expect_type = dict
                elif Case['Out'][keyOut] == 'list':
                    expect_type = list
                elif Case['Out'][keyOut] == 'bool':
                    expect_type = bool
                elif Case['Out'][keyOut] == 'none':
                    expect_type = type(None)

                Error = numpy.where(
                    expect_type == type(Response[keyOut]), 'Pass',
                    'expect:{0},Actual:{1},Res:{2}'.format(
                        Case['Out'][keyOut], type(Response[keyOut]), Response))
                if 'Pass' != Error:
                    isPass = False
                    Log.print_info(1, 'INSTRUMENTATION_STATUS: result=Failed')
                    Log.print_info(
                        1, 'INSTRUMENTATION_STATUS: log={0}'.format(Error))
                    break

        if isPass:
            Log.print_info(1, 'INSTRUMENTATION_STATUS: result=Pass')
    elif Case['Out']['type'] == 'file':
        '''
        Verify the response as a whole (via MD5 of the saved result file)
        '''
        Local = os.getcwd()
        if Local.endswith('API_test_Pro'):
            pass
        else:
            Local = Local.split('API_test_Pro')[0] + 'API_test_Pro'
        file = open('{1}/Entry/Result/{0}'.format(Case['CaseName'], Local),
                    'w')
        file.write(str(Response))
        file.close()
        Actual = getMD5('{1}/Entry/Result/{0}'.format(Case['CaseName'], Local))
        expect = getMD5('{1}/Utils/Result/{0}'.format(Case['Out']['file'],
                                                      Local))
        Error = numpy.where(Actual == expect, 'Pass',
                            'Data mismatch, check the response: {0}'.format(Response))
        if 'Pass' != Error:
            Log.print_info(1, 'INSTRUMENTATION_STATUS: result=Failed')
            Log.print_info(1, 'INSTRUMENTATION_STATUS: log={0}'.format(Error))
        else:
            Log.print_info(1, 'INSTRUMENTATION_STATUS: result=Pass')
            os.remove('{1}/Entry/Result/{0}'.format(Case['CaseName'], Local))
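For reference, a minimal (hypothetical) Case/Response pair for the 'key' branch of check() might look like the sketch below; the field names and values are illustrative assumptions, not taken from the original test data.

# Every key in Case['Out'] other than 'type' is either a plain response key or,
# when it contains '$', a JSONPath expression resolved against the response.
sample_case = {
    'CaseName': 'login_001',
    'Out': {
        'type': 'key',              # verify values of selected keys
        'code': 0,                  # compared directly against Response['code']
        '$..token_type': 'Bearer',  # resolved with jsonpath() against Response
    },
}
sample_response = {'code': 0, 'data': {'token': 'abc', 'token_type': 'Bearer'}}
# check(sample_case, sample_response)  # would log INSTRUMENTATION_STATUS: result=Pass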
Beispiel #45
0
    def parse_nextpage(self, response):
        try:
            keywords = response.meta['keywords']
            break_count = int(response.meta['break_count'])
            r = requests.get(response.url)
            time.sleep(1)
            data = r.json()
            end_cursor = jsonpath(
                data,
                '$.data.hashtag.edge_hashtag_to_media.page_info.end_cursor')[0]
            has_next_page = jsonpath(
                data,
                '$.data.hashtag.edge_hashtag_to_media.page_info.has_next_page'
            )[0]
            posts = jsonpath(data,
                             '$.data.hashtag.edge_hashtag_to_media.edges[*]')
            for post in posts:
                self.item['keywords'] = keywords
                self.item['type'] = jsonpath(post, '$.node.__typename')[0]
                try:
                    self.item['img_description'] = ''.join(
                        jsonpath(post, '$.node.accessibility_caption'))
                except TypeError as e:
                    self.item['img_description'] = ''

                try:
                    self.item['content'] = ''.join(
                        jsonpath(
                            post,
                            '$.node.edge_media_to_caption.edges[*].node.text'))
                except TypeError as e:
                    self.item['content'] = ''

                self.item['cover_height'] = jsonpath(
                    post, '$.node.dimensions.height')[0]
                self.item['cover_width'] = jsonpath(
                    post, '$.node.dimensions.width')[0]
                self.item['cover_link'] = jsonpath(post,
                                                   '$.node.display_url')[0]
                self.item['liked_count'] = jsonpath(
                    post, '$.node.edge_liked_by.count')[0]
                self.item['comment_count'] = jsonpath(
                    post, '$.node.edge_media_to_comment.count')[0]
                self.item['post_id'] = jsonpath(post, '$.node.id')[0]
                self.item['post_url'] = jsonpath(post, '$.node.shortcode')[0]
                self.item['post_link'] = self.start_urls[0] + 'p/' + self.item[
                    'post_url'] + '/'
                self.item['user_id'] = jsonpath(post, '$.node.owner.id')[0]
                self.item['is_video'] = jsonpath(post, '$.node.is_video')[0]
                timestamp = jsonpath(post, '$.node.taken_at_timestamp')[0]
                self.item['pub_time'] = time.strftime(
                    "%Y-%m-%d", time.localtime(timestamp))
                yield self.item

            logging.error('{},{}.'.format(has_next_page, end_cursor))

            if has_next_page and break_count < 200:
                break_count += 1
                print(
                    '********************************************************************************************************************************'
                )
                print(break_count)
                logging.error('break_count:{}.'.format(break_count))
                variable_dict = {
                    "tag_name": keywords,
                    "first": 12,
                    "after": end_cursor
                }
                variable_json = json.dumps(variable_dict)
                params = {
                    'query_hash': '174a5243287c5f3a7de741089750ab3b',
                    'variables': variable_json
                }
                url = 'https://www.instagram.com/graphql/query/?{}'.format(
                    urlencode(params))
                yield Request(url=url,
                              meta={
                                  'keywords': keywords,
                                  'break_count': break_count
                              },
                              callback=self.parse_nextpage)
        except Exception as e:
            logging.error('{},{}.'.format(e, response.url))
Beispiel #46
0
            "isbn": "0-395-19395-8",
            "price": 22.99
        }],
        "bicycle": {
            "color": "red",
            "price": 19.95
        }
    }
}

from jsonpath import jsonpath
"""jsonpath:用来解析多层嵌套的json数据"""

# 1. 安装:pip install jsonpath
# 2. 用法:jsonpath(要被提取的python数据类型, '提取的规则')

print(jsonpath(book_dict, '$..author'))  # 返回值为列表,如果取不到,返回False
print(jsonpath(book_dict, '$.store.bicycle.price'))
"""
$.store.book[*].author	store中的所有的book的作者
$..author	            所有的作者
$.store.*	            store下的所有的元素
$.store..price	        store中的所有的内容的价格
$..book[2]	            第三本书
$..book[(@.length-1)] | $..book[-1:]	最后一本书
$..book[0,1] | $..book[:2]	            前两本书
$..book[?(@.isbn)]	                    获取有isbn的所有数
$..book[?(@.price<10)]	                获取价格大于10的所有的书
$..*	                                获取所有的数据
"""
Beispiel #47
0

def generated_datas(data, sent_data=None):
    """
    When a case depends on multiple pieces of data, merge them into one dict.
    """
    temp_data = {} if sent_data is None else sent_data
    for i in data.split(","):
        for key, value in generated_data(i).items():
            temp_data[key] = value
    return temp_data


if __name__ == '__main__':
    data = "case_001>data.token"
    data1 = "case_001>data.token,case_001>data"
    data2 = "case_001>data"
    #print(split_data(data))
    #print(split_key(data2))
    #data3={"111":111}
    #print(generated_datas(data1,data3))
    print(depend_data(data))
    print(type(json.loads(depend_data(data))))
    print(jsonpath.jsonpath(json.loads(depend_data(data)), "$.data.token"))
Beispiel #48
0
}

max_id = ""
while True:
    if max_id == "":
        url = "https://m.weibo.cn/comments/hotflow?id=4485613145089303&mid=4485613145089303&max_id_type=0"
    else:
        url = "https://m.weibo.cn/comments/hotflow?id=4485613145089303&mid=4485613145089303&max_id={}&max_id_type=0".format(
            max_id)

    response1 = requests.get(url=url, headers=headers)
    if response1.status_code != 200:
        break
    print("请求成功:{}".format(response1.status_code))
    response = response1.json()
    names = jsonpath.jsonpath(response, "$..screen_name")
    ids = jsonpath.jsonpath(response, "$..user.id")
    max_id = jsonpath.jsonpath(response, "$..max_id")[0]
    print("请求的是:{}".format(max_id))
    texts = jsonpath.jsonpath(response, "$..text")
    textss = []
    for i in texts:
        a = i.split("<", 1)[0]
        textss.append(a)
    for id, name, text in zip(ids, names, textss):
        item1 = {"id": id, "name": name, "text": text}
        client.weibo.pinglun.insert(item1)
    print("Inserted into MongoDB")
    time.sleep(3)

# Close the MongoDB client after the loop ends (closing it inside the loop
# would drop the connection after the first page of comments).
client.close()
Beispiel #49
0
month = int(input('Enter month: '))
date1 = int(input('Enter start day: '))
date2 = int(input('Enter end day: '))

if month < 10:
    month = '0' + str(month)
for date in range(date1, date2 + 1):
    if date < 10:
        date = '0' + str(date)

    url = 'https://api.sharkshopping.com/ec/api?method=tv.program.data&appid=webapp&token=&version=4.4.1&source=wap&city_num=310100&brand_id=&date=' + str(
        year) + '-' + str(month) + '-' + str(date) + '&cat_id='
    print(url)
    res = requests.get(url)
    json_data = demjson.decode(res.text)
    start_time_list = jsonpath(json_data, '$..start_time')
    end_time_list = jsonpath(json_data, '$..end_time')
    sku_list = jsonpath(json_data, '$..sku')
    name_list = jsonpath(json_data, '$..name')
    price_list = jsonpath(json_data, '$..price')
    product_brand_list = jsonpath(json_data, '$..product_brand')

    start_time_list = [i for i in start_time_list if i != 0]
    end_time_list = [i for i in end_time_list if i != 0]

    list_content = [
        start_time_list, end_time_list, sku_list, name_list, price_list,
        product_brand_list
    ]
    df = pd.DataFrame(list_content)
    df = df.T
Beispiel #50
0
import sys
sys.path.append('G:\\framework_practicsse\\apiautomation')
print(sys.path)
from helpers import crudAPI
from utility import Config
import json
import jsonpath

gurl = Config.readConfigData("APIDetails", "get_url")

a = crudAPI.hitgetApi("get", gurl)
#print(a)

json_response = json.loads(a)
print(json_response)
x = jsonpath.jsonpath(json_response, 'total')
print(x)
assert x[0] == 12
Beispiel #51
0
import requests
import json
import jsonpath

url = 'https://reqres.in/api/users/2'
data_dict = {"name": "Sanjay Singh Panwar", "job": "leader"}
response = requests.put(url, data_dict)
print(response)

assert response.status_code == 200

json_response = json.loads(response.text)
print(json_response)

updated_time = jsonpath.jsonpath(json_response, 'updatedAt')
print(updated_time[0])
Beispiel #52
0
# village = "zhjy"
# village_name = "中虹家园"
# location = '&location=121.672462,31.28271'  # 中虹家园

village = "dxy"
village_name = "丁香园"
location = '&location=121.415404,31.14585'  # 丁香园

keywords = '&keywords=亲子'
type = '&types='
other = '&radius=3000&offset=20&page=1&extensions=base'

amap_url = 'http://restapi.amap.com/v3/place/around?key=eb38430327c843a503698c6eb015ec48' + location + keywords + type + other
page = urllib2.urlopen(amap_url)
data = json.load(page)
citylist = jsonpath.jsonpath(data, '$..pois')
for city in citylist:
    i = 0
    while i < len(city):
        distance = jsonpath.jsonpath(city[i], '$..distance')[0]
        name = jsonpath.jsonpath(city[i], '$..name')[0]
        type = jsonpath.jsonpath(city[i], '$..type')[0]
        typecode = jsonpath.jsonpath(city[i], '$..typecode')[0]
        address = jsonpath.jsonpath(city[i], '$..address')[0]
        location = jsonpath.jsonpath(city[i], '$..location')[0]
        tel = jsonpath.jsonpath(city[i], '$..tel')[0]
        if tel == []:
            tel = ''
        print(tel)
        i += 1
        # rank_date=time.strptime(str(rank_date),"%Y-%m-%d")
Beispiel #53
0
def getcms(keyword):
    # , language, resPeople
    result = keyWordsCollection.find_one({"originKey": keyword})
    language = result["language"]
    resPeople = result["resPeople"]
    part = result["part"]
    station = result["station"]
    # Update the fetch status of the keyword
    # if " " in keyword:
    #     updateStatusKeyWord(keyword, part)
    #     return

    word = words.get(language)
    if not word:
        updateStatusKeyWord(keyword, part)
        logging.error("没有匹配的语言:{}".format(language))
        return

    keywordnew = "inurl:telegram.me " + keyword
    keywordnew = keywordnew.replace(" ", "%20")
    url = "http://api.serpprovider.com/5bfdf4cd7d33d1d77b9875d1/google/en-us/{}/{}".format(
        word, keywordnew)
    logging.info("请求数据,关键字:{},url:{}".format(keywordnew, url))
    html = sendRequest(url)  # 请求
    try:
        datas = json.loads(html)
    except Exception as e:
        return
    reslist = jsonpath.jsonpath(datas, "$..res")
    if reslist:
        reslist = reslist[0]
    else:
        logging.error("google搜索后没有数据:{}".format(url))
        updateStatusKeyWord(keyword, part)
        return
    if not reslist:
        updateStatusKeyWord(keyword, part)
        logging.error("google搜索后没有数据:{}".format(url))
        return
    for data in reslist:
        url = data["url"]
        # Scheme (protocol)
        scheme = urlparse(data['url']).scheme
        # Domain
        domain = urlparse(data['url']).netloc
        if not scheme or not domain:
            continue
        link = scheme + '://' + domain  # rebuild the base link

        if domain != "telegram.me":
            logging.error("域名不为telegram.me     :{}".format(domain))
            continue

        # At this point the domain is telegram.me
        if url.split("telegram.me")[-1] == "/":
            logging.error("url is {}".format(url))
            continue

        if url.endswith("telegram.me"):
            logging.error("url为{}".format(url))
            continue
        url = link + url.split("telegram.me")[-1]
        if url.endswith("/"):
            url = url[:-1]
        if url in urlList:
            logging.warn("该地址已经获取,存在缓存中,url:{}".format(url))
            continue

        # Check whether the URL is already in the database
        result = googleUrlCollection.find_one({"url": url, "part": part})
        if result:
            logging.warn("该url已经获取,存在数据库中中,url:{}".format(url))
            continue

        title = data['title']  # title of the search result
        description = data['desc']  # description of the search result
        urlList.append(url)
        sourceUrl = data["url"]
        insertItem(domain, url, sourceUrl, scheme, keyword, language,
                   resPeople, title, description, word, part, station)
    updateStatusKeyWord(keyword, part)
Beispiel #54
0
import json
import requests
import jsonpath

load = {
    'id': 4296,
    'language': ['python', 'selenium'],
    'yearsexp': '2',
    'lastused': '2019',
    'st_id': '4296'
}
result = requests.get('http://thetestingworldapi.com/api/technicalskills/4182')
print(result.status_code)
rece = result.content
jse = json.loads(rece)
print(jse)
print(jsonpath.jsonpath(jse, 'id'))
Beispiel #55
0
"""

import requests
import jsonpath

headers = {
    "X-Lemonban-Media-Type": "lemonban.v2"
}
# Login request
url = "http://api.lemonban.com/futureloan/member/login"
data = {
    "mobile_phone": "13367899876",
    "pwd": "lemonban"
}
res = requests.post(url=url, json=data, headers=headers)

data = res.json()
print(data)
# Extract the token (jsonpath returns a list of matches)
token = jsonpath.jsonpath(data, "$..token")
# index [0] would give the bare value
print(token)
# Extract the token_type value
token_type = jsonpath.jsonpath(data, "$..token_type")[0]
print(token_type)
# Extract reg_name
reg_name = jsonpath.jsonpath(data, "$..reg_name")[0]
print(reg_name)
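The extracted values would typically be reused on later requests. A minimal sketch follows; whether this API expects an Authorization header of the form "<token_type> <token>" is an assumption, not something the snippet above confirms.

# Reuse the extracted token on a follow-up request (header scheme assumed).
auth_headers = {
    "X-Lemonban-Media-Type": "lemonban.v2",
    "Authorization": "{} {}".format(token_type, token[0]),
}
# next_res = requests.get(some_member_url, headers=auth_headers)
# some_member_url is a placeholder for a later, authenticated endpoint.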


Beispiel #56
0
from jsonpath import jsonpath
import requests
from fake_useragent import UserAgent
import json

url = "https://www.lagou.com/lbs/getAllCitySearchLabels.json"
headers = {
    "User-Agent": UserAgent().random
}
response = requests.get(url, headers=headers)
names = jsonpath(json.loads(response.text), '$..name')
codes = jsonpath(response.json(), '$..code')
print(names)
print(codes)
Beispiel #57
0
        "SecurityGroup": [
            {
                "CreationTime": "2016-10-27T01:49:17Z",
                "Tags": {
                    "Tag": []
                },
                "SecurityGroupId": "sg-SecurityGroupId",
                "SecurityGroupName": "SecurityGroupName",
                "Description": "Description",
                "ResourceGroupId": "",
                "SecurityGroupType": "normal",
                "VpcId": ""
            }
        ]
    }
}
"""

wanted_res_1 = jsonpath.jsonpath(
    json.loads(test_json_str),
    "$.SecurityGroups.SecurityGroup[0].SecurityGroupId")

wanted_res_2 = jsonpath.jsonpath(
    json.loads(test_json_str),
    '$.SecurityGroups.SecurityGroup[?(@.Description=="Description")].SecurityGroupId'
)

wanted_res_3 = jsonpath.jsonpath(
    json.loads(test_json_str),
    "$.SecurityGroups.SecurityGroup[*].SecurityGroupId")
Beispiel #58
0
    def parse_data(self, spec_datas):
        # print(spec_datas)
        for spec_data in spec_datas:
            spec_id = jsonpath.jsonpath(spec_data, '$[specList]..specId')
            # print('spec id', spec_id)
            if spec_id == False:
                continue

            # Manufacturer id
            brand_id = jsonpath.jsonpath(spec_data, '$[specinfo]..fctid')[0]
            # print('manufacturer id', brand_id)

            # Manufacturer name
            brand_name = jsonpath.jsonpath(spec_data,
                                           '$[specinfo]..fctname')[0]
            # print('manufacturer name', brand_name)

            # Brand id
            contain_brand_id = jsonpath.jsonpath(spec_data,
                                                 '$[specinfo]..brandid')[0]
            # print('brand id', contain_brand_id)

            # Brand name
            contain_brand_name = jsonpath.jsonpath(spec_data,
                                                   '$[specinfo]..brandname')[0]
            # print('brand name', contain_brand_name)

            # Series id
            series_id = jsonpath.jsonpath(spec_data,
                                          '$[specinfo]..seriesid')[0]
            # print('series id', series_id)

            # Series name
            category_fullname = jsonpath.jsonpath(spec_data,
                                                  '$[specinfo]..seriesname')[0]
            # print('series name', category_fullname)
            """Concatenate the series name with the spec (model) name"""
            # Series names
            series_names = jsonpath.jsonpath(spec_data,
                                             '$[specList]..SeriesName')
            # Spec (model) names
            spec_names = jsonpath.jsonpath(spec_data, '$[specList]..specName')
            # Prepend the series name to each spec name
            spec_name_list = []
            for i in range(0, len(series_names)):
                spec_name_list.append(series_names[i] + ' ' + spec_names[i])
            # print('names', spec_name_list)

            # Extract the model year and move it to the end of the name
            spec_name_list2 = []
            for name in spec_name_list:
                s_name = "".join(re.findall(r'[0-9]{4}款*', name))
                sp_name = name.replace(s_name + " ", '')
                spec_names = sp_name + s_name
                spec_name_list2.append(spec_names)
            # print(spec_name_list2)

            # Add a space after '手动' (manual transmission)
            spec_name_shou = []
            for name in spec_name_list2:
                name1 = name.replace('手动', '手动 ')
                spec_name_shou.append(name1)
            # print(spec_name_shou)
            # Add a space after '自动' (automatic transmission)
            spec_name = []
            for name in spec_name_shou:
                name1 = name.replace('自动', '自动 ')
                spec_name.append(name1)
            # print(spec_name)
            gc.collect()

            # City id
            city_id = jsonpath.jsonpath(spec_data, '$..cid')[0]
            # print('city id', city_id)

            # City name
            city_name = jsonpath.jsonpath(spec_data, '$..cityName')[0]
            # print('city name', city_name)

            # Guide price (MSRP)
            guidance_price = jsonpath.jsonpath(
                spec_data, '$[specList]..MinOriginalPrice')
            # print('guide price', guidance_price)

            # Current price
            price = jsonpath.jsonpath(spec_data, '$[specList]..Price')

            # Price reduction (guide price minus current price)
            cut_price = (list(map(lambda x, y: x - y, guidance_price, price)))

            # Last update time
            last_sync_time = datetime.now().strftime("%Y-%m")

            a_last_sync_time = datetime.now().strftime("%Y-%m-%d")
            yield brand_id, brand_name, series_id, category_fullname, spec_id, spec_name, city_id, city_name, guidance_price, cut_price, price, contain_brand_id, contain_brand_name, last_sync_time, a_last_sync_time
Beispiel #59
0
    def test_order_check_add(self):
        res = requests.get(url, params=None, headers=heads)
        msg = jsonpath.jsonpath(res.json(), '$.msg')[0]
        self.assertEqual(msg, '成功')
Beispiel #60
0
    def get_all_data(self, url):
        html = self.get_html(url, 'get_all_data')
        if html is not None:
            html = json.loads(html)
            titles = jsonpath.jsonpath(html, "$..title")
            print 'titles      ', titles
            url = jsonpath.jsonpath(html, "$..mediaUrl")
            descs = jsonpath.jsonpath(html, "$..description")
            if url:
                print url
                i = 0
                for i in range(0, len(url)):
                    download_url = 'https://static.wixstatic.com/media/' + url[
                        i]
                    print download_url
                    if titles:
                        title = titles[0]
                        subcat = re.sub(r'-png-\d+', '', title)
                        if 'pngs' in subcat:
                            subcat = subcat.replace('pngs', '')
                        if 'icon' in subcat:
                            subcat = subcat.replace('icon', '')
                        if 'PNGs' in subcat:
                            subcat = subcat.replace('PNGs', '')
                        if 'PNG' in subcat:
                            subcat = subcat.replace('PNG', '')
                        if 'images' in subcat:
                            subcat = subcat.replace('images', '')
                        if 'image' in subcat:
                            subcat = subcat.replace('image', '')
                        if 'free' in subcat:
                            subcat = subcat.replace('free', '')
                        if 'Free' in subcat:
                            subcat = subcat.replace('Free', '')
                        if 'cutouts' in subcat:
                            subcat = subcat.replace('cutouts', '')
                        if 'cutout' in subcat:
                            subcat = subcat.replace('cutout', '')
                        if 'holiday' in subcat:
                            subcat = subcat.replace('holiday', '')
                        if 'collection' in subcat:
                            subcat = subcat.replace('collection', '')
                        if 'Letter' in subcat:
                            subcat = subcat.replace('Letter', '')
                        if 'Number' in subcat:
                            subcat = subcat.replace('Number', '')
                        if ',' in subcat:
                            subcat = subcat.replace(',', ' ')
                        if ':' in subcat:
                            subcat = subcat.replace(':', ' ')
                        if 'transparent' in subcat:
                            subcat = subcat.replace('transparent', ' ')
                        if 'ex' in subcat:
                            subcat = subcat.replace('ex', ' ')
                        subcat = subcat.strip()
                        cat = self.get_cat_from_mysql(subcat)
                        if descs:
                            desc = descs[0].strip('- ')
                            if '\\' in desc:
                                desc = desc.replace('\\', '`')

                            if cat:
                                sql = 'insert into freepngs_pdts (cat,subcat,title,download_url,description) VALUES ("%s","%s","%s","%s","%s")' % (
                                    cat, subcat, title, download_url, desc)
                            else:
                                sql = 'insert into freepngs_pdts (subcat,title,download_url,description) VALUES ("%s","%s","%s","%s")' % (
                                    subcat, title, download_url, desc)
                        else:
                            if cat:
                                sql = 'insert into freepngs_pdts (cat,subcat,title,download_url) VALUES ("%s","%s","%s","%s")' % (
                                    cat, subcat, title, download_url)
                            else:
                                sql = 'insert into freepngs_pdts (subcat,title,download_url) VALUES ("%s","%s","%s")' % (
                                    subcat, title, download_url)
                    else:
                        sql = 'insert into freepngs_pdts (download_url) VALUES ("%s")' % download_url
                    i += 1
                    try:
                        self.cursor.execute(sql)
                        self.db.commit()
                        print 'Inserted successfully'
                    except Exception, e:
                        self.db.rollback()
                        print sql
                        print str(e)