def getAllPullRequestsForRepo(userAndProject):
    pullRequestRecords = []
    conn = httplib.HTTPSConnection('api.github.com')
    conn.request("GET", '/repos/' + userAndProject + '/pulls?state=open&per_page=100')
    pullRequests = demjson.decode(conn.getresponse().read())
    conn.request("GET", '/repos/' + userAndProject + '/pulls?state=closed&per_page=100')
    pullRequests.extend(demjson.decode(conn.getresponse().read()))
    # print '<div>%s has %d pull requests</div>' % (userAndProject, len(pullRequests))
    # html url
    # https://github.com/mozila/pdf.js/pull/643
    for pullRequest in pullRequests:
        pullRecord = {
            'ts': private_strptime(pullRequest["created_at"][0:19]),
            'kind': 'pull created',
            'category': canonicalizeCategory(userAndProject),
            'user': canonicalizeUsername(pullRequest["user"]["login"]),
            'url': pullRequest["html_url"],
            'title': pullRequest["title"][0:80].replace('\n', ' ')
        }
        pullRequestRecords.append(pullRecord)
    return pullRequestRecords
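# Hedged usage sketch, not part of the original script: "userAndProject" is the
# "owner/repo" path spliced into the GitHub API URL above, e.g. the pdf.js
# repository referenced in the comment. Output fields follow pullRecord.
records = getAllPullRequestsForRepo('mozilla/pdf.js')
for rec in records[:5]:
    print '%s opened "%s" (%s)' % (rec['user'], rec['title'], rec['url'])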
def _get_company_as_string (self): comp1 = esc_str(self.get_company()) if not comp1: return 'nil' comp = copy.deepcopy(self.get_custom('company')) ver = self.get_store().get_file_format() ## FIXME: This is an egregious design violation, as noted earlier. We ## should move all such version specific conversions to pimdb_bb.el if ver == '6': if comp and len(comp) > 0: comp = demjson.decode(comp) comp = [chompq(x) for x in comp] else: comp = [] comp.insert(0, comp1) return unchompq('; '.join(comp)) elif ver == '7': if comp and len(comp) > 0: comp = demjson.decode(comp) comp.insert(0, unchompq(comp1)) else: comp = [unchompq(comp1)] return ('(' + ' '.join(comp) + ')')
def get_os_id(params): """ Function to get Open States ID. Please do not abuse API key. """ apikey = '49c5c72c157d4b37892ddb52c63d06be' params['apikey'] = apikey os_url = create_os_url(params) raw = scraperwiki.scrape(os_url) os_data = demjson.decode(raw) os_found = len(os_data) os_id = '' # Use first if any found, if not remove last name if os_found > 0: os_id = os_data[0]['id'] else: del params['first_name'] os_url = create_os_url(params) raw = scraperwiki.scrape(os_url) os_data = demjson.decode(raw) os_found = str(len(os_data)) + '-removed-first' if len(os_data) > 0: os_id = os_data[0]['id'] return { 'found': os_found, 'id': os_id }
def __getDataDemoFromString__(self,jsonStr): self.crawlerTime = datetime.datetime.now() dataDemo = jsonDataDemo() items =[] variables =[] text = demjson.decode(jsonStr) dataDemo.__setTableName__('tableName',text.get('tableName')) dataDemo.__setPrimaryKey__('primaryKey',text.get('primaryKey')) # replace DataTime to Now startUrl = text.get('URL') startUrl = startUrl.replace("##year", str(self.crawlerTime.year)) startUrl = startUrl.replace("##month", str(self.crawlerTime.month)) startUrl = startUrl.replace("##day", str(self.crawlerTime.day)) startUrl = startUrl.replace("##hour", str(self.crawlerTime.hour)) startUrl = startUrl.replace("##minute", str(self.crawlerTime.minute)) startUrl = startUrl.replace("##second", str(self.crawlerTime.second)) dataDemo.__setURL__('URL',startUrl) dataDemo.__setTask__('task',text.get('task')) for item in text.get('items'): items.append(demjson.decode(demjson.encode(item))) dataDemo.__setItems__('items',items) for variable in text.get('variables'): variables.append(demjson.decode(demjson.encode(variable))) dataDemo.__setVariables__('variables',variables) chirdrenObjs = [] for chirdren in text.get('chirdren'): chirdrenJson = json.dumps(chirdren) chirdrenObj = self.__getDataDemoFromString__(chirdrenJson) chirdrenObjs.append(chirdrenObj) dataDemo.__setChirdren__(chirdrenObjs) return dataDemo
def get_description(artist_name): """Grabs a bunch of info about the band from Seevl (or last.fm for description if there isn't one on seevl). Returns a triple: ("Description Text", "Genre", [("link_type", "link_url]),..,("link_type", "link_url)] )""" #Setup the variables incase everything fails artist_description = "We don't have a description or bio for this band, sorry :(" genre = "Unknown" url_list = [] try: #Set up the headers etc for Seevl.net API and request artist infos url = 'http://data.seevl.net/entity/?prefLabel={name}'.format(name=urllib.quote(artist_name)) headers = { 'Accept' : 'application/json', 'X_APP_ID' : SV_ID, 'X_APP_KEY' : SV_KEY } req = urllib2.Request(url, None, headers) response = urllib2.urlopen(req) artist_page = response.read() artist_info = json.decode(artist_page) #This is a dict with a load of seevl info about artist #If seevl doesen't have a description then look for it on last.fm to see if they have one: if len(artist_info['results']) == 0: try: lfm_url = "http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist={artist}&api_key={key}&format=json".format(artist=urllib.quote(artist_name), key=LF_KEY) lfm_info = json.decode(urllib2.urlopen(lfm_url).read()) artist_description = "{text} {attrib}".format(text=lfm_info['artist']['bio']['summary'].encode('utf-8'), attrib="Description from last.fm") lfm_description = True #Grab the genre off last.fm too if Seevl doesen't have it tag_url = "http://ws.audioscrobbler.com/2.0/?method=artist.gettoptags&artist={artist}&api_key={key}&format=json".format(artist=urllib.quote(artist_name).encode('utf-8'), key=LF_KEY) tag_data = json.decode(urllib2.urlopen(tag_url).read()) genre = tag_data['toptags']['tag'][0]['name'].title() except Exception, e: #Fun error handling print "Error", e else:
def get_collaborators(artistName): collaborators = set () #artistNameQuoted = artistName.replace(" ","+") #artistNameQuoted = urllib.quote_plus(artistName) # Search in the artist field #scrape_url = "http://itunes.apple.com/search?term=%s&limit=5000&entity=song&attribute=artistTerm" % artistNameQuoted params = {'term' : artistName, 'limit' : 5000, 'entity' : 'song', 'attribute' : 'artistTerm' } search_json = demjson.decode(get_json(params)) collaborators.update(get_collaborators_from_json(search_json, artistName)) # Search in the title field (will fail for artists whose names also appear as unrelated song titles...) #scrape_url = "http://itunes.apple.com/search?term=%s&limit=5000&entity=song&attribute=songTerm" % artistNameQuoted params = {'term' : artistName, 'limit' : 5000, 'entity' : 'song', 'attribute' : 'songTerm' } search_json = demjson.decode(get_json(params)) #print search_json collaborators.update(get_collaborators_from_json(search_json, artistName)) for collaborator in collaborators: data = {'artist' : artistName, 'collaborator' : collaborator } scraperwiki.sqlite.save(unique_keys=['artist', 'collaborator'], data=data) return collaborators
def DELETE(self): input_data = web.data() data = urlparse.parse_qs(input_data) v_ct_fids = db.query("select distinct t.source_fid,t.target_fid,t.type_fid,t.owner,t.family_id from t_ci_relation t where t.family_id=$fid and t.endtime=$endtime",vars={'endtime':ENDTIME,'fid':data['fid'][0]}) json_en = demjson.encode(v_ct_fids) json_de = demjson.decode(json_en) v_ct_fid_num = len(json_de) if v_ct_fid_num == 0: return 2 #there is no records to delete in table T_CI_RELATION elif v_ct_fid_num > 1: return 3 #there are more than one records to delete in table T_CI_RELATION v_curtime = time.strftime("%Y%m%d%H%M%S", time.localtime()) #Notice;if the relation is composition and the target ci exists, we should delete the relative ci v_target_fids = db.query("select t.family_id, crt.relation from t_ci t, t_ci_relation_type crt where t.family_id=$target_fid and t.endtime=$endtime and crt.family_id=$type_fid and crt.endtime=$endtime and crt.relation='COMPOSITION'",vars={'endtime':ENDTIME,'target_fid':json_de[0]['TARGET_FID'],'type_fid':json_de[0]['TYPE_FID']}) target_json_en = demjson.encode(v_target_fids) target_json_de = demjson.decode(target_json_en) v_target_num = len(target_json_de) if v_target_num <> 0: #delete the existed ci. It will also delete the relative ci_attribute and ci_relation. n = webci.fn_delete_ci(json_de[0]['TARGET_FID'], v_curtime, data['change_log'][0]) else: #delete t_ci_relation n = fn_delete_cirela(data['fid'][0],v_curtime,data['change_log'][0]) return n
def return_check_requests(cls, account='TIANJINOPERATION', operate_type='1', order_ids=[]): """ 退货审核 :param account: :param operate_type: 1:批准,2:二次配送 :param ids: """ print u'*' * 20 + u'退货审核' obj = obj = demjson.decode(RainbowUtil.rainbow_get_return_request(order_nos=order_ids)) n = 0 while int(obj['total']) != len(order_ids) and n < tmsBase.retry_times: print 'expect %s, actual %s' % (len(order_ids), obj['total']) sleep(1) obj = demjson.decode(RainbowUtil.rainbow_get_return_request(order_nos=order_ids)) n += 1 print 'expect %s, actual %s' % (len(order_ids), obj['total']) if int(obj['total']) != len(order_ids): print 'expect %s, actual %s' % (len(order_ids), obj['total']) raise StandardError(u'>>>>>>>>>>期望待退货审核运单,与实际可退货运单不一致') ids = list() for item in obj['rows']: ids.append(item['id']) url = tmsBase.base_url + '/tms/sort/refundOrderCheckController/updateCheckResult.do?operation=' + operate_type resp = HttpRequest.post_request(TmsLogin.get_session(account), url, data={'ids[]': ids}) check_operation_result(resp) return resp
def refund_apply(cls, account='ADMIN', order_nos=[]): """ 退款申请 :param account: :param ids: """ print u'*' * 20 + u'退款申请' # 获取运单id order_ids = [] obj = demjson.decode(RainbowUtil.rainbow_get_refund_apply_info(account=account, order_ids=order_nos)) for item in obj['rows']: order_ids.append(item['id']) # 申请退款 url = tmsBase.base_url + '/tms/sort/refundOrderController/refundApply.do' resp = HttpRequest.post_request(TmsLogin.get_session(account), url, data={'ids[]': order_ids}) # check_operation_result(resp) print resp # 获取退款申请号 obj = demjson.decode(RainbowUtil.rainbow_get_refund_apply_info(account=account, order_ids=order_nos)) refund_apply_no = obj['rows'][0]['refundApplyNo'] print u'----------退款申请号:%s' % refund_apply_no return refund_apply_no
def main(infiles=None, locfile=None, **kwargs): locations = {} metadata_file = locfile.read() match = PATTERN2.finditer(metadata_file) for entry in match: locations[entry.group(1)] = demjson.decode(entry.group(2)) tracks = {} match = PATTERN3.finditer(metadata_file) for entry in match: tracks[entry.group(1)] = demjson.decode(entry.group(2)).get('name') events = [] for infile in infiles: data = json.load(infile) if data is None: continue events.extend(data['events']) for track_id, track_name in tracks.items(): cal = Calendar() cal['dtstart'] = '20180519T080000' cal['summary'] = 'OpenStack Summit Vancouver 2018: ' + track_name tz = Timezone(TZID='America/Vancouver') tz.add_component(TimezoneStandard(DTSTART="20171105T020000", TZOFFSETFROM="-0700", TZOFFSETTO="-0800", RDATE="20181104T020000", TZNAME="PST")) tz.add_component(TimezoneDaylight(DTSTART="20180311T020000", TZOFFSETFROM="-0800", TZOFFSETTO="-0700", TZNAME="PDT")) cal.add_component(tz) for session in events: if track_id != str(session.get('track_id')): continue timezone_str = session.get('time_zone_id') tzinfos = {"UN": gettz(timezone_str)} start_datetime_str = session.get('start_datetime') start_datetime = parse(start_datetime_str + " UN", tzinfos=tzinfos) start_datetime_utc = start_datetime.astimezone(utc) end_datetime_str = session.get('end_datetime') end_datetime = parse(end_datetime_str + " UN", tzinfos=tzinfos) end_datetime_utc = end_datetime.astimezone(utc) desc = PATTERN.sub('', session.get('abstract')) for pre, post in REPLACE_MAP.items(): desc = desc.replace(pre, post) event = Event() event.add('dtstart', start_datetime_utc) event.add('dtend', end_datetime_utc) event.add('summary', session.get('title')) event.add('location', locations.get(str(session.get('location_id')), {}).get('name_nice', "")) event.add('description', desc) event.add('uid', "%s@openstacksummitboston2017" % session.get('id')) cal.add_component(event) with open("%s.ics" % PATTERN4.sub("-", track_name), "w") as f: f.write(cal.to_ical())
def testObjectNonstringKeys(self):
    self.assertEqual(demjson.decode('{55:55}', strict=False), {55: 55})
    self.assertEqual(demjson.decode('{fiftyfive:55}', strict=False), {'fiftyfive': 55})
    self.assertRaises(demjson.JSONDecodeError, demjson.decode,
                      '{fiftyfive:55}', strict=True)
    self.assertRaises(demjson.JSONEncodeError, demjson.encode,
                      {55: 'fiftyfive'}, strict=True)
    self.assertEqual(demjson.encode({55: 55}, strict=False), '{55:55}')
def _get_demjson_diagnostics(raw):
    """Get diagnostics string for invalid JSON files from demjson."""
    errstr = None
    try:
        demjson.decode(raw, strict=True)
    except demjson.JSONError as err:
        errstr = err.pretty_description()
    return errstr
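# Hedged usage sketch, not part of the original module: shows the kind of
# diagnostics the helper above surfaces for malformed JSON. The sample input
# string is invented for illustration.
import demjson

def _demo_diagnostics():
    bad_json = '{"name": "widget", "price": }'  # missing value -> strict decode fails
    try:
        demjson.decode(bad_json, strict=True)
    except demjson.JSONError as err:
        return err.pretty_description()
    return None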
def validate(content, source_id="<source>"):
    """Return whether the content is valid JSON."""
    try:
        decode(content, strict=True)
    except JSONDecodeError, error:
        print "\nInvalid JSON source: %s" % source_id
        print "\n\t%s\n" % error.pretty_description()
        return False
    # The docstring promises a boolean, so report success explicitly.
    return True
def choice_budget(): global chosen, validCities, costDict travelcosts = dict() lattitude = (str)(loc[0]) longitude = (str)(loc[1]) re = requests.get('https://api.sandbox.amadeus.com/v1.2/airports/nearest-relevant?apikey=' + apikey + '&latitude=' + (lattitude) + '&longitude=' + (longitude)) page = re.text page = demjson.decode(page) d_code = (page[0])["airport"] x = len(countries) if international: x = 15 for i in range(x): if (1==validCities[i]): city = cities[i].replace (" ", "%20") print city re = requests.get("https://api.sandbox.amadeus.com/v1.2/airports/autocomplete?apikey=" + apikey + "&term={0}".format(city)) page = re.text page = demjson.decode(page) if page == []: validCities[i] = 0 else: a_code = page[0]["value"] re = requests.get("https://api.sandbox.amadeus.com/v1.2/flights/low-fare-search?apikey=" + apikey + "&origin="+d_code+"&destination="+a_code+"&departure_date="+str(departDate)+"&return_date="+str(arriveDate)) page = re.text page = demjson.decode(page) if ("status" in page): validCities[i] = 0 else: global travelcosts, costDict results = page["results"] price = results[0]["fare"]["total_price"] airfare = (float)(price) re = requests.get("https://api.sandbox.amadeus.com/v1.2/hotels/search-airport?apikey=" + apikey + "&location="+a_code+"&check_in="+str(departDate)+"&check_out="+str(arriveDate)) page = re.text page = demjson.decode(page) results = page["results"] if results == []: validCities[i] = 0 else: price = results[0]["total_price"]["amount"] stayfare = (float)(price) costDict[cities[i]] = [airfare,stayfare] total_cost = airfare+stayfare travelcosts[total_cost]= cities[i] costs = travelcosts.keys() costs.sort() costs = budget_helper(costs, budget) for i in range(4): if i>=len(travelcosts): chosen[0]=0 else: chosen[travelcosts[costs[i]]] = costs[i] print travelcosts print costDict
def testDecodeWhitespace(self):
    self.assertEqual(demjson.decode(' []'), [])
    self.assertEqual(demjson.decode('[] '), [])
    self.assertEqual(demjson.decode(' [ ] '), [])
    self.assertEqual(demjson.decode('\n[]\n'), [])
    self.assertEqual(demjson.decode('\t\r \n[\n\t]\n'), [])
    # Form-feed is not a valid JSON whitespace char
    self.assertRaises(demjson.JSONDecodeError, demjson.decode, '\x0c[]', strict=True)
    # No-break-space is not a valid JSON whitespace char
    self.assertRaises(demjson.JSONDecodeError, demjson.decode, u'\u00a0[]', strict=True)
def testDecodeComments(self):
    self.assertEqual(demjson.decode('//hi\n42', allow_comments=True), 42)
    self.assertEqual(demjson.decode('/*hi*/42', allow_comments=True), 42)
    self.assertEqual(demjson.decode('/*hi//x\n*/42', allow_comments=True), 42)
    self.assertEqual(demjson.decode('"a/*xx*/z"', allow_comments=True), 'a/*xx*/z')
    self.assertRaises(demjson.JSONDecodeError, demjson.decode,
                      '4/*aa*/2', allow_comments=True)
    self.assertRaises(demjson.JSONDecodeError, demjson.decode,
                      '//hi/*x\n*/42', allow_comments=True)
    self.assertRaises(demjson.JSONDecodeError, demjson.decode,
                      '/*hi/*x*/42', allow_comments=True)
def testDecodeStringRawUnicode(self):
    self.assertEqual(demjson.decode('"\xc3\xa0"', encoding='utf-8'), u'\u00e0')
    self.assertEqual(demjson.decode('"\x00\x00\x00\xe0\x00\x00\x00"\x00\x00\x00',
                                    encoding='ucs4le'), u'\u00e0')
    self.assertEqual(demjson.decode('\x00\x00\x00"\x00\x00\x00\xe0\x00\x00\x00"',
                                    encoding='ucs4be'), u'\u00e0')
    self.assertEqual(demjson.decode('\x00\x00\x00"\x00\x00\x00\xe0\x00\x00\x00"',
                                    encoding='utf-32be'), u'\u00e0')
    self.assertEqual(demjson.decode('\x00\x00\xfe\xff\x00\x00\x00"\x00\x00\x00\xe0\x00\x00\x00"',
                                    encoding='ucs4'), u'\u00e0')
def fetch_trades(self, url, apikey, body):
    req = urllib2.Request(url, body, {'Content-Type': 'application/json'})
    resp = urllib2.urlopen(req)
    content = resp.read()
    c = demjson.decode(content)
    if 'error' in c:
        raise CSError(c[u'error'])
    # Return the already-decoded response rather than decoding the body a second time.
    return c
def get_preview(artist):
    """Grabs the preview clip URL from 7Digital for the top song of the artist"""
    try:
        top_song_url = "http://ws.audioscrobbler.com/2.0/?method=artist.gettoptracks&artist={artist}&api_key={key}&format=json".format(artist=urllib.quote(artist), key=LF_KEY)
        song_json = json.decode(urllib2.urlopen(top_song_url).read())
        toptrack = song_json['toptracks']['track'][0]['name']
        en_url = "http://developer.echonest.com/api/v4/song/search?api_key=N6E4NIOVYMTHNDM8J&format=json&results=1&artist={artist}&title={track}&bucket=id:7digital&bucket=audio_summary&bucket=tracks".format(artist=urllib.quote(artist), track=urllib.quote(toptrack))
        en_json = json.decode(urllib2.urlopen(en_url).read())
        return en_json['response']['songs'][0]['tracks'][0]['preview_url']
    except Exception, e:
        print "Error", e
        return None
def testDecodeSupplementalUnicode(self):
    import sys
    if sys.maxunicode > 65535:
        self.assertEqual(demjson.decode(r'"\udbc8\udf45"'), u'\U00102345')
        self.assertEqual(demjson.decode(r'"\ud800\udc00"'), u'\U00010000')
        self.assertEqual(demjson.decode(r'"\udbff\udfff"'), u'\U0010ffff')
    for bad_case in [r'"\ud801"', r'"\udc02"',
                     r'"\ud801\udbff"', r'"\ud801\ue000"', r'"\ud801\u2345"']:
        # Pass the callable and its argument separately; calling decode() inline
        # would raise before assertRaises could check for the exception.
        self.assertRaises(demjson.JSONDecodeError, demjson.decode, bad_case)
def country_from_city(city):  # Ran only once to get co-ordinates of each city using POI
    city = city.replace(" ", "%20")
    r = requests.get('https://api.sandbox.amadeus.com/v1.2/points-of-interest/yapq-search-text?apikey=' + apikey + '&city_name={0}'.format(city))
    the_page = r.text
    the_page = demjson.decode(the_page)
    while ("status" in the_page):
        r = requests.get('https://api.sandbox.amadeus.com/v1.2/points-of-interest/yapq-search-text?apikey=' + apikey + '&city_name={0}'.format(city))
        the_page = r.text
        the_page = demjson.decode(the_page)
    l1 = the_page["points_of_interest"]
    i = l1[0]
    location = [(i["location"]).values()[0], (i["location"]).values()[2]]
    return (getplace(location[0], location[1]))[1]
def transform_records(self, uuids_and_insts=None): """ Transforms a set of ISO19139 records into GeoBlacklight JSON. Uses iso2geoBL.xsl to perform the transformation. """ inst = self.inst for r in self.records: if not inst and not uuids_and_insts: inst = self.get_inst_for_record(r) elif uuids_and_insts: inst = uuids_and_insts[r] rec = self.records[r].xml rec = rec.replace("\n", "") root = etree.fromstring(rec) record_etree = etree.ElementTree(root) result = self.transform(record_etree,institution=self.institutions[inst]) # if self.collection: # result = self.transform( # record_etree, # institution=self.institutions[inst], # collection=self.collection # ) # else: # result = self.transform( # record_etree, # institution=self.institutions[inst] # ) result_u = unicode(result) # A dirty hack to avoid XSLT quagmire WRT skipping non-HTTPS links :{} result_u = result_u.replace(",}","}").replace("{,", "{") try: result_json = demjson.decode(result_u) if self.md_link: refs = demjson.decode(result_json["dct_references_s"]) refs["http://www.isotc211.org/schemas/2005/gmd/"] = self.OPENGEOMETADATA_URL.format( repo=self.opengeometadata_map[inst], uuid_path=self.get_uuid_path(r)) result_json["dct_references_s"] = demjson.encode(refs) result_dict = OrderedDict({r: result_json}) log.debug(result_dict) self.record_dicts.update(result_dict) except demjson.JSONDecodeError as e: log.error("ERROR: {e}".format(e=e)) log.error(result_u)
def loads(self, data):
    """
    Wrapper for json library. Load json string as a python dict

    :param str data: json string to load into dict
    :returns: Converted dict
    :rtype: dict
    """
    try:
        return json.decode(data.decode("utf-8"))
    except:
        return json.decode(data)
def load_registry(self, fqfn): """load registry json file into self.registry. Does no validation other than requiring the file to be valid json. :param fqfn - str """ if not isfile(fqfn): self._abort("Invalid registry file: %s" % fqfn) with open(fqfn) as infile: json_str = infile.read() try: self.registry, reg_errors, reg_stats = demjson.decode(json_str, return_errors=True) except demjson.JSONDecodeError as e: self.logger.critical("registry json load error: %s", e) for err in reg_errors: self.logger.critical(err) self._abort("Invalid registry file - could not load/decode") else: if reg_errors: self.logger.critical("registry json load error") for err in reg_errors: self.logger.critical(err) self._abort("Invalid registry file - json errors discovered during load")
def __init__(self, level, type_, name, pos, **args): super(Entity, self).__init__() self.level = None # Set by level.add_entity level.add_entity(self) self.type = type_ self.name = name self.pos = pos self.removed = False defaults = dictkeys_to_ascii( demjson.decode(pyglet.resource.file("entity/%s.json" % (type_)).read() ) ) for key, value in args.items(): defaults[key].update(value) print defaults self.behaviorName = defaults['behavior']['name'] if "behavior" in defaults else "none" self.physicsName = defaults['physics']['name'] if "physics" in defaults else "static" self.width = defaults['width'] self.height = defaults['height'] self.vel = euclid.Vector2(0., 0.) self.max_vel = euclid.Vector2(7, 25) self._boundingbox = BoundingBox(euclid.Vector2(-self.width/2, -self.height/2), euclid.Vector2(self.width/2, self.height/2)) self.view_direction = 1 self._state = [] if self.level.game.delegate: self.level.game.delegate.initEntity(self, **defaults) self.level.game.delegate.entityCreated(self)
def render_POST(self, request, **kwargs):
    """
    :param request: body should contain JSON

    Required keys in JSON posted:

    :spider_name: string name of spider to be scheduled.

    :request: json object request to be scheduled with spider.
        Note: request must contain url for spider.
        It may contain kwargs to scrapy request.
    """
    request_body = request.content.getvalue()
    try:
        request_data = demjson.decode(request_body)
    except ValueError as e:
        message = "Invalid JSON in POST body. {}"
        # str.format returns a new string, so keep the formatted result.
        message = message.format(e.pretty_description())
        raise Error('400', message=message)
    log.msg("{}".format(request_data))
    spider_data = self.get_required_argument(request_data, "request")
    error_msg = "Missing required key 'url' in 'request' object"
    self.get_required_argument(spider_data, "url", error_msg=error_msg)
    return self.prepare_crawl(request_data, spider_data, **kwargs)
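# Hedged illustration, not from the original handler: a minimal POST body of the
# shape the docstring above requires -- "spider_name" plus a "request" object that
# carries a "url". The spider name and URL are made-up placeholders.
example_post_body = demjson.encode({
    "spider_name": "example_spider",       # hypothetical spider name
    "request": {
        "url": "http://example.com/page",  # required key inside "request"
    },
})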
def _snarf_names_from_parse_res (self, pr): n = pr['firstname'] if n and n != 'nil': self.set_firstname(unesc_str(chompq(n))) n = pr['lastname'] if n and n != 'nil': self.set_lastname(unesc_str(chompq(n))) try: affix = pr['affix'] if affix and affix != 'nil': str_re = self.get_store().get_str_re() affix = re.findall(str_re, affix) self.set_suffix(unesc_str(chompq(affix[0]))) if len(affix) > 1: aff = demjson.encode([unesc_str(chompq(x)) for x in affix[1:]]) ## FIXME: Do we need to escape the quotes in json encoding ## as in the except clause? self.add_custom('affix', aff) except KeyError, e: ## FIXME: There should be a better way to handle the format ## differences.... for now we'll put up with the hacks affix = self.get_custom('affix') if affix: affix = demjson.decode(affix) if len(affix) > 0: self.set_suffix(affix[0]) affix = affix[1:] if len(affix) > 0: aff = demjson.encode(affix) self.add_custom('affix', aff)
def get_link(url , cat): try: url='https://en-ae.wadi.com/api/sawa/v1/u' + url #print(url) response = requests.get(url) data = demjson.decode(response.content) item_count=int(data['totalCount']) page_count=math.ceil(item_count/30) page_count=int(page_count) print ("Item count : ") print(item_count) print("Page Count : ") print(page_count) count=1 while (page_count>=0): try: get_scrape(url + '&page=' + str(count),cat,str(count)) count=count+1 page_count=page_count-1 except Exception as e: print(str(e)) return except Exception as e: print(str(e)) return return
def validate_file(self, filename):
    if not isfile(filename):
        print('validate_file - b')
        # Raise instead of returning the exception class and message as a tuple.
        raise ValueError('Invalid file: %s' % filename)
    try:
        with open(filename) as infile:
            json_str = infile.read()
            try:
                self.registry, reg_errors, reg_stats = demjson.decode(json_str, return_errors=True)
            except demjson.JSONDecodeError as e:
                self.logger.critical("registry json validation error: %s", e)
                for err in reg_errors:
                    self.logger.critical(err)
                self._abort("Invalid registry file - could not decode")
            else:
                if reg_errors:
                    self.logger.critical("registry json validation error")
                    for err in reg_errors:
                        self.logger.critical(err)
                    self._abort("Invalid registry file - json errors discovered")
    except IOError:
        self._abort("Invalid registry file - could not open")
    try:
        self.validate()
    except:
        self.logger.critical("registry file validation failed")
        raise
def sp_episodes(): import demjson xbmcplugin.addSortMethod(pluginhandle, xbmcplugin.SORT_METHOD_EPISODE) url = 'http://www.southparkstudios.com/feeds/full-episode/carousel/'+common.args.url+'/dc400305-d548-4c30-8f05-0f27dc7e0d5c' json = common.getURL(url) episodes = demjson.decode(json)['season']['episode'] for episode in episodes: title = episode['title'] description = episode['description'].encode('ascii', 'ignore') thumbnail = episode['thumbnail'].replace('width=55','') episodeid = episode['id'] senumber = episode['episodenumber'] date = episode['airdate'].replace('.','-') seasonnumber = senumber[:-2] episodenumber = senumber[len(seasonnumber):] try: season = int(seasonnumber) episode = int(episodenumber) except: season = 0 episode = 0 u = sys.argv[0] u += '?url="'+urllib.quote_plus(episodeid)+'"' u += '&mode="comedy"' u += '&sitemode="sp_play"' infoLabels={ "Title": title, "Season":season, "Episode":episode, "premiered":date, "Plot":description, "TVShowTitle":"South Park" } common.addVideo(u,title,thumbnail,infoLabels=infoLabels) common.setView('episodes')
ling_con = MysqlLing() ling_request = LingRequest() author_list = ling_con.search( "select * from toutiao_author where media_id=0") if len(author_list) >= 1: for author in author_list: user_id = author['author_id'] respond = ling_request.request(base_url.format(user_id)) s = reg.search(respond.content) s1 = reg1.search(respond.content) if s and s1: content, number = re.subn("\r", "", str(reg2.search(s.group()).group())) content, number = re.subn("\n", "", content) user1 = demjson.decode(content) content, number = re.subn("\r", "", str(reg2.search(s1.group()).group())) content, number = re.subn("\n", "", content) user2 = demjson.decode(content) user = dict(user1.items() + user2.items()) if user['avatarUrl'].find('//') is 0: user['avatarUrl'] = "http:" + user['avatarUrl'] else: pass update(user) else: # time.sleep(5) continue # time.sleep(0.5)
def parse_info(self, response): origin_shop = response.meta['origin_shop'] # 判断是否弹出验证码 new_url = response.url if new_url.startswith('https://verify.meituan.com/'): # 表示需要重试url print('有验证码, 重试') url = origin_shop.get('url') print('出现验证码重试的url:{}'.format(url)) request = scrapy.Request(url, callback=self.parse_info, dont_filter=True) request.meta['origin_shop'] = origin_shop yield request else: # 使用selenium解析经纬度信息 url = origin_shop.get('url') while True: options = webdriver.ChromeOptions() options.add_argument('--headless') chrome = webdriver.Chrome(chrome_options=options) chrome.get(url) e = etree.HTML(chrome.page_source) try: img_src = e.xpath('//div[@id="map"]/img/@src')[0] lat_lng_str = img_src.split('|')[1] lat_lng_list = lat_lng_str.split(',') lat = lat_lng_list[0] lng = lat_lng_list[1] chrome.quit() except: lat = '' lng = '' chrome.quit() if lat != '' and lng != '': break page_source = etree.HTML(response.text) # 解析出原始信息 shop_info_xpath = '//script[10]' try: shop_info_tag = page_source.xpath(shop_info_xpath)[0] except: # 有的页面是另一种 shop_info_tag = page_source.xpath( '//*[@id="top"]/script[1]')[0] try: shop_info_dict = demjson.decode( shop_info_tag.xpath('./text()')[0].split('shop_config=') [1]) # 解析商家的id item = ShopItem() # 加入url作为去重的标准 item['sort'] = 'shop' # 控制数据的版本 item['version'] = '0' item['url'] = origin_shop.get('url') item['full_name'] = shop_info_dict.get('fullName') item['city_en_name'] = shop_info_dict.get('cityEnName') item['address'] = shop_info_dict.get('address') item['city_id'] = shop_info_dict.get('cityId') # item['shop_lat'] = shop_info_dict.get('shopGlat') # item['shop_lng'] = shop_info_dict.get('shopGlng') item['shop_lat'] = lat item['shop_lng'] = lng item['city_lat'] = shop_info_dict.get('cityGlat') item['city_lng'] = shop_info_dict.get('cityGlng') item['power'] = shop_info_dict.get('power') item['shop_power'] = shop_info_dict.get('shopPower') item['shop_type'] = shop_info_dict.get('shopType') item['shop_group_id'] = shop_info_dict.get('shopGroupId') item['main_region_id'] = shop_info_dict.get('mainRegionId') item['main_category_name'] = shop_info_dict.get( 'mainCategoryName') item['main_category_id'] = shop_info_dict.get('mainCategoryId') # food item['category_url_name'] = shop_info_dict.get( 'categoryURLName') # 比如 美食 item['category_name'] = shop_info_dict.get('categoryName') # 有一个textCssVersion, 应该是会定期更新文字库 # 支持自动更新字库 text_css_version = shop_info_dict.get('textCssVersion') # 加载一下字库看行不行 text_css_info = load_text_css(text_css_version) if text_css_info is None: print('网站的字符集有变更, 需要重新解析css') # 抽取css的url css_xpath = '//link[contains(@rel,"stylesheet") and contains(@href, "svgtextcss")]/@href' css_url = 'http:' + page_source.xpath(css_xpath)[0] get_css_text_info(css_url, text_css_version) # 解析svg字体 vote_xpath = '//*[@id="reviewCount"]' item['vote_total'] = parse_text_svg(vote_xpath, page_source, text_css_version) # 如果店铺已关闭, 则营业时间和电话都没有了 shop_closed_xpath = '//p[@class="shop-closed"]' shop_closed_tag = page_source.xpath(shop_closed_xpath) if shop_closed_tag != []: # 店铺已关闭 item['is_open'] = False else: item['is_open'] = True phone_xpath = '//*[@id="basic-info"]/p' item['phone'] = parse_text_svg(phone_xpath, page_source, text_css_version) # 开放时间 bh_xpath = '//*[@id="basic-info"]/div[4]/p[1]/span[2]' item['business_hours'] = parse_text_svg( bh_xpath, page_source, text_css_version) # 人均 avg_xpath = '//*[@id="avgPriceTitle"]' item['avg_price'] = parse_text_svg(avg_xpath, page_source, text_css_version) # 评分 taste_xpath = '//*[@id="comment_score"]/span[1]' item['taste_score'] = 
parse_text_svg(taste_xpath, page_source, text_css_version) service_xpath = '//*[@id="comment_score"]/span[2]' item['service_score'] = parse_text_svg(service_xpath, page_source, text_css_version) env_xpath = '//*[@id="comment_score"]/span[3]' item['env_score'] = parse_text_svg(env_xpath, page_source, text_css_version) # print(item) yield item except Exception as e: # print(item) print(traceback.format_exc(), e) print('静态信息解析错误, 查看原因.')
# print "pass--------" # print "current letter:" , letter # list = ['php', 'Python', 'c++'] # print list # print list[2] # tup = ('english', 'chinese', 'math') # print tup # print tup[1:2] # import demjson data = {} dic = {'aaa': '11', 'bb': 222} print dic for k, v in dic.items(): data[k] = v print k, v print data json = demjson.encode(data) print 'json:', json dict_2 = demjson.decode(json) print dict_2 print dict(zip(dict_2.values(), dict_2.keys()))
def getjsontime(data):
    data = json.loads(data)
    data = demjson.decode(data['Datapoints'])
    return data[len(data) - 1]['timestamp']
def stock_report_fund_hold(symbol: str = "基金持仓", date: str = "20201231") -> pd.DataFrame: """ 东方财富网-数据中心-主力数据-基金持仓 http://data.eastmoney.com/zlsj/2020-12-31-1-2.html :param symbol: choice of {"基金持仓", "QFII持仓", "社保持仓", "券商持仓", "保险持仓", "信托持仓"} :type symbol: str :param date: 财报发布日期, xxxx-03-31, xxxx-06-30, xxxx-09-30, xxxx-12-31 :type date: str :return: 基金持仓数据 :rtype: pandas.DataFrame """ symbol_map = { "基金持仓": "1", "QFII持仓": "2", "社保持仓": "3", "券商持仓": "4", "保险持仓": "5", "信托持仓": "6", } date = "-".join([date[:4], date[4:6], date[6:]]) url = "http://data.eastmoney.com/dataapi/zlsj/list" params = { "tkn": "eastmoney", "ReportDate": date, "code": "", "type": symbol_map[symbol], "zjc": "0", "sortField": "Count", "sortDirec": "1", "pageNum": "1", "pageSize": "50000", "cfg": "jjsjtj", "p": "1", "pageNo": "1", } r = requests.get(url, params=params) data_text = r.text data_json = demjson.decode(data_text[data_text.find("{"):]) temp_df = pd.DataFrame(data_json["data"]) temp_df.reset_index(inplace=True) temp_df["index"] = list(range(1, len(temp_df) + 1)) if temp_df.empty: return None temp_df.columns = [ "序号", "股票代码", "股票简称", "_", "_", "_", "持有基金家数", "持股变化", "持股总数", "持股市值", "持股变动比例", "_", "持股变动数值", "_", ] temp_df = temp_df[[ "序号", "股票代码", "股票简称", "持有基金家数", "持股总数", "持股市值", "持股变化", "持股变动数值", "持股变动比例", ]] return temp_df
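# Hedged usage sketch: calling the function above with the parameter choices its
# docstring lists (a holder type from the symbol_map plus a quarterly report date
# in YYYYMMDD form).
fund_hold_df = stock_report_fund_hold(symbol="基金持仓", date="20201231")
print(fund_hold_df.head())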
""" import demjson import tuyapower # Terminal Color Formatting bold = "\033[0m\033[97m\033[1m" subbold = "\033[0m\033[32m" normal = "\033[97m\033[0m" dim = "\033[0m\033[97m\033[2m" alert = "\033[0m\033[91m\033[1m" alertdim = "\033[0m\033[91m\033[2m" # Load Device Keys from Tuya JSON file print("Loading Tuya Keys...") f = open('devices.json', "r") data = demjson.decode(f.read()) f.close() print(" %s%s device keys loaded%s" % (dim, len(data), normal)) print() print("Scanning network for Tuya devices...") devices = tuyapower.deviceScan(False, 20) print(" %s%s devices found%s" % (dim, len(devices), normal)) print() def getIP(d, gwid): for ip in d: if (gwid == d[ip]['gwId']): return (ip, d[ip]['version']) return (0, 0)
def covid_19_163(indicator: str = "实时") -> pd.DataFrame: """ 网易-新冠状病毒 https://news.163.com/special/epidemic/?spssid=93326430940df93a37229666dfbc4b96&spsw=4&spss=other&#map_block https://news.163.com/special/epidemic/?spssid=93326430940df93a37229666dfbc4b96&spsw=4&spss=other& :return: 返回指定 indicator 的数据 :rtype: pandas.DataFrame """ url = "https://c.m.163.com/ug/api/wuhan/app/data/list-total" headers = { "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36", } payload = { "t": int(time.time() * 1000), } r = requests.get(url, params=payload, headers=headers) data_json = r.json() # data info url = "https://news.163.com/special/epidemic/" r = requests.get(url, headers=headers) soup = BeautifulSoup(r.text, "lxml") data_info_df = pd.DataFrame( [ item.text.strip().split(".")[1] for item in soup.find("div", attrs={"class": "data_tip_pop_text"}).find_all( "p" ) ] ) data_info_df.columns = ["info"] # 中国历史时点数据 hist_today_df = pd.DataFrame( [item["today"] for item in data_json["data"]["chinaDayList"]], index=[item["date"] for item in data_json["data"]["chinaDayList"]], ) # 中国历史累计数据 hist_total_df = pd.DataFrame( [item["total"] for item in data_json["data"]["chinaDayList"]], index=[item["date"] for item in data_json["data"]["chinaDayList"]], ) # 中国实时数据 current_df = pd.DataFrame.from_dict(data_json["data"]["chinaTotal"]) # 世界历史时点数据 outside_today_df = pd.DataFrame( [item["today"] for item in data_json["data"]["areaTree"]], index=[item["name"] for item in data_json["data"]["areaTree"]], ) # 世界历史累计数据 outside_total_df = pd.DataFrame( [item["total"] for item in data_json["data"]["areaTree"]], index=[item["name"] for item in data_json["data"]["areaTree"]], ) # 全球所有国家及地区时点数据 all_world_today_df = pd.DataFrame( jsonpath.jsonpath(data_json["data"]["areaTree"], "$..today"), index=jsonpath.jsonpath(data_json["data"]["areaTree"], "$..name"), ) # 全球所有国家及地区累计数据 all_world_total_df = pd.DataFrame( jsonpath.jsonpath(data_json["data"]["areaTree"], "$..total"), index=jsonpath.jsonpath(data_json["data"]["areaTree"], "$..name"), ) # 中国各地区累计数据 area_total_df = pd.DataFrame( [item["total"] for item in data_json["data"]["areaTree"][2]["children"]], index=[item["name"] for item in data_json["data"]["areaTree"][2]["children"]], ) # 中国各地区时点数据 area_today_df = pd.DataFrame( [item["today"] for item in data_json["data"]["areaTree"][2]["children"]], index=[item["name"] for item in data_json["data"]["areaTree"][2]["children"]], ) # 疫情学术进展 url_article = "https://vip.open.163.com/api/cms/topic/list" payload_article = { "topicid": "00019NGQ", "listnum": "1000", "liststart": "0", "pointstart": "0", "pointend": "255", "useproperty": "true", } r_article = requests.get(url_article, params=payload_article) article_df = pd.DataFrame(r_article.json()["data"]).iloc[:, 1:] # 资讯 url_info = "https://ent.163.com/special/00035080/virus_report_data.js" payload_info = { "_": int(time.time() * 1000), "callback": "callback", } r_info = requests.get(url_info, params=payload_info, headers=headers) data_info_text = r_info.text data_info_json = demjson.decode(data_info_text.strip(" callback(")[:-1]) if indicator == "数据说明": print(f"数据更新时间: {data_json['data']['lastUpdateTime']}") return data_info_df if indicator == "中国实时数据": print(f"数据更新时间: {data_json['data']['lastUpdateTime']}") return current_df if indicator == "中国历史时点数据": print(f"数据更新时间: {data_json['data']['lastUpdateTime']}") return hist_today_df if indicator == "中国历史累计数据": print(f"数据更新时间: {data_json['data']['lastUpdateTime']}") return 
hist_total_df if indicator == "世界历史时点数据": print(f"数据更新时间: {data_json['data']['lastUpdateTime']}") return outside_today_df if indicator == "世界历史累计数据": print(f"数据更新时间: {data_json['data']['lastUpdateTime']}") return outside_total_df if indicator == "全球所有国家及地区时点数据": print(f"数据更新时间: {data_json['data']['lastUpdateTime']}") return all_world_today_df elif indicator == "全球所有国家及地区累计数据": print(f"数据更新时间: {data_json['data']['lastUpdateTime']}") return all_world_total_df elif indicator == "中国各地区时点数据": print(f"数据更新时间: {data_json['data']['lastUpdateTime']}") return area_today_df elif indicator == "中国各地区累计数据": print(f"数据更新时间: {data_json['data']['lastUpdateTime']}") return area_total_df elif indicator == "疫情学术进展": return article_df elif indicator == "实时资讯新闻播报": return pd.DataFrame(data_info_json["list"]) elif indicator == "实时医院新闻播报": return pd.DataFrame(data_info_json["hospital"]) elif indicator == "前沿知识": return pd.DataFrame(data_info_json["papers"]) elif indicator == "权威发布": return pd.DataFrame(data_info_json["power"]) elif indicator == "境外输入疫情趋势": url = "https://c.m.163.com/ug/api/wuhan/app/data/list-by-area-code" params = { "areaCode": "66", "t": round(int(time.time() * 1000)) } r = requests.get(url, params=params, headers=headers) data_json = r.json() temp_df = pd.DataFrame(data_json["data"]["list"]) today_list = [item.get("input", 0) for item in temp_df["today"]] total_list = [item.get("input", 0) for item in temp_df["total"]] result_df = pd.DataFrame([today_list, total_list]).T result_df.columns = ["境外输入新增确诊", "境外输入累计确诊"] result_df.index = pd.to_datetime(temp_df.date) return result_df elif indicator == "境外输入确诊病例来源": url = "https://c.m.163.com/ug/api/wuhan/app/index/input-data-list" params = { "t": round(int(time.time() * 1000)) } r = requests.get(url, params=params, headers=headers) data_json = r.json() temp_df = pd.DataFrame(data_json["data"]["list"]) del temp_df["page"] return temp_df
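# Hedged usage sketch: the indicator strings are taken verbatim from the branches
# above, e.g. per-region cumulative data and the imported-case trend.
area_total_df = covid_19_163(indicator="中国各地区累计数据")
imported_trend_df = covid_19_163(indicator="境外输入疫情趋势")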
with open('../lib/data.js', 'r') as datajs:
    """ Open the current data file and extract the acronym JSON """
    line_string = ''.join(datajs.readlines())
    data_start = line_string.index('{')
    data_end = line_string.index('};') + 1
    before = line_string[:data_start]
    after = line_string[data_end:]
    data_string = line_string[data_start:data_end].split('\n')
    formatted_data = remove_indentation(data_string)
    data = demjson.decode(formatted_data)

with open('../lib/data.js', 'w') as newdatajs:
    """ Export the data file with the acronyms sorted """
    ordered_data = json.dumps(data, sort_keys=True, indent=4, ensure_ascii=False).encode('utf8')
    newdatajs.write(before)
    newdatajs.write(ordered_data)
    newdatajs.write(after)
bing_wallpaper_url = 'https://cn.bing.com/HPImageArchive.aspx?format=js&idx=0&n=6&pid=hp&uhd=1&uhdwidth=2880&uhdheight=1620' headers = { "Connection": "keep-alive", "Pragma": "no-cache", "Cache-Control": "no-cache", "Upgrade-Insecure-Requests": "1", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.68 Safari/537.36", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", "Accept-Encoding": "gzip, deflate", "Accept-Language": "zh-CN,zh-Hans;q=0.9,zh;q=0.8,und;q=0.7", } manifest_json = demjson.decode(manifest) photo_json = demjson.decode(photo) bing_wallpaper_json = demjson.decode(bing_wallpaper) manifest_json['version'] = version try: rs = requests.session() res = rs.get(bing_wallpaper_url, headers=headers) res.encoding = 'utf-8' resjson = demjson.decode(res.text) i = 1 for image in resjson['images']: image_url = "https://cn.bing.com" + image['url'] try: img_name = 'background-' + str(i) + '.jpg' img_r = requests.get(image_url)
def stock_zh_a_daily( symbol: str = "sh601939", start_date: str = "19900101", end_date: str = "22001220", adjust: str = "", ) -> pd.DataFrame: """ 新浪财经-A股-个股的历史行情数据, 大量抓取容易封 IP https://finance.sina.com.cn/realstock/company/sh689009/nc.shtml :param start_date: 20201103; 开始日期 :type start_date: str :param end_date: 20201103; 结束日期 :type end_date: str :param symbol: sh600000 :type symbol: str :param adjust: 默认为空: 返回不复权的数据; qfq: 返回前复权后的数据; hfq: 返回后复权后的数据; hfq-factor: 返回后复权因子; hfq-factor: 返回前复权因子 :type adjust: str :return: specific data :rtype: pandas.DataFrame """ def _fq_factor(method): if method == "hfq": res = requests.get(zh_sina_a_stock_hfq_url.format(symbol)) hfq_factor_df = pd.DataFrame( eval(res.text.split("=")[1].split("\n")[0])["data"]) if hfq_factor_df.shape[0] == 0: raise ValueError("sina hfq factor not available") hfq_factor_df.columns = ["date", "hfq_factor"] hfq_factor_df.index = pd.to_datetime(hfq_factor_df.date) del hfq_factor_df["date"] return hfq_factor_df else: res = requests.get(zh_sina_a_stock_qfq_url.format(symbol)) qfq_factor_df = pd.DataFrame( eval(res.text.split("=")[1].split("\n")[0])["data"]) if qfq_factor_df.shape[0] == 0: raise ValueError("sina hfq factor not available") qfq_factor_df.columns = ["date", "qfq_factor"] qfq_factor_df.index = pd.to_datetime(qfq_factor_df.date) del qfq_factor_df["date"] return qfq_factor_df if adjust in ("hfq-factor", "qfq-factor"): return _fq_factor(adjust.split("-")[0]) res = requests.get(zh_sina_a_stock_hist_url.format(symbol)) js_code = py_mini_racer.MiniRacer() js_code.eval(hk_js_decode) dict_list = js_code.call("d", res.text.split("=")[1].split(";")[0].replace( '"', "")) # 执行js解密代码 data_df = pd.DataFrame(dict_list) data_df.index = pd.to_datetime(data_df["date"]) del data_df["date"] data_df = data_df.astype("float") r = requests.get(zh_sina_a_stock_amount_url.format(symbol, symbol)) amount_data_json = demjson.decode( r.text[r.text.find("["):r.text.rfind("]") + 1]) amount_data_df = pd.DataFrame(amount_data_json) amount_data_df.index = pd.to_datetime(amount_data_df.date) del amount_data_df["date"] temp_df = pd.merge(data_df, amount_data_df, left_index=True, right_index=True, how="outer") temp_df.fillna(method="ffill", inplace=True) temp_df = temp_df.astype(float) temp_df["amount"] = temp_df["amount"] * 10000 temp_df["turnover"] = temp_df["volume"] / temp_df["amount"] temp_df.columns = [ "open", "high", "low", "close", "volume", "outstanding_share", "turnover", ] if adjust == "": temp_df = temp_df[start_date:end_date] temp_df["open"] = round(temp_df["open"], 2) temp_df["high"] = round(temp_df["high"], 2) temp_df["low"] = round(temp_df["low"], 2) temp_df["close"] = round(temp_df["close"], 2) temp_df.dropna(inplace=True) return temp_df if adjust == "hfq": res = requests.get(zh_sina_a_stock_hfq_url.format(symbol)) hfq_factor_df = pd.DataFrame( eval(res.text.split("=")[1].split("\n")[0])["data"]) hfq_factor_df.columns = ["date", "hfq_factor"] hfq_factor_df.index = pd.to_datetime(hfq_factor_df.date) del hfq_factor_df["date"] temp_df = pd.merge(temp_df, hfq_factor_df, left_index=True, right_index=True, how="outer") temp_df.fillna(method="ffill", inplace=True) temp_df = temp_df.astype(float) temp_df["open"] = temp_df["open"] * temp_df["hfq_factor"] temp_df["high"] = temp_df["high"] * temp_df["hfq_factor"] temp_df["close"] = temp_df["close"] * temp_df["hfq_factor"] temp_df["low"] = temp_df["low"] * temp_df["hfq_factor"] temp_df.dropna(how="any", inplace=True) temp_df = temp_df.iloc[:, :-1] temp_df = temp_df[start_date:end_date] temp_df["open"] 
= round(temp_df["open"], 2) temp_df["high"] = round(temp_df["high"], 2) temp_df["low"] = round(temp_df["low"], 2) temp_df["close"] = round(temp_df["close"], 2) temp_df.dropna(inplace=True) return temp_df if adjust == "qfq": res = requests.get(zh_sina_a_stock_qfq_url.format(symbol)) qfq_factor_df = pd.DataFrame( eval(res.text.split("=")[1].split("\n")[0])["data"]) qfq_factor_df.columns = ["date", "qfq_factor"] qfq_factor_df.index = pd.to_datetime(qfq_factor_df.date) del qfq_factor_df["date"] temp_df = pd.merge(temp_df, qfq_factor_df, left_index=True, right_index=True, how="outer") temp_df.fillna(method="ffill", inplace=True) temp_df = temp_df.astype(float) temp_df["open"] = temp_df["open"] / temp_df["qfq_factor"] temp_df["high"] = temp_df["high"] / temp_df["qfq_factor"] temp_df["close"] = temp_df["close"] / temp_df["qfq_factor"] temp_df["low"] = temp_df["low"] / temp_df["qfq_factor"] temp_df.dropna(how="any", inplace=True) temp_df = temp_df.iloc[:, :-1] temp_df = temp_df[start_date:end_date] temp_df["open"] = round(temp_df["open"], 2) temp_df["high"] = round(temp_df["high"], 2) temp_df["low"] = round(temp_df["low"], 2) temp_df["close"] = round(temp_df["close"], 2) temp_df.dropna(inplace=True) return temp_df
data1 = {
    "VisitId": "927f2bb2-5b3d-e811-8d9e-000c2918a2b6",
    "ClinicalDiagnosis": "33333"
}
data = {
    "VisitId": "927f2bb2-5b3d-e811-8d9e-000c2918a2b6",
    "ClinicalDiagnosis": "33333",
    "InspectionFromItems": [{
        "InspectionGroupId": "c504b05c-902e-e811-8d9e-000c2918a2b6",
        "ItemName": "骨科组套1",
        "GroupName": "骨科组套1",
        "Quantity": 1
    }, {
        "InspectionGroupId": "039c86f6-f332-e811-8d9e-000c2918a2b6",
        "ItemName": "检验租套测试test1",
        "GroupName": "检验租套测试test1",
        "Quantity": 1
    }]
}
print "encoding:%s" % sys.getdefaultencoding()
print "type:%s" % type(data)
print data
print "******************************111111"
print data1
# demjson.decode expects a JSON string, not a dict, so encode the dicts first.
print demjson.decode(demjson.encode(data))
print demjson.decode(demjson.encode(data1))
def covid_19_baidu(indicator: str = "浙江") -> pd.DataFrame: """ 百度-新型冠状病毒肺炎-疫情实时大数据报告 https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_pc_1 :param indicator: 看说明文档 :type indicator: str :return: 指定 indicator 的数据 :rtype: pandas.DataFrame """ url = "https://huiyan.baidu.com/openapi/v1/migration/rank" payload = { "type": "move", "ak": "kgD2HiDnLdUhwzd3CLuG5AWNfX3fhLYe", "adminType": "country", "name": "全国", } r = requests.get(url, params=payload) move_in_df = pd.DataFrame(r.json()["result"]["moveInList"]) move_out_df = pd.DataFrame(r.json()["result"]["moveOutList"]) url = "https://opendata.baidu.com/api.php" payload = { "query": "全国", "resource_id": "39258", "tn": "wisetpl", "format": "json", "cb": "jsonp_1580470773343_11183", } r = requests.get(url, params=payload) text_data = r.text json_data_news = json.loads( text_data.strip("/**/jsonp_1580470773343_11183(").rstrip(");") ) url = "https://opendata.baidu.com/data/inner" payload = { "tn": "reserved_all_res_tn", "dspName": "iphone", "from_sf": "1", "dsp": "iphone", "resource_id": "28565", "alr": "1", "query": "肺炎", "cb": "jsonp_1580470773344_83572", } r = requests.get(url, params=payload) json_data = json.loads(r.text[r.text.find("({") + 1 : r.text.rfind(");")]) spot_report = pd.DataFrame(json_data["Result"][0]["DisplayData"]["result"]["items"]) # domestic-city url = "https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_pc_1" r = requests.get(url) soup = BeautifulSoup(r.text, "lxml") temp_soup = str(soup.find(attrs={"id": "captain-config"})) data_json = demjson.decode(temp_soup[temp_soup.find("{"): temp_soup.rfind("}")+1]) big_df = pd.DataFrame() for i, p in enumerate( jsonpath.jsonpath(data_json["component"][0]["caseList"], "$..area") ): temp_df = pd.DataFrame( jsonpath.jsonpath(data_json["component"][0]["caseList"], "$..subList")[i] ) temp_df["province"] = p big_df = big_df.append(temp_df, ignore_index=True) domestic_city_df = big_df domestic_province_df = pd.DataFrame(data_json["component"][0]["caseList"]).iloc[ :, :-2 ] big_df = pd.DataFrame() for i, p in enumerate( jsonpath.jsonpath(data_json["component"][0]["caseOutsideList"], "$..area") ): temp_df = pd.DataFrame( jsonpath.jsonpath( data_json["component"][0]["caseOutsideList"], "$..subList" )[i] ) temp_df["province"] = p big_df = big_df.append(temp_df, ignore_index=True) outside_city_df = big_df outside_country_df = pd.DataFrame( data_json["component"][0]["caseOutsideList"] ).iloc[:, :-1] big_df = pd.DataFrame() for i, p in enumerate( jsonpath.jsonpath(data_json["component"][0]["globalList"], "$..area") ): temp_df = pd.DataFrame( jsonpath.jsonpath(data_json["component"][0]["globalList"], "$..subList")[i] ) temp_df["province"] = p big_df = big_df.append(temp_df, ignore_index=True) global_country_df = big_df global_continent_df = pd.DataFrame(data_json["component"][0]["globalList"])[ ["area", "died", "crued", "confirmed", "confirmedRelative"] ] if indicator == "热门迁入地": return move_in_df elif indicator == "热门迁出地": return move_out_df elif indicator == "今日疫情热搜": return pd.DataFrame(json_data_news["data"][0]["list"][0]["item"]) elif indicator == "防疫知识热搜": return pd.DataFrame(json_data_news["data"][0]["list"][1]["item"]) elif indicator == "热搜谣言粉碎": return pd.DataFrame(json_data_news["data"][0]["list"][2]["item"]) elif indicator == "复工复课热搜": return pd.DataFrame(json_data_news["data"][0]["list"][3]["item"]) elif indicator == "热门人物榜": return pd.DataFrame(json_data_news["data"][0]["list"][4]["item"]) elif indicator == "历史疫情热搜": return 
pd.DataFrame(json_data_news["data"][0]["list"][5]["item"]) elif indicator == "搜索正能量榜": return pd.DataFrame(json_data_news["data"][0]["list"][6]["item"]) elif indicator == "游戏榜": return pd.DataFrame(json_data_news["data"][0]["list"][7]["item"]) elif indicator == "影视榜": return pd.DataFrame(json_data_news["data"][0]["list"][8]["item"]) elif indicator == "小说榜": return pd.DataFrame(json_data_news["data"][0]["list"][9]["item"]) elif indicator == "疫期飙升榜": return pd.DataFrame(json_data_news["data"][0]["list"][10]["item"]) elif indicator == "实时播报": return spot_report elif indicator == "中国分省份详情": return domestic_province_df elif indicator == "中国分城市详情": return domestic_city_df elif indicator == "国外分国详情": return outside_country_df elif indicator == "国外分城市详情": return outside_city_df elif indicator == "全球分洲详情": return global_continent_df elif indicator == "全球分洲国家详情": return global_country_df
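# Hedged usage sketch: indicator values taken from the branches above, e.g. the
# domestic per-province and per-city breakdowns.
province_df = covid_19_baidu(indicator="中国分省份详情")
city_df = covid_19_baidu(indicator="中国分城市详情")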
def choice_budget(): global chosen, validCities, costDict travelcosts = dict() lattitude = (str)(loc[0]) longitude = (str)(loc[1]) re = requests.get( 'https://api.sandbox.amadeus.com/v1.2/airports/nearest-relevant?apikey=WCC0Tn8fJ5hScMw7NTDDAAkjydFLOYTf&latitude=' + (lattitude) + '&longitude=' + (longitude)) page = re.text page = demjson.decode(page) d_code = (page[0])["airport"] x = len(countries) if international: x = 15 for i in range(x): if (1 == validCities[i]): city = cities[i].replace(" ", "%20") print city re = requests.get( "https://api.sandbox.amadeus.com/v1.2/airports/autocomplete?apikey=WCC0Tn8fJ5hScMw7NTDDAAkjydFLOYTf&term={0}" .format(city)) page = re.text page = demjson.decode(page) if page == []: validCities[i] = 0 else: a_code = page[0]["value"] re = requests.get( "https://api.sandbox.amadeus.com/v1.2/flights/low-fare-search?apikey=WCC0Tn8fJ5hScMw7NTDDAAkjydFLOYTf&origin=" + d_code + "&destination=" + a_code + "&departure_date=" + str(departDate) + "&return_date=" + str(arriveDate)) page = re.text page = demjson.decode(page) if ("status" in page): validCities[i] = 0 else: global travelcosts, costDict results = page["results"] price = results[0]["fare"]["total_price"] airfare = (float)(price) re = requests.get( "https://api.sandbox.amadeus.com/v1.2/hotels/search-airport?apikey=WCC0Tn8fJ5hScMw7NTDDAAkjydFLOYTf&location=" + a_code + "&check_in=" + str(departDate) + "&check_out=" + str(arriveDate)) page = re.text page = demjson.decode(page) results = page["results"] if results == []: validCities[i] = 0 else: price = results[0]["total_price"]["amount"] stayfare = (float)(price) costDict[cities[i]] = [airfare, stayfare] total_cost = airfare + stayfare travelcosts[total_cost] = cities[i] costs = travelcosts.keys() costs.sort() costs = budget_helper(costs, budget) for i in range(4): if i >= len(travelcosts): chosen[0] = 0 else: chosen[travelcosts[costs[i]]] = costs[i] print travelcosts print costDict
import demjson #连接数据库 client = pymongo.MongoClient( "mongodb://*****:*****@cluster0-shard-00-00-gmjko.mongodb.net:27017/admin?ssl=true&replicaSet=cluster0-shard-00-00-gmjko&authSource=admin" ) db = client.geokg col = db.region data = xlrd.open_workbook("E:/资料/大三下/实训/广府建筑表格/region.xls") table = data.sheets()[0] #读取excel第一行数据作为存入mongodb的字段名 rowstag = table.row_values(0) nrows = table.nrows ncols = table.ncols returnData = {} for i in range(1, nrows): #将字段名和excel数据存储为字典形式,并转换为json格式 #returnData[i]=json.dumps(dict(zip(rowstag,table.row_values(i)))) returnData[i] = json.dumps( dict( zip(rowstag, [ table.row_values(i)[0], demjson.decode(table.row_values(i)[1]) ]))) #通过编解码还原数据 returnData[i] = json.loads(returnData[i]) # print() print(returnData[i]) # col.insert(returnData[i])
def fetch_album(album_url, dlPath): try: resp = opener.open(album_url) except: print 'Problem while fetching ' + album_url return 0 content = resp.read() regex = r'var EmbedData = (\{(.*?)\});' jsValues = re.search(regex, content, flags=re.M | re.S) jsString = jsValues.group(1).replace('\\r', ' ').replace( '\\n', ' ').decode('utf8').encode('ascii', errors='ignore') jsString = jsString.replace("\\\"", "'") jsString = re.sub(r'//.[^,]*$', '', jsString, 0, flags=re.M) jsString = jsString.replace('\n\n', '').replace('\n', ' ').replace('" + "', '') try: albumNfo = demjson.decode(jsString) except: print("F**k 124 !!\n\n") return 0 regex = r'var TralbumData = (\{(.*?)\});' jsValues = re.search(regex, content, flags=re.M | re.S) jsString = jsValues.group(1).replace('\\r', ' ').replace( '\\n', ' ').decode('utf8').encode('ascii', errors='ignore') jsString = jsString.replace("\\\"", "'") jsString = re.sub(r'//.[^,]*$', '', jsString, 0, flags=re.M) jsString = jsString.replace('\n\n', '').replace('\n', ' ').replace('" + "', '') try: albumDatas = demjson.decode(jsString) except: print("F**k 146 !!\n\n") pprint(jsString) return 0 albumNfo.update(albumDatas) albumNfo['album_art_id'] = albumNfo['art_id'] if 'album_title' in albumNfo: albumTitle = albumNfo['album_title'] else: albumTitle = '_alone_track' try: dname = os.path.dirname(dlPath + sanitizeFname(albumNfo['artist']) + '/' + sanitizeFname(albumTitle) + '/') except: print('F**k 163 !!!\n\n') pprint(albumNfo) return 0 if not os.path.exists(dname): try: os.makedirs(dname) except OSError: pass if 'trackinfo' in albumDatas: download_album_cover(albumNfo, dname) for song in albumDatas['trackinfo']: download_song(song, albumNfo, dname) else: return 0
def get_weather_json(self, url):
    self.log(url)
    weather = requests.get(url).text.split('=')[1][:-1]
    self.log(weather)
    weather = demjson.decode(weather)['tqInfo']
    return weather
def get_config(cls):
    file = open(join(cls.WD, "config.json"), "r")
    cls.config = demjson.decode(file.read())
    file.close()
    return cls.config
- what is the hierarchy that is returned?? (are the leaf nodes what is really important?) - are the roles of the input params? in particular, what role do the lexicon values play? """ import sys, os import demjson from urllib import urlopen, urlencode from std import StdNode baseurl = 'http://standards.teachersdomain.org/td_standards_json/get_standards_hierarchical/' params = { 'jurisdictions': 'NY', 'lexicon_terms': '121,122', 'grade_range': 'k-12' } data = urlopen(baseurl, urlencode(params)) """ Returns a JSON formatted ordered 'node set' for a given Jurisdiction, list of Lexicon Term IDs and grade range use example: get_standards_hierarchical_json('NY','121,122','k-12') """ print "foo" json = demjson.decode(data.read()) # print json for tree in json: jurisdiction = tree[0] node = StdNode(tree[1]) node.report()
def parseData(self, response):
    self.logger.info(response.url)
    data = response.body.decode('gb2312')
    data = demjson.decode(data)
    self.logger.info(len(data))
    # Example record:
    # {symbol:"sz300711",code:"300711",name:"广哈通信",
    #  trade:"19.400",pricechange:"0.210",changepercent:"1.094",
    #  buy:"19.390",sell:"19.400",settlement:"19.190",open:"19.190",
    #  high:"19.520",low:"18.740",volume:2857915,amount:54821946,
    #  ticktime:"15:00:03",per:40.417,pb:4.974,
    #  mktcap:279740.15076,nmc:88562.94,turnoverratio:6.26036}
    # Field glossary:
    #   symbol: ticker symbol        code: stock code            name: short name
    #   trade: latest price          pricechange: price change   changepercent: percent change
    #   buy: bid                     sell: ask                   settlement: previous close
    #   open: open                   high: high                  low: low
    #   volume: volume               amount: turnover            mktcap: total market cap
    #   nmc: free-float market cap   ticktime: quote time        pb: price-to-book ratio
    #   turnoverratio: turnover ratio
    for each in data:
        item = each
        item['date'] = getLastClosingDate()
        item['type'] = 'DayClosingData'
        # yield item
        # Real-time quotes
        # yield scrapy.Request('http://hq.sinajs.cn/list=' + item['symbol'], meta=item, callback=self.parseNewData)
        code = item['code']
        symbol = item['symbol']

        # 5-minute bars: scale = minute interval (5, 15, 30, 60); ma = MA window (5, 10, 15, 20, 25); count = number of bars
        meta = {'symbol': symbol, 'scale': '5', 'ma': '5', 'count': '1023'}
        url = Template('http://money.finance.sina.com.cn/quotes_service/api/json_v2.php/CN_MarketData.getKLineData?symbol=${symbol}&scale=${scale}&ma=${ma}&datalen=${count}')
        # yield scrapy.Request(url.substitute(meta), meta=meta, callback=self.parseMinuteData)

        # Historical trades by quarter (quarter = 1, 2, 3, 4)
        meta = {'symbol': symbol, 'code': code, 'year': '2018', 'quarter': '1'}
        url = Template('http://money.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/${code}.phtml?year=${year}&jidu=${quarter}')
        # yield scrapy.Request(url.substitute(meta), meta=meta, callback=self.parseQuarterData)

        # Historical tick-by-tick detail interface; needs a trading day
        meta = {'symbol': symbol, 'date': time.strftime("%Y-%m-%d", time.localtime(time.time())), 'page': 0}
        url = Template('http://market.finance.sina.com.cn/transHis.php?symbol=${symbol}&date=${date}&page=${page}')
        yield scrapy.Request(url.substitute(meta), meta=meta, callback=self.parseDetailsData)

        # Money flow
        url = "http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/MoneyFlow.ssi_ssfx_flzjtj?format=text&daima=" + symbol
        # yield scrapy.Request(url, meta=meta, callback=self.parseCapitalFlow)
        # https://blog.csdn.net/woloqun/article/details/80734088

        # Financial report summary
        url = Template("http://vip.stock.finance.sina.com.cn/corp/go.php/vFD_FinanceSummary/stockid/${code}.phtml?qq-pf-to=pcqq.c2c")
        meta = {'code': code, 'symbol': symbol}
        # yield scrapy.Request(url.substitute(meta), meta=meta, callback=self.parseFinanceSummaryData)
        # https://blog.csdn.net/luanpeng825485697/article/details/78442062?locationNum=5&fps=1

        # Tencent stock data
        # Intraday chart
        url = Template('http://data.gtimg.cn/flashdata/hushen/minute/${symbol}.js?maxage=${maxage}&${random}')
        meta = {'symbol': symbol, 'maxage': '110', 'random': random.random()}
        # yield scrapy.Request(url.substitute(meta), meta=meta, callback=self.parseTencentMinuteData)

        # 5-day intraday chart
        url = Template('http://data.gtimg.cn/flashdata/hushen/4day/${tag}/${symbol}.js?maxage=${maxage}&visitDstTime=${visitDstTime}')
        meta = {'symbol': symbol, 'tag': symbol[0:2], 'maxage': '110', 'visitDstTime': 1}
        # yield scrapy.Request(url.substitute(meta), meta=meta, callback=self.parseTencentDayData)

        # Daily K-line
        url = Template('http://data.gtimg.cn/flashdata/hushen/latest/daily/${symbol}.js?maxage=${maxage}&visitDstTime=${visitDstTime}')
        meta = {'symbol': symbol, 'maxage': '43201', 'visitDstTime': 1}
        # yield scrapy.Request(url.substitute(meta), meta=meta, callback=self.parseTencentDayKData)

        # Daily K-line for a given year
        url = Template('http://data.gtimg.cn/flashdata/hushen/daily/${year}/${symbol}.js?visitDstTime=${visitDstTime}')
        meta = {'symbol': symbol, 'year': '2017'[-2:], 'visitDstTime': 1}
        # yield scrapy.Request(url.substitute(meta), meta=meta, callback=self.parseTencentYearDayKData)

        # Weekly K-line
        url = Template('http://data.gtimg.cn/flashdata/hushen/latest/weekly/${symbol}.js?maxage=${maxage}&visitDstTime=${visitDstTime}')
        meta = {'symbol': symbol, 'maxage': '43201', 'visitDstTime': 1}
        # yield scrapy.Request(url.substitute(meta), meta=meta, callback=self.parseTencentWeekKData)

        # Monthly K-line
        url = Template('http://data.gtimg.cn/flashdata/hushen/monthly/${symbol}.js?maxage=${maxage}')
        meta = {'symbol': symbol, 'maxage': '43201', 'visitDstTime': 1}
        # yield scrapy.Request(url.substitute(meta), meta=meta, callback=self.parseTencentMonthKData)

        # Trade detail, column listing
        url = Template('http://stock.gtimg.cn/data/index.php?appn=detail&action=timeline&c=${symbol}')
        meta = {'symbol': symbol}
        # yield scrapy.Request(url.substitute(meta), meta=meta, callback=self.parseTencentClosingDetailsListData)

        # Trade detail
        url = Template('http://stock.gtimg.cn/data/index.php?appn=detail&action=data&c=${symbol}&p=${page}')
        meta = {'symbol': symbol, 'page': 0, 'date': '20180413'}
        # yield scrapy.Request(url.substitute(meta), meta=meta, callback=self.parseTencentClosingDetailsData)

        # Large-order data
        # opt=10, 11, 12, 13 correspond to turnover >= (1,000,000 / 2,000,000 / 5,000,000 / 10,000,000 CNY)
        # opt=1..8 correspond to volume >= (100 / 200 / 300 / 400 / 500 / 800 / 1000 / 1500 / 2000 lots)
        url = Template('http://stock.finance.qq.com/sstock/list/view/dadan.php?t=js&c=${symbol}&max=${max}&p=${page}&opt=${opt}&o=${o}')
        meta = {'symbol': symbol, 'max': 80, 'page': 0, 'opt': 10, 'o': 0}
        # yield scrapy.Request(url.substitute(meta), meta=meta, callback=self.parseTencentLargeSingleData)
        break  # data[0] only

    # https://hq.sinajs.cn/?_=1554047924366&list=ml_sh600100
    # Announcements:
    # https://vip.stock.finance.sina.com.cn/api/jsonp.php/var%20noticeData=/CB_AllService.getMemordlistbysymbol?num=8&PaperCode=600100
    # ttps://news.sinajs.cn/rn=1554047925361&maxcnt=20&scnt=20&list=sh600100,gg_sh600100,ntc_sh600100,blog_sh600100,tg_sh600100,lcs_sh600100
    # ttps://vip.stock.finance.sina.com.cn/quotes_service/api/jsonp.php/var%20moneyFlowData=/MoneyFlow.ssi_ssfx_flzjtj?daima=sh600100&gettime=1
    # https://finance.sina.com.cn/realstock/company/sh600100/hisdata/klc_kl.js?d=2019_4_1
    return

    # Paging for subsequent getData requests (unreachable after the return above)
    node = response.meta['node']
    tag = response.meta['tag']
    count = int(response.meta['count'])
    page = int(response.meta['page'])
    if page * 80 < count:
        param = self.getData_request(node, tag, page + 1, count)
        yield scrapy.Request(param['url'], meta=param['meta'], callback=self.parseData)
def _build_demjson_config(self, demjson_string):
    if demjson_string is None:
        return OmegaConf.create()
    demjson_dict = demjson.decode(demjson_string)
    return OmegaConf.create(demjson_dict)
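# A minimal standalone sketch of the pattern above, assuming only demjson and
# omegaconf are installed; the function name and the sample config string are
# illustrative, not taken from the source. It shows why demjson is used here:
# it tolerates lax, JavaScript-style input (unquoted keys, single quotes) and
# still yields a plain dict that OmegaConf can wrap.
import demjson
from omegaconf import OmegaConf

def build_demjson_config(demjson_string=None):
    if demjson_string is None:
        return OmegaConf.create()
    return OmegaConf.create(demjson.decode(demjson_string))

conf = build_demjson_config("{batch_size: 32, optimizer: 'adam'}")
print(conf.batch_size)  # -> 32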
def getjsondelay(data):
    data = json.loads(data)
    data = demjson.decode(data['Datapoints'])
    value = float(data[-1]['avg_rtt'])
    return round(value, 2)
# Team IDs: 140 tex, 141 tor, 142 min, 143 phi, 144 atl,
#           145 cws, 146 mia, 147 nyy, 158 mil
for team in range(108, 160):
    page_url = Template(
        "http://mlb.mlb.com/lookup/json/named.roster_40.bam?team_id=$team"
    ).substitute(team=team)
    json1 = scraperwiki.scrape(page_url)
    json_decode1 = demjson.decode(json1)
    roster_len = int(json_decode1['roster_40']['queryResults']['totalSize'])
    if roster_len > 0:
        players = json_decode1['roster_40']['queryResults']['row']
        for i in range(0, roster_len):
            record = {}
            record['key'] = players[i]['player_id']
            record['player_id'] = players[i]['player_id']
            record['status_code'] = players[i]['status_code']
            record['primary_position'] = players[i]['primary_position']
            record['position_txt'] = players[i]['position_txt']
            record['name_display_first_last'] = players[i]['name_display_first_last']
            record['name_display_last_first'] = players[i]['name_display_last_first']
def _get_rank_fund_info(self):
    '''
    Fetch the rank_fund data for every fund listed on 天天基金 (fund.eastmoney.com)
    :return: a list
    '''
    rank_fund_list = []
    for page_num in range(self.page_num_start, self.page_num_end):
        print('正在抓取第{0}页的基金信息...'.format(page_num))
        cookies = {
            'st_pvi': '11586003301354',
            'EMFUND1': 'null',
            'EMFUND0': 'null',
            'EMFUND2': '07-10%2018%3A01%3A38@%23%24%u534E%u6DA6%u5143%u5927%u73B0%u91D1%u901A%u8D27%u5E01B@%23%24002884',
            'EMFUND3': '07-10%2018%3A01%3A48@%23%24%u5929%u5F18%u73B0%u91D1%u7BA1%u5BB6%u8D27%u5E01B@%23%24420106',
            'EMFUND4': '07-10%2018%3A11%3A53@%23%24%u65B9%u6B63%u5BCC%u90A6%u4FDD%u9669%u4E3B%u9898%u6307%u6570%u5206%u7EA7@%23%24167301',
            'EMFUND5': '07-10%2018%3A04%3A32@%23%24%u62DB%u5546%u4E2D%u8BC1%u94F6%u884C%u6307%u6570%u5206%u7EA7@%23%24161723',
            'EMFUND6': '07-10%2018%3A05%3A13@%23%24%u5929%u5F18%u4E2D%u8BC1%u94F6%u884C%u6307%u6570C@%23%24001595',
            'EMFUND7': '07-10%2018%3A06%3A13@%23%24%u5929%u5F18%u4E2D%u8BC1%u94F6%u884C%u6307%u6570A@%23%24001594',
            'st_si': '38764934559714',
            'ASP.NET_SessionId': 'hqeo1xk5oqgwb0cqzxicytda',
            'EMFUND8': '07-11 11:28:55@#$%u7533%u4E07%u83F1%u4FE1%u591A%u7B56%u7565%u7075%u6D3B%u914D%u7F6E%u6DF7%u5408A@%23%24001148',
            'EMFUND9': '07-11 11:28:55@#$%u5E7F%u53D1%u751F%u7269%u79D1%u6280%u6307%u6570%28QDII%29@%23%24001092',
        }
        headers = {
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            # 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'Accept': '*/*',
            # 'Referer': 'http://fund.eastmoney.com/data/fundranking.html',
            'Proxy-Connection': 'keep-alive',
        }
        end_date = str(get_shanghai_time())[:10]
        start_date = str(
            datetime.datetime(year=get_shanghai_time().year - 1,
                              month=get_shanghai_time().month,
                              day=get_shanghai_time().day))[:10]
        print('开始时间: {0}, 结束时间: {1}'.format(start_date, end_date))
        params = (
            ('op', 'ph'),
            ('dt', 'kf'),
            ('ft', 'all'),
            ('rs', ''),
            ('gs', '0'),
            ('sc', 'zzf'),
            ('st', 'desc'),
            ('sd', start_date),     # e.g. '2017-07-10'
            ('ed', end_date),       # e.g. '2018-07-10'
            ('qdii', ''),
            ('tabSubtype', ',,,,,'),
            ('pi', str(page_num)),  # page number within rank_data
            ('pn', '50'),
            ('dx', '1'),
            # ('v', '0.5290053467389759'),
        )
        url = 'http://fund.eastmoney.com/data/rankhandler.aspx'
        # TODO: a plain requests call gets a 502 here
        # body = MyRequests.get_url_body(url=url, headers=headers, params=params, cookies=None)
        # print(body)
        # so fetch the page with phantomjs instead
        body = self.my_phantomjs.use_phantomjs_to_get_url_body(
            url=_get_url_contain_params(url, params))
        try:
            body = re.compile('<body>(.*)</body>').findall(body)[0]
            this_page_rank_data = re.compile(r'rankData = (.*);').findall(body)[0]
            # print(this_page_rank_data)
        except IndexError:
            print('在获取this_page_rank_data时索引异常!请检查!')
            continue

        # json.loads fails with: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
        # Workaround: decode the lax JavaScript object with demjson instead
        this_page_rank_data = demjson.decode(this_page_rank_data).get('datas', {})
        # pprint(this_page_rank_data)
        if this_page_rank_data == {}:
            return []

        for item in this_page_rank_data:
            _i = item.split(',')
            rank_fund_list.append({
                '基金代码': _i[0],
                '基金简称': _i[1],
                '当天日期': _i[3],
                '单位净值': _i[4],
                '累计净值': _i[5],
                '日增长率': _i[6],
                '近1周': _i[7],
                '近1月': _i[8],
                '近3月': _i[9],
                '近6月': _i[10],
                '近1年': _i[11],
                '近2年': _i[12],
                '近3年': _i[13],
                '今年来': _i[14],
                '成立来': _i[15],
                '手续费': _i[20],
            })
        sleep(2.5)

    print('\n抓取完毕!\n')
    # pprint(rank_fund_list)
    return rank_fund_list
# Name:        navitia2OSM.py
#
# Author:      @nlehuby - noemie.lehuby(at)gmail.com
#
# Created:     04/06/2014
# Licence:     WTFPL
#-------------------------------------------------------------------------------
import requests
import demjson
import smtplib

url = "http://taginfo.openstreetmap.org/api/4/key/values?key=brewery:note"
appel_taginfo = requests.get(url)
data_tag = demjson.decode(appel_taginfo.content)

if data_tag['total'] == 0:
    print "Pas de résultats, rien de neuf ..."
else:
    print "Il y a des résultats !"
    FROM = '*****@*****.**'
    TO = ['*****@*****.**']
    SUBJECT = "Il y a du nouveau sur taginfo !"
    TEXT = "Il y a " + str(data_tag['total']) + " nouveaux résultats : " + url

    message = """From: %s\nTo: %s\nSubject: %s\n\n%s""" % (
        FROM, ", ".join(TO), SUBJECT, TEXT)

    smtp = smtplib.SMTP()
    smtp.connect('serveur', 587)
    smtp.starttls()
    smtp.login('login', 'password')
def getjsonvalue(data):
    data = json.loads(data)
    data = demjson.decode(data['Datapoints'])
    value = float(data[-1]['Value'])
    return round(value, 2)
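# A hedged usage sketch for getjsondelay/getjsonvalue above. The payload shape is
# inferred from the code itself (an outer strict-JSON document whose 'Datapoints'
# field is a JSON-ish string that demjson decodes into a list of samples), not from
# any documented API; it also assumes the json/demjson imports those helpers rely on.
import json

sample = json.dumps({
    "Datapoints": "[{Value: 12.3456, avg_rtt: 1.21}, {Value: 67.8912, avg_rtt: 3.47}]"
})
print(getjsondelay(sample))  # -> 3.47
print(getjsonvalue(sample))  # -> 67.89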
def air_quality_hist(city: str = "杭州",
                     period: str = "day",
                     start_date: str = "2019-03-27",
                     end_date: str = "2020-04-27") -> pd.DataFrame:
    """
    真气网 (zq12369.com) historical air-quality data
    https://www.zq12369.com/
    :param city: call the air_city_list interface to get the full list of cities
    :type city: str
    :param period: "hour": one record per hour (large download, slow); "day": one record per day; "month": one record per month
    :type period: str
    :param start_date: e.g., "2019-03-27"
    :type start_date: str
    :param end_date: e.g., "2020-03-27"
    :type end_date: str
    :return: air-quality data for the given city and frequency over the requested period
    :rtype: pandas.DataFrame
    """
    url = "https://www.zq12369.com/api/newzhenqiapi.php"
    file_data = _get_file_content(file_name="outcrypto.js")
    out = execjs.compile(file_data)
    appId = "4f0e3a273d547ce6b7147bfa7ceb4b6e"
    method = "CETCITYPERIOD"
    timestamp = execjs.eval("timestamp = new Date().getTime()")
    p_text = json.dumps(
        {
            "city": city,
            "endTime": f"{end_date} 23:45:39",
            "startTime": f"{start_date} 00:00:00",
            "type": period.upper(),
        },
        ensure_ascii=False,
        indent=None,
    ).replace(' "', '"')
    secret = out.call("hex_md5", appId + method + str(timestamp) + "WEB" + p_text)
    payload = {
        "appId": "4f0e3a273d547ce6b7147bfa7ceb4b6e",
        "method": "CETCITYPERIOD",
        "timestamp": int(timestamp),
        "clienttype": "WEB",
        "object": {
            "city": city,
            "type": period.upper(),
            "startTime": f"{start_date} 00:00:00",
            "endTime": f"{end_date} 23:45:39",
        },
        "secret": secret,
    }
    need = (
        json.dumps(payload, ensure_ascii=False, indent=None, sort_keys=False)
        .replace(' "', '"')
        .replace("\\", "")
        .replace('p": ', 'p":')
        .replace('t": ', 't":')
    )
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36"
    }
    params = {"param": out.call("AES.encrypt", need)}
    r = requests.post(url, data=params, headers=headers)
    temp_text = out.call("decryptData", r.text)
    data_json = demjson.decode(out.call("b.decode", temp_text))
    temp_df = pd.DataFrame(data_json["result"]["data"]["rows"])
    temp_df.index = temp_df["time"]
    del temp_df["time"]
    temp_df = temp_df.astype(float, errors="ignore")
    return temp_df
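# A hedged usage sketch for air_quality_hist; the arguments simply mirror its
# defaults, and whether the endpoint still responds depends on the bundled
# outcrypto.js and the site's encryption scheme, which this example cannot verify.
hist_df = air_quality_hist(city="杭州", period="day",
                           start_date="2019-03-27", end_date="2020-04-27")
print(hist_df.head())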
def get_fbpage_informaton(page_source):
    '''Fetch the detailed information of a Facebook fan page

    Args:
        user_fbid: the fan page's unique fbid
        page_source: the page source
    Returns:
    '''
    lat = ''
    lng = ''
    fbid = ''
    type = ''
    offical_website = ''
    name = ''
    blue_verification_badge = False
    likes = 0
    soup = bs(page_source, 'html.parser')

    type_elm = soup.select_one(
        'img[src="https://static.xx.fbcdn.net/rsrc.php/v3/y7/r/3OfQvJdYD_W.png"]'
    ).parent
    if type_elm:
        type = type_elm.text.split(' · ')

    offical_website_elm = soup.select_one(
        'img[src="https://static.xx.fbcdn.net/rsrc.php/v3/yN/r/aE7VLFYMYdl.png"]'
    ).parent
    if offical_website_elm:
        offical_website = offical_website_elm.text

    blue_verification_badge_elm = soup.select_one(
        'img[src="https://static.xx.fbcdn.net/rsrc.php/v3/yN/r/ZRwcHdL-Tur.png"]'
    )
    if blue_verification_badge_elm:
        blue_verification_badge = True

    likes_elm = soup.select_one(
        '[style="font-size: 14px;font-weight: 400;line-height: 16px;color: #606770"]'
    )
    if likes_elm:
        likes = re.search(re.compile(r"(\d+)"),
                          likes_elm.text.replace(',', '')).groups()[0]

    name_elm = soup.select_one('[data-sigil="MBackNavBarClick"]')
    if name_elm:
        name = name_elm.text.replace(' - 首頁', '')

    fbid_elm = soup.select_one('a[rel = "async"]')
    if fbid_elm:
        fbid = fatch_fbid(fbid_elm['href'])

    geog_elm = soup.select_one('.profileMapTile')
    if geog_elm:
        landscape_url = demjson.decode(geog_elm['data-store'])['landscapeURL']
        lat = re.findall(r"\d+\.\d+", landscape_url)[0]
        lng = re.findall(r"\d+\.\d+", landscape_url)[1]

    data = {
        'fbid': fbid,
        'offical_website': offical_website,
        'name': name,
        'type': type,
        'lat': lat,
        'lng': lng,
        'blue_verification_badge': blue_verification_badge,
        'likes': likes,
    }
    return data
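# A hedged mini-example of the geolocation step above: the profileMapTile element's
# data-store attribute carries a JavaScript-style object, which demjson decodes even
# though it is not strict JSON. The attribute value and coordinates below are
# fabricated for illustration.
import re
import demjson

data_store = "{landscapeURL: 'https://maps.example.com/?center=25.0330,121.5654&zoom=15'}"
landscape_url = demjson.decode(data_store)['landscapeURL']
lat, lng = re.findall(r"\d+\.\d+", landscape_url)[:2]
print(lat, lng)  # -> 25.0330 121.5654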
def json(self, api, *vars):
    url = api % vars
    return demjson.decode(self.downloader(url))
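# A hedged sketch of how a helper like json(self, api, *vars) is typically wired up:
# `api` is a printf-style URL template and `*vars` fills its placeholders. The class
# name, the requests-based downloader, and the example endpoint are assumptions for
# illustration, not part of the source.
import demjson
import requests

class ApiClient(object):
    def downloader(self, url):
        return requests.get(url).text

    def json(self, api, *vars):
        url = api % vars
        return demjson.decode(self.downloader(url))

client = ApiClient()
issues = client.json('https://api.github.com/repos/%s/%s/issues?state=%s',
                     'octocat', 'Hello-World', 'open')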
def stock_report_fund_hold_detail(symbol: str = "005827",
                                  date: str = "20201231") -> pd.DataFrame:
    """
    东方财富网 (eastmoney.com) - Data Center - institutional holdings - fund holdings - detail
    http://data.eastmoney.com/zlsj/ccjj/2020-12-31-008286.html
    :param symbol: fund code
    :type symbol: str
    :param date: financial report release date as yyyymmdd, e.g. xxxx0331, xxxx0630, xxxx0930, xxxx1231
    :type date: str
    :return: fund holdings detail data
    :rtype: pandas.DataFrame
    """
    date = "-".join([date[:4], date[4:6], date[6:]])
    url = "http://datainterface3.eastmoney.com/EM_DataCenter_V3/api/ZLCCMX/GetZLCCMX"
    params = {
        "js": "datatable8848106",
        "tkn": "eastmoney",
        "SHType": "1",
        "SHCode": symbol,
        "SCode": "",
        "ReportDate": date,
        "sortField": "SCode",
        "sortDirec": "1",
        "pageNum": "1",
        "pageSize": "500",
        "cfg": "ZLCCMX",
        "_": "1611579153269",
    }
    r = requests.get(url, params=params)
    data_text = r.text
    data_json = demjson.decode(data_text[data_text.find("{"):-1])
    temp_df = pd.DataFrame(data_json["Data"][0])
    temp_df = temp_df["Data"].str.split("|", expand=True)
    temp_df.reset_index(inplace=True)
    temp_df["index"] = range(1, len(temp_df) + 1)
    temp_df.columns = [
        "序号",
        "股票代码",
        "股票简称",
        "_",
        "_",
        "_",
        "_",
        "_",
        "_",
        "_",
        "持股数",
        "持股市值",
        "占总股本比例",
        "占流通股本比例",
    ]
    temp_df = temp_df[[
        "序号",
        "股票代码",
        "股票简称",
        "持股数",
        "持股市值",
        "占总股本比例",
        "占流通股本比例",
    ]]
    return temp_df
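# A hedged usage sketch for stock_report_fund_hold_detail; the fund code and report
# date are just the defaults from the signature above.
detail_df = stock_report_fund_hold_detail(symbol="005827", date="20201231")
print(detail_df.head())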
def json_decode(str):
    return demjson.decode(str)
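# A hedged illustration of why a thin wrapper like json_decode is worth having:
# demjson's non-strict decoder accepts JavaScript-ish input (unquoted keys, single
# quotes, trailing commas) that the standard json module rejects. The sample string
# is made up for the comparison.
import json
import demjson

lax = "{name: 'demo', count: 3,}"
print(demjson.decode(lax))       # -> {'name': 'demo', 'count': 3}
try:
    json.loads(lax)
except ValueError as exc:        # json.JSONDecodeError subclasses ValueError
    print("json.loads rejects it:", exc)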