def test_process_response(self, mock_json, mock_get):
    mock_get.return_value = Response()
    mock_get.return_value.status_code = 200
    mock_json.return_value = {'current_observation': {'stuff': 'things'}}
    e = Event()
    blk = WUnderTest(e)
    city = 'San Francisco'
    state = 'California'
    self.configure_block(blk, {
        'polling_interval': {'seconds': 1},
        'retry_interval': {'seconds': 1},
        'queries': [{'state': state, 'city': city}],
    })
    blk.start()
    e.wait(2)
    self.assert_num_signals_notified(1)
    self.assertEqual(
        mock_get.call_args[0][0],
        blk.URL_FORMAT.format(blk.api_key(), 'conditions', quote(state), quote(city))
    )
    blk.stop()
def get_lyrics(artist, title, recurse_count=0):
    addr = BASE_URL + 'title=%s:%s&action=edit' % (quote(artist), quote(title))
    logger.info("Downloading lyrics from %r", addr)
    content = urlopen(addr).read().decode('utf-8')
    if RE_REDIRECT.search(content):
        if recurse_count >= 10:
            # OK, we looped a bit too much, just suppose we couldn't find any
            # lyrics
            logger.info("Too many redirects to find lyrics for %r: %r", artist, title)
            return None
        new_artist, new_title = RE_REDIRECT.search(content).groups()
        logger.debug("Lyrics for '%s: %s' redirects to '%s: %s'",
                     artist, title, new_artist, new_title)
        return get_lyrics(new_artist, new_title, recurse_count + 1)
    lyrics = content.split("<lyrics>")[1].split("</lyrics>")[0].strip()
    if lyrics != "" and lyrics != EMPTY_LYRICS:
        return lyrics
    else:
        return None
def quote(stuff_to_quote):
    if PY3:
        # quote's canonical home on Python 3 is urllib.parse
        # (urllib.request only re-exports it)
        from urllib.parse import quote
        return quote(stuff_to_quote)
    else:
        from urllib2 import quote
        return quote(stuff_to_quote)
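The wrapper above re-resolves the import on every call. A minimal alternative sketch, using the standard try/except idiom, resolves it once at import time instead (Python 2 exposes quote from urllib):

try:
    from urllib.parse import quote  # Python 3
except ImportError:
    from urllib import quote  # Python 2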
def buildRequest(self, strVar, query, isCmd, isHeader, header=None):
    if "[random]" in strVar:
        strVar = strVar.replace("[random]", core.txtproc.rndString(16))
    if isHeader:
        if header == "cookie":
            query = request.quote(query)
            strVar = strVar.replace("%3b", "[semicolon]")
            strVar = request.unquote(strVar)
            strVar = strVar.replace("; ", "COOKIESEPARATOR").replace("=", "COOKIEEQUAL").replace(";", "COOKIESEPARATOR")
            strVar = strVar.replace("[semicolon]", ";")
            strVar = strVar.replace("[eq]", "=")
            strVar = strVar.replace("[", "LEFTSQBRK").replace("]", "RIGHTSQBRK")
            strVar = request.quote(strVar)
            strVar = strVar.replace("COOKIESEPARATOR", "; ").replace("COOKIEEQUAL", "=")\
                .replace("LEFTSQBRK", "[").replace("RIGHTSQBRK", "]")
    else:
        strVar = strVar.replace("[eq]", "=")
    if isCmd:
        if "[cmd]" in strVar:
            strVar = strVar.replace("[cmd]", query)
        if "[sub]" in strVar:
            strVar = strVar.replace("[sub]", "null")
    else:
        if "[cmd]" in strVar:
            strVar = strVar.replace(";[cmd]", "").replace("%3B[cmd]", "")
        strVar = strVar.replace("[sub]", query)
    if "[blind]" in strVar:
        strVar = strVar.replace("[blind]", query)
    return strVar
def get(self, method, args=None):
    """GET to DeepDetect server"""
    u = self.__ddurl
    u += method
    headers = {}
    if args is not None:
        sep = "?"
        for arg, argv in args.iteritems():
            u += sep
            sep = "&"
            u += urllib2.quote(arg)
            u += '='
            if argv is not None:
                u += urllib2.quote(argv)
    LOG("GET %s" % u)
    response = None
    try:
        req = urllib2.Request(u)
        response = urllib2.urlopen(req, timeout=DD_TIMEOUT)
        jsonresponse = response.read()
    except:
        raise DDCommunicationError(u, "GET", headers, None, response)
    LOG(jsonresponse)
    try:
        return self.__return_format(jsonresponse)
    except:
        raise DDDataError(u, "GET", headers, None, jsonresponse)
def data_detail(request, project, name):
    url = "http://localhost:8080/dmcapi-rest/" + "user/" + quote(request.session['user'], '') \
        + "/project/" + quote(project, '') + "/dataType/metaData/" + quote(name, '')
    h = httplib2.Http()
    h.add_credentials(request.session['user'], request.session['password'])
    resp, content = h.request(url, method='GET', headers={'accept': 'application/json'})
    string = content.decode('utf-8')
    json_data = json.loads(string)
    data_name = json_data['dataName']
    data_path = json_data['dataPath']
    data_type = json_data['dataType']
    attribute = json_data['attribute']
    attributeContaners = attribute['attributeContaners']
    data = {}
    for attr in attributeContaners:
        for k, v in attr.items():
            if k == 'physicalDTO':
                name = v['name']
                val = v['dataType']
                data[name] = val
    user_name = request.session['user']
    project_name = project
    return render(request, 'data/detail.html',
                  {'name': data_name, 'path': data_path, 'type': data_type,
                   'data': data, 'user': user_name, 'project_name': project_name})
def tasks(request, project):
    user = request.session['user']
    password = request.session['password']
    # url for get data sets
    url_data = MAIN_URL + "user/" + quote(user, '') + "/project/" + quote(project, '') + "/dataType/metaData"
    # url for get saved tasks
    url_task = MAIN_URL + "user/" + quote(user, '') + "/project/" + quote(project, '') + "/miningTask"
    # get data sets
    resp_data, content_data = get_message(user, password, url_data, "GET", {'accept': 'application/json'})
    json_data = decode_json(content_data)
    list_physical_data_set = json_data
    # get saved tasks
    resp_task, content_task = get_message(user, password, url_task, "GET", {'accept': 'application/json'})
    saved_data = decode_json(content_task)
    # get environment list
    # environment_list = []  # ["environment 1", "environment 2", "environment 3"]
    # get mining function list
    mining_function_list = []  # ["mining function 1", "mining function 2", "mining function 3"]
    # result info
    result_data = {'listPhysicalDataSet': list_physical_data_set, 'saved_data': saved_data,
                   'user': user, 'project_name': project, 'environment_list': [],
                   'mining_function_list': mining_function_list}
    return render(request, 'tasks/tasks.html', result_data)
def download(file_type):
    '''Download endpoint.'''
    if file_type not in ['form', 'scheme']:
        abort(404)
    id = request.form.get('id')
    type = request.form.get('type')
    data = query_data(type, id)
    # download the proposal document
    if file_type == 'scheme':
        if data.filename == 'Nothing':
            abort(404)
        content = send_file(path.join(Upload_path, data.rand_filename))
        filename = quote(data.filename)
        # if data.applicant != current_user.name: abort(404)
    else:
        # build the template context and render it
        context = make_context(data, type)
        for key, value in context.items():
            context[key] = RichText(value)
        doc = DocxTemplate(path.join(Docx_path, type + '.docx'))
        doc.render(context)
        temp_file = path.join(Upload_path, str(current_user.id) + 'result.docx')
        doc.save(temp_file)
        # read the rendered file back, then delete it
        with open(temp_file, 'rb') as f:
            content = f.read()
        if path.exists(temp_file):
            remove(temp_file)
        filename = quote(data.association + '-' + types[type][1] + '.docx')
    response = make_response(content)
    response.headers['Content-Disposition'] = \
        "attachment;filename*=UTF-8''" + filename
    response.headers['Content-Type'] = 'application/octet-stream'
    return response
def find(unsortedMoviesFolder, sortedMoviesFolder):
    for movie in os.listdir(unsortedMoviesFolder):
        year = None
        movieName = movie
        for y in range(1500, 2100):
            if str(y) in movie:
                year = str(y)
                movie = movie.replace(str(y), " ")
        for z in values:
            if str(z) in movie:
                movie = movie.replace(str(z), " ")
        if "  " in movie:
            movie = movie.replace("  ", " ")
        if year is None:
            url = 'http://www.omdbapi.com/?t=' + quote(str(movie))
        else:
            url = 'http://www.omdbapi.com/?t=' + quote(str(movie)) + '&y=' + year
        response = urlopen(url).read()
        response = response.decode('utf-8')
        jsonvalues = json.loads(response)
        if jsonvalues["Response"] == "True":
            imdbrating = jsonvalues['imdbRating']
            destinationDirLocation = sortedMoviesFolder + '\\' + imdbrating + '_' + movieName
            srcFileLocation = unsortedMoviesFolder + '\\' + movieName
            if not os.path.exists(destinationDirLocation):
                os.makedirs(destinationDirLocation)
            shutil.move(srcFileLocation, destinationDirLocation)
def login_redirect():
    args = {
        'client_id': app.config['GOOGLE_CLIENT_ID'],
        'redirect_url': quote(app.config['GOOGLE_CALLBACK_URL']),
        'scope': quote('profile email')  # we really only care if the user exists and has accepted our app
    }
    redirect_url = 'https://accounts.google.com/o/oauth2/auth?scope={scope}&redirect_uri={redirect_url}&client_id={client_id}&response_type=code'.format(**args)
    return redirect(redirect_url)
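For query strings built from several parameters, urllib.parse.urlencode percent-encodes every value and joins the pairs in one step, which avoids quoting each parameter by hand. A minimal sketch; the function name build_auth_url and its parameters are illustrative, not part of the snippet above:

from urllib.parse import urlencode

def build_auth_url(client_id, callback_url):
    # urlencode() percent-encodes each value, so no separate quote() calls are needed
    params = {
        'scope': 'profile email',
        'redirect_uri': callback_url,
        'client_id': client_id,
        'response_type': 'code',
    }
    return 'https://accounts.google.com/o/oauth2/auth?' + urlencode(params)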
def getweatherinfo(self, latitude, longitude):
    url = "http://api.openweathermap.org/data/2.5/forecast/daily?lat=%s&lon=%s&units=metric&cnt=2" % (
        quote(str(latitude)), quote(str(longitude))
    )
    data = urlopen(url).read().decode('utf-8')
    dataopenweathermap = json.loads(data)
    return dataopenweathermap['list']
def parsWikiEn(wordObj):
    if "noParsed" in wordObj:
        return
    if "noEnglish" in wordObj:
        return
    for typ in typs:
        if typ in wordObj:
            return
    html = ""
    text = wordObj["text"]
    url = "/wiki/" + quote(text)
    urls = [url]
    urls.append("/wiki/" + quote(text.upper()))
    urls.append("/wiki/" + quote(text.lower()))
    urls.append("/wiki/" + quote(text[0].upper() + text[1:].lower()))
    urls.append("/w/index.php?search=" + quote(text))
    for url in urls:
        try:
            fullurl = "http://en.wiktionary.org" + url
            response = urlopen(fullurl)
            html = response.read()
            break
        except HTTPError as e:
            if str(e) != "HTTP Error 404: Not Found":
                logging.exception(e)
        except Exception as e:
            print(url, e)
            logging.exception(e)
    if html == "":
        wordObj["noParsed"] = 1
        logging.warning(' no downloaded "' + text + '" by urls: ' + str(urls))
        return
    start = html.find(b' id="English"')
    end = html.find(b"<hr />", start)
    if start == -1:
        wordObj["noEnglish"] = 1
    else:
        html = html[start:end]
        countTypes = 0
        nounPart = getTypPart(html, "Noun")
        if nounPart:
            parseNounPart(nounPart, wordObj)
            countTypes += 1
        verbPart = getTypPart(html, "Verb")
        if verbPart:
            parseVerbPart(verbPart, wordObj)
            countTypes += 1
        adjectivePart = getTypPart(html, "Adjective")
        if adjectivePart:
            parseAdjectivePart(adjectivePart, wordObj)
            countTypes += 1
        adverbPart = getTypPart(html, "Adverb")
        if adverbPart:
            parseAdverbPart(adverbPart, wordObj)
            countTypes += 1
        wordObj["countTypes"] = countTypes
def get_param_value(param, value):
    if isinstance(value, dict):
        if 'contains' in value:
            return param + '=.*?' + re.escape(urllib2.quote(value['contains'])).replace('\\%', '%') + '.*?'
        elif 'equalto' in value:
            return param + '=' + re.escape(urllib2.quote(value['equalto'])).replace('\\%', '%')
        elif 'matches' in value:
            return param + '=' + value['matches'].replace(' ', '%20')
    else:
        return param + '=' + value.replace(' ', '%20')
def search(keyword, filename, isFist):
    # URL to query
    url = "http://www.lagou.com/jobs/positionAjax.json?"
    # add the keyword condition
    logger.info("Adding city search condition, keyword: " + keyword)
    url = url + "&kd=" + request.quote(keyword)
    # first query: fetch the total record count
    url = url + "&first=true&pn=1"
    logger.info("URL of the first query: " + url)
    # total number of records found
    totalCount = 0
    # total number of pages
    totalPage = 0
    with request.urlopen(url) as resultF:
        if resultF.status == 200 and resultF.reason == 'OK':
            # read the returned records
            data = resultF.read().decode('utf-8')
            # parse data as JSON
            dataJson = json.loads(data)
            # total record count
            totalCount = int(dataJson['content']['totalCount'])
            # total page count
            totalPage = int(dataJson['content']['totalPageCount'])
            logger.info("Total records found: " + str(totalCount))
            logger.info("Total pages found: " + str(totalPage))
            logger.info("Records per page: " + str(dataJson['content']['pageSize']))
        else:
            logger.error("First request failed, status code: " + str(resultF.status))
            # stop here
            return
    # date of the query
    dateStr = datetime.now().strftime('%Y%m%d')
    # loop over all result pages
    for i in range(totalPage):
        # build the URL-encoded query for each page
        if i == 0:
            url = "http://www.lagou.com/jobs/positionAjax.json?" + "&kd=" + request.quote(keyword) + "&first=true&pn=1"
        else:
            url = "http://www.lagou.com/jobs/positionAjax.json?" + "&kd=" + request.quote(keyword) + "&first=false&pn=" + str(i + 1)
        logger.info("Query URL: " + url)
        # send the request pretending to be a Chrome browser
        req = request.Request(url)
        req.add_header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.132 Safari/537.36")
        with request.urlopen(req) as result:
            if result.status == 200 and result.reason == 'OK':
                # read the returned records
                data = str(result.read().decode('utf-8'))
                # prefix with date and total count for disambiguation, e.g. 20151210_981;content
                # parse data as JSON
                dataJson = json.loads(data)
                saveDataForHbase(dataJson['content']['result'], filename, isFist)
        # sleep one second per request to avoid anti-crawler measures
        time.sleep(1)
def stream_id(self):
    if self.stream_type() == FeedlyStreamType.FEED:
        return quote('feed/{}'.format(self.stream_name()), safe='')
    if self.stream_type() == FeedlyStreamType.TAG:
        return quote('user/{}/tag/{}'.format(self.user_id(), self.stream_name()), safe='')
    else:  # FeedlyStreamType.CATEGORY
        return quote('user/{}/category/{}'.format(self.user_id(), self.stream_name()), safe='')
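The safe='' argument matters here: by default quote leaves '/' unescaped, which would break a stream ID that is itself placed into a URL path segment. A quick illustration, assuming quote is urllib.parse.quote:

from urllib.parse import quote

print(quote('feed/http://example.com/rss'))           # feed/http%3A//example.com/rss
print(quote('feed/http://example.com/rss', safe=''))  # feed%2Fhttp%3A%2F%2Fexample.com%2Frss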
def data_delete(request, project, name):
    user = request.session['user']
    password = request.session['password']
    url = "http://localhost:8080/dmcapi-rest/" + "user/" + quote(user, '') \
        + "/project/" + quote(project, '') + "/dataType/metaData/" + quote(name, '')
    resp, content = get_message(user, password, url, "DELETE", {'accept': 'application/json'})
    if resp.status == 200:
        return JsonResponse({"result": "success"})
    else:
        return JsonResponse({"result": "error", "error": resp.status})
def delete_task(request, project, name):
    user = request.session['user']
    password = request.session['password']
    url = MAIN_URL + "user/" + quote(user, '') + "/project/" + quote(project, '') + "/miningTask/" + quote(name, '')
    resp, content = get_message(user, password, url, "DELETE", {'accept': 'application/json'})
    if resp.status == 200:
        result = {'result': 'success', 'error': resp.status}
    else:
        result = {'result': 'error', 'error': resp.status}
    return JsonResponse(result)
def env_delete(request, project):
    result = {'result': 'error'}
    user = request.session['user']
    password = request.session['password']
    id = request.POST['id']
    url = MAIN_URL + "user/%s/project/%s/env/%s" % (quote(user, ''), quote(project, ''), quote(id, ''))
    resp, content = make_request(url, 'DELETE', request)
    if resp.status == 200:
        result = {'result': 'success'}
    return JsonResponse(result)
def edict(bot, trigger):
    if not trigger.group(2):
        return bot.say("Please enter a word.")
    i = trigger.group(2)
    try:
        i.encode('ascii')
        print(i)
        x = urlopen("http://nihongo.monash.edu/cgi-bin/wwwjdic?1ZDJ{0}".format(i))
        c = x.read()
        bs = BeautifulSoup(c)
        # print(bs)
        if bs.pre:
            res = bs.pre.contents[0].splitlines()[1]
            # print(res)
        else:
            res = "No matches found."
        bot.say(res)
    except UnicodeEncodeError:
        # non-ASCII (Japanese) input goes to the kanji lookup, URL-encoded
        print(i)
        x = urlopen("http://nihongo.monash.edu/cgi-bin/wwwjdic?1ZIK{0}".format(quote(i)))
        c = x.read()
        bs = BeautifulSoup(c)
        if bs.li:
            res = bs.li.contents[0]
        else:
            res = "No matches found."
        bot.say(res)
def raw_request(self, **kwargs):
    url = MASTER_URL + '?'
    for key, val in kwargs.items():
        url += '{key}={val}&'.format(key=key, val=quote(str(val)))

    def urlopen():
        out = request.urlopen('{url}SID={SID}'.format(url=url, SID=self.SID),
                              cookies={'EJSID': self.EJSID})
        out = out.read()
        out = out.decode('utf-8')
        return out

    out = urlopen()
    fails_titles = (
        ('<title>Invalid session</title>', 'Invalid session'),
        ('<title>Login page</title>', 'Login page'),
        ('<title>Permission denied</title>', 'Permission denied'),
        ('<title>Invalid parameter</title>', 'Invalid parameter'),
    )
    # on a stale session, refresh the SID once and retry
    for title, reason in fails_titles:
        if title in out:
            self.update_sid()
            out = urlopen()
            break
    for title, reason in fails_titles:
        if title in out:
            raise EjudgeException(reason)
    return out
def send_sms(self, obj_mobile, sms_content):
    try:
        send_url = "http://%s:%s%s?srcmobile=%s&password=%s&objmobile=%s&smsid=%s%s&smstext=%s" % \
                   (self.sms_host, self.sms_port, self.sms_send_uri, self.sms_user, self.sms_passwd,
                    obj_mobile, obj_mobile, int(time.time() * 1000), request.quote(sms_content))
        response = request.urlopen(send_url, timeout=10)
        result = response.read()
        err_code = int(result)
        if response is not None:
            response.close()
        if err_code == 0:
            self._write_log("send sms to <%s> --> %s" % (obj_mobile, sms_content))
            return err_code
        else:
            self._write_log("send sms fail, error code: %s" % err_code)
            return err_code
    except error.HTTPError as e:
        self._write_log("request send sms url fail, HTTP status code: %s" % e.code)
        # raise error.HTTPError
    except error.URLError as e:
        self._write_log("request send sms url fail, Reason: %s" % e.reason)
        # raise error.URLError
    except:
        self._write_log("runtime error: send_sms")
        raise
def url_setup(url):
    scheme, netloc, path, qs, anchor = urlsplit(url)
    domain = '.'.join(netloc.split('.')[-2:])
    path = quote(path, '/%')
    # remove everything after path
    url = urlunsplit((scheme, netloc, path, '', ''))
    return url, domain
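For reference, a worked example of what url_setup returns, assuming urlsplit, urlunsplit, and quote come from urllib.parse. Note the two-label heuristic would truncate multi-part TLDs such as .co.uk:

from urllib.parse import urlsplit, urlunsplit, quote

url, domain = url_setup('https://sub.example.com/some path/page?q=1#top')
print(url)     # https://sub.example.com/some%20path/page
print(domain)  # example.com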
def UrlCheck(url):
    url = quote(url)
    finalurl = 'http://www.checkdomain.com/cgi-bin/checkdomain.pl?domain=%s' % (url)
    # user agent stuff (header names must not include the colon)
    req = Request(finalurl)
    req.add_header('User-Agent', random.choice(headers_useragents))
    req.add_header('Referer', random.choice(headers_referers))
    req.add_header('Keep-Alive', str(random.choice(range(1, 6))))
    req.add_header('Connection', 'close')
    try:
        parse = urlopen(req)
        html = parse.read()
        html = str(html)
    except URLError as e:
        raise(e)
    try:
        if 'has already been registered' in html:
            return False
        elif 'still available' in html:
            return True
    except Exception as e:
        raise(e)
def issues_search_text(request):
    text = request.GET.get('search')
    if text is None or len(text) < 1:
        return HttpResponse('{ "msg" : "Search term must be at least 1 character long" }', status=400)
    url = 'http://dev.hel.fi/openahjo/v1/issue/search/?text=%s&format=json&order_by=-latest_decision_date%s' \
        % (quote(text), get_paging_info(request))
    return JsonResponse(get_url_as_json(url))
def youtube_search(query, limit):
    ret_url_list = list()
    for tries in range(1, 10):
        try:
            response = tools.retrieve_web_page(
                'https://www.youtube.com/results?search_query=' + quote(query.encode('utf-8')),
                'youtube search result')
        except KeyboardInterrupt:
            raise
        except:
            e = sys.exc_info()[0]
            if tries > 3:
                print('Failed to download YouTube search result. Reason: ' + str(e))
                raise
            print('Failed to download YouTube search result, retrying. Reason: ' + str(e))
            sleep(1)
        else:
            if response:
                soup = BeautifulSoup(response, "html.parser")
                for item in soup.findAll(attrs={'class': 'yt-uix-tile-link'}):
                    url = 'https://www.youtube.com' + item['href']
                    ret_url_list.append(url.split('&')[0])
            break
    return ret_url_list[:limit]
def search(self, query, page=1, limit=10): offset = (page - 1) * limit url = self.url_search % (self.lang, urllib.quote(query), offset, limit) content = self.__fetch(url).read() parsed = json.loads(content) search = parsed['query']['search'] results = [] if search: for article in search: title = article['title'].strip() snippet = article['snippet'] snippet = re.sub(r'(?m)<.*?>', '', snippet) snippet = re.sub(r'\s+', ' ', snippet) snippet = snippet.replace(' . ', '. ') snippet = snippet.replace(' , ', ', ') snippet = snippet.strip() wordcount = article['wordcount'] results.append({ 'title' : title, 'snippet' : snippet, 'wordcount' : wordcount }) # json.dump(results, default_style='', default_flow_style=False, # allow_unicode=True) return results
def edict(phenny, input):
    if not input.group(2):
        return phenny.say("Please enter a word.")
    i = input.group(2)
    try:
        i.encode('ascii')
        print(i)
        x = urlopen("http://www.csse.monash.edu.au/~jwb/cgi-bin/wwwjdic.cgi?1ZDJ%s" % i)
        c = x.read()
        bs = BeautifulSoup(c)
        # print(bs)
        if bs.pre:
            res = bs.pre.contents[0].splitlines()[1]
            # print(res)
        else:
            res = "No matches found."
        phenny.say(res)
    except UnicodeEncodeError:
        # non-ASCII (Japanese) input goes to the kanji lookup, URL-encoded
        print(i)
        x = urlopen("http://www.csse.monash.edu.au/~jwb/cgi-bin/wwwjdic.cgi?1ZIK%s" % (quote(i)))
        c = x.read()
        bs = BeautifulSoup(c)
        if bs.li:
            res = bs.li.contents[0]
        else:
            res = "No matches found."
        phenny.say(res)
def __init__(self, user, password, baseURL='http://wikipast.world/wiki/', memory=0):
    """
    :param user: bot user name
    :param password: bot password
    :param baseURL: base URL of the wiki
    """
    self.user = user
    passw = urllib2.quote(password)
    login_params = '?action=login&lgname=%s&lgpassword=%s&format=json' % (user, passw)
    # Login request
    r1 = requests.post(baseURL + 'api.php' + login_params)
    login_token = r1.json()['login']['token']
    # Confirm the login with the token
    login_params2 = login_params + '&lgtoken=%s' % login_token
    r2 = requests.post(baseURL + 'api.php' + login_params2, cookies=r1.cookies)
    # Get the edit (CSRF) token
    params3 = '?format=json&action=query&meta=tokens&continue='
    r3 = requests.get(baseURL + 'api.php' + params3, cookies=r2.cookies)
    self.edit_token = r3.json()['query']['tokens']['csrftoken']
    self.edit_cookie = r2.cookies.copy()
    self.edit_cookie.update(r3.cookies)
    self.baseURL = baseURL
    self.memory = memory
    self.page_buffer = []
def _deal_related(self, url, title):
    """Fetch the articles related to a given article.

    Args:
        url: article URL
        title: article title

    Returns:
        related_dict: dict of related articles

    Raises:
        WechatSogouException: carrying the error message errmsg
    """
    related_req_url = 'http://mp.weixin.qq.com/mp/getrelatedmsg?' \
                      'url=' + quote(url) \
                      + '&title=' + title \
                      + '&uin=&key=&pass_ticket=&wxtoken=&devicetype=&clientversion=0&x5=0'
    related_text = self._get(related_req_url, 'get', host='mp.weixin.qq.com', referer=url)
    related_dict = eval(related_text)
    ret = related_dict['base_resp']['ret']
    errmsg = related_dict['base_resp']['errmsg'] if related_dict['base_resp']['errmsg'] else 'ret:' + str(ret)
    if ret != 0:
        logger.error(errmsg)
        raise WechatSogouException(errmsg)
    return related_dict
def fetch(w):
    url = du + request.quote(w)
    with request.urlopen(url) as r:
        r = r.read().decode('utf-8')
    d = wubi.findall(r)
    # each match looks like 'key: value'; build a dict from the pairs
    w = {k: v for k, v in (i.split(': ') for i in d)}
    return w
def get_object(bucket, obj, file_path):
    if file_path.endswith("/"):
        return
    in_file = urlopen('http://%s.s3.amazonaws.com/%s' % (bucket, quote(obj.key)))
    size = int(in_file.info()['content-length'])
    assert size == obj.size
    folder = os.path.dirname(file_path)
    if not os.path.exists(folder):
        os.makedirs(folder)
    with open(file_path, 'wb') as out_file:
        while size > 0:
            buf = in_file.read(min(size, 64 * 1024))
            out_file.write(buf)
            size -= len(buf)
    os.utime(file_path, (obj.date, obj.date))
def get_url_headers(url, forced=False):
    domain = urlparse.urlparse(url)[1]
    sub_dom = scrapertools.find_single_match(domain, r'\.(.*?\.\w+)')
    if sub_dom and 'google' not in url:
        domain = sub_dom
    domain_cookies = cj._cookies.get("." + domain, {}).get("/", {})
    if "|" in url or "cf_clearance" not in domain_cookies:
        if not forced:
            return url
    headers = dict()
    headers["User-Agent"] = default_headers["User-Agent"]
    headers["Cookie"] = "; ".join(["%s=%s" % (c.name, c.value) for c in domain_cookies.values()])
    return url + "|" + "&".join(["%s=%s" % (h, urllib.quote(headers[h])) for h in headers])
def profanityAlert(path_file):
    file_check = open(path_file)
    content = file_check.read()
    file_check.close()
    output = False
    try:
        response = ur.urlopen("http://www.wdylike.appspot.com/?q=%s" % (ur.quote(content)))
        # the API answers with the literal text "true" or "false";
        # bool(response.read()) would be True for any non-empty body
        output = response.read().strip() == b'true'
        response.close()
    except:
        print("The detector api must be down. Try after some time")
        return
    if output:
        print("Profanity Alert !!")
    else:
        print("No Profanities found.")
def search_book(word):
    params = {
        # 'q': quote(word),
        'q': word,  # urlencode() below percent-encodes values itself; pre-quoting would double-encode
        # For GET-style data APIs, the fields parameter selects which fields appear in the
        # response, trimming data the caller does not care about.
        # fields currently only accepts a comma-separated list of field names; omitting it,
        # or passing all, means no filtering.
        'fields': 'id,title,rating,average,author,publisher,price,images,alt',
        'count': 3
    }
    params = urlencode(params)
    url = "%s?%s" % (BOOK_URL_BASE, params)
    req = Request(url)
    try:
        resp = urlopen(req)
    except URLError as e:
        if hasattr(e, 'reason'):
            reply = 'We failed to reach a server'
            return reply
        elif hasattr(e, 'code'):
            reply = 'The server couldn\'t fulfill the request.'
            return reply
    else:
        # everything is fine
        content = json.loads(resp.read().decode('utf-8'))
        # print(content)
        books = content['books']
        # print(books)
        book_list = []
        for i, book in enumerate(books):
            item = {}
            title = '%s\t%s分\n%s\n%s\t%s' % (
                book['title'], book['rating']['average'],
                ','.join(book['author']), book['publisher'], book['price'])
            description = ''
            if i == 1:
                picUrl = book['images']['large']
            else:
                picUrl = book['images']['small']
            url = book['alt']
            item['title'] = title
            item['description'] = description
            item['image'] = picUrl
            item['url'] = url
            book_list.append(item)
        return book_list
def http_parser(host, href, method, headers, data): method = method.upper() if method == 'GET': if data: try: if '?' not in href: href += '?' href += '&'.join( ['{}={}'.format(k, quote(v)) for k, v in data.items()]) except AttributeError: raise Exception("`GET` data should be a dict") except Exception: raise Exception("Failed to unpack url") user_agent = HTTPCons.user_agent if not headers: headers = { 'Host': host, 'User-Agent': user_agent, 'Connection': 'close' } else: headers.update({ 'Host': host, 'User-Agent': headers.get("User-Agent", user_agent), 'Connection': 'close' }) try: head = "\r\n".join( ["{}: {}".format(k, v) for k, v in headers.items()]) except AttributeError: raise Exception("`headers` should be a dict") except Exception: raise Exception("Failed to unpack headers") if method == 'POST' and data: head += "\r\nContent-Length: {}".format(len(data)) elif method not in ('POST', 'GET'): raise Exception("Method Not Implement `{}`".format(method)) return { 'request': "{method} {href} HTTP/1.1".format(method=method, href=href), 'headers': head, 'entity': data or '' }
async def main(self, bot, database, message, arguments):
    if not arguments:
        letter = ":clock11: **| Servers time is {}**".format(
            str(datetime.fromtimestamp(mfloor(time())).strftime('%H:%M')))
        await bot.say(message.channel, letter)
    else:
        google = quote(str(arguments[0]))
        query = "https://maps.googleapis.com/maps/api/geocode/json?address={}&key={}&language={}".format(
            google, bot.config.google.key, "en")
        json_data = await bot.utils.web.get_content(query)
        geo = bot.utils.json2obj(json_data)
        if geo.status == "OK":
            lat = str(float("{0:.4f}".format(geo.results[0].geometry.location.lat)))
            lng = str(float("{0:.4f}".format(geo.results[0].geometry.location.lng)))
            query = "https://maps.googleapis.com/maps/api/timezone/json?location=%s,%s&timestamp=%s&key=%s&language=%s" % (
                lat, lng, time(), bot.config.google.key, "en")
            json_data = await bot.utils.web.get_content(query)
            location = bot.utils.json2obj(json_data)
            time_now = str(int(time() + location.rawOffset + location.dstOffset))
            time_in_hour_format = datetime.fromtimestamp(int(time_now)).strftime('%H:%M')
            time_in_weekday = datetime.fromtimestamp(int(time_now)).weekday()
            # weekday() returns 0 for Monday through 6 for Sunday
            day_is = ("Monday", "Tuesday", "Wednesday", "Thursday",
                      "Friday", "Saturday", "Sunday")[time_in_weekday]
            letter = ":clock11: **| Time in {} is {} and the day is {}**".format(
                geo.results[0].formatted_address, time_in_hour_format, day_is)
            await bot.say(message.channel, letter)
def start_requests(self):
    ak = "1XjLLEhZhQNUzd93EjU5nOGQ"  # fill in your Baidu API ak here
    # query = '便利店$超市$商店'
    # query = '便利店$超市'
    # query = '便利店'
    # query = '珠宝店'
    # query = '珠宝$首饰$黄金$银$金$钻石'
    # query = '珠宝$首饰'
    query = '珠宝首饰'
    lat_lb = 22.243608
    lng_lb = 113.684206
    lat_rt = 22.862324
    lng_rt = 114.658294
    # coordinate range of Shenzhen
    # lat_lb = 22.416237
    # lng_lb = 113.684206
    # lat_rt = 22.862324
    # lng_rt = 113.987034
    # coordinate range of Luohu district: 113.684206,22.416237;113.987034,22.862324
    # las = 0.025
    # las = 0.007812
    # las = 0.003906
    las = 0.001953  # grid cell size in degrees
    lat_count = int((lat_rt - lat_lb) / las + 1)
    lon_count = int((lng_rt - lng_lb) / las + 1)
    self.logger.debug('lat_count: {} lon_count: {}'.format(lat_count, lon_count))
    print('lat_count: {} lon_count: {}'.format(lat_count, lon_count))
    for lat_c in range(0, lat_count):
        lat_b1 = round(lat_lb + las * lat_c, 6)
        for lon_c in range(0, lon_count):
            lon_b1 = round(lng_lb + las * lon_c, 6)
            bounds = str(lat_b1) + ',' + str(lon_b1) + ',' + str(
                round(lat_b1 + las, 6)) + ',' + str(round(lon_b1 + las, 6))
            url = self.base_url.format(query=query, page_num=0, bounds=bounds, ak=ak)
            url = quote(url, safe=";/?:@&=+$,", encoding="utf-8")
            self.logger.debug('start url: {}'.format(url))
            print('start url: {}'.format(url))
            yield Request(url, callback=self.parse_poi,
                          meta={'query': query, 'page_num': 0, 'bounds': bounds, 'ak': ak})
async def weather(message, client, arguments): # Fetches weather send(1, "Fetching weather data") url = 'http://api.openweathermap.org/data/2.5/weather?q=%s&appid=%s' % ( quote(arguments), apikey) data = loads(rget(url).text) # Formulating response DO NOT TOUCH WAS PAIN :) try: definition = data["name"] + "\nCurrently: " + data["weather"][0][ "description"] + "\nWind speed: " + str( data["wind"]["speed"]) + " m/s\nCurrent Temp: " + str( int(data["main"]["temp"] - 273.15)) + " °C / " + str( int((data["main"]["temp"] - 273.15) * 9 / 5 + 32) ) + " °F \nMax Temp: " + str( int(data["main"]["temp_max"] - 273.15) ) + " °C / " + str( int((data["main"]["temp_max"] - 273.15) * 9 / 5 + 32) ) + " °F \nMin Temp: " + str( int(data["main"]["temp_min"] - 273.15) ) + " °C / " + str( int((data["main"]["temp_min"] - 273.15) * 9 / 5 + 32) ) + " °F \nSunrise: " + datetime.fromtimestamp( data["sys"]["sunrise"]).strftime( '%Y-%m-%d %H:%M:%S' )[11:16] + "\nSunset: " + datetime.fromtimestamp( data["sys"]["sunset"]).strftime( '%Y-%m-%d %H:%M:%S' )[11:16] + "\nHumidity: " + str( data["main"]["humidity"] ) + " %" + "\nPressure: " + str( data["main"]["pressure"] ) + "00 Pa" + "\nLon: " + str( data["coord"] ["lon"]) + "°\nLat: " + str( data["coord"]["lat"]) + "°\n" response = "Weather in: " + data["name"] + ", " + data["sys"][ "country"] + "```" + definition + "```" except KeyError: response = "No match was found" # Sending message and logging await client.send_message(message.channel, ":earth_africa: **| {}**".format(response)) send(1, "Done with fetching weather data")
def parseData(self, response): book = response.meta.get('book') category = response.meta.get('category') realPath = response.meta.get('realPath') params = response.meta.get('params') result = json.loads(response.body_as_unicode()) # download data if (book['tree_text']['grade'] is None): path = [ book['tree_text']['period'], book['tree_text']['subject'], book['tree_text']['term'], book['tree_text']['edition'], book['publish_info'], book['name'] ] else: path = [ book['tree_text']['period'], book['tree_text']['grade'], book['tree_text']['subject'], book['tree_text']['term'], book['tree_text']['edition'], book['publish_info'], book['name'] ] for data in [x for x in result['data'] if 'down_path' in x]: down_path = (downloadPath + "/".join(path) + "/" + realPath + "/" + data['file_extension']).replace("//", "/") name = data['title'] if (len(name) > 100): name = name[:100] savename = name + "_" + str( data['pkid']) + "." + data['file_extension'] down_url = data['down_path'] if not os.path.exists(down_path): os.makedirs(down_path) print(down_url) print(down_path + "/" + savename) urlretrieve(quote(down_url, ':/'), down_path + "/" + savename) # is last page? curpage = response.meta.get('page') pagesize = response.meta.get('pagesize') total = result['total'] if (curpage * pagesize < total): params['page'] = curpage + 1 response.meta['page'] = curpage + 1 response.meta['params'] = params yield scrapy.Request(url=self.base_url + "?" + urlencode(params), meta=response.meta, callback=self.parseData)
async def update_cookies(self): while True: try: url = 'http://www.chinaso.com/search/pagesearch.htm?q={}'.format( quote('中国搜索')) try: req = HttpRequest(url, allow_redirects=False) await self.extension.handle_request(req) resp = await self.downloader.fetch(req) except HttpError as e: resp = e.response cookies = self.get_cookies_in_response(resp) self.cookies.update(cookies) except Exception as e: log.warning('Failed to update cookies: %s', e) finally: await asyncio.sleep(5 * 60)
def get_company_code(self): try: search_key_urlencode = request.quote(self.search_key) headers = { 'User-Agent': random.choice(user_agent_list), 'Cookie': self.cookies_load(), } headers.update(self.base_headers) search_url = 'https://www.tianyancha.com/search?key={}'.format( search_key_urlencode) r = requests.get(search_url, headers=headers) html = etree.HTML(r.text) self.company_code = html.xpath( "//div[@class='search-result-single ']/@data-id")[0] except Exception as e: pass
def get_data_from_chinaso(company_list):
    # crawl the news data
    chinaso_url = "http://news.chinaso.com/newssearch.htm?q="
    for company in company_list:
        company_name = str(company.company_name)
        pro_company_name = quote(company_name)  # URL-encode the Chinese characters
        page = 1
        while True:
            time.sleep(2)
            tmp_url = chinaso_url + str(pro_company_name) + "&page=" + str(page)
            print(tmp_url)
            print("Current company: " + company_name)
            print("Current page: " + str(page))
            web = urlopen(tmp_url)  # open the URL
            html = web.read()  # read the page content into html
            bs0bj = BeautifulSoup(html, features="lxml", from_encoding="utf-8")  # build a BeautifulSoup object
            news_links = bs0bj.select('li[class="reItem"]')  # filter the text by tag
            for link in news_links:
                title = str(link.find("a").get_text())  # title
                href = link.find("a").get("href")  # link
                abst = link.select('div[class="reNewsWrapper clearfix"]')  # abstract
                if len(abst) > 0:
                    abst = str(abst[0].find('p').get_text().strip().replace("\n", "").replace(" ", ""))
                else:
                    abst = ""  # some news items only have an image as abstract
                src_time = link.select('p[class="snapshot"]')
                if len(src_time) > 0:
                    src_time = link.select('p[class="snapshot"]')[0].find('span').get_text()
                else:
                    src_time = ""
                if company_name in title or company_name in abst:
                    company.title_list.append(title)
                    company.href_list.append(href)
                    company.abst_list.append(abst)
                    company.src_time_list.append(src_time)
                    company.news_num += 1
            # print(bs0bj.select('a[_dom_name="next"]'))
            if len(bs0bj.select('a[_dom_name="next"]')) == 0:
                print("No more pages")
                break
            else:
                page += 1
def _get_topic_params(topic_url=None, topic_name=None): if not topic_url: if not topic_name: raise Exception() topic_url = 'http://huati.weibo.com/k/%s' % request.quote(topic_name) html = urlopen(topic_url).read().decode('utf-8', errors='ignore') page_id = re.search(re.compile("CONFIG\['page_id'\]='([^']+)'"), html).group(1) domain = page_id[:6] soup = Soup(html) html2 = soup.find( 'script', text=re.compile('FM\.view\({"ns":"","domid":"Pl_Third_Inline__3')).text html2 = eval(html2[html2.find('(') + 1:html2.rfind(')')])['html'].replace( '\\/', '/') soup2 = Soup(html2) introduction = soup2.text.strip()[len('导语:'):] return page_id, domain, introduction
def use_baidu_api(raw_url, params=None, api_type="website"):
    parameters = []
    if api_type == "website":
        ak = "ak=%s" % opt.website_ak
    else:
        # note: currently falls back to the same website key for every api_type
        ak = "ak=%s" % opt.website_ak
    if params is not None:
        parameters.extend(params)
    parameters.append(ak)
    url = raw_url + "?" + "&".join(parameters)
    encodedStr = request.quote(url, safe="/:=&?#+!$,;'@()*[]")
    with request.urlopen(encodedStr) as f:
        data = json.loads(f.read().decode("utf-8"))
    # data is the raw response; check its status code before using the result
    if data["status"] != 0:
        return None
    return data["result"]
def getlnglat(address):
    # GET http://api.map.baidu.com/geocoder/v2/?address=北京市海淀区上地十街10号&output=json&ak=<your ak>&callback=showLocation
    url = 'http://api.map.baidu.com/geocoder/v2/'
    output = 'json'
    ak = '8399wyuCP00HcW0c7rHfjMpCIT6dA4mC'  # browser-side API key
    address = quote(address)  # the address is Chinese, so quote it first to avoid mojibake
    uri = url + '?' + 'address=' + address + '&output=' + output + '&ak=' + ak
    print(uri)
    req = urlopen(uri, timeout=6)
    res = req.read().decode()
    temp = json.loads(res)
    lat = temp['result']['location']['lat']
    lng = temp['result']['location']['lng']
    code = temp['status']
    # print(lat, lng, code)
    return lat, lng
def getlnglat(address):
    url = 'http://api.map.baidu.com/geocoder/v2/'
    output = 'json'
    # Baidu Maps ak; to apply for one, enable "forward/reverse geocoding" under
    # Console -> Settings -> Enable Services
    ak = baidu_AK
    address = quote(address)  # the address is Chinese, so quote it first to avoid mojibake
    uri = url + '?' + 'address=' + address + '&output=' + output + '&ak=' + ak
    req = urlopen(uri)
    res = req.read().decode()
    # print(res)
    temp = json.loads(res)
    if temp['status'] != 0:
        lat = None
        lng = None
    else:
        lat = temp['result']['location']['lat']
        lng = temp['result']['location']['lng']
    return lat, lng  # latitude, longitude
def _serialize_payload(self, payload): """ Merges any default parameters from ``self.params`` with the ``payload`` (giving the ``payload`` precedence) and attempts to serialize the resulting ``dict`` to JSON. Note: MailChimp expects the JSON string to be quoted (their docs call it "URL encoded", but python's ``urllib`` calls it "quote"). Raises a ``SerializationError`` if data cannot be serialized. """ params = self.params.copy() params.update(payload) try: jsonstr = json.dumps(params) serialized = quote(jsonstr) return serialized except TypeError: raise SerializationError(payload)
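For illustration, the shape of the serialized output with a made-up payload (quote assumed to be urllib.parse.quote):

import json
from urllib.parse import quote

payload = {'apikey': 'k', 'id': 42}
print(quote(json.dumps(payload)))
# %7B%22apikey%22%3A%20%22k%22%2C%20%22id%22%3A%2042%7D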
def __get_place(self, gov_id, type_dic, preferred_lang): gov_url = 'http://gov.genealogy.net/semanticWeb/about/' + quote(gov_id) response = urlopen(gov_url) data = response.read() dom = parseString(data) top = dom.getElementsByTagName('gov:GovObject') place = Place() place.gramps_id = gov_id if not len(top) : return place, [] count = 0 for element in top[0].getElementsByTagName('gov:hasName'): count += 1 place_name = self.__get_hasname(element) if count == 1: place.set_name(place_name) else: if place_name.lang == preferred_lang: place.add_alternative_name(place.get_name()) place.set_name(place_name) else: place.add_alternative_name(place_name) for element in top[0].getElementsByTagName('gov:hasType'): curr_lang = place.get_name().get_language() place_type = self.__get_hastype(element,curr_lang, type_dic, preferred_lang) place.set_type(place_type) for element in top[0].getElementsByTagName('gov:position'): latitude, longitude = self.__get_position(element) place.set_latitude(latitude) place.set_longitude(longitude) ref_list = [] for element in top[0].getElementsByTagName('gov:isPartOf'): ref, date = self.__get_ispartof(element) ref_list.append((ref, date)) for element in top[0].getElementsByTagName('gov:isLocatedIn'): ref, date = self.__get_ispartof(element) ref_list.append((ref, date)) for element in top[0].getElementsByTagName('gov:hasURL'): url = self.__get_hasurl(element) place.add_url(url) return place, ref_list
def output(self, query): if query.startswith(':'): cmd = query.split(' ') return self.handle_command(cmd) else: url = 'http://www.wolframalpha.com/input/?i=' + quote(query) url_str = '\n(' + url + ')' if self.show_url else '' self.last_url = url try: resp = self.send_query(query) root = etree.fromstring(resp.content) out = self.parse_etree(root) return '\n\n'.join(out) + url_str except Exception as e: return repr(e)
def urls(itemy, loc):
    baidu_api = [
        'WnM8BMNT1lcHKSPcnGY4vibgGr7BR32H', 'SqLUYy6nyGIrRzo2p3NoZ06d9G8F6Zmq',
        '0gTtWeiV3BxSG9CFsW8qhSPnVjXDH6Dl', '1daHSdZTRcUKocGae0Q1uX6e3PjatSSB',
        'mycEE9PyvCo8aFLFnocPp2POYgacuAAP', '7sFH4KOza31IUjeGM3o1b3aO',
        'Dydtlpvoidza3BFGClf65ulPcDMHnEaN', 'x39uxyAjOnzyWx2sXzPPiHPLHAzcanCO',
        '8ZHsSKtmEsNYMMGRneDGtle7Ognd2W5e', 'aj2dDcCWbzWTeHkM1X1yaGF9e1nBCLMu'
    ]
    urls = []
    for page in range(0, 20):
        # rotate through the API keys, switching to the next key every two pages
        range_i = math.ceil(float(page - 1) / 2)
        url = "http://api.map.baidu.com/place/v2/search?query=" + request.quote(itemy) + "&bounds=" + loc
        url = url + "&page_size=20&page_num=" + str(page) + "&output=json&ak=" + baidu_api[range_i]
        urls.append(url)
    return urls
def send_email(config, row): cert_url = urllib.quote(config.cert_url.format(row['filename']), safe=':') row['cert_url'] = ("https://wallet.blockcerts.org/#/import-certificate/{}". format(cert_url)) imgFile = io.BytesIO() qrcode.make(row['cert_url']).get_image().save(imgFile, 'JPEG') img = base64.b64encode(imgFile.getvalue()).decode() row['qrcode'] = ('<img src="cid:qrcode" alt="Scannable barcode ' 'for use with mobile devices">') body = Template(config.cert_email_body).safe_substitute(row) mailer = import_module('cert_mailer.helpers.{}'.format( config.mailer)).Mailer() mailer.send(config, config.cert_email_subject, body, img, row)
def getlnglat(address):
    """
    Get the latitude/longitude of a Chinese address (lat: latitude, lng: longitude).
    """
    url_base = "http://api.map.baidu.com/geocoder/v2/"
    output = "json"
    ak = "nSxiPohfziUaCuONe4ViUP2N"  # browser-side API key
    address = quote(address)  # the address is Chinese, so quote it first to avoid mojibake
    url = url_base + '?' + 'address=' + address + '&output=' + output + '&ak=' + ak
    lat = 0.0
    lng = 0.0
    res = requests.get(url)
    temp = json.loads(res.text)
    if temp["status"] == 0:
        lat = temp['result']['location']['lat']
        lng = temp['result']['location']['lng']
    return lat, lng
def scrape_user_info(self, user):
    assert isinstance(user, str), 'Parameter \'user\' isn\'t an instance of type \'str\'!'
    if log_tieba:
        logging.info('Scraping info of tieba user: %s...' % user)
    response = requests.get(tieba_user_profile_url.format(user=quote(user)))
    bs = BeautifulSoup(response.text, 'lxml')
    item = TiebaUserItem()
    item.name = user
    if bs.find('span', {'class': 'userinfo_sex_male'}) is not None:
        item.sex = 'male'
    else:
        item.sex = 'female'
    age = bs.find('span', {'class': 'user_name'}).find_all('span')[2].get_text()
    item.tieba_age = float(re.search(r'吧龄:(.*)年', age).group(1))
    item.avatar_url = bs.find('a', {'class': 'userinfo_head'}).img.attrs['src']
    item.follow_count = int(bs.find_all('span', {'class': 'concern_num'})[0].find('a').get_text())
    item.fans_count = int(bs.find_all('span', {'class': 'concern_num'})[1].find('a').get_text())
    forum_div1 = bs.find('div', {'id': 'forum_group_wrap'})
    forum_div2 = bs.find('div', {'class': 'j_panel_content'})
    # part of the followed-forums list only shows after expanding it
    if forum_div1 is not None:
        forum_items1 = forum_div1.find_all('a', {'class': 'unsign'})
        item.forum_count += len(forum_items1)
    if forum_div2 is not None:
        forum_items2 = forum_div2.find_all('a', {'class': 'unsign'})
        item.forum_count += len(forum_items2)
    post = bs.find('span', {'class': 'user_name'}).find_all('span')[4].get_text()
    item.post_count = int(re.search(r'发贴:(\d+)', post).group(1))
    if log_tieba:
        logging.info('Succeeded in scraping info of tieba user: %s.' % user)
    return item
def search(self, query, num_results=10, prefetch_pages=False, prefetch_threads=10, language="en"): searchResults = [] fetcher_threads = deque([]) i = 0 while len(searchResults) < num_results: start = i * GoogleSearch.RESULTS_PER_PAGE opener = urllib2.build_opener() opener.addheaders = GoogleSearch.DEFAULT_HEADERS response = opener.open(GoogleSearch.SEARCH_URL + "?q=" + urllib2.quote(query) + "&hl=" + language + ("" if start == 0 else ("&start=" + str(start)))) soup = BeautifulSoup(response.read(), "lxml") response.close() results = self.parseResults( soup.select(GoogleSearch.RESULT_SELECTOR)) searchResults += results i += 1 if prefetch_pages: for result in results: while True: running = 0 for thread in fetcher_threads: if thread.is_alive(): running += 1 if running < prefetch_threads: break sleep(1) fetcher_thread = Thread(target=result.getText) fetcher_thread.start() fetcher_threads.append(fetcher_thread) if len(searchResults) > num_results: searchResults = searchResults[:num_results] for thread in fetcher_threads: thread.join() return SearchResponse(searchResults, 'not used')
async def main(self, bot, database, message, arguments): url = "http://api.openweathermap.org/data/2.5/weather?q={}&appid={}".format( quote(arguments[0]), bot.config.openweather.key) json_data = await bot.utils.web.get_content(url) weather = bot.utils.json2obj(json_data) try: definition = """Weather in: {}, {}``` Currently: {} Wind speed: {} m/s Current Temp: {} °C / {} °F Max Temp: {} °C / {} °F Min Temp: {} °C / {} °F Sunrise: {} Sunset: {} Humidity: {} % Pressure: {}00 Pa Lon: {}° Lat: {}```""".format( weather.name, weather.sys.country, weather.weather[0].description, weather.wind.speed, mceil((weather.main.temp - 273.15) * 10) / 10, mceil(((weather.main.temp - 273.15) * 9 / 5 + 32) * 10) / 10, mceil((weather.main.temp_max - 273.15) * 10) / 10, mceil( ((weather.main.temp_max - 273.15) * 9 / 5 + 32) * 10) / 10, mceil((weather.main.temp_min - 273.15) * 10) / 10, mceil( ((weather.main.temp_min - 273.15) * 9 / 5 + 32) * 10) / 10, datetime.fromtimestamp( weather.sys.sunrise).strftime('%Y-%m-%d %H:%M:%S')[11:16], datetime.fromtimestamp( weather.sys.sunset).strftime('%Y-%m-%d %H:%M:%S')[11:16], weather.main.humidity, weather.main.pressure, weather.coord.lon, weather.coord.lat) except KeyError: definition = "No match was found" letter = ":earth_africa: **| {}**".format(definition) await bot.say(message.channel, letter)
def generate_email(self): template = ''' Good morning {first_name}, We are pleased to announce the launch of the new {home_club} and ClubLink website. The new sites have been built on a fully responsive platform that is easy to navigate. All the familiar tools for managing your account, booking tee times with LinkLine OnLine, accessing the club roster, or signing up for events are very accessible and most importantly, mobile friendly. As this is a completely new system, you will need to create a new password to access the member portal. To do so, please click the link below: {reset_base}?token={token} As a reminder, should you ever forget your password again in the future, you can reset your password at https://clublink.com/login/forgot/. Once you have logged in successfully, please familiarize yourself with the new website. We've organized things to be more user friendly based upon feedback over the years with our previous site. Here are a few quick tips to navigating your new site: Booking a tee time is now easier than ever! On the homepage, click the “Book a Tee Time” button to book tee times with LinkLine OnLine To view the Club Calendar, from the homepage click “My Club” To view your Member Account Statement, from the homepage click “My Account” To opt-in to online statements, under “My Account”, click “My Profile”, and then “Communications”. Check the box next to “Receive annual dues notice via email” and “Receive statement via email” If you encounter any issues, please email Member Services at [email protected]. If you need to speak to a Member Services representative, please call 1-800-273-5113. Member Services Call Center Hours of Operation Weekdays 8 a.m. – 5:30 p.m. Weekends 8 a.m. – 2 p.m. Regards, ClubLink Member Services 15675 Dufferin Street King City, ON, L7B 1K5 1-800-273-5113 [email protected] www.clublink.com '''.format( **{ 'first_name': self.first_name, 'home_club': self.home_club.name if self.home_club else None, 'reset_base': 'https://clublink.com/login/reset/', 'token': quote(self.generate_reset_token()) }) return template
def percent_encode(string):
    if string is None:
        raise Exception('AccessKeyId or AccessKeySecret is None')
    if not isinstance(string, (str, bytes, int)):
        raise TypeError('AccessKeyId or AccessKeySecret should be String')
    if isinstance(string, bytes):
        string = string.decode('utf-8')
    elif isinstance(string, int):
        string = str(string)
    else:
        # already str; the round-trip just validates it is UTF-8 encodable
        string.encode('utf-8')
    string = quote(string, '')
    string = string.replace('+', '%20')
    string = string.replace('*', '%2A')
    string = string.replace('%7E', '~')
    return string
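The trailing replaces normalize quote()'s output to RFC 3986: Python 2's quote encodes '~' as %7E although RFC 3986 lists it as unreserved, and the '+'/'*' fixes guard against quote_plus-style output. A usage sketch, with the output as produced on Python 3:

# percent_encode('key with ~spaces+and*stars')
# -> 'key%20with%20~spaces%2Band%2Astars'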
def get_image_url_list(query, n):
    endpoint = 'https://api.photozou.jp/rest/search_public.json'
    # encode the query as UTF-8 before quoting
    request = "{0}?keyword={1}&limit={2}".format(endpoint, quote(query.encode("utf-8")), n)
    response = urlopen(request).read()
    resources = json.loads(response)
    # list to collect the image URLs
    url_list = []
    # pull out the URL of each image
    for resource in resources['info']['photo']:
        url = resource['image_url']
        # original size:
        # url = resource['original_image_url']
        # thumbnail:
        # url = resource['thumbnail_image_url']
        url_list.append(url)
    return url_list
def getlnglat(address):
    url = 'http://api.map.baidu.com/geocoder/v2/?address='
    output = 'json'
    add = quote(address)
    url2 = url + add + '&output=' + output + "&ak=" + 'oFLG0o9n50jNNjuFfuDTOE6gqjGG2p1t'
    req = urlopen(url2)
    res = req.read().decode()
    temp = json.loads(res)
    try:
        lng = temp['result']['location']['lng']
        lat = temp['result']['location']['lat']
    except KeyError:
        # the geocoder returned no result for this address
        lng = 0
        lat = 0
    return lng, lat
def get_link(movie):
    movie_gbk = movie.encode('gbk')
    url = 'http://s.ygdy8.com/plus/so.php?kwtype=0&searchtype=title&keyword=' + quote(movie_gbk)
    req = requests.get(url)
    bsmovie = bs4.BeautifulSoup(req.text, 'html.parser')
    link = bsmovie.select('.co_content8 b a')
    finallink = 'http://www.ygdy8.com' + link[0].get('href')
    # print(finallink)
    req = requests.get(finallink).content.decode('gbk')
    bsmovie2 = bs4.BeautifulSoup(req, 'html.parser')
    movie_link = bsmovie2.select('.co_content8 table tbody a')
    links = []
    for i in range(0, len(movie_link)):
        # sleep three seconds to avoid getting the IP banned
        time.sleep(3)
        links.append('Link' + str(i) + ":" + movie_link[i].get('href'))
    return '\n'.join(links)