def getCPPCookies(self):  # reads cookies from client
    if 'HTTP_COOKIE' in environ:
        print(AdvCGI.header)
        # print(environ.keys())
        for eachCookie in [x.strip() for x in environ['HTTP_COOKIE'].split(';')]:
            # print(eachCookie+'.... ')
            if len(eachCookie) > 6 and eachCookie[:3] == 'CPP':
                tag = eachCookie[3:7]
                try:
                    self.cookies[tag] = eval(unquote(eachCookie[8:]))
                except (NameError, SyntaxError):
                    self.cookies[tag] = unquote(eachCookie[8:])
        if 'info' not in self.cookies:
            self.cookies['info'] = ''
        if 'user' not in self.cookies:
            self.cookies['user'] = ''
    else:
        self.cookies['info'] = self.cookies['user'] = ''

    if self.cookies['info'] != '':
        self.who, langStr, self.fn = self.cookies['info'].split(':')
        print(self.cookies)
        self.langs = langStr.split(',')
        # print(self.fn)
    else:
        self.who = self.fn = ''
        self.langs = ['Python']
def clean(self):
    cleaned_data = super(ZipForm, self).clean()
    self.name = cleaned_data["filename"]
    if sys.version_info[0] == 3:
        # FIXME: remove this check when we stop supporting python2.7
        cleaned_data["filename"] = unquote(cleaned_data["filename"])
    else:
        try:
            cleaned_data["filename"] = unquote(
                cleaned_data["filename"].encode("ascii"))
        except:  # noqa: E722 @FIXME
            pass
    if cleaned_data["user"] == "None":
        cleaned_data["user"] = None
    else:
        cleaned_data["user"] = User.objects.get(pk=cleaned_data["user"])
    if cleaned_data["user"] is None:
        return cleaned_data
    if cleaned_data["filename"] not in self.valid_files:
        raise forms.ValidationError(
            _("Invalid filename %s") % cleaned_data["filename"])
    if cleaned_data["points"] is None:
        raise forms.ValidationError(_("Must have set points"))
    return cleaned_data
def process(self):
    while True:
        try:
            url = self.queue.get(timeout=2.0)
            self.lock.acquire()
            self.working_thread += 1
            self.lock.release()
        except Exception as e:
            if self.working_thread == 0:
                break
            else:
                continue
        try:
            if url in self.processed_url:
                pass
            else:
                self.processed_url.add(url)
                base_url = url.rstrip('.DS_Store')
                if not url.lower().startswith('http'):
                    url = 'http://%s' % url
                schema, netloc, path, _, _, _ = urlparse(url, 'http')
                try:
                    response = urlopen(url, timeout=5)
                except Exception as e:
                    if str(e) == 'HTTP Error 403: Forbidden':
                        folder_name = schema + "://" + netloc + '/'.join(path.split('/')[:-1])
                        self.__web_structure.add(unquote(folder_name))
                        print("[*]Found Folder:" + folder_name)
                    else:
                        # print (e)
                        pass
                data = response.read()
                if response.code == 200:
                    folder_name = schema + "://" + netloc + '/'.join(path.split('/')[:-1])
                    self.__web_structure.add(unquote(folder_name))
                    if url.endswith('.DS_Store'):
                        ds_store_file = io.BytesIO()
                        ds_store_file.write(data)
                        d = DSStore.open(ds_store_file)
                        dirs_files = set()
                        for x in d.traverse():
                            dirs_files.add(x.filename)
                            fullName = folder_name + "/" + unquote(x.filename)
                            self.__web_structure.add(fullName)
                            print("[*]Found File:" + folder_name + "/" + x.filename)
                        for name in dirs_files:
                            if name != '.':
                                self.queue.put(base_url + quote(name) + '/.DS_Store')
                        d.close()
        except Exception as e:
            # print(e)
            pass
        finally:
            self.working_thread -= 1
def handle_request(self, data=None):
    """Handles both POST and GET reqs. In case of GET there's no data.
    It also extracts data from the url path (regex groups) and passes it to the appropriate end-handler func.
    """
    thread_name = threading.current_thread().name
    print(thread_name, self.raw_requestline)
    # resolve request path to end-handler function
    # (url) unquote the request path so that eventual unicode codes (%<code>) are converted back to unicode chars
    delegations = [(re.fullmatch(url_pattern, unquote(self.path)), action)
                   for url_pattern, action in BackOfficeReqHandler.REQUEST_HANDLERS.items()
                   if re.fullmatch(url_pattern, unquote(self.path)) is not None]
    # for an existing request path there should be exactly one handler func.
    if len(delegations) == 1:
        delegate = delegations[0]
        args = self,
        if data is not None:  # if there is POST data
            args = args + (data,)
        for group in delegate[0].groups():  # if there are more args to be extracted from the request url (e.g. user, month, year)
            args = args + (group,)
        try:
            return delegate[1](*args)  # call the appropriate handler func
        finally:
            self.wfile.flush()
    else:  # error: page doesn't exist
        self.send_response(404)
        self.end_headers()
        self.wfile.write(str.encode("The requested page {page} is not found!".format(page=self.path), 'utf-8'))
        self.wfile.flush()
        return
def get_movie_name(key, value):
    print('=======================' + value)
    value = request.quote(value)
    path = 'https://movie.douban.com/subject_search?search_text=' + value
    req = request.Request(path, None, header)
    r = request.urlopen(req)
    data = r.read().decode('utf-8')
    # print(data)
    soup = BeautifulSoup(data, 'html.parser')
    try:
        title_full = soup.find("div", "pl2").a.get_text()
    except:
        print('!!!!!!!!!! can not request ' + request.unquote(value, 'utf-8', 'replace') + 'skip')
        return None
    url = soup.find("div", "pl2").a.get('href')
    category_back = get_movie_detail(url)
    if '/' in title_full:
        title_real = title_full.split('/')[0]
    else:
        title_real = title_full
    title_real = title_real.strip()
    score = soup.find('span', 'rating_nums').get_text()
    title_final = title_real + '_' + score + '_' + category_back
    if request.unquote(value, 'utf-8', 'replace') != title_real:
        print('callback title not same input: ' + request.unquote(value, 'utf-8', 'replace') + ",callback:" + title_full)
        print('callback :' + title_final)
        return None
    else:
        head = re.compile('.*\\\\').findall(key)
        foot = re.compile('\.\w+').findall(key)
        file_name = head[0] + title_final + foot[foot.__len__() - 1]
        return file_name
def add_requirement():
    add_requirement_data = {}
    try:
        if request.method == 'POST':
            body_value = request.get_data().decode(encoding='utf-8').split('&')
            # body_value = unquote(request.get_data().decode(encoding='utf-8')).split('&')
        else:
            return jsonify(code_error("Method error"))
        add_reqdict = {}
        project_id = 0
        for value_temp in body_value:
            arg_key = value_temp.split('=')[0]
            if len(value_temp.split('=')) != 1:
                arg_value = value_temp.split('=')[1]
            else:
                arg_value = ""
            if arg_key == 'project_id':
                project_id = safe_convert(arg_value, int, 0)
            elif arg_key == 'user_name':
                user_name = safe_convert(arg_value, str, "")
                add_reqdict['user_name'] = unquote(user_name)
            elif arg_key == 'email':
                email = safe_convert(arg_value, str, "")
                add_reqdict['email'] = unquote(email)
            elif arg_key == 'requirement':
                requirement = safe_convert(arg_value, str, "")
                add_reqdict['requirement'] = unquote(requirement)
        insert_status = add_requirement_redis(project_id, add_reqdict)
        rst = cross_header(jsonify(recdata(insert_status)))
        return rst
    except Exception as e:
        return jsonify(code_error(e))
def parsing_first_200_links(topic):
    bs_obj = BS(
        requests.get(f'https://ru.wikipedia.org/wiki/{topic}').text,
        'html.parser')
    bs_obj = bs_obj.find(id="mw-content-text").div
    all_links_list = bs_obj.find_all('a')
    good_links_list = []
    for link in all_links_list:
        # Drop links whose titles I did not want to keep
        try:
            if (link['href'].startswith('/wiki/')
                    and re.findall('[а-яА-Я_()0-9\-]',
                                   unquote(link['href']).replace('/wiki/', '')) != []
                    and re.findall('[a-zA-Z]',
                                   unquote(link['href']).replace('/wiki/', '')) == []):
                if (link['href'].find(':') < 1
                        and link['href'].find(quote('Файл')) < 1
                        and link['href'].find(quote('язык')) < 1
                        and link['href'].find(quote('библиот')) < 1
                        and link['href'].find(quote('Библиот')) < 1):
                    good_links_list.append(
                        unquote(link['href'].replace('/wiki/', '')))
        except KeyError:
            pass
    return good_links_list[:200] if len(good_links_list) > 200 else good_links_list
def clean(self):
    cleaned_data = super(ZipForm, self).clean()
    self.name = cleaned_data['filename']
    if sys.version_info[0] == 3:
        # FIXME: remove this check when we stop supporting python2.7
        cleaned_data['filename'] = unquote(cleaned_data['filename'])
    else:
        try:
            cleaned_data['filename'] = unquote(
                cleaned_data['filename'].encode('ascii'))
        except:  # noqa: E722 @FIXME
            pass
    if cleaned_data['user'] == 'None':
        cleaned_data['user'] = None
    else:
        cleaned_data['user'] = User.objects.get(pk=cleaned_data['user'])
    if cleaned_data['user'] is None:
        return cleaned_data
    if cleaned_data['filename'] not in self.valid_files:
        raise forms.ValidationError(
            _('Invalid filename %s') % cleaned_data['filename'])
    if cleaned_data['points'] is None:
        raise forms.ValidationError(_('Must have set points'))
    return cleaned_data
def google(bot, nick, chan, arg):
    """ google <arg> -> Return the google result for <arg> """
    if not arg:
        return bot.msg(chan, get_doc())
    args = arg.split()
    print(args)
    if re.match(r"-\d*", args[0]):
        count = int(args[0][1:])
        query = ' '.join(args[1:])
        print(count, query)
    else:
        count = 1
        query = arg
    url = "http://ajax.googleapis.com/ajax/services/search/web"
    params = {"v": "1.0", "safe": "off", "q": query}
    data = requests.get(url, params=params)
    data = data.json()
    results = data["responseData"]["results"]
    if not results:
        bot.msg(chan, "%s: No results found." % (nick))
    for i in range(0, count):
        result_url = unquote(unquote(results[i]["url"]))
        result_title = unescape(results[i]["titleNoFormatting"])
        bot.msg(chan, "\x02%s\x02 ⟶ %s" % (bot.style.color(result_title, color="grey"),
                                           bot.style.underline(bot.hicolor(result_url))))
def get_file_route(path, filename):
    path = path.replace('//', '/')
    save_location = client_settings['LOCAL_SAVE_LOCATION']
    file_location = '%s/%s' % (save_location, path)
    response_file = '%s/%s' % (file_location, filename)
    if os.path.isdir(response_file):
        return redirect('%s/' % response_file, 301)
    else:
        return send_from_directory(unquote(file_location), unquote(filename))
def __search__(self, print_results=False):
    """Returns list of results if successful or False otherwise"""
    results = []
    for page in range(0, self.pages):
        rsz = 8
        if self.rsz == RSZ_SMALL:
            rsz = 4
        args = {'q': self.query,
                'v': '1.0',
                'start': page * rsz,
                'rsz': self.rsz,
                'safe': self.safe,
                'filter': self.filter,
                'hl': self.hl}
        self.logger.debug('search: "%s" page# : %s' % (self.query, page))
        q = urlencode(args)
        search_results = self.__urlopen(URL + q)
        data = json.loads(search_results.read().decode('utf-8'))
        if not 'responseStatus' in data:
            self.logger.error('response does not have a responseStatus key')
            continue
        if data.get('responseStatus') != 200:
            self.logger.debug('responseStatus is not 200')
            self.logger.error('responseDetails : %s' % (data.get('responseDetails', None)))
            if self.raise_http_exceptions:
                raise PyGoogleHttpException(
                    'PyGoogle HTTP Exception code: %s' % data.get('responseStatus'))
            continue
        if print_results:
            if 'responseData' in data:
                for result in data['responseData'].get('results', []):
                    if result:
                        print('[%s]' % (urllib2.unquote(result['titleNoFormatting'])))
                        print(result['content'].strip("<b>...</b>").replace(
                            "<b>", '').replace("</b>", '').replace("&#39;", "'").strip())
                        print(urllib2.unquote(result['unescapedUrl']) + '\n')
            else:
                # no responseData key was found in 'data'
                self.logger.error('no responseData key found in response. very unusal')
        results.append(data)
    return results
def prefix_query(self, prefix, include_doc=True):
    prefix_path = self.get_path(prefix)
    ret = []
    for k, v in self.data.items():
        if k.startswith(prefix_path):
            if include_doc:
                ret.append({'key': unquote(k).split('!'), 'doc': v})
            else:
                ret.append(unquote(k).split('!'))
    return ret
def add_song(self):
    # Get stuff out of the URL
    path = self.path.split("/")
    artist = unquote(path[2])
    album = unquote(path[3])
    song = unquote(path[4])
    # Get the corresponding objects
    artist = collection.get_artist(artist)
    album = artist.find_album(album)
    song = album.find_song(song)
    # Add the song to the queue
    music.add_song(song.path)
async def _rcmd_bw(self, args):
    # Banned words in the chat
    # TODO: update the chat's record
    part, whole = '', ''
    if args:
        part = urlreq.unquote(args[0])
    if len(args) > 1:
        whole = urlreq.unquote(args[1])
    if hasattr(self, '_bwqueue'):
        # self._bwqueue = [self._bwqueue.split(':', 1)[0] + ',' + args[0],
        #                  self._bwqueue.split(':', 1)[1] + ',' + args[1]]
        # TODO add a callEvent
        await self.set_banned_words(*self._bwqueue)
    await self.client._call_event("bannedwords_update", part, whole)
def parseRequest(self, strng, encoding):
    self.original_request = strng
    x = strng.split('&')
    for v in x:
        y = v.split('=')
        if len(y) > 1:
            if (self.parameters.get(y[0])) == None:
                self.parameters[REQUEST.unquote(y[0], encoding).replace('+', ' ')] = \
                    [REQUEST.unquote(y[1], encoding).replace('+', ' ')]
            else:
                self.parameters[REQUEST.unquote(y[0], encoding).replace('+', ' ')].append(
                    REQUEST.unquote(y[1], encoding).replace('+', ' '))
    pass
def spider(url):
    if (url[-1] == '/'):
        url = url[0:-1]  # get rid of trailing /
    if (url in spidered):
        return
    else:
        spidered.append(url)
    print('Processing webpage %s' % request.unquote(url))
    try:
        req = request.urlopen(url + '/')
    except urllib.error.HTTPError as e:
        print("HTTP Error: %s" % e.reason)
        return
    output = req.read().decode('utf-8')
    soup = BeautifulSoup(output, 'lxml')
    # find links
    links = soup.find_all('a')
    tospider = []
    for link in links:
        link = link.get('href')
        try:
            if (link[0] == '/'):
                link = site + link
            if (link[0] == '#'):
                continue
            if (link[-4:] == '.mid'):
                print('Downloading MIDI file %s' % request.unquote(link))
                path = link[28:]
                try:
                    os.makedirs(os.path.dirname(path))
                except:
                    pass
                request.urlretrieve(link, path)
            elif (link[-4:] == '.zip'):
                path = link[28:]
                print('Downloading ZIP file %s' % request.unquote(link))
                try:
                    os.makedirs(os.path.dirname(path))
                except:
                    pass
                request.urlretrieve(link, path)
            else:
                tospider.append(link)
        except:
            pass
    print("Webpage %s finished" % request.unquote(url))
    for link in tospider:
        try:
            spider(link)
        except:
            pass
def getMediaInfo(VideoNode):
    view = VideoNode.getAttribute("viewCount")
    if view == '':
        view = 0
    view = int(view)

    ################################################################
    ###Find number of days between date video was viewed and today
    lastViewedAt = VideoNode.getAttribute("lastViewedAt")
    if lastViewedAt == '':
        DaysSinceVideoLastViewed = 0
    else:
        d1 = datetime.datetime.today()
        d2 = datetime.datetime.fromtimestamp(float(lastViewedAt))
        DaysSinceVideoLastViewed = (d1 - d2).days
    ################################################################

    ################################################################
    ###Find number of days between date video was added and today
    addedAt = VideoNode.getAttribute("addedAt")
    if addedAt == '':
        DaysSinceVideoAdded = 0
    else:
        d1 = datetime.datetime.today()
        da2 = datetime.datetime.fromtimestamp(float(addedAt))
        DaysSinceVideoAdded = (d1 - da2).days

    if VideoNode.hasAttribute('viewOffset') and VideoNode.hasAttribute('duration'):
        progress = int(VideoNode.getAttribute('viewOffset')) * 100 / int(
            VideoNode.getAttribute('duration'))
    else:
        progress = 0
    ################################################################

    MediaNode = VideoNode.getElementsByTagName("Media")
    media_id = VideoNode.getAttribute("ratingKey")
    for Media in MediaNode:
        PartNode = Media.getElementsByTagName("Part")
        for Part in PartNode:
            file = Part.getAttribute("file")
            if sys.version < '3':
                # remove HTML quoted characters, only works in python < 3
                file = urllib2.unquote(file.encode('utf-8'))
            else:
                file = urllib2.unquote(file)
    return {'view': view,
            'DaysSinceVideoAdded': DaysSinceVideoAdded,
            'DaysSinceVideoLastViewed': DaysSinceVideoLastViewed,
            'file': file,
            'media_id': media_id,
            'progress': progress}
def _BC_download_file(self, url, path):
    if (self.prefs.blackboard_url not in url):
        url = self.prefs.blackboard_url + url
    resp = self.sess.get(url, stream=True)
    headers = resp.headers
    url = urllib2.unquote(resp.url)
    if (platform == "darwin"):
        url = url.encode('latin1')
    self.BC_log('path: {0}'.format(path))
    self.BC_log('url: {0}'.format(url))
    self.BC_log("header: {0}".format(resp.headers))
    header_content = headers['Content-Disposition']
    # self.log('local_filename1: {0}'.format(repr(header_content)))
    coding, local_filename = re.findall("[*]=(.+)''(.+)", header_content)[0]
    # self.log('coding: {0}'.format(repr(coding)))
    # self.log('repr local_filename2: {0}'.format(repr(local_filename)))
    local_filename_unquoted = urllib2.unquote(local_filename)
    self.debug = local_filename_unquoted
    # self.log('local_filename3: {0}'.format(local_filename_unquoted))
    # self.log('str local_filename3: {0}'.format(str(local_filename_unquoted)))
    # self.log('repr local_filename3: {0}'.format(repr(local_filename_unquoted)))
    # self.log('type local_filename3: {0}'.format(type(local_filename_unquoted)))
    final_local_filename = local_filename_unquoted
    # final_local_filename = local_filename_unquoted
    # self.log(u'local_filename4: {0}'.format(final_local_filename))
    # self.log(u'repr local_filename4: {0}'.format(repr(final_local_filename)))
    file_size = resp.headers['Content-Length']
    # if(int(file_size)>=1024*1024*100):
    #     while(1):
    #         download = raw_input("The file {1} is around {0}MB, still download?(y/n)".format(int(file_size)/1024/1024, local_filename))
    #         if(download.lower() == 'y'):
    #             break
    #         elif(download.lower() == 'n'):
    #             return local_filename
    #         else:
    #             print("Please input only y or n!")
    # NOTE the stream=True parameter
    if (not self.BC_file_same(os.path.join(path, final_local_filename), file_size)):
        self.BC_log(u"Downloading {0}".format(final_local_filename))
        r = resp
        with open(os.path.join(path, final_local_filename), 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
                    f.flush()
                    # f.flush() commented by recommendation from J.F.Sebastian
    else:
        self.BC_log(u'File are found to be same: {0}'.format(final_local_filename))
    return final_local_filename
def vote() -> dict:
    try:
        cli_uuid = unquote(get_arg("uuid"))
        if len(cli_uuid) == 2:  # base14 check
            cli_img = unquote(get_arg("img"))
            if len(cli_img) == 5:  # base14 check
                cli_cls = get_arg("class")
                print("uuid:", cli_uuid, "img:", cli_img, "class:", cli_cls)
                cli_dir = user_dir + cli_uuid + '/'
                # os.makedirs(cli_dir, exist_ok=True)
                with open(cli_dir + cli_img, "w") as f:
                    f.write(cli_cls)
                return {"stat": "success"}
            else:
                return {"stat": "invimg"}
        else:
            return {"stat": "invid"}
    except:
        return {"stat": "noid"}
def _imageinfo_from_filename(self, path):
    """Parse some format:

    >>> fmt = "rootfs:<vendor>:<arch>:<version>.<suffix.es>"
    >>> ImageDiscoverer(None)._imageinfo_from_filename(fmt)
    <Image vendorid=<vendor> version=<version> \
path=rootfs:<vendor>:<arch>:<version>.<suffix.es> />
    """
    filename = os.path.basename(path)
    log.debug("Parsing filename: %s" % filename)

    # We need to unquote the filename, because it can be a URL with
    # escaped chars (like the :)
    parts = unquote(filename).split(":")

    assert parts.pop(0) == "rootfs", "Only supporting rootfs images"

    info = RemoteImage(self.remote)
    info.path = path
    info.vendorid = parts.pop(0)
    info.arch = parts.pop(0)

    # Strip an eventual suffix
    info.version, sep, info.suffix = parts.pop(0).partition(".")

    return info
def buildRequest(self, strVar, query, isCmd, isHeader, header=None):
    if "[random]" in strVar:
        strVar = strVar.replace("[random]", core.txtproc.rndString(16))
    if isHeader:
        if (header == "cookie"):
            query = request.quote(query)
            strVar = strVar.replace("%3b", "[semicolon]")
            strVar = request.unquote(strVar)
            strVar = strVar.replace("; ", "COOKIESEPARATOR").replace("=", "COOKIEEQUAL").replace(";", "COOKIESEPARATOR")
            strVar = strVar.replace("[semicolon]", ";")
            strVar = strVar.replace("[eq]", "=")
            strVar = strVar.replace("[", "LEFTSQBRK").replace("]", "RIGHTSQBRK")
            strVar = request.quote(strVar)
            strVar = strVar.replace("COOKIESEPARATOR", "; ").replace("COOKIEEQUAL", "=")\
                .replace("LEFTSQBRK", "[").replace("RIGHTSQBRK", "]")
    else:
        strVar = strVar.replace("[eq]", "=")
    if isCmd:
        if "[cmd]" in strVar:
            strVar = strVar.replace("[cmd]", query)
        if "[sub]" in strVar:
            strVar = strVar.replace("[sub]", "null")
    else:
        if "[cmd]" in strVar:
            strVar = strVar.replace(";[cmd]", "").replace("%3B[cmd]", "")
        strVar = strVar.replace("[sub]", query)
    if "[blind]" in strVar:
        strVar = strVar.replace("[blind]", query)
    return strVar
def validate(root, f):
    root = os.path.abspath(root)
    try:
        if os.path.getsize(f) < 80:
            # this is probably a texttest place holder file
            # it is definitely too small to contain a schema
            return
        doc = etree.parse(f)
        schemaLoc = doc.getroot().get(
            '{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation')
        if schemaLoc:
            localSchema = os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'xsd',
                                       os.path.basename(schemaLoc))
            if os.path.exists(localSchema):
                schemaLoc = localSchema
            # if schemaLoc not in schemes: // temporarily disabled due to lxml bug
            # https://bugs.launchpad.net/lxml/+bug/1222132
            schemes[schemaLoc] = etree.XMLSchema(etree.parse(schemaLoc))
            schemes[schemaLoc].validate(doc)
            for entry in schemes[schemaLoc].error_log:
                s = unquote(str(entry))
                # remove everything before (and including) the filename
                s = s[s.find(f.replace('\\', '/')) + len(f):]
                print(os.path.abspath(f)[len(root) + 1:].replace('\\', '/') + s, file=sys.stderr)
    except Exception:
        print("Error on parsing '%s'!" % os.path.abspath(f)[len(root) + 1:].replace('\\', '/'),
              file=sys.stderr)
        traceback.print_exc()
def do_GET(self):
    assert self.path[0] == "/"
    target = unquote(self.path[1:])

    if target == "":
        self.send_response(302)
        self.send_header("Location", "?" + args.initial_target)
        self.end_headers()
        return

    if not target.startswith("?"):
        self.send_response(404)
        self.end_headers()
        return

    target = target[1:]
    ninja_output, ninja_error, exit_code = ninja_dump(target)
    if exit_code == 0:
        page_body = generate_html(parse(ninja_output.strip()))
    else:
        # Relay ninja's error message.
        page_body = "<h1><tt>%s</tt></h1>" % ninja_error

    self.send_response(200)
    self.end_headers()
    self.wfile.write(create_page(page_body).encode("utf-8"))
def get_mp3_from_url(url, folder, converse=True):
    """
    Results are saved in folder folder.
    folder must end with / or \\. Relative and absolute paths accepted
    """
    prefix = 'http://incompetech.com'
    split_by = '/music/royalty-free/mp3-royaltyfree/'
    middle = split_by
    try:
        webpage = urlreq.urlopen(prefix + url).read().decode('utf-8')
    except Exception as e:
        print(e, 'while doing', url)
        return -1
    splitted = webpage.split(split_by)[1]
    extracted = splitted.split('>')[0].strip('"')
    prettier_name = urlreq.unquote(extracted)
    mp3 = prefix + middle + extracted
    f = open(folder + prettier_name, 'wb')
    mp3 = urlreq.urlopen(mp3).read()
    f.write(mp3)
    f.close()
    size = os.stat(folder + prettier_name).st_size
    if converse:
        print('Downloaded', prettier_name, '\twith size of', size_to_units(size), '.')
    return size
def get_access_token(self, xrenew=False):
    """
    Build the Renren OAuth authorization request URL.

    :param xrenew: if true, force re-acquiring the access_token (used to switch users)
    """
    # Renren authorization endpoint
    url = self.info["AUTHORIZE"]
    # assemble the request fields
    param = {
        "client_id": self.info["API_KEY"],
        "redirect_uri": self.info["REDIRECT_URL"],
        "response_type": "token",
        "display": "popup"
    }
    if xrenew:
        param["x_renew"] = "True"
    # build the request URL
    request = urlencode(param)
    r_url = "%s?%s" % (url, request)
    open_new_tab(r_url)
    self.info["ACCESS_TOKEN"] = \
        unquote(input("请输入浏览器中的access_token:\n"))
    self.config.set("Renren", "access_token", self.info["ACCESS_TOKEN"])
def __init__(self, url, request=None, is_url_page=False):
    if request:
        url += '{}/'.format(request)
    if not is_url_page:
        super().__init__(url)
        self.url = unquote(split(r'\?', url)[0]) + '/'
        # get name of the city
        city = split(r'/', url)[3]
        self.city = ''
        for (ru, en) in cities.items():
            if en == city:
                self.city = ru
                break
    else:
        self.page = url
        self.url = None
    self._bs = BeautifulSoup(self.page, 'html.parser')
    firm_num = split(' ', self._bs.find('h1', class_='searchResults__headerName').text)[0]
    num = toint(firm_num)
    if num:
        self.num_pages = ceil(num / 12)
    else:
        self.num_pages = 1
    self.page_num = int(self._bs.find('span', class_='pagination__page _current').string)
def _on_success(self, resp, paging):
    """ This can be overridden in user-defined blocks.

    Defines how successful polling requests will be handled.
    """
    self._reset_retry_cycle()

    signals, paging = self._process_response(resp)

    self.logger.debug('signals pre-remove-duplicates: %s' % signals)
    signals = self._discard_duplicate_posts(signals)
    self.logger.debug('signals post-remove-duplicates: %s' % signals)

    # add the include_query attribute if it is configured
    if self.include_query() and signals is not None:
        for s in signals:
            setattr(s, self.include_query(), unquote(self.current_query))

    if signals:
        self.notify_signals(signals)

    if paging:
        self.page_num += 1
        self._paging()
    else:
        self._epilogue()
def xml_get_text(_node):
    """Helper function to get character data from an XML tree"""
    rc = list()
    for node in _node.childNodes:
        if node.nodeType == node.TEXT_NODE:
            rc.append(node.data)
    return unquote(''.join(rc))
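# A minimal, self-contained usage sketch for a text-extraction helper like the one above;
# the sample XML string and the expected output are illustrative assumptions, not taken
# from the original project.
from urllib.parse import unquote
from xml.dom.minidom import parseString


def xml_get_text_demo(xml_string):
    """Collect character data from the root element and percent-decode it."""
    root = parseString(xml_string).documentElement
    text = ''.join(node.data for node in root.childNodes
                   if node.nodeType == node.TEXT_NODE)
    return unquote(text)


# xml_get_text_demo("<path>/tmp/My%20Documents</path>") -> "/tmp/My Documents"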
def play(request, id):
    song = Song.objects.get(id=id)
    play_list = request.session.get("play_list", [])
    exist = False
    if play_list:
        for i in play_list:
            if int(id) == i["id"]:
                exist = True
                break
    if exist == False:
        play_list.append({
            "id": int(id),
            "singer": song.singer,
            "name": song.name,
            "time": song.time,
            "url": song.file.url,
        })
    play_urls = []
    for info in play_list:
        song_id = info["id"]
        temp = Song.objects.get(id=song_id)
        play_urls.append(temp.file.url)
    request.session["play_list"] = play_list
    if song.lyrics != "":
        path = unquote(song.lyrics.url)[1::]
        with open(path, "r", encoding="UTF-8") as f:
            lyrics = f.read()
    else:
        lyrics = "暂无歌词"
    d = Dynamic.objects.filter(song_id=int(id)).first()
    plays = d.plays + 1 if d else 1
    Dynamic.objects.update_or_create(song_id=id, defaults={"plays": plays})
    return render(request, "play.html", locals())
def get_vm_net_info(net_device):
    """ """
    net_info = {}
    if isinstance(net_device, vim.vm.device.VirtualVmxnet3):
        adapter_type = 'VMXNET3'
    elif isinstance(net_device, vim.vm.device.VirtualE1000):
        adapter_type = 'E1000'
    elif isinstance(net_device, vim.vm.device.VirtualE1000e):
        adapter_type = 'E1000E'
    else:
        adapter_type = ''
    net_info['adapter_type'] = adapter_type
    if isinstance(net_device.backing,
                  vim.vm.device.VirtualEthernetCard.DistributedVirtualPortBackingInfo):
        pg_type = 'dvs'
        pg_moid = net_device.backing.port.portgroupKey
    elif isinstance(net_device.backing,
                    vim.vm.device.VirtualEthernetCard.NetworkBackingInfo):
        pg_type = 'ovs'
        pg_moid = net_device.backing.network._moId
    else:
        pg_type = ''
        pg_moid = ''
    net_info['pg_type'] = pg_type
    net_info['pg_moid'] = pg_moid
    net_info['portgroup'] = unquote(net_device.deviceInfo.summary)
    net_info['key'] = net_device.key
    net_info['label'] = net_device.deviceInfo.label
    net_info['mac_addr'] = net_device.macAddress
    net_info['connected'] = net_device.connectable.connected
    net_info['ipv4'] = ''
    net_info['prefix'] = ''
    return net_info
def dummy_post_request(url, data='', headers={}, timeout=None):
    import json
    from urllib.request import unquote
    params = json.loads(unquote(data))
    resp = {'received': params}
    json = json.dumps(resp)
    return DummyResponse(json)
def _replace_sources(self, content):
    soup = BeautifulSoup(content)
    resource_holders = soup.findAll(["img", "a", "video"])
    for holder in resource_holders:
        if holder.name == "img":
            if holder["src"].startswith(settings.MEDIA_URL):
                holder["src"] = holder["src"].split("/")[-1]
        elif holder.name == "a":
            if holder.has_key("href") and holder["href"].startswith(settings.MEDIA_URL):
                holder["href"] = holder["href"].split("/")[-1]
        elif holder.name == "video":
            video_sources = holder.findAll("source")
            for video_source in video_sources:
                if video_source["src"].startswith(settings.MEDIA_URL):
                    video_source["src"] = video_source["src"].split("/")[-1]
    objs = soup.findAll("object")
    for obj in objs:
        obj["data"] = obj["data"].split("/")[-1]
        src = obj.find("param", attrs={"name": "src"})
        src["value"] = src["value"].split("/")[-1]
        flashvars = obj.find("param", attrs={"name": "flashvars"})
        reg_ex = r"url=.*/(.*?)&.*"
        flashvars["value"] = re.sub(
            reg_ex,
            lambda match: "url=./{}".format(unquote(match.group(1).replace(settings.MEDIA_URL, ""))),
            flashvars["value"],
        )
    return str(soup)
def webhook_test(self, _, args):
    """
    Test your webhooks from within err.

    The syntax is :
    !webhook test [relative_url] [post content]

    It triggers the notification and generate also a little test report.
    """
    url = args[0] if PY3 else args[0].encode()  # PY2 needs a str not unicode
    content = ' '.join(args[1:])
    # try to guess the content-type of what has been passed
    try:
        # try if it is plain json
        loads(content)
        contenttype = 'application/json'
    except ValueError:
        # try if it is a form
        splitted = content.split('=')
        # noinspection PyBroadException
        try:
            payload = '='.join(splitted[1:])
            loads(unquote(payload))
            contenttype = 'application/x-www-form-urlencoded'
        except Exception as _:
            contenttype = 'text/plain'  # dunno what it is
    log.debug('Detected your post as : %s' % contenttype)
    response = self.test_app.post(url, params=content, content_type=contenttype)
    return TEST_REPORT % (url, contenttype, response.status_code)
def do_GET(self):
    assert self.path[0] == '/'
    target = unquote(self.path[1:])

    if target == '':
        self.send_response(302)
        self.send_header('Location', '?' + args.initial_target)
        self.end_headers()
        return

    if not target.startswith('?'):
        self.send_response(404)
        self.end_headers()
        return

    target = target[1:]
    ninja_output, ninja_error, exit_code = ninja_dump(target)
    if exit_code == 0:
        page_body = generate_html(parse(ninja_output.strip()))
    else:
        # Relay ninja's error message.
        page_body = '<h1><tt>%s</tt></h1>' % html_escape(ninja_error)

    self.send_response(200)
    self.end_headers()
    self.wfile.write(create_page(page_body).encode('utf-8'))
def create_thumbnail_url(common_url):
    image_width_px = 180
    title = unquote(common_url).split(":")[-1]
    page = pywikibot.Page(pywikibot.Site(url=common_url), title=title)
    imagepage = pywikibot.FilePage(page.site, page.title())
    image_url_fitted = imagepage.get_file_url(image_width_px)
    return image_url_fitted
def parse(self, response):
    name = response.url.split('term=')[-1]
    name = request.unquote(name)
    item = {
        'name': name,
        'dish': '',
        'Operation time': '',
        'url': response.url
    }
    start_a = response.xpath("//a[@class='rest-row-name rest-name ']")
    if start_a:
        start_a = start_a[0]
        first_name = start_a.xpath("./span[@class='rest-row-name-text']//text()").extract()
        first_name = ''.join(first_name)
        if first_name == name:
            url = '%s%s' % (self.host, start_a.xpath('./@href').extract()[0])
            yield scrapy.Request(url=url, callback=self.get_detail, meta={'name': name})
        else:
            yield item
    else:
        yield item
def search(self, search_term):
    # Perform the search and get the text of the page.
    params = {'q': search_term, 'btnG': 'Google Search'}
    text = self.connection.get(GoogleSearch.google_url, params)
    if not text:
        return None

    # Pull out the links of results
    start = text.find('<div id="res">')
    end = text.find('<div id="foot">')
    if text[start:end] == '':
        self.logger.warn("No results for `{}`".format(search_term))
        return None
    links = []
    text = text[start:end]
    start = 0
    end = 0
    while start > -1 and end > -1:
        start = text.find('<a href="/url?q=')
        text = text[start + len('<a href="/url?q='):]
        end = text.find('&sa=U&ei=')
        if start > -1 and end > -1:
            link = unquote(text[0:end])
            text = text[end:len(text)]
            if link.find('http') == 0:
                links.append(link)

    # If necessary, filter the links based on content.
    if len(self.restrict_to) > 0:
        filtered_links = []
        for link in links:
            for domain in self.restrict_to:
                if domain in link:
                    filtered_links.append(link)
        links = list(set(filtered_links))

    return links
def match_data_request(url):
    html = page_request(url)
    xhash = unquote(re.search('"xhash":"(.+?)"', html).group(1))
    id_match = re.search('"id":"(.+?)"', html).group(1)
    id_sport = re.search('"sportId":(.+?)', html).group(1)
    id_version = re.search('"versionId":(.+?)', html).group(1)
    return id_version, id_sport, id_match, xhash
def url2domain(url):
    url = re.sub('(http(s)*://)+', 'http://', url)
    parsed_url = urlparse(unquote(url.strip()))
    if parsed_url.scheme not in ['http', 'https']:
        return None
    netloc = re.search("(?:www\.)?(.*)", parsed_url.netloc).group(1)
    if netloc is not None:
        return str(netloc.encode('utf8')).strip()
    return None
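# A rough, self-contained sketch of the same idea (normalize a URL and pull out the bare
# domain); names and the sample value here are assumptions for illustration. Note that in
# Python 3 the original's str(netloc.encode('utf8')) returns a "b'...'" string rather than
# plain text, so this sketch returns the netloc directly.
import re
from urllib.parse import unquote, urlparse


def url2domain_demo(url):
    url = re.sub('(http(s)*://)+', 'http://', url)
    parsed = urlparse(unquote(url.strip()))
    if parsed.scheme not in ('http', 'https'):
        return None
    # drop a leading "www." if present
    return re.search(r"(?:www\.)?(.*)", parsed.netloc).group(1) or None


# url2domain_demo("https://www.example.com/some%20page") -> "example.com"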
def prediction_output():
    # pull the 'url_of_article' from input field and store it
    url_to_analyse = request.args.get('url_of_article')
    # pull 'text_of_article' from input field and store it
    text_to_analyse = request.args.get('text_of_article')

    the_result = []
    if (text_to_analyse != '') and (url_to_analyse != ''):
        # pre-parsing using urllib
        url_to_analyse = unquote(url_to_analyse)
        g = urlparse(url_to_analyse)
        # crucial parsing element
        if g.netloc != '':
            url_to_analyse = g.netloc
        elif g.path != '':
            url_to_analyse = "https://" + g.path
            g = urlparse(url_to_analyse)
            url_to_analyse = g.netloc
        # final parse by my built-in function
        parse_url = helpful_functions.extract_domain_name(
            tlds_domain_suffixes, url_to_analyse)
        # parse_url = tld.get_tld(url_to_analyse, as_object=True, fail_silently=True)
        print(url_to_analyse)
        if parse_url != "":
            url_to_analyse = parse_url  # parsing the URL
        print(url_to_analyse)
        the_result = model_it(text_to_analyse, url_to_analyse, golden_fake_vector,
                              golden_true_vector, model, nlp_optimized, url_analysis,
                              predict, pandas, metadata_fake, metadata_true,
                              helpful_functions)
    else:
        the_result = ['Wrong URL, please fix URL']

    if len(the_result) == 0:
        return render_template("output.html")
    elif len(the_result) == 1:
        return render_template("output.html", the_result=the_result[0])
    else:
        return render_template("output.html", the_result=the_result[0], mu0=the_result[1],
                               mu1=the_result[2], mu2=the_result[3], mu3=the_result[4],
                               mu4=the_result[5], mu5=the_result[6], mu6=the_result[7],
                               mu7=the_result[8], mu8=the_result[9], mu9=the_result[10],
                               mu10=the_result[11], mu11=the_result[12], mu12=the_result[13])
def validate(root, f):
    root = os.path.abspath(root)
    normalized = os.path.abspath(f)[len(root) + 1:].replace('\\', '/')
    try:
        if os.path.getsize(f) < 80:
            # this is probably a texttest place holder file
            # it is definitely too small to contain a schema
            return
        doc = etree.parse(f)
        schemaLoc = doc.getroot().get(
            '{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation')
        if schemaLoc and '/xsd/' in schemaLoc:
            localSchema = os.path.join(os.path.dirname(__file__), '..', '..', 'data',
                                       schemaLoc[schemaLoc.find('/xsd/') + 1:])
            if os.path.exists(localSchema):
                schemaLoc = localSchema
            # if schemaLoc not in schemes: // temporarily disabled due to lxml bug
            # https://bugs.launchpad.net/lxml/+bug/1222132
            schemes[schemaLoc] = etree.XMLSchema(etree.parse(schemaLoc))
            schemes[schemaLoc].validate(doc)
            for entry in schemes[schemaLoc].error_log:
                s = unquote(str(entry))
                # remove everything before (and including) the filename
                s = s[s.find(f.replace('\\', '/')) + len(f):]
                print(normalized + s, file=sys.stderr)
    except Exception:
        print("Error on parsing '%s'!" % normalized, file=sys.stderr)
        traceback.print_exc()
def parse(self, response):
    item = MusicItem()
    item['type'] = '专辑'
    # area_list=['4','14','15','3','0']
    # area=area_list.pop()
    rsq_url = response.url
    url_area = re.search(r'area%22%3A(.*?)%2C', rsq_url).group(1)
    if url_area == '1':
        item['area'] = '内地'
    elif url_area == '0':
        item['area'] = '港台'
    elif url_area == '3':
        item['area'] = '欧美'
    elif url_area == '15':
        item['area'] = '韩国'
    elif url_area == '14':
        item['area'] = '日本'
    elif url_area == '4':
        item['area'] = '其他'
    dict_dat = json.loads(response.text)
    dat_nodes = dict_dat["albumlib"]['data']['list']
    for node in dat_nodes:
        item['album'] = node["album_name"]
        yield item
    next_num = int(
        request.unquote(re.search(r'sin%22%3A(.*?)%2C', rsq_url).group(1))) + 20
    next_url = 'https://u.y.qq.com/cgi-bin/musicu.fcg?&g_tk=5381&jsonpCallback=getUCGI40028220795882663&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq&needNewCode=0&data=%7B%22albumlib%22%3A%7B%22method%22%3A%22get_album_by_tags%22%2C%22param%22%3A%7B%22area%22%3A4%2C%22company%22%3A-1%2C%22genre%22%3A-1%2C%22type%22%3A-1%2C%22year%22%3A-1%2C%22sort%22%3A2%2C%22get_tags%22%3A1%2C%22sin%22%3A' + str(
        next_num) + '%2C%22num%22%3A20%2C%22click_albumid%22%3A0%7D%2C%22module%22%3A%22music.web_album_library%22%7D%7D'
    while bool(dat_nodes):
        yield Request(next_url, callback=self.parse)
        # next_purl=next_url.replace(re.search(r'area%22%3A(.*?)%2C',rsq_url).group(1),area)
        print('进入下个主题数据', area)
def request_query_to_dict(query_string):
    tmp_array = query_string.split("&")
    result = {}
    for ele in tmp_array:
        k, v = ele.split("=", 2)
        result[k] = unquote(v)
    return result
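# A small, hypothetical round-trip for a query-string parser like the one above, assuming
# unquote comes from urllib.parse. Unlike the snippet above it uses maxsplit=1, so values
# that themselves contain '=' are kept intact; the sample query string is illustrative only.
from urllib.parse import unquote


def query_to_dict_demo(query_string):
    """Split 'k1=v1&k2=v2' pairs and percent-decode each value."""
    result = {}
    for pair in query_string.split("&"):
        key, value = pair.split("=", 1)
        result[key] = unquote(value)
    return result


# query_to_dict_demo("name=J%C3%BCrgen&city=New%20York")
# -> {'name': 'Jürgen', 'city': 'New York'}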
def _extract_attrs(x, n):
    """Extracts attributes for an image.  n is the index where the
    attributes begin.  Extracted elements are deleted from the element
    list x.  Attrs are returned in pandoc format.
    """
    try:
        return extract_attrs(x, n)
    except (ValueError, IndexError):
        if PANDOCVERSION < '1.16':
            # Look for attributes attached to the image path, as occurs with
            # image references for pandoc < 1.16 (pandoc-fignos Issue #14).
            # See http://pandoc.org/MANUAL.html#images for the syntax.
            # Note: This code does not handle the "optional title" for
            # image references (search for link_attributes in pandoc's docs).
            assert x[n - 1]['t'] == 'Image'
            image = x[n - 1]
            s = image['c'][-1][0]
            if '%20%7B' in s:
                path = s[:s.index('%20%7B')]
                attrs = unquote(s[s.index('%7B'):])
                image['c'][-1][0] = path  # Remove attr string from the path
                return PandocAttributes(attrs.strip(), 'markdown').to_pandoc()
        raise
def parse_new_login_page(cls, res_xml):
    """Parse new login page xml response."""
    data = xml2dict(res_xml)['error']
    if 'pass_ticket' in data:
        data['pass_ticket'] = unquote(data['pass_ticket'])
    return data
def webhook_test(self, _, args):
    """
    Test your webhooks from within err.

    The syntax is :
    !webhook test [relative_url] [post content]

    It triggers the notification and generate also a little test report.
    """
    url = args[0] if PY3 else args[0].encode()  # PY2 needs a str not unicode
    content = ' '.join(args[1:])
    # try to guess the content-type of what has been passed
    try:
        # try if it is plain json
        loads(content)
        contenttype = 'application/json'
    except ValueError:
        # try if it is a form
        splitted = content.split('=')
        # noinspection PyBroadException
        try:
            payload = '='.join(splitted[1:])
            loads(unquote(payload))
            contenttype = 'application/x-www-form-urlencoded'
        except Exception as _:
            contenttype = 'text/plain'  # dunno what it is
    logging.debug('Detected your post as : %s' % contenttype)
    response = self.test_app.post(url, params=content, content_type=contenttype)
    return TEST_REPORT % (url, contenttype, response.status_code)
def google(message, keywords):
    """
    Return the Google search result for the given keywords.

    https://github.com/llimllib/limbo/blob/master/limbo/plugins/google.py
    """
    if keywords == 'help':
        return
    query = quote(keywords)
    url = "https://encrypted.google.com/search?q={0}".format(query)
    soup = BeautifulSoup(requests.get(url).text, "html.parser")
    answer = soup.findAll("h3", attrs={"class": "r"})
    if not answer:
        botsend(message, "`{}` での検索結果はありませんでした".format(keywords))
    try:
        _, url = answer[0].a['href'].split('=', 1)
        url, _ = url.split('&', 1)
        botsend(message, unquote(url))
    except IndexError:
        # in this case there is a first answer without a link, which is a
        # google response! Let's grab it and display it to the user.
        return ' '.join(answer[0].stripped_strings)
def OAuthSingIn(self):
    log.debug("Running authorization process.")
    try:
        os.remove(os.path.join(self.DIR, data_file))
    except:
        pass
    resp_token = self.API.send_get(auth_rt, {"oauth_callback": __callback__})
    if resp_token["stat"] == "ok":
        self.API.TOKEN = resp_token["oauth_token"]
        self.API.TOKEN_SECRET = resp_token["oauth_token_secret"]
        url = base + "/oauth/authorize?oauth_token=" + self.API.TOKEN
        webbrowser.open(url)
        log.warning("Authorize the app in browser using this URL\n\t" + url)
        log.warning("Enter your oauth_verifier from callback URL")
        verifier = input("~~~")
        if verifier != "":
            resp_verifier = self.API.send_get(auth_at, {"oauth_verifier": verifier,
                                                        "oauth_token": self.API.TOKEN})
            if resp_verifier["stat"] == "ok":
                self.API.TOKEN = resp_verifier["oauth_token"]
                self.API.TOKEN_SECRET = resp_verifier["oauth_token_secret"]
                session = open(os.path.join(self.DIR, data_file), "w+")
                session.write("fullname=" + unquote(resp_verifier["fullname"])
                              + "\noauth_token=" + self.API.TOKEN
                              + "\noauth_token_secret=" + self.API.TOKEN_SECRET
                              + "\nuser_nsid=" + unquote(resp_verifier["user_nsid"])
                              + "\nusername=" + unquote(resp_verifier["username"]))
                session.close()
                log.info("Signed in as %s", unquote(resp_verifier["fullname"]))
                return True
    log.critical("Something unexpected happened, try the authorization again.")
    return False
def search(self):
    """Returns a dict of Title/URLs"""
    results = {}
    search_results = self.__search__()
    if not search_results:
        self.logger.info('No results returned')
        return results
    for data in search_results:
        if 'responseData' in data:
            for result in data['responseData'].get('results', []):
                if result and 'titleNoFormatting' in result:
                    title = urllib2.unquote(result['titleNoFormatting'])
                    results[title] = urllib2.unquote(result['unescapedUrl'])
        else:
            self.logger.error('no responseData key found in response')
            self.logger.error(data)
    return results
def play_song(self):
    # Get information from the path
    path = self.path.split("/")
    artist = unquote(path[2])
    album = unquote(path[3])
    song = unquote(path[4])
    # Get the corresponding objects
    artist = collection.get_artist(artist)
    album = artist.find_album(album)
    song = album.find_song(song)
    # stop current music
    music.stop()
    # Clear the song queue
    music.clearList()
    # Add the song to the queue
    music.add_song(song.path)
    music.play()
def uri_metadata(uri):
    '''Discover media-file metadata using GStreamer.'''
    discoverer = GstPbutils.Discoverer()
    uri = uri.split("://")
    info = discoverer.discover_uri(uri[0] + "://" + quote(unquote(uri[1])))
    return info
def send_songs(self):
    # Path will be something like songs/artist/album
    separated_path = self.path.split('/')
    # Get artist
    artist = unquote(separated_path[len(separated_path) - 2])
    artist = collection.get_artist(artist)
    # Get album
    album = unquote(separated_path[len(separated_path) - 1])
    album = artist.find_album(album)
    # get all the songs
    songs = [x.name for x in album.songs]
    # Encode the list into JSON
    encoder = json.JSONEncoder()
    songs = encoder.encode(songs)
    # Send the songs
    self.send_response(200)
    self.end_headers()
    self.wfile.write(songs.encode())
def oneDriveImageProxy():
    url = unquote(request.args.get('url'))
    parsedUrl = urlparse(url)
    if not (parsedUrl.netloc.endswith('livefilestore.com')):
        return jsonify({'error': "Image source not supported."})
    req = requests.get(url, stream=True)
    resp = Response(stream_with_context(req.iter_content()),
                    content_type=req.headers['content-type'])
    resp.headers['Expires'] = req.headers['Expires']
    resp.headers['Cache-Control'] = req.headers['Cache-Control']
    return resp
def __init__(self, img_id, query=None):
    self.img_id = img_id
    urlname = "S_IMG_CO_ISS_{}_{}.json".format(img_id[1:], img_id[0])
    fullurl = "{}/{}".format(metadata_url, urlname)
    print("Requesting", fullurl)
    if query is not None:
        query = unquote(urlencode(query))
        self.r = requests.get(fullurl, params=query).json()
    else:
        self.r = requests.get(fullurl).json()
def get_hits(qi, query):
    """ Returns matching records. """
    normalized_query = re.sub('\W+', '', unquote(query)).lower()
    delta = len(normalized_query) // 4
    hits = qi.find_matches(normalized_query, delta, 15)
    result = [{'city': hit[0], 'country_code': hit[1], 'population': hit[2]}
              for hit in hits]
    return json.dumps(result)
def send_albums(self):
    # Path will be something like albums/artist
    seperated_path = self.path.split('/')
    artist = unquote(seperated_path[len(seperated_path) - 1])
    artist = collection.get_artist(artist)
    albums = [x.name for x in artist.albums]
    encoder = json.JSONEncoder()
    albums = encoder.encode(albums)
    self.send_response(200)
    self.end_headers()
    self.wfile.write(albums.encode())
def get_hits(qi, query):
    """ Returns matching records. """
    normalized_query = re.sub('\W+', '', unquote(query)).lower()
    delta = len(normalized_query) // 4
    hits = qi.find_matches(normalized_query, delta, 15)
    result = [{'id': hit[0], 'title': hit[1], 'year': hit[2]}
              for hit in hits]
    return json.dumps(result)