def download_files(session, f):
    filename = f[1] + utils.unquote(f[0])[utils.unquote(f[0]).rindex('/'):]
    if not exists(filename):
        response = session.get(f[0], allow_redirects=False)
        if response.status_code == 301:
            download_files(session, (response.headers['Location'], f[1]))
        elif response.status_code == 200:
            create_filepath(f[1])
            with open(filename, 'wb') as fd:
                for chunk in response.iter_content(1024):
                    fd.write(chunk)
            print('[+] ' + filename)
    else:
        response = session.head(f[0], allow_redirects=False)
        if response.status_code == 301:
            download_files(session, (response.headers['Location'], f[1]))
        elif response.status_code == 200:
            last_mod_file = getmtime(filename)
            try:
                last_mod_www = timegm(strptime(response.headers['Last-Modified'],
                                               '%a, %d %b %Y %H:%M:%S %Z'))
            except KeyError:
                print('Can\'t check {} for updates.'.format(f[0]))
                last_mod_www = last_mod_file
            if last_mod_www > last_mod_file:
                response = session.get(f[0])
                if response.status_code == 200:
                    create_filepath(f[1])
                    with open(filename, 'wb') as fd:
                        for chunk in response.iter_content(1024):
                            fd.write(chunk)
                    print('[M] ' + filename)
def download_file(url, filename=None, chunk_size=512, directory=os.getcwd(), auth=None):
    # if no filename is given, try and get it from the url
    if not filename:
        filename = unquote(url.split('/')[-1])
    full_name = os.path.join(directory, filename)
    # make the destination directory, but guard against race condition
    if not os.path.exists(os.path.dirname(full_name)):
        try:
            os.makedirs(os.path.dirname(full_name))
        except OSError as exc:
            # only ignore the error if another process created the
            # directory in the meantime; re-raise anything else
            if exc.errno != errno.EEXIST:
                raise
    r = requests.get(url, stream=True, auth=auth)
    with open(full_name, 'wb') as f:
        for chunk in r.iter_content(chunk_size=chunk_size):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
                f.flush()
    r.close()
def admin_user(app):
    done = "None"
    email = ""

    if len(request.args) != 0:
        email = request.args.get("email", "")
        # Remove url encoding for special characters
        email = unquote(email)
        done = request.args.get("done", "None")

    if len(request.form) != 0:
        email = request.form.get("email", "")
        done = request.form.get("done", "None")

    if email != "":
        clear_session(app)
    elif "admin_user_email" in session:
        email = session["admin_user_email"]

    if email != "":
        user = User(email).get_details()
        if user != {}:
            session["admin_user_email"] = user["email"]
            session["admin_user_object"] = user
            user_group = return_users_group(user)
            return render_template_custom("admin/user.html",
                                          user=user,
                                          user_group=user_group,
                                          done=done)

    return redirect("/admin/user/not-found")
def getstations(locations):
    location = unquote(locations)
    xbmc.log("looking for: " + location, level=xbmc.LOGNOTICE)
    stations = npr.Stations(location)
    data = stations.a['station']
    streamlist = []
    for d in data:
        s = str(d)
        if d.get('mp3') is not None:
            if d.get('name') is not None:
                # work around for crappy kodi only using HTTP
                __stationsList__.append(d['name'])
                if "playerservices.streamtheworld.com/pls" in d['mp3']:
                    f = urllib2.urlopen(d['mp3'])
                    myfile = f.read()
                    s = myfile.splitlines()
                    for i in range(len(s)):
                        url = s[i].decode()
                        name = d['name']
                        if "File" in url:
                            if ".com:443" in url:
                                urls = url.replace(":443", "")
                                __streamList__.append(regexurl(urls))
                            else:
                                __streamList__.append(regexurl(url))
                else:
                    __streamList__.append(d['mp3'])
def _parse_result(self):
    """
    Parse search result data.

    Raises:
        PoogleParserError: Raised if the result can not be parsed for any reason
    """
    self.title = self._soup.a.text
    self._log.info('Result title parsed: %s', self.title)

    # Make sure this is a valid result URL (and not a link to image results, as an example).
    href = self._soup.a.get('href')
    if not href.startswith('/url?'):
        raise PoogleParserError('Unrecognized URL format: %s', href)

    match = self.url_regex.match(href)
    if not match or not match.group('url'):
        self._log.error('Unable to parse search result URL: {h}'.format(h=href))
        raise PoogleParserError('Unable to parse search result URL: %s', href)

    url = unquote(match.group('url'))
    self.url = URL(url)
    self._log.info('Result URL parsed: %s', self.url)
def term_from_uri(uri):
    """Removes prepended URI information from terms."""
    if uri is None:
        return None
    # This ensures that if we get a Literal with an integer value (as we
    # do for modification positions), it will get converted to a string,
    # not an integer.
    if isinstance(uri, rdflib.Literal):
        uri = str(uri.toPython())
    # This is to handle URIs like
    # http://www.openbel.org/bel/namespace//MAPK%20Erk1/3%20Family
    # or
    # http://www.openbel.org/bel/namespace/MAPK%20Erk1/3%20Family
    # In the current implementation, the order of the patterns matters.
    patterns = ['http://www.openbel.org/bel/namespace//(.*)',
                'http://www.openbel.org/vocabulary//(.*)',
                'http://www.openbel.org/bel//(.*)',
                'http://www.openbel.org/bel/namespace/(.*)',
                'http://www.openbel.org/vocabulary/(.*)',
                'http://www.openbel.org/bel/(.*)']
    for pr in patterns:
        match = re.match(pr, uri)
        if match is not None:
            term = match.groups()[0]
            term = unquote(term)
            return term
    # If none of the patterns match then the URI is actually a simple term,
    # for instance a site: "341" or a substitution: "sub(V,600,E)"
    return uri
def get_path(link):
    match = search("file://", link)
    if match:
        idx = match.end()
        # Return the path without "file://", decoded as utf-8
        return unquote(link[idx:])
    else:
        return False
def download(self, download_item):
    if not download_item:
        raise Exception('Error while downloading, download data is missing!')
    if not isinstance(download_item, DownloadItem):
        raise Exception('Error while downloading, data is invalid!')
    if not download_item.url or type(download_item.url) is not str:
        raise Exception('Error while downloading, missing URL!')

    file_name = "{}.{}".format(
        download_item.song_item.title
        if download_item.song_item and download_item.song_item.title
        else unquote(download_item.url.split('/')[-1]),
        download_item.url.split('.')[-1])
    destination_directory = self.__pref.getPref('destination', os.path.abspath('.'))
    if not os.path.isdir(destination_directory) or not os.path.exists(destination_directory):
        raise Exception(
            'Download location `{}` does not exist, please recheck the directory in settings and try again!'
            .format(destination_directory))
    file_name = os.path.join(destination_directory, file_name)

    try:
        if os.path.exists(file_name) and os.path.isfile(file_name):
            os.remove(file_name)
    except:
        pass

    u = self.__http.request('GET', download_item.url, preload_content=False)
    f = open(file_name, 'wb')
    meta = u.info()
    file_size = int(meta.getheaders("Content-Length")[0])
    print("{}Downloading {}`{}` ({} bytes){}".format(
        ConsoleColor.BOLD + ConsoleColor.WARNING,
        ConsoleColor.ENDC + ConsoleColor.WARNING,
        file_name, file_size, ConsoleColor.ENDC))

    file_size_dl = 0
    block_sz = 8192
    while True:
        buffer = u.read(block_sz)
        if not buffer:
            break
        file_size_dl += len(buffer)
        f.write(buffer)
        status = r"%10d bytes [%3.2f%%]" % (file_size_dl,
                                            file_size_dl * 100. / file_size)
        status = status + chr(8) * (len(status) + 1)
        print(status, end='')
    f.close()
    print("\n\033[92m\033[1m -> Done!\033[0m")
def _from_uri(cls, uri):
    parts = [unquote(cls.encode(p)) for p in uri.split(":")]
    if not parts or parts[0] != PandoraUri.SCHEME or len(parts) < 2:
        raise NotImplementedError("Not a Pandora URI: {}".format(uri))

    uri_cls = cls.TYPES.get(parts[1])
    if uri_cls:
        return uri_cls(*parts[2:])
    else:
        raise NotImplementedError("Unsupported Pandora URI type '{}'".format(uri))
def recipe(name):
    """ Details of one recipe """
    recipe = Recipe.query.filter_by(name=unquote(name)).first_or_404()
    tags = [tag.name for tag in recipe.tags]
    return render_template("recipe.html",
                           title=recipe.name,
                           recipe=recipe.to_dict(),
                           tags=", ".join(tags))
def _sort_parameters(self, query):
    # split on ampersand
    params = query.split("&")
    # split each param into two-item lists of (key, value) and quote list entries
    params = [[quote(unquote(v)) for v in item.split("=", 1)] for item in params]
    # sort based on key portion
    params = sorted(params, key=lambda p: p[0])
    # join back together on ampersand
    return "&".join(map(lambda p: "=".join(p), params))
def root():
    if request.method == 'GET':
        city_name = request.args.get('city')
        if city_name is not None:
            city = unquote(city_name)
        else:
            city = None
        data = get_graves_from_city(city)
        return render_template("frontend/index.html", graves=data, city=city)
def get_referrer(request):
    """Get a valid referrer code"""
    refhash = request.COOKIES.get('refhash')
    if refhash:
        result = signing.loads(requests_utils.unquote(refhash))
        try:
            student = Student.objects.get(referrer_code=result['referrer_code'])
            return student
        except Student.DoesNotExist:
            pass
def extract_state_token(token_string):
    """ Extract the data from a state token string """
    b64_json = str(unquote(token_string))
    json_string = base64.urlsafe_b64decode(b64_json)
    json_state = json.loads(json_string)
    redirect = json_state.get('redirect', 'home.index')
    scopes = json_state.get('scopes', [])
    return redirect, scopes
def serve_arquivos_json_upload_para_base(comentario=''):
    comentario = unquote(comentario)
    if not usuarioLocal():
        return jsonify({'mensagem': {'lateral': '',
                                     'popup': 'Opção apenas disponível para usuário local',
                                     'confirmar': ''}})
    if not config.config['BASE'].get('base_local', ''):
        return jsonify({'mensagem': {'lateral': '',
                                     'popup': 'Base sqlite local não foi configurada',
                                     'confirmar': ''}})
    nosLigacoes = request.get_json()
    rede_relacionamentos.carregaJSONemBaseLocal(nosLigacoes, comentario)
    return jsonify({'retorno': 'ok'})
def list_routes():
    import urllib
    output = []
    for rule in manager.app.url_map.iter_rules():
        methods = ','.join(rule.methods)
        line = unquote("{:50s} {:20s} {}".format(rule.endpoint, methods, rule))
        output.append(line)

    for line in sorted(output):
        print(line)
def refresh_access_token(refresh_token):
    influxdb.count('spotify.refresh_token_requests')
    try:
        refresh_token = unquote(refresh_token)
        tokens = models.get_access_token_from_refresh_token(refresh_token)
        return jsonify(**tokens)
    except Exception as e:
        current_app.logger.error(
            'Could not refresh access token: {}'.format(e))
        return jsonify(error='Unable to refresh token.'), 401
def build_get_features_url(base_url, layername):
    params = {
        "service": "WFS",
        "version": "1.0.0",
        "request": "GetFeature",
        "typeName": layername,
        "outputFormat": "application/json",
    }
    req = PreparedRequest()
    req.prepare_url(base_url, params)
    return unquote(req.url)
def update_atime(vlc_path):
    """
    You should enable lastaccesstime on your system.
    If you do not want to enable it or can't, you can use this function
    to update it manually.
    """
    path = vlc_path.replace('file:///', '')
    path = path.replace('/', '\\')
    path = unquote(path)
    now = int(time.time())
    mtime = os.stat(path).st_mtime
    # os.utime expects the access and modification times as a single tuple
    os.utime(path, (now, mtime))
def decrypt_command(self, cmd):
    _LOGGER.info('Decrypt command')
    if type(cmd) != str:
        raise TypeError(
            'Wrong type for "cmd" parameter. Expected str, got {0}.'.format(
                type(cmd)))
    enc_cmd_part = b64decode(utils.unquote(cmd[13:]).encode('utf8'))
    cipher_aes = AES.new(self.client_aes_key, AES.MODE_CBC, self.client_aes_iv)
    return self.zero_byte_unpadding(cipher_aes.decrypt(enc_cmd_part),
                                    AES.block_size).decode('utf8')
def refresh_access_token(refresh_token):
    influxdb.count('youtube.refresh_token_requests')
    try:
        refresh_token = unquote(refresh_token)
        credentials = models.refresh_tokens(refresh_token)
        return jsonify(access_token=credentials['access_token'],
                       expires_in=credentials['expires_in'])
    except Exception as e:
        current_app.logger.error(
            'Could not authenticate youtube user: {}'.format(e))
        return jsonify(error='Unable to refresh token.'), 503
def parse_request_line(line) -> (str, str, str):
    """
    :param line: request line
    :return: method, uri, version
    """
    items = line.split(" ")
    if len(items) != 3:
        raise RequestLineError(line)
    uri = unquote(items[1])
    return items[0], uri, items[2]
def getVideoVine(item):
    # first attempt to get mp4 directly from embed content
    iframe = sget(item, 'secure_media/oembed/html') or \
        sget(item, 'media/oembed/html') or \
        sget(item, 'secure_media_embed/content') or \
        sget(item, 'media_embed/content')
    if iframe:
        m = re.search('[?&]src=(https?.*?vine\.co.*?)&', iframe)
        if m and m.group(1):
            url = unquote(m.group(1))
            return url, None
def downLoadData(self, url, basePath: Path, name=None, extension=None,
                 overwrite=True, show=False, delayFileParse=True):
    from clint.textui import progress
    epubResponse = None
    realUrl = url
    if delayFileParse:
        epubResponse = self.req.get(url, stream=True)
        realUrl = epubResponse.url
    urlPath = Path(urllib.parse.urlparse(realUrl).path)
    if not extension:
        extension = urlPath.suffix
    if not name:
        name = urlPath.stem
    fileName = name + extension
    fileName = unquote(fileName)
    path = basePath.joinpath(fileName)
    if not overwrite:
        if path.is_file() or path.is_dir():
            if show:
                print("skip for not overwrite")
            return realUrl
    if not epubResponse:
        epubResponse = self.req.get(url, stream=True)
    if epubResponse.ok:
        with open(path, 'wb') as epubFile:
            total_length = epubResponse.headers.get('content-length')
            if total_length:
                total_length = int(total_length)
            chunks = epubResponse.iter_content(chunk_size=1024)
            hint = fileName
            if len(fileName) > 16:
                hint = "{}...{}\t".format(fileName[:7], fileName[-7:])
            if show and total_length:
                nbBlocks = int(total_length) / 1024 + 1
                chunks = progress.bar(it=chunks, expected_size=nbBlocks, label=hint)
            for chunk in chunks:
                if chunk:
                    epubFile.write(chunk)
                    epubFile.flush()
        if total_length and path.stat().st_size < total_length:
            import os
            os.remove(path)
            raise IOError()
    return realUrl
def get_cp_contact_for_netblock():
    cidr = request.args.get('cidr')
    if not cidr:
        return ApiResponse({'message': 'no cidr defined'}, 404, {})
    cidr = unquote(cidr)
    try:
        notification_setting = NotificationSetting.contact_for_netblock(cidr)
    except AttributeError as e:
        return ApiResponse({'message': str(e)}, 404, {})
    except Exception as e:
        return ApiResponse({'message': str(e)}, 421, {})
    return ApiResponse(notification_setting)
def get_rid(self):
    url = "http://www.kuwo.cn/api/www/search/searchMusicBykeyWord"
    params = {"key": "五月天", "pn": "1", "rn": "30", "reqId": self.reqId}
    url = f"http://www.kuwo.cn/api/www/search/searchMusicBykeyWord?key=五月天&reqId={self.reqId}&pn={self.pn}&rn={self.rn}"
    ret = session.get(url, headers=self.headers).json()
    # rids = list(map(lambda x: x["rid"], ret["data"]["list"]))
    item_list = []
    for _ in ret["data"]["list"]:
        item = {}
        item["rid"] = _["rid"]
        item["name"] = unquote(_["name"])
        item_list.append(item)
    return item_list
def get_images():
    data = request.get_json()
    # Directory URLs begin with /s/
    new_url = Path(unquote(data['output_dir_url_new'][2:])) / data["path"]
    ref_url = Path(unquote(data['output_dir_url_ref'][2:])) / data["path"]
    # print(data)  # DEBUG
    blobs = createAutoRois(new_url, ref_url, data["diff_type"],
                           data['threshold'], data['diameter'])
    blobs = blobs.tolist()
    # print(blobs)
    # print("len:", len(blobs))
    # limits the number of rois to "num_crops", filtering small rois.
    while len(blobs) > data['count']:
        index_min = np.argmin([yxr[2] for yxr in blobs])
        del blobs[index_min]
    # print(blobs)  # DEBUG
    # print("sorted len: ", len(blobs))  # DEBUG
    return jsonify(blobs)
def get_video_info(self) -> bool:
    r = self.session.get(
        url=f'https://www.douyin.com/video/{self.video_num}',
        headers={
            'Host': 'www.douyin.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        },
        params={'previous_page': 'app_code_link'}
    )
    r = re.search(r'id="RENDER_DATA" type="application/json">(.+?)</script>', r.text).group(1)
    # print(unquote(r))
    self.video_info = json.loads(unquote(r))
    return True
def sign(self):
    self.req.headers["Authorization"] = "".join([
        "QS ", self.access_key_id, ":", self.get_authorization()
    ])
    self.logger.debug(self.req.headers["Authorization"])
    prepared = self.req.prepare()
    scheme, netloc, path, params, query, fragment = urlparse(
        prepared.url, allow_fragments=False
    )
    path = quote(unquote(path))
    prepared.url = urlunparse(
        (scheme, netloc, path, params, query, fragment)
    )
    return prepared
def get_token(self):
    url = "https://www.zhipin.com/job_detail"
    params = {"query": self.query, "city": self.city, "position": ""}
    ret = session.get(url, headers=headers, params=params, allow_redirects=False)
    print(ret.status_code)
    print(ret.content.decode())
    print(ret.headers)
    location = ret.headers.get("location")
    seed = unquote(location)
    path = urlparse(seed).query
    params = path.split("&")
    print(params)
    item = {}
    for p in params:
        p_list = p.split("=")
        item[p_list[0]] = p_list[1]
    print(item)
    self.seed = item.get('seed') + "="
    self.ts = item.get('ts')
    self.name = item.get('name')
    jscode = f"""let seed = "{self.seed}", ts = {self.ts};"""
    ret = getencpassword(session, "./ABC.z.js", jscode)
    print(ret)
    o_cookie = {
        "__zp_stoken__": quote(ret),
        # "_l": quote(f"""l=/job_detail/?query=爬虫&city=101200100&industry=&position=&r=https://www.zhipin.com/web/common/security-check.html?seed={self.seed}&name={self.name}&ts={self.ts}&callbackUrl=/job_detail/?query=爬虫&city=101200100&industry=&position=&srcReferer=https://www.zhipin.com/web/common/security-check.html?seed={self.seed}&name={self.name}&ts={self.ts}&callbackUrl=/job_detail/?query=爬虫&city=101200100&industry=&position=&srcReferer=https://www.zhipin.com/web/common/security-check.html?seed={self.seed}&name={self.name}&ts={self.ts}&callbackUrl=/job_detail/?query=爬虫&city={self.city}&industry=&position=&srcReferer=https://www.zhipin.com/wuhan/&friend_source=0"""),
        "_a": "77323247.1586174956..1586174956.1.1.1.1",
        "_c": f"{self.ts}",
        "_g": "-",
        # "Hm_lvt_194df3105ad7148dcf2b98a91b5e727a": "1586159333,1586160680,1586163030,1586163673",
        # "Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a": "1586168065"
    }
    cookie = cookiejar_from_dict(o_cookie)
    session.cookies = cookie
    url = "https://www.zhipin.com/" + location
    # dict.update() returns None, so update the headers first and then pass them
    headers.update({
        "referer": f"https://www.zhipin.com/job_detail/?query={self.query}&city=100010000&industry=&position="
    })
    ret = session.get(url, headers=headers)
    print(ret)
def edit_recipe(name):
    """
    Most of the work is comparing old ingredients with new ones.
    Deleting quantities for the removed ingredients is handled by
    Recipe.delete_quantities; modifying quantities is handled by the
    Recipe.add_ingredients method. We do not delete ingredients that
    are now unused from the database.
    """
    recipe = Recipe.query.filter_by(name=unquote(name)).first_or_404()
    recipe_d = recipe.to_dict()
    ing_d = [{
        "ing_name": ing["name"],
        "unit": ing["unit"],
        "quantity": ing["quantity"]
    } for ing in recipe_d["ingredients"]]
    form = AddRecipeForm(obj=recipe, ingredients=ing_d, original_name=recipe.name)
    if form.validate_on_submit():
        # TODO maybe add a Recipe.update method that loops over a dict with setattr?
        recipe.name = form.name.data
        recipe.steps = form.steps.data
        recipe.servings = form.servings.data
        recipe.cooking_temperature = form.cooking_temperature.data
        recipe.cooking_time = form.cooking_time.data
        recipe.prep_time = form.prep_time.data
        ingredients = [
            ing for ing in form.ingredients.data
            if ing["ing_name"] and ing["quantity"] and ing["unit"]
        ]
        [ing.update({"name": ing.pop("ing_name")}) for ing in ingredients]
        old_ingredient_names = {ing["ing_name"] for ing in ing_d}
        new_ingredient_names = {ing["name"] for ing in ingredients}
        recipe.delete_quantities(old_ingredient_names - new_ingredient_names)
        logging.debug(ing_d)
        logging.debug(ingredients)
        logging.debug(old_ingredient_names - new_ingredient_names)
        recipe.add_ingredients(ingredients)
        recipe.add_tags()
        db.session.add(recipe)
        db.session.commit()
        flash(f"Recipe '{recipe.name}' edited")
        return redirect(url_for("main.recipe", name=recipe.name))
    return render_template("add_recipe.html",
                           title=f'{recipe_d["name"]} - editing',
                           form=form)
def serve_arquivos_json_upload(nomeArquivo):
    nomeArquivo = unquote(nomeArquivo)
    filename = secure_filename(nomeArquivo)
    if len(request.get_json()) > 100000:
        return jsonify({'mensagem': {'lateral': '',
                                     'popup': 'O arquivo é muito grande e não foi salvo',
                                     'confirmar': ''}})
    nosLigacoes = request.get_json()
    if usuarioLocal():
        cam = nomeArquivoNovo(os.path.join(local_file_dir, filename + '.json'))
        filename = os.path.split(cam)[1]
    else:
        filename += '.' + secrets.token_hex(10) + '.json'
        cam = os.path.join(local_file_dir, filename)
    with open(cam, 'w') as outfile:
        json.dump(nosLigacoes, outfile)
    return jsonify({'nomeArquivoServidor': filename})
def fixurl(url):
    # Inspired from https://stackoverflow.com/a/804380 but using requests
    from requests.utils import urlparse, urlunparse, quote, unquote

    # turn string into unicode
    if not isinstance(url, unicode):
        url = url.decode('utf8')

    # parse it
    parsed = urlparse(url)

    # divide the netloc further
    userpass, at, hostport = parsed.netloc.rpartition('@')
    user, colon1, pass_ = userpass.partition(':')
    host, colon2, port = hostport.partition(':')

    # encode each component
    scheme = parsed.scheme.encode('utf8')
    user = quote(user.encode('utf8'))
    colon1 = colon1.encode('utf8')
    pass_ = quote(pass_.encode('utf8'))
    at = at.encode('utf8')
    host = host.encode('idna')
    colon2 = colon2.encode('utf8')
    port = port.encode('utf8')
    path = '/'.join(  # could be encoded slashes!
        quote(unquote(pce).encode('utf8'), '')
        for pce in parsed.path.split('/'))
    query = quote(unquote(parsed.query).encode('utf8'), '=&?/')
    fragment = quote(unquote(parsed.fragment).encode('utf8'))

    # put it back together
    netloc = ''.join((user, colon1, pass_, at, host, colon2, port))
    # urlunparse((scheme, netloc, path, params, query, fragment))
    params = ''
    return urlunparse((scheme, netloc, path, params, query, fragment))
def get_fileid(path, url, user, passwd):
    r = request('PROPFIND',
                url=url + '/{}/{}'.format(user, quote(path)),
                auth=(user, passwd),
                data=open('./propfind-fileid.xml', 'rb'),
                headers={'Depth': '1'})
    xml = etree.fromstring(r.text)
    nsmap = xml.nsmap
    response = xml.findall('d:response', namespaces=nsmap)
    # filename:id
    file_dict = dict()
    for i in response:
        fullname = i.find('d:href', namespaces=nsmap).text
        filename = Path(fullname).name.__str__()
        # fileid = i.find('d:propstat/d:prop/oc:fileid', namespaces=nsmap)
        fileid = i.find('*//oc:fileid', namespaces=nsmap).text
        file_dict[unquote(filename)] = fileid
    return file_dict
def get_access_token(code):
    influxdb.count('youtube.access_token_requests')
    try:
        code = unquote(code)
        flow = client.OAuth2WebServerFlow(
            YOUTUBE_CLIENT_ID,
            YOUTUBE_CLIENT_SECRET,
            scope='https://www.googleapis.com/auth/youtube.readonly',
            redirect_uri=YOUTUBE_REDIRECT_URI
        )
        credentials = flow.step2_exchange(code)
        return jsonify(
            access_token=credentials.access_token,
            refresh_token=credentials.refresh_token,
            token_expiry=credentials.token_expiry
        )
    except Exception as e:
        current_app.logger.error(
            'Could not authenticate youtube user: {}'.format(e))
        return jsonify(
            error='There was an error while trying to authenticate you. '
                  'Please, try again.'), 503
from requests.utils import unquote

encodedUrl1 = 'NIFTY%2025'
encodedUrl2 = 'NIFTY%20A%5CB%5CC%5CD'
encodedUrl3 = '22%2F01%2F2014'

# Using the unquote helper from requests.utils
print unquote(encodedUrl1)
print unquote(encodedUrl2)
print unquote(encodedUrl3)
statusText = property(lambda self: self._statusText)
responseText = property(lambda self: self._responseText)
responseXML = property(lambda self: self._responseXML)

# Request
def open(self, sesID, id, method, url, async=False, user=None, password=None):
    self._method = method.upper()
    # print id, method, url
    # if self._method in ['CONNECT', 'TRACE', 'TRACK']:
    #     raise SECURITY_ERR
    # if self._method not in 'DELETE,GET,HEAD,OPTIONS,POST,PUT'.split(','):
    #     raise SYNTAX_ERR
    # self.abort()
    if url.startswith("blob:"):
        url = url.replace("blob:", "")
        url = utils.unquote(url)
        if not url.startswith("http://"):
            url = "http://" + url
        print url
    self._url = url
    self._sesID = sesID
    self._id = id
    self._send = False
    self._rheaders = {}
    host, self._path = _host_path(url)
    # self._http_connection = _HTTPConnection(host)
    self._user = user
    self._password = password
    self._async = async
    self._readyState = self.OPENED
def _parse_html(self, data):
    content = data.xpath('.//table[@id="searchResult"]')[0]
    torrents = TorrentList()
    if content is None:
        return torrents
    for torrent in content.xpath('.//tr'):
        link_info = torrent.xpath('.//div[@class="detName"]/a')
        if len(link_info) == 0:
            continue
        link_info = link_info[0]
        peer_info = torrent.xpath('.//td[@align="right"]')
        if len(peer_info) != 2:
            continue
        torrent_info = Torrent()
        torrent_info.title = link_info.text_content()
        torrent_info.link = sub('/[^/]*$', '', self.base_url + link_info.attrib['href'])
        torrent_info.full_link = self.base_url + link_info.attrib['href']
        torrent_info.seeders = int(peer_info[0].text)
        torrent_info.leechers = int(peer_info[1].text)
        description = torrent.find_class('detDesc')
        if len(description):
            description = description[0].text_content()
            description = sub('\s', ' ', description)
            date = search('[a-zA-Z0-9]+-[a-zA-Z0-9]+\s\d+(:\d+)?', description)
            if date:
                date = date.group()
                if 'Y-day' in date:
                    time = datetime.datetime.today()
                    date = date.replace(
                        'Y-day',
                        '-'.join([str(i).zfill(2) for i in [time.month, time.day]]))
                torrent_info.date = date
            size_str = search('Size[^,]*', description)
            if size_str:
                size_str = size_str.group()[5:]
                torrent_info.size_str = size_str
                size_info = size_str.lower().split(' ')
                size = float(size_info[0])
                if size_info[1] == 'kib':
                    size *= 1024
                elif size_info[1] == 'mib':
                    size *= 1048576
                elif size_info[1] == 'gib':
                    size *= 1073741824
                torrent_info.size = size
            uploader = search('ULed\sby\s.*', description)
            if uploader:
                torrent_info.uploader = uploader.group()[8:]
        ttype = torrent.find_class('vertTh')
        attr = None
        if len(ttype) != 0:
            ttype = ttype[0]
            attr = ttype.getnext()
            ttype = ttype.xpath('.//a')
            if len(ttype) == 2:
                category = '.'.join([i.text_content().lower() for i in ttype])
                torrent_info.category = sub('[\s-]+', '_', category)
        if attr is not None:
            attr = [i for i in attr.xpath('.//a|.//img')
                    if i.tag == 'a' or i.tag == 'img']
            for link in attr:
                if link.tag == 'a':
                    attributes = link.attrib
                    if 'href' in attributes and \
                            attributes['href'][:7] == 'magnet:':
                        torrent_info.attributes['magnet'] = True
                        torrent_info.magnet = unquote(attributes['href'])
                    elif 'href' in attributes and \
                            attributes['href'][-7:] == 'torrent':
                        torrent_info.attributes['torrent_link'] = True
                        torrent_info.torrent = attributes['href']
                else:
                    iattr = link.attrib
                    if 'alt' in iattr and iattr['alt'] == 'Trusted':
                        torrent_info.attributes['trusted'] = True
                    elif 'alt' in iattr and iattr['alt'] == 'VIP':
                        torrent_info.attributes['vip'] = True
                    if 'src' in iattr and 'title' in iattr:
                        if 'cover image' in iattr['title']:
                            torrent_info.attributes['cover_image'] = True
                        elif 'comments.' in iattr['title']:
                            torrent_info.attributes['comments'] = True
                            count = search('\d+', iattr['title'])
                            if count:
                                torrent_info.comment_amount = int(count.group())
        torrents.append(torrent_info)
    return torrents
def norm_path(vlc_path):
    path = vlc_path.replace('file:///', '')
    path = path.replace('/', '\\')
    path = unquote(path)
    return path
def ncdc_cdo_json_to_df(data, **kwargs):
    # Read in API key
    api_key = utils.read_api_key('ncdc_cdo')
    headers = {'token': api_key}

    sdate = pd.datetime(1800, 1, 1)
    td = pd.datetime.today()
    edate = pd.datetime(td.year, td.month, td.day) + pd.Timedelta(days=1)

    if 'NORMAL_' in data.query_params['datasetid']:
        # All the NORMAL_* datasets must have a startdate/enddate of
        # 2010-01-01/2010-12-31
        sdate = pd.datetime(2010, 1, 1)
        edate = pd.datetime(2010, 12, 31)
        delta = pd.Timedelta(days=367)
    elif 'stationid' in data.query_params:
        # Get startdate and/or enddate information
        s = Session()
        ireq = Request('GET',
                       r'http://www.ncdc.noaa.gov/cdo-web/api/v2/stations/{0}'.format(
                           data.query_params['stationid']),
                       headers=headers)
        prepped = ireq.prepare()
        dreq = s.send(prepped)
        sdate = pd.to_datetime(dreq.json()['mindate'])
        edate = pd.to_datetime(dreq.json()['maxdate'])

        if 'startdate' in data.query_params:
            tdate = pd.to_datetime(data.query_params['startdate'])
            if tdate > sdate:
                sdate = tdate

        if 'enddate' in data.query_params:
            tdate = pd.to_datetime(data.query_params['enddate'])
            if tdate < edate:
                edate = tdate

        delta = pd.Timedelta(days=365)
    else:
        delta = pd.Timedelta(days=106751)

    if sdate >= edate:
        raise ValueError("""
*
*   The startdate of {0} is greater than, or equal to, the enddate of {1}.
*
""".format(sdate, edate))

    df = pd.DataFrame()

    testdate = sdate
    while testdate < edate:
        time.sleep(1)

        data.query_params['startdate'] = testdate.strftime('%Y-%m-%d')

        testdate = testdate + delta
        if testdate > edate:
            testdate = edate

        data.query_params['enddate'] = testdate.strftime('%Y-%m-%d')

        s = Session()
        ireq = Request('GET',
                       data.url,
                       params=data.query_params,
                       headers=headers)
        prepped = ireq.prepare()
        prepped.url = unquote(prepped.url)
        req = s.send(prepped)

        try:
            req.raise_for_status()
        except HTTPError:
            continue

        if len(req.content) == 0:
            continue

        try:
            tdf = pd.io.json.json_normalize(req.json()['results'])
        except KeyError:
            continue
        df = df.append(tdf)

    if len(df) == 0:
        if 'NORMAL_' in data.query_params['datasetid']:
            raise ValueError("""
*
*   No normalized statistics available for station {0}
*
""".format(data.query_params['stationid']))
        else:
            raise ValueError("""
*
*   No data within {0} and {1}.
*   There should be data between {2} and {3}.
*
""".format(data.query_params['startdate'],
           data.query_params['enddate'],
           pd.to_datetime(dreq.json()['mindate']),
           pd.to_datetime(dreq.json()['maxdate'])))

    df.drop_duplicates(df.columns, keep='first', inplace=True)

    if 'date' in df.columns:
        fdf = df.pivot(index='date', columns='datatype', values='value')
        df['dt_att'] = df['datatype'] + '_att'
        sdf = df.pivot(index='date', columns='dt_att', values='attributes')
        ndf = fdf.join(sdf)
    else:
        ndf = tdf
    return ndf
def unquote_pkt(self, index):
    return unquote(self.strings_pkt(index))