def _request(self, url):
    """
    Fetch ``url`` asking for a gzip-compressed body and return it as text.

    The body is gunzipped when the server labels it ``gzip`` and is then
    decoded as UTF-8.

    Raises ``utils.APIError`` on HTTP errors (with a dedicated message for
    401) and on connection errors.
    """
    req = urllib.request.Request(url)
    req.add_header('Accept-Encoding', 'gzip')
    try:
        reply = self.opener.open(req, timeout=10)
    except urllib.request.HTTPError as e:
        if e.code != 401:
            raise utils.APIError("HTTP error %d: %s" % (e.code, e.reason))
        raise utils.APIError(
            "Unauthorized. Please check if your username and password are correct."
            "\n\nPlease note that you might also be getting this error if you have "
            "non-alphanumeric characters in your password due to an upstream "
            "MAL bug (#138).")
    except urllib.request.URLError as e:
        raise utils.APIError("Connection error: %s" % e)

    gzipped = reply.info().get('content-encoding') == 'gzip'
    body = reply.read()
    if gzipped:
        body = gzip.decompress(body)
    # Content that is not gzipped is passed through untouched.
    return body.decode('utf-8') if isinstance(body, bytes) else body
def download_artifact(url, dest, uid, token):
    """Download the artifact at ``url`` into the directory ``dest``.

    The file keeps the last path component of ``url`` as its name. When
    ``dest`` is empty the user's home directory is used; a missing ``dest``
    (including missing parents) is created.

    :param url: artifact URL.
    :param dest: target directory, or a falsy value for the home directory.
    :param uid: user name for HTTP basic auth (optional).
    :param token: password/token for HTTP basic auth (optional).

    Connection failures are reported on stdout; nothing is raised for them.
    """
    import shutil  # local import: only needed for the streaming copy below

    # The token is a secret: report whether one was given, never its value.
    print('url is %s dest is %s uid is %s token is %s'
          % (url, dest, uid, '***' if token else token))

    if dest:
        if os.path.exists(dest):
            print('dest exists: ', dest)
        else:
            print('dest does not exist, creating now : ', dest)
            # makedirs also creates missing parent directories.
            os.makedirs(dest, exist_ok=True)
    else:
        dest = str(Path.home())

    file_name = url.rsplit('/', 1).pop()
    dest = os.path.join(dest, file_name)

    # NOTE(review): this switches off TLS certificate verification for the
    # whole process unless PYTHONHTTPSVERIFY is set. Kept for compatibility;
    # confirm it is still wanted.
    if (not os.environ.get('PYTHONHTTPSVERIFY', '')
            and getattr(ssl, '_create_unverified_context', None)):
        ssl._create_default_https_context = ssl._create_unverified_context

    request = urllib.request.Request(url)
    if uid and token:
        credentials = '%s:%s' % (uid, token)
        encoded_credentials = base64.b64encode(credentials.encode('ascii'))
        request.add_header('Authorization',
                           'Basic %s' % encoded_credentials.decode("ascii"))

    try:
        with urllib.request.urlopen(request, timeout=2) as response, \
                open(dest, 'wb') as out_file:
            # Stream in chunks instead of holding the whole artifact in memory.
            shutil.copyfileobj(response, out_file)
        print("Success!")
    except urllib.error.URLError:
        print("Artifactory connection timed out, please check URL, UID and Token.")
def _fetch_json(self, path, post_data=None, **args):
    """Call the API endpoint ``path`` and return the decoded JSON reply.

    :param path: endpoint path, expanded through ``self._url``.
    :param post_data: optional mapping sent as a form-encoded POST body;
        empty values are dropped, integers (including 0) are kept.
    :param args: extra arguments forwarded to ``self._url``.
    :raises QuipError: when the server answers with an error that carries
        an ``error_description``.
    :raises urllib.error.HTTPError: when the error body cannot be parsed.

    When ``self.retry_rate_limit`` is set and the server answers 503
    "Over Rate Limit", the call sleeps until the advertised reset time and
    retries.
    """
    request = urllib.request.Request(url=self._url(path, **args))
    if post_data:
        post_data = dict((k, v) for k, v in post_data.items()
                         if v or isinstance(v, int))
        # urllib only accepts bytes as a request body on Python 3.
        request.data = urllib.parse.urlencode(
            self._clean(**post_data)).encode("utf-8")
    if self.access_token:
        request.add_header("Authorization", "Bearer " + self.access_token)
    try:
        with urllib.request.urlopen(
                request, timeout=self.request_timeout) as response:
            return json.loads(response.read().decode('utf-8'))
    except urllib.request.HTTPError as error:
        try:
            # Extract the developer-friendly error message.
            message = json.loads(error.read())["error_description"]
        except Exception:
            raise error
        if (self.retry_rate_limit and error.code == 503 and
                message == "Over Rate Limit"):
            # Retry once the rate-limit window has been reset.
            reset_time = float(error.headers.get("X-RateLimit-Reset"))
            delay = max(2, reset_time - time.time() + 1)
            logging.warning("Rate Limit, delaying for %d seconds" % delay)
            time.sleep(delay)
            return self._fetch_json(path, post_data, **args)
        else:
            raise QuipError(error.code, message, error)
def post(build: Build):
    """Send a Slack notification describing the outcome of ``build``.

    Does nothing unless ``SLACK_NOTIFICATION``, ``SLACK_NOTIFICATION_URL``
    and ``AUR_PACKAGER_BASE_URL`` are all set. For a successful build the
    message lists every artifact of the package with its download link and
    sha256. Delivery is best effort: connection errors are ignored.
    """
    # All three settings are required. The previous condition
    # (`not A and B and C`) only returned when notifications were disabled
    # *and* both URLs were configured.
    if not (SLACK_NOTIFICATION and SLACK_NOTIFICATION_URL
            and AUR_PACKAGER_BASE_URL):
        return

    # NOTE(review): build_number is hard-coded to 1 in both links below;
    # confirm this is intended.
    detail_url = AUR_PACKAGER_BASE_URL + str(
        reverse_lazy('manager:build_detail',
                     kwargs={'package_name': build.package.name,
                             'build_number': 1}))
    base = '<{}|{}> {}: <{}|{}>'.format(
        package_url(aur_server_tag=build.package.server,
                    package_name=build.package.name),
        build.package.name, build.version, detail_url, build.status)

    if build.status == Build.SUCCESS:
        emoji = ':+1:'
        sha256s = json.loads(build.sha256)
        artifacts = []
        for artifact in Artifact.objects.filter(package=build.package):
            download_url = AUR_PACKAGER_BASE_URL + str(
                reverse_lazy('manager:build_download',
                             kwargs={'package_name': artifact.name,
                                     'build_number': 1}))
            sha256 = sha256s[artifact.name]
            artifacts.append('<{}|:arrow_down: {}> sha256: {}'.format(
                download_url, artifact.name, sha256))
        text = '\n'.join([base] + artifacts)
    else:
        emoji = ':ghost:'
        text = base

    name = '{}: {} {}'.format(build.status, build.package.name, build.version)
    data = {'text': text, 'username': name, 'icon_emoji': emoji}
    request = urllib.request.Request(SLACK_NOTIFICATION_URL)
    request.add_header('Content-type', 'application/json')
    try:
        with urllib.request.urlopen(request, json.dumps(data).encode()):
            pass
    except urllib.error.URLError:
        # Deliberately ignored: a failed notification must not break the build.
        pass
def __send_xml_str(self, xml_str):
    """POST ``xml_str`` to the door controller and return the raw reply.

    The XML is sent form-encoded as the ``XML`` field, with HTTP basic
    auth built from ``self.door_user`` / ``self.door_pass``. The request
    itself runs while holding ``self.lock``.

    :raises Exception: if the reply code is not 200 or the reply carries
        an ``errorMessage`` attribute.
    """
    logger.debug("Sending: %s" % xml_str)
    # urllib requires the request body as bytes on Python 3.
    xml_data = urllib.parse.urlencode({'XML': xml_str}).encode('utf-8')
    request = urllib.request.Request(self.door_url(), xml_data)

    # base64.encodestring() no longer exists (removed in Python 3.9) and
    # needed bytes anyway; b64encode also emits no line breaks to strip.
    credentials = ('%s:%s' % (self.door_user, self.door_pass)).encode('utf-8')
    base64string = base64.b64encode(credentials).decode('ascii')
    request.add_header("Authorization", "Basic %s" % base64string)

    # NOTE(review): certificate verification is disabled and the cipher is
    # pinned to RC4-SHA, presumably for a legacy device - confirm.
    context = ssl._create_unverified_context()
    context.set_ciphers('RC4-SHA')

    self.lock.acquire()
    try:
        result = urllib.request.urlopen(request, context=context)
        try:
            return_code = result.getcode()
            return_xml = result.read()
        finally:
            result.close()
    finally:
        self.lock.release()

    logger.debug("Response code: %d" % return_code)
    logger.debug("Response: %s" % return_xml)
    if return_code != 200:
        raise Exception("Did not receive 200 return code")
    error = get_attribute(return_xml, "errorMessage")
    if error:
        raise Exception("Received an error: %s" % error)
    return return_xml
def main():
    """Upload ``FILE1`` with an HTTP PUT to ``VAULT_URL`` + its base name.

    Prints the request URL and either the response status or the error.
    """
    url = VAULT_URL + urllib.parse.quote(os.path.basename(FILE1))
    # The file must stay open while urllib streams it, and must be closed
    # afterwards whatever happens.
    with open(FILE1, 'rb') as fd:
        request = urllib.request.Request(url=url, data=fd, method='PUT')
        request.add_header('Content-Length', str(os.path.getsize(fd.name)))
        print('Make HTTP request: {}'.format(url))
        try:
            with urllib.request.urlopen(request) as response:
                print('HTTP response: {}: {}'.format(
                    response.status, response.msg))
        except Exception as e:
            print('Error: {}\nHTTP request PUT: {}'.format(e, url))
def get_blob(self, thread_id, blob_id):
    """Open the blob ``blob_id`` of thread ``thread_id``.

    Returns the response object produced by ``urllib.request.urlopen``;
    the caller reads the blob contents from it.

    Raises ``QuipError`` when the server reports an error with an
    ``error_description``; the original ``HTTPError`` is re-raised when the
    error body cannot be parsed. A 503 "Over Rate Limit" answer is retried
    after sleeping when ``self.retry_rate_limit`` is set.
    """
    blob_url = self._url("blob/%s/%s" % (thread_id, blob_id))
    request = urllib.request.Request(url=blob_url)
    if self.access_token:
        request.add_header("Authorization", "Bearer " + self.access_token)
    try:
        return urllib.request.urlopen(request, timeout=self.request_timeout)
    except urllib.request.HTTPError as error:
        try:
            # Pull the developer-friendly message out of the error body.
            message = json.loads(error.read())["error_description"]
        except Exception:
            raise error
        rate_limited = (self.retry_rate_limit and error.code == 503 and
                        message == "Over Rate Limit")
        if not rate_limited:
            raise QuipError(error.code, message, error)
        # Wait until the limit resets (at least two seconds), then retry.
        reset_time = float(error.headers.get("X-RateLimit-Reset"))
        delay = max(2, reset_time - time.time() + 1)
        logging.warning("Rate Limit, delaying for %d seconds" % delay)
        time.sleep(delay)
        return self.get_blob(thread_id, blob_id)
def search_Google(self):
    """Run the search at ``self.get_url()`` and collect matching links.

    Every ``h3.r`` result link is normalised with ``self.parse_url``; a
    ``(site, url)`` pair is appended to ``self.urls`` for each site in
    ``self.sites`` whose lower-cased name occurs in the link. Errors are
    printed, never raised. ``self.urls`` is printed at the end.
    """
    # Kept separately so the error message names the URL that was requested
    # rather than the last result link.
    search_url = self.get_url()
    print(search_url)
    self.only_api = False
    request = urllib.request.Request(search_url)
    # The literal used to contain a line continuation, which embedded stray
    # whitespace in the header value.
    request.add_header('User-Agent',
                       'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)')
    try:
        with urllib.request.urlopen(request) as search_results:
            # The server may not declare a charset; fall back to UTF-8.
            encoding = (search_results.headers.get_content_charset()
                        or 'utf-8')
            try:
                resp = search_results.read().decode(encoding)
            except Exception:
                print("could not connect to Google")
                resp = None
        if resp is not None:
            soup = BeautifulSoup(resp)
            elements = soup.select("h3.r")
            for site in self.sites:
                for element in elements:
                    anchors = element.select("a")
                    if not anchors:
                        # Result heading without a link: nothing to record.
                        continue
                    link = self.parse_url(anchors[0]['href'])
                    if site.name.lower() in link:
                        self.urls.append((site, link))
    except Exception as e:
        print(e)
        print("Error : opening url " + search_url)
    print(self.urls)
def query(resource, mbid, includes=[]):
    """Look up *resource* / *mbid* on the MusicBrainz web service.

    *includes* is a list of ``inc`` values joined with ``+``. Answers are
    cached in the ``musicbrainzqueries`` table, keyed by URL; the web
    service is only contacted on a cache miss.

    Returns the root node of the parsed XML with all namespace prefixes
    stripped from the tags. A 404 is re-raised as ``HTTPError``; any other
    HTTP error becomes ``ConnectionError``.
    """
    url = '{}/{}/{}'.format(wsURL, resource, mbid)
    if queryCallback:
        queryCallback(url)
    if len(includes) > 0:
        url += '?inc={}'.format('+'.join(includes))
    logging.debug(__name__, 'querying {}'.format(url))

    select_sql = "SELECT xml FROM {}musicbrainzqueries WHERE url=?".format(db.prefix)
    cached = db.query(select_sql, url)
    try:
        data = cached.getSingle()
    except db.EmptyResultException:
        # Cache miss: fetch from the web service and remember the answer.
        try:
            request = urllib.request.Request(url)
            request.add_header('User-Agent', 'Maestro/0.4.0 (https://github.com/maestromusic/maestro)')
            with urllib.request.urlopen(request) as response:
                data = response.read()
        except urllib.error.HTTPError as e:
            if e.code != 404:
                raise ConnectionError(e.msg)
            raise e
        insert_sql = "INSERT INTO {}musicbrainzqueries (url, xml) VALUES (?,?)".format(db.prefix)
        db.query(insert_sql, url, data)

    root = etree.fromstring(data)
    # Drop the "{namespace}" prefix from every tag.
    for element in root.iter():
        if element.tag.startswith('{'):
            element.tag = element.tag.rpartition('}')[2]
    return root
def upload_file(self, file_name, file_path):
    """
    Upload a local file to the server.

    Args:
        :param file_name: The name the uploaded file gets on the server.
        :param file_path: The path of the local file to upload.

    Returns:
        :return: A GPFile object wrapping the ``Location`` reported by the
            server, or None if the request fails or the status is not 201.
    """
    upload_url = self.url + '/rest/v1/data/upload/job_input?name=' + file_name
    request = urllib.request.Request(upload_url)
    if self.authorization_header() is not None:
        request.add_header('Authorization', self.authorization_header())
    request.add_header('User-Agent', 'GenePatternRest')

    with open(file_path, 'rb') as source:
        payload = source.read()

    try:
        response = urllib.request.urlopen(request, payload)
    except IOError:
        print("authentication failed")
        return None

    if response.getcode() == 201:
        return GPFile(self, response.info().get('Location'))

    print("file upload failed, status code = %i" % response.getcode())
    return None
def _defaultFetcher(url):
    """Default cssutils implementation of the fetch-URL function.

    Opens ``url`` and returns ``(encoding, content)``. Failures to open the
    URL are reported through ``log.warn`` and yield ``None``; a mime type
    other than ``text/css`` is reported through ``log.error``.
    """
    try:
        req = urllib.request.Request(url)
        req.add_header('User-agent',
                       'cssutils %s (http://www.cthedot.de/cssutils/)' % VERSION)
        res = urllib.request.urlopen(req)
    except urllib.error.HTTPError as e:
        # HTTP-level failure such as a 404
        log.warn('HTTPError opening url=%s: %s %s' % (url, e.code, e.msg),
                 error=e)
    except urllib.error.URLError as e:
        # unsupported schemes (e.g. mailto:) or other I/O problems
        log.warn('URLError, %s' % e.reason, error=e)
    except OSError as e:
        # for instance a file URL pointing at a missing file
        log.warn(e, error=OSError)
    except ValueError as e:
        # not a URL at all, e.g. "1"
        log.warn('ValueError, %s' % e.args[0], error=ValueError)
    else:
        if not res:
            return None
        mimeType, encoding = encutils.getHTTPInfo(res)
        if mimeType != 'text/css':
            log.error('Expected "text/css" mime type for url=%r but found: %r'
                      % (url, mimeType), error=ValueError)
        content = res.read()
        if hasattr(res, 'close'):
            res.close()
        return encoding, content
def __call(self, url=API_URL, params=None, data=None, headers=None):
    """Common method for API call.

    url: API URL template, formatted with ``self.api``
    params: query string parameters (not modified)
    data: POST data as str or bytes; ``None`` sends no body
    headers: additional request headers (not modified)

    Return: parsed JSON structure or raise GooglError.
    """
    # Work on copies: the old shared ``{}`` defaults were mutated, so the
    # key and user IP leaked between calls and into the caller's dict.
    query = dict(params or {})
    query.update(key=self.key)
    if self.userip is not None:
        query.update(userip=self.userip)
    full_url = "%s?%s" % (url % self.api, urllib.parse.urlencode(query))

    # bytes(None, encoding=...) raises TypeError, so the default data=None
    # could never work; encode only real text.
    if data is None or isinstance(data, bytes):
        body = data
    else:
        body = bytes(data, encoding="UTF-8")

    request = urllib.request.Request(full_url, data=body,
                                     headers=dict(headers or {}))
    if self.referer is not None:
        request.add_header("Referer", self.referer)
    if self.client_login is not None:
        request.add_header("Authorization",
                           "GoogleLogin auth=%s" % self.client_login)
    try:
        with urllib.request.urlopen(request) as response:
            return json.loads(str(response.read(), encoding="UTF-8"))
    except urllib.error.HTTPError as e:
        error = json.loads(e.fp.read())
        raise GooglError(error["error"]["code"], error["error"]["message"])
def get_recent_jobs(self, n_jobs=10):
    """
    Return the user's most recently submitted jobs on the GenePattern server.

    Args:
        n_jobs: how many jobs to request (default 10).

    Returns:
        A list of GPJob objects, in the order the server returned them.
    """
    jobs_url = ''.join((
        self.url,
        '/rest/v1/jobs/?pageSize=', str(n_jobs),
        '&userId=', urllib.parse.quote(self.username),
        '&orderBy=-dateSubmitted',
    ))
    request = urllib.request.Request(jobs_url)
    if self.authorization_header() is not None:
        request.add_header('Authorization', self.authorization_header())
    request.add_header('User-Agent', 'GenePatternRest')

    reply = urllib.request.urlopen(request)
    listing = json.loads(reply.read().decode('utf-8'))

    # Wrap every entry of the JSON array in a GPJob carrying its info.
    recent = []
    for entry in listing['items']:
        job = GPJob(self, entry['jobId'])
        job.info = entry
        job.load_info()
        recent.append(job)
    return recent
def _query(self, url, request, data=None):
    """
    Internal helper, called by get(), post(), put(), etc.

    Sends ``request`` (with ``data`` as its body when not None), keeps the
    session cookie up to date and returns the post-processed response.
    ``url`` is accepted but not used here.

    Raises ``ServerError`` carrying the HTTP status code and the
    post-processed error body when the server answers with an error.
    """
    try:
        # Send our session identifier back to the server, if we have one.
        if self.session:
            request.add_header("Cookie", self.session)

        # A non-None `data` becomes the request body. Headers and body are
        # read before the connection is closed.
        with urllib.request.urlopen(request, data) as connection:
            headers = dict(connection.info())
            body = connection.read()

        # Remember the session identifier handed out by the server.
        cookie = headers.get("Set-Cookie")
        if cookie is not None:
            self.session = cookie

        return self._post_processing(body, headers)
    except urllib.error.HTTPError as e:
        # The server answered with an HTTP error status (400, 403, 404...).
        # Its body may explain why, and the headers are needed for the
        # post-processing step.
        error_headers = dict(e.headers)
        error_body = e.read()
        raise ServerError(
            e.code, self._post_processing(error_body, error_headers)
        ) from None
def glsrequest(uri, method, data=None):
    '''
    Send a request to the server and return the reply as an XML Element.

    'uri' may be absolute or relative to _BASEURI.
    'method' in ('GET', 'POST', 'PUT')
    'data' can be a string, a bytestring or an Element instance

    Raises GlslibException for an unsupported method; HTTP and connection
    errors are logged and re-raised.
    '''
    if method not in {'GET', 'POST', 'PUT'}:
        raise GlslibException(MSGUNSUPPORTEDMETHOD % method)
    if not uri.startswith(_BASEURI):
        uri = _BASEURI.rstrip('/') + '/' + uri.lstrip('/')

    request = urllib.request.Request(uri)
    request.add_header("Authorization", "Basic %s" % _AUTHSTR)

    # urllib needs the body as bytes: serialise Elements, encode text.
    if etree.iselement(data):
        data = etree.tostring(data)
    elif isinstance(data, str):
        data = data.encode('utf-8')
    request.add_header('Content-Type', 'application/xml')
    # Request.add_data() was removed in Python 3.4; assign the attribute.
    request.data = data
    request.get_method = lambda: method

    msg = '%s %s\n%s\n%s' % (request.get_method(),
                             request.get_full_url(),
                             request.headers,
                             data.decode('utf-8') if data else '')
    logger.debug(msg)
    try:
        with urllib.request.urlopen(request) as r:
            return etree.XML(r.read())
    except urllib.error.HTTPError as httperr:
        logger.error(httperr.read())
        raise
    except urllib.error.URLError:
        logger.error(request.get_full_url())
        raise
def execute_request(url, method, payload=""):
    """
    Executes an HTTP request and returns the result in a string

    Args:
        url (str): the full URL to send a request to
        method (str): the HTTP method to use
        payload (str): the json payload to send if appropriate for HTTP
            method (default "")

    Returns:
        str: the content returned by the server, decoded as UTF-8 (an empty
            string when the body is empty)
    """
    if print_calls:
        print(url)

    if payload:
        request = urllib.request.Request(
            url, method=method, data=payload.encode("UTF-8"))
        request.add_header("Content-Type", "application/json")
    else:
        request = urllib.request.Request(url, method=method)
    request.add_header("Accept-Encoding", "gzip")

    with urllib.request.urlopen(request) as response:
        content = response.read()
        content_encoding = response.info().get("Content-Encoding") or ""

    # gzip is only requested, not guaranteed: decompress only when the
    # server says it compressed the body.
    if content and "gzip" in content_encoding.lower():
        content = zlib.decompress(content, zlib.MAX_WBITS | 16)
    return content.decode(encoding="UTF-8")
def SendRequest(host, session, requestString):
    """POST ``requestString`` as a JSON query to ``host`` on port 16742.

    The session id is sent in the ``session`` cookie. Returns the decoded
    JSON reply.
    """
    data = json.dumps({"query": requestString}).encode("ASCII")
    request = urllib.request.Request(host + ":16742", data)
    request.add_header("Cookie", "session=" + session)
    # HTTPResponse has no readall() on current Python 3; read() returns the
    # whole body. The context manager closes the connection afterwards.
    with urllib.request.urlopen(request) as reply:
        response = json.loads(reply.read().decode('ascii'))
    return response
def open(url, query_params=None, user_agent=None, post_data=None,
         referer=None, get_method=None, cookies=False, **kwargs):
    """Open ``url`` and return the response object.

    Extra keyword arguments are merged into ``query_params`` (the passed
    dict is updated in place) before the URL is built with ``prepare_url``.
    ``user_agent`` defaults to ``ua_firefox``; ``get_method`` overrides the
    HTTP method; ``cookies`` enables the shared cookie ``jar``.
    """
    query_params = {} if query_params is None else query_params
    user_agent = ua_firefox if user_agent is None else user_agent
    query_params.update(kwargs)

    request = urllib.request.Request(prepare_url(url, query_params), post_data)
    if get_method is not None:
        request.get_method = lambda: get_method
    request.add_header('User-Agent', user_agent)
    if referer is not None:
        request.add_header('Referer', referer)

    handlers = [urllib.request.HTTPCookieProcessor(jar)] if cookies else []
    return urllib.request.build_opener(*handlers).open(request)
def send_messages(self, access_token, messages, timeout):
    """Send messages to server, along with user authentication.

    :param access_token: token appended to the submission URL.
    :param messages: JSON-serialisable payload, sent together with the
        assignment endpoint.
    :param timeout: timeout handed to ``urllib.request.urlopen``.
    :return: the decoded JSON reply.
    """
    data = {
        'assignment': self.assignment.endpoint,
        'messages': messages,
    }
    serialized_data = json.dumps(data).encode(encoding='utf-8')

    address = self.SUBMISSION_ENDPOINT.format(
        server=self.args.server,
        prefix='http' if self.args.insecure else 'https')
    address_params = {
        'access_token': access_token,
        'client_version': client.__version__,
    }
    # Log before the query string is attached so the access token never
    # ends up in the log.
    log.info('Sending messages to %s', address)
    # urlencode escapes reserved characters; plain "{}={}" joining produced
    # a broken URL for values containing '&', '=', '+' or spaces.
    address += urllib.parse.urlencode(address_params)

    request = urllib.request.Request(address)
    request.add_header("Content-Type", "application/json")
    with urllib.request.urlopen(request, serialized_data, timeout) as response:
        return json.loads(response.read().decode('utf-8'))
def getWebPage(url, headers, cookies, postData=None):
    """Fetch ``url`` and return the raw body as bytes, or None on any error.

    With ``postData`` the data is form-encoded and POSTed through an opener
    with cookie handling; otherwise a plain GET is made. ``cookies`` is sent
    as the ``Cookie`` header. A gzip-labelled body is decompressed.
    """
    try:
        if postData:
            form = urllib.parse.urlencode(postData).encode('utf-8')
            request = urllib.request.Request(url, data=form, headers=headers)
            request.add_header('Cookie', cookies)
            opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor)
            response = opener.open(request)
        else:
            print('Fetching ' + url)
            request = urllib.request.Request(url, None, headers)
            request.add_header('Cookie', cookies)
            response = urllib.request.urlopen(request)

        gzipped = response.info().get('Content-Encoding') == 'gzip'
        raw = response.read()
        if gzipped:
            return gzip.GzipFile(fileobj=BytesIO(raw)).read()
        return raw
    except Exception as e:
        print("Error processing webpage: " + str(e))
        return None
def execute_request(url, method="GET", payload=""):
    """Executes an HTTP request and returns the result in a string

    url str the full URL to send a request to
    method str the HTTP method to use
    payload str the json payload to send if appropriate for HTTP method

    return str the content returned by the server, decoded as UTF-8
    """
    if print_calls:
        print(url)

    if payload:
        # Request() takes the body as ``data``; the former ``payload=``
        # keyword raised TypeError for every request with a body.
        request = urllib.request.Request(
            url, method=method, data=payload.encode("UTF-8"))
        request.add_header("Content-Type", "application/json")
    else:
        request = urllib.request.Request(url, method=method)
    request.add_header("Accept-Encoding", "gzip")

    with urllib.request.urlopen(request) as response:
        content = response.read()
        content_encoding = response.info().get("Content-Encoding") or ""

    # gzip is only requested, not guaranteed, and an empty body cannot be
    # decompressed: gunzip only a non-empty body labelled as gzip.
    if content and "gzip" in content_encoding.lower():
        content = zlib.decompress(content, zlib.MAX_WBITS | 16)
    return content.decode(encoding="UTF-8")
def order_complete(request):
    """Verify a payment with the iamport API and report the result as JSON.

    Reads ``imp_uid`` and ``paid_amount`` from the POST data, fetches an
    access token, looks the payment up and answers ``{"check":true}`` when
    the payment is paid with the expected amount, or is a virtual-bank
    payment still in the ``ready`` state. Otherwise it answers
    ``{"check":false,"pay_status":...}``.
    """
    imp_uid = request.POST.get('imp_uid')
    paid_amount = request.POST.get('paid_amount')
    if not imp_uid:
        # Nothing to look up; previously this crashed on str + None.
        return HttpResponse(json.dumps(
            {"check": False, "pay_status": None}, separators=(',', ':')))

    # Obtain an access token.
    credentials = urllib.parse.urlencode(
        {"imp_key": IMP_KEY, "imp_secret": IMP_SECRET}).encode('UTF-8')
    with urllib.request.urlopen(
            'https://api.iamport.kr/users/getToken/', credentials) as f:
        result_json = json.loads(f.read().decode('UTF-8'))
    access_token = result_json['response']['access_token']

    # Look the payment up by imp_uid. The value comes from the client, so
    # it is quoted before being placed in the URL path.
    url = 'https://api.iamport.kr/payments/' + urllib.parse.quote(imp_uid, safe='')
    payment_request = urllib.request.Request(url)
    payment_request.add_header("X-ImpTokenHeader", access_token)
    with urllib.request.urlopen(payment_request) as response:
        result2_json = json.loads(response.read().decode('UTF-8'))

    # Evaluate the result.
    payment = result2_json['response']
    pay_amount = payment['amount']  # arrives as an int
    pay_status = payment['status']
    pay_method = payment['pay_method']

    if pay_status == 'paid' and str(pay_amount) == paid_amount:
        return HttpResponse('{"check":true}')
    elif pay_status == 'ready' and pay_method == 'vbank':
        return HttpResponse('{"check":true}')
    else:
        # json.dumps quotes the status; the hand-built string emitted it
        # bare, which is not valid JSON.
        return HttpResponse(json.dumps(
            {"check": False, "pay_status": pay_status},
            separators=(',', ':')))
def sendRequest(self, path, data=None, token=True, post=True, headers=None):
    """Send a request to ``self.apiURL + path`` and return the response.

    :param path: path appended to ``self.apiURL``.
    :param data: body text for a POST; must be empty for a GET.
    :param token: when true, send ``self.token`` as ``Authorization``.
    :param post: POST ``data`` as XML when true, otherwise GET.
    :param headers: extra request headers (not modified).
    :return: the response object, or None when the request failed or could
        not be built. Errors are printed, not raised.
    """
    data = {} if data is None else data
    # Copy: the former shared ``{}`` default was mutated, so Content-Type
    # and Authorization leaked into later calls and into the caller's dict.
    headers = dict(headers) if headers else {}
    response = None
    request = None

    if post:
        headers['Content-Type'] = 'application/xml; charset=UTF-8'
    if token:
        headers['Authorization'] = '%s' % self.token

    try:
        if post:
            request = urllib.request.Request(self.apiURL + path,
                                             data.encode('utf8'))
        elif len(data) == 0:
            request = urllib.request.Request(self.apiURL + path)
        else:
            print('I have data in sendRequest but i don\'t know what i should do with it :D')

        if request is not None:
            for k, v in headers.items():
                request.add_header(k, v)
            response = urllib.request.urlopen(request)
    except urllib.error.HTTPError as e:
        print('Error while requesting API call: %s (%s)' % (e.msg, e.code))
        print('URL: %s' % (self.apiURL + path))
    except urllib.error.URLError as e:
        print('Error while requesting API call: %s' % (e.reason))

    return response
def retrieve_page(dbinfo, url):
    """
    Retrieve a web page, with retries if necessary.

    Returns the page decoded as UTF-8, or None after ``RETRY_ATTEMPTS``
    failed attempts. The wait between attempts starts at ``CRAWL_DELAY``
    and doubles after every failure. Failures are reported through ``log``.
    """
    crawl_delay = CRAWL_DELAY
    attempt = 1
    while True:
        try:
            request = urllib.request.Request(url)
            request.add_header(
                'User-Agent',
                ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) ' +
                 'Gecko/20100101 Firefox/21.0')
            )
            request.add_header(
                'Accept',
                ('text/html,application/xhtml+xml,application/xml;' +
                 'q=0.9,*/*;q=0.8')
            )
            with urllib.request.urlopen(request) as response:
                return response.read().decode('utf-8')
        except Exception:
            # Not a bare ``except``: Ctrl-C and SystemExit must be able to
            # stop the retry loop instead of being treated as a failure.
            if attempt >= RETRY_ATTEMPTS:
                log(dbinfo, 'ERROR',
                    'Error retrieving web page, too many retries: ' + url)
                return None
            log(dbinfo, 'WARNING',
                'Problem retrieving web page, retrying: ' + url)
            sleep(crawl_delay)
            crawl_delay = crawl_delay * 2
            attempt += 1
def make_call(api_url, query_args=None):
    """Call the API at ``api_url`` and return the parsed XML response.

    ``api_url`` is expected to be fully constructed; when ``query_args`` is
    given it is encoded with ``urlencode_no_plus`` and appended as-is.

    Raises ``APIException`` carrying the page contents when the response is
    not valid XML, which generally indicates an API error.
    """
    target = api_url
    if query_args is not None:
        get_params = urlencode_no_plus.urlencode_no_plus(query_args)
        target = api_url + '%s' % get_params

    request = urllib.request.Request(target)
    # These headers avoid some odd errors from the host.
    for header, value in (('Referer', 'http://thegamesdb.net/'),
                          ('User-agent', 'Mozilla/5.0')):
        request.add_header(header, value)

    page = urllib.request.urlopen(request).read()

    # A parse failure most likely means the API returned an error page, so
    # hand its contents to the caller inside the exception.
    try:
        return ET.fromstring(page)
    except ET.ParseError:
        raise APIException(page)
def get_msgbox(opener):
    """Open the message box page (``/box.php``) with ``opener`` and return the response."""
    request = urllib.request.Request(emuch_url + '/box.php')
    header_fields = (
        ("Content-Type", "application/x-www-form-urlencoded;charset=utf-8"),
        ('User-Agent', user_agent),
    )
    for field, value in header_fields:
        request.add_header(field, value)
    return opener.open(request)
def get_credit(opener):
    """Submit the daily-credit form and report the outcome.

    Returns the index of the first marker found in the GBK-decoded reply:
    0 for success, 1 when the credit was already collected, 2 otherwise.
    The matching message is written with ``print_log``.
    """
    form = {
        'formhash': '2c8099cd',
        'getmode': '1',  # 2
        'message': '',
        # GBK bytes of the submit button caption
        'creditsubmit': b'\xc1\xec\xc8\xa1\xba\xec\xb0\xfc',
    }
    payload = urllib.parse.urlencode(form).encode('utf-8')

    request = urllib.request.Request(emuch_url + '/memcp.php?action=getcredit', payload)
    request.add_header("Content-Type", "application/x-www-form-urlencoded;charset=utf-8")
    request.add_header('User-Agent', user_agent)

    print_log('try to get credit...')
    body = opener.open(request).read().decode('gbk')

    # Reply markers (GBK encoded), paired by position with `outcomes`.
    # The empty string matches any reply and acts as the fallback.
    markers = [
        b'\xb9\xa7\xcf\xb2\xa3\xa1\xc4\xe3\xbb\xf1\xb5\xc3'.decode('gbk'),
        b'\xbd\xf1\xcc\xec\xb5\xc4\xba\xec\xb0\xfc\xa3\xac\xc4\xfa\xd2\xd1\xbe\xad\xc1\xec\xc8\xa1\xc1\xcb\xa3\xac\xd2\xbb\xcc\xec\xbe\xcd\xd2\xbb\xb4\xce\xbb\xfa\xbb\xe1'.decode('gbk'),
        '',
    ]
    outcomes = ['get credit successfully!',
                'can not get twice!',
                'undefined error!']
    for index, marker in enumerate(markers):
        if marker not in body:
            continue
        print_log(outcomes[index])
        return index
def _request(self, method, url, get=None, post=None, auth=False):
    """Send a request to ``self.url + url`` and return the decoded JSON reply.

    ``get`` is appended as a query string and ``post`` is sent as a
    form-encoded body. With ``auth`` the stored token type and access token
    are sent in the ``Authorization`` header.

    Raises ``utils.APIError`` on HTTP errors (with a dedicated message for
    400) and on socket timeouts.
    """
    if get:
        url = "{}?{}".format(url, urllib.parse.urlencode(get))
    body = urllib.parse.urlencode(post).encode('utf-8') if post else post

    request = urllib.request.Request(self.url + url, body)
    request.get_method = lambda: method

    if auth:
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        token_type = self._get_userconfig('token_type').capitalize()
        access_token = self._get_userconfig('access_token')
        request.add_header('Authorization',
                           '{0} {1}'.format(token_type, access_token))

    try:
        reply = self.opener.open(request, timeout=10)
        return json.loads(reply.read().decode('utf-8'))
    except urllib.request.HTTPError as e:
        if e.code != 400:
            raise utils.APIError("Connection error: %s" % e)
        raise utils.APIError("Invalid PIN. It is either probably expired or meant for another application.")
    except socket.timeout:
        raise utils.APIError("Connection timed out.")
def login_emuch(opener, username, passwd):
    """Log in through ``opener`` and return True when the login succeeded.

    The login counts as failed when the GBK-decoded reply contains the
    site's wrong-account-or-password message. The outcome is written with
    ``print_log``.
    """
    form = {
        'formhash': 'f6ac2e8a',
        'referer': 'http://emuch.net/bbs/index.php',
        'username': username,
        'password': passwd,
        'cookietime': '31536000',
        # GBK bytes of the login button caption
        'loginsubmit': b'\xbb\xe1\xd4\xb1\xb5\xc7\xc2\xbc',
    }
    payload = urllib.parse.urlencode(form).encode('utf-8')

    request = urllib.request.Request(emuch_url + '/logging.php?action=login', payload)
    request.add_header("Content-Type", "application/x-www-form-urlencoded;charset=utf-8")
    request.add_header('User-Agent', user_agent)

    body = opener.open(request).read().decode('gbk')

    # GBK bytes of the site's "wrong account or password, please retry" text
    failure_marker = b'\xca\xe4\xc8\xeb\xb5\xc4\xd5\xca\xba\xc5\xc3\xdc\xc2\xeb\xb4\xed\xce\xf3\xa3\xac\xc7\xeb\xd6\xd8\xca\xd4'.decode('gbk')
    failed = failure_marker in body
    print_log('error usename or password!' if failed else "logined successfully!")
    return not failed
def download(url, user_agent='wswp', num_retries=2, charset='utf-8', proxy=None):
    """Download ``url`` and return its HTML text, or None on failure.

    :param user_agent: value of the ``User-agent`` request header.
    :param num_retries: how many more times to retry after a 5xx answer.
    :param charset: fallback charset when the response declares none.
    :param proxy: optional HTTP proxy; when given it is installed as the
        process-wide urllib opener.
    """
    print('Downloading:', url)
    request = urllib.request.Request(url)
    request.add_header('User-agent', user_agent)
    try:
        if proxy:
            proxy_support = urllib.request.ProxyHandler({'http': proxy})
            opener = urllib.request.build_opener(proxy_support)
            urllib.request.install_opener(opener)
        with urllib.request.urlopen(request) as resp:
            cs = resp.headers.get_content_charset() or charset
            html = resp.read().decode(cs)
    except (URLError, HTTPError, ContentTooShortError) as e:
        print('Download error:', e.reason)
        html = None
        # Only server-side (5xx) errors are worth retrying.
        if num_retries > 0 and hasattr(e, 'code') and 500 <= e.code < 600:
            # Pass everything by keyword: the old positional call handed
            # ``num_retries - 1`` to ``user_agent`` and reset the retry
            # counter, so a persistent 5xx recursed without end.
            return download(url, user_agent=user_agent,
                            num_retries=num_retries - 1,
                            charset=charset, proxy=proxy)
    return html
if len(output.strip()): puts(output) try: try_exec("cd /var/www/%s" % creds['domain']) try_exec("git pull --rebase") try_exec("git rev-parse HEAD > ./git-sha1.yml") try_exec("echo r | sudo -u daemon tee /var/tmp/uwsgi-%s.fifo > /dev/null" % creds['domain']) puts('Notifying Rollbar of deploy ... ', False) post_data = urllib.parse.urlencode({ 'environment': 'production', 'access_token': config.read_secret('rollbar_key'), 'local_username': pwd.getpwuid(os.getuid()).pw_name, 'revision': env.sha1 }) request = urllib.request.Request('https://api.rollbar.com/api/1/deploy/') request.add_header('Content-Type', 'application/x-www-form-urlencoded;charset=utf-8') urllib.request.urlopen(request, post_data.encode('utf-8')) puts('done') finally: connection.close()
def channelUpdate(channel):
    """Fetch the latest news of one channel and store the unseen articles.

    :param channel: key into ``channelDict``; its entry supplies the
        channel id used in the API request and the database table name.
    :return: the number of articles inserted by this call (also stored in
        ``countDict[channel]``).
    """
    newsCount = 0  # articles inserted for this channel

    # Request parameters (all kept as str).
    para_channelId = channelDict[channel][0]  # id of the requested channel
    para_channelName = channel
    para_maxResult = "50"  # articles per page (API default 20, maximum 100)
    para_needAllList = "0"  # whether to return the allList image/paragraph data
    para_needContent = "1"  # "1" returns the article body
    para_needHtml = "1"  # "1" returns the body as HTML
    tableName = channelDict[channel][1]
    pageList = [1]  # pages to pull for each channel

    # A table name cannot be bound as a query parameter, so only that part
    # is interpolated; every value is passed to execute() separately, which
    # lets the driver do the quoting.
    sql_check = "SELECT id FROM %s WHERE id=%%s" % tableName
    sql_insert = (
        "INSERT INTO %s (id, title, channelName, source, pubtime, savetime, "
        "link, havepic, content , html, picurl1, picurl2, picurl3, keywords) "
        "VALUES (%%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s, %%s, "
        "%%s, %%s)" % tableName
    )

    for k in pageList:
        # Build the request URL.
        myQuerys = urllib.parse.urlencode([
            ("channelId", para_channelId),
            ("maxResult", para_maxResult),
            ("needAllList", para_needAllList),
            ("needContent", para_needContent),
            ("needHtml", para_needHtml),
            ("page", str(k)),  # which page of the list is requested
        ])
        url = host + path + '?' + myQuerys

        # Request the news list and parse the JSON reply.
        request = urllib.request.Request(url)
        request.add_header('Authorization', 'APPCODE ' + appcode)
        with urllib.request.urlopen(request) as response:
            content = response.read()
        newsDict = json.loads(content)
        contentlist = newsDict['showapi_res_body']['pagebean']['contentlist']

        # Walk the page from its last entry to its first. Iterating over
        # what was actually returned avoids the IndexError the fixed
        # 49..0 index range raised on pages with fewer than 50 entries.
        for item in reversed(contentlist[:int(para_maxResult)]):
            newsSource = item['source']
            newsPubtime = item['pubDate']

            # The MD5 of the article URL is the primary key.
            newsLink = item['link']
            newsMD5 = hashlib.md5(newsLink.encode(encoding='utf-8')).hexdigest()

            rowTitle = item['title']
            rowContent = item['content']
            rowHTML = item['html']

            # Image handling: keep at most the first three image URLs.
            picUrls = ["", "", ""]
            images = item.get('imageurls') or []
            picNum = len(images)
            try:
                if item['havePic']:
                    picNum = min(picNum, 3)
                    for slot in range(picNum):
                        picUrls[slot] = images[slot]['url']
            except KeyError:
                picNum = 0

            # Skip articles that are already stored.
            cur.execute(sql_check, (newsMD5,))
            checkExist = cur.fetchone()
            # The current time is the article's storage timestamp.
            newsSaveTime = datetime.datetime.now().strftime(
                '%Y-%m-%d %H:%M:%S')
            if checkExist is not None:
                continue
            # Skip articles without a body.
            if len(rowContent) == 0:
                continue

            # Keywords are only extracted for articles that will be stored,
            # which keeps the number of extraction calls down.
            keywords_str = keysExtract.keywords_by_jieba_TF(rowContent)
            try:
                cur.execute(sql_insert, (
                    newsMD5, rowTitle, para_channelName, newsSource,
                    newsPubtime, newsSaveTime, newsLink, picNum,
                    rowContent, rowHTML,
                    picUrls[0], picUrls[1], picUrls[2], keywords_str))
                conn.commit()
                newsCount = newsCount + 1
            except Exception:
                # A failed insert is rolled back and simply not counted.
                conn.rollback()

    countDict[channel] = newsCount
    return newsCount
def _download(self, url):
    """Download ``url`` into the mirror directory and return its sha256.

    The file is fetched into a temporary directory, hashed, and moved into
    the mirror under its sha256. When ``self.sha`` is set, has a stored
    ETag and is cached, the ETag is sent as ``If-None-Match``; a
    304 answer then returns ``self.sha`` unchanged.

    Raises ``SourceError`` (marked temporary) on HTTP, URL and OS errors.
    """
    try:
        with self.cargo.tempdir() as tmpdir:
            fallback_name = os.path.basename(url)
            request = urllib.request.Request(url)
            request.add_header("Accept", "*/*")

            # The etag is only used when the cached file matches the
            # requested ref, so that a corrupted download can still be
            # recovered from.
            if self.sha:
                etag = self._get_etag(self.sha)
                if etag and self.is_cached():
                    request.add_header("If-None-Match", etag)

            with contextlib.closing(urllib.request.urlopen(request)) as response:
                info = response.info()
                if "ETag" in info:
                    etag = info["ETag"]
                else:
                    etag = None

                filename = os.path.basename(info.get_filename(fallback_name))
                local_file = os.path.join(tmpdir, filename)
                with open(local_file, "wb") as sink:
                    shutil.copyfileobj(response, sink)

            # The url-specific mirror directory must exist before the move.
            os.makedirs(self._get_mirror_dir(), exist_ok=True)

            # Files are stored under their sha256sum.
            sha256 = utils.sha256sum(local_file)

            # Always move the new file over an existing one, in case the
            # old one was corrupted somehow.
            os.rename(local_file, self._get_mirror_file(sha256))

            if etag:
                self._store_etag(sha256, etag)
            return sha256

    except urllib.error.HTTPError as e:
        if e.code == 304:
            # 304 Not Modified: the etag is only sent for the matching sha,
            # so the currently specified sha is what would have been
            # downloaded.
            return self.sha
        message = "{}: Error mirroring {}: {}".format(self, url, e)
        raise SourceError(message, temporary=True) from e

    except (urllib.error.URLError, urllib.error.ContentTooShortError, OSError) as e:
        message = "{}: Error mirroring {}: {}".format(self, url, e)
        raise SourceError(message, temporary=True) from e
@Author: Rover @Date: 2019-11-27 20:47:30 @LastEditors: Rover @LastEditTime: 2019-11-27 21:26:05 ''' import urllib.request from twisted.python.compat import cookielib url = 'http://www.baidu.com' print('way 1:') response1 = urllib.request.urlopen(url) print(response1.getcode()) print(len(response1.read())) print('way 2') request = urllib.request.Request(url) request.add_header("user-agent", "Mozilla/5.0") response2 = urllib.request.urlopen(request) print(response2.getcode()) print(len(response2.read())) print('way 3') cj = cookielib.CookieJar() opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj)) urllib.request.install_opener(opener) response3 = urllib.request.urlopen(url) print(response3.getcode()) print(cj) print(response3.read())
import urllib.request
import mmap

# Upload one checkpoint file to the local HTTP server with PUT, then download
# it again so the round trip can be verified.

# Pick exactly one of the checkpoint files to transfer.
#fname = 'nmt_checkpoint-170281.data-00000-of-00001'
#fname = 'nmt_checkpoint-170281.index'
fname = 'nmt_checkpoint-170281.meta'

# Context managers guarantee the file, the memory map and the HTTP responses
# are released even when a request fails.
with open(
        '/media/jg/24aee1e5-3eae-44a2-905d-aa7923b69d48/nmt_training_output/nmt_2019-05-09-08-56-35.624988/'
        + fname, 'rb') as f:
    with mmap.mmap(f.fileno(), 0,
                   access=mmap.ACCESS_READ) as mmapped_file_as_string:
        # Do the request; the memory map is handed over as the request body.
        request = urllib.request.Request("http://192.168.1.15:8000",
                                         mmapped_file_as_string,
                                         method='PUT')
        request.add_header("Content-Type", "application/zip")
        request.add_header("Content-Disposition", fname)
        with urllib.request.urlopen(request) as response:
            print(response.info())
            print(response.read())

# Now re-download the thing we just uploaded (veracity)
with urllib.request.urlopen('http://192.168.1.15:8000/' + fname) as response:
    data = response.read()
with open(fname, 'wb') as f:
    f.write(data)
import json import csv import os from bitcoin_functions import isBTCAddress urls = ['https://bitfunder.com/assetlist.json'] try: os.remove('../Lists/shareholders.csv') except: pass for url in urls: request = urllib.request.Request(url) request.add_header( 'User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0' ) res = urllib.request.urlopen(request) assets = json.loads(res.readall().decode('utf-8')) duplicate_list = [] print(len(assets), "assets retrieved.") with open('../Lists/shareholders.csv', 'a') as f: writer = csv.writer(f) for asset in assets: address = asset['user_btc_address'] if address not in duplicate_list and isBTCAddress(address):
"Query": { "Find": { "Light" : { "sid": { "ne": "" } } } } } data = urllib.parse.urlencode(postData) # urlencode the data binary_data = data.encode(encoding) # POST needs binary data, so encode it # urlopen with data causes a POST request instead of a GET request = urllib.request.Request(requestUrl, data=binary_data) request.add_header('authorization', "Bearer " + access_token) #request.add_header('Content-Type', "application/json") print("\nRequesting Real Time Lighting Data: (" + requestUrl + ")\n") # perform the request response = urllib.request.urlopen(request) #The results are received as XML results = response.read().decode(encoding) # If the data were formatted as json, the line below would create a dictionary from it. #responseDictionary = json.loads(results) #Convert the XML data to a Dictionary
# Trying to build a Python script to pull out user information based on
# profile IDs (learning from Ben Hoff).
import json
import os
import urllib.request
import base64

url = "https://graph.facebook.com/v2.10/me/photos"

# NOTE(review): an access token committed to source control should be treated
# as leaked. FACEBOOK_API_KEY in the environment takes precedence; the literal
# is kept only as a fallback so existing runs behave as before.
facebook_api_key = os.environ.get('FACEBOOK_API_KEY',
                                  '5e4111fedb9de3c7cd76c7f857fb4119')

request = urllib.request.Request(url)
request.add_header('Authorization', 'Bearer {}'.format(facebook_api_key))
#request.add_header('access_token',facebook_api_key)

# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(request) as response:
    encoding = response.headers.get_content_charset()
    if encoding is None:
        print('encoding None!')
        encoding = 'utf-8'
    data = json.loads(response.read().decode(encoding))

print(data)

# Left at 45:12: the code started working for Ben Hoff but not for me; will
# try to figure out why.
def fetchPage(params=None):
    """Fetch a page over HTTP, retrying on errors.

    Args:
        params: dict of request options. Keys read here:
            ``link`` (URL, required), ``post_data`` (dict, turns the request
            into a form POST), ``hide_post_data`` (do not log the POST body),
            ``headers`` (iterable of ``(name, value)`` pairs), ``cookie``,
            ``refering`` (sent as ``Referer``), ``no-content`` (anything
            other than ``"false"`` skips reading the body) and ``error``
            (number of failed attempts so far, as a string).

    Returns:
        dict with ``new_url``, ``content``, ``status`` and ``header``.
        ``status`` is 200 on success and 500 once the link is missing or
        more than two attempts have failed.
    """
    if params is None:
        params = {}
    get = params.get
    link = get("link")
    ret_obj = {"new_url": link, "content": "", "status": 500, "header": ""}

    if get("post_data"):
        log("called for : " + repr(params['link']))
    else:
        log("called for : " + repr(params))

    if not link or int(get("error", "0")) > 2:
        log("giving up")
        ret_obj["status"] = 500
        return ret_obj

    if get("post_data"):
        encoded_post = urllib.parse.urlencode(get("post_data"))
        if get("hide_post_data"):
            log("Posting data", 2)
        else:
            log("Posting data: " + encoded_post, 2)
        # urllib requires the request body to be bytes on Python 3.
        request = urllib.request.Request(link, encoded_post.encode("utf-8"))
        request.add_header('Content-Type',
                           'application/x-www-form-urlencoded')
    else:
        log("Got request", 2)
        request = urllib.request.Request(link)

    if get("headers"):
        for head in get("headers"):
            request.add_header(head[0], head[1])

    request.add_header('User-Agent', USERAGENT)

    if get("cookie"):
        request.add_header('Cookie', get("cookie"))

    if get("refering"):
        log("Setting refering: " + get("refering"), 3)
        request.add_header('Referer', get("refering"))

    def retry():
        # Retry with a copy so the caller's dict is not modified.
        retry_params = dict(params)
        retry_params["error"] = str(int(get("error", "0")) + 1)
        return fetchPage(retry_params)

    try:
        log("connecting to server...", 1)

        with urllib.request.urlopen(request) as con:
            # The Python 3 message object has no ``.headers`` attribute;
            # rebuild a list of "Name: value" lines instead.
            # NOTE(review): confirm this is the shape callers expect.
            ret_obj["header"] = [
                "%s: %s\r\n" % pair for pair in con.info().items()
            ]
            ret_obj["new_url"] = con.geturl()

            if get("no-content", "false") == "false":
                inputdata = con.read()
                try:
                    ret_obj["content"] = inputdata.decode("utf-8")
                except UnicodeDecodeError:
                    # latin-1 maps every byte, so this cannot fail.
                    ret_obj["content"] = inputdata.decode("latin-1")

        log("Done")
        ret_obj["status"] = 200
        return ret_obj

    except urllib.error.HTTPError as e:
        # The error body can only be read once and arrives as bytes.
        error_body = ""
        if e.fp:
            error_body = e.fp.read().decode("utf-8", "replace")

        log("HTTPError : " + str(e))
        log("HTTPError - Headers: " + str(e.headers) + " - Content: " +
            error_body)

        ret = retry()
        # If the retries produced nothing, hand back the server's error page.
        if not ret.get("content") and error_body:
            ret["content"] = error_body
        return ret

    except urllib.error.URLError as e:
        log("URLError : " + str(e))

        time.sleep(3)
        return retry()
repositories = [] page = 1 while not depsonly: try: request = urllib.request.Request( "https://api.github.com/users/AICP/repos?page=%d" % page) if os.environ.get( 'GITHUB_API_USERNAME') is not None and os.environ.get( 'GITHUB_API_TOKEN') is not None: base64string = base64.encodestring( ('%s:%s' % (os.environ.get('GITHUB_API_USERNAME'), os.environ.get('GITHUB_API_TOKEN')) ).encode()).decode().replace('\n', '') request.add_header("Authorization", "Basic %s" % base64string) result = json.loads(urllib.request.urlopen(request).read().decode()) except: print("API Error") break if len(result) == 0: break for res in result: repositories.append(res) page = page + 1 local_manifests = r'.repo/local_manifests' if not os.path.exists(local_manifests): os.makedirs(local_manifests) def exists_in_tree(lm, repository):
import os
import sys
import urllib.request

# Query the Naver Login profile endpoint with an OAuth access token and print
# the JSON answer.
token = "YOUR_ACCESS_TOKEN"  # Naver Login access token
header = "Bearer " + token  # note the space after "Bearer"
url = "https://openapi.naver.com/v1/nid/me"

request = urllib.request.Request(url)
request.add_header("Authorization", header)

with urllib.request.urlopen(request) as response:
    rescode = response.getcode()
    if rescode == 200:
        response_body = response.read()
        print(response_body.decode('utf-8'))
    else:
        # rescode is an int; it must be converted before concatenation.
        print("Error Code:" + str(rescode))
#-*- coding: utf-8 -*-
# Request speech synthesis from the Naver TTS API and store the answer as an
# mp3 file.
import os
import sys
import urllib.parse
import urllib.request
#import pygame
#import time

# Playback through pygame is disabled: the import above is commented out, so
# calling pygame.init() here would raise NameError.
#pygame.init()

# NOTE(review): API credentials are hard-coded; they should be rotated and
# loaded from configuration instead.
client_id = "_NcqU0x4Dar0uSgUxEwZ"
client_secret = "7ARCUWsj7G"

encText = urllib.parse.quote("sex sex sex")
data = "speaker=mijin&speed=5&text=" + encText
url = "https://openapi.naver.com/v1/voice/tts.bin"

request = urllib.request.Request(url)
request.add_header("X-Naver-Client-Id", client_id)
request.add_header("X-Naver-Client-Secret", client_secret)

with urllib.request.urlopen(request,
                            data=data.encode('utf-8')) as response:
    rescode = response.getcode()
    if rescode == 200:
        print("TTS mp3 저장")
        response_body = response.read()
        with open('1111.mp3', 'wb') as f:
            f.write(response_body)
        #pygame.mixer.music.load("1111.mp3")
        #pygame.mixer.music.play()
        #time.sleep(1)
        #pygame.mixer.music.stop()
    else:
        # rescode is an int; it must be converted before concatenation.
        print("Error Code:" + str(rescode))
# Trivial HTTP client for use in sample test. Supports optional decompression
# of the server response.
# NB it would be possible to perform these operations in the main PySys
# process too, but using separate processes for I/O-intensive operations
# allows for greater multi-threaded testing performance.
import gzip
import sys
import urllib.request

# Command line: <url> <accept-encoding or empty string> <auth type>
url, acceptencoding, auth = sys.argv[1:]

request = urllib.request.Request(url)
if acceptencoding:
    request.add_header('Accept-encoding', acceptencoding)

assert auth == 'AuthNone', 'Support for testing other auth types is not yet implemented'

with urllib.request.urlopen(request) as r:
    body = r.read()
    content_encoding = r.headers.get('Content-encoding', '')

    # The server must answer with exactly the encoding that was asked for.
    assert content_encoding == acceptencoding, 'Got unexpected encoding: %r' % r.headers.get(
        'Content-encoding')

    if content_encoding == 'gzip':
        body = gzip.decompress(body)

print(body.decode('utf-8'))
def url_open_with_basic_auth(url: str):
    """Open *url* with an HTTP Basic ``Authorization`` header.

    The header value is ``'Basic '`` followed by the module-level
    ``credentials`` string. Returns whatever ``urllib.request.urlopen``
    returns for the request.
    """
    auth_headers = {'Authorization': 'Basic ' + credentials}
    authed_request = urllib.request.Request(url, headers=auth_headers)
    return urllib.request.urlopen(authed_request)
# A release carrying any of these tags is always ignored.
_RUTOR_EXCLUDED_TAGS = frozenset(("LINE", "UKR", "3D-VIDEO", "60 FPS"))
# A release must carry at least one of these tags to be considered.
_RUTOR_REQUIRED_TAGS = frozenset(("ЛИЦЕНЗИЯ", "ITUNES", "D", "D2"))
# When one of these types was found, all WEB-DL results are dropped.
_RUTOR_PREFERRED_TYPES = ("UHD BDRemux HDR", "UHD BDRemux SDR",
                          "BDRip-HEVC 1080p", "BDRip 1080p")
_RUTOR_WEBDL_TYPES = ("WEB-DL 2160p HDR", "WEB-DL 2160p SDR", "WEB-DL 1080p")
# Order in which the found types are reported to the caller.
_RUTOR_OUTPUT_ORDER = ("WEB-DL 1080p", "WEB-DL 2160p SDR", "WEB-DL 2160p HDR",
                       "BDRip 1080p", "BDRip-HEVC 1080p", "BDRemux",
                       "UHD BDRemux SDR", "UHD BDRemux HDR")


def _rutor_release_type(realName, tags):
    """Return the release type for an upper-cased title, or None if unknown."""
    dynamicRange = "HDR" if "HDR" in tags else "SDR"
    # The order matters: "UHD BDREMUX" also contains "BDREMUX".
    if "UHD BDREMUX" in realName:
        return "UHD BDRemux " + dynamicRange
    if "BDREMUX" in realName:
        return "BDRemux"
    if "BDRIP-HEVC 1080" in realName:
        return "BDRip-HEVC 1080p"
    if "BDRIP 1080" in realName:
        return "BDRip 1080p"
    if "WEB-DL 2160" in realName:
        return "WEB-DL 2160p " + dynamicRange
    if "WEB-DL 1080" in realName:
        return "WEB-DL 1080p"
    return None


def _rutor_fetch_page(filmID):
    """Download the rutor page for *filmID* and return it as text."""
    default_socket = None
    if SOCKS_IP:
        # Route all sockets through the SOCKS5 proxy for this download.
        default_socket = socket.socket
        socks.set_default_proxy(socks.SOCKS5, SOCKS_IP, SOCKS_PORT)
        socket.socket = socks.socksocket

    try:
        request = urllib.request.Request(RUTOR_BASE_URL + filmID)
        request.add_header("Accept-encoding", "gzip")
        request.add_header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0")

        try:
            response = urllib.request.urlopen(request)
        except Exception:
            # One retry; a second failure propagates to the caller.
            print("Ошибка соединения при загрузке торрент-ссылок для filmID " + filmID + ". Даём второй шанс.")
            response = urllib.request.urlopen(request)

        with response:
            if response.info().get('Content-Encoding') == 'gzip':
                return gzip.GzipFile(fileobj=response).read().decode("utf-8")
            return response.read().decode("utf-8")
    finally:
        # Restore the original socket class even when the download failed.
        if default_socket is not None:
            socket.socket = default_socket


def rutorLinks(filmID):
    """Return the best torrent link per release type for *filmID*.

    The rutor page is downloaded, the ``<div id="index">`` block is scanned
    for torrents, and for every recognised release type the torrent with the
    most seeders is kept.

    Args:
        filmID: film identifier as a string; appended to RUTOR_BASE_URL.

    Returns:
        list of ``{"link": ..., "type": ...}`` dicts, ordered from WEB-DL
        1080p up to UHD BDRemux HDR.

    Raises:
        IndexError: the torrent block is missing or malformed.
        ValueError: a seeders value is not an integer.
    """
    print("Загрузка торрент-ссылок для filmID " + filmID + ".")

    content = _rutor_fetch_page(filmID)

    blockNotFound = "Ошибка загрузки торрент-ссылок для filmID " + filmID + ". Не найден блок с торрентами."
    strIndex = content.find("<div id=\"index\">")
    if strIndex == -1:
        raise IndexError(blockNotFound)
    content = content[strIndex:]

    strIndex = content.find("</div>")
    if strIndex == -1:
        raise IndexError(blockNotFound)
    content = content[:strIndex]

    links = re.findall("<a class=\"downgif\" href=\"(.*?)\">", content)
    names = re.findall("<a href=\"/torrent/(.*?)\">(.*?)</a>", content)
    seeders = re.findall("alt=\"S\" />(.*?)</span>", content)

    # All three lists must line up; `a != b != c` would miss some mismatches.
    if not (len(links) == len(names) == len(seeders)):
        raise IndexError("Ошибка загрузки торрент-ссылок для filmID " + filmID + ". Неверный формат блока с торрентами.")

    allTorrents = [
        {"link": link.strip(),
         "name": html.unescape(name[1]).strip(),
         "seeders": int(html.unescape(seedersText).strip())}
        for link, name, seedersText in zip(links, names, seeders)
    ]

    result = {}
    for item in allTorrents:
        # Titles look like "<name> | <tag>, <tag> | <tag>".
        tmpParts = item["name"].split("|")
        if len(tmpParts) == 1:
            continue

        realName = tmpParts[0].strip().upper()
        tags = {tag.strip().upper()
                for part in tmpParts[1:]
                for tag in part.split(",")}

        if tags & _RUTOR_EXCLUDED_TAGS:
            continue
        if not tags & _RUTOR_REQUIRED_TAGS:
            continue

        releaseType = _rutor_release_type(realName, tags)
        if releaseType is None:
            continue

        best = result.get(releaseType)
        # Replace link and seeders together so that later candidates are
        # compared against the real current maximum.
        if best is None or item["seeders"] > best["seeders"]:
            result[releaseType] = {"link": item["link"], "seeders": item["seeders"]}

    if any(releaseType in result for releaseType in _RUTOR_PREFERRED_TYPES):
        for releaseType in _RUTOR_WEBDL_TYPES:
            result.pop(releaseType, None)

    finalResult = [{"link": result[releaseType]["link"], "type": releaseType}
                   for releaseType in _RUTOR_OUTPUT_ORDER
                   if releaseType in result]

    return finalResult
def plugin(srv, item):
    """Send the notification as an HTTP GET or POST request.

    addrs: (method, url, dict(params), list(username, password), json)

    ``item.addrs`` supplies the method, the URL (formatted with
    ``item.data`` when possible), optional parameters, optional basic-auth
    credentials and an optional flag requesting a JSON body.

    Parameter values starting with ``@`` are replaced by the item field of
    that name (``"NOP"`` if missing); all other values are formatted with
    ``item.data``.

    Returns:
        True when the request was sent, False on any failure or when the
        method is neither GET nor POST.
    """

    srv.logging.debug("*** MODULE=%s: service=%s, target=%s", __file__,
                      item.service, item.target)

    method = item.addrs[0]
    url = item.addrs[1]
    params = item.addrs[2]
    timeout = item.config.get('timeout', 60)

    # Credentials are optional; any problem reading them means "no auth".
    basicauth_token = None
    try:
        username, password = item.addrs[3]
        credentials = '%s:%s' % (username, password)
        basicauth_token = base64.b64encode(
            credentials.encode('utf-8')).decode()
    except Exception:
        pass

    tojson = None
    try:
        tojson = item.addrs[4]
    except Exception:
        pass

    # Try and transform the URL. Use original URL if it's not possible
    try:
        url = url.format(**item.data)
    except Exception:
        pass

    if params is not None:
        # Work on a copy: item.addrs[2] is shared configuration, and writing
        # the substituted values back would freeze the first message into
        # every later call.
        params = dict(params)
        for key in list(params.keys()):

            # { 'q' : '@message' }
            # Quoted field, starts with '@'. Do not use .format, instead grab
            # the item's [message] and inject as parameter value.
            if params[key].startswith('@'):  # "@message"
                params[key] = item.get(params[key][1:], "NOP")

            else:
                try:
                    # Values stay str: urlencode() yields the same UTF-8
                    # escapes as for bytes, and json.dumps() rejects bytes.
                    params[key] = params[key].format(**item.data)
                except Exception:
                    srv.logging.exception(
                        "Parameter %s cannot be formatted" % key)
                    return False

    message = item.message

    if method.upper() == 'GET':
        resource = url
        try:
            if params is not None:
                if not resource.endswith('?'):
                    resource = resource + '?'
                resource = resource + urllib.parse.urlencode(params)

            request = urllib.request.Request(resource)

            if srv.SCRIPTNAME is not None:
                request.add_header('User-agent', srv.SCRIPTNAME)
            if basicauth_token is not None:
                request.add_header("Authorization",
                                   "Basic %s" % basicauth_token)

            with urllib.request.urlopen(request, timeout=timeout) as resp:
                resp.read()
        except Exception as e:
            srv.logging.warning("Cannot GET %s: %s" % (resource, e))
            return False

        return True

    if method.upper() == 'POST':
        try:
            request = urllib.request.Request(url)
            if params is not None:
                if tojson is not None:
                    encoded_params = json.dumps(params)
                    request.add_header('Content-Type', 'application/json')
                else:
                    encoded_params = urllib.parse.urlencode(params)
            else:
                if tojson is not None:
                    encoded_params = item.payload
                    request.add_header('Content-Type', 'application/json')
                else:
                    encoded_params = message

            request.data = encoded_params.encode('utf-8')

            if srv.SCRIPTNAME is not None:
                request.add_header('User-agent', srv.SCRIPTNAME)
            if basicauth_token is not None:
                request.add_header("Authorization",
                                   "Basic %s" % basicauth_token)

            srv.logging.debug("before send")
            with urllib.request.urlopen(request, timeout=timeout) as resp:
                resp.read()
        except Exception as e:
            srv.logging.warning("Cannot POST %s: %s" % (url, e))
            return False

        return True

    srv.logging.warning("Unsupported HTTP method: %s" % (method))
    return False
# contents = base64.b64encode(f.read()) # f.close() with open('23.jpeg', 'rb') as f: # 以二进制读取本地图片 data = f.read() contents = str(base64.b64encode(data), 'utf-8') bodys['IMAGE'] = contents bodys['IMAGE_TYPE'] = '0' # 启用URL方式进行识别 # 内容数据类型是图像文件URL链接 # bodys['IMAGE'] = '图片URL链接' # bodys['IMAGE_TYPE'] = '1' post_data = urllib.parse.urlencode(bodys).encode('utf-8') request = urllib.request.Request(url, post_data) request.add_header('Authorization', 'APPCODE ' + appcode) request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') response = urllib.request.urlopen(request) content = response.read() yzm = '' if (content): print(content.decode('utf-8')) ss = content.decode("utf8") print(ss) ss1 = json.loads(ss) aa = ss1['VERIFY_CODE_ENTITY']['VERIFY_CODE'] # aa = ss1['prism_wordsInfo'][0]['word'] yzm = re.sub('\s', '', aa)
def digitalReleases(days):
    """Collect the films digitally released during the last *days* days.

    One request is sent to the Kinopoisk API for every calendar month
    between today and ``today - days``; the answers are validated and
    filtered by release date.

    Args:
        days: size of the look-back window in days.

    Returns:
        list of ``{"filmID": str, "releaseDate": datetime.date}`` dicts.

    Raises:
        ValueError: an answer is empty or does not have the expected shape.
    """
    rDict = {}

    # Take "today" once so that every date below refers to the same day.
    currentDate = datetime.date.today()
    print("Текущая дата: " + currentDate.strftime("%d.%m.%Y"))
    downloadDates = [currentDate]
    targetDate = currentDate - datetime.timedelta(days=days)
    print("Целевая дата: " + targetDate.strftime("%d.%m.%Y"))

    # Step back month by month (last day of each previous month) until the
    # month of the target date is reached.
    iterationDate = currentDate
    while (targetDate.year, targetDate.month) != (iterationDate.year, iterationDate.month):
        iterationDate = iterationDate.replace(day=1) - datetime.timedelta(days=1)
        downloadDates.append(iterationDate)

    print("Количество месяцев для загрузки: " + str(len(downloadDates)))

    for downloadDate in downloadDates:
        monthLabel = downloadDate.strftime("%m.%Y")
        errorPrefix = "Ошибка загрузки релизов за " + monthLabel + "."
        print("Загрузка релизов за " + monthLabel + ".")

        requestMethod = KINOPOISK_API_RELEAESES.format(monthLabel, KINOPOISK_UUID)
        timestamp = str(int(round(time.time() * 1000)))
        hashString = requestMethod + timestamp + KINOPOISK_API_SALT

        request = urllib.request.Request(KINOPOISK_BASE_URL + requestMethod)
        request.add_header("Accept-encoding", "gzip")
        request.add_header("Accept", "application/json")
        request.add_header("User-Agent", "Android client (6.0.1 / api23), ru.kinopoisk/4.6.5 (86)")
        request.add_header("Image-Scale", "3")
        request.add_header("device", "android")
        request.add_header("ClientId", KINOPOISK_CLIENTID)
        request.add_header("countryID", "2")
        request.add_header("cityID", "1")
        request.add_header("Android-Api-Version", "23")
        # A date object has no time part (%H:%M would always be 00:00), so
        # the current datetime is used.
        request.add_header("clientDate", datetime.datetime.now().strftime("%H:%M %d.%m.%Y"))
        request.add_header("X-TIMESTAMP", timestamp)
        request.add_header("X-SIGNATURE", hashlib.md5(hashString.encode('utf-8')).hexdigest())

        try:
            response = urllib.request.urlopen(request)
        except Exception:
            # One retry; a second failure propagates to the caller.
            print("Ошибка соединения при загрузке релизов за " + monthLabel + ". Даём второй шанс.")
            response = urllib.request.urlopen(request)

        with response:
            if response.info().get('Content-Encoding') == 'gzip':
                content = gzip.GzipFile(fileobj=response).read().decode("utf-8")
            else:
                content = response.read().decode("utf-8")

        if not content:
            raise ValueError(errorPrefix)

        tmpDict = json.loads(content)
        if not tmpDict:
            raise ValueError(errorPrefix + " Ответ не соответствует JSON.")
        if tmpDict.get("success") != True:
            raise ValueError(errorPrefix + " В ответе нет значения success или оно равно False.")

        items = tmpDict.get("data")
        if not isinstance(items, dict):
            raise ValueError(errorPrefix + " Проблемы со значением data.")
        items = items.get("items")
        if not isinstance(items, list):
            raise ValueError(errorPrefix + " Проблемы со значением items.")

        for item in items:
            if not isinstance(item, dict):
                raise ValueError(errorPrefix + " Проблемы с одним из элементов items.")
            filmID = item.get("id")
            if not isinstance(filmID, int):
                raise ValueError(errorPrefix + " Проблемы с id в одном из элементов items.")
            contextData = item.get("contextData")
            if not isinstance(contextData, dict):
                raise ValueError(errorPrefix + " Проблемы с contextData в одном из элементов items.")
            releaseDateStr = contextData.get("releaseDate")
            if not isinstance(releaseDateStr, str):
                raise ValueError(errorPrefix + " Проблемы с releaseDate в одном из элементов items.")
            releaseDate = datetime.datetime.strptime(releaseDateStr, "%Y-%m-%d").date()

            # Only releases inside the requested window are kept.
            if targetDate <= releaseDate <= currentDate:
                rDict[str(filmID)] = releaseDate

    print("Загружены ID от {} релизов.".format(len(rDict)))

    return [{"filmID": key, "releaseDate": value} for key, value in rDict.items()]
import urllib.request
import http.cookiejar

# Demonstrates ways of downloading a web page with urllib.
print(urllib.request)
print(http.cookiejar)

url = "http://www.baidu.com"

# Method 1 (disabled): plain urlopen on the URL.
# print('The second method to download a webpage:')
# response1 = urllib.request.urlopen(url)
# print(response1.getcode())
# print(len(response1.read()))

# Method 2: a Request object carrying a custom User-Agent header.
print('The second method to download a webpage:')
request = urllib.request.Request(
    url, headers={"User-Agent": "Chrome/60.0.3112.78"})
response2 = urllib.request.urlopen(request)
print(response2.getcode())
body2 = response2.read()
print(len(body2))

# Method 3: install a global opener that keeps cookies in a CookieJar.
print('The third method to download a webpage:')
cj = http.cookiejar.CookieJar()
cookie_handler = urllib.request.HTTPCookieProcessor(cj)
opener = urllib.request.build_opener(cookie_handler)
urllib.request.install_opener(opener)
response3 = urllib.request.urlopen(url)
print(response3.getcode())
print(response3.read())
def filmDetail(filmID):
    """Download and validate the Kinopoisk details of one film.

    Args:
        filmID: film identifier as a string.

    Returns:
        dict with the keys ``filmID``, ``nameRU``, ``nameOriginal``,
        ``description``, ``year``, ``country``, ``genre``,
        ``ratingAgeLimits``, ``posterURL``, ``filmLength``, ``ratingKP``,
        ``ratingIMDb``, ``rating``, ``ratingFloat``, ``directors`` and
        ``actors``. ``rating`` is the mean of both ratings when an IMDb
        rating exists, otherwise the Kinopoisk rating.

    Raises:
        ValueError: the answer is empty or does not have the expected shape.
    """
    print("Загрузка данных для filmID " + filmID + ".")

    errorPrefix = "Ошибка загрузки данных для filmID " + filmID + "."

    def required(container, key, expectedType):
        """Return container[key], raising if it is not of expectedType."""
        value = container.get(key)
        if not isinstance(value, expectedType):
            raise ValueError(errorPrefix + " Проблемы со значением " + key + ".")
        return value

    def optionalStr(container, key, default):
        """Return container[key] if it is a str, else the default."""
        value = container.get(key)
        return value if isinstance(value, str) else default

    requestMethod = KINOPOISK_API_FILMDETAIL.format(filmID, KINOPOISK_UUID)
    timestamp = str(int(round(time.time() * 1000)))
    hashString = requestMethod + timestamp + KINOPOISK_API_SALT

    request = urllib.request.Request(KINOPOISK_BASE_URL2 + requestMethod)
    request.add_header("Accept-encoding", "gzip")
    request.add_header("Accept", "application/json")
    request.add_header("User-Agent", "Android client (6.0.1 / api23), ru.kinopoisk/4.6.5 (86)")
    request.add_header("Image-Scale", "3")
    request.add_header("device", "android")
    request.add_header("ClientId", KINOPOISK_CLIENTID)
    request.add_header("countryID", "2")
    request.add_header("cityID", "1")
    request.add_header("Android-Api-Version", "23")
    # A date object has no time part (%H:%M would always be 00:00), so the
    # current datetime is used.
    request.add_header("clientDate", datetime.datetime.now().strftime("%H:%M %d.%m.%Y"))
    request.add_header("X-TIMESTAMP", timestamp)
    request.add_header("X-SIGNATURE", hashlib.md5(hashString.encode('utf-8')).hexdigest())

    try:
        response = urllib.request.urlopen(request)
    except Exception:
        # One retry; a second failure propagates to the caller.
        print("Ошибка соединения при загрузке данных для filmID " + filmID + ". Даём второй шанс.")
        response = urllib.request.urlopen(request)

    with response:
        if response.info().get('Content-Encoding') == 'gzip':
            content = gzip.GzipFile(fileobj=response).read().decode("utf-8")
        else:
            content = response.read().decode("utf-8")

    if not content:
        raise ValueError(errorPrefix)

    tmpDict = json.loads(content)
    if not tmpDict:
        raise ValueError(errorPrefix + " Ответ не соответствует JSON.")
    if tmpDict.get("resultCode") != 0:
        raise ValueError(errorPrefix + " В ответе нет значения resultCode или оно не равно 0.")

    # The fields are validated in a fixed order so that the first problem
    # reported stays the same.
    itemData = required(tmpDict, "data", dict)
    nameRU = required(itemData, "nameRU", str)
    nameEN = optionalStr(itemData, "nameEN", "")
    year = required(itemData, "year", str)
    country = required(itemData, "country", str)
    genre = required(itemData, "genre", str)
    description = required(itemData, "description", str)
    ratingAgeLimits = optionalStr(itemData, "ratingAgeLimits", "")

    posterURL = required(itemData, "posterURL", str)
    # POSTER_URL takes the poster URL and the separator for what it appends.
    posterURL = POSTER_URL.format(posterURL, "&" if "?" in posterURL else "?")

    filmLength = required(itemData, "filmLength", str)

    ratingData = required(itemData, "ratingData", dict)
    ratingKP = optionalStr(ratingData, "rating", "0")
    ratingIMDb = optionalStr(ratingData, "ratingIMDb", "")

    directors = []
    actors = []

    creators = required(itemData, "creators", list)
    for personsGroup in creators:
        if not isinstance(personsGroup, list):
            raise ValueError(errorPrefix + " Проблемы со значением creators > personsGroup.")
        for person in personsGroup:
            if not isinstance(person, dict):
                raise ValueError(errorPrefix + " Проблемы со значением creators > personsGroup > person.")
            personName = person.get("nameRU")
            if not personName:
                continue
            if person.get("professionKey") == "director":
                directors.append(personName)
            if person.get("professionKey") == "actor":
                actors.append(personName)

    if ratingIMDb and ratingKP:
        rating = "{0:.1f}".format((float(ratingKP) + float(ratingIMDb)) / 2.0 + 0.001)
    else:
        rating = ratingKP

    result = {}
    result["filmID"] = filmID
    result["nameRU"] = nameRU
    result["nameOriginal"] = nameEN
    result["description"] = description
    result["year"] = year
    result["country"] = country
    result["genre"] = genre
    result["ratingAgeLimits"] = ratingAgeLimits
    result["posterURL"] = posterURL
    result["filmLength"] = filmLength
    result["ratingKP"] = ratingKP
    result["ratingIMDb"] = ratingIMDb
    result["rating"] = rating
    result["ratingFloat"] = float(rating)
    result["directors"] = ", ".join(directors)
    result["actors"] = ", ".join(actors)

    return result
parser.add_argument('--auth', type=str, action='store', default=os.getenv('AUTH_TOKEN'), metavar="TOKEN", help='GitHub API token (Environment variable AUTH_TOKEN can be also used.)') args = parser.parse_args() if args.filename and args.arch == 'all': parser.error('-a must be specified when you specify -n.') # Get information of GitHub release # see: https://developer.github.com/v3/repos/releases/ if args.auth: # Unauthenticated requests are limited up to 60 requests per hour. # Authenticated requests are allowed up to 5,000 requests per hour. # See: https://developer.github.com/v3/#rate-limiting request = urllib.request.Request(gh_release_url) request.add_header("Authorization", "token " + args.auth) else: request = gh_release_url try: response = urllib.request.urlopen(request) except urllib.error.HTTPError as err: print('GitHub release not found. (%s)' % err.reason, file=sys.stderr) exit(1) rel_info = json.load(io.StringIO(str(response.read(), 'utf-8'))) print('Last release:', rel_info['name']) print('Created at:', rel_info['created_at']) if args.check: exit(0)
print("xx:{}".format(img_size)) region = im.crop((70,200, w-70,700)) #裁剪的区域 region.save("d:/crop_test1.png") f=open('d:/crop_test1.png','rb') ls_f=base64.b64encode(f.read()) f.close() s = bytes.decode(ls_f) bodys[''] = "{\"uid\":\"118.12.0.12\",\"lang\":\"chns\",\"color\":\"color\",\"image\":\""+s+"\"}" post_data = bodys[''] request = urllib.request.Request(url, str.encode(post_data)) request.add_header('Authorization', 'APPCODE ' + appcode) request.add_header('Content-Type', 'application/json; charset=UTF-8') request.add_header('Content-Type', 'application/octet-stream') response = urllib.request.urlopen(request) content = bytes.decode(response.read()) if (content): decode_json = json.loads(content) print(decode_json['textResult']) #pyperclip.copy(''.join(decode_json['textResult'].split())) keyword = ''.join(decode_json['textResult'].split()) #识别的问题文本
#-*- coding: utf-8 -*-
# Query the Naver DataLab search-trend API and print the JSON answer.
import os
import sys
import urllib.request

# NOTE(review): API credentials are hard-coded; they should be rotated and
# loaded from configuration instead.
client_id = "XevQvs1sa3UGxrTDEDoj"
client_secret = "oqOQRsUeI2"
url = "https://openapi.naver.com/v1/datalab/search"
body = '{"startDate":"2017-01-01","endDate":"2017-04-30","timeUnit":"month","keywordGroups":[{"groupName":"한글","keywords":["한글","korean"]},{"groupName":"영어","keywords":["영어","english"]}],"device":"pc","ages":["1","2"],"gender":"f"}'

request = urllib.request.Request(url)
request.add_header("X-Naver-Client-Id", client_id)
request.add_header("X-Naver-Client-Secret", client_secret)
request.add_header("Content-Type", "application/json")

# Passing data makes this a POST; the context manager closes the connection.
with urllib.request.urlopen(request,
                            data=body.encode("utf-8")) as response:
    rescode = response.getcode()
    if rescode == 200:
        response_body = response.read()
        print(response_body.decode('utf-8'))
    else:
        # rescode is an int; it must be converted before concatenation.
        print("Error Code:" + str(rescode))
# Python 3.x uses `import urllib.request, urllib.error` where Python 2.x
# used `import urllib2`.
import urllib.request
# Python 2's cookielib was renamed to http.cookiejar.
import http.cookiejar

url = 'http://www.baidu.com'
separator = '*' * 50

# Method 1: plain urlopen on the URL.
print('第一种方法')
response1 = urllib.request.urlopen(url)
print(response1.getcode())
body1 = response1.read()
print(len(body1))

print(separator)

# Method 2: a Request object carrying a custom user-agent header.
print('第二种方法')
request = urllib.request.Request(url, headers={'user-agent': 'Mozilla/5.0'})
response2 = urllib.request.urlopen(request)
print(response2.getcode())
body2 = response2.read()
print(len(body2))

print(separator)

# Method 3: install an opener with cookie handling.
print('第三种方法')
cj = http.cookiejar.CookieJar()  # container for the cookies
cookieProcessor = urllib.request.HTTPCookieProcessor(cj)
opener = urllib.request.build_opener(cookieProcessor)  # create an opener
# After installing the opener, urlopen() handles cookies as well.
urllib.request.install_opener(opener)
response3 = urllib.request.urlopen(url)
print(response3.getcode())
print(cj)
print(response3.read())
# 使用Request实例代替url request = urllib.request.Request(url, data=None, headers={}) response = urllib.request.urlopen(request, timeout=10) # 发送数据,即在Request()中添加data参数 data = urllib.parse.urlencode({"act": "login", "email": "*****@*****.**", "password": "******"}) request1 = urllib.request.Request(url, data=data) # POST方法 request2 = urllib.request.Request(url+"?%s" % data) # GET方法 response = urllib.request.urlopen(request, timeout=10) # 发送Header,即在Request()中添加headers参数 request = urllib.request.Request(url, data=data, headers=headers) # 参数中添加header参数 request.add_header("Referer", "http://www.baidu.com") # 另一种添加header的方式,添加Referer是为了应对"反盗链" response = urllib.request.urlopen(request, timeout=10) # 网页抓取引发异常:urllib.error.HTTPError, urllib.error.URLError, 两者存在继承关系 try: urllib.request.urlopen(request, timeout=10) except urllib.error.HTTPError as e: print(e.code, e.reason) except urllib.error.URLError as e: print(e.errno, e.reason) # 使用代理,以防止IP被封或IP次数受限: proxy_handler = urllib.request.ProxyHandler(proxies={"http": "111.123.76.12:8080"})
def _github_api_request(self, url, data=None, method=None, authenticate=False):
    """Call a GitHub API URL and return the decoded JSON response.

    Args:
        url: Full URL of the API endpoint.
        data: Request payload. An empty string sends an empty body; any other
            truthy value is serialized to JSON; ``None`` sends no body.
        method: Optional HTTP method overriding urllib's default.
        authenticate: Force sending the Authorization header even when
            ``self._github_have_authorization()`` is false.

    Returns:
        The parsed JSON document, or ``None`` for a 204 (No Content) answer.
        When the response carries a ``Link`` header with ``rel="next"``, the
        following pages are fetched recursively and appended with ``+=``.

    Raises:
        Error: if opening the URL fails with an ``IOError``.
    """
    logging.debug("Making github API request {0}".format(url))

    if data == "":
        # Workaround for PUTs requiring data, even if you have nothing to pass
        body = b""
    elif data:
        # Request bodies must be bytes in Python 3.
        body = json.dumps(data).encode("utf-8")
    else:
        body = None
    request = urllib.request.Request(url, data=body, method=method)

    # Manually adding authentication data.
    # Basic works in curl, but urllib does not send it on its own,
    # probably because github's API doesn't send a www-authenticate header.
    if authenticate or self._github_have_authorization():
        from base64 import b64encode

        auth = self._github_authorization()
        if ":" in auth:
            # username:password
            base64string = b64encode(auth.encode("utf-8")).decode("ascii")
            request.add_header("Authorization", "Basic {0}".format(base64string))
        else:
            # token
            request.add_header("Authorization", "Bearer {0}".format(auth))

    try:
        response = urllib.request.urlopen(request)
    except IOError as e:
        raise Error("GitHub API failure: " + str(e))

    with response:
        if response.code == 204:
            # 204 = No content
            return None
        json_parsed = json.loads(response.read().decode("utf-8"))
        # get_all() returns the header values without the "Link:" prefix,
        # or None when the header is absent.
        link_headers = response.headers.get_all("Link") or []

    if link_headers:
        logging.debug("Found a Link header in response, analyzing...")
        # Each entry looks like: <https://...>; rel="next"
        links_dict = {}
        for link_header in link_headers:
            for entry in link_header.split(","):
                target, _, params = entry.partition(";")
                target = target.strip()
                if target.startswith("<") and target.endswith(">"):
                    target = target[1:-1]
                for param in params.split(";"):
                    key, _, value = param.partition("=")
                    if key.strip().lower() == "rel":
                        links_dict[value.strip().strip('"')] = target

        if "next" in links_dict:
            logging.debug(
                "Link with rel=\"next\" found, recursing to deal with pagination"
            )
            rest = self._github_api_request(links_dict["next"], data, method,
                                            authenticate)
            if rest is not None:
                json_parsed += rest

    return json_parsed
# Ordered (keyword, message) pairs; the first keyword found in the emotion wins.
_FACE_EMOTIONS = (
    ('angry', u'你现在很愤怒'),
    ('disgust', u'你现在很厌恶'),
    ('fear', u'你现在很恐惧'),
    ('happy', u'你现在很高兴'),
    ('sad', u'你现在很伤心'),
    ('surprise', u'你现在很惊讶'),
)

# Ordered (keyword, label) pairs; the first keyword found in the shape wins.
_FACE_SHAPES = (
    ('square', u'正方形'),
    ('triangle', u'三角形'),
    ('oval', u'椭圆'),
    ('heart', u'心形'),
)


def _first_match(value, table, default):
    """Return the label of the first keyword in ``table`` contained in ``str(value)``."""
    text = str(value)
    for keyword, label in table:
        if keyword in text:
            return label
    return default


def _age_title(gender, age):
    """Return the form of address for a face of the given gender and age."""
    if 'female' in str(gender):
        titles = (u'小妹妹', u'姐姐', u'阿姨', u'奶奶')
    else:
        titles = (u'小弟弟', u'哥哥', u'叔叔', u'爷爷')
    # Bands are contiguous, so every age (including exactly 28) gets a title.
    if age <= 14:
        return titles[0]
    if age <= 28:
        return titles[1]
    if age <= 55:
        return titles[2]
    return titles[3]


def FaceDetect(pic, token):
    """Send the image at ``pic`` to the face-detection endpoint and describe the face.

    The image is base64-encoded and POSTed to the module-level ``url`` with
    ``token`` as the access token. For each returned face a greeting with
    face shape, adjusted beauty score and emotion is built and printed.
    After the request the image file is deleted and the module-level
    ``haspic`` flag is reset to ``False``.

    Args:
        pic: Path of the image file to analyse.
        token: Access token appended to the request URL.

    Returns:
        The description of the last face, an empty string when the service
        reported no success or no faces, or "未识别到人脸" when any step fails.
    """
    global haspic
    s = ''
    # NOTE(review): certificate verification is disabled for this request.
    context = ssl._create_unverified_context()
    try:
        # Read the picture in binary mode and base64-encode it.
        with open(pic, 'rb') as f:
            img = base64.b64encode(f.read())
        params = {"image": img, "image_type": "BASE64",
                  "face_field": "age,beauty,face_shape,gender,emotion",
                  "max_face_num": 1}
        params = urllib.parse.urlencode(params).encode(encoding='UTF8')
        request_url = url + "?access_token=" + token
        request = urllib.request.Request(url=request_url, data=params)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        response = urllib.request.urlopen(request, context=context)
        content = response.read()
        if content:
            js = json.loads(content)
            if "SUCCESS" in js["error_msg"]:
                for item in js['result']['face_list']:
                    sx = _age_title(item['gender'], item['age'])
                    em = _first_match(item['emotion'], _FACE_EMOTIONS, u'表情很平静')
                    fs = _first_match(item['face_shape'], _FACE_SHAPES, u'圆形')
                    # The raw beauty score is raised by a fixed bonus (larger
                    # for round faces) and anything above 95 is set to 94.6.
                    if fs == u'圆形':
                        face_score = item['beauty'] + 35
                    else:
                        face_score = item['beauty'] + 30
                    if face_score > 95:
                        face_score = 94.6
                    s = u'%s你好, 你是%s脸,颜值为 %2.2f 分, %s.' % (sx, fs, face_score, em)
                    print(s)
            else:
                print("未识别到人脸")
        os.remove(pic)
        haspic = False
        return s
    except Exception:
        # Best effort: any failure (I/O, network, unexpected JSON) is
        # reported to the caller as "no face recognised".
        return "未识别到人脸"
def downloadfile(url, nombrefichero, headers=None, silent=False, continuar=False, resumir=True):
    """Download ``url`` into ``nombrefichero``, optionally resuming a partial file.

    Extra request headers may follow the URL after a ``|`` separator as
    ``name=value`` pairs joined with ``&``; they are URL-unquoted and sent
    with the request.

    Args:
        url: Source URL, optionally followed by ``|name=value&name=value``.
        nombrefichero: Destination file path.
        headers: Optional list of ``[name, value]`` pairs. Headers parsed
            from ``url`` are appended to this list.
        silent: When true, no progress dialog is created.
        continuar: When true, an existing destination file is reused instead
            of aborting the download.
        resumir: Together with ``continuar``, continue after the bytes that
            already exist (a ``Range`` header is sent); otherwise writing
            starts at the beginning of the existing file.

    Returns:
        ``0`` when the server answers HTTP 416 (file already complete),
        ``-1`` when the user cancels, ``-2`` on a download error, ``-3``
        when the file exists and ``continuar`` is false, and ``None`` when
        the function runs to its end.
    """
    logger.info("url=" + url)
    logger.info("nombrefichero=" + nombrefichero)

    if headers is None:
        headers = []

    progreso = None

    if config.is_xbmc() and nombrefichero.startswith("special://"):
        import xbmc
        nombrefichero = xbmc.translatePath(nombrefichero)

    try:
        # Outside XBMC the download always ends up "silent"
        from platformcode import platformtools

        try:
            import xbmc
            nombrefichero = xbmc.makeLegalFilename(nombrefichero)
        except:
            pass
        logger.info("nombrefichero=" + nombrefichero)

        # The file exists and the caller wants to continue
        if filetools.exists(nombrefichero) and continuar:
            f = filetools.file_open(nombrefichero, 'r+b', vfs=VFS)
            if resumir:
                exist_size = filetools.getsize(nombrefichero)
                logger.info("el fichero existe, size=%d" % exist_size)
                grabado = exist_size
                f.seek(exist_size)
            else:
                exist_size = 0
                grabado = 0

        # The file already exists and must not be continued: abort
        elif filetools.exists(nombrefichero) and not continuar:
            logger.info("el fichero existe, no se descarga de nuevo")
            return -3

        # The file does not exist
        else:
            exist_size = 0
            logger.info("el fichero no existe")

            f = filetools.file_open(nombrefichero, 'wb', vfs=VFS)
            grabado = 0

        # Create the progress dialog
        if not silent:
            progreso = platformtools.dialog_progress("plugin", "Descargando...", url, nombrefichero)

        # If the platform does not return a valid dialog, assume silent mode
        if progreso is None:
            silent = True

        if "|" in url:
            additional_headers = url.split("|")[1]
            if "&" in additional_headers:
                additional_headers = additional_headers.split("&")
            else:
                additional_headers = [additional_headers]

            for additional_header in additional_headers:
                logger.info("additional_header: " + additional_header)
                name = re.findall("(.*?)=.*?", additional_header)[0]
                value = urllib.parse.unquote_plus(
                    re.findall(".*?=(.*?)$", additional_header)[0])
                headers.append([name, value])

            url = url.split("|")[0]
            logger.info("url=" + url)

        # Socket timeout of 60 seconds
        socket.setdefaulttimeout(60)

        h = urllib.request.HTTPHandler(debuglevel=0)
        request = urllib.request.Request(url)
        for header in headers:
            logger.info("Header=" + header[0] + ": " + header[1])
            request.add_header(header[0], header[1])

        if exist_size > 0:
            request.add_header('Range', 'bytes=%d-' % (exist_size, ))

        opener = urllib.request.build_opener(h)
        urllib.request.install_opener(opener)
        try:
            connexion = opener.open(request)
        except urllib.error.HTTPError as e:
            logger.error("error %d (%s) al abrir la url %s" %
                         (e.code, e.msg, url))
            f.close()
            if not silent:
                progreso.close()
            # Error 416 means the requested range starts beyond the end of
            # the file, i.e. the file is already complete
            if e.code == 416:
                return 0
            else:
                return -2

        try:
            totalfichero = int(connexion.headers["Content-Length"])
        except (TypeError, ValueError):
            # A missing header yields None (TypeError), a malformed one a
            # ValueError; fall back to a dummy size in both cases.
            totalfichero = 1

        if exist_size > 0:
            totalfichero = totalfichero + exist_size

        logger.info("Content-Length=%s" % totalfichero)

        blocksize = 100 * 1024

        bloqueleido = connexion.read(blocksize)
        logger.info("Iniciando descarga del fichero, bloqueleido=%s" % len(bloqueleido))

        maxreintentos = 10

        while len(bloqueleido) > 0:
            try:
                # Write the block that was just read
                f.write(bloqueleido)
                grabado += len(bloqueleido)
                percent = int(float(grabado) * 100 / float(totalfichero))
                totalmb = float(float(totalfichero) / (1024 * 1024))
                descargadosmb = float(float(grabado) / (1024 * 1024))

                # Read the next block, retrying so a single timeout does not
                # stop the whole download
                reintentos = 0
                while reintentos <= maxreintentos:
                    try:
                        before = time.time()
                        bloqueleido = connexion.read(blocksize)
                        after = time.time()
                        if (after - before) > 0:
                            velocidad = old_div(len(bloqueleido), (after - before))
                            falta = totalfichero - grabado
                            if velocidad > 0:
                                tiempofalta = old_div(falta, velocidad)
                            else:
                                tiempofalta = 0
                            if not silent:
                                progreso.update(
                                    percent,
                                    "%.2fMB/%.2fMB (%d%%) %.2f Kb/s %s falta " %
                                    (descargadosmb, totalmb, percent,
                                     old_div(velocidad, 1024),
                                     sec_to_hms(tiempofalta)))
                        break
                    except:
                        reintentos += 1
                        logger.info(
                            "ERROR en la descarga del bloque, reintento %d" % reintentos)
                        import traceback
                        # format_exc() returns the traceback text;
                        # print_exc() returns None and would log "None".
                        logger.error(traceback.format_exc())

                # The user cancelled the download
                try:
                    if progreso.iscanceled():
                        logger.info("Descarga del fichero cancelada")
                        f.close()
                        progreso.close()
                        return -1
                except:
                    # progreso is None in silent mode; nothing to cancel
                    pass

                # The retries were exhausted: the download failed
                if reintentos > maxreintentos:
                    logger.info("ERROR en la descarga del fichero")
                    f.close()
                    if not silent:
                        progreso.close()

                    return -2

            except:
                import traceback
                logger.error(traceback.format_exc())

                f.close()
                if not silent:
                    progreso.close()

                return -2

    except:
        if url.startswith("rtmp"):
            error = downloadfileRTMP(url, nombrefichero, silent)
            # NOTE(review): nesting of the dialog under this condition was
            # reconstructed from whitespace-collapsed source — confirm.
            if error and not silent:
                from platformcode import platformtools
                platformtools.dialog_ok("No puedes descargar ese vídeo",
                                        "Las descargas en RTMP aún no",
                                        "están soportadas")
        else:
            import traceback
            exc_type, exc_value, exc_tb = sys.exc_info()
            lines = traceback.format_exception(exc_type, exc_value, exc_tb)
            for line in lines:
                line_splits = line.split("\n")
                for line_split in line_splits:
                    logger.error(line_split)

    try:
        f.close()
    except:
        # f may never have been opened if the failure happened early
        pass

    if not silent:
        try:
            progreso.close()
        except:
            pass

    logger.info("Fin descarga del fichero")
def set_request(request, crumb):
    """Attach the stapler content type, the XHR marker and the crumb to ``request``.

    Args:
        request: Object exposing ``add_header(name, value)``.
        crumb: Value sent in the ``Crumb`` header.
    """
    header_pairs = (
        ('Content-Type', 'application/x-stapler-method-invocation;charset=UTF-8'),
        ('X-Requested-With', 'XMLHttpRequest'),
        ('Crumb', crumb),
    )
    for header_name, header_value in header_pairs:
        request.add_header(header_name, header_value)
for line in fi: line = line.strip() if line != "": agents.append(line) i = 6835 count = 0 while (i < 9999): try: f = open('results_2018_2.txt', 'a+', encoding='utf-8') url = 'http://wx.triman.com.cn/rkbyw/hksp/queryInfo/0?SPWH=%s00180%s&SFZHM=' % ( r"%E4%BA%BA", i) request = urllib.request.Request(url) agent_index = random.randint(0, 9811 - 1) user_agent = agents[agent_index] request.add_header('User-Agent', user_agent) request.add_header('connection', 'keep-alive') request.add_header('Accept-Encoding', 'gzip') response = urllib.request.urlopen(request) html = response.read() if (response.headers.get('content-encoding', None) == 'gzip'): html = gzip.GzipFile(fileobj=io.BytesIO(html)).read() soup = BeautifulSoup(html, 'html.parser') div = soup.find('div', {'class': 'rediv'}) if div is not None: top = div.find('ul') if top == None: i += 1 continue link = top.findAll('td') if link == None or len(link) < 5: