def get(self, data=None):
    logger.debug("GET %s", self.url)
    req = urllib.request.Request(url=self.url, data=data, headers=self.headers)
    try:
        if self.opener:
            request = self.opener.open(req, timeout=self.timeout)
        else:
            request = urllib.request.urlopen(req, timeout=self.timeout)
    except (urllib.error.HTTPError, CertificateError) as error:
        if error.code == 401:
            raise UnauthorizedAccess("Access to %s denied" % self.url)
        else:
            raise HTTPError("Request to %s failed: %s" % (self.url, error))
    except (socket.timeout, urllib.error.URLError) as error:
        raise HTTPError("Unable to connect to server %s: %s" % (self.url, error))
    if request.getcode() > 200:
        logger.debug("Server responded with status code %s", request.getcode())
    try:
        self.total_size = int(request.info().get("Content-Length").strip())
    except AttributeError:
        logger.warning("Failed to read response's content length")
        self.total_size = 0
    self.response_headers = request.getheaders()
    self.status_code = request.getcode()
    if self.status_code > 299:
        logger.warning("Request responded with code %s", self.status_code)
    self.content = b"".join(self._iter_chunks(request))
    self.info = request.info()
    request.close()
    return self
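The method above joins chunks from a _iter_chunks helper that is not shown in this listing; a minimal sketch of what it could look like, assuming the same buffer_size attribute and socket.timeout handling used by the older variants further down:

def _iter_chunks(self, request):
    """Yield the response body in buffer_size chunks until it is exhausted (assumed helper)."""
    while True:
        try:
            chunk = request.read(self.buffer_size)
        except socket.timeout:
            logger.error("Request timed out")
            break
        if not chunk:
            break
        yield chunk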
def submit_external_job(request, application_path):
    ParametersFormSet = formset_factory(ParameterForm, extra=0)

    if application_path.startswith('abfs:/') and not application_path.startswith('abfs://'):
        application_path = application_path.replace("abfs:/", "abfs://")
    elif application_path.startswith('s3a:/') and not application_path.startswith('s3a://'):
        application_path = application_path.replace('s3a:/', 's3a://')
    else:
        application_path = "/" + application_path

    if application_path.startswith("abfs://"):
        application_path = abfspath(application_path)

    if request.method == 'POST':
        params_form = ParametersFormSet(request.POST)

        if params_form.is_valid():
            mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])
            mapping['dryrun'] = request.POST.get('dryrun_checkbox') == 'on'
            application_name = os.path.basename(application_path)
            application_class = Bundle if application_name == 'bundle.xml' else Coordinator if application_name == 'coordinator.xml' else get_workflow()
            mapping[application_class.get_application_path_key()] = os.path.dirname(application_path)

            try:
                submission = Submission(request.user, fs=request.fs, jt=request.jt, properties=mapping)
                job_id = submission.run(application_path)
            except RestException as ex:
                detail = ex._headers.get('oozie-error-message', ex)
                if 'Max retries exceeded with url' in str(detail):
                    detail = '%s: %s' % (_('The Oozie server is not running'), detail)
                LOG.exception(smart_str(detail))
                raise PopupException(_("Error submitting job %s") % (application_path,), detail=detail)

            jsonify = request.POST.get('format') == 'json'
            if jsonify:
                return JsonResponse({'status': 0, 'job_id': job_id, 'type': 'external_workflow'}, safe=False)
            else:
                request.info(_('Oozie job submitted'))
                view = 'list_oozie_bundle' if application_name == 'bundle.xml' else 'list_oozie_coordinator' if application_name == 'coordinator.xml' else 'list_oozie_workflow'
                return redirect(reverse('oozie:%s' % view, kwargs={'job_id': job_id}))
        else:
            request.error(_('Invalid submission form: %s' % params_form.errors))
    else:
        parameters = Submission(request.user, fs=request.fs, jt=request.jt).get_external_parameters(application_path)
        initial_params = ParameterForm.get_initial_params(parameters)
        params_form = ParametersFormSet(initial=initial_params)

    popup = render('editor/submit_job_popup.mako', request, {
        'params_form': params_form,
        'name': _('Job'),
        'action': reverse('oozie:submit_external_job', kwargs={'application_path': application_path}),
        'show_dryrun': os.path.basename(application_path) != 'bundle.xml',
        'return_json': request.GET.get('format') == 'json'
    }, force_template=True).content
    return JsonResponse(popup, safe=False)
def get_request_filename(request):
    '''Figure out the filename for an HTTP download.'''
    # Check to see if a filename is specified in the HTTP headers.
    if 'Content-Disposition' in request.info():
        disposition = request.info()['Content-Disposition']
        pieces = re.split(r'\s*;\s*', disposition)
        for piece in pieces:
            if piece.startswith('filename='):
                return piece[len('filename='):]
    # If no filename was specified, pick a reasonable default.
    return os.path.basename(urlsplit(request.url).path) or 'index.html'
def get(self, data=None):
    logger.debug("GET %s", self.url)
    req = urllib.request.Request(url=self.url, data=data, headers=self.headers)
    try:
        if self.opener:
            request = self.opener.open(req, timeout=self.timeout)
        else:
            request = urllib.request.urlopen(req, timeout=self.timeout)
    except (urllib.error.HTTPError, CertificateError) as error:
        raise HTTPError("Unavailable url %s: %s" % (self.url, error))
    except (socket.timeout, urllib.error.URLError) as error:
        raise HTTPError("Unable to connect to server %s: %s" % (self.url, error))
    if request.getcode() > 200:
        logger.debug("Server responded with status code %s", request.getcode())
    try:
        total_size = request.info().get("Content-Length").strip()
        total_size = int(total_size)
    except AttributeError:
        logger.warning("Failed to read response's content length")
        total_size = 0
    self.response_headers = request.getheaders()
    self.status_code = request.getcode()
    if self.status_code > 299:
        logger.warning("Request responded with code %s", self.status_code)
    chunks = []
    while 1:
        if self.stop_request and self.stop_request.is_set():
            self.content = ""
            return self
        try:
            chunk = request.read(self.buffer_size)
        except socket.timeout:
            logger.error("Request timed out")
            self.content = ""
            return self
        self.downloaded_size += len(chunk)
        if self.thread_queue:
            self.thread_queue.put((chunk, self.downloaded_size, total_size))
        else:
            chunks.append(chunk)
        if not chunk:
            break
    request.close()
    self.content = b"".join(chunks)
    self.info = request.info()
    return self
def rerun_oozie_coordinator(request, job_id, app_path=None):
    oozie_coordinator = check_job_access_permission(request, job_id)
    check_job_edition_permission(oozie_coordinator, request.user)
    ParametersFormSet = formset_factory(ParameterForm, extra=0)
    if app_path is None:
        app_path = oozie_coordinator.coordJobPath
    else:
        app_path = urllib.parse.unquote(app_path)
    return_json = request.GET.get('format') == 'json'

    if request.method == 'POST':
        params_form = ParametersFormSet(request.POST)
        rerun_form = RerunCoordForm(request.POST, oozie_coordinator=oozie_coordinator)

        if sum([rerun_form.is_valid(), params_form.is_valid()]) == 2:
            args = {}
            args['deployment_dir'] = app_path

            params = {
                'type': 'action',
                'scope': ','.join(oozie_coordinator.aggreate(rerun_form.cleaned_data['actions'])),
                'refresh': rerun_form.cleaned_data['refresh'],
                'nocleanup': rerun_form.cleaned_data['nocleanup'],
            }

            properties = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

            _rerun_coordinator(request, job_id, args, params, properties)

            if rerun_form.cleaned_data['return_json']:
                return JsonResponse({'status': 0, 'job_id': job_id}, safe=False)
            else:
                request.info(_('Coordinator re-running.'))
                return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
        else:
            request.error(_('Invalid submission form: %s') % smart_unicode(rerun_form.errors))
            return list_oozie_coordinator(request, job_id)
    else:
        rerun_form = RerunCoordForm(oozie_coordinator=oozie_coordinator, return_json=return_json)
        initial_params = ParameterForm.get_initial_params(oozie_coordinator.conf_dict)
        params_form = ParametersFormSet(initial=initial_params)

    return render('dashboard/rerun_coord_popup.mako', request, {
        'rerun_form': rerun_form,
        'params_form': params_form,
        'action': reverse('oozie:rerun_oozie_coord', kwargs={
            'job_id': job_id,
            'app_path': urllib.parse.quote(app_path.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS)
        }),
        'return_json': return_json,
        'is_mini': request.GET.get('is_mini', False),
    }, force_template=True)
def rerun_oozie_bundle(request, job_id, app_path):
    oozie_bundle = check_job_access_permission(request, job_id)
    check_job_edition_permission(oozie_bundle, request.user)
    ParametersFormSet = formset_factory(ParameterForm, extra=0)
    app_path = urllib.parse.unquote(app_path)

    if request.method == 'POST':
        params_form = ParametersFormSet(request.POST)
        rerun_form = RerunBundleForm(request.POST, oozie_bundle=oozie_bundle)

        if sum([rerun_form.is_valid(), params_form.is_valid()]) == 2:
            args = {}
            args['deployment_dir'] = app_path

            params = {
                'coord-scope': ','.join(rerun_form.cleaned_data['coordinators']),
                'refresh': rerun_form.cleaned_data['refresh'],
                'nocleanup': rerun_form.cleaned_data['nocleanup'],
            }

            if rerun_form.cleaned_data['start'] and rerun_form.cleaned_data['end']:
                date = {
                    'date-scope': '%(start)s::%(end)s' % {
                        'start': utc_datetime_format(rerun_form.cleaned_data['start']),
                        'end': utc_datetime_format(rerun_form.cleaned_data['end'])
                    }
                }
                params.update(date)

            properties = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

            _rerun_bundle(request, job_id, args, params, properties)

            request.info(_('Bundle re-running.'))
            return redirect(reverse('oozie:list_oozie_bundle', kwargs={'job_id': job_id}))
        else:
            request.error(_('Invalid submission form: %s' % (rerun_form.errors,)))
            return list_oozie_bundle(request, job_id)
    else:
        rerun_form = RerunBundleForm(oozie_bundle=oozie_bundle)
        initial_params = ParameterForm.get_initial_params(oozie_bundle.conf_dict)
        params_form = ParametersFormSet(initial=initial_params)

    return render('dashboard/rerun_bundle_popup.mako', request, {
        'rerun_form': rerun_form,
        'params_form': params_form,
        'action': reverse('oozie:rerun_oozie_bundle', kwargs={
            'job_id': job_id,
            'app_path': urllib.parse.quote(app_path.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS)
        }),
    }, force_template=True)
def manage_oozie_jobs(request, job_id, action):
    if request.method != 'POST':
        raise PopupException(_('Use a POST request to manage an Oozie job.'))

    job = check_job_access_permission(request, job_id)
    check_job_edition_permission(job, request.user)

    response = {'status': -1, 'data': ''}

    try:
        oozie_api = get_oozie(request.user)
        params = None

        if action == 'change':
            pause_time_val = request.POST.get('pause_time')
            if request.POST.get('clear_pause_time') == 'true':
                pause_time_val = ''

            end_time_val = request.POST.get('end_time')
            if end_time_val:
                end_time_val = convert_to_server_timezone(end_time_val, TIME_ZONE.get())
            if pause_time_val:
                pause_time_val = convert_to_server_timezone(pause_time_val, TIME_ZONE.get())

            params = {'value': 'endtime=%s' % (end_time_val) + ';'
                               'pausetime=%s' % (pause_time_val) + ';'
                               'concurrency=%s' % (request.POST.get('concurrency'))}
        elif action == 'ignore':
            oozie_api = get_oozie(request.user, api_version="v2")
            params = {
                'type': 'action',
                'scope': ','.join(job.aggreate(request.POST.get('actions').split())),
            }

        response['data'] = oozie_api.job_control(job_id, action, parameters=params)
        response['status'] = 0
        if 'notification' in request.POST:
            request.info(_(request.POST.get('notification')))
    except RestException as ex:
        ex_message = ex.message
        if ex._headers.get('oozie-error-message'):
            ex_message = ex._headers.get('oozie-error-message')
        msg = "Error performing %s on Oozie job %s: %s." % (action, job_id, ex_message)
        LOG.exception(msg)
        response['data'] = _(msg)

    return JsonResponse(response)
def update_best_videos(recent_videos):
    global best_videos
    for best_video in best_videos:
        '''minimal_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(minutes=BIG_DELAY)
        seconds_minimal = time.mktime(minimal_date.timetuple())
        seconds_best = time.mktime(best_video.published_at.timetuple())
        if seconds_minimal > seconds_best:
            continue'''
        recent_videos.append(best_video)
    best_videos = []
    ids = ','.join(str(video.video_id) for video in recent_videos)
    GET_QUERY = 'https://www.googleapis.com/youtube/v3/videos?part=statistics&id=' + ids + '&key=' + API_KEY
    request = urllib.request.urlopen(GET_QUERY)
    videos = json.loads(request.read().decode(
        request.info().get_param('charset') or 'utf-8'))
    for video in videos['items']:
        index = -1
        for i in range(len(recent_videos)):
            if recent_videos[i].video_id == video['id']:
                index = i
                break
        assert (index != -1)
        rvideo = recent_videos[index]
        new_video = Video(rvideo.title, rvideo.published_at, rvideo.video_id,
                          int(video['statistics']['commentCount']))
        best_videos.append(new_video)
    best_videos.sort()
    best_videos = best_videos[:MAX_BEST_VIDEOS_COUNT]
def fetch_data(cmdline_again, keys):
    identifier = cmdline_again.strip('/')
    lhostmatch = identifier.find("aos://16777343")
    if lhostmatch != -1 and keys[0] == 'name':
        # If player plays on localhost then just pass this info and done
        return ['(Playing on localhost)', '-', '-', '-', '-', '-']
    else:
        try:
            # Request json serverlist that buildandshoot hosts.
            request = urllib.request.urlopen("https://services.buildandshoot.com/serverlist.json",
                                             context=SSLCONTEXT)
        except urllib.error.URLError:
            logger.warning('No internet connection.')
            time.sleep(5)
            return
        data = request.read()
        encoding = request.info().get_content_charset('utf-8')
        serverlist = json.loads(data.decode(encoding))
        try:
            for server_num in range(0, len(serverlist) + 1):
                presence_info = []
                if serverlist[server_num]['identifier'] == identifier:
                    current_server = serverlist[server_num]
                    for variable in keys:
                        presence_info.append(current_server[variable])
                    if len(presence_info) == 6:
                        return presence_info
        except IndexError:
            # The loop intentionally runs one index past the end of the list:
            # hitting IndexError means the identifier was never found.
            return ['(Server is not broadcasting to master server)', '-', '-', '-', '-', '-']
def get_data_source_one(self):
    """Retrieves Data from the first Yahoo Finance source"""
    data = 'http://finance.yahoo.com/webservice/v1/symbols/' + self.stock + '/quote?format=json&view=detail'
    request = urllib.request.urlopen(data)
    response = request.read()
    charset = request.info().get_content_charset('utf-8')
    self.data_s1 = json.loads(response.decode(charset))
def grab_random_picture(subreddit):
    print("=> Grabbing random top wallpaper from /r/{}".format(sys.argv[1]))
    request = urllib.request.urlopen(
        'https://www.reddit.com/r/{}/top/.json?count=100&limit=100&t=month'.format(subreddit))
    data = json.loads(request.read().decode(request.info().get_param('charset') or 'utf-8'))
    posts = data['data']['children']
    # randint is inclusive on both ends, so the upper bound must be len(posts) - 1
    # to avoid an IndexError on the last value.
    index = random.randint(0, len(posts) - 1)
    post = posts[index]['data']
    url = post['url']
    if urllib.parse.urlparse(post['url']).netloc == 'imgur.com' and (
            not url.endswith('.png') and not url.endswith('.jpg') and
            not url.endswith('.jpeg') and not url.endswith('.gif')):
        url = '{}.png'.format(url)
    print(post['title'])
    print(url)
    return url
def findpost(subredditpage):
    for post in reddit.subreddit(subredditpage).hot(limit=1000):
        if post.score > 5 and post.is_self is False and post.id not in post_ids:
            # Check if post is an image
            url = post.url
            request = urllib.request.urlopen(url)
            mime = request.info()['Content-type']
            if mime.endswith("png") or mime.endswith("jpeg") or mime.endswith("jpg") or mime.endswith("gif"):
                # success link is an image
                credit = post.author.name
                # save picture
                image = f"{directory}posts/post{count}.jpg"
                urllib.request.urlretrieve(url, image)
                # return dictionary with required info to post located picture
                if subredditpage == sub_reddit_1:
                    caption = random.choice(caption_list_1)
                    post_ids.append(post.id)
                    result = {
                        "credit": credit,
                        "image": image,
                        "caption": caption
                    }
                    return result
                elif subredditpage == sub_reddit_2:
                    caption = random.choice(caption_list_2)
                    post_ids.append(post.id)
                    result = {
                        "credit": credit,
                        "image": image,
                        "caption": caption
                    }
                    return result
def update_best_videos(recent_videos):
    global best_videos
    for best_video in best_videos:
        """minimal_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(minutes=BIG_DELAY)
        seconds_minimal = time.mktime(minimal_date.timetuple())
        seconds_best = time.mktime(best_video.published_at.timetuple())
        if seconds_minimal > seconds_best:
            continue"""
        recent_videos.append(best_video)
    best_videos = []
    ids = ",".join(str(video.video_id) for video in recent_videos)
    GET_QUERY = "https://www.googleapis.com/youtube/v3/videos?part=statistics&id=" + ids + "&key=" + API_KEY
    request = urllib.request.urlopen(GET_QUERY)
    videos = json.loads(request.read().decode(request.info().get_param("charset") or "utf-8"))
    for video in videos["items"]:
        index = -1
        for i in range(len(recent_videos)):
            if recent_videos[i].video_id == video["id"]:
                index = i
                break
        assert index != -1
        rvideo = recent_videos[index]
        new_video = Video(rvideo.title, rvideo.published_at, rvideo.video_id,
                          int(video["statistics"]["commentCount"]))
        best_videos.append(new_video)
    best_videos.sort()
    best_videos = best_videos[:MAX_BEST_VIDEOS_COUNT]
def get_html(url):
    tryes = 5
    # Build our request
    req = urllib.request.Request(url)
    # Accept gzipped content
    req.add_header('Accept-Encoding', 'gzip')
    # Fake user agent
    req.add_header('User-Agent', USER_AGENT)
    while tryes > 0:
        try:
            request = urllib.request.urlopen(req)
            break
        except socket.timeout:
            tryes -= 1
        except urllib.error.URLError as error:
            if isinstance(error.reason, socket.timeout):
                tryes -= 1
            else:
                print("URL error: " + str(error.reason))
                quit()
    if request.info().get('Content-Encoding') == 'gzip':
        buffer = BytesIO(request.read())
        uncompressed_buffer = gzip.GzipFile(fileobj=buffer)
        html = BeautifulSoup(uncompressed_buffer.read(), 'lxml')
    else:
        html = BeautifulSoup(request.read(), "lxml")
    request.close()
    return html
def rerun_oozie_job(request, job_id, app_path=None):
    ParametersFormSet = formset_factory(ParameterForm, extra=0)
    oozie_workflow = check_job_access_permission(request, job_id)
    check_job_edition_permission(oozie_workflow, request.user)
    if app_path is None:
        app_path = oozie_workflow.appPath
    else:
        app_path = urllib.parse.unquote(app_path)
    return_json = request.GET.get('format') == 'json'

    if request.method == 'POST':
        rerun_form = RerunForm(request.POST, oozie_workflow=oozie_workflow)
        params_form = ParametersFormSet(request.POST)

        if sum([rerun_form.is_valid(), params_form.is_valid()]) == 2:
            args = {}

            if request.POST.get('rerun_form_choice') == 'fail_nodes':
                args['fail_nodes'] = 'true'
            else:
                args['skip_nodes'] = ','.join(rerun_form.cleaned_data['skip_nodes'])
            args['deployment_dir'] = app_path

            mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

            _rerun_workflow(request, job_id, args, mapping)

            if rerun_form.cleaned_data['return_json']:
                return JsonResponse({'status': 0, 'job_id': job_id}, safe=False)
            else:
                request.info(_('Workflow re-running.'))
                return redirect(reverse('oozie:list_oozie_workflow', kwargs={'job_id': job_id}))
        else:
            request.error(_('Invalid submission form: %s %s' % (rerun_form.errors, params_form.errors)))
    else:
        rerun_form = RerunForm(oozie_workflow=oozie_workflow, return_json=return_json)
        initial_params = ParameterForm.get_initial_params(oozie_workflow.conf_dict)
        params_form = ParametersFormSet(initial=initial_params)

    return render('dashboard/rerun_workflow_popup.mako', request, {
        'rerun_form': rerun_form,
        'params_form': params_form,
        'action': reverse('oozie:rerun_oozie_job', kwargs={
            'job_id': job_id,
            'app_path': urllib.parse.quote(app_path.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS)
        }),
        'return_json': return_json,
        'is_mini': request.GET.get('is_mini', False),
    }, force_template=True)
def __init__(self, url, path, threads):
    self.url = url
    self.path = path
    self.threads = threads
    self.filename = url.split('/')[-1]
    request = urllib.request.urlopen(self.url)
    metadata = request.info()
    self.filesize = int(metadata.get_all("Content-Length")[0])
def get_data_source_two(self):
    """Retrieves Data from the second Yahoo Finance source"""
    data = "http://query.yahooapis.com/v1/public/yql?q=" \
           "SELECT%20*%20FROM%20yahoo.finance.quotes%20WHERE%20symbol%20IN%20('" + self.stock + "')" \
           "&format=json&env=http://datatables.org/alltables.env"
    request = urllib.request.urlopen(data)
    response = request.read()
    charset = request.info().get_content_charset('utf-8')
    self.data_s2 = json.loads(response.decode(charset))
def get_request_filename(request):
    '''Figure out the filename for an HTTP download.'''
    # Check to see if a filename is specified in the HTTP headers.
    if 'Content-Disposition' in request.info():
        disposition = request.info()['Content-Disposition']
        pieces = re.split(r'\s*;\s*', disposition)
        for piece in pieces:
            if piece.startswith('filename='):
                filename = piece[len('filename='):]
                # Strip exactly one " from each end.
                if filename.startswith('"'):
                    filename = filename[1:]
                if filename.endswith('"'):
                    filename = filename[:-1]
                # Interpret backslashed quotes.
                filename = filename.replace('\\"', '"')
                return filename
    # If no filename was specified, pick a reasonable default.
    return os.path.basename(urlsplit(request.url).path) or 'index.html'
def get_request_filename(request):
    '''Figure out the filename for an HTTP download.'''
    # Check to see if a filename is specified in the HTTP headers.
    if 'Content-Disposition' in request.info():
        disposition = request.info()['Content-Disposition']
        pieces = re.split(r'\s*;\s*', disposition)
        for piece in pieces:
            if piece.startswith('filename='):
                filename = piece[len('filename='):]
                # Strip exactly one " from each end.
                if filename.startswith('"'):
                    filename = filename[1:]
                if filename.endswith('"'):
                    filename = filename[:-1]
                # Interpret backslashed quotes.
                filename = filename.replace('\\"', '"')
                return filename
    # If no filename was specified, pick a reasonable default.
    return os.path.basename(urlsplit(request.url).path) or 'index.html'
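A hedged usage sketch for the helper above; the URL is illustrative only:

import urllib.request

# Hypothetical URL; the helper falls back to the URL path (or 'index.html')
# when no Content-Disposition header is present.
with urllib.request.urlopen("https://example.com/files/report") as request:
    print(get_request_filename(request))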
def embed_images(chapter, book):
    global finishedimages
    global imageUID
    content = chapter.content.split(sep='src="')
    output = content[0]
    for x in range(1, len(content)):
        imageurl = content[x][:content[x].find('"')]
        request = get_url(imageurl)
        if request != 0:
            headers = request.info()
            mediatype = headers["Content-Type"]
            extension = ""
            if mediatype == "text/plain":
                extension = ".txt"
            if mediatype == "text/css":
                extension = ".css"
            if mediatype == "image/png":
                extension = ".png"
            if mediatype == "image/jpeg":
                extension = ".jpg"
            if mediatype == "image/webp":
                extension = ".webp"
            if mediatype == "image/bmp":
                extension = ".bmp"
            if mediatype == "image/gif":
                extension = ".gif"
            if mediatype == "image/svg+xml":
                extension = ".svg"
            if mediatype == "image/tiff":
                extension = ".tiff"
            # imagefile = "images/" + urllib.parse.quote(imageurl, safe='') + extension
            if imageurl not in finishedimages:
                imagefile = "images/" + str(imageUID) + extension
                item = epub.EpubItem(uid=imagefile, file_name=imagefile, media_type=mediatype,
                                     content=request.read())
                book.add_item(item)
                finishedimages[imageurl] = str(imageUID)
                imageUID += 1
                print("embedded " + mediatype + " file")
            else:
                imagefile = "images/" + finishedimages[imageurl] + extension
        else:
            imagefile = imageurl
        sectionoutput = 'src="' + imagefile + content[x][content[x].find('"'):]
        output = output + sectionoutput
    chapter.content = output
def get(self, data=None):
    logger.debug("GET %s", self.url)
    try:
        req = urllib.request.Request(url=self.url, data=data, headers=self.headers)
    except ValueError as ex:
        raise HTTPError("Failed to create HTTP request to %s: %s" % (self.url, ex)) from ex
    try:
        if self.opener:
            request = self.opener.open(req, timeout=self.timeout)
        else:
            request = urllib.request.urlopen(req, timeout=self.timeout)  # pylint: disable=consider-using-with
    except (urllib.error.HTTPError, CertificateError) as error:
        if error.code == 401:
            raise UnauthorizedAccess("Access to %s denied" % self.url) from error
        raise HTTPError("%s" % error, code=error.code) from error
    except (socket.timeout, urllib.error.URLError) as error:
        raise HTTPError("Unable to connect to server %s: %s" % (self.url, error)) from error
    self.response_headers = request.getheaders()
    self.status_code = request.getcode()
    if self.status_code > 299:
        logger.warning("Request responded with code %s", self.status_code)
    try:
        self.total_size = int(request.info().get("Content-Length").strip())
    except AttributeError:
        self.total_size = 0
    self.content = b"".join(self._iter_chunks(request))
    self.info = request.info()
    request.close()
    return self
def extract_from_url(url, progress_bar=None):
    request = urllib.request.urlopen(url)
    if progress_bar is not None:
        progress_bar.desc = filename = request.info().get_filename()
        progress_bar.total = int(request.headers.get("Content-length", 0))
    with libarchive.stream_reader(request.fp) as stream:
        for member in stream:
            if member.isdir:
                Path(member.path).mkdir()
                continue
            with open(member.path, "wb") as fp:
                for block in member.get_blocks():
                    fp.write(block)
                    if progress_bar is not None:
                        progress_bar.update(len(block))
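The desc, total, and update() members used above match tqdm's interface; a hedged usage sketch, assuming tqdm and an illustrative archive URL:

from tqdm import tqdm

# Hypothetical URL; the archive just needs to be in a format libarchive can read.
with tqdm(unit="B", unit_scale=True) as progress_bar:
    extract_from_url("https://example.com/archive.tar.gz", progress_bar)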
def listhosts(subdomain):
    headers = {"Authorization": "Bearer " + env('TOKEN')}
    url = 'https://api.digitalocean.com/v2/domains/' + (env('DOMAIN')) + '/records'
    req = urllib.request.Request(url, headers=headers)
    request = urllib.request.urlopen(req)
    data = json.loads(request.read().decode(request.info().get_param('charset') or 'utf-8'))
    for i in (data['domain_records']):
        for k, v in i.items():
            if v == subdomain:
                recid = (i['id'])
                return recid
def get_data(self, docid, fields):
    uri = self.solr + self.core + '/get?id=' + urllib.parse.quote(docid) + '&fl=' + ','.join(fields)
    request = urllib.request.urlopen(uri)
    encoding = request.info().get_content_charset('utf-8')
    data = request.read()
    request.close()
    solr_doc = json.loads(data.decode(encoding))
    data = None
    if 'doc' in solr_doc:
        data = solr_doc['doc']
    return data
def curl():
    if len(sys.argv) > 0:
        # Remove the file name
        del sys.argv[0]
    if len(sys.argv) == 0:
        print("MyCurl: Supply a URL to retrieve")
        return
    if sys.argv[0] == '-I':
        # Only headers
        if len(sys.argv) > 1:
            if is_http(sys.argv[1]):
                print(urllib.request.urlopen(sys.argv[1]).info())
            else:
                print("MyCurl: Only http or https URLs can be requested")
        else:
            print("MyCurl: Supply a URL to retrieve")
    elif sys.argv[0] == '-i':
        # Header and body
        if len(sys.argv) > 1:
            if is_http(sys.argv[1]):
                request = urllib.request.urlopen(sys.argv[1])
                html = request.read()
                try:
                    html = str(html, encoding='utf-8')
                    print(request.info())
                    print(html)
                except Exception:
                    print("NON-TEXTUAL CONTENT")
            else:
                print("MyCurl: Only http or https URLs can be requested")
        else:
            print("MyCurl: Supply a URL to retrieve")
    else:
        # Just body
        if '-' in sys.argv[0]:
            print("MyCurl: Only -i and -I arguments are supported")
        else:
            if is_http(sys.argv[0]):
                html = urllib.request.urlopen(sys.argv[0]).read()
                try:
                    html = str(html, encoding='utf-8')
                    print(html)
                except Exception:
                    print("NON-TEXTUAL CONTENT")
            else:
                print("MyCurl: Only http or https URLs can be requested")
def command(uri, qs):
    print('command()', uri)
    url = 'http://' + config['WEBIF']['HOST'] + '/' + uri
    try:
        request = urllib.request.urlopen(url, timeout=int(config['WEBIF']['TIMEOUT']))
        mimetype = request.info()['Content-Type']
        response = request.read()
    except (urllib.error.HTTPError, urllib.error.URLError) as err:
        error('command()', 'urllib.error', err)
        return False
    return {'data': response, 'headers': {'Content-Type': mimetype}}
def get(self, data=None):
    logger.debug("GET %s", self.url)
    req = urllib.request.Request(url=self.url, data=data, headers=self.headers)
    try:
        request = urllib.request.urlopen(req, timeout=self.timeout)
    except (urllib.error.HTTPError, CertificateError) as error:
        logger.error("Unavailable url (%s): %s", self.url, error)
    except (socket.timeout, urllib.error.URLError) as error:
        logger.error("Unable to connect to server (%s): %s", self.url, error)
    else:
        # Response code is available with getcode but should be 200 if there
        # is no exception
        # logger.debug("Got response code: %s", request.getcode())
        try:
            total_size = request.info().get('Content-Length').strip()
            total_size = int(total_size)
        except AttributeError:
            logger.warning("Failed to read response's content length")
            total_size = 0
        chunks = []
        while 1:
            if self.stop_request and self.stop_request.is_set():
                self.content = ''
                return self
            try:
                chunk = request.read(self.buffer_size)
            except socket.timeout:
                logger.error("Request timed out")
                self.content = ''
                return self
            self.downloaded_size += len(chunk)
            if self.thread_queue:
                self.thread_queue.put((chunk, self.downloaded_size, total_size))
            else:
                chunks.append(chunk)
            if not chunk:
                break
        request.close()
        self.content = b''.join(chunks)
    return self
async def test_relation(model, app):
    haproxy = model.applications["haproxy"]
    haproxy_unit = haproxy.units[0]
    config = await app.get_config()
    subdomain = config["proxy-subdomain"]["value"]
    address = f"http://{subdomain}.{haproxy_unit.public_address}.xip.io/admin"
    print(f"Checking address: {address}")
    request = urllib.request.urlopen(address)
    info = request.info()
    print(f"Info: {info}")
    assert request.getcode() == 200
    server_id = "not found"
    for item in info.values():
        if "SERVERID" in item:
            server_id = item.split(";")[0]
        else:
            continue
    print(f"server_id: {server_id}")
    assert subdomain in server_id
def download_file(url, filename):
    try:
        # request data from url
        request = urllib.request.urlopen(url)
        header = request.info()
        # writing data to local file...
        print('Loading file from url.', end='')
        chunk_size = 1024
        local_file = open(filename, 'wb')
        while True:
            chunk = request.read(chunk_size)
            # ...as long there is data
            if not chunk:
                break
            local_file.write(chunk)
            print('.', end='')
    except IOError:
        print('Problem reading url: ', url)
    print('')
def callback():
    progress = Progressbar(root, orient=HORIZONTAL, length=100, mode='determinate')
    progress.pack()
    progress.place(x=10, y=200)
    request = urllib.request.urlopen(url)
    filename = str(file_name_field.get())  ## filename will be the basepath of url
    meta = request.info()  ## get information from request object used for determining size of content
    file_size = int(meta['Content-Length'])
    buffer_size = file_size / segments
    file_name1.configure(text="Filename: " + filename)
    print("File size : {0:.2f}".format(float((file_size / 1024) / 1024)))  ## get file size in MB
    file_download_size.configure(text="File Size: {0:.2f} MB".format((file_size / 1024) / 1024))
    total_download = 0
    fd = open(filename, 'wb')  ## save file to current working directory
    while total_download != file_size:
        buffer = request.read(int(buffer_size))  ## reading files upto buffer_size
        fd.write(buffer)
        total_download += len(buffer)
        cur_download.configure(text="Downloaded {0}%".format(
            math.trunc((int(total_download) / int(file_size)) * 100)))
        progress['value'] = math.trunc(
            (int(total_download) / int(file_size)) * 100)  ## To retrieve percentage of download
    print("Download success")
    fd.close()
    file_download_size.destroy()
    file_name1.destroy()
    progress.destroy()
    cur_download.destroy()
    messagebox.showinfo(
        "File Downloaded",
        "Your file {0} has been saved in current directory".format(filename))
def make_http_request_urllib(logger, url, method):
    url = _prepare_url_before_http_request(logger, url, method)
    req = urllib.request.Request(
        url,
        data=None,
        headers={'User-Agent': get_user_agent()},
        method=method)
    logger.debug("urllib.request.urlopen ({}) method={}".format(url, method))
    try:
        with urllib.request.urlopen(req,
                                    context=TRequestPolicy.SSL_CONTEXT,
                                    timeout=TRequestPolicy.HTTP_TIMEOUT) as request:
            data = '' if method == "HEAD" else request.read()
            headers = request.info()
            TRequestPolicy.register_successful_request()
            return request.geturl(), headers, data
    except UnicodeError as exp:
        raise RobotHttpException("cannot redirect to cyrillic web domains or some unicode error",
                                 url, 520, method)
    except (ConnectionError, http.client.HTTPException) as exp:
        raise RobotHttpException(str(exp), url, 520, method)
    except socket.timeout as exp:
        logger.error("socket timeout, while getting {}: {}".format(url, exp))
        raise RobotHttpException("socket.timeout", url, 504, method)
    except urllib.error.HTTPError as e:
        # HTTPError must be handled before its parent class URLError,
        # otherwise this branch would never be reached.
        if e.code == 503:
            TRequestPolicy.deal_with_http_code_503(logger)
        if e.code == 405 and method == "HEAD":
            return make_http_request_urllib(logger, url, "GET")
        raise RobotHttpException("{} extype:{}".format(str(e), type(e)), url, e.code, method)
    except urllib.error.URLError as exp:
        code = -1
        if hasattr(exp, 'code'):
            code = exp.code
        raise RobotHttpException("{} extype:{}".format(str(exp), type(exp)), url, code, method)
def find_recent_videos():
    global last_update
    now = last_update.strftime("%Y-%m-%dT%H:%M:%S")
    last_update = datetime.datetime.now(datetime.timezone.utc)
    GET_QUERY = 'https://www.googleapis.com/youtube/v3/search?publishedAfter=' + str(
        now) + 'Z&order=relevance&part=snippet&maxResults=50&key=' + API_KEY
    request = urllib.request.urlopen(GET_QUERY)
    videos = json.loads(request.read().decode(
        request.info().get_param('charset') or 'utf-8'))
    recent_videos = []
    for video in videos['items']:
        try:
            title = video['snippet']['title']
            published_at = parse_youtube_time(video['snippet']['publishedAt'])
            video_id = video['id']['videoId']
            recent_videos.append(Video(title, published_at, video_id))
        except:
            continue
    return recent_videos
def get_json(self, url):
    """
    Fetch JSON data from a web server and return a dictionary with same.

    :param str url: URL where to fetch the JSON data from
    :return: a dictionary with JSON data
    :rtype: dict
    """
    json_data = dict()
    try:
        request = urllib.request.urlopen(url)
    except urllib.error.URLError as e:
        self.logger.error("{}: Error opening url: {}".format(type(e).__name__, url))
        return json_data
    data = request.read()
    encoding = request.info().get_content_charset('utf-8')
    json_data = json.loads(data.decode(encoding))
    return json_data
def download_file(request, output_file, stdout=sys.stdout):
    digest = hashlib.sha1()
    file_size_str = request.info().get("Content-Length")
    file_size = int(file_size_str) if file_size_str is not None else None
    bytes_read = 0
    while True:
        buf = request.read(4096)
        if not buf:
            break
        digest.update(buf)
        if output_file:
            output_file.write(buf)
        bytes_read += len(buf)
        percentage = ""
        kb_downloaded = format_bytes(bytes_read)
        total_kb = ""
        if file_size:
            percentage = " {}%".format(round(100 * bytes_read / file_size))
            total_kb = "/" + format_bytes(file_size)
        print("downloaded{} {}{}".format(percentage, kb_downloaded, total_kb), file=stdout)
    return digest.hexdigest()
def download_file(request, output_file, stdout=sys.stdout):
    digest = hashlib.sha1()
    file_size_str = request.info().get('Content-Length')
    file_size = int(file_size_str) if file_size_str is not None else None
    bytes_read = 0
    while True:
        buf = request.read(4096)
        if not buf:
            break
        digest.update(buf)
        if output_file:
            output_file.write(buf)
        bytes_read += len(buf)
        percentage = ''
        kb_downloaded = format_bytes(bytes_read)
        total_kb = ''
        if file_size:
            percentage = ' {}%'.format(round(100 * bytes_read / file_size))
            total_kb = '/' + format_bytes(file_size)
        print('downloaded{} {}{}'.format(percentage, kb_downloaded, total_kb), file=stdout)
    return digest.hexdigest()
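A hedged usage sketch for the download helper above; the URL and output path are illustrative only:

import urllib.request

# Hypothetical URL and filename; download_file streams the body, reports progress,
# and returns the SHA-1 hex digest of what was written.
with urllib.request.urlopen("https://example.com/data.bin") as request, \
        open("data.bin", "wb") as output_file:
    print("sha1:", download_file(request, output_file))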
def get(self, data=None):
    req = urllib.request.Request(url=self.url, data=data, headers=self.headers)
    try:
        request = urllib.request.urlopen(req, timeout=self.timeout)
    except (urllib.error.HTTPError, CertificateError) as e:
        logger.error("Unavailable url (%s): %s", self.url, e)
    except (socket.timeout, urllib.error.URLError) as e:
        logger.error("Unable to connect to server (%s): %s", self.url, e)
    else:
        try:
            total_size = request.info().get('Content-Length').strip()
            total_size = int(total_size)
        except AttributeError:
            total_size = 0
        chunks = []
        while 1:
            if self.stop_request and self.stop_request.is_set():
                self.content = ''
                return self
            try:
                chunk = request.read(self.buffer_size)
            except socket.timeout as e:
                logger.error("Request timed out")
                self.content = ''
                return self
            self.downloaded_size += len(chunk)
            if self.thread_queue:
                self.thread_queue.put((chunk, self.downloaded_size, total_size))
            else:
                chunks.append(chunk)
            if not chunk:
                break
        request.close()
        self.content = b''.join(chunks)
    return self
def get_content_by_url(self, url: str) -> str:
    """Return parsed content extracted from the given URL.

    Picks a suitable page-processing template and passes it to the parser,
    which extracts the required information and returns it to this method.
    The method is mainly used to obtain the extracted page content and hand
    it over to other methods that take care of saving it.

    :param url: Address of the page to parse and whose content to save.
    :return: Page content ready to be saved.
    """
    hostname = urllib.parse.urlparse(url).hostname
    request = urllib.request.urlopen(url)
    # Try to read the page.
    try:
        # Convert the page's original encoding to a proper string.
        html = request.read().decode(request.info().get_charsets()[-1])
    except Exception:
        # If fetching the page or converting the encoding fails, simply abort.
        return ''
    extractor = ExtractorContent(**self.get_template_by_hostname(hostname))
    extractor.feed(html)
    extractor.close()
    return extractor.get_text_content()
def find_recent_videos():
    global last_update
    now = last_update.strftime("%Y-%m-%dT%H:%M:%S")
    last_update = datetime.datetime.now(datetime.timezone.utc)
    GET_QUERY = (
        "https://www.googleapis.com/youtube/v3/search?publishedAfter="
        + str(now)
        + "Z&order=relevance&part=snippet&maxResults=50&key="
        + API_KEY
    )
    request = urllib.request.urlopen(GET_QUERY)
    videos = json.loads(request.read().decode(request.info().get_param("charset") or "utf-8"))
    recent_videos = []
    for video in videos["items"]:
        try:
            title = video["snippet"]["title"]
            published_at = parse_youtube_time(video["snippet"]["publishedAt"])
            video_id = video["id"]["videoId"]
            recent_videos.append(Video(title, published_at, video_id))
        except:
            continue
    return recent_videos
        'download_link': {'$exists': True},
        'file_name': {'$exists': False},
        'error': {'$exists': False},
        'size': {'$gt': 30}
    }, no_cursor_timeout=True)
counter = 0
for doc in docs_arr:
    counter += 1
    docId = doc['_id']
    doc.pop("_id", None)
    download_link = HOST + doc['download_link']
    print(str(counter) + '. ' + download_link)
    try:
        with urllib.request.urlopen(download_link) as request:
            request_filename_re = re.search('filename="([^"]*)"', str(request.info()))
            if request_filename_re is not None:
                request_filename = request_filename_re.group(1)
                filename, file_extension = os.path.splitext(request_filename)
            else:
                file_extension = re.search('application/(.*)', str(request.info())).group(1)
            md5 = hashlib.md5()
            md5.update(download_link.encode())
            local_file_name = md5.hexdigest() + file_extension
            with open(FILES_DIR + local_file_name, 'w+b') as local_file:
                local_file.write(request.read())
            file_size = os.path.getsize(FILES_DIR + local_file_name)
            doc['file_real_size'] = file_size
            doc['file_name'] = local_file_name
            doc['file_extension'] = file_extension
            documents.update({'_id': docId}, doc)