def _check_host(self, host):
    """Check if host is available"""
    try:
        requests.head(host, **self.request_params)
        return True
    except requests.ConnectionError:
        return False
def has_internet():
    """Uses www.google.com to check connectivity"""
    try:
        requests.head('http://www.google.com', timeout=1)
        return True
    except requests.ConnectionError:
        return False
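A slightly more defensive variant (a sketch, not taken from any of the projects above) that also treats timeouts as "offline" and lets the probe host be overridden:

import requests

def has_internet(probe_url='http://www.google.com', timeout=1):
    # Any response, even an error status, proves connectivity;
    # only network-level failures count as being offline.
    try:
        requests.head(probe_url, timeout=timeout)
        return True
    except (requests.ConnectionError, requests.Timeout):
        return False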
def check_playable(url, showbusy=False, ignore_dp=False, item=None):
    try:
        try:
            hmf = urlresolver.HostedMediaFile(url=url, include_disabled=False, include_universal=True)
            if hmf.valid_url() == True:
                url = hmf.resolve()
                return url
        except:
            pass
        headers = ''
        result = None
        if url.startswith('http') and '.m3u8' in url:
            result = requests.head(url, timeout=5)
            if result is None:
                return None
        elif url.startswith('http'):
            result = requests.head(url, timeout=5)
            if result is None:
                return None
        if url == "http://m4ufree.info" or "drive" in url:
            return None
        return url
    except:
        return None
def view(self, proxy, session): httpproxy = 'http://{}'.format(proxy) errors = 0 while errors < 50: session.set_option('http-proxy', httpproxy) session.set_option('http-headers', HEADERS) session.set_option('http-timeout', self.timeout) try: stream = session.streams(self.url) url = stream['worst'].url except Exception: errors += 5 else: break else: # no break -> no url -> broken proxy return errors = 0 while errors < 50: try: requests.head( url, headers=HEADERS, proxies={'http': httpproxy}, timeout=self.timeout ) except Exception: errors += 2 else: errors = max(0, errors - 1) time.sleep(0.5)
def _check_host(self, host):
    """Check if host is available"""
    try:
        requests.head(host, timeout=self.timeout)
        return True
    except requests.ConnectionError:
        return False
def _resolve_image(cls, registry, name, tag, headers=None): url = MANIFESTS_URL % (registry, name, tag) response = requests.head(url, headers=headers) if response.status_code == requests.codes.unauthorized: # solve the auth challenge and retry again authorization = cls._solve_auth_challenge(response, registry) if headers is None: headers = dict() headers.update(authorization) response = requests.head(url, headers=headers) if response.status_code == requests.codes.unauthorized: # its a private repo, raise exception raise DockerClientException('Private Docker repository - %s:%s' % (name, tag)) if response.status_code == requests.codes.ok: image_ref = '%s@%s' % (name, response.headers.get('Docker-Content-Digest')) if registry != DEFAULT_DOCKER_REGISTRY_HOST: image_ref = '%s/%s' % (urlparse(registry).netloc, image_ref) log.info('Resolved %s:%s => %s' % (name, tag, image_ref)) return image_ref # something is wrong response.raise_for_status() raise DockerClientException('Unable to resolve image %s:%s' % (name, tag))
def fetch_metadata(self):
    """
    Do a HEAD request on self.url to try to get metadata
    (self.length and self.mimetype).

    Note that while this method fills in those attributes, it does
    *not* call self.save() - so be sure to do so after calling this
    method!
    """
    if not self.url:
        return
    try:
        response = requests.head(self.url, timeout=5)
        if response.status_code == 302:
            response = requests.head(response.headers['location'], timeout=5)
    except Exception:
        pass
    else:
        if response.status_code != 200:
            return
        self.length = response.headers.get('content-length')
        self.mimetype = response.headers.get('content-type', '')
        if self.mimetype in ('application/octet-stream', ''):
            # We got a not-useful MIME type; guess!
            guess = mimetypes.guess_type(self.url)
            if guess[0] is not None:
                self.mimetype = guess[0]
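A minimal usage sketch (the model name and primary key below are illustrative, not from the original project): fetch_metadata() fills in length and mimetype but deliberately leaves persistence to the caller.

# Hypothetical Django-style usage; Attachment is a stand-in model name.
attachment = Attachment.objects.get(pk=42)
attachment.fetch_metadata()   # populates attachment.length / attachment.mimetype
attachment.save()             # fetch_metadata() never calls save() itself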
def check_playable(url):
    """
    checks if passed url is a live link

    :param str url: stream url
    :return: playable stream url or None
    :rtype: str or None
    """
    import urllib
    import urlparse
    import requests
    try:
        headers = url.rsplit('|', 1)[1]
    except IndexError:
        headers = ''
    headers = urllib.quote_plus(headers).replace('%3D', '=') if ' ' in headers else headers
    headers = dict(urlparse.parse_qsl(headers))
    result = None
    try:
        if url.startswith('http') and '.m3u8' in url:
            result = requests.head(url.split('|')[0], headers=headers, timeout=5)
            if result is None:
                return None
        elif url.startswith('http'):
            result = requests.head(url.split('|')[0], headers=headers, timeout=5)
            if result is None:
                return None
    except Exception:
        pass
    return result
def CheckBundleDigest(self, container, uripath, bundle_data): """ Download the Bundle from CloudFiles into a local path container - the CloudFiles container in which to find the Vault DB bundle_data - a dict containing atlest the 'id' and 'md5' of the bundle localpath - the local path at which to store the downloaded VaultDB """ self.apihost = self._get_container(container) try: fulluri = uripath + '/BUNDLES/' + bundle_data['name'] self.ReInit(self.sslenabled, '/' + fulluri) self.headers['X-Auth-Token'] = self.authenticator.AuthToken self.log.debug('uri: %s', self.Uri) self.log.debug('headers: %s', self.Headers) try: res = requests.head(self.Uri, headers=self.Headers) except requests.exceptions.SSLError as ex: self.log.error('Requests SSLError: {0}'.format(str(ex))) res = requests.head(self.Uri, headers=self.Headers, verify=False) if res.status_code == 404: raise UserWarning('Server failed to find the specified bundle') elif res.status_code >= 300: raise UserWarning('Server responded unexpectedly during download (Code: ' + str(res.status_code) + ' )') else: digest = res.headers['etag'].upper() result = (digest == bundle_data['md5']) self.log.debug('CloudFiles Bundle Digest (' + digest + ') == Bundle MD5 (' + bundle_data['md5'] + ')? ' + str(result)) return result except LookupError: raise UserWarning('Invalid VaultDB Data provided.')
def test_sni_urls(self): """ Test SNI urls :return: """ print "" # Just checking all providers - we should make this error on non-existent urls. for provider in [ provider for provider in providers.makeProviderList() if provider.name not in self.self_signed_cert_providers ]: print "Checking %s" % provider.name try: requests.head(provider.url, verify=certifi.old_where(), timeout=10) except requests.exceptions.Timeout: pass except requests.exceptions.SSLError as error: if u"SSL3_GET_SERVER_CERTIFICATE" not in ex(error): print "SSLError on %s: %s" % (provider.name, ex(error.message)) raise else: print "Cannot verify certificate for %s" % provider.name except Exception: # pylint: disable=broad-except pass
def __init__(self, server, userAgent=_getUserAgent()):
    self.server = server
    self.userAgent = userAgent
    try:
        requests.head(self.server)
    except requests.exceptions.ConnectionError as e:
        raise ValueError("server %s does not look to be alive: %s" % (server, e.message))
def load_units(self): """ Load units of the function descriptor content, section 'virtual_deployment_units' """ if 'virtual_deployment_units' not in self.content: log.error("Function id={0} is missing the " "'virtual_deployment_units' section" .format(self.id)) return for vdu in self.content['virtual_deployment_units']: unit = Unit(vdu['id']) self.associate_unit(unit) # Check vm image URLs # only perform a check if vm_image is a URL vdu_image_path = vdu['vm_image'] if validators.url(vdu_image_path): # Check if is URL/URI. try: # Check if the image URL is accessible # within a short time interval requests.head(vdu_image_path, timeout=1) except (requests.Timeout, requests.ConnectionError): evtlog.log("VDU image not found", "Failed to verify the existence of VDU image at" " the address '{0}'. VDU id='{1}'" .format(vdu_image_path, vdu['id']), self.id, 'evt_vnfd_itg_vdu_image_not_found') return True
def __init__(self, cfg=None, service_url=None, service_mode=None): """ :param cfg: Configuration for the service :param service_url: The service URL :param service_mode: set to "private" if imeji instance runs in "private" mode (any other value considered as standard imeji instance mode ) If the imeji instance is not available or does not run, the instantiation will throw an error message. """ self.cfg = cfg or Config() self.service_url = service_url or self.cfg.get('service', 'url') self.service_mode_private = False or (self.cfg.get('service', 'mode', 'public') == 'private' or service_mode == 'private') self.service_unavailable_message = \ "WARNING : The REST Interface of Imeji at {rest_service} is not available or there is another problem, " \ "check if the service is running under {imeji_service}" \ .format(imeji_service=self.service_url, rest_service=self.service_url + '/rest') # check if Imeji instance is running and notify the user try: requests.head(self.service_url) except Exception as e: raise ImejiError(self.service_unavailable_message, e) user = self.cfg.get('service', 'user', default=None) password = self.cfg.get('service', 'password', default=None) self.session = requests.Session() if user and password: self.session.auth = (user, password) # initialize the request query self.total_number_of_results = self.number_of_results = self.offset = self.size = None
def get_video_redirect_info(tag, format_, hd=False): assert tag assert format_ in ('webm', 'mp4'), format_ if hd: format_ = 'hd_{}'.format(format_) vidly_url = 'https://vid.ly/{}?content=video&format={}'.format( tag, format_ ) req = requests.head(vidly_url) if req.status_code == 404: raise VidlyNotFoundError(tag) assert req.status_code == 302, (req.status_code, vidly_url) req2 = requests.head(req.headers['Location']) try: content_length = int(req2.headers['Content-Length']) except KeyError: raise VideoError( 'Redirect URL lacks a Content-Length ' '(tag:{} url:{} location:{} status:{})'.format( tag, vidly_url, req.headers['Location'], req.status_code, ) ) data = { 'url': req.headers['Location'].split('?t=')[0], 'length': content_length, 'type': req2.headers['Content-Type'], } return data
def request(self, url, request_method=None, auth_method=None, timeout=None, post_data=None, user=None, password=None): timeout = float(timeout) try: if request_method == "1": # GET if (user or password) and auth_method == "2": req = requests.get(url, auth=(user, password), timeout=timeout, verify=False) else: req = requests.get(url, timeout=timeout, verify=False) elif request_method == "2": # POST if (user or password) and auth_method == "2": req = requests.post(url, data=post_data, auth=(user, password), timeout=timeout, verify=False) else: req = requests.post(url, data=post_data, timeout=timeout, verify=False) elif request_method == "3": # HEAD if (user or password) and auth_method == "2": req = requests.head(url, auth=(user, password), timeout=timeout, verify=False) else: req = requests.head(url, timeout=timeout, verify=False) time = req.elapsed except Exception as e: logging.error(e) raise try: code = req.status_code response_time = time.microseconds / 1000 except Exception as e: logging.error(e) raise data = [int(code), float(response_time)] return data
def try_earlier_webarchivals(webarchive_url): '''A webarchive_url turns out to be a bad redirect. So try and earlier webarchive of it to find what was there before the bad redirect. Returns tuple: (result_string, good_url or None) ''' attempts = 0 while True: # request the link but without the redirect, revealing the earlier # webarchivals in the headers print webarchive_url + ' (no redirects)' try: req = requests.head(webarchive_url, headers=USER_AGENT, verify=False, allow_redirects=False) except Exception, e: return ('!! Problem with request %s' % e, None) attempts += 1 # Link header has the options # e.g. from http://webarchive.nationalarchives.gov.uk/+/http://www.dft.gov.uk/statistics/releases/accessibility-2010 # Link: <http://webarchive.nationalarchives.gov.uk/20140508043011/http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="memento"; datetime="Thu, 08 May 2014 04:30:11 GMT", <http://webarchive.nationalarchives.gov.uk/20110826141806/http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="first memento"; datetime="Fri, 26 Aug 2011 14:18:06 GMT", <http://webarchive.nationalarchives.gov.uk/20140508043011/http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="last memento"; datetime="Thu, 08 May 2014 04:30:11 GMT", <http://webarchive.nationalarchives.gov.uk/20140109163921/http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="prev memento"; datetime="Thu, 09 Jan 2014 16:39:21 GMT", <http://webarchive.nationalarchives.gov.uk/20140508043011/http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="next memento"; datetime="Thu, 08 May 2014 04:30:11 GMT", <http://webarchive.nationalarchives.gov.uk/timegate/http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="timegate", <http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="original", <http://webarchive.nationalarchives.gov.uk/timemap/http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="timemap"; type="application/link-format" links = req.headers['Link'] prev_links = [l.split('; ') for l in links.split(', <') if 'rel="prev memento"' in l] if not prev_links: if attempts == 1: return ('No previous webarchive links', None) return ('No luck after trying %i previous webarchive links' % attempts, None) webarchive_url = prev_links[0][0].strip('<>') # Request the previous url to see if it is archived ok, or whether it # still redirects out print webarchive_url try: req = requests.head(webarchive_url, headers=USER_AGENT, verify=False) except Exception, e: return ('!! Problem with request %s' % e, None)
def find_url_title(self, url):
    """Retrieve the title of a given URL"""
    headers = {'User-Agent': 'Wget/1.13.4 (linux-gnu)'}
    if url.find("://") == -1:
        url = "http://" + url
    try:
        # a HEAD first to thwart attacks
        requests.head(url, headers=headers, timeout=5)
        # now the actual request
        resp = requests.get(url, headers=headers)
        html = resp.text
    except requests.RequestException as e:
        self.logger.warning(e)
        return url, e.__doc__
    except ValueError as e:
        self.logger.warning(e)
        return url, "Failed to parse url"
    else:
        resp.close()
    cmphtml = html.lower()
    start = cmphtml.find("<title")
    end = cmphtml.find("</title>")
    if start == -1 or end == -1:
        return resp.url, "Could not find page title!"
    else:
        html = html[start + 7:end]
        html = html[html.find('>') + 1:]
        return resp.url, html.strip()
def link_is_to_image(url, auth=None): """Check if the link points to an image content type. Return True or False accordingly """ if md5s3stash.is_s3_url(url): response = requests.head(url, allow_redirects=True) else: response = requests.head(url, allow_redirects=True, auth=auth) if response.status_code != 200: return False content_type = response.headers.get("content-type", None) if not content_type: return False reg_type = content_type.split("/", 1)[0].lower() # situation where a server returned 'text/html' to HEAD requests # but returned 'image/jpeg' for GET. # try a slower GET if not image type if reg_type != "image": response = requests.get(url, allow_redirects=True, auth=auth) if response.status_code != 200: return False content_type = response.headers.get("content-type", None) if not content_type: return False reg_type = content_type.split("/", 1)[0].lower() return reg_type == "image"
def forward(resource, identifier): """ Redirects request for file to direct URL. Requires global "paths" dictionary is active. resource: a given resource, like "recount2" identifier: relative path to file or directory Return value: Flask redirect response object """ # Log all requests, even weird ones ip = str(request.headers.get('X-Forwarded-For', request.remote_addr)).split(',')[0].strip() print >>_LOGSTREAM, '\t'.join( [time.strftime('%A, %b %d, %Y at %I:%M:%S %p %Z'), str(mmh3.hash128(ip + 'recountsalt')), resource, identifier]) _LOGSTREAM.flush() if resource == 'recount': # Redirect to IDIES URL in order of descending version for i in ['2']: # add versions to precede 2 as they are released if identifier.startswith(' '.join(['v', i, '/'])): idies_url = '/'.join( ['http://idies.jhu.edu/recount/data', identifier] ) idies_response = requests.head(idies_url) if idies_response.status_code == 200: return redirect(idies_url, code=302) # v1 is not explicitly versioned idies_url = '/'.join(['http://idies.jhu.edu/recount/data', identifier]) idies_response = requests.head(idies_url) if idies_response.status_code == 200: return redirect(idies_url, code=302) abort(404)
def add_portmap(cont): if cont['Ports']: # a bit of a crazy comprehension to turn: # Ports': u'49166->8888, 49167->22' # into a useful dict {8888: 49166, 22: 49167} cont['portmap'] = dict([(p['PrivatePort'], p['PublicPort']) for p in cont['Ports']]) # wait until services are up before returning container # TODO this could probably be factored better when next # service added # this should be done via ajax in the browser # this will loop and kill the server if it stalls on docker ipy_wait = shellinabox_wait = True while ipy_wait or shellinabox_wait: if ipy_wait: try: requests.head("http://{host}:{port}".format( host=app.config['SERVICES_HOST'], port=cont['portmap'][8888])) ipy_wait = False except requests.exceptions.ConnectionError: pass if shellinabox_wait: try: requests.head("http://{host}:{port}".format( host=app.config['SERVICES_HOST'], port=cont['portmap'][4200])) shellinabox_wait = False except requests.exceptions.ConnectionError: pass time.sleep(.2) print 'waiting', app.config['SERVICES_HOST'] return cont
def check_http_header(target, status_code=200): """Checks if a certain http URL returns the correct status code.""" return_obj = {} try: # Don't follow redirections if status_code is in the 30x family if status_code / 10 == 30: r = requests.head(target, timeout=10) else: r = requests.head(target, allow_redirects=True, timeout=10) return_obj['valid'] = True return_obj['status_code'] = r.status_code return_obj['status_ok'] = r.status_code == status_code except ValueError as e: logger.error(e) return_obj['valid'] = False return_obj['error'] = 'Error in the target' except requests.exceptions.RequestException as e: logger.error(e) return_obj['valid'] = False return_obj['error'] = 'Error in the request' except Exception as e: logger.error(e) return_obj['valid'] = False return_obj['error'] = 'Unknown error' return return_obj
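A minimal usage sketch for check_http_header (the URL below is a placeholder): the function never raises, it reports the outcome through the returned dict.

# Hypothetical call; keys are those set by check_http_header above.
result = check_http_header("https://example.com/health", status_code=200)
if result['valid'] and result.get('status_ok'):
    print("endpoint healthy:", result['status_code'])
else:
    print("check failed:", result.get('error', result.get('status_code')))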
def test_resources_available(self): blueprint_name = 'openstack-blueprint.yaml' self.repo_dir = clone(self.repo_url, self.workdir) self.blueprint_yaml = self.repo_dir / blueprint_name self.upload_blueprint(self.test_id) invalid_resource_url = 'http://{0}/resources/blueprints/{1}/{2}' \ .format(self.env.management_ip, self.test_id, blueprint_name) try: result = requests.head(invalid_resource_url) self.assertNotEqual( result.status_code, 200, "Resources are available through a different port than 53229.") except ConnectionError: pass valid_resource_url = 'http://{0}:53229/blueprints/{1}/{2}' \ .format(self.env.management_ip, self.test_id, blueprint_name) try: result = requests.head(valid_resource_url) self.assertEqual( result.status_code, 200, "Resources are not available through the port 53229.") except ConnectionError: self.fail("Resources are not available through the port 53229.") self.cfy.delete_blueprint(self.test_id)
def youtube_url_validation(url):
    if not url:
        return False
    m = ups.urlparse(url)
    domain = '{uri.scheme}://{uri.netloc}/'.format(uri=m)
    print domain
    if ("youtube" in domain):
        v = str(ups.parse_qs(m.query)['v'][0])
        print v
        if len(v) == 11:
            print "Yes! URL checks out."
            try:
                r = requests.head(url)
                print r.status_code
                if r.status_code == 200:
                    print "Yup! Video is there" + str(r.status_code)
                    return True
                return False
            except requests.ConnectionError:
                print("Failed to connect to the URL")
                return False
    if ("youtu.be" in domain):
        print "youtu.be in domain"
        try:
            r = requests.head(url)
            print r.status_code
            if ((r.status_code == 200) or (r.status_code == 302)):
                print "Yup! Video is there" + str(r.status_code)
                return True
            return False
        except requests.ConnectionError:
            print("Failed to connect to the URL")
            return False
def get_tested_azure_url(image_info): try: # tk = TimeKeeper() # tk.time_now(image_info['flickr_id'] + '_azure_start', print_out=True) azure_url_part = u"http://blmc.blob.core.windows.net/{0[date]}/{0[book_identifier]}_{0[volume]}_{0[page]}_{0[image_idx]}_{0[date]}_imagesize.jpg".format(image_info) azure_url_part = azure_url_part.replace('imagesize', 'embellishments') r = requests.head(azure_url_part, stream=True, timeout=0.3) # tk.time_now(image_info['flickr_id'] + '_azure_embellishments', print_out=True) if r.status_code is requests.codes.ok: return azure_url_part else: azure_url_part = azure_url_part.replace('embellishments', 'medium') r = requests.head(azure_url_part, stream=True, timeout=0.3) # tk.time_now(image_info['flickr_id'] + '_azure_medium', print_out=True) if r.status_code is requests.codes.ok: return azure_url_part else: azure_url_part = azure_url_part.replace('medium', 'plates') r = requests.head(azure_url_part, stream=True, timeout=0.3) # tk.time_now(image_info['flickr_id'] + '_azure_medium', print_out=True) if r.status_code is requests.codes.ok: return azure_url_part else: return None except: return None
def EXTRACT_MP3_LINKS(url): header = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:32.0) Gecko/20100101 Firefox/32.0',} page = requests.get(url,header) soup = BeautifulSoup(page.content) links = soup.findAll("a") #print type (links) for item in links: try: if ".mp3" in item['href']: try: response=requests.head(item['href']) if response.headers['content-type']=='audio/mpeg': SAVEMP3(item['href']) except: pass try: response=requests.head(url+item['href']) #print response if response.headers['content-type']=='audio/mpeg': SAVEMP3(url+item['href']) except: pass except: pass
def correct_url(url, rights): """ correct_url link checker and guesser for wikipedia thunbnail URLs returns a checked (good) URL as a unicode string or None """ urlres = requests.head(url, allow_redirects=True) # thubmnail URL looks good (check the link first) if (urlres.status_code == requests.codes.ok): return url # something is not right # if the attribute page for the image does not exist, then we # won't find a thumbnail, so we may as well give up now rightsres = requests.head(rights) if (rightsres.status_code != requests.codes.ok): return None # okay, there should be a good thumbnail here, just not at the # URL we tried elif (urlres.status_code == 404): return correct_url_404(url) elif (urlres.status_code == 500): return correct_url_500(url) # not sure we can get here, something might be very wrong else: raise Exception("wikipedia thumbnail URL {0} had unexpected" + "status code {1}".format(urlres.status_code, url))
def download_vid(vid): html_downloader = main.Downloader() html_downloader.get(vid[1], vid[2], "html") flv_redirect = app.extract_flv_redirect(vid[2]) headers = { "Accept-Encoding": "gzip,deflate,sdch", "Host": "redirector.googlevideo.com", "Accept-Language": "en-US,en;q=0.8,fr;q=0.6", "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.66 ari/537.36", "Accept": "*/*", "Referer": vid[1], "Connection": "keep-alive", "Cache-Control": "no-cache", } req = requests.head(flv_redirect, headers=headers) file_url = req.headers["location"] req = requests.head(flv_redirect, headers=headers) file_url = req.headers["location"] host = urlparse(file_url).netloc headers["Host"] = host html_downloader.download_file(file_url, "flv/%s.flv" % vid[2], headers)
def VerifyPath(data): # Insert try and catch blocks try: token_name = data["project"]["token_name"] except: token_name = data["project"]["project_name"] channel_names = data["channels"].keys() for i in range(0, len(channel_names)): channel_type = data["channels"][channel_names[i]]["channel_type"] path = data["channels"][channel_names[i]]["data_url"] if channel_type == "timeseries": timerange = data["dataset"]["timerange"] for j in xrange(timerange[0], timerange[1] + 1): # Test for tifs or such? Currently test for just not empty work_path = "{}{}/{}/time{}/".format(path, token_name, channel_names[i], j) resp = requests.head(work_path) assert resp.status_code == 200 else: # Test for tifs or such? Currently test for just not empty work_path = "{}{}/{}/".format(path, token_name, channel_names[i]) resp = requests.head(work_path) print (work_path) assert resp.status_code == 200
def __init__(self, spider=None, *a, **kw): super(GetpagesfromsitemapSpider, self).__init__(*a, **kw) try: cnx = mysql.connector.connect(**config) except mysql.connector.Error as err: if err.errno == errorcode.ER_ACCESS_DENIED_ERROR: print("Something is wrong with your user name or password") elif err.errno == errorcode.ER_BAD_DB_ERROR: print("Database does not exists") else: print(err) else: self.spider = spider cursor = cnx.cursor() l = [] url = "https://channelstore.roku.com" resp = requests.head(url + "/sitemap.xml") if (resp.status_code != 404): l.append(resp.url) else: resp = requests.head(url + "/robots.txt") if (resp.status_code == 200): l.append(resp.url) self.sitemap_urls = l print self.sitemap_urls
def test_doesnt_delete_shared_files(self): """ Make sure that a file shared between two file objects doesn't get deleted when one of the file objects gets deleted """ c = _create_expired_contentnode() file_on_disk = ContentFile("test") f = File.objects.create( contentnode_id=c.pk, file_on_disk=file_on_disk, checksum="aaa", ) f.file_on_disk.save("aaa.jpg", file_on_disk) file_url = f.file_on_disk.url c2 = ContentNode.objects.create(kind_id=content_kinds.TOPIC, title="test") f2 = File.objects.create( contentnode_id=c2.pk, file_on_disk=file_on_disk, checksum="aaa", ) f2.file_on_disk.save("aaa.jpg", file_on_disk) # check that file_url exists before cleaning up requests.head(file_url).raise_for_status() clean_up_contentnodes() # the file should still be available response = requests.head(file_url) assert response.status_code == 200
def extract_summary(doc, document): summary = original_extract_summary(doc, document) if any(summary.startswith(tag) for tag in (":param ", ":rtype: ")): return "" return summary autosummary.extract_summary = extract_summary # Configuration for sphinx.ext.intersphinx # https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html intersphinx_mapping = {} if requests.head("https://hail.is/docs/0.2/objects.inv").status_code == 200: intersphinx_mapping["hail"] = ("https://hail.is/docs/0.2", None) else: print("Unable to link to Hail docs (cannot access objects.inv)", file=sys.stderr) # sphinx_autodoc_typehints generates references with qualified names. # Since Hail re-exports many objects from higher level packages/modules, # Hail's documentation does not list all objects by their qualified name. # For example, Table is documented as hail.Table, not hail.table.Table. # Thus, intersphinx cannot link some of the Hail references generated by # sphinx_autodoc_typehints. # # To work around this, override sphinx_autodoc_typehints's get_annotation_module # function and map the qualified names to what Hail's documentation uses. original_get_annotation_module = sphinx_autodoc_typehints.get_annotation_module
def add_yaml_file_content_to_database(self, filepath, deleteFile=False): """*given a file to a yaml file, add yaml file content to database* **Key Arguments:** - ``filepath`` -- the path to the yaml file - ``deleteFile`` -- delete the yaml file when its content has been added to the database. Default *False* **Return:** - None **Usage:** To parse and import the contents of a single yaml file into the database, use the following: .. code-block:: python from fundamentals.mysql import yaml_to_database # PARSE YAML FILE CONTENTS AND ADD TO DATABASE yaml2db = yaml_to_database( log=log, settings=settings, dbConn=dbConn ) yaml2db.add_yaml_file_content_to_database( filepath=${1:"/path/to/file.yaml"}, deleteFile=True ) """ self.log.debug( 'completed the ````add_yaml_file_content_to_database`` method') import codecs try: self.log.debug("attempting to open the file %s" % (filepath, )) readFile = codecs.open(filepath, encoding='utf-8', mode='r') thisData = readFile.read() readFile.close() except IOError as e: message = 'could not open the file %s' % (filepath, ) self.log.critical(message) raise IOError(message) readFile.close() matchObject = re.finditer( r'(^|\n)(?P<key>[^\:]*)\:\s(?P<value>.*?)(\n|$)', thisData, flags=re.M | re.S # re.S ) yamlContent = {} for match in matchObject: if match.group("value")[0] == '"' and match.group( "value")[-1] == '"': v = match.group("value")[1:-1] elif match.group("value")[0] == "'" and match.group( "value")[-1] == "'": v = match.group("value")[1:-1] else: v = match.group("value") yamlContent[match.group("key")] = v if "table" not in yamlContent: self.log.warning( 'A table value is need in the yaml content to indicate which database table to add the content to: %(filepath)s' % locals()) return None # NOTE THERE MAY BE MORE THAN ONE DATABASE TABLE dbTablesTmp = yamlContent["table"].split(",") del yamlContent["table"] dbTables = [] dbTables[:] = [d.strip() for d in dbTablesTmp] # UNSHORTEN URL try: r = requests.head(yamlContent["url"], allow_redirects=True) yamlContent["url"] = r.url except: pass yamlContent["original_yaml_path"] = filepath if "url" in yamlContent: uniqueKeyList = ["url"] else: uniqueKeyList = [] for t in dbTables: convert_dictionary_to_mysql_table(dbConn=self.dbConn, log=self.log, dictionary=yamlContent, dbTableName=t, uniqueKeyList=uniqueKeyList, dateModified=True, returnInsertOnly=False, replace=True) if deleteFile: os.remove(filepath) self.log.debug( 'completed the ``add_yaml_file_content_to_database`` method') return None
def url_ok(url):
    try:
        r = requests.head(url)
        return r.status_code == 200
    except ConnectionError:
        return False
import requests

# Set the target webpage
url = "http://172.18.58.238/headers.php"
r = requests.get(url)

# This will get the full page
print(r.text)
print("Status code:")
print("\t OK", )

# This will just get the headers
h = requests.head(url)
print("Header:")
# To print line by line
for x in h.headers:
    print("\t ", x, ":", h.headers[x])

import requests

h = {"User-Agent": "mobile"}
r = requests.get("http://172.18.58.238/headers.php", headers=h)
print(r.content)

import scrapy

class NewSpider(scrapy.Spider):
    name = "new_spider"
    start_urls = ["http://172.18.58.238/multi/"]

    def parse(self, response):
        css_selector = 'img'
        for x in response.css(css_selector):
            newsel = '@src'
            yield {
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 13 15:22:46 2019

@author: 44100521
"""
import requests
import os

os.chdir('C:\Z Drive\Python Folder\WebScrapping')

int = range(1, 2000)
f1 = open('b1.out', 'a')
for i in int:
    s = 'https://www.nycoedsoccer.com/league/' + str(i) + '/schedule/'
    ret = requests.head(s, timeout=5)
    # f1.write(str(ret.status_code)+','+str(i)+'\n')
    print(i, ret.status_code, file=f1)
f1.close()
print('Completed!')

# # Below is to get HTML for that webpage
# int = 130  # range(130, 131)
# # f1=open('./b.out', 'a')
# # for i in int:
# s = 'https://www.nycoedsoccer.com/league/'+str(int)+'/schedule/'
# # s = 'http://nycoedsoccer.com'
# ret = requests.get(s,timeout=5)
def get_mimetype(url):
    page = requests.head(url)
    content_type = page.headers.get('Content-Type', '')
    content_type = content_type.split(';')[0]  # Strip out the encoding
    return url, content_type
targetsHealth = getTargetHealth(elbv2_client, targets, targetGroupARN) for indx, targetHealth in enumerate(targetsHealth["TargetHealthDescriptions"]): targetState = targetHealth["TargetHealth"]["State"] targetId = targetHealth["Target"]["Id"] # targetReason=targetHealth["TargetHealth"]["Reason"] while targetState != 'healthy': print('Service %s on instance %s is in %s state' % (ECSServiceName, targetId, targetState)) time.sleep(60) targetHealth = getTargetHealth( elbv2_client, targets, targetGroupARN)["TargetHealthDescriptions"][indx] targetState = targetHealth["TargetHealth"]["State"] # targetReason = targetHealth["TargetHealth"]["Reason"] else: print('Service %s on instance %s is in %s state \n' % (ECSServiceName, targetId, targetState)) # Checking via http code r = requests.head("http://" + loadBalancerDNSName) if r.status_code == 200: print("Finished successfully") print('Service %s was sucessfully created on %s instances' % (ECSServiceName, instancesIDList)) print("DNS name is " + loadBalancerDNSName) else: print( 'Error: Service %s was created on %s, but not available by %s address ' % (ECSServiceName, instancesIDList, loadBalancerDNSName))
        continue
    print(songlink)
    # Create the directory where songs are stored
    songdir = "songs_dir"
    if not os.path.exists(songdir):
        os.makedirs(songdir)
    # Build the output file path
    songname = d["data"]["songList"][0]["songName"]
    artistName = d["data"]["songList"][0]["artistName"]
    filename = ("%s/%s/%s-%s.flac" % (CURRENT_PATH, songdir, songname, artistName))
    f = urllib.request.urlopen(songlink)
    headers = requests.head(songlink).headers
    size = round(int(headers['Content-Length']) / (1024 ** 2), 2)  # B -> KB -> MB, rounded to 2 decimal places
    # Start writing the file
    if not os.path.isfile(filename) or os.path.getsize(filename) < minimumsize:  # Delete useless flacs
        print("%s is downloading now ......\n" % songname)
        if size >= minimumsize:
            with open(filename, "wb") as code:  # Write the song file
                code.write(f.read())  # Read all of f, then write it into code
        else:
            print("The size of %s (%r Mb) is less than 10 Mb, skipping...\n" % (filename, size))
    else:
        print("%s is already downloaded. Finding next song...\n" % songname)
    print("================================================================\n")
def get_from_cache(url, cache_dir=None): """ Given a URL, look for the corresponding dataset in the local cache. If it's not there, download it. Then return the path to the cached file. """ if cache_dir is None: cache_dir = PYTORCH_PRETRAINED_BERT_CACHE if sys.version_info[0] == 3 and isinstance(cache_dir, Path): cache_dir = str(cache_dir) if not os.path.exists(cache_dir): os.makedirs(cache_dir) # Get eTag to add to filename, if it exists. if url.startswith("s3://"): etag = s3_etag(url) else: response = requests.head(url, allow_redirects=True) if response.status_code != 200: raise IOError( "HEAD request failed for url {} with status code {}".format( url, response.status_code)) etag = response.headers.get("ETag") filename = url_to_filename(url, etag) # get cache path to put the file cache_path = os.path.join(cache_dir, filename) if not os.path.exists(cache_path): # Download to temporary file, then copy to cache dir once finished. # Otherwise you get corrupt cache entries if the download gets interrupted. with tempfile.NamedTemporaryFile() as temp_file: logger.info("%s not found in cache, downloading to %s", url, temp_file.name) # GET file object if url.startswith("s3://"): s3_get(url, temp_file) else: http_get(url, temp_file) # we are copying the file before closing it, so flush to avoid truncation temp_file.flush() # shutil.copyfileobj() starts at the current position, so go to the start temp_file.seek(0) logger.info("copying %s to cache at %s", temp_file.name, cache_path) with open(cache_path, 'wb') as cache_file: shutil.copyfileobj(temp_file, cache_file) logger.info("creating metadata file for %s", cache_path) meta = {'url': url, 'etag': etag} meta_path = cache_path + '.json' with open(meta_path, 'w', encoding="utf-8") as meta_file: json.dump(meta, meta_file) logger.info("removing temp file %s", temp_file.name) return cache_path
def get_from_cache( url, cache_dir=None, force_download=False, proxies=None, etag_timeout=10, resume_download=False, user_agent: Union[Dict, str, None] = None, local_files_only=False, ) -> Optional[str]: """ Given a URL, look for the corresponding file in the local cache. If it's not there, download it. Then return the path to the cached file. Return: None in case of non-recoverable file (non-existent or inaccessible url + no cache on disk). Local path (string) otherwise """ if cache_dir is None: cache_dir = TRANSFORMERS_CACHE if isinstance(cache_dir, Path): cache_dir = str(cache_dir) os.makedirs(cache_dir, exist_ok=True) etag = None if not local_files_only: try: response = requests.head(url, allow_redirects=True, proxies=proxies, timeout=etag_timeout) if response.status_code == 200: etag = response.headers.get("ETag") except (EnvironmentError, requests.exceptions.Timeout): # etag is already None pass filename = url_to_filename(url, etag) # get cache path to put the file cache_path = os.path.join(cache_dir, filename) # etag is None = we don't have a connection, or url doesn't exist, or is otherwise inaccessible. # try to get the last downloaded one if etag is None: if os.path.exists(cache_path): return cache_path else: matching_files = [ file for file in fnmatch.filter(os.listdir(cache_dir), filename.split(".")[0] + ".*") if not file.endswith(".json") and not file.endswith(".lock") ] if len(matching_files) > 0: return os.path.join(cache_dir, matching_files[-1]) else: # If files cannot be found and local_files_only=True, # the models might've been found if local_files_only=False # Notify the user about that if local_files_only: raise ValueError( "Cannot find the requested files in the cached path and outgoing traffic has been" " disabled. To enable model look-ups and downloads online, set 'local_files_only'" " to False.") return None # From now on, etag is not None. if os.path.exists(cache_path) and not force_download: return cache_path # Prevent parallel downloads of the same file with a lock. lock_path = cache_path + ".lock" with FileLock(lock_path): # If the download just completed while the lock was activated. if os.path.exists(cache_path) and not force_download: # Even if returning early like here, the lock will be released. return cache_path if resume_download: incomplete_path = cache_path + ".incomplete" @contextmanager def _resumable_file_manager(): with open(incomplete_path, "a+b") as f: yield f temp_file_manager = _resumable_file_manager if os.path.exists(incomplete_path): resume_size = os.stat(incomplete_path).st_size else: resume_size = 0 else: temp_file_manager = partial(tempfile.NamedTemporaryFile, dir=cache_dir, delete=False) resume_size = 0 # Download to temporary file, then copy to cache dir once finished. # Otherwise you get corrupt cache entries if the download gets interrupted. with temp_file_manager() as temp_file: logger.info( "%s not found in cache or force_download set to True, downloading to %s", url, temp_file.name) http_get(url, temp_file, proxies=proxies, resume_size=resume_size, user_agent=user_agent) logger.info("storing %s in cache at %s", url, cache_path) os.replace(temp_file.name, cache_path) logger.info("creating metadata file for %s", cache_path) meta = {"url": url, "etag": etag} meta_path = cache_path + ".json" with open(meta_path, "w") as meta_file: json.dump(meta, meta_file) return cache_path
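A short usage sketch for get_from_cache (the URL is a placeholder): a None return means the file could not be fetched and no cached copy exists.

# Illustrative only; cache_dir falls back to TRANSFORMERS_CACHE when omitted.
local_path = get_from_cache("https://example.com/models/weights.bin")
if local_path is None:
    print("file unavailable and not cached")
else:
    print("cached at", local_path)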
def resolve(self, sources, args, pack_select=False): try: if 'showInfo' in args: background = args['showInfo']['art']['fanart'] else: background = args['fanart'] self.setText("Begining Link Resolver") self.setBackground(background) stream_link = None loop_count = 0 # Begin resolving links tools.log('Attempting to Resolve file link', 'info') for i in sources: debrid_provider = i.get('debrid_provider', '') loop_count += 1 try: if self.is_canceled(): self.close() return if 'size' in i: i['info'].append(tools.source_size_display(i['size'])) loop_count_string = "(" + str(loop_count) + " of " + str( len(sources)) + ")" line1 = tools.lang(32036) + "%s - %s" % (tools.colorString( i['release_title']), loop_count_string) line2 = tools.lang(32037) + "%s | Source: %s" % ( tools.colorString(debrid_provider.upper()), tools.colorString(i['source'])) line3 = tools.lang(32038) + '%s | Info: %s' % ( tools.colorString(i['quality']), tools.colorString(" ".join(i['info']))) self.setText(line1) self.setText2(line2) self.setText3(line3) if i['type'] == 'torrent': if i['debrid_provider'] == 'premiumize': stream_link = self.premiumizeResolve( i, args, pack_select) elif i['debrid_provider'] == 'real_debrid': stream_link = self.realdebridResolve(i, args) if stream_link is None: tools.log('Failed to resolve for torrent %s' % i['release_title']) continue else: self.return_data = stream_link self.close() return elif i['type'] == 'hoster': # Quick fallback to speed up resolving while direct and free hosters are not supported if 'debrid_provider' not in i: continue provider = i['provider_imports'] providerModule = __import__('%s.%s' % (provider[0], provider[1]), fromlist=['']) providerModule = providerModule.source() try: i['url'] = providerModule.resolve(i['url']) except: import traceback traceback.print_exc() pass if i['url'] is None: continue if 'debrid_provider' in i: if i['debrid_provider'] == 'premiumize' and tools.getSetting( 'premiumize.enabled') == 'true': stream_link = self.premiumizeResolve(i, args) if stream_link is None: continue if i['debrid_provider'] == 'real_debrid': stream_link = self.realdebridResolve(i, args) if stream_link is None: continue else: # Currently not supporting free hosters at this point in time # ResolveURL and Direct link testing needs to be tested first continue try: try: headers = i['url'].rsplit('|', 1)[1] except: headers = '' headers = tools.quote_plus(headers).replace( '%3D', '=') if ' ' in headers else headers headers = dict(tools.parse_qsl(headers)) live_check = requests.head(i['url'], headers=headers) if not live_check.status_code == 200: continue stream_link = i['url'] except: stream_link = None if stream_link is None: continue else: self.return_data = stream_link self.close() return continue except: import traceback traceback.print_exc() continue self.close() return except: import traceback traceback.print_exc() self.close() return
# https://gosmcom.tistory.com/130
import requests

response = requests.get('https://api.github.com/events')
print(response.text)

response = requests.post('https://httpbin.org/post', data={'key': 'value'})
response = requests.put('https://httpbin.org/put', data={'key': 'value'})
response = requests.delete('https://httpbin.org/delete')
response = requests.head('https://httpbin.org/get')
response = requests.options('https://httpbin.org/get')

payload = {'key1': 'value1', 'key2': 'value2'}
response = requests.get('https://httpbin.org/get', params=payload)

import requests

# Web request, the explicit way
# 'https://search.naver.com/search.naver?query=아이스크림'
host = 'https://search.naver.com'
path = '/search.naver'
params = {'query': '아이스크림'}
url = host + path
response = requests.get(url, params=params)

# Response data attributes
print(response.status_code)  # response status code
print(response.url)          # the URL that was requested
print(response.text)         # response body as str - page source or text data, or JSON to inspect
print(response.content)      # response body as bytes - for saving raw data such as audio or video
print(response.encoding)     # encoding of the response data
print(response.headers)      # response headers
def proxy(path=""): SCHEME_PREFIX = "https://" EXPIRY_LIMIT = 10000 url_bits = request.url.split("?", 1) qs = "?" + url_bits[1] if len(url_bits) > 1 else "" # queries expiry_time = datetime.now() + timedelta(seconds=EXPIRY_LIMIT) # Extract host host = [h[1] for h in request.headers if h[0].lower() == 'host'][0] # Extract custom header # print("custom") # print("HEADER",request.headers) # NOT PRINTING # print([h[1] for h in request.headers]) # ['www.ezoic.com', 'curl/7.64.1', '*/*', 'True'] # print([h[0] for h in request.headers]) # ['www.ezoic.com', 'curl/7.64.1', '*/*', 'True'] # print("remove", [ h[1] for h in request.headers if h[0].lower() == 'remove-from-cache' ]) # NOT PRINTING remove_from_cache = [ h[1] for h in request.headers if h[0].lower() == 'remove-from-cache' ] if not remove_from_cache: remove_from_cache = False recreated_request = SCHEME_PREFIX + host + "/" + str( path) + qs # TODO Not ideal, not safe # recreated_request => https://www.ezoic.com/ad-tester-details/ cache_key = (recreated_request, request.method) if remove_from_cache: # header is true if cache_key not in cache.main_cache or cache[cache_key][ 1] < datetime.now(): # Construct response saying element wasn not present to begin with response = Response("Element not present in cache", 200) #Make response content else: cache.main_cache.pop(cache_key, None) # Construct appropriate response converying element was found and removed response = Response("Element present in cache, Deleted", 200) return response if request.method in {"GET", 'HEAD'}: # print("Cache check functional") if cache_key not in cache.main_cache or cache[cache_key][ 1] < datetime.now(): # namedTuple # try catches TEST Case print("From Server, moving to Cache") if request.method == "HEAD": resp = requests.head(recreated_request, headers=dict(request.headers)) else: resp = requests.get(recreated_request, headers=dict(request.headers)) if "cache-control" in resp.headers: value = resp.headers["cache-control"] if "max-age" in value: expiry_time = datetime.now() + timedelta( seconds=int( value.split('max-age=')[1].split(",")[0])) # check # expiry_time = datetime.now() - datetime.now() + timedelta(seconds = 100) # hack chage later, cast # expiry_time = datetime.now() - timedelta(seconds = int(value.split('max-age=')[1].split(",")[0])) print("Expiry time", expiry_time) # 'Cache-Control': 'public, max-age=60, s-maxage=60' response = Response(resp.content, resp.status_code) cache[ cache_key] = response, expiry_time # Need to add expiry_time absolute #syntax return cache.main_cache[cache_key][0] elif request.method == 'POST': req_data = request.get_data() resp = requests.post(recreated_request, headers=dict(request.headers), data=req_data) response = Response(resp.content, resp.status_code) return response elif request.method == "DELETE": resp = requests.delete(recreated_request, headers=dict(request.headers)) response = Response(resp.content, resp.status_code) return response
def download_file( url, file_path, md5_hash=None, timeout=10, block_size=1024 * 1024, show_progress=True, ): """Resumable download. Expect the server to support byte ranges. Parameters ---------- url: string URL file_path: string Local file path to store the downloaded file md5_hash: string Expected MD5 string of downloaded file timeout: int Seconds to wait before terminating request block_size: int Chunkx of bytes to read (default: 1024 * 1024 = 1MB) show_progress: bool Show progress bar """ if os.path.exists(file_path) and os.path.getsize(file_path): return tmp_file_path = file_path + ".part" first_byte = os.path.getsize(tmp_file_path) if os.path.exists(tmp_file_path) else 0 file_mode = "ab" if first_byte else "wb" file_size = -1 try: file_size = int(requests.head(url).headers["Content-length"]) headers = {"Range": "bytes=%s-" % first_byte} r = requests.get(url, headers=headers, stream=True) if show_progress: desc = "Downloading {}".format(url.split("/")[-1]) pbar = tqdm( total=file_size, initial=first_byte, unit="B", unit_scale=True, desc=desc, ) with open(tmp_file_path, file_mode) as f: for chunk in r.iter_content(chunk_size=block_size): if chunk: # filter out keep-alive new chunks f.write(chunk) if show_progress: pbar.update(block_size) if show_progress: pbar.close() except IOError as e: sys.stderr.write("IO Error - {}\n".format(e)) finally: # Move the temp file to desired location if file_size == os.path.getsize(tmp_file_path): # if there's a hash value, validate the file if md5_hash and not md5_validate_file(tmp_file_path, md5_hash): raise Exception("Error validating the file against its MD5 hash") shutil.move(tmp_file_path, file_path) elif file_size == -1: raise Exception("Error getting Content-Length from server: %s" % url)
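A brief usage sketch for download_file (URL, path, and hash are placeholders): re-running the call resumes from the .part file if a previous attempt was interrupted.

# Illustrative values only.
download_file(
    "https://example.com/data/archive.tar.gz",
    "/tmp/archive.tar.gz",
    md5_hash=None,        # supply an expected digest to enable validation
    show_progress=True,
)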
def test_head_405(self):
    body = '{}'
    resp = requests.head(_SERVER_BASE_URL, data=body)
    assert resp.status_code == 405
def url_ok(url):
    try:
        r = requests.head(url, timeout=1, verify=False)
        return r.status_code == 200
    except requests.exceptions.RequestException:
        return False
def main(): if len(sys.argv) != 3: print("Formato TestServidorWeb <maquina> <puerto>") sys.exit() # Leemos los argumentos necesarios maquina = sys.argv[1] puerto = int(sys.argv[2]) urlBase = "http://" + maquina + ":" + str(puerto) + "/" aciertos = 0 totalTests = 0 print("\nComprobando servidor: " + urlBase + "\n=====================\n\n") # 1 Comprobar multithread test = "Multihilo" print(test + ("." * (30 - len(test))), end=" ") totalTests = totalTests + 1 try: # Creamos el socket orientado a conexión socketCliente = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # Establecemos un timeout de 300 segs socketCliente.settimeout(300000) # Iniciamos la conexión con el servidor. socketCliente.connect((maquina, puerto)) # Una vez iniciada la conexión, realizamos la consulta en otro hilo. r = requests.get(urlBase) print("OK") aciertos = aciertos + 1 except socket.timeout: print("FALLO") except: print("FALLO") finally: socketCliente.close() # 2 Peticion no soportada test = "Petición no soportada" print(test + ("." * (30 - len(test))), end=" ") totalTests = totalTests + 1 try: r = requests.post(urlBase) if r.status_code == 400 or r.status_code == 501: print("OK") aciertos = aciertos + 1 else: print("FALLO") except: print("FALLO") # 3 Petición incorrecta test = "Petición incorrecta" print(test + ("." * (30 - len(test))), end=" ") totalTests = totalTests + 1 try: # Creamos el socket orientado a conexión socketCliente = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # Establecemos un timeout de 300 segs socketCliente.settimeout(300000) # Iniciamos la conexión con el servidor. socketCliente.connect((maquina, puerto)) # Realizamos una consulta mal formada. socketCliente.send("42 BIEN\n\n".encode()) r = socketCliente.recv(4096) if r.decode("UTF-8").upper().startswith("HTTP/1.0 400 BAD REQUEST") \ or r.decode("UTF-8").upper().startswith("HTTP/1.1 400 BAD REQUEST"): print("OK") aciertos = aciertos + 1 else: print("FALLO") except socket.timeout: print("FALLO") except: print("FALLO") finally: socketCliente.close() # 4 Fichero no existente test = "Fichero no encontrado" print(test + ("." * (30 - len(test))), end=" ") totalTests = totalTests + 1 try: r = requests.get(urlBase + "invent.fake") if r.status_code == 404: print("OK") aciertos = aciertos + 1 else: print("FALLO") except: print("FALLO") # 5 HEAD TXT test = "Head TXT" print(test + ("." * (30 - len(test))), end=" ") totalTests = totalTests + 1 try: r = requests.head(urlBase + "fichero.txt") if ((r.status_code == 200) and (len(r.text) == 0) and checkHeaders(r.headers, "fichero.txt")): print("OK") aciertos = aciertos + 1 else: print("FALLO") except: print("FALLO") # 6 GET TXT test = "Get TXT" print(test + ("." * (30 - len(test))), end=" ") totalTests = totalTests + 1 try: r = requests.get(urlBase + "fichero.txt") if (r.status_code == 200 and checkHeaders(r.headers, "fichero.txt") and checkContent(r.content, "fichero.txt")): print("OK") aciertos = aciertos + 1 else: print("FALLO") except: print("FALLO") # 7 HEAD HTML test = "Head HTML" print(test + ("." * (30 - len(test))), end=" ") totalTests = totalTests + 1 try: r = requests.head(urlBase + "index.html") if ((r.status_code == 200) and (len(r.text) == 0) and checkHeaders(r.headers, "index.html")): print("OK") aciertos = aciertos + 1 else: print("FALLO") except: print("FALLO") # 8 GET HTML test = "Get HTML" print(test + ("." 
* (30 - len(test))), end=" ") totalTests = totalTests + 1 try: r = requests.get(urlBase + "index.html") if (r.status_code == 200 and checkHeaders(r.headers, "index.html") and checkContent(r.content, "index.html")): print("OK") aciertos = aciertos + 1 else: print("FALLO") except: print("FALLO") # 9 HEAD JPG test = "Head JPG" print(test + ("." * (30 - len(test))), end=" ") totalTests = totalTests + 1 try: r = requests.head(urlBase + "frightened_socket.jpg") if (r.status_code == 200 and len(r.text) == 0 and checkHeaders(r.headers, "frightened_socket.jpg")): print("OK") aciertos = aciertos + 1 else: print("FALLO") except: print("FALLO") # 10 GET JPG test = "Get JPG" print(test + ("." * (30 - len(test))), end=" ") totalTests = totalTests + 1 try: r = requests.get(urlBase + "frightened_socket.jpg") if (r.status_code == 200 and checkHeaders(r.headers, "frightened_socket.jpg") and checkContent(r.content, "frightened_socket.jpg")): print("OK") aciertos = aciertos + 1 else: print("FALLO") except: print("FALLO") # 11 HEAD GIF test = "Head GIF" print(test + ("." * (30 - len(test))), end=" ") totalTests = totalTests + 1 try: r = requests.head(urlBase + "seven_segment_display.gif") if (r.status_code == 200 and len(r.text) == 0 and checkHeaders(r.headers, "seven_segment_display.gif")): print("OK") aciertos = aciertos + 1 else: print("FALLO") except: print("FALLO") # 12 GET GIF test = "Get GIF" print(test + ("." * (30 - len(test))), end=" ") totalTests = totalTests + 1 try: r = requests.get(urlBase + "seven_segment_display.gif") if (r.status_code == 200 and checkHeaders(r.headers, "seven_segment_display.gif") and checkContent(r.content, "seven_segment_display.gif")): print("OK") aciertos = aciertos + 1 else: print("FALLO") except: print("FALLO") print("\n\nPuntuación: " + str(aciertos) + "/" + str(totalTests))
def checkVersionExists(version):
    url = releaseUrl(version)
    info("Checking %s" % url)
    request = requests.head(url)
    if request.status_code == 200 or request.status_code == 302:
        error("Version already exists")
# Get an HTTP response from the http://httpbin.org/get page using the GET method
resp = requests.get("http://httpbin.org/get")  # key-value pairs must be passed as a dictionary.
print(resp.text)
print(resp.request.headers)  # With GET, the payload travels with the headers/URL; because of the maximum length, not everything can be sent this way.
print(resp.request.body)

# Get an HTTP response from the http://httpbin.org/get page using the POST method
resp = requests.post("http://httpbin.org/post")  # fetch the response via POST
print(resp.text)
print(resp.request.headers)
print(resp.request.body)

# Get an HTTP response from the http://httpbin.org/get page using the HEAD method
resp = requests.head("http://httpbin.org/head")  # reads only the headers.
print(resp.text)
print(resp.request.headers)
print(resp.request.body)

# -----------------------------------------------------------------------------------------------------------------------------------------
# Test whether getDownload() filters out errors properly
url_getDownloadtest = "http://www.crawler-test.com/status_codes/status_"
# Returns 500 after "retries" has been printed 3 times.
html = getDownload(url_getDownloadtest + "500", {"q": "test"})
print(html.url)

# --------------------------------- Fetch pythonscraping using postDownload() --------------------------------------------
# The value is processed and returned by the server, so we send to and read from the php endpoint rather than an html page. Most small Korean sites are built this way.
url_postDownloadtest = "http://pythonscraping.com/pages/files/processing.php"
data = {
    "firstname": "1234",
def test_url(href):
    """ make a HEAD request and return True if the status is 200 """
    r = requests.head(href)
    if r.status_code == 200:
        return True
    return False
def download(self, url, overwrite=False): """ :param url: Web Path to file eg:(http://google.com/images/randomimage.jpeg) :param overwrite: opt. This will trigger a removal any conflicting files prior to download :return: Bool - True = Completed successfully / False = Cancelled """ g.log("Downloading file: {}".format(url)) if not url or not url.startswith("http"): raise InvalidWebPath(url) if self.output_filename is None: self.output_filename = url.split("/")[-1] g.log( "Filename: {} - Location: {}".format( self.output_filename, self.storage_location ) ) output_file = self._create_file(url, overwrite) self._output_file = output_file g.log("Created file - {}".format(self._output_path)) head = requests.head(url) if head.status_code != 200: g.log("Server did not respond correctly to the head request") self._handle_failure() raise requests.exceptions.ConnectionError(head.status_code) self.url_hash = tools.md5_hash(url) if not self._add_download_to_dm(): g.log("Failed to create download manager task", "error") self._handle_failure() return self.file_size = int(head.headers.get("content-length", None)) self.progress = 0 self.speed = 0 self.status = "downloading" for chunk in requests.get(url, stream=True).iter_content(1024 * 1024): if g.abort_requested(): self._handle_failure() g.log( "Shutdown requested - Cancelling download: {}".format( self.output_filename ), "warning", ) self.cancel_download() if self._is_canceled(): g.log( "User cancellation - Cancelling download: {}".format( self.output_filename ), "warning", ) self.cancel_download() self.status = "canceled" return False result = output_file.write(chunk) if not result: self._handle_failure() self.status = "failed" g.log( "Failed to fetch chunk from remote server -" " Cancelling download: {}".format(self.output_filename), "error", ) raise GeneralIOError(self.output_filename) else: self._update_status(len(chunk)) g.log( "Download has completed successfully - Filename: {}".format( self.output_filename ) ) return True
rootURL = "https://cdn.jsdelivr.net/gh/mainstringargs/quest-slayer@master/images/" for file in os.listdir("."): if file.endswith(".json"): with open(file) as json_file: data = json.load(json_file) if ('imageURL' in data and data['imageURL'] != 'imageURL'): #print(data['title'] +" " + " "+data['imageURL']) imgFileName = data['title'].replace("–", "_").replace( " ", "_").replace("(", "_").replace(")", "_").replace( ",", "_").replace(":", "_").replace("?", "_") + ".jpg" download(data['imageURL'], "../../images/" + imgFileName) data['imageURL'] = rootURL + imgFileName r = requests.head(data['linkURL'], allow_redirects=True) data['linkURL'] = (r.url.replace('questconquerer-20', 'mainstringarg-20')) for questItem in data['questEntries']: if ('imageURL' in questItem and questItem['imageURL'] != 'imageURL' and questItem['imageURL'] != ''): #print(questItem['title'] + " "+ questItem['imageURL']) imgFileName = questItem['title'].replace( "–", "_").replace(" ", "_").replace( "(", "_").replace(")", "_").replace( ",", "_").replace(":", "_").replace( "?", "_") + ".jpg" download(questItem['imageURL'], "../../images/" + imgFileName)
def url_ok(url):
    r = requests.head(url)
    ## use the commented row below if you need basic authentication
    ## r = requests.head(url, auth=(api_user, api_key))
    return r.status_code == 200
def test_url(url):
    response = requests.head(url)
    if response.status_code > 299:
        return False
    return True
def makerow():
    return {"row": None, "resource": None}

## URL Status check function
def url_ok(url):
    r = requests.head(url)
    ## use the commented row below if you need basic authentication
    ## r = requests.head(url, auth=(api_user, api_key))
    return r.status_code == 200

## set http status code
http_status_code = str(requests.head(url).status_code)
## use the commented row below if you need basic authentication
## http_status_code = str(requests.head(url, auth=(api_user, api_key)).status_code)

##
if url_ok(url):
    print("Vipunen API was available with status code: " + http_status_code)
else:
    print("Vipunen API was not accessible and returned status code:" + http_status_code)
    exit

## Finally get the data
response = requests.get(url).json()
## use the commented row below if you need basic authentication
def is_url_an_html_page(self):
    response = requests.head(self.url, allow_redirects=True)
    if "Content-Type" in response.headers:
        if "text/html" in response.headers["Content-Type"]:
            return True
    return False
def test_HTTP_200_OK_HEAD(self):
    r = head(httpbin('get'))
    self.assertEqual(r.status_code, 200)
def get_webm_actual_url(self, url):
    headers = {'Accept': 'video/webm'}
    res = requests.head(url, headers=headers)
    return res.headers['Location']
def test_head_content(self):
    """Test that empty bodies are properly supported."""
    r = requests.head(httpbin('headers'))
    r.content
    r.text
    if current_char_int > 0x7E:
        print('\nUh oh, broke')
        sys.exit(1)

    # Create guess for the authorization
    guess = flag + chr(current_char_int).encode('ascii')
    # Base64 because that's how http basic authentication works
    guess_b64 = base64.b64encode(guess)

    # Print out progress
    print('\r' + guess.decode('utf-8'), end='')

    # Request the page, only need headers, so use HEAD instead of GET
    headers = {b'Authorization': b'Basic ' + guess_b64}
    response = requests.head(url, headers=headers)
    num_requests += 1

    # We're authenticated if we get a 200
    if response.status_code == 200:
        # Add the last character we tried to the flag
        flag += chr(current_char_int).encode('ascii')
        break

    try:
        # Get oracle-esque header
        current_index = int(response.headers['Progress'])
    except KeyError:
        pass
    # Found character