Example #1
 def _check_host(self, host):
     """Check is host available"""
     try:
         requests.head(host, **self.request_params)
         return True
     except requests.ConnectionError:
         return False
Example #2
def has_internet():
    """Uses www.google.com to check connectivity"""
    try:
        requests.head('http://www.google.com', timeout=1)
        return True
    except requests.ConnectionError:
        return False
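A caveat on this probe: requests.ConnectionError does not cover timeouts, so the helper above can still raise. A slightly more defensive variant is sketched below (the helper name and probe URL are illustrative, not taken from the projects in this listing):

import requests

def is_reachable(url='http://www.google.com', timeout=1):
    """Return True if a HEAD request to url succeeds within the timeout."""
    try:
        requests.head(url, timeout=timeout)
        return True
    except requests.RequestException:  # covers ConnectionError, Timeout, etc.
        return False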
Example #3
def check_playable(url, showbusy=False, ignore_dp=False, item=None):
    try:
        try:
            hmf = urlresolver.HostedMediaFile(url=url, include_disabled=False, include_universal=True)
            if hmf.valid_url() == True:
                url = hmf.resolve()
                return url
        except:
            pass
        headers = ''

        result = None

        if url.startswith('http') and '.m3u8' in url:
            result = requests.head(url, timeout=5)
            if result is None:
                return None

        elif url.startswith('http'):
            result = requests.head(url, timeout=5)
            if result is None:
                return None

        if url == "http://m4ufree.info" or "drive" in url:
            return None

        return url
    except:
        return None
Example #4
File: bot.py Project: Dav1dde/tav
    def view(self, proxy, session):
        httpproxy = 'http://{}'.format(proxy)

        errors = 0
        while errors < 50:
            session.set_option('http-proxy', httpproxy)
            session.set_option('http-headers', HEADERS)
            session.set_option('http-timeout', self.timeout)

            try:
                stream = session.streams(self.url)
                url = stream['worst'].url
            except Exception:
                errors += 5
            else:
                break
        else:
            # no break -> no url -> broken proxy
            return

        errors = 0
        while errors < 50:
            try:
                requests.head(
                    url, headers=HEADERS,
                    proxies={'http': httpproxy}, timeout=self.timeout
                )
            except Exception:
                errors += 2
            else:
                errors = max(0, errors - 1)

            time.sleep(0.5)
Example #5
 def _check_host(self, host):
     """Check is host available"""
     try:
         requests.head(host, timeout=self.timeout)
         return True
     except requests.ConnectionError:
         return False
Example #6
  def _resolve_image(cls, registry, name, tag, headers=None):
    url = MANIFESTS_URL % (registry, name, tag)
    response = requests.head(url, headers=headers)

    if response.status_code == requests.codes.unauthorized:
      # solve the auth challenge and retry again
      authorization = cls._solve_auth_challenge(response, registry)
      if headers is None:
        headers = dict()
      headers.update(authorization)
      response = requests.head(url, headers=headers)

      if response.status_code == requests.codes.unauthorized:
        # its a private repo, raise exception
        raise DockerClientException('Private Docker repository - %s:%s' % (name, tag))

    if response.status_code == requests.codes.ok:
      image_ref = '%s@%s' % (name, response.headers.get('Docker-Content-Digest'))

      if registry != DEFAULT_DOCKER_REGISTRY_HOST:
        image_ref = '%s/%s' % (urlparse(registry).netloc, image_ref)

      log.info('Resolved %s:%s => %s' % (name, tag, image_ref))
      return image_ref

    # something is wrong
    response.raise_for_status()
    raise DockerClientException('Unable to resolve image %s:%s' % (name, tag))
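The 401-then-retry flow in this example generalizes beyond Docker registries. A minimal sketch of the pattern, with hypothetical names (head_with_auth_retry and get_token are not part of the project above):

import requests

def head_with_auth_retry(url, get_token, headers=None):
    """HEAD url; on a 401, ask get_token(response) for a token and retry once."""
    headers = dict(headers or {})
    response = requests.head(url, headers=headers)
    if response.status_code == requests.codes.unauthorized:
        headers['Authorization'] = 'Bearer {}'.format(get_token(response))
        response = requests.head(url, headers=headers)
    return response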
Example #7
    def fetch_metadata(self):
        """
        Do a HEAD request on self.url to try to get metadata
        (self.length and self.mimetype).

        Note that while this method fills in those attributes, it does *not*
        call self.save() - so be sure to do so after calling this method!

        """
        if not self.url:
            return

        try:
            response = requests.head(self.url, timeout=5)
            if response.status_code == 302:
                response = requests.head(response.headers['location'],
                                         timeout=5)
        except Exception:
            pass
        else:
            if response.status_code != 200:
                return
            self.length = response.headers.get('content-length')
            self.mimetype = response.headers.get('content-type', '')
            if self.mimetype in ('application/octet-stream', ''):
                # We got a not-useful MIME type; guess!
                guess = mimetypes.guess_type(self.url)
                if guess[0] is not None:
                    self.mimetype = guess[0]
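Worth noting when reading this snippet: unlike requests.get, requests.head does not follow redirects by default, which is why the code chases the 302 by hand. Passing allow_redirects=True is the usual alternative; a minimal sketch under that assumption (the function name is illustrative):

import requests

def head_metadata(url, timeout=5):
    """Return (content_length, content_type), following redirects; (None, '') on failure."""
    try:
        response = requests.head(url, timeout=timeout, allow_redirects=True)
    except requests.RequestException:
        return None, ''
    if response.status_code != 200:
        return None, ''
    return (response.headers.get('content-length'),
            response.headers.get('content-type', ''))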
Example #8
def check_playable(url):
    """
    checks if the passed url is a live link
    :param str url: stream url
    :return: playable stream url or None
    :rtype: str or None
    """
    import urllib
    import requests
    try:
        headers = url.rsplit('|', 1)[1]
    except:
        headers = ''
    headers = urllib.quote_plus(headers).replace('%3D', '=') if ' ' in headers else headers
    headers = dict(urlparse.parse_qsl(headers))

    result = None
    try:
        if url.startswith('http') and '.m3u8' in url:
            result = requests.head(url.split('|')[0], headers=headers, timeout=5)
            if result is None:
                return None

        elif url.startswith('http'):
            result = requests.head(url.split('|')[0], headers=headers, timeout=5)
            if result is None:
                return None
    except:
        pass

    return result
Example #9
 def CheckBundleDigest(self, container, uripath, bundle_data):
     """
     Download the Bundle from CloudFiles into a local path
         container - the CloudFiles container in which to find the Vault DB
         bundle_data - a dict containing at least the 'id' and 'md5' of the bundle
         localpath - the local path at which to store the downloaded VaultDB
     """
     self.apihost = self._get_container(container)
     try:
         fulluri = uripath + '/BUNDLES/' + bundle_data['name']
         self.ReInit(self.sslenabled, '/' + fulluri)
         self.headers['X-Auth-Token'] = self.authenticator.AuthToken
         self.log.debug('uri: %s', self.Uri)
         self.log.debug('headers: %s', self.Headers)
         try:
             res = requests.head(self.Uri, headers=self.Headers)
         except requests.exceptions.SSLError as ex:
             self.log.error('Requests SSLError: {0}'.format(str(ex)))
             res = requests.head(self.Uri, headers=self.Headers, verify=False)
         if res.status_code == 404:
             raise UserWarning('Server failed to find the specified bundle')
         elif res.status_code >= 300:
             raise UserWarning('Server responded unexpectedly during download (Code: ' + str(res.status_code) + ' )')
         else:
             digest = res.headers['etag'].upper()
             result = (digest == bundle_data['md5'])
             self.log.debug('CloudFiles Bundle Digest (' + digest + ') == Bundle MD5 (' + bundle_data['md5'] + ')? ' + str(result))
             return result
     except LookupError:
         raise UserWarning('Invalid VaultDB Data provided.')
Example #10
 def test_sni_urls(self):
     """
     Test SNI urls
     :return:
     """
     print ""
     # Just checking all providers - we should make this error on non-existent urls.
     for provider in [
         provider
         for provider in providers.makeProviderList()
         if provider.name not in self.self_signed_cert_providers
     ]:
         print "Checking %s" % provider.name
         try:
             requests.head(provider.url, verify=certifi.old_where(), timeout=10)
         except requests.exceptions.Timeout:
             pass
         except requests.exceptions.SSLError as error:
             if u"SSL3_GET_SERVER_CERTIFICATE" not in ex(error):
                 print "SSLError on %s: %s" % (provider.name, ex(error.message))
                 raise
             else:
                 print "Cannot verify certificate for %s" % provider.name
         except Exception:  # pylint: disable=broad-except
             pass
Example #11
 def __init__(self, server, userAgent=_getUserAgent()):
     self.server = server
     self.userAgent = userAgent
     try:
         requests.head(self.server)
     except requests.exceptions.ConnectionError, e:
         raise ValueError("server %s does not look to be alive: %s" % (server, e.message))
Example #12
    def load_units(self):
        """
        Load units of the function descriptor content, section
        'virtual_deployment_units'
        """
        if 'virtual_deployment_units' not in self.content:
            log.error("Function id={0} is missing the "
                      "'virtual_deployment_units' section"
                      .format(self.id))
            return

        for vdu in self.content['virtual_deployment_units']:
            unit = Unit(vdu['id'])
            self.associate_unit(unit)

            # Check vm image URLs
            # only perform a check if vm_image is a URL
            vdu_image_path = vdu['vm_image']
            if validators.url(vdu_image_path):  # Check if is URL/URI.
                try:
                    # Check if the image URL is accessible
                    # within a short time interval
                    requests.head(vdu_image_path, timeout=1)

                except (requests.Timeout, requests.ConnectionError):

                    evtlog.log("VDU image not found",
                               "Failed to verify the existence of VDU image at"
                               " the address '{0}'. VDU id='{1}'"
                               .format(vdu_image_path, vdu['id']),
                               self.id,
                               'evt_vnfd_itg_vdu_image_not_found')
        return True
Example #13
File: api.py Project: MPDL/pyimeji
    def __init__(self, cfg=None, service_url=None, service_mode=None):
        """

        :param cfg: Configuration for the service
        :param service_url: The service URL
        :param service_mode: set to "private" if imeji instance runs in "private" mode
           (any other value considered as standard imeji instance mode )

        If the imeji instance is not available or not running, instantiation will raise an error.
        """
        self.cfg = cfg or Config()
        self.service_url = service_url or self.cfg.get('service', 'url')
        self.service_mode_private = False or (self.cfg.get('service', 'mode', 'public') == 'private' or service_mode == 'private')
        self.service_unavailable_message = \
            "WARNING : The REST Interface of Imeji at {rest_service} is not available or there is another problem, " \
            "check if the service is running under {imeji_service}" \
                .format(imeji_service=self.service_url, rest_service=self.service_url + '/rest')

        # check if Imeji instance is running and notify the user
        try:
            requests.head(self.service_url)
        except Exception as e:
            raise ImejiError(self.service_unavailable_message, e)

        user = self.cfg.get('service', 'user', default=None)
        password = self.cfg.get('service', 'password', default=None)
        self.session = requests.Session()
        if user and password:
            self.session.auth = (user, password)
        # initialize the request query
        self.total_number_of_results = self.number_of_results = self.offset = self.size = None
Example #14
def get_video_redirect_info(tag, format_, hd=False):
    assert tag
    assert format_ in ('webm', 'mp4'), format_
    if hd:
        format_ = 'hd_{}'.format(format_)
    vidly_url = 'https://vid.ly/{}?content=video&format={}'.format(
        tag,
        format_
    )
    req = requests.head(vidly_url)
    if req.status_code == 404:
        raise VidlyNotFoundError(tag)
    assert req.status_code == 302, (req.status_code, vidly_url)
    req2 = requests.head(req.headers['Location'])
    try:
        content_length = int(req2.headers['Content-Length'])
    except KeyError:
        raise VideoError(
            'Redirect URL lacks a Content-Length '
            '(tag:{} url:{} location:{} status:{})'.format(
                tag,
                vidly_url,
                req.headers['Location'],
                req.status_code,
            )
        )
    data = {
        'url': req.headers['Location'].split('?t=')[0],
        'length': content_length,
        'type': req2.headers['Content-Type'],
    }
    return data
Example #15
 def request(self, url, request_method=None, auth_method=None, timeout=None, post_data=None,
             user=None, password=None):
     timeout = float(timeout)
     try:
         if request_method == "1":
             # GET
             if (user or password) and auth_method == "2":
                 req = requests.get(url, auth=(user, password), timeout=timeout, verify=False)
             else:
                 req = requests.get(url, timeout=timeout, verify=False)
         elif request_method == "2":
             # POST
             if (user or password) and auth_method == "2":
                 req = requests.post(url, data=post_data, auth=(user, password), timeout=timeout, verify=False)
             else:
                 req = requests.post(url, data=post_data, timeout=timeout, verify=False)
         elif request_method == "3":
             # HEAD
             if (user or password) and auth_method == "2":
                 req = requests.head(url, auth=(user, password), timeout=timeout, verify=False)
             else:
                 req = requests.head(url, timeout=timeout, verify=False)
         time = req.elapsed
     except Exception as e:
         logging.error(e)
         raise
     try:
         code = req.status_code
         response_time = time.total_seconds() * 1000
     except Exception as e:
         logging.error(e)
         raise
     data = [int(code), float(response_time)]
     return data
Example #16
def try_earlier_webarchivals(webarchive_url):
    '''A webarchive_url turns out to be a bad redirect. So try an earlier
    webarchive of it to find what was there before the bad redirect.

    Returns tuple: (result_string, good_url or None)
    '''
    attempts = 0
    while True:
        # request the link but without the redirect, revealing the earlier
        # webarchivals in the headers
        print webarchive_url + ' (no redirects)'
        try:
            req = requests.head(webarchive_url, headers=USER_AGENT, verify=False, allow_redirects=False)
        except Exception, e:
            return ('!! Problem with request %s' % e, None)
        attempts += 1
        # Link header has the options
        # e.g. from http://webarchive.nationalarchives.gov.uk/+/http://www.dft.gov.uk/statistics/releases/accessibility-2010
        # Link: <http://webarchive.nationalarchives.gov.uk/20140508043011/http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="memento"; datetime="Thu, 08 May 2014 04:30:11 GMT", <http://webarchive.nationalarchives.gov.uk/20110826141806/http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="first memento"; datetime="Fri, 26 Aug 2011 14:18:06 GMT", <http://webarchive.nationalarchives.gov.uk/20140508043011/http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="last memento"; datetime="Thu, 08 May 2014 04:30:11 GMT", <http://webarchive.nationalarchives.gov.uk/20140109163921/http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="prev memento"; datetime="Thu, 09 Jan 2014 16:39:21 GMT", <http://webarchive.nationalarchives.gov.uk/20140508043011/http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="next memento"; datetime="Thu, 08 May 2014 04:30:11 GMT", <http://webarchive.nationalarchives.gov.uk/timegate/http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="timegate", <http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="original", <http://webarchive.nationalarchives.gov.uk/timemap/http://www.dft.gov.uk/statistics/releases/accessibility-2010>; rel="timemap"; type="application/link-format"
        links = req.headers['Link']
        prev_links = [l.split('; ') for l in links.split(', <')
                      if 'rel="prev memento"' in l]
        if not prev_links:
            if attempts == 1:
                return ('No previous webarchive links', None)
            return ('No luck after trying %i previous webarchive links' % attempts, None)
        webarchive_url = prev_links[0][0].strip('<>')
        # Request the previous url to see if it is archived ok, or whether it
        # still redirects out
        print webarchive_url
        try:
            req = requests.head(webarchive_url, headers=USER_AGENT, verify=False)
        except Exception, e:
            return ('!! Problem with request %s' % e, None)
Example #17
 def find_url_title(self, url):
     """Retrieve the title of a given URL"""
     headers = {'User-Agent': 'Wget/1.13.4 (linux-gnu)'}
     if url.find("://") == -1:
         url = "http://" + url
     try:
         # a HEAD first to thwart attacks
         requests.head(url, headers=headers, timeout=5)
         # now the actual request
         resp = requests.get(url, headers=headers)
         html = resp.text
     except requests.RequestException as e:
         self.logger.warning(e)
         return url, e.__doc__
     except ValueError as e:
         self.logger.warning(e)
         return url, "Failed to parse url"
     else:
         resp.close()
         cmphtml = html.lower()
         start = cmphtml.find("<title")
         end = cmphtml.find("</title>")
         if start == -1 or end == -1:
             return resp.url, "Could not find page title!"
         else:
             html = html[start+7:end]
             html = html[html.find('>')+1:]
             return resp.url, html.strip()
Example #18
def link_is_to_image(url, auth=None):
    """Check if the link points to an image content type.
    Return True or False accordingly
    """
    if md5s3stash.is_s3_url(url):
        response = requests.head(url, allow_redirects=True)
    else:
        response = requests.head(url, allow_redirects=True, auth=auth)
    if response.status_code != 200:
        return False
    content_type = response.headers.get("content-type", None)
    if not content_type:
        return False
    reg_type = content_type.split("/", 1)[0].lower()
    # situation where a server returned 'text/html' to HEAD requests
    # but returned 'image/jpeg' for GET.
    # try a slower GET if not image type
    if reg_type != "image":
        response = requests.get(url, allow_redirects=True, auth=auth)
        if response.status_code != 200:
            return False
        content_type = response.headers.get("content-type", None)
        if not content_type:
            return False
        reg_type = content_type.split("/", 1)[0].lower()
    return reg_type == "image"
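A trimmed-down sketch of the same HEAD-then-GET fallback for the unauthenticated case (the function name is hypothetical, and error handling is left to the caller):

import requests

def content_type_is_image(url, timeout=10):
    """Return True if url appears to serve an image/* content type."""
    response = requests.head(url, allow_redirects=True, timeout=timeout)
    content_type = response.headers.get('content-type', '')
    # some servers answer HEAD with text/html but GET with the real type,
    # so fall back to a slower GET when HEAD does not look like an image
    if response.status_code != 200 or not content_type.lower().startswith('image/'):
        response = requests.get(url, allow_redirects=True, timeout=timeout)
        content_type = response.headers.get('content-type', '')
    return response.status_code == 200 and content_type.lower().startswith('image/')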
Example #19
def forward(resource, identifier):
    """ Redirects request for file to direct URL.

        Requires global "paths" dictionary is active. 

        resource: a given resource, like "recount2"
        identifier: relative path to file or directory

        Return value: Flask redirect response object
    """
    # Log all requests, even weird ones
    ip = str(request.headers.get('X-Forwarded-For',
                        request.remote_addr)).split(',')[0].strip()
    print >>_LOGSTREAM, '\t'.join(
        [time.strftime('%A, %b %d, %Y at %I:%M:%S %p %Z'),
             str(mmh3.hash128(ip + 'recountsalt')),
             resource,
             identifier])
    _LOGSTREAM.flush()
    if resource == 'recount':
        # Redirect to IDIES URL in order of descending version
        for i in ['2']: # add versions to precede 2 as they are released
            if identifier.startswith(''.join(['v', i, '/'])):
                idies_url = '/'.join(
                            ['http://idies.jhu.edu/recount/data', identifier]
                        )
                idies_response = requests.head(idies_url)
                if idies_response.status_code == 200:
                    return redirect(idies_url, code=302)
        # v1 is not explicitly versioned
        idies_url = '/'.join(['http://idies.jhu.edu/recount/data', identifier])
        idies_response = requests.head(idies_url)
        if idies_response.status_code == 200:
            return redirect(idies_url, code=302)
    abort(404)
Example #20
def add_portmap(cont):
    if cont['Ports']:
        # a bit of a crazy comprehension to turn:
        # Ports': u'49166->8888, 49167->22'
        # into a useful dict {8888: 49166, 22: 49167}
        cont['portmap'] = dict([(p['PrivatePort'], p['PublicPort']) for p in cont['Ports']])

        # wait until services are up before returning container
        # TODO this could probably be factored better when next
        # service added
        # this should be done via ajax in the browser
        # this will loop and kill the server if it stalls on docker
        ipy_wait = shellinabox_wait = True
        while ipy_wait or shellinabox_wait:
            if ipy_wait:
                try:
                    requests.head("http://{host}:{port}".format(
                            host=app.config['SERVICES_HOST'],
                            port=cont['portmap'][8888]))
                    ipy_wait = False
                except requests.exceptions.ConnectionError:
                    pass

            if shellinabox_wait:
                try:
                    requests.head("http://{host}:{port}".format(
                            host=app.config['SERVICES_HOST'],
                            port=cont['portmap'][4200]))
                    shellinabox_wait = False
                except requests.exceptions.ConnectionError:
                    pass
            time.sleep(.2)
            print 'waiting', app.config['SERVICES_HOST']
        return cont
Example #21
def check_http_header(target, status_code=200):
    """Checks if a certain http URL returns the correct status code."""

    return_obj = {}

    try:
        # Don't follow redirections if status_code is in the 30x family
        if status_code // 10 == 30:
            r = requests.head(target, timeout=10)
        else:
            r = requests.head(target, allow_redirects=True, timeout=10)

        return_obj['valid'] = True
        return_obj['status_code'] = r.status_code
        return_obj['status_ok'] = r.status_code == status_code

    except ValueError as e:
        logger.error(e)
        return_obj['valid'] = False
        return_obj['error'] = 'Error in the target'

    except requests.exceptions.RequestException as e:
        logger.error(e)
        return_obj['valid'] = False
        return_obj['error'] = 'Error in the request'

    except Exception as e:
        logger.error(e)
        return_obj['valid'] = False
        return_obj['error'] = 'Unknown error'

    return return_obj
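A quick usage sketch for check_http_header (the target URL is illustrative):

result = check_http_header('https://example.com', status_code=200)
if result['valid'] and result['status_ok']:
    print('target responded with the expected status')
elif result['valid']:
    print('unexpected status: {}'.format(result['status_code']))
else:
    print(result['error'])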
Example #22
    def test_resources_available(self):
        blueprint_name = 'openstack-blueprint.yaml'
        self.repo_dir = clone(self.repo_url, self.workdir)
        self.blueprint_yaml = self.repo_dir / blueprint_name

        self.upload_blueprint(self.test_id)

        invalid_resource_url = 'http://{0}/resources/blueprints/{1}/{2}' \
            .format(self.env.management_ip, self.test_id, blueprint_name)

        try:
            result = requests.head(invalid_resource_url)
            self.assertNotEqual(
                result.status_code, 200,
                "Resources are available through a different port than 53229.")
        except ConnectionError:
            pass

        valid_resource_url = 'http://{0}:53229/blueprints/{1}/{2}' \
            .format(self.env.management_ip, self.test_id, blueprint_name)

        try:
            result = requests.head(valid_resource_url)
            self.assertEqual(
                result.status_code, 200,
                "Resources are not available through the port 53229.")
        except ConnectionError:
            self.fail("Resources are not available through the port 53229.")

        self.cfy.delete_blueprint(self.test_id)
Example #23
def youtube_url_validation(url):
    if not url:
        return False
    m = ups.urlparse(url)
    domain = '{uri.scheme}://{uri.netloc}/'.format(uri=m)
    print domain
    if ("youtube" in domain):
        v = str(ups.parse_qs(m.query)['v'][0])
        print v
        if (len(v) == 11):
            print "Yes! URL checks out."
            try:
                r = requests.head(url)
                print r.status_code
                if r.status_code == 200:
                    print "Yup! Video is there" + str(r.status_code)
                    return True
                return False
            except requests.ConnectionError:
                print("Failed to connect to the URL")
                return False
    if ("youtu.be" in domain):
        print "youtu.be in domain"
        try:
            r = requests.head(url)
            print r.status_code
            if ((r.status_code == 200) or (r.status_code == 302)):
                print "Yup! Video is there" + str(r.status_code)
                return True
            return False
        except requests.ConnectionError:
            print("Failed to connect to the URL")
            return False
    return False
Example #24
def get_tested_azure_url(image_info):
    try:
        # tk = TimeKeeper()
        # tk.time_now(image_info['flickr_id'] + '_azure_start', print_out=True)
        azure_url_part = u"http://blmc.blob.core.windows.net/{0[date]}/{0[book_identifier]}_{0[volume]}_{0[page]}_{0[image_idx]}_{0[date]}_imagesize.jpg".format(image_info)

        azure_url_part = azure_url_part.replace('imagesize', 'embellishments')
        r = requests.head(azure_url_part, stream=True, timeout=0.3)
        # tk.time_now(image_info['flickr_id'] + '_azure_embellishments', print_out=True)

        if r.status_code == requests.codes.ok:
            return azure_url_part
        else:
            azure_url_part = azure_url_part.replace('embellishments', 'medium')
            r = requests.head(azure_url_part, stream=True, timeout=0.3)
            # tk.time_now(image_info['flickr_id'] + '_azure_medium', print_out=True)

            if r.status_code == requests.codes.ok:
                return azure_url_part
            else:
                azure_url_part = azure_url_part.replace('medium', 'plates')
                r = requests.head(azure_url_part, stream=True, timeout=0.3)
                # tk.time_now(image_info['flickr_id'] + '_azure_medium', print_out=True)
                if r.status_code == requests.codes.ok:
                    return azure_url_part
                else:
                    return None
    except:
        return None
Example #25
def EXTRACT_MP3_LINKS(url):
    header = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:32.0) Gecko/20100101 Firefox/32.0',}
    page = requests.get(url, headers=header)
    soup = BeautifulSoup(page.content, 'html.parser')
    links = soup.findAll("a")
    #print type (links)
    for item in links:
        try:
            if ".mp3" in item['href']:
                try:
                    response=requests.head(item['href'])

                    if response.headers['content-type']=='audio/mpeg':
                        SAVEMP3(item['href'])
                except:
                    pass
                try:
                    response=requests.head(url+item['href'])
                    #print response
                    if response.headers['content-type']=='audio/mpeg':
                        SAVEMP3(url+item['href'])
                except:
                    pass
        except:
            pass
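The double try/except in EXTRACT_MP3_LINKS compensates for relative links; urljoin handles absolute and relative hrefs in one step. A minimal Python 3 sketch of that idea (the function name is hypothetical):

import requests
from urllib.parse import urljoin

def is_mp3_link(page_url, href, timeout=5):
    """HEAD the (possibly relative) href and check whether it serves audio/mpeg."""
    full_url = urljoin(page_url, href)
    try:
        response = requests.head(full_url, timeout=timeout, allow_redirects=True)
    except requests.RequestException:
        return False
    return response.headers.get('content-type') == 'audio/mpeg'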
Example #26
def correct_url(url, rights):
    """
    correct_url

    link checker and guesser for wikipedia thumbnail URLs

    returns a checked (good) URL as a unicode string or None
    """
    urlres = requests.head(url, allow_redirects=True)
    # thumbnail URL looks good (check the link first)
    if (urlres.status_code == requests.codes.ok):
        return url

    # something is not right
    # if the attribute page for the image does not exist, then we
    # won't find a thumbnail, so we may as well give up now
    rightsres = requests.head(rights)
    if (rightsres.status_code != requests.codes.ok):
        return None

    # okay, there should be a good thumbnail here, just not at the
    # URL we tried

    elif (urlres.status_code == 404):
        return correct_url_404(url)
    elif (urlres.status_code == 500):
        return correct_url_500(url)
    # not sure we can get here, something might be very wrong
    else:
        raise Exception("wikipedia thumbnail URL {0} had unexpected "
                        "status code {1}".format(url, urlres.status_code))
Example #27
def download_vid(vid):
    html_downloader = main.Downloader()
    html_downloader.get(vid[1], vid[2], "html")

    flv_redirect = app.extract_flv_redirect(vid[2])

    headers = {
        "Accept-Encoding": "gzip,deflate,sdch",
        "Host": "redirector.googlevideo.com",
        "Accept-Language": "en-US,en;q=0.8,fr;q=0.6",
        "User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.66 ari/537.36",
        "Accept": "*/*",
        "Referer": vid[1],
        "Connection": "keep-alive",
        "Cache-Control": "no-cache",
    }

    req = requests.head(flv_redirect, headers=headers)
    file_url = req.headers["location"]

    host = urlparse(file_url).netloc
    headers["Host"] = host

    html_downloader.download_file(file_url, "flv/%s.flv" % vid[2], headers)
Example #28
def VerifyPath(data):
    # Insert try and catch blocks
    try:
        token_name = data["project"]["token_name"]
    except:
        token_name = data["project"]["project_name"]

    channel_names = data["channels"].keys()

    for i in range(0, len(channel_names)):
        channel_type = data["channels"][channel_names[i]]["channel_type"]
        path = data["channels"][channel_names[i]]["data_url"]

        if channel_type == "timeseries":
            timerange = data["dataset"]["timerange"]
            for j in xrange(timerange[0], timerange[1] + 1):
                # Test for tifs or such? Currently test for just not empty
                work_path = "{}{}/{}/time{}/".format(path, token_name, channel_names[i], j)
                resp = requests.head(work_path)
                assert resp.status_code == 200
        else:
            # Test for tifs or such? Currently test for just not empty
            work_path = "{}{}/{}/".format(path, token_name, channel_names[i])
            resp = requests.head(work_path)
            print (work_path)
            assert resp.status_code == 200
Example #29
    def __init__(self, spider=None, *a, **kw):
        super(GetpagesfromsitemapSpider, self).__init__(*a, **kw)
        try:
            cnx = mysql.connector.connect(**config)
        except mysql.connector.Error as err:
            if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
                print("Something is wrong with your user name or password")
            elif err.errno == errorcode.ER_BAD_DB_ERROR:
                print("Database does not exists")
            else:
                print(err)
        else:
            self.spider = spider
            cursor = cnx.cursor()
            l = []
            url = "https://channelstore.roku.com"
            resp = requests.head(url + "/sitemap.xml")
            if (resp.status_code != 404):
                l.append(resp.url)
            else:
                resp = requests.head(url + "/robots.txt")
                if (resp.status_code == 200):
                    l.append(resp.url)
        self.sitemap_urls = l
        print self.sitemap_urls
Example #30
    def test_doesnt_delete_shared_files(self):
        """
        Make sure that a file shared between two file objects doesn't
        get deleted when one of the file objects gets deleted
        """
        c = _create_expired_contentnode()
        file_on_disk = ContentFile("test")
        f = File.objects.create(
            contentnode_id=c.pk,
            file_on_disk=file_on_disk,
            checksum="aaa",
        )
        f.file_on_disk.save("aaa.jpg", file_on_disk)
        file_url = f.file_on_disk.url

        c2 = ContentNode.objects.create(kind_id=content_kinds.TOPIC, title="test")
        f2 = File.objects.create(
            contentnode_id=c2.pk,
            file_on_disk=file_on_disk,
            checksum="aaa",
        )
        f2.file_on_disk.save("aaa.jpg", file_on_disk)

        # check that file_url exists before cleaning up
        requests.head(file_url).raise_for_status()
        clean_up_contentnodes()

        # the file should still be available
        response = requests.head(file_url)
        assert response.status_code == 200
Example #31
def extract_summary(doc, document):
    summary = original_extract_summary(doc, document)
    if any(summary.startswith(tag) for tag in (":param ", ":rtype: ")):
        return ""
    return summary


autosummary.extract_summary = extract_summary


# Configuration for sphinx.ext.intersphinx
# https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html
intersphinx_mapping = {}

if requests.head("https://hail.is/docs/0.2/objects.inv").status_code == 200:
    intersphinx_mapping["hail"] = ("https://hail.is/docs/0.2", None)
else:
    print("Unable to link to Hail docs (cannot access objects.inv)", file=sys.stderr)

# sphinx_autodoc_typehints generates references with qualified names.
# Since Hail re-exports many objects from higher level packages/modules,
# Hail's documentation does not list all objects by their qualified name.
# For example, Table is documented as hail.Table, not hail.table.Table.
# Thus, intersphinx cannot link some of the Hail references generated by
# sphinx_autodoc_typehints.
#
# To work around this, override sphinx_autodoc_typehints's get_annotation_module
# function and map the qualified names to what Hail's documentation uses.
original_get_annotation_module = sphinx_autodoc_typehints.get_annotation_module
Example #32
    def add_yaml_file_content_to_database(self, filepath, deleteFile=False):
        """*given a file to a yaml file, add yaml file content to database*

        **Key Arguments:**
            - ``filepath`` -- the path to the yaml file
            - ``deleteFile`` -- delete the yaml file when its content has been added to the database. Default *False*

        **Return:**
            - None

        **Usage:**

            To parse and import the contents of a single yaml file into the database, use the following:

            .. code-block:: python 

                from fundamentals.mysql import yaml_to_database
                # PARSE YAML FILE CONTENTS AND ADD TO DATABASE
                yaml2db = yaml_to_database(
                    log=log,
                    settings=settings,
                    dbConn=dbConn
                ) 
                yaml2db.add_yaml_file_content_to_database(
                    filepath=${1:"/path/to/file.yaml"},
                    deleteFile=True
                )
        """
        self.log.debug(
            'starting the ``add_yaml_file_content_to_database`` method')

        import codecs
        try:
            self.log.debug("attempting to open the file %s" % (filepath, ))
            readFile = codecs.open(filepath, encoding='utf-8', mode='r')
            thisData = readFile.read()
            readFile.close()
        except IOError as e:
            message = 'could not open the file %s' % (filepath, )
            self.log.critical(message)
            raise IOError(message)
        readFile.close()

        matchObject = re.finditer(
            r'(^|\n)(?P<key>[^\:]*)\:\s(?P<value>.*?)(\n|$)',
            thisData,
            flags=re.M | re.S  # re.S
        )

        yamlContent = {}
        for match in matchObject:
            if match.group("value")[0] == '"' and match.group(
                    "value")[-1] == '"':
                v = match.group("value")[1:-1]
            elif match.group("value")[0] == "'" and match.group(
                    "value")[-1] == "'":
                v = match.group("value")[1:-1]
            else:
                v = match.group("value")
            yamlContent[match.group("key")] = v

        if "table" not in yamlContent:
            self.log.warning(
                'A table value is need in the yaml content to indicate which database table to add the content to: %(filepath)s'
                % locals())
            return None

        # NOTE THERE MAY BE MORE THAN ONE DATABASE TABLE
        dbTablesTmp = yamlContent["table"].split(",")
        del yamlContent["table"]
        dbTables = []
        dbTables[:] = [d.strip() for d in dbTablesTmp]

        # UNSHORTEN URL
        try:
            r = requests.head(yamlContent["url"], allow_redirects=True)
            yamlContent["url"] = r.url
        except:
            pass

        yamlContent["original_yaml_path"] = filepath

        if "url" in yamlContent:
            uniqueKeyList = ["url"]
        else:
            uniqueKeyList = []

        for t in dbTables:
            convert_dictionary_to_mysql_table(dbConn=self.dbConn,
                                              log=self.log,
                                              dictionary=yamlContent,
                                              dbTableName=t,
                                              uniqueKeyList=uniqueKeyList,
                                              dateModified=True,
                                              returnInsertOnly=False,
                                              replace=True)
        if deleteFile:
            os.remove(filepath)

        self.log.debug(
            'completed the ``add_yaml_file_content_to_database`` method')
        return None
Example #33
def url_ok(url):
    try:
        r = requests.head(url)
        return r.status_code == 200
    except ConnectionError:
        return False
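Unless ConnectionError was imported from requests.exceptions, the bare name caught above is Python's builtin exception, which requests does not raise, so failures would propagate. A variant that is explicit about what it catches (a sketch, not from the original project):

import requests

def url_ok(url, timeout=5):
    """Return True if a HEAD request to url answers with HTTP 200."""
    try:
        return requests.head(url, timeout=timeout).status_code == 200
    except requests.RequestException:
        return False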
Example #34
import requests
# Set the target webpage
url = "http://172.18.58.238/headers.php"
r = requests.get(url)
# This will get the full page
print(r.text)
print("Status code:")
print("\t OK", )
# This will just get just the headers
h = requests.head(url)
print("Header:")
# To print line by line
for x in h.headers:
    print("\t ", x, ":", h.headers[x])
import requests
h = {"User-Agent": "mobile"}
r = requests.get("http://172.18.58.238/headers.php", headers=h)
print(r.content)

import scrapy


class NewSpider(scrapy.Spider):
    name = "new_spider"
    start_urls = ["http://172.18.58.238/multi/"]

    def parse(self, response):
        css_selector = 'img'
        for x in response.css(css_selector):
            newsel = '@src'
            yield {
Example #35
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 13 15:22:46 2019

@author: 44100521
"""
import requests
import os
os.chdir(r'C:\Z Drive\Python Folder\WebScrapping')

nums = range(1, 2000)
f1 = open('b1.out', 'a')
for i in nums:
    s = 'https://www.nycoedsoccer.com/league/' + str(i) + '/schedule/'
    ret = requests.head(s, timeout=5)

    #f1.write(str(ret.status_code)+','+str(i)+'\n')

    print(i, ret.status_code, file=f1)

f1.close()
print('Completed!')

# # Below is to get HTML for that webpage

# int = 130 #range(130, 131)
# #f1=open('./b.out', 'a')
# #for i in int:
# s = 'https://www.nycoedsoccer.com/league/'+str(int)+'/schedule/'
# #s = 'http://nycoedsoccer.com'
# ret = requests.get(s,timeout=5)
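When probing a few thousand URLs in a loop like this, a requests.Session reuses connections and a per-request try/except keeps one bad URL from aborting the run. A rough sketch under those assumptions (the range is shortened for illustration):

import requests

session = requests.Session()
with open('b1.out', 'a') as f1:
    for i in range(1, 20):
        s = 'https://www.nycoedsoccer.com/league/' + str(i) + '/schedule/'
        try:
            status = session.head(s, timeout=5).status_code
        except requests.RequestException as exc:
            status = exc.__class__.__name__
        print(i, status, file=f1)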
Example #36
 def get_mimetype(url):
     page = requests.head(url)
     content_type = page.headers.get('Content-Type', '')
     content_type = content_type.split(';')[0]  # Strip out the encoding
     return url, content_type
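A quick usage sketch (the URL is illustrative; the MIME type shown is just what such a server might return):

url, mimetype = get_mimetype('https://example.com/report.pdf')
print(mimetype)  # e.g. 'application/pdf'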
Example #37
targetsHealth = getTargetHealth(elbv2_client, targets, targetGroupARN)

for indx, targetHealth in enumerate(targetsHealth["TargetHealthDescriptions"]):
    targetState = targetHealth["TargetHealth"]["State"]
    targetId = targetHealth["Target"]["Id"]
    # targetReason=targetHealth["TargetHealth"]["Reason"]
    while targetState != 'healthy':
        print('Service %s on instance %s is in %s state' %
              (ECSServiceName, targetId, targetState))
        time.sleep(60)
        targetHealth = getTargetHealth(
            elbv2_client, targets,
            targetGroupARN)["TargetHealthDescriptions"][indx]
        targetState = targetHealth["TargetHealth"]["State"]
        # targetReason = targetHealth["TargetHealth"]["Reason"]
    else:
        print('Service %s on instance %s is in %s state \n' %
              (ECSServiceName, targetId, targetState))

# Checking via http code
r = requests.head("http://" + loadBalancerDNSName)
if r.status_code == 200:
    print("Finished successfully")
    print('Service %s was successfully created on %s instances' %
          (ECSServiceName, instancesIDList))
    print("DNS name is " + loadBalancerDNSName)
else:
    print(
        'Error: Service %s was created on %s, but is not available at the %s address'
        % (ECSServiceName, instancesIDList, loadBalancerDNSName))
Example #38
        continue
    print(songlink)

    # Create the directory where songs will be stored
    songdir = "songs_dir"
    if not os.path.exists(songdir):
        os.makedirs(songdir)

    # Build the output file path
    songname = d["data"]["songList"][0]["songName"]
    artistName = d["data"]["songList"][0]["artistName"]
    filename = ("%s/%s/%s-%s.flac" %
                (CURRENT_PATH, songdir, songname, artistName))

    f = urllib.request.urlopen(songlink)
    headers = requests.head(songlink).headers
    size = round(int(headers['Content-Length']) / (1024 ** 2), 2)  # B -> KB -> MB, rounded to 2 decimal places
    # Start writing the file
    if not os.path.isfile(filename) or os.path.getsize(filename) < minimumsize:  # Delete useless flacs
        print("%s is downloading now ......\n" % songname)
        if size >= minimumsize:
            with open(filename, "wb") as code:  # write the song file
                code.write(f.read())  # read all of f, then write it into the file
        else:
            print("The size of %s (%r Mb) is less than 10 Mb, skipping...\n" %
                  (filename, size))
    else:
        print("%s is already downloaded. Finding next song...\n" % songname)


print("================================================================\n")
Example #39
def get_from_cache(url, cache_dir=None):
    """
    Given a URL, look for the corresponding dataset in the local cache.
    If it's not there, download it. Then return the path to the cached file.
    """
    if cache_dir is None:
        cache_dir = PYTORCH_PRETRAINED_BERT_CACHE
    if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    # Get eTag to add to filename, if it exists.
    if url.startswith("s3://"):
        etag = s3_etag(url)
    else:
        response = requests.head(url, allow_redirects=True)
        if response.status_code != 200:
            raise IOError(
                "HEAD request failed for url {} with status code {}".format(
                    url, response.status_code))
        etag = response.headers.get("ETag")

    filename = url_to_filename(url, etag)

    # get cache path to put the file
    cache_path = os.path.join(cache_dir, filename)

    if not os.path.exists(cache_path):
        # Download to temporary file, then copy to cache dir once finished.
        # Otherwise you get corrupt cache entries if the download gets interrupted.
        with tempfile.NamedTemporaryFile() as temp_file:
            logger.info("%s not found in cache, downloading to %s", url,
                        temp_file.name)

            # GET file object
            if url.startswith("s3://"):
                s3_get(url, temp_file)
            else:
                http_get(url, temp_file)

            # we are copying the file before closing it, so flush to avoid truncation
            temp_file.flush()
            # shutil.copyfileobj() starts at the current position, so go to the start
            temp_file.seek(0)

            logger.info("copying %s to cache at %s", temp_file.name,
                        cache_path)
            with open(cache_path, 'wb') as cache_file:
                shutil.copyfileobj(temp_file, cache_file)

            logger.info("creating metadata file for %s", cache_path)
            meta = {'url': url, 'etag': etag}
            meta_path = cache_path + '.json'
            with open(meta_path, 'w', encoding="utf-8") as meta_file:
                json.dump(meta, meta_file)

            logger.info("removing temp file %s", temp_file.name)

    return cache_path
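The HEAD-for-ETag step at the heart of this cache logic can be isolated into a small helper; a hedged sketch (the function name is not from the original code):

import requests

def fetch_etag(url, timeout=10):
    """Return the ETag header for url, or None if the HEAD request fails."""
    try:
        response = requests.head(url, allow_redirects=True, timeout=timeout)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    return response.headers.get('ETag')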
Example #40
def get_from_cache(
    url,
    cache_dir=None,
    force_download=False,
    proxies=None,
    etag_timeout=10,
    resume_download=False,
    user_agent: Union[Dict, str, None] = None,
    local_files_only=False,
) -> Optional[str]:
    """
    Given a URL, look for the corresponding file in the local cache.
    If it's not there, download it. Then return the path to the cached file.

    Return:
        None in case of non-recoverable file (non-existent or inaccessible url + no cache on disk).
        Local path (string) otherwise
    """
    if cache_dir is None:
        cache_dir = TRANSFORMERS_CACHE
    if isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)

    os.makedirs(cache_dir, exist_ok=True)

    etag = None
    if not local_files_only:
        try:
            response = requests.head(url,
                                     allow_redirects=True,
                                     proxies=proxies,
                                     timeout=etag_timeout)
            if response.status_code == 200:
                etag = response.headers.get("ETag")
        except (EnvironmentError, requests.exceptions.Timeout):
            # etag is already None
            pass

    filename = url_to_filename(url, etag)

    # get cache path to put the file
    cache_path = os.path.join(cache_dir, filename)

    # etag is None = we don't have a connection, or url doesn't exist, or is otherwise inaccessible.
    # try to get the last downloaded one
    if etag is None:
        if os.path.exists(cache_path):
            return cache_path
        else:
            matching_files = [
                file for file in fnmatch.filter(os.listdir(cache_dir),
                                                filename.split(".")[0] + ".*")
                if not file.endswith(".json") and not file.endswith(".lock")
            ]
            if len(matching_files) > 0:
                return os.path.join(cache_dir, matching_files[-1])
            else:
                # If files cannot be found and local_files_only=True,
                # the models might've been found if local_files_only=False
                # Notify the user about that
                if local_files_only:
                    raise ValueError(
                        "Cannot find the requested files in the cached path and outgoing traffic has been"
                        " disabled. To enable model look-ups and downloads online, set 'local_files_only'"
                        " to False.")
                return None

    # From now on, etag is not None.
    if os.path.exists(cache_path) and not force_download:
        return cache_path

    # Prevent parallel downloads of the same file with a lock.
    lock_path = cache_path + ".lock"
    with FileLock(lock_path):

        # If the download just completed while the lock was activated.
        if os.path.exists(cache_path) and not force_download:
            # Even if returning early like here, the lock will be released.
            return cache_path

        if resume_download:
            incomplete_path = cache_path + ".incomplete"

            @contextmanager
            def _resumable_file_manager():
                with open(incomplete_path, "a+b") as f:
                    yield f

            temp_file_manager = _resumable_file_manager
            if os.path.exists(incomplete_path):
                resume_size = os.stat(incomplete_path).st_size
            else:
                resume_size = 0
        else:
            temp_file_manager = partial(tempfile.NamedTemporaryFile,
                                        dir=cache_dir,
                                        delete=False)
            resume_size = 0

        # Download to temporary file, then copy to cache dir once finished.
        # Otherwise you get corrupt cache entries if the download gets interrupted.
        with temp_file_manager() as temp_file:
            logger.info(
                "%s not found in cache or force_download set to True, downloading to %s",
                url, temp_file.name)

            http_get(url,
                     temp_file,
                     proxies=proxies,
                     resume_size=resume_size,
                     user_agent=user_agent)

        logger.info("storing %s in cache at %s", url, cache_path)
        os.replace(temp_file.name, cache_path)

        logger.info("creating metadata file for %s", cache_path)
        meta = {"url": url, "etag": etag}
        meta_path = cache_path + ".json"
        with open(meta_path, "w") as meta_file:
            json.dump(meta, meta_file)

    return cache_path
Example #41
    def resolve(self, sources, args, pack_select=False):
        try:
            if 'showInfo' in args:
                background = args['showInfo']['art']['fanart']
            else:
                background = args['fanart']

            self.setText("Begining Link Resolver")
            self.setBackground(background)
            stream_link = None
            loop_count = 0
            # Begin resolving links
            tools.log('Attempting to Resolve file link', 'info')
            for i in sources:
                debrid_provider = i.get('debrid_provider', '')
                loop_count += 1
                try:
                    if self.is_canceled():
                        self.close()
                        return
                    if 'size' in i:
                        i['info'].append(tools.source_size_display(i['size']))
                    loop_count_string = "(" + str(loop_count) + " of " + str(
                        len(sources)) + ")"
                    line1 = tools.lang(32036) + "%s - %s" % (tools.colorString(
                        i['release_title']), loop_count_string)
                    line2 = tools.lang(32037) + "%s | Source: %s" % (
                        tools.colorString(debrid_provider.upper()),
                        tools.colorString(i['source']))
                    line3 = tools.lang(32038) + '%s | Info: %s' % (
                        tools.colorString(i['quality']),
                        tools.colorString(" ".join(i['info'])))

                    self.setText(line1)
                    self.setText2(line2)
                    self.setText3(line3)

                    if i['type'] == 'torrent':
                        if i['debrid_provider'] == 'premiumize':
                            stream_link = self.premiumizeResolve(
                                i, args, pack_select)
                        elif i['debrid_provider'] == 'real_debrid':
                            stream_link = self.realdebridResolve(i, args)

                        if stream_link is None:
                            tools.log('Failed to resolve for torrent %s' %
                                      i['release_title'])
                            continue
                        else:
                            self.return_data = stream_link
                            self.close()
                            return

                    elif i['type'] == 'hoster':
                        # Quick fallback to speed up resolving while direct and free hosters are not supported
                        if 'debrid_provider' not in i:
                            continue
                        provider = i['provider_imports']
                        providerModule = __import__('%s.%s' %
                                                    (provider[0], provider[1]),
                                                    fromlist=[''])
                        providerModule = providerModule.source()

                        try:
                            i['url'] = providerModule.resolve(i['url'])
                        except:
                            import traceback
                            traceback.print_exc()
                            pass

                        if i['url'] is None:
                            continue

                        if 'debrid_provider' in i:
                            if i['debrid_provider'] == 'premiumize' and tools.getSetting(
                                    'premiumize.enabled') == 'true':
                                stream_link = self.premiumizeResolve(i, args)
                                if stream_link is None:
                                    continue

                            if i['debrid_provider'] == 'real_debrid':
                                stream_link = self.realdebridResolve(i, args)
                                if stream_link is None:
                                    continue

                        else:
                            # Currently not supporting free hosters at this point in time
                            # ResolveURL and Direct link testing needs to be tested first
                            continue
                            try:
                                try:
                                    headers = i['url'].rsplit('|', 1)[1]
                                except:
                                    headers = ''

                                headers = tools.quote_plus(headers).replace(
                                    '%3D', '=') if ' ' in headers else headers
                                headers = dict(tools.parse_qsl(headers))

                                live_check = requests.head(i['url'],
                                                           headers=headers)

                                if not live_check.status_code == 200:
                                    continue

                                stream_link = i['url']
                            except:
                                stream_link = None

                        if stream_link is None:
                            continue
                        else:
                            self.return_data = stream_link
                            self.close()
                            return
                    continue

                except:
                    import traceback
                    traceback.print_exc()
                    continue

            self.close()
            return
        except:
            import traceback
            traceback.print_exc()
            self.close()
            return
Example #42
# https://gosmcom.tistory.com/130
import requests
response = requests.get('https://api.github.com/events')
print(response.text)
response = requests.post('https://httpbin.org/post',data={'key':'value'})
response = requests.put('https://httpbin.org/put',data={'key':'value'})
response = requests.delete('https://httpbin.org/delete')
response = requests.head('https://httpbin.org/get')
response = requests.options('https://httpbin.org/get')

payload = {'key1':'value1','key2':'value2'}
response = requests.get('https://httpbin.org/get',params=payload)


import requests
# Web request - the canonical form
# 'https://search.naver.com/search.naver?query=아이스크림'
host = 'https://search.naver.com'
path = '/search.naver'
params = {'query': '아이스크림'}
url = host + path

response = requests.get(url, params=params)

# Response attributes
print(response.status_code)  # response status code
print(response.url)          # the URL that was requested
print(response.text)         # response body as str - page source or text data, handy for checking JSON
print(response.content)      # response body as bytes - for saving binary data such as audio or video
print(response.encoding)     # encoding of the response body
print(response.headers)      # response headers
Example #43
def proxy(path=""):

    SCHEME_PREFIX = "https://"
    EXPIRY_LIMIT = 10000
    url_bits = request.url.split("?", 1)
    qs = "?" + url_bits[1] if len(url_bits) > 1 else ""  # queries

    expiry_time = datetime.now() + timedelta(seconds=EXPIRY_LIMIT)
    # Extract host
    host = [h[1] for h in request.headers if h[0].lower() == 'host'][0]

    # Extract custom header
    # print("custom")
    # print("HEADER",request.headers)  # NOT PRINTING
    # print([h[1] for h in request.headers]) # ['www.ezoic.com', 'curl/7.64.1', '*/*', 'True']
    # print([h[0] for h in request.headers])  # ['www.ezoic.com', 'curl/7.64.1', '*/*', 'True']

    # print("remove",  [ h[1] for h in request.headers if h[0].lower() == 'remove-from-cache' ])  # NOT PRINTING
    remove_from_cache = [
        h[1] for h in request.headers if h[0].lower() == 'remove-from-cache'
    ]

    if not remove_from_cache:
        remove_from_cache = False

    recreated_request = SCHEME_PREFIX + host + "/" + str(
        path) + qs  # TODO Not ideal, not safe
    # recreated_request => https://www.ezoic.com/ad-tester-details/

    cache_key = (recreated_request, request.method)

    if remove_from_cache:  # header is true
        if cache_key not in cache.main_cache or cache.main_cache[cache_key][
                1] < datetime.now():
            # Construct a response saying the element was not present to begin with
            response = Response("Element not present in cache",
                                200)  # Make response content
        else:
            cache.main_cache.pop(cache_key, None)
            # Construct an appropriate response conveying that the element was found and removed
            response = Response("Element present in cache, Deleted", 200)
        return response

    if request.method in {"GET", 'HEAD'}:
        # print("Cache check functional")
        if cache_key not in cache.main_cache or cache.main_cache[cache_key][
                1] < datetime.now():  # cache entry is a (response, expiry_time) pair
            print("From Server, moving to Cache")
            if request.method == "HEAD":
                resp = requests.head(recreated_request,
                                     headers=dict(request.headers))
            else:
                resp = requests.get(recreated_request,
                                    headers=dict(request.headers))

            if "cache-control" in resp.headers:
                value = resp.headers["cache-control"]
                if "max-age" in value:
                    expiry_time = datetime.now() + timedelta(
                        seconds=int(
                            value.split('max-age=')[1].split(",")[0]))  # check
                    # expiry_time = datetime.now() - datetime.now()  + timedelta(seconds = 100)   # hack chage later, cast
                    # expiry_time =  datetime.now() - timedelta(seconds = int(value.split('max-age=')[1].split(",")[0]))
                    print("Expiry time", expiry_time)

            # 'Cache-Control': 'public, max-age=60, s-maxage=60'
            response = Response(resp.content, resp.status_code)
            cache.main_cache[
                cache_key] = response, expiry_time  # store the response with its absolute expiry time

        return cache.main_cache[cache_key][0]

    elif request.method == 'POST':
        req_data = request.get_data()
        resp = requests.post(recreated_request,
                             headers=dict(request.headers),
                             data=req_data)
        response = Response(resp.content, resp.status_code)
        return response

    elif request.method == "DELETE":
        resp = requests.delete(recreated_request,
                               headers=dict(request.headers))
        response = Response(resp.content, resp.status_code)
        return response
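# The inline 'max-age' parsing in the GET/HEAD branch above is easy to break on unusual
# Cache-Control values; below is a minimal, hedged sketch of a more defensive parser.
# The helper name parse_max_age is hypothetical and not part of the original proxy.
import re

def parse_max_age(cache_control_value):
    """Return the max-age directive in seconds, or None if it is absent."""
    match = re.search(r"max-age=(\d+)", cache_control_value)
    return int(match.group(1)) if match else None

# Example: parse_max_age('public, max-age=60, s-maxage=60') -> 60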
Ejemplo n.º 44
0
def download_file(
    url,
    file_path,
    md5_hash=None,
    timeout=10,
    block_size=1024 * 1024,
    show_progress=True,
):
    """Resumable download.
    Expect the server to support byte ranges.

    Parameters
    ----------
    url: string
         URL
    file_path: string
               Local file path to store the downloaded file
    md5_hash: string
              Expected MD5 string of downloaded file
    timeout: int
             Seconds to wait before terminating request
    block_size: int
                Chunk of bytes to read (default: 1024 * 1024 = 1 MB)
    show_progress: bool
                   Show progress bar
    """
    if os.path.exists(file_path) and os.path.getsize(file_path):
        return
    tmp_file_path = file_path + ".part"
    first_byte = os.path.getsize(tmp_file_path) if os.path.exists(tmp_file_path) else 0
    file_mode = "ab" if first_byte else "wb"
    file_size = -1
    try:
        file_size = int(requests.head(url, timeout=timeout).headers["Content-Length"])
        headers = {"Range": "bytes=%s-" % first_byte}
        r = requests.get(url, headers=headers, stream=True, timeout=timeout)
        if show_progress:
            desc = "Downloading {}".format(url.split("/")[-1])
            pbar = tqdm(
                total=file_size,
                initial=first_byte,
                unit="B",
                unit_scale=True,
                desc=desc,
            )
        with open(tmp_file_path, file_mode) as f:
            for chunk in r.iter_content(chunk_size=block_size):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
                    if show_progress:
                        pbar.update(block_size)
        if show_progress:
            pbar.close()
    except IOError as e:
        sys.stderr.write("IO Error - {}\n".format(e))
    finally:
        # Move the temp file to desired location
        if file_size == os.path.getsize(tmp_file_path):
            # if there's a hash value, validate the file
            if md5_hash and not md5_validate_file(tmp_file_path, md5_hash):
                raise Exception("Error validating the file against its MD5 hash")
            shutil.move(tmp_file_path, file_path)
        elif file_size == -1:
            raise Exception("Error getting Content-Length from server: %s" % url)
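# Hypothetical usage of download_file; the URL and file name are placeholders rather
# than values from the original source.
download_file(
    "https://example.com/data/archive.tar.gz",
    "archive.tar.gz",
    md5_hash=None,           # pass an expected MD5 string to validate the finished file
    block_size=1024 * 1024,  # 1 MB chunks, matching the default
)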
Ejemplo n.º 45
0
 def test_head_405(self):
     body = '{}'
     resp = requests.head(_SERVER_BASE_URL, data=body)
     assert resp.status_code == 405
Ejemplo n.º 46
0
def url_ok(url):
    try:
        r = requests.head(url, timeout=1, verify=False)
        return r.status_code == 200
    except requests.exceptions.RequestException:
        # Treat any request failure as the URL being unavailable.
        return False
Ejemplo n.º 47
0
def main():

    if len(sys.argv) != 3:
        print("Formato TestServidorWeb <maquina> <puerto>")
        sys.exit()

    # Read the required arguments
    maquina = sys.argv[1]
    puerto = int(sys.argv[2])
    urlBase = "http://" + maquina + ":" + str(puerto) + "/"

    aciertos = 0
    totalTests = 0

    print("\nComprobando servidor: " + urlBase + "\n=====================\n\n")

    # 1 Check multithreading
    test = "Multihilo"
    print(test + ("." * (30 - len(test))), end=" ")
    totalTests = totalTests + 1
    try:
        # Create the connection-oriented socket
        socketCliente = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Set a 300-second timeout
        socketCliente.settimeout(300000)
        # Start the connection to the server.
        socketCliente.connect((maquina, puerto))
        # Once the connection is established, perform the query in another thread.
        r = requests.get(urlBase)
        print("OK")
        aciertos = aciertos + 1
    except socket.timeout:
        print("FALLO")
    except:
        print("FALLO")
    finally:
        socketCliente.close()

    # 2 Unsupported request
    test = "Petición no soportada"
    print(test + ("." * (30 - len(test))), end=" ")
    totalTests = totalTests + 1
    try:
        r = requests.post(urlBase)
        if r.status_code == 400 or r.status_code == 501:
            print("OK")
            aciertos = aciertos + 1
        else:
            print("FALLO")
    except:
        print("FALLO")

    # 3 Malformed request
    test = "Petición incorrecta"
    print(test + ("." * (30 - len(test))), end=" ")
    totalTests = totalTests + 1
    try:
        # Create the connection-oriented socket
        socketCliente = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Set a 300-second timeout
        socketCliente.settimeout(300000)
        # Start the connection to the server.
        socketCliente.connect((maquina, puerto))
        # Send a malformed request.
        socketCliente.send("42 BIEN\n\n".encode())
        r = socketCliente.recv(4096)
        if r.decode("UTF-8").upper().startswith("HTTP/1.0 400 BAD REQUEST") \
                or r.decode("UTF-8").upper().startswith("HTTP/1.1 400 BAD REQUEST"):
            print("OK")
            aciertos = aciertos + 1
        else:
            print("FALLO")
    except socket.timeout:
        print("FALLO")
    except:
        print("FALLO")
    finally:
        socketCliente.close()

    # 4 Nonexistent file
    test = "Fichero no encontrado"
    print(test + ("." * (30 - len(test))), end=" ")
    totalTests = totalTests + 1
    try:
        r = requests.get(urlBase + "invent.fake")
        if r.status_code == 404:
            print("OK")
            aciertos = aciertos + 1
        else:
            print("FALLO")
    except:
        print("FALLO")

    # 5 HEAD TXT
    test = "Head TXT"
    print(test + ("." * (30 - len(test))), end=" ")
    totalTests = totalTests + 1
    try:
        r = requests.head(urlBase + "fichero.txt")
        if ((r.status_code == 200) and (len(r.text) == 0)
                and checkHeaders(r.headers, "fichero.txt")):
            print("OK")
            aciertos = aciertos + 1
        else:
            print("FALLO")
    except:
        print("FALLO")

    # 6 GET TXT
    test = "Get TXT"
    print(test + ("." * (30 - len(test))), end=" ")
    totalTests = totalTests + 1
    try:
        r = requests.get(urlBase + "fichero.txt")
        if (r.status_code == 200 and checkHeaders(r.headers, "fichero.txt")
                and checkContent(r.content, "fichero.txt")):
            print("OK")
            aciertos = aciertos + 1
        else:
            print("FALLO")
    except:
        print("FALLO")

    # 7 HEAD HTML
    test = "Head HTML"
    print(test + ("." * (30 - len(test))), end=" ")
    totalTests = totalTests + 1
    try:
        r = requests.head(urlBase + "index.html")
        if ((r.status_code == 200) and (len(r.text) == 0)
                and checkHeaders(r.headers, "index.html")):
            print("OK")
            aciertos = aciertos + 1
        else:
            print("FALLO")
    except:
        print("FALLO")

    # 8 GET HTML
    test = "Get HTML"
    print(test + ("." * (30 - len(test))), end=" ")
    totalTests = totalTests + 1
    try:
        r = requests.get(urlBase + "index.html")
        if (r.status_code == 200 and checkHeaders(r.headers, "index.html")
                and checkContent(r.content, "index.html")):
            print("OK")
            aciertos = aciertos + 1
        else:
            print("FALLO")
    except:
        print("FALLO")

    # 9 HEAD JPG
    test = "Head JPG"
    print(test + ("." * (30 - len(test))), end=" ")
    totalTests = totalTests + 1
    try:
        r = requests.head(urlBase + "frightened_socket.jpg")
        if (r.status_code == 200 and len(r.text) == 0
                and checkHeaders(r.headers, "frightened_socket.jpg")):
            print("OK")
            aciertos = aciertos + 1
        else:
            print("FALLO")
    except:
        print("FALLO")

    # 10 GET JPG
    test = "Get JPG"
    print(test + ("." * (30 - len(test))), end=" ")
    totalTests = totalTests + 1
    try:
        r = requests.get(urlBase + "frightened_socket.jpg")
        if (r.status_code == 200
                and checkHeaders(r.headers, "frightened_socket.jpg")
                and checkContent(r.content, "frightened_socket.jpg")):
            print("OK")
            aciertos = aciertos + 1
        else:
            print("FALLO")
    except:
        print("FALLO")

    # 11 HEAD GIF
    test = "Head GIF"
    print(test + ("." * (30 - len(test))), end=" ")
    totalTests = totalTests + 1
    try:
        r = requests.head(urlBase + "seven_segment_display.gif")
        if (r.status_code == 200 and len(r.text) == 0
                and checkHeaders(r.headers, "seven_segment_display.gif")):
            print("OK")
            aciertos = aciertos + 1
        else:
            print("FALLO")
    except:
        print("FALLO")

    # 12 GET GIF
    test = "Get GIF"
    print(test + ("." * (30 - len(test))), end=" ")
    totalTests = totalTests + 1
    try:
        r = requests.get(urlBase + "seven_segment_display.gif")
        if (r.status_code == 200
                and checkHeaders(r.headers, "seven_segment_display.gif")
                and checkContent(r.content, "seven_segment_display.gif")):
            print("OK")
            aciertos = aciertos + 1
        else:
            print("FALLO")
    except:
        print("FALLO")

    print("\n\nPuntuación: " + str(aciertos) + "/" + str(totalTests))
Ejemplo n.º 48
0
def checkVersionExists(version):
    url = releaseUrl(version)
    info("Checking %s" % url)
    request = requests.head(url)
    if request.status_code == 200 or request.status_code == 302:
        error("Version already exists")
Ejemplo n.º 49
0
# Get an HTTP response from the http://httpbin.org/get page using the GET method
resp = requests.get("http://httpbin.org/get")  # key-value pairs must be passed as a dictionary.
print(resp.text)
print(resp.request.headers)  # With GET the payload travels with the request line/headers; because of the maximum length, not everything can be sent.
print(resp.request.body)

# Get an HTTP response from http://httpbin.org/post using the POST method
resp = requests.post("http://httpbin.org/post")  # fetch the response with POST
print(resp.text)
print(resp.request.headers)
print(resp.request.body)

# Get an HTTP response from http://httpbin.org/get using the HEAD method
resp = requests.head("http://httpbin.org/get")  # only the headers are fetched.
print(resp.text)
print(resp.request.headers)
print(resp.request.body)
# -----------------------------------------------------------------------------------------------------------------------------------------

# Test whether getDownload() filters out errors correctly
url_getDownloadtest = "http://www.crawler-test.com/status_codes/status_"  # prints retries 3 times, then returns 500.
html = getDownload(url_getDownloadtest + "500", {"q": "test"})
print(html.url)

# --------------------------------- Fetch pythonscraping with postDownload() --------------------------------------------

url_postDownloadtest = "http://pythonscraping.com/pages/files/processing.php"  # The request targets a .php endpoint rather than .html because we need the value returned after server-side processing; most small Korean sites are built this way.
data = {
    "firstname": "1234",
Ejemplo n.º 50
0
def test_url(href):
    """ make a HEAD request and return True if the status is 200 """
    r = requests.head(href)
    if r.status_code == 200:
        return True
    return False
Ejemplo n.º 51
0
    def download(self, url, overwrite=False):
        """

        :param url: Web Path to file eg:(http://google.com/images/randomimage.jpeg)
        :param overwrite: opt. This will trigger a removal any conflicting files prior to download
        :return: Bool - True = Completed successfully / False = Cancelled
        """
        g.log("Downloading file: {}".format(url))
        if not url or not url.startswith("http"):
            raise InvalidWebPath(url)

        if self.output_filename is None:
            self.output_filename = url.split("/")[-1]
        g.log(
            "Filename: {} - Location: {}".format(
                self.output_filename, self.storage_location
            )
        )
        output_file = self._create_file(url, overwrite)
        self._output_file = output_file
        g.log("Created file - {}".format(self._output_path))
        head = requests.head(url)

        if head.status_code != 200:
            g.log("Server did not respond correctly to the head request")
            self._handle_failure()
            raise requests.exceptions.ConnectionError(head.status_code)

        self.url_hash = tools.md5_hash(url)
        if not self._add_download_to_dm():
            g.log("Failed to create download manager task", "error")
            self._handle_failure()
            return

        self.file_size = int(head.headers.get("content-length", 0))  # default to 0 if the header is missing
        self.progress = 0
        self.speed = 0
        self.status = "downloading"

        for chunk in requests.get(url, stream=True).iter_content(1024 * 1024):
            if g.abort_requested():
                self._handle_failure()
                g.log(
                    "Shutdown requested - Cancelling download: {}".format(
                        self.output_filename
                    ),
                    "warning",
                )
                self.cancel_download()
            if self._is_canceled():
                g.log(
                    "User cancellation - Cancelling download: {}".format(
                        self.output_filename
                    ),
                    "warning",
                )
                self.cancel_download()
                self.status = "canceled"
                return False
            result = output_file.write(chunk)
            if not result:
                self._handle_failure()
                self.status = "failed"
                g.log(
                    "Failed to fetch chunk from remote server -"
                    " Cancelling download: {}".format(self.output_filename),
                    "error",
                )
                raise GeneralIOError(self.output_filename)
            else:
                self._update_status(len(chunk))

        g.log(
            "Download has completed successfully - Filename: {}".format(
                self.output_filename
            )
        )
        return True
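# Independently of the downloader class above, a minimal hedged sketch of the same
# pattern (HEAD for the size, then a streamed GET written out in chunks); the function
# name and example values are placeholders, not part of the original code.
import requests

def simple_stream_download(url, path, chunk_size=1024 * 1024, timeout=10):
    head = requests.head(url, timeout=timeout)
    head.raise_for_status()
    expected = int(head.headers.get("content-length", 0))
    written = 0
    with requests.get(url, stream=True, timeout=timeout) as resp, open(path, "wb") as f:
        for chunk in resp.iter_content(chunk_size=chunk_size):
            if chunk:  # skip keep-alive chunks
                f.write(chunk)
                written += len(chunk)
    # If the server reported a size, verify we received all of it.
    return expected == 0 or written == expected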
Ejemplo n.º 52
0
rootURL = "https://cdn.jsdelivr.net/gh/mainstringargs/quest-slayer@master/images/"

for file in os.listdir("."):
    if file.endswith(".json"):
        with open(file) as json_file:
            data = json.load(json_file)
            if ('imageURL' in data and data['imageURL'] != 'imageURL'):
                #print(data['title'] +" " + " "+data['imageURL'])
                imgFileName = data['title'].replace("–", "_").replace(
                    " ", "_").replace("(", "_").replace(")", "_").replace(
                        ",", "_").replace(":", "_").replace("?", "_") + ".jpg"
                download(data['imageURL'], "../../images/" + imgFileName)
                data['imageURL'] = rootURL + imgFileName

                r = requests.head(data['linkURL'], allow_redirects=True)
                data['linkURL'] = (r.url.replace('questconquerer-20',
                                                 'mainstringarg-20'))

                for questItem in data['questEntries']:
                    if ('imageURL' in questItem
                            and questItem['imageURL'] != 'imageURL'
                            and questItem['imageURL'] != ''):
                        #print(questItem['title'] + " "+ questItem['imageURL'])
                        imgFileName = questItem['title'].replace(
                            "–", "_").replace(" ", "_").replace(
                                "(", "_").replace(")", "_").replace(
                                    ",", "_").replace(":", "_").replace(
                                        "?", "_") + ".jpg"
                        download(questItem['imageURL'],
                                 "../../images/" + imgFileName)
Ejemplo n.º 53
0
def url_ok(url):
    r = requests.head(url)
    ##  use the commented row below if you need basic authentication
    ##    r = requests.head(url,  auth=(api_user, api_key))
    return r.status_code == 200
Ejemplo n.º 54
0
 def test_url(url):
     response = requests.head(url)
     if response.status_code > 299:
         return False
     return True
Ejemplo n.º 55
0

def makerow():
    return {"row": None, "resource": None}


## URL Status check function
def url_ok(url):
    r = requests.head(url)
    ##  use the commented row below if you need basic authentication
    ##    r = requests.head(url,  auth=(api_user, api_key))
    return r.status_code == 200


## set http status code
http_status_code = str(requests.head(url).status_code)
## use the commented row below if you need basic authentication
## http_status_code = str(requests.head(url,  auth=(api_user, api_key)).status_code)

##
if url_ok(url):
    print("Vipunen API was available with status code: " + http_status_code)
else:
    print("Vipunen API was not accessible and returned status code: " +
          http_status_code)
    exit()

## Finally get the data
response = requests.get(url).json()

## use the commented row below if you need basic authentication
Ejemplo n.º 56
0
 def is_url_an_html_page(self):
     response = requests.head(self.url, allow_redirects=True)
     if "Content-Type" in response.headers:
         if "text/html" in response.headers["Content-Type"]:
             return True
     return False
Ejemplo n.º 57
0
 def test_HTTP_200_OK_HEAD(self):
     r = head(httpbin('get'))
     self.assertEqual(r.status_code, 200)
Ejemplo n.º 58
0
 def get_webm_actual_url(self, url):
     headers = {'Accept': 'video/webm'}
     res = requests.head(url, headers=headers)
     return res.headers['Location']
Ejemplo n.º 59
0
    def test_head_content(self):
        """Test that empty bodies are properly supported."""

        r = requests.head(httpbin('headers'))
        r.content
        r.text
Ejemplo n.º 60
0
    if current_char_int > 0x7E:
        print('\nUh oh, broke')
        sys.exit(1)

    # Create guess for the authorization
    guess = flag + chr(current_char_int).encode('ascii')

    # Base64 because that's how http basic authentication works
    guess_b64 = base64.b64encode(guess)

    # Print out progress
    print('\r' + guess.decode('utf-8'), end='')

    # Request the page, only need headers, so use HEAD instead of GET
    headers = {b'Authorization': b'Basic ' + guess_b64}
    response = requests.head(url, headers=headers)
    num_requests += 1

    # We're authenticated if we get a 200
    if response.status_code == 200:
        # Add the last character we tried to the flag
        flag += chr(current_char_int).encode('ascii')
        break

    try:
        # Get oracle-esque header
        current_index = int(response.headers['Progress'])
    except KeyError:
        pass

    # Found character