Example #1
	def read_url(self, url):
		print('URL: ' + url)
		request = urllib.request.urlopen(url)
		if request.getcode() == 200:
			return request.read()
Example #2
def url_req(url):
    request = urllib.request.urlopen(url)
    if (request.getcode() == 200):
        data = json.loads(request.read())
    else:
        print("Error receiving data", request.getcode())
        data = None
    return data
Example #3
    def get(self, data=None):
        logger.debug("GET %s", self.url)
        req = urllib.request.Request(url=self.url,
                                     data=data,
                                     headers=self.headers)
        try:
            if self.opener:
                request = self.opener.open(req, timeout=self.timeout)
            else:
                request = urllib.request.urlopen(req, timeout=self.timeout)
        except (urllib.error.HTTPError, CertificateError) as error:
            # CertificateError has no "code" attribute, so use getattr
            if getattr(error, "code", None) == 401:
                raise UnauthorizedAccess("Access to %s denied" % self.url)
            else:
                raise HTTPError("Request to %s failed: %s" % (self.url, error))
        except (socket.timeout, urllib.error.URLError) as error:
            raise HTTPError("Unable to connect to server %s: %s" %
                            (self.url, error))
        if request.getcode() > 200:
            logger.debug("Server responded with status code %s",
                         request.getcode())
        try:
            self.total_size = int(request.info().get("Content-Length").strip())
        except AttributeError:
            logger.warning("Failed to read response's content length")
            self.total_size = 0

        self.response_headers = request.getheaders()
        self.status_code = request.getcode()
        if self.status_code > 299:
            logger.warning("Request responded with code %s", self.status_code)
        self.content = b"".join(self._iter_chunks(request))
        self.info = request.info()
        request.close()
        return self
Example #4
    def get(self, data=None):
        logger.debug("GET %s", self.url)
        req = urllib.request.Request(url=self.url,
                                     data=data,
                                     headers=self.headers)
        try:
            if self.opener:
                request = self.opener.open(req, timeout=self.timeout)
            else:
                request = urllib.request.urlopen(req, timeout=self.timeout)
        except (urllib.error.HTTPError, CertificateError) as error:
            raise HTTPError("Unavailable url %s: %s" % (self.url, error))
        except (socket.timeout, urllib.error.URLError) as error:
            raise HTTPError("Unable to connect to server %s: %s" %
                            (self.url, error))
        if request.getcode() > 200:
            logger.debug("Server responded with status code %s",
                         request.getcode())
        try:
            total_size = request.info().get("Content-Length").strip()
            total_size = int(total_size)
        except AttributeError:
            logger.warning("Failed to read response's content length")
            total_size = 0

        self.response_headers = request.getheaders()
        self.status_code = request.getcode()
        if self.status_code > 299:
            logger.warning("Request responded with code %s", self.status_code)
        chunks = []
        while 1:
            if self.stop_request and self.stop_request.is_set():
                self.content = ""
                return self
            try:
                chunk = request.read(self.buffer_size)
            except socket.timeout:
                logger.error("Request timed out")
                self.content = ""
                return self
            self.downloaded_size += len(chunk)
            if self.thread_queue:
                self.thread_queue.put(
                    (chunk, self.downloaded_size, total_size))
            else:
                chunks.append(chunk)
            if not chunk:
                break
        request.close()
        self.content = b"".join(chunks)
        self.info = request.info()
        return self
Example #5
 def get_page(self, url):
     self.num += 1
     try:
         request = urllib.request.urlopen(url)
     except:
         return False
     print("正在进行第" + str(self.num) + "爬取,url[" + url + "]...")
     if request.getcode() != 200:
         print("爬取失败,错误代码:[" + request.getcode() + "]")
         return False
     else:
         self.success_num += 1
         print("爬取成功,正在分析网页内容...")
         return request.read().decode("utf-8")
Example #6
def downfile(url, path):
    """下载文件"""
    request = urllib.request.urlopen(url, data=None, timeout=60)
    if request.getcode() == 200:
        data = request.read()
        with open(path, 'wb') as filestream:
            filestream.write(data)
Example #7
 def Downfile(self, url, dir, name="", isSave=False):
     """下载文件"""
     if not os.path.exists(dir):
         os.mkdir(dir)
     if name != "":
         filePath = os.path.join(dir, name)
         if os.path.exists(filePath):
             with open(filePath, "rb") as filestream:
                 return filestream.read()
         else:
             print(url)
             headers = {
                 "Upgrade-Insecure-Requests":
                 "1",
                 "User-Agent":
                 "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
             }
             requestContent = urllib.request.Request(url=url,
                                                     headers=headers)
             request = urllib.request.urlopen(requestContent,
                                              data=None,
                                              timeout=60)
             if request.getcode() == 200:
                 data = request.read()
                 if isSave:
                     with open(filePath, 'wb') as filestream:
                         # print(filePath)
                         filestream.write(data)
                 return data
Example #8
    def download(self, url):

        if url is not None:
            try:
                # Python 3: add a timeout check
                request = urllib.request.urlopen(url, timeout=10)
                if request.getcode() == 200:
                    # return request.read().decode("utf-8")
                    return request.read()
                else:
                    return None

            except Exception as err:
                print(str(err))
        else:
            return None


# The requests module: this approach produced garbled (mis-decoded) output
# import requests
#
# class HtmlDownloader(object):
#
#     def download(self, url):
#         if url is None:
#             return None
#
#         response = requests.get(url)
#
#         if response.status_code != 200:
#             return None  # request failed
#         else:
#             return response.text  # return the downloaded content
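The commented-out requests version above is dismissed because it produced garbled output; that is usually an encoding-detection problem rather than a limitation of requests. A minimal sketch of one common workaround (not part of the original example), assuming the page is UTF-8 but served without an accurate charset header:

import requests


def download(url):
    if url is None:
        return None
    response = requests.get(url)
    if response.status_code != 200:
        return None
    # requests falls back to ISO-8859-1 when the server omits the charset,
    # which garbles UTF-8 pages; let it guess the encoding from the body instead.
    response.encoding = response.apparent_encoding
    return response.text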
Example #9
async def download_coroutine(url):
    l=0
    try:
        request = urllib.request.urlopen(url)
        if request.getcode() == 200:
            print('Web site exists')
        else:
            l=1
            print("Website returned response code: {code}".format(code=request.status_code))
    except:
        print('Web site does not exist')
        l=1
    filename = os.path.basename(url)
    if l==0:
        with open(filename, 'wb') as file_handle:
            while True:
                chunk = request.read(1024)
                if not chunk:
                    break
                file_handle.write(chunk)
        msg = 'Finished downloading {filename}'.format(filename=filename)
    if l==1:
        return 'ERROR'
    else:
        return filename
Example #10
def extractRealSupportedURI(uri):
    """
        Returns "real" URI if it survives redirects and returns a 200.

        Returns None otherwise.
    """

    realURI = None

    try:
        # this function follows the URI, resolving all redirects,
        # and detects redirect loops
        # iri2uri is needed for IRIs
        request = urllib.request.urlopen(httplib2.iri2uri(uri))
        
        if request.getcode() == 200:
            realURI = request.geturl()

    except urllib.error.HTTPError as e:
        # something went wrong, we don't care what
        realURI = None

    except urllib.error.URLError as e:
        # something went wrong, we don't care what
        realURI = None

    except UnicodeError as e:
        # something went very wrong with the IRI decoding
        realURI = None

    return realURI
Example #11
def get_city():
    request = urllib.request.urlopen("http://ip-api.com/json/" + get_ip())
    if request.getcode() == 200:
        data = json.loads(request.read())
        return data["city"]
    else:
        print(f"E: {request.getcode()}")
Example #12
def get_ip():
    request = urllib.request.urlopen("https://api.ipify.org?format=json")
    if request.getcode() == 200:
        data = json.loads(request.read())
        return data["ip"]
    else:
        print(f"E: {request.getcode()}")
Example #13
    def test_templates_loaded(self):
        with open('./al_visitor_site/resources/template_urls.txt', 'r') as fp:
            urls = [line.strip() for line in fp.readlines()]

        count = 0
        fails = 0
        for prod_url in urls:
            code = 0
            test_url = prod_url.replace('https://www.angieslist.com',
                                        self.visitor_site_url)

            try:
                request = urllib.request.urlopen(test_url)
                count += 1
                code = request.getcode()
                print("URL %s tested. code %s from: %s" %
                      (count, code, test_url))
                self.assertTrue(200 <= code < 400)

            except urllib.error.HTTPError as e:
                fails += 1
                print("FAIL: Received http code %s from %s" %
                      (e.code, test_url))
            except urllib.error.URLError:
                fails += 1
                print("FAIL: URL error - %s" % test_url)
            except ConnectionRefusedError as cre:
                fails += 1
                print("FAIL: Connection refused - %s" % test_url)

        print("-------------------\n\n%s URL(s) tested\n-------------------" %
              count)
Example #14
 def get_stats(self):
     request = urllib.request.urlopen(self.battlemetrics_url)
     if (request.getcode() == 200):
         response = request.read()
         self.server_stats = json.loads(response)
         return self.server_stats
     else:
         exit(1)
Example #15
def get_statuscode(url):
    """get statuscode of a url"""
    try:
        request = urllib.request.urlopen(url)
    except urllib.error.HTTPError as e:
        return e.code
    else:
        return request.getcode()
Example #16
async def test_connection(model, app):
    # Ignore self signed SSL cert directly on unit
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    unifi_unit = app.units[0]
    address = f"https://{unifi_unit.public_address}.xip.io:8443"
    print(f"Checking address: {address}")
    request = urllib.request.urlopen(address, context=ctx)
    assert request.getcode() == 200
Example #17
    def __is_public_repository(self, url):
        request = urllib.request.urlopen(url)
        request_url = request.geturl()

        if url == request_url or url.rsplit('.', 1)[0] == request_url:
            try:
                if request.getcode() == 200:
                    return True
            except URLError:
                pass
        return False
Example #18
 def get_album_art_url(self, album_id):
     # Sadly we cannot determine whether the Beets library really contains
     # album art. Thus we need to ask for it and check the status code.
     url = "{0}/album/{1}/art".format(self.api_endpoint, album_id)
     try:
         request = urllib.request.urlopen(url)
     except IOError:
         # DNS problem or similar
         return None
     request.close()
     return url if request.getcode() == 200 else None
Example #19
def get_location_description(lat, lon):
    url = "https://nominatim.openstreetmap.org/search.php?q=" + str(
        lat) + "%2C" + str(lon) + "&polygon_geojson=1&format=jsonv2"
    request = urllib.request.urlopen(url)
    if (request.getcode() != 200):
        return ""
    data = json.loads(request.read())
    if len(data) == 0:
        return ""
    if 'display_name' in data[0]:
        return data[0]['display_name']
    return ""
Example #20
    def read_metadata(self, uri):
        metadata = {}

        request = urllib.request.urlopen(uri)

        if request.getcode() == 200:
            try:
                metadata = json.loads(request.read())
            except (TypeError, json.JSONDecodeError):
                pass

        return metadata
Example #21
    def test_wait_for_request(self):
        """Method used to test if the callback server is running and responding as expected."""
        server_thread = threading.Thread(
            target=callback_server.CallbackServer.wait_for_request)
        server_thread.start()

        request = urllib.request.urlopen(properties.SPOTIFY_REDIRECT_URL)
        http_code = request.getcode()
        response_body = request.read()

        self.assertEqual(http_code, 200)
        self.assertEqual(response_body, properties.CALLBACK_RESPONSE_BODY)
Example #22
def get_ron_swanson_quote() -> Quote:
    """
    Get a quote from Ron Swanson
    :return: Quote, can be empty
    """
    url = 'https://ron-swanson-quotes.herokuapp.com/v2/quotes'
    with urllib.request.urlopen(url) as request:
        if request.getcode() != 200:
            logging.error(
                f'Failed to reach the quote API, error code = {request.getcode()}')
            return Quote('', '')
        data = json.loads(request.read())
        return Quote(data[0], 'Ron Swanson')
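Quote is used above but not defined in this snippet; a minimal stand-in, assuming it is simply a (text, author) pair (the original project may define it differently):

from collections import namedtuple

# Hypothetical definition of the Quote type referenced above.
Quote = namedtuple('Quote', ['text', 'author'])

# Matches the calls in the example: Quote('', '') for an empty quote,
# Quote(data[0], 'Ron Swanson') on success.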
Example #23
def get_city(default_city="London"):
    try:
        request = urllib.request.urlopen("https://ipapi.co/json")
        if request.getcode() == 200:
            try:
                data = json.loads(request.read())
                return return_city(data["city"])
            except json.JSONDecodeError:
                print("E: Couldn't load Json data.")
        else:
            print(f"E: {request.getcode()}")
    except:
        return return_city(default_city)
Example #24
    def raw_stripes(self,
                    dataset,
                    columns,
                    rgid,
                    use_cache=None,
                    compress=False):
        if use_cache is None: use_cache = self.UseDataCache
        out_data = {}

        column_list = ",".join(columns)
        url = "./stripes?ds=%s&columns=%s&rgid=%s&compressed=%s" % \
            (dataset, column_list, rgid, "yes" if compress else "no")
        #print "url:", url
        request = self.requestWithRetries(url,
                                          bypass_cache=not use_cache,
                                          timeout=120)
        data = request.read()
        #print "data: %s" % (repr(data[:100]),)
        header_end = data.index(b"\n")
        header = data[:header_end]
        i = header_end + 1
        #print "header: [%s]" %(header,)
        for w in header.split():
            try:
                cn, length = w.split(b":")
                length = int(length)
                cn = to_str(cn)
            except ValueError:
                sys.stderr.write("Error parsing header [%s] url:%s status:%s" %
                                 (header, url, request.getcode()))
                sys.stderr.write("request status=%s" % (request.getcode(), ))
                sys.stderr.write(traceback.format_exc() + "\n")
                sys.exit(1)
            segment = data[i:i + length]
            if compress:
                segment = zlib.decompress(segment)
            out_data[cn] = segment
            i += length
        return out_data
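The parsing loop above implies a payload framed as a header line of name:length pairs, a newline, then the concatenated (and optionally zlib-compressed) segments. A self-contained sketch that builds such a payload and re-parses it with the same logic, using made-up column names and data (build_stripes_payload is an illustrative helper, not part of the original API):

import zlib


def build_stripes_payload(segments, compress=False):
    # segments: dict mapping column name -> bytes
    header_parts = []
    body = b""
    for name, data in segments.items():
        blob = zlib.compress(data) if compress else data
        header_parts.append(b"%s:%d" % (name.encode(), len(blob)))
        body += blob
    return b" ".join(header_parts) + b"\n" + body


payload = build_stripes_payload({"x": b"1234", "y": b"abcdef"}, compress=True)

# Re-parse it the same way raw_stripes does.
header_end = payload.index(b"\n")
i = header_end + 1
out = {}
for w in payload[:header_end].split():
    cn, length = w.split(b":")
    length = int(length)
    out[cn.decode()] = zlib.decompress(payload[i:i + length])
    i += length
assert out == {"x": b"1234", "y": b"abcdef"}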
Example #25
    def is_site_available(self, site="", path=""):
        '''This function retrieves the status code of a web site by requesting
        HEAD data from the host. This means that it only requests the headers.
        If the host cannot be reached or something else goes wrong, it returns
        False.
        
        This will only work if the self.set_no_proxy method is used before
        this method is called.

        :param site: string; fqdn (domain); ex: http://www.google.com/ (Default value = "")
        :param path: string; the rest of the URL; ex: docs/about (Default value = "")
        :returns: retval
        :rtype: bool
        @author: ???
        @change: 02/12/2018 - Breen Malmberg - added doc string decorators; proxy
                 will now be set for the test if the use_proxy argument in __init__ is
                 True.

        '''

        retval = True

        try:

            if self.use_proxy:
                self.set_proxy()

            page = site + path
            req = urllib.request.Request(
                page, headers={'User-Agent': "Magic Browser"})
            req.add_header('User-agent', 'Firefox/31.5.0')
            request = urllib.request.urlopen(req, timeout=3)
            retcode = request.getcode()

            # get the first digit of the return code
            # if it is not in the 200 range, then an error has occurred
            # (all http successful response codes are in the 2xx range)
            idd = int(str(retcode)[:1])
            if idd != 2:
                retval = False
                self.logger.log(
                    LogPriority.DEBUG, "Failed to reach specified page: " +
                    str(page) + " with HTTP error code: " + str(retcode))

            if retval:
                self.logger.log(LogPriority.DEBUG, "Site is available.")

        except Exception:
            raise

        return retval
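The docstring above says the availability check requests only HEAD data, yet the code issues a plain GET through urlopen. A minimal sketch of an actual HEAD probe with urllib (head_status is a hypothetical helper name; no proxy handling is included):

import urllib.error
import urllib.request


def head_status(url, timeout=3):
    # method="HEAD" asks the server for headers only, with no response body.
    req = urllib.request.Request(url,
                                 headers={'User-Agent': "Magic Browser"},
                                 method="HEAD")
    try:
        with urllib.request.urlopen(req, timeout=timeout) as response:
            return response.getcode()
    except urllib.error.HTTPError as error:
        return error.code

# Example: head_status("http://www.google.com/") returns 200 when the site is reachable.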
Example #26
def openweather(city_name, units, api_key):
    try:
        request = urllib.request.urlopen(
            f"{OPENWEATHER_URL}?q={city_name.replace(' ', '+')}&units={units}&appid={api_key}"
        )
        if request.getcode() == 200:
            data = json.loads(request.read())

            _id = data["id"]
            name = data["name"]
            timezone = data["timezone"]
            country = data["sys"]["country"]
            sunrise = data["sys"]["sunrise"]
            sunset = data["sys"]["sunset"]

            temp = data["main"]["temp"]
            temp_min = data["main"]["temp_min"]
            temp_max = data["main"]["temp_max"]
            humidity = data["main"]["humidity"]
            feels_like = data["main"]["feels_like"]
            description = data["weather"][0]["description"]

            if units == "metric":
                unit = "ºC"
            elif units == "imperial":
                unit = "ºF"
            else:
                unit = " K"

            return (
                _id,
                name,
                timezone,
                country,
                sunrise,
                sunset,
                temp,
                temp_min,
                temp_max,
                humidity,
                feels_like,
                description,
                unit,
            )

        else:
            print(f"E: {request.getcode()}")

    except urllib.error.HTTPError as e:
        print(e)
Example #27
    def get_report(self):
        url = 'https://api.darksky.net/forecast/' + self._api.key() + '/' + \
              self._location.lat() + ',' + self._location.lon() + \
              '?lang=' + self.lang() + '&units=si&exclude=daily'
        logging.info(f'Contacting DarkSky...')

        with urllib.request.urlopen(url) as request:
            if request.getcode() != 200:
                logging.error(f'Failed to reach DarkSky, error code = {request.getcode()}')
                return False
            data = json.loads(request.read())
            logging.info('Data retrieved from DarkSky')

        self._read_json(data)
        return True
Example #28
def iso3():
    try:
        return json.loads(open(Path(__file__).parent / "iso3.json").read())
    except:
        try:
            request = urllib.request.urlopen("http://country.io/iso3.json")
            if request.getcode() == 200:
                try:
                    return json.loads(request.read())
                except json.JSONDecodeError:
                    print("E: Couldn't load Json data.")
            else:
                print(f"E: {request.getcode()}")
        except urllib.error.HTTPError:
            print("E: 404, url not found!")
Example #29
def geturl(url):
    while True:
        try:
            request = urllib.request.urlopen(url)
        except urllib.error.HTTPError as e:
            err = e.read()
            status = e.getcode()
        else:
            status = request.getcode()

        if status == 200 and request is not None:
            content = request.read()
            content = content.decode("utf-8")
            return json.loads(content)
Example #30
def downloadImage(url, path, filename):
    """
    Result :
    [0] = Status
    [1] = Filename
    [2] = URL
    [3] = Full path
    [4] = Start time (time)
    [5] = Finish time (time)
    """
    startTime = time.time()
    res = []
    reqCode = 0
    fileNameTemp = ""
    
    fullpath = pathCombine(path, filename)
    if(checkConnection() == True):
        try:
            request = urllib.request.urlopen(url, timeout=30)
            reqCode = request.getcode()
        except (timeout, socket.error,urllib.error.URLError, urllib.error.HTTPError): 
            reqCode = 408
            fileNameTemp = "-"
        if(reqCode > 199 and reqCode < 300):
            if(os.getcwd() != path):
                os.chdir(path)
            try:
                downloading, downloadHeader = urllib.request.urlretrieve(url,fullpath)
                fileNameTemp = filename
            # Catch the most specific exceptions first so each handler is reachable
            except urllib.error.HTTPError as e2:
                reqCode = e2.code
                fileNameTemp = "-"
            except urllib.error.ContentTooShortError as e:
                reqCode = 902
                fileNameTemp = "-"
            except urllib.error.URLError as e1:
                reqCode = 901
                fileNameTemp = "-"
            else:
                if(reqCode == 0):
                    reqCode = -1
                
        finishTime = time.time()
        res = [statusDownload(reqCode), fileNameTemp, url, path, startTime, finishTime] 
        
        return res
    else:
        print("Connection is lost")
Example #31
def openweather(city, lang, unit, api_key=OPENWEATHER_API_KEY):
    try:
        url = f"{OPENWEATHER_URL}?q={city}&lang={lang}&units={unit}&appid={api_key}"
        request = urllib.request.urlopen(url)
        if request.getcode() == 200:
            data = json.loads(request.read())
            return {
                "name": data["name"],
                "country": iso3().get(data["sys"]["country"]),
                "temp": int(data["main"]["temp"]),
                "unit": check_unit(unit),
                "description": data["weather"][0]["description"],
            }
        else:
            print(f"E: {request.getcode()}")
    except:
        return None
Example #32
 def lookup_raw(self, url=None, **kwargs):
     url = url or self.url
     if url is None:
         raise Error('must specify url')
     kwargs['url'] = url
     request_url = (self.endpoint % kwargs) + '?' + urlencode(kwargs)
     try:
         request = urllib.request.urlopen(
             urllib.request.Request(request_url,
                                    headers={'User-Agent':
                                             'Python/oEmbed'}))
     except urllib.error.HTTPError as request:
         pass
     code = request.getcode()
     if code == 200:
         return request.read()
     raise _http_errors.get(code, HTTPError)(code, request_url)
Example #33
def probarPermutacion(palabra, fuentes):
    # print("buscando \"{}\"".format(palabra))
    for fuente in fuentes:
        direccion = fuente + palabra
        # if the permutation is found in this source, then this is the word.
        request = None
        try:
            request = urllib.request.urlopen(direccion)
            codigo = request.getcode()
        except urllib.error.HTTPError as error:
            codigo = error.code
        finally:
            if request:
                request.close()
        if codigo == 200:
            print("Encontré \"{}\" en \"{}\".".format(palabra, direccion))
            return True
    return False
Example #34
async def test_relation(model, app):
    haproxy = model.applications["haproxy"]
    haproxy_unit = haproxy.units[0]

    config = await app.get_config()
    subdomain = config["proxy-subdomain"]["value"]
    address = f"http://{subdomain}.{haproxy_unit.public_address}.xip.io/admin"
    print(f"Checking address: {address}")
    request = urllib.request.urlopen(address)
    info = request.info()
    print(f"Info: {info}")
    assert request.getcode() == 200
    server_id = "not found"
    for item in info.values():
        if "SERVERID" in item:
            server_id = item.split(";")[0]
        else:
            continue
    print(f"server_id: {server_id}")
    assert subdomain in server_id
Example #35
def getTimeMap(uri):
    
    urit = "http://mementoproxy.cs.odu.edu/aggr/timemap/link/" + uri

    try:
        request = urllib.request.urlopen(urit)

        if request.getcode() == 200:
            timemap = request.read()
            request.close()
        else:
            timemap = None
            request.close()

    except urllib.error.HTTPError as e:
        timemap = None

    except urllib.error.URLError as e:
        timemap = None

    return timemap
Example #36
def get():
	"""
	Returns properly formatted weather for Rochester, NY
	City can be changed by grabbing the proper openweathermap.org url.
	"""
	weather_string = "Weather Unavailable"

	weather_url = "http://api.openweathermap.org/data/2.1/weather/city/5134086"
	request = urllib.request.urlopen(weather_url)
	weather_info = json.loads(request.read().decode("utf-8"))
	if(request.getcode() not in range(200, 300)):
		request.close()
		return weather_string
	request.close()

	if weather_info is not None:
		temp = str(k_to_f(weather_info['main']['temp']))
		#state = str(weather_info['weather'][0]['main'])
		desc = str(weather_info['weather'][0]['description'])
		#weather_string = temp + " degrees, " + desc
		weather_string = temp + "°F, " + desc
	
	return weather_string