def _sql_files(self):
    """
    Infer candidate version numbers from the SQL update scripts the site exposes.

    Sometimes the manifest file is removed or missing, so instead the SQL
    installation scripts under the admin component are probed with HEAD
    requests. The known scripts are read from ``data/sql.json``, which maps
    a script filename to the versions associated with it.

    :return: list of candidate versions for the first script that answers
             with HTTP 200, minus the versions attached to every script that
             was probed before it and did not answer 200. An empty list is
             returned when the SQL directory is not reachable or no script
             is found.
    """
    # First of all let's test if we can access the SQL directory
    base_url = (self.parentArgs.url.strip('/') +
                '/administrator/components/com_admin/sql/updates/mysql/')

    try:
        response = requests_head(base_url + '3.0.0.sql', verify=False, allow_redirects=True)
    except ConnectionError:
        return []

    # Bummer, something went wrong or the site is protected by a WAF
    if response.status_code != 200:
        return []

    # If I'm here, it means that I can do that, now let's try to detect the correct version
    with open('data/sql.json', 'rb') as sql_json:
        sql_versions = json.load(sql_json, object_pairs_hook=OrderedDict)

    # Let's reverse the order so we can test for the most recent ones first
    # NOTE(review): this is a plain string sort of the filenames, so e.g.
    # "3.10.0.sql" sorts before "3.9.0.sql" -- confirm that is acceptable
    # for the names stored in sql.json.
    sql_versions = OrderedDict(sorted(sql_versions.items(), reverse=True))

    detected_file = None
    excluded_versions = set()

    # Let's try from the most recent until the oldest
    for filename, versions in sql_versions.items():
        try:
            response = requests_head(base_url + filename, verify=False, allow_redirects=True)
        except ConnectionError:
            # The probe gave no answer at all. Skip it instead of re-reading
            # the previous response, which would either report a file that
            # was never seen or exclude versions without any evidence.
            continue

        if response.status_code == 200:
            detected_file = filename
            break

        # presumably each value is a list of version strings -- verify against sql.json
        excluded_versions.update(versions)

    if not detected_file:
        return []

    candidates = sql_versions.get(detected_file, [])

    return list(set(candidates) - excluded_versions)
def check_content_headers(url: str) -> bool:
    """Check the content-type and content-length announced for *url*.

    A HEAD request is sent and only the response headers are inspected.

    Returns:
        True if the content-type is ``text/*`` (compared case-insensitively)
        and the announced content-length is not above 1 MB (1,000,000 bytes).
        Also True if a header is absent, or if the content-length is not a
        number, because then no information is available.

    Raises:
        ContentLengthError: the announced content-length is above 1 MB.
        ContentTypeError: a content-type is announced and it is not ``text/*``.
    """
    response_headers = requests_head(url, headers=USER_AGENT_HEADER).headers

    content_length = response_headers.get('content-length')
    if content_length is not None:
        try:
            megabytes = int(content_length) / 1000000
        except ValueError:
            # A malformed header tells us nothing about the size; treat it
            # like a missing header rather than failing with an unrelated error.
            megabytes = None
        if megabytes is not None and megabytes > 1:
            raise ContentLengthError('Content-length was too long. (' + format(megabytes, '.2f') + ' MB)')

    content_type = response_headers.get('content-type')
    if content_type:
        # Media type names are case-insensitive, so "Text/HTML" is still text.
        if content_type.strip().lower().startswith('text/'):
            return True
        raise ContentTypeError('Invalid content-type: ' + content_type +
                               ' (URL-content is supposed to be text/html)')
    return True
def nwis_sites():
    """
    NWIS site geojson retrieval

    Uses the same query parameters as described here, http://waterservices.usgs.gov/rest/Site-Test-Tool.html.
    If no major filters are used, the service downloads data for all US huc 2's. In addition
    there is no restriction on the number of hucs defined in the 'huc' parameters.
    Returns a geojson feature collection containing all of the NWIS sites in the query.
    ---
    tags:
      - Sites
    parameters:
      - name: huc
        in: query
        type: string
        description: list of huc 2's or huc 8's separated by commas
      - name: sites
        in: query
        type: string
        description: list of site numbers separated by commas
      - name: stateCd
        in: query
        type: string
        description: US Postal Code (example WI)
      - name: bBox
        in: query
        type: string
        description: west,south,east,north
      - name: countyCd
        in: query
        type: string
        description: 5 digit county code
      - name: startDt
        in: query
        type: string
        description: start date (example 1990-01-01)
      - name: endDt
        in: query
        type: string
        description: end date (example 1990-01-01)
      - name: siteStatus
        in: query
        type: string
        description: all | active | inactive
      - name: siteType
        in: query
        type: string
        description: Example 'ST'
      - name: hasDataTypeCd
        in: query
        type: string
        description: list of allowed values
      - name: parameterCd
        in: query
        type: string
        description: USGS parameter code
    responses:
      200:
        description: Returns a geojson feature collection of NWIS sites
        schema:
          type: object
          required:
            - crs
            - type
            - features
          properties:
            crs:
              type: object
            type:
              type: string
            features:
              type: array
              items:
                type: object
                required:
                  - geometry
                  - type
                properties:
                  geometry:
                    type: array
                    items:
                      type: number
                  type:
                    type: string
                  properties:
                    type: array
                    items:
                      type: object
                      properties:
                        hucCode:
                          type: string
                        agencyCd:
                          type: string
                        siteNumber:
                          type: string
                        stationName:
                          type: string
                        SiteTypeCode:
                          type: string
                        SiteType:
                          type: string
                        url:
                          type: string
          example:
            crs:
              type: name
              properties:
                name: 'urn:ogc:def:crs:EPSG::4326'
            type: FeatureCollection
            features:
              - geometry:
                  type: Point
                  coordinates: [-67.1, 46.5]
                type: Feature
                properties:
                  - hucCode: '01010101'
                    agencyCd: USGS
                    siteNumber: '00336840'
                    stationName: Name of your station
                    SiteTypeCode: ST
                    SiteType: Stream
                    url: 'http://waterdata.usgs.gov/nwis/inventory?agency_code=USGS&site_no=00336840'
      400:
        description: Bad Request
        schema:
          type: string
    """
    site_request_params = dict(request.args)
    site_request_params['format'] = 'rdb'
    params_list = []

    # The huc parameter is recognised regardless of case, so it has to be read
    # through the spelling the client actually used. An exact 'huc' wins when
    # several spellings are present.
    if 'huc' in request.args:
        huc_key = 'huc'
    else:
        huc_key = next((key for key in request.args.keys() if key.lower() == 'huc'), None)

    # If none of the required service arguments are used, we will retrieve the entire
    # set of US huc 2's.
    huc_values = None
    if not any(k.lower() in REQUIRED_SERVICE_ARGUMENTS for k in request.args.keys()):
        huc_values = US_HUC2s
    elif huc_key is not None:
        huc_values = request.args[huc_key].split(',')

    # Check for limits on the number of huc2s and huc8s. If it exceeds NWIS limitation, then
    # multiple service calls will be needed.
    if huc_values:
        huc2s = [hucid for hucid in huc_values if is_huc2(hucid)]
        huc8s = [hucid for hucid in huc_values if is_huc8(hucid)]

        if len(huc2s) > MAX_HUC2 or len(huc8s) > MAX_HUC8:
            # Every split request carries its own 'huc' value, so drop whatever
            # spelling of the key the client sent to avoid sending it twice.
            base_params = {
                key: value for key, value in site_request_params.items() if key.lower() != 'huc'
            }

            # Make one request for each huc2 and one for each group of at most MAX_HUC8 huc8s.
            # We add these as a one element array because that's the way we get them from requests.args
            for huc2 in huc2s:
                params_list.append(dict(base_params, huc=[huc2]))

            for start in range(0, len(huc8s), MAX_HUC8):
                huc8_group = ','.join(huc8s[start:start + MAX_HUC8])
                params_list.append(dict(base_params, huc=[huc8_group]))

    # If we don't have to make multiple requests just add the site_request_params received to params_list
    if not params_list:
        params_list.append(site_request_params)

    # Make a head request to validate the parameters. If it fails, don't proceed, just return the error
    head_response = requests_head(NWIS_SITES_SERVICE_ENDPOINT, params=params_list[0])
    msg = create_request_resp_log_msg(head_response)
    app.logger.debug(msg)

    if head_response.status_code != 200:
        response = make_response(head_response.reason, head_response.status_code)
    else:
        response = Response(site_geojson_generator(params_list), content_type='application/json')

    return response
def _build_base_url(base_url=None, host="", port=None): """ Determine the Base URL for the Web API endpoints. A URL is only actually built here if it's the first time here or the context was re-initialized. Otherwise, the most recently built URL is used. If the user doesn't provide a scheme for the URL, it will try HTTP first and fall back to HTTPS if that doesn't work. While this is probably backwards, qBittorrent or an intervening proxy can simply redirect to HTTPS and that'll be respected. Additionally, if users want to augment the path to the API endpoints, any path provided here will be preserved in the returned Base URL and prefixed to all subsequent API calls. :param base_url: if the URL was already built, this is the base URL :param host: user provided hostname for WebUI :return: base URL for Web API endpoint """ if base_url is not None: return base_url # urlparse requires some sort of schema for parsing to work at all if not host.lower().startswith(("http:", "https:", "//")): host = "//" + host base_url = urlparse(url=host) logger.debug("Parsed user URL: %s" % repr(base_url)) # default to HTTP if user didn't specify user_scheme = base_url.scheme base_url = base_url._replace( scheme="http") if not user_scheme else base_url alt_scheme = "https" if base_url.scheme == "http" else "http" # add port number if host doesn't contain one if port is not None and not isinstance(base_url.port, int): base_url = base_url._replace(netloc="%s:%s" % (base_url.netloc, port)) # detect whether Web API is configured for HTTP or HTTPS logger.debug("Detecting scheme for URL...") try: # skip verification here...if there's a problem, we'll catch it during the actual API call r = requests_head(base_url.geturl(), allow_redirects=True, verify=False) # if WebUI eventually supports sending a redirect from HTTP to HTTPS then # Requests will automatically provide a URL using HTTPS. # For instance, the URL returned below will use the HTTPS scheme. 
# >>> requests.head('http://grc.com', allow_redirects=True).url scheme = urlparse(r.url).scheme except requests_exceptions.RequestException: # assume alternative scheme will work...we'll fail later if neither are working scheme = alt_scheme # use detected scheme logger.debug("Using %s scheme" % scheme.upper()) base_url = base_url._replace(scheme=scheme) if user_scheme and user_scheme != scheme: logger.warning( "Using '%s' instead of requested '%s' to communicate with qBittorrent" % (scheme, user_scheme)) # ensure URL always ends with a forward-slash base_url = base_url.geturl() if not base_url.endswith("/"): base_url = base_url + "/" logger.debug("Base URL: %s" % base_url) return base_url
def nwis_sites():
    """
    NWIS site geojson retrieval

    Uses the same query parameters as described here, http://waterservices.usgs.gov/rest/Site-Test-Tool.html.
    If no major filters are used, the service downloads data for all US huc 2's. In addition
    there is no restriction on the number of hucs defined in the 'huc' parameters.
    Returns a geojson feature collection containing all of the NWIS sites in the query.
    ---
    tags:
      - Sites
    parameters:
      - name: huc
        in: query
        type: string
        description: list of huc 2's or huc 8's separated by commas
      - name: sites
        in: query
        type: string
        description: list of site numbers separated by commas
      - name: stateCd
        in: query
        type: string
        description: US Postal Code (example WI)
      - name: bBox
        in: query
        type: string
        description: west,south,east,north
      - name: countyCd
        in: query
        type: string
        description: 5 digit county code
      - name: startDt
        in: query
        type: string
        description: start date (example 1990-01-01)
      - name: endDt
        in: query
        type: string
        description: end date (example 1990-01-01)
      - name: siteStatus
        in: query
        type: string
        description: all | active | inactive
      - name: siteType
        in: query
        type: string
        description: Example 'ST'
      - name: hasDataTypeCd
        in: query
        type: string
        description: list of allowed values
      - name: parameterCd
        in: query
        type: string
        description: USGS parameter code
    responses:
      200:
        description: Returns a geojson feature collection of NWIS sites
        schema:
          type: object
          required:
            - crs
            - type
            - features
          properties:
            crs:
              type: object
            type:
              type: string
            features:
              type: array
              items:
                type: object
                required:
                  - geometry
                  - type
                properties:
                  geometry:
                    type: array
                    items:
                      type: number
                  type:
                    type: string
                  properties:
                    type: array
                    items:
                      type: object
                      properties:
                        hucCode:
                          type: string
                        agencyCd:
                          type: string
                        siteNumber:
                          type: string
                        stationName:
                          type: string
                        SiteTypeCode:
                          type: string
                        SiteType:
                          type: string
                        url:
                          type: string
          example:
            crs:
              type: name
              properties:
                name: 'urn:ogc:def:crs:EPSG::4326'
            type: FeatureCollection
            features:
              - geometry:
                  type: Point
                  coordinates: [-67.1, 46.5]
                type: Feature
                properties:
                  - hucCode: '01010101'
                    agencyCd: USGS
                    siteNumber: '00336840'
                    stationName: Name of your station
                    SiteTypeCode: ST
                    SiteType: Stream
                    url: 'http://waterdata.usgs.gov/nwis/inventory?agency_code=USGS&site_no=00336840'
      400:
        description: Bad Request
        schema:
          type: string
    """
    site_request_params = dict(request.args)
    site_request_params['format'] = 'rdb'
    params_list = []

    # The huc parameter is recognised regardless of case, so it has to be read
    # through the spelling the client actually used. An exact 'huc' wins when
    # several spellings are present.
    if 'huc' in request.args:
        huc_key = 'huc'
    else:
        huc_key = next((key for key in request.args.keys() if key.lower() == 'huc'), None)

    # If none of the required service arguments are used, we will retrieve the entire
    # set of US huc 2's.
    huc_values = None
    if not any(k.lower() in REQUIRED_SERVICE_ARGUMENTS for k in request.args.keys()):
        huc_values = US_HUC2s
    elif huc_key is not None:
        huc_values = request.args[huc_key].split(',')

    # Check for limits on the number of huc2s and huc8s. If it exceeds NWIS limitation, then
    # multiple service calls will be needed.
    if huc_values:
        huc2s = [hucid for hucid in huc_values if is_huc2(hucid)]
        huc8s = [hucid for hucid in huc_values if is_huc8(hucid)]

        if len(huc2s) > MAX_HUC2 or len(huc8s) > MAX_HUC8:
            # Every split request carries its own 'huc' value, so drop whatever
            # spelling of the key the client sent to avoid sending it twice.
            base_params = {
                key: value for key, value in site_request_params.items() if key.lower() != 'huc'
            }

            # Make one request for each huc2 and one for each group of at most MAX_HUC8 huc8s.
            # We add these as a one element array because that's the way we get them from requests.args
            for huc2 in huc2s:
                params_list.append(dict(base_params, huc=[huc2]))

            for start in range(0, len(huc8s), MAX_HUC8):
                huc8_group = ','.join(huc8s[start:start + MAX_HUC8])
                params_list.append(dict(base_params, huc=[huc8_group]))

    # If we don't have to make multiple requests just add the site_request_params received to params_list
    if not params_list:
        params_list.append(site_request_params)

    # Make a head request to validate the parameters. If it fails, don't proceed, just return the error
    head_response = requests_head(NWIS_SITES_SERVICE_ENDPOINT, params=params_list[0])
    msg = create_request_resp_log_msg(head_response)
    app.logger.debug(msg)

    if head_response.status_code != 200:
        response = make_response(head_response.reason, head_response.status_code)
    else:
        response = Response(site_geojson_generator(params_list), content_type='application/json')

    return response