def get(self, request):
    from geonode.geoserver import ows

    out = {'success': True}
    data = []
    out['data'] = data

    # WMS
    _raw_url = ows._wms_get_capabilities()
    _url = urlsplit(_raw_url)
    headers, access_token = get_headers(request, _url, _raw_url)
    if access_token:
        _j = '&' if _url.query else '?'
        _raw_url = _j.join([_raw_url, 'access_token={}'.format(access_token)])
    data.append({'url': _raw_url, 'type': 'OGC:WMS'})

    # WCS
    _raw_url = ows._wcs_get_capabilities()
    _url = urlsplit(_raw_url)
    headers, access_token = get_headers(request, _url, _raw_url)
    if access_token:
        _j = '&' if _url.query else '?'
        _raw_url = _j.join([_raw_url, 'access_token={}'.format(access_token)])
    data.append({'url': _raw_url, 'type': 'OGC:WCS'})

    # WFS
    _raw_url = ows._wfs_get_capabilities()
    _url = urlsplit(_raw_url)
    headers, access_token = get_headers(request, _url, _raw_url)
    if access_token:
        _j = '&' if _url.query else '?'
        _raw_url = _j.join([_raw_url, 'access_token={}'.format(access_token)])
    data.append({'url': _raw_url, 'type': 'OGC:WFS'})

    # CSW endpoints from the catalogue configuration
    for catname, catconf in settings.CATALOGUE.items():
        _raw_url = catconf['URL']
        _url = urlsplit(_raw_url)
        headers, access_token = get_headers(request, _url, _raw_url)
        if access_token:
            _j = '&' if _url.query else '?'
            _raw_url = _j.join([_raw_url, 'access_token={}'.format(access_token)])
        data.append({'url': _raw_url, 'type': 'OGC:CSW'})

    # Main site URL
    data.append({'url': settings.SITEURL, 'type': 'WWW:LINK'})

    return json_response(out)
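# A hedged sketch of the JSON payload this view produces. The keys ("success",
# "data", "url", "type") come from the code above; the hostnames and query
# strings below are illustrative assumptions, not values from the source.
#
# {
#   "success": true,
#   "data": [
#     {"url": "https://example.org/geoserver/ows?...&access_token=<token>", "type": "OGC:WMS"},
#     {"url": "https://example.org/geoserver/ows?...&access_token=<token>", "type": "OGC:WCS"},
#     {"url": "https://example.org/geoserver/ows?...&access_token=<token>", "type": "OGC:WFS"},
#     {"url": "https://example.org/catalogue/csw?access_token=<token>", "type": "OGC:CSW"},
#     {"url": "https://example.org/", "type": "WWW:LINK"}
#   ]
# }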
def proxy(request, url=None, response_callback=None,
          sec_chk_hosts=True, sec_chk_rules=True, timeout=None,
          allowed_hosts=[], **kwargs):
    # Request default timeout
    if not timeout:
        timeout = TIMEOUT

    # Security rules and settings
    PROXY_ALLOWED_HOSTS = getattr(settings, 'PROXY_ALLOWED_HOSTS', ())

    # Sanity url checks
    if 'url' not in request.GET and not url:
        return HttpResponse(
            "The proxy service requires a URL-encoded URL as a parameter.",
            status=400,
            content_type="text/plain")

    raw_url = url or request.GET['url']
    raw_url = urljoin(settings.SITEURL, raw_url) if raw_url.startswith("/") else raw_url
    url = urlsplit(raw_url)
    scheme = str(url.scheme)
    locator = str(url.path)
    if url.query != "":
        locator += '?' + url.query
    if url.fragment != "":
        locator += '#' + url.fragment

    # White-Black Listing Hosts
    site_url = urlsplit(settings.SITEURL)
    if sec_chk_hosts and not settings.DEBUG:
        # Attach current SITEURL
        if site_url.hostname not in PROXY_ALLOWED_HOSTS:
            PROXY_ALLOWED_HOSTS += (site_url.hostname, )

        # Attach current hostname
        if check_ogc_backend(geoserver.BACKEND_PACKAGE):
            from geonode.geoserver.helpers import ogc_server_settings
            hostname = (ogc_server_settings.hostname, ) if ogc_server_settings else ()
            if hostname not in PROXY_ALLOWED_HOSTS:
                PROXY_ALLOWED_HOSTS += hostname

        # Check OWS regexp
        if url.query and ows_regexp.match(url.query):
            ows_tokens = ows_regexp.match(url.query).groups()
            if len(ows_tokens) == 4 and 'version' == ows_tokens[0] and \
                    StrictVersion(ows_tokens[1]) >= StrictVersion("1.0.0") and \
                    StrictVersion(ows_tokens[1]) <= StrictVersion("3.0.0") and \
                    ows_tokens[2].lower() in ('getcapabilities', ) and \
                    ows_tokens[3].upper() in ('OWS', 'WCS', 'WFS', 'WMS', 'WPS', 'CSW'):
                if url.hostname not in PROXY_ALLOWED_HOSTS:
                    PROXY_ALLOWED_HOSTS += (url.hostname, )

        # Check Remote Services base_urls
        from geonode.services.models import Service
        for _s in Service.objects.all():
            _remote_host = urlsplit(_s.base_url).hostname
            PROXY_ALLOWED_HOSTS += (_remote_host, )

        if not validate_host(url.hostname, PROXY_ALLOWED_HOSTS):
            return HttpResponse(
                "DEBUG is set to False but the host of the path provided to the proxy service"
                " is not in the PROXY_ALLOWED_HOSTS setting.",
                status=403,
                content_type="text/plain")

    # Security checks based on rules; allow only specific requests
    if sec_chk_rules:
        # TODO: Not yet implemented
        pass

    # Collecting headers and cookies
    headers, access_token = get_headers(request, url, raw_url, allowed_hosts=allowed_hosts)

    # Inject access_token if necessary
    parsed = urlparse(raw_url)
    parsed._replace(path=locator.encode('utf8'))
    if parsed.netloc == site_url.netloc and scheme != site_url.scheme:
        parsed = parsed._replace(scheme=site_url.scheme)
    _url = parsed.geturl()

    # Some clients / JS libraries generate URLs with relative URL paths, e.g.
    # "http://host/path/path/../file.css", which the requests library cannot
    # currently handle (https://github.com/kennethreitz/requests/issues/2982).
    # We parse and normalise such URLs into absolute paths before attempting
    # to proxy the request.
    _url = URL.from_text(_url).normalize().to_text()

    if request.method == "GET" and access_token and 'access_token' not in _url:
        query_separator = '&' if '?' in _url else '?'
        _url = ('%s%saccess_token=%s' % (_url, query_separator, access_token))

    _data = request.body.decode('utf-8')

    # Avoid translating local geoserver calls into external ones
    if check_ogc_backend(geoserver.BACKEND_PACKAGE):
        from geonode.geoserver.helpers import ogc_server_settings
        _url = _url.replace(
            '%s%s' % (settings.SITEURL, 'geoserver'),
            ogc_server_settings.LOCATION.rstrip('/'))
        _data = _data.replace(
            '%s%s' % (settings.SITEURL, 'geoserver'),
            ogc_server_settings.LOCATION.rstrip('/'))

    response, content = http_client.request(_url,
                                            method=request.method,
                                            data=_data,
                                            headers=headers,
                                            timeout=timeout,
                                            user=request.user)
    content = response.content or response.reason
    status = response.status_code
    content_type = response.headers.get('Content-Type')

    if status >= 400:
        return HttpResponse(content=content,
                            reason=content,
                            status=status,
                            content_type=content_type)

    # Decompress GZipped responses if not enabled
    # if content and response and response.getheader('Content-Encoding') == 'gzip':
    if content and content_type and content_type == 'gzip':
        buf = io.BytesIO(content)
        f = gzip.GzipFile(fileobj=buf)
        content = f.read()

    PLAIN_CONTENT_TYPES = ['text', 'plain', 'html', 'json', 'xml', 'gml']
    for _ct in PLAIN_CONTENT_TYPES:
        if content_type and _ct in content_type and not isinstance(content, six.string_types):
            try:
                content = content.decode()
                break
            except Exception:
                pass

    if response and response_callback:
        kwargs = {} if not kwargs else kwargs
        kwargs.update({
            'response': response,
            'content': content,
            'status': status,
            'content_type': content_type
        })
        return response_callback(**kwargs)
    else:
        # If we get a redirect, let's add a useful message.
        if status and status in (301, 302, 303, 307):
            _response = HttpResponse(
                ('This proxy does not support redirects. The server in "%s" '
                 'asked for a redirect to "%s"' % (url, response.headers.get('Location'))),
                status=status,
                content_type=content_type)
            _response['Location'] = response.headers.get('Location')
            return _response
        else:
            def _get_message(text):
                _s = text
                if isinstance(text, bytes):
                    _s = text.decode("utf-8", "replace")
                try:
                    found = re.search('<b>Message</b>(.+?)</p>', _s).group(1).strip()
                except Exception:
                    found = _s
                return found

            return HttpResponse(
                content=content,
                reason=_get_message(content) if status not in (200, 201) else None,
                status=status,
                content_type=content_type)
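# Usage sketch for the view above. The "/proxy/" mount point and the hostname
# are illustrative assumptions, not values from the source; the required
# URL-encoded `url` query parameter and the host allow-listing behaviour come
# from the code itself.
#
#   GET /proxy/?url=https%3A//demo.example.org/geoserver/ows%3Fservice%3DWMS%26request%3DGetCapabilities
#
# With DEBUG disabled, demo.example.org must be present in PROXY_ALLOWED_HOSTS,
# or be the SITEURL host, the configured GeoServer host, or the host of a
# registered remote Service; otherwise the view returns 403.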
def check_geoserver_access(request, proxy_path, downstream_path,
                           workspace=None, layername=None, allowed_hosts=[]):

    def strip_prefix(path, prefix):
        if prefix not in path:
            _s_prefix = prefix.split('/', 3)
            _s_path = path.split('/', 3)
            assert _s_prefix[1] == _s_path[1]
            _prefix = f'/{_s_path[1]}/{_s_path[2]}'
        else:
            _prefix = prefix
        assert _prefix in path
        prefix_idx = path.index(_prefix)
        _prefix = path[:prefix_idx] + _prefix
        full_prefix = f"{_prefix}/{layername}/{downstream_path}" if layername else _prefix
        return path[len(full_prefix):]

    path = strip_prefix(request.get_full_path(), proxy_path)
    raw_url = str("".join([ogc_server_settings.LOCATION, downstream_path, path]))

    if settings.DEFAULT_WORKSPACE or workspace:
        ws = (workspace or settings.DEFAULT_WORKSPACE)
        if ws and ws in path:
            # Strip out WS from PATH
            try:
                path = f'/{strip_prefix(path, f"/{ws}:")}'
            except Exception:
                pass

        if proxy_path == f'/gs/{settings.DEFAULT_WORKSPACE}' and layername:
            import posixpath
            raw_url = urljoin(
                ogc_server_settings.LOCATION,
                posixpath.join(workspace, layername, downstream_path, path))

        if downstream_path in ('rest/styles', ) and len(request.body) > 0:
            if ws:
                # Let's try
                # http://localhost:8080/geoserver/rest/workspaces/<ws>/styles/<style>.xml
                _url = str("".join([ogc_server_settings.LOCATION,
                                    'rest/workspaces/', ws, '/styles', path]))
            else:
                _url = str("".join([ogc_server_settings.LOCATION,
                                    'rest/styles', path]))
            raw_url = _url

    if downstream_path in ('ows', ) and (
            re.match(r'/(rest).*$', path, re.IGNORECASE) or
            re.match(r'/(w.*s).*$', path, re.IGNORECASE) or
            re.match(r'/(ows).*$', path, re.IGNORECASE)):
        _url = str("".join([ogc_server_settings.LOCATION, '', path[1:]]))
        raw_url = _url

    url = urlsplit(raw_url)

    if f'{ws}/layers' in path:
        downstream_path = 'rest/layers'
    elif f'{ws}/styles' in path:
        downstream_path = 'rest/styles'

    # Collecting headers and cookies
    headers, access_token = get_headers(request, url, unquote(raw_url),
                                        allowed_hosts=allowed_hosts)

    return (raw_url, headers, access_token)
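# A hedged usage sketch: a caller (for example a GeoServer proxy view) could
# resolve the upstream URL and authorization material first, then forward the
# request with the same `http_client` used elsewhere in this module. The view
# name and the '/gs' proxy_path below are illustrative assumptions, not the
# project's actual wiring.
#
# def geoserver_proxy_view(request, downstream_path):
#     raw_url, headers, access_token = check_geoserver_access(
#         request, proxy_path='/gs', downstream_path=downstream_path)
#     response, content = http_client.request(
#         raw_url, method=request.method, data=request.body,
#         headers=headers, timeout=TIMEOUT, user=request.user)
#     return HttpResponse(content, status=response.status_code,
#                         content_type=response.headers.get('Content-Type'))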
def download(request, resourceid, sender=Layer):

    _not_authorized = _("You are not authorized to download this resource.")
    _not_permitted = _("You are not permitted to save or edit this resource.")
    _no_files_found = _(
        "No files have been found for this resource. Please, contact a system administrator.")

    instance = resolve_object(request,
                              sender,
                              {'pk': resourceid},
                              permission='base.download_resourcebase',
                              permission_msg=_not_permitted)

    if isinstance(instance, Layer):
        # Create Target Folder
        dirpath = tempfile.mkdtemp()
        dir_time_suffix = get_dir_time_suffix()
        target_folder = os.path.join(dirpath, dir_time_suffix)
        if not os.path.exists(target_folder):
            os.makedirs(target_folder)

        layer_files = []
        try:
            upload_session = instance.get_upload_session()
            if upload_session:
                layer_files = [
                    item for idx, item in enumerate(
                        LayerFile.objects.filter(upload_session=upload_session))
                ]
                if layer_files:
                    # Copy all Layer related files into a temporary folder
                    for lyr in layer_files:
                        if storage.exists(str(lyr.file)):
                            geonode_layer_path = storage.path(str(lyr.file))
                            shutil.copy2(geonode_layer_path, target_folder)
                        else:
                            return HttpResponse(loader.render_to_string(
                                '401.html',
                                context={
                                    'error_title': _("No files found."),
                                    'error_message': _no_files_found
                                },
                                request=request), status=404)

            # Check we can access the original files
            if not layer_files:
                return HttpResponse(loader.render_to_string(
                    '401.html',
                    context={
                        'error_title': _("No files found."),
                        'error_message': _no_files_found
                    },
                    request=request), status=404)

            # Let's check for associated SLD files (if any)
            try:
                for s in instance.styles.all():
                    sld_file_path = os.path.join(target_folder, "".join([s.name, ".sld"]))
                    with open(sld_file_path, "w") as sld_file:
                        sld_file.write(s.sld_body.strip())
                    try:
                        # Collecting headers and cookies
                        headers, access_token = get_headers(
                            request, urlsplit(s.sld_url), s.sld_url)

                        response, content = http_client.get(s.sld_url,
                                                            headers=headers,
                                                            timeout=TIMEOUT,
                                                            user=request.user)
                        sld_remote_content = response.text
                        sld_file_path = os.path.join(
                            target_folder, "".join([s.name, "_remote.sld"]))
                        with open(sld_file_path, "w") as sld_file:
                            sld_file.write(sld_remote_content.strip())
                    except Exception:
                        traceback.print_exc()
                        tb = traceback.format_exc()
                        logger.debug(tb)
            except Exception:
                traceback.print_exc()
                tb = traceback.format_exc()
                logger.debug(tb)

            # Let's dump metadata
            target_md_folder = os.path.join(target_folder, ".metadata")
            if not os.path.exists(target_md_folder):
                os.makedirs(target_md_folder)

            try:
                dump_file = os.path.join(target_md_folder, "".join([instance.name, ".dump"]))
                with open(dump_file, 'w') as outfile:
                    serialized_obj = json_serializer_producer(model_to_dict(instance))
                    json.dump(serialized_obj, outfile)

                links = Link.objects.filter(resource=instance.resourcebase_ptr)
                for link in links:
                    link_name = slugify(link.name)
                    link_file = os.path.join(
                        target_md_folder, "".join([link_name, ".%s" % link.extension]))
                    if link.link_type in ('data', ):
                        # Skipping 'data' download links
                        continue
                    elif link.link_type in ('metadata', 'image'):
                        # Dumping metadata files and images
                        with open(link_file, "wb") as link_fd:
                            try:
                                # Collecting headers and cookies
                                headers, access_token = get_headers(
                                    request, urlsplit(link.url), link.url)

                                response, raw = http_client.get(link.url,
                                                                stream=True,
                                                                headers=headers,
                                                                timeout=TIMEOUT,
                                                                user=request.user)
                                raw.decode_content = True
                                shutil.copyfileobj(raw, link_fd)
                            except Exception:
                                traceback.print_exc()
                                tb = traceback.format_exc()
                                logger.debug(tb)
                    elif link.link_type.startswith('OGC'):
                        # Dumping OGC/OWS links
                        with open(link_file, "w") as link_fd:
                            link_fd.write(link.url.strip())
            except Exception:
                traceback.print_exc()
                tb = traceback.format_exc()
                logger.debug(tb)

            # ZIP everything and return
            target_file_name = "".join([instance.name, ".zip"])
            target_file = os.path.join(dirpath, target_file_name)
            zip_dir(target_folder, target_file)
            register_event(request, 'download', instance)
            response = HttpResponse(
                content=open(target_file, mode='rb'),
                status=200,
                content_type="application/zip")
            response['Content-Disposition'] = 'attachment; filename="%s"' % target_file_name
            return response
        except NotImplementedError:
            traceback.print_exc()
            tb = traceback.format_exc()
            logger.debug(tb)
            return HttpResponse(loader.render_to_string(
                '401.html',
                context={
                    'error_title': _("No files found."),
                    'error_message': _no_files_found
                },
                request=request), status=404)

    return HttpResponse(loader.render_to_string(
        '401.html',
        context={
            'error_title': _("Not Authorized"),
            'error_message': _not_authorized
        },
        request=request), status=403)
def download(request, resourceid, sender=Layer):

    _not_authorized = _("You are not authorized to download this resource.")
    _not_permitted = _("You are not permitted to save or edit this resource.")
    _no_files_found = _(
        "No files have been found for this resource. Please, contact a system administrator.")

    instance = resolve_object(request,
                              sender,
                              {'pk': resourceid},
                              permission='base.download_resourcebase',
                              permission_msg=_not_permitted)

    if isinstance(instance, Layer):
        layer_files = []
        file_list = []  # Store file info to be returned
        try:
            upload_session = instance.get_upload_session()
            if upload_session:
                layer_files = [
                    item for idx, item in enumerate(
                        LayerFile.objects.filter(upload_session=upload_session))
                ]
                if layer_files:
                    # Copy all Layer related files into a temporary folder
                    for lyr in layer_files:
                        if storage.exists(str(lyr.file)):
                            geonode_layer_path = storage.path(str(lyr.file))
                            file_list.append({
                                "zip_folder": "",
                                "name": lyr.file.name.split('/')[-1],
                                "data_src_file": geonode_layer_path,
                            })
                        else:
                            return HttpResponse(loader.render_to_string(
                                '401.html',
                                context={
                                    'error_title': _("No files found."),
                                    'error_message': _no_files_found
                                },
                                request=request), status=404)

            # Check we can access the original files
            if not layer_files:
                return HttpResponse(loader.render_to_string(
                    '401.html',
                    context={
                        'error_title': _("No files found."),
                        'error_message': _no_files_found
                    },
                    request=request), status=404)

            # Let's check for associated SLD files (if any)
            try:
                for s in instance.styles.all():
                    sld_file_name = "".join([s.name, ".sld"])
                    file_list.append({
                        "zip_folder": "",
                        "name": sld_file_name,
                        "data_str": s.sld_body.strip(),
                    })
                    try:
                        # Collecting headers and cookies
                        headers, access_token = get_headers(
                            request, urlsplit(s.sld_url), s.sld_url)

                        response, content = http_client.get(s.sld_url,
                                                            headers=headers,
                                                            timeout=TIMEOUT,
                                                            user=request.user)
                        sld_remote_content = response.text
                        remote_sld_file_name = "".join([s.name, "_remote.sld"])
                        file_list.append({
                            "zip_folder": "",
                            "name": remote_sld_file_name,
                            "data_str": sld_remote_content,
                        })
                    except Exception:
                        traceback.print_exc()
                        tb = traceback.format_exc()
                        logger.debug(tb)
            except Exception:
                traceback.print_exc()
                tb = traceback.format_exc()
                logger.debug(tb)

            # Let's dump metadata
            try:
                dump_file_name = "".join([instance.name, ".dump"])
                serialized_obj = json_serializer_producer(model_to_dict(instance))
                file_list.append({
                    "zip_folder": ".metadata/",
                    "name": dump_file_name,
                    "data_str": json.dumps(serialized_obj),
                })

                links = Link.objects.filter(resource=instance.resourcebase_ptr)
                for link in links:
                    link_name = slugify(link.name)
                    link_file_name = "".join([link_name, f".{link.extension}"])
                    link_file_obj = None
                    if link.link_type in ('data', ):
                        # Skipping 'data' download links
                        continue
                    elif link.link_type in ('metadata', 'image'):
                        # Dumping metadata files and images
                        try:
                            # Collecting headers and cookies
                            headers, access_token = get_headers(
                                request, urlsplit(link.url), link.url)

                            response, raw = http_client.get(link.url,
                                                            stream=True,
                                                            headers=headers,
                                                            timeout=TIMEOUT,
                                                            user=request.user)
                            raw.decode_content = True
                            if raw is not None:
                                link_file_obj = {
                                    "zip_folder": ".metadata/",
                                    "name": link_file_name,
                                    "data_iter": raw,
                                }
                        except Exception:
                            traceback.print_exc()
                            tb = traceback.format_exc()
                            logger.debug(tb)
                    elif link.link_type.startswith('OGC'):
                        # Dumping OGC/OWS links
                        link_file_obj = {
                            "zip_folder": ".metadata/",
                            "name": link_file_name,
                            "data_str": link.url.strip(),
                        }
                    # Add file_info to the file list
                    if link_file_obj is not None:
                        file_list.append(link_file_obj)
            except Exception:
                traceback.print_exc()
                tb = traceback.format_exc()
                logger.debug(tb)

            # ZIP everything and return
            target_file_name = "".join([instance.name, ".zip"])

            target_zip = zipstream.ZipFile(mode='w',
                                           compression=zipstream.ZIP_DEFLATED,
                                           allowZip64=True)

            # Iterable: needed when the file_info has its data as a stream
            def _iterable(source_iter):
                while True:
                    buf = source_iter.read(BUFFER_CHUNK_SIZE)
                    if not buf:
                        break
                    yield buf

            # Add files to zip
            for file_info in file_list:
                zip_file_name = "".join([file_info['zip_folder'], file_info['name']])
                # The zip can be built from 3 data sources: str, iterable or a file path
                if 'data_str' in file_info and file_info['data_str'] is not None:
                    target_zip.writestr(arcname=zip_file_name,
                                        data=bytes(file_info['data_str'], 'utf-8'))
                elif 'data_iter' in file_info and file_info['data_iter'] is not None:
                    target_zip.write_iter(arcname=zip_file_name,
                                          iterable=_iterable(file_info['data_iter']))
                elif 'data_src_file' in file_info and file_info['data_src_file'] is not None:
                    target_zip.write(filename=file_info['data_src_file'],
                                     arcname=zip_file_name)

            register_event(request, 'download', instance)

            # Streaming content response
            response = StreamingHttpResponse(target_zip, content_type='application/zip')
            response['Content-Disposition'] = f'attachment; filename="{target_file_name}"'
            return response
        except NotImplementedError:
            traceback.print_exc()
            tb = traceback.format_exc()
            logger.debug(tb)
            return HttpResponse(loader.render_to_string(
                '401.html',
                context={
                    'error_title': _("No files found."),
                    'error_message': _no_files_found
                },
                request=request), status=404)

    return HttpResponse(loader.render_to_string(
        '401.html',
        context={
            'error_title': _("Not Authorized"),
            'error_message': _not_authorized
        },
        request=request), status=403)
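# For reference, a sketch of the three `file_list` entry shapes the zip builder
# above consumes. The field names ("zip_folder", "name", "data_str",
# "data_iter", "data_src_file") come from the code; the concrete values are
# illustrative assumptions:
#
#   {"zip_folder": "",           "name": "roads.shp",  "data_src_file": "/tmp/uploads/roads.shp"}   # copied from a file path
#   {"zip_folder": "",           "name": "roads.sld",  "data_str": "<StyledLayerDescriptor .../>"}  # written from an in-memory string
#   {"zip_folder": ".metadata/", "name": "thumb.png",  "data_iter": raw}                            # streamed through _iterable()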