def _parse_layers(self, xml_obj, async_task=None):
    """ Parses all layers of a service and creates OGCWebMapLayer objects from each.

    Uses recursion on the inside to get all children.

    Args:
        xml_obj: The iterable xml tree
        async_task: Optional async task handle. Accepted for caller compatibility
            (create_from_capabilities() passes it as a keyword argument); this
            method does not report progress through it itself.
    Returns:
         nothing
    """
    # get most upper parent layer, which normally lives directly in <Capability>
    layers = xml_helper.try_get_element_from_xml(
        elem="//" + GENERIC_NAMESPACE_TEMPLATE.format("Capability") +
             "/" + GENERIC_NAMESPACE_TEMPLATE.format("Layer"),
        xml_elem=xml_obj
    )
    total_layers = xml_helper.try_get_element_from_xml(
        elem="//" + GENERIC_NAMESPACE_TEMPLATE.format("Layer"),
        xml_elem=xml_obj
    )

    # calculate the step size for an async call
    # 55 is the diff from the last process update (10) to the next static one (65)
    len_layers = len(total_layers)
    if len_layers == 0:
        # No division by zero!
        len_layers = 1
    step_size = float(PROGRESS_STATUS_AFTER_PARSING / len_layers)
    service_logger.debug("Total number of layers: {}. Step size: {}".format(len_layers, step_size))

    self._parse_layers_recursive(layers, step_size=step_size)
def create_from_capabilities(self, metadata_only: bool = False, async_task: Task = None, external_auth: ExternalAuthentication = None):
    """ Fills the object with data from the capabilities document

    Returns:
         nothing
    """
    # parse the stored capabilities document into an iterable xml tree
    capabilities_xml = xml_helper.parse_xml(xml=self.service_capabilities_xml)

    timer = time.time()
    self.get_service_metadata_from_capabilities(xml_obj=capabilities_xml, async_task=async_task)

    # the capabilities may link a 'real' (INSPIRE) service metadata document - fetch it when present
    remote_metadata_uri = xml_helper.try_get_text_from_xml_element(
        xml_elem=capabilities_xml,
        elem="//VendorSpecificCapabilities/inspire_vs:ExtendedCapabilities/inspire_common:MetadataUrl/inspire_common:URL"
    )
    if remote_metadata_uri is not None:
        self.get_service_metadata(uri=remote_metadata_uri, async_task=async_task)
    service_logger.debug(EXEC_TIME_PRINT % ("service metadata", time.time() - timer))

    # check possible operations on this service
    timer = time.time()
    self.get_service_operations_and_formats(capabilities_xml)
    service_logger.debug(EXEC_TIME_PRINT % ("service operation checking", time.time() - timer))

    # parse possible linked dataset metadata
    timer = time.time()
    self.get_service_dataset_metadata(xml_obj=capabilities_xml)
    service_logger.debug(EXEC_TIME_PRINT % ("service iso metadata", time.time() - timer))

    self.get_version_specific_metadata(xml_obj=capabilities_xml)

    if metadata_only:
        return

    timer = time.time()
    self._parse_layers(xml_obj=capabilities_xml, async_task=async_task)
    service_logger.debug(EXEC_TIME_PRINT % ("layer metadata", time.time() - timer))
def async_new_service(url_dict: dict, user_id: int, register_group_id: int, register_for_organization_id: int, external_auth: dict):
    """ Async call of new service creation

    Since redis is used as broker, the objects can not be passed directly into the function. They have to be
    resolved using their ids, since the objects are not easily serializable using json

    Args:
        url_dict (dict): Contains basic information about the service like connection uri
        user_id (int): Id of the performing user
        register_group_id (int): Id of the group which wants to register
        register_for_organization_id (int): Id of the organization for which the service is registered
        external_auth (dict): Serialized credentials (username/password/auth_type) or None
    Returns:
        nothing
    """
    # create ExternalAuthentication object from the serialized credential dict
    if external_auth is not None:
        external_auth = ExternalAuthentication(
            username=external_auth["username"],
            password=external_auth["password"],
            auth_type=external_auth["auth_type"],
        )

    # get current task id (None when executed outside of a worker context)
    curr_task_id = async_new_service.request.id

    # set progress for current task to 0
    if curr_task_id is not None:
        task_helper.update_progress(async_new_service, 0)

    # restore objects from ids
    user = MrMapUser.objects.get(id=user_id)
    url_dict["service"] = service_helper.resolve_service_enum(url_dict["service"])
    url_dict["version"] = service_helper.resolve_version_enum(url_dict["version"])
    register_group = MrMapGroup.objects.get(id=register_group_id)
    if utils.resolve_none_string(str(register_for_organization_id)) is not None:
        register_for_organization = Organization.objects.get(id=register_for_organization_id)
    else:
        register_for_organization = None

    try:
        t_start = time.time()
        service = service_helper.create_service(
            url_dict.get("service"),
            url_dict.get("version"),
            url_dict.get("base_uri"),
            user,
            register_group,
            register_for_organization,
            async_task=async_new_service,
            external_auth=external_auth
        )

        # update progress
        if curr_task_id is not None:
            task_helper.update_progress(async_new_service, PROGRESS_STATUS_AFTER_PARSING)

        # get db object
        if curr_task_id is not None:
            pending_task = PendingTask.objects.get(task_id=curr_task_id)
            # update db pending task information
            pending_task.description = json.dumps({
                "service": service.metadata.title,
                "phase": "Persisting",
            })
            pending_task.save()

        # update progress
        if curr_task_id is not None:
            task_helper.update_progress(async_new_service, 95)

        # after service AND documents have been persisted, we can now set the service being secured if needed
        if external_auth is not None:
            service.metadata.set_proxy(True)

        # collect the service's own metadata, the metadata of all sub elements and their related dataset metadata
        # NOTE(review): this queryset is built but not used further in the visible code - verify intent
        metadatas = Metadata.objects.filter(pk=service.metadata.pk)
        sub_elements = service.get_subelements().select_related('metadata')
        for sub_element in sub_elements:
            metadatas |= Metadata.objects.filter(pk=sub_element.metadata.pk)
            metadatas |= sub_element.metadata.get_related_dataset_metadatas()

        service_logger.debug(EXEC_TIME_PRINT % ("total registration", time.time() - t_start))
        user_helper.create_group_activity(service.metadata.created_by, user, SERVICE_REGISTERED, service.metadata.title)

        if curr_task_id is not None:
            task_helper.update_progress(async_new_service, 100)

        # delete pending task from db
        if curr_task_id is not None:
            pending_task = PendingTask.objects.get(task_id=curr_task_id)
            pending_task.delete()
    # NOTE(review): BaseException is first in this tuple and already covers every other listed type; it is very broad
    except (BaseException, XMLSyntaxError, XPathEvalError, InvalidURL, ConnectionError) as e:
        # rebuild the GetCapabilities request url for the error report
        url = url_dict['base_uri'] + f"SERVICE={url_dict['service'].value}&VERSION={url_dict['version'].value}&request={url_dict['request']}"
        error_msg = f"Error while trying to register new resource for url: {url}\n"
        # try to fetch the capabilities document once more so it can be attached to the error message
        # NOTE(review): this request does not pass external_auth - confirm whether that is intended
        response = requests.get(url)
        if response.status_code == 200:
            cap_doc = "-----------------------------------------------------------\n"\
                f"We could receive the following capabilities document:\n{response.text}"
            error_msg += cap_doc
        service_logger.error(msg=error_msg)
        service_logger.exception(e, stack_info=True, exc_info=True)

        if curr_task_id is not None:
            # attach an error report to the pending task so the user can inspect/forward it
            pending_task = PendingTask.objects.get(task_id=curr_task_id)
            register_group = MrMapGroup.objects.get(id=register_group_id)
            error_report = ErrorReport(message=error_msg, traceback=traceback.format_exc(), created_by=register_group)
            error_report.save()
            descr = json.loads(pending_task.description)
            pending_task.description = json.dumps({
                "service": descr.get("service", None),
                "info": {
                    "current": "0",
                },
                "exception": e.__str__(),
                "phase": "ERROR: Something went wrong! Click on generate error report to inform your serveradmin about this error.",
            })
            pending_task.error_report = error_report
            pending_task.save()
        raise e
def __load_curl(self, params: dict = None):
    """ Performs a GET request on self._url using pycurl.

    Based on the example from http://pycurl.io/docs/latest/quickstart.html

    Args:
        params (dict): Optional query parameters, urlencoded and appended to the url
    Returns:
        response: a types.SimpleNamespace with .content (bytes), .encoding (str)
                  and .text (str decoded with the detected encoding)
    """
    response = types.SimpleNamespace()

    headers = {}

    def header_function(header_line):
        """ Records one response header line into `headers` (lowercased names). """
        # HTTP standard specifies that headers are encoded in iso-8859-1
        header_line = header_line.decode('iso-8859-1')

        # Ignore the status line (HTTP/1.x ...) and any line without a colon.
        # NOTE: headers that are split across multiple lines are not handled.
        if ':' not in header_line:
            return

        # Break the header line into name and value; strip the trailing newline
        # and any whitespace around the colon.
        name, value = header_line.split(':', 1)
        # Header names are case insensitive, so lowercase for uniform lookup.
        name = name.strip().lower()
        value = value.strip()

        # NOTE: this only works while headers are not duplicated
        headers[name] = value

    url_args = ""
    if params is not None:
        url_args = "?" + urlencode(params)

    buffer = BytesIO()
    c = pycurl.Curl()
    c.setopt(c.URL, self._url + url_args)
    c.setopt(c.WRITEFUNCTION, buffer.write)
    c.setopt(c.HEADERFUNCTION, header_function)

    # Check for proxies
    if HTTP_PROXY is not None:
        c.setopt(pycurl.PROXY, HTTP_PROXY)

    c.perform()
    c.close()

    # Figure out what encoding was sent with the response, if any.
    # Check against the lowercased header name.
    encoding = None
    if 'content-type' in headers:
        content_type = headers['content-type'].lower()
        # raw string avoids the invalid '\S' escape in a plain string literal
        match = re.search(r'charset=(\S+)', content_type)
        if match:
            encoding = match.group(1)
            service_logger.debug('Decoding using %s' % encoding)
    if encoding is None:
        # Default encoding for HTML is iso-8859-1. Other content types may have
        # different defaults, or (binary data) no encoding at all.
        encoding = 'iso-8859-1'
        service_logger.debug('Assuming encoding is %s' % encoding)

    response.content = buffer.getvalue()
    response.encoding = encoding
    response.text = response.content.decode(encoding)
    return response
def async_new_service(url_dict: dict, user_id: int, register_group_id: int, register_for_organization_id: int, external_auth: dict):
    """ Async call of new service creation

    Since redis is used as broker, the objects can not be passed directly into the function. They have to be
    resolved using their ids, since the objects are not easily serializable using json

    Args:
        url_dict (dict): Contains basic information about the service like connection uri
        user_id (int): Id of the performing user
        register_group_id (int): Id of the group which wants to register
        register_for_organization_id (int): Id of the organization for which the service is registered
    Returns:
        nothing
    """
    # announce that the task has started
    if current_task:
        current_task.update_state(state=states.STARTED, meta={
            'current': 0,
            'total': 100,
            'phase': 'pre configure task...',
        })

    # rebuild the ExternalAuthentication object from its serialized form
    if external_auth is not None:
        external_auth = ExternalAuthentication(
            username=external_auth["username"],
            password=external_auth["password"],
            auth_type=external_auth["auth_type"],
        )

    # resolve the serialized ids back into real objects
    performing_user = MrMapUser.objects.get(id=user_id)
    url_dict["service"] = service_helper.resolve_service_enum(url_dict["service"])
    url_dict["version"] = service_helper.resolve_version_enum(url_dict["version"])
    registering_group = MrMapGroup.objects.get(id=register_group_id)
    if utils.resolve_none_string(str(register_for_organization_id)) is None:
        target_organization = None
    else:
        target_organization = Organization.objects.get(id=register_for_organization_id)

    registration_start = time.time()
    service = service_helper.create_service(
        url_dict.get("service"),
        url_dict.get("version"),
        url_dict.get("base_uri"),
        performing_user,
        registering_group,
        target_organization,
        external_auth=external_auth
    )

    # after service AND documents have been persisted, we can now set the service being secured if needed
    if external_auth is not None:
        # todo: check this......
        if current_task:
            current_task.update_state(state=states.STARTED, meta={
                'current': PROGRESS_STATUS_AFTER_PARSING,
                'phase': 'Securing...',
                'service': service.metadata.title
            })
        service.metadata.set_proxy(True)

    service_logger.debug(EXEC_TIME_PRINT % ("total registration", time.time() - registration_start))
    user_helper.create_group_activity(service.metadata.created_by, performing_user, SERVICE_REGISTERED, service.metadata.title)

    return {
        'msg': 'Done. New service registered.',
        'id': str(service.metadata.pk),
        'absolute_url': service.metadata.get_absolute_url(),
        'absolute_url_html': f'<a href={service.metadata.get_absolute_url()}>{service.metadata.title}</a>'
    }