Exemple #1
0
def task_failed_handling(task, err_msg):
    """
    Is called if a task failed, to write error Artefacts which signal
    the client that the task has failed.

    For every output parameter (role '1') of the task's process an Artefact
    with format 'error' and the error message as data is created. If output
    Artefacts already exist for the task and parameter, they are switched to
    error mode instead of a new one being created.
    @param task: the failed task
    @type task: models.Task
    @param err_msg: the error message
    @type err_msg: string
    @return: None
    @rtype: NoneType
    """
    wps_log.debug(f"task{task.id} failed, due to error: {err_msg}")
    wps_log.debug("error artefacts are created")

    time_now = datetime.now()
    process = task.process

    error_output_list = list(
        InputOutput.objects.filter(process=process, role='1'))
    wps_log.debug(
        f"trying to generate {len(error_output_list)} error artefacts")
    for output in error_output_list:
        existing = Artefact.objects.filter(task=task,
                                           parameter=output,
                                           role='1')
        # exists() asks the database for one row instead of loading every
        # matching artefact just to find out whether there is any
        if not existing.exists():
            Artefact.objects.create(task=task,
                                    parameter=output,
                                    role='1',
                                    format='error',
                                    data=err_msg,
                                    created_at=time_now,
                                    updated_at=time_now)
        else:
            wps_log.warning(
                f"task{task.id} failed due to ProcessFailed status, but there are already artefacts, "
                f"setting artefacts to error mode")
            existing.update(format='error',
                            data=err_msg,
                            updated_at=time_now)
Exemple #2
0
def get_execute_url(task):
    """
    Extracts the Execute URL from the Database for a given task. Returns empty string on error.

    The URL is read via task.process.wps.execute_url. If the process or the
    WPS of the task does not exist, a warning is logged and an empty string
    is returned.
    @param task: Task object from Database
    @type task: Task
    @return: Execute URL. Empty on error or empty DB field
    @rtype: string
    """
    try:
        execute_url = task.process.wps.execute_url
    # a tuple is required here: "A or B" evaluates to A only, so the second
    # exception class would never be caught
    except (Process.DoesNotExist, WPS.DoesNotExist):
        wps_log.warning(f"no execute url found for task{task.id}")
        return ""

    wps_log.debug(f"execute url of task{task.id} is {execute_url}")
    return execute_url
Exemple #3
0
def calculate_percent_done(workflow):
    """
    Recalculates the progress of a workflow from the status of its tasks.

    If at least one task of the workflow has failed (status '5'), the
    progress is set to -1. Otherwise it is the integer percentage of
    finished tasks (status '4'); a workflow without tasks counts as 0%.
    The result is stored in workflow.percent_done and the workflow is saved.
    @param workflow: workflow whose progress is recalculated
    @type workflow: Workflow
    @return: None
    @rtype: NoneType
    """
    failed_ids = [
        task.id for task in Task.objects.filter(workflow=workflow, status='5')
    ]
    if failed_ids:
        wps_log.warning(f"workflow{workflow.id} execution has failed due to "
                        f"failure of tasks: {failed_ids}")
        percent_done = -1
    else:
        # count() lets the database count instead of loading all tasks
        finished = Task.objects.filter(workflow=workflow, status='4').count()
        total = Task.objects.filter(workflow=workflow).count()
        # guard against a workflow without tasks (division by zero)
        percent_done = int((finished / total) * 100) if total else 0
        wps_log.debug(
            f"updating progress of workflow{workflow.id} to {percent_done}%")

    workflow.percent_done = percent_done
    workflow.save()
Exemple #4
0
def parse_output(output, task):
    """
    Parses one output node of an execute response and stores its data.

    The output is matched by its identifier against the outputs (role '1')
    of the task's process. The output artefact of the task is looked up
    (and created if missing) and filled from the Data or Reference child of
    the node. If the artefact holds data afterwards, format and data are
    copied to the input artefacts (role '0') of every task connected to this
    output by an edge; missing input artefacts are created.
    @param output: the output node that has to be parsed
    @type output: lxml.etree._Element
    @param task: the task that belongs to the output
    @type task: subclass of models.Model
    @return: None
    @rtype: NoneType
    """
    wps_log.debug(f"parsing output information for task{task.id}")
    out_id = output.find(ns_map["Identifier"]).text

    try:
        output_db = InputOutput.objects.get(process=task.process,
                                            identifier=out_id,
                                            role='1')
    except InputOutput.DoesNotExist:
        wps_log.warning(f"output for task{task.id} not found, aborting")
        return

    time_now = datetime.now()

    # looked up separately from the output, so a failure here can never leave
    # output_db unbound; first() also copes with more than one match
    artefact = Artefact.objects.filter(task=task,
                                       parameter=output_db,
                                       role='1').first()
    if artefact is None:
        wps_log.debug(
            f"output artefact for task {task.id} not found, creating new artefact"
        )
        artefact = Artefact.objects.create(task=task,
                                           parameter=output_db,
                                           role='1',
                                           created_at=time_now,
                                           updated_at=time_now)

    # everything is the same up to here for each output type
    data_elem = output.find(ns_map["Data"])
    reference = output.find(ns_map["Reference"])

    if data_elem is not None:
        if len(data_elem) == 0:
            wps_log.debug(f"data has no child for task{task.id}")
            # go back to next output
            return
        # there should always be just one element!
        data_elem = data_elem[0]

        if data_elem.tag == ns_map["LiteralData"]:
            wps_log.debug(
                f"literal data found in data for output{output_db.id} of task{task.id}"
            )
            parse_response_literaldata(artefact, data_elem)

        elif data_elem.tag == ns_map["BoundingBox"]:
            wps_log.debug(
                f"boundingbox data found in data for output{output_db.id} of task{task.id}"
            )
            parse_response_bbox(artefact, data_elem)

        elif data_elem.tag == ns_map["ComplexData"]:
            wps_log.debug(
                f"complex data found in data for output{output_db.id} of task{task.id}"
            )
            parse_response_complexdata(artefact, data_elem)
    elif reference is not None:
        # complexdata found, usually gets passed by url reference which won't be 500 chars long
        # data_elem is None in this branch, so the attribute has to be read
        # from the reference node itself
        data_type = reference.get("dataType")
        db_format = "plain" if data_type is None else data_type.split(':')[-1]
        wps_log.debug("writing data to db")
        # NOTE(review): presumably the url is the node text; confirm it is not
        # delivered as an href attribute by the servers in use
        artefact.format = db_format
        artefact.data = reference.text  # should be a url
        artefact.updated_at = time_now
        artefact.save()

    wps_log.debug(
        f"trying to get edge from task{task.id}, output{output_db.id}")
    if artefact.data is None:
        # nothing to hand over to following tasks
        return

    # filter() never raises DoesNotExist, it just yields nothing
    for edge in Edge.objects.filter(from_task=task, output=output_db):
        # the lookup has to use the same keys as the create() below,
        # otherwise a new input artefact is created on every call
        targets = list(
            Artefact.objects.filter(task=edge.to_task,
                                    parameter=edge.input,
                                    role='0'))
        if not targets:
            wps_log.debug("input artefact not found, creating new artefact")
            to_artefact = Artefact.objects.create(task=edge.to_task,
                                                  parameter=edge.input,
                                                  role='0',
                                                  format=artefact.format,
                                                  data=artefact.data,
                                                  created_at=time_now,
                                                  updated_at=time_now)
            wps_log.debug(f"artefact{to_artefact.id} has been created")
            continue

        for to_artefact in targets:
            to_artefact.format = artefact.format
            to_artefact.data = artefact.data
            to_artefact.updated_at = time_now
            to_artefact.save()
Exemple #5
0
def parse_execute_response(task):
    """
    Checks parameter tasks status by checking xml file found at status_url for change
    If task has finished write data to db if there is any data

    Return codes:
        0 - response processed, workflow progress updated
        1 - status document could not be requested or parsed, task set to failed
        2 - no process information or no status found in the document
        3 - the task has failed, error artefacts were created

    NOTE(review): calculate_percent_done is only reached on return code 0, so
    the workflow progress is not updated here when the task fails - confirm
    that the caller takes care of that.
    @param task: the task whose status is currently checked
    @type task: subclass of models.Model
    @return: 0 on success, error code otherwise
    @rtype: int
    """
    wps_log.debug(f"task{task.id}s url: {task.status_url}")

    # try to parse document which should be returned by request
    try:
        # request the document once and reuse the text for the fallback
        response_text = requests.get(task.status_url).text
        try:
            root = etree.parse(StringIO(response_text))
        except ValueError:
            # lxml refuses unicode strings that carry an encoding declaration
            # ("Unicode strings with encoding declaration are not supported"),
            # in this case parse the encoded bytes instead
            root = etree.parse(BytesIO(response_text.encode()))
    except Exception:
        # any request or parse problem marks the task as failed
        task.status = '5'
        task.save()
        task_failed_handling(
            task,
            "status could not be read, check internet connection or server availability"
        )
        # otherwise just exit and return error code
        wps_log.debug(
            f"request of {task.status_url} for task {task.id} could not be parsed"
        )
        return 1

    process_info = root.find(ns_map["Process"])

    outputs_node = root.find(ns_map["ProcessOutputs"])
    if outputs_node is None:
        # no Processes in output
        wps_log.warning(f"response xml for task{task.id} has no output nodes")
        output_list = []
    else:
        output_list = outputs_node.findall(ns_map["Output"])

    if process_info is None:
        wps_log.warning(f"Process information not found for task{task.id}")
        return 2

    for output in output_list:
        parse_output(output, task)

    process_status = root.find(ns_map["Status"])
    try:
        # the first child of the Status node names the process state
        status_name = etree.QName(process_status[0].tag).localname
    except (TypeError, IndexError, ValueError):
        # Status node missing, without children or with an unusable child
        wps_log.warning(f"no status found in xml for task{task.id}")
        return 2

    if status_name in ("ProcessAccepted", "ProcessStarted", "ProcessPaused"):
        new_status = STATUS[3][0]
    elif status_name == "ProcessSucceeded":
        new_status = STATUS[4][0]
    else:
        new_status = STATUS[5][0]

    if task.status != new_status:
        wps_log.debug(
            f"old status of task{task.id}: {task.status}, new status: {new_status}"
        )
        task.status = new_status
        task.save()

    # if status failed, create error output artefacts for task
    if task.status == '5':
        wps_log.debug(
            f"task{task.id} failed, status link can be found here: {task.status_url}"
        )
        try:
            err_msg = process_status[0].find(ns_map['ExceptionReport']).find(
                ns_map['Exception']).find(ns_map['ExceptionText']).text
            wps_log.debug("found failure information")
        except (AttributeError, KeyError):
            # one of the nested nodes (or its tag mapping) is missing
            wps_log.debug("could not find information about failure")
            err_msg = "unknown error"

        task_failed_handling(task, err_msg)
        return 3

    # update process of workflow after every response
    calculate_percent_done(task.workflow)

    return 0
Exemple #6
0
def send_task(task_id, xml_dir):
    """
    Sends a Task identified by its Database ID to its WPS Server.

    The execute XML of the task is read from xml_dir, posted to the execute
    url of the task and the response is evaluated. Status, status url and
    start time of the task are stored, the execute XML file is deleted and,
    if the task has failed, error artefacts are created.
    @param task_id: ID of Task in Database
    @type task_id: int
    @param xml_dir: Directory where XMLs are stored in
    @type xml_dir: string
    @return: None
    @rtype: NoneType
    """
    # join instead of plain concatenation, so xml_dir works with and
    # without a trailing separator
    filepath = os.path.join(str(xml_dir), f"task{task_id}.xml")
    if not os.path.isfile(filepath):
        wps_log.warning(f"file for task {task_id} does not exist, aborting...")
        return

    # the task is fetched once and reused, also by the failure handling
    try:
        task = Task.objects.get(id=task_id)
    except Task.DoesNotExist:
        wps_log.warning(f"task{task_id} not found, aborting")
        return

    # This only is outsourced to extra function for better readability
    execute_url = get_execute_url(task)

    # TODO: validate execution url
    with open(filepath, 'r') as xml_file:
        payload = '<?xml version="1.0" encoding="utf-8" standalone="yes"?>' + \
                  xml_file.read()

    # send to url
    try:
        response = requests.post(execute_url, data=payload)
        # get response from send
        xml = ET.fromstring(response.text)
    except Exception:
        # request failed (this includes an empty execute url) or the
        # response is not parseable
        task.status = '5'
        task.save()
        task_failed_handling(
            task,
            "status could not be read, check internet connection or server availability"
        )
        wps_log.warning(
            f"request for task{task_id} could not be posted or returned something unexpected, aborting"
        )
        return

    err_msg = ""

    # check for status node in xml
    status_elem = xml.find(ns_map['Status'])
    if status_elem is not None:
        # if there is status node, search for process status
        still_running = any(
            status_elem.find(ns_map[name]) is not None
            for name in ('ProcessAccepted', 'ProcessStarted', 'ProcessPaused'))
        if still_running:
            status = '3'
        elif status_elem.find(ns_map['ProcessSucceeded']) is not None:
            status = '4'
        else:
            # ProcessFailed or an unknown status node
            status = '5'
            err_msg = "unknown error"
            failed_elem = status_elem.find(ns_map['ProcessFailed'])
            if failed_elem is not None:
                try:
                    err_msg = failed_elem.find(ns_map['ExceptionReport']) \
                        .find(ns_map['Exception']).find(ns_map['ExceptionText']).text
                except (AttributeError, KeyError):
                    # one of the nested nodes (or its tag mapping) is missing
                    err_msg = "unknown error"
    else:
        status = '5'
        err_msg = "unknown error"
        exception_node = xml.find(ns_map['Exception'])
        if exception_node is not None:
            exception_elem = exception_node.find(ns_map['ExceptionText'])
            if exception_elem is not None:
                err_msg = exception_elem.text

    # TODO refactor dirty fix
    status_url = xml.get('statusLocation')
    if status_url is None:
        status = '5'
        status_url = "error_url"
        if not err_msg:
            # do not store an empty error message for the client
            err_msg = "response contains no statusLocation"
    elif not re.match(r"^https?://", status_url, flags=re.IGNORECASE):
        # only add a scheme if there is none, an https url stays untouched
        status_url = "http://" + status_url

    wps_log.info(f"STATUS URL: {status_url}")

    # Update DB Entry
    task.status_url = status_url
    task.status = status
    task.started_at = datetime.now()
    wps_log.debug(f"task{task_id} started at {task.started_at}")
    task.save()

    # Delete execution XML
    if os.path.isfile(filepath):
        os.remove(filepath)

    if task.status == '5':
        task_failed_handling(task, err_msg)
Exemple #7
0
def _without_prefix(text, prefix):
    """
    Returns text without the leading prefix, or text unchanged if it does
    not start with prefix.
    """
    return text[len(prefix):] if text.startswith(prefix) else text


def create_data_doc(task):
    """
    Creates subtree for execute request for model.Task task.

    For every input (role '0') of the task's process the artefact of the
    task is read and appended as Input node to a DataInputs element, either
    as Reference, LiteralData, ComplexData or BoundingBoxData.
    @param task: the task for which the data subtree is created
    @type task: models.Task
    @return: subtree on success, error code 1 otherwise
    @rtype: lxml.etree._Element/int
    """
    wps_log.debug(f"creating data subtree for task{task.id}")
    # returns [] if no match found
    inputs = list(InputOutput.objects.filter(process=task.process, role='0'))
    data_inputs = wps_em.DataInputs()
    # log the inputs read from the database, data_inputs is still empty here
    wps_log.debug(
        f"found inputs: {[task_input.id for task_input in inputs]}")
    for task_input in inputs:
        # try to get artefact from db
        try:
            artefact = Artefact.objects.get(task=task, parameter=task_input)
        except (Artefact.DoesNotExist, Artefact.MultipleObjectsReturned):
            # something is wrong here if artefact has not been created yet
            # as execute documents for next execution are only started if previous task has finished
            # and when previous task has finished, the output data is automatically passed to next tasks input
            wps_log.warning(
                f"Error: artefact for task{task.id}s input{task_input.id} has not been created yet"
            )
            return 1

        # create identifier and title as they are used in any case
        identifier = ows_em.Identifier(task_input.identifier)
        title = ows_em.Title(task_input.title)

        # first check if it is a file path, as data with length over 490 chars will be stored in a file
        # if so insert file path in Reference node
        # TODO: must check if this equals correct url of own server matching to task
        # NOTE(review): the comparison uses get_file_path(task) while the
        # reference below is built from get_file_path(artefact) - confirm
        # which argument is intended
        if artefact.data == utils_module.get_file_path(task):
            wps_log.debug(
                f"file path found in task{task.id}s artefact{artefact.id}s data, inserting as data"
            )
            data_inputs.append(
                wps_em.Input(
                    identifier, title,
                    wps_em.Reference(
                        {"method": "GET"},
                        {ns_map["href"]: utils_module.get_file_path(artefact)
                         })))
            # go to loop header and continue
            continue

        wps_log.debug(
            f"no file path as data in task{task.id}s artefact{artefact.id}, so there must be data"
        )
        # literal data case, there is either a url or real data in the LiteralData element
        # in this case just send the data
        if task_input.datatype == '0':
            wps_log.debug(f"literal data found for task{task.id}")
            literal_data = wps_em.LiteralData(artefact.data)
            # check for attributes
            if artefact.format != 'plain':
                literal_data.set('dataType', artefact.format)

            # just create subtree with identifier, title and data with nested literaldata containing the artefacts data
            data_inputs.append(
                wps_em.Input(identifier, title, wps_em.Data(literal_data)))
        # complex data case, first try to insert the data as it is,
        # if that is not possible insert it base64 decoded as CDATA
        elif task_input.datatype == '1':
            wps_log.debug(f"complex data found for task{task.id}")
            # TODO: complex data format handling (format attributes, CDATA
            # marked in the artefact format) is not implemented yet
            # just append it as if it is in xml format, it can also be inserted as text, will then not be in
            # pretty_print format, but wps server doesn't care about that
            try:
                wps_log.debug(
                    f"just inserting complex data for task{task.id} of artefact{artefact.id} in xml"
                )
                data_inputs.append(
                    wps_em.Input(
                        identifier, title,
                        wps_em.Data(wps_em.ComplexData(artefact.data))))
            except (TypeError, ValueError):
                # the element builder rejected the data as element content
                wps_log.debug(
                    f"inserting CDATA for task{task.id} of artefact{artefact.id} in xml"
                )
                data_inputs.append(
                    wps_em.Input(
                        identifier, title,
                        wps_em.Data(
                            wps_em.ComplexData(
                                etree.CDATA(base64.b64decode(
                                    artefact.data))))))
        # bounding box case there should just be lowercorner and uppercorner data
        elif task_input.datatype == '2':
            wps_log.debug(
                f"boundingbox data found for task{task.id}: {artefact.data}")
            lower_corner = ows_em.LowerCorner()
            upper_corner = ows_em.UpperCorner()
            data = artefact.data
            has_both_corners = (len(data.split("LowerCorner")) == 2
                                and len(data.split("UpperCorner")) == 2)
            wps_log.debug(f"{has_both_corners}")
            if has_both_corners:
                # the upper corner is read from the first, the lower corner
                # from the second ';' separated part
                bbox_corners = data.split(";")
                # remove the label as prefix; lstrip() would strip a set of
                # characters and could also eat the beginning of the value
                lower_corner_data = _without_prefix(
                    bbox_corners[1], 'LowerCorner').split(' ')
                upper_corner_data = _without_prefix(
                    bbox_corners[0], 'UpperCorner').split(' ')
                upper_corner = ows_em.UpperCorner(
                    f"{upper_corner_data[0]} {upper_corner_data[1]}")
                lower_corner = ows_em.LowerCorner(
                    f"{lower_corner_data[0]} {lower_corner_data[1]}")

            # quite strange, but this node is called BoundingBoxData for inputs, for outputs it's just BoundingBox
            # also for inputs it is used with wps namespace, for outputs the ows namespace is used
            bbox_elem = wps_em.Data(
                wps_em.BoundingBoxData(lower_corner, upper_corner, {
                    'crs': 'EPSG:4326',
                    'dimensions': '2'
                }))

            # finally create subtree
            data_inputs.append(wps_em.Input(identifier, title, bbox_elem))
    # TODO: check if something is missing
    wps_log.debug(f"finished input xml generation for task{task.id}")
    return data_inputs
Exemple #8
0
def scheduler():
    """
    Main scheduling function.

    Walks through all workflows and looks at every task with status '1'.
    A task is set to status '2' and remembered for sending if all tasks its
    incoming edges come from have status '4' and none of its input artefacts
    (role '0') is missing or without data. It is set to status '5' if a
    prior task with status '5' is found. Afterwards the execution XML files
    are generated and the remembered tasks are sent to their server.
    @return: None
    @rtype: NoneType
    """

    # TODO: set to changeable by settings & config file
    wps_log.debug("starting schedule")
    module_dir = os.path.dirname(os.path.abspath(__file__))
    xml_dir = os.path.join(module_dir, 'testfiles/')

    scheduled_ids = []

    for workflow in Workflow.objects.all():
        candidates = Task.objects.filter(workflow=workflow, status='1')
        wps_log.debug(
            f"found {len(candidates)} tasks in workflow{workflow.id}")
        for candidate in candidates:
            predecessor_failed = False
            predecessors_done = True
            incoming_edges = Edge.objects.filter(to_task=candidate)
            wps_log.debug(
                f"found {len(incoming_edges)} edges to task{candidate.id} in workflow{workflow.id}"
            )
            for edge in incoming_edges:
                # guard: the first prior task that is not finished ends the check
                if edge.from_task.status != '4':
                    wps_log.debug(
                        f"task{candidate.id}s prior task{edge.from_task.id} is not finished"
                    )
                    predecessors_done = False
                    if edge.from_task.status == '5':
                        wps_log.debug(
                            f"task{candidate.id}'s prior task{edge.from_task.id} has failed"
                        )
                        predecessor_failed = True
                    break

                wps_log.debug(
                    f"task{candidate.id}'s prior task{edge.from_task.id} is finished"
                )
                if not Artefact.objects.filter(task=candidate, role='0'):
                    wps_log.warning(
                        f"something is wrong here, task{candidate.id} has no artefacts,"
                        f"but there should at least be input artefacts")
                    predecessors_done = False
                    break

                for input_artefact in Artefact.objects.filter(task=candidate,
                                                              role='0'):
                    wps_log.debug(
                        f"checking data of artefact{input_artefact.id} of task{candidate.id}"
                    )
                    if not input_artefact.data:
                        wps_log.warning(
                            f"task{candidate.id} has artefact{input_artefact.id} which has no data"
                        )
                        predecessors_done = False
                        # leaves the artefact loop only, the next edge is
                        # still checked
                        break

            if predecessor_failed:
                candidate.status = '5'
                candidate.save()
            elif predecessors_done:
                wps_log.debug(
                    f"previous task is finished, scheduling now following task{candidate.id}"
                )
                candidate.status = '2'
                scheduled_ids.append(candidate.id)
                candidate.save()

    # generate execute xmls for all tasks with status waiting
    xml_generator(xml_dir)

    wps_log.debug(f"xmls generated for tasks: {scheduled_ids}")

    # send tasks
    for scheduled_id in scheduled_ids:
        wps_log.debug(
            f"sending execution request to server for task{scheduled_id}")
        send_task(scheduled_id, xml_dir)
Exemple #9
0
def xml_generator(xml_dir):
    """
    Traverses Database and generates execution XML files for every Task set to status WAITING

    For every task with status '2' an Execute document is built from the
    process identifier, the data inputs of the task and a ResponseDocument
    listing all outputs of the process as references. The document is
    written to task<id>.xml in xml_dir. Tasks whose process or input
    artefacts are missing are skipped.
    @param xml_dir: Directory where XMLs are generated in
    @type xml_dir: string
    @return: None
    @rtype: NoneType
    """
    wps_log.debug("starting xml generator")
    # filter() returns an empty result if nothing matches, it does not raise
    task_list = list(Task.objects.filter(status='2'))
    wps_log.debug(f"scheduled tasks: {[task.id for task in task_list]}")
    for task in task_list:
        try:
            process = task.process
        except Process.DoesNotExist:
            # process not found, skip this task but go on with the others
            wps_log.warning(f"process of task{task.id} not found")
            continue
        root = wps_em.Execute(ows_em.Identifier(process.identifier))
        root.set('service', 'WPS')
        root.set('version', '1.0.0')
        inputs_tree = create_data_doc(task)
        if inputs_tree == 1:
            # error code, something wrong with task TODO: check for better handling?
            wps_log.warning(f"Error: missing input artefact for task{task.id}")
            continue
        root.append(inputs_tree)

        wps_log.debug(
            f"successfully inserted inputs to xml document for task{task.id}")

        response_doc = wps_em.ResponseDocument()
        response_doc.set('storeExecuteResponse', 'true')
        response_doc.set('lineage', 'true')
        response_doc.set('status', 'true')

        output_list = list(
            InputOutput.objects.filter(process=task.process, role='1'))
        wps_log.debug(
            f"list of outputs of task{task.id}: {[output.id for output in output_list]}"
        )
        for output in output_list:
            response_doc.append(
                wps_em.Output(ows_em.Identifier(output.identifier),
                              ows_em.Title(output.title),
                              {'asReference': 'true'}))

        root.append(wps_em.ResponseForm(response_doc))

        wps_log.debug(f"successfully created xml for task{task.id}")

        # for testing let pretty_print=True for better readability
        xml_text = etree.tostring(root, pretty_print=True).decode()

        # write to file
        # TODO: rework if file path problem is solved
        try:
            with open(f"{xml_dir}/task{task.id}.xml", 'w') as xml_file:
                xml_file.write(xml_text)
        except OSError:
            wps_log.warning(f"writing failed for task{task.id}")
        else:
            wps_log.debug(
                f"successfully written xml of task{task.id} to file, ready for sending to server"
            )