コード例 #1
0
ファイル: shell.py プロジェクト: kd0kfo/dag
def runprocess(proc, message_queue):
    """
    Called by the master shell program, this function forks a shell process.
    The master process returns the PID of the child. The child process runs
    the shell process and then sends a message back to the master process
    indicating its status.

    The child never returns to the caller; it always ends by raising
    SystemExit. If proc.start() raises, the child marks the process as FAIL,
    still reports that final state to the master and exits with status 1,
    instead of letting the exception unwind into the master's call stack
    inside the forked copy.

    @param proc: Process to run in the child. Its state, message_queue and
        workunit_name attributes are used here.
    @param message_queue: Queue used to report state changes to the master
    @return: PID of the forked child (master process only)
    """
    import os
    import sys
    import traceback
    import smq

    def report_state():
        # The workunit name is the sender, the master is the recipient.
        message_queue.send(smq.Message("state:%d" % proc.state, "str",
                                       proc.workunit_name,
                                       MASTER_SENDER_NAME))

    pid = os.fork()
    if pid:  # Master
        return pid

    # Child
    L.debug("Forked %s as %d" % (proc.workunit_name, os.getpid()))
    # Updating master
    proc.state = States.RUNNING
    proc.message_queue = message_queue
    report_state()

    exit_status = 0
    try:
        proc.start()
    except Exception:
        # Without this the master would see the process as RUNNING forever.
        L.debug("%d failed in start():\n%s" % (os.getpid(),
                                               traceback.format_exc()))
        proc.state = States.FAIL
        exit_status = 1
    else:
        # If proc.start does not update its state, assume SUCCESS
        if proc.state == States.RUNNING:
            proc.state = States.SUCCESS

    report_state()
    L.debug("%d finished start() with status %s" % (os.getpid(),
                                                    strstate(proc.state)))
    # sys.exit rather than the site-provided exit() builtin, which is not
    # guaranteed to exist (e.g. when Python runs with -S).
    sys.exit(exit_status)
コード例 #2
0
def update_dag_job_state(result, is_valid):
    """
    Store the validation outcome of a result in the DAG it belongs to.

    The DAG and the workunit are looked up from result.name. The matching
    process is marked SUCCESS when is_valid is true and FAIL otherwise, and
    the DAG is saved. When the DAG or the workunit cannot be found a notice
    is printed and nothing is changed.

    @param result: Result whose name identifies the DAG and the workunit
    @param is_valid: Whether the result was judged valid
    @raise dag.DagException: Re-raised if the DAG could not be saved
    """
    import dag
    import dag.boinc
    from dag import States, strstate

    root_dag = dag.boinc.result_to_dag(result.name)
    if not root_dag:
        print("Could not get DAG file for %s" % result.name)
        return

    workunit = dag.boinc.name_result2workunit(result.name)
    process = root_dag.get_process(workunit)
    if not process:
        not_found = "In dag %s, could not find workunit %s" % (
            root_dag.filename, workunit)
        print(not_found)
        return

    process.state = States.SUCCESS if is_valid else States.FAIL
    print("Marking %s as %s" % (result.name, strstate(process.state)))

    try:
        root_dag.save()
    except dag.DagException as save_error:
        print("Could not save dag file '%s'" % root_dag.filename)
        raise save_error
コード例 #3
0
def update_dag_job_state(result, is_valid):
    """
    Mark the process behind a result as SUCCESS or FAIL and save its DAG.

    Both the DAG and the workunit name are derived from result.name. If
    either lookup yields nothing, a message is printed and the function
    returns without modifying anything.

    @param result: Result whose name identifies the DAG and the workunit
    @param is_valid: True marks the process SUCCESS, False marks it FAIL
    @raise dag.DagException: Re-raised if saving the DAG fails
    """
    import dag
    import dag.boinc
    from dag import States, strstate

    root_dag = dag.boinc.result_to_dag(result.name)
    if not root_dag:
        print("Could not get DAG file for %s" % result.name)
        return

    workunit_name = dag.boinc.name_result2workunit(result.name)
    target = root_dag.get_process(workunit_name)
    if not target:
        details = (root_dag.filename, workunit_name)
        print("In dag %s, could not find workunit %s" % details)
        return

    # Validation outcome maps directly onto the final process state.
    if not is_valid:
        target.state = States.FAIL
    else:
        target.state = States.SUCCESS
    state_label = strstate(target.state)
    print("Marking %s as %s" % (result.name, state_label))

    try:
        root_dag.save()
    except dag.DagException as failure:
        print("Could not save dag file '%s'" % root_dag.filename)
        raise failure
コード例 #4
0
ファイル: shell.py プロジェクト: kd0kfo/dag
def process_messages(root_dag, message_queue):
    """
    Reads through messages in the queue for the master and acts on them.

    Every message is handled independently: a reply is sent back to the
    sender only when handling that particular message produced one. A state
    update from a process that is not in the DAG is answered with an error
    message and the remaining messages are still processed.

    Handled message contents:
      - "shutdown": sets the global kill_switch
      - "state:<int>": stores the new state of the sending process, saves
        the DAG and, for finished states, drops the process from
        running_children
      - "dump": replies with the result of dump_state
      - anything else: replies with the result of perform_operation

    @see: perform_operation
    @param root_dag: Main DAG object
    @type root_dag: dag.DAG
    @param message_queue: Message queue being read
    @type message_queue: smq.Queue
    """
    global kill_switch
    from smq import Message
    from dag import FINISHED_STATES

    state_prefix = "state:"

    def send(text, recipient):
        message_queue.send(Message(text, "str", MASTER_SENDER_NAME, recipient))

    while message_queue.has_message(MASTER_SENDER_NAME):
        message = message_queue.next(MASTER_SENDER_NAME)
        L.debug("Processing Message from %s: %s..." % (message.sender,
                                                       message.content[0:15]))
        # Reset for each message; otherwise the reply produced for an
        # earlier message would be sent again to every later sender.
        retval = None
        if message.content == "shutdown":
            kill_switch = True
            retval = "Shutting down shell processes"
        elif message.content.startswith(state_prefix):
            proc = root_dag.get_process(message.sender)
            if not proc:
                # Fall through to the reply below so the sender is told.
                retval = (
                    "Cannot change state. Unknown process %s" % message.sender)
            else:
                newstate = message.content[len(state_prefix):]
                proc.state = int(newstate)
                L.debug("Changed state of %s to %s" % (proc.workunit_name,
                                                       strstate(proc.state)))
                root_dag.save()
                if proc.state in FINISHED_STATES:
                    # Collect first, then remove, so the list is not
                    # modified while it is being iterated.
                    for ended in [
                            i for i in running_children
                            if i[0] == proc.workunit_name
                    ]:
                        running_children.remove(ended)
        elif message.content == "dump":
            retval = dump_state(root_dag, message_queue)
        else:
            retval = perform_operation(root_dag, message)
        if retval is not None:
            send(retval, message.sender)
コード例 #5
0
ファイル: update_dag.py プロジェクト: kd0kfo/dag
def modify_dag(root_dag, cmd, cmd_args, debug=False):
    """
    This is the main operating function. This takes a command and performs
    an action on the dag

    Commands handled: attach, print, help, list, remove, run, stage, start,
    recreate, reset, cancel, update, state and uuid.

    @param root_dag: DAG to be modified
    @type root_dag: dag.DAG
    @param cmd: Command to execute
    @type cmd: str
    @param cmd_args: Optional arguments for commands.
    @type cmd_args: list
    @param debug: Optional debug flag. When removing all workunits, each
        workunit name is printed instead of drawing a progress bar.
    @type debug: bool
    @return: Message describing the outcome of the command. For "help" this
        is whatever get_help_string returns.
    @raise Exception: If the command is unknown, required arguments are
        missing, a named workunit does not exist or a workunit cannot be
        started because of incomplete prerequisites.
    @raise dag.DagException: If the command is not supported for the DAG's
        engine or an invalid state name is given.
    """
    import os.path as OP
    import dag

    return_message = ""
    if cmd == "attach":
        from dag.shell import Waiter
        if len(cmd_args) != 2:
            raise Exception("Attach requires a workunit name"
                            " and a process id number (PID).")
        new_process = Waiter(cmd_args[0], [cmd_args[1], ])
        # Every process that has not started yet waits on the new Waiter.
        for process in root_dag.processes:
            if (process.state in [dag.States.CREATED, dag.States.STAGED]
                    and not isinstance(process, Waiter)):
                new_process.children.append(process)
        root_dag.processes.append(new_process)
        root_dag.save()
        return "Attached %s" % cmd_args[0]
    elif cmd == "print":
        if len(cmd_args) == 0:
            return_message += "%s\n" % root_dag
        else:
            proc = root_dag.get_process(cmd_args[0])
            if proc:
                return_message += "%s\n" % proc
            else:
                return_message += "No such process found: {0}\n".format(
                    cmd_args[0])
        return return_message
    elif cmd == "help":
        return get_help_string(cmd_args[0] if cmd_args else None)
    elif cmd == "list":
        for proc in root_dag.processes:
            return_message += "%s: %s\n" % (proc.workunit_name, proc.cmd)
        return return_message
    elif cmd in ["remove", "run", "stage"]:
        if len(cmd_args) == 0:
            raise Exception("%s requires at least one workunit name" % cmd)
        for wuname in cmd_args:
            proc = root_dag.get_process(wuname)
            remove_all = cmd == "remove" and wuname == "all"
            if not proc and not remove_all:
                # Fail with a clear message before touching any files,
                # rather than crashing on None further down.
                raise Exception("No such workunit: %s" % wuname)
            if cmd == "remove":
                if remove_all:
                    from sys import stdin
                    print("Are you sure you want to remove ALL workunits"
                          " (yes or no)?")
                    answer = stdin.readline().strip()
                    if answer not in ["y", "Y", "yes", "Yes", "YES"]:
                        # Anything but an explicit yes aborts the removal.
                        print("Canceled.")
                        exit(1)
                    count = 0
                    progress_bar = None
                    if not debug:
                        from progressbar import ProgressBar, Percentage, Bar
                        num_processes = len(root_dag.processes)
                        if num_processes:
                            progress_bar = ProgressBar(
                                widgets=[Percentage(), Bar()],
                                maxval=num_processes).start()
                    for proc in root_dag.processes:
                        if debug:
                            print("Removing %s" % proc.workunit_name)
                        clean_workunit(root_dag, proc)
                        count += 1
                        if progress_bar:
                            progress_bar.update(count)
                    if progress_bar:
                        print("")  # reset line return
                    root_dag.processes = []  # clear process list
                else:
                    if debug:
                        print("Removing %s" % wuname)
                    clean_workunit(root_dag, proc)
                    root_dag.processes.remove(proc)  # remove process
                    return_message += "Removed %s\n" % wuname
            if cmd in ["run", "stage"]:
                print("Staging %s" % wuname)
                stage_files(root_dag, proc)
                if proc.state == dag.States.CREATED:
                    proc.state = dag.States.STAGED
                if cmd == "run":
                    return_message += "Starting %s\n" % wuname
                    if root_dag.incomplete_prereqs(proc):
                        raise Exception("Cannot start %s."
                                        " Missing dependencies." % wuname)
                    schedule_work(root_dag, proc, root_dag.filename)
                    if isinstance(proc, dag.InternalProcess):
                        proc.state = dag.States.SUCCESS
                        return_message += "Finished %s" % wuname
                    else:
                        proc.state = dag.States.RUNNING
            # save dag after each workunit so completed work is not lost
            root_dag.save()
            print("updated dagfile")
    elif cmd == "start":
        start_processes(root_dag, OP.abspath(root_dag.filename),
                        True, root_dag.num_cores)
        return_message += "Started processes"
    elif cmd == "recreate":
        if not cmd_args:
            raise Exception("recreate requires a specific file type"
                            " to recreate.")
        if cmd_args[0] == "result_template":
            if root_dag.engine != dag.Engine.BOINC:
                raise dag.DagException("Can only make result template"
                                       " with BOINC jobs.")
            if len(cmd_args) < 2:
                raise Exception("recreate result_template requires"
                                " a workunit name.")
            import dag.boinc
            proc = root_dag.get_process(cmd_args[1])
            if not proc:
                raise Exception("No such workunit: %s" % cmd_args[1])
            dag.boinc.create_result_template(proc,
                                             proc.result_template.full_path())
            print("Created result template")
            return_message += "Recreated %s\n" % cmd_args[0]
        else:
            # Nothing was recreated, so do not report that something was.
            print("Do not know how to recreate: '%s'" % cmd_args[0])
    elif cmd == "reset":
        for wuname in cmd_args:
            proc = root_dag.get_process(wuname)
            if not proc:
                return_message += "No such workunit: %s\n" % wuname
                continue
            clean_workunit(root_dag, proc)
            proc.workunit_name = None
            proc.workunit_template = None
            proc.result_template = None
            proc.state = dag.States.CREATED
            root_dag.save()
            return_message += "Reset %s" % wuname
    elif cmd == "cancel":
        if root_dag.engine == dag.Engine.LSF:
            raise dag.DagException("Cannot yet cancel LSF jobs.")
        elif root_dag.engine == dag.Engine.SHELL:
            if not hasattr(root_dag, "message_queue"):
                raise dag.DagException("Cannot stop shell process "
                                       "without message queue")

        proc_list = [root_dag.get_process(wuname) for wuname in cmd_args]
        # Import the engine module explicitly: "import dag" alone does not
        # guarantee that the submodule attribute exists.
        if root_dag.engine == dag.Engine.BOINC:
            import dag.boinc
            dag.boinc.cancel_workunits(proc_list)
        elif root_dag.engine == dag.Engine.SHELL:
            import dag.shell
            dag.shell.cancel_workunits(root_dag, proc_list)
        root_dag.save()
        return_message += "Cancelled %s" % ", ".join(cmd_args)
    elif cmd == "update":
        update_state(cmd_args, root_dag)
        if root_dag.engine == dag.Engine.LSF:
            start_processes(root_dag, root_dag.filename, False)
        return_message += "Updated process"
    elif cmd == "state":
        count_only = "--count" in cmd_args
        # The flag may appear before or after the state name.
        state_args = [arg for arg in cmd_args if arg != "--count"]
        if not state_args:
            raise dag.DagException("Missing state name.")

        def all_state_names():
            return [dag.strstate(i)
                    for i in range(0, dag.States.NUM_STATES)]

        states_to_view = state_args[0]
        if states_to_view == "all":
            states_to_view = ",".join(all_state_names())

        for state_name in states_to_view.split(","):
            state = dag.intstate(state_name.upper())
            if state is None:
                print("%s is not a valid state." % state_name)
                print("States are %s" % ", ".join(all_state_names()))
                raise dag.DagException("Invalid State")
            proc_list = root_dag.get_processes_by_state(state)
            if count_only:
                return_message += "%s: %d\n" % (dag.strstate(state),
                                                len(proc_list))
            else:
                for i in proc_list:
                    return_message += "%s" % i
    elif cmd == "uuid":
        if not cmd_args:
            raise Exception("uuid requires a workunit name")
        proc = root_dag.get_process(cmd_args[0])
        if not proc:
            raise Exception("No such workunit: %s" % cmd_args[0])
        return_message += str(proc.uuid)
    else:
        raise Exception("Unknown command: %s" % cmd)
    return return_message
コード例 #6
0
ファイル: update_dag.py プロジェクト: kd0kfo/dag
              " name is not given, all processes are listed."),
    "recreate": ("Regenerates specified temporary files."
                 " Options are: 'result_template'"),
    "reset": ("Clears generated values, such as workunit name,"
              " and moves process to CREATED state."),
    "remove": ("Removes a workunit. 'all' can be supplied instead"
               " of a workunit name to remove ALL of the workunits."),
    "run": ("Stars a specific process, by workunit name. This should be run"
            " after 'stage'"),
    "stage": ("Copies necessary files to their required locations"
              " on the server."),
    "start": "Starts ALL processes",
    "state": ("Prints processes in a given state. The optional \"--count\""
              " flag may be used to show only a count of the number "
              "of processes in that state. States are: {0}"
              .format(", ".join([dag.strstate(i)
                                 for i in range(0, dag.States.NUM_STATES)]))),
    "update": "Update the state of a workunit.",
    "uuid": "Gets UUID for a work unit."
    }


def get_help_string(command=None):
    """
    Prints help for a command using the command_help dict. Help is printed
     to standard output.

    @param command: Command for which help is required
    @type command: str
    """
    if not command in command_help:
コード例 #7
0
def clean(result):
    """
    Copies the uploaded output of a result into its final location.

    Only results whose name ends in "_0" are handled. The destination is
    proc.directory when the process's first output file has a dir set, and
    the directory holding the DAG file otherwise. If the process is neither
    SUCCESS nor RUNNING, a "bad_results" subdirectory of that location is
    used instead. If the destination file already exists the new output is
    appended to it.

    @param result: Result to clean up. Its name and output_files attributes
        are used.
    @return: True if the result was skipped on purpose (name does not end
        in "_0", or the process has no output files) or its output was
        copied/appended; False if the DAG, the process or the uploaded file
        could not be found; None if the result name is malformed.
    @raise Exception: Re-raises whatever went wrong while copying or
        appending, after the output has been handed to
        dag.boinc.save_bad_res_output.
    """
    import re
    import dag.util as dag_utils
    import boinctools
    import dag
    import dag.boinc
    import shutil
    import stat
    from os import path as OP
    import os

    def error_text(error):
        # Python 3 exceptions have no "message" attribute; reading it inside
        # an error handler would raise AttributeError and hide the failure.
        return getattr(error, "message", None) or str(error)

    def report_copy_failure(error):
        # Shared by the copy and append paths; reads the enclosing
        # function's upload_path, wuname and dagdir at call time.
        dag.boinc.save_bad_res_output(upload_path, wuname)
        print(
            "ERROR - Could not copy result output file to data directory, %s. It was copied to invalid_results/%s"
            % (dagdir, wuname))
        print("ERROR - Message:\n%s" % error_text(error))
        if isinstance(error, IOError):
            print(error.strerror)

    if len(result.name) >= 2:
        if result.name[-2:] != "_0":
            print("Not cleaning %s" % result.name)
            return True

    print("Cleaning %s" % result.name)

    # The workunit name is the result name without its trailing "_<digits>".
    matches = re.findall(r"^(.*)_\d*$", result.name)
    if not matches:
        print("Malformed result name")
        return None
    wuname = matches[0]
    try:
        the_dag = dag.boinc.result_to_dag(result.name)
    except dag_utils.NoDagMarkerException as ndme:
        print("Warning: Missing dag")
        print("Skipping clean up")
        print("Message:")
        print(error_text(ndme))
        return False
    except dag.MissingDAGFile:
        print(
            "Missing dag file for result '%s'. Attempting to move output to invalid_results directory"
            % result.name)
        # NOTE(review): every other call site uses
        # dag.boinc.save_bad_res_output; confirm that boinctools provides
        # this function as well.
        for output_file in result.output_files:
            boinctools.save_bad_res_output(output_file[0], wuname)
        return False

    if not the_dag:
        return False

    dagpath = dag.boinc.marker_to_dagpath(
        dag.boinc.dag_marker_filename(wuname))
    dagdir = OP.split(dagpath)[0]

    print("Getting process %s" % wuname)
    proc = the_dag.get_process(wuname)
    if not proc:
        print(
            "%s was not found in the job batch file %s. Moving resultfile to invalid_results"
            % (wuname, dagpath))
        for output_file in result.output_files:
            dag.boinc.save_bad_res_output(output_file[0], wuname)
        return False

    if not proc.output_files:
        return True

    source_file = "%s_0" % result.name
    output_file = proc.output_files[0]
    logical_name = output_file.logical_name
    print("Clean filename \"%s\"" % logical_name)

    # Get output dir
    if output_file.dir:
        output_dir = proc.directory  # final destination (If it can be written there)
    else:
        output_dir = dagdir

    # Output of a process that is neither SUCCESS nor RUNNING is kept apart
    # in a bad_results subdirectory.
    if proc.state not in [dag.States.SUCCESS, dag.States.RUNNING]:
        output_dir = OP.join(output_dir, "bad_results")
        print(
            "Process has not been marked as successful. It is %s instead. Saving output in %s"
            % (dag.strstate(proc.state), output_dir))
        if not OP.isdir(output_dir):
            os.mkdir(output_dir)

    dest_file = OP.join(output_dir, logical_name)
    upload_path = boinctools.dir_hier_path(source_file).replace(
        "/download", "/upload")
    if not OP.isfile(upload_path):
        print("Output file not found: '%s'" % upload_path)
        return False

    if not OP.isfile(dest_file):
        # Copy file. If that fails, the output is handed to
        # save_bad_res_output before the error is re-raised.
        print("Copying {0} to {1}.".format(upload_path, dest_file))
        try:
            shutil.copy(upload_path, dest_file)
            os.chmod(
                dest_file,
                stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)
        except Exception as e:
            report_copy_failure(e)
            raise
    else:  # output already exists, append.
        try:
            # Read first so a failed read leaves the existing file untouched.
            with open(upload_path, "r") as new_file:
                new_output = new_file.read()
            with open(dest_file, "a") as old_file:
                old_file.write("\n")
                old_file.write(new_output)
        except Exception as e:
            report_copy_failure(e)
            raise
    return True
コード例 #8
0
def clean(result):
    """
    Moves the uploaded output file of a result to where it belongs.

    Results whose name does not end in "_0" are left alone. The output goes
    to proc.directory when the first output file of the process has a dir
    set, otherwise to the directory of the DAG file. For a process that is
    neither SUCCESS nor RUNNING a "bad_results" subdirectory is used. Output
    is appended when the destination file already exists.

    @param result: Result to clean up. Its name and output_files attributes
        are used.
    @return: True if the result was deliberately skipped (name does not end
        in "_0", or the process has no output files) or its output was
        copied/appended; False if the DAG, the process or the uploaded file
        could not be found; None if the result name is malformed.
    @raise Exception: Re-raises the error hit while copying or appending,
        after the output has been passed to dag.boinc.save_bad_res_output.
    """
    import re
    import dag.util as dag_utils
    import boinctools
    import dag
    import dag.boinc
    import shutil
    import stat
    from os import path as OP
    import os

    def describe(error):
        # Exceptions only carry a "message" attribute on Python 2; fall back
        # to str() so the handler itself cannot fail with AttributeError.
        return getattr(error, "message", None) or str(error)

    def save_and_report(error):
        # Common failure path for copy and append; upload_path, wuname and
        # dagdir come from the enclosing function at call time.
        dag.boinc.save_bad_res_output(upload_path, wuname)
        print("ERROR - Could not copy result output file to data directory, %s. It was copied to invalid_results/%s" % (dagdir, wuname))
        print("ERROR - Message:\n%s" % describe(error))
        if isinstance(error, IOError):
            print(error.strerror)

    if len(result.name) >= 2:
        if result.name[-2:] != "_0":
            print("Not cleaning %s" % result.name)
            return True

    print("Cleaning %s" % result.name)

    # Strip the trailing "_<digits>" to get the workunit name.
    found = re.findall(r"^(.*)_\d*$", result.name)
    if not found:
        print("Malformed result name")
        return None
    wuname = found[0]
    try:
        the_dag = dag.boinc.result_to_dag(result.name)
    except dag_utils.NoDagMarkerException as ndme:
        print("Warning: Missing dag")
        print("Skipping clean up")
        print("Message:")
        print(describe(ndme))
        return False
    except dag.MissingDAGFile:
        print("Missing dag file for result '%s'. Attempting to move output to invalid_results directory" % result.name)
        # NOTE(review): the other call sites use
        # dag.boinc.save_bad_res_output; confirm boinctools has it too.
        for output_file in result.output_files:
            boinctools.save_bad_res_output(output_file[0], wuname)
        return False

    if not the_dag:
        return False

    dagpath = dag.boinc.marker_to_dagpath(dag.boinc.dag_marker_filename(wuname))
    dagdir = OP.split(dagpath)[0]

    print("Getting process %s" % wuname)
    proc = the_dag.get_process(wuname)
    if not proc:
        print("%s was not found in the job batch file %s. Moving resultfile to invalid_results" % (wuname, dagpath))
        for output_file in result.output_files:
            dag.boinc.save_bad_res_output(output_file[0], wuname)
        return False

    if not proc.output_files:
        return True

    source_file = "%s_0" % result.name
    output_file = proc.output_files[0]
    logical_name = output_file.logical_name
    print("Clean filename \"%s\"" % logical_name)

    # Get output dir
    if output_file.dir:
        output_dir = proc.directory  # final destination (If it can be written there)
    else:
        output_dir = dagdir

    # Output of a process that is neither SUCCESS nor RUNNING goes into a
    # separate bad_results subdirectory.
    if proc.state not in [dag.States.SUCCESS, dag.States.RUNNING]:
        output_dir = OP.join(output_dir, "bad_results")
        print("Process has not been marked as successful. It is %s instead. Saving output in %s" % (dag.strstate(proc.state), output_dir))
        if not OP.isdir(output_dir):
            os.mkdir(output_dir)

    dest_file = OP.join(output_dir, logical_name)
    upload_path = boinctools.dir_hier_path(source_file).replace("/download", "/upload")
    if not OP.isfile(upload_path):
        print("Output file not found: '%s'" % upload_path)
        return False

    if not OP.isfile(dest_file):
        # Copy file. On failure the output is passed to save_bad_res_output
        # and the error is re-raised.
        print("Copying {0} to {1}.".format(upload_path, dest_file))
        try:
            shutil.copy(upload_path, dest_file)
            os.chmod(dest_file, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP)
        except Exception as e:
            save_and_report(e)
            raise
    else:  # output already exists, append.
        try:
            # Read before writing so a failed read does not leave a stray
            # newline in the existing file.
            with open(upload_path, "r") as uploaded:
                extra_output = uploaded.read()
            with open(dest_file, "a") as old_file:
                old_file.write("\n")
                old_file.write(extra_output)
        except Exception as e:
            save_and_report(e)
            raise
    return True