Example #1
def the_job(test_dir, resources):
    job_script = format_script("#!/usr/bin/python3\n" + textwrap.dedent("""
        import os
        import sys
        import time

        elapsed = 0
        while elapsed < int(sys.argv[1]):""" + "".join(f"""
            os.system('condor_status -ads ${{_CONDOR_SCRATCH_DIR}}/.update.ad -af Assigned{resource}s {resource}sMemoryUsage')
        """ for resource in resources) + """
            time.sleep(1)
            elapsed += 1
        """))

    script_file = test_dir / "poll-memory.py"
    write_file(script_file, job_script)

    job_spec = {
        "executable": script_file.as_posix(),
        "arguments": "17",
        "log": (test_dir / "events.log").as_posix(),
        "output":
        (test_dir / "poll-memory.$(Cluster).$(Process).out").as_posix(),
        "error":
        (test_dir / "poll-memory.$(Cluster).$(Process).err").as_posix(),
        "getenv": "true",
        "LeaveJobInQueue": "true",
    }

    for resource in resources:
        job_spec[f"request_{resource}s"] = "2"

    return job_spec
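
For reference, with a single hypothetical resource name (say resources = ["SQUID"]), the template above renders to roughly the following job script, which polls the slot's .update.ad once per second for the assigned resources and their reported memory usage:

#!/usr/bin/python3

import os
import sys
import time

elapsed = 0
while elapsed < int(sys.argv[1]):
    os.system('condor_status -ads ${_CONDOR_SCRATCH_DIR}/.update.ad -af AssignedSQUIDs SQUIDsMemoryUsage')
    time.sleep(1)
    elapsed += 1

Example #2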
def condor(test_dir, slot_config):
    for resource in resources.keys():
        usage_sequence = { f"{resource}{i}": j for i, j in enumerate(usages[resource]) }
        discovery_script = format_script(discovery_script_for(resource, usage_sequence))
        write_file(test_dir / f"{resource}-discovery.py", discovery_script)

        peak_sequence = { f"{resource}{i}": j for i, j in enumerate(peaks[resource]) }
        monitor_script = both_monitor_script(resource, usage_sequence, peak_sequence)
        write_file(test_dir / f"{resource}-monitor.py", monitor_script)

    with Condor(
        local_dir=test_dir / "condor",
        config={**slot_config, "TEST_DIR": test_dir.as_posix()},
    ) as condor:

        # Ornithology will run condor_who to verify that all the daemons are running,
        # but occasionally, not all slots will have made it to the collector

        num_slots = int(slot_config["NUM_SLOTS"])
        loop_count = 0
        while num_slots != len(condor.status(ad_type=htcondor.AdTypes.Startd, projection=["SlotID"])):
            loop_count += 1
            assert loop_count < 20
            time.sleep(1)
        yield condor
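
This fixture assumes module-level resources, usages, and peaks mappings and a slot_config fixture supplied elsewhere. As a sketch only (the knob values below are illustrative, not taken from the original test), a slot_config wiring one custom resource into HTCondor's standard discovery and monitoring hooks might look like:

slot_config = {
    "NUM_CPUS": "16",
    "NUM_SLOTS": "16",
    # advertise the custom resource via the discovery script written above
    "MACHINE_RESOURCE_INVENTORY_SQUIDs": "$(TEST_DIR)/SQUID-discovery.py",
    # run the monitor script periodically to report per-slot usage
    "STARTD_CRON_JOBLIST": "SQUIDs_MONITOR",
    "STARTD_CRON_SQUIDs_MONITOR_EXECUTABLE": "$(TEST_DIR)/SQUID-monitor.py",
    "STARTD_CRON_SQUIDs_MONITOR_MODE": "periodic",
    "STARTD_CRON_SQUIDs_MONITOR_PERIOD": "10",
    "STARTD_CRON_SQUIDs_MONITOR_METRICS": "SUM:SQUIDs",
}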
Example #3
def path_to_exit_one(test_dir):
    exit_one_script = '''#!/bin/bash
    echo "exit_one = 1"
    exit 1
    '''
    path = test_dir / "exit_one"
    write_file(path, exit_one_script)
    return path
Example #4
def path_to_sig_kill(test_dir):
    sig_kill_script = '''#!/bin/bash
    echo "sig_kill = 1"
    kill -KILL $$
    '''
    path = test_dir / "sig_kill"
    write_file(path, sig_kill_script)
    return path
Example #5
def job_shell_file(test_dir, job_python_file, plugin_log_file):
    job_shell_file = test_dir / "debug.sh"
    contents = format_script(f"""
        #!/bin/bash
        exec {job_python_file} "$@" &> {plugin_log_file}
    """)
    write_file(job_shell_file, contents)
    return job_shell_file
Example #6
def path_to_exit_zero(test_dir):
    exit_zero_script = '''#!/bin/bash
    echo "exit_zero = 0"
    exit 0
    '''
    path = test_dir / "exit_zero"
    write_file(path, exit_zero_script)
    return path
Example #7
def path_to_ordering_script(test_dir):
    script="""
    #!/bin/bash
    cat a_file
    exit 0
    """

    path = test_dir / "ordering.sh"
    write_file(path, format_script(script))

    return path
Example #8
def condor(test_dir, slot_config, discovery_script, monitor_script):
    write_file(test_dir / "discovery.py", discovery_script)
    write_file(test_dir / "monitor.py", monitor_script)

    with Condor(
        local_dir=test_dir / "condor",
        config={**slot_config, "TEST_DIR": test_dir.as_posix()},
    ) as condor:
        # try to make sure the monitor runs before we continue with the test
        time.sleep(MONITOR_PERIOD * 1.5)
        yield condor
Example #9
def condor(test_dir, slot_config, discovery_script, monitor_script):
    write_file(test_dir / "discovery", discovery_script)
    write_file(test_dir / "monitor", monitor_script)

    with Condor(
            local_dir=test_dir / "condor",
            config={
                **slot_config, "TEST_DIR": test_dir.as_posix()
            },
    ) as condor:
        yield condor
Example #10
def condor(test_dir, slot_config):
    for resource in resources.keys():
        usage_sequence = { f"{resource}{i}": j for i, j in enumerate(usages[resource]) }
        discovery_script = format_script(discovery_script_for(resource, usage_sequence))
        write_file(test_dir / f"{resource}-discovery.py", discovery_script)

        peak_sequence = { f"{resource}{i}": j for i, j in enumerate(peaks[resource]) }
        monitor_script = both_monitor_script(resource, usage_sequence, peak_sequence)
        write_file(test_dir / f"{resource}-monitor.py", monitor_script)

    with Condor(
        local_dir=test_dir / "condor",
        config={**slot_config, "TEST_DIR": test_dir.as_posix()},
    ) as condor:
        yield condor
Example #11
def jobids_for_sleep_jobs(test_dir, condor, max_idle, max_materialize):
    sub_description = """
        executable = /bin/sleep
        arguments = 10

        request_memory = 1MB
        request_disk = 1MB

        max_materialize = {max_materialize}
        max_idle = {max_idle}

        queue {q}
    """.format(
        max_materialize=max_materialize,
        max_idle=max_idle,
        q=max_materialize + max_idle + 1,
    )
    submit_file = write_file(test_dir / "queue.sub", sub_description)

    submit_cmd = condor.run_command(["condor_submit", submit_file])
    clusterid, num_procs = parse_submit_result(submit_cmd)

    jobids = [JobID(clusterid, n) for n in range(num_procs)]

    condor.job_queue.wait_for_events(
        {jobid: [SetJobStatus(JobStatus.COMPLETED)] for jobid in jobids}, timeout=60
    )

    return jobids
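
For example, with max_materialize=2 and max_idle=1, the queue count is 2 + 1 + 1 = 4 and the generated submit description reads:

executable = /bin/sleep
arguments = 10

request_memory = 1MB
request_disk = 1MB

max_materialize = 2
max_idle = 1

queue 4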
Example #12
def submit_dataflow_skip_job_cmd(
    test_dir,
    default_condor,
    finished_output_jobid,
    path_to_sleep,
    dataflow_input_file,
    dataflow_output_file,
):
    """
    After submit_output_job_cmd() has completed, we now want to send a new job
    with an argument to skip if it's a dataflow job.
    """

    sub_description = """
        executable = {exe}
        arguments = 10
        transfer_input_files = {input}
        transfer_output_files = {output}
        should_transfer_files = YES
        skip_if_dataflow = True
        log = {log}

        queue
    """.format(
        exe=path_to_sleep,
        input=dataflow_input_file,
        output=dataflow_output_file,
        log=test_dir / "submit" / "dataflow-skip.log",
    )
    submit_file = write_file(test_dir / "submit" / "dataflow-skip.sub",
                             sub_description)

    return default_condor.run_command(["condor_submit", submit_file])
Example #13
def job_queue_events_for_sleep_job(test_dir, default_condor):
    sub_description = """
        executable = /bin/sleep
        arguments = 10
        
        queue
    """
    submit_file = write_file(test_dir / "job.sub", sub_description)

    submit_cmd = default_condor.run_command(["condor_submit", submit_file])
    clusterid, num_procs = parse_submit_result(submit_cmd)
    jobid = JobID(clusterid, 0)

    default_condor.job_queue.wait_for_events(
        {
            jobid: [
                (  # when the job starts running, hold it
                    SetJobStatus(JobStatus.RUNNING),
                    lambda jobid, event: default_condor.run_command(
                        ["condor_hold", jobid]),
                ),
                (  # once the job is held, release it
                    SetJobStatus(JobStatus.HELD),
                    lambda jobid, event: default_condor.run_command(
                        ["condor_release", jobid]),
                ),
                SetJobStatus(JobStatus.COMPLETED),
            ]
        },
        timeout=60,
    )

    return default_condor.job_queue.by_jobid[jobid]
Example #14
def peak_job(test_dir, resource):
    script_file = (test_dir / "poll-memory.py")
    write_file(script_file, peak_job_script(resource))

    return {
        "executable": script_file.as_posix(),
        "arguments": "17",
        f"request_{resource}s": "1",
        "log": (test_dir / "events.log").as_posix(),
        "output":
        (test_dir / "poll-memory.$(Cluster).$(Process).out").as_posix(),
        "error":
        (test_dir / "poll-memory.$(Cluster).$(Process).err").as_posix(),
        "getenv": "true",
        "LeaveJobInQueue": "true",
    }
Example #15
def clusterid_for_itemdata(test_dir, condor):
    # enable late materialization, but with a high enough limit that they all
    # show up immediately (on hold, because we don't need to actually run
    # the jobs to do the tests)
    sub_description = """
        executable = /bin/sleep
        arguments = 0

        request_memory = 1MB
        request_disk = 1MB

        max_materialize = 5

        hold = true

        My.Foo = "$(Item)"

        queue in (A, B, C, D, E)
    """
    submit_file = write_file(test_dir / "queue_in.sub", sub_description)

    submit_cmd = condor.run_command(["condor_submit", submit_file])
    clusterid, num_procs = parse_submit_result(submit_cmd)

    jobids = [JobID(clusterid, n) for n in range(num_procs)]

    condor.job_queue.wait_for_events(
        {jobid: [SetAttribute("Foo", None)] for jobid in jobids}, timeout=10
    )

    yield clusterid

    condor.run_command(["condor_rm", clusterid])
Example #16
def condor(test_dir, slot_config, discovery_script, monitor_script):
    write_file(test_dir / "discovery.py", discovery_script)
    write_file(test_dir / "monitor.py", monitor_script)

    with Condor(
        local_dir=test_dir / "condor",
        config={**slot_config, "TEST_DIR": test_dir.as_posix()},
    ) as condor:
        # Ornithology will run condor_who to verify that all the daemons are running,
        # but occasionally, not all 16 slots will have made it to the collector

        loop_count = 0
        while 16 != len(condor.status(ad_type=htcondor.AdTypes.Startd, projection=["SlotID"])):
            loop_count += 1
            assert loop_count < 20
            time.sleep(1)
        yield condor
Example #17
def submit_sleep_job_cmd(test_dir, default_condor, path_to_sleep):
    sub_description = """
        executable = {exe}
        arguments = 1
        
        queue
    """.format(exe=path_to_sleep)
    submit_file = write_file(test_dir / "submit" / "job.sub", sub_description)

    return default_condor.run_command(["condor_submit", submit_file])
Example #18
def path_to_the_job_script(test_dir):
    script = """
    #!/usr/bin/python3

    import sys
    import time

    total_steps = 24
    num_completed_steps = 0
    try:
        with open("saved-state", "r") as saved_state:
            num_completed_steps = int(saved_state.readline().strip())
    except IOError:
        pass

    while num_completed_steps < total_steps:
        print(f"Starting step {num_completed_steps}.")

        time.sleep(3)
        num_completed_steps += 1

        if num_completed_steps % 5 == 0:
            print(f"Checkpointing after {num_completed_steps}.")
            try:
                with open("saved-state", "w") as saved_state:
                    saved_state.write(f"{num_completed_steps}")
                sys.exit(17)
            except IOError:
                print("Failed to write checkpoint.", file=sys.stderr);
                sys.exit(1)

    print(f"Completed all {total_steps} steps.")
    sys.exit(0)
    """

    path = test_dir / "counting.py"
    write_file(path, format_script(script))

    return path
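
Exit code 17 here only acts as a checkpoint signal if the submit description declares it; a hypothetical submit description for this self-checkpointing job might read (file names are illustrative, not from the original test):

executable = counting.py
checkpoint_exit_code = 17
transfer_checkpoint_files = saved-state
should_transfer_files = YES
when_to_transfer_output = ON_EXIT
log = counting.log
queue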
Example #19
def path_to_directory_script(default_condor, test_dir):
    script=f"""
    #!/bin/bash
    export CONDOR_CONFIG={default_condor.config_file}
    export PATH=$PATH:{os.environ["PATH"]}
    cat data/data_file >> {test_dir}/directory-test-file
    DATA=`cat data/data_file`
    if [[ $DATA == "first job modification" ]]; then
        echo "second job modification" > data/data_file
        exit 0
    fi
    echo "first job modification" > data/data_file
    condor_vacate_job $1
    # Don't exit before we've been vacated.
    sleep 60
    exit 0
    """

    path = test_dir / "directory.sh"
    write_file(path, format_script(script))

    return path
Example #20
def path_to_prp_script(default_condor, test_dir):
    script=f"""
    #!/bin/bash
    export CONDOR_CONFIG={default_condor.config_file}
    export PATH=$PATH:{os.environ["PATH"]}

    DATA=`tail -n 1 {test_dir}/prp-test-file`
    echo "Starting up..." >> {test_dir}/prp-test-file

    if [[ $DATA == "" ]]; then
        echo "step one" >> {test_dir}/prp-test-file
        echo "step one" >> prp/data/data_file
        mkdir prp/data/subdir
        echo "step one" >> prp/data/subdir/other_data_file
        exit 85
    fi
    if [[ $DATA == "step one" ]]; then
        echo "step two" >> {test_dir}/prp-test-file
        echo "step two" >> prp/data/data_file
        condor_vacate_job $1
        # Don't exit before we've been vacated.
        sleep 60
        # We did not succeed.
        exit 1
    fi
    if [[ $DATA == "step two" ]]; then
        echo "step three" >> {test_dir}/prp-test-file
        echo "step three" >> prp/data/data_file
        exit 0
    fi
    echo "step never-never" >> {test_dir}/prp-test-file
    echo "step never-never" >> prp/data/data_file
    exit 1
    """

    path = test_dir / "prp.sh"
    write_file(path, format_script(script))

    return path
Example #21
def path_to_job_two_script(test_dir):
    script = """
    #!/usr/bin/python3

    import sys
    import time

    nap = 0
    nap_lengths = [5, 10, 15]

    try:
        with open("saved-state", "r") as saved_state:
            nap = int(saved_state.readline().strip())
            print(f"Restarting naps from #{nap}")
    except IOError:
        pass

    print(f"Nap #{nap} will be {nap_lengths[nap]} seconds long.")
    time.sleep(nap_lengths[nap])
    nap += 1

    if nap >= len(nap_lengths):
        print(f"Completed all naps.")
        sys.exit(0)

    try:
        with open("saved-state", "w") as saved_state:
            saved_state.write(f"{nap}")
            sys.exit(17)
    except IOError:
        print("Failed to write checkpoint.", file=sys.stderr);
        sys.exit(1)
    """

    path = test_dir / "counting.py"
    write_file(path, format_script(script))

    return path
Example #22
def submit_output_job_cmd(test_dir, default_condor, dataflow_input_file,
                          dataflow_output_file):
    # Start by creating an input file
    input_description = "dataflow-input"
    input_file = write_file(dataflow_input_file, input_description)

    # First job in our workflow outputs a basic file to disk.
    # Later jobs will compare timestamps of input and output to determine if dataflow.
    executable_description = """#!/bin/sh

    echo "dataflow-output" > {output}
    """.format(output=dataflow_output_file)
    executable_file = write_file(test_dir / "submit" / "dataflow-output.sh",
                                 executable_description)

    sub_description = """
        executable = {exe}

        queue
    """.format(exe=test_dir / "submit" / "dataflow-output.sh")
    submit_file = write_file(test_dir / "submit" / "dataflow-output.sub",
                             sub_description)

    return default_condor.run_command(["condor_submit", submit_file])
Example #23
def test_script(test_dir, test_script_contents):
    test_script = test_dir / "test_script.py"
    write_file(test_script, test_script_contents)
    return test_script
Example #24
def job_python_file(test_dir):
    job_python_file = test_dir / "debug.py"
    contents = format_script("""
        #!/usr/bin/python3

        import classad
        import json
        import os
        import posixpath
        import shutil
        import socket
        import sys
        import time

        from urllib.parse import urlparse

        DEFAULT_TIMEOUT = 30
        PLUGIN_VERSION = '1.0.0'

        EXIT_SUCCESS = 0
        EXIT_FAILURE = 1
        EXIT_AUTHENTICATION_REFRESH = 2


        def print_help(stream = sys.stderr):
            help_msg = '''Usage: {0} -infile <input-filename> -outfile <output-filename>
               {0} -classad

        Options:
          -classad                    Print a ClassAd containing the capabilities of this
                                      file transfer plugin.
          -infile <input-filename>    Input ClassAd file
          -outfile <output-filename>  Output ClassAd file
          -upload                     Indicates this transfer is an upload (default is
                                      download)
        '''
            stream.write(help_msg.format(sys.argv[0]))

        def print_capabilities():
            capabilities = {
                 'MultipleFileSupport': True,
                 'PluginType': 'FileTransfer',
                 'SupportedMethods': 'debug',
                 'Version': PLUGIN_VERSION,
            }
            sys.stdout.write(classad.ClassAd(capabilities).printOld())

        def parse_args():

            # The only argument lists that are acceptable are
            # <this> -classad
            # <this> -infile <input-filename> -outfile <output-filename>
            # <this> -outfile <output-filename> -infile <input-filename>
            if len(sys.argv) not in [2, 5, 6]:
                print_help()
                sys.exit(EXIT_FAILURE)

            # If -classad, print the capabilities of the plugin and exit early
            if (len(sys.argv) == 2) and (sys.argv[1] == '-classad'):
                print_capabilities()
                sys.exit(EXIT_SUCCESS)

            # If -upload, set is_upload to True and remove it from the args list
            is_upload = False
            if '-upload' in sys.argv[1:]:
                is_upload = True
                sys.argv.remove('-upload')

            # -infile and -outfile must be in the first and third position
            if not (
                    ('-infile' in sys.argv[1:]) and
                    ('-outfile' in sys.argv[1:]) and
                    (sys.argv[1] in ['-infile', '-outfile']) and
                    (sys.argv[3] in ['-infile', '-outfile']) and
                    (len(sys.argv) == 5)):
                print_help()
                sys.exit(EXIT_FAILURE)
            infile = None
            outfile = None
            try:
                for i, arg in enumerate(sys.argv):
                    if i == 0:
                        continue
                    elif arg == '-infile':
                        infile = sys.argv[i+1]
                    elif arg == '-outfile':
                        outfile = sys.argv[i+1]
            except IndexError:
                print_help()
                sys.exit(EXIT_FAILURE)

            return {'infile': infile, 'outfile': outfile, 'upload': is_upload}

        def format_error(error):
            return '{0}: {1}'.format(type(error).__name__, str(error))

        def get_error_dict(error, url = ''):
            error_string = format_error(error)
            error_dict = {
                'TransferSuccess': False,
                'TransferError': error_string,
                'TransferUrl': url,
            }

            return error_dict

        class DebugPlugin:

            # Extract whatever information we want from the url provided.
            # In this example, convert the example://path/to/file url to a
            # path in the file system (ie. /path/to/file)
            def parse_url(self, url):
                url_path = url[(url.find("://") + 3):]
                return url_path

            def download_file(self, url, local_file_path):

                start_time = time.time()

                # Download transfer logic goes here
                print(f"DEBUG: download {url} -> {local_file_path}")
                file_size = 0

                end_time = time.time()

                # Get transfer statistics
                transfer_stats = {
                    'TransferSuccess': True,
                    'TransferProtocol': 'example',
                    'TransferType': 'download',
                    'TransferFileName': local_file_path,
                    'TransferFileBytes': file_size,
                    'TransferTotalBytes': file_size,
                    'TransferStartTime': int(start_time),
                    'TransferEndTime': int(end_time),
                    'ConnectionTimeSeconds': end_time - start_time,
                    'TransferUrl': url,
                }

                return transfer_stats

            def upload_file(self, url, local_file_path):

                start_time = time.time()

                # Upload transfer logic goes here
                print(f"DEBUG: upload {local_file_path} --> {url}")
                file_size = 0

                end_time = time.time()

                # Get transfer statistics
                transfer_stats = {
                    'TransferSuccess': True,
                    'TransferProtocol': 'example',
                    'TransferType': 'upload',
                    'TransferFileName': local_file_path,
                    'TransferFileBytes': file_size,
                    'TransferTotalBytes': file_size,
                    'TransferStartTime': int(start_time),
                    'TransferEndTime': int(end_time),
                    'ConnectionTimeSeconds': end_time - start_time,
                    'TransferUrl': url,
                }

                return transfer_stats


        if __name__ == '__main__':

            # Start by parsing input arguments
            try:
                args = parse_args()
            except Exception:
                sys.exit(EXIT_FAILURE)

            debug_plugin = DebugPlugin()

            # Parse in the classads stored in the input file.
            # Each ad represents a single file to be transferred.
            try:
                with open(args['infile'], 'r') as infile:
                    infile_ads = list(classad.parseAds(infile))
            except Exception as err:
                try:
                    with open(args['outfile'], 'w') as outfile:
                        outfile_dict = get_error_dict(err)
                        outfile.write(str(classad.ClassAd(outfile_dict)))
                except Exception:
                    pass
                sys.exit(EXIT_FAILURE)

            # Now iterate over the list of classads and perform the transfers.
            try:
                with open(args['outfile'], 'w') as outfile:
                    for ad in infile_ads:
                        try:
                            if not args['upload']:
                                outfile_dict = debug_plugin.download_file(ad['Url'], ad['LocalFileName'])
                            else:
                                outfile_dict = debug_plugin.upload_file(ad['Url'], ad['LocalFileName'])

                            outfile.write(str(classad.ClassAd(outfile_dict)))

                        except Exception as err:
                            try:
                                outfile_dict = get_error_dict(err, url = ad['Url'])
                                outfile.write(str(classad.ClassAd(outfile_dict)))
                            except Exception:
                                pass
                            sys.exit(EXIT_FAILURE)

            except Exception:
                sys.exit(EXIT_FAILURE)
    """)
    write_file(job_python_file, contents)
    return job_python_file
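
This plugin implements HTCondor's file-transfer plugin protocol: invoked with -classad it advertises its capabilities, and invoked with -infile/-outfile it reads one ClassAd per file to transfer and writes one result ad per transfer. A hypothetical input file for a single download (URL and paths are made up) might contain:

[
    Url = "debug://host/path/to/file";
    LocalFileName = "/path/to/scratch/file";
]

The wrapper in Example #5 then invokes the plugin as debug.py -infile <input-filename> -outfile <output-filename>, capturing its stdout in the plugin log file.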