Beispiel #1
0
def the_job(test_dir, resources):
    """
    Write the polling job script into ``test_dir`` and return its submit description.

    The generated Python script loops ``int(sys.argv[1])`` times, sleeping one
    second per iteration.  On every iteration it runs, once per entry of
    ``resources``, a ``condor_status`` query against the ``.update.ad`` file
    in ``$_CONDOR_SCRATCH_DIR``.

    :param test_dir: Path-like directory (must support ``/`` and
        ``.as_posix()``) that receives the script and holds the job's log,
        output and error files.
    :param resources: Custom resource names.  Iterated twice (once for the
        script, once for the request entries), so it must be re-iterable.
    :return: A dict of submit commands, including one ``request_<name>s``
        entry with value ``"2"`` per resource.
    """
    # The script body is assembled from three pieces so that one polling
    # command per resource can be spliced into the middle of the while loop.
    loop_header = """
        import os
        import sys
        import time

        elapsed = 0;
        while elapsed < int(sys.argv[1]):"""
    poll_commands = "".join(f"""
            os.system('condor_status -ads ${{_CONDOR_SCRATCH_DIR}}/.update.ad -af Assigned{resource}s {resource}sMemoryUsage')
        """ for resource in resources)
    loop_footer = """
            time.sleep(1)
            elapsed += 1
        """

    # The shebang is prepended only after the body has been dedented, so it
    # does not take part in the common-indentation calculation.
    body = textwrap.dedent(loop_header + poll_commands + loop_footer)
    job_script = format_script("#!/usr/bin/python3\n" + body)

    script_file = test_dir / "poll-memory.py"
    write_file(script_file, job_script)

    stdout_path = test_dir / "poll-memory.$(Cluster).$(Process).out"
    stderr_path = test_dir / "poll-memory.$(Cluster).$(Process).err"
    event_log_path = test_dir / "events.log"

    job_spec = {
        "executable": script_file.as_posix(),
        "arguments": "17",
        "log": event_log_path.as_posix(),
        "output": stdout_path.as_posix(),
        "error": stderr_path.as_posix(),
        "getenv": "true",
        "LeaveJobInQueue": "true",
    }
    # Ask for two units of every custom resource.
    job_spec.update({f"request_{resource}s": "2" for resource in resources})

    return job_spec
def condor(test_dir, slot_config):
    """
    Write the per-resource helper scripts, start a Condor instance, and yield it.

    For every key of the module-level ``resources`` mapping, a
    ``<resource>-discovery.py`` and a ``<resource>-monitor.py`` file is
    written into ``test_dir``.  A ``Condor`` instance is then started with
    ``slot_config`` plus a ``TEST_DIR`` entry, and is yielded once the number
    of startd ads it reports equals ``slot_config["NUM_SLOTS"]``.

    This is a generator (presumably used as a pytest fixture -- the decorator
    is not visible here); the ``Condor`` context is exited when the generator
    is resumed or closed.

    :param test_dir: Path-like directory for the scripts and the ``condor``
        local directory.
    :param slot_config: Mapping of configuration values; must contain
        ``NUM_SLOTS``.
    """
    for resource in resources.keys():
        # Name each device "<resource><index>" and pair it with its usage value.
        usage_by_device = {
            f"{resource}{index}": usage
            for index, usage in enumerate(usages[resource])
        }
        discovery_source = discovery_script_for(resource, usage_by_device)
        write_file(
            test_dir / f"{resource}-discovery.py",
            format_script(discovery_source),
        )

        # Same device naming, but paired with the values from ``peaks``.
        peak_by_device = {
            f"{resource}{index}": peak
            for index, peak in enumerate(peaks[resource])
        }
        write_file(
            test_dir / f"{resource}-monitor.py",
            both_monitor_script(resource, usage_by_device, peak_by_device),
        )

    pool_config = {**slot_config, "TEST_DIR": test_dir.as_posix()}
    with Condor(local_dir=test_dir / "condor", config=pool_config) as pool:

        # Ornithology will run condor_who to verify that all the daemons are
        # running, but occasionally, not all slots will have made it to the
        # collector, so poll until the expected number of slot ads shows up.

        expected_slots = int(slot_config["NUM_SLOTS"])
        failed_checks = 0
        while True:
            slot_ads = pool.status(
                ad_type=htcondor.AdTypes.Startd, projection=["SlotID"]
            )
            if len(slot_ads) == expected_slots:
                break
            # Give up (via assertion) on the 20th mismatching check.
            failed_checks += 1
            assert failed_checks < 20
            time.sleep(1)
        yield pool
Beispiel #3
0
def test_script_contents():
    """
    Return the formatted source of a tiny Python test script.

    The script opens ``new_output_file`` for writing, writes an empty string
    to it, and exits with status 0.
    """
    raw_script = """
        #!/usr/bin/env python3

        with open("new_output_file", "w") as f:
            f.write("")
        exit(0)
        """
    return format_script(raw_script)
def job_shell_file(test_dir, job_python_file, plugin_log_file):
    """
    Write the ``debug.sh`` wrapper script into ``test_dir`` and return its path.

    The wrapper replaces itself (``exec``) with ``job_python_file``, forwards
    all of its own arguments to it, and redirects both stdout and stderr to
    ``plugin_log_file``.

    :param test_dir: Path-like directory that receives ``debug.sh``.
    :param job_python_file: Path of the program the wrapper executes.
    :param plugin_log_file: Path of the file that captures the program's
        stdout and stderr.
    :return: The path of the written wrapper script.
    """
    wrapper_path = test_dir / "debug.sh"
    # "$@" must be quoted: an unquoted $@ is subject to word splitting and
    # pathname expansion, which would mangle any argument containing
    # whitespace or glob characters before it reaches the Python program.
    contents = format_script(f"""
        #!/bin/bash
        exec {job_python_file} "$@" &> {plugin_log_file}
    """)
    write_file(wrapper_path, contents)
    return wrapper_path
def discovery_script(resources):
    """
    Return the formatted source of a discovery script for ``resources``.

    The generated Python script prints a single ``DetectedXXX="..."`` line
    whose value is the keys of ``resources`` joined with ``", "``.

    :param resources: Mapping whose keys are the resource names to report.
    """
    detected = ", ".join(resources.keys())
    return format_script(
        f"""
        #!/usr/bin/python3

        print('DetectedXXX="{detected}"')
        """
    )
def path_to_ordering_script(test_dir):
    """
    Write ``ordering.sh`` into ``test_dir`` and return its path.

    The Bash script prints the contents of ``a_file`` (relative to its
    working directory) and exits with status 0.

    :param test_dir: Path-like directory that receives the script.
    :return: The path of the written script.
    """
    ordering_source = """
    #!/bin/bash
    cat a_file
    exit 0
    """

    destination = test_dir / "ordering.sh"
    write_file(destination, format_script(ordering_source))
    return destination
Beispiel #7
0
def peak_job_script(resource):
    """
    Return the formatted source of a job script that polls one resource.

    The generated Python script loops ``int(sys.argv[1])`` times; each
    iteration runs ``condor_status`` against the ``.update.ad`` file in
    ``$_CONDOR_SCRATCH_DIR``, asking for ``Assigned<resource>s`` and
    ``<resource>sMemoryUsage``, and then sleeps for one second.

    :param resource: Resource name spliced into the two attribute names.
    """
    shebang = "#!/usr/bin/python3\n"
    # Dedent the body on its own; the shebang is attached afterwards so it
    # does not affect the common-indentation calculation.
    polling_loop = textwrap.dedent(f"""
        import os
        import sys
        import time

        elapsed = 0;
        while elapsed < int(sys.argv[1]):
            os.system('condor_status -ads ${{_CONDOR_SCRATCH_DIR}}/.update.ad -af Assigned{resource}s {resource}sMemoryUsage')
            time.sleep(1)
            elapsed += 1
        """)
    return format_script(shebang + polling_loop)
def condor(test_dir, slot_config):
    """
    Write the per-resource helper scripts, start a Condor instance, and yield it.

    For every key of the module-level ``resources`` mapping, a
    ``<resource>-discovery.py`` and a ``<resource>-monitor.py`` file is
    written into ``test_dir``.  A ``Condor`` instance is then started with
    ``slot_config`` plus a ``TEST_DIR`` entry and yielded from inside its
    context manager.

    This is a generator (presumably used as a pytest fixture -- the decorator
    is not visible here).

    :param test_dir: Path-like directory for the scripts and the ``condor``
        local directory.
    :param slot_config: Mapping of configuration values for the instance.
    """
    for resource in resources.keys():
        # Name each device "<resource><index>" and pair it with its usage value.
        usage_by_device = {
            f"{resource}{index}": usage
            for index, usage in enumerate(usages[resource])
        }
        discovery_source = discovery_script_for(resource, usage_by_device)
        write_file(
            test_dir / f"{resource}-discovery.py",
            format_script(discovery_source),
        )

        # Same device naming, but paired with the values from ``peaks``.
        peak_by_device = {
            f"{resource}{index}": peak
            for index, peak in enumerate(peaks[resource])
        }
        write_file(
            test_dir / f"{resource}-monitor.py",
            both_monitor_script(resource, usage_by_device, peak_by_device),
        )

    pool_config = {**slot_config, "TEST_DIR": test_dir.as_posix()}
    with Condor(local_dir=test_dir / "condor", config=pool_config) as pool:
        yield pool
def monitor_script(resources):
    """
    Return the formatted source of a monitor script for ``resources``.

    The generated Python script starts with a ``python3`` shebang and then,
    for each ``name -> increment`` item of ``resources``, prints three lines:
    a ``SlotMergeConstraint`` selecting ``name``, an ``UptimeXXXSeconds``
    value equal to ``increment``, and a ``- <name>`` line.

    :param resources: Mapping of resource name to the value reported as
        ``UptimeXXXSeconds``.
    """
    sections = ["#!/usr/bin/python3\n"]
    for name, increment in resources.items():
        # Values are substituted first and the result dedented afterwards.
        sections.append(
            textwrap.dedent(
                f"""
            print('SlotMergeConstraint = StringListMember( "{name}", AssignedXXX )')
            print('UptimeXXXSeconds = {increment}')
            print('- {name}')
            """
            )
        )
    return format_script("".join(sections))
Beispiel #10
0
def path_to_the_job_script(test_dir):
    """
    Write the self-checkpointing ``counting.py`` job into ``test_dir``.

    The generated Python script performs 24 steps of three seconds each.  It
    resumes from the step count stored in ``saved-state`` when that file can
    be read.  After every fifth completed step it writes the count back to
    ``saved-state`` and exits with status 17 (status 1 if the write fails).
    Once all steps are complete it exits with status 0.

    :param test_dir: Path-like directory that receives the script.
    :return: The path of the written script.
    """
    script_path = test_dir / "counting.py"

    counting_source = """
    #!/usr/bin/python3

    import sys
    import time
    import getopt

    total_steps = 24
    num_completed_steps = 0
    try:
        with open("saved-state", "r") as saved_state:
            num_completed_steps = int(saved_state.readline().strip())
    except IOError:
        pass

    while num_completed_steps < total_steps:
        print(f"Starting step {num_completed_steps}.")

        time.sleep(3)
        num_completed_steps += 1

        if num_completed_steps % 5 == 0:
            print(f"Checkpointing after {num_completed_steps}.")
            try:
                with open("saved-state", "w") as saved_state:
                    saved_state.write(f"{num_completed_steps}")
                sys.exit(17)
            except IOError:
                print("Failed to write checkpoint.", file=sys.stderr);
                sys.exit(1)

    print(f"Completed all {total_steps} steps.")
    sys.exit(0)
    """

    write_file(script_path, format_script(counting_source))
    return script_path
def path_to_prp_script(default_condor, test_dir):
    """
    Write the three-stage ``prp.sh`` job into ``test_dir`` and return its path.

    The Bash script records its progress in ``<test_dir>/prp-test-file`` and
    chooses what to do from the last line of that file:

    * empty: write "step one" data, create ``prp/data/subdir`` and exit 85;
    * "step one": write "step two" data, run ``condor_vacate_job $1``, then
      sleep 60 seconds and exit 1 if it was not vacated in that time;
    * "step two": write "step three" data and exit 0;
    * anything else: write "step never-never" and exit 1.

    :param default_condor: Object whose ``config_file`` attribute is exported
        as ``CONDOR_CONFIG`` in the script.
    :param test_dir: Path-like directory that receives the script and holds
        the progress file.
    :return: The path of the written script.
    """
    # Captured when the script is generated; appended to the job's own PATH.
    inherited_path = os.environ["PATH"]
    progress_file = f"{test_dir}/prp-test-file"

    prp_source = f"""
    #!/bin/bash
    export CONDOR_CONFIG={default_condor.config_file}
    export PATH=$PATH:{inherited_path}

    DATA=`tail -n 1 {progress_file}`
    echo "Starting up..." >> {progress_file}

    if [[ $DATA == "" ]]; then
        echo "step one" >> {progress_file}
        echo "step one" >> prp/data/data_file
        mkdir prp/data/subdir
        echo "step one" >> prp/data/subdir/other_data_file
        exit 85
    fi
    if [[ $DATA == "step one" ]]; then
        echo "step two" >> {progress_file}
        echo "step two" >> prp/data/data_file
        condor_vacate_job $1
        # Don't exit before we've been vacated.
        sleep 60
        # We did not succeed.
        exit 1
    fi
    if [[ $DATA == "step two" ]]; then
        echo "step three" >> {progress_file}
        echo "step three" >> prp/data/data_file
        exit 0
    fi
    echo "step never-never" >> {progress_file}
    echo "step never-never" >> prp/data/data_file
    exit 1
    """

    script_path = test_dir / "prp.sh"
    write_file(script_path, format_script(prp_source))
    return script_path
def path_to_directory_script(default_condor, test_dir):
    """
    Write the two-pass ``directory.sh`` job into ``test_dir`` and return its path.

    The Bash script appends the current contents of ``data/data_file`` to
    ``<test_dir>/directory-test-file``.  If the data file already holds
    "first job modification", it overwrites it with "second job modification"
    and exits 0.  Otherwise it writes "first job modification", runs
    ``condor_vacate_job $1``, sleeps 60 seconds and exits 0.

    :param default_condor: Object whose ``config_file`` attribute is exported
        as ``CONDOR_CONFIG`` in the script.
    :param test_dir: Path-like directory that receives the script and holds
        the record file.
    :return: The path of the written script.
    """
    # Captured when the script is generated; appended to the job's own PATH.
    inherited_path = os.environ["PATH"]
    record_file = f"{test_dir}/directory-test-file"

    directory_source = f"""
    #!/bin/bash
    export CONDOR_CONFIG={default_condor.config_file}
    export PATH=$PATH:{inherited_path}
    cat data/data_file >> {record_file}
    DATA=`cat data/data_file`
    if [[ $DATA == "first job modification" ]]; then
        echo "second job modification" > data/data_file
        exit 0
    fi
    echo "first job modification" > data/data_file
    condor_vacate_job $1
    # Don't exit before we've been vacated.
    sleep 60
    exit 0
    """

    script_path = test_dir / "directory.sh"
    write_file(script_path, format_script(directory_source))
    return script_path
def path_to_job_two_script(test_dir):
    """
    Write the napping, self-checkpointing ``counting.py`` job into ``test_dir``.

    The generated Python script takes one nap per run, with lengths of 5, 10
    and 15 seconds.  The index of the next nap is read from ``saved-state``
    when that file can be read, otherwise it starts at 0.  After the last nap
    it exits with status 0; otherwise it writes the next index to
    ``saved-state`` and exits with status 17 (status 1 if the write fails).

    :param test_dir: Path-like directory that receives the script.
    :return: The path of the written script.
    """
    script_path = test_dir / "counting.py"

    napping_source = """
    #!/usr/bin/python3

    import sys
    import time

    nap = 0
    nap_lengths = [5, 10, 15]

    try:
        with open("saved-state", "r") as saved_state:
            nap = int(saved_state.readline().strip())
            print(f"Restarting naps from #{nap}")
    except IOError:
        pass

    print(f"Nap #{nap} will be {nap_lengths[nap]} seconds long.")
    time.sleep(nap_lengths[nap])
    nap += 1

    if nap >= len(nap_lengths):
        print(f"Completed all naps.")
        sys.exit(0)

    try:
        with open("saved-state", "w") as saved_state:
            saved_state.write(f"{nap}")
            sys.exit(17)
    except IOError:
        print("Failed to write checkpoint.", file=sys.stderr);
        sys.exit(1)
    """

    write_file(script_path, format_script(napping_source))
    return script_path
def job_python_file(test_dir):
    """
    Write the ``debug.py`` file-transfer plugin into ``test_dir`` and return its path.

    The generated Python program accepts either ``-classad`` (print the
    plugin's capabilities, advertising the ``debug`` method) or
    ``-infile <file> -outfile <file>`` with an optional ``-upload`` flag.
    For every ad parsed from the input file it performs no real transfer: it
    prints a ``DEBUG: download ...`` or ``DEBUG: upload ...`` line and writes
    a statistics ad reporting success and a size of zero to the output file.

    The download statistics report ``'TransferType': 'download'``.  They
    previously said ``'upload'``, copied unchanged from the upload path,
    which mislabelled every download in the output ads.

    :param test_dir: Path-like directory that receives ``debug.py``.
    :return: The path of the written plugin.
    """
    job_python_file = test_dir / "debug.py"
    contents = format_script("""
        #!/usr/bin/python3

        import classad
        import json
        import os
        import posixpath
        import shutil
        import socket
        import sys
        import time

        from urllib.parse import urlparse

        DEFAULT_TIMEOUT = 30
        PLUGIN_VERSION = '1.0.0'

        EXIT_SUCCESS = 0
        EXIT_FAILURE = 1
        EXIT_AUTHENTICATION_REFRESH = 2


        def print_help(stream = sys.stderr):
            help_msg = '''Usage: {0} -infile <input-filename> -outfile <output-filename>
               {0} -classad

        Options:
          -classad                    Print a ClassAd containing the capablities of this
                                      file transfer plugin.
          -infile <input-filename>    Input ClassAd file
          -outfile <output-filename>  Output ClassAd file
          -upload                     Indicates this transfer is an upload (default is
                                      download)
        '''
            stream.write(help_msg.format(sys.argv[0]))

        def print_capabilities():
            capabilities = {
                 'MultipleFileSupport': True,
                 'PluginType': 'FileTransfer',
                 'SupportedMethods': 'debug',
                 'Version': PLUGIN_VERSION,
            }
            sys.stdout.write(classad.ClassAd(capabilities).printOld())

        def parse_args():

            # The only argument lists that are acceptable are
            # <this> -classad
            # <this> -infile <input-filename> -outfile <output-filename>
            # <this> -outfile <output-filename> -infile <input-filename>
            if not len(sys.argv) in [2, 5, 6]:
                print_help()
                sys.exit(EXIT_FAILURE)

            # If -classad, print the capabilities of the plugin and exit early
            if (len(sys.argv) == 2) and (sys.argv[1] == '-classad'):
                print_capabilities()
                sys.exit(EXIT_SUCCESS)

            # If -upload, set is_upload to True and remove it from the args list
            is_upload = False
            if '-upload' in sys.argv[1:]:
                is_upload = True
                sys.argv.remove('-upload')

            # -infile and -outfile must be in the first and third position
            if not (
                    ('-infile' in sys.argv[1:]) and
                    ('-outfile' in sys.argv[1:]) and
                    (sys.argv[1] in ['-infile', '-outfile']) and
                    (sys.argv[3] in ['-infile', '-outfile']) and
                    (len(sys.argv) == 5)):
                print_help()
                sys.exit(1)
            infile = None
            outfile = None
            try:
                for i, arg in enumerate(sys.argv):
                    if i == 0:
                        continue
                    elif arg == '-infile':
                        infile = sys.argv[i+1]
                    elif arg == '-outfile':
                        outfile = sys.argv[i+1]
            except IndexError:
                print_help()
                sys.exit(EXIT_FAILURE)

            return {'infile': infile, 'outfile': outfile, 'upload': is_upload}

        def format_error(error):
            return '{0}: {1}'.format(type(error).__name__, str(error))

        def get_error_dict(error, url = ''):
            error_string = format_error(error)
            error_dict = {
                'TransferSuccess': False,
                'TransferError': error_string,
                'TransferUrl': url,
            }

            return error_dict

        class DebugPlugin:

            # Extract whatever information we want from the url provided.
            # In this example, convert the example://path/to/file url to a
            # path in the file system (ie. /path/to/file)
            def parse_url(self, url):
                url_path = url[(url.find("://") + 3):]
                return url_path

            def download_file(self, url, local_file_path):

                start_time = time.time()

                # Download transfer logic goes here
                print(f"DEBUG: download {url} -> {local_file_path}")
                file_size = 0

                end_time = time.time()

                # Get transfer statistics
                transfer_stats = {
                    'TransferSuccess': True,
                    'TransferProtocol': 'example',
                    'TransferType': 'download',
                    'TransferFileName': local_file_path,
                    'TransferFileBytes': file_size,
                    'TransferTotalBytes': file_size,
                    'TransferStartTime': int(start_time),
                    'TransferEndTime': int(end_time),
                    'ConnectionTimeSeconds': end_time - start_time,
                    'TransferUrl': url,
                }

                return transfer_stats

            def upload_file(self, url, local_file_path):

                start_time = time.time()

                # Upload transfer logic goes here
                print(f"DEBUG: upload {local_file_path} --> {url}")
                file_size = 0

                end_time = time.time()

                # Get transfer statistics
                transfer_stats = {
                    'TransferSuccess': True,
                    'TransferProtocol': 'example',
                    'TransferType': 'upload',
                    'TransferFileName': local_file_path,
                    'TransferFileBytes': file_size,
                    'TransferTotalBytes': file_size,
                    'TransferStartTime': int(start_time),
                    'TransferEndTime': int(end_time),
                    'ConnectionTimeSeconds': end_time - start_time,
                    'TransferUrl': url,
                }

                return transfer_stats


        if __name__ == '__main__':

            # Start by parsing input arguments
            try:
                args = parse_args()
            except Exception:
                sys.exit(EXIT_FAILURE)

            debug_plugin = DebugPlugin()

            # Parse in the classads stored in the input file.
            # Each ad represents a single file to be transferred.
            try:
                infile_ads = classad.parseAds(open(args['infile'], 'r'))
            except Exception as err:
                try:
                    with open(args['outfile'], 'w') as outfile:
                        outfile_dict = get_error_dict(err)
                        outfile.write(str(classad.ClassAd(outfile_dict)))
                except Exception:
                    pass
                sys.exit(EXIT_FAILURE)

            # Now iterate over the list of classads and perform the transfers.
            try:
                with open(args['outfile'], 'w') as outfile:
                    for ad in infile_ads:
                        try:
                            if not args['upload']:
                                outfile_dict = debug_plugin.download_file(ad['Url'], ad['LocalFileName'])
                            else:
                                outfile_dict = debug_plugin.upload_file(ad['Url'], ad['LocalFileName'])

                            outfile.write(str(classad.ClassAd(outfile_dict)))

                        except Exception as err:
                            try:
                                outfile_dict = get_error_dict(err, url = ad['Url'])
                                outfile.write(str(classad.ClassAd(outfile_dict)))
                            except Exception:
                                pass
                            sys.exit(EXIT_FAILURE)

            except Exception:
                sys.exit(EXIT_FAILURE)
    """)
    write_file(job_python_file, contents)
    return job_python_file
Beispiel #15
0
def monitor_script(the_config):
    """
    Return the formatted monitor script taken from ``the_config``.

    :param the_config: Mapping that holds the raw script text under the
        ``"monitor"`` key.
    """
    raw_monitor = the_config["monitor"]
    return format_script(raw_monitor)
Beispiel #16
0
def discovery_script():
    """
    Return the formatted discovery script for the module-level resource.

    The raw text comes from ``discovery_script_for``, called with the
    ``resource`` and ``resources`` names defined elsewhere in the file.
    """
    raw_discovery = discovery_script_for(resource, resources)
    return format_script(raw_discovery)