Example #1
0
async def auth_aws(hvac, role):
    """ Get an AWS token and save it to the system.

    boto is not async-friendly, but this call must be, so the blocking
    Vault write is pushed onto the default executor.

    :param hvac: authenticated hvac (Vault) client
    :param str role: one of pct, robot, developer
    :return: True on success, False if the Vault call failed or timed out
    """
    try:
        # FIX: get_running_loop() is the supported way to reach the loop
        # from inside a coroutine; get_event_loop() is deprecated here.
        future = asyncio.get_running_loop().run_in_executor(
            None, lambda: hvac.write(f'aws/sts/{role}', ttl='36h'))
        token = await asyncio.wait_for(future, 10)
    except Exception as e:
        log.error(f"Failed to authenticate hvac: {e}")
        return False

    # Assemble the standard AWS shared-credentials file contents.
    data = token['data']
    creds = (
        "[default]\n"
        f"aws_access_key_id = {data['access_key']}\n"
        f"aws_secret_access_key = {data['secret_key']}\n"
        f"aws_security_token = {data['security_token']}\n"
    )

    async with aiofiles.open('/app/.aws/credentials', "w") as f:
        await f.write(creds)

    async with aiofiles.open('/app/.aws/config', "w") as f:
        await f.write('[default]\nregion = us-east-2\noutput = json\n')

    log.verbose(f'Authenticated with aws as {role}')
    return True
Example #2
0
async def auth_docker():
    """ Authenticate the HOSTS docker daemon against AWS ECR.

    :return: True on success, False on failure
    """
    boto_client = boto3.client('ecr')

    try:
        # FIX: get_running_loop() is the supported call inside a coroutine
        # (get_event_loop() is deprecated in that context).
        future = asyncio.get_running_loop().run_in_executor(
            None, boto_client.get_authorization_token)
        res = await asyncio.wait_for(future, 10)
    except Exception as e:
        # FIX: return an explicit False (previously returned log.error's
        # None) for consistency with auth_aws; both are falsy to callers.
        log.error(f"Unable to authenticate with AWS ECR: {e}")
        return False

    # Also has an expiresAt key
    token = res['authorizationData'][0]['authorizationToken'].encode('utf-8')
    username, password = base64.b64decode(token).decode('utf-8').split(':')
    endpoint = '778747430246.dkr.ecr.us-east-2.amazonaws.com'

    # NOTE(security): the password is visible in the process list via the
    # command line; consider `docker login --password-stdin` if
    # stream_subprocess can feed stdin — verify before changing.
    command = f'docker login {endpoint} --username AWS --password {password}'
    ret = await asyncio_utils.stream_subprocess(command,
                                                log.verbose,
                                                log.verbose,
                                                timeout=20)

    if ret != 0:
        log.error('Unable to authenticate docker')
        return False

    log.verbose('Authenticated with docker')
    return True
Example #3
0
async def ensure_auth(hvac, role):
    """ Check the current authentication state of the system, including AWS and Docker.

    SSH is not included here, since it's developer-only.

    :param hvac: authenticated hvac (Vault) client
    :param str role: AWS role; 'snowbot' is treated as an alias for 'robot'
    """
    if role == 'snowbot':
        role = 'robot'

    # FIX: both period checks previously ended in `or True` — a debug
    # leftover that forced a fresh AWS/Docker auth on every call and made
    # the *_auth_period settings dead code.
    last_aws_auth = settings.hatch.aws_auth_time
    hours = (time.time() - last_aws_auth) / 3600
    log.verbose(f'Hours since last AWS auth: {hours}')

    if hours > properties.aws_auth_period:
        if not await auth_aws(hvac, role):
            return

        # Reload before mutating so we don't clobber concurrent changes.
        settings.reload()
        settings.hatch.aws_auth_time = time.time()
        settings.save()

    # Docker
    last_docker_auth = settings.hatch.docker_auth_time
    hours = (time.time() - last_docker_auth) / 3600
    log.verbose(f'Hours since last Docker auth: {hours}')

    if hours > properties.docker_auth_period:
        if not await auth_docker():
            return

        settings.reload()
        settings.hatch.docker_auth_time = time.time()
        settings.save()

    # SSH keys
    if not await ssh_key_valid():
        await sign_ssh_key(hvac)
Example #4
0
def Submit(config, jobdesc):
    """
    Submits a job to the SLURM queue specified in arc.conf. This method executes the required
    RunTimeEnvironment scripts and assembles the bash job script. The job script is
    written to file and submitted with ``sbatch``.

    :param str config: path to arc.conf
    :param jobdesc: job description object
    :type jobdesc: :py:class:`arc.JobDescription`
    :return: local job ID if successfully submitted, else ``None``
    :rtype: :py:obj:`str`
    """

    configure(config, set_slurm)

    validate_attributes(jobdesc)
    if Config.remote_host:
        ssh_connect(Config.remote_host, Config.remote_user, Config.private_key)

    # Run RTE stage0
    debug('----- starting slurmSubmitter.py -----', 'slurm.Submit')
    RTE_stage0(jobdesc, 'SLURM', SBATCH_ACCOUNT = 'OtherAttributes.SBATCH_ACCOUNT')

    set_grid_global_jobid(jobdesc)

    # Create script file and write job script
    jobscript = get_job_script(jobdesc)
    script_file = write_script_file(jobscript)
    debug('Created file %s' % script_file, 'slurm.Submit')

    debug('SLURM jobname: %s' % jobdesc.Identification.JobName, 'slurm.Submit')
    debug('SLURM job script built', 'slurm.Submit')
    debug('----------------- BEGIN job script -----', 'slurm.Submit')
    # Collapse runs of blank lines when echoing the script, and escape '%'
    # so the logger's printf-style formatting doesn't misinterpret it.
    emptylines = 0
    for line in jobscript.split('\n'):
        if not line:
            emptylines += 1
        else:
            debug(emptylines*'\n' + line.replace("%", "%%"), 'slurm.Submit')
            emptylines = 0
    if emptylines > 1:
        debug((emptylines-1)*'\n', 'slurm.Submit')
    debug('----------------- END job script -----', 'slurm.Submit')

    if os.environ.get('ONLY_WRITE_JOBSCRIPT') == 'yes':
        return "-1"

    #######################################
    #  Submit the job
    ######################################

    execute = execute_local if not Config.remote_host else execute_remote
    directory = jobdesc.OtherAttributes['joboption;directory']

    debug('Session directory: %s' % directory, 'slurm.Submit')

    SLURM_TRIES = 0
    handle = None
    while SLURM_TRIES < 10:
        # FIX: the command was '%s/oarsub' — oarsub is the OAR scheduler's
        # submit tool. This is a SLURM submitter; the docstring and the
        # error path below already say sbatch.
        args = '%s/sbatch %s' % (Config.slurm_bin_path, script_file)
        verbose('Executing \'%s\' on %s' %
                (args, Config.remote_host if Config.remote_host else 'localhost'), 'slurm.Submit')
        handle = execute(args)
        if handle.returncode == 0:
            break
        # Exit code 198 (or a recognized full-queue condition) means the
        # queue is full: back off a minute and retry, up to 10 times.
        if handle.returncode == 198 or wait_for_queue(handle):
            debug('Waiting for queue to decrease', 'slurm.Submit')
            time.sleep(60)
            SLURM_TRIES += 1
            continue
        break  # Other error than full queue

    if handle.returncode == 0:
        # TODO: Test what happens when the jobqueue is full or when the slurm
        # ctld is not responding. SLURM 1.x and 2.2.x outputs the jobid into 
        # STDERR and STDOUT respectively. Concat them, and let sed sort it out. 
        # From the exit code we know that the job was submitted, so this
        # is safe. Ulf Tigerstedt <*****@*****.**> 1.5.2011 
        localid = get_job_id(handle)
        if localid:
            debug('Job submitted successfully!', 'slurm.Submit')
            debug('Local job id: ' + localid, 'slurm.Submit')
            # FIX: message said 'submitSubmitter.py' — no such file.
            debug('----- exiting slurmSubmitter.py -----', 'slurm.Submit')
            return localid

    debug('job *NOT* submitted successfully!', 'slurm.Submit')
    debug('got error code from sbatch: %d !' % handle.returncode, 'slurm.Submit')
    debug('Output is:\n' + ''.join(handle.stdout), 'slurm.Submit')
    debug('Error output is:\n' + ''.join(handle.stderr), 'slurm.Submit')
    debug('----- exiting slurmSubmitter.py -----', 'slurm.Submit')
Example #5
0
def Submit(config, jobdesc):
    """
    Submits a job to the SLURM queue specified in arc.conf. This method executes the required
    RunTimeEnvironment scripts and assembles the bash job script. The job script is
    written to file and submitted with ``sbatch``.

    :param str config: path to arc.conf
    :param jobdesc: job description object
    :type jobdesc: :py:class:`arc.JobDescription`
    :return: local job ID if successfully submitted, else ``None``
    :rtype: :py:obj:`str`
    """

    configure(config, set_slurm)

    validate_attributes(jobdesc)
    if Config.remote_host:
        ssh_connect(Config.remote_host, Config.remote_user, Config.private_key)

    # Run RTE stage0
    debug("----- starting slurmSubmitter.py -----", "slurm.Submit")
    RTE_stage0(jobdesc, "SLURM", SBATCH_ACCOUNT="OtherAttributes.SBATCH_ACCOUNT")

    # Create script file and write job script
    jobscript = get_job_script(jobdesc)
    script_file = write_script_file(jobscript)

    debug("SLURM jobname: %s" % jobdesc.Identification.JobName, "slurm.Submit")
    debug("SLURM job script built", "slurm.Submit")
    debug("----------------- BEGIN job script -----", "slurm.Submit")
    for line in jobscript.split("\n"):
        debug(line, "slurm.Submit")
    debug("----------------- END job script -----", "slurm.Submit")

    # Dry-run hook: write the script but never submit.
    if os.environ.get("ONLY_WRITE_JOBSCRIPT") == "yes":
        return

    #######################################
    #  Submit the job
    ######################################

    execute = execute_local if not Config.remote_host else execute_remote
    directory = jobdesc.OtherAttributes["joboption;directory"]

    debug("Session directory: %s" % directory, "slurm.Submit")

    SLURM_TRIES = 0
    handle = None
    while SLURM_TRIES < 10:
        args = "%s/sbatch %s" % (Config.slurm_bin_path, script_file)
        verbose(
            "Executing '%s' on %s" % (args, Config.remote_host if Config.remote_host else "localhost"), "slurm.Submit"
        )
        handle = execute(args)
        if handle.returncode == 0:
            break
        # Exit code 198 (or a recognized full-queue condition) means the
        # queue is full: back off a minute and retry, up to 10 times.
        if handle.returncode == 198 or wait_for_queue(handle):
            debug("Waiting for queue to decrease", "slurm.Submit")
            time.sleep(60)
            SLURM_TRIES += 1
            continue
        break  # Other error than full queue

    if handle.returncode == 0:
        # TODO: Test what happens when the jobqueue is full or when the slurm
        # ctld is not responding. SLURM 1.x and 2.2.x outputs the jobid into
        # STDERR and STDOUT respectively. Concat them, and let sed sort it out.
        # From the exit code we know that the job was submitted, so this
        # is safe. Ulf Tigerstedt <*****@*****.**> 1.5.2011
        localid = get_job_id(handle)
        if localid:
            debug("Job submitted successfully!", "slurm.Submit")
            debug("Local job id: " + localid, "slurm.Submit")
            # FIX: message said 'submitSubmitter.py' — no such file.
            debug("----- exiting slurmSubmitter.py -----", "slurm.Submit")
            return localid

    debug("job *NOT* submitted successfully!", "slurm.Submit")
    debug("got error code from sbatch: %d !" % handle.returncode, "slurm.Submit")
    debug("Output is:\n" + "".join(handle.stdout), "slurm.Submit")
    debug("Error output is:\n" + "".join(handle.stderr), "slurm.Submit")
    debug("----- exiting slurmSubmitter.py -----", "slurm.Submit")
Example #6
0
def Submit(config, jobdesc):
    """
    Submits a job to the LSF queue specified in arc.conf. This method executes the required
    RunTimeEnvironment scripts and assembles the bash job script. The job script is
    written to file and submitted with ``bsub``.

    :param str config: path to arc.conf
    :param jobdesc: job description object
    :type jobdesc: :py:class:`arc.JobDescription`
    :return: local job ID if successfully submitted, else ``None``
    :rtype: :py:obj:`str`
    """

    configure(config, set_lsf)

    validate_attributes(jobdesc)
    if Config.remote_host:
        ssh_connect(Config.remote_host, Config.remote_user, Config.private_key)

    # Run RTE stage0
    debug('----- starting lsfSubmitter.py -----', 'lsf.Submit')
    RTE_stage0(jobdesc, 'LSF')

    # Create script file and write job script
    jobscript = get_job_script(jobdesc)
    script_file = write_script_file(jobscript)

    debug('LSF jobname: %s' % jobdesc.Identification.JobName, 'lsf.Submit')
    debug('LSF job script built', 'lsf.Submit')
    debug('----------------- BEGIN job script -----', 'lsf.Submit')
    for line in jobscript.split('\n'):
        debug(line, 'lsf.Submit')
    debug('----------------- END job script -----', 'lsf.Submit')

    # Dry-run hook: write the script but never submit.
    if os.environ.get('ONLY_WRITE_JOBSCRIPT') == 'yes':
        return False

    #######################################
    #  Submit the job
    ######################################

    # FIX: was `excute_local` — a NameError on every local (non-remote)
    # submission.
    execute = execute_local if not Config.remote_host else execute_remote
    directory = jobdesc.OtherAttributes['joboption;directory']

    debug('Session directory: %s' % directory, 'lsf.Submit')

    # bsub reads the job script from stdin, hence the redirect.
    args = '%s %s/bsub < %s' % (Config.lsf_setup, Config.lsf_bin_path, script_file)
    verbose('executing \'%s\' on %s' % (args, Config.remote_host if Config.remote_host else 'localhost'), 'lsf.Submit')
    handle = execute(args)

    if handle.returncode == 0:
        localid = get_job_id(handle)
        if localid:
            debug('Job submitted successfully!', 'lsf.Submit')
            debug('Local job id: ' + localid, 'lsf.Submit')
            debug('----- exiting lsfSubmitter.py -----', 'lsf.Submit')
            return localid

    debug('job *NOT* submitted successfully!', 'lsf.Submit')
    debug('got error code from bsub: %d !' % handle.returncode, 'lsf.Submit')
    debug('Output is:\n' + ''.join(handle.stdout), 'lsf.Submit')
    debug('Error output is:\n' + ''.join(handle.stderr), 'lsf.Submit')
    debug('----- exiting lsfSubmitter.py -----', 'lsf.Submit')