Example #1
0
def get_output_json_file(job_homedir=None):
    """
    :param job_homedir: explicit value for home directory, used for testing purposes
    :rtype: string
    :returns: Path to output JSON file

    Returns the path of ``job_output.json`` inside the job's home
    directory. The home directory is *job_homedir* when given, otherwise
    the value of the ``HOME`` environment variable.
    """
    # Mirror get_input_dir/get_output_dir: an explicit home directory wins
    # over the environment so the helper can be exercised outside a job.
    home_dir = environ.get('HOME') if job_homedir is None else job_homedir
    return os.path.join(home_dir, "job_output.json")
Example #2
0
def exit_with_error(msg):
    '''
    :param msg: string message to print before exiting

    Terminates the process via sys.exit, using as exit message *msg*
    followed by a line stating where the local job workspaces live
    (the value of $DX_TEST_JOB_HOMEDIRS).
    '''
    workspaces = str(environ.get('DX_TEST_JOB_HOMEDIRS'))
    sys.exit(msg + '\n' + 'Local job workspaces can be found in: ' + workspaces)
Example #3
0
def exit_with_error(msg):
    '''
    :param msg: string message to print before exiting

    Exits through sys.exit with *msg* plus a trailing hint pointing at
    the directory that holds the local job workspaces
    ($DX_TEST_JOB_HOMEDIRS).
    '''
    pieces = [
        msg,
        '\n',
        'Local job workspaces can be found in: ',
        str(environ.get('DX_TEST_JOB_HOMEDIRS')),
    ]
    sys.exit(''.join(pieces))
Example #4
0
def get_input_dir(job_homedir=None):
    '''
    :param job_homedir: explicit value for home directory, used for testing purposes
    :rtype: string
    :returns: path to input directory

    Builds the path of the ``in`` subdirectory of the job's home
    directory, which is where all inputs are downloaded. The home
    directory defaults to $HOME when *job_homedir* is not supplied.
    '''
    base_dir = environ.get('HOME') if job_homedir is None else job_homedir
    return os.path.join(base_dir, 'in')
Example #5
0
def get_output_dir(job_homedir=None):
    '''
    :param job_homedir: explicit value for home directory, used for testing purposes
    :rtype: string
    :returns: path to output directory

    Builds the path of the ``out`` subdirectory of the job's home
    directory, which is where all outputs are created and uploaded
    from. The home directory defaults to $HOME when *job_homedir* is
    not supplied.
    '''
    if job_homedir is None:
        job_homedir = environ.get('HOME')
    return os.path.join(job_homedir, 'out')
Example #6
0
def _quote_for_bash(value):
    '''
    :param value: a single job input value
    :rtype: string
    :returns: *value* shell-quoted with pipes.quote

    Strings are quoted as they are; any other value is serialized to
    JSON first.
    '''
    return pipes.quote(value if isinstance(value, basestring) else json.dumps(value))


def _bash_export_statement(key, value):
    '''
    :param key: name of the job input field
    :param value: value of the job input field
    :rtype: string
    :returns: the bash "export" statement defining *key*

    A list becomes a bash array with one quoted element per item;
    anything else becomes a single quoted scalar.
    '''
    if isinstance(value, list):
        quoted_items = " ".join(_quote_for_bash(item) for item in value)
        return "export {k}=( {vlist} )".format(k=key, vlist=quoted_items)
    return "export {k}={v}".format(k=key, v=_quote_for_bash(value))


def run_one_entry_point(job_id, function, input_hash, run_spec, depends_on, name=None):
    '''
    :param job_id: job ID of the local job to run
    :type job_id: string
    :param function: function to run
    :type function: string
    :param input_hash: input for the job (may include job-based object references)
    :type input_hash: dict
    :param run_spec: run specification from the dxapp.json of the app
    :type run_spec: dict
    :param depends_on: dependencies handed to wait_for_depends_on before the job is started; None is treated as an empty list
    :type depends_on: list or None
    :param name: optional display name; when it is a string, " (job_id:function)" is appended to it, otherwise "job_id:function" is used
    :type name: string or None

    Runs the specified entry point and retrieves the job's output,
    updating job_outputs.json (in $DX_TEST_JOB_HOMEDIRS) appropriately.

    Failures while resolving input, processing depends_on, running the
    entry point or loading its output end the process through
    exit_with_error.
    '''
    print('======')

    homedirs_root = environ['DX_TEST_JOB_HOMEDIRS']
    job_homedir = os.path.join(homedirs_root, job_id)
    env_path = os.path.join(job_homedir, 'environment')
    all_job_outputs_path = os.path.join(homedirs_root, 'job_outputs.json')

    # The job process runs with its own home directory as $HOME
    job_env = environ.copy()
    job_env['HOME'] = job_homedir

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)

    if isinstance(name, basestring):
        name += ' (' + job_id + ':' + function + ')'
    else:
        name = job_id + ':' + function
    job_name = BLUE() + BOLD() + name + ENDC()
    print(job_name)

    # Resolve local job-based object references
    try:
        resolve_job_references(input_hash, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ' when resolving input:\n' + fill(str(e)))

    # Get list of non-closed data objects in the input that appear as
    # DNAnexus links; append to depends_on
    if depends_on is None:
        depends_on = []
    get_implicit_depends_on(input_hash, depends_on)

    try:
        wait_for_depends_on(depends_on, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ' when processing depends_on:\n' + fill(str(e)))

    # Save job input to job_input.json. The file is opened in text mode
    # because json.dump emits str, which a binary-mode file rejects on
    # Python 3.
    with open(os.path.join(job_homedir, 'job_input.json'), 'w') as fd:
        json.dump(input_hash, fd, indent=4)
        fd.write('\n')

    print(job_output_to_str(input_hash, title=(BOLD() + 'Input: ' + ENDC()),
                            title_len=len("Input: ")).lstrip())

    interpreter = run_spec['interpreter']

    if interpreter == 'bash':
        # Save job input to env vars
        with open(env_path, 'w') as fd:
            # Following code is what is used to generate env vars on the remote worker
            fd.write("\n".join(_bash_export_statement(k, v) for k, v in input_hash.items()))

    print(BOLD() + 'Logs:' + ENDC())
    start_time = datetime.datetime.now()
    if interpreter == 'bash':
        # Enter the job's home directory, source the saved inputs and the
        # app code, then call the entry point if the code defined it.
        script = '''
          cd {homedir};
          . {env_path};
          . {code_path};
          if [[ $(type -t {function}) == "function" ]];
          then {function};
          else echo "$0: Global scope execution complete. Not invoking entry point function {function} because it was not found" 1>&2;
          fi'''.format(homedir=pipes.quote(job_homedir),
                       env_path=pipes.quote(env_path),
                       code_path=pipes.quote(environ['DX_TEST_CODE_PATH']),
                       function=function)
        invocation_args = ['bash', '-c', '-e'] + (['-x'] if environ.get('DX_TEST_X_FLAG') else []) + [script]
    elif interpreter == 'python2.7':
        # homedir is passed through repr() so that it is spliced into the
        # generated program as a string literal.
        script = '''#!/usr/bin/env python
import os
os.chdir({homedir})

{code}

import dxpy, json
if dxpy.utils.exec_utils.RUN_COUNT == 0:
    dxpy.run()
'''.format(homedir=repr(job_homedir),
           code=run_spec['code'])

        job_env['DX_TEST_FUNCTION'] = function
        invocation_args = ['python', '-c', script]
    else:
        # Without this branch invocation_args would be unbound below
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ': unsupported interpreter ' + str(interpreter))

    if USING_PYTHON2:
        invocation_args = [arg.encode(sys.stdout.encoding) for arg in invocation_args]
        env = {k: v.encode(sys.stdout.encoding) for k, v in job_env.items()}
    else:
        env = job_env

    fn_process = subprocess.Popen(invocation_args, env=env)

    fn_process.communicate()
    end_time = datetime.datetime.now()

    if fn_process.returncode != 0:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ', exited with error code ' + str(fn_process.returncode) + ' after ' + str(end_time - start_time))

    # Now updating job output aggregation file with job's output
    job_output_path = os.path.join(job_homedir, 'job_output.json')
    if os.path.exists(job_output_path):
        try:
            with open(job_output_path, 'r') as fd:
                job_output = json.load(fd, object_pairs_hook=collections.OrderedDict)
        except Exception as e:
            exit_with_error('Error: Could not load output of ' + job_name + ':\n' + fill(str(e.__class__) + ': ' + str(e)))
    else:
        # A job that wrote no output file is recorded with empty output
        job_output = {}

    print(job_name + ' -> ' + GREEN() + 'finished running' + ENDC() + ' after ' + str(end_time - start_time))
    print(job_output_to_str(job_output, title=(BOLD() + "Output: " + ENDC()),
                            title_len=len("Output: ")).lstrip())

    # The aggregate file is loaded again rather than reusing the copy read
    # above -- presumably because the job process may have changed it in
    # the meantime; TODO confirm.
    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)
    all_job_outputs[job_id] = job_output

    # Before dumping, see if any new jbors should be resolved now
    for other_job_id in all_job_outputs:
        if all_job_outputs[other_job_id] is None:
            # Skip if job is not done yet (true for ancestor jobs)
            continue
        resolve_job_references(all_job_outputs[other_job_id], all_job_outputs, should_resolve=False)

    # Text mode for the same reason as job_input.json above
    with open(all_job_outputs_path, 'w') as fd:
        json.dump(all_job_outputs, fd, indent=4)
        fd.write('\n')
Example #7
0
def run_one_entry_point(job_id,
                        function,
                        input_hash,
                        run_spec,
                        depends_on,
                        name=None):
    '''
    :param job_id: job ID of the local job to run
    :type job_id: string
    :param function: function to run
    :type function: string
    :param input_hash: input for the job (may include job-based object references)
    :type input_hash: dict
    :param run_spec: run specification from the dxapp.json of the app
    :type run_spec: dict
    :param depends_on: dependencies handed to wait_for_depends_on before the job is started; None is treated as an empty list
    :type depends_on: list or None
    :param name: optional display name; when it is a string, " (job_id:function)" is appended to it, otherwise "job_id:function" is used
    :type name: string or None

    Runs the specified entry point and retrieves the job's output,
    updating job_outputs.json (in $DX_TEST_JOB_HOMEDIRS) appropriately.

    Failures while resolving input, processing depends_on, running the
    entry point or loading its output end the process through
    exit_with_error.
    '''
    print('======')

    homedirs_root = environ['DX_TEST_JOB_HOMEDIRS']
    job_homedir = os.path.join(homedirs_root, job_id)
    job_input_file = os.path.join(job_homedir, 'job_input.json')
    env_path = os.path.join(job_homedir, 'environment')
    all_job_outputs_path = os.path.join(homedirs_root, 'job_outputs.json')

    # The job process runs with its own home directory as $HOME
    job_env = environ.copy()
    job_env['HOME'] = job_homedir

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd,
                                    object_pairs_hook=collections.OrderedDict)

    if isinstance(name, basestring):
        name += ' (' + job_id + ':' + function + ')'
    else:
        name = job_id + ':' + function
    job_name = BLUE() + BOLD() + name + ENDC()
    print(job_name)

    # Resolve local job-based object references
    try:
        resolve_job_references(input_hash, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') +
                        ' when resolving input:\n' + fill(str(e)))

    # Get list of non-closed data objects in the input that appear as
    # DNAnexus links; append to depends_on
    if depends_on is None:
        depends_on = []
    get_implicit_depends_on(input_hash, depends_on)

    try:
        wait_for_depends_on(depends_on, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') +
                        ' when processing depends_on:\n' + fill(str(e)))

    # Save job input to job_input.json. The file is opened in text mode
    # because json.dump emits str, which a binary-mode file rejects on
    # Python 3.
    with open(job_input_file, 'w') as fd:
        json.dump(input_hash, fd, indent=4)
        fd.write('\n')

    print(
        job_output_to_str(input_hash,
                          title=(BOLD() + 'Input: ' + ENDC()),
                          title_len=len("Input: ")).lstrip())

    interpreter = run_spec['interpreter']

    if interpreter == 'bash':
        # Save job input to env vars
        with open(env_path, 'w') as fd:
            var_defs_hash = file_load_utils.gen_bash_vars(
                job_input_file, job_homedir=job_homedir)
            # .items() rather than .iteritems(), which exists only on
            # Python 2 dicts
            for key, val in var_defs_hash.items():
                fd.write("{}={}\n".format(key, val))

    print(BOLD() + 'Logs:' + ENDC())
    start_time = datetime.datetime.now()
    if interpreter == 'bash':
        # Enter the job's home directory, source the saved inputs and the
        # app code, then call the entry point if the code defined it.
        script = '''
          cd {homedir};
          . {env_path};
          . {code_path};
          if [[ $(type -t {function}) == "function" ]];
          then {function};
          else echo "$0: Global scope execution complete. Not invoking entry point function {function} because it was not found" 1>&2;
          fi'''.format(homedir=pipes.quote(job_homedir),
                       env_path=pipes.quote(env_path),
                       code_path=pipes.quote(environ['DX_TEST_CODE_PATH']),
                       function=function)
        invocation_args = ['bash', '-c', '-e'] + (
            ['-x'] if environ.get('DX_TEST_X_FLAG') else []) + [script]
    elif interpreter == 'python2.7':
        # homedir is passed through repr() so that it is spliced into the
        # generated program as a string literal.
        script = '''#!/usr/bin/env python
import os
os.chdir({homedir})

{code}

import dxpy, json
if dxpy.utils.exec_utils.RUN_COUNT == 0:
    dxpy.run()
'''.format(homedir=repr(job_homedir), code=run_spec['code'])

        job_env['DX_TEST_FUNCTION'] = function
        invocation_args = ['python', '-c', script]
    else:
        # Without this branch invocation_args would be unbound below
        exit_with_error(job_name + ' ' + JOB_STATES('failed') +
                        ': unsupported interpreter ' + str(interpreter))

    if USING_PYTHON2:
        invocation_args = [
            arg.encode(sys.stdout.encoding) for arg in invocation_args
        ]
        env = {k: v.encode(sys.stdout.encoding) for k, v in job_env.items()}
    else:
        env = job_env

    fn_process = subprocess.Popen(invocation_args, env=env)

    fn_process.communicate()
    end_time = datetime.datetime.now()

    if fn_process.returncode != 0:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') +
                        ', exited with error code ' +
                        str(fn_process.returncode) + ' after ' +
                        str(end_time - start_time))

    # Now updating job output aggregation file with job's output
    job_output_path = os.path.join(job_homedir, 'job_output.json')
    if os.path.exists(job_output_path):
        try:
            with open(job_output_path, 'r') as fd:
                job_output = json.load(
                    fd, object_pairs_hook=collections.OrderedDict)
        except Exception as e:
            exit_with_error('Error: Could not load output of ' + job_name +
                            ':\n' + fill(str(e.__class__) + ': ' + str(e)))
    else:
        # A job that wrote no output file is recorded with empty output
        job_output = {}

    print(job_name + ' -> ' + GREEN() + 'finished running' + ENDC() +
          ' after ' + str(end_time - start_time))
    print(
        job_output_to_str(job_output,
                          title=(BOLD() + "Output: " + ENDC()),
                          title_len=len("Output: ")).lstrip())

    # The aggregate file is loaded again rather than reusing the copy read
    # above -- presumably because the job process may have changed it in
    # the meantime; TODO confirm.
    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd,
                                    object_pairs_hook=collections.OrderedDict)
    all_job_outputs[job_id] = job_output

    # Before dumping, see if any new jbors should be resolved now
    for other_job_id in all_job_outputs:
        if all_job_outputs[other_job_id] is None:
            # Skip if job is not done yet (true for ancestor jobs)
            continue
        resolve_job_references(all_job_outputs[other_job_id],
                               all_job_outputs,
                               should_resolve=False)

    # Text mode for the same reason as job_input.json above
    with open(all_job_outputs_path, 'w') as fd:
        json.dump(all_job_outputs, fd, indent=4)
        fd.write('\n')