import collections
import datetime
import json
import os
import pipes
import subprocess
import sys
from os import environ

# NOTE: helpers such as BLUE, BOLD, ENDC, GREEN, JOB_STATES, fill,
# USING_PYTHON2, basestring, file_load_utils, resolve_job_references,
# get_implicit_depends_on, wait_for_depends_on, and job_output_to_str are not
# defined here; they appear to come from dxpy's utility modules (this code
# matches dxpy.utils.local_exec_utils) and are assumed to be in scope.


def get_output_json_file():
    """
    :rtype: string
    :returns: Path to output JSON file
    """
    home_dir = environ.get('HOME')
    return os.path.join(home_dir, "job_output.json")
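# A minimal sketch (not part of the original module) of how an entry point
# could persist its output hash to the path returned by get_output_json_file(),
# assuming $HOME points at the job homedir (run_one_entry_point below arranges
# this before launching the job). The output values are hypothetical.
def _example_write_job_output():
    output = {"answer": 42}  # hypothetical output hash
    with open(get_output_json_file(), 'w') as fd:
        json.dump(output, fd, indent=4)
        fd.write('\n')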
def exit_with_error(msg):
    '''
    :param msg: string message to print before exiting

    Print the error message, as well as a blurb on where to find the
    job workspaces
    '''
    msg += '\n'
    msg += 'Local job workspaces can be found in: ' + str(environ.get('DX_TEST_JOB_HOMEDIRS'))
    sys.exit(msg)
def get_input_dir(job_homedir=None):
    '''
    :param job_homedir: explicit value for home directory, used for testing purposes
    :rtype: string
    :returns: path to input directory

    Returns the input directory, where all inputs are downloaded
    '''
    if job_homedir is not None:
        home_dir = job_homedir
    else:
        home_dir = environ.get('HOME')
    idir = os.path.join(home_dir, 'in')
    return idir
def get_output_dir(job_homedir=None):
    '''
    :param job_homedir: explicit value for home directory, used for testing purposes
    :rtype: string
    :returns: path to output directory

    Returns the output directory, where all outputs are created, and uploaded from
    '''
    if job_homedir is not None:
        home_dir = job_homedir
    else:
        home_dir = environ.get('HOME')
    odir = os.path.join(home_dir, 'out')
    return odir
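# The three path helpers above imply this per-job workspace layout, with
# $HOME pointing at a job homedir under $DX_TEST_JOB_HOMEDIRS:
#
#   $HOME/in               <- get_input_dir(): downloaded inputs
#   $HOME/out              <- get_output_dir(): outputs staged for upload
#   $HOME/job_output.json  <- get_output_json_file(): the job's output hash
#
# A minimal sketch (assumed, not from the original module) of preparing a
# fresh workspace for a local test run:
def _example_make_workspace(job_homedir):
    os.makedirs(get_input_dir(job_homedir))   # creates <job_homedir>/in
    os.makedirs(get_output_dir(job_homedir))  # creates <job_homedir>/out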
def run_one_entry_point(job_id, function, input_hash, run_spec, depends_on, name=None):
    '''
    :param job_id: job ID of the local job to run
    :type job_id: string
    :param function: function to run
    :type function: string
    :param input_hash: input for the job (may include job-based object references)
    :type input_hash: dict
    :param run_spec: run specification from the dxapp.json of the app
    :type run_spec: dict

    Runs the specified entry point and retrieves the job's output,
    updating job_outputs.json (in $DX_TEST_JOB_HOMEDIRS) appropriately.
    '''
    print('======')
    job_homedir = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)
    job_env = environ.copy()
    job_env['HOME'] = job_homedir
    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json')

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)

    if isinstance(name, basestring):
        name += ' (' + job_id + ':' + function + ')'
    else:
        name = job_id + ':' + function
    job_name = BLUE() + BOLD() + name + ENDC()
    print(job_name)

    # Resolve local job-based object references
    try:
        resolve_job_references(input_hash, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') +
                        ' when resolving input:\n' + fill(str(e)))

    # Get the list of non-closed data objects in the input that appear as
    # DNAnexus links; append them to depends_on
    if depends_on is None:
        depends_on = []
    get_implicit_depends_on(input_hash, depends_on)

    try:
        wait_for_depends_on(depends_on, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') +
                        ' when processing depends_on:\n' + fill(str(e)))

    # Save job input to job_input.json
    with open(os.path.join(job_homedir, 'job_input.json'), 'wb') as fd:
        json.dump(input_hash, fd, indent=4)
        fd.write(b'\n')

    print(job_output_to_str(input_hash,
                            title=(BOLD() + 'Input: ' + ENDC()),
                            title_len=len("Input: ")).lstrip())

    if run_spec['interpreter'] == 'bash':
        # Save job input to env vars; the following mirrors how env vars are
        # generated on the remote worker
        env_path = os.path.join(job_homedir, 'environment')
        with open(env_path, 'w') as fd:
            fd.write("\n".join(
                ["export {k}=( {vlist} )".format(
                     k=k,
                     vlist=" ".join([pipes.quote(vitem if isinstance(vitem, basestring)
                                                 else json.dumps(vitem))
                                     for vitem in v]))
                 if isinstance(v, list)
                 else "export {k}={v}".format(
                     k=k,
                     v=pipes.quote(v if isinstance(v, basestring) else json.dumps(v)))
                 for k, v in input_hash.items()]))

    print(BOLD() + 'Logs:' + ENDC())
    start_time = datetime.datetime.now()
    if run_spec['interpreter'] == 'bash':
        script = '''
          cd {homedir};
          . {env_path};
          . {code_path};
          if [[ $(type -t {function}) == "function" ]];
          then {function};
          else echo "$0: Global scope execution complete. Not invoking entry point function {function} because it was not found" 1>&2;
          fi'''.format(homedir=pipes.quote(job_homedir),
                       env_path=pipes.quote(os.path.join(job_env['HOME'], 'environment')),
                       code_path=pipes.quote(environ['DX_TEST_CODE_PATH']),
                       function=function)
        invocation_args = ['bash', '-c', '-e'] + \
                          (['-x'] if environ.get('DX_TEST_X_FLAG') else []) + [script]
    elif run_spec['interpreter'] == 'python2.7':
        script = '''#!/usr/bin/env python
import os
os.chdir({homedir})
{code}
import dxpy, json
if dxpy.utils.exec_utils.RUN_COUNT == 0:
    dxpy.run()
'''.format(homedir=repr(job_homedir), code=run_spec['code'])
        job_env['DX_TEST_FUNCTION'] = function
        invocation_args = ['python', '-c', script]

    if USING_PYTHON2:
        invocation_args = [arg.encode(sys.stdout.encoding) for arg in invocation_args]
        env = {k: v.encode(sys.stdout.encoding) for k, v in job_env.items()}
    else:
        env = job_env

    fn_process = subprocess.Popen(invocation_args, env=env)
    fn_process.communicate()
    end_time = datetime.datetime.now()

    if fn_process.returncode != 0:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ', exited with error code ' +
                        str(fn_process.returncode) + ' after ' + str(end_time - start_time))

    # Now update the job output aggregation file with this job's output
    job_output_path = os.path.join(job_env['HOME'], 'job_output.json')
    if os.path.exists(job_output_path):
        try:
            with open(job_output_path, 'r') as fd:
                job_output = json.load(fd, object_pairs_hook=collections.OrderedDict)
        except Exception as e:
            exit_with_error('Error: Could not load output of ' + job_name + ':\n' +
                            fill(str(e.__class__) + ': ' + str(e)))
    else:
        job_output = {}

    print(job_name + ' -> ' + GREEN() + 'finished running' + ENDC() +
          ' after ' + str(end_time - start_time))
    print(job_output_to_str(job_output,
                            title=(BOLD() + "Output: " + ENDC()),
                            title_len=len("Output: ")).lstrip())

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)
    all_job_outputs[job_id] = job_output

    # Before dumping, see if any new job-based object references can be resolved now
    for other_job_id in all_job_outputs:
        if all_job_outputs[other_job_id] is None:
            # Skip if the job is not done yet (true for ancestor jobs)
            continue
        resolve_job_references(all_job_outputs[other_job_id], all_job_outputs,
                               should_resolve=False)

    with open(all_job_outputs_path, 'wb') as fd:
        json.dump(all_job_outputs, fd, indent=4)
        fd.write(b'\n')
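# The env-var serialization inside the bash branch above is dense, so here is
# a standalone restatement of the same logic (a sketch for illustration, not
# part of the original module): lists become bash arrays, and non-string
# values are JSON-encoded before shell-quoting. str stands in for the
# Python 2 basestring used above, and the sample input hash is made up.
def _example_gen_exports(input_hash):
    lines = []
    for k, v in input_hash.items():
        if isinstance(v, list):
            # export k=( item1 item2 ... ), with each item shell-quoted
            items = " ".join(pipes.quote(i if isinstance(i, str) else json.dumps(i))
                             for i in v)
            lines.append("export {k}=( {items} )".format(k=k, items=items))
        else:
            lines.append("export {k}={v}".format(
                k=k, v=pipes.quote(v if isinstance(v, str) else json.dumps(v))))
    return "\n".join(lines)

# _example_gen_exports({"reads": ["a.fq", "b.fq"], "quality": 30})
# returns 'export reads=( a.fq b.fq )\nexport quality=30'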
# Variant of run_one_entry_point that generates the bash environment file via
# file_load_utils.gen_bash_vars instead of building export statements inline;
# it is otherwise identical to the version above.
def run_one_entry_point(job_id, function, input_hash, run_spec, depends_on, name=None):
    '''
    :param job_id: job ID of the local job to run
    :type job_id: string
    :param function: function to run
    :type function: string
    :param input_hash: input for the job (may include job-based object references)
    :type input_hash: dict
    :param run_spec: run specification from the dxapp.json of the app
    :type run_spec: dict

    Runs the specified entry point and retrieves the job's output,
    updating job_outputs.json (in $DX_TEST_JOB_HOMEDIRS) appropriately.
    '''
    print('======')
    job_homedir = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)
    job_env = environ.copy()
    job_env['HOME'] = job_homedir
    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json')

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)

    if isinstance(name, basestring):
        name += ' (' + job_id + ':' + function + ')'
    else:
        name = job_id + ':' + function
    job_name = BLUE() + BOLD() + name + ENDC()
    print(job_name)

    # Resolve local job-based object references
    try:
        resolve_job_references(input_hash, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') +
                        ' when resolving input:\n' + fill(str(e)))

    # Get the list of non-closed data objects in the input that appear as
    # DNAnexus links; append them to depends_on
    if depends_on is None:
        depends_on = []
    get_implicit_depends_on(input_hash, depends_on)

    try:
        wait_for_depends_on(depends_on, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') +
                        ' when processing depends_on:\n' + fill(str(e)))

    # Save job input to job_input.json
    with open(os.path.join(job_homedir, 'job_input.json'), 'wb') as fd:
        json.dump(input_hash, fd, indent=4)
        fd.write(b'\n')

    print(job_output_to_str(input_hash,
                            title=(BOLD() + 'Input: ' + ENDC()),
                            title_len=len("Input: ")).lstrip())

    if run_spec['interpreter'] == 'bash':
        # Save job input to env vars, generated from job_input.json
        env_path = os.path.join(job_homedir, 'environment')
        with open(env_path, 'w') as fd:
            job_input_file = os.path.join(job_homedir, 'job_input.json')
            var_defs_hash = file_load_utils.gen_bash_vars(job_input_file,
                                                          job_homedir=job_homedir)
            # .items() here; the original .iteritems() is Python 2-only
            for key, val in var_defs_hash.items():
                fd.write("{}={}\n".format(key, val))

    print(BOLD() + 'Logs:' + ENDC())
    start_time = datetime.datetime.now()
    if run_spec['interpreter'] == 'bash':
        script = '''
          cd {homedir};
          . {env_path};
          . {code_path};
          if [[ $(type -t {function}) == "function" ]];
          then {function};
          else echo "$0: Global scope execution complete. Not invoking entry point function {function} because it was not found" 1>&2;
          fi'''.format(homedir=pipes.quote(job_homedir),
                       env_path=pipes.quote(os.path.join(job_env['HOME'], 'environment')),
                       code_path=pipes.quote(environ['DX_TEST_CODE_PATH']),
                       function=function)
        invocation_args = ['bash', '-c', '-e'] + \
                          (['-x'] if environ.get('DX_TEST_X_FLAG') else []) + [script]
    elif run_spec['interpreter'] == 'python2.7':
        script = '''#!/usr/bin/env python
import os
os.chdir({homedir})
{code}
import dxpy, json
if dxpy.utils.exec_utils.RUN_COUNT == 0:
    dxpy.run()
'''.format(homedir=repr(job_homedir), code=run_spec['code'])
        job_env['DX_TEST_FUNCTION'] = function
        invocation_args = ['python', '-c', script]

    if USING_PYTHON2:
        invocation_args = [arg.encode(sys.stdout.encoding) for arg in invocation_args]
        env = {k: v.encode(sys.stdout.encoding) for k, v in job_env.items()}
    else:
        env = job_env

    fn_process = subprocess.Popen(invocation_args, env=env)
    fn_process.communicate()
    end_time = datetime.datetime.now()

    if fn_process.returncode != 0:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ', exited with error code ' +
                        str(fn_process.returncode) + ' after ' + str(end_time - start_time))

    # Now update the job output aggregation file with this job's output
    job_output_path = os.path.join(job_env['HOME'], 'job_output.json')
    if os.path.exists(job_output_path):
        try:
            with open(job_output_path, 'r') as fd:
                job_output = json.load(fd, object_pairs_hook=collections.OrderedDict)
        except Exception as e:
            exit_with_error('Error: Could not load output of ' + job_name + ':\n' +
                            fill(str(e.__class__) + ': ' + str(e)))
    else:
        job_output = {}

    print(job_name + ' -> ' + GREEN() + 'finished running' + ENDC() +
          ' after ' + str(end_time - start_time))
    print(job_output_to_str(job_output,
                            title=(BOLD() + "Output: " + ENDC()),
                            title_len=len("Output: ")).lstrip())

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)
    all_job_outputs[job_id] = job_output

    # Before dumping, see if any new job-based object references can be resolved now
    for other_job_id in all_job_outputs:
        if all_job_outputs[other_job_id] is None:
            # Skip if the job is not done yet (true for ancestor jobs)
            continue
        resolve_job_references(all_job_outputs[other_job_id], all_job_outputs,
                               should_resolve=False)

    with open(all_job_outputs_path, 'wb') as fd:
        json.dump(all_job_outputs, fd, indent=4)
        fd.write(b'\n')
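# A hedged usage sketch of run_one_entry_point: the job ID, entry point name,
# input values, and run_spec contents are hypothetical. It assumes
# DX_TEST_JOB_HOMEDIRS and DX_TEST_CODE_PATH are set, the job homedir exists,
# and job_outputs.json has already been seeded in $DX_TEST_JOB_HOMEDIRS.
def _example_run_main():
    run_spec = {'interpreter': 'bash',  # or 'python2.7'
                'code': ''}  # app code; the bash branch sources DX_TEST_CODE_PATH instead
    run_one_entry_point(job_id='localjob-1',
                        function='main',
                        input_hash={'name': 'world'},
                        run_spec=run_spec,
                        depends_on=None)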