def test_file_arrays(self):
    # Create file with junk content
    dxfile = dxpy.upload_string("xxyyzz", project=self.project,
                                wait_on_close=True, name="bubbles")

    # write python code into code.py file
    tmp_path = tempfile.mkdtemp()
    code_path = os.path.join(tmp_path, 'code.py')
    with open(code_path, write_mode) as f:
        f.write("@dxpy.entry_point('main')\n")
        f.write("def main(**job_inputs):\n")
        f.write("\toutput = {}\n")
        f.write("\toutput['plant'] = job_inputs['plant']\n")
        f.write("\treturn output\n")
        f.write("\n")
        f.write("dxpy.run()\n")
    with open(code_path, 'r') as f:
        code = f.read()

    # write arguments table. These are arrays with a single element.
    arg_table = os.path.join(tmp_path, 'table.csv')
    with open(arg_table, write_mode) as csvfile:
        writer = csv.writer(csvfile, delimiter=delimiter)
        header = ["batch ID", "plant", "plant ID"]
        writer.writerow(header)
        writer.writerow(["SRR_1", "[bubbles]", "[" + dxfile.get_id() + "]"])

    applet = dxpy.api.applet_new({
        "name": "ident_file_array",
        "project": self.project,
        "dxapi": "1.0.0",
        "inputSpec": [{"name": "plant", "class": "array:file"}],
        "outputSpec": [{"name": "plant", "class": "array:file"}],
        "runSpec": {"interpreter": "python2.7",
                    "code": code,
                    "distribution": "Ubuntu",
                    "release": "14.04"}
    })

    job_id = run("dx run {} --batch-tsv={} --yes --brief"
                 .format(applet["id"], arg_table)).strip()
    job_desc = dxpy.api.job_describe(job_id)
    self.assertEqual(job_desc["executableName"], 'ident_file_array')
    self.assertEqual(job_desc["input"],
                     {"plant": [{"$dnanexus_link": dxfile.get_id()}]})
def _check_file_syntax(filename, temp_dir, override_lang=None, enforce=True):
    """
    Checks that the code in FILENAME parses, attempting to autodetect
    the language if necessary. Raises IOError if the file cannot be
    read. Raises DXSyntaxError if there is a problem and "enforce" is
    True.
    """
    def check_python(filename):
        # Generate a semi-recognizable name to write the pyc to. Of
        # course it's possible that different files being scanned could
        # have the same basename, so this path won't be unique, but the
        # checks don't run concurrently so this shouldn't cause any
        # problems.
        pyc_path = os.path.join(temp_dir, os.path.basename(filename) + ".pyc")
        try:
            if USING_PYTHON2:
                filename = filename.encode(sys.getfilesystemencoding())
            py_compile.compile(filename, cfile=pyc_path, doraise=True)
        finally:
            try:
                os.unlink(pyc_path)
            except OSError:
                pass

    def check_bash(filename):
        subprocess.check_output(["/bin/bash", "-n", filename], stderr=subprocess.STDOUT)

    if override_lang == "python2.7":
        checker_fn = check_python
    elif override_lang == "bash":
        checker_fn = check_bash
    elif filename.endswith(".py"):
        checker_fn = check_python
    elif filename.endswith(".sh"):
        checker_fn = check_bash
    else:
        # Ignore other kinds of files.
        return

    # Do a test read of the file to catch errors like the file not
    # existing or not being readable.
    open(filename)

    try:
        checker_fn(filename)
    except subprocess.CalledProcessError as e:
        print(filename + " has a syntax error! Interpreter output:", file=sys.stderr)
        for line in e.output.strip("\n").split("\n"):
            print(" " + line.rstrip("\n"), file=sys.stderr)
        if enforce:
            raise DXSyntaxError(filename + " has a syntax error")
    except py_compile.PyCompileError as e:
        print(filename + " has a syntax error! Interpreter output:", file=sys.stderr)
        print(" " + e.msg.strip(), file=sys.stderr)
        if enforce:
            raise DXSyntaxError(e.msg.strip())
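A minimal usage sketch, not part of the original module: it assumes _check_file_syntax and DXSyntaxError are in scope, and the file paths and scratch directory are stand-ins. With enforce=False the checker only reports problems on stderr instead of raising.

# Hypothetical usage; paths and the scratch directory are illustrative.
import tempfile

scratch_dir = tempfile.mkdtemp()
for path in ["resources/usr/bin/helper.py", "resources/usr/bin/helper.sh"]:
    try:
        _check_file_syntax(path, scratch_dir, enforce=False)  # report only, never raise DXSyntaxError
    except IOError as e:
        print("could not read %s: %s" % (path, e))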
def run_entry_points(run_spec):
    '''
    :param run_spec: run specification from the dxapp.json of the app
    :type run_spec: dict

    Runs all job entry points found in
    $DX_TEST_JOB_HOMEDIRS/job_queue.json in a first-in, first-out manner
    until it is an empty array (or an error occurs).
    '''
    job_queue_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_queue.json')
    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json')

    while True:
        with open(job_queue_path, 'r') as fd:
            job_queue = json.load(fd)
        if len(job_queue) == 0:
            return

        with open(all_job_outputs_path, 'r') as fd:
            all_job_outputs = json.load(fd)

        entry_point_to_run = None
        for i, entry_point in enumerate(job_queue):
            runnable = True
            # See if its inputs are ready
            while has_local_job_refs(entry_point['input_hash']):
                try:
                    resolve_job_references(entry_point['input_hash'], all_job_outputs)
                except:
                    runnable = False
                    break
            if runnable:
                entry_point_to_run = job_queue.pop(i)
                break

        if entry_point_to_run is None:
            # Just run the first entry point and let the runner throw
            # the appropriate error
            entry_point_to_run = job_queue.pop(0)

        with open(job_queue_path, 'wb') as fd:
            # Update job queue with updated inputs and after having
            # popped the entry point to be run
            json.dump(job_queue, fd)
            fd.write(b'\n')

        run_one_entry_point(job_id=entry_point_to_run['id'],
                            function=entry_point_to_run['function'],
                            input_hash=entry_point_to_run['input_hash'],
                            run_spec=run_spec,
                            depends_on=entry_point_to_run.get('depends_on', []),
                            name=entry_point_to_run.get('name'))
def queue_entry_point(function, input_hash, depends_on=[], name=None):
    '''
    :param function: function to run
    :param input_hash: input to new job
    :param depends_on: list of data object IDs and/or job IDs (local or
        remote) to wait for before the job can be run
    :type depends_on: list of strings
    :param name: job name (optional)
    :returns: new local job ID

    This function should only be called by a locally running job, so
    all relevant DX_TEST_* environment variables should be set.

    This function will set up the home directory for the job, add an
    entry in job_outputs.json, and append the job information to the
    job_queue.json file. (Both files found in $DX_TEST_JOB_HOMEDIRS.)
    '''
    ensure_env_vars()

    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json')

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)
        job_id = 'localjob-' + str(len(all_job_outputs))

    with open(all_job_outputs_path, 'wb') as fd:
        all_job_outputs[job_id] = None
        json.dump(all_job_outputs, fd, indent=4)
        fd.write(b'\n')

    job_homedir = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)
    os.mkdir(job_homedir)

    job_queue_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_queue.json')
    with open(job_queue_path, 'r') as fd:
        job_queue = json.load(fd)
    job_entry = {"id": job_id,
                 "function": function,
                 "input_hash": input_hash,
                 "depends_on": depends_on}
    if name is not None:
        job_entry['name'] = name
    job_queue.append(job_entry)
    with open(job_queue_path, 'wb') as fd:
        json.dump(job_queue, fd, indent=4)
        fd.write(b'\n')

    return job_id
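A hedged sketch of how the queue is used end to end: a locally running entry point queues a subjob, and the harness later drains job_queue.json in FIFO order via run_entry_points. The function name, inputs, and run_spec below are illustrative, and the DX_TEST_* environment variables are assumed to have been set up already by the local runner.

# Illustrative only; assumes the DX_TEST_* environment is already prepared.
subjob_id = queue_entry_point(function="process",
                              input_hash={"reads": {"$dnanexus_link": "file-xxxx"}},
                              name="process chunk 1")

run_spec = {"interpreter": "bash", "code": "..."}  # normally taken straight from dxapp.json
run_entry_points(run_spec)  # runs queued entry points first-in, first-out until the queue is empty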
def _parse_app_spec(src_dir):
    """Returns the parsed contents of dxapp.json.

    Raises either AppBuilderException or a parser error (exit codes 3 or
    2 respectively) if this cannot be done.
    """
    if not os.path.isdir(src_dir):
        parser.error("%s is not a directory" % src_dir)
    if not os.path.exists(os.path.join(src_dir, "dxapp.json")):
        raise dxpy.app_builder.AppBuilderException("Directory %s does not contain dxapp.json: not a valid DNAnexus app source directory" % src_dir)
    with open(os.path.join(src_dir, "dxapp.json")) as app_desc:
        try:
            return json_load_raise_on_duplicates(app_desc)
        except Exception as e:
            raise dxpy.app_builder.AppBuilderException("Could not parse dxapp.json file as JSON: " + e.message)
def _check_syntax(code, lang, temp_dir, enforce=True):
    """
    Checks that the code whose text is in CODE parses as LANG.

    Raises DXSyntaxError if there is a problem and "enforce" is True.
    """
    # This function needs the language to be explicitly set, so we can
    # generate an appropriate temp filename.
    if lang == 'python2.7':
        temp_basename = 'inlined_code_from_dxapp_json.py'
    elif lang == 'bash':
        temp_basename = 'inlined_code_from_dxapp_json.sh'
    else:
        raise ValueError('lang must be one of "python2.7" or "bash"')
    # Dump the contents out to a temporary file, then call _check_file_syntax.
    with open(os.path.join(temp_dir, temp_basename), 'w') as ofile:
        ofile.write(code)
    _check_file_syntax(os.path.join(temp_dir, temp_basename), temp_dir,
                       override_lang=lang, enforce=enforce)
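A short sketch, assuming DXSyntaxError and a scratch directory are available: inline bash (as it might appear in a dxapp.json runSpec.code field) is written to a temp file and checked with bash -n; with enforce left at its default of True, a parse error surfaces as DXSyntaxError.

# Hypothetical usage; the broken bash snippet is made up for illustration.
import tempfile

temp_dir = tempfile.mkdtemp()
bad_bash = "if true\nthen echo hello\n"   # missing "fi" -> parse error under bash -n
try:
    _check_syntax(bad_bash, "bash", temp_dir)
except DXSyntaxError as e:
    print("inline runSpec.code failed the syntax check:", e)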
def get_input_spec_patterns():
    '''
    Extract the inputSpec patterns, if they exist -- modified from
    dx-upload-all-outputs.

    Returns a dict of all patterns, with keys equal to the respective
    input parameter names.
    '''
    input_spec = None
    if 'DX_JOB_ID' in environ:
        # works in the cloud, not locally
        job_desc = dxpy.describe(dxpy.JOB_ID)
        if job_desc["function"] == "main":
            # The input spec does not apply for subjobs
            desc = dxpy.describe(job_desc.get("app", job_desc.get("applet")))
            if "inputSpec" in desc:
                input_spec = desc["inputSpec"]
    elif 'DX_TEST_DXAPP_JSON' in environ:
        # works only locally
        path_to_dxapp_json = environ['DX_TEST_DXAPP_JSON']
        with open(path_to_dxapp_json) as fd:
            dxapp_json = json.load(fd)
            input_spec = dxapp_json.get('inputSpec')

    # convert to a dictionary. Each entry in the input spec
    # has {name, class} attributes.
    if input_spec is None:
        return {}

    # For each field name, return its patterns.
    # Make sure a pattern is legal, ignore illegal patterns.
    def is_legal_pattern(pattern):
        return "*" in pattern
    patterns_dict = {}
    for spec in input_spec:
        name = spec['name']
        if 'patterns' in spec:
            patterns_dict[name] = []
            for p in spec['patterns']:
                if is_legal_pattern(p):
                    patterns_dict[name].append(p)
    return patterns_dict
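For illustration only (this inputSpec is made up): the function keeps a key for every input that declares "patterns" and retains only the patterns containing a wildcard.

# Hypothetical inputSpec and the dict this function would derive from it:
input_spec = [
    {"name": "reads", "class": "array:file", "patterns": ["*.fastq.gz", "*.fq.gz"]},
    {"name": "reference", "class": "file", "patterns": ["genome.fa"]},  # no "*": kept as an empty list
    {"name": "min_quality", "class": "int"},                            # no "patterns" key: omitted
]
# Expected result: {"reads": ["*.fastq.gz", "*.fq.gz"], "reference": []}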
def test_basic(self):
    # write python code into code.py file
    tmp_path = tempfile.mkdtemp()
    code_path = os.path.join(tmp_path, 'code.py')
    with open(code_path, write_mode) as f:
        f.write("@dxpy.entry_point('main')\n")
        f.write("def main(**job_inputs):\n")
        f.write("\toutput = {}\n")
        f.write("\toutput['thresholds'] = job_inputs['thresholds']\n")
        f.write("\toutput['pie'] = job_inputs['pie'] + 1\n")
        f.write("\toutput['misc'] = {'n': 'non', 'y': 'oui'}\n")
        f.write("\treturn output\n")
        f.write("\n")
        f.write("dxpy.run()\n")
    with open(code_path, 'r') as f:
        code = f.read()

    # write arguments table
    arg_table = os.path.join(tmp_path, 'table.csv')
    with open(arg_table, write_mode) as csvfile:
        writer = csv.writer(csvfile, delimiter=delimiter)
        header = ["batch ID", "thresholds", "pie", "misc"]
        writer.writerow(header)
        writer.writerow(["SRR_1", "[10,81]", "3.12", "{}"])

    applet = dxpy.api.applet_new({
        "name": "copy_all",
        "project": self.project,
        "dxapi": "1.0.0",
        "inputSpec": [{"name": "thresholds", "class": "array:int"},
                      {"name": "pie", "class": "float"},
                      {"name": "misc", "class": "hash"}],
        "outputSpec": [{"name": "thresholds", "class": "array:int"},
                       {"name": "pie", "class": "float"},
                       {"name": "misc", "class": "hash"}],
        "runSpec": {"interpreter": "python2.7",
                    "code": code,
                    "distribution": "Ubuntu",
                    "release": "14.04"}
    })

    # run in batch mode
    job_id = run("dx run {} --batch-tsv={} --yes --brief"
                 .format(applet["id"], arg_table)).strip()
    job_desc = dxpy.api.job_describe(job_id)
    self.assertEqual(job_desc["executableName"], 'copy_all')
    self.assertEqual(job_desc["input"],
                     {"thresholds": [10, 81], "misc": {}, "pie": 3.12})

    # run in batch mode with --batch-folders
    job_id = run("dx run {} --batch-tsv={} --batch-folders --yes --brief"
                 .format(applet["id"], arg_table)).strip()
    job_desc = dxpy.api.job_describe(job_id)
    self.assertEqual(job_desc["folder"], "/SRR_1")

    # run in batch mode with --batch-folders and --destination
    job_id = run("dx run {} --batch-tsv={} --batch-folders --destination={}:/run_01 --yes --brief"
                 .format(applet["id"], arg_table, self.project)).strip()
    job_desc = dxpy.api.job_describe(job_id)
    self.assertEqual(job_desc["folder"], "/run_01/SRR_1")
def _build_app_remote(mode, src_dir, publish=False, destination_override=None,
                      version_override=None, bill_to_override=None, dx_toolkit_autodep="stable",
                      do_version_autonumbering=True, do_try_update=True, do_parallel_build=True,
                      do_check_syntax=True):
    if mode == 'app':
        builder_app = 'app-tarball_app_builder'
    else:
        builder_app = 'app-tarball_applet_builder'

    temp_dir = tempfile.mkdtemp()

    # TODO: this is vestigial, the "auto" setting should be removed.
    if dx_toolkit_autodep == "auto":
        dx_toolkit_autodep = "stable"

    build_options = {'dx_toolkit_autodep': dx_toolkit_autodep}

    if version_override:
        build_options['version_override'] = version_override
    elif do_version_autonumbering:
        # If autonumbering is DISABLED, the interior run of dx-build-app
        # will detect the correct version to use without our help. If it
        # is ENABLED, the version suffix might depend on the state of
        # the git repository. Since we'll remove the .git directory
        # before uploading, we need to determine the correct version to
        # use here and pass it in to the interior run of dx-build-app.
        if do_version_autonumbering:
            app_spec = _parse_app_spec(src_dir)
            original_version = app_spec['version']
            app_describe = None
            try:
                app_describe = dxpy.api.app_describe("app-" + app_spec["name"], alias=original_version, always_retry=False)
            except dxpy.exceptions.DXAPIError as e:
                if e.name == 'ResourceNotFound' or (mode == 'applet' and e.name == 'PermissionDenied'):
                    pass
                else:
                    raise e
            if app_describe is not None:
                if app_describe.has_key('published') or not do_try_update:
                    # The version we wanted was taken; fall back to the
                    # autogenerated version number.
                    build_options['version_override'] = original_version + _get_version_suffix(src_dir, original_version)

    # The following flags are basically passed through verbatim.
    if bill_to_override:
        build_options['bill_to_override'] = bill_to_override
    if not do_version_autonumbering:
        build_options['do_version_autonumbering'] = False
    if not do_try_update:
        build_options['do_try_update'] = False
    if not do_parallel_build:
        build_options['do_parallel_build'] = False
    if not do_check_syntax:
        build_options['do_check_syntax'] = False

    using_temp_project_for_remote_build = False

    # If building an applet, run the builder app in the destination
    # project. If building an app, run the builder app in a temporary
    # project.
    dest_folder = None
    dest_applet_name = None
    if mode == "applet":
        # Translate the --destination flag as follows. If --destination
        # is PROJ:FOLDER/NAME,
        #
        # 1. Run the builder app in PROJ
        # 2. Make the output folder FOLDER
        # 3. Supply --destination=NAME to the interior call of dx-build-applet.
        build_project_id = dxpy.WORKSPACE_ID
        if destination_override:
            build_project_id, dest_folder, dest_applet_name = parse_destination(destination_override)
        if build_project_id is None:
            parser.error("Can't create an applet without specifying a destination project; please use the -d/--destination flag to explicitly specify a project")
        if dest_applet_name:
            build_options['destination_override'] = '/' + dest_applet_name

    elif mode == "app":
        using_temp_project_for_remote_build = True
        build_project_id = dxpy.api.project_new({"name": "dx-build-app --remote temporary project"})["id"]

    try:
        # Resolve relative paths and symlinks here so we have something
        # reasonable to write in the job name below.
        src_dir = os.path.realpath(src_dir)

        # Show the user some progress as the tarball is being generated.
        # Hopefully this will help them to understand when their tarball
        # is huge (e.g. the target directory already has a whole bunch
        # of binaries in it) and interrupt before uploading begins.
        app_tarball_file = os.path.join(temp_dir, "app_tarball.tar.gz")
        tar_subprocess = subprocess.Popen(["tar", "-czf", "-", "--exclude", "./.git", "."],
                                          cwd=src_dir, stdout=subprocess.PIPE)
        with open(app_tarball_file, 'wb') as tar_output_file:
            total_num_bytes = 0
            last_console_update = 0
            start_time = time.time()
            printed_static_message = False
            # Pipe the output of tar into the output file
            while True:
                tar_exitcode = tar_subprocess.poll()
                data = tar_subprocess.stdout.read(4 * 1024 * 1024)
                if tar_exitcode is not None and len(data) == 0:
                    break
                tar_output_file.write(data)
                total_num_bytes += len(data)
                current_time = time.time()
                # Don't show status messages at all for very short tar
                # operations (< 1.0 sec)
                if current_time - last_console_update > 0.25 and current_time - start_time > 1.0:
                    if sys.stderr.isatty():
                        if last_console_update > 0:
                            sys.stderr.write("\r")
                        sys.stderr.write("Compressing target directory {dir}... ({kb_so_far:,} kb)".format(
                            dir=src_dir, kb_so_far=total_num_bytes / 1024))
                        sys.stderr.flush()
                        last_console_update = current_time
                    elif not printed_static_message:
                        # Print a message (once only) when stderr is not
                        # going to a live console
                        sys.stderr.write("Compressing target directory %s..." % (src_dir,))
                        printed_static_message = True
        if last_console_update > 0:
            sys.stderr.write("\n")
        if tar_exitcode != 0:
            raise Exception("tar exited with non-zero exit code " + str(tar_exitcode))

        dxpy.set_workspace_id(build_project_id)

        remote_file = dxpy.upload_local_file(app_tarball_file, media_type="application/gzip",
                                             wait_on_close=True, show_progress=True)

        try:
            input_hash = {
                "input_file": dxpy.dxlink(remote_file),
                "build_options": build_options
                }
            if mode == 'app':
                input_hash["publish"] = publish
            api_options = {
                "name": "Remote build of %s" % (os.path.basename(src_dir),),
                "input": input_hash,
                "project": build_project_id,
                }
            if dest_folder:
                api_options["folder"] = dest_folder
            app_run_result = dxpy.api.app_run(builder_app, input_params=api_options)
            job_id = app_run_result["id"]

            print("Started builder job %s" % (job_id,))
            try:
                subprocess.check_call(["dx", "watch", job_id])
            except subprocess.CalledProcessError as e:
                if e.returncode == 3:
                    # Some kind of failure to build the app. The reason
                    # for the failure is probably self-evident from the
                    # job log (and if it's not, the CalledProcessError
                    # is not informative anyway), so just propagate the
                    # return code without additional remarks.
                    sys.exit(3)
                else:
                    raise e

            dxpy.DXJob(job_id).wait_on_done(interval=1)

            if mode == 'applet':
                applet_id, _ = dxpy.get_dxlink_ids(dxpy.api.job_describe(job_id)['output']['output_applet'])
                return applet_id
            else:
                # TODO: determine and return the app ID, to allow
                # running the app if args.run is specified
                return None
        finally:
            if not using_temp_project_for_remote_build:
                dxpy.DXProject(build_project_id).remove_objects([remote_file.get_id()])
    finally:
        if using_temp_project_for_remote_build:
            dxpy.api.project_destroy(build_project_id, {"terminateJobs": True})
        shutil.rmtree(temp_dir)
def get_input_hash():
    with open(job_input_file) as fh:
        job_input = json.load(fh)
    return job_input
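An assumption about the surrounding context, since the snippet does not show it: job_input_file is expected to be a module-level path pointing at the job's job_input.json, i.e. the same file that run_one_entry_point writes before invoking the entry point. A minimal sketch:

# Hypothetical module-level setup that get_input_hash relies on.
import os

job_input_file = os.path.join(os.environ["HOME"], "job_input.json")

inputs = get_input_hash()   # e.g. {"thresholds": [10, 81], "pie": 3.12, "misc": {}}
print(sorted(inputs.keys()))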
def _verify_app_source_dir_impl(src_dir, temp_dir, mode, enforce=True):
    """Performs syntax and lint checks on the app source.

    Precondition: the dxapp.json file exists and can be parsed.
    """
    _lint(os.path.join(src_dir, "dxapp.json"), mode)

    # Check that the entry point file parses as the type it is going to
    # be interpreted as. The extension is irrelevant.
    manifest = json.load(open(os.path.join(src_dir, "dxapp.json")))
    if "runSpec" in manifest:
        if "interpreter" not in manifest['runSpec']:
            raise dxpy.app_builder.AppBuilderException('runSpec.interpreter field was not present')

        if manifest['runSpec']['interpreter'] in ["python2.7", "bash"]:
            if "file" in manifest['runSpec']:
                entry_point_file = os.path.abspath(os.path.join(src_dir, manifest['runSpec']['file']))
                try:
                    _check_file_syntax(entry_point_file, temp_dir, override_lang=manifest['runSpec']['interpreter'], enforce=enforce)
                except IOError as e:
                    raise dxpy.app_builder.AppBuilderException(
                        'Could not open runSpec.file=%r. The problem was: %s' % (entry_point_file, e))
                except DXSyntaxError:
                    raise dxpy.app_builder.AppBuilderException('Entry point file %s has syntax errors, see above for details. Rerun with --no-check-syntax to proceed anyway.' % (entry_point_file,))
            elif "code" in manifest['runSpec']:
                try:
                    _check_syntax(manifest['runSpec']['code'], manifest['runSpec']['interpreter'], temp_dir, enforce=enforce)
                except DXSyntaxError:
                    raise dxpy.app_builder.AppBuilderException('Code in runSpec.code has syntax errors, see above for details. Rerun with --no-check-syntax to proceed anyway.')

        if 'execDepends' in manifest['runSpec']:
            if not isinstance(manifest['runSpec']['execDepends'], list):
                raise dxpy.app_builder.AppBuilderException('Expected runSpec.execDepends to be an array. Rerun with --no-check-syntax to proceed anyway.')
            if not all(isinstance(dep, dict) for dep in manifest['runSpec']['execDepends']):
                raise dxpy.app_builder.AppBuilderException('Expected runSpec.execDepends to be an array of hashes. Rerun with --no-check-syntax to proceed anyway.')
            if any(dep.get('package_manager', 'apt') != 'apt' for dep in manifest['runSpec']['execDepends']):
                if not isinstance(manifest.get('access'), dict) or 'network' not in manifest['access']:
                    msg = '\n'.join(['runSpec.execDepends specifies non-APT dependencies, but no network access spec is given.',
                                     'Add {"access": {"network": ["*"]}} to allow dependencies to install.',
                                     'See https://wiki.dnanexus.com/Developer-Tutorials/Request-Additional-App-Resources#Network-Access.',
                                     'Rerun with --no-check-syntax to proceed anyway.'])
                    raise dxpy.app_builder.AppBuilderException(msg)

    if 'authorizedUsers' in manifest:
        if not isinstance(manifest['authorizedUsers'], list) or isinstance(manifest['authorizedUsers'], basestring):
            raise dxpy.app_builder.AppBuilderException('Expected authorizedUsers to be a list of strings')
        for thing in manifest['authorizedUsers']:
            if thing != 'PUBLIC' and (not isinstance(thing, basestring) or not re.match("^(org-|user-)", thing)):
                raise dxpy.app_builder.AppBuilderException('authorizedUsers field contains an entry which is not either the string "PUBLIC" or a user or org ID')

    # Check all other files that are going to be in the resources tree.
    # For these we detect the language based on the filename extension.
    # Obviously this check can have false positives, since the app can
    # execute (or not execute!) all these files in whatever way it
    # wishes, e.g. it could use Python != 2.7 or some non-bash shell.
    # Consequently errors here are non-fatal.
    files_with_problems = []
    for dirpath, dirnames, filenames in os.walk(os.path.abspath(os.path.join(src_dir, "resources"))):
        for filename in filenames:
            # On Mac OS, the resource fork for "FILE.EXT" gets tarred up
            # as a file named "._FILE.EXT". To a naive check this
            # appears to be a file of the same extension. Therefore, we
            # exclude these from syntax checking since they are likely
            # to not parse as whatever language they appear to be.
            if not filename.startswith("._"):
                try:
                    _check_file_syntax(os.path.join(dirpath, filename), temp_dir, enforce=True)
                except IOError as e:
                    raise dxpy.app_builder.AppBuilderException(
                        'Could not open file in resources directory %r. The problem was: %s' %
                        (os.path.join(dirpath, filename), e)
                        )
                except DXSyntaxError:
                    # Suppresses errors from _check_file_syntax so we
                    # only print a nice error message
                    files_with_problems.append(os.path.join(dirpath, filename))

    if files_with_problems:
        # Make a message of the form:
        #    "/path/to/my/app.py"
        # OR "/path/to/my/app.py and 3 other files"
        files_str = files_with_problems[0] if len(files_with_problems) == 1 else (files_with_problems[0] + " and " + str(len(files_with_problems) - 1) + " other file" + ("s" if len(files_with_problems) > 2 else ""))
        logging.warn('%s contained syntax errors, see above for details' % (files_str,))
def _lint(dxapp_json_filename, mode):
    """
    Examines the specified dxapp.json file and warns about any
    violations of app guidelines.

    Precondition: the dxapp.json file exists and can be parsed.
    """
    def _find_readme(dirname):
        for basename in ['README.md', 'Readme.md', 'readme.md']:
            if os.path.exists(os.path.join(dirname, basename)):
                return os.path.join(dirname, basename)
        return None

    app_spec = json.load(open(dxapp_json_filename))

    dirname = os.path.basename(os.path.dirname(os.path.abspath(dxapp_json_filename)))

    if mode == "app":
        if 'title' not in app_spec:
            logger.warn('app is missing a title, please add one in the "title" field of dxapp.json')

        if 'summary' in app_spec:
            if app_spec['summary'].endswith('.'):
                logger.warn('summary "%s" should be a short phrase not ending in a period' % (app_spec['summary'],))
        else:
            logger.warn('app is missing a summary, please add one in the "summary" field of dxapp.json')

        readme_filename = _find_readme(os.path.dirname(dxapp_json_filename))
        if 'description' in app_spec:
            if readme_filename:
                logger.warn('"description" field shadows file ' + readme_filename)
            if not app_spec['description'].strip().endswith('.'):
                logger.warn('"description" field should be written in complete sentences and end with a period')
        else:
            if readme_filename is None:
                logger.warn("app is missing a description, please supply one in README.md")

        if 'categories' in app_spec:
            for category in app_spec['categories']:
                if category not in APP_CATEGORIES:
                    logger.warn('app has unrecognized category "%s"' % (category,))
                if category == 'Import':
                    if 'title' in app_spec and not app_spec['title'].endswith('Importer'):
                        logger.warn('title "%s" should end in "Importer"' % (app_spec['title'],))
                if category == 'Export':
                    if 'title' in app_spec and not app_spec['title'].endswith('Exporter'):
                        logger.warn('title "%s" should end in "Exporter"' % (app_spec['title'],))

    if 'name' in app_spec:
        if app_spec['name'] != app_spec['name'].lower():
            logger.warn('name "%s" should be all lowercase' % (app_spec['name'],))
        if dirname != app_spec['name']:
            logger.warn('app name "%s" does not match containing directory "%s"' % (app_spec['name'], dirname))
    else:
        logger.warn('app is missing a name, please add one in the "name" field of dxapp.json')

    if 'version' in app_spec:
        if not APP_VERSION_RE.match(app_spec['version']):
            logger.warn('"version" %s should be semver compliant (e.g. of the form X.Y.Z)' % (app_spec['version'],))

    # Note that identical checks are performed on the server side (and
    # will cause the app build to fail), but the checks here are printed
    # sooner and multiple naming problems can be detected in a single
    # pass.
    if 'inputSpec' in app_spec:
        for i, input_field in enumerate(app_spec['inputSpec']):
            if not re.match("^[a-zA-Z_][0-9a-zA-Z_]*$", input_field['name']):
                logger.error('input %d has illegal name "%s" (must match ^[a-zA-Z_][0-9a-zA-Z_]*$)' % (i, input_field['name']))
    else:
        logger.warn("dxapp.json contains no input specification (inputSpec). Your applet will not be usable as an " +
                    "app, runnable from the GUI, or composable using workflows.")

    if 'outputSpec' in app_spec:
        for i, output_field in enumerate(app_spec['outputSpec']):
            if not re.match("^[a-zA-Z_][0-9a-zA-Z_]*$", output_field['name']):
                logger.error('output %d has illegal name "%s" (must match ^[a-zA-Z_][0-9a-zA-Z_]*$)' % (i, output_field['name']))
    else:
        logger.warn("dxapp.json contains no output specification (outputSpec). Your applet will not be usable as an " +
                    "app, runnable from the GUI, or composable using workflows.")
def run_one_entry_point(job_id, function, input_hash, run_spec, depends_on, name=None):
    '''
    :param job_id: job ID of the local job to run
    :type job_id: string
    :param function: function to run
    :type function: string
    :param input_hash: input for the job (may include job-based object references)
    :type input_hash: dict
    :param run_spec: run specification from the dxapp.json of the app
    :type run_spec: dict

    Runs the specified entry point and retrieves the job's output,
    updating job_outputs.json (in $DX_TEST_JOB_HOMEDIRS) appropriately.
    '''
    print('======')

    job_homedir = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)

    job_env = environ.copy()
    job_env['HOME'] = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)

    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json')

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)

    if isinstance(name, basestring):
        name += ' (' + job_id + ':' + function + ')'
    else:
        name = job_id + ':' + function
    job_name = BLUE() + BOLD() + name + ENDC()
    print(job_name)

    # Resolve local job-based object references
    try:
        resolve_job_references(input_hash, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ' when resolving input:\n' + fill(str(e)))

    # Get list of non-closed data objects in the input that appear as
    # DNAnexus links; append to depends_on
    if depends_on is None:
        depends_on = []
    get_implicit_depends_on(input_hash, depends_on)

    try:
        wait_for_depends_on(depends_on, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ' when processing depends_on:\n' + fill(str(e)))

    # Save job input to job_input.json
    with open(os.path.join(job_homedir, 'job_input.json'), 'wb') as fd:
        json.dump(input_hash, fd, indent=4)
        fd.write(b'\n')

    print(job_output_to_str(input_hash, title=(BOLD() + 'Input: ' + ENDC()),
                            title_len=len("Input: ")).lstrip())

    if run_spec['interpreter'] == 'bash':
        # Save job input to env vars
        env_path = os.path.join(job_homedir, 'environment')
        with open(env_path, 'w') as fd:
            job_input_file = os.path.join(job_homedir, 'job_input.json')
            var_defs_hash = file_load_utils.gen_bash_vars(job_input_file, job_homedir=job_homedir)
            for key, val in var_defs_hash.iteritems():
                fd.write("{}={}\n".format(key, val))

    print(BOLD() + 'Logs:' + ENDC())
    start_time = datetime.datetime.now()
    if run_spec['interpreter'] == 'bash':
        script = '''
          cd {homedir};
          . {env_path};
          . {code_path};
          if [[ $(type -t {function}) == "function" ]];
          then {function};
          else echo "$0: Global scope execution complete. Not invoking entry point function {function} because it was not found" 1>&2;
          fi'''.format(homedir=pipes.quote(job_homedir),
                       env_path=pipes.quote(os.path.join(job_env['HOME'], 'environment')),
                       code_path=pipes.quote(environ['DX_TEST_CODE_PATH']),
                       function=function)
        invocation_args = ['bash', '-c', '-e'] + (['-x'] if environ.get('DX_TEST_X_FLAG') else []) + [script]
    elif run_spec['interpreter'] == 'python2.7':
        script = '''#!/usr/bin/env python
import os
os.chdir({homedir})

{code}

import dxpy, json
if dxpy.utils.exec_utils.RUN_COUNT == 0:
    dxpy.run()
'''.format(homedir=repr(job_homedir),
           code=run_spec['code'])

        job_env['DX_TEST_FUNCTION'] = function
        invocation_args = ['python', '-c', script]

    if USING_PYTHON2:
        invocation_args = [arg.encode(sys.stdout.encoding) for arg in invocation_args]
        env = {k: v.encode(sys.stdout.encoding) for k, v in job_env.items()}
    else:
        env = job_env

    fn_process = subprocess.Popen(invocation_args, env=env)

    fn_process.communicate()
    end_time = datetime.datetime.now()

    if fn_process.returncode != 0:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ', exited with error code ' +
                        str(fn_process.returncode) + ' after ' + str(end_time - start_time))

    # Now updating job output aggregation file with job's output
    job_output_path = os.path.join(job_env['HOME'], 'job_output.json')
    if os.path.exists(job_output_path):
        try:
            with open(job_output_path, 'r') as fd:
                job_output = json.load(fd, object_pairs_hook=collections.OrderedDict)
        except Exception as e:
            exit_with_error('Error: Could not load output of ' + job_name + ':\n' + fill(str(e.__class__) + ': ' + str(e)))
    else:
        job_output = {}

    print(job_name + ' -> ' + GREEN() + 'finished running' + ENDC() + ' after ' + str(end_time - start_time))
    print(job_output_to_str(job_output, title=(BOLD() + "Output: " + ENDC()),
                            title_len=len("Output: ")).lstrip())

    with open(os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json'), 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)
    all_job_outputs[job_id] = job_output

    # Before dumping, see if any new jbors should be resolved now
    for other_job_id in all_job_outputs:
        if all_job_outputs[other_job_id] is None:
            # Skip if job is not done yet (true for ancestor jobs)
            continue
        resolve_job_references(all_job_outputs[other_job_id], all_job_outputs, should_resolve=False)

    with open(os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json'), 'wb') as fd:
        json.dump(all_job_outputs, fd, indent=4)
        fd.write(b'\n')
def run_one_entry_point(job_id, function, input_hash, run_spec, depends_on, name=None):
    '''
    :param job_id: job ID of the local job to run
    :type job_id: string
    :param function: function to run
    :type function: string
    :param input_hash: input for the job (may include job-based object references)
    :type input_hash: dict
    :param run_spec: run specification from the dxapp.json of the app
    :type run_spec: dict

    Runs the specified entry point and retrieves the job's output,
    updating job_outputs.json (in $DX_TEST_JOB_HOMEDIRS) appropriately.
    '''
    print('======')

    job_homedir = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)

    job_env = environ.copy()
    job_env['HOME'] = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)

    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json')

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)

    if isinstance(name, basestring):
        name += ' (' + job_id + ':' + function + ')'
    else:
        name = job_id + ':' + function
    job_name = BLUE() + BOLD() + name + ENDC()
    print(job_name)

    # Resolve local job-based object references
    try:
        resolve_job_references(input_hash, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ' when resolving input:\n' + fill(str(e)))

    # Get list of non-closed data objects in the input that appear as
    # DNAnexus links; append to depends_on
    if depends_on is None:
        depends_on = []
    get_implicit_depends_on(input_hash, depends_on)

    try:
        wait_for_depends_on(depends_on, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ' when processing depends_on:\n' + fill(str(e)))

    # Save job input to job_input.json
    with open(os.path.join(job_homedir, 'job_input.json'), 'wb') as fd:
        json.dump(input_hash, fd, indent=4)
        fd.write(b'\n')

    print(job_output_to_str(input_hash, title=(BOLD() + 'Input: ' + ENDC()),
                            title_len=len("Input: ")).lstrip())

    if run_spec['interpreter'] == 'bash':
        # Save job input to env vars
        env_path = os.path.join(job_homedir, 'environment')
        with open(env_path, 'w') as fd:
            # Following code is what is used to generate env vars on the remote worker
            fd.write("\n".join(["export {k}=( {vlist} )".format(k=k, vlist=" ".join([pipes.quote(vitem if isinstance(vitem, basestring) else json.dumps(vitem)) for vitem in v]))
                                if isinstance(v, list)
                                else "export {k}={v}".format(k=k, v=pipes.quote(v if isinstance(v, basestring) else json.dumps(v)))
                                for k, v in input_hash.items()]))

    print(BOLD() + 'Logs:' + ENDC())
    start_time = datetime.datetime.now()
    if run_spec['interpreter'] == 'bash':
        script = '''
          cd {homedir};
          . {env_path};
          . {code_path};
          if [[ $(type -t {function}) == "function" ]];
          then {function};
          else echo "$0: Global scope execution complete. Not invoking entry point function {function} because it was not found" 1>&2;
          fi'''.format(homedir=pipes.quote(job_homedir),
                       env_path=pipes.quote(os.path.join(job_env['HOME'], 'environment')),
                       code_path=pipes.quote(environ['DX_TEST_CODE_PATH']),
                       function=function)
        invocation_args = ['bash', '-c', '-e'] + (['-x'] if environ.get('DX_TEST_X_FLAG') else []) + [script]
    elif run_spec['interpreter'] == 'python2.7':
        script = '''#!/usr/bin/env python
import os
os.chdir({homedir})

{code}

import dxpy, json
if dxpy.utils.exec_utils.RUN_COUNT == 0:
    dxpy.run()
'''.format(homedir=repr(job_homedir),
           code=run_spec['code'])

        job_env['DX_TEST_FUNCTION'] = function
        invocation_args = ['python', '-c', script]

    if USING_PYTHON2:
        invocation_args = [arg.encode(sys.stdout.encoding) for arg in invocation_args]
        env = {k: v.encode(sys.stdout.encoding) for k, v in job_env.items()}
    else:
        env = job_env

    fn_process = subprocess.Popen(invocation_args, env=env)

    fn_process.communicate()
    end_time = datetime.datetime.now()

    if fn_process.returncode != 0:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ', exited with error code ' +
                        str(fn_process.returncode) + ' after ' + str(end_time - start_time))

    # Now updating job output aggregation file with job's output
    job_output_path = os.path.join(job_env['HOME'], 'job_output.json')
    if os.path.exists(job_output_path):
        try:
            with open(job_output_path, 'r') as fd:
                job_output = json.load(fd, object_pairs_hook=collections.OrderedDict)
        except Exception as e:
            exit_with_error('Error: Could not load output of ' + job_name + ':\n' + fill(str(e.__class__) + ': ' + str(e)))
    else:
        job_output = {}

    print(job_name + ' -> ' + GREEN() + 'finished running' + ENDC() + ' after ' + str(end_time - start_time))
    print(job_output_to_str(job_output, title=(BOLD() + "Output: " + ENDC()),
                            title_len=len("Output: ")).lstrip())

    with open(os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json'), 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)
    all_job_outputs[job_id] = job_output

    # Before dumping, see if any new jbors should be resolved now
    for other_job_id in all_job_outputs:
        if all_job_outputs[other_job_id] is None:
            # Skip if job is not done yet (true for ancestor jobs)
            continue
        resolve_job_references(all_job_outputs[other_job_id], all_job_outputs, should_resolve=False)

    with open(os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json'), 'wb') as fd:
        json.dump(all_job_outputs, fd, indent=4)
        fd.write(b'\n')