Example #1
    def test_file_arrays(self):
        # Create file with junk content
        dxfile = dxpy.upload_string("xxyyzz",
                                    project=self.project,
                                    wait_on_close=True,
                                    name="bubbles")

        # write python code into code.py file
        tmp_path = tempfile.mkdtemp()
        code_path = os.path.join(tmp_path, 'code.py')
        with open(code_path, write_mode) as f:
            f.write("@dxpy.entry_point('main')\n")
            f.write("def main(**job_inputs):\n")
            f.write("\toutput = {}\n")
            f.write("\toutput['plant'] = job_inputs['plant']\n")
            f.write("\treturn output\n")
            f.write("\n")
            f.write("dxpy.run()\n")
        with open(code_path, 'r') as f:
            code = f.read()

        # write arguments table. These are arrays with a single element.
        arg_table = os.path.join(tmp_path, 'table.csv')
        with open(arg_table, write_mode) as csvfile:
            writer = csv.writer(csvfile, delimiter=delimiter)
            header = ["batch ID", "plant", "plant ID"]
            writer.writerow(header)
            writer.writerow(
                ["SRR_1", "[bubbles]", "[" + dxfile.get_id() + "]"])

        applet = dxpy.api.applet_new({
            "name": "ident_file_array",
            "project": self.project,
            "dxapi": "1.0.0",
            "inputSpec": [{"name": "plant", "class": "array:file"}],
            "outputSpec": [{"name": "plant", "class": "array:file"}],
            "runSpec": {
                "interpreter": "python2.7",
                "code": code,
                "distribution": "Ubuntu",
                "release": "14.04"
            }
        })
        job_id = run("dx run {} --batch-tsv={} --yes --brief".format(
            applet["id"], arg_table)).strip()
        job_desc = dxpy.api.job_describe(job_id)
        self.assertEqual(job_desc["executableName"], 'ident_file_array')
        self.assertEqual(job_desc["input"],
                         {"plant": [{
                             "$dnanexus_link": dxfile.get_id()
                         }]})
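For reference, the argument table written by this test holds a header row and a single data row; the actual separator is the module-level `delimiter`, and the file ID below is illustrative:

batch ID    plant       plant ID
SRR_1       [bubbles]   [file-xxxxxxxxxxxxxxxxxxxxxxxx]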
Example #2
def _check_file_syntax(filename, temp_dir, override_lang=None, enforce=True):
    """
    Checks that the code in FILENAME parses, attempting to autodetect
    the language if necessary.

    Raises IOError if the file cannot be read.

    Raises DXSyntaxError if there is a problem and "enforce" is True.
    """

    def check_python(filename):
        # Generate a semi-recognizable name to write the pyc to. Of
        # course it's possible that different files being scanned could
        # have the same basename, so this path won't be unique, but the
        # checks don't run concurrently so this shouldn't cause any
        # problems.
        pyc_path = os.path.join(temp_dir, os.path.basename(filename) + ".pyc")
        try:
            if USING_PYTHON2:
                filename = filename.encode(sys.getfilesystemencoding())
            py_compile.compile(filename, cfile=pyc_path, doraise=True)
        finally:
            try:
                os.unlink(pyc_path)
            except OSError:
                pass

    def check_bash(filename):
        subprocess.check_output(["/bin/bash", "-n", filename], stderr=subprocess.STDOUT)

    if override_lang == "python2.7":
        checker_fn = check_python
    elif override_lang == "bash":
        checker_fn = check_bash
    elif filename.endswith(".py"):
        checker_fn = check_python
    elif filename.endswith(".sh"):
        checker_fn = check_bash
    else:
        # Ignore other kinds of files.
        return

    # Do a test read of the file to catch errors like the file not
    # existing or not being readable.
    open(filename)

    try:
        checker_fn(filename)
    except subprocess.CalledProcessError as e:
        print(filename + " has a syntax error! Interpreter output:", file=sys.stderr)
        for line in e.output.strip("\n").split("\n"):
            print("  " + line.rstrip("\n"), file=sys.stderr)
        if enforce:
            raise DXSyntaxError(filename + " has a syntax error")
    except py_compile.PyCompileError as e:
        print(filename + " has a syntax error! Interpreter output:", file=sys.stderr)
        print("  " + e.msg.strip(), file=sys.stderr)
        if enforce:
            raise DXSyntaxError(e.msg.strip())
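A minimal sketch of exercising this checker, assuming _check_file_syntax and DXSyntaxError are importable from the surrounding module (the file contents are deliberately malformed and illustrative):

import os
import tempfile

temp_dir = tempfile.mkdtemp()
bad_script = os.path.join(temp_dir, "broken.py")
with open(bad_script, "w") as f:
    f.write("def oops(:\n    pass\n")  # invalid Python on purpose

try:
    _check_file_syntax(bad_script, temp_dir, enforce=True)
except DXSyntaxError:
    pass  # expected: the file fails to compile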
Example #3
def _check_file_syntax(filename, temp_dir, override_lang=None, enforce=True):
    """
    Checks that the code in FILENAME parses, attempting to autodetect
    the language if necessary.

    Raises IOError if the file cannot be read.

    Raises DXSyntaxError if there is a problem and "enforce" is True.
    """
    def check_python(filename):
        # Generate a semi-recognizable name to write the pyc to. Of
        # course it's possible that different files being scanned could
        # have the same basename, so this path won't be unique, but the
        # checks don't run concurrently so this shouldn't cause any
        # problems.
        pyc_path = os.path.join(temp_dir, os.path.basename(filename) + ".pyc")
        try:
            if USING_PYTHON2:
                filename = filename.encode(sys.getfilesystemencoding())
            py_compile.compile(filename, cfile=pyc_path, doraise=True)
        finally:
            try:
                os.unlink(pyc_path)
            except OSError:
                pass
    def check_bash(filename):
        subprocess.check_output(["/bin/bash", "-n", filename], stderr=subprocess.STDOUT)

    if override_lang == 'python2.7':
        checker_fn = check_python
    elif override_lang == 'bash':
        checker_fn = check_bash
    elif filename.endswith('.py'):
        checker_fn = check_python
    elif filename.endswith('.sh'):
        checker_fn = check_bash
    else:
        # Ignore other kinds of files.
        return

    # Do a test read of the file to catch errors like the file not
    # existing or not being readable.
    open(filename)

    try:
        checker_fn(filename)
    except subprocess.CalledProcessError as e:
        print(filename + " has a syntax error! Interpreter output:", file=sys.stderr)
        for line in e.output.strip("\n").split("\n"):
            print("  " + line.rstrip("\n"), file=sys.stderr)
        if enforce:
            raise DXSyntaxError(filename + " has a syntax error")
    except py_compile.PyCompileError as e:
        print(filename + " has a syntax error! Interpreter output:", file=sys.stderr)
        print("  " + e.msg.strip(), file=sys.stderr)
        if enforce:
            raise DXSyntaxError(e.msg.strip())
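Example #4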
def run_entry_points(run_spec):
    '''
    :param run_spec: run specification from the dxapp.json of the app
    :type run_spec: dict

    Runs all job entry points found in
    $DX_TEST_JOB_HOMEDIRS/job_queue.json in first-in, first-out
    order until the queue is empty (or an error occurs).
    '''
    job_queue_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'],
                                  'job_queue.json')
    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'],
                                        'job_outputs.json')

    while True:
        with open(job_queue_path, 'r') as fd:
            job_queue = json.load(fd)
        if len(job_queue) == 0:
            return

        with open(all_job_outputs_path, 'r') as fd:
            all_job_outputs = json.load(fd)

        entry_point_to_run = None
        for i, entry_point in enumerate(job_queue):
            runnable = True
            # See if its inputs are ready
            while has_local_job_refs(entry_point['input_hash']):
                try:
                    resolve_job_references(entry_point['input_hash'],
                                           all_job_outputs)
                except:
                    runnable = False
                    break
            if runnable:
                entry_point_to_run = job_queue.pop(i)
                break

        if entry_point_to_run is None:
            # Just run the first entry point and let the runner throw
            # the appropriate error
            entry_point_to_run = job_queue.pop(0)

        with open(job_queue_path, 'wb') as fd:
            # Update job queue with updated inputs and after having
            # popped the entry point to be run
            json.dump(job_queue, fd)
            fd.write(b'\n')

        run_one_entry_point(job_id=entry_point_to_run['id'],
                            function=entry_point_to_run['function'],
                            input_hash=entry_point_to_run['input_hash'],
                            run_spec=run_spec,
                            depends_on=entry_point_to_run.get(
                                'depends_on', []),
                            name=entry_point_to_run.get('name'))
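For orientation, the job_queue.json consumed by this loop might look like the following; the shape matches what queue_entry_point (below) appends, and the IDs and inputs are illustrative:

[
    {
        "id": "localjob-0",
        "function": "main",
        "input_hash": {"pie": 3.12},
        "depends_on": []
    }
]

Example #5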
def queue_entry_point(function, input_hash, depends_on=[], name=None):
    '''
    :param function: function to run
    :param input_hash: input to new job
    :param depends_on: list of data object IDs and/or job IDs (local or remote) to wait for before the job can be run
    :type depends_on: list of strings
    :param name: job name (optional)
    :returns: new local job ID

    This function should only be called by a locally running job, so
    all relevant DX_TEST_* environment variables should be set.

    This function will set up the home directory for the job, add an
    entry in job_outputs.json, and append the job information to the
    job_queue.json file.  (Both files found in
    $DX_TEST_JOB_HOMEDIRS.)
    '''
    ensure_env_vars()

    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'],
                                        'job_outputs.json')

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd,
                                    object_pairs_hook=collections.OrderedDict)
        job_id = 'localjob-' + str(len(all_job_outputs))

    with open(all_job_outputs_path, 'wb') as fd:
        all_job_outputs[job_id] = None
        json.dump(all_job_outputs, fd, indent=4)
        fd.write(b'\n')

    job_homedir = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)
    os.mkdir(job_homedir)

    job_queue_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'],
                                  'job_queue.json')
    with open(job_queue_path, 'r') as fd:
        job_queue = json.load(fd)
    job_entry = {
        "id": job_id,
        "function": function,
        "input_hash": input_hash,
        "depends_on": depends_on
    }
    if name is not None:
        job_entry['name'] = name
    job_queue.append(job_entry)
    with open(job_queue_path, 'wb') as fd:
        json.dump(job_queue, fd, indent=4)
        fd.write(b'\n')

    return job_id
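A hypothetical call, assuming the DX_TEST_* environment variables have already been set up by the local runner (the link ID is illustrative):

job_id = queue_entry_point(function="main",
                           input_hash={"reads": {"$dnanexus_link": "file-xxxx"}},
                           name="process reads")
print(job_id)  # e.g. "localjob-1"

Example #6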
def run_entry_points(run_spec):
    '''
    :param run_spec: run specification from the dxapp.json of the app
    :type run_spec: dict

    Runs all job entry points found in
    $DX_TEST_JOB_HOMEDIRS/job_queue.json in first-in, first-out
    order until the queue is empty (or an error occurs).
    '''
    job_queue_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_queue.json')
    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json')

    while True:
        with open(job_queue_path, 'r') as fd:
            job_queue = json.load(fd)
        if len(job_queue) == 0:
            return

        with open(all_job_outputs_path, 'r') as fd:
            all_job_outputs = json.load(fd)

        entry_point_to_run = None
        for i, entry_point in enumerate(job_queue):
            runnable = True
            # See if its inputs are ready
            while has_local_job_refs(entry_point['input_hash']):
                try:
                    resolve_job_references(entry_point['input_hash'], all_job_outputs)
                except:
                    runnable = False
                    break
            if runnable:
                entry_point_to_run = job_queue.pop(i)
                break

        if entry_point_to_run is None:
            # Just run the first entry point and let the runner throw
            # the appropriate error
            entry_point_to_run = job_queue.pop(0)

        with open(job_queue_path, 'wb') as fd:
            # Update job queue with updated inputs and after having
            # popped the entry point to be run
            json.dump(job_queue, fd)
            fd.write(b'\n')

        run_one_entry_point(job_id=entry_point_to_run['id'],
                            function=entry_point_to_run['function'],
                            input_hash=entry_point_to_run['input_hash'],
                            run_spec=run_spec,
                            depends_on=entry_point_to_run.get('depends_on', []),
                            name=entry_point_to_run.get('name'))
Example #7
    def test_file_arrays(self):
        # Create file with junk content
        dxfile = dxpy.upload_string("xxyyzz", project=self.project,
                                    wait_on_close=True, name="bubbles")

        # write python code into code.py file
        tmp_path = tempfile.mkdtemp()
        code_path = os.path.join(tmp_path, 'code.py')
        with open(code_path, write_mode) as f:
            f.write("@dxpy.entry_point('main')\n")
            f.write("def main(**job_inputs):\n")
            f.write("\toutput = {}\n")
            f.write("\toutput['plant'] = job_inputs['plant']\n")
            f.write("\treturn output\n")
            f.write("\n")
            f.write("dxpy.run()\n")
        with open(code_path, 'r') as f:
            code = f.read()

        # write arguments table. These are arrays with a single element.
        arg_table = os.path.join(tmp_path, 'table.csv')
        with open(arg_table, write_mode) as csvfile:
            writer = csv.writer(csvfile, delimiter=delimiter)
            header = ["batch ID", "plant", "plant ID"]
            writer.writerow(header)
            writer.writerow(["SRR_1",
                             "[bubbles]",
                             "[" + dxfile.get_id() + "]"
            ])

        applet = dxpy.api.applet_new({
            "name": "ident_file_array",
            "project": self.project,
            "dxapi": "1.0.0",
            "inputSpec": [ { "name": "plant", "class": "array:file" } ],
            "outputSpec": [ { "name": "plant", "class": "array:file" } ],
            "runSpec": { "interpreter": "python2.7",
                         "code": code,
                         "distribution": "Ubuntu",
                         "release": "14.04" }
        })
        job_id = run("dx run {} --batch-tsv={} --yes --brief"
                     .format(applet["id"], arg_table)).strip()
        job_desc = dxpy.api.job_describe(job_id)
        self.assertEqual(job_desc["executableName"], 'ident_file_array')
        self.assertEqual(job_desc["input"],
                         { "plant":
                           [{ "$dnanexus_link": dxfile.get_id() }]
                         })
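Example #8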
def queue_entry_point(function, input_hash, depends_on=[], name=None):
    '''
    :param function: function to run
    :param input_hash: input to new job
    :param depends_on: list of data object IDs and/or job IDs (local or remote) to wait for before the job can be run
    :type depends_on: list of strings
    :param name: job name (optional)
    :returns: new local job ID

    This function should only be called by a locally running job, so
    all relevant DX_TEST_* environment variables should be set.

    This function will set up the home directory for the job, add an
    entry in job_outputs.json, and append the job information to the
    job_queue.json file.  (Both files found in
    $DX_TEST_JOB_HOMEDIRS.)
    '''
    ensure_env_vars()

    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json')

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)
        job_id = 'localjob-' + str(len(all_job_outputs))

    with open(all_job_outputs_path, 'wb') as fd:
        all_job_outputs[job_id] = None
        json.dump(all_job_outputs, fd, indent=4)
        fd.write(b'\n')

    job_homedir = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)
    os.mkdir(job_homedir)

    job_queue_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_queue.json')
    with open(job_queue_path, 'r') as fd:
        job_queue = json.load(fd)
    job_entry = {"id": job_id,
                 "function": function,
                 "input_hash": input_hash,
                 "depends_on": depends_on}
    if name is not None:
        job_entry['name'] = name
    job_queue.append(job_entry)
    with open(job_queue_path, 'wb') as fd:
        json.dump(job_queue, fd, indent=4)
        fd.write(b'\n')

    return job_id
Example #9
def _parse_app_spec(src_dir):
    """Returns the parsed contents of dxapp.json.

    Raises either AppBuilderException or a parser error (exit codes 3 or
    2 respectively) if this cannot be done.
    """
    if not os.path.isdir(src_dir):
        parser.error("%s is not a directory" % src_dir)
    if not os.path.exists(os.path.join(src_dir, "dxapp.json")):
        raise dxpy.app_builder.AppBuilderException("Directory %s does not contain dxapp.json: not a valid DNAnexus app source directory" % src_dir)
    with open(os.path.join(src_dir, "dxapp.json")) as app_desc:
        try:
            return json_load_raise_on_duplicates(app_desc)
        except Exception as e:
            raise dxpy.app_builder.AppBuilderException("Could not parse dxapp.json file as JSON: " + e.message)
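A brief sketch of calling this helper, assuming the surrounding module's imports (dxpy and the module-level parser) are in scope; the directory name is illustrative:

try:
    app_spec = _parse_app_spec("./my_app")
    print(app_spec["name"])
except dxpy.app_builder.AppBuilderException as e:
    print(e)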
Example #11
def _check_syntax(code, lang, temp_dir, enforce=True):
    """
    Checks that the code whose text is in CODE parses as LANG.

    Raises DXSyntaxError if there is a problem and "enforce" is True.
    """
    # This function needs the language to be explicitly set, so we can
    # generate an appropriate temp filename.
    if lang == 'python2.7':
        temp_basename = 'inlined_code_from_dxapp_json.py'
    elif lang == 'bash':
        temp_basename = 'inlined_code_from_dxapp_json.sh'
    else:
        raise ValueError('lang must be one of "python2.7" or "bash"')
    # Dump the contents out to a temporary file, then call _check_file_syntax.
    with open(os.path.join(temp_dir, temp_basename), 'w') as ofile:
        ofile.write(code)
    _check_file_syntax(os.path.join(temp_dir, temp_basename), temp_dir, override_lang=lang, enforce=enforce)
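A short sketch of both paths through this function, assuming _check_syntax and DXSyntaxError are importable from the surrounding module:

import tempfile

temp_dir = tempfile.mkdtemp()
_check_syntax("echo hello", "bash", temp_dir)  # parses cleanly, returns None
try:
    _check_syntax("if [ -z $x ]; then", "bash", temp_dir)  # unterminated 'if'
except DXSyntaxError:
    pass  # expected: bash -n rejects the snippet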
Example #13
def get_input_spec_patterns():
    ''' Extract the inputSpec patterns, if they exist -- modified from dx-upload-all-outputs

    Returns a dict of all patterns, with keys equal to the respective
    input parameter names.
    '''
    input_spec = None
    if 'DX_JOB_ID' in environ:
        # works in the cloud, not locally
        job_desc = dxpy.describe(dxpy.JOB_ID)
        if job_desc["function"] == "main":
            # The input spec does not apply for subjobs
            desc = dxpy.describe(job_desc.get("app", job_desc.get("applet")))
            if "inputSpec" in desc:
                input_spec = desc["inputSpec"]
    elif 'DX_TEST_DXAPP_JSON' in environ:
        # works only locally
        path_to_dxapp_json = environ['DX_TEST_DXAPP_JSON']
        with open(path_to_dxapp_json) as fd:
            dxapp_json = json.load(fd)
            input_spec = dxapp_json.get('inputSpec')

    # convert to a dictionary. Each entry in the input spec
    # has {name, class} attributes.
    if input_spec is None:
        return {}

    # For each field name, return its patterns.
    # Make sure a pattern is legal, ignore illegal patterns.
    def is_legal_pattern(pattern):
        return "*" in pattern

    patterns_dict = {}
    for spec in input_spec:
        name = spec['name']
        if 'patterns' in spec:
            patterns_dict[name] = []
            for p in spec['patterns']:
                if is_legal_pattern(p):
                    patterns_dict[name].append(p)
    return patterns_dict
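As a worked example of the filtering step, an inputSpec fragment like the following (illustrative) would keep only its wildcard pattern:

input_spec = [{"name": "reads",
               "class": "array:file",
               "patterns": ["*.fastq.gz", "reads.txt"]}]
# The helper would return {"reads": ["*.fastq.gz"]};
# "reads.txt" is dropped because is_legal_pattern requires a "*".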
Example #14
    def test_basic(self):
        # write python code into code.py file
        tmp_path = tempfile.mkdtemp()
        code_path = os.path.join(tmp_path, 'code.py')
        with open(code_path, write_mode) as f:
            f.write("@dxpy.entry_point('main')\n")
            f.write("def main(**job_inputs):\n")
            f.write("\toutput = {}\n")
            f.write("\toutput['thresholds'] = job_inputs['thresholds']\n")
            f.write("\toutput['pie'] = job_inputs['pie'] + 1\n")
            f.write("\toutput['misc'] = {'n': 'non', 'y': 'oui'}\n")
            f.write("\treturn output\n")
            f.write("\n")
            f.write("dxpy.run()\n")
        with open(code_path, 'r') as f:
            code = f.read()

        # write arguments table
        arg_table = os.path.join(tmp_path, 'table.csv')
        with open(arg_table, write_mode) as csvfile:
            writer = csv.writer(csvfile, delimiter=delimiter)
            header = ["batch ID", "thresholds", "pie", "misc"]
            writer.writerow(header)
            writer.writerow(["SRR_1", "[10,81]", "3.12", "{}"])

        applet = dxpy.api.applet_new({
            "name": "copy_all",
            "project": self.project,
            "dxapi": "1.0.0",
            "inputSpec": [ { "name": "thresholds", "class": "array:int"},
                           { "name": "pie", "class": "float" },
                           { "name": "misc", "class": "hash" } ],
            "outputSpec": [ { "name": "thresholds", "class": "array:int" },
                            { "name": "pie", "class": "float" },
                            { "name": "misc", "class": "hash" } ],
            "runSpec": { "interpreter": "python2.7",
                         "code": code,
                         "distribution": "Ubuntu",
                         "release": "14.04" }
        })

        # run in batch mode
        job_id = run("dx run {} --batch-tsv={} --yes --brief"
                     .format(applet["id"], arg_table)).strip()
        job_desc = dxpy.api.job_describe(job_id)
        self.assertEqual(job_desc["executableName"], 'copy_all')
        self.assertEqual(job_desc["input"], { "thresholds": [10,81],
                                               "misc": {},
                                               "pie": 3.12 })

        # run in batch mode with --batch-folders
        job_id = run("dx run {} --batch-tsv={} --batch-folders --yes --brief"
                     .format(applet["id"], arg_table)).strip()
        job_desc = dxpy.api.job_describe(job_id)
        self.assertEqual(job_desc["folder"], "/SRR_1")

        # run in batch mode with --batch-folders and --destination
        job_id = run("dx run {} --batch-tsv={} --batch-folders --destination={}:/run_01 --yes --brief"
                     .format(applet["id"], arg_table, self.project)).strip()
        job_desc = dxpy.api.job_describe(job_id)
        self.assertEqual(job_desc["folder"], "/run_01/SRR_1")
Example #15
    def test_basic(self):
        # write python code into code.py file
        tmp_path = tempfile.mkdtemp()
        code_path = os.path.join(tmp_path, 'code.py')
        with open(code_path, write_mode) as f:
            f.write("@dxpy.entry_point('main')\n")
            f.write("def main(**job_inputs):\n")
            f.write("\toutput = {}\n")
            f.write("\toutput['thresholds'] = job_inputs['thresholds']\n")
            f.write("\toutput['pie'] = job_inputs['pie'] + 1\n")
            f.write("\toutput['misc'] = {'n': 'non', 'y': 'oui'}\n")
            f.write("\treturn output\n")
            f.write("\n")
            f.write("dxpy.run()\n")
        with open(code_path, 'r') as f:
            code = f.read()

        # write arguments table
        arg_table = os.path.join(tmp_path, 'table.csv')
        with open(arg_table, write_mode) as csvfile:
            writer = csv.writer(csvfile, delimiter=delimiter)
            header = ["batch ID", "thresholds", "pie", "misc"]
            writer.writerow(header)
            writer.writerow(["SRR_1", "[10,81]", "3.12", "{}"])

        applet = dxpy.api.applet_new({
            "name": "copy_all",
            "project": self.project,
            "dxapi": "1.0.0",
            "inputSpec": [{"name": "thresholds", "class": "array:int"},
                          {"name": "pie", "class": "float"},
                          {"name": "misc", "class": "hash"}],
            "outputSpec": [{"name": "thresholds", "class": "array:int"},
                           {"name": "pie", "class": "float"},
                           {"name": "misc", "class": "hash"}],
            "runSpec": {
                "interpreter": "python2.7",
                "code": code,
                "distribution": "Ubuntu",
                "release": "14.04"
            }
        })

        # run in batch mode
        job_id = run("dx run {} --batch-tsv={} --yes --brief".format(
            applet["id"], arg_table)).strip()
        job_desc = dxpy.api.job_describe(job_id)
        self.assertEqual(job_desc["executableName"], 'copy_all')
        self.assertEqual(job_desc["input"], {
            "thresholds": [10, 81],
            "misc": {},
            "pie": 3.12
        })

        # run in batch mode with --batch-folders
        job_id = run(
            "dx run {} --batch-tsv={} --batch-folders --yes --brief".format(
                applet["id"], arg_table)).strip()
        job_desc = dxpy.api.job_describe(job_id)
        self.assertEqual(job_desc["folder"], "/SRR_1")

        # run in batch mode with --batch-folders and --destination
        job_id = run(
            "dx run {} --batch-tsv={} --batch-folders --destination={}:/run_01 --yes --brief"
            .format(applet["id"], arg_table, self.project)).strip()
        job_desc = dxpy.api.job_describe(job_id)
        self.assertEqual(job_desc["folder"], "/run_01/SRR_1")
Example #16
def _build_app_remote(mode, src_dir, publish=False, destination_override=None,
                      version_override=None, bill_to_override=None, dx_toolkit_autodep="stable",
                      do_version_autonumbering=True, do_try_update=True, do_parallel_build=True,
                      do_check_syntax=True):
    if mode == 'app':
        builder_app = 'app-tarball_app_builder'
    else:
        builder_app = 'app-tarball_applet_builder'

    temp_dir = tempfile.mkdtemp()

    # TODO: this is vestigial, the "auto" setting should be removed.
    if dx_toolkit_autodep == "auto":
        dx_toolkit_autodep = "stable"

    build_options = {'dx_toolkit_autodep': dx_toolkit_autodep}

    if version_override:
        build_options['version_override'] = version_override
    elif do_version_autonumbering:
        # If autonumbering is DISABLED, the interior run of dx-build-app
        # will detect the correct version to use without our help. If it
        # is ENABLED, the version suffix might depend on the state of
        # the git repository. Since we'll remove the .git directory
        # before uploading, we need to determine the correct version to
        # use here and pass it in to the interior run of dx-build-app.
        if do_version_autonumbering:
            app_spec = _parse_app_spec(src_dir)
            original_version = app_spec['version']
            app_describe = None
            try:
                app_describe = dxpy.api.app_describe("app-" + app_spec["name"], alias=original_version, always_retry=False)
            except dxpy.exceptions.DXAPIError as e:
                if e.name == 'ResourceNotFound' or (mode == 'applet' and e.name == 'PermissionDenied'):
                    pass
                else:
                    raise e
            if app_describe is not None:
                if app_describe.has_key('published') or not do_try_update:
                    # The version we wanted was taken; fall back to the
                    # autogenerated version number.
                    build_options['version_override'] = original_version + _get_version_suffix(src_dir, original_version)

    # The following flags are basically passed through verbatim.
    if bill_to_override:
        build_options['bill_to_override'] = bill_to_override
    if not do_version_autonumbering:
        build_options['do_version_autonumbering'] = False
    if not do_try_update:
        build_options['do_try_update'] = False
    if not do_parallel_build:
        build_options['do_parallel_build'] = False
    if not do_check_syntax:
        build_options['do_check_syntax'] = False

    using_temp_project_for_remote_build = False

    # If building an applet, run the builder app in the destination
    # project. If building an app, run the builder app in a temporary
    # project.
    dest_folder = None
    dest_applet_name = None
    if mode == "applet":
        # Translate the --destination flag as follows. If --destination
        # is PROJ:FOLDER/NAME,
        #
        # 1. Run the builder app in PROJ
        # 2. Make the output folder FOLDER
        # 3. Supply --destination=NAME to the interior call of dx-build-applet.
        build_project_id = dxpy.WORKSPACE_ID
        if destination_override:
            build_project_id, dest_folder, dest_applet_name = parse_destination(destination_override)
        if build_project_id is None:
            parser.error("Can't create an applet without specifying a destination project; please use the -d/--destination flag to explicitly specify a project")
        if dest_applet_name:
            build_options['destination_override'] = '/' + dest_applet_name

    elif mode == "app":
        using_temp_project_for_remote_build = True
        build_project_id = dxpy.api.project_new({"name": "dx-build-app --remote temporary project"})["id"]

    try:
        # Resolve relative paths and symlinks here so we have something
        # reasonable to write in the job name below.
        src_dir = os.path.realpath(src_dir)

        # Show the user some progress as the tarball is being generated.
        # Hopefully this will help them to understand when their tarball
        # is huge (e.g. the target directory already has a whole bunch
        # of binaries in it) and interrupt before uploading begins.
        app_tarball_file = os.path.join(temp_dir, "app_tarball.tar.gz")
        tar_subprocess = subprocess.Popen(["tar", "-czf", "-", "--exclude", "./.git", "."], cwd=src_dir, stdout=subprocess.PIPE)
        with open(app_tarball_file, 'wb') as tar_output_file:
            total_num_bytes = 0
            last_console_update = 0
            start_time = time.time()
            printed_static_message = False
            # Pipe the output of tar into the output file
            while True:
                tar_exitcode = tar_subprocess.poll()
                data = tar_subprocess.stdout.read(4 * 1024 * 1024)
                if tar_exitcode is not None and len(data) == 0:
                    break
                tar_output_file.write(data)
                total_num_bytes += len(data)
                current_time = time.time()
                # Don't show status messages at all for very short tar
                # operations (< 1.0 sec)
                if current_time - last_console_update > 0.25 and current_time - start_time > 1.0:
                    if sys.stderr.isatty():
                        if last_console_update > 0:
                            sys.stderr.write("\r")
                        sys.stderr.write("Compressing target directory {dir}... ({kb_so_far:,} kb)".format(dir=src_dir, kb_so_far=total_num_bytes / 1024))
                        sys.stderr.flush()
                        last_console_update = current_time
                    elif not printed_static_message:
                        # Print a message (once only) when stderr is not
                        # going to a live console
                        sys.stderr.write("Compressing target directory %s..." % (src_dir,))
                        printed_static_message = True

        if last_console_update > 0:
            sys.stderr.write("\n")
        if tar_exitcode != 0:
            raise Exception("tar exited with non-zero exit code " + str(tar_exitcode))

        dxpy.set_workspace_id(build_project_id)

        remote_file = dxpy.upload_local_file(app_tarball_file, media_type="application/gzip",
                                             wait_on_close=True, show_progress=True)

        try:
            input_hash = {
                "input_file": dxpy.dxlink(remote_file),
                "build_options": build_options
                }
            if mode == 'app':
                input_hash["publish"] = publish
            api_options = {
                "name": "Remote build of %s" % (os.path.basename(src_dir),),
                "input": input_hash,
                "project": build_project_id,
                }
            if dest_folder:
                api_options["folder"] = dest_folder
            app_run_result = dxpy.api.app_run(builder_app, input_params=api_options)
            job_id = app_run_result["id"]
            print("Started builder job %s" % (job_id,))
            try:
                subprocess.check_call(["dx", "watch", job_id])
            except subprocess.CalledProcessError as e:
                if e.returncode == 3:
                    # Some kind of failure to build the app. The reason
                    # for the failure is probably self-evident from the
                    # job log (and if it's not, the CalledProcessError
                    # is not informative anyway), so just propagate the
                    # return code without additional remarks.
                    sys.exit(3)
                else:
                    raise e

            dxpy.DXJob(job_id).wait_on_done(interval=1)

            if mode == 'applet':
                applet_id, _ = dxpy.get_dxlink_ids(dxpy.api.job_describe(job_id)['output']['output_applet'])
                return applet_id
            else:
                # TODO: determine and return the app ID, to allow
                # running the app if args.run is specified
                return None
        finally:
            if not using_temp_project_for_remote_build:
                dxpy.DXProject(build_project_id).remove_objects([remote_file.get_id()])
    finally:
        if using_temp_project_for_remote_build:
            dxpy.api.project_destroy(build_project_id, {"terminateJobs": True})
        shutil.rmtree(temp_dir)
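A hypothetical direct invocation for an applet build into an explicit destination (the project ID and paths are illustrative):

applet_id = _build_app_remote("applet", "./my_app",
                              destination_override="project-xxxx:/apps/my_applet")
print("built applet", applet_id)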
Example #17
def get_input_hash():
    # job_input_file is defined in the enclosing scope
    with open(job_input_file) as fh:
        job_input = json.load(fh)
        return job_input
Example #18
def _verify_app_source_dir_impl(src_dir, temp_dir, mode, enforce=True):
    """Performs syntax and lint checks on the app source.

    Precondition: the dxapp.json file exists and can be parsed.
    """
    _lint(os.path.join(src_dir, "dxapp.json"), mode)

    # Check that the entry point file parses as the type it is going to
    # be interpreted as. The extension is irrelevant.
    manifest = json.load(open(os.path.join(src_dir, "dxapp.json")))
    if "runSpec" in manifest:
        if "interpreter" not in manifest['runSpec']:
            raise dxpy.app_builder.AppBuilderException('runSpec.interpreter field was not present')

        if manifest['runSpec']['interpreter'] in ["python2.7", "bash"]:
            if "file" in manifest['runSpec']:
                entry_point_file = os.path.abspath(os.path.join(src_dir, manifest['runSpec']['file']))
                try:
                    _check_file_syntax(entry_point_file, temp_dir, override_lang=manifest['runSpec']['interpreter'], enforce=enforce)
                except IOError as e:
                    raise dxpy.app_builder.AppBuilderException(
                        'Could not open runSpec.file=%r. The problem was: %s' % (entry_point_file, e))
                except DXSyntaxError:
                    raise dxpy.app_builder.AppBuilderException('Entry point file %s has syntax errors, see above for details. Rerun with --no-check-syntax to proceed anyway.' % (entry_point_file,))
            elif "code" in manifest['runSpec']:
                try:
                    _check_syntax(manifest['runSpec']['code'], manifest['runSpec']['interpreter'], temp_dir, enforce=enforce)
                except DXSyntaxError:
                    raise dxpy.app_builder.AppBuilderException('Code in runSpec.code has syntax errors, see above for details. Rerun with --no-check-syntax to proceed anyway.')

        if 'execDepends' in manifest['runSpec']:
            if not isinstance(manifest['runSpec']['execDepends'], list):
                raise dxpy.app_builder.AppBuilderException('Expected runSpec.execDepends to be an array. Rerun with --no-check-syntax to proceed anyway.')
            if not all(isinstance(dep, dict) for dep in manifest['runSpec']['execDepends']):
                raise dxpy.app_builder.AppBuilderException('Expected runSpec.execDepends to be an array of hashes. Rerun with --no-check-syntax to proceed anyway.')
            if any(dep.get('package_manager', 'apt') != 'apt' for dep in manifest['runSpec']['execDepends']):
                if not isinstance(manifest.get('access'), dict) or 'network' not in manifest['access']:
                    msg = '\n'.join(['runSpec.execDepends specifies non-APT dependencies, but no network access spec is given.',
                    'Add {"access": {"network": ["*"]}} to allow dependencies to install.',
                    'See https://wiki.dnanexus.com/Developer-Tutorials/Request-Additional-App-Resources#Network-Access.',
                    'Rerun with --no-check-syntax to proceed anyway.'])
                    raise dxpy.app_builder.AppBuilderException(msg)

    if 'authorizedUsers' in manifest:
        if not isinstance(manifest['authorizedUsers'], list) or isinstance(manifest['authorizedUsers'], basestring):
            raise dxpy.app_builder.AppBuilderException('Expected authorizedUsers to be a list of strings')
        for thing in manifest['authorizedUsers']:
            if thing != 'PUBLIC' and (not isinstance(thing, basestring) or not re.match("^(org-|user-)", thing)):
                raise dxpy.app_builder.AppBuilderException('authorizedUsers field contains an entry which is not either the string "PUBLIC" or a user or org ID')

    # Check all other files that are going to be in the resources tree.
    # For these we detect the language based on the filename extension.
    # Obviously this check can have false positives, since the app can
    # execute (or not execute!) all these files in whatever way it
    # wishes, e.g. it could use Python != 2.7 or some non-bash shell.
    # Consequently errors here are non-fatal.
    files_with_problems = []
    for dirpath, dirnames, filenames in os.walk(os.path.abspath(os.path.join(src_dir, "resources"))):
        for filename in filenames:
            # On Mac OS, the resource fork for "FILE.EXT" gets tarred up
            # as a file named "._FILE.EXT". To a naive check this
            # appears to be a file of the same extension. Therefore, we
            # exclude these from syntax checking since they are likely
            # to not parse as whatever language they appear to be.
            if not filename.startswith("._"):
                try:
                    _check_file_syntax(os.path.join(dirpath, filename), temp_dir, enforce=True)
                except IOError as e:
                    raise dxpy.app_builder.AppBuilderException(
                        'Could not open file in resources directory %r. The problem was: %s' %
                        (os.path.join(dirpath, filename), e)
                    )
                except DXSyntaxError:
                    # Suppresses errors from _check_file_syntax so we
                    # only print a nice error message
                    files_with_problems.append(os.path.join(dirpath, filename))

    if files_with_problems:
        # Make a message of the form:
        #    "/path/to/my/app.py"
        # OR "/path/to/my/app.py and 3 other files"
        files_str = files_with_problems[0] if len(files_with_problems) == 1 else (files_with_problems[0] + " and " + str(len(files_with_problems) - 1) + " other file" + ("s" if len(files_with_problems) > 2 else ""))
        logging.warn('%s contained syntax errors, see above for details' % (files_str,))
Example #19
def _lint(dxapp_json_filename, mode):
    """
    Examines the specified dxapp.json file and warns about any
    violations of app guidelines.

    Precondition: the dxapp.json file exists and can be parsed.
    """

    def _find_readme(dirname):
        for basename in ['README.md', 'Readme.md', 'readme.md']:
            if os.path.exists(os.path.join(dirname, basename)):
                return os.path.join(dirname, basename)
        return None

    app_spec = json.load(open(dxapp_json_filename))

    dirname = os.path.basename(os.path.dirname(os.path.abspath(dxapp_json_filename)))

    if mode == "app":
        if 'title' not in app_spec:
            logger.warn('app is missing a title, please add one in the "title" field of dxapp.json')

        if 'summary' in app_spec:
            if app_spec['summary'].endswith('.'):
                logger.warn('summary "%s" should be a short phrase not ending in a period' % (app_spec['summary'],))
        else:
            logger.warn('app is missing a summary, please add one in the "summary" field of dxapp.json')

        readme_filename = _find_readme(os.path.dirname(dxapp_json_filename))
        if 'description' in app_spec:
            if readme_filename:
                logger.warn('"description" field shadows file ' + readme_filename)
            if not app_spec['description'].strip().endswith('.'):
                logger.warn('"description" field should be written in complete sentences and end with a period')
        else:
            if readme_filename is None:
                logger.warn("app is missing a description, please supply one in README.md")
        if 'categories' in app_spec:
            for category in app_spec['categories']:
                if category not in APP_CATEGORIES:
                    logger.warn('app has unrecognized category "%s"' % (category,))
                if category == 'Import':
                    if 'title' in app_spec and not app_spec['title'].endswith('Importer'):
                        logger.warn('title "%s" should end in "Importer"' % (app_spec['title'],))
                if category == 'Export':
                    if 'title' in app_spec and not app_spec['title'].endswith('Exporter'):
                        logger.warn('title "%s" should end in "Exporter"' % (app_spec['title'],))

    if 'name' in app_spec:
        if app_spec['name'] != app_spec['name'].lower():
            logger.warn('name "%s" should be all lowercase' % (app_spec['name'],))
        if dirname != app_spec['name']:
            logger.warn('app name "%s" does not match containing directory "%s"' % (app_spec['name'], dirname))
    else:
        logger.warn('app is missing a name, please add one in the "name" field of dxapp.json')

    if 'version' in app_spec:
        if not APP_VERSION_RE.match(app_spec['version']):
            logger.warn('"version" %s should be semver compliant (e.g. of the form X.Y.Z)' % (app_spec['version'],))

    # Note that identical checks are performed on the server side (and
    # will cause the app build to fail), but the checks here are printed
    # sooner and multiple naming problems can be detected in a single
    # pass.
    if 'inputSpec' in app_spec:
        for i, input_field in enumerate(app_spec['inputSpec']):
            if not re.match("^[a-zA-Z_][0-9a-zA-Z_]*$", input_field['name']):
                logger.error('input %d has illegal name "%s" (must match ^[a-zA-Z_][0-9a-zA-Z_]*$)' % (i, input_field['name']))
    else:
        logger.warn("dxapp.json contains no input specification (inputSpec). Your applet will not be usable as an " +
                    "app, runnable from the GUI, or composable using workflows.")
    if 'outputSpec' in app_spec:
        for i, output_field in enumerate(app_spec['outputSpec']):
            if not re.match("^[a-zA-Z_][0-9a-zA-Z_]*$", output_field['name']):
                logger.error('output %d has illegal name "%s" (must match ^[a-zA-Z_][0-9a-zA-Z_]*$)' % (i, output_field['name']))
    else:
        logger.warn("dxapp.json contains no output specification (outputSpec). Your applet will not be usable as an " +
                    "app, runnable from the GUI, or composable using workflows.")
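Example #21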
def run_one_entry_point(job_id,
                        function,
                        input_hash,
                        run_spec,
                        depends_on,
                        name=None):
    '''
    :param job_id: job ID of the local job to run
    :type job_id: string
    :param function: function to run
    :type function: string
    :param input_hash: input for the job (may include job-based object references)
    :type input_hash: dict
    :param run_spec: run specification from the dxapp.json of the app
    :type run_spec: dict

    Runs the specified entry point and retrieves the job's output,
    updating job_outputs.json (in $DX_TEST_JOB_HOMEDIRS) appropriately.
    '''
    print('======')

    job_homedir = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)

    job_env = environ.copy()
    job_env['HOME'] = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)

    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'],
                                        'job_outputs.json')

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd,
                                    object_pairs_hook=collections.OrderedDict)

    if isinstance(name, basestring):
        name += ' (' + job_id + ':' + function + ')'
    else:
        name = job_id + ':' + function
    job_name = BLUE() + BOLD() + name + ENDC()
    print(job_name)

    # Resolve local job-based object references
    try:
        resolve_job_references(input_hash, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') +
                        ' when resolving input:\n' + fill(str(e)))

    # Get list of non-closed data objects in the input that appear as
    # DNAnexus links; append to depends_on
    if depends_on is None:
        depends_on = []
    get_implicit_depends_on(input_hash, depends_on)

    try:
        wait_for_depends_on(depends_on, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') +
                        ' when processing depends_on:\n' + fill(str(e)))

    # Save job input to job_input.json
    with open(os.path.join(job_homedir, 'job_input.json'), 'wb') as fd:
        json.dump(input_hash, fd, indent=4)
        fd.write(b'\n')

    print(
        job_output_to_str(input_hash,
                          title=(BOLD() + 'Input: ' + ENDC()),
                          title_len=len("Input: ")).lstrip())

    if run_spec['interpreter'] == 'bash':
        # Save job input to env vars
        env_path = os.path.join(job_homedir, 'environment')
        with open(env_path, 'w') as fd:
            job_input_file = os.path.join(job_homedir, 'job_input.json')
            var_defs_hash = file_load_utils.gen_bash_vars(
                job_input_file, job_homedir=job_homedir)
            for key, val in var_defs_hash.iteritems():
                fd.write("{}={}\n".format(key, val))

    print(BOLD() + 'Logs:' + ENDC())
    start_time = datetime.datetime.now()
    if run_spec['interpreter'] == 'bash':
        script = '''
          cd {homedir};
          . {env_path};
          . {code_path};
          if [[ $(type -t {function}) == "function" ]];
          then {function};
          else echo "$0: Global scope execution complete. Not invoking entry point function {function} because it was not found" 1>&2;
          fi'''.format(homedir=pipes.quote(job_homedir),
                       env_path=pipes.quote(
                           os.path.join(job_env['HOME'], 'environment')),
                       code_path=pipes.quote(environ['DX_TEST_CODE_PATH']),
                       function=function)
        invocation_args = ['bash', '-c', '-e'] + (
            ['-x'] if environ.get('DX_TEST_X_FLAG') else []) + [script]
    elif run_spec['interpreter'] == 'python2.7':
        script = '''#!/usr/bin/env python
import os
os.chdir({homedir})

{code}

import dxpy, json
if dxpy.utils.exec_utils.RUN_COUNT == 0:
    dxpy.run()
'''.format(homedir=repr(job_homedir), code=run_spec['code'])

        job_env['DX_TEST_FUNCTION'] = function
        invocation_args = ['python', '-c', script]

    if USING_PYTHON2:
        invocation_args = [
            arg.encode(sys.stdout.encoding) for arg in invocation_args
        ]
        env = {k: v.encode(sys.stdout.encoding) for k, v in job_env.items()}
    else:
        env = job_env

    fn_process = subprocess.Popen(invocation_args, env=env)

    fn_process.communicate()
    end_time = datetime.datetime.now()

    if fn_process.returncode != 0:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') +
                        ', exited with error code ' +
                        str(fn_process.returncode) + ' after ' +
                        str(end_time - start_time))

    # Now update the job output aggregation file with this job's output
    job_output_path = os.path.join(job_env['HOME'], 'job_output.json')
    if os.path.exists(job_output_path):
        try:
            with open(job_output_path, 'r') as fd:
                job_output = json.load(
                    fd, object_pairs_hook=collections.OrderedDict)
        except Exception as e:
            exit_with_error('Error: Could not load output of ' + job_name +
                            ':\n' + fill(str(e.__class__) + ': ' + str(e)))
    else:
        job_output = {}

    print(job_name + ' -> ' + GREEN() + 'finished running' + ENDC() +
          ' after ' + str(end_time - start_time))
    print(
        job_output_to_str(job_output,
                          title=(BOLD() + "Output: " + ENDC()),
                          title_len=len("Output: ")).lstrip())

    with open(
            os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json'),
            'r') as fd:
        all_job_outputs = json.load(fd,
                                    object_pairs_hook=collections.OrderedDict)
    all_job_outputs[job_id] = job_output

    # Before dumping, see if any new JBORs (job-based object references) should be resolved now
    for other_job_id in all_job_outputs:
        if all_job_outputs[other_job_id] is None:
            # Skip if job is not done yet (true for ancestor jobs)
            continue
        resolve_job_references(all_job_outputs[other_job_id],
                               all_job_outputs,
                               should_resolve=False)

    with open(
            os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json'),
            'wb') as fd:
        json.dump(all_job_outputs, fd, indent=4)
        fd.write(b'\n')
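The pattern above relies on job_outputs.json accumulating every finished local job's output so that later entry points can consume it. A job-based object reference (JBOR) in an input hash has the shape {"$dnanexus_link": {"job": ..., "field": ...}}, and resolution substitutes the referenced job's recorded output for the stub. A minimal functional sketch of that idea (a hypothetical helper; the real resolve_job_references mutates the hash in place and also validates job state):

import collections

def resolve_jbors_sketch(value, all_job_outputs):
    # Recursively substitute {"$dnanexus_link": {"job": ..., "field": ...}}
    # stubs with the referenced job's recorded output value.
    # Hypothetical simplification of resolve_job_references.
    if isinstance(value, dict):
        link = value.get("$dnanexus_link")
        if isinstance(link, dict) and "job" in link and "field" in link:
            return all_job_outputs[link["job"]][link["field"]]
        return collections.OrderedDict(
            (k, resolve_jbors_sketch(v, all_job_outputs)) for k, v in value.items())
    if isinstance(value, list):
        return [resolve_jbors_sketch(v, all_job_outputs) for v in value]
    return value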
Exemple #22
0
def run_one_entry_point(job_id, function, input_hash, run_spec, depends_on, name=None):
    '''
    :param job_id: job ID of the local job to run
    :type job_id: string
    :param function: function to run
    :type function: string
    :param input_hash: input for the job (may include job-based object references)
    :type input_hash: dict
    :param run_spec: run specification from the dxapp.json of the app
    :type run_spec: dict
    :param depends_on: IDs of jobs and data objects to wait on before running (may be None)
    :type depends_on: list or None
    :param name: display name for the job (optional)
    :type name: string or None

    Runs the specified entry point and retrieves the job's output,
    updating job_outputs.json (in $DX_TEST_JOB_HOMEDIRS) appropriately.
    '''
    print('======')

    job_homedir = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)

    job_env = environ.copy()
    job_env['HOME'] = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], job_id)

    all_job_outputs_path = os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json')

    with open(all_job_outputs_path, 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)

    if isinstance(name, basestring):
        name += ' (' + job_id + ':' + function + ')'
    else:
        name = job_id + ':' + function
    job_name = BLUE() + BOLD() + name + ENDC()
    print(job_name)

    # Resolve local job-based object references
    try:
        resolve_job_references(input_hash, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ' when resolving input:\n' + fill(str(e)))

    # Get list of non-closed data objects in the input that appear as
    # DNAnexus links; append to depends_on
    if depends_on is None:
        depends_on = []
    get_implicit_depends_on(input_hash, depends_on)

    try:
        wait_for_depends_on(depends_on, all_job_outputs)
    except Exception as e:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ' when processing depends_on:\n' + fill(str(e)))

    # Save job input to job_input.json
    with open(os.path.join(job_homedir, 'job_input.json'), 'wb') as fd:
        json.dump(input_hash, fd, indent=4)
        fd.write(b'\n')

    print(job_output_to_str(input_hash, title=(BOLD() + 'Input: ' + ENDC()),
                            title_len=len("Input: ")).lstrip())

    if run_spec['interpreter'] == 'bash':
        # Save job input to env vars
        env_path = os.path.join(job_homedir, 'environment')
        with open(env_path, 'w') as fd:
            # The following mirrors the code used to generate env vars on the remote worker
            fd.write("\n".join(
                "export {k}=( {vlist} )".format(
                    k=k,
                    vlist=" ".join(pipes.quote(vitem if isinstance(vitem, basestring)
                                               else json.dumps(vitem))
                                   for vitem in v))
                if isinstance(v, list)
                else "export {k}={v}".format(
                    k=k,
                    v=pipes.quote(v if isinstance(v, basestring) else json.dumps(v)))
                for k, v in input_hash.items()))

    print(BOLD() + 'Logs:' + ENDC())
    start_time = datetime.datetime.now()
    if run_spec['interpreter'] == 'bash':
        script = '''
          cd {homedir};
          . {env_path};
          . {code_path};
          if [[ $(type -t {function}) == "function" ]];
          then {function};
          else echo "$0: Global scope execution complete. Not invoking entry point function {function} because it was not found" 1>&2;
          fi'''.format(homedir=pipes.quote(job_homedir),
                       env_path=pipes.quote(os.path.join(job_env['HOME'], 'environment')),
                       code_path=pipes.quote(environ['DX_TEST_CODE_PATH']),
                       function=function)
        invocation_args = ['bash', '-c', '-e'] + (['-x'] if environ.get('DX_TEST_X_FLAG') else []) + [script]
    elif run_spec['interpreter'] == 'python2.7':
        script = '''#!/usr/bin/env python
import os
os.chdir({homedir})

{code}

import dxpy, json
if dxpy.utils.exec_utils.RUN_COUNT == 0:
    dxpy.run()
'''.format(homedir=repr(job_homedir),
           code=run_spec['code'])

        job_env['DX_TEST_FUNCTION'] = function
        invocation_args = ['python', '-c', script]

    if USING_PYTHON2:
        invocation_args = [arg.encode(sys.stdout.encoding) for arg in invocation_args]
        env = {k: v.encode(sys.stdout.encoding) for k, v in job_env.items()}
    else:
        env = job_env

    fn_process = subprocess.Popen(invocation_args, env=env)

    fn_process.communicate()
    end_time = datetime.datetime.now()

    if fn_process.returncode != 0:
        exit_with_error(job_name + ' ' + JOB_STATES('failed') + ', exited with error code ' + str(fn_process.returncode) + ' after ' + str(end_time - start_time))

    # Now update the job output aggregation file with this job's output
    job_output_path = os.path.join(job_env['HOME'], 'job_output.json')
    if os.path.exists(job_output_path):
        try:
            with open(job_output_path, 'r') as fd:
                job_output = json.load(fd, object_pairs_hook=collections.OrderedDict)
        except Exception as e:
            exit_with_error('Error: Could not load output of ' + job_name + ':\n' + fill(str(e.__class__) + ': ' + str(e)))
    else:
        job_output = {}

    print(job_name + ' -> ' + GREEN() + 'finished running' + ENDC() + ' after ' + str(end_time - start_time))
    print(job_output_to_str(job_output, title=(BOLD() + "Output: " + ENDC()),
                            title_len=len("Output: ")).lstrip())

    with open(os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json'), 'r') as fd:
        all_job_outputs = json.load(fd, object_pairs_hook=collections.OrderedDict)
    all_job_outputs[job_id] = job_output

    # Before dumping, see if any new JBORs (job-based object references) should be resolved now
    for other_job_id in all_job_outputs:
        if all_job_outputs[other_job_id] is None:
            # Skip if job is not done yet (true for ancestor jobs)
            continue
        resolve_job_references(all_job_outputs[other_job_id], all_job_outputs, should_resolve=False)

    with open(os.path.join(environ['DX_TEST_JOB_HOMEDIRS'], 'job_outputs.json'), 'wb') as fd:
        json.dump(all_job_outputs, fd, indent=4)
        fd.write(b'\n')
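The single expression that writes the environment file above is dense; for intuition, here is a hypothetical input rendered through the same logic in standalone form (str standing in for Python 2's basestring; shlex.quote is the modern equivalent of pipes.quote):

import json
import pipes

input_hash = {"sample": "SRR_1", "reads": ["a.fq", "b.fq"], "count": 3}
for k, v in input_hash.items():
    if isinstance(v, list):
        # Arrays become bash arrays, e.g. export reads=( a.fq b.fq )
        print("export {k}=( {vlist} )".format(
            k=k, vlist=" ".join(pipes.quote(x if isinstance(x, str) else json.dumps(x))
                                for x in v)))
    else:
        # Scalars are quoted directly; non-strings are JSON-serialized first
        print("export {k}={v}".format(
            k=k, v=pipes.quote(v if isinstance(v, str) else json.dumps(v))))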
Exemple #23
0
def _lint(dxapp_json_filename, mode):
    """
    Examines the specified dxapp.json file and warns about any
    violations of app guidelines.

    Precondition: the dxapp.json file exists and can be parsed.
    """

    def _find_readme(dirname):
        for basename in ['README.md', 'Readme.md', 'readme.md']:
            if os.path.exists(os.path.join(dirname, basename)):
                return os.path.join(dirname, basename)
        return None

    with open(dxapp_json_filename) as fd:
        app_spec = json.load(fd)

    dirname = os.path.basename(os.path.dirname(os.path.abspath(dxapp_json_filename)))

    if mode == "app":
        if 'title' not in app_spec:
            logger.warn('app is missing a title, please add one in the "title" field of dxapp.json')

        if 'summary' in app_spec:
            if app_spec['summary'].endswith('.'):
                logger.warn('summary "%s" should be a short phrase not ending in a period' % (app_spec['summary'],))
        else:
            logger.warn('app is missing a summary, please add one in the "summary" field of dxapp.json')

        readme_filename = _find_readme(os.path.dirname(dxapp_json_filename))
        if 'description' in app_spec:
            if readme_filename:
                logger.warn('"description" field shadows file ' + readme_filename)
            if not app_spec['description'].strip().endswith('.'):
                logger.warn('"description" field should be written in complete sentences and end with a period')
        else:
            if readme_filename is None:
                logger.warn("app is missing a description, please supply one in README.md")
        if 'categories' in app_spec:
            for category in app_spec['categories']:
                if category not in APP_CATEGORIES:
                    logger.warn('app has unrecognized category "%s"' % (category,))
                if category == 'Import':
                    if 'title' in app_spec and not app_spec['title'].endswith('Importer'):
                        logger.warn('title "%s" should end in "Importer"' % (app_spec['title'],))
                if category == 'Export':
                    if 'title' in app_spec and not app_spec['title'].endswith('Exporter'):
                        logger.warn('title "%s" should end in "Exporter"' % (app_spec['title'],))

    if 'name' in app_spec:
        if app_spec['name'] != app_spec['name'].lower():
            logger.warn('name "%s" should be all lowercase' % (app_spec['name'],))
        if dirname != app_spec['name']:
            logger.warn('app name "%s" does not match containing directory "%s"' % (app_spec['name'], dirname))
    else:
        logger.warn('app is missing a name, please add one in the "name" field of dxapp.json')

    if 'version' in app_spec:
        if not APP_VERSION_RE.match(app_spec['version']):
            logger.warn('"version" %s should be semver compliant (e.g. of the form X.Y.Z)' % (app_spec['version'],))

    # Note that identical checks are performed on the server side (and
    # will cause the app build to fail), but the checks here are printed
    # sooner and multiple naming problems can be detected in a single
    # pass.
    if 'inputSpec' in app_spec:
        for i, input_field in enumerate(app_spec['inputSpec']):
            if not re.match("^[a-zA-Z_][0-9a-zA-Z_]*$", input_field['name']):
                logger.error('input %d has illegal name "%s" (must match ^[a-zA-Z_][0-9a-zA-Z_]*$)' % (i, input_field['name']))
    else:
        logger.warn("dxapp.json contains no input specification (inputSpec). Your applet will not be usable as an " +
                    "app, runnable from the GUI, or composable using workflows.")
    if 'outputSpec' in app_spec:
        for i, output_field in enumerate(app_spec['outputSpec']):
            if not re.match("^[a-zA-Z_][0-9a-zA-Z_]*$", output_field['name']):
                logger.error('output %d has illegal name "%s" (must match ^[a-zA-Z_][0-9a-zA-Z_]*$)' % (i, output_field['name']))
    else:
        logger.warn("dxapp.json contains no output specification (outputSpec). Your applet will not be usable as an " +
                    "app, runnable from the GUI, or composable using workflows.")
Exemple #24
0
def _build_app_remote(mode, src_dir, publish=False, destination_override=None,
                      version_override=None, bill_to_override=None, dx_toolkit_autodep="stable",
                      do_version_autonumbering=True, do_try_update=True, do_parallel_build=True,
                      do_check_syntax=True):
    if mode == 'app':
        builder_app = 'app-tarball_app_builder'
    else:
        builder_app = 'app-tarball_applet_builder'

    temp_dir = tempfile.mkdtemp()

    # TODO: this is vestigial, the "auto" setting should be removed.
    if dx_toolkit_autodep == "auto":
        dx_toolkit_autodep = "stable"

    build_options = {'dx_toolkit_autodep': dx_toolkit_autodep}

    if version_override:
        build_options['version_override'] = version_override
    elif do_version_autonumbering:
        # If autonumbering is DISABLED, the interior run of dx-build-app
        # will detect the correct version to use without our help. If it
        # is ENABLED, the version suffix might depend on the state of
        # the git repository. Since we'll remove the .git directory
        # before uploading, we need to determine the correct version to
        # use here and pass it in to the interior run of dx-build-app.
        app_spec = _parse_app_spec(src_dir)
        original_version = app_spec['version']
        app_describe = None
        try:
            app_describe = dxpy.api.app_describe("app-" + app_spec["name"], alias=original_version, always_retry=False)
        except dxpy.exceptions.DXAPIError as e:
            if e.name == 'ResourceNotFound' or (mode == 'applet' and e.name == 'PermissionDenied'):
                pass
            else:
                raise e
        if app_describe is not None:
            if 'published' in app_describe or not do_try_update:
                # The version we wanted was taken; fall back to the
                # autogenerated version number.
                build_options['version_override'] = original_version + _get_version_suffix(src_dir, original_version)

    # The following flags are basically passed through verbatim.
    if bill_to_override:
        build_options['bill_to_override'] = bill_to_override
    if not do_version_autonumbering:
        build_options['do_version_autonumbering'] = False
    if not do_try_update:
        build_options['do_try_update'] = False
    if not do_parallel_build:
        build_options['do_parallel_build'] = False
    if not do_check_syntax:
        build_options['do_check_syntax'] = False

    using_temp_project_for_remote_build = False

    # If building an applet, run the builder app in the destination
    # project. If building an app, run the builder app in a temporary
    # project.
    dest_folder = None
    dest_applet_name = None
    if mode == "applet":
        # Translate the --destination flag as follows. If --destination
        # is PROJ:FOLDER/NAME,
        #
        # 1. Run the builder app in PROJ
        # 2. Make the output folder FOLDER
        # 3. Supply --destination=NAME to the interior call of dx-build-applet.
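        #
        # For example (hypothetical values): --destination=myproj:/assets/mytool
        # runs the builder in myproj, sets the output folder to /assets, and
        # passes --destination=/mytool to the interior dx-build-applet call.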
        build_project_id = dxpy.WORKSPACE_ID
        if destination_override:
            build_project_id, dest_folder, dest_applet_name = parse_destination(destination_override)
        if build_project_id is None:
            parser.error("Can't create an applet without specifying a destination project; please use the -d/--destination flag to explicitly specify a project")
        if dest_applet_name:
            build_options['destination_override'] = '/' + dest_applet_name

    elif mode == "app":
        using_temp_project_for_remote_build = True
        build_project_id = dxpy.api.project_new({"name": "dx-build-app --remote temporary project"})["id"]

    try:
        # Resolve relative paths and symlinks here so we have something
        # reasonable to write in the job name below.
        src_dir = os.path.realpath(src_dir)

        # Show the user some progress as the tarball is being generated.
        # Hopefully this will help them to understand when their tarball
        # is huge (e.g. the target directory already has a whole bunch
        # of binaries in it) and interrupt before uploading begins.
        app_tarball_file = os.path.join(temp_dir, "app_tarball.tar.gz")
        tar_subprocess = subprocess.Popen(["tar", "-czf", "-", "--exclude", "./.git", "."], cwd=src_dir, stdout=subprocess.PIPE)
        with open(app_tarball_file, 'wb') as tar_output_file:
            total_num_bytes = 0
            last_console_update = 0
            start_time = time.time()
            printed_static_message = False
            # Pipe the output of tar into the output file
            while True:
                tar_exitcode = tar_subprocess.poll()
                data = tar_subprocess.stdout.read(4 * 1024 * 1024)
                if tar_exitcode is not None and len(data) == 0:
                    break
                tar_output_file.write(data)
                total_num_bytes += len(data)
                current_time = time.time()
                # Don't show status messages at all for very short tar
                # operations (< 1.0 sec)
                if current_time - last_console_update > 0.25 and current_time - start_time > 1.0:
                    if sys.stderr.isatty():
                        if last_console_update > 0:
                            sys.stderr.write("\r")
                        sys.stderr.write("Compressing target directory {dir}... ({kb_so_far:,} kb)".format(dir=src_dir, kb_so_far=total_num_bytes / 1024))
                        sys.stderr.flush()
                        last_console_update = current_time
                    elif not printed_static_message:
                        # Print a message (once only) when stderr is not
                        # going to a live console
                        sys.stderr.write("Compressing target directory %s..." % (src_dir,))
                        printed_static_message = True

        if last_console_update > 0:
            sys.stderr.write("\n")
        if tar_exitcode != 0:
            raise Exception("tar exited with non-zero exit code " + str(tar_exitcode))

        dxpy.set_workspace_id(build_project_id)

        remote_file = dxpy.upload_local_file(app_tarball_file, media_type="application/gzip",
                                             wait_on_close=True, show_progress=True)

        try:
            input_hash = {
                "input_file": dxpy.dxlink(remote_file),
                "build_options": build_options
                }
            if mode == 'app':
                input_hash["publish"] = publish
            api_options = {
                "name": "Remote build of %s" % (os.path.basename(src_dir),),
                "input": input_hash,
                "project": build_project_id,
                }
            if dest_folder:
                api_options["folder"] = dest_folder
            app_run_result = dxpy.api.app_run(builder_app, input_params=api_options)
            job_id = app_run_result["id"]
            print("Started builder job %s" % (job_id,))
            try:
                subprocess.check_call(["dx", "watch", job_id])
            except subprocess.CalledProcessError as e:
                if e.returncode == 3:
                    # Some kind of failure to build the app. The reason
                    # for the failure is probably self-evident from the
                    # job log (and if it's not, the CalledProcessError
                    # is not informative anyway), so just propagate the
                    # return code without additional remarks.
                    sys.exit(3)
                else:
                    raise e

            dxpy.DXJob(job_id).wait_on_done(interval=1)

            if mode == 'applet':
                applet_id, _ = dxpy.get_dxlink_ids(dxpy.api.job_describe(job_id)['output']['output_applet'])
                return applet_id
            else:
                # TODO: determine and return the app ID, to allow
                # running the app if args.run is specified
                return None
        finally:
            if not using_temp_project_for_remote_build:
                dxpy.DXProject(build_project_id).remove_objects([remote_file.get_id()])
    finally:
        if using_temp_project_for_remote_build:
            dxpy.api.project_destroy(build_project_id, {"terminateJobs": True})
        shutil.rmtree(temp_dir)
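The compression loop in _build_app_remote is an instance of a general pattern: stream a subprocess's stdout to disk in large chunks while emitting occasional progress to stderr. A stripped-down sketch of the same pattern, with hypothetical names and without the rate-limiting and TTY handling above:

import subprocess
import sys

def stream_with_progress(cmd, out_path, chunk_size=4 * 1024 * 1024):
    # Pipe cmd's stdout into out_path, reporting kilobytes written so far.
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    total = 0
    with open(out_path, "wb") as out:
        while True:
            data = proc.stdout.read(chunk_size)
            if not data:  # EOF: the producer has closed its stdout
                break
            out.write(data)
            total += len(data)
            sys.stderr.write("\r{:,} kb".format(total // 1024))
            sys.stderr.flush()
    sys.stderr.write("\n")
    if proc.wait() != 0:
        raise Exception("command exited with non-zero code " + str(proc.returncode))
    return total

# e.g. stream_with_progress(["tar", "-czf", "-", "--exclude", "./.git", "."],
#                           "/tmp/app_tarball.tar.gz")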