Example #1
0
    def __init__(self,
                 priority_log_address="/opt/dnanexus/log/priority",
                 bulk_log_address="/opt/dnanexus/log/bulk",
                 source="DX_APP"):
        '''
        :param priority_log_address: filesystem path of the Unix socket to connect to for the priority log
        :type priority_log_address: string
        :param bulk_log_address: filesystem path of the Unix socket to connect to for the bulk log
        :type bulk_log_address: string
        :param source: value stored on the handler as ``self.source``
        :type source: string
        :raises: DXError if either socket path does not exist

        Initialize the logging handler and connect one Unix datagram
        socket to each of the two given addresses.
        '''
        logging.Handler.__init__(self)

        # Validate both paths before creating any socket, so that a
        # missing path does not leave an open socket behind.
        for address in (priority_log_address, bulk_log_address):
            if not os.path.exists(address):
                raise DXError(
                    "The path %s does not exist, but is required for application logging"
                    % (address))

        self.priority_log_address = priority_log_address
        self.bulk_log_address = bulk_log_address
        self.source = source

        self.priority_log_socket = socket.socket(socket.AF_UNIX,
                                                 socket.SOCK_DGRAM)
        self.bulk_log_socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)

        try:
            self.priority_log_socket.connect(priority_log_address)
            self.bulk_log_socket.connect(bulk_log_address)
        except Exception:
            # The handler is unusable if either connection fails; release
            # both descriptors before propagating the error.
            self.priority_log_socket.close()
            self.bulk_log_socket.close()
            raise
Example #2
0
    def update(self, title=None, unset_title=False, summary=None, description=None,
               output_folder=None, unset_output_folder=False, stages=None,
               edit_version=None, **kwargs):
        '''
        :param title: workflow title to set; cannot be provided with *unset_title* set to True
        :type title: string
        :param unset_title: whether to unset the title; cannot be provided with string value for *title*
        :type unset_title: boolean
        :param summary: workflow summary to set
        :type summary: string
        :param description: workflow description to set
        :type description: string
        :param output_folder: new default output folder for the workflow
        :type output_folder: string
        :param unset_output_folder: whether to unset the default output folder; cannot be True with string value for *output_folder*
        :type unset_output_folder: boolean
        :param stages: updates to the stages to make; see API documentation for /workflow-xxxx/update for syntax of this field; use :meth:`update_stage()` to update a single stage
        :type stages: dict
        :param edit_version: if provided, the edit version of the workflow that should be modified; if not provided, the current edit version will be used (optional)
        :type edit_version: int
        :raises: DXError if *title* is given together with *unset_title*, or *output_folder* together with *unset_output_folder*

        Make general metadata updates to the workflow. Additional keyword
        arguments are passed through to the API call. If none of the
        arguments requests a change, no API call is made.
        '''
        if title is not None and unset_title:
            raise DXError('dxpy.DXWorkflow.update: cannot provide both "title" and set "unset_title"')
        if output_folder is not None and unset_output_folder:
            raise DXError('dxpy.DXWorkflow.update: cannot provide both "output_folder" and set "unset_output_folder"')

        update_input = {}
        # A field that is being unset is sent explicitly with the value None
        if title is not None:
            update_input["title"] = title
        elif unset_title:
            update_input["title"] = None
        if summary is not None:
            update_input["summary"] = summary
        if description is not None:
            update_input["description"] = description
        if output_folder is not None:
            update_input["outputFolder"] = output_folder
        elif unset_output_folder:
            update_input["outputFolder"] = None
        if stages is not None:
            update_input["stages"] = stages

        # only perform update if there are changes to make
        if update_input:
            self._add_edit_version_to_request(update_input, edit_version)
            try:
                dxpy.api.workflow_update(self._dxid, update_input, **kwargs)
            finally:
                # refresh the cached describe whether or not the call succeeded
                self.describe()
Example #3
0
def DXWorkflow(dxid, project=None):
    '''
    :param dxid: ID of the workflow; must start with "record-" or "workflow-"
    :type dxid: string
    :param project: passed through unchanged as the second argument of the handler constructor
    :returns: a DXRecordWorkflow for "record-" IDs, or a DXAnalysisWorkflow for "workflow-" IDs
    :raises: DXError if *dxid* is None or has neither prefix

    Returns the appropriate remote workflow object handler.
    '''
    if dxid is None:
        # Without an ID there is no way to tell which subclass to return
        raise DXError(
            'DXWorkflow requires an ID to return the appropriate handler')

    is_record = dxid.startswith('record-')
    if not is_record and not dxid.startswith('workflow-'):
        raise DXError('DXWorkflow requires a record or workflow ID')

    handler_class = DXRecordWorkflow if is_record else DXAnalysisWorkflow
    return handler_class(dxid, project)
Example #4
0
 def _get_effective_input(self, workflow_input):
     effective_input = {}
     for key in workflow_input:
         input_name = self._get_input_name(key)
         if input_name in effective_input:
             raise DXError('DXWorkflow: the input for ' + input_name + ' was provided more than once')
         effective_input[input_name] = workflow_input[key]
     return effective_input
Example #5
0
    def _new(self, dx_hash, **kwargs):
        """
        :param dx_hash: Standard hash populated in :func:`dxpy.bindings.DXDataObject.new()` containing attributes common to all data object classes.
        :type dx_hash: dict
        :param title: Workflow title (optional)
        :type title: string
        :param summary: Workflow summary (optional)
        :type summary: string
        :param description: Workflow description (optional)
        :type description: string
        :param output_folder: Default output folder of the workflow (optional)
        :type output_folder: string
        :param init_from: Another analysis workflow object handler or an analysis (string or handler) from which to initialize the metadata (optional)
        :type init_from: :class:`~dxpy.bindings.dxworkflow.DXWorkflow`, :class:`~dxpy.bindings.dxanalysis.DXAnalysis`, or string (for analysis IDs only)
        :raises: DXError if *init_from* is neither a DXWorkflow/DXAnalysis handler nor a string analysis ID

        Create a new remote workflow object. The keyword arguments listed
        above are moved into *dx_hash* (which is modified in place); any
        remaining keyword arguments are passed through to the API call.
        """
        init_from = kwargs.pop("init_from", None)
        if init_from is not None:
            given_as_string = isinstance(init_from, basestring)
            given_as_handler = isinstance(init_from, (DXWorkflow, DXAnalysis))
            if not given_as_handler and \
               not (given_as_string and
                    re.compile('^analysis-[0-9A-Za-z]{24}$').match(init_from)):
                raise DXError("Expected init_from to be an instance of DXWorkflow or DXAnalysis, or to be a string analysis ID.")
            if given_as_string:
                dx_hash["initializeFrom"] = {"id": init_from}
            else:
                dx_hash["initializeFrom"] = {"id": init_from.get_id()}
                # Only workflow handlers contribute a project here
                if isinstance(init_from, DXWorkflow):
                    dx_hash["initializeFrom"]["project"] = init_from.get_proj_id()

        # (keyword argument name, field name in the API request)
        simple_fields = (("title", "title"),
                         ("summary", "summary"),
                         ("description", "description"),
                         ("output_folder", "outputFolder"))
        for kwarg_name, api_field in simple_fields:
            value = kwargs.pop(kwarg_name, None)
            if value is not None:
                dx_hash[api_field] = value

        resp = dxpy.api.workflow_new(dx_hash, **kwargs)
        self.set_ids(resp["id"], dx_hash["project"])
Example #6
0
 def get_stage(self, stage, **kwargs):
     '''
     :param stage: A number for the stage index (for the nth stage, starting from 0), or a string of the stage index, name, or ID
     :type stage: int or string
     :returns: Hash of stage descriptor in workflow
     :raises: DXError if no entry of ``self.stages`` has the resolved stage ID

     The identifier is first resolved with ``self._get_stage_id()``; extra
     keyword arguments are accepted but not used by this method.
     '''
     wanted_id = self._get_stage_id(stage)
     for descriptor in self.stages:
         if descriptor['id'] == wanted_id:
             return descriptor
     raise DXError('The stage ID ' + wanted_id + ' could not be found')
Example #7
0
 def tearDown(self):
     '''
     Empty the root folder of the project ``self.project_id`` and drop a
     fixed set of variables from the process environment.

     :raises: DXError if a removal response has no "completed" field
     '''
     # The removal is requested with "partial": True, so keep asking
     # until a response reports that it has completed.
     while True:
         resp = dxpy.api.project_remove_folder(self.project_id,
                                               {"folder": "/", "recurse": True, "partial": True})
         if 'completed' not in resp:
             raise DXError('Error removing folder')
         if resp['completed']:
             break

     for var in ('IFS', '_ARGCOMPLETE', '_DX_ARC_DEBUG', 'COMP_WORDBREAKS'):
         os.environ.pop(var, None)
Example #8
0
    def _get_stage_id(self, stage):
        '''
        :param stage: A stage ID, name, or index (stage index is the number n for the nth stage, starting from 0; can be provided as an int or a string)
        :type stage: int or string
        :returns: The stage ID (this is a no-op if it was already a stage ID)
        :raises: :class:`~dxpy.exceptions.DXError` if *stage* could not be parsed or resolved to a stage ID

        Resolution order: a string that parses as an integer is treated as
        an index; any other string that has the form of a stage ID is
        returned as is; otherwise the string is looked up as a stage name,
        which must match exactly one stage.
        '''
        # first, if it is a string, see if it is an integer
        if isinstance(stage, basestring):
            try:
                stage = int(stage)
            except ValueError:
                # not numeric; it is handled as a stage ID or name below
                pass

        if not isinstance(stage, basestring):
            # Try to parse as stage index; ensure that if it's not a
            # string that it is an integer at this point.
            try:
                stage_index = int(stage)
            except (TypeError, ValueError, OverflowError):
                # only conversion failures are translated; interrupts and
                # other unrelated exceptions are no longer swallowed here
                raise DXError('DXWorkflow: the given stage identifier was neither a string stage ID nor an integer index')
            if stage_index < 0 or stage_index >= len(self.stages):
                raise DXError('DXWorkflow: the workflow contains ' + str(len(self.stages)) + \
                              ' stage(s), and the numerical value of the given stage identifier is out of range')
            return self.stages[stage_index].get("id")

        if re.match('^stage-[0-9A-Za-z]{24}$', stage) is not None:
            # Already a stage ID
            return stage

        # Doesn't look like a stage ID, so look for it as a name
        matching_stage_ids = [stg['id'] for stg in self.stages if stg.get('name') == stage]
        if not matching_stage_ids:
            raise DXError('DXWorkflow: the given stage identifier could not be parsed as a stage ID nor found as a stage name')
        if len(matching_stage_ids) > 1:
            raise DXError('DXWorkflow: more than one workflow stage was found to have the name "' + stage + '"')
        return matching_stage_ids[0]
Example #9
0
    def add_stage(self, executable, name=None, folder=None, stage_input=None, instance_type=None,
                  edit_version=None, **kwargs):
        '''
        :param executable: string or a handler for an app or applet
        :type executable: string, DXApplet, or DXApp
        :param name: name for the stage (optional)
        :type name: string
        :param folder: default output folder for the stage; either a relative or absolute path (optional)
        :type folder: string
        :param stage_input: input fields to bind as default inputs for the executable (optional)
        :type stage_input: dict
        :param instance_type: Default instance type on which all jobs will be run for this stage, or a dict mapping function names to instance type requests
        :type instance_type: string or dict
        :param edit_version: if provided, the edit version of the workflow that should be modified; if not provided, the current edit version will be used (optional)
        :type edit_version: int
        :returns: ID of the added stage
        :rtype: string
        :raises: :class:`~dxpy.exceptions.DXError` if *executable* is not an expected type :class:`~dxpy.exceptions.DXAPIError` for errors thrown from the API call

        Adds the specified executable as a new stage in the workflow.
        Additional keyword arguments are passed through to the API call.
        '''
        if not isinstance(executable, (basestring, DXExecutable)):
            raise DXError("dxpy.DXWorkflow.add_stage: executable must be a string or an instance of DXApplet or DXApp")
        exec_id = executable if isinstance(executable, basestring) else executable.get_id()

        request = {"executable": exec_id}
        # Optional fields are only sent when the caller supplied them
        for field, value in (("name", name), ("folder", folder), ("input", stage_input)):
            if value is not None:
                request[field] = value
        if instance_type is not None:
            request["systemRequirements"] = self._inst_type_to_sys_reqs(instance_type)

        self._add_edit_version_to_request(request, edit_version)
        try:
            response = dxpy.api.workflow_add_stage(self._dxid, request, **kwargs)
        finally:
            # refresh the cached describe whether or not the call succeeded
            self.describe()
        return response['stage']
Example #10
0
    def update_stage(self, stage, executable=None, force=False,
                     name=None, unset_name=False, folder=None, unset_folder=False, stage_input=None,
                     instance_type=None, edit_version=None, **kwargs):
        '''
        :param stage: A number for the stage index (for the nth stage, starting from 0), or a string stage index, name, or ID
        :type stage: int or string
        :param executable: string or a handler for an app or applet
        :type executable: string, DXApplet, or DXApp
        :param force: whether to use *executable* even if it is incompatible with the previous executable's spec
        :type force: boolean
        :param name: new name for the stage; cannot be provided with *unset_name* set to True
        :type name: string
        :param unset_name: whether to unset the stage name; cannot be True with string value for *name*
        :type unset_name: boolean
        :param folder: new default output folder for the stage; either a relative or absolute path (optional)
        :type folder: string
        :param unset_folder: whether to unset the stage folder; cannot be True with string value for *folder*
        :type unset_folder: boolean
        :param stage_input: input fields to bind as default inputs for the executable (optional)
        :type stage_input: dict
        :param instance_type: Default instance type on which all jobs will be run for this stage, or a dict mapping function names to instance type requests
        :type instance_type: string or dict
        :param edit_version: if provided, the edit version of the workflow that should be modified; if not provided, the current edit version will be used (optional)
        :type edit_version: int
        :raises: :class:`~dxpy.exceptions.DXError` if *stage* cannot be resolved, if *name* is given together with *unset_name*, if *folder* is given together with *unset_folder*, or if *executable* is not an expected type

        Updates the specified stage of the workflow. If *executable* is
        given, the stage's executable is replaced first in its own API
        call; the remaining changes (name, folder, input, instance type),
        if any, are then sent in a second call. Additional keyword
        arguments are passed through to both API calls.
        '''
        stage_id = self._get_stage_id(stage)

        if name is not None and unset_name:
            raise DXError('dxpy.DXWorkflow.update_stage: cannot provide both "name" and set "unset_name"')
        if folder is not None and unset_folder:
            raise DXError('dxpy.DXWorkflow.update_stage: cannot provide both "folder" and set "unset_folder"')

        if executable is not None:
            if isinstance(executable, basestring):
                exec_id = executable
            elif isinstance(executable, DXExecutable):
                exec_id = executable.get_id()
            else:
                raise DXError("dxpy.DXWorkflow.update_stage: executable (if provided) must be a string or an instance of DXApplet or DXApp")
            update_stage_exec_input = {"stage": stage_id,
                                       "executable": exec_id,
                                       "force": force}
            self._add_edit_version_to_request(update_stage_exec_input, edit_version)
            try:
                dxpy.api.workflow_update_stage_executable(self._dxid, update_stage_exec_input, **kwargs)
            finally:
                # refresh the cached describe whether or not the call succeeded
                self.describe()

        # Construct hash and update the workflow's stage if necessary.
        # A field that is being unset is sent explicitly with the value None.
        update_stage_input = {}
        if name is not None:
            update_stage_input["name"] = name
        elif unset_name:
            update_stage_input["name"] = None
        # NOTE: folder and stage_input are tested for truthiness, so an
        # empty string or empty dict is treated the same as "not provided"
        if folder:
            update_stage_input["folder"] = folder
        elif unset_folder:
            update_stage_input["folder"] = None
        if stage_input:
            update_stage_input["input"] = stage_input
        if instance_type is not None:
            update_stage_input["systemRequirements"] = self._inst_type_to_sys_reqs(instance_type)

        # only perform the second call if there are changes to make
        if update_stage_input:
            update_input = {"stages": {stage_id: update_stage_input}}
            self._add_edit_version_to_request(update_input, edit_version)
            try:
                dxpy.api.workflow_update(self._dxid, update_input, **kwargs)
            finally:
                # refresh the cached describe whether or not the call succeeded
                self.describe()
Example #11
0
    def run(self,
            workflow_input,
            project=None,
            folder="/",
            name=None,
            **kwargs):
        '''
        :param workflow_input: Hash of the workflow's input arguments, with keys equal to "N.name" where N is the stage number and name is the name of the input, e.g. "0.reads" if the first stage takes in an input called "reads"
        :type workflow_input: dict
        :param project: Project ID in which to run the jobs (project context)
        :type project: string
        :param folder: Folder in which the workflow's outputs will be placed in *project*
        :type folder: string
        :param name: String to append to the default job name for each job (default is the workflow's name)
        :type name: string
        :returns: list of job IDs in order of the stages
        :raises: DXError if the "version" field of the workflow details is not one of 2, 3, 4 or 5

        Run each stage in the associated workflow. Additional keyword
        arguments are passed through to each executable's ``run()``.
        After all stages have been launched, the workflow details are
        saved back with the app references of app stages rewritten.
        '''

        workflow_name = self.describe()['name']
        workflow_spec = self.get_details()
        # Copy taken before workflow_spec is modified below; this copy is
        # what gets saved back at the end.
        workflow_details = copy.deepcopy(workflow_spec)
        if workflow_spec.get('version') not in range(2, 6):
            raise DXError("Unrecognized workflow version {v} in {w}\n".format(
                v=workflow_spec.get('version', '<none>'), w=self))

        stages = workflow_spec['stages']

        # Pre-populate in stage order so the returned values keep that order
        launched_jobs = OrderedDict()
        for stage in stages:
            launched_jobs[stage['id']] = None

        for index, stage in enumerate(stages):
            stage.setdefault('key', str(index))
            stage_inputs = stage.get('inputs', {})
            # Iterate over a snapshot of the keys: entries are deleted
            # from the dict inside the loop.
            for input_name in list(stage_inputs):
                if stage_inputs[input_name] == "":
                    del stage_inputs[input_name]

        # The caller-supplied input is the same for every stage
        workflow_input_json = json.dumps(workflow_input)

        for index, stage in enumerate(stages):
            # A stage without an "inputs" field is treated as having no
            # bound inputs, consistent with the cleanup loop above.
            inputs_from_stage = {
                input_name: stage_to_job_refs(value, launched_jobs)
                for input_name, value in stage.get('inputs', {}).items()
                if value is not None
            }

            # The app reference may be a dict with an "id" field, a
            # {"$dnanexus_link": ...} dict, or a plain ID string. Only look
            # up "id" on a dict: on a string, `in` is a substring test.
            app_ref = stage['app']
            if isinstance(app_ref, dict) and 'id' in app_ref:
                exec_id = app_ref['id']
            else:
                exec_id = app_ref
            if isinstance(exec_id, dict) and '$dnanexus_link' in exec_id:
                exec_id = exec_id['$dnanexus_link']
            if exec_id.startswith('app-'):
                from dxpy.utils.resolver import get_app_from_path
                exec_id = get_app_from_path(exec_id)['id']

            executable = get_handler(exec_id)
            executable_desc = executable.describe()

            if exec_id.startswith('app-'):
                workflow_details['stages'][index]['app'] = {
                    "$dnanexus_link":
                    'app-' + executable_desc['name'] + '/' +
                    executable_desc['version']
                }

            # Job name: the executable's title (or its name if the title
            # is missing or empty), followed by the caller's name or the
            # workflow's name.
            job_name = executable_desc.get('title', '')
            if job_name == '':
                job_name = executable_desc['name']
            job_name += ' - ' + (name if name is not None else workflow_name)

            exec_inputs = ExecutableInputs(
                executable, input_name_prefix=str(stage['key']) + ".")
            exec_inputs.update(inputs_from_stage, strip_prefix=False)
            # Feed the workflow input through update_from_args() by way of
            # a minimal stand-in for parsed command-line arguments.
            fake_args = Namespace()
            fake_args.filename = None
            fake_args.input = None
            fake_args.input_spec = None
            fake_args.input_json = workflow_input_json
            exec_inputs.update_from_args(fake_args)
            input_json = exec_inputs.inputs

            launched_jobs[stage['id']] = executable.run(input_json,
                                                        project=project,
                                                        folder=folder,
                                                        name=job_name,
                                                        **kwargs)

        # Update workflow with updated executable IDs
        self.set_details(workflow_details)

        return list(launched_jobs.values())