def __init__(self, priority_log_address="/opt/dnanexus/log/priority",
             bulk_log_address="/opt/dnanexus/log/bulk", source="DX_APP"):
    logging.Handler.__init__(self)

    self.priority_log_address = priority_log_address
    self.priority_log_socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
    self.bulk_log_address = bulk_log_address
    self.bulk_log_socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)

    if not os.path.exists(priority_log_address):
        raise DXError("The path %s does not exist, but is required for application logging"
                      % (priority_log_address,))
    if not os.path.exists(bulk_log_address):
        raise DXError("The path %s does not exist, but is required for application logging"
                      % (bulk_log_address,))

    self.priority_log_socket.connect(priority_log_address)
    self.bulk_log_socket.connect(bulk_log_address)

    self.source = source
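
# Usage sketch: attaching the handler to a standard logger. Assumes this __init__
# belongs to dxpy's UNIX-socket log handler (referred to below as DXLogHandler) and
# that the default log sockets exist on the execution worker.
def _example_attach_log_handler():
    import logging
    logger = logging.getLogger("dx_app")             # hypothetical logger name
    logger.addHandler(DXLogHandler(source="DX_APP"))
    logger.info("application started")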
def update(self, title=None, unset_title=False, summary=None, description=None,
           output_folder=None, unset_output_folder=False, stages=None,
           edit_version=None, **kwargs):
    '''
    :param title: workflow title to set; cannot be provided with *unset_title* set to True
    :type title: string
    :param unset_title: whether to unset the title; cannot be provided with string value for *title*
    :type unset_title: boolean
    :param summary: workflow summary to set
    :type summary: string
    :param description: workflow description to set
    :type description: string
    :param output_folder: new default output folder for the workflow
    :type output_folder: string
    :param unset_output_folder: whether to unset the default output folder; cannot be True with string value for *output_folder*
    :type unset_output_folder: boolean
    :param stages: updates to the stages to make; see API documentation for /workflow-xxxx/update for syntax of this field; use :meth:`update_stage()` to update a single stage
    :type stages: dict
    :param edit_version: if provided, the edit version of the workflow that should be modified; if not provided, the current edit version will be used (optional)
    :type edit_version: int

    Make general metadata updates to the workflow
    '''
    update_input = {}
    if title is not None and unset_title:
        raise DXError('dxpy.DXWorkflow.update: cannot provide both "title" and set "unset_title"')
    if output_folder is not None and unset_output_folder:
        raise DXError('dxpy.DXWorkflow.update: cannot provide both "output_folder" and set "unset_output_folder"')
    if title is not None:
        update_input["title"] = title
    elif unset_title:
        update_input["title"] = None
    if summary is not None:
        update_input["summary"] = summary
    if description is not None:
        update_input["description"] = description
    if output_folder is not None:
        update_input["outputFolder"] = output_folder
    elif unset_output_folder:
        update_input["outputFolder"] = None
    if stages is not None:
        update_input["stages"] = stages

    # only perform update if there are changes to make
    if update_input:
        self._add_edit_version_to_request(update_input, edit_version)
        try:
            dxpy.api.workflow_update(self._dxid, update_input, **kwargs)
        finally:
            self.describe()  # update cached describe
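
# Usage sketch: general metadata updates on a workflow handler. The ID is a
# placeholder and an authenticated dxpy session is assumed.
def _example_update_workflow():
    wf = DXWorkflow("workflow-xxxx")
    wf.update(title="Variant calling pipeline",
              summary="Alignment followed by variant calling",
              output_folder="/results")
    wf.update(unset_title=True)   # clear a previously set title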
def DXWorkflow(dxid, project=None):
    '''
    Returns the appropriate remote workflow object handler.
    '''
    if dxid is None:
        # We don't know which subclass to return
        raise DXError('DXWorkflow requires an ID to return the appropriate handler')
    if dxid.startswith('record-'):
        return DXRecordWorkflow(dxid, project)
    elif dxid.startswith('workflow-'):
        return DXAnalysisWorkflow(dxid, project)
    else:
        raise DXError('DXWorkflow requires a record or workflow ID')
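
# Usage sketch: the factory dispatches on the ID prefix (IDs are placeholders).
def _example_workflow_factory():
    analysis_style = DXWorkflow("workflow-xxxx")   # returns a DXAnalysisWorkflow
    record_style = DXWorkflow("record-xxxx")       # returns a DXRecordWorkflow
    return analysis_style, record_style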
def _get_effective_input(self, workflow_input):
    effective_input = {}
    for key in workflow_input:
        input_name = self._get_input_name(key)
        if input_name in effective_input:
            raise DXError('DXWorkflow: the input for ' + input_name + ' was provided more than once')
        effective_input[input_name] = workflow_input[key]
    return effective_input
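
# Illustration (assumption: _get_input_name resolves the stage-index form "0.reads"
# and the stage-ID form "stage-xxxx.reads" of the same input to one canonical name,
# so supplying both forms triggers the DXError above).
def _example_effective_input(wf):
    wf._get_effective_input({"0.reads": {"$dnanexus_link": "file-xxxx"}})    # accepted
    wf._get_effective_input({"0.reads": None, "stage-xxxx.reads": None})     # raises DXError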
def _new(self, dx_hash, **kwargs):
    """
    :param dx_hash: Standard hash populated in :func:`dxpy.bindings.DXDataObject.new()` containing attributes common to all data object classes.
    :type dx_hash: dict
    :param title: Workflow title (optional)
    :type title: string
    :param summary: Workflow summary (optional)
    :type summary: string
    :param description: Workflow description (optional)
    :type description: string
    :param output_folder: Default output folder of the workflow (optional)
    :type output_folder: string
    :param init_from: Another workflow object handler or an analysis (string or handler) from which to initialize the metadata (optional)
    :type init_from: :class:`~dxpy.bindings.dxworkflow.DXWorkflow`, :class:`~dxpy.bindings.dxanalysis.DXAnalysis`, or string (for analysis IDs only)

    Create a new remote workflow object.
    """
    if "init_from" in kwargs:
        if kwargs["init_from"] is not None:
            if not (isinstance(kwargs["init_from"], (DXWorkflow, DXAnalysis)) or
                    (isinstance(kwargs["init_from"], basestring) and
                     re.compile('^analysis-[0-9A-Za-z]{24}$').match(kwargs["init_from"]))):
                raise DXError("Expected init_from to be an instance of DXWorkflow or DXAnalysis, or to be a string analysis ID.")
            if isinstance(kwargs["init_from"], basestring):
                dx_hash["initializeFrom"] = {"id": kwargs["init_from"]}
            else:
                dx_hash["initializeFrom"] = {"id": kwargs["init_from"].get_id()}
                if isinstance(kwargs["init_from"], DXWorkflow):
                    dx_hash["initializeFrom"]["project"] = kwargs["init_from"].get_proj_id()
        del kwargs["init_from"]
    if "title" in kwargs:
        if kwargs["title"] is not None:
            dx_hash["title"] = kwargs["title"]
        del kwargs["title"]
    if "summary" in kwargs:
        if kwargs["summary"] is not None:
            dx_hash["summary"] = kwargs["summary"]
        del kwargs["summary"]
    if "description" in kwargs:
        if kwargs["description"] is not None:
            dx_hash["description"] = kwargs["description"]
        del kwargs["description"]
    if "output_folder" in kwargs:
        if kwargs["output_folder"] is not None:
            dx_hash["outputFolder"] = kwargs["output_folder"]
        del kwargs["output_folder"]

    resp = dxpy.api.workflow_new(dx_hash, **kwargs)
    self.set_ids(resp["id"], dx_hash["project"])
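
# Usage sketch: _new is normally reached through the DXDataObject.new() path.
# IDs are placeholders and an authenticated dxpy session is assumed.
def _example_create_workflow():
    wf = DXAnalysisWorkflow()
    wf.new(project="project-xxxx",
           title="RNA-seq pipeline",
           summary="Alignment and quantification",
           output_folder="/pipelines/rnaseq")
    # init_from may also be passed: an existing DXWorkflow/DXAnalysis handler or a
    # full "analysis-..." ID string.
    return wf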
def get_stage(self, stage, **kwargs):
    '''
    :param stage: A number for the stage index (for the nth stage, starting from 0), or a string of the stage index, name, or ID
    :type stage: int or string
    :returns: Hash of stage descriptor in workflow
    '''
    stage_id = self._get_stage_id(stage)
    try:
        return next(stage for stage in self.stages if stage['id'] == stage_id)
    except StopIteration:
        raise DXError('The stage ID ' + stage_id + ' could not be found')
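
# Usage sketch: a stage may be addressed by index, index string, name, or stage ID.
def _example_get_stage(wf):
    first = wf.get_stage(0)            # by numeric index
    same = wf.get_stage("0")           # same stage, index given as a string
    by_name = wf.get_stage("Align")    # hypothetical stage name
    return first, same, by_name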
def tearDown(self):
    completed = False
    while not completed:
        resp = dxpy.api.project_remove_folder(self.project_id,
                                              {"folder": "/", "recurse": True, "partial": True})
        if 'completed' not in resp:
            raise DXError('Error removing folder')
        completed = resp['completed']
    for var in 'IFS', '_ARGCOMPLETE', '_DX_ARC_DEBUG', 'COMP_WORDBREAKS':
        if var in os.environ:
            del os.environ[var]
def _get_stage_id(self, stage):
    '''
    :param stage: A stage ID, name, or index (stage index is the number n for the nth stage, starting from 0; can be provided as an int or a string)
    :type stage: int or string
    :returns: The stage ID (this is a no-op if it was already a stage ID)
    :raises: :class:`~dxpy.exceptions.DXError` if *stage* could not be parsed or resolved to a stage ID
    '''
    # first, if it is a string, see if it is an integer
    if isinstance(stage, basestring):
        try:
            stage = int(stage)
        except:
            # we'll try parsing it as a string later
            pass

    if not isinstance(stage, basestring):
        # Try to parse as stage index; ensure that if it's not a
        # string that it is an integer at this point.
        try:
            stage_index = int(stage)
        except:
            raise DXError('DXWorkflow: the given stage identifier was neither a string stage ID nor an integer index')
        if stage_index < 0 or stage_index >= len(self.stages):
            raise DXError('DXWorkflow: the workflow contains ' + str(len(self.stages)) +
                          ' stage(s), and the numerical value of the given stage identifier is out of range')
        return self.stages[stage_index].get("id")

    if re.compile('^stage-[0-9A-Za-z]{24}$').match(stage) is None:
        # Doesn't look like a stage ID, so look for it as a name
        matching_stage_ids = [stg['id'] for stg in self.stages if stg.get('name') == stage]
        if len(matching_stage_ids) == 0:
            raise DXError('DXWorkflow: the given stage identifier could not be parsed as a stage ID nor found as a stage name')
        elif len(matching_stage_ids) > 1:
            raise DXError('DXWorkflow: more than one workflow stage was found to have the name "' + stage + '"')
        else:
            return matching_stage_ids[0]
    else:
        # Already a stage ID
        return stage
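
# Resolution behavior of _get_stage_id, with placeholder values:
#   wf._get_stage_id(1)                            -> ID of the second stage
#   wf._get_stage_id("1")                          -> same stage, index given as a string
#   wf._get_stage_id("Align")                      -> ID of the uniquely named stage "Align"
#   wf._get_stage_id("stage-<24 alphanumerics>")   -> returned unchanged (already an ID)
# Out-of-range indices, unknown names, and ambiguous names raise DXError.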
def add_stage(self, executable, name=None, folder=None, stage_input=None,
              instance_type=None, edit_version=None, **kwargs):
    '''
    :param executable: string or a handler for an app or applet
    :type executable: string, DXApplet, or DXApp
    :param name: name for the stage (optional)
    :type name: string
    :param folder: default output folder for the stage; either a relative or absolute path (optional)
    :type folder: string
    :param stage_input: input fields to bind as default inputs for the executable (optional)
    :type stage_input: dict
    :param instance_type: Default instance type on which all jobs will be run for this stage, or a dict mapping function names to instance type requests
    :type instance_type: string or dict
    :param edit_version: if provided, the edit version of the workflow that should be modified; if not provided, the current edit version will be used (optional)
    :type edit_version: int
    :returns: ID of the added stage
    :rtype: string
    :raises: :class:`~dxpy.exceptions.DXError` if *executable* is not an expected type
             :class:`~dxpy.exceptions.DXAPIError` for errors thrown from the API call

    Adds the specified executable as a new stage in the workflow.
    '''
    if isinstance(executable, basestring):
        exec_id = executable
    elif isinstance(executable, DXExecutable):
        exec_id = executable.get_id()
    else:
        raise DXError("dxpy.DXWorkflow.add_stage: executable must be a string or an instance of DXApplet or DXApp")
    add_stage_input = {"executable": exec_id}
    if name is not None:
        add_stage_input["name"] = name
    if folder is not None:
        add_stage_input["folder"] = folder
    if stage_input is not None:
        add_stage_input["input"] = stage_input
    if instance_type is not None:
        add_stage_input["systemRequirements"] = self._inst_type_to_sys_reqs(instance_type)
    self._add_edit_version_to_request(add_stage_input, edit_version)
    try:
        result = dxpy.api.workflow_add_stage(self._dxid, add_stage_input, **kwargs)
    finally:
        self.describe()  # update cached describe
    return result['stage']
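
# Usage sketch: add an applet as a stage with a default input and an instance type
# request. IDs, the stage name, the entry-point name "main", and the instance type
# are placeholders.
def _example_add_stage(wf):
    applet = DXApplet("applet-xxxx")
    return wf.add_stage(applet,
                        name="Align reads",
                        folder="/bam",
                        stage_input={"reads": {"$dnanexus_link": "file-xxxx"}},
                        instance_type={"main": "mem2_hdd2_x2"})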
def update_stage(self, stage, executable=None, force=False,
                 name=None, unset_name=False, folder=None, unset_folder=False,
                 stage_input=None, instance_type=None, edit_version=None, **kwargs):
    '''
    :param stage: A number for the stage index (for the nth stage, starting from 0), or a string stage index, name, or ID
    :type stage: int or string
    :param executable: string or a handler for an app or applet
    :type executable: string, DXApplet, or DXApp
    :param force: whether to use *executable* even if it is incompatible with the previous executable's spec
    :type force: boolean
    :param name: new name for the stage; cannot be provided with *unset_name* set to True
    :type name: string
    :param unset_name: whether to unset the stage name; cannot be True with string value for *name*
    :type unset_name: boolean
    :param folder: new default output folder for the stage; either a relative or absolute path (optional)
    :type folder: string
    :param unset_folder: whether to unset the stage folder; cannot be True with string value for *folder*
    :type unset_folder: boolean
    :param stage_input: input fields to bind as default inputs for the executable (optional)
    :type stage_input: dict
    :param instance_type: Default instance type on which all jobs will be run for this stage, or a dict mapping function names to instance type requests
    :type instance_type: string or dict
    :param edit_version: if provided, the edit version of the workflow that should be modified; if not provided, the current edit version will be used (optional)
    :type edit_version: int

    Updates the specified stage in the workflow, optionally replacing its
    executable and/or modifying its metadata and default inputs.
    '''
    stage_id = self._get_stage_id(stage)

    if name is not None and unset_name:
        raise DXError('dxpy.DXWorkflow.update_stage: cannot provide both "name" and set "unset_name"')
    if folder is not None and unset_folder:
        raise DXError('dxpy.DXWorkflow.update_stage: cannot provide both "folder" and set "unset_folder"')

    if executable is not None:
        if isinstance(executable, basestring):
            exec_id = executable
        elif isinstance(executable, DXExecutable):
            exec_id = executable.get_id()
        else:
            raise DXError("dxpy.DXWorkflow.update_stage: executable (if provided) must be a string or an instance of DXApplet or DXApp")
        update_stage_exec_input = {"stage": stage_id,
                                   "executable": exec_id,
                                   "force": force}
        self._add_edit_version_to_request(update_stage_exec_input, edit_version)
        try:
            dxpy.api.workflow_update_stage_executable(self._dxid, update_stage_exec_input, **kwargs)
        finally:
            self.describe()  # update cached describe

    # Construct hash and update the workflow's stage if necessary
    update_stage_input = {}
    if name is not None:
        update_stage_input["name"] = name
    elif unset_name:
        update_stage_input["name"] = None
    if folder:
        update_stage_input["folder"] = folder
    elif unset_folder:
        update_stage_input["folder"] = None
    if stage_input:
        update_stage_input["input"] = stage_input
    if instance_type is not None:
        update_stage_input["systemRequirements"] = self._inst_type_to_sys_reqs(instance_type)
    if update_stage_input:
        update_input = {"stages": {stage_id: update_stage_input}}
        self._add_edit_version_to_request(update_input, edit_version)
        try:
            dxpy.api.workflow_update(self._dxid, update_input, **kwargs)
        finally:
            self.describe()  # update cached describe
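
# Usage sketch: update a stage's metadata, then swap its executable in place
# (force=True accepts a spec-incompatible replacement). IDs are placeholders.
def _example_update_stage(wf):
    wf.update_stage(0, name="Align reads v2", folder="/bam_v2")
    wf.update_stage(0, executable="applet-yyyy", force=True)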
def run(self, workflow_input, project=None, folder="/", name=None, **kwargs):
    '''
    :param workflow_input: Hash of the workflow's input arguments, with keys equal to "N.name" where N is the stage number and name is the name of the input, e.g. "0.reads" if the first stage takes in an input called "reads"
    :type workflow_input: dict
    :param project: Project ID in which to run the jobs (project context)
    :type project: string
    :param folder: Folder in which the workflow's outputs will be placed in *project*
    :type folder: string
    :param name: String to append to the default job name for each job (default is the workflow's name)
    :type name: string
    :returns: list of job IDs in order of the stages

    Run each stage in the associated workflow
    '''
    workflow_name = self.describe()['name']
    workflow_spec = self.get_details()
    workflow_details = copy.deepcopy(workflow_spec)
    if workflow_spec.get('version') not in range(2, 6):
        raise DXError("Unrecognized workflow version {v} in {w}\n".format(
            v=workflow_spec.get('version', '<none>'), w=self))

    launched_jobs = OrderedDict()
    for stage in workflow_spec['stages']:
        launched_jobs[stage['id']] = None

    for k in range(len(workflow_spec['stages'])):
        workflow_spec['stages'][k].setdefault('key', str(k))
        for i in workflow_spec['stages'][k].get('inputs', {}).keys():
            if workflow_spec['stages'][k]['inputs'][i] == "":
                del workflow_spec['stages'][k]['inputs'][i]

    for k, stage in enumerate(workflow_spec['stages']):
        # Resolve any references to upstream stages against the jobs launched so far
        inputs_from_stage = {input_name: stage_to_job_refs(value, launched_jobs)
                             for input_name, value in stage['inputs'].iteritems()
                             if value is not None}

        exec_id = stage['app']['id'] if 'id' in stage['app'] else stage['app']
        if isinstance(exec_id, dict) and '$dnanexus_link' in exec_id:
            exec_id = exec_id['$dnanexus_link']
        if exec_id.startswith('app-'):
            from dxpy.utils.resolver import get_app_from_path
            exec_id = get_app_from_path(exec_id)['id']
        executable = get_handler(exec_id)
        executable_desc = executable.describe()
        if exec_id.startswith('app-'):
            workflow_details['stages'][k]['app'] = {
                "$dnanexus_link": 'app-' + executable_desc['name'] + '/' + executable_desc['version']
            }

        job_name = executable_desc.get('title', '')
        if job_name == '':
            job_name = executable_desc['name']
        job_name += ' - ' + (name if name is not None else workflow_name)

        exec_inputs = ExecutableInputs(executable, input_name_prefix=str(stage['key']) + ".")
        exec_inputs.update(inputs_from_stage, strip_prefix=False)
        fake_args = Namespace()
        fake_args.filename = None
        fake_args.input = None
        fake_args.input_spec = None
        fake_args.input_json = json.dumps(workflow_input)
        exec_inputs.update_from_args(fake_args)
        input_json = exec_inputs.inputs

        launched_jobs[stage['id']] = executable.run(input_json, project=project, folder=folder,
                                                    name=job_name, **kwargs)

    # Update workflow with updated executable IDs
    self.set_details(workflow_details)

    return launched_jobs.values()
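
# Usage sketch: input keys follow the "<stage key>.<input name>" convention described
# above; IDs, the folder, and the input names are placeholders.
def _example_run_record_workflow(wf):
    return wf.run({"0.reads": {"$dnanexus_link": "file-xxxx"},
                   "1.genome": {"$dnanexus_link": "file-yyyy"}},
                  project="project-xxxx",
                  folder="/runs/nightly",
                  name="nightly run")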