def sweepster(self, trans, id=None, hda_ldda=None, dataset_id=None, regions=None):
    """
    Displays a sweepster visualization using the incoming parameters. If id is
    available, get the visualization with the given id; otherwise, create a new
    visualization using a given dataset and regions.

    Parameters:
        trans      -- current transaction; used for the history, the toolbox
                      and template rendering.
        id         -- id of a saved visualization to load (optional).
        hda_ldda   -- passed through to get_hda_or_ldda when no id is given.
        dataset_id -- id of the dataset to visualize when no id is given.
        regions    -- JSON string describing the regions of a new
                      visualization; when missing or empty an empty JSON
                      object is used instead.

    Returns the rendered "visualization/sweepster.mako" template.
    """
    # Need to create history if necessary in order to create tool form.
    trans.get_history(create=True)

    if id:
        # Loading a shared visualization.
        viz = self.get_visualization(trans, id)
        viz_config = self.get_visualization_config(trans, viz)
        dataset = self.get_dataset(trans, viz_config['dataset_id'])
    else:
        # Loading new visualization.
        dataset = self.get_hda_or_ldda(trans, hda_ldda, dataset_id)
        job = get_dataset_job(dataset)
        viz_config = {
            'dataset_id': dataset_id,
            'tool_id': job.tool_id,
            # regions defaults to None; fall back to an empty JSON object so
            # that None is never handed to the JSON parser.
            # NOTE(review): assumes from_json_string parses a JSON string --
            # confirm against its definition.
            'regions': from_json_string(regions or '{}'),
        }

    # Add tool, dataset attributes to config based on id.
    tool = trans.app.toolbox.get_tool(viz_config['tool_id'])
    viz_config['tool'] = tool.to_dict(trans, for_display=True)
    viz_config['dataset'] = dataset.get_api_value()

    return trans.fill_template_mako("visualization/sweepster.mako", config=viz_config)
def _rerun_tool(self, trans, payload, **kwargs):
    """
    Rerun a tool to produce a new output dataset that corresponds to a
    dataset that a user is currently viewing.

    Parameters:
        trans   -- current transaction.
        payload -- dict read for the keys 'target_dataset_id' (required),
                   'inputs' (required; new tool parameter values) and
                   'regions' (optional; a single region dict or a list of
                   region dicts understood by GenomeRegion.from_dict).

    Returns, on the failure paths:
        * a JSON string (built with to_json_string) describing a dataset
          state problem, a pending/failed conversion, a parameter that could
          not be set, or an exception raised while executing the tool;
        * Dataset.conversion_messages.NO_TOOL when the original tool cannot
          be found, or Dataset.conversion_messages.ERROR when a trackster
          config action cannot be matched to an output dataset.
    Nothing is returned explicitly after tool.execute succeeds.
    """
    #
    # TODO: refactor to use same code as run_tool.
    #

    # Run tool on region if region is specified.
    run_on_regions = False
    regions = payload.get('regions', None)
    if regions:
        if isinstance(regions, dict):
            # Regions is a single region.
            regions = [GenomeRegion.from_dict(regions)]
        elif isinstance(regions, list):
            # There is a list of regions.
            regions = [GenomeRegion.from_dict(r) for r in regions]

            if len(regions) > 1:
                # Sort by chrom name, start so that data is not fetched out of order.
                regions = sorted(regions, key=lambda r: (r.chrom.lower(), r.start))

                # Merge overlapping regions so that regions do not overlap
                # and hence data is not included multiple times.
                merged = [regions[0]]
                for cur in regions[1:]:
                    prev = merged[-1]
                    if cur.chrom == prev.chrom and cur.start <= prev.end:
                        # Found overlapping regions, so join them into prev.
                        # Use max() because cur may lie entirely inside prev;
                        # assigning cur.end directly would shrink prev.
                        prev.end = max(prev.end, cur.end)
                    else:
                        # No overlap, move to next region.
                        merged.append(cur)
                regions = merged

        run_on_regions = True

    # Dataset check.
    original_dataset = self.get_dataset(trans, payload['target_dataset_id'],
                                        check_ownership=False, check_accessible=True)
    msg = self.check_dataset_state(trans, original_dataset)
    if msg:
        return to_json_string(msg)

    #
    # Set tool parameters--except non-hidden dataset parameters--using combination of
    # job's previous parameters and incoming parameters. Incoming parameters
    # have priority.
    #
    original_job = get_dataset_job(original_dataset)
    tool = trans.app.toolbox.get_tool(original_job.tool_id)
    if not tool:
        return trans.app.model.Dataset.conversion_messages.NO_TOOL
    tool_params = dict([(p.name, p.value) for p in original_job.parameters])

    # TODO: rather than set new inputs using dict of json'ed value, unpack parameters and set using set_param_value below.
    # TODO: need to handle updates to conditional parameters; conditional
    # params are stored in dicts (and dicts within dicts).
    new_inputs = payload['inputs']
    tool_params.update(dict([(key, to_json_string(value))
                             for key, value in new_inputs.items()
                             if key in tool.inputs and value is not None]))
    tool_params = tool.params_from_strings(tool_params, self.app)

    #
    # If running tool on region, convert input datasets (create indices) so
    # that regions of data can be quickly extracted.
    #
    data_provider_registry = trans.app.data_provider_registry
    messages_list = []
    if run_on_regions:
        for jida in original_job.input_datasets:
            input_dataset = jida.dataset
            # Optional inputs that were not selected have no dataset, and only
            # datatypes exposing get_track_type are subsetted further down, so
            # nothing else needs converting.
            if input_dataset is None or not hasattr(input_dataset.datatype, 'get_track_type'):
                continue
            data_provider = data_provider_registry.get_data_provider(
                trans, original_dataset=input_dataset, source='data')
            if data_provider and not data_provider.converted_dataset:
                # Resolve the data source the same way the subset-creation
                # code below does; it was previously referenced here before
                # being assigned.
                track_type, data_sources = input_dataset.datatype.get_track_type()
                msg = self.convert_dataset(trans, input_dataset, data_sources['data'])
                if msg is not None:
                    messages_list.append(msg)

    # Return any messages generated during conversions.
    return_message = self._get_highest_priority_msg(messages_list)
    if return_message:
        return to_json_string(return_message)

    #
    # Set target history (the history that tool will use for inputs/outputs).
    # If user owns dataset, put new data in original dataset's history; if
    # user does not own dataset (and hence is accessing dataset via sharing),
    # put new data in user's current history.
    #
    if original_dataset.history.user == trans.user:
        target_history = original_dataset.history
    else:
        target_history = trans.get_history(create=True)
    hda_permissions = trans.app.security_agent.history_get_default_permissions(target_history)

    def set_param_value(param_dict, param_name, param_value):
        """
        Set new parameter value in a tool's parameter dictionary.

        param_name is either a plain name or "<group>_<index>|<name>" for a
        parameter inside a grouping. Returns True if a slot for the parameter
        was found (searching nested dicts), False otherwise.
        """
        # Recursive function to set param value.
        def set_value(param_dict, group_name, group_index, param_name, param_value):
            if group_name in param_dict:
                param_dict[group_name][group_index][param_name] = param_value
                return True
            elif param_name in param_dict:
                param_dict[param_name] = param_value
                return True
            else:
                # Recursive search.
                for name, value in param_dict.items():
                    if isinstance(value, dict):
                        if set_value(value, group_name, group_index, param_name, param_value):
                            return True
                return False

        # Parse parameter name if necessary.
        if param_name.find("|") == -1:
            # Non-grouping parameter.
            group_name = group_index = None
        else:
            # Grouping parameter.
            group, param_name = param_name.split("|")
            index = group.rfind("_")
            group_name = group[:index]
            group_index = int(group[index + 1:])

        return set_value(param_dict, group_name, group_index, param_name, param_value)

    # Set parameters based tool's trackster config.
    params_set = {}
    for action in tool.trackster_conf.actions:
        success = False
        for joda in original_job.output_datasets:
            if joda.name == action.output_name:
                set_param_value(tool_params, action.name, joda.dataset)
                params_set[action.name] = True
                success = True
                break

        if not success:
            return trans.app.model.Dataset.conversion_messages.ERROR

    #
    # Set input datasets for tool. If running on regions, extract and use subset
    # when possible.
    #
    if run_on_regions:
        regions_str = ",".join([str(r) for r in regions])
    for jida in original_job.input_datasets:
        # If param set previously by config actions, do nothing.
        if jida.name in params_set:
            continue

        input_dataset = jida.dataset
        if input_dataset is None:
            # Optional dataset and dataset wasn't selected.
            tool_params[jida.name] = None
        elif run_on_regions and hasattr(input_dataset.datatype, 'get_track_type'):
            # Dataset is indexed and hence a subset can be extracted and used
            # as input.

            # Look for subset.
            subset_dataset_association = (
                trans.sa_session.query(trans.app.model.HistoryDatasetAssociationSubset)
                .filter_by(hda=input_dataset, location=regions_str)
                .first())
            if subset_dataset_association:
                # Data subset exists.
                subset_dataset = subset_dataset_association.subset
            else:
                # Need to create subset.
                track_type, data_sources = input_dataset.datatype.get_track_type()
                data_source = data_sources['data']
                # Return values are not used here; the calls are kept because
                # side effects cannot be ruled out from this function alone.
                input_dataset.get_converted_dataset(trans, data_source)
                input_dataset.get_converted_dataset_deps(trans, data_source)

                # Create new HDA for input dataset's subset.
                new_dataset = trans.app.model.HistoryDatasetAssociation(
                    extension=input_dataset.ext,
                    dbkey=input_dataset.dbkey,
                    create_dataset=True,
                    sa_session=trans.sa_session,
                    name="Subset [%s] of data %i" % (regions_str, input_dataset.hid),
                    visible=False)
                target_history.add_dataset(new_dataset)
                trans.sa_session.add(new_dataset)
                trans.app.security_agent.set_all_dataset_permissions(new_dataset.dataset, hda_permissions)

                # Write subset of data to new dataset
                data_provider = data_provider_registry.get_data_provider(
                    trans, original_dataset=input_dataset, source='data')
                trans.app.object_store.create(new_dataset.dataset)
                data_provider.write_data_to_file(regions, new_dataset.file_name)

                # TODO: (a) size not working; (b) need to set peek.
                new_dataset.set_size()
                new_dataset.info = "Data subset for trackster"
                new_dataset.set_dataset_state(trans.app.model.Dataset.states.OK)

                # Set metadata.
                # TODO: set meta internally if dataset is small enough?
                if trans.app.config.set_metadata_externally:
                    trans.app.datatypes_registry.set_external_metadata_tool.tool_action.execute(
                        trans.app.datatypes_registry.set_external_metadata_tool,
                        trans,
                        incoming={'input1': new_dataset},
                        overwrite=False,
                        job_params={"source": "trackster"})
                else:
                    new_dataset.set_meta()
                    new_dataset.datatype.after_setting_metadata(new_dataset)

                # Add HDA subset association.
                subset_association = trans.app.model.HistoryDatasetAssociationSubset(
                    hda=input_dataset, subset=new_dataset, location=regions_str)
                trans.sa_session.add(subset_association)

                subset_dataset = new_dataset

                trans.sa_session.flush()

            # Add dataset to tool's parameters.
            if not set_param_value(tool_params, jida.name, subset_dataset):
                return to_json_string({
                    "error": True,
                    "message": "error setting parameter %s" % jida.name
                })

    #
    # Execute tool and handle outputs.
    #
    try:
        subset_job, subset_job_outputs = tool.execute(
            trans, incoming=tool_params, history=target_history,
            job_params={"source": "trackster"})
    except Exception as e:
        # Lots of things can go wrong when trying to execute tool.
        return to_json_string({
            "error": True,
            "message": e.__class__.__name__ + ": " + str(e)
        })
def _rerun_tool(self, trans, payload, **kwargs):
    """
    Rerun a tool to produce a new output dataset that corresponds to a
    dataset that a user is currently viewing.

    Parameters:
        trans   -- current transaction.
        payload -- dict read for the keys 'target_dataset_id' (required),
                   'inputs' (required; new tool parameter values) and
                   'regions' (optional; a single region dict or a list of
                   region dicts understood by GenomeRegion.from_dict).

    Returns, on the failure paths:
        * a JSON string (built with to_json_string) describing a dataset
          state problem, a pending/failed conversion, a parameter that could
          not be set, or an exception raised while executing the tool;
        * Dataset.conversion_messages.NO_TOOL when the original tool cannot
          be found, or Dataset.conversion_messages.ERROR when a trackster
          config action cannot be matched to an output dataset.
    Nothing is returned explicitly after tool.execute succeeds.
    """
    #
    # TODO: refactor to use same code as run_tool.
    #

    # Run tool on region if region is specified.
    run_on_regions = False
    regions = payload.get('regions', None)
    if regions:
        if isinstance(regions, dict):
            # Regions is a single region.
            regions = [GenomeRegion.from_dict(regions)]
        elif isinstance(regions, list):
            # There is a list of regions.
            regions = [GenomeRegion.from_dict(r) for r in regions]

            if len(regions) > 1:
                # Sort by chrom name, start so that data is not fetched out of order.
                regions = sorted(regions, key=lambda r: (r.chrom.lower(), r.start))

                # Merge overlapping regions so that regions do not overlap
                # and hence data is not included multiple times.
                merged = [regions[0]]
                for cur in regions[1:]:
                    prev = merged[-1]
                    if cur.chrom == prev.chrom and cur.start <= prev.end:
                        # Found overlapping regions, so join them into prev.
                        # Use max() because cur may lie entirely inside prev;
                        # assigning cur.end directly would shrink prev.
                        prev.end = max(prev.end, cur.end)
                    else:
                        # No overlap, move to next region.
                        merged.append(cur)
                regions = merged

        run_on_regions = True

    # Dataset check.
    original_dataset = self.get_dataset(trans, payload['target_dataset_id'],
                                        check_ownership=False, check_accessible=True)
    msg = self.check_dataset_state(trans, original_dataset)
    if msg:
        return to_json_string(msg)

    #
    # Set tool parameters--except non-hidden dataset parameters--using combination of
    # job's previous parameters and incoming parameters. Incoming parameters
    # have priority.
    #
    original_job = get_dataset_job(original_dataset)
    tool = trans.app.toolbox.get_tool(original_job.tool_id)
    if not tool:
        return trans.app.model.Dataset.conversion_messages.NO_TOOL
    tool_params = dict([(p.name, p.value) for p in original_job.parameters])

    # TODO: rather than set new inputs using dict of json'ed value, unpack parameters and set using set_param_value below.
    # TODO: need to handle updates to conditional parameters; conditional
    # params are stored in dicts (and dicts within dicts).
    new_inputs = payload['inputs']
    tool_params.update(dict([(key, to_json_string(value))
                             for key, value in new_inputs.items()
                             if key in tool.inputs and value is not None]))
    tool_params = tool.params_from_strings(tool_params, self.app)

    #
    # If running tool on region, convert input datasets (create indices) so
    # that regions of data can be quickly extracted.
    #
    data_provider_registry = trans.app.data_provider_registry
    messages_list = []
    if run_on_regions:
        for jida in original_job.input_datasets:
            input_dataset = jida.dataset
            # Optional inputs that were not selected have no dataset, and only
            # datatypes exposing get_track_type are subsetted further down, so
            # nothing else needs converting.
            if input_dataset is None or not hasattr(input_dataset.datatype, 'get_track_type'):
                continue
            data_provider = data_provider_registry.get_data_provider(
                trans, original_dataset=input_dataset, source='data')
            if data_provider and not data_provider.converted_dataset:
                # Resolve the data source the same way the subset-creation
                # code below does; it was previously referenced here before
                # being assigned.
                track_type, data_sources = input_dataset.datatype.get_track_type()
                msg = self.convert_dataset(trans, input_dataset, data_sources['data'])
                if msg is not None:
                    messages_list.append(msg)

    # Return any messages generated during conversions.
    return_message = self._get_highest_priority_msg(messages_list)
    if return_message:
        return to_json_string(return_message)

    #
    # Set target history (the history that tool will use for inputs/outputs).
    # If user owns dataset, put new data in original dataset's history; if
    # user does not own dataset (and hence is accessing dataset via sharing),
    # put new data in user's current history.
    #
    if original_dataset.history.user == trans.user:
        target_history = original_dataset.history
    else:
        target_history = trans.get_history(create=True)
    hda_permissions = trans.app.security_agent.history_get_default_permissions(target_history)

    def set_param_value(param_dict, param_name, param_value):
        """
        Set new parameter value in a tool's parameter dictionary.

        param_name is either a plain name or "<group>_<index>|<name>" for a
        parameter inside a grouping. Returns True if a slot for the parameter
        was found (searching nested dicts), False otherwise.
        """
        # Recursive function to set param value.
        def set_value(param_dict, group_name, group_index, param_name, param_value):
            if group_name in param_dict:
                param_dict[group_name][group_index][param_name] = param_value
                return True
            elif param_name in param_dict:
                param_dict[param_name] = param_value
                return True
            else:
                # Recursive search.
                for name, value in param_dict.items():
                    if isinstance(value, dict):
                        if set_value(value, group_name, group_index, param_name, param_value):
                            return True
                return False

        # Parse parameter name if necessary.
        if param_name.find("|") == -1:
            # Non-grouping parameter.
            group_name = group_index = None
        else:
            # Grouping parameter.
            group, param_name = param_name.split("|")
            index = group.rfind("_")
            group_name = group[:index]
            group_index = int(group[index + 1:])

        return set_value(param_dict, group_name, group_index, param_name, param_value)

    # Set parameters based tool's trackster config.
    params_set = {}
    for action in tool.trackster_conf.actions:
        success = False
        for joda in original_job.output_datasets:
            if joda.name == action.output_name:
                set_param_value(tool_params, action.name, joda.dataset)
                params_set[action.name] = True
                success = True
                break

        if not success:
            return trans.app.model.Dataset.conversion_messages.ERROR

    #
    # Set input datasets for tool. If running on regions, extract and use subset
    # when possible.
    #
    if run_on_regions:
        regions_str = ",".join([str(r) for r in regions])
    for jida in original_job.input_datasets:
        # If param set previously by config actions, do nothing.
        if jida.name in params_set:
            continue

        input_dataset = jida.dataset
        if input_dataset is None:
            # Optional dataset and dataset wasn't selected.
            tool_params[jida.name] = None
        elif run_on_regions and hasattr(input_dataset.datatype, 'get_track_type'):
            # Dataset is indexed and hence a subset can be extracted and used
            # as input.

            # Look for subset.
            subset_dataset_association = (
                trans.sa_session.query(trans.app.model.HistoryDatasetAssociationSubset)
                .filter_by(hda=input_dataset, location=regions_str)
                .first())
            if subset_dataset_association:
                # Data subset exists.
                subset_dataset = subset_dataset_association.subset
            else:
                # Need to create subset.
                track_type, data_sources = input_dataset.datatype.get_track_type()
                data_source = data_sources['data']
                # Return values are not used here; the calls are kept because
                # side effects cannot be ruled out from this function alone.
                input_dataset.get_converted_dataset(trans, data_source)
                input_dataset.get_converted_dataset_deps(trans, data_source)

                # Create new HDA for input dataset's subset.
                new_dataset = trans.app.model.HistoryDatasetAssociation(
                    extension=input_dataset.ext,
                    dbkey=input_dataset.dbkey,
                    create_dataset=True,
                    sa_session=trans.sa_session,
                    name="Subset [%s] of data %i" % (regions_str, input_dataset.hid),
                    visible=False)
                target_history.add_dataset(new_dataset)
                trans.sa_session.add(new_dataset)
                trans.app.security_agent.set_all_dataset_permissions(new_dataset.dataset, hda_permissions)

                # Write subset of data to new dataset
                data_provider = data_provider_registry.get_data_provider(
                    trans, original_dataset=input_dataset, source='data')
                trans.app.object_store.create(new_dataset.dataset)
                data_provider.write_data_to_file(regions, new_dataset.file_name)

                # TODO: (a) size not working; (b) need to set peek.
                new_dataset.set_size()
                new_dataset.info = "Data subset for trackster"
                new_dataset.set_dataset_state(trans.app.model.Dataset.states.OK)

                # Set metadata.
                # TODO: set meta internally if dataset is small enough?
                if trans.app.config.set_metadata_externally:
                    trans.app.datatypes_registry.set_external_metadata_tool.tool_action.execute(
                        trans.app.datatypes_registry.set_external_metadata_tool,
                        trans,
                        incoming={'input1': new_dataset},
                        overwrite=False,
                        job_params={"source": "trackster"})
                else:
                    new_dataset.set_meta()
                    new_dataset.datatype.after_setting_metadata(new_dataset)

                # Add HDA subset association.
                subset_association = trans.app.model.HistoryDatasetAssociationSubset(
                    hda=input_dataset, subset=new_dataset, location=regions_str)
                trans.sa_session.add(subset_association)

                subset_dataset = new_dataset

                trans.sa_session.flush()

            # Add dataset to tool's parameters.
            if not set_param_value(tool_params, jida.name, subset_dataset):
                return to_json_string({
                    "error": True,
                    "message": "error setting parameter %s" % jida.name
                })

    #
    # Execute tool and handle outputs.
    #
    try:
        subset_job, subset_job_outputs = tool.execute(
            trans, incoming=tool_params, history=target_history,
            job_params={"source": "trackster"})
    except Exception as e:
        # Lots of things can go wrong when trying to execute tool.
        return to_json_string({
            "error": True,
            "message": e.__class__.__name__ + ": " + str(e)
        })