Example #1
0
    def sweepster( self, trans, id=None, hda_ldda=None, dataset_id=None, regions=None ):
        """
        Displays a sweepster visualization using the incoming parameters. If id is available,
        get the visualization with the given id; otherwise, create a new visualization using
        a given dataset and regions.
        """
        # Need to create history if necessary in order to create tool form.
        trans.get_history( create=True )

        if id:
            # Loading a shared visualization.
            viz = self.get_visualization( trans, id )
            viz_config = self.get_visualization_config( trans, viz )
            dataset = self.get_dataset( trans, viz_config[ 'dataset_id' ] )
        else:
            # Loading new visualization.
            dataset = self.get_hda_or_ldda( trans, hda_ldda, dataset_id )
            job = get_dataset_job( dataset )
            viz_config = {
                'dataset_id': dataset_id,
                'tool_id': job.tool_id,
                'regions': from_json_string( regions )
            }
                
        # Add tool, dataset attributes to config based on id.
        tool = trans.app.toolbox.get_tool( viz_config[ 'tool_id' ] )
        viz_config[ 'tool' ] = tool.to_dict( trans, for_display=True )
        viz_config[ 'dataset' ] = dataset.get_api_value()

        return trans.fill_template_mako( "visualization/sweepster.mako", config=viz_config )
Example #2
0
    def _rerun_tool(self, trans, payload, **kwargs):
        """
        Rerun a tool to produce a new output dataset that corresponds to a 
        dataset that a user is currently viewing.
        """

        #
        # TODO: refactor to use same code as run_tool.
        #

        # Run tool on region if region is specificied.
        run_on_regions = False
        regions = payload.get('regions', None)
        if regions:
            if isinstance(regions, dict):
                # Regions is a single region.
                regions = [GenomeRegion.from_dict(regions)]
            elif isinstance(regions, list):
                # There is a list of regions.
                regions = [GenomeRegion.from_dict(r) for r in regions]

                if len(regions) > 1:
                    # Sort by chrom name, start so that data is not fetched out of order.
                    regions = sorted(regions,
                                     key=lambda r: (r.chrom.lower(), r.start))

                    # Merge overlapping regions so that regions do not overlap
                    # and hence data is not included multiple times.
                    prev = regions[0]
                    cur = regions[1]
                    index = 1
                    while True:
                        if cur.chrom == prev.chrom and cur.start <= prev.end:
                            # Found overlapping regions, so join them into prev.
                            prev.end = cur.end
                            del regions[index]
                        else:
                            # No overlap, move to next region.
                            prev = cur
                            index += 1

                        # Get next region or exit.
                        if index == len(regions):
                            # Done.
                            break
                        else:
                            cur = regions[index]

            run_on_regions = True

        # Dataset check.
        original_dataset = self.get_dataset(trans,
                                            payload['target_dataset_id'],
                                            check_ownership=False,
                                            check_accessible=True)
        msg = self.check_dataset_state(trans, original_dataset)
        if msg:
            return to_json_string(msg)

        #
        # Set tool parameters--except non-hidden dataset parameters--using combination of
        # job's previous parameters and incoming parameters. Incoming parameters
        # have priority.
        #
        original_job = get_dataset_job(original_dataset)
        tool = trans.app.toolbox.get_tool(original_job.tool_id)
        if not tool:
            return trans.app.model.Dataset.conversion_messages.NO_TOOL
        tool_params = dict([(p.name, p.value)
                            for p in original_job.parameters])

        # TODO: rather than set new inputs using dict of json'ed value, unpack parameters and set using set_param_value below.
        # TODO: need to handle updates to conditional parameters; conditional
        # params are stored in dicts (and dicts within dicts).
        new_inputs = payload['inputs']
        tool_params.update(
            dict([(key, to_json_string(value))
                  for key, value in new_inputs.items()
                  if key in tool.inputs and new_inputs[key] is not None]))
        tool_params = tool.params_from_strings(tool_params, self.app)

        #
        # If running tool on region, convert input datasets (create indices) so
        # that can regions of data can be quickly extracted.
        #
        data_provider_registry = trans.app.data_provider_registry
        messages_list = []
        if run_on_regions:
            for jida in original_job.input_datasets:
                input_dataset = jida.dataset
                data_provider = data_provider_registry.get_data_provider(
                    trans, original_dataset=input_dataset, source='data')
                if data_provider:
                    if not data_provider.converted_dataset:
                        msg = self.convert_dataset(trans, input_dataset,
                                                   data_source)
                        if msg is not None:
                            messages_list.append(msg)

        # Return any messages generated during conversions.
        return_message = self._get_highest_priority_msg(messages_list)
        if return_message:
            return to_json_string(return_message)

        #
        # Set target history (the history that tool will use for inputs/outputs).
        # If user owns dataset, put new data in original dataset's history; if
        # user does not own dataset (and hence is accessing dataset via sharing),
        # put new data in user's current history.
        #
        if original_dataset.history.user == trans.user:
            target_history = original_dataset.history
        else:
            target_history = trans.get_history(create=True)
        hda_permissions = trans.app.security_agent.history_get_default_permissions(
            target_history)

        def set_param_value(param_dict, param_name, param_value):
            """
            Set new parameter value in a tool's parameter dictionary.
            """

            # Recursive function to set param value.
            def set_value(param_dict, group_name, group_index, param_name,
                          param_value):
                if group_name in param_dict:
                    param_dict[group_name][group_index][
                        param_name] = param_value
                    return True
                elif param_name in param_dict:
                    param_dict[param_name] = param_value
                    return True
                else:
                    # Recursive search.
                    return_val = False
                    for name, value in param_dict.items():
                        if isinstance(value, dict):
                            return_val = set_value(value, group_name,
                                                   group_index, param_name,
                                                   param_value)
                            if return_val:
                                return return_val
                    return False

            # Parse parameter name if necessary.
            if param_name.find("|") == -1:
                # Non-grouping parameter.
                group_name = group_index = None
            else:
                # Grouping parameter.
                group, param_name = param_name.split("|")
                index = group.rfind("_")
                group_name = group[:index]
                group_index = int(group[index + 1:])

            return set_value(param_dict, group_name, group_index, param_name,
                             param_value)

        # Set parameters based tool's trackster config.
        params_set = {}
        for action in tool.trackster_conf.actions:
            success = False
            for joda in original_job.output_datasets:
                if joda.name == action.output_name:
                    set_param_value(tool_params, action.name, joda.dataset)
                    params_set[action.name] = True
                    success = True
                    break

            if not success:
                return trans.app.model.Dataset.conversion_messages.ERROR

        #
        # Set input datasets for tool. If running on regions, extract and use subset
        # when possible.
        #
        if run_on_regions:
            regions_str = ",".join([str(r) for r in regions])
        for jida in original_job.input_datasets:
            # If param set previously by config actions, do nothing.
            if jida.name in params_set:
                continue

            input_dataset = jida.dataset
            if input_dataset is None:  #optional dataset and dataset wasn't selected
                tool_params[jida.name] = None
            elif run_on_regions and hasattr(input_dataset.datatype,
                                            'get_track_type'):
                # Dataset is indexed and hence a subset can be extracted and used
                # as input.

                # Look for subset.
                subset_dataset_association = trans.sa_session.query( trans.app.model.HistoryDatasetAssociationSubset ) \
                                                             .filter_by( hda=input_dataset, location=regions_str ) \
                                                             .first()
                if subset_dataset_association:
                    # Data subset exists.
                    subset_dataset = subset_dataset_association.subset
                else:
                    # Need to create subset.
                    track_type, data_sources = input_dataset.datatype.get_track_type(
                    )
                    data_source = data_sources['data']
                    converted_dataset = input_dataset.get_converted_dataset(
                        trans, data_source)
                    deps = input_dataset.get_converted_dataset_deps(
                        trans, data_source)

                    # Create new HDA for input dataset's subset.
                    new_dataset = trans.app.model.HistoryDatasetAssociation( extension=input_dataset.ext, \
                                                                             dbkey=input_dataset.dbkey, \
                                                                             create_dataset=True, \
                                                                             sa_session=trans.sa_session,
                                                                             name="Subset [%s] of data %i" % \
                                                                                 ( regions_str, input_dataset.hid ),
                                                                             visible=False )
                    target_history.add_dataset(new_dataset)
                    trans.sa_session.add(new_dataset)
                    trans.app.security_agent.set_all_dataset_permissions(
                        new_dataset.dataset, hda_permissions)

                    # Write subset of data to new dataset
                    data_provider = data_provider_registry.get_data_provider(
                        trans, original_dataset=input_dataset, source='data')
                    trans.app.object_store.create(new_dataset.dataset)
                    data_provider.write_data_to_file(regions,
                                                     new_dataset.file_name)

                    # TODO: (a) size not working; (b) need to set peek.
                    new_dataset.set_size()
                    new_dataset.info = "Data subset for trackster"
                    new_dataset.set_dataset_state(
                        trans.app.model.Dataset.states.OK)

                    # Set metadata.
                    # TODO: set meta internally if dataset is small enough?
                    if trans.app.config.set_metadata_externally:
                        trans.app.datatypes_registry.set_external_metadata_tool.tool_action.execute(
                            trans.app.datatypes_registry.
                            set_external_metadata_tool,
                            trans,
                            incoming={'input1': new_dataset},
                            overwrite=False,
                            job_params={"source": "trackster"})
                    else:
                        message = 'Attributes updated'
                        new_dataset.set_meta()
                        new_dataset.datatype.after_setting_metadata(
                            new_dataset)

                    # Add HDA subset association.
                    subset_association = trans.app.model.HistoryDatasetAssociationSubset(
                        hda=input_dataset,
                        subset=new_dataset,
                        location=regions_str)
                    trans.sa_session.add(subset_association)

                    subset_dataset = new_dataset

                trans.sa_session.flush()

                # Add dataset to tool's parameters.
                if not set_param_value(tool_params, jida.name, subset_dataset):
                    return to_json_string({
                        "error":
                        True,
                        "message":
                        "error setting parameter %s" % jida.name
                    })

        #
        # Execute tool and handle outputs.
        #
        try:
            subset_job, subset_job_outputs = tool.execute(
                trans,
                incoming=tool_params,
                history=target_history,
                job_params={"source": "trackster"})
        except Exception, e:
            # Lots of things can go wrong when trying to execute tool.
            return to_json_string({
                "error":
                True,
                "message":
                e.__class__.__name__ + ": " + str(e)
            })
Example #3
0
    def _rerun_tool( self, trans, payload, **kwargs ):
        """
        Rerun a tool to produce a new output dataset that corresponds to a 
        dataset that a user is currently viewing.
        """

        #
        # TODO: refactor to use same code as run_tool.
        #        

        # Run tool on region if region is specificied.
        run_on_regions = False
        regions = payload.get( 'regions', None )
        if regions:
            if isinstance( regions, dict ):
                # Regions is a single region.
                regions = [ GenomeRegion.from_dict( regions ) ]
            elif isinstance( regions, list ):
                # There is a list of regions.
                regions = [ GenomeRegion.from_dict( r ) for r in regions ]

                if len( regions ) > 1:
                    # Sort by chrom name, start so that data is not fetched out of order.
                    regions = sorted(regions, key=lambda r: (r.chrom.lower(), r.start))
                    
                    # Merge overlapping regions so that regions do not overlap 
                    # and hence data is not included multiple times.
                    prev = regions[0]
                    cur = regions[1]
                    index = 1
                    while True:
                        if cur.chrom == prev.chrom and cur.start <= prev.end:
                            # Found overlapping regions, so join them into prev.
                            prev.end = cur.end
                            del regions[ index ]
                        else:
                            # No overlap, move to next region.
                            prev = cur
                            index += 1

                        # Get next region or exit.    
                        if index == len( regions ):
                            # Done.
                            break
                        else:
                            cur = regions[ index ]
                    
            run_on_regions = True
            
        # Dataset check.
        original_dataset = self.get_dataset( trans, payload[ 'target_dataset_id' ], check_ownership=False, check_accessible=True )
        msg = self.check_dataset_state( trans, original_dataset )
        if msg:
            return to_json_string( msg )

        #
        # Set tool parameters--except non-hidden dataset parameters--using combination of
        # job's previous parameters and incoming parameters. Incoming parameters
        # have priority.
        #
        original_job = get_dataset_job( original_dataset )
        tool = trans.app.toolbox.get_tool( original_job.tool_id )
        if not tool:
            return trans.app.model.Dataset.conversion_messages.NO_TOOL
        tool_params = dict( [ ( p.name, p.value ) for p in original_job.parameters ] )
        
        # TODO: rather than set new inputs using dict of json'ed value, unpack parameters and set using set_param_value below.
        # TODO: need to handle updates to conditional parameters; conditional 
        # params are stored in dicts (and dicts within dicts).
        new_inputs = payload[ 'inputs' ]
        tool_params.update( dict( [ ( key, to_json_string( value ) ) for key, value in new_inputs.items() if key in tool.inputs and new_inputs[ key ] is not None ] ) )
        tool_params = tool.params_from_strings( tool_params, self.app )

        #
        # If running tool on region, convert input datasets (create indices) so
        # that can regions of data can be quickly extracted.
        # 
        data_provider_registry = trans.app.data_provider_registry
        messages_list = []
        if run_on_regions:
            for jida in original_job.input_datasets:
                input_dataset = jida.dataset
                data_provider = data_provider_registry.get_data_provider( trans, original_dataset=input_dataset, source='data' )
                if data_provider:
                    if not data_provider.converted_dataset:
                        msg = self.convert_dataset( trans, input_dataset, data_source )
                        if msg is not None:
                            messages_list.append( msg )

        # Return any messages generated during conversions.
        return_message = self._get_highest_priority_msg( messages_list )
        if return_message:
            return to_json_string( return_message )

        #
        # Set target history (the history that tool will use for inputs/outputs).
        # If user owns dataset, put new data in original dataset's history; if 
        # user does not own dataset (and hence is accessing dataset via sharing), 
        # put new data in user's current history.
        #
        if original_dataset.history.user == trans.user:
            target_history = original_dataset.history
        else:
            target_history = trans.get_history( create=True )
        hda_permissions = trans.app.security_agent.history_get_default_permissions( target_history )

        def set_param_value( param_dict, param_name, param_value ):
            """
            Set new parameter value in a tool's parameter dictionary.
            """

            # Recursive function to set param value.
            def set_value( param_dict, group_name, group_index, param_name, param_value ):
                if group_name in param_dict:
                    param_dict[ group_name ][ group_index ][ param_name ] = param_value
                    return True
                elif param_name in param_dict:
                    param_dict[ param_name ] = param_value
                    return True
                else:
                    # Recursive search.
                    return_val = False
                    for name, value in param_dict.items():
                        if isinstance( value, dict ):
                            return_val = set_value( value, group_name, group_index, param_name, param_value)
                            if return_val:
                                return return_val
                    return False

            # Parse parameter name if necessary.
            if param_name.find( "|" ) == -1:
                # Non-grouping parameter.
                group_name = group_index = None
            else:
                # Grouping parameter.
                group, param_name = param_name.split( "|" )
                index = group.rfind( "_" )
                group_name = group[ :index ]
                group_index = int( group[ index + 1: ] )

            return set_value( param_dict, group_name, group_index, param_name, param_value )

        # Set parameters based tool's trackster config.
        params_set = {}
        for action in tool.trackster_conf.actions:
            success = False
            for joda in original_job.output_datasets:
                if joda.name == action.output_name:
                    set_param_value( tool_params, action.name, joda.dataset )
                    params_set[ action.name ] = True
                    success = True
                    break

            if not success:
                return trans.app.model.Dataset.conversion_messages.ERROR

        #
        # Set input datasets for tool. If running on regions, extract and use subset
        # when possible.
        #
        if run_on_regions:
            regions_str = ",".join( [ str( r ) for r in regions ] )
        for jida in original_job.input_datasets:
            # If param set previously by config actions, do nothing.
            if jida.name in params_set:
                continue

            input_dataset = jida.dataset
            if input_dataset is None: #optional dataset and dataset wasn't selected
                tool_params[ jida.name ] = None
            elif run_on_regions and hasattr( input_dataset.datatype, 'get_track_type' ):
                # Dataset is indexed and hence a subset can be extracted and used
                # as input.

                # Look for subset.
                subset_dataset_association = trans.sa_session.query( trans.app.model.HistoryDatasetAssociationSubset ) \
                                                             .filter_by( hda=input_dataset, location=regions_str ) \
                                                             .first()
                if subset_dataset_association:
                    # Data subset exists.
                    subset_dataset = subset_dataset_association.subset
                else:
                    # Need to create subset.
                    track_type, data_sources = input_dataset.datatype.get_track_type()
                    data_source = data_sources[ 'data' ]
                    converted_dataset = input_dataset.get_converted_dataset( trans, data_source )
                    deps = input_dataset.get_converted_dataset_deps( trans, data_source )

                    # Create new HDA for input dataset's subset.
                    new_dataset = trans.app.model.HistoryDatasetAssociation( extension=input_dataset.ext, \
                                                                             dbkey=input_dataset.dbkey, \
                                                                             create_dataset=True, \
                                                                             sa_session=trans.sa_session,
                                                                             name="Subset [%s] of data %i" % \
                                                                                 ( regions_str, input_dataset.hid ),
                                                                             visible=False )
                    target_history.add_dataset( new_dataset )
                    trans.sa_session.add( new_dataset )
                    trans.app.security_agent.set_all_dataset_permissions( new_dataset.dataset, hda_permissions )

                    # Write subset of data to new dataset
                    data_provider = data_provider_registry.get_data_provider( trans, original_dataset=input_dataset, source='data' )
                    trans.app.object_store.create( new_dataset.dataset )
                    data_provider.write_data_to_file( regions, new_dataset.file_name )

                    # TODO: (a) size not working; (b) need to set peek.
                    new_dataset.set_size()
                    new_dataset.info = "Data subset for trackster"
                    new_dataset.set_dataset_state( trans.app.model.Dataset.states.OK )

                    # Set metadata.
                    # TODO: set meta internally if dataset is small enough?
                    if trans.app.config.set_metadata_externally:
                        trans.app.datatypes_registry.set_external_metadata_tool.tool_action.execute( trans.app.datatypes_registry.set_external_metadata_tool, 
                                                                                                     trans, incoming = { 'input1':new_dataset }, 
                                                                                                     overwrite=False, job_params={ "source" : "trackster" } )
                    else:
                        message = 'Attributes updated'
                        new_dataset.set_meta()
                        new_dataset.datatype.after_setting_metadata( new_dataset )

                    # Add HDA subset association.
                    subset_association = trans.app.model.HistoryDatasetAssociationSubset( hda=input_dataset, subset=new_dataset, location=regions_str )
                    trans.sa_session.add( subset_association )

                    subset_dataset = new_dataset

                trans.sa_session.flush()

                # Add dataset to tool's parameters.
                if not set_param_value( tool_params, jida.name, subset_dataset ):
                    return to_json_string( { "error" : True, "message" : "error setting parameter %s" % jida.name } )

        #        
        # Execute tool and handle outputs.
        #
        try:
            subset_job, subset_job_outputs = tool.execute( trans, incoming=tool_params, 
                                                           history=target_history, 
                                                           job_params={ "source" : "trackster" } )
        except Exception, e:
            # Lots of things can go wrong when trying to execute tool.
            return to_json_string( { "error" : True, "message" : e.__class__.__name__ + ": " + str(e) } )