def getData(self, path):
        """Return the data for a track/slice path, using the persistent cache.

        For functions, path should be an empty tuple.

        The cache key is ``DataTree.path2str(path)``, or ``"all"`` when
        ``path`` is empty. The cache is only consulted when caching is
        enabled (``self.nocache`` is false) and no ``self.tracker_options``
        are set. On a miss the tracker is called as ``self.tracker(*path)``,
        with ``options=self.tracker_options`` added when options are set,
        and the result is written back to ``self.cache`` unless caching is
        disabled.

        Exceptions raised by the tracker are reported through ``self.warn``
        and re-raised unchanged.
        """

        if path:
            key = DataTree.path2str(path)
        else:
            key = "all"

        result, fromcache = None, False
        # The key is derived from the path only, so a cached entry cannot be
        # matched to a particular set of options: trackers with options are
        # never read from the cache.
        if not self.nocache and not self.tracker_options:
            try:
                result = self.cache[key]
                fromcache = True
            except KeyError:
                pass

        kwargs = {}
        if self.tracker_options:
            kwargs["options"] = self.tracker_options

        # Identity test: ``== None`` is evaluated elementwise by array-like
        # results and cannot be used as a truth value.
        if result is None:
            try:
                result = self.tracker(*path, **kwargs)
            except Exception as msg:
                self.warn(
                    "exception for tracker '%s', path '%s': msg=%s" % (str(self.tracker), DataTree.path2str(path), msg)
                )
                if VERBOSE:
                    self.warn(traceback.format_exc())
                raise

        # Store freshly computed data; entries that came from the cache are
        # not written back.
        if not self.nocache and not fromcache:
            self.cache[key] = result

        return result
Exemple #2
0
    def getData( self, path ):
        """Fetch the data for a track and slice, going through the persistent cache.

        For functions, path should be an empty tuple.

        The cache is looked up under ``DataTree.path2str(path)`` (or
        ``"all"`` for an empty path) unless caching is switched off or the
        tracker has options. If no usable entry is found the tracker is
        called and, when caching is enabled and the value did not come from
        the cache, the value is stored. Tracker exceptions are logged with
        ``self.warn`` and re-raised; a ``RuntimeError`` from the cache
        lookup is re-raised with the offending key in the message.
        """

        cache_key = DataTree.path2str(path) if path else "all"

        data = None
        cache_hit = False

        # trackers with options are not cached
        if not (self.nocache or self.tracker_options):
            try:
                data = self.cache[cache_key]
            except KeyError:
                pass
            except RuntimeError as msg:
                raise RuntimeError( "error when accessing key %s from cache: %s - potential problem with unpickable object?" % (cache_key, msg))
            else:
                cache_hit = True

        call_args = {}
        if self.tracker_options:
            call_args['options'] = self.tracker_options

        if data is None:
            try:
                data = self.tracker( *path, **call_args )
            except Exception as msg:
                report = "exception for tracker '%s', path '%s': msg=%s" % (
                    str(self.tracker),
                    DataTree.path2str(path),
                    msg)
                self.warn( report )
                if VERBOSE:
                    self.warn( traceback.format_exc() )
                raise

        # only values that were computed in this call are written to the cache
        if not (self.nocache or cache_hit):
            self.cache[cache_key] = data

        return data
Exemple #3
0
    def render( self ):
        '''Supply the :class:`Renderer.Renderer` with the data to render.

        ``self.data`` is converted into a data frame with
        ``DataTree.asDataFrame``. Depending on the ``groupby`` option
        (read here as ``self.group_level``) and on the number of index
        levels the renderer declares through its ``nlevels`` attribute,
        the renderer is called either once with the complete data frame
        or once per group of leading index levels.

        An exception raised by the renderer for one group is logged and
        replaced by an exception block; the remaining groups are still
        rendered.

        Returns a ResultBlocks data structure.

        Raises ValueError if there is no data after conversion, if a
        hierarchical data frame can not be sorted deeply enough, or if
        no result blocks were produced.
        '''
        self.debug( "%s: rendering data started for %i items" % (self,
                                                                 len(self.data)))

        # Number of index levels required by the renderer. User renderers
        # that are plain functions have no such attribute; -1 avoids any
        # renderer-imposed grouping.
        renderer_nlevels = getattr( self.renderer, "nlevels", -1 )

        # initiate output structure
        results = ResultBlocks( title = "")

        # convert to data series
        # The data is melted, i.e,
        # BMW    price    10000
        # BMW    speed    100
        # Golf   price    5000
        # Golf   speed    50
        dataframe = DataTree.asDataFrame( self.data )

        if dataframe is None:
            self.warn( "%s: no data after conversion" % self )
            raise ValueError( "no data for renderer" )

        # special patch: set column names to pruned levels
        # if there are no column names
        if len(dataframe.columns) == len(self.pruned):
            if list(dataframe.columns) == list(range( len(dataframe.columns))):
                dataframe.columns = [x[1] for x in self.pruned]

        def getIndexLevels( index ):
            '''return the number of levels of *index* (1 if not hierarchical).'''
            try:
                # hierarchical index
                return len(index.levels)
            except AttributeError:
                return 1

        nlevels = getIndexLevels( dataframe.index )

        self.debug( "%s: rendering data started. levels=%i, required levels>=%i, group_level=%s" %\
                        (self, nlevels,
                         renderer_nlevels,
                         str(self.group_level) ) )

        if renderer_nlevels < 0 and self.group_level <= 0:
            # no grouping for renderers that will accept
            # a dataframe with any level of indices and no explicit
            # grouping has been asked for.
            results.append( self.renderer( dataframe, path = () ) )
        else:
            # user specified group level by default
            group_level = self.group_level

            # set group level to maximum allowed by renderer
            if renderer_nlevels >= 0:
                group_level = max(nlevels - renderer_nlevels, group_level)

            # add additional level if necessary
            if nlevels < group_level:
                prefix = tuple(["level%i" % x for x in range( group_level - nlevels)])
                # Labels of a plain (single-level) index are not tuples;
                # wrap them so that the prefix can be prepended.
                dataframe.index = pandas.MultiIndex.from_tuples(
                    [ prefix + (x if isinstance(x, tuple) else (x,))
                      for x in dataframe.index ] )

            # used to be: group_level + 1
            # hierarchical index
            # numpy.unique converts everything to a string
            # which is not consistent with selecting later
            # Build a real list: the first element is inspected below and
            # the paths are iterated afterwards.
            paths = [ tuple(x) for x in
                      DataTree.unique( [ x[:group_level] for x in dataframe.index.unique() ] ) ]

            pathlength = len(paths[0]) - 1

            is_hierarchical = isinstance( dataframe.index, pandas.MultiIndex )

            if is_hierarchical:
                # Note: can only sort hierarchical indices
                if hasattr( dataframe, "sortlevel" ):
                    dataframe = dataframe.sortlevel()
                else:
                    # sortlevel() is not available in newer pandas
                    dataframe = dataframe.sort_index()

                if dataframe.index.lexsort_depth < pathlength:
                    raise ValueError('could not sort data frame: sort depth=%i < pathlength=%i, dataframe=%s' \
                                         % (dataframe.index.lexsort_depth,
                                            pathlength,
                                            dataframe))

            for path in paths:

                if path:
                    if len(path) == nlevels:
                        # extract with loc in order to obtain dataframe
                        work = dataframe.loc[[path]]
                    else:
                        # select data frame as cross-section
                        work = dataframe.xs(path, axis=0 )
                else:
                    # empty tuple - use full data set
                    work = dataframe

                # remove columns and rows in work that are all Na
                work = work.dropna( axis=1, how='all').dropna( axis=0, how='all')

                if is_hierarchical and renderer_nlevels >= 0:
                    work_levels = getIndexLevels( work.index )
                    # reduce levels of indices required to that required
                    # for Renderer. This occurs if groupby=none.
                    if work_levels > renderer_nlevels:
                        # merge the leading surplus levels into a single
                        # level labelled with their path string
                        sep = work_levels - (renderer_nlevels - 1)
                        tuples = [ ( DataTree.path2str( x[:sep] ), ) + x[sep:] \
                                       for x in work.index ]
                        work.index = pandas.MultiIndex.from_tuples( tuples )

                try:
                    results.append( self.renderer( work,
                                                   path = path ))
                except Exception:
                    # Keep rendering the remaining groups, but do not
                    # swallow KeyboardInterrupt/SystemExit.
                    self.error( "%s: exception in rendering" % self )
                    results.append( ResultBlocks( Utils.buildException( "rendering" ) ) )

        if len(results) == 0:
            self.warn("renderer returned no data.")
            raise ValueError( "renderer returned no data." )

        self.debug( "%s: rendering data finished with %i blocks" % (self.tracker, len(results)))

        return results