# NOTE(review): `Patch` is re-defined later in this same file with an extended
# API (sar(output_N=...), universal_sar, comm_sep). At import time that later
# definition shadows this one, so this earlier version is effectively dead
# code — confirm whether keeping both is intentional.
class Patch:
    '''
    An object representing an empirical census.

    Parameters
    ----------
    data_path : str
        Path to csv file containing census data.
    subset : dict or str
        Dictionary of permanent subset to data, {'column_name': 'condition'},
        which will limit all analysis to records in which column_name meets
        the condition, ie, {'year': ('==', 2005), 'x': [('>', 20), ('<', 40)]}
        restricts analysis to year 2005 and x values between 20 and 40. These
        conditions can also be passed to the individual methods, but
        subsetting the data table up front may save analysis time. Subsetting
        on a string would look something like {'name' : [('==', 'John'),
        ('==', 'Harry')]}. In addition, subset can be a query string for a
        SQL database.

    Attributes
    ----------
    data_table : object of class DataTable
        Object containing patch data and metadata.
    '''

    # NOTE(review): mutable default argument `subset = {}` — the same dict
    # object is shared across all calls that omit `subset`. It is never
    # mutated here, but `subset=None` with an in-body default would be safer.
    def __init__(self, datapath, subset = {}):
        '''Initialize object of class Patch. See class documentation.'''

        # Handle csv
        self.data_table = DataTable(datapath, subset=subset)

        # If datapath is sql or db the subsetting is already done;
        # a dict subset means a csv source, which must be subset here.
        if type(subset) == type({}):
            self.data_table.table = self.data_table.get_subtable(subset)

    def sad(self, criteria, clean=False):
        '''
        Calculates an empirical species abundance distribution given criteria.

        Parameters
        ----------
        criteria : dict
            Dictionary of form {column_name: value}. Must contain a key with
            a value of 'species' indicating the column with species
            identifiers (this column must be type categorical in metadata).
            If a column giving the counts of species found at a point is also
            in the data, a key with the value 'count' should also be given.

            Value has a different meaning depending on column type:
            - metric - number of divisions of data along this axis, int/float
            - categorical - 'split' calculates each category separately,
              'whole' takes the entire column.
        clean : bool
            If True, all the zeros are removed from the sads. If False, sads
            are left as is.

        Returns
        -------
        result : list
            List of tuples containing results, where the first element is a
            dictionary of criteria for this calculation and second element is
            a 1D ndarray of length species containing the abundance for each
            species. The third element is a 1D array listing identifiers for
            species in the same order as they appear in the second element of
            result.
        '''

        # parse_criteria returns mass_col as the fifth value; it is unused by
        # sad, hence the throwaway name `mass`.
        spp_list, spp_col, count_col, engy_col, mass, combinations = \
            self.parse_criteria(criteria)

        if spp_col == None:
            raise TypeError('No species column specified in "criteria" ' +
                            'parameter')

        result = []
        for comb in combinations:

            subtable = self.data_table.get_subtable(comb)

            sad_list = []
            for species in spp_list:
                spp_subtable = subtable[subtable[spp_col] == species]
                # Sum the count column if present, otherwise each row is one
                # individual.
                if count_col:
                    count = np.sum(spp_subtable[count_col])
                else:
                    count = len(spp_subtable)
                sad_list.append(count)

            sad_list = np.array(sad_list)

            if clean:
                # Drop zero-abundance species and keep spp_list aligned
                ind = np.where(sad_list != 0)[0]
                sad_list = sad_list[ind]
                temp_spp_list = spp_list[ind]
            else:
                temp_spp_list = spp_list

            result.append((comb, sad_list, temp_spp_list))

        return result

    def ssad(self, criteria):
        '''
        Calculates empirical species-level spatial abundance distributions
        given criteria.

        Parameters
        ----------
        criteria : dict
            See Patch.sad docstring

        Returns
        -------
        : tuple
            Returns a tuple with two objects. The first object is an array of
            dicts that correspond to the criteria used to generate each cell.
            The length of the first object is equal to the number of
            divisions specified. The second object is a dictionary that has
            length species and each keyword is a species. Each species
            keyword looks up an array with the ssad for the given species.
            The array that each keyword looks up is the same length as
            criteria.
        '''

        sad_return = self.sad(criteria, clean=False)
        # With clean=False every sad shares the same species list, so the
        # first combination's list stands in for all of them.
        spp_list = sad_return[0][2]
        combs, array_res = flatten_sad(sad_return)
        ssad = {}

        for i, spp in enumerate(spp_list):
            ssad[spp] = array_res[i,:]

        return combs, ssad

    def parse_criteria(self, criteria):
        '''
        Parses criteria list to get all possible column combinations.

        Parameters
        ----------
        criteria : dict
            (See docstring for Patch.sad)

        Returns
        -------
        spp_list : ndarray
            1D array listing identifiers for species in the same order as
            they appear in arrays found in result.
        spp_col : str
            Name of column containing species identifiers.
        count_col : str
            Name of column containing counts, if any.
        engy_col : str
            Name of column containing energy values, if any.
        mass_col : str
            Name of column containing mass values, if any.
        combinations : list of dicts
            List of dictionaries giving all possible combinations of
            criteria. Columns not mentioned in criteria are ignored and will
            be averaged over in later analyses.
        '''

        spp_list = None
        spp_col = None
        count_col = None
        engy_col = None
        mass_col = None
        combinations = []

        # Calculate all possible combinations of columns based on criteria
        # TODO: Add error checking
        for key, value in criteria.items():

            # Look for the special values designating the species, count,
            # energy, and mass columns; these do not generate combinations.
            if value == 'species':
                spp_list = np.unique(self.data_table.table[key])
                spp_col = key
                continue
            if value == 'count':
                count_col = key
                continue
            if value == 'energy':
                engy_col = key
                continue
            if value == 'mass':
                mass_col = key
                continue

            # Get levels of categorial or metric data
            if value == 'split':  # Categorial: one level per unique value
                levels = np.unique(self.data_table.table[key])
                levels_str = [('==' , x.astype(levels.dtype)) for x in levels]
            elif value == 'whole':
                # Random string to minimize chance of overlap?
                levels_str = [('==','whole')]
            else:   # Metric: `value` divisions between column min and max
                # TODO: Throw a warning if the data is not divisible by the
                # divisions specified.
                try:
                    dmin = self.data_table.meta[(key, 'minimum')]
                    dmax = self.data_table.meta[(key, 'maximum')]
                    dprec = self.data_table.meta[(key, 'precision')]

                    # TODO: Error if step < prec
                    # NOTE(review): if metadata values are ints and this runs
                    # under Python 2 without true division, `step` truncates —
                    # confirm metadata values arrive as floats.
                    step = (dmax + dprec - dmin) / value
                    starts = np.arange(dmin, dmax + dprec, step)
                    ends = starts + step
                except TypeError:
                    raise TypeError('Unable to proceed to with values ' +
                                    'obtained from metadata. Please check ' +
                                    'the metadata file and/or parameters file')

                # Each metric level is a half-open interval [start, end)
                starts_str = [('>=', x) for x in starts]
                ends_str = [('<', x) for x in ends]
                levels_str = [list(lvl) for lvl in zip(starts_str, ends_str)]

            # Add these levels to combinations dictionary
            if len(combinations) == 0:  # If first criteria
                for i, level in enumerate(levels_str):
                    combinations.append({key: level})
            else:
                # Cartesian product: replicate every existing combination
                # once per level of this new key.
                temp_comb = []
                for i, level in enumerate(levels_str):
                    exist_recs = deepcopy(combinations)
                    for rec in exist_recs:
                        rec[key] = level
                    temp_comb += exist_recs
                combinations = temp_comb

        # No splitting criteria at all -> single empty combination (whole
        # table).
        if len(combinations) == 0:
            combinations.append({})

        return spp_list, spp_col, count_col, engy_col, mass_col, combinations

    def sar(self, div_cols, div_list, criteria, form='sar'):
        '''
        Calculate an empirical species-area relationship given criteria.

        Parameters
        ----------
        div_cols : tuple
            Column names to divide, eg, ('x', 'y'). Must be metric.
        div_list : list of tuples
            List of division pairs in same order as div_cols, eg, [(2,2),
            (2,4), (4,4)]. Values are number of divisions of div_col.
        criteria : dict
            See docstring for EPatch.sad. Here, criteria SHOULD NOT include
            items referring to div_cols (if there are any, they are ignored).
        form : string
            'sar' or 'ear' for species or endemics area relationship. EAR
            is relative to the subtable selected after criteria is applied.

        Returns
        -------
        rec_sar: structured array
            Returns a structured array with fields 'items' and 'area' that
            contains the average items/species for each given area specified
            by criteria.
        full_result : list of ndarrays
            List of same length as areas containing arrays with element for
            count of species or endemics in each subpatch at corresponding
            area.
        '''

        # If any element in div_cols in criteria, remove from criteria
        criteria = {k: v for k, v in criteria.items() if k not in div_cols}

        # Loop through div combinations (ie, areas), calc sad, and summarize
        areas = []
        mean_result = []
        full_result = []

        for div in div_list:

            # Add divs to criteria dict
            this_criteria = deepcopy(criteria)
            for i, col in enumerate(div_cols):
                this_criteria[col] = div[i]

            # Get flattened sad for all criteria and this div
            sad_return = self.sad(this_criteria)
            flat_sad = flatten_sad(sad_return)[1]

            # Store results
            if form == 'sar':
                # Species richness per subpatch = count of non-zero species
                this_full = np.sum((flat_sad > 0), axis=0)
                this_mean = np.mean(this_full)
            elif form == 'ear':
                # A species is endemic to a subpatch if all of its
                # individuals (row total) fall in that subpatch.
                totcnt = np.sum(flat_sad, axis=1)
                totcnt_arr = \
                    np.array([list(totcnt),]*np.shape(flat_sad)[1]).transpose()

                this_full = np.sum(np.equal(flat_sad, totcnt_arr), axis=0)
                this_mean = np.mean(this_full)
            else:
                raise NotImplementedError('No SAR of form %s available' % form)

            full_result.append(this_full)
            mean_result.append(this_mean)

            # Store area of one subpatch for this division
            area = 1
            for i, col in enumerate(div_cols):
                dmin = self.data_table.meta[(col, 'minimum')]
                dmax = self.data_table.meta[(col, 'maximum')]
                dprec = self.data_table.meta[(col, 'precision')]
                length = (dmax + dprec - dmin)

                area *= length / div[i]

            areas.append(area)

        # Return
        # NOTE(review): `np.array(zip(...))` relies on Python 2's list
        # `zip`, and `np.float` was removed in NumPy 1.24 — this line breaks
        # under Python 3 / modern NumPy. Confirm target environment.
        rec_sar = np.array(zip(mean_result, areas), dtype=[('items', np.float),
                                                           ('area', np.float)])

        return rec_sar, full_result

    def ied(self, criteria, normalize=True, exponent=0.75):
        '''
        Calculates the individual energy distribution for the entire
        community given the criteria

        Parameters
        ----------
        criteria : dict
            Dictionary must contain a key with the value 'energy'. See sad
            method for further requirements.
        normalize : bool
            If True, this distribution is normalized by dividing by the
            lowest energy value within each element of criteria. If False,
            returns raw energy values.
        exponent : float
            The exponent of the allometric scaling relationship if energy is
            calculated from mass.

        Returns
        -------
        result : list
            List of tuples containing results, where first element is
            dictionary of criteria for this calculation and second element is
            a 1D ndarray containing the energy measurement of each individual
            in the subset. The third element is the full (not unique) species
            list for the given criteria.

        Notes
        -----
        If count_col is None or is all ones, the entire energy column for
        each subtable is returned. Else, the average energy per individual,
        repeated for each individual is returned. This is equivalent to the
        psi distribution from Harte (2011).
        '''

        spp_list, spp_col, count_col, engy_col, mass_col, combinations = \
            self.parse_criteria(criteria)

        if engy_col == None and mass_col == None:
            raise ValueError("No energy or mass column given")
        elif engy_col == None and mass_col != None:
            # Fall back to mass; converted to energy below via `exponent`
            mass = True
            this_engy = mass_col
        else:
            # Energy column takes precedence even if a mass column exists
            mass = False
            this_engy = engy_col

        result = []
        for comb in combinations:

            subtable = self.data_table.get_subtable(comb)

            # If all counts are not 1
            if count_col and (not np.all(subtable[count_col] == 1)):

                # Remove any zero counts
                subtable = subtable[subtable[count_col] != 0]
                # Convert counts to ints
                temp_counts = subtable[count_col].astype(int)

                # Average energy per individual, repeated count times
                energy = np.repeat((subtable[this_engy] /
                        subtable[count_col]), temp_counts)
                species = np.repeat(subtable[spp_col], temp_counts)
            else:
                energy = subtable[this_engy]
                species = subtable[spp_col]

            # Convert mass to energy if mass is True
            if mass:
                energy = (energy ** exponent)

            # Normalizing energy
            if normalize:
                energy = energy / np.min(energy)
            result.append((comb, energy, species))

        return result

    def sed(self, criteria, normalize=True, exponent=0.75, clean=False):
        '''
        Calculates the species-level energy distribution for each given
        species in the community.

        Parameters
        ----------
        criteria : dict
            Dictionary must contain a key with the value 'energy' or 'mass'.
            See sad method for further requirements.
        normalize : bool
            If True, this distribution is normalized by dividing by the
            lowest energy value within each element of criteria. If False,
            returns raw energy values.
        exponent : float
            The exponent of the allometric scaling relationship if energy is
            calculated from mass
        clean : bool
            If False, sed dictionary contains all species. If True, species
            with no individuals are removed. This is useful when subsetting.

        Returns
        -------
        result : list of tuples
            Each tuple contains two objects. The first object is a dict with
            the division specifications that generated the given species
            energy distributions. The second object is a dict with a keyword
            corresponding to each species in the spp_list. Each species
            keyword looks up a np.array that contains the given species
            energy distribution.

        Note
        ----
        The theta distribution from Harte (2011) is an sed.
        '''

        spp_list, spp_col, count_col, engy_col, mass_col, combinations = \
            self.parse_criteria(criteria)

        ied = self.ied(criteria, normalize=normalize, exponent=exponent)

        result = []
        for this_ied in ied:
            this_criteria_sed = {}

            for spp in spp_list:
                # Boolean mask of this species' individuals in the ied
                spp_ind = (spp == this_ied[2])
                this_spp_sed = this_ied[1][spp_ind]

                if clean: # If True, don't add empty species lists
                    if len(this_spp_sed) > 0:
                        this_criteria_sed[spp] = this_spp_sed
                else:
                    this_criteria_sed[spp] = this_spp_sed

            result.append((this_ied[0], this_criteria_sed))

        return result

    def ased(self, criteria, normalize=True, exponent=0.75):
        '''
        Calculates the average species energy distribution for each given
        species in a subset.

        Parameters
        ----------
        criteria : dict
            Dictionary must contain a key with the value 'energy' or 'mass'.
            See sad method for further requirements.

        Returns
        -------
        result : list
            List of tuples containing results, where the first element is a
            dictionary of criteria for this calculation and second element is
            a 1D ndarray of length species containing the average energy for
            each species. The third element is a 1D array listing identifiers
            for species in the same order as they appear in the second
            element of result.

        Notes
        -----
        This is equivalent to the nu distribution from Harte 2011
        '''

        sed = self.sed(criteria, normalize=normalize, exponent=exponent)

        result = []
        for this_sed in sed:

            # NOTE(review): `dict.viewkeys()` exists only in Python 2; this
            # method cannot run under Python 3 as written.
            spp_list = list(this_sed[1].viewkeys())
            spp_list.sort()

            # Take the mean energy for each species
            nu = [np.mean(this_sed[1][spp]) for spp in spp_list if
                  len(this_sed[1][spp]) != 0]
            # Truncated spp_list if necessary
            spp_list = [spp for spp in spp_list if len(this_sed[1][spp]) != 0]

            result.append((this_sed[0], np.array(nu), np.array(spp_list)))

        return result
# NOTE(review): second definition of `Patch` in this file — it shadows the
# earlier one at import time. This version extends `sar` with `output_N` and
# adds `universal_sar` and `comm_sep`. Consider deleting the earlier copy.
class Patch:
    '''
    An object representing an empirical census.

    Parameters
    ----------
    data_path : str
        Path to csv file containing census data.
    subset : dict or str
        Dictionary of permanent subset to data, {'column_name': 'condition'},
        which will limit all analysis to records in which column_name meets
        the condition, ie, {'year': ('==', 2005), 'x': [('>', 20), ('<', 40)]}
        restricts analysis to year 2005 and x values between 20 and 40. These
        conditions can also be passed to the individual methods, but
        subsetting the data table up front may save analysis time. Subsetting
        on a string would look something like {'name' : [('==', 'John'),
        ('==', 'Harry')]}. In addition, subset can be a query string for a
        SQL database.

    Attributes
    ----------
    data_table : object of class DataTable
        Object containing patch data and metadata.
    '''

    # NOTE(review): mutable default argument `subset = {}` — the same dict
    # object is shared across all calls that omit `subset`. It is never
    # mutated here, but `subset=None` with an in-body default would be safer.
    def __init__(self, datapath, subset = {}):
        '''Initialize object of class Patch. See class documentation.'''

        # Handle csv
        self.data_table = DataTable(datapath, subset=subset)

        # If datapath is sql or db the subsetting is already done;
        # a dict subset means a csv source, which must be subset here.
        if type(subset) == type({}):
            self.data_table.table = self.data_table.get_subtable(subset)

    def sad(self, criteria, clean=False):
        '''
        Calculates an empirical species abundance distribution given criteria.

        Parameters
        ----------
        criteria : dict
            Dictionary of form {column_name: value}. Must contain a key with
            a value of 'species' indicating the column with species
            identifiers (this column must be type categorical in metadata).
            If a column giving the counts of species found at a point is also
            in the data, a key with the value 'count' should also be given.

            Value has a different meaning depending on column type:
            - metric - number of divisions of data along this axis, int/float
            - categorical - 'split' calculates each category separately,
              'whole' takes the entire column.
        clean : bool
            If True, all the zeros are removed from the sads. If False, sads
            are left as is.

        Returns
        -------
        result : list
            List of tuples containing results, where the first element is a
            dictionary of criteria for this calculation and second element is
            a 1D ndarray of length species containing the abundance for each
            species. The third element is a 1D array listing identifiers for
            species in the same order as they appear in the second element of
            result.
        '''

        # parse_criteria returns mass_col as the fifth value; it is unused by
        # sad, hence the throwaway name `mass`.
        spp_list, spp_col, count_col, engy_col, mass, combinations = \
            self.parse_criteria(criteria)

        if spp_col == None:
            raise TypeError('No species column specified in "criteria" ' +
                            'parameter')

        result = []
        for comb in combinations:

            subtable = self.data_table.get_subtable(comb)

            sad_list = []
            for species in spp_list:
                spp_subtable = subtable[subtable[spp_col] == species]
                # Sum the count column if present, otherwise each row is one
                # individual.
                if count_col:
                    count = np.sum(spp_subtable[count_col])
                else:
                    count = len(spp_subtable)
                sad_list.append(count)

            sad_list = np.array(sad_list)

            if clean:
                # Drop zero-abundance species and keep spp_list aligned
                ind = np.where(sad_list != 0)[0]
                sad_list = sad_list[ind]
                temp_spp_list = spp_list[ind]
            else:
                temp_spp_list = spp_list

            result.append((comb, sad_list, temp_spp_list))

        return result

    def ssad(self, criteria):
        '''
        Calculates empirical species-level spatial abundance distributions
        given criteria.

        Parameters
        ----------
        criteria : dict
            See Patch.sad docstring

        Returns
        -------
        : tuple
            Returns a tuple with two objects. The first object is an array of
            dicts that correspond to the criteria used to generate each cell.
            The length of the first object is equal to the number of
            divisions specified. The second object is a dictionary that has
            length species and each keyword is a species. Each species
            keyword looks up an array with the ssad for the given species.
            The array that each keyword looks up is the same length as
            criteria.
        '''

        sad_return = self.sad(criteria, clean=False)
        # With clean=False every sad shares the same species list, so the
        # first combination's list stands in for all of them.
        spp_list = sad_return[0][2]
        combs, array_res = flatten_sad(sad_return)
        ssad = {}

        for i, spp in enumerate(spp_list):
            ssad[spp] = array_res[i,:]

        return combs, ssad

    def parse_criteria(self, criteria):
        '''
        Parses criteria list to get all possible column combinations.

        Parameters
        ----------
        criteria : dict
            (See docstring for Patch.sad)

        Returns
        -------
        spp_list : ndarray
            1D array listing identifiers for species in the same order as
            they appear in arrays found in result.
        spp_col : str
            Name of column containing species identifiers.
        count_col : str
            Name of column containing counts, if any.
        engy_col : str
            Name of column containing energy values, if any.
        mass_col : str
            Name of column containing mass values, if any.
        combinations : list of dicts
            List of dictionaries giving all possible combinations of
            criteria. Columns not mentioned in criteria are ignored and will
            be averaged over in later analyses.
        '''

        spp_list = None
        spp_col = None
        count_col = None
        engy_col = None
        mass_col = None
        combinations = []

        # Calculate all possible combinations of columns based on criteria
        # TODO: Add error checking
        for key, value in criteria.items():

            # Look for the special values designating the species, count,
            # energy, and mass columns; these do not generate combinations.
            if value == 'species':
                spp_list = np.unique(self.data_table.table[key])
                spp_col = key
                continue
            if value == 'count':
                count_col = key
                continue
            if value == 'energy':
                engy_col = key
                continue
            if value == 'mass':
                mass_col = key
                continue

            # Get levels of categorial or metric data
            if value == 'split':  # Categorial: one level per unique value
                levels = np.unique(self.data_table.table[key])
                levels_str = [('==' , x.astype(levels.dtype)) for x in levels]
            elif value == 'whole':
                # Random string to minimize chance of overlap?
                levels_str = [('==','whole')]
            else:   # Metric: `value` divisions between column min and max
                # TODO: Throw a warning if the data is not divisible by the
                # divisions specified.
                try:
                    dmin = self.data_table.meta[(key, 'minimum')]
                    dmax = self.data_table.meta[(key, 'maximum')]
                    dprec = self.data_table.meta[(key, 'precision')]

                    # TODO: Error if step < prec
                    # NOTE(review): if metadata values are ints and this runs
                    # under Python 2 without true division, `step` truncates —
                    # confirm metadata values arrive as floats.
                    step = (dmax + dprec - dmin) / value
                    starts = np.arange(dmin, dmax + dprec, step)
                    ends = starts + step
                except TypeError:
                    raise TypeError('Unable to proceed to with values ' +
                                    'obtained from metadata. Please check ' +
                                    'the metadata file and/or parameters file')

                # Each metric level is a half-open interval [start, end)
                starts_str = [('>=', x) for x in starts]
                ends_str = [('<', x) for x in ends]
                levels_str = [list(lvl) for lvl in zip(starts_str, ends_str)]

            # Add these levels to combinations dictionary
            if len(combinations) == 0:  # If first criteria
                for i, level in enumerate(levels_str):
                    combinations.append({key: level})
            else:
                # Cartesian product: replicate every existing combination
                # once per level of this new key.
                temp_comb = []
                for i, level in enumerate(levels_str):
                    exist_recs = deepcopy(combinations)
                    for rec in exist_recs:
                        rec[key] = level
                    temp_comb += exist_recs
                combinations = temp_comb

        # No splitting criteria at all -> single empty combination (whole
        # table).
        if len(combinations) == 0:
            combinations.append({})

        return spp_list, spp_col, count_col, engy_col, mass_col, combinations

    def sar(self, div_cols, div_list, criteria, form='sar', output_N=False):
        '''
        Calculate an empirical species-area relationship given criteria.

        Parameters
        ----------
        div_cols : tuple
            Column names to divide, eg, ('x', 'y'). Must be metric.
        div_list : list of tuples
            List of division pairs in same order as div_cols, eg, [(2,2),
            (2,4), (4,4)]. Values are number of divisions of div_col.
        criteria : dict
            See docstring for EPatch.sad. Here, criteria SHOULD NOT include
            items referring to div_cols (if there are any, they are ignored).
        form : string
            'sar' or 'ear' for species or endemics area relationship. EAR
            is relative to the subtable selected after criteria is applied.
        output_N : bool
            Adds the column N to the output rec array which contains the
            average N for a given area.

        Returns
        -------
        rec_sar: structured array
            Returns a structured array with fields 'items' and 'area' that
            contains the average items/species for each given area specified
            by criteria.
        full_result : list of ndarrays
            List of same length as areas containing arrays with element for
            count of species or endemics in each subpatch at corresponding
            area.
        '''

        # If any element in div_cols in criteria, remove from criteria
        criteria = {k: v for k, v in criteria.items() if k not in div_cols}

        # Loop through div combinations (ie, areas), calc sad, and summarize
        areas = []
        mean_result = []
        full_result = []
        N_result = []

        for div in div_list:

            # Add divs to criteria dict
            this_criteria = deepcopy(criteria)
            for i, col in enumerate(div_cols):
                this_criteria[col] = div[i]

            # Get flattened sad for all criteria and this div
            sad_return = self.sad(this_criteria)

            if output_N:
                # Mean total abundance across subpatches at this division
                N_result.append(np.mean([sum(sad[1]) for sad in sad_return]))

            flat_sad = flatten_sad(sad_return)[1]

            # Store results
            if form == 'sar':
                # Species richness per subpatch = count of non-zero species
                this_full = np.sum((flat_sad > 0), axis=0)
                this_mean = np.mean(this_full)
            elif form == 'ear':
                # A species is endemic to a subpatch if all of its
                # individuals (row total) fall in that subpatch.
                totcnt = np.sum(flat_sad, axis=1)
                totcnt_arr = \
                    np.array([list(totcnt),]*np.shape(flat_sad)[1]).transpose()

                this_full = np.sum(np.equal(flat_sad, totcnt_arr), axis=0)
                this_mean = np.mean(this_full)
            else:
                raise NotImplementedError('No SAR of form %s available' % form)

            full_result.append(this_full)
            mean_result.append(this_mean)

            # Store area of one subpatch for this division
            area = 1
            for i, col in enumerate(div_cols):
                dmin = self.data_table.meta[(col, 'minimum')]
                dmax = self.data_table.meta[(col, 'maximum')]
                dprec = self.data_table.meta[(col, 'precision')]
                length = (dmax + dprec - dmin)

                area *= length / div[i]

            areas.append(area)

        # Return
        # NOTE(review): `np.array(zip(...))` relies on Python 2's list
        # `zip`, and `np.float` was removed in NumPy 1.24 — these lines break
        # under Python 3 / modern NumPy. Confirm target environment.
        if not output_N:
            rec_sar = np.array(zip(mean_result, areas),
                               dtype=[('items', np.float), ('area', np.float)])
        else:
            rec_sar = np.array(zip(mean_result, N_result, areas),
                               dtype=[('items', np.float), ('N', np.float),
                                      ('area', np.float)])

        return rec_sar, full_result

    def universal_sar(self, div_cols, div_list, criteria, include_full=False):
        '''
        Calculates the empirical universal sar given criteria. The universal
        sar calculates the slope of the SAR and the ratio of N / S at all the
        areas in div_cols (where N is the total number of individuals and S
        is the total number of species).

        This function assumes that the div_list contains halvings. If they
        are not, the function will still work but the results will be
        meaningless. An example of a div_list with halvings is:

        [(1,1), (1,2), (2,2), (2,4), (4,4)]

        Parameters
        ----------
        div_cols : tuple
            Column names to divide, eg, ('x', 'y'). Must be metric.
        div_list : list of tuples
            List of division pairs in same order as div_cols, eg, [(2,2),
            (2,4), (4,4)]. Values are number of divisions of div_col.
        criteria : dict
            See docstring for EPatch.sad. Here, criteria SHOULD NOT include
            items referring to div_cols (if there are any, they are ignored).
        include_full : bool
            If include_full = True, the division (1,1) will be included if it
            was not already included. Else it will not be included. (1,1) is
            equivalent to the full plot.

        Returns
        -------
        z_array : a structured array
            Has the column names:
            'z' : slope of the SAR at the given area
            'S' : Number of species at the given division
            'N' : Number of individuals at the given division
            'N/S' : The ratio of N/S at the given division

        Notes
        -----
        If you give it n divisions in div_list you will get a structured
        array back that has length n - 2 (the first and last divisions have
        no slope).
        '''

        # If (1,1) is not included, include it
        # NOTE(review): this mutates the caller's div_list in place.
        if include_full:
            try:
                div_list.index((1,1))
            except ValueError:
                div_list.insert(0, (1,1))

        # Run sar with the div_cols
        sar = self.sar(div_cols, div_list, criteria, output_N=True)[0]

        # sort by area, descending
        sar = np.sort(sar, order=['area'])[::-1]

        # Calculate z's
        if len(sar) >= 3: # Check the length of sar
            # NOTE(review): the filter compares species VALUES
            # (sar['items'][i] != sar['items'][-1]) apparently to skip the
            # last index; if two divisions happen to share the same species
            # count this drops/keeps the wrong entries — confirm the intent
            # was `i != len(sar) - 1`.
            z_list = [z(sar['items'][i - 1], sar['items'][i + 1]) for i in
                      np.arange(1, len(sar)) if sar['items'][i] !=
                      sar['items'][-1]]
        else:
            return np.empty(0, dtype=[('z', np.float), ('S', np.float),
                                      ('N', np.float), ('N/S', np.float)])

        N_over_S = sar['N'][1:len(sar) - 1] / sar['items'][1:len(sar) - 1]

        # NOTE(review): np.array(zip(...)) / np.float are Python 2 /
        # old-NumPy idioms; see note in `sar`.
        z_array = np.array(zip(z_list, sar['items'][1:len(sar) - 1],
                               sar['N'][1:len(sar) - 1], N_over_S),
                           dtype=[('z', np.float), ('S', np.float),
                                  ('N', np.float), ('N/S', np.float)])

        return z_array

    def comm_sep(self, plot_locs, criteria, loc_unit=None):
        '''
        Calculates commonality (Sorensen and Jaccard) between pairs of plots.

        Parameters
        ----------
        plot_locs : dict
            Dictionary with keys equal to each plot name, which must be
            represented by a column in the data table, and values equal to a
            tuple of the x and y coordinate of each plot
        criteria : dict
            See docstring for Patch.sad.
        loc_unit : str
            Unit of plot locations. Special cases include 'decdeg' (decimal
            degrees), returns result in km. Otherwise ignored.

        Returns
        -------
        result: structured array
            Returns a structured array with fields plot-a and plot-b (names
            of two plots), dist (distance between plots), and sorensen and
            jaccard (similarity indices). Has row for each unique pair of
            plots.
        '''

        # Set up sad_dict with key=plot and val=clean sad for that plot
        sad_dict = {}

        # Loop through all plot cols, updating criteria, and getting spp_list
        # NOTE(review): this mutates the caller's criteria dict in place
        # (pops the old count key, inserts each plot as 'count').
        for plot in plot_locs.keys():

            # Find current count col and remove it from criteria
            # (Python 2: .keys() is a list, so popping while looping is safe;
            # under Python 3 this would raise RuntimeError.)
            for crit_key in criteria.keys():
                if criteria[crit_key] == 'count':
                    criteria.pop(crit_key, None)

            # Add this plot as col with counts
            criteria[plot] = 'count'

            # Get SAD for existing criteria with this plot as count col
            sad_return = self.sad(criteria, clean=True)

            # Check that sad_return only has one element, or throw error
            if len(sad_return) > 1:
                raise NotImplementedError('Too many criteria for comm_sep')

            # Get unique species list for this plot and store in sad_dict
            sad_dict[plot] = sad_return[0][2]

        # Set up recarray to hold Sorensen index for all pairs of plots
        # n_pairs = 0 + 1 + ... + (nplots-1) = nplots choose 2
        n_pairs = np.sum(np.arange(len(plot_locs.keys())))
        result = np.recarray((n_pairs,), dtype=[('plot-a','S32'),
                                                ('plot-b', 'S32'),
                                                ('spp-a', int),
                                                ('spp-b', int),
                                                ('dist', float),
                                                ('sorensen', float),
                                                ('jaccard', float)])

        # Loop through all combinations of plots and fill in result table
        row = 0
        for pair in itertools.combinations(plot_locs.keys(), 2):

            # Names of plots
            plota = pair[0]
            plotb = pair[1]

            result[row]['plot-a'] = plota
            result[row]['plot-b'] = plotb

            # Calculate inter-plot distance
            if loc_unit == 'decdeg':
                result[row]['dist'] = decdeg_distance(plot_locs[plota],
                                                      plot_locs[plotb])
            else:
                result[row]['dist'] = distance(plot_locs[plota],
                                               plot_locs[plotb])

            # Get similarity indices
            spp_a = len(sad_dict[plota])
            spp_b = len(sad_dict[plotb])

            result[row]['spp-a'] = spp_a
            result[row]['spp-b'] = spp_b

            intersect = set(sad_dict[plota]).intersection(sad_dict[plotb])
            union = set(sad_dict[plota]).union(sad_dict[plotb])

            # Fill in zero if denom is zero
            # NOTE(review): under Python 2 without `from __future__ import
            # division`, both ratios below are INTEGER division and collapse
            # to 0 (or 1) — confirm the file imports true division.
            if spp_a + spp_b == 0:
                result[row]['sorensen'] = 0
            else:
                result[row]['sorensen'] = (2*len(intersect)) / (spp_a+spp_b)

            if len(union) == 0:
                result[row]['jaccard'] = 0
            else:
                result[row]['jaccard'] = len(intersect) / len(union)

            # Increment row counter
            row += 1

        return result

    def ied(self, criteria, normalize=True, exponent=0.75):
        '''
        Calculates the individual energy distribution for the entire
        community given the criteria

        Parameters
        ----------
        criteria : dict
            Dictionary must contain a key with the value 'energy'. See sad
            method for further requirements.
        normalize : bool
            If True, this distribution is normalized by dividing by the
            lowest energy value within each element of criteria. If False,
            returns raw energy values.
        exponent : float
            The exponent of the allometric scaling relationship if energy is
            calculated from mass.

        Returns
        -------
        result : list
            List of tuples containing results, where first element is
            dictionary of criteria for this calculation and second element is
            a 1D ndarray containing the energy measurement of each individual
            in the subset. The third element is the full (not unique) species
            list for the given criteria.

        Notes
        -----
        If count_col is None or is all ones, the entire energy column for
        each subtable is returned. Else, the average energy per individual,
        repeated for each individual is returned. This is equivalent to the
        psi distribution from Harte (2011).
        '''

        spp_list, spp_col, count_col, engy_col, mass_col, combinations = \
            self.parse_criteria(criteria)

        if engy_col == None and mass_col == None:
            raise ValueError("No energy or mass column given")
        elif engy_col == None and mass_col != None:
            # Fall back to mass; converted to energy below via `exponent`
            mass = True
            this_engy = mass_col
        else:
            # Energy column takes precedence even if a mass column exists
            mass = False
            this_engy = engy_col

        result = []
        for comb in combinations:

            subtable = self.data_table.get_subtable(comb)

            # If all counts are not 1
            if count_col and (not np.all(subtable[count_col] == 1)):

                # Remove any zero counts
                subtable = subtable[subtable[count_col] != 0]
                # Convert counts to ints
                temp_counts = subtable[count_col].astype(int)

                # Average energy per individual, repeated count times
                energy = np.repeat((subtable[this_engy] /
                        subtable[count_col]), temp_counts)
                species = np.repeat(subtable[spp_col], temp_counts)
            else:
                energy = subtable[this_engy]
                species = subtable[spp_col]

            # Convert mass to energy if mass is True
            if mass:
                energy = (energy ** exponent)

            # Normalizing energy
            if normalize:
                energy = energy / np.min(energy)
            result.append((comb, energy, species))

        return result

    def sed(self, criteria, normalize=True, exponent=0.75, clean=False):
        '''
        Calculates the species-level energy distribution for each given
        species in the community.

        Parameters
        ----------
        criteria : dict
            Dictionary must contain a key with the value 'energy' or 'mass'.
            See sad method for further requirements.
        normalize : bool
            If True, this distribution is normalized by dividing by the
            lowest energy value within each element of criteria. If False,
            returns raw energy values.
        exponent : float
            The exponent of the allometric scaling relationship if energy is
            calculated from mass
        clean : bool
            If False, sed dictionary contains all species. If True, species
            with no individuals are removed. This is useful when subsetting.

        Returns
        -------
        result : list of tuples
            Each tuple contains two objects. The first object is a dict with
            the division specifications that generated the given species
            energy distributions. The second object is a dict with a keyword
            corresponding to each species in the spp_list. Each species
            keyword looks up a np.array that contains the given species
            energy distribution.

        Note
        ----
        The theta distribution from Harte (2011) is an sed.
        '''

        spp_list, spp_col, count_col, engy_col, mass_col, combinations = \
            self.parse_criteria(criteria)

        ied = self.ied(criteria, normalize=normalize, exponent=exponent)

        result = []
        for this_ied in ied:
            this_criteria_sed = {}

            for spp in spp_list:
                # Boolean mask of this species' individuals in the ied
                spp_ind = (spp == this_ied[2])
                this_spp_sed = this_ied[1][spp_ind]

                if clean: # If True, don't add empty species lists
                    if len(this_spp_sed) > 0:
                        this_criteria_sed[spp] = this_spp_sed
                else:
                    this_criteria_sed[spp] = this_spp_sed

            result.append((this_ied[0], this_criteria_sed))

        return result

    def ased(self, criteria, normalize=True, exponent=0.75):
        '''
        Calculates the average species energy distribution for each given
        species in a subset.

        Parameters
        ----------
        criteria : dict
            Dictionary must contain a key with the value 'energy' or 'mass'.
            See sad method for further requirements.

        Returns
        -------
        result : list
            List of tuples containing results, where the first element is a
            dictionary of criteria for this calculation and second element is
            a 1D ndarray of length species containing the average energy for
            each species. The third element is a 1D array listing identifiers
            for species in the same order as they appear in the second
            element of result.

        Notes
        -----
        This is equivalent to the nu distribution from Harte 2011
        '''

        sed = self.sed(criteria, normalize=normalize, exponent=exponent)

        result = []
        for this_sed in sed:

            # NOTE(review): `dict.viewkeys()` exists only in Python 2; this
            # method cannot run under Python 3 as written.
            spp_list = list(this_sed[1].viewkeys())
            spp_list.sort()

            # Take the mean energy for each species
            nu = [np.mean(this_sed[1][spp]) for spp in spp_list if
                  len(this_sed[1][spp]) != 0]
            # Truncated spp_list if necessary
            spp_list = [spp for spp in spp_list if len(this_sed[1][spp]) != 0]

            result.append((this_sed[0], np.array(nu), np.array(spp_list)))

        return result