예제 #1
0
    def remove_columns(self, col_names=None):
        '''
        Remove the named columns from every dataset in self.columnar_data.

        Parameters
        ----------
        col_names : string or iterable of strings, optional
            The name or names of the columns to be removed.  If None (the
            default) self.columnar_data is left untouched.

        Notes
        -----
        The names are passed through ff.format_headers before being handed
        to drop_fields.  self.columnar_data is rebound to a new list holding
        the result of drop_fields for each dataset, in the original order.

        '''
        # Identity test on purpose: `col_names != None` compares elementwise
        # when col_names is a numpy array, which makes the `if` ambiguous.
        if col_names is None:
            return

        # isinstance also recognises str subclasses (e.g. numpy.str_); an
        # exact type comparison would let list() split such a name into
        # single characters.
        if isinstance(col_names, str):
            col_names = [col_names]
        else:
            col_names = list(col_names)

        # Format column names
        col_names = ff.format_headers(col_names)

        self.columnar_data = [drop_fields(data, col_names)
                              for data in self.columnar_data]
예제 #2
0
    def split_up_data_by_field(self, split_columns=None):
        '''
        Split every dataset in self.columnar_data into one copy per entry of
        split_columns.

        Each entry of split_columns names the column(s) that one copy keeps.
        From that copy, the columns named in all the *other* entries are
        dropped; columns that are not mentioned anywhere in split_columns are
        never dropped.  For example, passing [dbh1, dbh2, dbh3] yields three
        copies of each dataset, each containing only one of the dbh columns.
        One could also pass [(dbh1, recr1), (dbh2, recr2), (dbh3, recr3)] to
        keep the columns together pairwise.

        Parameters
        ----------
        split_columns : list of strings or tuples of strings, optional
            The columns by which to split each array.  A bare string is
            treated as a one-element tuple.  If None (the default)
            self.columnar_data is left untouched.

        Notes
        -----
        Saves the split arrays as self.columnar_data, ordered dataset by
        dataset and, within a dataset, in the order of split_columns.  An
        empty split_columns therefore leaves self.columnar_data empty.

        '''
        # NOTE(review): per the original author, a wrong column name results
        # in nothing being removed; no error checking is done for this here.

        # Identity test on purpose: `!= None` is elementwise on numpy arrays.
        if split_columns is None:
            return

        # Normalise every entry to a tuple of names.  isinstance also
        # recognises str subclasses (e.g. numpy.str_), which tuple() would
        # otherwise split into single characters.
        split_columns = [(s,) if isinstance(s, str) else tuple(s)
                         for s in split_columns]

        # Format the names in each tuple
        split_columns = [tuple(ff.format_headers(nms))
                         for nms in split_columns]

        # Every name mentioned in any tuple, in the order given.
        given_col_names = [name for tup in split_columns for name in tup]

        # The names to drop for a tuple are all the given names that are not
        # in that tuple.  This does not depend on the dataset, so it is
        # computed once instead of once per dataset.
        names_to_drop = [[name for name in given_col_names if name not in tup]
                         for tup in split_columns]

        split_data = []
        for data in self.columnar_data:
            for remove_names in names_to_drop:
                # Pass a fresh list so each drop_fields call gets its own.
                split_data.append(drop_fields(data, list(remove_names)))
        self.columnar_data = split_data
예제 #3
0
    def change_column_names(self, change=None, changed_to=None):
        '''
        Rename columns, in place, in every dataset in self.columnar_data.

        Parameters
        ----------
        change : list of tuples or strings, optional
            Each tuple or string contains column names. All the column names
            in the first tuple will be changed to the first element in the
            changed_to list and so on.
        changed_to : list, optional
            A list of strings that contain the names that the columns in
            change will be changed to.

        Raises
        ------
        ValueError
            If change and changed_to do not have the same length.

        Notes
        -----
        This function is useful if you would like to merge self.columnar_data
        but the dtype.names are different.

        Nothing happens if either argument is None.  The names in change are
        passed through ff.format_headers before matching; the names in
        changed_to are used as given.  Names that are not present in a
        dataset are skipped for that dataset.

        '''
        # Identity tests on purpose: `!= None` is elementwise on numpy arrays.
        if change is None or changed_to is None:
            return

        if len(change) != len(changed_to):
            raise ValueError('Length of params change and changed_to must'
                            + ' be equal')

        # Convert to tuples if just received strings.  isinstance also
        # recognises str subclasses (e.g. numpy.str_).
        change = [(x,) if isinstance(x, str) else tuple(x) for x in change]

        # Format the names in each tuple
        change = [tuple(ff.format_headers(nms)) for nms in change]

        for data in self.columnar_data:
            # A plain list of names, rather than a fixed-width numpy string
            # array: no widening is needed for longer replacement names and
            # the names never get converted to bytes.
            column_names = list(data.dtype.names)
            for old_names, new_name in zip(change, changed_to):
                for name in old_names:
                    if name in column_names:
                        column_names[column_names.index(name)] = new_name
                        # Applied after every replacement so each rename
                        # takes effect on the array immediately.
                        data.dtype.names = tuple(column_names)