def remove_columns(self, col_names=None):
    '''
    Remove the named columns from every dataset in self.columnar_data.

    Parameters
    ----------
    col_names : string or list
        The name or names of columns to be removed. If None, the method
        returns without touching self.columnar_data.

    Notes
    -----
    The names are passed through ff.format_headers before being handed
    to drop_fields (presumably numpy.lib.recfunctions.drop_fields --
    confirm against the file's imports). self.columnar_data is rebound
    to a new list holding one drop_fields result per dataset.

    '''
    # Identity test on purpose: `!= None` is evaluated element-wise when
    # an array of names is passed and cannot be used as an `if` condition.
    if col_names is None:
        return

    # isinstance also catches str subclasses, which would otherwise be
    # split into single characters by list().
    if isinstance(col_names, str):
        col_names = [col_names]
    else:
        col_names = list(col_names)

    # Format column names
    col_names = ff.format_headers(col_names)

    self.columnar_data = [drop_fields(data, col_names)
                          for data in self.columnar_data]
def split_up_data_by_field(self, split_columns=None):
    '''
    Split every dataset in self.columnar_data by the given column groups.

    For each dataset and each entry of split_columns one new array is
    produced. That array has every column named anywhere in
    split_columns dropped, except the columns of the entry under
    consideration. For example, passing dbh1, dbh2, dbh3 yields three
    arrays per dataset, each containing only one of the dbh columns.
    One could also pass [(dbh1, recr1), (dbh2, recr2), (dbh3, recr3)] to
    keep a pair of columns together in each array.

    Parameters
    ----------
    split_columns : list
        A list of tuples (or plain strings, each treated as a one-element
        tuple) specifying the columns by which to split the array. A
        single bare string is treated as a list containing that one
        name. If None, the method returns without doing anything.

    Notes
    -----
    Saves the list of split arrays as self.columnar_data.

    Names are passed through ff.format_headers first. A name that does
    not match any column is not checked for here.

    '''
    # Identity test on purpose: `!= None` is evaluated element-wise when
    # an array is passed and cannot be used as an `if` condition.
    if split_columns is None:
        return

    # A bare string would otherwise be iterated character by character.
    if isinstance(split_columns, str):
        split_columns = [split_columns]

    # Turn plain strings into one-element tuples so that every entry is a
    # tuple of names.
    split_columns = [(s,) if isinstance(s, str) else tuple(s)
                     for s in split_columns]

    # Format the names in each tuple
    split_columns = [tuple(ff.format_headers(nms)) for nms in split_columns]

    # Every name mentioned in any group, in the order given.
    given_col_names = [name for tup in split_columns for name in tup]

    split_data = []
    for data in self.columnar_data:
        for tup in split_columns:
            # Drop all mentioned columns that are not in the current group.
            remove_names = [name for name in given_col_names
                            if name not in tup]
            split_data.append(drop_fields(data, remove_names))

    self.columnar_data = split_data
def change_column_names(self, change=None, changed_to=None):
    '''
    Rename columns in every dataset in self.columnar_data.

    Parameters
    ----------
    change : list of tuples or strings
        Each tuple or string contains column names. All the column names
        in the first tuple will be changed to the first element in the
        changed_to list and so on.
    changed_to : list
        A list of strings that contain the names that the columns in
        change will be changed to.

    Raises
    ------
    ValueError
        If change and changed_to do not have the same length.

    Notes
    -----
    If either argument is None the method returns without doing
    anything. The names in change are passed through ff.format_headers
    before they are compared with each dataset's dtype.names; names that
    are not present in a dataset are skipped. Each dataset's dtype.names
    is reassigned in place after every rename.

    This function is useful if you would like to merge
    self.columnar_data but the dtype.names are different.

    '''
    # Identity tests on purpose: `!= None` is evaluated element-wise when
    # an array is passed and cannot be used as an `if` condition.
    if change is None or changed_to is None:
        return

    if len(change) != len(changed_to):
        raise ValueError('Length of params change and changed_to must' +
                         ' be equal')

    # Convert to tuples if just received strings
    change = [(x,) if isinstance(x, str) else tuple(x) for x in change]

    # Format the names in each tuple
    change = [tuple(ff.format_headers(nms)) for nms in change]

    for data in self.columnar_data:
        # A plain list has no fixed item width, so a longer replacement
        # name is never truncated and no cast to a byte-string dtype is
        # needed (such a cast breaks the later comparisons and the
        # dtype.names assignment on Python 3).
        column_names = list(data.dtype.names)
        for new_name, name_tup in zip(changed_to, change):
            for name in name_tup:
                if name in column_names:
                    column_names[column_names.index(name)] = new_name
                    data.dtype.names = tuple(column_names)