def median(headers, data): '''Takes in a data object and list of headers. Returns the median of appropriate data.''' cols = [] for h in headers: cols.append(data.header2col[h]) types = data.get_types() for c in cols: if (types[c].strip() != 'numeric'): print('Only numeric types allowed!') return D = data.columns_data(headers) medians = [] for i in range(0,len(headers)): element = np.median(D[:, i].astype(np.float)) medians.append(element) return medians
def stdev(headers, data):
    """Return the standard deviation of each requested numeric column.

    The value is whatever ``np.std`` computes with its default arguments
    (population standard deviation, ``ddof=0``).

    Args:
        headers: List of column headers to summarise.
        data: Data object exposing ``header2col``, ``get_types()`` and
            ``columns_data(headers)``.

    Returns:
        A list with one standard deviation per header, in the order given,
        or ``None`` (after printing a message) if any requested column is
        not numeric.
    """
    # Resolve every header first so an unknown header fails on the lookup
    # before any type checking happens.
    cols = [data.header2col[h] for h in headers]
    types = data.get_types()
    if any(types[c].strip() != 'numeric' for c in cols):
        print('Only numeric types allowed!')
        return None

    D = data.columns_data(headers)
    # The builtin ``float`` replaces the ``np.float`` alias, which was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
    return [np.std(D[:, i].astype(float)) for i in range(len(headers))]
def data_range(headers, data):
    """Return the ``[min, max]`` pair of each requested numeric column.

    Args:
        headers: List of column headers to inspect.
        data: Data object exposing ``header2col``, ``get_types()`` and
            ``columns_data(headers)``.

    Returns:
        A list of 2-element lists ``[min, max]`` (Python floats), one per
        header in the order given, or ``None`` (after printing a message)
        if any requested column is not numeric.
    """
    # Resolve every header first so an unknown header fails on the lookup
    # before any type checking happens.
    cols = [data.header2col[h] for h in headers]
    types = data.get_types()
    if any(types[c].strip() != 'numeric' for c in cols):
        print('Only numeric types allowed!')
        return None

    D = data.columns_data(headers)
    mins_maxes = []
    for i in range(len(headers)):
        # Convert each column once and reuse it for both extremes. The
        # builtin ``float`` replaces the ``np.float`` alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        column = D[:, i].astype(float)
        mins_maxes.append([float(np.min(column)), float(np.max(column))])
    return mins_maxes
def normalize_columns_separately(headers, data):
    """Min-max normalize each requested numeric column independently.

    Within every column the minimum is mapped to 0 and the maximum to 1.
    A column whose values are all equal has no range to scale by and is
    mapped to all zeros instead of dividing by zero.

    Args:
        headers: List of column headers to normalize.
        data: Data object exposing ``header2col``, ``get_types()`` and
            ``columns_data(headers)``.

    Returns:
        A new float matrix of the same container type as
        ``data.columns_data(headers)`` holding the normalized columns, or
        ``None`` (after printing a message) if any requested column is not
        numeric. The matrix returned by ``columns_data`` is not modified.
    """
    # Resolve every header first so an unknown header fails on the lookup
    # before any type checking happens.
    cols = [data.header2col[h] for h in headers]
    types = data.get_types()
    if any(types[c].strip() != 'numeric' for c in cols):
        print('Only numeric types allowed!')
        return None

    D = data.columns_data(headers)
    # ``astype`` returns a float copy; writing the results into a buffer
    # that kept an integer dtype would truncate them to 0 or 1. The builtin
    # ``float`` replaces the ``np.float`` alias removed in NumPy 1.24.
    normalized = D.astype(float)
    # Bounds are taken from the matrix already fetched, so the columns are
    # not validated and fetched a second time.
    for i in range(len(headers)):
        column = normalized[:, i]
        low = float(np.min(column))
        high = float(np.max(column))
        span = high - low
        if span == 0:
            # Constant column: avoid 0/0 producing NaN.
            normalized[:, i] = 0.0
        else:
            normalized[:, i] = (column - low) / span
    return normalized
def normalize_columns_together(headers, data):
    """Min-max normalize the requested numeric columns with shared bounds.

    The smallest value across all requested columns is mapped to 0 and the
    largest to 1. If every value is identical there is no range to scale
    by, and the result is all zeros instead of dividing by zero.

    Args:
        headers: List of column headers to normalize.
        data: Data object exposing ``header2col``, ``get_types()`` and
            ``columns_data(headers)``.

    Returns:
        A new float matrix of the same container type as
        ``data.columns_data(headers)`` holding the normalized values, or
        ``None`` (after printing a message) if any requested column is not
        numeric. The matrix returned by ``columns_data`` is not modified.
    """
    # Resolve every header first so an unknown header fails on the lookup
    # before any type checking happens.
    cols = [data.header2col[h] for h in headers]
    types = data.get_types()
    if any(types[c].strip() != 'numeric' for c in cols):
        print('Only numeric types allowed!')
        return None

    D = data.columns_data(headers)
    # ``astype`` returns a float copy; writing the results into a buffer
    # that kept an integer dtype would truncate them to 0 or 1. The builtin
    # ``float`` replaces the ``np.float`` alias removed in NumPy 1.24.
    values = D.astype(float)
    # The global extremes equal the min of the per-column minima and the
    # max of the per-column maxima, so they are read off the matrix directly.
    low = float(np.min(values))
    high = float(np.max(values))
    span = high - low
    if span == 0:
        # All values equal: avoid 0/0 producing NaN.
        values[:, :] = 0.0
        return values
    return (values - low) / span