def CentralAndNorm(self):
    """Centralize and normalize every column of ``self.total_dt`` except ``mcc``.

    The ``mcc`` column is set aside, the remaining columns are passed to
    ``centralizeOutlier`` (``devs=3``) and then to ``linearNormalize``
    (``ceil=1.0``, ``floor=-1.0``). ``self.total_dt`` is then rebuilt with
    the processed columns first and ``mcc`` as the last column.

    Returns:
        None. The result is stored back on ``self.total_dt``.
    """
    from centralize_outlier import centralizeOutlier
    from normalize import linearNormalize

    features = self.total_dt.drop(['mcc'], axis=1)
    # The return value is ignored, so this presumably modifies ``features``
    # in place -- TODO confirm against centralize_outlier.
    centralizeOutlier(features, devs=3)
    features = linearNormalize(features, ceil=1.0, floor=-1.0)
    # Spell out pd.DataFrame: the bare name ``df`` used here before is not
    # defined in this method, while ``pd`` is already relied on below.
    mcc = pd.DataFrame(self.total_dt['mcc'])
    self.total_dt = pd.concat([features, mcc], axis=1)
def NormAndCentralize(self, centralized=True):
    """Preprocess ``self.all_set`` in place, leaving its first column untouched.

    Column 0 of the 2-D array ``self.all_set`` is kept as-is (the code treats
    it as the ``mcc`` column). The remaining columns are wrapped in a
    DataFrame, optionally passed to ``centralizeOutlier`` with
    ``devs=self.devs``, then passed to ``linearNormalize`` with ``ceil=1.0``
    and ``floor=-1.0``. The array is reassembled with column 0 first.

    Args:
        centralized: when equal to ``True``, run ``centralizeOutlier`` before
            normalizing.

    Returns:
        None. The result is stored back on ``self.all_set``.
    """
    labels = self.all_set[:, 0]
    features = pd.DataFrame(self.all_set[:, 1:])

    # Explicit comparison kept on purpose so that non-bool arguments are
    # treated exactly as before.
    if centralized == True:  # noqa: E712
        # Return value unused; presumably an in-place operation -- TODO confirm.
        centralizeOutlier(features, devs=self.devs)

    normalized = linearNormalize(features, ceil=1.0, floor=-1.0)
    self.all_set = np.column_stack((labels, normalized.values))
def preSvcData(self, print_it=False):
    """Prepare ``self.svc_sheet`` for SVC use.

    Every column except ``mcc`` is passed to ``centralizeOutlier``
    (``devs=self.devs``) and then to ``linearNormalize`` (``ceil=1.0``,
    ``floor=-1.0``). ``self.svc_sheet`` is rebuilt with the processed
    columns first and ``mcc`` as the last column.

    Args:
        print_it: accepted for interface compatibility; this method does not
            currently read it, so nothing is printed.

    Returns:
        None. The result is stored back on ``self.svc_sheet``.
    """
    features = self.svc_sheet.drop(['mcc'], axis=1)
    # Return value unused; presumably an in-place operation -- TODO confirm.
    centralizeOutlier(features, devs=self.devs)
    scaled = linearNormalize(features, ceil=1.0, floor=-1.0)

    mcc_column = pd.DataFrame(self.svc_sheet['mcc'])
    self.svc_sheet = pd.concat([scaled, mcc_column], axis=1)
def preAllData(self, print_it=False, centralized=True):
    """Prepare the whole data set held in ``self.all_sheet``.

    Every column except ``mcc`` is optionally passed to ``centralizeOutlier``
    (``devs=self.devs``) and then always passed to ``linearNormalize``
    (``ceil=1.0``, ``floor=-1.0``). ``self.all_sheet`` is rebuilt with the
    processed columns first and ``mcc`` as the last column.

    Args:
        print_it: accepted for interface compatibility; this method does not
            currently read it, so nothing is printed.
        centralized: when equal to ``True``, run ``centralizeOutlier`` before
            normalizing.

    Returns:
        None. The result is stored back on ``self.all_sheet``.
    """
    features = self.all_sheet.drop(['mcc'], axis=1)

    # Explicit comparison kept on purpose so that non-bool arguments are
    # treated exactly as before.
    if centralized == True:  # noqa: E712
        # Return value unused; presumably an in-place operation -- TODO confirm.
        centralizeOutlier(features, devs=self.devs)

    scaled = linearNormalize(features, ceil=1.0, floor=-1.0)
    mcc_column = pd.DataFrame(self.all_sheet['mcc'])
    self.all_sheet = pd.concat([scaled, mcc_column], axis=1)
def processMccEner(self, centralized=True, normed=True):
    """Load the data set from the HDF5 file and preprocess it.

    Reads the dataset at ``self.all_path`` from the file ``self.h5_path``
    into ``self.whole_dt``, labels its columns with
    ``self.rep_ener_columns`` and calls ``self.dropWrongMcc()``. Every
    column except ``mcc`` is then optionally passed to ``centralizeOutlier``
    (``devs=self.devs``) and optionally to ``linearNormalize``
    (``ceil=1.0``, ``floor=-1.0``). ``self.whole_dt`` is rebuilt with
    ``mcc`` as the first column followed by the processed columns.

    Args:
        centralized: when equal to ``True``, run ``centralizeOutlier``.
        normed: when equal to ``True``, run ``linearNormalize``.

    Returns:
        None. The result is stored back on ``self.whole_dt``.
    """
    # The context manager closes the file even if reading the dataset raises.
    with h5py.File(self.h5_path) as f:
        dt = f[self.all_path][()]

    self.whole_dt = pd.DataFrame(dt)
    self.whole_dt.columns = self.rep_ener_columns
    self.dropWrongMcc()

    # Split off the non-mcc columns unconditionally. Doing this only inside
    # the ``centralized`` branch left the variable unbound (and the method
    # crashing) whenever ``centralized`` was False.
    features = self.whole_dt.drop(['mcc'], axis=1)

    # Explicit comparisons kept so non-bool arguments behave as before.
    if centralized == True:  # noqa: E712
        # Return value unused; presumably an in-place operation -- TODO confirm.
        centralizeOutlier(features, devs=self.devs)
    if normed == True:  # noqa: E712
        features = linearNormalize(features, ceil=1.0, floor=-1.0)

    mcc = pd.DataFrame(self.whole_dt['mcc'])
    self.whole_dt = pd.concat([mcc, features], axis=1)