def compute_mean_by_serial_number(self, X):
    """Compute per-equipment and overall feature means from ``X``.

    The work is done on a copy of ``X`` so the caller's frame is not
    modified. The copy is first passed through ``replace_error_with_nan``;
    values turned into NaN there do not contribute to the means because
    pandas ``mean`` skips NaN by default.

    Results are stored on the instance:

    * ``self.equipment_average`` -- dict keyed by each distinct
      ``UNIT_SRL_NUM`` value, holding the mean of ``self.features`` over
      that serial number's rows.
    * ``self.all_equipment_average`` -- mean of ``self.features`` over
      every row.

    Args:
        X: DataFrame containing a ``UNIT_SRL_NUM`` column.
    """
    Xcopy = X.copy()
    self.replace_error_with_nan(Xcopy)

    srl_num_list = Xcopy['UNIT_SRL_NUM'].drop_duplicates().tolist()

    print("\t{}: Computing average value of each equipment".format(mark_timestamp()))
    # One grouped pass instead of a full-frame boolean scan per serial
    # number. sort=False keeps first-appearance order and avoids comparing
    # keys of mixed types.
    equipment_average = {}
    for srl_num, unit_rows in Xcopy.groupby('UNIT_SRL_NUM', sort=False):
        equipment_average[srl_num] = unit_rows[self.features].mean()

    # groupby omits missing (NaN/None) serial numbers. Fall back to the
    # direct equality filter for any listed key it did not emit, so every
    # distinct serial number still gets an entry.
    for srl_num in srl_num_list:
        if srl_num not in equipment_average:
            unit_rows = Xcopy[Xcopy['UNIT_SRL_NUM'] == srl_num]
            equipment_average[srl_num] = unit_rows[self.features].mean()
    self.equipment_average = equipment_average

    print("\t{}: Computing average value of all equipment".format(mark_timestamp()))
    self.all_equipment_average = Xcopy[self.features].mean()
def replace_error_with_nan(self, X):
    """Replace out-of-range feature readings in ``X`` with NaN, in place.

    For every name in ``self.features``:

    * if the column exists, values below 0 or above 1E4 are set to NaN;
    * if the column is missing, it is added to ``X`` filled with NaN.

    Args:
        X: DataFrame to clean. It is modified in place.

    Returns:
        The same ``X`` object that was passed in.
    """
    print("\t{}: Separate sensor value from mean calculation".format(mark_timestamp()))
    for c in self.features:
        if c not in X.columns:
            X[c] = np.nan
            continue

        out_of_range = (X[c] < 0) | (X[c] > 1E4)
        if out_of_range.any():
            # Assign through the boolean mask rather than through index
            # labels: with a non-unique index, label-based assignment would
            # also blank valid rows that share a label with an abnormal one.
            X.loc[out_of_range, c] = np.nan
    return X
def fit(self, X, y=None):
    """Build the per-hours-bin reference means from oil analysis data.

    Rows are kept only when ``MODL_NUM`` equals ``self.unit_model``,
    ``COMPONENT`` equals ``self.component`` and ``HRS_KM_TOT`` lies in
    ``[self.unit_hours_bin, self.standard_lifetime)``. Each kept row is
    assigned a group value: ``self.map_unit_hours(hours)`` truncated to a
    multiple of ``self.unit_hours_bin``. The mean of ``self.features`` per
    group is stored in ``self.reference``, indexed by ``UNIT_HOURS_GROUP``.

    Args:
        X: DataFrame with ``MODL_NUM``, ``COMPONENT``, ``HRS_KM_TOT`` and
            the columns named in ``self.features``. It is not modified.
        y: Unused; accepted for estimator-style call compatibility.

    Returns:
        ``self``.
    """
    print("{}: Fitting Oil Analyisis data for references".format(mark_timestamp()))

    in_scope = (
        (X['MODL_NUM'] == self.unit_model)
        & (X['COMPONENT'] == self.component)
        & (X['HRS_KM_TOT'] < self.standard_lifetime)
        & (X['HRS_KM_TOT'] >= self.unit_hours_bin)
    )
    reference_rows = X.loc[in_scope, ['HRS_KM_TOT'] + self.features]

    # Group on a separate key Series instead of adding a column to the
    # filtered frame: writing into a slice of X raises SettingWithCopyWarning
    # and may write to a temporary.
    hours_group = reference_rows['HRS_KM_TOT'].map(
        lambda x: int(self.map_unit_hours(x) / self.unit_hours_bin) * self.unit_hours_bin
    ).rename('UNIT_HOURS_GROUP')

    self.reference = reference_rows.groupby(hours_group)[self.features].mean()
    return self
def replace_error_with_average_serial_number(self, X):
    """Replace missing or out-of-range feature values with fitted averages.

    For every name in ``self.features``:

    * if the column exists, each value that is null, below 0 or above 1E4
      is replaced. Rows whose ``UNIT_SRL_NUM`` is in
      ``self.equipment_catalogue`` get
      ``self.get_equipment_average(serial).loc[feature]``; all other rows
      get ``self.all_equipment_average.loc[feature]``.
    * if the column is missing, it is created and filled with
      ``self.all_equipment_average.loc[feature]``.

    The column is then cast to ``np.double``.

    Args:
        X: DataFrame with a ``UNIT_SRL_NUM`` column. It is modified in
            place.

    Returns:
        The same ``X`` object that was passed in.
    """
    print("\t{}: Replacing error value with average for each serial number".format(mark_timestamp()))

    # Build the membership set once; testing against the list would be a
    # linear scan for every replaced row.
    known_equipment = set(self.equipment_catalogue)

    for c in self.features:
        if c in X.columns:
            # Rows whose reading is missing or outside the accepted range.
            abnormal = X[c].isnull() | (X[c] < 0) | (X[c] > 1E4)
            if abnormal.any():
                replacement = X.loc[abnormal, 'UNIT_SRL_NUM'].map(
                    lambda srl_num: self.get_equipment_average(srl_num).loc[c]
                    if srl_num in known_equipment
                    else self.all_equipment_average.loc[c])
                # Select by boolean mask and assign positionally: label-based
                # selection and alignment touch the wrong rows (or fail) when
                # the index has duplicate labels.
                X.loc[abnormal, c] = replacement.values
        else:
            X[c] = self.all_equipment_average.loc[c]
        X[c] = X[c].astype(np.double)
    return X
def transform(self, X, y=None):
    """Replace erroneous feature values in ``X`` and return it.

    Delegates to ``replace_error_with_average_serial_number``, which
    operates on ``X`` directly, so the caller's frame is modified.

    Args:
        X: DataFrame to transform.
        y: Unused; accepted for estimator-style call compatibility.

    Returns:
        The same ``X`` object that was passed in.
    """
    banner = "{}: Transforming data".format(mark_timestamp())
    print(banner)

    self.replace_error_with_average_serial_number(X)
    return X
def fit(self, X, y=None):
    """Record the known serial numbers and their average feature values.

    Stores the distinct ``UNIT_SRL_NUM`` values of ``X`` as a list in
    ``self.equipment_catalogue``, then calls
    ``compute_mean_by_serial_number`` on ``X``.

    Args:
        X: DataFrame with a ``UNIT_SRL_NUM`` column.
        y: Unused; accepted for estimator-style call compatibility.

    Returns:
        ``self``.
    """
    start_message = "{}: Fitting sensor error scaler with data".format(mark_timestamp())
    print(start_message)

    distinct_serials = X['UNIT_SRL_NUM'].drop_duplicates()
    self.equipment_catalogue = distinct_serials.tolist()

    self.compute_mean_by_serial_number(X)

    done_message = "\t{}: Finish fitting scaler".format(mark_timestamp())
    print(done_message)
    return self