Exemple #1
0
 def compute_mean_by_serial_number(self, X):
     """Compute per-serial-number and overall feature means.

     Works on a copy of ``X`` that is first passed through
     ``self.replace_error_with_nan``, so ``X`` itself is not modified here.

     Stores two attributes:

     * ``self.equipment_average``: dict mapping each distinct
       ``UNIT_SRL_NUM`` value to the mean of ``self.features`` over the
       rows carrying that value;
     * ``self.all_equipment_average``: mean of ``self.features`` over
       every row.
     """
     cleaned = X.copy()
     self.replace_error_with_nan(cleaned)

     per_unit = {}
     self.equipment_average = per_unit
     serial_column = cleaned['UNIT_SRL_NUM']
     distinct_serials = serial_column.drop_duplicates().tolist()

     print("\t{}: Computing average value of each equipment".format(mark_timestamp()))
     for serial in distinct_serials:
         # Select this unit's rows and only the feature columns, then average.
         per_unit[serial] = cleaned.loc[serial_column == serial, self.features].mean()

     print("\t{}: Computing average value of all equipment".format(mark_timestamp()))
     self.all_equipment_average = cleaned[self.features].mean()
Exemple #2
0
 def replace_error_with_nan(self, X):
     """Blank out out-of-range feature values in ``X`` in place.

     For every name in ``self.features``: if the column exists, entries
     that are negative or greater than 1E4 are set to NaN; if the column
     is missing, it is created and filled with NaN.

     Returns the same ``X`` object that was passed in.
     """
     print("\t{}: Separate sensor value from mean calculation".format(mark_timestamp()))
     for feature in self.features:
         if feature not in X.columns:
             # Missing feature: add it as an all-NaN column.
             X[feature] = np.nan
             continue

         values = X[feature]
         out_of_range = (values < 0) | (values > 1E4)
         bad_rows = X[out_of_range].index
         if len(bad_rows) > 0:
             X.loc[bad_rows, feature] = np.nan
     return X
Exemple #3
0
 def fit(self, X, y=None):
     """Build the per-hour-group reference averages from ``X``.

     Keeps the rows of ``X`` whose ``MODL_NUM`` equals ``self.unit_model``,
     whose ``COMPONENT`` equals ``self.component`` and whose ``HRS_KM_TOT``
     is at least ``self.unit_hours_bin`` and below
     ``self.standard_lifetime``. Each kept row gets a ``UNIT_HOURS_GROUP``
     computed as ``int(self.map_unit_hours(hours) / self.unit_hours_bin)``
     multiplied by ``self.unit_hours_bin``. The mean of ``self.features``
     per group is stored in ``self.reference``.

     ``y`` is accepted but not used. The caller's ``X`` is not modified.

     Returns ``self``.
     """
     print("{}: Fitting Oil Analyisis data for references".format(mark_timestamp()))
     hours = X['HRS_KM_TOT']
     keep = (
         (X['MODL_NUM'] == self.unit_model)
         & (X['COMPONENT'] == self.component)
         & (hours < self.standard_lifetime)
         & (hours >= self.unit_hours_bin)
     )
     # Select rows and columns in one .loc call and take an explicit copy:
     # a column is added below, and assigning onto a chained-indexing
     # result triggers pandas' SettingWithCopyWarning.
     selected = X.loc[keep, ['HRS_KM_TOT'] + self.features].copy()
     selected['UNIT_HOURS_GROUP'] = selected['HRS_KM_TOT'].map(
         lambda x: int(self.map_unit_hours(x) / self.unit_hours_bin) * self.unit_hours_bin)
     self.reference = selected.groupby(['UNIT_HOURS_GROUP'])[self.features].mean()
     return self
Exemple #4
0
    def replace_error_with_average_serial_number(self, X):
        """Overwrite missing or out-of-range feature values in ``X`` in place.

        For every name in ``self.features``:

        * if the column exists in ``X``, entries that are null, negative or
          greater than 1E4 are replaced. A row whose ``UNIT_SRL_NUM`` is in
          ``self.equipment_catalogue`` takes
          ``self.get_equipment_average(serial).loc[column]``; any other row
          takes ``self.all_equipment_average.loc[column]``;
        * if the column is absent, it is created and filled with
          ``self.all_equipment_average.loc[column]``.

        Each feature column is finally cast to ``np.double``.

        Returns the same ``X`` object that was passed in.
        """
        # Build the lookup once per call: membership is tested for every
        # abnormal row, and a set avoids rescanning the catalogue each time.
        known_equipment = set(self.equipment_catalogue)

        print("\t{}: Replacing error value with average for each serial number".format(mark_timestamp()))
        for c in self.features:
            if c in X.columns:
                # Index labels of the rows whose value is anomalous.
                abnormal_idx = X[(X[c].isnull()) | (X[c] < 0) | (X[c] > 1E4)].index
                # Replace each anomalous value with the unit's own average,
                # falling back to the overall average for unknown units.
                X.loc[abnormal_idx, c] = X.loc[abnormal_idx, 'UNIT_SRL_NUM']\
                    .map(lambda x: self.get_equipment_average(x).loc[c]
                         if x in known_equipment else self.all_equipment_average.loc[c])
            else:
                X[c] = self.all_equipment_average.loc[c]
            X[c] = X[c].astype(np.double)
        return X
Exemple #5
0
 def transform(self, X, y=None):
     """Apply ``replace_error_with_average_serial_number`` to ``X``.

     ``X`` is handed to that method directly (no copy is made here) and the
     same object is returned. ``y`` is accepted but not used.
     """
     stamp = mark_timestamp()
     print("{}: Transforming data".format(stamp))
     # The helper's return value is not needed: the object passed in is
     # the one returned to the caller.
     self.replace_error_with_average_serial_number(X)
     return X
Exemple #6
0
 def fit(self, X, y=None):
     """Record the known serial numbers and compute their averages.

     Stores the distinct ``UNIT_SRL_NUM`` values of ``X`` as a list in
     ``self.equipment_catalogue``, then delegates to
     ``self.compute_mean_by_serial_number(X)``. ``y`` is accepted but not
     used.

     Returns ``self``.
     """
     print("{}: Fitting sensor error scaler with data".format(mark_timestamp()))
     serial_numbers = X['UNIT_SRL_NUM']
     distinct_serials = serial_numbers.drop_duplicates()
     self.equipment_catalogue = distinct_serials.tolist()
     self.compute_mean_by_serial_number(X)
     print("\t{}: Finish fitting scaler".format(mark_timestamp()))
     return self