def dataset_prepare_SU(self, sample_num, sample_len):
    """
    Build the SU bearing and gear datasets and store them in the cache folder.

    The file groups returned by [filename_SU] are loaded from self.path_npz
    (group 0 -> bearing, group 1 -> gear; only these two groups are supported).
    Every signal is validated, split into samples by
    ut.signal_split_as_samples and paired with 5-class one-hot labels built
    from the per-file label. Two archives are written into self.path_cache:
        1. SU_1D_Bearing_DataSet.npz
        2. SU_1D_Gear_DataSet.npz
    each holding the arrays "data" and "labels".

    According to the original author's note, 20 testing-case-level sub-sets
    are generated in total and the two datasets are prepared for one kind of
    transferring scenario: working condition transferring.

    The working directory is switched while loading and saving. It is always
    set back to self.path before leaving, also when an error is raised
    part-way through.

    Args:
        sample_num: number of samples (and label rows) produced per signal.
        sample_len: sample length handed to ut.signal_split_as_samples.

    Returns:
        0 once both archives have been written.

    Raises:
        ValueError: if a loaded signal is not 2-D or its first dimension
            (the channel count) is not 8.
    """
    logging.info(
        "generating new dataset with each case has (sample_num = %d, sample_len = %d)"
        % (sample_num, sample_len))
    filenames, filelabels = self.filename_SU()
    data, labels = [[], []], [[], []]

    os.chdir(self.path_npz)
    try:
        for k, files in enumerate(filenames):
            for idx, file in enumerate(files):
                # NOTE(review): assumes the inputs are .npz archives (np.load
                # then returns a closable NpzFile) -- confirm against the
                # code that writes self.path_npz.
                with np.load(file) as archive:
                    signal = archive["arr"]
                if len(signal.shape) != 2:
                    raise ValueError(
                        "excepted signal has 2 dims, but got %d dim(s)"
                        % len(signal.shape))
                if np.shape(signal)[0] != 8:
                    raise ValueError(
                        "excepted signal's channels' number is 8 , but got %d"
                        % np.shape(signal)[0])

                samples = ut.signal_split_as_samples(signal, sample_len,
                                                     sample_num)
                data[k].append(samples)

                # one label row per sample, all carrying the file's class id
                sample_labels = np.ones((sample_num, 1)) * filelabels[k][idx]
                # TODO (left by the original author as "fix me"): for the
                # bearing group, files whose name contains "comb" (combined
                # inner and outer fault) may additionally need
                # sample_labels[:, 3] = 1. Until then both groups are encoded
                # the same way.
                sample_labels = ut.onehot_encoding(sample_labels, 5)
                labels[k].append(sample_labels)

        data_bearing, data_gear = np.array(data[0]), np.array(data[1])
        labels_bearing, labels_gear = np.array(labels[0]), np.array(labels[1])

        os.chdir(self.path_cache)
        np.savez("SU_1D_Bearing_DataSet.npz",
                 data=data_bearing, labels=labels_bearing)
        np.savez("SU_1D_Gear_DataSet.npz",
                 data=data_gear, labels=labels_gear)
    finally:
        # never leave the process inside the data/cache folder
        os.chdir(self.path)

    logging.info("new dataset has been stored")
    return 0
def dataset_prepare_CWRU(self, sample_num, sample_len):
    """
    Assemble the CWRU dataset and write it to CWRU_1D_signal.npz.

    Signals and their two label lists are obtained from [data_clean_CWRU]
    (which, per the original author's note, reads them from text files).
    Each signal is cut into samples by ut.signal_split_as_samples; the
    author documents the sample block shape as
    (sample_num, channels, sample_len). Every signal receives two one-hot
    label blocks: 10 classes built from the first label list (saved as
    "labels_wc") and 4 classes built from the second one (saved as
    "labels_fs").

    Afterwards the last 4 signals are sampled two more times and labelled
    with class 0 for both label kinds. The author describes these as the
    normal cases, which gives 48 sub-sets (40 plus 8 repeated ones).
    The author's note lists 2 transferring scenarios for this dataset:
        working condition transferring
        fault extent transferring

    sample_num: number of samples (and label rows) produced per signal
    sample_len: sample length handed to ut.signal_split_as_samples
    returns: 0 after the archive has been written into self.path_cache;
             the working directory is self.path afterwards
    """
    message = ("generating new dataset with each case has "
               "(sample_num = %d, sample_len = %d)")
    logging.info(message % (sample_num, sample_len))

    signals, filelabels_wc, filelabels_fs = self.data_clean_CWRU()
    label_shape = (sample_num, 1)
    data, labels_wc, labels_fs = [], [], []

    # one sub-set per cleaned signal, labelled from the two label lists
    for idx, signal in enumerate(signals):
        data.append(
            ut.signal_split_as_samples(signal, sample_len, sample_num))
        wc_column = np.ones(label_shape) * filelabels_wc[idx]
        labels_wc.append(ut.onehot_encoding(wc_column, class_num=10))
        fs_column = np.ones(label_shape) * filelabels_fs[idx]
        labels_fs.append(ut.onehot_encoding(fs_column, class_num=4))

    # the last four signals are sampled again, twice, always as class 0
    repeat_round = 0
    while repeat_round < 2:
        for signal in signals[-4:]:
            data.append(
                ut.signal_split_as_samples(signal, sample_len, sample_num))
            wc_column = np.zeros(label_shape)
            labels_wc.append(ut.onehot_encoding(wc_column, class_num=10))
            fs_column = np.zeros(label_shape)
            labels_fs.append(ut.onehot_encoding(fs_column, class_num=4))
        repeat_round += 1

    data = np.array(data)
    labels_wc = np.array(labels_wc)
    labels_fs = np.array(labels_fs)

    os.chdir(self.path_cache)
    np.savez("CWRU_1D_signal.npz",
             data=data, labels_wc=labels_wc, labels_fs=labels_fs)
    os.chdir(self.path)

    logging.info("new dataset has been stored")
    return 0
def normal_set_prepare_CWRU(self, sample_num, sample_len):
    """
    Build a CWRU dataset that only contains the "normal" files and store it
    as CWRU_1D_normal_signal.npz in self.path_cache.

    The file list comes from [filename_CWRU], which is called from inside
    self.path_txt. Only files whose name contains "normal" are used. Each of
    them is read as text with one float per line and turned into a
    (1, length) signal. Every signal is split into samples by
    ut.signal_split_as_samples and labelled with class 0 for both label
    kinds: a 10-class one-hot block saved as "labels_wc" and a 4-class
    one-hot block saved as "labels_fs".

    The working directory is switched while reading and saving. It is always
    set back to self.path before leaving, also when reading or parsing a
    file fails.

    Args:
        sample_num: number of samples (and label rows) produced per signal.
        sample_len: sample length handed to ut.signal_split_as_samples.

    Returns:
        0 once the archive has been written.

    Raises:
        IOError: if one of the normal files cannot be opened or read.
        ValueError: if a line of a normal file cannot be parsed as a float.
    """
    logging.info(
        "generating normal set with each case has (sample_num = %d, sample_len = %d)"
        % (sample_num, sample_len))

    os.chdir(self.path_txt)
    try:
        # the two label lists are not needed: normal data is always class 0
        files, _, _ = self.filename_CWRU()
        signals = []
        for file in files:
            if "normal" not in file:
                continue
            try:
                with open(file, "r") as fr:
                    signal = np.array([float(item.strip()) for item in fr])
            except IOError:
                raise IOError("some wrong with the file %s" % file)
            signals.append(signal.reshape(1, -1))
        os.chdir(self.path)

        data, labels_wc, labels_fs = [], [], []
        for signal in signals:
            data.append(
                ut.signal_split_as_samples(signal, sample_len, sample_num))
            sample_labels_wc = np.zeros((sample_num, 1))
            labels_wc.append(
                ut.onehot_encoding(sample_labels_wc, class_num=10))
            sample_labels_fs = np.zeros((sample_num, 1))
            labels_fs.append(
                ut.onehot_encoding(sample_labels_fs, class_num=4))
        data, labels_wc, labels_fs = np.array(data), np.array(
            labels_wc), np.array(labels_fs)

        os.chdir(self.path_cache)
        np.savez("CWRU_1D_normal_signal.npz",
                 data=data, labels_wc=labels_wc, labels_fs=labels_fs)
    finally:
        # never leave the process inside the text/cache folder
        os.chdir(self.path)

    logging.info("new normal set has been stored")
    return 0
def dataset_prepare_MFPT(self, sample_num, sample_len):
    """
    Assemble the MFPT bearing dataset and write it to
    MFPT_1D_Bearing_DataSet.npz.

    Signals and their labels are obtained from [data_clean_MFPT] (which, per
    the original author's note, reads them from .txt files). Each signal is
    cut into samples by ut.signal_split_as_samples; the author documents the
    sample block shape as (sample_num, channels, sample_len). Every signal
    receives a 3-class one-hot label block built from its label.

    Afterwards the last 3 signals are sampled once more and labelled as
    class 0. The author describes these as the normal cases, which gives
    18 sub-sets (15 plus 3 repeated ones).
    The author's note lists 1 transferring scenario for this dataset:
        working condition transferring

    sample_num: number of samples (and label rows) produced per signal
    sample_len: sample length handed to ut.signal_split_as_samples
    returns: 0 after the archive (arrays "data" and "labels") has been
             written into self.path_cache; the working directory is
             self.path afterwards
    """
    message = ("generating new dataset with each case has "
               "(sample_num = %d, sample_len = %d)")
    logging.info(message % (sample_num, sample_len))

    signals, filelabels = self.data_clean_MFPT()
    label_shape = (sample_num, 1)
    data = []
    labels = []

    # one sub-set per cleaned signal, labelled with the signal's class id
    for idx, signal in enumerate(signals):
        data.append(
            ut.signal_split_as_samples(signal, sample_len, sample_num))
        label_column = np.ones(label_shape) * filelabels[idx]
        labels.append(ut.onehot_encoding(label_column, class_num=3))

    # the last three signals are sampled a second time, always as class 0
    for signal in signals[-3:]:
        data.append(
            ut.signal_split_as_samples(signal, sample_len, sample_num))
        label_column = np.zeros(label_shape)
        labels.append(ut.onehot_encoding(label_column, class_num=3))

    data = np.array(data)
    labels = np.array(labels)

    os.chdir(self.path_cache)
    np.savez("MFPT_1D_Bearing_DataSet.npz", data=data, labels=labels)
    os.chdir(self.path)

    logging.info("new dataset has been stored")
    return 0
def normal_set_prepare_SU(self, sample_num, sample_len):
    """
    Build SU datasets that only contain the healthy files and store them in
    the cache folder.

    The file groups returned by [filename_SU] are scanned in self.path_npz
    (group 0 -> bearing, group 1 -> gear; only these two groups are
    supported). Only files whose name contains "health" or "Health" are used.
    Every such signal is validated, split into samples by
    ut.signal_split_as_samples and labelled as class 0 in a 5-class one-hot
    encoding. Two archives are written into self.path_cache:
        1. SU_1D_Healthy_Bearing_DataSet.npz
        2. SU_1D_Healthy_Gear_DataSet.npz
    each holding the arrays "data" and "labels".

    The working directory is switched while loading and saving. It is always
    set back to self.path before leaving, also when an error is raised
    part-way through.

    Args:
        sample_num: number of samples (and label rows) produced per signal.
        sample_len: sample length handed to ut.signal_split_as_samples.

    Returns:
        0 once both archives have been written.

    Raises:
        ValueError: if a loaded signal is not 2-D or its first dimension
            (the channel count) is not 8.
    """
    logging.info(
        "generating normal set with each case has (sample_num = %d, sample_len = %d)"
        % (sample_num, sample_len))
    # the per-file labels are not needed: healthy data is always class 0
    filenames, _ = self.filename_SU()
    data, labels = [[], []], [[], []]

    os.chdir(self.path_npz)
    try:
        for k, files in enumerate(filenames):
            for file in files:
                if "health" not in file and "Health" not in file:
                    continue
                # NOTE(review): assumes the inputs are .npz archives (np.load
                # then returns a closable NpzFile) -- confirm against the
                # code that writes self.path_npz.
                with np.load(file) as archive:
                    signal = archive["arr"]
                if len(signal.shape) != 2:
                    raise ValueError(
                        "excepted signal has 2 dims, but got %d dim(s)"
                        % len(signal.shape))
                if np.shape(signal)[0] != 8:
                    raise ValueError(
                        "excepted signal's channels' number is 8 , but got %d"
                        % np.shape(signal)[0])

                samples = ut.signal_split_as_samples(signal, sample_len,
                                                     sample_num)
                data[k].append(samples)
                sample_labels = np.zeros((sample_num, 1))
                labels[k].append(ut.onehot_encoding(sample_labels, 5))

        data_bearing, data_gear = np.array(data[0]), np.array(data[1])
        labels_bearing, labels_gear = np.array(labels[0]), np.array(labels[1])

        os.chdir(self.path_cache)
        np.savez("SU_1D_Healthy_Bearing_DataSet.npz",
                 data=data_bearing, labels=labels_bearing)
        np.savez("SU_1D_Healthy_Gear_DataSet.npz",
                 data=data_gear, labels=labels_gear)
    finally:
        # never leave the process inside the data/cache folder
        os.chdir(self.path)

    logging.info("new normal set has been stored")
    return 0
def normal_set_prepare_MFPT(self, sample_num, sample_len):
    """
    Build an MFPT dataset that only contains the "normal" files and store it
    as MFPT_1D_Normal_Bearing_DataSet.npz in self.path_cache.

    The file list comes from [filename_MFPT], which is called from inside
    self.path_txt. Only files whose name contains "normal" are used. Each of
    them is read as text with one float per line, reduced to every second
    value and turned into a (1, length) signal. The whole list of normal
    signals is then used twice (first pass followed by an identical second
    pass). The original author's note explains this as matching six working
    conditions with 3 normal bearings. Every entry is split into samples by
    ut.signal_split_as_samples and labelled as class 0 in a 3-class one-hot
    encoding. The archive holds the arrays "data" and "labels".

    The working directory is switched while reading and saving. It is always
    set back to self.path before leaving, also when reading or parsing a
    file fails.

    Args:
        sample_num: number of samples (and label rows) produced per signal.
        sample_len: sample length handed to ut.signal_split_as_samples.

    Returns:
        0 once the archive has been written.

    Raises:
        IOError: if one of the normal files cannot be opened or read.
        ValueError: if a line of a normal file cannot be parsed as a float.
    """
    logging.info(
        "generating normal set with each case has (sample_num = %d, sample_len = %d)"
        % (sample_num, sample_len))

    os.chdir(self.path_txt)
    try:
        # the labels are not needed: normal data is always class 0
        files, _ = self.filename_MFPT()
        normal_signals = []
        for file in files:
            if "normal" not in file:
                continue
            try:
                with open(file, "r") as fr:
                    signal = np.array([float(item.strip()) for item in fr])
            except IOError:
                raise IOError("some wrong with the file %s" % file)
            # keep every second value only
            # NOTE(review): presumably a 2x down-sampling to match the other
            # MFPT signals -- confirm against data_clean_MFPT.
            signal = signal[::2]
            normal_signals.append(signal.reshape(1, -1))
        # Each file is parsed once; the second pass uses independent copies
        # so no array object is shared between the two passes.
        signals = normal_signals + [item.copy() for item in normal_signals]
        os.chdir(self.path)

        data, labels = [], []
        for signal in signals:
            data.append(
                ut.signal_split_as_samples(signal, sample_len, sample_num))
            sample_labels_wc = np.zeros((sample_num, 1))
            labels.append(ut.onehot_encoding(sample_labels_wc, class_num=3))
        data, labels = np.array(data), np.array(labels)

        os.chdir(self.path_cache)
        np.savez("MFPT_1D_Normal_Bearing_DataSet.npz",
                 data=data, labels=labels)
    finally:
        # never leave the process inside the text/cache folder
        os.chdir(self.path)

    logging.info("new normal aset has been stored")
    return 0
def dataset_prepare_PU(self, sample_num, sample_len):
    """
    Build the PU dataset and store it as PU_1D_signal.npz in self.path_cache.

    The files returned by [filename_PU] are loaded from self.path_npz. Each
    file must hold 80 signals (a 2-D array with 80 rows). Every signal is
    split into int(sample_num / 20) samples by ut.signal_split_as_samples,
    and the 80 sample blocks are regrouped into 4 groups of 20 signals each,
    giving an array of shape (4, 20 * int(sample_num / 20), 1, sample_len)
    per file. What the 4 groups stand for is not visible here; presumably
    they are the working conditions -- confirm against the code that writes
    self.path_npz.

    Labels are 3-class one-hot rows built from the per-file label and
    regrouped the same way. Files for combined faults get soft labels
    instead: for names containing "Inner_Outer", columns 1 and 2 are set to
    0.8 and 0.2; for names containing "Outer_Inner", to 0.2 and 0.8.

    According to the original author's note, 32 testing-case-level sub-sets
    are generated and the dataset is prepared for two kinds of transferring
    scenarios:
        working condition transferring
        artificial-to-real transferring

    The working directory is switched while loading and saving. It is always
    set back to self.path before leaving, also when an error is raised
    part-way through.

    Args:
        sample_num: requested number of samples per group of 20 signals;
            each signal contributes int(sample_num / 20) samples.
        sample_len: sample length handed to ut.signal_split_as_samples.

    Returns:
        0 once the archive (arrays "data" and "labels") has been written.

    Raises:
        ValueError: if a loaded array is not 2-D or does not have 80 rows.
    """
    logging.info(
        "generating new dataset with each case has (sample_num = %d, sample_len = %d)"
        % (sample_num, sample_len))
    # get filenames and the labels corresponding to each file
    filenames, filelabels = self.filename_PU()

    # read each file, sampling examples from original vibration signals
    os.chdir(self.path_npz)
    try:
        # samples drawn from each single signal (20 signals form one group)
        sample_num = int(sample_num / 20)
        data, labels = [], []
        for k, file in enumerate(filenames):
            # NOTE(review): assumes the inputs are .npz archives (np.load
            # then returns a closable NpzFile) -- confirm against the code
            # that writes self.path_npz.
            with np.load(file) as archive:
                signals = archive["arr"]
            if len(signals.shape) != 2:
                raise ValueError(
                    "excepted signals have 2 dims, but got %d dim(s)"
                    % len(signals.shape))
            if np.shape(signals)[0] != 80:
                raise ValueError(
                    "excepted signals' number is 80 , but got %d"
                    % np.shape(signals)[0])

            # give every signal an explicit single-channel axis
            num, length = signals.shape
            signals = signals.reshape(num, 1, length)
            samples = [
                ut.signal_split_as_samples(one_signal, sample_len, sample_num)
                for one_signal in signals
            ]
            # 80 signals -> 4 groups of 20 signals each
            data.append(
                np.array(samples).reshape(4, 20 * sample_num, 1, sample_len))

            sample_labels = np.ones((80 * sample_num, 1)) * filelabels[k]
            sample_labels = ut.onehot_encoding(sample_labels, 3)
            sample_labels = sample_labels.reshape(4, 20 * sample_num, -1)
            # considering the combination of inner and outer fault:
            # soft labels replace the hard one-hot entries
            if "Inner_Outer" in file:
                sample_labels[:, :, 1] = 0.8
                sample_labels[:, :, 2] = 0.2
            if "Outer_Inner" in file:
                sample_labels[:, :, 1] = 0.2
                sample_labels[:, :, 2] = 0.8
            labels.append(sample_labels)

        data, labels = np.array(data), np.array(labels)

        os.chdir(self.path_cache)
        np.savez("PU_1D_signal.npz", data=data, labels=labels)
    finally:
        # never leave the process inside the data/cache folder
        os.chdir(self.path)

    logging.info("new dataset has been stored")
    return 0