def _removeBaseline(spectra, roi, method, index=-1, inplace=False, **kwargs): if inplace: spectra_c = spectra else: spectra_c = spectra.copy() if(index == -1): # All signals for i in spectra_c.index: new_sig, __ = rp.baseline(spectra_c.wavenumbers, spectra_c.intensity[i], roi, method, **kwargs) spectra_c.intensity[i] = new_sig.reshape(-1,) else: if(isinstance(index, (tuple, list, np.ndarray))): # Multiple signals for i in index: new_sig, __ = rp.baseline(spectra_c.wavenumbers, spectra_c.intensity[i], roi, method, **kwargs) spectra_c.intensity[i] = new_sig.reshape(-1,) elif(isinstance(index, int)): # Only 1 signal new_sig, __ = rp.baseline(spectra_c.wavenumbers, spectra_c.intensity[index], roi, method, **kwargs) spectra_c.intensity[index] = new_sig.reshape(-1,) if not inplace: return spectra_c
def test_baseline(self):
    """Check shapes and rough accuracy of several ``rampy.baseline`` methods.

    A noisy Gaussian peak sitting on a linear background is corrected with
    each method; the fitted baseline and the corrected signal are compared
    with the known components using loose tolerances.
    """
    x_axis = np.arange(1, 100, 0.5)
    linear_bkg = 0.001 * x_axis
    exp_bkg = rampy.funexp(x_axis, 0.1, 0.05, 50.)
    log_bkg = rampy.funlog(x_axis, 1., 1., 1., 1.)

    peak = 1.0 * np.exp(-np.log(2) * ((x_axis - 50.0) / 10.0)**2) + 0.05 * np.random.randn(len(x_axis))
    observed = linear_bkg + peak
    # Built as in the original test but not consumed by any call below.
    observed_exp = exp_bkg + peak
    observed_log = log_bkg + peak

    # fitting regions handed to every method
    regions = np.array([[1, 20], [80, 100]])

    # (method, keyword arguments, expected baseline, decimals for the
    # corrected-signal check). The 'gcvspline' case is disabled, as before.
    cases = [
        ('poly', {'polynomial_order': 1}, linear_bkg, 1),
        ('unispline', {'s': 1e0}, linear_bkg, 0),
        ('als', {'lam': 10**7, 'p': 0.05}, linear_bkg, 0),
        ('arPLS', {'lam': 10**7, 'ratio': 0.1}, linear_bkg, 0),
        ('exp', {'p0_exp': [0.1, 0.1, 45]}, exp_bkg, 0),
    ]

    # calculating the baselines
    fits = [rampy.baseline(x_axis, observed, regions, name, **options)
            for name, options, _, _ in cases]

    # testing the shapes
    for corrected, fitted in fits:
        np.testing.assert_equal(corrected.shape, fitted.shape)

    # testing the baselines
    for (_, _, expected_bkg, _), (_, fitted) in zip(cases, fits):
        np.testing.assert_almost_equal(expected_bkg, fitted[:, 0], 0)

    # testing the corrected data
    for (_, _, _, decimals), (corrected, _) in zip(cases, fits):
        np.testing.assert_almost_equal(peak, corrected[:, 0], decimals)
def Ctreat(Ramanshift1, Cho_I, n, Cho_path):
    """Average, smooth, baseline-correct and normalise spectra, then export them.

    Parameters
    ----------
    Ramanshift1 : ndarray
        Raman shift axis; samples 64..1013 are kept.
    Cho_I : array-like
        Spectra that are averaged along axis 0 before processing.
    n : int or str
        Identifier used in the output file names and in the column label.
    Cho_path : str
        Directory receiving ``Ramanspectra_ (n).npz`` / ``.csv``. It must
        already contain ``all.csv``, which is read and rewritten.

    Returns
    -------
    pandas.DataFrame
        Single-column frame ``'Ramanspectra <n>'`` with the normalised signal.

    Notes
    -----
    ``pd.Normalization`` is not a pandas function, so ``pd`` is presumably a
    project module here (pandas is used as ``pds``) -- confirm.
    """
    Cho_I0 = np.mean(Cho_I, axis=0)

    # Crop (the original note says this window spans 350~4000 cm-1) and
    # smooth with a Savitzky-Golay filter (window 5, order 2).
    Cho_I_SG = sp.savgol_filter(Cho_I0[64:1014], 5, 2)
    x = Ramanshift1[64:1014]

    # Baseline removal.
    roi = np.array([[350, 4000]])
    y2_arpls, _ = rampy.baseline(x, Cho_I_SG, roi, 'arPLS', lam=10 ** 5, ratio=0.001)

    # Normalisation.
    Cho_I_Nor = pd.Normalization(y2_arpls)

    stem = Cho_path + '/' + 'Ramanspectra_ (' + str(n) + ')'
    np.savez(stem + '.npz', x_=x[:, np.newaxis],
             Ramanspectra=np.around(Cho_I_Nor, decimals=3))

    # Write this spectrum to its own CSV ...
    x0 = pds.DataFrame(x, columns=['Ramanshift'])
    Cho_I_Nor = pds.DataFrame(Cho_I_Nor, columns=['Ramanspectra ' + str(n)])
    single_csv = stem + '.csv'
    pds.merge(x0, Cho_I_Nor, how='outer', left_index=True, right_index=True) \
        .to_csv(single_csv, index=False, float_format='%.3f')

    # ... then merge it into the cumulative table. Both files are read through
    # context managers so the handles are closed before all.csv is rewritten
    # (they were previously left open).
    all_csv = Cho_path + '/all.csv'
    with open(all_csv) as handle:
        a = pds.read_csv(handle)
    with open(single_csv) as handle:
        b = pds.read_csv(handle)
    a.merge(b, how='outer', on='Ramanshift').to_csv(all_csv, index=False, float_format='%.3f')
    return Cho_I_Nor
def subtract_background(x, y, method="arPLS", lam=10 ** 6):
    """Return ``y`` with its baseline removed.

    The baseline is fitted by ``rp.baseline`` using a single interpolation
    region spanning the whole ``x`` range; ``lam`` is forwarded as a keyword.
    The first column of the corrected signal is returned.
    """
    full_span = np.array([[np.min(x), np.max(x)]])
    corrected, _baseline = rp.baseline(x, y, full_span, method, lam=lam)
    return corrected.T[0]
def baseline_removal(df_spectrum):
    """Remove an ALS baseline from every row of ``df_spectrum``.

    Parameters
    ----------
    df_spectrum : pandas.DataFrame
        One spectrum per row; rows are read positionally with ``.iloc``.
        Each row is paired with a fixed 1015-point axis from 50 to 1400.

    Returns
    -------
    list of ndarray
        Baseline-corrected signals, in row order.
    """
    # Fitting regions handed to rampy.baseline.
    # NOTE(review): presumably unused by the 'als' method -- confirm in rampy.
    roi = np.array([[0, 100], [200, 220], [280, 290], [420, 430], [480, 500]])
    x = np.linspace(50, 1400, 1015)

    # The synthetic "gaussian + linear" background that used to be built here
    # was never used, so it has been removed.
    return [
        np.asarray(rampy.baseline(x, df_spectrum.iloc[i], roi, 'als', lam=10**7, p=0.05)[0])
        for i in range(len(df_spectrum))
    ]
def background(self, bir, method="poly", **kwargs):
    """Fit and subtract a background for every spectrum of the map.

    Parameters
    ----------
    bir : ndarray
        Background interpolation regions passed to ``rampy.baseline``.
    method : string
        Baseline method, see the ``rampy.baseline`` documentation.
        Default is "poly".
    **kwargs
        Forwarded unchanged to ``rampy.baseline``.

    Returns
    -------
    None
        The fitted backgrounds are stored in ``self.I_background`` and the
        corrected spectra in ``self.I_corrected``. Both start as copies of
        ``self.I`` and therefore keep its shape and dtype.
    """
    fitted = np.copy(self.I)
    corrected = np.copy(self.I)
    self.I_background = fitted
    self.I_corrected = corrected

    # one column of self.I per entry of self.X
    for col in range(len(self.X)):
        signal_col, bkg_col = rp.baseline(self.w, self.I[:, col], bir, method, **kwargs)
        corrected[:, col] = signal_col.ravel()
        fitted[:, col] = bkg_col.ravel()
def data_process(data_array):
    """Trim, baseline-correct and smooth six sensor channels.

    Steps:
    1. drop the first 500 and last 600 rows; column 0 is the time axis and
       columns 1..6 are the sensor channels;
    2. fit a first-order polynomial baseline to each channel and subtract it;
    3. smooth each corrected channel with a Savitzky-Golay filter
       (window 11, order 5).

    Returns an array whose first column is the trimmed time axis, followed
    by the six filtered channels.
    """
    num_data, num_sensor = data_array.shape
    keep = slice(500, num_data - 600)

    time = data_array[keep, 0]
    channels = [data_array[keep, k] for k in range(1, 7)]
    roi = np.array([[1, 2], [5, 5.5], [9.5, 10]])
    useful_data = np.column_stack(channels)

    # only the fitted baseline (second return value) is needed
    baselines = [
        rampy.baseline(time, channel, roi, 'poly', polynomial_order=1)[1]
        for channel in channels
    ]
    base = np.column_stack(baselines)

    n_rows = len(time)
    corrected_data = np.zeros((n_rows, 6))
    filtered_data = np.zeros((n_rows, 6))
    for col in range(6):
        corrected_data[:, col] = useful_data[:, col] - base[:, col]
        filtered_data[:, col] = signal.savgol_filter(corrected_data[:, col], 11, 5)

    return np.column_stack((time, filtered_data))
########################## D, G, 2D peak fitting ############################ #load data xg = filedatag[:,0] yg_org = filedatag[:,1] x2d = filedata2d[:,0] y2d_org = filedata2d[:,1]/ratio #smooth yg_s = rp.smooth(xg,yg_org,method="whittaker",Lambda=10) y2d_s = rp.smooth(x2d,y2d_org,method="whittaker",Lambda=10) #remove background #g peak bir = np.array([(min(xg),1030),(1900,max(xg))]) yg_cor, background = rp.baseline(xg,yg_s,bir,"arPLS",lam=10**8) yg_corr = yg_cor[:,0] #2d peak bir = np.array([(min(x2d),2550),(3100,max(x2d))]) y2d_cor, background = rp.baseline(x2d,y2d_s,bir,"arPLS",lam=10**8) y2d_corr = y2d_cor[:,0] #fix spectrum y = np.concatenate((y2d_corr,yg_corr)) x = np.concatenate((x2d,xg)) bir = np.array([(min(x),1050.),(1880.,2300.), (2400.,2500),(3050.,max(x))]) yg_corrected, background = rp.baseline(x,y,bir,"arPLS",lam=10**8) y = yg_corrected[:,0]
plt.xlabel("Raman shift, cm$^{-1}$", fontsize=12)
plt.ylabel("Normalized intensity, a. u.", fontsize=12)
plt.title("Fig. 1: the raw data", fontsize=12, fontweight="bold")

# We are interested in fitting the 870-1300 cm$^{-1}$ portion of this spectrum, which can be assigned to the various
# symmetric and asymmetric stretching vibrations of Si-O bonds in the SiO$_2$ tetrahedra present in the glass network
# (see the above cited literature for details).
# NOTE(review): the boundaries actually used below are lb = 1100 and hb = 1800, not 870-1300 -- confirm which is intended.
#
# Baseline Removal
#
# The first thing we notice in Fig. 1 is that we have to remove a baseline, because this spectrum is shifted from 0 by
# some "background" scattering. For that, we can use the rp.baseline() function.
bir = np.array([(1000, 1100), (1800, 1900)])  # The regions where the baseline will be fitted
y_corr, y_base = rp.baseline(
    x, y, bir, 'poly', polynomial_order=2)  # We fit a polynomial background.

f2 = plt.figure(2, figsize=(10, 10))
plt.plot(x, y_corr)

# Now we will do some manipulation to have the portion of interest of the spectrum in a single variable. We will assume
# that the errors have not been drastically affected by the correction process (in some cases they can be, but this one
# is quite straightforward), such that we will use the initial relative errors stored in the "ese0" variable.

# signal selection
lb = 1100  # The lower boundary of interest
hb = 1800  # The upper boundary of interest
x_fit = x[np.where((x > lb) & (x < hb))]
y_fit = y_corr[np.where((x > lb) & (x < hb))]
def test_baseline(self):
    """Check shapes and rough accuracy of several ``rampy.baseline`` methods.

    A noisy Gaussian peak on a linear background is corrected with each
    method; the fitted baseline and the corrected signal are compared with
    reference arrays using loose tolerances.
    """
    x_axis = np.arange(1, 100, 0.5)
    linear_bkg = 0.001 * x_axis
    exp_bkg = rampy.funexp(x_axis, 0.1, 0.05, 50.)
    log_bkg = rampy.funlog(x_axis, 1., 1., 1., 1.)

    peak = 1.0 * np.exp(-np.log(2) * ((x_axis - 50.0) / 10.0)**2) + 0.05 * np.random.randn(len(x_axis))
    observed = linear_bkg + peak
    observed_exp = exp_bkg + peak
    # Built as in the original test but not consumed by any call below.
    observed_log = log_bkg + peak

    # fitting regions handed to every method
    regions = np.array([[1, 20], [80, 100]])

    # (method, keyword arguments, expected baseline, expected corrected
    # signal, decimals for the corrected-signal check). The 'gcvspline' case
    # is disabled, as before.
    # NOTE(review): the 'exp' case is fitted on the linear-background signal
    # yet compared with the exponential references; it only passes because
    # the tolerance is coarse -- confirm the intended input.
    cases = [
        ('poly', {'polynomial_order': 1}, linear_bkg, peak, 1),
        ('unispline', {'s': 1e0}, linear_bkg, peak, 0),
        ('als', {'lam': 10**7, 'p': 0.05}, linear_bkg, peak, 0),
        ('arPLS', {'lam': 10**7, 'ratio': 0.1}, linear_bkg, peak, 0),
        ('drPLS', {}, linear_bkg, peak, 0),
        ('exp', {'p0_exp': [0.1, 0.1, 45]}, exp_bkg, observed_exp, 0),
    ]

    # calculating the baselines
    fits = [rampy.baseline(x_axis, observed, regions, name, **options)
            for name, options, _, _, _ in cases]

    # testing the shapes
    for corrected, fitted in fits:
        np.testing.assert_equal(corrected.shape, fitted.shape)

    # testing the baselines
    for (_, _, expected_bkg, _, _), (_, fitted) in zip(cases, fits):
        np.testing.assert_almost_equal(expected_bkg, fitted[:, 0], 0)

    # testing the corrected data
    for (_, _, _, expected_signal, decimals), (corrected, _) in zip(cases, fits):
        np.testing.assert_almost_equal(expected_signal, corrected[:, 0], decimals)
def Htreat(Ramanshift1, Hem_I, n, Hem_path):
    """Average, smooth, baseline-correct and normalise spectra, then export them.

    Parameters
    ----------
    Ramanshift1 : ndarray
        Raman shift axis; samples 64..1013 are kept.
    Hem_I : array-like
        Spectra that are averaged along axis 0 before processing.
    n : int or str
        Identifier used in the output file names and in the column label.
    Hem_path : str
        Directory receiving ``Ramanspectra_ (n).npz`` / ``.csv``. It must
        already contain ``all.csv``, which is read and rewritten.

    Returns
    -------
    pandas.DataFrame
        Single-column frame ``'Ramanspectra <n>'`` with the normalised signal.

    Notes
    -----
    ``pd.Normalization`` is not a pandas function, so ``pd`` is presumably a
    project module here (pandas is used as ``pds``) -- confirm.
    """
    Hem_I0 = np.mean(Hem_I, axis=0)

    # Crop (the original note says this window spans 350~4000 cm-1) and
    # smooth with a Savitzky-Golay filter (window 5, order 2).
    Hem_I_SG = sp.savgol_filter(Hem_I0[64:1014], 5, 2)
    x = Ramanshift1[64:1014]

    # Baseline removal.
    roi = np.array([[350, 4000]])
    y3_arpls, _ = rampy.baseline(x, Hem_I_SG, roi, 'arPLS', lam=10 ** 5, ratio=0.001)

    # Normalisation.
    Hem_I_Nor = pd.Normalization(y3_arpls)

    stem = Hem_path + '/' + 'Ramanspectra_ (' + str(n) + ')'
    np.savez(stem + '.npz', x_=x[:, np.newaxis],
             Ramanspectra=np.around(Hem_I_Nor, decimals=3))

    # Write this spectrum to its own CSV ...
    x0 = pds.DataFrame(x, columns=['Ramanshift'])
    Hem_I_Nor = pds.DataFrame(Hem_I_Nor, columns=['Ramanspectra ' + str(n)])
    single_csv = stem + '.csv'
    pds.merge(x0, Hem_I_Nor, how='outer', left_index=True, right_index=True) \
        .to_csv(single_csv, index=False, float_format='%.3f')

    # ... then merge it into the cumulative table. Both files are read through
    # context managers so the handles are closed before all.csv is rewritten
    # (they were previously left open).
    all_csv = Hem_path + '/all.csv'
    with open(all_csv) as handle:
        a = pds.read_csv(handle)
    with open(single_csv) as handle:
        b = pds.read_csv(handle)
    a.merge(b, how='outer', on='Ramanshift').to_csv(all_csv, index=False, float_format='%.3f')
    return Hem_I_Nor


# # Plotting
# # Figure width and height are given in inches.
# # Calling figure() creates a figure object and makes it the current one.
# plt.figure(num=1, figsize=(8, 4))
# # This can make the fonts look nicer.
# # label: name of the curve, shown in the legend.
# # Wrapping a string in "$" makes matplotlib render it with its built-in LaTeX engine.
# # color : colour of the curve
# # linewidth : width of the curve
# plt.plot(x, Tri_I_Nor, label="$Tri$", color="blue", linewidth=1)
# plt.plot(x, Cho_I_Nor + 1, label="$Cho$", color="red", linewidth=1)
# plt.plot(x, Hem_I_Nor + 2, label="$Hem$", color="green", linewidth=1)
#
# plt.figure(num=2, figsize=(8, 4))
# # This can make the fonts look nicer.
# # label: name of the curve, shown in the legend.
# # Wrapping a string in "$" makes matplotlib render it with its built-in LaTeX engine.
# # color : colour of the curve
# # linewidth : width of the curve
# plt.plot(Ramanshift, Tri_I0, label="$Tri$", color="blue", linewidth=1)
# # Set the x-axis label
# plt.xlabel("Raman shift/cm-1")
# # Set the y-axis label
# plt.ylabel("Intensity")
# # Set the figure title
# plt.title("Raman spectrum")
# # Set the y-axis range
# plt.ylim()
# # Show the legend
# plt.legend()
# # Display every figure created so far.
# plt.show()
def detect_image(self, raman_data):
    """Detect regions in one Raman spectrum and plot them over the spectra.

    The spectrum is min-max normalised and passed to ``self.model_rpn``; the
    resulting proposals are refined by ``self.model_classifier``, filtered by
    ``self.confidence`` and by ``self.bbox_util.nms_for_out``, and finally
    drawn as shaded spans on top of the baseline-corrected spectra.

    Parameters
    ----------
    raman_data : sequence
        Intensities of one spectrum; each element must be accepted by
        ``float()``.

    Returns
    -------
    None
        Results are printed and shown with matplotlib. When no proposal
        reaches ``self.confidence`` only the spectra are shown.
    """
    old_raman = copy.deepcopy(raman_data)
    raman_data = np.array(list(map(float, raman_data)), dtype=np.float32).reshape(-1, 1, 1)
    raman_shape = np.array(np.shape(raman_data)[0:2])
    old_width = raman_shape[0]
    old_height = raman_shape[1]

    raman = np.array(raman_data, dtype=np.float64)
    raman = (raman - (np.min(raman))) / (np.max(raman) - np.min(raman))
    raman = np.expand_dims(raman, 0)
    # raman shape = [1,1044,1,1] (original author's note)
    preds = self.model_rpn.predict(raman)

    # Decode the predictions.
    anchors = get_anchors((66, 1), old_width, old_height)
    # Original author's notes on `preds` (the RPN output, three parts):
    #   first   (1,198,1)      objectness confidence
    #   second  (1,198,4)      adjustment parameters of the prior boxes
    #   third   (1,66,1,1024)  feature map
    preds[1][..., 3] = 1
    anchors[:, 1] = 0
    rpn_results = self.bbox_util.detection_out(preds, anchors, 1, confidence_threshold=0)

    R = rpn_results[0][:, 2:]
    R[:, 0] = np.array(np.round(R[:, 0] * old_width / self.config.rpn_stride), dtype=np.int32)
    R[:, 1] = np.array(np.round(R[:, 1] * old_height), dtype=np.int32)
    R[:, 2] = np.array(np.round(R[:, 2] * old_width / self.config.rpn_stride), dtype=np.int32)
    R[:, 3] = np.array(np.round(R[:, 3] * old_height), dtype=np.int32)
    # Columns 2/3 become extents; rows are unpacked as (x, y, w, h) below.
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]
    base_layer = preds[2]

    # Drop proposals whose width or height is below 1.
    delete_line = [i for i, r in enumerate(R) if r[2] < 1 or r[3] < 1]
    R = np.delete(R, delete_line, axis=0)

    bboxes = []
    probs = []
    labels = []
    num_rois = self.config.num_rois
    for jk in range(R.shape[0] // num_rois + 1):
        ROIs = np.expand_dims(R[num_rois * jk:num_rois * (jk + 1), :], axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == R.shape[0] // num_rois:
            # Last, incomplete batch: pad it to num_rois rows by repeating
            # its first ROI.
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = self.model_classifier.predict([base_layer, ROIs])

        for ii in range(P_cls.shape[1]):
            # The last class column is excluded from scoring.
            scores = P_cls[0, ii, :-1]
            if np.max(scores) < self.confidence:
                continue

            label = np.argmax(scores)
            (x, y, w, h) = ROIs[0, ii, :]
            cls_num = label
            (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
            tx /= self.config.classifier_regr_std[0]
            ty /= self.config.classifier_regr_std[1]
            tw /= self.config.classifier_regr_std[2]
            th /= self.config.classifier_regr_std[3]

            # Apply the regression offsets to the ROI centre and size.
            cx = x + w / 2.
            cy = y + h / 2.
            cx1 = tx * w + cx
            cy1 = ty * h + cy
            w1 = math.exp(tw) * w
            h1 = math.exp(th) * h

            x1 = int(round(cx1 - w1 / 2.))
            y1 = int(round(cy1 - h1 / 2.))
            x2 = int(round(cx1 + w1 / 2))
            y2 = int(round(cy1 + h1 / 2))

            bboxes.append([x1, y1, x2, y2])
            probs.append(np.max(scores))
            labels.append(label)

    if len(bboxes) == 0:
        print("None boxes")
        self._plot_baseline_corrected_spectra(old_raman)
        # plt.savefig('./raman_data/raw_data/yayin/yayin_alter.jpg')
        plt.show()
        # Nothing to suppress or draw. Without this return the code below
        # indexed an empty 1-D array (`boxes[:, 0]`) and raised IndexError.
        return

    # Keep the boxes that survive non-maximum suppression.
    labels = np.array(labels)
    probs = np.array(probs)
    boxes = np.array(bboxes, dtype=np.float32)
    boxes[:, 0] = boxes[:, 0] * self.config.rpn_stride / old_width
    boxes[:, 1] = boxes[:, 1] * old_height
    boxes[:, 2] = boxes[:, 2] * self.config.rpn_stride / old_width
    boxes[:, 3] = boxes[:, 3] * old_height
    results = np.array(
        self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                   np.array(boxes), self.num_classes - 1, 0.4))

    top_label_indices = results[:, 0]
    top_conf = results[:, 1]
    boxes = results[:, 2:]
    boxes[:, 0] = boxes[:, 0] * old_width
    boxes[:, 1] = boxes[:, 1] * old_height
    boxes[:, 2] = boxes[:, 2] * old_width
    boxes[:, 3] = boxes[:, 3] * old_height

    # Draw the base figure, then one shaded span per detection.
    self._plot_baseline_corrected_spectra(old_raman)

    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = top_conf[i]
        left, top, right, bottom = boxes[i]

        # left = max(1, np.floor(left + 0.5).astype('int32'))
        # right = min(1043, np.floor(right + 0.5).astype('int32'))
        left = max(-30, np.floor(left - 0.5).astype('int32') * 4)
        right = min(4080, np.floor(right - 0.5).astype('int32') * 4)

        label = '{} {:.2f}'.format(predicted_class, score)
        label = label.encode('utf-8')
        # print(label ," ", "[", left , ", " , right , "]", " ", "[", X[left-1], ",", X[right-1], "]")
        # plt.axvspan(xmin=X[left-1], xmax=X[right-1], facecolor='y', alpha=0.3)
        print(label, " ", "[", left, ", ", right, "]")
        plt.axvspan(xmin=left, xmax=right, facecolor='y', alpha=0.3)
    plt.show()


def _plot_baseline_corrected_spectra(self, old_raman):
    """Plot the baseline-corrected reference ("Normal") and input ("Cancer") spectra."""
    Raman_shift = Xexcel('./raman_data/raw_data/RamanShift.xlsx', 'Sheet1')
    Normal_data = Yexcel(
        './raman_data/raw_data/yayin/no_origin_label0.xlsx',
        'no_origin_label0')  # Normal
    Normal_data = Ygetmean(Normal_data)
    Cancer_data = np.array(old_raman)
    Raman_shift = np.array(Raman_shift)
    Normal_data = np.array(Normal_data)

    # Crop the data to 350~4000 cm-1.
    Lower_limit = np.max(np.where(Raman_shift < 350)) + 1
    Upper_limit = np.min(np.where(Raman_shift > 4000)) + 1
    Raman_shift_limit = Raman_shift[Lower_limit:Upper_limit]
    Cancer_data_limit = Cancer_data[Lower_limit:Upper_limit]
    Normal_data_limit = Normal_data[Lower_limit:Upper_limit]

    # Savitzky-Golay smoothing.
    Cancer_data_SG = sp.savgol_filter(Cancer_data_limit, 11, 2)
    Normal_data_SG = sp.savgol_filter(Normal_data_limit, 11, 2)

    # Baseline removal.
    roi = np.array([[350, 4000]])
    Cancer_data_final, Cancer_base_Intensity = rampy.baseline(
        Raman_shift_limit, Cancer_data_SG, roi, 'arPLS', lam=10**6, ratio=0.001)
    Normal_data_final, Normal_base_Intensity = rampy.baseline(
        Raman_shift_limit, Normal_data_SG, roi, 'arPLS', lam=10**6, ratio=0.001)

    plt.plot(Raman_shift_limit, Normal_data_final, ls="-", lw=2, c="c", label="Normal")
    plt.plot(Raman_shift_limit, Cancer_data_final, ls="-", lw=1, c="b", label="Cancer")
    plt.legend()
    plt.xlabel("yayin")
def preparing_data(dataliste,**kwargs):
    """prepare the spectra before processing by the regression techniques

    Parameters
    ==========
    dataliste : Pandas dataframe
        A list containing the names of the spectra (column "spectra"), located in the folder indicated by pathin

    Options
    =======
    pathin : string
        the path of the spectra. Default = './raw/'
    cutoff : ndarray
        frequencies delimiting the region of interest for the regression. Default = np.array([850.,1140.])
    scale : float
        scaling coefficient for the intensity. Default = 1000

    Returns
    =======
    x : ndarray
        the x axis as np.arange(300,1300,1.0)
    record : ndarray
        the y signal corrected from temperature and excitation line effects (23 °C, 532 nm), multiplied by scale
    record_bas2 : ndarray
        the baseline fitted to record
    x_cut : ndarray
        the x axis of the region of interest, as a column vector
    record_hf_no_smo : ndarray
        the y signal in the region of interest, scaled between 0 and 1 (no smoothing)
    record_hf : ndarray
        the y signal in the region of interest, scaled between 0 and 1 and smoothed with a whittaker algorithm.
    nb_exp : int
        number of experiments (= length of dataliste)

    Note
    ====
    Every spectrum is passed through `rp.flipsp` because the interpolators need an increasing x axis.
    With an empty dataliste the arrays are returned with zero columns.
    """
    #
    # Kwargs
    #
    cutoff = kwargs.get("cutoff",np.array([850.,1140.])) # roi of the linear baseline
    scale = kwargs.get("scale",1000) # scaling coefficient
    pathin = kwargs.get('pathin',"./raw/")

    # a new x axis for interpolation (all spectra may not have been sampled with the same x)
    x = np.arange(300,1300,1.0)

    # for the baseline fit, we grab two narrow windows around the cutoff frequencies
    roi_cutoff = np.array([[cutoff[0]-0.4,cutoff[0]+0.4],[cutoff[1]-0.4,cutoff[1]+0.4]])

    # The region of interest depends only on x and roi_cutoff, so it is
    # computed once here; this also keeps x_cut and the record_hf arrays
    # defined when dataliste is empty (previously an UnboundLocalError).
    in_cut = (roi_cutoff[0,0]<=x)&(x<=roi_cutoff[1,1])
    x_cut = x[in_cut].reshape(-1,1)

    # number of spectra
    nb_exp = len(dataliste)

    # arrays to record the treated spectra
    record = np.ones((x.shape[0],nb_exp))
    record_bas2 = np.ones((x.shape[0],nb_exp))
    record_hf = np.ones((x_cut.shape[0],nb_exp))
    record_hf_no_smo = np.ones((x_cut.shape[0],nb_exp))

    # loop to read the spectra
    for i in range(nb_exp):
        data = np.genfromtxt(pathin+dataliste["spectra"].iloc[i],skip_header=1)

        # we need an increasing x axis for the interpolators so we check this point
        data = rp.flipsp(data)

        # finding the minimum between 1200 and 1300 to fit a constant baseline (bas1).
        # The search is restricted to that window: matching the minimum value
        # against the whole spectrum could select an equal value outside it.
        window = np.flatnonzero((data[:,0]>1200)&(data[:,0]<1300))
        idx_roi = window[np.argmin(data[window,1])]
        roi_bas1 = np.array([[data[idx_roi,0] - 15.,data[idx_roi,0] + 15.]])
        y_bas1, bas1 = rp.baseline(data[:,0],data[:,1],roi_bas1,"poly",polynomial_order=0)

        # resampling
        y_norm = rp.resample(data[:,0],y_bas1[:,0],x)

        # correcting from temperature and excitation line effect; the original note states that
        # the tlcorrection function automatically normalizes to the area.
        _, y_long, _ = rp.tlcorrection(x,y_norm,23.0,532.0)

        record[:,i] = y_long[:]*scale # with a scale factor to bring values closer to 1 for representation

        # now grabbing the signal above the cutting baseline (bas2) in the roi_cutoff portion of spectra
        y_corr, bas2 = rp.baseline(x,y_long[:],roi_cutoff,"poly",polynomial_order=1)
        y_cut = y_corr[in_cut].reshape(-1,1)

        # Getting the good signal at HF (above the cut-off baseline) + Min-Max scaling
        record_hf_no_smo[:,i]= (y_cut[:,0]-np.min(y_cut[:,0]))/(np.max(y_cut[:,0])-np.min(y_cut[:,0]))

        # smoothing the signal with a Whittaker smoother = improves results
        record_hf[:,i] = rp.whittaker(record_hf_no_smo[:,i],Lambda = 10.0**3)

        # we take care of correcting any deviation from 0 after smoothing.
        # `polynomial_order=1` replaces the former `p=1.0`, which is not an
        # option of the "poly" method; the fit stayed linear only through the default.
        y_r_2, _ = rp.baseline(x_cut,record_hf[:,i],roi_cutoff,"poly",polynomial_order=1)
        record_hf[:,i] = ((y_r_2-np.min(y_r_2))/(np.max(y_r_2)-np.min(y_r_2))).reshape(-1)

        # for the baseline
        record_bas2[:,i] = bas2[:,0]*scale

    return x, record, record_bas2, x_cut, record_hf_no_smo, record_hf, nb_exp
# Crop the shift axis and both spectra to the samples between the last shift
# below 350 and the first shift above 4000.
Lower_limit = np.max(np.where(Raman_shift < 350)) + 1
Upper_limit = np.min(np.where(Raman_shift > 4000)) + 1
Raman_shift_limit = Raman_shift[Lower_limit:Upper_limit]
Cancer_data_limit = Cancer_data[Lower_limit:Upper_limit]
Normal_data_limit = Normal_data[Lower_limit:Upper_limit]

# Savitzky-Golay smoothing (window 11, polynomial order 2)
Cancer_data_SG = sp.savgol_filter(Cancer_data_limit, 11, 2)
Normal_data_SG = sp.savgol_filter(Normal_data_limit, 11, 2)

# Baseline removal (arPLS); the second return value is the fitted baseline
roi = np.array([[350, 4000]])
Cancer_data_final, Cancer_base_Intensity = rampy.baseline(Raman_shift_limit, Cancer_data_SG, roi, 'arPLS', lam=10**6, ratio=0.001)
Normal_data_final, Normal_base_Intensity = rampy.baseline(Raman_shift_limit, Normal_data_SG, roi, 'arPLS', lam=10**6, ratio=0.001)

# she_label_1 = (she_label_1 - (np.min(she_label_1)))/(np.max(she_label_1) - np.min(she_label_1))  # normalisation
# plt.plot(Raman_shift_limit, Normal_data_final, ls="-", lw = 2, c = "c", label = "Normal")

# Used when processing data for a senior labmate:
# Cancer_data_final = Cancer_data_final.reshape(-1,).tolist()
# print("preprocessed data", len(Cancer_data_final))
def fit_spectra(data_liste,method="LL2012",delim='\t',path_in='./raw/',laser=514.532,spline_coeff=0.001, poly_coeff=3):
    r"""Calculate the ratios of water and silicate signals from Raman spectra

    Parameters
    ----------
    data_liste: Pandas DataFrame
        Contains the list of spectra (column "Name") and the baseline regions
        of interest (columns "ROI1 lb" to "ROI6 hb"). Rows are accessed by the
        labels 0..len-1.
    method: string
        The used method. LL2012: Le Losq et al. (2012); DG2017: Di Genova et al. (2017). See references.
    delim: string
        File delimiter. Use '\t' for tabulated text or ',' for comma separated text.
    path_in: string
        Path for the spectra; it is prepended as-is to each file name.
    laser: float
        Laser line wavelength in nm
    spline_coeff: float or array-like
        Smoothing coefficient for the spline baseline. An array of size len(data_liste) can be provided,
        in which case element i is used for spectrum i. Default = 0.001.
    poly_coeff: int
        Polynomial coefficient for the polynomial baseline function. Default = 3 (DG2017 method).
        Set to 2 for Behrens et al. (2006) method.

    Returns
    -------
    x: ndarray
        Common x axis.
    y_all: ndarray
        All raw spectra from data_liste in an array of length len(x) and with as many columns as spectra.
    y_all_corr: ndarray
        All corrected spectra from data_liste in an array of length len(x) and with as many columns as spectra.
    y_all_base: ndarray
        All baselines for spectra from data_liste in an array of length len(x) and with as many columns as spectra.
    rws: ndarray
        The ratio of the water integrated intensity over that of silicate signals.
    rw: ndarray
        The integrated intensity of water signal.
    rs: ndarray
        The integrated intensity of silicate signals.

    Raises
    ------
    TypeError
        If method is not set to LL2012 or DG2017.

    Warns
    -----
    UserWarning
        If the spline baseline fails for a spectrum (LL2012). Processing stops there:
        that spectrum and all following ones keep their initial values.

    References
    ----------
    C. Le Losq, D. R. Neuville, R. Moretti, J. Roux, Determination of water content in silicate glasses using
    Raman spectrometry: Implications for the study of explosive volcanism. American Mineralogist. 97, 779–790 (2012).
    D. Di Genova et al., Effect of iron and nanolites on Raman spectra of volcanic glasses: A reassessment of
    existing strategies to estimate the water content. Chemical Geology. 475, 76–86 (2017).
    """
    import warnings

    # Not used directly here; kept because the function has always imported it
    # unconditionally, so a missing package raises ImportError before any work.
    import gcvspline  # noqa: F401

    # np.trapz is deprecated in NumPy 2.0 in favour of np.trapezoid.
    integrate = np.trapezoid if hasattr(np, "trapezoid") else np.trapz

    x_all_lf = np.arange(50,1400,1.0)
    x_all_hf = np.arange(2800,3800,1.0)
    x = np.hstack((x_all_lf,x_all_hf))

    y_all = np.zeros((len(x),len(data_liste)))
    y_all_base = np.copy(y_all)
    y_all_corr = np.copy(y_all)

    rws = np.ones(len(data_liste))
    rw = np.ones(len(data_liste))
    rs = np.ones(len(data_liste))

    rois = data_liste.loc[:,"ROI1 lb":"ROI6 hb"]

    for i in range(len(data_liste)):
        # importing the spectra (path_in is honoured; it used to be ignored
        # in favour of a hard-coded "./raw/")
        spectrum = np.genfromtxt(path_in+data_liste["Name"][i],delimiter=delim,skip_header=1)

        # constructing an interpolator: this will allow an output of all data with the same X axis
        f = scipy.interpolate.interp1d(spectrum[:,0], spectrum[:,1],fill_value="extrapolate")

        # temperature and excitation line correction
        x, y_all[:,i], _ = rp.tlcorrection(x,f(x),23.0,laser,normalisation='intensity')

        # getting the roi: the row holds (lower, upper) pairs
        roi = np.array(rois.loc[i]).reshape(-1,2)

        # calculating baseline
        if method == "LL2012": # spline
            # scalar: same coefficient for all spectra; array: one per spectrum
            s_i = spline_coeff if np.ndim(spline_coeff) == 0 else spline_coeff[i]
            try:
                c_hf, b_hf = rp.baseline(x,y_all[:,i],roi,"gcvspline",s=s_i)
            except Exception as err:
                # Stopping at the first failure is the historical behaviour;
                # it is now reported instead of being silent.
                warnings.warn("gcvspline baseline failed for spectrum %d (%s): %s; "
                              "stopping, remaining spectra are left unprocessed"
                              % (i, data_liste["Name"][i], err))
                break

            y_all_corr[:,i]=c_hf[:,0]
            y_all_base[:,i]=b_hf[:,0]

        elif method == "DG2017": # polynomial 3 following DG2017 method
            # getting the portions of interest (low- and high-frequency parts)
            low = x < 2000.
            high = x > 2000.
            x_lf = x[low]
            x_hf = x[high]
            y_lf = y_all[low,i]
            y_hf = y_all[high,i]

            c_lf, b_lf = rp.baseline(x_lf,y_lf,np.array([[0,200],[1240,1500]]),"poly",polynomial_order = poly_coeff)
            c_hf, b_hf = rp.baseline(x_hf,y_hf,np.array([[2500,3100],[3750,3900]]),"poly",polynomial_order = poly_coeff)

            y_all_corr[:,i] = np.hstack((c_lf.reshape(-1),c_hf.reshape(-1)))
            y_all_base[:,i] = np.hstack((b_lf.reshape(-1),b_hf.reshape(-1)))

        else:
            raise TypeError('method should be set to LL2012 or DG2017')

        # Area / Integrated Intensity calculation
        silicate = (x>150)&(x<1250)
        water = (x>3100)&(x<3750)
        S = integrate(y_all_corr[silicate,i],x[silicate])
        W = integrate(y_all_corr[water,i],x[water])

        # updating the output arrays
        rs[i] = S
        rw[i] = W
        rws[i] = W/S

    return x, y_all, y_all_corr, y_all_base, rws, rw, rs
selected_samples = [] # List of the sample to get the baseline for fits_by_id = {} for key, df in df_raman.groupby('ID'): name_ech = df.name_ech.unique()[0] is_true = (np.isin(key, selected_samples) or get_all_ech) show_plot_init = is_true base_line = is_true if base_line: x, y = df["shift"].values, df["intensity"].values y_corr, y_base = rp.baseline(x_input=x, y_input=y, bir=regions_of_interest, method='poly', polynomial_order=3) y_corr_flat = y_corr[:, 0] in_boundaries = df["shift"].between(lb, hb) x_fit, y_fit = x[in_boundaries], y_corr_flat[in_boundaries] y_fit = y_fit / np.amax(y_fit) * 10 fits_by_id[key] = pd.DataFrame({'x': x_fit, 'y': y_fit}) # the relative errors after baseline subtraction ese0 = np.sqrt(abs(y_fit)) / abs(y_fit) # normalise spectra to maximum intensity, easier to handle max_intensity = np.amax(y_fit) sigma = abs(ese0 * y_fit) # calculate good ese # create a new plot for showing the spectrum if show_plot_init: