def do_the_job(dfile, sensor, recenter=True, wtsel=None, clean=False, mbasal='meanfirst', alt_smooth=False,
               wavy=False):
    """
    Smooths the resampled peaks of a datafile/sensor, optionally crops the
    window and subtracts a baseline estimate from every peak.

    The smoothing method is chosen as follows: ``baseline_als`` when
    `alt_smooth` is set, otherwise a PCA reconstruction keeping the leading
    components that explain 98% of the variance when the 'pcasmooth'
    experiment parameter is set, otherwise the data is left untouched.

    Relies on the module level `datainfo` experiment object.

    :param dfile: datafile
    :param sensor: sensor
    :param recenter: crop the window symmetrically around its midpoint to
           `wtsel` milliseconds (no per-peak shift is applied)
    :param wtsel: new window length in milliseconds, None keeps the
           resampled window as it is
    :param clean: keep only the peaks listed in the clean time list
    :param mbasal: how the basal value subtracted from each peak is computed
           'meanfirst', mean of the first 'wbaseline' points
           'meanmin', mean of the 'wbaseline' smallest points of the first half of the peak
           'meanlast', mean of the last third of the peak
           'alternative', value returned by find_baseline for the first half of the peak
           any other value leaves the peaks unshifted
    :param alt_smooth: smooth with baseline_als instead of PCA
    :param wavy: keep only the peaks selected by outliers_wavy
    :return: (transformed peaks or None if there is no data, dfile, sensor)
    """
    print(datainfo.dpath + datainfo.name, sensor)

    # Everything that needs the experiment file is read before closing it
    # (the clean time list used to be requested from an already closed handle)
    f = datainfo.open_experiment_data(mode='r')
    try:
        data = datainfo.get_peaks_resample(f, dfile, sensor)
        ltime = None
        if clean and data is not None:
            lt = datainfo.get_clean_time(f, dfile, sensor)
            if lt is not None:
                ltime = list(lt[()])
    finally:
        datainfo.close_experiment_data(f)

    if data is None:
        return None, dfile, sensor

    pcap = datainfo.get_peaks_smooth_parameters('pcasmooth')
    baseline = datainfo.get_peaks_smooth_parameters('wbaseline')

    # if there is a clean list of peaks then the smoothing is computed only for the clean peaks
    if ltime is not None:
        print(data.shape)
        data = data[ltime]
        print(data.shape)

    if alt_smooth:
        parl = datainfo.get_peaks_alt_smooth_parameters('lambda')
        parp = datainfo.get_peaks_alt_smooth_parameters('p')
        trans = np.zeros((data.shape[0], data.shape[1]))
        for i in range(data.shape[0]):
            trans[i] = baseline_als(data[i, :], parl, parp)
    elif pcap:
        # PCA cannot extract more components than min(n_peaks, n_points)
        pca = PCA(n_components=min(data.shape[0], data.shape[1]))
        res = pca.fit_transform(data)
        ratios = pca.explained_variance_ratio_
        sexp = 0.0
        ncomp = 0
        # bounded so rounding in the accumulated ratio cannot index past the end
        while sexp < 0.98 and ncomp < len(ratios):
            sexp += ratios[ncomp]
            ncomp += 1
        components = ncomp
        print('VEX=', np.sum(ratios[0:components]), components)
        res[:, components:] = 0
        trans = pca.inverse_transform(res)
    else:
        trans = data

    # Crop the data to wtsel milliseconds around the midpoint of the window
    if recenter and wtsel is not None:
        # Original window size in milliseconds
        wtsel_orig = datainfo.get_peaks_resample_parameters('wtsel')
        npoints = trans.shape[1]
        # New window size in points
        wtlen = int(npoints * (float(wtsel) / wtsel_orig))
        if wtlen < 0 or wtlen > npoints:
            raise ValueError('wtsel (%s) does not fit in the resampled window (%s)' % (wtsel, wtsel_orig))
        # Points discarded at the end and at the beginning; the beginning
        # takes the extra point when the number of discarded points is odd
        wtdisc = (npoints - wtlen) // 2
        wtdisci = npoints - wtlen - wtdisc
        trans = np.array(trans[:, wtdisci:npoints - wtdisc], dtype=float)

    # Substract the basal
    npoints = trans.shape[1]
    if mbasal == 'meanfirst':
        for row in range(trans.shape[0]):
            trans[row] -= np.mean(trans[row, :baseline])
    elif mbasal == 'meanmin':
        for row in range(trans.shape[0]):
            vals = np.sort(trans[row, 0:npoints // 2])
            trans[row] -= np.mean(vals[:baseline])
    elif mbasal == 'meanlast':
        for row in range(trans.shape[0]):
            trans[row] -= np.mean(trans[row, (npoints // 3) * 2:npoints])
    elif mbasal == 'alternative':
        for row in range(trans.shape[0]):
            basal = find_baseline(trans[row, 0:npoints // 2], resolution=25)
            trans[row] -= basal

    if wavy:
        sel = outliers_wavy(trans)
        trans = trans[sel]

    return trans, dfile, sensor
# Loads the resampled peaks (raw and PCA smoothed) of the first datafile for
# sensor `s` and plots two baseline estimates for the first few peaks
ldatap = []
ldatappca = []
ltimes = []
for dfiles in [datainfo.datafiles[0]]:
    print(dfiles)
    d = f[dfiles + '/' + s + '/' + 'PeaksResample']
    dataf = d[()]
    ldatap.append(dataf)
    # d = f[dfiles + '/' + s + '/' + 'Time']
    # times = d[()]
    # ltimes.append(times)
    d = f[dfiles + '/' + s + '/' + 'PeaksResamplePCA']
    dataf = d[()]
    ldatappca.append(dataf)

data = ldatap[0]  # np.concatenate(ldata)
datapca = ldatappca[0]  # np.concatenate(ldata)
# ptime = ltimes[0]
# print(len(data))

# Integer division: the value is used as a slice bound
long = data.shape[1] // 3
# At most five peaks, fewer when the dataset is smaller
for i in range(min(5, data.shape[0])):  # range(data.shape[0]):
    # print dataraw[i]
    # print data[i]
    # print('T = %d'%ptime[i])
    base = baseline_als(data[i], 5, 0.9)
    show_signal(base, find_baseline(data[i, :long], resolution=50))
    # NOTE(review): the slice starts at the peak index i, which looks
    # unintended (base[:long]?) -- left unchanged, confirm with the author
    show_signal(base, find_baseline(base[i:long], resolution=100))
    # show_two_signals(data[i],datapca[i])
    # show_signal(datapca[i])
def do_the_job(dfile, sensor, recenter=True, wtsel=None, clean=False, mbasal='meanfirst', alt_smooth=False,
               wavy=False, vpca=0.98):
    """
    Smooths the resampled peaks of a datafile/sensor, optionally crops the
    window and subtracts a baseline estimate from the peaks.

    The smoothing method is chosen as follows: ``baseline_als`` when
    `alt_smooth` is set, otherwise a PCA reconstruction keeping the leading
    components that explain a `vpca` fraction of the variance when the
    'pcasmooth' experiment parameter is set, otherwise the data is left
    untouched.

    Relies on the module level `datainfo` experiment object.

    :param dfile: datafile
    :param sensor: sensor
    :param recenter: crop the window symmetrically around its midpoint to
           `wtsel` milliseconds (no per-peak shift is applied)
    :param wtsel: new window length in milliseconds, None keeps the
           resampled window as it is
    :param clean: keep only the peaks listed in the clean time list
    :param mbasal: how the basal value subtracted from the peaks is computed
           'meanfirst', mean of the first 'wbaseline' points of each peak
           'meanmin', mean of the 'wbaseline' smallest points of the first half of each peak
           'meanlast', mean of the last third of each peak
           'globalmeanfirst', mean of the first 'wbaseline' points of all the peaks together
           'alternative', value returned by find_baseline for the first half of each peak
           any other value leaves the peaks unshifted
    :param alt_smooth: smooth with baseline_als instead of PCA
    :param wavy: keep only the peaks selected by outliers_wavy
    :param vpca: fraction of explained variance kept by the PCA reconstruction
    :return: (transformed peaks or None if there is no data, dfile, sensor)
    """
    print(datainfo.dpath + datainfo.name, sensor)

    # Everything that needs the experiment file is read before closing it
    # (the clean time list used to be requested from an already closed handle)
    f = datainfo.open_experiment_data(mode='r')
    try:
        data = datainfo.get_peaks_resample(f, dfile, sensor)
        ltime = None
        if clean and data is not None:
            lt = datainfo.get_clean_time(f, dfile, sensor)
            if lt is not None:
                ltime = list(lt[()])
    finally:
        datainfo.close_experiment_data(f)

    if data is None:
        return None, dfile, sensor

    pcap = datainfo.get_peaks_smooth_parameters('pcasmooth')
    baseline = datainfo.get_peaks_smooth_parameters('wbaseline')

    # if there is a clean list of peaks then the smoothing is computed only for the clean peaks
    if ltime is not None:
        print(data.shape)
        data = data[ltime]
        print(data.shape)

    if alt_smooth:
        parl = datainfo.get_peaks_alt_smooth_parameters('lambda')
        parp = datainfo.get_peaks_alt_smooth_parameters('p')
        trans = np.zeros((data.shape[0], data.shape[1]))
        for i in range(data.shape[0]):
            trans[i] = baseline_als(data[i, :], parl, parp)
    elif pcap:
        # PCA cannot extract more components than min(n_peaks, n_points)
        pca = PCA(n_components=min(data.shape[0], data.shape[1]))
        res = pca.fit_transform(data)
        ratios = pca.explained_variance_ratio_
        sexp = 0.0
        ncomp = 0
        # bounded so a vpca close to 1 cannot index past the last component
        while sexp < vpca and ncomp < len(ratios):
            sexp += ratios[ncomp]
            ncomp += 1
        components = ncomp
        print('VEX=', np.sum(ratios[0:components]), components)
        res[:, components:] = 0
        trans = pca.inverse_transform(res)
    else:
        trans = data

    # Crop the data to wtsel milliseconds around the midpoint of the window
    if recenter and wtsel is not None:
        # Original window size in milliseconds
        wtsel_orig = datainfo.get_peaks_resample_parameters('wtsel')
        npoints = trans.shape[1]
        # New window size in points
        wtlen = int(npoints * (float(wtsel) / wtsel_orig))
        if wtlen < 0 or wtlen > npoints:
            raise ValueError('wtsel (%s) does not fit in the resampled window (%s)' % (wtsel, wtsel_orig))
        # Points discarded at the end and at the beginning; the beginning
        # takes the extra point when the number of discarded points is odd
        wtdisc = (npoints - wtlen) // 2
        wtdisci = npoints - wtlen - wtdisc
        trans = np.array(trans[:, wtdisci:npoints - wtdisc], dtype=float)

    # Substract the basal
    npoints = trans.shape[1]
    if mbasal == 'meanfirst':
        for row in range(trans.shape[0]):
            trans[row] -= np.mean(trans[row, :baseline])
    elif mbasal == 'meanmin':
        for row in range(trans.shape[0]):
            vals = np.sort(trans[row, 0:npoints // 2])
            trans[row] -= np.mean(vals[:baseline])
    elif mbasal == 'meanlast':
        # This slot held a second, unreachable copy of the 'meanmin' branch
        for row in range(trans.shape[0]):
            trans[row] -= np.mean(trans[row, (npoints // 3) * 2:npoints])
    elif mbasal == 'globalmeanfirst':
        # A single basal value shared by all the peaks
        globbasal = np.mean(trans[:, :baseline])
        trans -= globbasal
    elif mbasal == 'alternative':
        for row in range(trans.shape[0]):
            basal = find_baseline(trans[row, 0:npoints // 2], resolution=25)
            trans[row] -= basal

    if wavy:
        sel = outliers_wavy(trans)
        trans = trans[sel]

    return trans, dfile, sensor
# Loads the resampled peaks (raw and PCA smoothed) of the first datafile for
# sensor `s` and plots two baseline estimates for the first few peaks
for dfiles in [datainfo.datafiles[0]]:
    print(dfiles)
    d = f[dfiles + '/' + s + '/' + 'PeaksResample']
    dataf = d[()]
    ldatap.append(dataf)
    # d = f[dfiles + '/' + s + '/' + 'Time']
    # times = d[()]
    # ltimes.append(times)
    d = f[dfiles + '/' + s + '/' + 'PeaksResamplePCA']
    dataf = d[()]
    ldatappca.append(dataf)

data = ldatap[0]  # np.concatenate(ldata)
datapca = ldatappca[0]  # np.concatenate(ldata)
# ptime = ltimes[0]
# print(len(data))

# Integer division: the value is used as a slice bound
long = data.shape[1] // 3
# At most five peaks, fewer when the dataset is smaller
for i in range(min(5, data.shape[0])):  # range(data.shape[0]):
    # print dataraw[i]
    # print data[i]
    # print('T = %d'%ptime[i])
    base = baseline_als(data[i], 5, 0.9)
    show_signal(base, find_baseline(data[i, :long], resolution=50))
    # NOTE(review): the slice starts at the peak index i, which looks
    # unintended (base[:long]?) -- left unchanged, confirm with the author
    show_signal(base, find_baseline(base[i:long], resolution=100))
    # show_two_signals(data[i],datapca[i])
    # show_signal(datapca[i])