def do_the_job(dpath, dname, dfile, sensor, nn, nstd=6, wavy=5):
    """
    Identifies the outliers among the resampled peaks of one sensor of one data file.

    A peak is reported as an outlier when either
      * its mean distance to its nearest neighbours is more than ``nstd`` standard
        deviations above the mean of that quantity over all the peaks, or
      * ``is_wavy_signal(peak, wavy)`` is true.
    Every reported peak is also displayed with ``show_signal``.

    :param dpath: prefix of the HDF5 file path (the file opened is ``dpath + dname + '.hdf5'``)
    :param dname: name of the HDF5 file without the ``.hdf5`` extension
    :param dfile: first component of the dataset path inside the HDF5 file
    :param sensor: second component of the dataset path inside the HDF5 file
    :param nn: number of neighbours asked to the nearest-neighbours model; the first
               one is discarded, so the mean is taken over ``nn - 1`` distances
               (``nn`` = 1 therefore yields undefined distances)
    :param nstd: number of standard deviations over the mean k-nn distance that
                 makes a peak an outlier
    :param wavy: second argument passed through to ``is_wavy_signal``
    :return: tuple ``(dfile, lout)`` where ``lout`` is the list of row indices of
             the outlier peaks, in increasing order
    """
    print('Processing ', sensor, dfile)

    # Read the dataset inside a context manager so the HDF5 handle is always
    # released; the original left the file open after returning.
    with h5py.File(dpath + dname + '.hdf5', 'r') as f:
        data = f[dfile + '/' + sensor + '/' + 'PeaksResample'][()]

    neigh = NearestNeighbors(n_neighbors=nn)
    neigh.fit(data)

    # One batched query on the 2-D matrix instead of one query per 1-D row
    # (scikit-learn expects 2-D input for kneighbors).
    # Column 0 is dropped exactly as the original dropped the first neighbour:
    # each queried peak belongs to the fitted set, so its closest match is
    # itself at distance 0.
    knn_dist = neigh.kneighbors(data, return_distance=True)[0]
    vdist = np.sum(knn_dist[:, 1:], axis=1) / (nn - 1)

    # Distance above which a peak is considered too far from its neighbours
    threshold = np.mean(vdist) + (nstd * np.std(vdist))

    lout = []
    for i in range(data.shape[0]):
        # `or` short-circuits, so is_wavy_signal is only evaluated for peaks
        # that are not already distance outliers (same as the original elif)
        if vdist[i] > threshold or is_wavy_signal(data[i], wavy):
            lout.append(i)
            show_signal(data[i])

    return dfile, lout
from scipy.signal import butter, filtfilt

# Experiment to browse (a previous choice, kept for reference, was 'e120503')
lexperiments = ['e160317']
expname = lexperiments[0]
datainfo = experiments[expname]

# The HDF5 file lives at <dpath><name>/<name>.hdf5 and is opened read-only
f = h5py.File(datainfo.dpath + datainfo.name + '/' + datainfo.name + '.hdf5', 'r')

# Index of the data file and of the sensor (column of the raw matrix) to show
nfile, nsensor = 23, 5
# Time window bounds; not used by the statements of this fragment
tinit, tfin = 0, 600000

dfile = datainfo.datafiles[nfile]
print(dfile)
print(datainfo.sensors[nsensor])

# Raw recording of the selected file, its 'Sampling' attribute and an
# in-memory copy of the whole dataset
d = f[dfile + '/' + 'Raw']
samp = f[dfile + '/Raw'].attrs['Sampling']
data = d[()]

# Walk the recording in consecutive windows of 500000 samples, printing the
# start offset of each window and displaying the selected sensor
for i in range(0, d.shape[0], 500000):
    print(i)
    show_signal(data[i:i + 500000, nsensor])
# Pick the first experiment of the list and open its data file read-only
expname = lexperiments[0]
datainfo = experiments[expname]
print(datainfo.dpath + datainfo.name + '/' + datainfo.name)
f = datainfo.open_experiment_data(mode='r')

# The --extra flag switches from the regular sensors to the extra sensors
lsensors = datainfo.extrasensors if args.extra else datainfo.sensors

# Only the first sensor and the first data file are browsed
for sensor in [lsensors[0]]:
    print(sensor)
    for dfile in [datainfo.datafiles[0]]:
        # args.raw selects which version of the peaks is read:
        # 0 -> peaks, 1 -> resampled peaks, anything else -> resampled + PCA
        if args.raw == 0:
            peaks_reader = datainfo.get_peaks
        elif args.raw == 1:
            peaks_reader = datainfo.get_peaks_resample
        else:
            peaks_reader = datainfo.get_peaks_resample_PCA
        data = peaks_reader(f, dfile, sensor)

        for i in range(data.shape[0]):
            # Peaks of the extra sensors are displayed sign-flipped
            show_signal(-data[i] if args.extra else data[i])
# Hard-coded configuration: overrides the experiment list and the
# command-line options (extra sensors, resampled peaks)
lexperiments = ['e150514']
args.extra = True
args.raw = 1

# Pick the first experiment of the list and open its data file read-only
expname = lexperiments[0]
datainfo = experiments[expname]
print(datainfo.dpath + datainfo.name + '/' + datainfo.name)
f = datainfo.open_experiment_data(mode='r')

# The extra flag switches from the regular sensors to the extra sensors
lsensors = datainfo.extrasensors if args.extra else datainfo.sensors

# Only the first sensor and the first data file are browsed
for sensor in [lsensors[0]]:
    print(sensor)
    for dfile in [datainfo.datafiles[0]]:
        # args.raw selects which version of the peaks is read:
        # 0 -> peaks, 1 -> resampled peaks, anything else -> resampled + PCA
        if args.raw == 0:
            peaks_reader = datainfo.get_peaks
        elif args.raw == 1:
            peaks_reader = datainfo.get_peaks_resample
        else:
            peaks_reader = datainfo.get_peaks_resample_PCA
        data = peaks_reader(f, dfile, sensor)

        for i in range(data.shape[0]):
            # Peaks of the extra sensors are displayed sign-flipped
            show_signal(-data[i] if args.extra else data[i])
# Experiment to browse (a previous choice, kept for reference, was 'e120503')
lexperiments = ["e160317"]
expname = lexperiments[0]
datainfo = experiments[expname]

# The HDF5 file lives at <dpath><name>/<name>.hdf5 and is opened read-only
f = h5py.File(
    datainfo.dpath + datainfo.name + "/" + datainfo.name + ".hdf5",
    "r",
)

# Index of the data file and of the sensor (column of the raw matrix) to show
nfile, nsensor = 23, 5
# Time window bounds; not used by the statements of this fragment
tinit, tfin = 0, 600000

dfile = datainfo.datafiles[nfile]
print(dfile)
print(datainfo.sensors[nsensor])

# Raw recording of the selected file, its "Sampling" attribute and an
# in-memory copy of the whole dataset
d = f[dfile + "/" + "Raw"]
samp = f[dfile + "/Raw"].attrs["Sampling"]
data = d[()]

# Walk the recording in consecutive windows of 500000 samples, printing the
# start offset of each window and displaying the selected sensor
for i in range(0, d.shape[0], 500000):
    print(i)
    show_signal(data[i : i + 500000, nsensor])
# One entry per data file: the 'PeaksResample' and 'PeaksResamplePCA' datasets
# of sensor `s`. The 'Time' dataset is currently not loaded, so ltimes stays
# empty.
ldatap = []
ldatappca = []
ltimes = []
for dfiles in [datainfo.datafiles[0]]:
    print(dfiles)
    d = f[dfiles + '/' + s + '/' + 'PeaksResample']
    dataf = d[()]
    ldatap.append(dataf)
    d = f[dfiles + '/' + s + '/' + 'PeaksResamplePCA']
    dataf = d[()]
    ldatappca.append(dataf)

# Only the first (and only) loaded file is inspected
data = ldatap[0]
datapca = ldatappca[0]

# Length of the first third of a peak. Floor division keeps it an int: with
# true division (Python 3) the value is a float and cannot be used as a slice
# bound below.
long = data.shape[1] // 3

# Show at most the first five peaks; bounded by the number of rows so that a
# file with fewer than five peaks does not raise IndexError.
for i in range(min(5, data.shape[0])):
    base = baseline_als(data[i], 5, 0.9)
    show_signal(base, find_baseline(data[i, :long], resolution=50))
    # NOTE(review): `base` is already the baseline of peak i, so starting the
    # slice at the loop index `i` looks unintended (presumably `base[:long]`
    # was meant) -- confirm before changing.
    show_signal(base, find_baseline(base[i:long], resolution=100))
# Uses the print() function, like the other fragments of this file; the
# Python 2 print statement is a syntax error under Python 3.
print(s)

# One entry per data file: the 'Peaks', 'PeaksFilter' and 'PeaksResamplePCA'
# datasets of sensor `s`
ldatap = []
ldatappca = []
ldataraw = []
for dfiles in [datainfo.datafiles[0]]:
    print(dfiles)
    d = f[dfiles + '/' + s + '/' + 'Peaks']
    dataf = d[()]
    ldataraw.append(dataf)
    d = f[dfiles + '/' + s + '/' + 'PeaksFilter']
    dataf = d[()]
    ldatap.append(dataf)
    d = f[dfiles + '/' + s + '/' + 'PeaksResamplePCA']
    dataf = d[()]
    ldatappca.append(dataf)

# Only the first (and only) loaded file is inspected
data = ldatap[0]
datapca = ldatappca[0]
dataraw = ldataraw[0]

print(data.shape, datapca.shape, dataraw.shape)
print(len(data))

# For every peak show, in this order, its 'Peaks', 'PeaksFilter' and
# 'PeaksResamplePCA' versions. The three arrays are indexed with the same
# row index, so they are assumed to hold the same number of peaks.
for i in range(dataraw.shape[0]):
    print(i)
    show_signal(dataraw[i])
    show_signal(data[i])
    show_signal(datapca[i])
# Append, for the first data file, the 'PeaksResample' and 'PeaksResamplePCA'
# datasets of sensor `s` to the lists ldatap / ldatappca, which must already
# exist when this fragment runs. The 'Time' dataset is currently not loaded.
for dfiles in [datainfo.datafiles[0]]:
    print(dfiles)
    d = f[dfiles + '/' + s + '/' + 'PeaksResample']
    dataf = d[()]
    ldatap.append(dataf)
    d = f[dfiles + '/' + s + '/' + 'PeaksResamplePCA']
    dataf = d[()]
    ldatappca.append(dataf)

# Only the first entry of each list is inspected
data = ldatap[0]
datapca = ldatappca[0]

# Length of the first third of a peak. Floor division keeps it an int: with
# true division (Python 3) the value is a float and cannot be used as a slice
# bound below.
long = data.shape[1] // 3

# Show at most the first five peaks; bounded by the number of rows so that a
# file with fewer than five peaks does not raise IndexError.
for i in range(min(5, data.shape[0])):
    base = baseline_als(data[i], 5, 0.9)
    show_signal(base, find_baseline(data[i, :long], resolution=50))
    # NOTE(review): `base` is already the baseline of peak i, so starting the
    # slice at the loop index `i` looks unintended (presumably `base[:long]`
    # was meant) -- confirm before changing.
    show_signal(base, find_baseline(base[i:long], resolution=100))