# Decompose the data with emd.sift.sift; see help(emd.sift.sift) for the
# documentation of the function.
imf = emd.sift.sift(data, sift_thresh = 1e-8)

# Removing the borders of the data: edge effects can be quite heavy at high
# frequencies. int(T*srate) samples are kept, starting `offset` samples in,
# and the same cut is applied to every series so that they stay aligned.
T = 30
offset = 500
data = data[offset:int(T*srate)+offset]
times = times[offset:int(T*srate)+offset]
WF = WF[offset:int(T*srate)+offset]
imf = imf[offset:int(T*srate)+offset,:]

print("Data are {}s long".format(len(data)/srate))

# Computing the PSD of the first EMD mode (the high-frequency residuals), of
# the data and of WF. The same MESA object is re-solved for each series and
# every spectrum is evaluated on the same frequency grid `freq`.
M = MESA()
M.solve(imf[:,0], method = 'Standard', optimisation_method = 'CAT')
freq = np.linspace(1./times[-1], 0.5*srate,1000) #vector for evaluating the spectrum
spec_imf = M.spectrum(1/srate,freq)
M.solve(data, method = 'Standard')
spec_data = M.spectrum(1/srate,freq)
# NOTE(review): "standard" is lower-case here but 'Standard' above -- confirm
# that MESA.solve treats the method name case-insensitively.
M.solve(WF, method = "standard")
spec_WF = M.spectrum(1/srate,freq)

# Running the pipeline on the first EMD mode, with WF passed as the injection
data_pipeline = imf[:,0]
AnomalyDetection_pipeline(data_pipeline, srate, T_train = 10., N_step = 20000, outfile = None, plot = True, injection = WF)

# Plotting everything
# In the order 0 mode, you can see the chirp of the injected signal!!
fig = plt.figure( figsize=(8,4) )
import numpy as np
import scipy.stats 
import matplotlib.pyplot as plt
from style_sheet import init_plotting
from scipy.interpolate import interp1d 
import matplotlib.ticker as ticker

def relative_error(real, estimate):
    """Return the relative error of ``estimate`` with respect to ``real``.

    Computed as ``|real - estimate| / |real|``; with NumPy arrays the
    operation is element-wise. The magnitude of the reference is used in the
    denominator so that the result is non-negative even where ``real`` is
    negative (dividing by the signed value would flip the sign of the error).
    """
    return np.abs(real - estimate) / np.abs(real)

# Configuration of the run.
PSD = 'ligo' # which PSD to work with: 'normal' or 'ligo'
save = False
# Output directory, one sub-folder per PSD choice
savedir = '../paper/Images/optimisers_comparison/' + PSD + '/'
simulate_data = False
# Labels used as keys of the dictionaries below -- presumably the MESA
# optimisation methods being compared; confirm against the code that uses them.
methods = ['FPE', 'CAT', 'OBD']
M = MESA()
init_plotting()
plt.close()
# Plot colour associated with each method
colors = {'FPE': 'r', 'CAT': 'k', 'OBD': 'green'}

print("Dealing with ",PSD)

# Initialise the single-spectrum dictionaries: one empty list per method
spectra, optimisers, orders, errors = {}, {}, {}, {}
for method in methods:
    spectra[method], optimisers[method], orders[method], errors[method] = [], [], [], []

# Ensemble dictionaries (left empty here; filled by code outside this fragment)
median, p5, p95, ensemble_error = {}, {}, {}, {}
#Generating noise 
def plot_PSD_imf(imf, data, srate, WF = None, title = None, folder = '.'):
    """Plot the MESA spectrum of the data together with those of its EMD modes.

    One log-log curve is drawn for ``data``, one for ``WF`` (when given) and
    one for each column of ``imf`` (when given). Only the first
    ``len(data)//2`` points of every spectrum are plotted.

    Parameters
    ----------
    imf : 2-D array or None
        Modes to plot, one per column (``imf[:, i]``). Skipped if None.
    data : array-like
        Series whose spectrum is plotted with the label "data".
    srate : float
        Sampling rate; ``1/srate`` is passed to ``MESA.spectrum``.
    WF : array-like, optional
        Extra series plotted with the label "WF".
    title : str, optional
        Appended to the figure title and to the name of the saved file.
    folder : str or None
        Folder where the PDF is written. If None, nothing is saved.
    """
    M = MESA()
    fig = plt.figure(figsize=(8, 4))
    ax = fig.gca()

    heading = "PSD of the EMD modes vs PSD of data"
    if title is not None:
        heading = heading + ' - ' + title
    plt.title(heading)

    dt = 1/srate
    n_half = len(data)//2

    def draw_current_spectrum(label):
        # Plot the first half of the spectrum of whatever M was last solved on.
        f, spec = M.spectrum(dt)
        ax.loglog(f[:n_half], spec[:n_half], label=label)

    # spectrum of data
    M.solve(data, method='Fast', optimisation_method='CAT')
    draw_current_spectrum("data")

    if WF is not None:
        M.solve(WF, method="standard")
        draw_current_spectrum("WF")

    if imf is not None:
        n_modes = imf.shape[1]
        for k in range(n_modes):
            print("EMD component {}/{}".format(k + 1, n_modes))
            M.solve(imf[:, k], method='Standard', optimisation_method='CAT')
            draw_current_spectrum("EMD {}".format(k + 1))

    plt.legend()

    if folder is None:
        return
    if title is None:
        target = folder + "/PSD_imf.pdf"
    else:
        target = folder + "/PSD_imf-{}.pdf".format(title)
    fig.savefig(target)
# Example no. 4
# 0
# Diagnostic plots comparing the raw and the band-passed data. The block is
# disabled: change the condition to True to run it.
if False: #plot bandpassing
	# Time series: raw data, filtered data and a red vertical line at t_merger
	plt.figure()
	plt.title("Data vs filtered data")

	plt.plot(times,data)
	plt.plot(times,data_pass)
	#plt.axvline(1126259462.4-1126259447, c = 'r')
	plt.axvline(t_merger, c = 'r')


	# PSD of the filtered data (blue) and of the raw data (red), both
	# evaluated on the same 1000-point frequency grid.
	fig_PSDseries = plt.figure(2)
	ax_PSDseries  = fig_PSDseries.add_subplot(111)
	ax_PSDseries.set_ylabel("PSD")
	ax_PSDseries.set_yscale('log')
	M = MESA()
	M.solve(data_pass, early_stop = True, method = 'Standard')

	freq = np.linspace(1./times[-1], 0.5*srate,1000)
	spec = M.spectrum(1/srate,freq)
	# NOTE(review): np.abs is applied to this spectrum but not to the one
	# plotted below -- confirm whether the difference is intended.
	ax_PSDseries.loglog(freq, np.abs(spec), c = 'b', label= "bandpass")
	M.solve(data, method = 'Standard')
	# Same grid as above, recomputed
	freq = np.linspace(1./times[-1], 0.5*srate,1000)
	spec = M.spectrum(1/srate,freq)
	ax_PSDseries.loglog(freq, spec, c = 'r', label= "standard data")
	plt.legend()
	plt.show()

# From here on, `data` refers to the band-passed series
data = data_pass
#data = data + WF
        print("Using real data")
        #data = np.loadtxt("../examples/data/V-V1_GWOSC_4KHZ_R1-1186741846-32.txt")
        #data = pd.read_csv("../../GWAnomalyDetection/maxent/H-H1_GWOSC_16KHZ_R1-1126259447-32.txt.gz", skiprows = 3).to_numpy()
        data = pd.read_csv("H-H1_GWOSC_4KHZ_R1-1246525177-4096.txt.gz",
                           skiprows=3).to_numpy()

    # Drop singleton dimensions from the loaded array
    data = np.squeeze(data)
    print("Loaded data: shape {}; srate {}; length {}s".format(
        data.shape, srate,
        len(data) / srate))

    # For every batch length T, estimate the PSD of the same stretch of data
    # with MESA and with psd() (labelled "Welch" below) and save both.
    for i, T in enumerate(T_list):
        print("Batch length: {}s".format(T))
        # int(srate * T) samples, ending 4096 * 5 samples before the end of data
        data_T = data[-int(srate * T) - 4096 * 5:-4096 * 5]

        M = MESA()
        M.solve(data_T, early_stop=True, method='Standard')
        print("\tDone MESA")
        # seglen[i] is divided by srate before being passed on -- presumably a
        # segment length in samples converted to seconds; confirm against psd().
        freqs, PSD_Welch = psd(data_T,
                               srate,
                               seglen[i] / float(srate),
                               window_function=None,
                               overlap_fraction=0.5,
                               nfft=None,
                               return_onesided=False)
        print("\tDone Welch")
        # Evaluate the MESA spectrum on the frequencies returned by psd()
        PSD_MESA = M.spectrum(1. / srate, freqs)

        # One file per batch length; columns: frequency, MESA PSD, Welch PSD
        np.savetxt("plot_data/plot_{}_{}.txt".format(T, use_fake_data),
                   np.column_stack([freqs, PSD_MESA, PSD_Welch]))
    # Load the first int(T * 4096) samples of the file. The factor 4096 is
    # hard-coded -- presumably the sampling rate of the file (its name says
    # 4KHZ); confirm before using another data file.
    T = 4
    datafile = "data/V-V1_GWOSC_4KHZ_R1-1186741846-32.txt"
    data = np.loadtxt(datafile)[:int(T * 4096)]
    # Downsample by the integer factor int(4096 / srate) when a different
    # sampling rate is requested.
    if srate != 4096.:
        data = decimate(data, int(4096 / srate), zero_phase=True)
    # Optional band-pass between f_min_bp and f_max_bp; the band edges are
    # expressed as fractions of the Nyquist frequency 0.5 * srate.
    if bandpassing == 1:
        from scipy.signal import butter, filtfilt
        bb, ab = butter(4,
                        [f_min_bp / (0.5 * srate), f_max_bp / (0.5 * srate)],
                        btype='band')
        data = filtfilt(bb, ab, data)

    N = data.shape[0]
    # FFT frequency grid of the series, used below to evaluate the spectrum
    f = np.fft.fftfreq(N, d=dt)
    t = np.arange(0, T, step=dt)
    M = MESA()
    # Time the MESA solution. m evaluates to int(N / log(N)) -- presumably the
    # maximum order explored by the solver; confirm against MESA.solve.
    start = time.perf_counter()
    P, ak, _ = M.solve(data,
                       method="Fast",
                       optimisation_method="FPE",
                       m=int(2 * N / (2 * np.log(N))))
    print("p = {0}".format(len(ak)))
    elapsed = time.perf_counter()
    elapsed = elapsed - start
    print("Time spent MESA: {0} s".format(elapsed))
    # Time the evaluation of the spectrum on the grid f
    start = time.perf_counter()
    PSD = M.spectrum(dt, f)
    elapsed = time.perf_counter()
    elapsed = elapsed - start
    print("Time spent PSD: {0} s".format(elapsed))
        Wtime, Wtime_series, Wfrequency, Wfrequency_series, Wpsd = generate_data(
            wnoise_frequency, wnoise_spectrum, times[-1], 1 / dt)
        w_interp = interp1d(wnoise_frequency, wnoise_spectrum)

    if Ligo_noise:
        print('Generating Ligo noise series')
        #ligo_frequency, ligo_spectrum = np.loadtxt('LIGO-P1200087-v18-AdV_DESIGN_psd.dat', unpack=True)
        ligo_frequency, ligo_spectrum, _ = np.loadtxt(
            'GWTC1_GW150914_PSDs.dat', unpack=True)
        Ltime, Ltime_series, Lfrequency, Lfrequency_series, Lpsd = generate_data(
            ligo_frequency, ligo_spectrum, times[-1], 1 / dt)
        l_interp = interp1d(ligo_frequency,
                            ligo_spectrum,
                            fill_value='extrapolate')

    M = MESA()
    init_plotting()

    for i, n in enumerate(segment_length):
        if white_noise:
            w_welchFreq, w_welchSpectrum = welch.psd(Wtime_series[:int(N[i])],
                                                     1 / dt, n * dt)
            M.solve(Wtime_series[:int(N[i])])
            w_mesaSpectrum, w_mesaFreq = M.spectrum(dt)

            fig, ax = plt.subplots()
            ax.loglog(w_welchFreq[:n // 2],
                      w_welchSpectrum[:n // 2],
                      color='blue')
            ax.loglog(w_mesaFreq[:int(N[i] // 2)],
                      w_mesaSpectrum[:int(N[i] // 2)],
# Example no. 8
# 0
    parser.add_option('-T', default=None, type='float', help='duration of the data')
    (opts,args)=parser.parse_args()
    import time
    import matplotlib.pyplot as plt
    from scipy.signal import decimate
    
    # Sampling rate and input file come from the parsed options
    srate = opts.srate
    dt = 1./srate
    datafile = opts.data
    # Comma-separated file with a header row; the 'CloseLast' column is read
    # and its order reversed.
    d = np.genfromtxt(datafile, delimiter=',', names=True)
    data = d['CloseLast'][::-1]
    # NOTE(review): T is the number of samples here, so `t` below runs from 0
    # to len(data) in steps of dt (len(data)/dt points), which matches the
    # data only when dt == 1. The '-T' option parsed above is not used in this
    # fragment -- confirm what was intended.
    T = len(data)
    N = data.shape[0]
    # FFT frequency grid of the series, used below to evaluate the spectrum
    f = np.fft.fftfreq(N, d=dt)
    t = np.arange(0,T,step=dt)
    M = MESA(data)
    # Time the MESA solution. m evaluates to int(N / log(N)) -- presumably the
    # maximum order explored by the solver; confirm against MESA.solve.
    start = time.perf_counter()
    P, ak, _ = M.solve(method = "Fast", optimisation_method = "FPE", m = int(2*N/(2*np.log(N))))
    print("p = {0}".format(len(ak)))
    elapsed = time.perf_counter()
    elapsed = elapsed - start
    print ("Time spent MESA: {0} s".format(elapsed))
    # Time the evaluation of the spectrum on the grid f. The measured interval
    # also includes the print(PSD) call below.
    start = time.perf_counter()
    PSD    = M.spectrum(dt,f)
    print(PSD)
    
    elapsed = time.perf_counter()
    elapsed = elapsed - start
    print ("Time spent PSD: {0} s".format(elapsed))

    fig = plt.figure(1)
# Example no. 9
# 0
# Load the first int(T * 4096) samples of the file. The factor 4096 is
# hard-coded -- presumably the sampling rate of the file (its name says 4KHZ);
# confirm before using another data file.
T = 6
t_forecast = 4  # not used within this fragment
datafile = "V-V1_GWOSC_4KHZ_R1-1186741846-32.txt"
data = np.loadtxt(datafile)[:int(T * 4096)]
# Downsample by the integer factor int(4096 / srate) when a different sampling
# rate is requested.
if srate != 4096.:
    data = decimate(data, int(4096 / srate), zero_phase=True)
# Optional band-pass between f_min_bp and f_max_bp; the band edges are
# expressed as fractions of the Nyquist frequency 0.5 * srate.
if bandpassing == 1:
    from scipy.signal import butter, filtfilt
    bb, ab = butter(4, [f_min_bp / (0.5 * srate), f_max_bp / (0.5 * srate)],
                    btype='band')
    data = filtfilt(bb, ab, data)

N = data.shape[0]
# FFT frequency grid of the series, used below to evaluate the spectrum
f = np.fft.fftfreq(N, d=dt)
t = np.arange(0, T, step=dt)
# In this fragment the data are handed to the MESA constructor and solve() is
# called without them.
M = MESA(data)
# Time the MESA solution. m evaluates to int(N / log(N)) -- presumably the
# maximum order explored by the solver; confirm against MESA.solve.
start = time.perf_counter()
P, ak, _ = M.solve(method="Fast",
                   optimisation_method="FPE",
                   m=int(2 * N / (2 * np.log(N))))
print("p = {}".format(len(ak)))
elapsed = time.perf_counter()
elapsed = elapsed - start
print("Time spent MESA: {0} s".format(elapsed))
# Time the evaluation of the spectrum on the grid f
start = time.perf_counter()
PSD = M.spectrum(dt, f)
elapsed = time.perf_counter()
elapsed = elapsed - start
print("Time spent PSD: {0} s".format(elapsed))

fig = plt.figure(1)
# Example no. 10
# 0
def AnomalyDetection_pipeline(data, srate, T_train, N_step, outfile = None, plot = True, GPS_time = 0.,injection = None, method = "FPE"):
    """Run the anomaly-detection pipeline on a time series.

    A MESA model is fitted on the first ``T_train`` seconds of ``data``. The
    remaining samples are then processed in batches that start every
    ``N_step`` samples: for each batch, forecasts are generated from the
    samples that precede it and passed, together with the batch itself, to
    ``data_LL_gauss``. The values it returns (the "LL" series) are collected
    for every batch.

    Parameters
    ----------
    data : array-like
        Time series to analyse, indexed by sample.
    srate : float
        Sampling rate; sample ``k`` is placed at time ``k / srate``.
    T_train : float
        Length, in seconds, of the leading stretch used to fit the model.
    N_step : int
        Number of samples between the starts of consecutive batches. Every
        batch holds ``N_step + 1`` samples, so neighbouring batches share one.
    outfile : str, optional
        If a string, the result dictionary is pickled to this path
        (``.pkl`` is appended when missing). Nothing is written otherwise.
    plot : bool
        If True, the time series with the forecasts, the LL series and the
        PSD of the training data are drawn on three new figures.
    GPS_time : float
        Stored unchanged under the ``'GPS'`` key of the result.
    injection : array-like, optional
        If given and ``plot`` is True, it is drawn batch by batch in a panel
        below the LL series.
    method : str
        Passed to ``MESA.solve`` as ``optimisation_method``.

    Returns
    -------
    dict
        ``'GPS'`` and ``'srate'`` (as given), ``'N_batches'``, ``'t_start'``
        (array with the start time of every batch) and ``'LL'`` (array with
        one row per batch). The batches are collected whether or not
        ``outfile`` is given.
    """
    print("Running pipeline @ GPS time {}.\n\tT_train, N_step = {},{}".format(GPS_time,T_train, N_step))

    #TODO: save LL in pickle, so that the GPS time can be kept as an int and the LL times as floats

    id_start = int(T_train*srate)  # index of the first sample to be forecast
    train_data = data[:id_start]
    # Sample k sits at k/srate. (np.linspace(0, N/srate, N) would include the
    # endpoint and stretch the spacing to N/((N-1)*srate).)
    times = np.arange(len(data))/srate

    M = MESA()
    M.solve(train_data, method = "Standard", optimisation_method = method, early_stop = True)

    Np = 100  # number of predictions requested from M.forecast for each batch

    if plot:
        # Time-series figure, starting with the training stretch
        fig_timeseries = plt.figure()
        ax_timeseries = fig_timeseries.add_subplot(111)
        ax_timeseries.set_ylabel("Strain")
        ax_timeseries.plot(times[:id_start], train_data, linewidth=1., color='r', zorder = 3)

        # LL figure, with a second panel for the injection when there is one
        fig_LLseries = plt.figure()
        if injection is not None:
            ax_LLseries = fig_LLseries.add_subplot(211)
            ax_WF = fig_LLseries.add_subplot(212)
            ax_WF.set_ylabel("Strain")
        else:
            ax_LLseries = fig_LLseries.add_subplot(111)
        ax_LLseries.set_ylabel("LL")
        ax_LLseries.set_yscale('log')

        # PSD of the training data. The title is set on the axes that carry
        # the curve, rather than through plt.title before those axes exist.
        fig_PSDseries = plt.figure()
        ax_PSDseries = fig_PSDseries.add_subplot(111)
        ax_PSDseries.set_title("PSD of {}s of training data".format(T_train))
        ax_PSDseries.set_ylabel("PSD")
        ax_PSDseries.set_yscale('log')
        freq = np.linspace(1./T_train, 0.5*srate,1000)
        spec = M.spectrum(1/srate,freq)
        ax_PSDseries.loglog(freq, spec)

    LL_list = []
    t_start_list = []

    batch_starts = range(id_start, len(data)-N_step, N_step)
    n_batches = len(batch_starts)
    for i, id_ in enumerate(batch_starts):
        sys.stderr.write("\rAnalysing batch {} of {}: t in [{},{}]".format(
            i+1, n_batches, times[id_], times[id_+N_step]))

        batch_stop = id_ + N_step + 1
        times_batch = times[id_:batch_stop]
        data_batch = data[id_:batch_stop]  # data to predict

        # The forecast is seeded with the max(p, 1) samples before the batch
        forecast_basis = data[id_-max(M.get_p(),1):id_]
        # Np forecasts of N_step+1 samples each -- presumably an array of
        # shape (Np, N_step+1); confirm against MESA.forecast.
        predictions = M.forecast(forecast_basis, N_step+1, Np)

        LL = data_LL_gauss(data_batch, predictions)

        if plot:
            # The percentiles are only needed for drawing
            l, m, h = np.percentile(predictions, [5,50,95],axis=0)

            # NOTE(review): the 1e5 offset presumably keeps the curve positive
            # on the logarithmic axis -- confirm.
            ax_LLseries.plot(times_batch, np.cumsum(LL)+1e5)
            ax_timeseries.fill_between(times_batch,l,h,facecolor='turquoise',alpha=0.8, zorder = 0)
            ax_timeseries.plot(times_batch, m, ':', linewidth=.7, color='b', zorder = 2, label = "Median prediction")
            ax_timeseries.plot(times_batch, data_batch, linewidth=1., color='r', zorder = 3, label = "Data")

            # Build the legend once, so that its entries are not repeated
            if i == 0:
                ax_timeseries.legend()

            if injection is not None:
                WF_batch = injection[id_:batch_stop]
                ax_WF.plot(times_batch, WF_batch, linewidth=1., color='k', zorder = 3)

        # Collected for every batch, so that the returned dictionary is
        # populated even when no output file is requested.
        LL_list.append(LL)
        t_start_list.append(times_batch[0])
    sys.stderr.write('\n')

    # Output dictionary
    LL_dict = {
        'GPS': GPS_time,
        'srate': srate,
        'N_batches': len(LL_list),
        't_start': np.array(t_start_list),  # (N_batches,)
        'LL': np.array(LL_list),  # one row per batch
    }

    if isinstance(outfile, str):
        if not outfile.endswith('.pkl'):
            outfile += '.pkl'
        with open(outfile, 'wb') as fh:
            pickle.dump(LL_dict, fh, pickle.HIGHEST_PROTOCOL)

    return LL_dict