def main_compare_gif_gotic2():
    """Plot GIF strain against GOTIC strain for 15-21 October 2018.

    Both series are linearly detrended; the GOTIC series is additionally
    multiplied by 0.9. The resulting figure is written to ``result.png`` in
    the current working directory.
    """
    t_begin = tconvert('Oct 15 2018 00:00:00')
    t_stop = tconvert('Oct 21 2018 00:00:00')

    # GIF data: flatten the per-segment file lists into one list of paths.
    gif_root = '/Users/miyo/Dropbox/KagraData/gif/'
    gif_paths = []
    for file_group in GifData.findfiles(t_begin, t_stop, 'CALC_STRAIN',
                                        prefix=gif_root):
        gif_paths.extend(file_group)
    gif_strain = TimeSeries.read(
        source=gif_paths,
        name='CALC_STRAIN',
        format='gif',
        pad=numpy.nan,
        nproc=2,
    ).detrend('linear')

    # GOTIC data: use the ``.x`` attribute of the file, detrend, then scale.
    gotic_file = '201805010000_201811010000.gotic'
    gotic_x = KagraGoticStrain.read(gotic_file, start=t_begin, end=t_stop).x
    gotic_x = gotic_x.detrend('linear') * 0.9

    # Draw both series on GPS-scaled axes and save the figure.
    figure = Plot(gotic_x, gif_strain, xscale='auto-gps')
    figure.legend()
    figure.subplots_adjust(right=.86)
    figure.savefig('result.png')
    figure.close()
def plot_spectra(clusters, channel, unit='cts', xlog=True, legend=None,
                 xlim=None, **kwargs):
    """Plot the stored spectra of one channel for several clusters.

    For every cluster the ``*.hdf5`` files in the current working directory
    are searched for an entry named ``'{cluster}-{channel}'``; the first file
    that provides it is used. All spectra are drawn in a single figure, which
    is saved as ``'{channel}.png'``.

    Parameters
    ----------
    clusters : iterable
        Cluster identifiers to plot.
    channel : str
        Channel name; also used as the figure title and output file stem.
    unit : str, optional
        Unit shown in the y-axis label (``[{unit}^2/Hz]``).
    xlog : bool, optional
        If true, use a logarithmic x-axis. The y-axis is always logarithmic.
    legend : sequence, optional
        Legend entries; defaults to ``clusters``.
    xlim : tuple, optional
        x-axis limits; left untouched when ``None``.
    **kwargs
        Forwarded to ``gwpy.plot.Plot``.

    Raises
    ------
    KeyError
        If none of the files provides the spectrum of a cluster.
    """
    from glob import glob
    from gwpy.frequencyseries import FrequencySeries
    from gwpy.plot import Plot

    title = channel

    # The directory listing does not depend on the cluster, so scan it once
    # instead of once per cluster.
    candidates = glob('*.hdf5')

    psds = {}
    for cluster in clusters:
        for filename in candidates:
            try:
                psds[cluster] = FrequencySeries.read(
                    filename, f'{cluster}-{channel}')
            except KeyError:
                # This file has no entry for the cluster; try the next one.
                continue
            print(f'found in {filename}')
            break
        else:
            raise KeyError(f'Could not find Nº{cluster}')

    if legend is None:
        legend = clusters

    # Plot the group in one figure.
    figure = Plot(*psds.values(), separate=False, sharex=True, zorder=1,
                  **kwargs)
    ax = figure.gca()
    if xlim is not None:
        ax.set_xlim(xlim)
    # NOTE(review): the y-range is hard-coded and applied regardless of
    # ``unit`` -- confirm it suits every channel this is called with.
    ax.set_ylim((1e-48, 1e-37))

    # Modify the figure as a whole.
    if xlog:
        ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_ylabel(f'Power Spectral Density [{unit}^2/Hz]')
    figure.suptitle(title)
    figure.legend(legend, prop={'size': 15})

    # Save to png.
    figure.save(f'{title}.png')
def save_legend(axes, pngfile, loc='center', frameon=False, fontsize='small',
                **kwargs):
    """Draw the legend of ``axes`` alone in a new figure and save it.

    The legend handles and labels are taken from ``axes``. ``loc``,
    ``frameon``, ``fontsize`` and any extra keyword arguments are forwarded
    to the new figure's ``legend`` call. The image is saved through
    ``save_figure`` with its bounding box set to the legend's extent, and the
    return value of ``save_figure`` is passed back to the caller.
    """
    legend_fig = Plot()
    handles, labels = axes.get_legend_handles_labels()
    legend = legend_fig.legend(
        handles,
        labels,
        loc=loc,
        frameon=frameon,
        fontsize=fontsize,
        **kwargs
    )

    # Draw every legend sample line with a width of 8.
    for sample_line in legend.get_lines():
        sample_line.set_linewidth(8)

    legend_fig.canvas.draw_idle()

    # The legend extent is reported in display units; the inverted DPI scale
    # transform converts it to the inches expected by ``bbox_inches``.
    to_inches = legend_fig.dpi_scale_trans.inverted()
    legend_box = legend.get_window_extent().transformed(to_inches)
    return save_figure(legend_fig, pngfile, bbox_inches=legend_box)
def representative_spectra(channels, start, stop, rate, label='kmeans-labels',
                           filename=DEFAULT_FILENAME, prefix='.',
                           downloader=TimeSeriesDict.get,
                           cluster_numbers=None, groups=None, **kwargs):
    """
    Make representative spectra for each cluster based on the median psd for
    minutes in that cluster.
    Downloads only the raw minutes in the cluster to save.

    Parameters
    ----------
    channels : list of str
        Channels to download, cache and compute spectra for.
    start, stop
        Time span of the labels to read; converted with ``to_gps``.
    rate
        Passed to ``better_aa_opts`` to build the resampling options applied
        to every downloaded series.
    label : str, optional
        Name of the label series inside ``filename``.
    filename : str, optional
        File the cluster labels are read from. Spectra are read from and
        written to the spectra cache file, not to this file.
    prefix : str, optional
        Passed to ``get_path`` for the cache file and the output images.
    downloader : callable, optional
        Called as ``downloader(channels, start=..., end=...)``; must return a
        mapping of channel name to series.
    cluster_numbers : list, optional
        Clusters to process. When ``None``, every cluster from 0 to the
        largest label is processed except the most populated one.
    groups : iterable of (channels, labels, title), optional
        One figure is produced per group and per cluster. When ``None``,
        every channel is plotted in a figure of its own.
    **kwargs
        Forwarded to ``Plot``.
    """
    if groups is None:
        # Each group is unpacked below as (channel names, legend labels,
        # title), so a bare list of channel names cannot serve as the
        # default: give every channel a figure of its own instead.
        groups = [([channel], [channel], channel) for channel in channels]

    # read the labels from the save file.
    labels = TimeSeries.read(filename, label, start=to_gps(start),
                             end=to_gps(stop))
    logger.info(f'Read labels {start} to {stop} from {filename}')
    label_values = labels.value

    if cluster_numbers is None:
        clusters = list(range(max(label_values) + 1))
        cluster_counts = [
            len(label_values[label_values == c]) for c in clusters]
        largest_count = max(cluster_counts)
        largest_cluster = cluster_counts.index(largest_count)
        clusters.remove(largest_cluster)
        logger.info(
            f'Largest cluster found to be Nº{largest_cluster} ({100 * largest_count // len(label_values)}%). Doing {clusters}.'
        )
        # Drop the matching count by position so that ``cluster_counts``
        # stays aligned with ``clusters``.
        cluster_counts.pop(largest_cluster)
    else:
        clusters = cluster_numbers
        cluster_counts = [
            len(label_values[label_values == c]) for c in clusters]

    # Split the label series into runs of constant label.
    # need to include start-| and |-end
    # l|r l|r l|r l|r
    # l,r l,r l,r l,r
    # l r,l r,l r,l r
    # zip(start + l[1:], r[:-1] + stop)
    t = labels.times
    changed = diff(label_values) != 0
    pairs = list(zip([t[0]] + list(t[:-1][changed]),
                     list(t[1:][changed]) + [t[-1]]))
    values = list(label_values[:-1][changed]) + [label_values[-1]]
    assert len(pairs) == len(values)

    print(pairs)
    for pair in pairs:
        print(int(pair[1].value) - int(pair[0].value))
    print(values)

    # use h5py to make a mutable object pointing to a file on disk.
    # From here on ``filename`` refers to the spectra cache file.
    save_file, filename = path2h5file(
        get_path(f'spectra-cache {start}', 'hdf5', prefix=prefix))
    logger.debug(f'Initiated hdf5 stream to {filename}')

    logger.info(f'Patching {filename}...')
    for (dl_start, end), value in zip(pairs, values):
        if value not in clusters:
            continue
        # NOTE(review): existence is checked at the run's end time while the
        # data is written under the run's start time -- confirm against
        # data_exists / write_to_disk.
        if data_exists(channels, to_gps(end).seconds, save_file):
            continue
        logger.debug(f'Downloading Nº{value} from {dl_start} to {end}...')
        try:
            dl = downloader(channels,
                            start=to_gps(dl_start) - LIGOTimeGPS(60),
                            end=to_gps(end) + LIGOTimeGPS(seconds=1))
            out = TimeSeriesDict()
            for n in dl:
                out[n] = dl[n].resample(**better_aa_opts(dl[n], rate))
            write_to_disk(out, to_gps(dl_start).seconds, save_file)
        except RuntimeError:
            # Cannot find all relevant data on any known server
            logger.warning(
                f"SKIPPING Nº{value} from {dl_start} to {end} !!")

    logger.info('Reading data...')
    data = TimeSeriesDict.read(save_file, channels)

    logger.info('Starting PSD generation...')

    # Frequency axis shared by all spectra, taken from the PSD of the last
    # 60 seconds of the first channel.
    reference = data[channels[0]]
    last_time = to_gps(reference.times[-1])
    f = reference.crop(start=last_time - LIGOTimeGPS(60),
                       end=last_time).psd().frequencies

    # NOTE(review): the progress origin is the second label time
    # (index 1), not the first -- confirm this is intended.
    origin = to_gps(labels.times[1]).seconds
    d = to_gps(labels.times[-1]).seconds - origin

    for i, cluster in enumerate(clusters):
        try:
            psds = {
                channel: FrequencySeries.read(filename,
                                              f'{cluster}-{channel}')
                for channel in channels
            }
        except KeyError:
            # Not cached yet: compute the median PSD over the cluster.
            logger.info(
                f'Doing Nº{cluster} ({100 * cluster_counts[i] / len(label_values):.2f}% of data)...'
            )
            member_times = [
                time for c, time in zip(label_values, labels.times)
                if c == cluster
            ]
            with Progress(f'psd Nº{cluster} ({i + 1}/{len(clusters)})',
                          len(channels) * d) as progress:
                psds = {}
                for pc, channel in enumerate(channels):
                    # One PSD per member time, over the preceding 60 seconds.
                    minute_psds = [
                        progress(data[channel].crop,
                                 pc * d + (to_gps(time).seconds - origin),
                                 start=to_gps(time) - LIGOTimeGPS(60),
                                 end=to_gps(time)).psd().value
                        for time in member_times
                    ]
                    psds[channel] = FrequencySeries(
                        median(stack(minute_psds), axis=0),
                        frequencies=f,
                        name=f'{cluster}-{channel}')
            for series in psds.values():
                series.write(filename, **writing_opts)
        else:
            logger.info(f'Loaded Nº{cluster}.')

        # plotting is slow, so show a nice progress bar.
        logger.debug('Initiating plotting routine...')
        with Progress('plotting', len(groups)) as progress:
            for p, (group, lbls, title) in enumerate(groups):
                # plot the group in one figure.
                figure = Plot(*(psds[channel] for channel in group),
                              separate=False, sharex=True, zorder=1, **kwargs)
                try:
                    # modify the figure as a whole.
                    ax = figure.gca()
                    ax.set_xscale('log')
                    ax.set_yscale('log')
                    figure.suptitle(title)
                    figure.legend(lbls)

                    # save to png.
                    progress(
                        figure.save, p,
                        get_path(f'{cluster}-{title}', 'png',
                                 prefix=f'{prefix}/{cluster}'))
                finally:
                    # One figure is created per cluster and group; close
                    # each one so they do not accumulate in memory.
                    # ``progress`` hands back the wrapped call's result (it
                    # is chained with ``.psd()`` above), so the save is
                    # presumably complete by now.
                    figure.close()
def whiten(data, ffttime, window, low_f, high_f, notch, rate,
           plot_path='/home/melissa.lopez/Anomaly_Detection/Algorithm/dummy.png'):
    """
    This function whitens the data and band-pass it in the range
    [low_f, high_f].

    Parameters
    ----------
    data: time series
        The signal to whiten. Must provide ``sample_rate`` and ``whiten``
        (presumably a gwpy ``TimeSeries`` -- confirm against callers).

    ffttime: int
        Portion of the strain to compute the psd; half of it is used as the
        overlap.

    window: str
        Type of function for the windowing used when whitening

    low_f: int
        Lower bound of the band-pass filter

    high_f: int
        Upper bound of the band-pass filter

    notch: list
        Frequencies of the notch filters. Depends on the detector

    rate: int
        Resampling rate. Represents the sampling frequency

    plot_path: str or None
        Where the diagnostic figure is saved. Pass ``None`` to skip the
        figure altogether.

    Returns
    -------
    whitened: numpy array
        The whitened and band-passed numpy array
    """
    sample_rate = data.sample_rate

    # Band-pass filter in [low_f, high_f]
    bp = bandpass(float(low_f), float(high_f), sample_rate)

    # One notch filter per requested frequency
    notches = [filter_design.notch(line, sample_rate) for line in notch]

    # Concatenate both filters
    zpk = filter_design.concatenate_zpks(bp, *notches)

    # Whiten with the requested window (it used to be ignored in favour of
    # a hard-coded 'hann'), then band-pass and notch.
    white = data.whiten(ffttime, int(ffttime / 2), window=window)
    # Filter once and reuse the result for both resampling and plotting.
    filtered = white.filter(zpk, filtfilt=True)

    # Downsample to the requested rate
    white_down = filtered.resample(rate=rate, window='hann')
    whitened = np.array(white_down)

    if plot_path is not None:
        # Plot the filtered data before and after downsampling
        plot = Plot(figsize=(15, 6))
        try:
            ax = plot.gca()
            ax.plot(white_down, label='Downsampled', alpha=0.7)
            ax.plot(filtered, label='Not downsampled', alpha=0.7)
            ax.set_xscale('auto-gps')
            # The y-axis shows the whitened amplitude over time, not a
            # frequency.
            ax.set_ylabel('Whitened strain amplitude')
            ax.set_title(
                'LIGO-Livingston strain data whitened, band-passed in range ['
                f'{low_f}, {high_f}] $Hz$')
            plot.legend()
            # Save and close this figure explicitly instead of relying on
            # whatever pyplot considers the current figure.
            plot.savefig(plot_path)
        finally:
            plot.close()

    return whitened