Beispiel #1
0
def main_compare_gif_gotic2():
    """Plot measured GIF strain against the strain read from a .gotic file.

    Both series cover Oct 15 2018 to Oct 21 2018, are linearly detrended,
    and are drawn on one figure that is written to ``result.png``. The
    series read from the ``.gotic`` file is additionally multiplied by 0.9
    (the reason for that factor is not stated in this function).
    """
    gps_begin = tconvert('Oct 15 2018 00:00:00')
    gps_stop = tconvert('Oct 21 2018 00:00:00')

    # Measured strain: flatten the per-segment file lists into one list.
    gif_paths = []
    for file_group in GifData.findfiles(
            gps_begin, gps_stop, 'CALC_STRAIN',
            prefix='/Users/miyo/Dropbox/KagraData/gif/'):
        gif_paths.extend(file_group)
    measured = TimeSeries.read(
        source=gif_paths,
        name='CALC_STRAIN',
        format='gif',
        pad=numpy.nan,
        nproc=2,
    ).detrend('linear')

    # Strain from the .gotic file, x component only.
    model = KagraGoticStrain.read('201805010000_201811010000.gotic',
                                  start=gps_begin,
                                  end=gps_stop).x
    model = model.detrend('linear') * 0.9

    # Draw both series on a shared GPS time axis and write the image.
    figure = Plot(model, measured, xscale='auto-gps')
    figure.legend()
    figure.subplots_adjust(right=0.86)
    figure.savefig('result.png')
    figure.close()
Beispiel #2
0
def plot_spectra(clusters,
                 channel,
                 unit='cts',
                 xlog=True,
                 legend=None,
                 xlim=None,
                 **kwargs):
    """Plot the stored spectrum of one channel for several clusters.

    For every cluster, the ``*.hdf5`` files in the current working directory
    are searched for a dataset named ``'{cluster}-{channel}'``; the first
    file that contains it is used. All spectra are drawn on one log-scaled
    figure which is saved as ``'{channel}.png'``.

    Parameters
    ----------
    clusters : iterable
        Cluster identifiers to plot.
    channel : str
        Channel name; also used as the figure title and output file stem.
    unit : str, optional
        Unit shown in the y-axis label.
    xlog : bool, optional
        Use a logarithmic frequency axis when true.
    legend : sequence, optional
        Legend labels; defaults to ``clusters``.
    xlim : tuple, optional
        x-axis limits; left untouched when ``None``.
    **kwargs
        Forwarded to ``gwpy.plot.Plot``.

    Raises
    ------
    KeyError
        If no ``*.hdf5`` file contains the dataset for a cluster.
    """
    from glob import glob
    from gwpy.frequencyseries import FrequencySeries
    from gwpy.plot import Plot
    title = channel

    # The directory listing does not depend on the cluster: list it once.
    candidates = glob('*.hdf5')

    psds = {}
    for cluster in clusters:
        for filename in candidates:
            try:
                psds[cluster] = FrequencySeries.read(filename,
                                                     f'{cluster}-{channel}')
            except KeyError:
                # Dataset not in this file; try the next one.
                continue
            print(f'found in {filename}')
            break
        else:
            raise KeyError(f'Could not find Nº{cluster}')

    if legend is None:
        legend = clusters

    # plot the group in one figure.
    plt = Plot(*psds.values(),
               separate=False,
               sharex=True,
               zorder=1,
               **kwargs)
    try:
        axes = plt.gca()
        if xlim is not None:
            axes.set_xlim(xlim)
        axes.set_ylim((1e-48, 1e-37))
        if xlog:
            axes.set_xscale('log')
        axes.set_yscale('log')
        axes.set_ylabel(f'Power Spectral Density [{unit}^2/Hz]')
        plt.suptitle(title)
        plt.legend(legend, prop={'size': 15})

        # save to png.
        plt.save(f'{title}.png')
    finally:
        # The figure is not returned, so release it here; otherwise every
        # call leaves another open figure behind.
        plt.close()
Beispiel #3
0
def save_legend(axes, pngfile, loc='center', frameon=False, fontsize='small',
                **kwargs):
    """Write a standalone image holding only the legend of ``axes``.

    The handles and labels of ``axes`` are placed on a fresh, otherwise
    empty figure, the legend lines are drawn 8 points wide, and the figure
    is passed to ``save_figure`` cropped to the legend's extent.

    Returns whatever ``save_figure`` returns.
    """
    legend_fig = Plot()
    handles, labels = axes.get_legend_handles_labels()
    legend = legend_fig.legend(handles, labels, loc=loc, frameon=frameon,
                               fontsize=fontsize, **kwargs)

    for handle in legend.get_lines():
        handle.set_linewidth(8)
    legend_fig.canvas.draw_idle()

    # Legend extent is in display pixels; convert it to inches for cropping.
    extent_px = legend.get_window_extent()
    px_to_inches = legend_fig.dpi_scale_trans.inverted()
    crop_box = extent_px.transformed(px_to_inches)
    return save_figure(legend_fig, pngfile, bbox_inches=crop_box)
Beispiel #4
0
def representative_spectra(channels,
                           start,
                           stop,
                           rate,
                           label='kmeans-labels',
                           filename=DEFAULT_FILENAME,
                           prefix='.',
                           downloader=TimeSeriesDict.get,
                           cluster_numbers=None,
                           groups=None,
                           **kwargs):
    """
    Make representative spectra for each cluster based on the median psd for minutes in that cluster.
    Downloads only the raw minutes in the cluster to save.

    Steps, in order:

    1. Read the cluster-label series ``label`` from ``filename`` between
       ``start`` and ``stop``.
    2. Pick the clusters to process: ``cluster_numbers`` if given, otherwise
       every label from 0 to the maximum except the most populated one.
    3. Split the label series into contiguous runs of equal label and, for
       runs belonging to a selected cluster, download the channels,
       resample them to ``rate`` and store them in an HDF5 cache file.
    4. For each selected cluster, load cached spectra or compute the median
       of the PSDs of the 60-second windows ending at each labelled time,
       then write them to the cache file.
    5. Plot each entry of ``groups`` for that cluster and save it as a png.

    Parameters
    ----------
    channels : sequence
        Channel names to download and analyse.
    start, stop :
        Time range; both are passed through ``to_gps``.
    rate :
        Target rate handed to ``better_aa_opts`` for resampling.
    label : str
        Name of the label series inside ``filename``.
    filename :
        File holding the label series. NOTE: this name is rebound to the
        cache-file path further down.
    prefix : str
        Directory prefix used for the cache file and the output images.
    downloader : callable
        Called as ``downloader(channels, start=..., end=...)``.
    cluster_numbers : list, optional
        Explicit clusters to process.
    groups : iterable, optional
        Each entry is unpacked as ``(group, lbls, title)`` when plotting;
        defaults to ``channels``.
    **kwargs
        Forwarded to ``Plot``.
    """
    if groups is None:
        # NOTE(review): the plotting loop unpacks every entry of `groups`
        # into three names; confirm that plain `channels` entries satisfy
        # that when this default is taken.
        groups = channels

    # read the labels from the save file.
    labels = TimeSeries.read(filename,
                             label,
                             start=to_gps(start),
                             end=to_gps(stop))
    logger.info(f'Read labels {start} to {stop} from {filename}')

    if cluster_numbers is None:
        # Candidate clusters are all integers from 0 up to the largest label.
        clusters = list(range(max(labels.value) + 1))

        # Number of samples carrying each label.
        cluster_counts = list(
            len(labels.value[labels.value == c]) for c in clusters)
        # Drop the most populated cluster from the work list.
        largest_cluster = cluster_counts.index(max(cluster_counts))
        clusters.remove(largest_cluster)

        logger.info(
            f'Largest cluster found to be Nº{largest_cluster} ({100 * max(cluster_counts) // len(labels.value)}%). Doing {clusters}.'
        )
        # Keep cluster_counts aligned with clusters: index() and remove()
        # both act on the first occurrence of the maximum.
        cluster_counts.remove(max(cluster_counts))
    else:
        clusters = cluster_numbers
        cluster_counts = list(
            len(labels.value[labels.value == c]) for c in clusters)

    # d is non-zero wherever the label changes between consecutive samples.
    t, v, d = labels.times, labels.value, diff(labels.value)

    # (start, end) time pairs of the runs between label changes, including
    # the first and last run.
    pairs = list(
        zip([t[0]] + list(t[:-1][d != 0]),
            list(t[1:][d != 0]) + [t[-1]]))
    # Label of each run, in the same order as `pairs`.
    values = list(v[:-1][d != 0]) + [v[-1]]
    assert len(pairs) == len(values)  # need to include start-| and |-end
    # l|r l|r l|r l|r
    # l,r l,r l,r l,r
    # l r,l r,l r,l r # zip(start + l[1:], r[:-1] + stop)

    # Diagnostic output: the runs, their lengths in whole seconds, and
    # their labels.
    print(pairs)
    for pair in pairs:
        print(int(pair[1].value) - int(pair[0].value))
    print(values)

    # use h5py to make a mutable object pointing to a file on disk.
    # NOTE: `filename` is rebound here; every later use refers to the path
    # returned by path2h5file, not to the label file passed by the caller.
    save_file, filename = path2h5file(
        get_path(f'spectra-cache {start}', 'hdf5', prefix=prefix))
    logger.debug(f'Initiated hdf5 stream to {filename}')

    logger.info(f'Patching {filename}...')
    for i, (dl_start, end) in enumerate(pairs):
        if values[i] in clusters:
            # NOTE(review): the existence check uses the run's end time
            # while write_to_disk below is given its start time; confirm
            # that this matches what data_exists / write_to_disk expect.
            if not data_exists(channels, to_gps(end).seconds, save_file):
                logger.debug(
                    f'Downloading Nº{values[i]} from {dl_start} to {end}...')
                try:
                    # Fetch from 60 s before the run to 1 s after it.
                    dl = downloader(channels,
                                    start=to_gps(dl_start) - LIGOTimeGPS(60),
                                    end=to_gps(end) + LIGOTimeGPS(seconds=1))
                    out = TimeSeriesDict()
                    for n in dl:
                        out[n] = dl[n].resample(**better_aa_opts(dl[n], rate))
                    write_to_disk(out, to_gps(dl_start).seconds, save_file)
                except RuntimeError:  # Cannot find all relevant data on any known server
                    # Best effort: a run that cannot be downloaded is skipped.
                    logger.warning(
                        f"SKIPPING Nº{values[i]} from {dl_start} to {end} !!")

    logger.info('Reading data...')
    data = TimeSeriesDict.read(save_file, channels)

    logger.info('Starting PSD generation...')

    # Frequency axis shared by all spectra, taken from the PSD of the last
    # 60 s of the first channel.
    f = data[channels[0]].crop(
        start=to_gps(data[channels[0]].times[-1]) - LIGOTimeGPS(60),
        end=to_gps(data[channels[0]].times[-1])).psd().frequencies

    # `d` is rebound: it is now the span in seconds between the second and
    # the last label time, used to scale the progress counter below.
    d = (to_gps(labels.times[-1]).seconds - to_gps(labels.times[1]).seconds)
    for i, cluster in enumerate(clusters):
        try:
            # Reuse spectra already stored in the cache file, if all exist.
            psds = {
                channel: FrequencySeries.read(filename, f'{cluster}-{channel}')
                for channel in channels
            }
            logger.info(f'Loaded Nº{cluster}.')

        except KeyError:

            logger.info(
                f'Doing Nº{cluster} ({100 * cluster_counts[i] / len(labels.value):.2f}% of data)...'
            )
            # presumably progress(func, position, **kw) calls func(**kw) and
            # reports `position` -- TODO confirm against Progress.
            with Progress(f'psd Nº{cluster} ({i + 1}/{len(clusters)})',
                          len(channels) * d) as progress:
                # Per channel: median over the PSDs of the 60 s windows
                # ending at each time labelled with this cluster.
                psds = {
                    channel: FrequencySeries(median(stack([
                        progress(data[channel].crop,
                                 pc * d + (to_gps(time).seconds -
                                           to_gps(labels.times[1]).seconds),
                                 start=to_gps(time) - LIGOTimeGPS(60),
                                 end=to_gps(time)).psd().value
                        for c, time in zip(labels.value, labels.times)
                        if c == cluster
                    ]),
                                                    axis=0),
                                             frequencies=f,
                                             name=f'{cluster}-{channel}')
                    for pc, channel in enumerate(channels)
                }
            # Store the freshly computed spectra in the cache file.
            for name in psds.keys():
                psds[name].write(filename, **writing_opts)

        # plotting is slow, so show a nice progress bar.
        logger.debug('Initiating plotting routine...')
        with Progress('plotting', len(groups)) as progress:

            for p, (group, lbls, title) in enumerate(groups):
                # plot the group in one figure.
                plt = Plot(*(psds[channel] for channel in group),
                           separate=False,
                           sharex=True,
                           zorder=1,
                           **kwargs)
                # plt.gca().set_xlim((30,60))
                # modify the figure as a whole.
                # plt.add_segments_bar(dq, label='')
                plt.gca().set_xscale('log')
                plt.gca().set_yscale('log')
                plt.suptitle(title)
                plt.legend(lbls)

                # save to png.
                # NOTE(review): the figure is not closed after saving.
                progress(
                    plt.save, p,
                    get_path(f'{cluster}-{title}',
                             'png',
                             prefix=f'{prefix}/{cluster}'))
def whiten(data, ffttime, window, low_f, high_f, notch, rate):
    """
    Whiten the data, band-pass it in the range [low_f, high_f], apply notch
    filters and resample it.

    A diagnostic figure comparing the filtered series before and after
    resampling is written to a fixed path as a side effect.

    Parameters
    ----------

    data:
        The signal to whiten. It must expose ``sample_rate`` and ``whiten``
        (presumably a gwpy ``TimeSeries`` -- confirm against the caller).

    ffttime: int
        Portion of the strain to compute the psd; half of it is used as
        the overlap.

    window: str
        Type of function for the windowing used when whitening

    low_f: int
        Lower bound of the band-pass filter

    high_f: int
        Upper bound of the band-pass filter

    notch: list
        Frequencies of the notch filters. Depends on the detector

    rate: int
        Resampling rate. Represents the sampling frequency

    Returns
    -------

    whitened: numpy array
        The whitened, filtered and resampled data as a numpy array

    """

    # Band-pass filter in [low_f, high_f]
    bp = bandpass(float(low_f), float(high_f), data.sample_rate)

    # One notch filter per requested line frequency
    notches = [filter_design.notch(line, data.sample_rate) for line in notch]

    # Concatenate band-pass and notches into a single filter
    zpk = filter_design.concatenate_zpks(bp, *notches)

    # Whiten with the caller's window (previously always 'hann', which
    # silently ignored the `window` argument).
    white = data.whiten(ffttime, int(ffttime / 2), window=window)

    # Filter once and reuse the result for both resampling and plotting.
    filtered = white.filter(zpk, filtfilt=True)
    white_down = filtered.resample(rate=rate, window='hann')
    whitened = np.array(white_down)

    # Diagnostic plot: filtered series before and after resampling
    plot = Plot(figsize=(15, 6))
    try:
        ax = plot.gca()
        ax.plot(white_down, label='Downsampled', alpha=0.7)
        ax.plot(filtered, label='Not downsampled', alpha=0.7)
        ax.set_xscale('auto-gps')
        # NOTE(review): the y axis shows the series values, not a
        # frequency; the label looks wrong -- confirm before changing it.
        ax.set_ylabel('Frequency [Hz]')
        ax.set_title(
            'LIGO-Livingston strain data whitened, band-passed in range ['
            f'{low_f}, {high_f}] $Hz$')
        plot.legend()
        # Save and close through the figure itself rather than through a
        # name that is not defined in this function.
        plot.savefig(
            '/home/melissa.lopez/Anomaly_Detection/Algorithm/dummy.png')
    finally:
        plot.close()

    return whitened