    def test_correlate_template_different_amplitudes(self):
        """
        Check that correlations are the same independent of template amplitudes
        """
        data = np.random.randn(20000)
        template = data[1000:1200]
        template_large = template * 10e10
        template_small = template * 10e-10

        cc = correlate_template(data, template)
        cc_large = correlate_template(data, template_large)
        cc_small = correlate_template(data, template_small)
        np.testing.assert_allclose(cc, cc_large)
        np.testing.assert_allclose(cc, cc_small)
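
    # A minimal standalone sketch (not part of this test class) of the property
    # checked above: with the default normalize='full', scaling the template by
    # a positive constant leaves the correlation values unchanged.
    #
    #     >>> import numpy as np
    #     >>> from obspy.signal.cross_correlation import correlate_template
    #     >>> rng = np.random.default_rng(0)
    #     >>> data = rng.standard_normal(1000)
    #     >>> template = data[100:200]
    #     >>> cc = correlate_template(data, template)
    #     >>> bool(np.allclose(cc, correlate_template(data, template * 1e6)))
    #     True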

    def test_correlate_template_zeros_in_input(self):
        template = np.zeros(10)
        data = read()[0].data[380:420]
        xcorr = correlate_template(data, template)
        np.testing.assert_equal(xcorr, np.zeros(len(xcorr)))
        template[:] = data[:10]
        data[5:20] = 0
        xcorr = correlate_template(data, template)
        np.testing.assert_equal(xcorr[5:11], np.zeros(6))
        data[:] = 0
        xcorr = correlate_template(data, template)
        np.testing.assert_equal(xcorr, np.zeros(len(xcorr)))
        xcorr = correlate_template(data, template, normalize='naive')
        np.testing.assert_equal(xcorr, np.zeros(len(xcorr)))

    def test_correlate_template_versus_correlate(self):
        data = read()[0].data
        template = data[400:600]
        data = data[380:620]
        xcorr1 = correlate_template(data, template, normalize='naive')
        xcorr2 = correlate(data, template, 20)
        np.testing.assert_equal(xcorr1, xcorr2)
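
    # Note: correlate_template with the default mode='valid' returns
    # len(data) - len(template) + 1 = 240 - 200 + 1 = 41 values here, the same
    # number of lags as correlate(data, template, 20) (2 * 20 + 1 = 41), which
    # is why the two arrays can be compared element-wise above.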

    def test_integer_input_equals_float_input(self):
        a = [-3, 0, 4]
        b = [-3, 4]
        c = np.array(a, dtype=float)
        d = np.array(b, dtype=float)
        for demean in (True, False):
            for normalize in (None, 'naive'):
                cc1 = correlate(a, b, 3, demean=demean, normalize=normalize,
                                method='direct')
                cc2 = correlate(c, d, 3, demean=demean, normalize=normalize)
                np.testing.assert_allclose(cc1, cc2)
            for normalize in (None, 'naive', 'full'):
                cc3 = correlate_template(a, b, demean=demean,
                                         normalize=normalize, method='direct')
                cc4 = correlate_template(c, d, demean=demean,
                                         normalize=normalize)
                np.testing.assert_allclose(cc3, cc4)

    def test_correlate_template_correct_alignment_of_normalization(self):
        data = read()[0].data
        template = data[400:600]
        data = data[380:620]
        # test for all combinations of odd and even length input data
        for i1, i2 in ((0, 0), (0, 1), (1, 1), (1, 0)):
            for mode in ('valid', 'same', 'full'):
                for demean in (True, False):
                    xcorr = correlate_template(data[i1:], template[i2:],
                                               mode=mode, demean=demean)
                    self.assertAlmostEqual(np.max(xcorr), 1)
    def test_correlate_template_nodemean_fastmatchedfilter(self):
        """
        Compare non-demeaned result against FMF derived result.

        FMF result obtained by the following:

        import copy
        import numpy as np
        from fast_matched_filter import matched_filter
        from obspy import read

        data = read()[0].data
        template = copy.deepcopy(data[400:600])
        data = data[380:620]
        result = matched_filter(
            templates=template.reshape(1, 1, 1, len(template)),
            moveouts=np.array(0).reshape(1, 1, 1),
            weights=np.array(1).reshape(1, 1, 1),
            data=data.reshape(1, 1, len(data)),
            step=1, arch='cpu')[0]

        .. note::
            FastMatchedFilter doesn't use semver, but result generated by Calum
            Chamberlain on 18 Jan 2018 using up-to-date code, with the patch
            in https://github.com/beridel/fast_matched_filter/pull/12
        """
        result = [
            -1.48108244e-01,   4.71532270e-02,   1.82797655e-01,
            1.92574233e-01,   1.18700281e-01,   1.18958903e-02,
            -9.23405439e-02,  -1.40047163e-01,  -1.00863703e-01,
            -4.86961426e-03,   1.04124829e-01,   1.72662303e-01,
            1.41110823e-01,   1.53776666e-04,  -1.71214968e-01,
            -2.83201426e-01,  -3.04899812e-01,  -2.03215942e-01,
            8.88349637e-02,   5.00749528e-01,   7.18140483e-01,
            5.29728174e-01,   1.30591258e-01,  -1.83402568e-01,
            -3.22406143e-01,  -3.20676118e-01,  -1.98054180e-01,
            -5.06028766e-04,   1.56253457e-01,   1.74580097e-01,
            6.49696961e-02,  -8.56237561e-02,  -1.89858019e-01,
            -1.96504310e-01,  -1.04968190e-01,   2.51029599e-02,
            1.32686019e-01,   2.03692451e-01,   2.11983219e-01,
            0.00000000e+00,   0.00000000e+00]
        data = read()[0].data
        template = data[400:600]
        data = data[380:620]
        # FMF demeans template but does not locally demean data for
        # normalization
        template = template - template.mean()
        cc = correlate_template(data, template, demean=False)
        # FMF misses the last two elements?
        np.testing.assert_allclose(cc[0:-2], result[0:-2], atol=1e-7)
        shift, corr = xcorr_max(cc)
        self.assertEqual(shift, 0)
    def test_correlate_template_eqcorrscan(self):
        """
        Test for moving window correlations with "full" normalisation.

        Comparison result is from EQcorrscan v.0.2.7, using the following:

        from eqcorrscan.utils.correlate import get_array_xcorr
        from obspy import read

        data = read()[0].data
        template = data[400:600]
        data = data[380:620]
        eqcorrscan_func = get_array_xcorr("fftw")
        result = eqcorrscan_func(
            stream=data, templates=template.reshape(1, len(template)),
            pads=[0])[0][0]
        """
        result = [
            -2.24548906e-01, 7.10350871e-02, 2.68642932e-01, 2.75941312e-01,
            1.66854098e-01, 1.66086946e-02, -1.29057273e-01, -1.96172655e-01,
            -1.41613603e-01, -6.83271606e-03, 1.45768464e-01, 2.42143899e-01,
            1.98310092e-01, 2.16377302e-04, -2.41576880e-01, -4.00586188e-01,
            -4.32240069e-01, -2.88735539e-01, 1.26461715e-01, 7.09268868e-01,
            9.99999940e-01, 7.22769439e-01, 1.75955653e-01, -2.46459037e-01,
            -4.34027880e-01, -4.32590246e-01, -2.67131507e-01, -6.78363896e-04,
            2.08171085e-01, 2.32197508e-01, 8.64804164e-02, -1.14158235e-01,
            -2.53621429e-01, -2.62945205e-01, -1.40505865e-01, 3.35594788e-02,
            1.77415669e-01, 2.72263527e-01, 2.81718552e-01, 1.38080209e-01,
            -1.27307668e-01
        ]
        data = read()[0].data
        template = data[400:600]
        data = data[380:620]
        cc = correlate_template(data, template)
        np.testing.assert_allclose(cc, result, atol=1e-7)
        shift, corr = xcorr_max(cc)
        self.assertAlmostEqual(corr, 1.0)
        self.assertEqual(shift, 0)
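
    # Why shift == 0 above: data[380:620] contains the template starting at
    # offset 20, so the 41-sample 'valid' correlation peaks at index 20, i.e.
    # exactly at its centre, and xcorr_max therefore reports a zero shift.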

    def test_correlate_template_eqcorrscan_time(self):
        """
        Test full normalization for method='direct'.
        """
        result = [
            -2.24548906e-01,  7.10350871e-02,  2.68642932e-01,  2.75941312e-01,
            1.66854098e-01,  1.66086946e-02, -1.29057273e-01, -1.96172655e-01,
            -1.41613603e-01, -6.83271606e-03,  1.45768464e-01,  2.42143899e-01,
            1.98310092e-01,  2.16377302e-04, -2.41576880e-01, -4.00586188e-01,
            -4.32240069e-01, -2.88735539e-01,  1.26461715e-01,  7.09268868e-01,
            9.99999940e-01,  7.22769439e-01,  1.75955653e-01, -2.46459037e-01,
            -4.34027880e-01, -4.32590246e-01, -2.67131507e-01, -6.78363896e-04,
            2.08171085e-01,  2.32197508e-01,  8.64804164e-02, -1.14158235e-01,
            -2.53621429e-01, -2.62945205e-01, -1.40505865e-01,  3.35594788e-02,
            1.77415669e-01,  2.72263527e-01,  2.81718552e-01,  1.38080209e-01,
            -1.27307668e-01]
        data = read()[0].data
        template = data[400:600]
        data = data[380:620]
        cc = correlate_template(data, template, method='direct')
        np.testing.assert_allclose(cc, result, atol=1e-7)
        shift, corr = xcorr_max(cc)
        self.assertAlmostEqual(corr, 1.0)
        self.assertEqual(shift, 0)

Example #12

    def xcorr_cont(self, save_CCF=False, fmt=1):
        '''
            save_CCF: save individual CCFs in project_name/output/Template_match/CCF_records/
            fmt: output format
                fmt=1:
                    #OriginTime meanCC stdCC nSTA templateIDX
                fmt=2:
                    #OriginTime meanCC stdCC nSTA templateIDX mean_maxCCC std_maxCCC
        '''
        from obspy import UTCDateTime, read, Stream, Trace
        import glob
        from scipy import signal
        from obspy.signal.cross_correlation import correlate_template
        import matplotlib
        matplotlib.use('pdf')  #use a non-interactive backend
        import matplotlib.pyplot as plt
        import os
        from repeq.data_proc import cal_CCC

        home = self.home
        project_name = self.project_name

        if self.ms is None:
            print('Run .template_load() first')
            return False
        else:
            #loop the templates (paths)
            for i_tmp in self.ms:
                print('----------------------------------------------')
                print('In template: %s' % (i_tmp))
                tmp_idx = int(
                    i_tmp.split('/')[-1].split('_')[-1].split('.')[0])
                #if the detections already exist and self.overwrite is False, skip this template
                if not self.overwrite:
                    npy_file = (home + '/' + project_name +
                                '/output/Template_match/Detections/Detected_tmp_%05d.npy' % (tmp_idx))
                    txt_file = (home + '/' + project_name +
                                '/output/Template_match/Detections/Detected_tmp_%05d.txt' % (tmp_idx))
                    if os.path.exists(npy_file) and os.path.exists(txt_file):
                        print('Both %05d.npy and %05d.txt files exist, skip' %
                              (tmp_idx, tmp_idx))
                        continue
                #reached if self.overwrite is True, or the .npy and .txt files do not both exist
                OUT1 = open(
                    home + '/' + project_name +
                    '/output/Template_match/Detections/Detected_tmp_%05d.txt' %
                    (tmp_idx), 'w')  #output earthquake origin time
                if fmt == 1:
                    OUT1.write('#OriginTime meanCC stdCC nSTA templateIDX\n')
                elif fmt == 2:
                    OUT1.write(
                        '#OriginTime meanCC stdCC nSTA templateIDX mean_maxCCC std_maxCCC\n'
                    )  # mean of the max CCC over stations, so the per-station shift is negligible
                origintime = UTCDateTime(self.catalog.iloc[tmp_idx].Date +
                                         'T' + self.catalog.iloc[tmp_idx].Time)
                st = read(i_tmp)  #read template in

                #load pick info data (mainly P or S phase info)
                pick_info = np.load(home + '/' + project_name +
                                    '/waveforms_template/' +
                                    'template_%05d.npy' % (tmp_idx),
                                    allow_pickle=True)
                pick_info = pick_info.item()

                #read all directories of daily data
                dayst_paths = glob.glob(home + '/' + project_name +
                                        '/waveforms/' + '*000000')
                dayst_paths.sort()

                sav_mean_sh_CCF = []  #save all the daily mean CCFs for later plotting
                sav_daily_nSTA = []  #number of stations for each daily CCF
                sav_alldays_eq_sta = {}  #detailed CC, CCC, and shift info for every station over all days searched by this template
                #loop the daily data
                for dayst_path in dayst_paths:
                    sav_NET = []
                    sav_STA = []
                    sav_CHN = []
                    sav_LOC = []
                    sav_phase = []
                    sav_CCF = []
                    sav_travel_npts = []
                    sav_travel_time = []
                    sav_continuousdata = []
                    sav_template = []  #initialize containers for this day
                    YMD = dayst_path.split('/')[-1][:8]
                    print(' --Reading daily data: %s' % (dayst_path))
                    try:
                        i_dayst = read(dayst_path + '/waveforms/merged.ms')  #load daily data
                    except Exception:
                        #fall back for files larger than 2 GB
                        i_dayst = data_proc.read_obspy(dayst_path + '/waveforms/merged.ms')

                    #print(i_dayst.__str__(extended=True))
                    for i in range(len(st)):
                        #-----loop individual pick/station/comp of template-----
                        NET = st[i].stats.network
                        STA = st[i].stats.station
                        CHN = st[i].stats.channel
                        LOC = st[i].stats.location
                        #in the daily data, search for the same network/station/channel/location/sampling rate that matches the i_th pick of this template
                        tmp_dayst = i_dayst.select(
                            network=NET,
                            station=STA,
                            sampling_rate=st[i].stats.sampling_rate,
                            channel=CHN,
                            location=LOC)
                        tmp_dayst = tmp_dayst.copy()
                        if len(tmp_dayst) != 1:
                            if len(tmp_dayst) == 0:
                                #print('Case1. No data found:%s, skip this station'%(STA+'.'+CHN))
                                pass
                            else:
                                #print('Case2. Multiple data found:%d, probably breaking tcs, skip this station'%(len(tmp_dayst)))
                                #print(tmp_dayst) #tmp_dayst should be only one
                                pass
                            continue
                        else:
                            #find the station travel time
                            #if len(phases[phases.Channel.str.startswith(regional.upper()+'.'+STA+'.'+CHN)])==0:
                            #    continue #cannot find station shift
                            #travel time implied by the template header [s] (assumes the data request is always correct)
                            travel_time = (st[i].stats.starttime +
                                           self.tcs_length[0] - origintime)
                            travel_npts = int(np.round(travel_time * self.sampling_rate))  #travel time in samples

                            #get data value for template and continuous(daily) data
                            template = np.nan_to_num(st[i].data)
                            continuousdata = np.nan_to_num(tmp_dayst[0].data)

                            #run xcorr
                            CCF = correlate_template(continuousdata, template)
                            CCF = np.nan_to_num(CCF)

                            #load info data outside the loop
                            #pick_info = np.load(home+'/'+project_name+'/waveforms_template/'+'template_%05d.npy'%(tmp_idx),allow_pickle=True)
                            #pick_info = pick_info.item()

                            #save for later checking
                            sav_NET.append(NET)
                            sav_STA.append(STA)
                            sav_CHN.append(CHN)
                            sav_LOC.append(LOC)

                            #Update 2020.11.12: the order of .ms and pick_info.npy should now be the same
                            #Double check that the starttime matches the pick_info
                            assert np.abs(
                                (UTCDateTime(pick_info['arrival'][i]) -
                                 pick_info['tcs_length'][0]) -
                                st[i].stats.starttime
                            ) < 0.02, 'pick_info and ms starttime do NOT match!'
                            #P or S phase. Caution: previously wrong because the i_th
                            #index in st is not the i_th index in pick_info
                            sav_phase.append(pick_info['phase'][i])
                            #debug
                            #print('appending info:',NET+'.'+STA+'.'+CHN+'.'+LOC,PS)
                            sav_travel_time.append(travel_time)
                            sav_travel_npts.append(travel_npts)
                            sav_CCF.append(CCF)
                            sav_continuousdata.append(continuousdata)
                            sav_template.append(template)

                    if len(sav_CCF) < self.filt_nSTA:
                        print(
                            '   Number of CCF: %d, not enough for threshold' %
                            (len(sav_CCF)))
                        continue  #not enough data available, continue to next daily data

                    #----------dealing with shifting of each CCF----------
                    #travel_npts = np.array(travel_npts)
                    sav_travel_npts = np.array(sav_travel_npts)  #fix typo 2020.12.14
                    sav_travel_time = np.array(sav_travel_time)
                    sh_sav_CCF = np.array(sav_CCF)  #copy of the original CCFs
                    #shift each CCF based on the template arrival time
                    for ii in range(len(sh_sav_CCF)):
                        sh_sav_CCF[ii] = np.roll(sav_CCF[ii],
                                                 -int(sav_travel_npts[ii]))

                    print('   Number of CCF: %d, continue searching earthquakes' % (len(sav_CCF)))
                    mean_sh_CCF = np.mean(sh_sav_CCF, axis=0)  #stack/mean all the CCFs
                    std_sh_CCF = np.std(sh_sav_CCF, axis=0)  #also calculate the std

                    #save the individual CCF in Stream (for only debug purpose)
                    #Update 2020.12.14. save all the info in .npy instead of obspy Stream (to also save shift info)
                    if save_CCF:
                        #raw CCF (unshifted)
                        ST = Stream()
                        for ii, iCCF in enumerate(sav_CCF):
                            tmpCCF = Trace(iCCF)
                            tmpCCF.stats.sampling_rate = i_dayst[
                                0].stats.sampling_rate
                            tmpCCF.stats.starttime = i_dayst[0].stats.starttime
                            tmpCCF.stats.network = sav_NET[ii]
                            tmpCCF.stats.station = sav_STA[ii]
                            tmpCCF.stats.channel = sav_CHN[ii]
                            tmpCCF.stats.location = sav_LOC[ii]
                            ST += tmpCCF
                        #create dict to save info
                        sav_CCF_info = {}
                        sav_CCF_info['CCF_raw'] = ST
                        sav_CCF_info['shift_npts'] = sav_travel_npts
                        sav_CCF_info['shift_time'] = sav_travel_time
                        sav_CCF_info['OT_template'] = origintime
                        np.save(
                            home + '/' + project_name +
                            '/output/Template_match/CCF_records/' +
                            'CCF_template_%05d_daily_%s.npy' % (tmp_idx, YMD),
                            sav_CCF_info)
                        '''
                        #
                        ST = Stream()
                        for ii,iCCF in enumerate(sh_sav_CCF):
                            tmpCCF = Trace(iCCF)
                            tmpCCF.stats.sampling_rate = i_dayst[0].stats.sampling_rate
                            tmpCCF.stats.starttime = i_dayst[0].stats.starttime
                            tmpCCF.stats.network = sav_NET[ii]
                            tmpCCF.stats.station = sav_STA[ii]
                            tmpCCF.stats.channel = sav_CHN[ii]
                            tmpCCF.stats.location = sav_LOC[ii]
                            ST += tmpCCF
                        ST.write(home+'/'+project_name+'/output/Template_match/CCF_records/'+'shftCCF_template_%05d_daily_%s.ms'%(tmp_idx,YMD),format="MSEED")
                        '''

                    #----------Find earthquakes by the mean CCF----------
                    time = i_dayst[0].times()
                    eq_idx = np.where(
                        mean_sh_CCF >= self.filt_CC)[0]  #filter #1

                    #mean_sh_CCF has length = len(dailydata) - len(template) + 1
                    #remove indices too close to the right edge. #filter #2
                    _idx = np.where(
                        eq_idx < len(mean_sh_CCF) - 1 - np.max(sav_travel_npts)
                    )[0]  #-1 converts length to index; max(shift) ensures no template touches the right bound
                    eq_idx = eq_idx[_idx]

                    sav_eq_sta = {}  #save the detailed results (lag info, CCC values) for later use
                    for neqid in eq_idx:
                        #new_dayst[0].stats.starttime+time[np.argmax(mean_sh_CCF)] #find itself
                        detected_OT = (i_dayst[0].stats.starttime + time[neqid] +
                                       self.tcs_length[0])  #origin time of this detection
                        detected_OT_str = detected_OT.strftime(
                            '%Y-%m-%dT%H:%M:%S.%f')[:-4]  #accurate to 0.01 sec
                        print('    New event found:', detected_OT)  #this is the arrival time for template.st
                        if fmt == 1:
                            OUT1.write('%s %.3f %.3f %d %s\n' %
                                       (detected_OT_str, mean_sh_CCF[neqid],
                                        std_sh_CCF[neqid], len(sav_STA),
                                        'template_%05d' % (tmp_idx)))
                        elif fmt == 2:
                            #calculate CCC for individual stations
                            sav_maxCCC = []
                            #sav_sh_sec=[]
                            for n in range(len(sav_template)):
                                #loop in every station
                                #print('writing info:',sav_NET[n]+'.'+sav_STA[n]+'.'+sav_CHN[n]+'.'+sav_LOC[n],sav_phase[n])
                                cut_daily = sav_continuousdata[n][
                                    neqid + sav_travel_npts[n]:neqid +
                                    sav_travel_npts[n] + len(sav_template[n])]
                                maxCCC, lag = cal_CCC(sav_template[n],
                                                      cut_daily)
                                if np.isnan(maxCCC):
                                    maxCCC = 0  #probably due to cross-correlating with a zero array
                                midd = len(cut_daily) - 1  #at this index the reference data aligns with the target data
                                sh_sec = (lag - midd) * (1.0 / self.sampling_rate)  #convert to seconds (dt correction of the pick)
                                sav_maxCCC.append(maxCCC)
                                if detected_OT_str in sav_eq_sta:
                                    sav_eq_sta[detected_OT_str]['net_sta_comp'].append(
                                        sav_NET[n] + '.' + sav_STA[n] + '.' +
                                        sav_CHN[n] + '.' + sav_LOC[n])
                                    sav_eq_sta[detected_OT_str]['phase'].append(sav_phase[n])
                                    sav_eq_sta[detected_OT_str]['CCC'].append(maxCCC)
                                    sav_eq_sta[detected_OT_str]['CC'].append(sh_sav_CCF[n][neqid])
                                    sav_eq_sta[detected_OT_str]['shift'].append(sh_sec)
                                else:
                                    #initialize the dictionary for this detection
                                    sav_eq_sta[detected_OT_str] = {
                                        'net_sta_comp': [sav_NET[n] + '.' + sav_STA[n] + '.' +
                                                         sav_CHN[n] + '.' + sav_LOC[n]],
                                        'phase': [sav_phase[n]],
                                        'CCC': [maxCCC],
                                        'CC': [sh_sav_CCF[n][neqid]],
                                        'shift': [sh_sec],
                                    }

                                #sav_sh_sec.append(sh_sec)
                            OUT1.write('%s %.3f %.3f %d %s %.3f %.3f\n' %
                                       (detected_OT_str,
                                        mean_sh_CCF[neqid], std_sh_CCF[neqid],
                                        len(sav_STA), 'template_%05d' %
                                        (tmp_idx), np.mean(sav_maxCCC),
                                        np.std(sav_maxCCC)))

                    #-----For checking only: plot each detection (the template finds itself if the template and daily data are from the same day)-----
                    if self.plot_check:
                        tmp_T = st[0].times()
                        for i_eqidx, neqid in enumerate(eq_idx):
                            #loop in detection
                            detected_OT = i_dayst[0].stats.starttime + time[
                                neqid] + self.tcs_length[
                                    0]  #Origin time of which detection
                            detected_OT_str = detected_OT.strftime(
                                '%Y-%m-%dT%H:%M:%S.%f'
                            )[:-4]  #accuracy to 0.01 sec
                            plt.figure(1)
                            for n in range(len(sav_template)):
                                #loop in every station
                                #cut_daily = sav_continuousdata[n][np.argmax(mean_sh_CCF)+sav_travel_npts[n]:np.argmax(mean_sh_CCF)+sav_travel_npts[n]+len(sav_template[n])] #old version only plot maximum
                                cut_daily = sav_continuousdata[n][
                                    neqid + sav_travel_npts[n]:neqid +
                                    sav_travel_npts[n] + len(sav_template[n])]
                                cut_daily = cut_daily / np.max(
                                    np.abs(cut_daily))
                                plt.plot(tmp_T, cut_daily + n, 'k', linewidth=2)  #time series cut from the daily data
                                plt.plot(tmp_T,
                                         sav_template[n] /
                                         np.max(np.abs(sav_template[n])) + n,
                                         'r',
                                         linewidth=1.2)  #template data
                                plt.text(tmp_T[-1], n,
                                         sav_STA[n] + '.' + sav_CHN[n])
                                #---add individual CC value and max_CCC value---
                                if fmt == 1:
                                    #maxCCC,lag = cal_CCC(sav_template[n],cut_daily)
                                    #midd = (len(cut_daily))-1  #length of b?? at this idx, refdata align with target data
                                    #sh_sec = (lag-midd)*(1.0/self.sampling_rate) #convert to second (dt correction of P)
                                    plt.text(
                                        tmp_T[-1] * 0.05, n,
                                        'CC=%.2f' % (sh_sav_CCF[n][neqid]))
                                elif fmt == 2:
                                    maxCCC = sav_eq_sta[detected_OT_str][
                                        'CCC'][n]
                                    sh_sec = sav_eq_sta[detected_OT_str][
                                        'shift'][n]
                                    plt.text(
                                        tmp_T[-1] * 0.05, n,
                                        'CC=%.2f,max_CCC=%.2f,dt=%.3f' %
                                        (sh_sav_CCF[n][neqid], maxCCC, sh_sec))
                                #Future improvement: if fmt==2 the values have already been calculated, just reuse them
                                #if fmt == 1:
                                #elif fmt ==2:

                            #plt.title('Time:%s  CC=%5.2f'%((i_dayst[0].stats.starttime+time[neqid]+self.tcs_length[0]).strftime('%H:%M:%S'),np.max(mean_sh_CCF)))
                            plt.title(
                                'Time:%s  CC=%5.2f' %
                                ((i_dayst[0].stats.starttime + time[neqid] +
                                  self.tcs_length[0]).strftime('%H:%M:%S.%f'),
                                 mean_sh_CCF[neqid]))
                            plt.savefig(home + '/' + project_name +
                                        '/output/Template_match/Figs/' +
                                        'template_%05d_daily_%s_%03d.png' %
                                        (tmp_idx, YMD, i_eqidx))
                            plt.close()
                            if i_eqidx > 99:
                                break  #don't plot if more than 99 plots in the same day

                    sav_mean_sh_CCF.append(mean_sh_CCF)
                    sav_daily_nSTA.append(len(sav_CCF))

                    sav_alldays_eq_sta.update(sav_eq_sta)  #not supported for fmt=1

                ##------output detailed data (lag information for each station) in .npy---------
                #only if fmt=2; fmt=1 did not calculate the CCC
                if fmt == 2:
                    np.save(
                        home + '/' + project_name +
                        '/output/Template_match/Detections/' +
                        'Detected_tmp_%05d.npy' % (tmp_idx),
                        sav_alldays_eq_sta)

                #----plot the mean_shifted_CCF for all days----
                plt.figure(1)
                for n in range(len(sav_mean_sh_CCF)):
                    plt.plot(sav_mean_sh_CCF[n] + n, linewidth=1)
                    if n == 0:
                        plt.text(len(sav_mean_sh_CCF[n]), n, 'N=%d' %
                                 (sav_daily_nSTA[n]))  #number of stations
                    else:
                        plt.text(len(sav_mean_sh_CCF[n]), n, '%d' %
                                 (sav_daily_nSTA[n]))  #number of stations
                plt.title('Mean CCF (template_%05d)' % (tmp_idx), fontsize=16)
                plt.ylabel('Days after %s' %
                           (dayst_paths[0].split('/')[-1][:8]),
                           fontsize=16)
                plt.savefig(home + '/' + project_name +
                            '/output/Template_match/Figs/' +
                            'MeanCCF_%05d.png' % (tmp_idx))
                plt.close()
                OUT1.close()
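
# For reference, a minimal sketch (not part of the class above; pandas is an
# assumed extra dependency and the path is illustrative) of reading back one of
# the fmt=2 detection text files written by xcorr_cont:
import pandas as pd

columns = ['OriginTime', 'meanCC', 'stdCC', 'nSTA', 'templateIDX',
           'mean_maxCCC', 'std_maxCCC']  #column layout follows the fmt=2 header above
detections = pd.read_csv(
    'output/Template_match/Detections/Detected_tmp_00000.txt',  #illustrative path
    sep=r'\s+', comment='#', names=columns)
print(detections.head())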

Example #13

# set master event for correlation
masterEvent = waveforms[11]
#masterEvent = waveforms[122]

# open file for output
outFile = h5py.File(path + type + "_correlations.h5", "w")

# make some arrays for storing output
shifts = np.zeros((len(waveforms)))
corrCoefs = np.zeros((len(waveforms)))

for i in range(len(waveforms)):

    # correlate master event and waveform i
    corr = correlate_template(masterEvent, waveforms[i])
    shift, corrCoef = xcorr_max(corr)

    # save output
    shifts[i] = shift
    corrCoefs[i] = corrCoef

    # give the user some output
    print("Correlated master event with " +
          str(round(i / len(waveforms) * 100)) + "% of events")

# write output to file
outFile.create_dataset("corrCoefs", data=corrCoefs)
outFile.create_dataset("shifts", data=shifts)

# close output file
outFile.close()
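
# A minimal follow-up sketch (not in the original script) of reading the saved
# correlations back with h5py; it reuses the file name written above.
with h5py.File(path + type + "_correlations.h5", "r") as readFile:
    shifts_loaded = readFile["shifts"][:]
    corrCoefs_loaded = readFile["corrCoefs"][:]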

Example #14

import copy
import math

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from obspy.signal import cross_correlation
from scipy.io import loadmat


def lin_corr(patient_id: str, time_begin: list, duration: float, t_lag=0.7, critical_corr=0.7):
    """

    :param patient_id:
    :param time_begin: List with [hour, minute].
    :param duration: In seconds.
    :param t_lag: In seconds.
    :param critical_corr:
    :return:
    """
    # Load and prepare data
    data_mat = loadmat('../data/' + patient_id + '_' + str(time_begin[0]) + 'h.mat')
    info_mat = loadmat('../data/' + patient_id + '_info.mat')
    fs = float(info_mat['fs'])
    sample_begin = int(time_begin[1] * 60 * fs)
    sample_end = sample_begin + int(duration * fs)
    data_raw = data_mat['EEG'][:, sample_begin:sample_end].transpose()

    n_lag = int(t_lag * fs)
    factor = np.exp(-1)

    # Compute normalized cross correlation (NCC)
    cctl = np.zeros((data_raw.shape[1], data_raw.shape[1], (n_lag * 2) + 1))
    for from_ in range(data_raw.shape[1]):
        for to_ in range(data_raw.shape[1]):
            x = data_raw[:, to_]
            y = data_raw[n_lag:-n_lag, from_]
            cctl[from_, to_, :] = cross_correlation.correlate_template(x, y)

    # Calculate peak cross correlation (cc) and corresponding time lag (tl)
    sign = np.sign(np.max(cctl, axis=2) - np.abs(np.min(cctl, axis=2)))
    cc = np.multiply(np.max(np.abs(cctl), axis=2), sign)
    mask = np.where(np.abs(cc) > critical_corr, 1, np.nan)
    tl_n = np.argmax(np.abs(cctl), axis=2)
    tl = (tl_n - n_lag) * mask / fs * 1000  # in [ms]
    tl_no_mask = (tl_n - n_lag) / fs * 1000  # in [ms], used for plots

    # Calculate mean tau
    # Tile and stack values for future operations
    tl_n_stacked = np.dstack([tl_n] * cctl.shape[2])
    arg_tau_stacked = factor * np.dstack([cc] * cctl.shape[2])
    mask_stacked = np.dstack([np.where(np.abs(cc) > critical_corr, 1, 0)] * cctl.shape[2])
    t_indices_tiled = np.tile(np.arange(0, cctl.shape[2]), (cctl.shape[0], cctl.shape[0], 1))
    # Get indices of values close to factor of peak cross correlation
    close_indices = np.isclose(cctl, arg_tau_stacked, rtol=1e-1) * t_indices_tiled
    # Create mask to separate negative and positive tau
    higher_tau_mask = np.where(close_indices - tl_n_stacked > 0, 1, 0)
    lower_tau_mask = np.where((tl_n_stacked - close_indices > 0) & (close_indices != 0), 1, 0)
    # Eliminate possible third occurrence of np.isclose() to factor
    higher_edge_indices = np.where(np.diff(higher_tau_mask) == -1, 1, 0) * t_indices_tiled[:, :, :-1]
    higher_edge_indices = np.min(np.where(higher_edge_indices == 0, np.inf, higher_edge_indices), axis=2)
    higher_third_occ_mask = np.where(t_indices_tiled > np.dstack([higher_edge_indices] * cctl.shape[2]), 0, 1)
    lower_edge_indices = np.where(np.diff(lower_tau_mask) == 1, 1, 0) * t_indices_tiled[:, :, :-1]
    lower_edge_indices = np.max(lower_edge_indices, axis=2)
    lower_third_occ_mask = np.where(t_indices_tiled < np.dstack([lower_edge_indices] * cctl.shape[2]), 0, 1)
    # Apply masks (apply mask for critical correlation separately to get all taus for plots)
    higher_tau_masked_all = close_indices * higher_tau_mask * higher_third_occ_mask
    higher_tau_masked = higher_tau_masked_all * mask_stacked
    lower_tau_masked_all = close_indices * lower_tau_mask * lower_third_occ_mask
    lower_tau_masked = lower_tau_masked_all * mask_stacked
    # Compute median along time lag axis and ignore zero entries
    higher_tau = np.ma.median(np.ma.masked_where(higher_tau_masked == 0, higher_tau_masked), axis=2).filled(0)
    lower_tau = np.ma.median(np.ma.masked_where(lower_tau_masked == 0, lower_tau_masked), axis=2).filled(0)
    # Get taus without mask for critical correlation for plots
    higher_tau_all = np.ma.median(
        np.ma.masked_where(higher_tau_masked_all == 0, higher_tau_masked_all),
        axis=2).filled(0)
    higher_tau_all = (higher_tau_all - n_lag) / fs * 1000  # in [ms]
    lower_tau_all = np.ma.median(
        np.ma.masked_where(lower_tau_masked_all == 0, lower_tau_masked_all),
        axis=2).filled(0)
    lower_tau_all = (lower_tau_all - n_lag) / fs * 1000  # in [ms]
    # Calculate mean distance for tau to cc
    tau_n = (higher_tau - lower_tau) / 2
    tau = np.where(tau_n == 0, np.nan, tau_n) / fs * 1000  # in [ms]

    # Additional masks for plots (diagonal, upper triangle, ...)
    tl_masked = tl.copy()
    cc_masked = cc.copy()
    np.fill_diagonal(tl_masked, np.nan)
    np.fill_diagonal(cc_masked, np.nan)
    cc_masked[np.triu_indices(cc_masked.shape[0], k=1)] = np.nan

    # Plot cc, tl and tau
    # General settings
    sns.set_style('white')
    fig = plt.figure(figsize=(10, 13))
    gs = fig.add_gridspec(3, 2)
    cmap_div = copy.copy(mpl.cm.get_cmap('seismic'))
    cmap_div.set_bad('dimgrey')
    cmap_uni = copy.copy(mpl.cm.get_cmap('viridis'))
    cmap_uni.set_bad('dimgrey')

    # Subplot: Peak cross correlation
    ax0 = fig.add_subplot(gs[:1, :1])
    sns.heatmap(cc_masked, cmap=cmap_div, vmin=-1, vmax=1)
    ax0.set_title('Peak cross correlation')
    ax0.set_xlabel('Node idx'), ax0.set_ylabel('Node idx')

    # Subplot: Histogram of peak cross correlation
    ax1 = fig.add_subplot(gs[:1, 1:])
    sns.distplot(cc_masked, kde=False)
    ymin, ymax = ax1.get_ylim()
    xmin, xmax = ax1.get_xlim()
    label = 'Critical corr. = +/- ' + str(critical_corr)
    plt.plot([critical_corr, critical_corr], [ymin, ymax], linestyle='--', color='black', label=label)
    plt.plot([-critical_corr, -critical_corr], [ymin, ymax], linestyle='--', color='black')
    ax1.set_xlim(xmin, xmax), ax1.set_ylim(ymin, ymax)
    plt.legend()
    ax1.set_title('Peak cross correlation histogram')
    ax1.set_xlabel('Peak cross correlation [-]'), ax1.set_ylabel('Nr. of occurrence [-]')

    # Subplot: Time lag
    ax2 = fig.add_subplot(gs[1:2, :1])
    vlim = np.nanmax(np.abs(tl))
    sns.heatmap(tl_masked, cmap=cmap_div, vmin=-vlim, vmax=vlim)
    ax2.set_title('Corresponding time lag [ms]')
    ax2.set_xlabel('Node idx'), ax2.set_ylabel('Node idx')

    # Subplot: Histogram of time lag
    ax3 = fig.add_subplot(gs[1:2, 1:])
    sns.distplot(tl_masked, kde=False)
    ax3.set_title('Time lag histogram')
    ax3.set_xlabel('Time [ms]'), ax3.set_ylabel('Nr. of occurrence [-]')

    # Subplot: Tau
    ax4 = fig.add_subplot(gs[2:, :1])
    sns.heatmap(tau, cmap=cmap_uni)
    ax4.set_title('Corresponding tau [ms]')
    ax4.set_xlabel('Node idx'), ax4.set_ylabel('Node idx')

    # Subplot: Histogram of tau
    ax5 = fig.add_subplot(gs[2:, 1:])
    sns.distplot(np.diagonal(tau), kde=False, label='Auto correlated')
    auto_corr = tau.copy()
    auto_corr[np.diag_indices(auto_corr.shape[0])] = np.nan
    sns.distplot(auto_corr, kde=False, label='Cross correlated')
    ax5.set_title('Tau histogram'), plt.legend()
    ax5.set_xlabel('Time [ms]'), ax5.set_ylabel('Nr. of occurrence [-]')

    plt.tight_layout()
    save_name = patient_id + '_' + str(time_begin[0]) + 'h' + str(time_begin[1]) + 'm'
    plt.savefig('../doc/figures/cc_' + save_name + '.png')
    plt.close()

    # t vector for plots
    plt.figure(figsize=(8, 5))
    t = np.arange(0, cctl.shape[2])
    t = (t - n_lag) / fs * 1000
    n0 = 44  # Base node
    begin_N = 7
    end_N = 14  # Number of line plots
    indices = [i for i in range(begin_N, end_N)]

    n_choices = 75
    valid_choices = np.argwhere(~np.isnan(mask))
    valid_indices = [i[0] * cctl.shape[0] + i[1] for i in valid_choices]
    # indices = np.random.choice(cctl.shape[0] * cctl.shape[1], n_choices, replace=False).tolist()
    indices = valid_indices #np.random.choice(valid_indices, n_choices, replace=False).tolist()
    peaks_x, peaks_y, taus_x_0, taus_x_1, taus_y = [], [], [], [], []
    for i in indices:
        n0 = i % cctl.shape[0]
        n1 = int(math.floor(i / cctl.shape[1]))
        #n1 = n0 + i  # Reference node
        plt.plot(t, cctl[n0, n1, :], label='Nodes ' + str(n0) + ' - ' + str(n1))
        peaks_x.append(tl_no_mask[n0, n1])
        peaks_y.append(cc[n0, n1])
        taus_x_0.append(higher_tau_all[n0, n1])
        taus_x_1.append(lower_tau_all[n0, n1])
        taus_y.append(cc[n0, n1] * factor)
    plt.scatter(peaks_x, peaks_y, color='black', marker='d', label='Peak', zorder=len(indices) + 1)
    plt.scatter(taus_x_0, taus_y, color='black', marker='<', label='Right tau', zorder=len(indices) + 1)
    plt.scatter(taus_x_1, taus_y, color='black', marker='>', label='Left tau', zorder=len(indices) + 1)
    ymin, ymax = plt.gca().get_ylim()
    plt.plot([-t_lag*1000, t_lag*1000], [critical_corr, critical_corr],
             color='black', linestyle=':', label='Critical corr.')
    plt.plot([-t_lag*1000, t_lag*1000], [-critical_corr, -critical_corr], color='black', linestyle=':')
    plt.ylim(ymin, ymax)
    plt.xlabel('Time lag [ms]'), plt.ylabel('NCC [-]')
    plt.title('Normalized cross correlation: examples'), plt.legend(loc='upper right')
    plt.xlim(-t_lag * 1000, t_lag * 1000), plt.grid()
    save_name = patient_id + '_' + str(time_begin[0]) + 'h' + str(time_begin[1]) + 'm'
    plt.savefig('../doc/figures/cctl_' + save_name + '.png')
    plt.close()
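
# Hypothetical usage, assuming recordings named like '../data/<patient_id>_<hour>h.mat'
# and '../data/<patient_id>_info.mat' exist as lin_corr expects; the arguments
# below are placeholders.
if __name__ == '__main__':
    lin_corr('ID07', time_begin=[1, 30], duration=60.0, t_lag=0.7, critical_corr=0.7)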