Example no. 1

# Imports assumed by this excerpt; correlate (providing correlate.optimized),
# clean_LFEs, and fill_data are project-local helpers, and template_dir,
# freq0, type_threshold, and threshold are assumed to be module-level
# settings.
import os
import pickle
from math import ceil

import numpy as np
import obspy
import pandas as pd
from obspy import read, read_inventory, Stream, UTCDateTime

def analyze_data(families, staloc, nhour, t1, duration, dt, ncpu, icpu):
    """
    """
    nfamilies = int(ceil(len(families) / ncpu))
    ibegin = icpu * nfamilies
    iend = min((icpu + 1) * nfamilies, len(families))
    
    for i in range(ibegin, iend):

        # Create directory to store the LFE times
        namedir = 'LFEs/' + families['family'].iloc[i]
        if not os.path.exists(namedir):
            os.makedirs(namedir)

        # File to write error messages
        namedir = 'error'
        if not os.path.exists(namedir):
            os.makedirs(namedir)
        errorfile = 'error/' + families['family'].iloc[i] + '.txt'

        # Create dataframe to store LFE times
        df = pd.DataFrame(columns=['year', 'month', 'day', 'hour', \
            'minute', 'second', 'cc', 'nchannel'])

        # Read the templates
        stations = families['stations'].iloc[i].split(',')
        templates = Stream()
        for station in stations:
            data = pickle.load(open(template_dir + '/' + \
                families['family'].iloc[i] + '/' + station + '.pkl', 'rb'))
            if (len(data) == 3):
                EW = data[0]
                NS = data[1]
                UD = data[2]
                EW.stats.station = station
                NS.stats.station = station
                EW.stats.channel = 'E'
                NS.stats.channel = 'N'
                templates.append(EW)
                templates.append(NS)
            else:
                UD = data[0]
            UD.stats.station = station
            UD.stats.channel = 'Z'
            templates.append(UD)                       

        # Loop on hours of data
        for hour in range(0, nhour):
            nchannel = 0
            Tstart = t1 + hour * 3600.0
            Tend = t1 + (hour + 1) * 3600.0 + duration
            delta = Tend - Tstart
            ndata = int(delta / dt) + 1

            # Get the data
            data = []
            for station in stations:
                try:
                    D = read('tmp/' + station + '.mseed')
                    D = D.slice(Tstart, Tend)
                    namefile = 'tmp/' + station + '.pkl'
                    orientation = pickle.load(open(namefile, 'rb'))

                    # Get station metadata for reading response file
                    for ir in range(0, len(staloc)):
                        if (station == staloc['station'][ir]):
                            network = staloc['network'][ir]
                            channels = staloc['channels'][ir]
                            location = staloc['location'][ir]
                            server = staloc['server'][ir]

                    # Orientation of template
                    # Date chosen: April 1st 2008
                    mychannels = channels.split(',')
                    mylocation = location
                    if (mylocation == '--'):
                        mylocation = ''
                    response = '../data/response/' + network + '_' + station + '.xml'
                    inventory = read_inventory(response, format='STATIONXML')
                    reference = []
                    for channel in mychannels:
                        angle = inventory.get_orientation(network + '.' + \
                            station + '.' + mylocation + '.' + channel, \
                            UTCDateTime(2008, 4, 1, 0, 0, 0))
                        reference.append(angle)

                    # Append data to stream
                    if (type(D) == obspy.core.stream.Stream):
                        stationdata = fill_data(D, orientation, station, channels, reference)
                        if (len(stationdata) > 0):
                            for stream in stationdata:
                                data.append(stream)
                except Exception:
                    message = 'No data available for station {} '.format( \
                        station) + 'at time {}/{}/{} - {}:{}:{}\n'.format( \
                        Tstart.year, Tstart.month, Tstart.day, Tstart.hour, \
                        Tstart.minute, Tstart.second)
                    # Log the failure instead of silently discarding it
                    with open(errorfile, 'a') as file:
                        file.write(message)

            # Loop on channels
            for channel in range(0, len(data)):
                subdata = data[channel]
                # Check whether we have a complete one-hour-long recording
                if (len(subdata) == 1):
                    if (len(subdata[0].data) == ndata):
                        # Get the template
                        station = subdata[0].stats.station
                        component = subdata[0].stats.channel
                        template = templates.select(station=station, \
                            component=component)[0]
                        # Cross correlation
                        cctemp = correlate.optimized(template, subdata[0])
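                        # Stack per-channel cross-correlation series
                        # row-wise; cc ends with shape (nchannel, npts)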
                        if (nchannel > 0):
                            cc = np.vstack((cc, cctemp))
                        else:
                            cc = cctemp
                        nchannel = nchannel + 1
    
            if (nchannel > 0):   
                # Compute average cross-correlation across channels
                meancc = np.mean(cc, axis=0)
                if (type_threshold == 'MAD'):
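                    # Median absolute deviation of meancc about its mean;
                    # the detection threshold then scales with the noise level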
                    MAD = np.median(np.abs(meancc - np.mean(meancc)))
                    index = np.where(meancc >= threshold * MAD)
                elif (type_threshold == 'Threshold'):
                    index = np.where(meancc >= threshold)
                else:
                    raise ValueError('Type of threshold must be MAD or Threshold')
                times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)

                # Get LFE times
                if np.shape(index)[1] > 0:
                    (time, cc) = clean_LFEs(index, times, meancc, dt, freq0)

                    # Add LFE times to dataframe
                    i0 = len(df.index)
                    for j in range(0, len(time)):
                        timeLFE = Tstart + time[j]
                        df.loc[i0 + j] = [int(timeLFE.year), int(timeLFE.month), \
                            int(timeLFE.day), int(timeLFE.hour), \
                            int(timeLFE.minute), timeLFE.second + \
                            timeLFE.microsecond / 1000000.0, cc[j], nchannel]

        # Add to pandas dataframe and save
        namefile = 'LFEs/' + families['family'].iloc[i] + '/catalog.pkl'
        if os.path.exists(namefile):
            df_all = pickle.load(open(namefile, 'rb'))
            df_all = pd.concat([df_all, df], ignore_index=True)
        else:
            df_all = df    
        df_all = df_all.astype(dtype={'year':'int32', 'month':'int32', \
            'day':'int32', 'hour':'int32', 'minute':'int32', \
            'second':'float', 'cc':'float', 'nchannel':'int32'})
        pickle.dump(df_all, open(namefile, 'wb'))
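
A minimal sketch of how this worker might be driven (an assumption, not part of the original source): families and staloc are pandas DataFrames with the columns the function indexes, the module-level settings named above are defined, and the numeric arguments below are placeholders. The icpu/ncpu pair simply selects an even slice of the family list, so one process per slice covers all families.

# Hypothetical driver: one process per slice of the family list
from multiprocessing import Process
from obspy import UTCDateTime

ncpu = 4
t1 = UTCDateTime(2008, 4, 1)
processes = [Process(target=analyze_data, \
    args=(families, staloc, 24, t1, 60.0, 0.05, ncpu, icpu)) \
    for icpu in range(ncpu)]
for p in processes:
    p.start()
for p in processes:
    p.join()
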
Example no. 2

# Imports assumed by this excerpt; get_from_IRIS, get_from_NCEDC, fill_data,
# correlate, and clean_LFEs are project-local helpers.
import os
import pickle
from math import ceil

import matplotlib.pyplot as plt
import numpy as np
import obspy
import pandas as pd
import pylab
from obspy import read_inventory, Stream, UTCDateTime

def find_LFEs(filename, stations, tbegin, tend, TDUR, filt, \
        freq0, nattempts, waittime, draw=False, type_threshold='MAD', \
        threshold=0.0075):
    """
    Find LFEs with the temporary stations from FAME
    using the templates from Plourde et al. (2015)

    Input:
        type filename = string
        filename = Name of the template
        type stations = list of strings
        stations = names of the stations used for the matched-filter algorithm
        type tbegin = tuple of 6 integers
        tbegin = Time when we begin looking for LFEs
        type tend = tuple of 6 integers
        tend = Time we stop looking for LFEs
        type TDUR = float
        TDUR = Time to add before and after the time window for tapering
        type filt = tuple of floats
        filt = Lower and upper frequencies of the filter
        type freq0 = float
        freq0 = Maximum frequency rate of LFE occurrence
        type nattempts = integer
        nattempts = Number of times we try to download data
        type waittime = positive float
        waittime = Time to wait between two attempts at downloading
        type draw = boolean
        draw = Do we draw a figure of the cross-correlation?
        type type_threshold = string
        type_threshold = 'MAD' or 'Threshold'
        type threshold = float
        threshold = Detection threshold: multiplier on the MAD, or absolute
            cross-correlation value, depending on type_threshold
    Output:
        None
    """

    # Get the network, channels, and location of the stations
    staloc = pd.read_csv('../data/Ducellier/stations_permanent.txt', \
        sep=r'\s{1,}', header=None, engine='python')
    staloc.columns = ['station', 'network', 'channels', 'location', \
        'server', 'latitude', 'longitude', 'time_on', 'time_off']

    # Create directory to store the LFE times
    namedir = 'LFEs/' + filename
    if not os.path.exists(namedir):
        os.makedirs(namedir)

    # File to write error messages
    namedir = 'error'
    if not os.path.exists(namedir):
        os.makedirs(namedir)
    errorfile = 'error/' + filename + '.txt'

    # Read the templates
    templates = Stream()
    for station in stations:
        data = pickle.load(open('templates_new/' + filename + \
            '/' + station + '.pkl', 'rb'))
        if (len(data) == 3):
            EW = data[0]
            NS = data[1]
            UD = data[2]
            EW.stats.station = station
            NS.stats.station = station
            EW.stats.channel = 'E'
            NS.stats.channel = 'N'
            templates.append(EW)
            templates.append(NS)
        else:
            UD = data[0]
        UD.stats.station = station
        UD.stats.channel = 'Z'
        templates.append(UD)

    # Begin and end time of analysis
    t1 = UTCDateTime(year=tbegin[0], month=tbegin[1], \
        day=tbegin[2], hour=tbegin[3], minute=tbegin[4], \
        second=tbegin[5])
    t2 = UTCDateTime(year=tend[0], month=tend[1], \
        day=tend[2], hour=tend[3], minute=tend[4], \
        second=tend[5])

    # Read the data
    data = []
    for station in stations:
        # Get station metadata for downloading
        for ir in range(0, len(staloc)):
            if (station == staloc['station'][ir]):
                network = staloc['network'][ir]
                channels = staloc['channels'][ir]
                location = staloc['location'][ir]
                server = staloc['server'][ir]

        # Duration of template
        template = templates.select(station=station, component='Z')[0]
        dt = template.stats.delta
        nt = template.stats.npts
        duration = (nt - 1) * dt   
        Tstart = t1 - TDUR
        Tend = t2 + duration + TDUR
        delta = t2 + duration - t1
        ndata = int(delta / dt) + 1

        # Orientation of template
        # Date chosen: January 1st 2012
        mychannels = channels.split(',')
        mylocation = location
        if (mylocation == '--'):
            mylocation = ''
        response = '../data/response/' + network + '_' + station + '.xml'
        inventory = read_inventory(response, format='STATIONXML')
        reference = []
        for channel in mychannels:
            angle = inventory.get_orientation(network + '.' + \
                station + '.' + mylocation + '.' + channel, \
                UTCDateTime(2012, 1, 1, 0, 0, 0))
            reference.append(angle)

        # First case: we can get the data from IRIS
        if (server == 'IRIS'):
            (D, orientation) = get_from_IRIS(station, network, channels, \
                location, Tstart, Tend, filt, dt, nattempts, waittime, \
                errorfile)
        # Second case: we get the data from NCEDC
        elif (server == 'NCEDC'):
            (D, orientation) = get_from_NCEDC(station, network, channels, \
                location, Tstart, Tend, filt, dt, nattempts, waittime, \
                errorfile)
        else:
            raise ValueError('You can only download data from IRIS and NCEDC')

        # Append data to stream
        if (type(D) == obspy.core.stream.Stream):
            stationdata = fill_data(D, orientation, station, channels, reference)
            if (len(stationdata) > 0):
                for stream in stationdata:
                    data.append(stream)

    # Number of hours of data to analyze
    nhour = int(ceil((t2 - t1) / 3600.0))

    # Create dataframe to store LFE times
    df = pd.DataFrame(columns=['year', 'month', 'day', 'hour', \
        'minute', 'second', 'cc', 'nchannel'])

    # Loop on hours of data
    for hour in range(0, nhour):
        nchannel = 0
        Tstart = t1 + hour * 3600.0
        Tend = t1 + (hour + 1) * 3600.0 + duration
        delta = Tend - Tstart
        ndata = int(delta / dt) + 1

        # Loop on channels
        for channel in range(0, len(data)):
            # Cut the data
            subdata = data[channel]
            subdata = subdata.slice(Tstart, Tend)
            # Check whether we have a complete one-hour-long recording
            if (len(subdata) == 1):
                if (len(subdata[0].data) == ndata):
                    # Get the template
                    station = subdata[0].stats.station
                    component = subdata[0].stats.channel
                    template = templates.select(station=station, \
                        component=component)[0]
                    # Cross correlation
                    cctemp = correlate.optimized(template, subdata[0])
                    if (nchannel > 0):
                        cc = np.vstack((cc, cctemp))
                    else:
                        cc = cctemp
                    nchannel = nchannel + 1
    
        if (nchannel > 0):
            # Compute average cross-correlation across channels
            meancc = np.mean(cc, axis=0)
            if (type_threshold == 'MAD'):
                MAD = np.median(np.abs(meancc - np.mean(meancc)))
                index = np.where(meancc >= threshold * MAD)
            elif (type_threshold == 'Threshold'):
                index = np.where(meancc >= threshold)
            else:
                raise ValueError('Type of threshold must be MAD or Threshold')
            times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)

            # Get LFE times
            if np.shape(index)[1] > 0:
                (time, cc) = clean_LFEs(index, times, meancc, dt, freq0)

                # Add LFE times to dataframe
                i0 = len(df.index)
                for i in range(0, len(time)):
                    timeLFE = Tstart + time[i]
                    df.loc[i0 + i] = [int(timeLFE.year), int(timeLFE.month), \
                        int(timeLFE.day), int(timeLFE.hour), \
                        int(timeLFE.minute), timeLFE.second + \
                        timeLFE.microsecond / 1000000.0, cc[i], nchannel]

            # Draw figure
            if draw:
                params = {'xtick.labelsize':16,
                          'ytick.labelsize':16}
                pylab.rcParams.update(params) 
                plt.figure(1, figsize=(20, 8))
                if np.shape(index)[1] > 0:
                    for i in range(0, len(time)):
                        plt.axvline(time[i], linewidth=2, color='grey')
                plt.plot(np.arange(0.0, np.shape(meancc)[0] * dt, \
                    dt), meancc, color='black')
                if (type_threshold == 'MAD'):
                    plt.axhline(threshold * MAD, linewidth=2, color='red', \
                        label = '{:6.2f} * MAD'.format(threshold))
                elif (type_threshold == 'Threshold'):
                    plt.axhline(threshold, linewidth=2, color='red', \
                        label = 'Threshold = {:8.4f}'.format(threshold))
                else:
                    raise ValueError( \
                        'Type of threshold must be MAD or Threshold')
                plt.xlim(0.0, (np.shape(meancc)[0] - 1) * dt)
                plt.xlabel('Time (s)', fontsize=24)
                plt.ylabel('Cross-correlation', fontsize=24)
                plt.title('Average cross-correlation across stations', \
                    fontsize=30)
                plt.legend(loc=2, fontsize=24)
                plt.savefig('LFEs/' + filename + '/' + \
                    '{:04d}{:02d}{:02d}_{:02d}{:02d}{:02d}'.format( \
                    Tstart.year, Tstart.month, Tstart.day, Tstart.hour, \
                    Tstart.minute, Tstart.second) + '.png', format='png')
                plt.close(1)

    # Add to pandas dataframe and save
    namefile = 'LFEs/' + filename + '/catalog.pkl'
    if os.path.exists(namefile):
        df_all = pickle.load(open(namefile, 'rb'))
        df_all = pd.concat([df_all, df], ignore_index=True)
    else:
        df_all = df    
    df_all = df_all.astype(dtype={'year':'int32', 'month':'int32', \
        'day':'int32', 'hour':'int32', 'minute':'int32', \
        'second':'float', 'cc':'float', 'nchannel':'int32'})
    pickle.dump(df_all, open(namefile, 'wb'))
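
A minimal sketch of one call to this function, following the docstring above; the template name, station list, dates, and parameter values are placeholders, not values from the source. With type_threshold='MAD', threshold acts as a multiplier on the median absolute deviation rather than as an absolute cross-correlation value.

# Hypothetical call; all names and values are placeholders
find_LFEs('templatename', ['STA1', 'STA2'], \
    (2008, 4, 1, 0, 0, 0), (2008, 4, 2, 0, 0, 0), \
    TDUR=10.0, filt=(1.5, 9.0), freq0=1.0, \
    nattempts=3, waittime=10.0, draw=False, \
    type_threshold='MAD', threshold=8.0)
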
Example no. 3

# Imports assumed by this excerpt; get_from_IRIS, get_from_NCEDC, fill_data,
# correlate, and clean_LFEs are project-local helpers, and DATADIR is assumed
# to be a module-level path to the data directory.
import os
import pickle
from math import ceil

import numpy as np
import obspy
import pandas as pd
from obspy import read, read_inventory, Stream, UTCDateTime

def find_LFEs(family_file, station_file, template_dir, tbegin, tend, \
    TDUR, duration, filt, freq0, dt, nattempts, waittime, type_threshold='MAD', \
    threshold=0.0075):
    """
    Find LFEs with the temporary stations from FAME
    using the templates from Plourde et al. (2015)

    Input:
        type family_file = string
        family_file = File containing the list of LFE families
        type station_file = string
        station_file = File containing the list of stations
        type template_dir = string
        template_dir = Directory containing the LFE templates
        type tbegin = tuple of 6 integers
        tbegin = Time when we begin looking for LFEs
        type tend = tuple of 6 integers
        tend = Time we stop looking for LFEs
        type TDUR = float
        TDUR = Time to add before and after the time window for tapering
        type duration = float
        duration = Duration of the LFE templates
        type filt = tuple of floats
        filt = Lower and upper frequencies of the filter
        type freq0 = float
        freq0 = Maximum frequency rate of LFE occurrence
        type dt = float
        dt = Time step for the LFE templates
        type nattempts = integer
        nattempts = Number of times we try to download data
        type waittime = positive float
        waittime = Time to wait between two attempts at downloading
        type type_threshold = string
        type_threshold = 'MAD' or 'Threshold'
        type threshold = float
        threshold = Detection threshold: multiplier on the MAD, or absolute
            cross-correlation value, depending on type_threshold
    Output:
        None
    """

    # Get the network, channels, and location of the stations
    staloc = pd.read_csv(station_file, \
        sep=r'\s{1,}', header=None, engine='python')
    staloc.columns = ['station', 'network', 'channels', 'location', \
        'server', 'latitude', 'longitude', 'time_on', 'time_off']

    # Begin and end time of analysis
    t1 = UTCDateTime(year=tbegin[0], month=tbegin[1], \
        day=tbegin[2], hour=tbegin[3], minute=tbegin[4], \
        second=tbegin[5])
    t2 = UTCDateTime(year=tend[0], month=tend[1], \
        day=tend[2], hour=tend[3], minute=tend[4], \
        second=tend[5])

    # Number of hours of data to analyze
    nhour = int(ceil((t2 - t1) / 3600.0))

    # Begin and end time of downloading
    Tstart = t1 - TDUR
    Tend = t2 + duration + TDUR

    # Temporary directory to store the data
    namedir = 'tmp'
    if not os.path.exists(namedir):
        os.makedirs(namedir)

    # Download the data from the stations
    for ir in range(0, len(staloc)):
        station = staloc['station'][ir]
        network = staloc['network'][ir]
        channels = staloc['channels'][ir]
        location = staloc['location'][ir]
        server = staloc['server'][ir]
        time_on = staloc['time_on'][ir]
        time_off = staloc['time_off'][ir]

        # File to write error messages
        namedir = 'error'
        if not os.path.exists(namedir):
            os.makedirs(namedir)
        errorfile = 'error/' + station + '.txt'

        # Check whether there are data for this period of time
        year_on = int(time_on[0:4])
        month_on = int(time_on[5:7])
        day_on = int(time_on[8:10])
        year_off = int(time_off[0:4])
        month_off = int(time_off[5:7])
        day_off = int(time_off[8:10])
        if ((Tstart > UTCDateTime(year=year_on, month=month_on, day=day_on)) \
           and (Tend < UTCDateTime(year=year_off, month=month_off, day=day_off))):

            # First case: we can get the data from IRIS
            if (server == 'IRIS'):
                (D, orientation) = get_from_IRIS(station, network, channels, \
                    location, Tstart, Tend, filt, dt, nattempts, waittime, \
                    errorfile, DATADIR)
            # Second case: we get the data from NCEDC
            elif (server == 'NCEDC'):
                (D, orientation) = get_from_NCEDC(station, network, channels, \
                    location, Tstart, Tend, filt, dt, nattempts, waittime, \
                    errorfile, DATADIR)
            else:
                raise ValueError(
                    'You can only download data from IRIS and NCEDC')

            # Store the data into temporary files
            if (type(D) == obspy.core.stream.Stream):
                D.write('tmp/' + station + '.mseed', format='MSEED')
                namefile = 'tmp/' + station + '.pkl'
                pickle.dump(orientation, open(namefile, 'wb'))

    # Loop on families
    families = pd.read_csv(family_file, \
        sep=r'\s{1,}', header=None, engine='python')
    families.columns = ['family', 'stations']
    for i in range(0, len(families)):

        # Create directory to store the LFE times
        namedir = 'LFEs/' + families['family'].iloc[i]
        if not os.path.exists(namedir):
            os.makedirs(namedir)

        # File to write error messages
        namedir = 'error'
        if not os.path.exists(namedir):
            os.makedirs(namedir)
        errorfile = 'error/' + families['family'].iloc[i] + '.txt'

        # Create dataframe to store LFE times
        df = pd.DataFrame(columns=['year', 'month', 'day', 'hour', \
            'minute', 'second', 'cc', 'nchannel'])

        # Read the templates
        stations = families['stations'].iloc[i].split(',')
        templates = Stream()
        for station in stations:
            templatefile = template_dir + '/' + \
                families['family'].iloc[i] + '/' + station + '.pkl'
            with open(templatefile, 'rb') as f:
                data = pickle.load(f)
            if (len(data) == 3):
                EW = data[0]
                NS = data[1]
                UD = data[2]
                EW.stats.station = station
                NS.stats.station = station
                EW.stats.channel = 'E'
                NS.stats.channel = 'N'
                templates.append(EW)
                templates.append(NS)
            else:
                UD = data[0]
            UD.stats.station = station
            UD.stats.channel = 'Z'
            templates.append(UD)

        # Loop on hours of data
        for hour in range(0, nhour):
            nchannel = 0
            Tstart = t1 + hour * 3600.0
            Tend = t1 + (hour + 1) * 3600.0 + duration
            delta = Tend - Tstart
            ndata = int(delta / dt) + 1

            # Get the data
            data = []
            for station in stations:
                try:
                    D = read('tmp/' + station + '.mseed')
                    D = D.slice(Tstart, Tend)
                    namefile = 'tmp/' + station + '.pkl'
                    orientation = pickle.load(open(namefile, 'rb'))

                    # Get station metadata for reading response file
                    for ir in range(0, len(staloc)):
                        if (station == staloc['station'][ir]):
                            network = staloc['network'][ir]
                            channels = staloc['channels'][ir]
                            location = staloc['location'][ir]
                            server = staloc['server'][ir]

                    # Orientation of template
                    # Date chosen: January 1st 2020
                    mychannels = channels.split(',')
                    mylocation = location
                    if (mylocation == '--'):
                        mylocation = ''
                    response = os.path.join(DATADIR, 'response', \
                        network + '_' + station + '.xml')
                    inventory = read_inventory(response, format='STATIONXML')
                    reference = []
                    for channel in mychannels:
                        angle = inventory.get_orientation(network + '.' + \
                            station + '.' + mylocation + '.' + channel, \
                            UTCDateTime(2020, 1, 1, 0, 0, 0))
                        reference.append(angle)

                    # Append data to stream
                    if (type(D) == obspy.core.stream.Stream):
                        stationdata = fill_data(D, orientation, station,
                                                channels, reference)
                        if (len(stationdata) > 0):
                            for stream in stationdata:
                                data.append(stream)
                except Exception:
                    message = 'No data available for station {} '.format( \
                        station) + 'at time {}/{}/{} - {}:{}:{}\n'.format( \
                        Tstart.year, Tstart.month, Tstart.day, Tstart.hour, \
                        Tstart.minute, Tstart.second)
                    # Log the failure instead of silently discarding it
                    with open(errorfile, 'a') as file:
                        file.write(message)

            # Loop on channels
            for channel in range(0, len(data)):
                subdata = data[channel]
                # Check whether we have a complete one-hour-long recording
                if (len(subdata) == 1):
                    if (len(subdata[0].data) == ndata):
                        # Get the template
                        station = subdata[0].stats.station
                        component = subdata[0].stats.channel
                        template = templates.select(station=station, \
                            component=component)[0]
                        # Cross correlation
                        cctemp = correlate.optimized(template, subdata[0])
                        if (nchannel > 0):
                            cc = np.vstack((cc, cctemp))
                        else:
                            cc = cctemp
                        nchannel = nchannel + 1

            if (nchannel > 0):
                # Compute average cross-correlation across channels
                meancc = np.mean(cc, axis=0)
                if (type_threshold == 'MAD'):
                    MAD = np.median(np.abs(meancc - np.mean(meancc)))
                    index = np.where(meancc >= threshold * MAD)
                elif (type_threshold == 'Threshold'):
                    index = np.where(meancc >= threshold)
                else:
                    raise ValueError(
                        'Type of threshold must be MAD or Threshold')
                times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)

                # Get LFE times
                if np.shape(index)[1] > 0:
                    (time, cc) = clean_LFEs(index, times, meancc, dt, freq0)

                    # Add LFE times to dataframe
                    i0 = len(df.index)
                    for j in range(0, len(time)):
                        timeLFE = Tstart + time[j]
                        df.loc[i0 + j] = [int(timeLFE.year), int(timeLFE.month), \
                            int(timeLFE.day), int(timeLFE.hour), \
                            int(timeLFE.minute), timeLFE.second + \
                            timeLFE.microsecond / 1000000.0, cc[j], nchannel]

        # Add to pandas dataframe and save
        df_all = df
        df_all = df_all.astype(dtype={'year':'int32', 'month':'int32', \
            'day':'int32', 'hour':'int32', 'minute':'int32', \
            'second':'float', 'cc':'float', 'nchannel':'int32'})
        df_all.to_csv('LFEs/' + families['family'].iloc[i] + '/catalog_' + \
            '{:04d}{:02d}{:02d}_{:02d}{:02d}{:02d}'.format(tbegin[0], \
            tbegin[1], tbegin[2], tbegin[3], tbegin[4], tbegin[5]) + '.csv')
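
A minimal sketch of one call to this variant, which reads the family and station lists from files, caches the downloaded waveforms under tmp/, then loops on families; the file names, dates, and parameter values below are placeholders, and DATADIR must also be set at module level.

# Hypothetical call with placeholder file names and values
find_LFEs('families.txt', 'stations.txt', 'templates', \
    (2008, 4, 1, 0, 0, 0), (2008, 4, 2, 0, 0, 0), \
    TDUR=10.0, duration=60.0, filt=(1.5, 9.0), freq0=1.0, \
    dt=0.05, nattempts=3, waittime=10.0, \
    type_threshold='MAD', threshold=8.0)
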
Example no. 4

# Imports assumed by this excerpt; correlate, clean_LFEs, and rotate_data are
# project-local helpers, and template_dir is assumed to be a module-level
# setting.
import os
import pickle
from math import ceil

import numpy as np
import obspy
import pandas as pd
from obspy import read, Stream, UTCDateTime

def analyze_data(families, staloc, tbegin, tend, \
    freq0, type_threshold, threshold, ncpu, icpu):
    """
    """
    nfamilies = int(ceil(len(families) / ncpu))
    ibegin = icpu * nfamilies
    iend = min((icpu + 1) * nfamilies, len(families))

    # Loop on families
    for i in range(ibegin, iend):

        # Create directory to store the LFE times
        namedir = 'LFEs/' + families['family'].iloc[i]
        if not os.path.exists(namedir):
            os.makedirs(namedir)

        # File to write the number of channels used for each hour
        namedir = 'nstations'
        if not os.path.exists(namedir):
            os.makedirs(namedir)
        stationfile = 'nstations/' + families['family'].iloc[i] + '.txt'

        # Create dataframe to store LFE times
        df = pd.DataFrame(columns=['year', 'month', 'day', 'hour', \
            'minute', 'second', 'cc', 'nchannel'])

        # Read the templates
        stations = families['stations'].iloc[i].split(',')
        templates = Stream()
        orientations = []
        names = []
        for station in stations:
            subset = staloc.loc[staloc['station'] == station]
            channels = subset['channels'].iloc[0]
            mychannels = channels.split(',')
            for channel in mychannels:
                data = pickle.load(open(template_dir + '/' + \
                    families['family'].iloc[i] + '/' + station + '_' + \
                    channel + '.pkl', 'rb'))
                template = data[0]
                angle = data[1]
                templates.append(template)
                orientations.append(angle)
                names.append(station + '_' + channel)

        # Check the time step of the stations
        subset = staloc.loc[staloc['station'].isin(stations)]
        if len(subset['dt'].value_counts()) == 1:
            dt = subset['dt'].iloc[0]
        else:
            raise ValueError('All stations must have the same time step')

        # Number of hours of data to analyze
        t1 = UTCDateTime(year=tbegin[0], month=tbegin[1], \
            day=tbegin[2], hour=tbegin[3], minute=tbegin[4], \
            second=tbegin[5])
        t2 = UTCDateTime(year=tend[0], month=tend[1], \
            day=tend[2], hour=tend[3], minute=tend[4], \
            second=tend[5])
        nhour = int(ceil((t2 - t1) / 3600.0))
        duration = families['duration'].iloc[i]

        # To rotate components
        swap = {'E': 'N', 'N': 'E', '1': '2', '2': '1'}
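        # Each horizontal channel code maps to its orthogonal counterpart
        # so the paired trace can be fetched and both horizontals rotated
        # to the template's reference orientation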

        # Loop on hours of data
        for hour in range(0, nhour):
            Tstart = t1 + hour * 3600.0
            Tend = t1 + (hour + 1) * 3600.0 + duration
            delta = Tend - Tstart
            ndata = int(delta / dt) + 1

            # Get the data
            data = []
            for station in stations:
                subset = staloc.loc[staloc['station'] == station]
                channels = subset['channels'].iloc[0]
                mychannels = channels.split(',')
                for num, channel in enumerate(mychannels):
                    try:
                        D = read('tmp/' + station + '_' + channel + '.mseed')
                        D = D.slice(Tstart, Tend)

                        if (type(D) == obspy.core.stream.Stream):
                            namefile = 'tmp/' + station + '_' + channel + \
                                '.pkl'
                            orientation = pickle.load( \
                                open(namefile, 'rb'))[num]
                            index = names.index(station + '_' + channel)
                            reference = orientations[index]

                            # Rotate components
                            if (len(mychannels) > 1) and (num < 2):
                                if orientation != reference:
                                    channel_new = channel[0:2] + \
                                        swap[channel[2]]
                                    D_new = read('tmp/' + station + '_' + \
                                        channel_new + '.mseed')
                                    D_new = D_new.slice(Tstart, Tend)
                                    namefile = 'tmp/' + station + '_' + \
                                        channel_new + '.pkl'
                                    if num == 0:
                                        orientation_new = pickle.load(open( \
                                            namefile, 'rb'))[1]
                                    else:
                                        orientation_new = pickle.load(open( \
                                            namefile, 'rb'))[0]
                                    index = names.index(station + '_' + \
                                        channel_new)
                                    reference_new = orientations[index]
                                    if channel[2] in ['E', '1']:
                                        D = rotate_data(D, D_new, \
                                            orientation, orientation_new, \
                                            reference, reference_new, 'E')
                                    else:
                                        D = rotate_data(D_new, D, \
                                            orientation_new, orientation, \
                                            reference_new, reference, 'N')

                            # Append stream to data
                            data.append(D)
                    except Exception:
                        # Note: the message is assembled but this variant
                        # defines no error file to write it to
                        message = 'No data available for station {}'.format( \
                            station) + ' and channel {}'.format(channel) + \
                            ' at time {}/{}/{} - {}:{}:{}\n'.format( \
                            Tstart.year, Tstart.month, Tstart.day, \
                            Tstart.hour, Tstart.minute, Tstart.second)

            # Loop on channels
            nchannel = 0
            for j in range(0, len(data)):
                subdata = data[j]
                # Check whether we have a complete one-hour-long recording
                if (len(subdata) == 1):
                    if (len(subdata[0].data) == ndata):
                        # Get the template
                        station = subdata[0].stats.station
                        channel = subdata[0].stats.channel
                        template = templates.select(station=station, \
                            channel=channel)[0]
                        # Cross correlation
                        cctemp = correlate.optimized(template, subdata[0])
                        if (nchannel > 0):
                            cc = np.vstack((cc, cctemp))
                        else:
                            cc = cctemp
                        nchannel = nchannel + 1

            # Write number of channels
            with open(stationfile, 'a') as file:
                file.write('{} {} {} {} {}\n'.format(Tstart.year, \
                    Tstart.month, Tstart.day, Tstart.hour, nchannel))

            if (nchannel > 0):
                # Compute average cross-correlation across channels
                if len(np.shape(cc)) == 1:
                    meancc = cc
                else:
                    meancc = np.mean(cc, axis=0)
                if (type_threshold == 'MAD'):
                    MAD = np.median(np.abs(meancc - np.mean(meancc)))
                    index = np.where(meancc >= threshold * MAD)
                elif (type_threshold == 'Threshold'):
                    index = np.where(meancc >= threshold)
                else:
                    raise ValueError( \
                        'Type of threshold must be MAD or Threshold')
                times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)

                # Get LFE times
                if np.shape(index)[1] > 0:
                    (time, cc) = clean_LFEs(index, times, meancc, dt, freq0)

                    # Add LFE times to dataframe
                    i0 = len(df.index)
                    for j in range(0, len(time)):
                        timeLFE = Tstart + time[j]
                        df.loc[i0 + j] = [int(timeLFE.year), \
                            int(timeLFE.month), int(timeLFE.day), \
                            int(timeLFE.hour), int(timeLFE.minute), \
                            timeLFE.second + timeLFE.microsecond / 1000000.0, \
                            cc[j], nchannel]

        # Add to pandas dataframe and save
        namefile = 'LFEs/' + families['family'].iloc[i] + '/catalog.pkl'
        if os.path.exists(namefile):
            df_all = pickle.load(open(namefile, 'rb'))
            df_all = pd.concat([df_all, df], ignore_index=True)
        else:
            df_all = df
        df_all = df_all.astype(dtype={'year':'int32', 'month':'int32', \
            'day':'int32', 'hour':'int32', 'minute':'int32', \
            'second':'float', 'cc':'float', 'nchannel':'int32'})
        pickle.dump(df_all, open(namefile, 'wb'))
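
A minimal sketch of driving this variant in parallel, one worker per slice of the family list; the DataFrames are assumed to carry the columns the function indexes (including 'duration' in families and 'dt' in staloc), and the argument values are placeholders.

# Hypothetical parallel driver
from functools import partial
from multiprocessing import Pool

ncpu = 4
worker = partial(analyze_data, families, staloc, \
    (2008, 4, 1, 0, 0, 0), (2008, 4, 2, 0, 0, 0), \
    1.0, 'MAD', 8.0, ncpu)
with Pool(ncpu) as pool:
    pool.map(worker, range(ncpu))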