Example No. 1
def rot_12_NE(st: Stream, meta):
    '''
    Performs a 1,2 -> N,E rotation.

    :param st: A stream containing the traces to rotate.
    :param dict meta: Dictionary containing the metadata for all streams.

    :return: The rotated stream.
    '''

    st2 = st.select(channel="??1")
    for tr in st2:
        id1 = tr.id
        id2 = id1[:-1] + '2'
        tr1 = tr
        try:
            tr2 = st.select(id=id2)[0]
        except IndexError:
            st.remove(tr)
            logger.warning("%s Channel 2 not found. Impossible to rotate",
                           tr.id)
            continue

        timeA = max(tr1.stats.starttime, tr2.stats.starttime)
        timeB = min(tr1.stats.endtime, tr2.stats.endtime)
        tr1.trim(timeA, timeB)
        tr2.trim(timeA, timeB)
        azi = meta[id1]['azimuth']
        tr1.data, tr2.data = rot2D(tr1.data, tr2.data, -azi)

    return st
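
The rot2D helper called above is not shown in this example. A minimal sketch of what such a 2-D component rotation could look like, assuming an angle in degrees and the usual orthogonal rotation (the name and signature follow the call above; the actual implementation may differ):

import numpy as np

def rot2D(x, y, angle):
    # Hypothetical stand-in for the rot2D used by rot_12_NE: rotate two
    # orthogonal component arrays by `angle` degrees.
    phi = np.radians(angle)
    xr = np.cos(phi) * x + np.sin(phi) * y
    yr = -np.sin(phi) * x + np.cos(phi) * y
    return xr, yr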
Example No. 2
def test_xyzalgorithm_uneccesary_channel_empty():
    """XYZAlgorithm_test.test_xyzalgorithm_uneccesary_channel_gaps()

    confirms the process will run when an uneccesary channel is input
    but contains gaps or is completely empty. ie. gaps in 'Z' channel
    or and empty 'F' channel. This also makes sure the 'Z' and 'F' channels
    are passed without any modification.
    """
    algorithm = XYZAlgorithm("obs", "mag")
    timeseries = Stream()
    timeseries += __create_trace("H", [1, 1])
    timeseries += __create_trace("E", [1, 1])
    timeseries += __create_trace("Z", [1, np.NaN])
    timeseries += __create_trace("F", [np.NaN, np.NaN])
    outstream = algorithm.process(timeseries)
    assert_equal(
        outstream.select(channel="Z")[0].data.all(),
        timeseries.select(channel="Z")[0].data.all(),
    )
    assert_equal(
        outstream.select(channel="F")[0].data.all(),
        timeseries.select(channel="F")[0].data.all(),
    )
    ds = outstream.select(channel="D")
    # there is 1 trace
    assert_equal(len(ds), 1)
    d = ds[0]
    # d has 2 values (same as input)
    assert_equal(len(d.data), 2)
    # d has no NaN values
    assert_equal(np.isnan(d).any(), False)
Example No. 3
def test_xyzalgorithm_uneccesary_channel_empty():
    """XYZAlgorithm_test.test_xyzalgorithm_uneccesary_channel_empty()

    Confirms the process will run when an unnecessary channel is input
    but contains gaps or is completely empty, i.e. gaps in the 'Z'
    channel or an empty 'F' channel. This also makes sure the 'Z' and
    'F' channels are passed through without modification.
    """
    algorithm = XYZAlgorithm('obs', 'mag')
    timeseries = Stream()
    timeseries += __create_trace('H', [1, 1])
    timeseries += __create_trace('E', [1, 1])
    timeseries += __create_trace('Z', [1, np.NaN])
    timeseries += __create_trace('F', [np.NaN, np.NaN])
    outstream = algorithm.process(timeseries)
    assert_equals(outstream.select(channel='Z')[0].data.all(),
        timeseries.select(channel='Z')[0].data.all())
    assert_equals(outstream.select(channel='F')[0].data.all(),
        timeseries.select(channel='F')[0].data.all())
    ds = outstream.select(channel='D')
    # there is 1 trace
    assert_equals(len(ds), 1)
    d = ds[0]
    # d has 2 values (same as input)
    assert_equals(len(d.data), 2)
    # d has no NaN values
    assert_equals(np.isnan(d).any(), False)
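
Both copies of this test rely on a private __create_trace helper that is not shown. A minimal sketch under the assumption that it simply wraps the sample values in an ObsPy Trace with the given channel code (the real helper may set more stats fields):

import numpy as np
from obspy.core import Trace

def __create_trace(channel, data):
    # Hypothetical helper; the test module's real version may also set
    # network, station, sampling_rate, etc.
    trace = Trace(data=np.array(data, dtype=np.float64))
    trace.stats.channel = channel
    return trace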
Example No. 4
def save_raw(saved: dict, st: Stream, rawloc: str, inv: Inventory,
             saveasdf: bool):
    """
    Save the raw waveform data in the desired format.
    This function mainly ensures that the waveforms are saved with the
    correct associations and at the correct locations.

    :param saved: Dictionary holding information about the original streams
        to identify them afterwards.
    :type saved: dict
    :param st: obspy stream holding all data (from various stations)
    :type st: Stream
    :param rawloc: Parent directory (with phase) to save the files in.
    :type rawloc: str
    :param inv: The inventory holding all the station information
    :type inv: Inventory
    :param saveasdf: If True, the data will be saved in ASDF format.
    :type saveasdf: bool
    """
    # Just use the same name
    for evt, startt, endt, net, stat in zip(saved['event'], saved['startt'],
                                            saved['endt'], saved['net'],
                                            saved['stat']):
        # earlier we downloaded all locations, but we don't really want
        # to have several, so let's just keep one
        try:
            sst = st.select(network=net, station=stat)
            # This might actually be empty; if so, let's just skip
            if sst.count() == 0:
                logging.debug(f'No trace of {net}.{stat} in Stream.')
                continue
            slst = sst.slice(startt, endt)
            # Only write the prevalent location
            locs = [tr.stats.location for tr in sst]
            filtloc = max(set(locs), key=locs.count)
            sslst = slst.select(location=filtloc)
            if saveasdf:
                sinv = inv.select(net, stat, starttime=startt, endtime=endt)
                write_st(sslst, evt, rawloc, sinv)
            else:
                save_raw_mseed(evt, sslst, rawloc, net, stat)
        except Exception as e:
            logging.error(e)
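
Because the function zips over saved['event'], saved['startt'], saved['endt'], saved['net'], and saved['stat'], the saved dictionary must hold parallel lists under those keys, one entry per requested stream. A hedged usage sketch with placeholder values:

# Illustrative call; the events, times, and codes are placeholders.
saved = {
    'event': [event_a, event_b],   # obspy Event objects
    'startt': [t0_a, t0_b],        # UTCDateTime window starts
    'endt': [t1_a, t1_b],          # UTCDateTime window ends
    'net': ['IU', 'II'],
    'stat': ['ANMO', 'BFO'],
}
save_raw(saved, st, '/data/raw/P', inv, saveasdf=False)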
Example No. 5
def analyze_data(families, staloc, nhour, t1, duration, dt, ncpu, icpu):
    """
    """
    nfamilies = int(ceil(len(families) / ncpu))
    ibegin = icpu * nfamilies
    iend = min((icpu + 1) * nfamilies, len(families))
    
    for i in range(ibegin, iend):

        # Create directory to store the LFEs times
        namedir = 'LFEs/' + families['family'].iloc[i]
        if not os.path.exists(namedir):
            os.makedirs(namedir)

        # File to write error messages
        namedir = 'error'
        if not os.path.exists(namedir):
            os.makedirs(namedir)
        errorfile = 'error/' + families['family'].iloc[i] + '.txt'

        # Create dataframe to store LFE times
        df = pd.DataFrame(columns=['year', 'month', 'day', 'hour', \
        'minute', 'second', 'cc', 'nchannel'])

        # Read the templates
        stations = families['stations'].iloc[i].split(',')
        templates = Stream()
        for station in stations:
            data = pickle.load(open(template_dir + '/' + families['family'].iloc[i] + \
            '/' + station + '.pkl', 'rb'))
            if (len(data) == 3):
                EW = data[0]
                NS = data[1]
                UD = data[2]
                EW.stats.station = station
                NS.stats.station = station
                EW.stats.channel = 'E'
                NS.stats.channel = 'N'
                templates.append(EW)
                templates.append(NS)
            else:
                UD = data[0]
            UD.stats.station = station
            UD.stats.channel = 'Z'
            templates.append(UD)                       

        # Loop on hours of data
        for hour in range(0, nhour):
            nchannel = 0
            Tstart = t1 + hour * 3600.0
            Tend = t1 + (hour + 1) * 3600.0 + duration
            delta = Tend - Tstart
            ndata = int(delta / dt) + 1

            # Get the data
            data = []
            for station in stations:
                try:
                    D = read('tmp/' + station + '.mseed')
                    D = D.slice(Tstart, Tend)
                    namefile = 'tmp/' + station + '.pkl'
                    orientation = pickle.load(open(namefile, 'rb'))

                    # Get station metadata for reading response file
                    for ir in range(0, len(staloc)):
                        if (station == staloc['station'][ir]):
                            network = staloc['network'][ir]
                            channels = staloc['channels'][ir]
                            location = staloc['location'][ir]
                            server = staloc['server'][ir]

                    # Orientation of template
                    # Date chosen: April 1st 2008
                    mychannels = channels.split(',')
                    mylocation = location
                    if (mylocation == '--'):
                        mylocation = ''
                    response = '../data/response/' + network + '_' + station + '.xml'
                    inventory = read_inventory(response, format='STATIONXML')
                    reference = []
                    for channel in mychannels:
                        angle = inventory.get_orientation(network + '.' + \
                            station + '.' + mylocation + '.' + channel, \
                            UTCDateTime(2008, 4, 1, 0, 0, 0))
                        reference.append(angle)

                    # Append data to stream
                    if (type(D) == obspy.core.stream.Stream):
                        stationdata = fill_data(D, orientation, station, channels, reference)
                        if (len(stationdata) > 0):
                            for stream in stationdata:
                                data.append(stream)
                except:
                    message = 'No data available for station {} '.format( \
                        station) + 'at time {}/{}/{} - {}:{}:{}\n'.format( \
                        Tstart.year, Tstart.month, Tstart.day, Tstart.hour, \
                        Tstart.minute, Tstart.second)

            # Loop on channels
            for channel in range(0, len(data)):
                subdata = data[channel]
                # Check whether we have a complete one-hour-long recording
                if (len(subdata) == 1):
                    if (len(subdata[0].data) == ndata):
                        # Get the template
                        station = subdata[0].stats.station
                        component = subdata[0].stats.channel
                        template = templates.select(station=station, \
                            component=component)[0]
                        # Cross correlation
                        cctemp = correlate.optimized(template, subdata[0])
                        if (nchannel > 0):
                            cc = np.vstack((cc, cctemp))
                        else:
                            cc = cctemp
                        nchannel = nchannel + 1
    
            if (nchannel > 0):   
                # Compute average cross-correlation across channels
                meancc = np.mean(cc, axis=0)
                if (type_threshold == 'MAD'):
                    MAD = np.median(np.abs(meancc - np.mean(meancc)))
                    index = np.where(meancc >= threshold * MAD)
                elif (type_threshold == 'Threshold'):
                    index = np.where(meancc >= threshold)
                else:
                    raise ValueError('Type of threshold must be MAD or Threshold')
                times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)

                # Get LFE times
                if np.shape(index)[1] > 0:
                    (time, cc) = clean_LFEs(index, times, meancc, dt, freq0)

                    # Add LFE times to dataframe
                    i0 = len(df.index)
                    for j in range(0, len(time)):
                        timeLFE = Tstart + time[j]
                        df.loc[i0 + j] = [int(timeLFE.year), int(timeLFE.month), \
                            int(timeLFE.day), int(timeLFE.hour), \
                            int(timeLFE.minute), timeLFE.second + \
                            timeLFE.microsecond / 1000000.0, cc[j], nchannel]

        # Add to pandas dataframe and save
        namefile = 'LFEs/' + families['family'].iloc[i] + '/catalog.pkl'
        if os.path.exists(namefile):
            df_all = pickle.load(open(namefile, 'rb'))
            df_all = pd.concat([df_all, df], ignore_index=True)
        else:
            df_all = df    
        df_all = df_all.astype(dtype={'year':'int32', 'month':'int32', \
            'day':'int32', 'hour':'int32', 'minute':'int32', \
            'second':'float', 'cc':'float', 'nchannel':'int32'})
        pickle.dump(df_all, open(namefile, 'wb'))
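
The MAD branch above keeps samples of the stacked cross-correlation that exceed threshold times the median absolute deviation about the mean. A small self-contained illustration of that detection step (the values and the threshold here are made up for the example, not the function's defaults):

import numpy as np

meancc = np.array([0.01, 0.02, 0.015, 0.30, 0.012, 0.018])
threshold = 3.0

MAD = np.median(np.abs(meancc - np.mean(meancc)))
index = np.where(meancc >= threshold * MAD)
print(index)  # (array([3]),) -> one candidate LFE detection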
Example No. 6
def find_LFEs(family_file, station_file, template_dir, tbegin, tend, \
    TDUR, duration, filt, freq0, dt, nattempts, waittime, type_threshold='MAD', \
    threshold=0.0075):
    """
    Find LFEs with the temporary stations from FAME
    using the templates from Plourde et al. (2015)

    Input:
        type family_file = string
        family_file = File containing the list of LFE families
        type station_file = string
        station_file = File containing the list of stations
        type template_dir = string
        template_dir = Directory where to find the LFE templates
        type tbegin = tuple of 6 integers
        tbegin = Time when we begin looking for LFEs
        type tend = tuple of 6 integers
        tend = Time when we stop looking for LFEs
        type TDUR = float
        TDUR = Time to add before and after the time window for tapering
        type duration = float
        duration = Duration of the LFE templates
        type filt = tuple of floats
        filt = Lower and upper frequencies of the filter
        type freq0 = float
        freq0 = Maximum frequency rate of LFE occurrence
        type dt = float
        dt = Time step for the LFE templates
        type nattempts = integer
        nattempts = Number of times we try to download data
        type waittime = positive float
        waittime = Time to wait between two attempts at downloading
        type type_threshold = string
        type_threshold = 'MAD' or 'Threshold'
        type threshold = float
        threshold = Cross-correlation value above which LFEs are detected
    Output:
        None
    """

    # Get the network, channels, and location of the stations
    staloc = pd.read_csv(station_file, \
        sep=r'\s{1,}', header=None, engine='python')
    staloc.columns = ['station', 'network', 'channels', 'location', \
        'server', 'latitude', 'longitude', 'time_on', 'time_off']

    # Begin and end time of analysis
    t1 = UTCDateTime(year=tbegin[0], month=tbegin[1], \
        day=tbegin[2], hour=tbegin[3], minute=tbegin[4], \
        second=tbegin[5])
    t2 = UTCDateTime(year=tend[0], month=tend[1], \
        day=tend[2], hour=tend[3], minute=tend[4], \
        second=tend[5])

    # Number of hours of data to analyze
    nhour = int(ceil((t2 - t1) / 3600.0))

    # Begin and end time of downloading
    Tstart = t1 - TDUR
    Tend = t2 + duration + TDUR

    # Temporary directory to store the data
    namedir = 'tmp'
    if not os.path.exists(namedir):
        os.makedirs(namedir)

    # Download the data from the stations
    for ir in range(0, len(staloc)):
        station = staloc['station'][ir]
        network = staloc['network'][ir]
        channels = staloc['channels'][ir]
        location = staloc['location'][ir]
        server = staloc['server'][ir]
        time_on = staloc['time_on'][ir]
        time_off = staloc['time_off'][ir]

        # File to write error messages
        namedir = 'error'
        if not os.path.exists(namedir):
            os.makedirs(namedir)
        errorfile = 'error/' + station + '.txt'

        # Check whether there are data for this period of time
        year_on = int(time_on[0:4])
        month_on = int(time_on[5:7])
        day_on = int(time_on[8:10])
        year_off = int(time_off[0:4])
        month_off = int(time_off[5:7])
        day_off = int(time_off[8:10])
        if ((Tstart > UTCDateTime(year=year_on, month=month_on, day=day_on)) \
           and (Tend < UTCDateTime(year=year_off, month=month_off, day=day_off))):

            # First case: we can get the data from IRIS
            if (server == 'IRIS'):
                (D, orientation) = get_from_IRIS(station, network, channels, \
                    location, Tstart, Tend, filt, dt, nattempts, waittime, \
                    errorfile, DATADIR)
            # Second case: we get the data from NCEDC
            elif (server == 'NCEDC'):
                (D, orientation) = get_from_NCEDC(station, network, channels, \
                    location, Tstart, Tend, filt, dt, nattempts, waittime, \
                    errorfile, DATADIR)
            else:
                raise ValueError(
                    'You can only download data from IRIS and NCEDC')

            # Store the data into temporary files
            if (type(D) == obspy.core.stream.Stream):
                D.write('tmp/' + station + '.mseed', format='MSEED')
                namefile = 'tmp/' + station + '.pkl'
                pickle.dump(orientation, open(namefile, 'wb'))

    # Loop on families
    families = pd.read_csv(family_file, \
        sep=r'\s{1,}', header=None, engine='python')
    families.columns = ['family', 'stations']
    for i in range(0, len(families)):

        # Create directory to store the LFEs times
        namedir = 'LFEs/' + families['family'].iloc[i]
        if not os.path.exists(namedir):
            os.makedirs(namedir)

        # File to write error messages
        namedir = 'error'
        if not os.path.exists(namedir):
            os.makedirs(namedir)
        errorfile = 'error/' + families['family'].iloc[i] + '.txt'

        # Create dataframe to store LFE times
        df = pd.DataFrame(columns=['year', 'month', 'day', 'hour', \
        'minute', 'second', 'cc', 'nchannel'])

        # Read the templates
        stations = families['stations'].iloc[i].split(',')
        templates = Stream()
        for station in stations:
            templatefile = template_dir + '/' + \
                families['family'].iloc[i] + '/' + station + '.pkl'
            with open(templatefile, 'rb') as f:
                data = pickle.load(f)
            if (len(data) == 3):
                EW = data[0]
                NS = data[1]
                UD = data[2]
                EW.stats.station = station
                NS.stats.station = station
                EW.stats.channel = 'E'
                NS.stats.channel = 'N'
                templates.append(EW)
                templates.append(NS)
            else:
                UD = data[0]
            UD.stats.station = station
            UD.stats.channel = 'Z'
            templates.append(UD)

        # Loop on hours of data
        for hour in range(0, nhour):
            nchannel = 0
            Tstart = t1 + hour * 3600.0
            Tend = t1 + (hour + 1) * 3600.0 + duration
            delta = Tend - Tstart
            ndata = int(delta / dt) + 1

            # Get the data
            data = []
            for station in stations:
                try:
                    D = read('tmp/' + station + '.mseed')
                    D = D.slice(Tstart, Tend)
                    namefile = 'tmp/' + station + '.pkl'
                    orientation = pickle.load(open(namefile, 'rb'))

                    # Get station metadata for reading response file
                    for ir in range(0, len(staloc)):
                        if (station == staloc['station'][ir]):
                            network = staloc['network'][ir]
                            channels = staloc['channels'][ir]
                            location = staloc['location'][ir]
                            server = staloc['server'][ir]

                    # Orientation of template
                    # Date chosen: January 1st 2020
                    mychannels = channels.split(',')
                    mylocation = location
                    if (mylocation == '--'):
                        mylocation = ''
                    response = os.path.join(
                        DATADIR,
                        'response/') + network + '_' + station + '.xml'
                    inventory = read_inventory(response, format='STATIONXML')
                    reference = []
                    for channel in mychannels:
                        angle = inventory.get_orientation(network + '.' + \
                            station + '.' + mylocation + '.' + channel, \
                            UTCDateTime(2020, 1, 1, 0, 0, 0))
                        reference.append(angle)

                    # Append data to stream
                    if (type(D) == obspy.core.stream.Stream):
                        stationdata = fill_data(D, orientation, station,
                                                channels, reference)
                        if (len(stationdata) > 0):
                            for stream in stationdata:
                                data.append(stream)
                except:
                    message = 'No data available for station {} '.format( \
                        station) + 'at time {}/{}/{} - {}:{}:{}\n'.format( \
                        Tstart.year, Tstart.month, Tstart.day, Tstart.hour, \
                        Tstart.minute, Tstart.second)

            # Loop on channels
            for channel in range(0, len(data)):
                subdata = data[channel]
                # Check whether we have a complete one-hour-long recording
                if (len(subdata) == 1):
                    if (len(subdata[0].data) == ndata):
                        # Get the template
                        station = subdata[0].stats.station
                        component = subdata[0].stats.channel
                        template = templates.select(station=station, \
                            component=component)[0]
                        # Cross correlation
                        cctemp = correlate.optimized(template, subdata[0])
                        if (nchannel > 0):
                            cc = np.vstack((cc, cctemp))
                        else:
                            cc = cctemp
                        nchannel = nchannel + 1

            if (nchannel > 0):
                # Compute average cross-correlation across channels
                meancc = np.mean(cc, axis=0)
                if (type_threshold == 'MAD'):
                    MAD = np.median(np.abs(meancc - np.mean(meancc)))
                    index = np.where(meancc >= threshold * MAD)
                elif (type_threshold == 'Threshold'):
                    index = np.where(meancc >= threshold)
                else:
                    raise ValueError(
                        'Type of threshold must be MAD or Threshold')
                times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)

                # Get LFE times
                if np.shape(index)[1] > 0:
                    (time, cc) = clean_LFEs(index, times, meancc, dt, freq0)

                    # Add LFE times to dataframe
                    i0 = len(df.index)
                    for j in range(0, len(time)):
                        timeLFE = Tstart + time[j]
                        df.loc[i0 + j] = [int(timeLFE.year), int(timeLFE.month), \
                            int(timeLFE.day), int(timeLFE.hour), \
                            int(timeLFE.minute), timeLFE.second + \
                            timeLFE.microsecond / 1000000.0, cc[j], nchannel]

        # Add to pandas dataframe and save
        df_all = df
        df_all = df_all.astype(dtype={'year':'int32', 'month':'int32', \
            'day':'int32', 'hour':'int32', 'minute':'int32', \
            'second':'float', 'cc':'float', 'nchannel':'int32'})
        df_all.to_csv('LFEs/' + families['family'].iloc[i] + '/catalog_' + \
            '{:04d}{:02d}{:02d}_{:02d}{:02d}{:02d}'.format(tbegin[0], \
            tbegin[1], tbegin[2], tbegin[3], tbegin[4], tbegin[5]) + '.csv')
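
A hedged example of how find_LFEs might be invoked; the file names, template directory, and parameter values below are illustrative placeholders (the function also depends on a module-level DATADIR for the response files):

# Illustrative call; paths and values are placeholders.
find_LFEs(family_file='families.txt',
          station_file='stations.txt',
          template_dir='templates',
          tbegin=(2020, 1, 1, 0, 0, 0),
          tend=(2020, 1, 2, 0, 0, 0),
          TDUR=10.0, duration=60.0,
          filt=(1.5, 9.0), freq0=1.0, dt=0.05,
          nattempts=3, waittime=10.0,
          type_threshold='MAD', threshold=0.0075)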
Example No. 7
class GetIIData(object):
	# initialize input vars
	def __init__(self, year, startday, network, **kwargs):
		# initialize year/start/net
		# if statement to check for main args set QUERY=True	
		# else sys.exit(1)
		if (year != "") and (startday != "") and (network != ""):	
			self.year = year
			self.startday = startday
			self.network = network
			QUERY = True
		else:
			QUERY = False

		# loop through **kwargs and initialize optargs
		self.endday = ""	# init endday string
		self.station = "" 	# init station string
		self.location = ""	# init location string
		self.channel = ""	# init channel string
		self.debug = False	# init debug
		self.archive = False	# init archive
		endday = self.endday
		for key,val in kwargs.iteritems(): 
			if key == "endday": self.endday = val
			elif key == "station": self.station = val
			elif key == "location": self.location = val
			elif key == "channel": self.channel = val
			elif key == "debug": self.debug = self.toBool(val)
			elif key == "archive": self.archive = self.toBool(val) 

		# print arguments if 'debug' mode
		if self.debug:
			print "Year: " + self.year
			print "Start Day: " + self.startday
			print "End Day: " + self.endday
			print "Network: " + self.network
			print "Station: " + self.station
			print "Location: " + self.location
			print "Channel: " + self.channel

		# handle wildcards
		if self.location == "?":
			self.location = "*"
		if self.channel == "?":
			self.channel = "*"
		if self.station == "?":
			self.station = "*"

		# set start/end to UTCDateTime object
		#--------------------------------------------------------------------
		self.startTime = UTCDateTime(year + startday +"T00:00:00.000")
		# If no end day in parser default to 1 day
		if self.endday == "?":
			self.endday = str(int(self.startday) + 1).zfill(3)
			self.endTime = self.startTime + 24*60*60
		else:
			self.endTime = UTCDateTime(year + self.endday +"T00:00:00.000")
		print "Here is our start time: " + self.startTime.formatIRISWebService()
		print "Here is our end time:   " + self.endTime.formatIRISWebService()
		self.days = int(self.endday)- int(self.startday)
		# there are 24, 1 hour increments in a day
		self.hours = (int(self.endday)- int(self.startday)) * 24 
		# Will only run if main args are given
		# check QUERY flag if True continue
		if QUERY:
			self.queryData()
		else:
			print '\nNo main args given.'
			print 'Exiting\n'
			sys.exit(1)
	
	def queryData(self):
		# code from IRIS client 
		# Here we pull the data
		client = Client("IRIS")
		DupStations = []
		DupLocations = []
		DupChannels = []
		self.st = Stream()
		self.STAWILD = False
		self.LOCWILD = False
		self.CHANWILD = False
		
		try:
			timeout = 300
			socket.setdefaulttimeout(timeout)
			# this needs to have a get_waveform that queries data 1 hour at a time
			# data cant query right now if the data is too bulky
			# also needs to include a timeout exception
			for hourIndex in range(0,self.hours): #this cant be days... has to be hours
				self.startTime1 = self.startTime + (hourIndex)*1*60*60
				self.endTime1 = self.startTime + (hourIndex+1)*1*60*60
				requestArray = [(self.network,self.station,self.location, \
					self.channel,self.startTime1,self.endTime1)]
				self.st1 = client.get_waveforms_bulk(requestArray)
				self.st += self.st1
				print self.st	
				print
				#self.st = client.get_waveforms_bulk(timeout=10,requestArray)
				
			for self.tr in self.st:
				#Here we remove the M data quality and go with D
				self.tr.stats.mseed['dataquality'] = 'D'
				if self.debug:
					if self.station == '*':
						self.STAWILD = True
						DupStations.append(self.tr.stats.station)
					elif self.station != '*':
						self.STAWILD = False

					if self.location == '*':
						self.LOCWILD = True
						DupLocations.append(self.tr.stats.location)
					elif self.location != '*':
						self.LOCWILD = False

					if self.channel == '*':
						self.CHANWILD = True
						DupChannels.append(self.tr.stats.channel)
					elif self.channel != '*':
						self.CHANWILD = False
		#except TimeoutError:
			#print 'Get waveform timeout, exiting...'
			#sys.exit(0)	
		except:
			print 'Trouble getting data'
			sys.exit(0)
		
		# Takes duplicate stations out of list and 
		# makes station, location, and channel into an array 
		# for looping( probably easier way but it works)
		self.stations = list(set(DupStations))
		if self.station != '*':
			self.stations.append(self.station)
		self.locations = list(set(DupLocations))
		if self.location != '*':
			self.locations.append(self.location)
		self.channels = list(set(DupChannels))
		if self.channel != '*':	
			self.channels.append(self.channel)
		print
		print "Station(s) being pulled: " + str(self.stations)
		print "Location(s) being pulled: " + str(self.locations)
		print "Channel(s) being pulled: " + str(self.channels)
			
		# Now call code to store streams in mseed files
		self.storeMSEED()
	
	def storeMSEED(self):
		#Main program
		#code for storing MSEED files
		codepath = '/home/mkline/dev/getIIdataBackup/TEST_ARCHIVE/'
		self.stFinal = Stream()
		for self.channel in self.channels:
			self.trace2 = self.st.select(channel = self.channel)
			for self.location in self.locations:
				self.trace1 = self.trace2.select(location = self.location)
				for self.station in self.stations:
					print
					print "For station, location, and channel: " \
						+ self.station +" "+ self.location +" "+ self.channel
					trace = self.trace1.select(station = self.station)
					trace.merge()
					trace.sort()
					trace.count()
					for dayIndex in range(0,self.days):
						print "Day properties: "
						#startTime works better than trace[0].stats.starttime
						trimStart = self.startTime + (dayIndex)*24*60*60
						trimEnd = self.startTime + (dayIndex+1)*24*60*60
						print "Start of day: " + str(trimStart)
						print "End of day:   " + str(trimEnd)
						#Converting date into julian day to store in directory
						timesplit = re.split('T', str(trimStart))
						s = timesplit[0]
						fmt = '%Y-%m-%d'
						dt = datetime.datetime.strptime(s, fmt)
						tt = dt.timetuple()
						NewStartDay = str(tt.tm_yday).zfill(3)
						self.stFinal = trace.copy()
						self.stFinal.trim(starttime = trimStart, endtime = trimEnd)
						# This if statement is used to make sure traces with no 
						# data dont get added to the directory structure
						if not self.stFinal or str(self.stFinal[0].max()) == '--':
							print "No trace for given day"
						else:
							#Added the directory structures in here since you won't want to
							#add directory structures that you don't use
							self.stFinal = self.stFinal.split()
							if not os.path.exists(codepath + self.network + '_' + self.station  + '/'):
								os.mkdir(codepath + self.network + '_' + self.station  + '/')
							if not os.path.exists(codepath + self.network + '_' + self.station  + '/' \
								+ self.year + '/'):
								os.mkdir(codepath + self.network + '_' + self.station  + '/' \
								+ self.year + '/')
							stpath = codepath + self.network + '_' + self.station  + '/' + self.year + \
								'/' + self.year + '_' + NewStartDay + '/'
							if not os.path.exists(stpath):
								os.mkdir(stpath)
							# Here we write the data using STEIM 2 and 512 record lengths
							self.stFinal.write(stpath + self.stFinal[0].stats.location + '_' + \
								self.stFinal[0].stats.channel + '.512.seed', format='MSEED', \
								reclen = 512, encoding='STEIM2')
							print self.stFinal
						
							

	# convert optional boolean strings to boolean vars
	def toBool(self, value):
		"""
		Converts 'string' to boolean. Raises exception for invalid formats
			True values: 1, True, true, "1", "True", "true", "yes", "y", "t"
			False values: 0, False, false, "0", "False", "false", "no", "n", "f" 
		"""
		if str(value).lower() in ("true", "yes", "t", "y", "1"): return True
		if str(value).lower() in ("false", "no", "f", "n", "0"): return False
		raise Exception('Invalid value for boolean conversion: ' + str(value))
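
Since __init__ validates its main arguments and then immediately runs queryData, using the class is a single constructor call. A hedged sketch (Python 2, matching the example; all argument values are placeholders):

# Illustrative instantiation; year/day/network/station values are made up.
GetIIData('2014', '120', 'II',
          station='PFO', location='00', channel='BH*',
          endday='121', debug='true', archive='false')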
Example No. 8
def find_LFEs(filename, stations, tbegin, tend, TDUR, filt, \
        freq0, nattempts, waittime, draw=False, type_threshold='MAD', \
        threshold=0.0075):
    """
    Find LFEs with the temporary stations from FAME
    using the templates from Plourde et al. (2015)

    Input:
        type filename = string
        filename = Name of the template
        type stations = list of strings
        stations = name of the stations used for the matched-filter algorithm
        type tbegin = tuple of 6 integers
        tbegin = Time when we begin looking for LFEs
        type tend = tuple of 6 integers
        tend = Time when we stop looking for LFEs
        type TDUR = float
        TDUR = Time to add before and after the time window for tapering
        type filt = tuple of floats
        filt = Lower and upper frequencies of the filter
        type freq0 = float
        freq0 = Maximum frequency rate of LFE occurrence
        type nattempts = integer
        nattempts = Number of times we try to download data
        type waittime = positive float
        waittime = Time to wait between two attempts at downloading
        type draw = boolean
        draw = Do we draw a figure of the cross-correlation?
        type type_threshold = string
        type_threshold = 'MAD' or 'Threshold'
        type threshold = float
        threshold = Cross-correlation value above which LFEs are detected
    Output:
        None
    """

    # Get the network, channels, and location of the stations
    staloc = pd.read_csv('../data/Ducellier/stations_permanent.txt', \
        sep=r'\s{1,}', header=None, engine='python')
    staloc.columns = ['station', 'network', 'channels', 'location', \
        'server', 'latitude', 'longitude', 'time_on', 'time_off']

    # Create directory to store the LFEs times
    namedir = 'LFEs/' + filename
    if not os.path.exists(namedir):
        os.makedirs(namedir)

    # File to write error messages
    namedir = 'error'
    if not os.path.exists(namedir):
        os.makedirs(namedir)
    errorfile = 'error/' + filename + '.txt'

    # Read the templates
    templates = Stream()
    for station in stations:
        data = pickle.load(open('templates_new/' + filename + \
            '/' + station + '.pkl', 'rb'))
        if (len(data) == 3):
            EW = data[0]
            NS = data[1]
            UD = data[2]
            EW.stats.station = station
            NS.stats.station = station
            EW.stats.channel = 'E'
            NS.stats.channel = 'N'
            templates.append(EW)
            templates.append(NS)
        else:
            UD = data[0]
        UD.stats.station = station
        UD.stats.channel = 'Z'
        templates.append(UD)

    # Begin and end time of analysis
    t1 = UTCDateTime(year=tbegin[0], month=tbegin[1], \
        day=tbegin[2], hour=tbegin[3], minute=tbegin[4], \
        second=tbegin[5])
    t2 = UTCDateTime(year=tend[0], month=tend[1], \
        day=tend[2], hour=tend[3], minute=tend[4], \
        second=tend[5])

    # Read the data
    data = []
    for station in stations:
        # Get station metadata for downloading
        for ir in range(0, len(staloc)):
            if (station == staloc['station'][ir]):
                network = staloc['network'][ir]
                channels = staloc['channels'][ir]
                location = staloc['location'][ir]
                server = staloc['server'][ir]

        # Duration of template
        template = templates.select(station=station, component='Z')[0]
        dt = template.stats.delta
        nt = template.stats.npts
        duration = (nt - 1) * dt   
        Tstart = t1 - TDUR
        Tend = t2 + duration + TDUR
        delta = t2 + duration - t1
        ndata = int(delta / dt) + 1

        # Orientation of template
        # Date chosen: January 1st 2012
        mychannels = channels.split(',')
        mylocation = location
        if (mylocation == '--'):
            mylocation = ''
        response = '../data/response/' + network + '_' + station + '.xml'
        inventory = read_inventory(response, format='STATIONXML')
        reference = []
        for channel in mychannels:
            angle = inventory.get_orientation(network + '.' + \
                station + '.' + mylocation + '.' + channel, \
                UTCDateTime(2012, 1, 1, 0, 0, 0))
            reference.append(angle)

        # First case: we can get the data from IRIS
        if (server == 'IRIS'):
            (D, orientation) = get_from_IRIS(station, network, channels, \
                location, Tstart, Tend, filt, dt, nattempts, waittime, \
                errorfile)
        # Second case: we get the data from NCEDC
        elif (server == 'NCEDC'):
            (D, orientation) = get_from_NCEDC(station, network, channels, \
                location, Tstart, Tend, filt, dt, nattempts, waittime, \
                errorfile)
        else:
            raise ValueError('You can only download data from IRIS and NCEDC')

        # Append data to stream
        if (type(D) == obspy.core.stream.Stream):
            stationdata = fill_data(D, orientation, station, channels, reference)
            if (len(stationdata) > 0):
                for stream in stationdata:
                    data.append(stream)

    # Number of hours of data to analyze
    nhour = int(ceil((t2 - t1) / 3600.0))

    # Create dataframe to store LFE times
    df = pd.DataFrame(columns=['year', 'month', 'day', 'hour', \
        'minute', 'second', 'cc', 'nchannel'])

    # Loop on hours of data
    for hour in range(0, nhour):
        nchannel = 0
        Tstart = t1 + hour * 3600.0
        Tend = t1 + (hour + 1) * 3600.0 + duration
        delta = Tend - Tstart
        ndata = int(delta / dt) + 1

        # Loop on channels
        for channel in range(0, len(data)):
            # Cut the data
            subdata = data[channel]
            subdata = subdata.slice(Tstart, Tend)
            # Check whether we have a complete one-hour-long recording
            if (len(subdata) == 1):
                if (len(subdata[0].data) == ndata):
                    # Get the template
                    station = subdata[0].stats.station
                    component = subdata[0].stats.channel
                    template = templates.select(station=station, \
                        component=component)[0]
                    # Cross correlation
                    cctemp = correlate.optimized(template, subdata[0])
                    if (nchannel > 0):
                        cc = np.vstack((cc, cctemp))
                    else:
                        cc = cctemp
                    nchannel = nchannel + 1
    
        if (nchannel > 0):
   
            # Compute average cross-correlation across channels
            meancc = np.mean(cc, axis=0)
            if (type_threshold == 'MAD'):
                MAD = np.median(np.abs(meancc - np.mean(meancc)))
                index = np.where(meancc >= threshold * MAD)
            elif (type_threshold == 'Threshold'):
                index = np.where(meancc >= threshold)
            else:
                raise ValueError('Type of threshold must be MAD or Threshold')
            times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)

            # Get LFE times
            if np.shape(index)[1] > 0:
                (time, cc) = clean_LFEs(index, times, meancc, dt, freq0)

                # Add LFE times to dataframe
                i0 = len(df.index)
                for i in range(0, len(time)):
                    timeLFE = Tstart + time[i]
                    df.loc[i0 + i] = [int(timeLFE.year), int(timeLFE.month), \
                        int(timeLFE.day), int(timeLFE.hour), \
                        int(timeLFE.minute), timeLFE.second + \
                        timeLFE.microsecond / 1000000.0, cc[i], nchannel]

            # Draw figure
            if (draw == True):
                params = {'xtick.labelsize':16,
                          'ytick.labelsize':16}
                pylab.rcParams.update(params) 
                plt.figure(1, figsize=(20, 8))
                if np.shape(index)[1] > 0:
                    for i in range(0, len(time)):
                        plt.axvline(time[i], linewidth=2, color='grey')
                plt.plot(np.arange(0.0, np.shape(meancc)[0] * dt, \
                    dt), meancc, color='black')
                if (type_threshold == 'MAD'):
                    plt.axhline(threshold * MAD, linewidth=2, color='red', \
                        label = '{:6.2f} * MAD'.format(threshold))
                elif (type_threshold == 'Threshold'):
                    plt.axhline(threshold, linewidth=2, color='red', \
                        label = 'Threshold = {:8.4f}'.format(threshold))
                else:
                    raise ValueError( \
                        'Type of threshold must be MAD or Threshold')
                plt.xlim(0.0, (np.shape(meancc)[0] - 1) * dt)
                plt.xlabel('Time (s)', fontsize=24)
                plt.ylabel('Cross-correlation', fontsize=24)
                plt.title('Average cross-correlation across stations', \
                    fontsize=30)
                plt.legend(loc=2, fontsize=24)
                plt.savefig('LFEs/' + filename + '/' + \
                    '{:04d}{:02d}{:02d}_{:02d}{:02d}{:02d}'.format( \
                    Tstart.year, Tstart.month, Tstart.day, Tstart.hour, \
                    Tstart.minute, Tstart.second) + '.png', format='png')
                plt.close(1)

    # Add to pandas dataframe and save
    namefile = 'LFEs/' + filename + '/catalog.pkl'
    if os.path.exists(namefile):
        df_all = pickle.load(open(namefile, 'rb'))
        df_all = pd.concat([df_all, df], ignore_index=True)
    else:
        df_all = df    
    df_all = df_all.astype(dtype={'year':'int32', 'month':'int32', \
        'day':'int32', 'hour':'int32', 'minute':'int32', \
        'second':'float', 'cc':'float', 'nchannel':'int32'})
    pickle.dump(df_all, open(namefile, 'wb'))
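
correlate.optimized is an external helper that is not shown here. A minimal normalized sliding cross-correlation over plain numpy arrays gives the idea, under the assumption that the template is shorter than the hour-long data window (the real routine presumably operates on obspy Traces and is vectorized differently):

import numpy as np

def sliding_normalized_cc(template, data):
    # Hypothetical stand-in for correlate.optimized: one normalized
    # correlation coefficient per lag of `template` along `data`.
    nt = len(template)
    t = template - np.mean(template)
    cc = np.zeros(len(data) - nt + 1)
    for k in range(len(cc)):
        d = data[k:k + nt] - np.mean(data[k:k + nt])
        denom = np.sqrt(np.sum(t ** 2) * np.sum(d ** 2))
        if denom > 0.0:
            cc[k] = np.sum(t * d) / denom
    return cc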
Example No. 9
def analyze_data(families, staloc, tbegin, tend, \
    freq0, type_threshold, threshold, ncpu, icpu):
    """
    """
    nfamilies = int(ceil(len(families) / ncpu))
    ibegin = icpu * nfamilies
    iend = min((icpu + 1) * nfamilies, len(families))

    # Loop on families
    for i in range(ibegin, iend):

        # Create directory to store the LFEs times
        namedir = 'LFEs/' + families['family'].iloc[i]
        if not os.path.exists(namedir):
            os.makedirs(namedir)

        # File to write number of stations
        namedir = 'nstations'
        if not os.path.exists(namedir):
            os.makedirs(namedir)
        stationfile = 'nstations/' + families['family'].iloc[i] + '.txt'

        # Create dataframe to store LFE times
        df = pd.DataFrame(columns=['year', 'month', 'day', 'hour', \
            'minute', 'second', 'cc', 'nchannel'])

        # Read the templates
        stations = families['stations'].iloc[i].split(',')
        templates = Stream()
        orientations = []
        names = []
        for station in stations:
            subset = staloc.loc[staloc['station'] == station]
            channels = subset['channels'].iloc[0]
            mychannels = channels.split(',')
            for channel in mychannels:
                data = pickle.load(open(template_dir + '/' + \
                    families['family'].iloc[i] + '/' + station + '_' + \
                    channel + '.pkl', 'rb'))
                template = data[0]
                angle = data[1]
                templates.append(template)
                orientations.append(angle)
                names.append(station + '_' + channel)

        # Check the time step of the stations
        subset = staloc.loc[staloc['station'].isin(stations)]
        if len(subset['dt'].value_counts()) == 1:
            dt = subset['dt'].iloc[0]
        else:
            raise ValueError('All stations must have the same time step')

        # Number of hours of data to analyze
        t1 = UTCDateTime(year=tbegin[0], month=tbegin[1], \
            day=tbegin[2], hour=tbegin[3], minute=tbegin[4], \
            second=tbegin[5])
        t2 = UTCDateTime(year=tend[0], month=tend[1], \
            day=tend[2], hour=tend[3], minute=tend[4], \
            second=tend[5])
        nhour = int(ceil((t2 - t1) / 3600.0))
        duration = families['duration'].iloc[i]

        # To rotate components
        swap = {'E': 'N', 'N': 'E', '1': '2', '2': '1'}

        # Loop on hours of data
        for hour in range(0, nhour):
            Tstart = t1 + hour * 3600.0
            Tend = t1 + (hour + 1) * 3600.0 + duration
            delta = Tend - Tstart
            ndata = int(delta / dt) + 1

            # Get the data
            data = []
            for station in stations:
                subset = staloc.loc[staloc['station'] == station]
                channels = subset['channels'].iloc[0]
                mychannels = channels.split(',')
                for num, channel in enumerate(mychannels):
                    try:
                        D = read('tmp/' + station + '_' + channel + '.mseed')
                        D = D.slice(Tstart, Tend)

                        if (type(D) == obspy.core.stream.Stream):
                            namefile = 'tmp/' + station + '_' + channel + \
                                '.pkl'
                            orientation = pickle.load(open(namefile, 'rb')) \
                                [num]
                            index = names.index(station + '_' + channel)
                            reference = orientations[index]

                            # Rotate components
                            if (len(mychannels) > 1) and (num < 2):
                                if orientation != reference:
                                    channel_new = channel[0:2] + \
                                        swap[channel[2]]
                                    D_new = read('tmp/' + station + '_' + \
                                        channel_new + '.mseed')
                                    D_new = D_new.slice(Tstart, Tend)
                                    namefile = 'tmp/' + station + '_' + \
                                        channel_new + '.pkl'
                                    if num == 0:
                                        orientation_new = pickle.load(open( \
                                            namefile, 'rb'))[1]
                                    else:
                                        orientation_new = pickle.load(open( \
                                            namefile, 'rb'))[0]
                                    index = names.index(station + '_' + \
                                        channel_new)
                                    reference_new = orientations[index]
                                    if channel[2] in ['E', '1']:
                                        D = rotate_data(D, D_new, \
                                            orientation, orientation_new, \
                                            reference, reference_new, 'E')
                                    else:
                                        D = rotate_data(D_new, D, \
                                            orientation_new, orientation, \
                                            reference_new, reference, 'N')

                            # Append stream to data
                            data.append(D)
                    except:
                        message = 'No data available for station {}'.format( \
                            station) + ' and channel {}'.format(channel) + \
                            ' at time {}/{}/{} - {}:{}:{}\n'.format( \
                            Tstart.year, Tstart.month, Tstart.day, \
                            Tstart.hour, Tstart.minute, Tstart.second)

            # Loop on channels
            nchannel = 0
            for j in range(0, len(data)):
                subdata = data[j]
                # Check whether we have a complete one-hour-long recording
                if (len(subdata) == 1):
                    if (len(subdata[0].data) == ndata):
                        # Get the template
                        station = subdata[0].stats.station
                        channel = subdata[0].stats.channel
                        template = templates.select(station=station, \
                            channel=channel)[0]
                        # Cross correlation
                        cctemp = correlate.optimized(template, subdata[0])
                        if (nchannel > 0):
                            cc = np.vstack((cc, cctemp))
                        else:
                            cc = cctemp
                        nchannel = nchannel + 1

            # Write number of channels
            with open(stationfile, 'a') as file:
                file.write('{} {} {} {} {}\n'.format(Tstart.year, \
                    Tstart.month, Tstart.day, Tstart.hour, nchannel))

            if (nchannel > 0):
                # Compute average cross-correlation across channels
                if len(np.shape(cc)) == 1:
                    meancc = cc
                else:
                    meancc = np.mean(cc, axis=0)
                if (type_threshold == 'MAD'):
                    MAD = np.median(np.abs(meancc - np.mean(meancc)))
                    index = np.where(meancc >= threshold * MAD)
                elif (type_threshold == 'Threshold'):
                    index = np.where(meancc >= threshold)
                else:
                    raise ValueError( \
                        'Type of threshold must be MAD or Threshold')
                times = np.arange(0.0, np.shape(meancc)[0] * dt, dt)

                # Get LFE times
                if np.shape(index)[1] > 0:
                    (time, cc) = clean_LFEs(index, times, meancc, dt, freq0)

                    # Add LFE times to dataframe
                    i0 = len(df.index)
                    for j in range(0, len(time)):
                        timeLFE = Tstart + time[j]
                        df.loc[i0 + j] = [int(timeLFE.year), \
                            int(timeLFE.month), int(timeLFE.day), \
                            int(timeLFE.hour), int(timeLFE.minute), \
                            timeLFE.second + timeLFE.microsecond / 1000000.0, \
                            cc[j], nchannel]

        # Add to pandas dataframe and save
        namefile = 'LFEs/' + families['family'].iloc[i] + '/catalog.pkl'
        if os.path.exists(namefile):
            df_all = pickle.load(open(namefile, 'rb'))
            df_all = pd.concat([df_all, df], ignore_index=True)
        else:
            df_all = df
        df_all = df_all.astype(dtype={'year':'int32', 'month':'int32', \
            'day':'int32', 'hour':'int32', 'minute':'int32', \
            'second':'float', 'cc':'float', 'nchannel':'int32'})
        pickle.dump(df_all, open(namefile, 'wb'))
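
The swap dictionary above pairs each horizontal component code with its orthogonal partner, so the matching trace can be loaded before calling rotate_data. For example:

swap = {'E': 'N', 'N': 'E', '1': '2', '2': '1'}
channel = 'BH1'
channel_new = channel[0:2] + swap[channel[2]]  # -> 'BH2'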
Example No. 10
class GetIIData(object):
    # initialize input vars
    def __init__(self, year, startday, network, **kwargs):
        # initialize year/start/net
        # if statement to check for main args set QUERY=True
        # else sys.exit(1)
        if (year != "") and (startday != "") and (network != ""):
            self.year = year
            self.startday = startday
            self.network = network
            QUERY = True
        else:
            QUERY = False

        # loop through **kwargs and initialize optargs
        self.endday = ""  # init endday string
        self.station = ""  # init station string
        self.location = ""  # init location string
        self.channel = ""  # init channel string
        self.debug = False  # init debug
        self.archive = False  # init archive
        endday = self.endday
        for key, val in kwargs.iteritems():
            if key == "endday": self.endday = val
            elif key == "station": self.station = val
            elif key == "location": self.location = val
            elif key == "channel": self.channel = val
            elif key == "debug": self.debug = self.toBool(val)
            elif key == "archive": self.archive = self.toBool(val)

        # print arguments if 'debug' mode
        if self.debug:
            print "Year: " + self.year
            print "Start Day: " + self.startday
            print "End Day: " + self.endday
            print "Network: " + self.network
            print "Station: " + self.station
            print "Location: " + self.location
            print "Channel: " + self.channel

        # handle wildcards
        if self.location == "?":
            self.location = "*"
        if self.channel == "?":
            self.channel = "*"
        if self.station == "?":
            self.station = "*"

        # set start/end to UTCDateTime object
        #--------------------------------------------------------------------
        self.startTime = UTCDateTime(year + startday + "T00:00:00.000")
        # If no end day in parser default to 1 day
        if self.endday == "?":
            self.endday = str(int(self.startday) + 1).zfill(3)
            self.endTime = self.startTime + 24 * 60 * 60
        else:
            self.endTime = UTCDateTime(year + self.endday + "T00:00:00.000")
        print "Here is our start time: " + self.startTime.formatIRISWebService(
        )
        print "Here is our end time:   " + self.endTime.formatIRISWebService()
        self.days = int(self.endday) - int(self.startday)
        # there are 24, 1 hour increments in a day
        self.hours = (int(self.endday) - int(self.startday)) * 24
        # Will only run if main args are given
        # check QUERY flag if True continue
        if QUERY:
            self.queryData()
        else:
            print '\nNo main args given.'
            print 'Exiting\n'
            sys.exit(1)

    def queryData(self):
        # code from IRIS client
        # Here we pull the data
        client = Client("IRIS")
        DupStations = []
        DupLocations = []
        DupChannels = []
        self.st = Stream()
        self.STAWILD = False
        self.LOCWILD = False
        self.CHANWILD = False

        try:
            timeout = 300
            socket.setdefaulttimeout(timeout)
            # this needs to have a get_waveform that queries data 1 hour at a time
            # data cant query right now if the data is too bulky
            # also needs to include a timeout exception
            for hourIndex in range(
                    0, self.hours):  #this cant be days... has to be hours
                self.startTime1 = self.startTime + (hourIndex) * 1 * 60 * 60
                self.endTime1 = self.startTime + (hourIndex + 1) * 1 * 60 * 60
                requestArray = [(self.network,self.station,self.location, \
                 self.channel,self.startTime1,self.endTime1)]
                self.st1 = client.get_waveforms_bulk(requestArray)
                self.st += self.st1
                print self.st
                print
                #self.st = client.get_waveforms_bulk(timeout=10,requestArray)

            for self.tr in self.st:
                #Here we remove the M data quality and go with D
                self.tr.stats.mseed['dataquality'] = 'D'
                if self.debug:
                    if self.station == '*':
                        self.STAWILD = True
                        DupStations.append(self.tr.stats.station)
                    elif self.station != '*':
                        self.STAWILD = False

                    if self.location == '*':
                        self.LOCWILD = True
                        DupLocations.append(self.tr.stats.location)
                    elif self.location != '*':
                        self.LOCWILD = False

                    if self.channel == '*':
                        self.CHANWILD = True
                        DupChannels.append(self.tr.stats.channel)
                    elif self.channel != '*':
                        self.CHANWILD = False
        #except TimeoutError:
        #print 'Get waveform timeout, exiting...'
        #sys.exit(0)
        except:
            print 'Trouble getting data'
            sys.exit(0)

        # Takes duplicate stations out of list and
        # makes station, location, and channel into an array
        # for looping( probably easier way but it works)
        self.stations = list(set(DupStations))
        if self.station != '*':
            self.stations.append(self.station)
        self.locations = list(set(DupLocations))
        if self.location != '*':
            self.locations.append(self.location)
        self.channels = list(set(DupChannels))
        if self.channel != '*':
            self.channels.append(self.channel)
        print
        print "Station(s) being pulled: " + str(self.stations)
        print "Location(s) being pulled: " + str(self.locations)
        print "Channel(s) being pulled: " + str(self.channels)

        # Now call code to store streams in mseed files
        self.storeMSEED()

    def storeMSEED(self):
        #Main program
        #code for storing MSEED files
        codepath = '/home/mkline/dev/getIIdataBackup/TEST_ARCHIVE/'
        self.stFinal = Stream()
        for self.channel in self.channels:
            self.trace2 = self.st.select(channel=self.channel)
            for self.location in self.locations:
                self.trace1 = self.trace2.select(location=self.location)
                for self.station in self.stations:
                    print
                    print "For station, location, and channel: " \
                     + self.station +" "+ self.location +" "+ self.channel
                    trace = self.trace1.select(station=self.station)
                    trace.merge()
                    trace.sort()
                    trace.count()
                    for dayIndex in range(0, self.days):
                        print "Day properties: "
                        #startTime works better than trace[0].stats.starttime
                        trimStart = self.startTime + (dayIndex) * 24 * 60 * 60
                        trimEnd = self.startTime + (dayIndex +
                                                    1) * 24 * 60 * 60
                        print "Start of day: " + str(trimStart)
                        print "End of day:   " + str(trimEnd)
                        #Converting date into julian day to store in directory
                        timesplit = re.split('T', str(trimStart))
                        s = timesplit[0]
                        fmt = '%Y-%m-%d'
                        dt = datetime.datetime.strptime(s, fmt)
                        tt = dt.timetuple()
                        NewStartDay = str(tt.tm_yday).zfill(3)
                        self.stFinal = trace.copy()
                        self.stFinal.trim(starttime=trimStart, endtime=trimEnd)
                        # This if statement is used to make sure traces with no
                        # data dont get added to the directory structure
                        if not self.stFinal or str(
                                self.stFinal[0].max()) == '--':
                            print "No trace for given day"
                        else:
                            #Added the directory structures in here since you won't want to
                            #add directory structures that you don't use
                            self.stFinal = self.stFinal.split()
                            if not os.path.exists(codepath + self.network +
                                                  '_' + self.station + '/'):
                                os.mkdir(codepath + self.network + '_' +
                                         self.station + '/')
                            if not os.path.exists(codepath + self.network + '_' + self.station  + '/' \
                             + self.year + '/'):
                                os.mkdir(codepath + self.network + '_' + self.station  + '/' \
                                + self.year + '/')
                            stpath = codepath + self.network + '_' + self.station  + '/' + self.year + \
                             '/' + self.year + '_' + NewStartDay + '/'
                            if not os.path.exists(stpath):
                                os.mkdir(stpath)
                            # Here we write the data using STEIM 2 and 512 record lengths
                            self.stFinal.write(stpath + self.stFinal[0].stats.location + '_' + \
                             self.stFinal[0].stats.channel + '.512.seed', format='MSEED', \
                             reclen = 512, encoding='STEIM2')
                            print self.stFinal

    # convert optional boolean strings to boolean vars
    def toBool(self, value):
        """
		Converts 'string' to boolean. Raises exception for invalid formats
			True values: 1, True, true, "1", "True", "true", "yes", "y", "t"
			False values: 0, False, false, "0", "False", "false", "no", "n", "f" 
		"""
        if str(value).lower() in ("true", "yes", "t", "y", "1"): return True
        if str(value).lower() in ("false", "no", "f", "n", "0"): return False
        raise Exception('Invalid value for boolean conversion: ' + str(value))
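
The final write call in storeMSEED stores each day file with 512-byte records and STEIM2 compression. A minimal standalone illustration of that obspy write pattern, assuming integer samples (STEIM2 encodes int32 data):

import numpy as np
from obspy import Stream, Trace

tr = Trace(data=np.arange(1000, dtype=np.int32))
Stream([tr]).write('00_BHZ.512.seed', format='MSEED',
                   reclen=512, encoding='STEIM2')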