def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--inventory', type=str,
                        help='File containing a FDSNStationXML list of stations to retrieve')
    parser.add_argument('--events', type=str,
                        help='Path to the directory that contains the events')
    parser.add_argument('--output', type=str,
                        help='Path to the output directory')
    parser.add_argument('--event_fraction', type=float,
                        help='Fraction of events to use')
    parser.add_argument('--extra_samples', type=int, default=0,
                        help='Number of extra samples to generate for each event')
    parser.add_argument('--save_mseed', action='store_true',
                        help='Whether to save a miniSEED backup of the streams')
    parser.add_argument('--use-fdsn', action='store_true',
                        help='Try to fetch streams from FDSN if they are not available locally')
    args = parser.parse_args()

    client = fdsn.Client('KNMI')

    # Store station metadata
    inventory = read_inventory(args.inventory)
    station_dict = {}
    for network in inventory.networks:
        for station in network.stations:
            station_dict[(network.code, station.code)] = station

    # Read event catalog
    catalog, all_events = read_catalog_file(args.event_fraction, station_dict)

    # Count events
    sum_events = 0
    num_stations = 0
    for network_code, station_code in catalog:
        num_events = len(catalog[(network_code, station_code)])
        logger.debug('%s %s --- %d events', network_code, station_code, num_events)
        sum_events += num_events
        num_stations += 1
    logger.info('Events: %d / %d Stations: %d', sum_events, len(all_events), num_stations)

    # Create directories if needed
    for version in ['mseed', 'raw', 'filter']:
        for sample_type in [str(event_type) for event_type in range(4)]:
            dirname = os.path.join(args.output, version, sample_type, '')
            if not os.path.exists(dirname):
                os.makedirs(dirname)

    # Get events
    num_event_samples = get_event_samples(client, catalog, inventory, station_dict,
                                          args.output, augment_copies=args.extra_samples,
                                          save_mseed=args.save_mseed, try_fdsn=args.use_fdsn)
    logger.info('Saved %d event samples', num_event_samples)

    # Get noise
    get_noise_samples_single(client, all_events, inventory, station_dict,
                             num_event_samples * (1 + args.extra_samples),
                             args.output)
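# A hypothetical invocation of the script above (the script name and all paths
# below are placeholders, not taken from the original source):
#
#   python build_dataset.py --inventory stations.xml --events events/ \
#       --output dataset/ --event_fraction 0.5 --extra_samples 2 --save_mseed --use-fdsn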
Example #2
def main(args):

    dirname = args.event_files_path
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    # read list of channels to use
    inventory = read_inventory(args.channel_file)
    inventory = inventory.select(channel=args.channel_prefix + 'Z',
                                 sampling_rate=args.sampling_rate)

    if args.mindepth is None and args.maxdepth is None:
        depth_str = ''
    elif args.mindepth is not None:
        depth_str = str(args.mindepth) + 'km_'
    else:
        depth_str = str(-999.9) + '-' + str(args.maxdepth) + 'km_'
    filename_base = (str(args.minradius) if args.minradius is not None else str(0.0)) \
        + '-' + str(args.maxradius) + 'deg_' \
        + depth_str \
        + 'M' + str(args.minmagnitude) + '-' \
        + (str(args.maxmagnitude) if args.maxmagnitude is not None else '')
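    # Worked example (illustrative values, not from the original source): with
    # minradius=None, maxradius=90.0, mindepth=None, maxdepth=None,
    # minmagnitude=5.5 and maxmagnitude=None, filename_base becomes
    # '0.0-90.0deg_M5.5-'.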

    print('filename_base', filename_base)

    events_starttime = UTCDateTime(args.starttime)
    events_endtime = UTCDateTime(args.endtime)
    print('events_starttime', events_starttime)
    print('events_endtime', events_endtime)

    for net in inventory:
        for sta in net:
            outfile = os.path.join(args.event_files_path, net.code + '_' + sta.code + '_'  \
                                   + filename_base + '.xml')
            client = fdsn.Client(args.base_url)
            print('net_sta', net.code + '_' + sta.code)
            print('sta.start_date', sta.start_date)
            print('sta.end_date', sta.end_date)
            tstart = sta.start_date if sta.start_date > events_starttime else events_starttime
            tend = sta.end_date if sta.end_date < events_endtime else events_endtime
            print('tstart', tstart)
            print('tend', tend)
            if not tstart < tend:
                continue
            try:
                catalog = client.get_events(latitude=sta.latitude, longitude=sta.longitude, \
                                   starttime=tstart, endtime=tend, \
                                   minradius=args.minradius, maxradius=args.maxradius, \
                                   mindepth=args.mindepth, maxdepth=args.maxdepth, \
                                   minmagnitude=args.minmagnitude, maxmagnitude=args.maxmagnitude, \
                                   includeallorigins=False, includeallmagnitudes= False, includearrivals=False)
            except Exception as ex:
                print('Skipping net:', net.code, 'sta:', sta.code, 'Exception:', ex)
                continue
            #, filename=args.outfile)
            catalog.write(outfile, 'QUAKEML')
            print(catalog.count(), 'events written to:', outfile)
Example #3
    def run(self):
        client = fdsn.Client(self.url)
        args = self._args
        try:
            catalog = client.get_events(**args)
        except fdsn.header.FDSNException as e:
            if 'No data available' in str(e):
                self._logger.info('No data available between {} and {}'.format(
                    self._args['starttime'], self._args['endtime']))
            else:
                self._logger.error('FDSNException: ' + str(e))
            self.data_received.emit(None)
        else:
            result = {
                'importer': ObsPyCatalogImporter(catalog),
                'time_range': [args.get(a) for a in ['starttime', 'endtime']]
            }
            self.data_received.emit(result)
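    # A minimal sketch of the kwargs this runner forwards to Client.get_events()
    # (the enclosing class is not shown in the snippet, so the assignment below
    # is illustrative only; the keys map directly onto obspy get_events
    # parameters):
    #
    #     self._args = {'starttime': UTCDateTime('2020-01-01'),
    #                   'endtime': UTCDateTime('2020-01-02'),
    #                   'minmagnitude': 5.0}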
Example #4
def get_inventory(conf):
    """ Get or load the inventory depending on whether it exists or not.  Be sure to delete
  the inventory if you change relevant parameters in the config file such as region or 
  channel matching."""
    if os.path.exists(conf["inventoryfile"]):
        inv = read_inventory(conf["inventoryfile"])
        return inv
    else:
        fdsnclient = fdsn.Client("IRIS")
        temp = fdsnclient.get_stations(
            minlatitude=conf['region']['minlatitude'],
            maxlatitude=conf['region']['maxlatitude'],
            minlongitude=conf['region']['minlongitude'],
            maxlongitude=conf['region']['maxlongitude'],
            level='channel',
            starttime=conf['starttime'],
            endtime=conf['endtime'])
        # Select the channels from the temporary inventory
        inv = temp.select(channel="[BH]H?")
        inv.write(conf["inventoryfile"], format='STATIONXML')
        logging.info(str(inv))
    return inv
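# A minimal usage sketch for get_inventory(); the dictionary below is a
# hypothetical config, but the keys match those read inside the function
# (time values may need to be UTCDateTime objects depending on how the rest
# of the config is handled):
example_conf = {
    "inventoryfile": "inventory.xml",
    "region": {"minlatitude": 44.0, "maxlatitude": 49.0,
               "minlongitude": -125.0, "maxlongitude": -117.0},
    "starttime": "2019-01-01",
    "endtime": "2019-12-31",
}
# inv = get_inventory(example_conf)  # fetched from IRIS on first call, read from the cached file afterwards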
Example #5
network = 'CC'
if len(sys.argv) > 1:
    day = UTCDateTime(sys.argv[1])
else:
    day = UTCDateTime('2017-022T00:00:00.0')
secperday = 24 * 60 * 60.
datadir = "data/"
respdir = "resp/"
qcfigs = "qcfigs/"
qcdata = "qcdata/"
path_verify(qcdata)
path_verify(datadir)
path_verify(qcfigs)
path_verify(respdir)

client = fdsn.Client("IRIS", timeout=240)

for station in stations:
    # Look for channel files that already exist
    files_exist = False
    files = glob.glob("%s%d/%03d/%s.%s*.seed" %
                      (datadir, day.year, day.julday, network, station))
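    # e.g. for day = UTCDateTime('2017-022T00:00:00.0') and a station code of
    # 'SEP' (illustrative), this pattern matches files like
    # data/2017/022/CC.SEP*.seed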
    st = Stream()
    # Read files from disk if they exist
    if len(files) > 0:
        files_exist = True
        for file in files:
            st += read(file)
    else:
        try:
            st = client.get_waveforms(network, station, "*", "*", day,
Example #6
def build_tt_tables(minlat=None,
                    maxlat=None,
                    minlon=None,
                    maxlon=None,
                    channel_codes=['EH', 'BH', 'HH'],
                    db=None,
                    maxdist=500.,
                    source_depth=5.):
    """ channel_codes select channels that start with those codes  
  maximum distance is in km
  source depth is generally set to the average earthquake depth for the region you are working
  for more granularity use the 3D associator
  """
    # Create a connection to an sqlalchemy database
    tt_engine = create_engine(db, echo=False)
    tt_stations_1D.BaseTT1D.metadata.create_all(tt_engine)
    TTSession = sessionmaker(bind=tt_engine)
    tt_session = TTSession()
    # Create a client to the IRIS FDSN web service
    fdsnclient = fdsn.Client("IRIS")
    # Create an obspy inventory of stations
    #http://docs.obspy.org/packages/autogen/obspy.clients.fdsn.client.Client.get_stations.html#obspy.clients.fdsn.client.Client.get_stations
    inv = fdsnclient.get_stations(minlatitude=minlat,
                                  maxlatitude=maxlat,
                                  minlongitude=minlon,
                                  maxlongitude=maxlon,
                                  level='channel')
    # Plot our results just for fun
    inv.plot(projection='ortho', color_per_network=True)
    # Now save these station into the 1D travel-time table database
    # The associator could be modified to interact with Obspy Inventory objects
    for net in inv:
        network = net.code
        for sta in net:
            loccodes = []
            for ch in sta:
                #         print(ch)
                #         print(dir(ch))
                for cc in channel_codes:
                    if re.match(cc, ch.code):
                        if ch.location_code not in loccodes:
                            loccodes.append(ch.location_code)
            for loc in loccodes:
                station = tt_stations_1D.Station1D(sta.code, network, loc,
                                                   sta.latitude, sta.longitude,
                                                   sta.elevation)
                # Save the station locations in the database
                tt_session.add(station)
            tt_session.commit()

    # Now we have to build our traveltime lookup tables
    # We will use IASP91 here but obspy.taup does let you build your own model
    velmod = taup.TauPyModel(model='iasp91')
    # Define our distances we want to use in our lookup table
    delta_distance = 1.  # km for spacing tt calculations
    # Probably better to use a progressive type scheme instead of linear, but this is an example
    distance_km = np.arange(0, maxdist + delta_distance, delta_distance)
    for d_km in distance_km:
        d_deg = geodetics.kilometer2degrees(d_km)
        ptimes = []
        stimes = []
        p_arrivals = velmod.get_travel_times(source_depth_in_km=source_depth,
                                             distance_in_degree=d_deg,
                                             phase_list=['P', 'p'])
        for p in p_arrivals:
            ptimes.append(p.time)
        s_arrivals = velmod.get_travel_times(source_depth_in_km=source_depth,
                                             distance_in_degree=d_deg,
                                             phase_list=['S', 's'])
        for s in s_arrivals:
            stimes.append(s.time)
        tt_entry = tt_stations_1D.TTtable1D(d_km, d_deg, np.min(ptimes),
                                            np.min(stimes),
                                            np.min(stimes) - np.min(ptimes))
        tt_session.add(tt_entry)
        tt_session.commit()  # probably faster to commit once outside the loop, but oh well
    tt_session.close()
    return inv
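# A minimal usage sketch for build_tt_tables(). The bounding box and the
# SQLAlchemy database URL below are placeholders, not values from the original
# source; the call queries IRIS and writes the stations plus the 1D travel-time
# table into the given database.
inv_example = build_tt_tables(minlat=45.0, maxlat=49.0,
                              minlon=-125.0, maxlon=-120.0,
                              db='sqlite:///tt_stations_1D.db',
                              maxdist=400., source_depth=8.)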
Example #7
"""

from obspy.clients import fdsn
from obspy import UTCDateTime

from mt_metadata.timeseries.stationxml import XMLInventoryMTExperiment
from mth5.mth5 import MTH5
from mth5.timeseries import RunTS

network = "ZU"
station = "CAS04"
start = UTCDateTime("2020-06-02T18:41:43.000000Z")
end = UTCDateTime("2020-07-13T21:46:12.000000Z")

# need to know network, station, start and end times before hand
client = fdsn.Client("IRIS")

# get the data
streams = client.get_waveforms(network, station, None, None, start, end)

# get the metadata
inventory = client.get_stations(
    start, end, network=network, station=station, level="channel"
)
# translate obspy.core.Inventory to an mt_metadata.timeseries.Experiment
translator = XMLInventoryMTExperiment()
experiment = translator.xml_to_mt(inventory)

# initiate MTH5 file
m = MTH5()
m.open_mth5(r"from_iris_dmc.h5", "w")
Example #8
# Designed to be run with Programs, Data and Plots subfolders under a Seismic folder
# Copy this file into the Programs folder
# Copy ShakeNetwork2019.csv into the Data folder
# Developed by Mark Vanstone using Thonny, a free Python IDE designed for new programmers
#
from datetime import datetime
from obspy import UTCDateTime, Stream
from obspy.geodetics import gps2dist_azimuth
from obspy.clients import fdsn, iris
import matplotlib.pyplot as plt
from matplotlib.transforms import blended_transform_factory
client = fdsn.Client()
irisclient = iris.Client()

# EQ details and parameters for data selection and plotting
eqname = "M6.0 Puerto Rico"
eqlat = 17.8694
eqlon = -66.8088
eqlatlon = (eqlat, eqlon)
eqtime = "2020-01-11 12:54:45"
# Plot parameters
plots = [
    'normal', 'section', 'distance'
]  # choose normal or section, section by distance or angle, also see sortkey below
sectiondx = 1e5  # distance between tick marks on x-axis of distance section
angledx = 2  # angle between tick marks on x-axis of section by angle
sortkey = 0  # 0 = sort by distance from epicentre, 2 = sort by azimuth, which is effective for the normal plot
# Vancouver Island excludes - noisy or geographically misplaced recorders
exclude = [
    'RB293', 'R93B1', 'R7813', 'R7783', 'R5A78', 'RDCBA', 'R1E5E', 'R923A',
    'RE650', 'R37BE', 'RB0B5', 'R3D81', 'R6392', 'RCD29', 'RCE32', 'R6324',
Example #9
    def get_inventory_from_df(self, df, client=None, data=True):
        """
        Get an :class:`obspy.Inventory` object from a
        :class:`pandas.DataFrame`

        :param df: DataFrame with columns

            - 'network'   --> FDSN Network code
            - 'station'   --> FDSN Station code
            - 'location'  --> FDSN Location code
            - 'channel'   --> FDSN Channel code
            - 'start'     --> Start time YYYY-MM-DDThh:mm:ss
            - 'end'       --> End time YYYY-MM-DDThh:mm:ss

        :type df: :class:`pandas.DataFrame`
        :param client: FDSN client
        :type client: string
        :param data: True if you want waveform data as well, False if you want
            just metadata; defaults to True
        :type data: boolean, optional
        :return: An inventory of metadata requested and data
        :rtype: :class:`obspy.Inventory` and :class:`obspy.Stream`

        .. seealso:: https://docs.obspy.org/packages/obspy.clients.fdsn.html#id1

        .. note:: If any of the column values are blank, then any value will be
            searched for.  For example, if you leave 'station' blank, any station
            within the given start and end time will be returned.

        """
        if client is not None:
            self.client = client

        df = self._validate_dataframe(df)

        # get the metadata from an obspy client
        client = fdsn.Client(self.client)

        # create an empty stream to add to
        streams = obsread()
        streams.clear()

        inv = Inventory(networks=[], source="MTH5")

        # sort the values to be logically ordered
        df = df.sort_values(self.column_names[:-1])

        used_network = dict()
        used_station = dict()
        for row in df.itertuples():
            # First for loop builds out networks and stations
            if row.network not in used_network:
                net_inv = client.get_stations(
                    row.start, row.end, network=row.network, level="network"
                )
                returned_network = net_inv.networks[0]
                used_network[row.network] = [row.start]
            elif used_network.get(
                row.network
            ) is not None and row.start not in used_network.get(row.network):
                net_inv = client.get_stations(
                    row.start, row.end, network=row.network, level="network"
                )
                returned_network = net_inv.networks[0]
                used_network[row.network].append(row.start)
            else:
                continue
            for st_row in df.itertuples():
                if row.network != st_row.network:
                    continue
                else:
                    if st_row.station not in used_station:
                        sta_inv = client.get_stations(
                            st_row.start,
                            st_row.end,
                            network=row.network,
                            station=st_row.station,
                            level="station",
                        )
                        returned_sta = sta_inv.networks[0].stations[0]
                        used_station[st_row.station] = [st_row.start]
                    elif used_station.get(
                        st_row.station
                    ) is not None and st_row.start not in used_station.get(
                        st_row.station
                    ):
                        # Checks for epoch
                        sta_inv = client.get_stations(
                            st_row.start,
                            st_row.end,
                            network=st_row.network,
                            station=st_row.station,
                            level="station",
                        )
                        returned_sta = sta_inv.networks[0].stations[0]
                        used_station[st_row.station].append(st_row.start)
                    else:
                        continue
                for ch_row in df.itertuples():
                    if (
                        ch_row.network == row.network
                        and st_row.station == ch_row.station
                        and ch_row.start == st_row.start
                    ):
                        cha_inv = client.get_stations(
                            ch_row.start,
                            ch_row.end,
                            network=ch_row.network,
                            station=ch_row.station,
                            loc=ch_row.location,
                            channel=ch_row.channel,
                            level="response",
                        )
                        returned_chan = cha_inv.networks[0].stations[0].channels[0]
                        returned_sta.channels.append(returned_chan)

                        # -----------------------------
                        # get data if desired
                        if data:
                            streams = (
                                client.get_waveforms(
                                    ch_row.network,
                                    ch_row.station,
                                    ch_row.location,
                                    ch_row.channel,
                                    UTCDateTime(ch_row.start),
                                    UTCDateTime(ch_row.end),
                                )
                                + streams
                            )
                    else:
                        continue

                returned_network.stations.append(returned_sta)
            inv.networks.append(returned_network)

        return inv, streams
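    # A minimal usage sketch (hypothetical request; the enclosing class is not
    # shown here, so 'maker' is a placeholder instance, and the column names
    # follow the docstring above):
    #
    #     import pandas as pd
    #     request_df = pd.DataFrame({
    #         "network":  ["ZU"],
    #         "station":  ["CAS04"],
    #         "location": [""],
    #         "channel":  ["LQE"],
    #         "start":    ["2020-06-02T19:00:00"],
    #         "end":      ["2020-06-02T20:00:00"],
    #     })
    #     inv, streams = maker.get_inventory_from_df(request_df, client="IRIS", data=False)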
Example #10
def ant_download():
    #===============================================================================
    # preliminaries
    #===============================================================================

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    outdir = os.path.join('data', 'raw')
    targetloc = os.path.join(outdir, 'rank' + str(rank))
    if not os.path.isdir(targetloc):
        os.mkdir(targetloc)
    respfileloc = os.path.join('meta', 'resp')
    if not os.path.isdir(respfileloc):
        os.mkdir(respfileloc)

    if rank == 0:
        client = fdsn.Client()

    #===============================================================================
    #- read station list
    #- create output directory
    #- set parameters
    #===============================================================================
 
    # network, channel, location and station list
    #stalist=cfg.ids#os.path.join('input','downloadlist.txt')
    fh=open(cfg.ids,'r')
    ids=fh.read().split('\n')
    
    # Verbose?
    if cfg.verbose:
        v=True
        vfetchdata='-v '
    else:
        vfetchdata=''
        
    # Quality?
    quality = cfg.quality
        
    # time interval of request
    t1=cfg.t_start
    t1str=UTCDateTime(t1).strftime('%Y.%j.%H.%M.%S')
    t2=cfg.t_end
    t2str=UTCDateTime(t2).strftime('%Y.%j.%H.%M.%S')
    
    # data segment length
    if cfg.seconds_segment==None:
        winlen=UTCDateTime(t2)-UTCDateTime(t1)
    else:
        winlen = int(cfg.seconds_segment)
    
    # minimum length
    minlen = int(cfg.seconds_minimum)
    
    # geographical region
    lat_min=cfg.lat_min
    lat_max=cfg.lat_max
    lon_min=cfg.lon_min
    lon_max=cfg.lon_max
    
    #===============================================================================
    
    #- Assign each rank its own chunk to download
    #===============================================================================
    
    clen=int(float(len(ids))/float(size))
    chunk=(rank*clen, (rank+1)*clen)
    myids=ids[chunk[0]:chunk[1]]
    if rank==size-1:
        myids=ids[chunk[0]:]
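    # Worked example (illustrative numbers): with size = 4 ranks and
    # len(ids) = 10, clen = 2, so ranks 0-2 get ids[0:2], ids[2:4] and ids[4:6],
    # while the last rank picks up the remainder ids[6:].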
       
    
    #===============================================================================
    
    # Station loop
    #===============================================================================
      
    for id in myids:
        
        if id=='': continue
        network=id.split('.')[0]
        station=id.split('.')[1]
        channel=id.split('.')[3]
         #===============================================================================
        
        # Time window loop
         #===============================================================================
        t = UTCDateTime(t1)
        while t < UTCDateTime(t2):
            
            tstart = UTCDateTime(t).strftime('%Y-%m-%d')
            tstartstr = UTCDateTime(t).strftime('%Y.%j.%H.%M.%S')
            
            tstep = min((UTCDateTime(t)+winlen),UTCDateTime(t2)).\
            strftime('%Y-%m-%d')
            tstepstr = min((UTCDateTime(t)+winlen),UTCDateTime(t2)).\
            strftime('%Y.%j.%H.%M.%S')
            
            
            #-Formulate a polite request
            filename=os.path.join(targetloc,id+'.'+tstartstr+'.'+tstepstr+'.mseed')
            
              
            if os.path.exists(filename)==False:
                
                #print network, station, location, channel
                print('\n Rank '+str(rank),file=None)
                print('\n Attempting to download data from: '+id,file=None)
                print(filename)
                
                reqstring_iris = '{} {} -N {} -S {} -C {} -s {} -e {} -msl {} --lat \
                {}:{} --lon {}:{} -o {} -Q {}'.format(os.path.join(_ROOT,'tools_ext','FetchData')\
                ,vfetchdata,network,station,channel,tstart,tstep,minlen,lat_min,lat_max,lon_min,\
                lon_max,filename,quality)
                
                reqstring_arclink = '{} {} -N {} -S {} -C {} -s {} -e {} -msl {} --lat \
                {}:{} --lon {}:{} -o {} -Q {}'.format(os.path.join(_ROOT,'tools_ext','FetchDataArc')\
                ,vfetchdata,network,station,channel,tstart,tstep,minlen,lat_min,lat_max,lon_min,\
                lon_max,filename,quality)
                
                
                #reqstring=_ROOT+'/tools/FetchData '+vfetchdata+' -N '+network+ \
                # ' -S '+station+' -C '+channel+' -s '+tstart+' -e '+tstep+ \
                # ' -msl '+minlen+' --lat '+lat_min+':'+lat_max+ \
                #' --lon '+lon_min+':'+lon_max+' -o '+filename+' -Q '+quality
                if cfg.data_center == 'iris' or cfg.data_center=='any':
                    os.system(reqstring_iris)
                elif cfg.data_center == 'arclink' or cfg.data_center=='any': 
                    os.system(reqstring_arclink)
            t += winlen
        
        tstart = UTCDateTime(t1).strftime('%Y-%m-%d')
        print('\n Downloading response information from: '+id+'\n')
        
        #===============================================================================
        
        # Within Station loop: Download resp files
         #===============================================================================        
        reqstring_resp_iris = '{} {} -N {} -S {} -C {} -s {} -e {} --lat \
        {}:{} --lon {}:{} -rd {} -Q {}'.format(
        os.path.join(_ROOT,'tools_ext','FetchData'),vfetchdata,network\
        ,station,channel,tstart,tstep,lat_min,lat_max,lon_min,\
        lon_max,respfileloc,quality)
        
        reqstring_resp_arclink = '{} {} -N {} -S {} -C {} -s {} -e {} --lat \
        {}:{} --lon {}:{} -rd {} -Q {}'.format(
        os.path.join(_ROOT,'tools_ext','FetchDataArc'),vfetchdata,network\
        ,station,channel,tstart,tstep,lat_min,lat_max,lon_min,\
        lon_max,respfileloc,quality)
        
        if cfg.data_center == 'iris' or cfg.data_center=='any':
            os.system(reqstring_resp_iris)
        elif cfg.data_center == 'arclink' or cfg.data_center=='any':
            os.system(reqstring_resp_arclink)
            
        

    # Clean up (some files come back with 0 data)
    os.system(os.path.join(_ROOT,'tools','cleandir.sh')+' '+targetloc)
    cmd = 'mv '+targetloc+'/* '+targetloc+'/..'
    print(cmd)
    os.system(cmd)
    os.system('rmdir '+targetloc)

    #===============================================================================
    # Separate station loop: download StationXML
    #===============================================================================
    if rank == 0:
        for id in ids:
            if id=='': continue
            network=id.split('.')[0]
            station=id.split('.')[1]
            xmlfile=os.path.join('meta','stationxml','{}.{}.xml'.format(network,station))
            # Metadata request with obspy
            if os.path.exists(xmlfile)==False:
                client.get_stations(network=network,station=station,
                filename=xmlfile,level='response')        

    comm.Barrier()
    #===============================================================================
    # After download completed on all ranks: check availability
    #===============================================================================

    if rank==0:
        outfile=os.path.join(outdir,'download_report.txt')
        outf=open(outfile,'w')
        
        print('Attempted to download data from stations: \n',file=outf)
        print('****************************************** \n',file=outf)
        for id in ids:
            print(id,file=outf)
        print('****************************************** \n',file=outf)
        stalist=os.path.join('input','downloadlist.txt')
        fh=open(stalist,'r')
        ids=fh.read().split('\n')
        
        noreturn=[]
        
        for id in ids:
            if id=='': continue
           
            fls=glob(os.path.join(outdir,id+'*'))
            fls.sort()
           
            if fls != []:
                print('Files downloaded for id: '+id,file=outf)
                print('First file: '+fls[0],file=outf)
                print('Last file: '+fls[-1],file=outf)
                print('****************************************** \n',file=outf)    
            else: 
                noreturn.append(id)
            
        if noreturn != []:
            print('NO files downloaded for: \n',file=outf)
            
            print(noreturn,file=outf)
     
        print('****************************************** \n',file=outf)
        print('Download parameters were: \n',file=outf)
        print('****************************************** \n',file=outf)
        outf.close()
        
        os.system('cat input/config_download.json >> '+outfile)
    
    return
            
Example #11
def main(args):
    
    random.seed(datetime.now())
    
    if args.n_distances < 1:
        args.n_distances = None
    # print distance classifications
    if args.n_distances != None:
        print 'dist_class, dist_deg, dist_km'
        for dclass in range(0, args.n_distances, 1):
            dist_deg = util.classification2distance(dclass, args.n_distances)
            dist_km = geo.degrees2kilometers(dist_deg)
            print "{}   {:.2f}   {:.1f}".format(dclass, dist_deg, dist_km)
        print ''
     
    if args.n_magnitudes < 1:
        args.n_magnitudes = None
    # print magnitude classifications
    if args.n_magnitudes != None:
        print 'mag_class, mag'
        for mclass in range(0, args.n_magnitudes, 1):
            mag = util.classification2magnitude(mclass, args.n_magnitudes)
            print "{}   {:.2f}".format(mclass, mag)
        print ''
     
    if args.n_depths < 1:
        args.n_depths = None
    # print depth classifications
    if args.n_depths != None:
        print 'depth_class, depth'
        for dclass in range(0, args.n_depths, 1):
            depth = util.classification2depth(dclass, args.n_depths)
            print "{}   {:.1f}".format(dclass, depth)
        print ''
     
    if args.n_azimuths < 1:
        args.n_azimuths = None
    # print azimuth classifications
    if args.n_azimuths != None:
        print 'azimuth_class, azimuth'
        for aclass in range(0, args.n_azimuths, 1):
            azimuth = util.classification2azimuth(aclass, args.n_azimuths)
            print "{}   {:.1f}".format(aclass, azimuth)
        print ''
     
    
    if not os.path.exists(args.outpath):
        os.makedirs(args.outpath)
        
    # save arguments
    with open(os.path.join(args.outpath, 'params.pkl'), 'w') as file:
        file.write(pickle.dumps(args)) # use `pickle.loads` to do the reverse
        
    for dataset in ['train', 'validate', 'test']:
        for datatype in ['events', 'noise']:
            datapath = os.path.join(args.outpath, dataset, datatype)
            if not os.path.exists(datapath):
                os.makedirs(datapath)
            mseedpath = os.path.join(datapath, 'mseed')
            if not os.path.exists(mseedpath):
                os.makedirs(mseedpath)
            mseedpath = os.path.join(datapath, 'mseed_raw')
            if not os.path.exists(mseedpath):
                os.makedirs(mseedpath)
            if datatype == 'events':
                xmlpath = os.path.join(datapath, 'xml')
                if not os.path.exists(xmlpath):
                    os.makedirs(xmlpath)

        
    # read catalog of events
    #filenames = args.event_files_path + os.sep + '*.xml'
    catalog_dict = {}
    catalog_all = []
    for dirpath, dirnames, filenames in os.walk(args.event_files_path):
        for name in filenames:
            if name.endswith(".xml"):
                file = os.path.join(dirpath, name)
                catalog = read_events(file)
                target_count = int(args.event_fraction * float(catalog.count()))
                print catalog.count(), 'events:', 'read from:', file, 'will use:', target_count, 'since args.event_fraction=', args.event_fraction
                if (args.event_fraction < 1.0):
                    while catalog.count() > target_count:
                        del catalog[random.randint(0, catalog.count() - 1)]
                if not args.systematic:
                    tokens = name.split('_')
                    net_sta = tokens[0] + '_' + tokens[1]
                    if not net_sta in catalog_dict:
                        catalog_dict[net_sta] = catalog
                    else:
                        catalog_dict[net_sta] += catalog
                    # sort catalog by date
                    catalog_dict[net_sta] = Catalog(sorted(catalog_dict[net_sta], key=lambda e: e.origins[0].time))
                else:
                    catalog_all += catalog
    
    # read list of channels to use
    inventory_full = read_inventory(args.channel_file)
    inventory_full = inventory_full.select(channel=args.channel_prefix+'Z', sampling_rate=args.sampling_rate)
    #print(inventory)
    
    client = fdsn.Client(args.base_url)
    
    # get existing already processed event channel dictionary
    try:
        with open(os.path.join(args.outpath, 'event_channel_dict.pkl'), 'r') as file:
            event_channel_dict = pickle.load(file)
    except IOError:
        event_channel_dict = {}
    print 'Existing event_channel_dict size:', len(event_channel_dict)

    n_noise = int(0.5 + float(args.n_streams) * args.noise_fraction)
    n_events = args.n_streams - n_noise
    n_validate = int(0.5 + float(n_events) * args.validation_fraction)
    n_test = int(0.5 + float(n_events) * args.test_fraction)
    n_train = n_events - n_validate - n_test
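    # Worked example (illustrative numbers): with n_streams = 1000,
    # noise_fraction = 0.2, validation_fraction = 0.1 and test_fraction = 0.1,
    # this gives n_noise = 200, n_events = 800, n_validate = 80, n_test = 80
    # and n_train = 640.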
    n_count = 0;
    n_streams = 0
    
    if args.systematic:
        event_ndx = 0
        net_ndx = 0
        sta_ndx = 0
        channel_ndx = -1


    
#     distance_id_count = {}
#     max_num_for_distance_id = {}
#     if args.n_distances != None:
#         # train
#         distance_id_count['train'] = [0] * args.n_distances
#         max_num_for_distance_id['train'] = 1 + int(2.0 * float(n_train) / float(args.n_distances))
#         print 'Maximum number events for each distance bin train:', max_num_for_distance_id['train']
#         # validate
#         distance_id_count['validate'] = [0] * args.n_distances
#         max_num_for_distance_id['validate'] = 1 + int(2.0 * float(n_validate) / float(args.n_distances))
#         print 'Maximum number events for each distance bin validate:', max_num_for_distance_id['validate']
#         # test
#         distance_id_count['test'] = [0] * args.n_distances
#         max_num_for_distance_id['test'] = 1 + int(2.0 * float(n_test) / float(args.n_distances))
#         print 'Maximum number events for each distance bin test:', max_num_for_distance_id['test']
        
    while args.systematic or n_streams < args.n_streams:
        
        try:
        
            # choose event or noise
            is_noise = n_streams >= n_events
            
            # reset validate test count if switching from event to  noise
            if n_streams == n_events:
                n_validate = int(0.5 + float(n_noise) * args.validation_fraction)
                n_test = int(0.5 + float(n_noise) * args.test_fraction)
                n_train = n_noise - n_validate - n_test
                n_count = 0;
                
            # set out paths
            if is_noise:
                datatype = 'noise'
            else:
                datatype = 'events'
            if n_count < n_train:
                dataset = 'train'
            elif n_count < n_train + n_validate:
                dataset = 'validate'
            else:
                dataset = 'test'
            datapath = os.path.join(args.outpath, dataset, datatype)

            # get random channel from Inventory
            #inventory = inventory_full.select(time=origin.time)
            inventory = inventory_full
            
            if args.systematic:
                try:
                    catalog, event_ndx, event, origin, channel, net_ndx, net, sta_ndx, sta, channel_ndx \
                        = get_systematic_channel(inventory, catalog_all, is_noise, event_ndx, net_ndx, sta_ndx, channel_ndx)
                except ValueError:
                    break
            else:
                try:
                    catalog, event_ndx, event, origin, channel, net_ndx, net, sta_ndx, sta, channel_ndx = get_random_channel(inventory, catalog_dict, is_noise)
                except ValueError:
                    continue
                                
            distance_id = 0
            distance = -999.0
            magnitude = -999.0
            depth = -999.0
            azimuth = -999.0
            if not is_noise:
                dist_meters, azim, bazim = geo.gps2dist_azimuth(channel.latitude, channel.longitude, origin.latitude, origin.longitude, a=geo.WGS84_A, f=geo.WGS84_F)
                distance = geo.kilometer2degrees(dist_meters / 1000.0, radius=6371)
                azimuth = azim
                magnitude = event.preferred_magnitude().mag
                depth = origin.depth / 1000.0
                if args.n_distances != None:
                    distance_id = util.distance2classification(distance, args.n_distances)
#                                 if distance_id_count[dataset][distance_id] >= max_num_for_distance_id[dataset]:
#                                     print 'Skipping event_channel: distance bin', distance_id, 'for', dataset, 'already full:', \
#                                         distance_id_count[dataset][distance_id], '/', max_num_for_distance_id[dataset]
#                                     continue

            print ''
            print 'Event:', origin.time.isoformat(), event.event_descriptions[0].text, \
            ', Dist(deg): {:.2f} Dist(km): {:.1f} ID: {}'.format(distance, geo.degrees2kilometers(distance), distance_id), \
            ', Mag: {:.2f}'.format(magnitude), \
            ', Depth(km): {:.1f}'.format(depth), \
            ', Az(deg): {:.1f}'.format(azimuth)
            print 'Retrieving channels:', (n_streams + 1), '/ ', args.n_streams, (', NOISE, ' if  is_noise else ', EVENT, '), 'event', event_ndx, origin.time, \
                ', net', net_ndx, ', sta', sta_ndx, ', chan', channel_ndx, \
                ', ', net.code, sta.code, \
                channel.code, channel.location_code, \
                channel.sample_rate
            # check station was available at origin.time
            if not sta.is_active(time=origin.time):
                print 'Skipping event_channel: station not active at origin.time:'
                continue
            #key = str(event_ndx) + '_' + str(net_ndx) + '_' + str(sta_ndx) + '_' + str(channel_ndx) + '_' + str(is_noise)
            key = str(event_ndx) + '_' + net.code + '_' + sta.code + '_' + channel.code + '_' + str(is_noise)
            if key in event_channel_dict:
                print 'Skipping event_channel: already processed.'
                continue
            event_channel_dict[key] = 1
                
            # get start time for waveform request
            ttime = get_first_P_travel_time(origin, channel)
            arrival_time = origin.time + ttime
            if is_noise:
                # get start time of next event
                event2 = catalog[event_ndx + 1]
                origin2 = event2.preferred_origin()
                # check that origins are at least min time apart
                if origin2.time - origin.time < MIN_INTER_EVENT_TIME:
                    print 'Skipping noise event_channel: inter event time too small: ', str(origin2.time - origin.time), \
                        origin2.time, origin.time
                    continue
                ttime2 = get_first_P_travel_time(origin2, channel)
                arrival_time2 = origin2.time + ttime2
                arrival_time = (arrival_time + ((arrival_time2 - arrival_time) / 2.0)) - args.window_start
            
            start_time = arrival_time - args.window_start
                                    
            # request data for 3 channels
            
            #for orientation in ['Z', 'N', 'E', '1', '2']:
            #    req_chan = args.channel_prefix + orientation
            channel_name = net.code + '_' + sta.code + '_' + channel.location_code + '_' + args.channel_prefix
            padded_start_time = start_time - WINDOW_PADDING_FDSN
            padded_end_time = start_time + args.window_length + 2.0 * WINDOW_PADDING_FDSN
            chan_param = args.channel_prefix + '?'
            # kluge to get url used for data request
            kwargs = {'network': net.code, 'station': sta.code, 'location': channel.location_code, 'channel': chan_param,
                      'starttime': padded_start_time, 'endtime': padded_end_time}                      
            #url = client._create_url_from_parameters('dataselect', DEFAULT_PARAMETERS['dataselect'],  **kwargs)
            url = fdsn.client.build_url(client.base_url, 'dataselect', client.major_versions['dataselect'], "query", parameters=kwargs)
            print '  java net.alomax.seisgram2k.SeisGram2K', '\"', url, '\"'
            try:
                stream = client.get_waveforms(  \
                                               net.code, sta.code, channel.location_code, chan_param, \
                                               padded_start_time, padded_end_time, \
                                               attach_response=True)
                
            except fdsn.header.FDSNException as ex:
                print 'Skipping channel:', channel_name, 'FDSNException:', ex, 
                continue
                                    
            print stream
            # TEST
#                         for trace in stream:
#                             print '==========> trace.stats', trace.stats
                
            # check some things
            if (len(stream) != 3):
                print 'Skipping channel: len(stream) != 3:', channel_name
                continue
            ntrace = 0
            for trace in stream:
                if (len(trace) < 1):
                    print 'Skipping trace: len(trace) < 1:', channel_name
                    continue
                if (trace.stats.starttime > start_time or trace.stats.endtime < start_time + args.window_length):
                    print 'Skipping trace: does not contain required time window:', channel_name
                    continue
                ntrace += 1
            if (ntrace != 3):
                print 'Skipping channel: ntrace != 3:', channel_name
                continue
            
            # pre-process streams
            # sort so that channels are always ingested into the NN in the same order (E, N, Z)
            stream.sort(['channel'])
            # detrend - this is meant to be equivalent to detrend or a long period low-pass (e.g. at 100sec) applied to real-time data
            stream.detrend(type='linear')
            for trace in stream:
                # correct for required sampling rate
                if abs(trace.stats.sampling_rate - args.sampling_rate) / args.sampling_rate > 0.01:
                    trace.resample(args.sampling_rate)
                    
            # apply high-pass filter if requested
            if args.hp_filter_freq > 0.0:
                stream.filter('highpass', freq=args.hp_filter_freq, corners=args.hp_filter_corners)
            
            # check signal-to-noise ratio; if it fails, repeat on 1 s high-passed data to capture local/regional events in longer-period microseismic noise
            sn_type = 'BRB'
            first_pass = True;
            while True:
                if is_noise:
                    snrOK = True
                else:
                    snrOK = False
                for trace in stream:
                    # slice with 1sec margin of error for arrival time to: 1) avoid increasing noise amplitude with signal, 2) avoid missing first P in signal
                    if (first_pass):
                        signal_slice = trace.slice(starttime=arrival_time - 1.0, endtime=arrival_time - 1.0 + args.snr_window_length)
                        noise_slice = trace.slice(endtime=arrival_time - 1.0) 
                    else:
                        # highpass at 1sec
                        filt_trace = trace.copy()
                        filt_trace.filter('highpass', freq=1.0, corners=4)
                        signal_slice = filt_trace.slice(starttime=arrival_time - 1.0, endtime=arrival_time - 1.0 + args.snr_window_length)
                        noise_slice = filt_trace.slice(endtime=arrival_time - 1.0) 
                        sn_type = '1HzHP'
                    # check signal to noise around arrival_time
                    # ratio of std
                    asignal = signal_slice.std()
                    anoise = noise_slice.std()
                    snr = asignal / anoise
                    print trace.id, sn_type, 'snr:', snr, 'std_signal:', asignal, 'std_noise:', anoise
                    # ratio of peak amplitudes (DO NOT USE, GIVE UNSTABLE RESULTS!)
#                                 asignal = signal_slice.max()
#                                 anoise = noise_slice.max()
#                                 snr = np.absolute(asignal / anoise)
#                                 print trace.id, sn_type, 'snr:', snr, 'amax_signal:', asignal, 'amax_noise:', anoise
                    if is_noise:
                        snrOK = snrOK and snr <= MAX_SNR_NOISE
                        if not snrOK:
                            break
                    else:
                        snrOK = snrOK or snr >= args.snr_accept
                if (first_pass and not snrOK and args.hp_filter_freq < 0.0):
                    first_pass = False;
                    continue
                else:
                    break

            if (not snrOK):
                if is_noise:
                    print 'Skipping channel:', sn_type, 'snr >', MAX_SNR_NOISE,  'on one or more traces:', channel_name
                else:
                    print 'Skipping channel:', sn_type, 'snr < args.snr_accept:', args.snr_accept, 'on all traces:', channel_name
                continue
               
            # trim data to required window
            # try to make sure samples and start/end times align as closely as possible to first trace
            trace = stream.traces[0]
            trace = trace.slice(starttime=start_time, endtime=start_time + args.window_length, nearest_sample=True)
            start_time = trace.stats.starttime
            stream = stream.slice(starttime=start_time, endtime=start_time + args.window_length, nearest_sample=True)
            
            cstart_time = '%04d.%02d.%02d.%02d.%02d.%02d.%03d' % \
                (start_time.year, start_time.month, start_time.day, start_time.hour, start_time.minute, \
                 start_time.second, start_time.microsecond // 1000)
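            # e.g. a start_time of 2020-01-11T12:54:45.123456Z yields
            # cstart_time == '2020.01.11.12.54.45.123'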

            # process each trace
            try:
                for trace in stream:
                    # correct for overall sensitivity or gain
                    trace.normalize(trace.stats.response.instrument_sensitivity.value)
                    trace.data = trace.data.astype(np.float32)
                    # write miniseed
                    #tracefile = os.path.join(datapath, 'mseed', trace.id + '.' + cstart_time + '.mseed')
                    #trace.write(tracefile, format='MSEED', encoding='FLOAT32')
                    #print 'Channel written:', tracefile, trace.count(), 'samples'
            except AttributeError as err:
                print 'Skipping channel:', channel_name,  ': Error applying trace.normalize():' , err
                
            filename_root =  channel_name + '.' + cstart_time

            # write raw miniseed
            streamfile = os.path.join(datapath, 'mseed_raw', filename_root + '.mseed')
            stream.write(streamfile, format='MSEED', encoding='FLOAT32')
            print 'Stream written:', stream.count(), 'traces:'
            print '  java net.alomax.seisgram2k.SeisGram2K', streamfile
                
            # store absolute maximum
            stream_max = np.absolute(stream.max()).max()
            # normalize by absolute maximum
            stream.normalize(global_max = True)
            
            # 20180521 AJL
            # spherical coordinates
            # raw data always in same order ENZ
            # tensor indexing is [traces, datapoints, comps]
            if args.spherical:
                rad2deg = 180.0 / math.pi
                # calculate modulus
                temp_square = np.add(np.square(stream.traces[0].data), np.add(np.square(stream.traces[1].data), np.square(stream.traces[2].data)))
                temp_modulus = np.sqrt(temp_square)
                # calculate azimuth
                temp_azimuth = np.add( np.multiply(np.arctan2(stream.traces[0].data, stream.traces[1].data), rad2deg), 180.0)
                # calculate inclination
                temp_inclination = np.multiply(np.arcsin(np.divide(stream.traces[2].data, temp_modulus)), rad2deg)
                # reset stream data to spherical coordinates
                stream.traces[0].data = temp_inclination
                stream.traces[1].data = temp_azimuth
                temp_modulus = np.multiply(temp_modulus, 100.0)  # increase scale for plotting purposes
                stream.traces[2].data = temp_modulus


            # put absolute maximum normalization in first element of data array, to seed NN magnitude estimation
            # 20180816 AJL - do not mix max with data
            # for trace in stream:
            #    trace.data[0] = stream_max
            print 'stream_max', stream_max
            

            # write processed miniseed
            streamfile = os.path.join(datapath, 'mseed', filename_root + '.mseed')
            stream.write(streamfile, format='MSEED', encoding='FLOAT32')
            print 'Stream written:', stream.count(), 'traces:'
            print '  java net.alomax.seisgram2k.SeisGram2K', streamfile
                
            # write event waveforms and distance_id in .tfrecords
            magnitude_id = 0
            depth_id = 0
            azimuth_id = 0
            if not is_noise:
#                             if args.n_distances != None:
#                                 distance_id_count[dataset][distance_id] += 1
                if args.n_magnitudes != None:
                    magnitude_id = util.magntiude2classification(magnitude, args.n_magnitudes)
                if args.n_depths != None:
                    depth_id = util.depth2classification(depth, args.n_depths)
                if args.n_azimuths != None:
                    azimuth_id = util.azimuth2classification(azimuth, args.n_azimuths)
            else:
                distance_id = -1
                distance = 0.0
            output_name = filename_root + '.tfrecords'
            output_path = os.path.join(datapath, output_name)
            writer = DataWriter(output_path)
            writer.write(stream, stream_max, distance_id, magnitude_id, depth_id, azimuth_id, distance, magnitude, depth, azimuth)
            if not is_noise:
                print '==== Event stream tfrecords written:', output_name, \
                'Dist(deg): {:.2f} Dist(km): {:.1f} ID: {}'.format(distance, geo.degrees2kilometers(distance), distance_id), \
                ', Mag: {:.2f} ID: {}'.format(magnitude, magnitude_id), \
                ', Depth(km): {:.1f} ID: {}'.format(depth, depth_id), \
                ', Az(deg): {:.1f} ID: {}'.format(azimuth, azimuth_id)
            else:
                print '==== Noise stream tfrecords written:', output_name, 'ID: Dist {}, Mag {}, Depth {}, Az {}'.format(distance_id, magnitude_id, depth_id, azimuth_id)
                
            # write event data
            if not is_noise:
                filename = os.path.join(datapath, 'xml', filename_root + '.xml')
                event.write(filename, 'QUAKEML')
           
            n_streams += 1
            n_count += 1
                    
        except KeyboardInterrupt:
            print 'Stopping: KeyboardInterrupt'
            break

        except Exception as ex:
            print 'Skipping stream: Exception:', ex
            traceback.print_exc()
            continue

    print n_streams, 'streams:', 'written to:', args.outpath

    # save event_channel_dict
    with open(os.path.join(args.outpath, 'event_channel_dict.pkl'), 'w') as file:
        file.write(pickle.dumps(event_channel_dict))