forked from cerinunn/pdart
-
Notifications
You must be signed in to change notification settings - Fork 0
/
raw_import.py
executable file
·292 lines (240 loc) · 9.51 KB
/
raw_import.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Run the initial import from the csv files to the raw SEED files.
The raw files have not had the frames reconstructed yet.
:copyright:
The pdart Development Team & Ceri Nunn
:license:
GNU Lesser General Public License, Version 3
(https://www.gnu.org/copyleft/lesser.html)
"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
from future.builtins import * # NOQA
from datetime import timedelta
import os
import io
import gzip
import glob
import numpy as np
import numpy.ma as ma
import logging, logging.handlers
import csv
import fnmatch
import shutil
from obspy.core.utcdatetime import UTCDateTime
from obspy.core import Stream, Trace, Stats, read
# Sample spacing, assigned to ``stats.delta`` of every trace built in
# raw_import() (1 / 0.15094 is roughly 6.625 samples per second).
DELTA = 0.15094
# Sentinel sample value; _append_stream() masks samples equal to it.
INVALID = -99999
def call_raw_import(
        stations=['S11', 'S12', 'S14', 'S15', 'S16'],
        csv_dir='.',
        raw_dir='.',
        start_time=UTCDateTime('1969-07-21T03:00:00.000000Z'),
        end_time=UTCDateTime('1977-09-30T:21:00.000000Z'),
        write_gzip=True):
    '''
    Makes 'raw' stream files from gzipped csv files.
    Calls raw_import()

    The time range is walked in 3 hour steps. For each step the file
    ``<csv_dir>/<YYYY-mm-ddTHH:MM:SS>.csv.gz`` is imported and one
    ``<YYYY-mm-ddTHH:MM:SS>_<station>.MINISEED`` file is written to
    ``<raw_dir>/<station>/`` for every station found in that file.

    :type stations: list of str
    :param stations: Station names whose subdirectories are created in
        raw_dir up front. The list is not used to filter the data: every
        station present in a csv file is written.
    :type csv_dir: str
    :param csv_dir: Directory containing the gzipped csv files.
    :type raw_dir: str
    :param raw_dir: Existing directory which receives the station
        subdirectories.
    :type start_time: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param start_time: Timestamp of the first csv file to import.
    :type end_time: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param end_time: Importing stops once the current timestamp is no
        longer earlier than this time.
    :type write_gzip: bool
    :param write_gzip: If True, each MINISEED file is gzipped to
        ``<file>.gz`` and the uncompressed file is removed.
    :raises IOError: If raw_dir does not exist.
    '''
    # check that the overall directory exists (once - it does not depend
    # on the station)
    if not os.path.exists(raw_dir):
        msg = ("The directory {} doesn't exist".format(raw_dir))
        raise IOError(msg)

    # make the subdirectories with the station names
    for station in stations:
        raw_dir_station = os.path.join(raw_dir, station)
        if not os.path.exists(raw_dir_station):
            os.makedirs(raw_dir_station)

    time_interval = timedelta(hours=3)
    start = start_time
    while start < end_time:
        timestamp = start.strftime("%Y-%m-%dT%H:%M:%S")

        # find csv gzip importfile with the start time
        csv_file_gzip = os.path.join(csv_dir, '%s.csv.gz' % timestamp)

        # import the stream from the gzipped file
        stream = raw_import(csv_file_gzip)

        if len(stream) > 0:
            # Use separate names here so that the ``stations`` argument
            # is not overwritten; sorted() gives a reproducible order.
            found_stations = sorted({tr.stats.station for tr in stream})

            for found_station in found_stations:
                station_stream = stream.select(station=found_station)

                # The file may contain a station that was not requested,
                # so make sure its subdirectory exists before writing.
                raw_dir_station = os.path.join(raw_dir, found_station)
                if not os.path.exists(raw_dir_station):
                    os.makedirs(raw_dir_station)

                stream_file = '%s_%s.MINISEED' % (timestamp, found_station)
                gzip_file = '%s.gz' % (stream_file)
                stream_file = os.path.join(raw_dir_station, stream_file)
                gzip_file = os.path.join(raw_dir_station, gzip_file)

                station_stream.write(stream_file, 'MSEED')

                if write_gzip:
                    with open(stream_file, 'rb') as f_in, \
                            gzip.open(gzip_file, 'wb') as f_out:
                        shutil.copyfileobj(f_in, f_out)
                    # only the gzipped copy is kept
                    os.unlink(stream_file)

                print(station_stream)

        # increment the time interval
        start += time_interval

    print('Done')
def raw_import(gzip_filename, remove_manually=None):
    """
    Makes a 'raw' stream file from the gzipped csv file.

    The csv file has been downloaded from the JAXA website.
    The method makes a raw stream which does not yet have the frames
    reconstructed.

    The header line decides the layout: 8 columns are read as three
    channels (written as '_M1', '_M2', '_MZ') and 6 columns as a single
    channel (written as '_SZ'). Any other header yields an empty stream.
    Rows are sorted by station, ground station and time, and one group of
    traces is made for each (station, ground station) pair.

    :type gzip_filename: str
    :param gzip_filename: gzipped filename of the CSV file to be read.
    :type remove_manually: iterable of str, optional
    :param remove_manually: Lines of a single channel (6 column) file to
        skip. Each entry has to match the raw line exactly as it is read
        from the file (including the line ending). Defaults to skipping
        nothing.
    :rtype: :class:`~obspy.core.stream.Stream`
    :return: A ObsPy Stream object.
    """
    # ``remove_manually`` used to be read from an undefined global name,
    # which raised a NameError for every single channel file.
    skip_lines = frozenset(remove_manually or ())

    # read the gzipped csv file
    with gzip.open(gzip_filename, 'rt') as fh:
        buf = []
        # read the header
        # it should contain either 1 channel or 3
        header = next(fh).split(',')

        if len(header) == 8:
            # the RESP files use either 'MH1', 'MH2', 'MHZ'
            # the JAXA files use 'LPX', 'LPY', 'LPZ'
            # X should point north, Y east, but this is not always the case
            # so we rename LPX to MH1, and LPY to MH2
            channels = ['MH1', 'MH2', 'MHZ']
            raw_channels = ['_M1', '_M2', '_MZ']
            for line in fh:
                temp = line.split(',')

                try:
                    temp[4] = UTCDateTime(temp[4])
                except ValueError:
                    # this is a specific error which is found in the
                    # csv file
                    if temp[4] == '1975-49-11 19:13:04.232000':
                        temp[4] = UTCDateTime('1975-09-11 19:13:04.232000')
                    else:
                        raise

                try:
                    temp[0] = int(temp[0])
                except ValueError:
                    # this is a specific error which is found in the
                    # csv file
                    if (temp[4] == UTCDateTime('1975-09-15 12:53:36.849000')
                            and temp[0] == '<3'):
                        temp[0] = 83
                    else:
                        raise

                # tuple order (so that it can be sorted):
                # ap_station, ground_station, time, frame_count, nc, x, y, z
                buf.append((temp[1], temp[2], temp[4],
                            int(temp[0]), int(temp[3]), int(temp[5]),
                            int(temp[6]), int(temp[7])))

        elif len(header) == 6:
            channels = ['SPZ']
            raw_channels = ['_SZ']
            for line in fh:
                # check the manual list of points which have been removed
                if line in skip_lines:
                    continue
                temp = line.split(',')
                # the original order:
                # frame_count, ap_station, ground_station, nc, time, spz
                # make a tuple (in a new order so that it can be sorted):
                # ap_station, ground_station, time, frame_count, nc, spz
                buf.append((temp[1], temp[2],
                            UTCDateTime(temp[4]),
                            int(temp[0]), int(temp[3]), int(temp[5])))

        # With any other header length ``buf`` stays empty, the loop below
        # is skipped and an empty stream is returned.

    # sort by ap_station, ground_station and time (and also everything else,
    # but that won't matter)
    buf.sort()

    stream = Stream()
    network = 'XA'
    three_channels = len(header) == 8

    data_x = []
    data_y = []
    data_z = []
    data_sz = []
    abs_times = []
    frame_count_ncs = []
    stats = None
    last_id = None

    for record in buf:
        # read in the data from the buffer
        station = record[0].rjust(3, 'S')
        ground_station = record[1].rjust(2, '0')
        sample_time = record[2]
        frame_count = record[3]
        nc = record[4]

        # create a combination of frame count and nc - from 0.0 to 89.75
        frame_count_nc = float(frame_count) + (float(nc) - 1.) * 0.25

        trace_id = "{0:s}.{1:s}.{2:s}.{3:s}".format(
            network, station, ground_station, channels[0])

        # check whether we are adding to an existing one, or creating a
        # new one
        if last_id != trace_id:
            # before creating the new one, add previous trace(s) to the
            # stream
            if abs_times:
                _make_traces(stream=stream, stats=stats, header=header,
                             channels=raw_channels, data_x=data_x,
                             data_y=data_y, data_z=data_z, data_sz=data_sz,
                             abs_times=abs_times,
                             frame_count_ncs=frame_count_ncs)

            data_x = []
            data_y = []
            data_z = []
            data_sz = []
            abs_times = []
            frame_count_ncs = []

            # the first row of the group provides the start time
            stats = Stats()
            stats.delta = DELTA
            stats.starttime = sample_time
            stats.network = network
            stats.station = station
            stats.location = ground_station

        # add the data from any line
        if three_channels:
            data_x.append(record[5])
            data_y.append(record[6])
            data_z.append(record[7])
        else:
            data_sz.append(record[5])
        abs_times.append(sample_time.timestamp)
        frame_count_ncs.append(frame_count_nc)

        last_id = trace_id

    # add the last one
    if abs_times:
        _make_traces(stream=stream, stats=stats, header=header,
                     channels=raw_channels, data_x=data_x,
                     data_y=data_y, data_z=data_z, data_sz=data_sz,
                     abs_times=abs_times, frame_count_ncs=frame_count_ncs)

    return stream
def _make_traces(stream=None, stats=None, header=None, channels=None,
                 data_x=None, data_y=None, data_z=None, data_sz=None,
                 abs_times=None, frame_count_ncs=None):
    '''
    Make the traces from the lists imported from the csv file.

    Nothing is added unless there is more than one sample. Otherwise the
    data channel(s) are appended (three if the header has 8 columns, else
    one), followed by a '_TT' trace holding abs_times and a '_FR' trace
    holding frame_count_ncs.

    :type stream: :class:`~obspy.core.stream.Stream`
    :param stream: Stream to append to. A new one is made if None.
    :type stats: :class:`~obspy.core.trace.Stats`
    :param stats: Header shared by the traces. Its channel is set to
        channels[0]; the other traces use copies.
    :type header: list
    :param header: Split header line of the csv file (only its length is
        used).
    :type channels: list of str
    :param channels: Channel names for the data trace(s).
    :param data_x: Samples for channels[0] (8 column files).
    :param data_y: Samples for channels[1] (8 column files).
    :param data_z: Samples for channels[2] (8 column files).
    :param data_sz: Samples for channels[0] (all other files).
    :param abs_times: Sample timestamps, written as channel '_TT'.
    :param frame_count_ncs: Combined frame count values, written as
        channel '_FR'.
    :rtype: :class:`~obspy.core.stream.Stream`
    :return: The stream which was passed in (or the newly made one), so
        that a stream created here is not lost.
    '''
    if stream is None:
        stream = Stream()

    if len(abs_times) > 1:
        if len(header) == 8:
            components = [(channels[0], data_x),
                          (channels[1], data_y),
                          (channels[2], data_z)]
        else:
            components = [(channels[0], data_sz)]

        for index, (channel, samples) in enumerate(components):
            # the first channel reuses the caller's stats, the rest copy it
            channel_stats = stats if index == 0 else stats.copy()
            channel_stats.channel = channel
            _append_stream(stream, samples, channel_stats)

        # the timing and the frame information travel with the data
        for channel, samples in (('_TT', abs_times),
                                 ('_FR', frame_count_ncs)):
            aux_stats = stats.copy()
            aux_stats.channel = channel
            _append_stream(stream, samples, aux_stats)

    return stream
def _append_stream(stream, data_list, stats):
    '''
    Add the trace to the stream (whilst checking the data type)

    The samples are stored as float64 if the first one is a float and as
    int32 otherwise. Samples equal to INVALID are masked.

    :type stream: :class:`~obspy.core.stream.Stream`
    :param stream: Stream to which the new trace is appended.
    :type data_list: list
    :param data_list: Samples of the trace (must not be empty).
    :type stats: :class:`~obspy.core.trace.Stats`
    :param stats: Header for the trace. Its npts is set to the number of
        samples.
    :rtype: :class:`~obspy.core.stream.Stream`
    :return: The stream which was passed in.
    '''
    # isinstance also recognises subclasses of float (e.g. numpy.float64),
    # which would otherwise be cast to int32
    if isinstance(data_list[0], float):
        parsed_data = np.array(data_list, 'float64')
    else:
        parsed_data = np.array(data_list, 'int32')

    # only make a masked array when there is something to mask
    if INVALID in parsed_data:
        parsed_data = ma.masked_equal(parsed_data, INVALID)

    stats.npts = len(parsed_data)
    # append the trace
    stream.append(Trace(data=parsed_data, header=stats))
    return stream