def load_ts_obj(self, ts_fn):
    """
    Load an MT time series from a file name, or pass an MTTS object through.

    :param ts_fn: full path to a time series file or an already loaded
                  time series object.  Recognized extensions (checked
                  case-insensitively) are ``z3d``, the ascii component
                  extensions ``ex``, ``ey``, ``hx``, ``hy``, ``hz`` and the
                  NIMS extensions ``bnn`` and ``bin``.
    :type ts_fn: str or mtts.MTTS
    :return: the time series object; for NIMS files a list of the
             ``hx, hy, hz, ex, ey`` attributes of the NIMS object
    :raises mtts.MTTSError: if ``ts_fn`` is neither a string nor an MTTS
                            object, or if the file extension is not one of
                            the recognized ones
    """
    if isinstance(ts_fn, str):
        # extension without the leading dot, lower-cased for comparison
        ext = os.path.splitext(ts_fn)[-1][1:].lower()
        if ext == 'z3d':
            self.logger.info('Opening Z3D file {0}'.format(ts_fn))
            z3d_obj = zen.Zen3D(ts_fn)
            z3d_obj.read_z3d()
            ts_obj = z3d_obj.ts_obj
        elif ext in ['ex', 'ey', 'hx', 'hy', 'hz']:
            self.logger.info('Opening ascii file {0}'.format(ts_fn))
            ts_obj = mtts.MTTS()
            ts_obj.read_file(ts_fn)
        elif ext in ['bnn', 'bin']:
            self.logger.info('Opening NIMS file {0}'.format(ts_fn))
            nims_obj = nims.NIMS(ts_fn)
            ts_obj = [
                nims_obj.hx,
                nims_obj.hy,
                nims_obj.hz,
                nims_obj.ex,
                nims_obj.ey,
            ]
        else:
            # without this branch an unknown extension reached the return
            # statement with ``ts_obj`` never assigned
            raise mtts.MTTSError(
                "Do not understand file extension '{0}' of {1}".format(
                    ext, ts_fn))
    elif isinstance(ts_fn, mtts.MTTS):
        ts_obj = ts_fn
        self.logger.info('Loading MT object')
    else:
        raise mtts.MTTSError("Do not understand {0}".format(type(ts_fn)))

    return ts_obj
def combine_z3d_files(self, z3d_df, new_sampling_rate=4, t_buffer=3600):
    """
    Combine all z3d files of each component into one decimated time series
    for processing to get long period estimations.

    For every component in ``ex, ey, hx, hy, hz`` the z3d files listed in
    ``z3d_df`` are read, decimated to ``new_sampling_rate``, placed into one
    long time series, gap filled and written to an ascii file named
    ``{station}_combined_{sampling_rate}.{component}`` in the folder of the
    first ``fn_ascii`` entry.  A component is skipped when exactly one such
    combined file already exists or when no rows exist for it.

    :param pandas.DataFrame z3d_df: information about the z3d files; the
        columns ``fn_ascii``, ``fn_z3d``, ``component``, ``start``, ``stop``
        and ``dipole_length`` are read
    :param int new_sampling_rate: new sampling rate of the data
    :param int t_buffer: buffer (seconds) for the last time series, should
        be length of longest schedule chunk
    :return: ``z3d_df`` with one row appended per newly combined component
    :rtype: pandas.DataFrame
    """
    attr_list = [
        "station",
        "channel_number",
        "component",
        "coordinate_system",
        "dipole_length",
        "azimuth",
        "units",
        "lat",
        "lon",
        "elev",
        "datum",
        "data_logger",
        "instrument_id",
        "calibration_fn",
        "declination",
        "fn",
        "conversion",
        "gain",
    ]

    sv_path = Path(z3d_df.fn_ascii[0]).parent

    combined_entries = []
    for comp in ["ex", "ey", "hx", "hy", "hz"]:
        # look for the file this call would write, i.e. at the requested
        # sampling rate rather than a hard-coded 4
        existing = list(sv_path.glob(
            "*combined_{0}.{1}".format(int(new_sampling_rate), comp)))
        if len(existing) == 1:
            print("INFO: skipping {0} already exists".format(existing[0]))
            continue

        # sort out files for the given component
        comp_df = z3d_df[z3d_df.component == comp].copy()
        if len(comp_df) == 0:
            print(
                "Warning: Skipping {0} because no Z3D files found.".format(
                    comp))
            continue

        # sort the data frame by date
        comp_df = comp_df.sort_values("start")

        # total time span covered by the files of this component
        start_dt = comp_df.start.min()
        end_dt = comp_df.stop.max()
        t_diff = int((end_dt - start_dt).total_seconds())

        # make a new MTTS object that is buffered at the end to make sure
        # there is room for the data, it is trimmed further down
        new_ts = mtts.MTTS()
        new_ts.ts = np.zeros(int((t_diff + t_buffer) * new_sampling_rate))
        new_ts.sampling_rate = new_sampling_rate
        new_ts.start_time_utc = start_dt.isoformat()

        # metadata of every file, used to fill in the new MTTS object
        attr_dict = {key: [] for key in attr_list}

        # loop over each z3d file for the given component
        for row in comp_df.itertuples():
            z_obj = zen.Zen3D(row.fn_z3d)
            z_obj.read_z3d()
            t_obj = z_obj.ts_obj
            if row.component in ["ex", "ey"]:
                # scale electric channels by the dipole length
                t_obj.ts.data /= row.dipole_length / 1000
                t_obj.units = "mV/km"
                print("Using scales {0} = {1} m".format(
                    row.component, row.dipole_length))

            # decimate to the required sampling rate
            t_obj.decimate(int(z_obj.df / new_sampling_rate))

            # fill the new time series with the data at the appropriate times
            new_ts.ts.data[
                (new_ts.ts.index >= t_obj.ts.index[0])
                & (new_ts.ts.index <= t_obj.ts.index[-1])] = t_obj.ts.data

            # files are sorted by start, so after the loop this is the end
            # of the last z3d file
            end_date = z_obj.ts_obj.ts.index[-1]

            # fill attribute data frame
            for attr in attr_list:
                attr_dict[attr].append(getattr(t_obj, attr))

        # trim the buffer, then fill gaps (samples still equal to zero) with
        # the previous value; this seems to work better than interpolation
        # and is faster than regression.  The gaps should be max 13 seconds
        # if everything went well.
        ts_index = new_ts.ts.index
        trimmed = new_ts.ts.data[
            (ts_index >= start_dt) & (ts_index <= end_date)]
        trimmed = trimmed.mask(trimmed == 0).ffill()
        new_ts.ts = trimmed.to_frame()

        # fill the new MTTS with the appropriate metadata: median of the
        # non-zero values, or the most common value for non-numeric data
        attr_df = pd.DataFrame(attr_dict)
        for attr in attr_list:
            try:
                attr_series = attr_df[attr][attr_df[attr] != 0]
                try:
                    setattr(new_ts, attr, attr_series.median())
                except TypeError:
                    setattr(new_ts, attr, attr_series.mode()[0])
            except ValueError:
                print("Warning: could not set {0}".format(attr))

        ascii_fn = "{0}_combined_{1}.{2}".format(
            new_ts.station, int(new_ts.sampling_rate), new_ts.component)
        sv_fn_ascii = sv_path.joinpath(ascii_fn)
        new_ts.write_ascii_file(sv_fn_ascii.as_posix())

        combined_entries.append({
            "station": new_ts.station,
            "start": new_ts.start_time_utc,
            "stop": new_ts.stop_time_utc,
            "sampling_rate": new_ts.sampling_rate,
            "component": new_ts.component,
            "fn_z3d": None,
            "azimuth": new_ts.azimuth,
            "dipole_length": new_ts.dipole_length,
            "coil_number": new_ts.instrument_id,
            "latitude": new_ts.lat,
            "longitude": new_ts.lon,
            "elevation": new_ts.elev,
            "n_samples": new_ts.n_samples,
            "fn_ascii": sv_fn_ascii,
        })

    # nothing new was combined, hand back a copy of the input
    if not combined_entries:
        return z3d_df.copy()

    # append the combined information to the existing data frame
    # (DataFrame.append no longer exists in pandas >= 2.0)
    return pd.concat([z3d_df, pd.DataFrame(combined_entries)])
def convert_to_mtts(
    self,
    z3d_df,
    block_dict=None,
    notch_dict=None,
    dec=1,
    overwrite=False,
    combine=True,
    combine_sampling_rate=4,
):
    """
    Convert z3d files to MTTS objects and write ascii files if they do
    not already exist.

    ``z3d_df`` is updated in place with ``start``, ``stop``, ``n_samples``
    and ``fn_ascii`` for every entry that is read or written.

    :param z3d_df: dataframe holding information about z3d files
                   see help for more information on data frame structure
    :type z3d_df: pandas.DataFrame
    :param block_dict: dictionary of blocks to use. Has keys of sample rate
                       and values of a list of blocks to use.  Entries whose
                       sampling rate is not a key are skipped.  Defaults to
                       None, in which case a dictionary is built from
                       ``z3d_df``
    :type block_dict: dictionary, optional
    :param notch_dict: dictionary of notches, passed on unchanged to
                       ``Zen3D.write_ascii_mt_file``, defaults to None
    :type notch_dict: dictionary, optional
    :param dec: decimation factor passed on to
                ``Zen3D.write_ascii_mt_file``, defaults to 1
    :type dec: int, optional
    :param overwrite: if False, entries whose ascii file already exists
                      are not rewritten; only their header is read,
                      defaults to False
    :type overwrite: bool, optional
    :param combine: if True, call :meth:`combine_z3d_files` on the result,
                    defaults to True
    :type combine: bool, optional
    :param combine_sampling_rate: sampling rate passed to
                                  :meth:`combine_z3d_files`, defaults to 4
    :type combine_sampling_rate: int, optional
    :return: dataframe filled with timeseries information
    :rtype: pandas.DataFrame

    .. todo:: Add examples of notch dict, block dict
    """
    # if the block dictionary is empty make one that covers all files
    if block_dict is None:
        block_dict = self._make_block_dict(z3d_df)

    # loop over each entry in the data frame
    for entry in z3d_df.itertuples():
        # if the ascii file exists and no overwrite, get information from
        # its header and skip the conversion
        if entry.fn_ascii is not None:
            if Path(entry.fn_ascii).exists() and overwrite is False:
                print("INFO: Skipping {0}".format(entry.fn_ascii))
                ts_obj = mtts.MTTS()
                ts_obj.read_ascii_header(entry.fn_ascii)
                z3d_df.at[entry.Index, "stop"] = pd.Timestamp(
                    ts_obj.stop_time_utc)
                z3d_df.at[entry.Index, "n_samples"] = ts_obj.n_samples
                z3d_df.at[entry.Index, "start"] = pd.Timestamp(
                    ts_obj.start_time_utc)
                z3d_df.at[entry.Index, "fn_ascii"] = ts_obj.fn
                continue

        # check if the block should be used; a sampling rate that is not in
        # the block dictionary means none of its blocks were requested
        if entry.block not in block_dict.get(entry.sampling_rate, ()):
            continue

        z3d_obj = zen.Zen3D(entry.fn_z3d)
        z3d_obj.read_z3d()
        ts_obj = z3d_obj.ts_obj

        # write mtpy mt file
        z3d_obj.write_ascii_mt_file(notch_dict=notch_dict, dec=dec)

        # get information from time series and fill data frame
        z3d_df.at[entry.Index, "stop"] = pd.Timestamp(ts_obj.stop_time_utc)
        z3d_df.at[entry.Index, "n_samples"] = ts_obj.n_samples
        z3d_df.at[entry.Index, "start"] = pd.Timestamp(
            ts_obj.start_time_utc)
        z3d_df.at[entry.Index, "fn_ascii"] = z3d_obj.fn_mt_ascii

    if combine:
        z3d_df = self.combine_z3d_files(
            z3d_df, new_sampling_rate=combine_sampling_rate)

    return z3d_df
# -*- coding: utf-8 -*- """ Created on Fri Sep 17 15:43:30 2021 @author: jpeacock """ from mtpy.core import ts import numpy as np f = r"d:\2021UTC\n_mag_volts_rs50\0865\20210406.npy" hx = np.fromfile(f)[200:] t = ts.MTTS() t.ts = hx t.sampling_rate = 50 t.start_time_utc = "2020-01-01T00:00:00"
def combine_z3d_files(self, z3d_df, new_sampling_rate=4, t_buffer=3600,
                      remote=False):
    """
    Combine all z3d files of each component into one decimated time series
    for processing to get long period estimations.

    For every requested component present in ``z3d_df`` the z3d files are
    read, decimated to ``new_sampling_rate``, placed into one long time
    series, gap filled and written to
    ``{station}_combined_{sampling_rate}.{COMPONENT}`` in the save folder.
    If exactly one file ending in ``_{sampling_rate}.{component}`` already
    exists below the save folder, only its header is read (a lower-case
    extension is renamed to upper case first).

    :param pandas.DataFrame z3d_df: information about the z3d files; the
        columns ``fn_ascii``, ``fn_z3d``, ``component``, ``cal_fn``,
        ``start``, ``stop`` and ``dipole_length`` are read
    :param int new_sampling_rate: new sampling rate of the data
    :param int t_buffer: buffer (seconds) for the last time series, should
        be length of longest schedule chunk
    :param bool remote: if True only ``hx`` and ``hy`` are combined and the
        new rows are flagged as remote
    :return: ``z3d_df`` with one row appended per combined component
    :rtype: pandas.DataFrame
    """
    attr_list = ['station', 'channel_number', 'component',
                 'coordinate_system', 'dipole_length', 'azimuth', 'units',
                 'lat', 'lon', 'elev', 'datum', 'data_logger',
                 'instrument_id', 'calibration_fn', 'declination',
                 'fn', 'conversion', 'gain']

    # the save folder is the folder of the first ascii file that is set;
    # if there is none, fall back to a 'TS' folder next to the first z3d file
    # (.iloc avoids an ambiguous label lookup when index labels repeat)
    try:
        fn_series = z3d_df.fn_ascii[z3d_df.fn_ascii != 'None']
        sv_path = Path(fn_series.iloc[0]).parent
    except IndexError:
        fn_series = z3d_df.fn_z3d[z3d_df.fn_z3d != 'None']
        sv_path = Path(fn_series.iloc[0]).parent
        sv_path = Path(sv_path).joinpath('TS')

    def make_entry(ts_obj, fn_ascii, cal_fn):
        # one data frame row describing a combined time series
        return {'station': ts_obj.station,
                'start': ts_obj.start_time_utc,
                'stop': ts_obj.stop_time_utc,
                'sampling_rate': ts_obj.sampling_rate,
                'component': ts_obj.component,
                'fn_z3d': None,
                'azimuth': ts_obj.azimuth,
                'dipole_length': ts_obj.dipole_length,
                'coil_number': ts_obj.instrument_id,
                'latitude': ts_obj.lat,
                'longitude': ts_obj.lon,
                'elevation': ts_obj.elev,
                'n_samples': ts_obj.n_samples,
                'fn_ascii': fn_ascii,
                'remote': remote,
                'block': 0,
                'zen_num': ts_obj.data_logger,
                'cal_fn': cal_fn}

    combined_entries = []
    if remote:
        comp_list = ['hx', 'hy']
    else:
        comp_list = ['ex', 'ey', 'hx', 'hy', 'hz']

    available = list(z3d_df.component.unique())
    # match existing files at the requested rate instead of a hard-coded 4
    existing_pattern = '*_{0}.*'.format(int(new_sampling_rate))

    for comp in comp_list:
        # sometimes there is no HZ and skip
        if comp not in available:
            continue
        cal_fn = z3d_df[z3d_df.component == comp].cal_fn.mode()[0]

        # check to see if file exists, check for upper and lower case
        suffix_list = ['.{0}'.format(cc)
                       for cc in [comp.lower(), comp.upper()]]
        cfn_list = [fn_path for fn_path in sv_path.rglob(existing_pattern)
                    if fn_path.suffix in suffix_list]
        if len(cfn_list) == 1:
            comp_fn = cfn_list[0]
            if comp_fn.suffix[1:] == comp.lower():
                new_name = comp_fn.with_suffix('.{0}'.format(comp.upper()))
                comp_fn = comp_fn.rename(new_name)
            print('INFO: skipping {0} already exists'.format(comp_fn))
            ts_obj = mtts.MTTS()
            ts_obj.read_ascii_header(comp_fn)
            combined_entries.append(make_entry(ts_obj, comp_fn, cal_fn))
            continue

        # sort out files for the given component
        comp_df = z3d_df[z3d_df.component == comp].copy()
        if len(comp_df) == 0:
            print('WARNING: Skipping {0} because no Z3D files found.'.format(
                comp))
            continue

        # sort the data frame by date
        comp_df = comp_df.sort_values('start')

        # total time span covered by the files of this component; fall back
        # to a fixed length if the span cannot be computed
        start_dt = comp_df.start.min()
        try:
            end_dt = comp_df.stop.max()
            t_diff = int((end_dt - start_dt).total_seconds())
        except ValueError:
            t_diff = 4 * 3600 * 48

        # make a new MTTS object that is buffered at the end to make sure
        # there is room for the data, it is trimmed further down
        new_ts = mtts.MTTS()
        new_ts.ts = np.zeros(int((t_diff + t_buffer) * new_sampling_rate))
        new_ts.sampling_rate = new_sampling_rate
        new_ts.start_time_utc = start_dt.isoformat()

        # metadata of every file, used to fill in the new MTTS object
        attr_dict = {key: [] for key in attr_list}

        # loop over each z3d file for the given component
        for row in comp_df.itertuples():
            z_obj = zen.Zen3D(row.fn_z3d)
            z_obj.read_z3d()
            t_obj = z_obj.ts_obj
            if row.component in ['ex', 'ey']:
                # scale electric channels by the dipole length
                t_obj.ts.data /= (row.dipole_length / 1000)
                t_obj.units = 'mV/km'
                print('INFO: Using scales {0} = {1} m'.format(
                    row.component, row.dipole_length))

            # decimate to the required sampling rate
            t_obj.decimate(int(z_obj.df / new_sampling_rate))

            # fill the new time series with the data at appropriate times
            new_ts.ts.data[(new_ts.ts.index >= t_obj.ts.index[0]) &
                           (new_ts.ts.index <= t_obj.ts.index[-1])] = \
                t_obj.ts.data

            # files are sorted by start, so after the loop this is the end
            # of the last z3d file
            end_date = z_obj.ts_obj.ts.index[-1]

            # fill attribute data frame
            for attr in attr_list:
                attr_dict[attr].append(getattr(t_obj, attr))

        # trim the buffer, then fill gaps (samples still equal to zero) with
        # the previous value; this seems to work better than interpolation
        # and is faster than regression.  The gaps should be max 13 seconds
        # if everything went well.
        ts_index = new_ts.ts.index
        trimmed = new_ts.ts.data[(ts_index >= start_dt) &
                                 (ts_index <= end_date)]
        trimmed = trimmed.mask(trimmed == 0).ffill()
        new_ts.ts = trimmed.to_frame()

        # fill the new MTTS with the appropriate metadata: median of the
        # non-zero values, or the most common value for non-numeric data
        attr_df = pd.DataFrame(attr_dict)
        for attr in attr_list:
            try:
                attr_series = attr_df[attr][attr_df[attr] != 0]
                try:
                    setattr(new_ts, attr, attr_series.median())
                except TypeError:
                    setattr(new_ts, attr, attr_series.mode()[0])
            except ValueError:
                print('WARNING: could not set {0}'.format(attr))

        ascii_fn = '{0}_combined_{1}.{2}'.format(new_ts.station,
                                                 int(new_ts.sampling_rate),
                                                 new_ts.component.upper())

        sv_fn_ascii = sv_path.joinpath(ascii_fn)
        new_ts.write_ascii_file(sv_fn_ascii.as_posix())

        combined_entries.append(make_entry(new_ts, sv_fn_ascii, cal_fn))

    # nothing was combined, hand back a copy of the input
    if not combined_entries:
        return z3d_df.copy()

    # append the combined information to the existing data frame
    # (DataFrame.append no longer exists in pandas >= 2.0)
    return pd.concat([z3d_df, pd.DataFrame(combined_entries)])
def from_df_to_mtts(self, z3d_df, block_dict=None, notch_dict=None,
                    overwrite=False, combine=True,
                    combine_sampling_rate=4, remote=False):
    """
    Convert z3d files to MTTS objects and write ascii files if they do
    not already exist.

    Ascii files are looked for in (and expected under) the ``TS`` folder of
    ``self.z3d_path``; file names are matched on station, date and
    hour/minute only because seconds can differ with GPS timing.

    :param z3d_df: dataframe holding information about z3d files
                   see help for more information on data frame structure
    :type z3d_df: pandas.DataFrame
    :param block_dict: dictionary of blocks to use. Has keys of sample rate
                       and values of a list of blocks to use; it is passed
                       through ``self._validate_block_dict`` first.  Entries
                       whose sampling rate is not a key are skipped.
                       Defaults to None
    :type block_dict: dictionary, optional
    :param notch_dict: dictionary of notches, passed on unchanged to
                       ``Zen3D.write_ascii_mt_file``, defaults to None
    :type notch_dict: dictionary, optional
    :param overwrite: if False, entries whose ascii file already exists
                      are not rewritten; only their header is read,
                      defaults to False
    :type overwrite: bool, optional
    :param combine: if True, call :meth:`combine_z3d_files` on the result,
                    defaults to True
    :type combine: bool, optional
    :param combine_sampling_rate: sampling rate passed to
                                  :meth:`combine_z3d_files`, defaults to 4
    :type combine_sampling_rate: int, optional
    :param remote: if True only the ``hx`` and ``hy`` rows are used (on a
                   copy of ``z3d_df``) and rows are flagged as remote,
                   defaults to False
    :type remote: bool, optional
    :return: dataframe filled with timeseries information
    :rtype: pandas.DataFrame

    .. todo:: Add examples of notch dict, block dict
    """
    # if the block dictionary is empty make one that covers all files
    block_dict = self._validate_block_dict(z3d_df, block_dict)

    if remote:
        # copy so the .at assignments below write to an independent frame
        # and not to a slice of the caller's data frame
        z3d_df = z3d_df[z3d_df.component.isin(['hx', 'hy'])].copy()

    station = self.z3d_path.name
    sv_path = self.z3d_path.joinpath('TS')

    # loop over each entry in the data frame
    for entry in z3d_df.itertuples():
        # skip sampling rates that are not in the block dictionary and
        # blocks that were not requested
        if entry.block not in block_dict.get(entry.sampling_rate, ()):
            continue

        # check to see if the file already exists
        # need to skip looking for seconds because of GPS difference
        fn_ascii = entry.fn_ascii
        if fn_ascii == 'None':
            sv_date = entry.start.strftime('%Y%m%d')
            sv_time = entry.start.strftime('%H%M')
            fn_test = '{0}_{1}_{2}*'.format(station, sv_date, sv_time)
            sv_ext = '{0}.{1}'.format(int(entry.sampling_rate),
                                      entry.component.upper())
            # first matching file, else the name the file is expected
            # to have
            fn_ascii = next(
                (p for p in sv_path.glob(fn_test) if sv_ext in p.name),
                sv_path.joinpath('{0}_{1}_{2}_{3}.{4}'.format(
                    station, sv_date, sv_time,
                    int(entry.sampling_rate),
                    entry.component.upper())))

        # if the file exists and no overwrite get information and skip
        if Path(fn_ascii).exists() and overwrite is False:
            print('INFO: Skipping {0}'.format(fn_ascii))
            ts_obj = mtts.MTTS()
            ts_obj.read_ascii_header(fn_ascii)
            z3d_df.at[entry.Index, 'stop'] = pd.Timestamp(
                ts_obj.stop_time_utc)
            z3d_df.at[entry.Index, 'n_samples'] = ts_obj.n_samples
            z3d_df.at[entry.Index, 'start'] = pd.Timestamp(
                ts_obj.start_time_utc)
            z3d_df.at[entry.Index, 'fn_ascii'] = ts_obj.fn
            z3d_df.at[entry.Index, 'remote'] = remote
            continue

        # make file if it does not exist
        z3d_obj = zen.Zen3D(entry.fn_z3d)
        z3d_obj.read_z3d()
        ts_obj = z3d_obj.ts_obj
        ts_obj.calibration_fn = entry.cal_fn

        # write mtpy mt file
        z3d_obj.write_ascii_mt_file(notch_dict=notch_dict)

        # get information from time series and fill data frame
        z3d_df.at[entry.Index, 'stop'] = pd.Timestamp(ts_obj.stop_time_utc)
        z3d_df.at[entry.Index, 'n_samples'] = ts_obj.n_samples
        z3d_df.at[entry.Index, 'start'] = pd.Timestamp(
            ts_obj.start_time_utc)
        z3d_df.at[entry.Index, 'fn_ascii'] = z3d_obj.fn_mt_ascii
        z3d_df.at[entry.Index, 'remote'] = remote

    if combine:
        z3d_df = self.combine_z3d_files(z3d_df,
                                        new_sampling_rate=combine_sampling_rate,
                                        remote=remote)

    z3d_df.start = pd.to_datetime(z3d_df.start)
    z3d_df.stop = pd.to_datetime(z3d_df.stop)

    return z3d_df
# In[ ]: ### some generic parameters for making time series sampling_rate = 64 n_samples = sampling_rate * 3600 * 4 ### make a time array t = np.arange(n_samples) * 1./sampling_rate # In[ ]: for comp in ['ex', 'ey', 'hx', 'hy', 'hz']: ts_obj = ts.MTTS() if 'x' in comp: ts_obj.azimuth = 0 elif 'y' in comp: ts_obj.azimuth = 90 ts_obj.component = comp ts_obj.coordinate_system = 'geomagnetic' ts_obj.datum = 'WGS84' ts_obj.declination = -11.5 ts_obj.data_logger = 'example' ts_obj.instrument_id = 'test' ts_obj.lat = 40.0 ts_obj.lon = -120.00 ts_obj.sampling_rate = sampling_rate ts_obj.start_time_utc = '2018-01-01T12:00:00.00 UTC' ts_obj.station = 'MT01'
def combine_z3d_files(z3d_path, new_sampling_rate=4, t_buffer=8 * 3600,
                      comp_list=('ex', 'ey', 'hx', 'hy', 'hz')):
    """
    Combine all z3d files for a given station and given components for
    processing and getting the long period estimations.

    For every component the z3d files found by ``get_z3d_info`` are read,
    decimated to ``new_sampling_rate``, placed into one long time series,
    gap filled and written to
    ``{station}_combined_{sampling_rate}.{component}`` in ``z3d_path``.

    :param z3d_path: full path to z3d files
    :type z3d_path: str or pathlib.Path
    :param int new_sampling_rate: new sampling rate of the data
    :param int t_buffer: buffer (seconds) for the last time series, should
        be length of longest schedule chunk
    :param comp_list: components to combine
    :type comp_list: iterable of str
    :return: paths of the combined ascii files that were written
    :rtype: list of pathlib.Path
    """
    from pathlib import Path

    st = datetime.datetime.now()
    attr_list = [
        "station",
        "channel_number",
        "component",
        "coordinate_system",
        "dipole_length",
        "azimuth",
        "units",
        "lat",
        "lon",
        "elev",
        "datum",
        "data_logger",
        "instrument_id",
        "calibration_fn",
        "declination",
        "fn",
        "conversion",
        "gain",
    ]

    fn_df = get_z3d_info(z3d_path)
    # accept the plain string promised by the docstring as well as a Path
    sv_path = Path(z3d_path)

    return_fn_list = []
    for comp in comp_list:
        if len(fn_df[comp]) == 0:
            print('Warning: Skipping {0} because no Z3D files found.'.format(
                comp))
            continue
        comp_df = pd.DataFrame(fn_df[comp])
        # sort the data frame by date
        comp_df = comp_df.sort_values('start')

        # get start date and end at last start date, get time difference;
        # the last file itself is covered by t_buffer
        start_dt = datetime.datetime.fromisoformat(comp_df.start.min())
        end_dt = datetime.datetime.fromisoformat(comp_df.start.max())
        t_diff = (end_dt - start_dt).total_seconds()

        # make a new MTTS object that is buffered at the end to make sure
        # there is room for the data, it is trimmed further down.  The
        # requested rate is used here, not a module level ``sampling_rate``.
        new_ts = ts.MTTS()
        new_ts.ts = np.zeros(int((t_diff + t_buffer) * new_sampling_rate))
        new_ts.sampling_rate = new_sampling_rate
        new_ts.start_time_utc = start_dt

        # metadata of every file, used to fill in the new MTTS object
        attr_dict = {key: [] for key in attr_list}

        # loop over each z3d file for the given component
        for row in comp_df.itertuples():
            z_obj = zen.Zen3D(row.fn)
            print(row.fn)
            z_obj.read_z3d()
            t_obj = z_obj.ts_obj

            # decimate to the required sampling rate
            t_obj.decimate(int(z_obj.df / new_sampling_rate))

            # fill the new time series with the data at the appropriate times
            print(f"start = {t_obj.ts.index[0]}, end = {t_obj.ts.index[-1]}")
            new_ts.ts.data[(new_ts.ts.index >= t_obj.ts.index[0]) &
                           (new_ts.ts.index <= t_obj.ts.index[-1])] = \
                t_obj.ts.data

            # files are sorted by start, so after the loop this is the end
            # of the last z3d file
            end_date = z_obj.ts_obj.ts.index[-1]

            # fill attribute data frame
            for attr in attr_list:
                attr_dict[attr].append(getattr(t_obj, attr))

        # trim the buffer, then fill gaps (samples still equal to zero) with
        # the previous value; this seems to work better than interpolation
        # and is faster than regression.  The gaps should be max 13 seconds
        # if everything went well.
        ts_index = new_ts.ts.index
        trimmed = new_ts.ts.data[(ts_index >= start_dt) &
                                 (ts_index <= end_date)]
        trimmed = trimmed.mask(trimmed == 0).ffill()
        new_ts.ts = trimmed.to_frame()

        # fill the new MTTS with the appropriate metadata: median of the
        # non-zero values, or the most common value for non-numeric data
        attr_df = pd.DataFrame(attr_dict)
        for attr in attr_list:
            try:
                attr_series = attr_df[attr][attr_df[attr] != 0]
                try:
                    setattr(new_ts, attr, attr_series.median())
                except TypeError:
                    setattr(new_ts, attr, attr_series.mode()[0])
            except ValueError:
                print('Warning: could not set {0}'.format(attr))

        ascii_fn = '{0}_combined_{1}.{2}'.format(new_ts.station,
                                                 int(new_ts.sampling_rate),
                                                 new_ts.component)

        sv_fn_ascii = sv_path.joinpath(ascii_fn)
        new_ts.write_ascii_file(sv_fn_ascii.absolute())

        return_fn_list.append(sv_fn_ascii)

    et = datetime.datetime.now()
    compute_time = (et - st).total_seconds()
    print(" Combining took {0:.2f} seconds".format(compute_time))
    return return_fn_list
fn_df = get_z3d_info(z3d_path) return_fn_list = [] comp_df = pd.DataFrame(fn_df[comp]) # sort the data frame by date comp_df = comp_df.sort_values('start') # get start date and end at last start date, get time difference start_dt = datetime.datetime.fromisoformat(comp_df.start.min()) end_dt = datetime.datetime.fromisoformat(comp_df.start.max()) t_diff = (end_dt - start_dt).total_seconds() # make a new MTTS object that will have a length that is buffered # at the end to make sure there is room for the data, will trimmed new_ts = ts.MTTS() new_ts.ts = np.zeros(int((t_diff + t_buffer) * sampling_rate)) new_ts.sampling_rate = sampling_rate new_ts.start_time_utc = start_dt # make an attribute dictionary that can be used to fill in the new # MTTS object attr_dict = dict([(key, []) for key in attr_list]) # loop over each z3d file for the given component index = 1 for row in comp_df.itertuples(): z_obj = zen.Zen3D(row.fn) print(row) z_obj.read_z3d() t_obj = z_obj.ts_obj # decimate to the required sampling rate