def run_timeseries2time_func(inps):
    """Estimate time-function parameters and their STD from a time-series file.

    The input file is processed block-by-block (split along y) so that the
    estimated memory footprint stays below ``inps.maxMemory`` (in GB). For each
    block the parameters are solved via ``time_func.estimate_time_func`` and
    their STD is obtained from one of:
      1. bootstrap resampling (``inps.bootstrap``),
      2.1 linear propagation of the time-series STD (``inps.ts_std_file``),
      2.2 the least-squares fitting residual (default).

    Parameters: inps - namespace; the attributes read here include
                       timeseries_file, outfile, dateList, numDate, dropDate,
                       ref_date, ref_yx, bootstrap, bootstrapCount,
                       ts_std_file, save_res, res_file and maxMemory.
    Returns:    inps.outfile - str, path of the time-function parameter file
    """
    # basic info
    atr = readfile.read_attribute(inps.timeseries_file)
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    num_date = inps.numDate
    dates = np.array(inps.dateList)
    seconds = atr.get('CENTER_LINE_UTC', 0)

    # use the 1st date as reference if not found, e.g. timeseriesResidual.h5 file
    if "REF_DATE" not in atr.keys() and not inps.ref_date:
        inps.ref_date = inps.dateList[0]
        print('WARNING: No REF_DATE found in time-series file or input in command line.')
        print(' Set "--ref-date {}" and continue.'.format(inps.dateList[0]))

    # get deformation model from parsers
    model, num_param = read_inps2model(inps)

    ## output preparation

    # time_func_param: attributes
    atrV = dict(atr)
    atrV['FILE_TYPE'] = 'velocity'
    atrV['UNIT'] = 'm/year'
    atrV['START_DATE'] = inps.dateList[0]
    atrV['END_DATE'] = inps.dateList[-1]
    atrV['DATE12'] = '{}_{}'.format(inps.dateList[0], inps.dateList[-1])
    if inps.ref_yx:
        atrV['REF_Y'] = inps.ref_yx[0]
        atrV['REF_X'] = inps.ref_yx[1]
    if inps.ref_date:
        atrV['REF_DATE'] = inps.ref_date

    # time_func_param: config parameter
    print('add/update the following configuration metadata:\n{}'.format(configKeys))
    for key in configKeys:
        atrV[key_prefix + key] = str(vars(inps)[key])

    # time_func_param: instantiate output file
    ds_name_dict, ds_unit_dict = model2hdf5_dataset(model, ds_shape=(length, width))[1:]
    writefile.layout_hdf5(inps.outfile,
                          metadata=atrV,
                          ds_name_dict=ds_name_dict,
                          ds_unit_dict=ds_unit_dict)

    # timeseries_res: attributes + instantiate output file
    if inps.save_res:
        atrR = dict(atr)
        atrR.pop('REF_DATE', None)
        writefile.layout_hdf5(inps.res_file,
                              metadata=atrR,
                              ref_file=inps.timeseries_file)

    ## estimation

    # calc number of box based on memory limit
    memoryAll = (num_date + num_param * 2 + 2) * length * width * 4
    if inps.bootstrap:
        memoryAll += inps.bootstrapCount * num_param * length * width * 4
    num_box = int(np.ceil(memoryAll * 3 / (inps.maxMemory * 1024**3)))
    box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length),
                                           num_split=num_box,
                                           dimension='y',
                                           print_msg=True)

    # loop for block-by-block IO
    for i, box in enumerate(box_list):
        box_wid = box[2] - box[0]
        box_len = box[3] - box[1]
        num_pixel = box_len * box_wid
        if num_box > 1:
            print('\n------- processing patch {} out of {} --------------'.format(i + 1, num_box))
            print('box width: {}'.format(box_wid))
            print('box length: {}'.format(box_len))

        # initiate output
        m = np.zeros((num_param, num_pixel), dtype=dataType)
        m_std = np.zeros((num_param, num_pixel), dtype=dataType)
        # design matrix of the full date list; only set by the non-bootstrap
        # solver, and computed on demand below for the residual otherwise
        G = None

        # read input
        print('reading data from file {} ...'.format(inps.timeseries_file))
        ts_data = readfile.read(inps.timeseries_file, box=box)[0]

        # referencing in time and space
        # for file w/o reference info. e.g. ERA5.h5
        # NOTE(review): ref_ind is looked up in inps.dateList but applied before
        # the inps.dropDate selection below - confirm the two date axes agree
        # when dates are excluded.
        if inps.ref_date:
            print('referecing to date: {}'.format(inps.ref_date))
            ref_ind = inps.dateList.index(inps.ref_date)
            ts_data -= np.tile(ts_data[ref_ind, :, :], (ts_data.shape[0], 1, 1))

        if inps.ref_yx:
            print('referencing to point (y, x): ({}, {})'.format(inps.ref_yx[0], inps.ref_yx[1]))
            ref_box = (inps.ref_yx[1], inps.ref_yx[0],
                       inps.ref_yx[1] + 1, inps.ref_yx[0] + 1)
            ref_val = readfile.read(inps.timeseries_file, box=ref_box)[0]
            ts_data -= np.tile(ref_val.reshape(ts_data.shape[0], 1, 1),
                               (1, ts_data.shape[1], ts_data.shape[2]))

        ts_data = ts_data[inps.dropDate, :, :].reshape(inps.numDate, -1)
        # convert to meter based on the unit of the INPUT file; atrV['UNIT'] has
        # already been overwritten to 'm/year' above and can never equal 'mm'
        if atr.get('UNIT', 'm') == 'mm':
            ts_data *= 1. / 1000.

        ts_std = None
        if inps.ts_std_file:
            ts_std = readfile.read(inps.ts_std_file, box=box)[0]
            ts_std = ts_std[inps.dropDate, :, :].reshape(inps.numDate, -1)
            # set zero value to a fixed small value to avoid divide by zero
            epsilon = 1e-5
            ts_std[ts_std < epsilon] = epsilon

        # mask invalid pixels
        print('skip pixels with zero/nan value in all acquisitions')
        ts_stack = np.nanmean(ts_data, axis=0)
        mask = np.multiply(~np.isnan(ts_stack), ts_stack != 0.)
        del ts_stack

        if ts_std is not None:
            print('skip pxiels with nan STD value in any acquisition')
            num_std_nan = np.sum(np.isnan(ts_std), axis=0)
            mask *= num_std_nan == 0
            del num_std_nan

        ts_data = ts_data[:, mask]
        num_pixel2inv = int(np.sum(mask))
        idx_pixel2inv = np.where(mask)[0]
        print('number of pixels to invert: {} out of {} ({:.1f}%)'.format(
            num_pixel2inv, num_pixel, num_pixel2inv / num_pixel * 100))

        # go to next if no valid pixel found
        if num_pixel2inv == 0:
            continue

        ### estimation / solve Gm = d
        print('estimating time functions via linalg.lstsq ...')

        if inps.bootstrap:
            ## option 1 - least squares with bootstrapping
            # Bootstrapping is a resampling method which can be used to estimate properties
            # of an estimator. The method relies on independently sampling the data set with
            # replacement.
            print('estimating time function STD with bootstrap resampling ({} times) ...'
                  .format(inps.bootstrapCount))

            # calc model of all bootstrap sampling
            rng = np.random.default_rng()
            m_boot = np.zeros((inps.bootstrapCount, num_param, num_pixel2inv), dtype=dataType)
            prog_bar = ptime.progressBar(maxValue=inps.bootstrapCount)
            for i_boot in range(inps.bootstrapCount):
                # bootstrap resampling
                boot_ind = rng.choice(inps.numDate, size=inps.numDate, replace=True)
                boot_ind.sort()

                # estimation
                m_boot[i_boot] = time_func.estimate_time_func(
                    model=model,
                    date_list=dates[boot_ind].tolist(),
                    dis_ts=ts_data[boot_ind],
                    seconds=seconds)[1]

                prog_bar.update(i_boot + 1,
                                suffix='iteration {} / {}'.format(i_boot + 1, inps.bootstrapCount))
            prog_bar.close()

            # get mean/std among all bootstrap sampling
            m[:, mask] = m_boot.mean(axis=0).reshape(num_param, -1)
            m_std[:, mask] = m_boot.std(axis=0).reshape(num_param, -1)
            del m_boot

        else:
            ## option 2 - least squares with uncertainty propagation
            G, m[:, mask], e2 = time_func.estimate_time_func(
                model=model,
                date_list=inps.dateList,
                dis_ts=ts_data,
                seconds=seconds)

            ## Compute the covariance matrix for model parameters: Gm = d
            # C_m_hat = (G.T * C_d^-1, * G)^-1  # linear propagation from the TS covariance matrix. (option 2.1)
            #         = sigma^2 * (G.T * G)^-1  # assuming obs errors are normally dist. in time.    (option 2.2a)
            # Based on the law of integrated expectation, we estimate the obs sigma^2 using
            # the OLS estimation residual e_hat_i = d_i - d_hat_i
            # sigma^2 = sigma_hat^2 * N / (N - P)                                                    (option 2.2b)
            #         = (e_hat.T * e_hat) / (N - P)
            # sigma_hat^2 = (e_hat.T * e_hat) / N

            if ts_std is not None:
                # option 2.1 - linear propagation from time-series covariance matrix
                print('estimating time function STD from time-series STD pixel-by-pixel ...')
                prog_bar = ptime.progressBar(maxValue=num_pixel2inv)
                for i_pix in range(num_pixel2inv):
                    idx = idx_pixel2inv[i_pix]

                    try:
                        C_ts_inv = np.diag(1. / np.square(ts_std[:, idx].flatten()))
                        m_var = np.diag(linalg.inv(G.T.dot(C_ts_inv).dot(G))).astype(np.float32)
                        m_std[:, idx] = np.sqrt(m_var)
                    except linalg.LinAlgError:
                        # singular normal matrix: no STD available for this pixel
                        m_std[:, idx] = np.nan

                    prog_bar.update(i_pix + 1, every=200,
                                    suffix='{}/{} pixels'.format(i_pix + 1, num_pixel2inv))
                prog_bar.close()

            else:
                # option 2.2a - assume obs errors following normal dist. in time
                print('estimating time function STD from time-series fitting residual ...')
                G_inv = linalg.inv(np.dot(G.T, G))
                m_var = e2.reshape(1, -1) / (num_date - num_param)
                m_std[:, mask] = np.sqrt(np.dot(np.diag(G_inv).reshape(-1, 1), m_var))

                # option 2.2b - simplified form for linear velocity (without matrix linear algebra)
                # The STD can also be calculated using Eq. (10) from Fattahi and Amelung (2015, JGR)
                # ts_diff = ts_data - np.dot(G, m)
                # t_diff = G[:, 1] - np.mean(G[:, 1])
                # vel_std = np.sqrt(np.sum(ts_diff ** 2, axis=0) / np.sum(t_diff ** 2) / (num_date - 2))

        # write - time func params
        block = [box[1], box[3], box[0], box[2]]
        ds_dict = model2hdf5_dataset(model, m, m_std, mask=mask)[0]
        for ds_name, data in ds_dict.items():
            writefile.write_hdf5_block(inps.outfile,
                                       data=data.reshape(box_len, box_wid),
                                       datasetName=ds_name,
                                       block=block)

        # write - residual file
        if inps.save_res:
            if G is None:
                # the bootstrap branch never builds the design matrix of the
                # full date list; obtain it from the same solver call used by
                # the non-bootstrap branch
                G = time_func.estimate_time_func(
                    model=model,
                    date_list=inps.dateList,
                    dis_ts=ts_data,
                    seconds=seconds)[0]

            block = [0, num_date, box[1], box[3], box[0], box[2]]
            ts_res = np.ones((num_date, box_len * box_wid), dtype=np.float32) * np.nan
            ts_res[:, mask] = ts_data - np.dot(G, m)[:, mask]
            writefile.write_hdf5_block(inps.res_file,
                                       data=ts_res.reshape(num_date, box_len, box_wid),
                                       datasetName='timeseries',
                                       block=block)

    return inps.outfile
def prepare_geometry_definition_radar(self):
    """Get src_def and dest_def for lookup table in radar-coord (from ISCE, DORIS)

    Reads the latitude / longitude lookup table and fills, on self:
        lalo_step, SNWE, length, width,
        src_box_list, src_def_list, dest_box_list, dest_def_list
    Boxes are (x0, y0, x1, y1) with exclusive end indices, as used for slicing.

    Two modes, selected by the presence of 'Y_FIRST' in self.src_meta:
        radar2geo - source is the lookup-table swath, destination is a regular
                    lat/lon grid split into self.num_box boxes along y.
        geo2radar - source is a regular lat/lon grid, destination is the
                    lookup-table swath; always a single box.
    """

    def mark_lat_lon_anomoly(lat, lon):
        """mask pixels with abnormal values (0, etc.)
        This is found on sentinelStack multiple swath lookup table file.
        Returns the (modified in place) lat, lon and the boolean validity mask.
        """
        # ignore pixels with zero value
        zero_mask = np.multiply(lat != 0., lon != 0.)

        # ignore anomaly non-zero values
        # by get the most common data range (d_min, d_max) based on histogram
        mask = np.array(zero_mask, np.bool_)
        for data in [lat, lon]:
            bin_value, bin_edge = np.histogram(data[mask], bins=10)
            # if there is anomaly, histogram won't be evenly distributed
            while np.max(bin_value) > np.sum(zero_mask) * 0.3:
                # find the continous bins where the largest bin is --> normal data range
                bin_value_thres = ut.median_abs_deviation_threshold(bin_value, cutoff=3)
                bin_label = ndimage.label(bin_value > bin_value_thres)[0]
                idx = np.where(bin_label == bin_label[np.argmax(bin_value)])[0]
                # convert to min/max data value
                bin_step = bin_edge[1] - bin_edge[0]
                d_min = bin_edge[idx[0]] - bin_step / 2.
                d_max = bin_edge[idx[-1] + 1] + bin_step / 2.
                mask *= np.multiply(data >= d_min, data <= d_max)
                bin_value, bin_edge = np.histogram(data[mask], bins=10)

        # set invalid pixels to fixed values
        lat[mask == 0] = 90.
        lon[mask == 0] = 0.
        return lat, lon, mask

    # read lookup table: lat/lon at pixel center
    # src  for radar2geo
    # dest for geo2radar
    print('read latitude / longitude from lookup table file: {}'.format(self.lut_file))
    lat_file = self.lat_file if self.lat_file else self.lut_file
    lon_file = self.lon_file if self.lon_file else self.lut_file
    lut_lat = readfile.read(lat_file, datasetName='latitude')[0].astype(np.float32)
    lut_lon = readfile.read(lon_file, datasetName='longitude')[0].astype(np.float32)
    lut_lat, lut_lon, mask = mark_lat_lon_anomoly(lut_lat, lut_lon)

    # radar2geo (with block-by-block support)
    if 'Y_FIRST' not in self.src_meta.keys():

        # src_lat/lon0/1 (lat0 = north-most, lat1 = south-most)
        src_lat0 = np.nanmax(lut_lat[mask])
        src_lat1 = np.nanmin(lut_lat[mask])
        src_lon0 = np.nanmin(lut_lon[mask])
        src_lon1 = np.nanmax(lut_lon[mask])

        # parameter 1 - lalo_step (output grid)
        if self.lalo_step is None:
            try:
                # ensure the same pixel area before / after geocoding
                merged_meta = {**self.lut_meta, **self.src_meta}
                lat_c = (src_lat0 + src_lat1) / 2.
                lat_step, lon_step = ut.auto_lat_lon_step_size(merged_meta, lat_c)

            except KeyError:
                # ensure the same matrix shape before / after geocoding
                # if not enough metadata found for the above
                lat_step = (src_lat1 - src_lat0) / (lut_lat.shape[0] - 1)
                lon_step = (src_lon1 - src_lon0) / (lut_lat.shape[1] - 1)

            self.lalo_step = (abs(lat_step) * -1., abs(lon_step))

        else:
            # ensure lat/lon step sign
            self.lalo_step = (abs(self.lalo_step[0]) * -1.,
                              abs(self.lalo_step[1]) * 1.)
        print('output pixel size in (lat, lon) in degree: {}'.format(self.lalo_step))

        # parameter 2 / 3 - SNWE (at pixel outer boundary; output grid) / length & width
        if self.SNWE is None:
            self.SNWE = (src_lat1 + self.lalo_step[0] / 2.0,
                         src_lat0 - self.lalo_step[0] / 2.0,
                         src_lon0 - self.lalo_step[1] / 2.0,
                         src_lon1 + self.lalo_step[1] / 2.0)

        self.length = int(np.rint((self.SNWE[0] - self.SNWE[1]) / self.lalo_step[0]))
        self.width = int(np.rint((self.SNWE[3] - self.SNWE[2]) / self.lalo_step[1]))

        # adjust SNWE ending coordinate (S, E) for precise alignment
        self.SNWE = (self.SNWE[1] + self.lalo_step[0] * self.length,
                     self.SNWE[1],
                     self.SNWE[2],
                     self.SNWE[2] + self.lalo_step[1] * self.width)
        print('output area extent in (S, N, W, E) in degree: {}'.format(self.SNWE))
        print('output file row / column number: ({}, {})'.format(self.length, self.width))

        # parameter 4 - list of boxes & geometry definitions
        self.src_box_list = []
        self.src_def_list = []
        self.dest_box_list = []
        self.dest_def_list = []

        # split dest_box (in grid)
        self.dest_box_list = split_box2sub_boxes(box=(0, 0, self.width, self.length),
                                                 num_split=self.num_box,
                                                 dimension='y',
                                                 print_msg=True)

        # Area of the whole swath. Latitude runs north -> south here, so the
        # signed products are negative on both sides; compare magnitudes,
        # otherwise the "dest is less than half of src" test is inverted.
        src_area = abs((src_lat1 - src_lat0) * (src_lon1 - src_lon0))
        full_src_box = (0, 0, lut_lat.shape[1], lut_lat.shape[0])

        # dest_box --> src_box / src_def / dest_def
        for i, dest_box in enumerate(self.dest_box_list):
            if self.num_box > 1:
                print('preparing geometry for dest_box {}/{}: {}'.format(
                    i + 1, self.num_box, dest_box))

            # dest_lat/lon at pixel center
            lat_num = dest_box[3] - dest_box[1]
            lon_num = dest_box[2] - dest_box[0]
            lat0 = self.SNWE[1] + self.lalo_step[0] * (dest_box[1] + 0.5)
            lat1 = self.SNWE[1] + self.lalo_step[0] * (dest_box[3] - 0.5)
            lon0 = self.SNWE[2] + self.lalo_step[1] * (dest_box[0] + 0.5)
            lon1 = self.SNWE[2] + self.lalo_step[1] * (dest_box[2] - 0.5)
            dest_lat, dest_lon = np.mgrid[lat0:lat1:lat_num * 1j,
                                          lon0:lon1:lon_num * 1j]

            # src_box
            dest_area = abs((lat1 - lat0) * (lon1 - lon0))
            src_box = full_src_box
            if dest_area < src_area * 0.5:
                # reduction of swath data
                # https://pyresample.readthedocs.io/en/latest/data_reduce.html
                # get src_box (in swath) from lat/lon (from dest_box in grid)
                print('searching relevant box covering the current SNWE')
                flag = pr.data_reduce.get_valid_index_from_lonlat_grid(
                    dest_lon,
                    dest_lat,
                    lut_lon,
                    lut_lat,
                    radius_of_influence=3000)
                idx_row, idx_col = np.where(flag)
                # keep the full table if nothing was flagged (np.min would raise)
                if idx_row.size > 0:
                    # box ends are exclusive (used as slice stops below), hence
                    # +1 so the last flagged row / column is retained
                    src_box = (np.min(idx_col), np.min(idx_row),
                               np.max(idx_col) + 1, np.max(idx_row) + 1)

            # geometry definition
            src_def = pr.geometry.SwathDefinition(
                lons=lut_lon[src_box[1]:src_box[3], src_box[0]:src_box[2]],
                lats=lut_lat[src_box[1]:src_box[3], src_box[0]:src_box[2]])
            dest_def = pr.geometry.GridDefinition(lons=dest_lon, lats=dest_lat)

            self.src_box_list.append(src_box)
            self.src_def_list.append(src_def)
            self.dest_def_list.append(dest_def)

    # geo2radar (WITHOUT block-by-block support)
    else:
        # parameter 1 - lalo_step (input grid)
        self.lalo_step = [float(self.src_meta['Y_STEP']),
                          float(self.src_meta['X_STEP'])]
        print('input pixel size in (lat, lon) in degree: {}'.format(self.lalo_step))

        # parameter 2 - SNWE (input grid)
        lat0 = float(self.src_meta['Y_FIRST'])
        lon0 = float(self.src_meta['X_FIRST'])
        if not self.SNWE:
            # default SNWE --> src_box
            src_box = (0, 0, int(self.src_meta['WIDTH']), int(self.src_meta['LENGTH']))
        else:
            # custom input SNWE --> src_box
            # to align SNWE to precisely to source file in geo-coord
            src_box = (int(np.rint((self.SNWE[2] - lon0) / self.lalo_step[1])),  # x0 - W
                       int(np.rint((self.SNWE[1] - lat0) / self.lalo_step[0])),  # y0 - N
                       int(np.rint((self.SNWE[3] - lon0) / self.lalo_step[1])),  # x1 - E
                       int(np.rint((self.SNWE[0] - lat0) / self.lalo_step[0])))  # y1 - S

        # src_box --> SNWE
        self.SNWE = (lat0 + self.lalo_step[0] * src_box[3],  # S - y1
                     lat0 + self.lalo_step[0] * src_box[1],  # N - y0
                     lon0 + self.lalo_step[1] * src_box[0],  # W - x0
                     lon0 + self.lalo_step[1] * src_box[2])  # E - x1
        print('input area extent in (S, N, W, E) in degree: {}'.format(self.SNWE))

        # parameter 3 - length / width (output grid)
        self.length, self.width = lut_lat.shape

        # src_lat/lon (at pixel center)
        # src_box indices are relative to the file origin (Y_FIRST / X_FIRST),
        # so anchor there; self.SNWE already includes the src_box offset and
        # adding the indices to it would count the offset twice.
        src_len = src_box[3] - src_box[1]
        src_wid = src_box[2] - src_box[0]
        src_lat0 = lat0 + self.lalo_step[0] * (src_box[1] + 0.5)
        src_lat1 = lat0 + self.lalo_step[0] * (src_box[3] - 0.5)
        src_lon0 = lon0 + self.lalo_step[1] * (src_box[0] + 0.5)
        src_lon1 = lon0 + self.lalo_step[1] * (src_box[2] - 0.5)
        src_lat, src_lon = np.mgrid[src_lat0:src_lat1:src_len * 1j,
                                    src_lon0:src_lon1:src_wid * 1j]

        # parameter 4 - list of boxes & geometry definitions
        self.src_box_list = [src_box]
        self.src_def_list = [pr.geometry.GridDefinition(lons=src_lon, lats=src_lat)]
        self.dest_box_list = [(0, 0, self.width, self.length)]
        self.dest_def_list = [pr.geometry.SwathDefinition(lons=lut_lon, lats=lut_lat)]
        self.num_box = 1

    return
def run_timeseries2time_func(inps):
    """Estimate time-function parameters and their STD from a time-series file.

    The input is processed block-by-block (split along y) to keep the estimated
    memory footprint below ``inps.maxMemory`` (in GB). The parameter STD comes
    either from bootstrap resampling (``inps.bootstrap``) or from the
    least-squares fitting residual.

    Parameters: inps - namespace; the attributes read here include
                       timeseries_file, outfile, dateList, numDate, dropDate,
                       ref_date, ref_yx, bootstrap, bootstrapCount, maxMemory.
    Returns:    inps.outfile - str, path of the output file
    """
    # basic info
    atr = readfile.read_attribute(inps.timeseries_file)
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    num_date = inps.numDate
    dates = np.array(inps.dateList)

    # remember the unit of the INPUT data before atr['UNIT'] is overwritten
    # below; it decides whether the data needs converting from mm to m
    ts_unit = atr.get('UNIT', 'm')

    # get deformation model from parsers
    model, num_param = read_inps2model(inps)

    ## output preparation

    # attributes
    atr['FILE_TYPE'] = 'velocity'
    atr['UNIT'] = 'm/year'
    atr['START_DATE'] = inps.dateList[0]
    atr['END_DATE'] = inps.dateList[-1]
    atr['DATE12'] = '{}_{}'.format(inps.dateList[0], inps.dateList[-1])
    if inps.ref_yx:
        atr['REF_Y'] = inps.ref_yx[0]
        atr['REF_X'] = inps.ref_yx[1]
    if inps.ref_date:
        atr['REF_DATE'] = inps.ref_date

    # config parameter
    print('add/update the following configuration metadata:\n{}'.format(configKeys))
    for key in configKeys:
        atr[key_prefix+key] = str(vars(inps)[key])

    # instantiate output file
    layout_hdf5(inps.outfile, atr, model)

    ## estimation

    # calc number of box based on memory limit
    memoryAll = (num_date + num_param * 2 + 2) * length * width * 4
    if inps.bootstrap:
        memoryAll += inps.bootstrapCount * num_param * length * width * 4
    num_box = int(np.ceil(memoryAll * 3 / (inps.maxMemory * 1024**3)))
    box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length),
                                           num_split=num_box,
                                           dimension='y',
                                           print_msg=True)

    # loop for block-by-block IO
    for i, box in enumerate(box_list):
        box_width = box[2] - box[0]
        box_length = box[3] - box[1]
        num_pixel = box_length * box_width
        # block in [y0, y1, x0, x1] for writing
        block = [box[1], box[3], box[0], box[2]]
        if num_box > 1:
            print('\n------- processing patch {} out of {} --------------'.format(i+1, num_box))
            print('box width: {}'.format(box_width))
            print('box length: {}'.format(box_length))

        # initiate output
        m = np.zeros((num_param, num_pixel), dtype=dataType)
        m_std = np.zeros((num_param, num_pixel), dtype=dataType)

        # read input
        print('reading data from file {} ...'.format(inps.timeseries_file))
        ts_data = readfile.read(inps.timeseries_file, box=box)[0]

        # referencing in time and space
        # for file w/o reference info. e.g. ERA5.h5
        if inps.ref_date:
            print('referecing to date: {}'.format(inps.ref_date))
            ref_ind = inps.dateList.index(inps.ref_date)
            ts_data -= np.tile(ts_data[ref_ind, :, :], (ts_data.shape[0], 1, 1))

        if inps.ref_yx:
            print('referencing to point (y, x): ({}, {})'.format(inps.ref_yx[0], inps.ref_yx[1]))
            ref_box = (inps.ref_yx[1], inps.ref_yx[0], inps.ref_yx[1]+1, inps.ref_yx[0]+1)
            ref_val = readfile.read(inps.timeseries_file, box=ref_box)[0]
            ts_data -= np.tile(ref_val.reshape(ts_data.shape[0], 1, 1),
                               (1, ts_data.shape[1], ts_data.shape[2]))

        ts_data = ts_data[inps.dropDate, :, :].reshape(inps.numDate, -1)
        if ts_unit == 'mm':
            ts_data *= 1./1000.

        # mask invalid pixels
        print('skip pixels with zero/nan value in all acquisitions')
        ts_stack = np.nanmean(ts_data, axis=0)
        mask = np.multiply(~np.isnan(ts_stack), ts_stack!=0.)
        del ts_stack

        ts_data = ts_data[:, mask]
        num_pixel2inv = int(np.sum(mask))
        print('number of pixels to invert: {} out of {} ({:.1f}%)'.format(
            num_pixel2inv, num_pixel, num_pixel2inv/num_pixel*100))

        # go to next if no valid pixel found (still write the all-zero block)
        if num_pixel2inv == 0:
            write_hdf5_block(inps.outfile, model, m, m_std, mask=mask, block=block)
            continue

        ### estimation / solve Gm = d

        if inps.bootstrap:
            ## option 1 - least squares with bootstrapping
            # Bootstrapping is a resampling method which can be used to estimate properties
            # of an estimator. The method relies on independently sampling the data set with
            # replacement.
            try:
                from sklearn.utils import resample
            except ImportError:
                raise ImportError('can not import scikit-learn!')
            print('using bootstrap resampling {} times ...'.format(inps.bootstrapCount))

            # calc model of all bootstrap sampling
            m_boot = np.zeros((inps.bootstrapCount, num_param, num_pixel2inv), dtype=dataType)
            prog_bar = ptime.progressBar(maxValue=inps.bootstrapCount)
            for i_boot in range(inps.bootstrapCount):
                # bootstrap resampling
                boot_ind = resample(np.arange(inps.numDate),
                                    replace=True,
                                    n_samples=inps.numDate)
                boot_ind.sort()

                # estimation
                m_boot[i_boot] = estimate_time_func(dates[boot_ind].tolist(),
                                                    ts_data[boot_ind],
                                                    model)[1]

                prog_bar.update(i_boot+1,
                                suffix='iteration {} / {}'.format(i_boot+1, inps.bootstrapCount))
            prog_bar.close()
            del ts_data

            # get mean/std among all bootstrap sampling
            print('calculate mean and standard deviation of bootstrap estimations')
            m[:, mask] = m_boot.mean(axis=0).reshape(num_param, -1)
            m_std[:, mask] = m_boot.std(axis=0).reshape(num_param, -1)
            del m_boot

        else:
            ## option 2 - least squares with uncertainty propagation
            print('estimate time functions via linalg.lstsq ...')
            G, m[:, mask], e2 = estimate_time_func(inps.dateList, ts_data, model)
            del ts_data

            ## Compute the covariance matrix for model parameters: Gm = d
            # C_m_hat = (G.T * C_d^-1, * G)^-1  # the most generic form
            #         = sigma^2 * (G.T * G)^-1  # assuming the obs error is normally distributed in time.
            # Based on the law of integrated expectation, we estimate the obs sigma^2 using
            # the OLS estimation residual e_hat_i = d_i - d_hat_i
            # sigma^2 = sigma_hat^2 * N / (N - P)
            #         = (e_hat.T * e_hat) / (N - P)
            # sigma_hat^2 = (e_hat.T * e_hat) / N

            G_inv = linalg.inv(np.dot(G.T, G))
            m_var = e2.reshape(1, -1) / (num_date - num_param)
            m_std[:, mask] = np.sqrt(np.dot(np.diag(G_inv).reshape(-1, 1), m_var))

            ## for linear velocity, the STD can also be calculated
            # using Eq. (10) from Fattahi and Amelung (2015, JGR)
            # ts_diff = ts_data - np.dot(G, m)
            # t_diff = G[:, 1] - np.mean(G[:, 1])
            # vel_std = np.sqrt(np.sum(ts_diff ** 2, axis=0) / np.sum(t_diff ** 2) / (num_date - 2))

        # write
        write_hdf5_block(inps.outfile, model, m, m_std, mask=mask, block=block)

    return inps.outfile
def change_timeseries_ref_date(ts_file, ref_date, outfile=None, max_memory=4.0, force=False):
    """Re-reference a time-series file to a different date.

    Parameters: ts_file    : str, timeseries file to be changed
                ref_date   : str, date in YYYYMMDD format
                outfile    : if str, save to a different file
                             if None, modify the data value in the existing input file
                max_memory : float, memory budget in GB used to size the IO blocks
                force      : bool, re-reference even if ref_date already equals
                             the REF_DATE attribute of the file
    Returns:    outfile    : str, absolute path of the file holding the result
    """
    ts_file = os.path.abspath(ts_file)
    outfile = os.path.abspath(outfile if outfile else ts_file)
    in_place = (outfile == ts_file)

    print('-'*50)
    print('change reference date for file: {}'.format(ts_file))
    meta = readfile.read_attribute(ts_file)
    ds_name = meta['FILE_TYPE']

    # nothing to compute when the requested date is already the reference
    already_referenced = (ref_date == meta.get('REF_DATE', None))
    if already_referenced and not force:
        print('input refDate is the same as the existing REF_DATE.')
        if in_place:
            print('Nothing to be done.')
            return ts_file

        print('Copy {} to {}'.format(ts_file, outfile))
        shutil.copy2(ts_file, outfile)
        return outfile

    # basic info
    ts_obj = timeseries(ts_file)
    ts_obj.open(print_msg=False)
    num_date = ts_obj.numDate
    length = ts_obj.length
    width = ts_obj.width
    ref_idx = ts_obj.dateList.index(ref_date)

    # get list of boxes for block-by-block IO
    # (x2: the data block plus the tiled reference used for the subtraction)
    num_byte = num_date * length * width * 4 * 2
    num_box = int(np.ceil(num_byte / (max_memory * 1024**3)))
    box_list = split_box2sub_boxes(box=(0, 0, width, length),
                                   num_split=num_box,
                                   dimension='y',
                                   print_msg=True)

    # update the existing file in place, or lay out a new file and append to it
    if in_place:
        mode = 'r+'
    else:
        mode = 'a'
        writefile.layout_hdf5(outfile, ref_file=ts_file)

    # loop for block-by-block IO
    for ind, box in enumerate(box_list):
        x0, y0, x1, y1 = box[0], box[1], box[2], box[3]
        if num_box > 1:
            print('\n------- processing patch {} out of {} --------------'.format(ind+1, num_box))
            print('box width: {}'.format(x1 - x0))
            print('box length: {}'.format(y1 - y0))

        # reading
        print('reading data ...')
        data = readfile.read(ts_file, box=box)[0]

        print('referencing in time ...')
        n_epoch, n_row, n_col = data.shape[0], data.shape[1], data.shape[2]
        ref_layer = data[ref_idx, :, :].reshape(1, n_row, n_col)
        data -= np.tile(ref_layer, (n_epoch, 1, 1))

        # writing
        writefile.write_hdf5_block(outfile,
                                   data=data,
                                   datasetName=ds_name,
                                   block=(0, num_date, y0, y1, x0, x1),
                                   mode=mode)

    # update metadata
    print('update "REF_DATE" attribute value to {}'.format(ref_date))
    with h5py.File(outfile, 'r+') as f:
        f.attrs['REF_DATE'] = ref_date
        f.attrs['FILE_PATH'] = outfile

    return outfile
def correct_dem_error(inps):
    """Correct DEM error of input timeseries file

    The time-series is processed block-by-block (split along y, sized from
    ``inps.maxMemory`` in GB), each block being inverted by
    ``correct_dem_error_patch`` either directly or through a Dask cluster
    (``inps.cluster``).

    Parameters: inps - namespace; the attributes read here include
                       timeseries_file, geom_file, outfile, excludeDate,
                       stepFuncDate, polyOrder, phaseVelocity, maxMemory,
                       cluster, numWorker and config.
    Returns:    dem_err_file - str, estimated DEM error file ('demErr.h5')
                ts_cor_file  - str, corrected time-series file (inps.outfile)
                ts_res_file  - str, residual time-series file
    Raises:     ValueError if inps.polyOrder exceeds the number of used acquisitions
    """
    start_time = time.time()

    # limit the number of threads to 1
    # for slight speedup and big CPU usage save
    num_threads_dict = cluster.set_num_threads("1")

    # everything below runs under try/finally so that the original thread
    # setting is restored even when the estimation raises
    try:
        ## 1. input info

        # 1.1 read date info
        ts_obj = timeseries(inps.timeseries_file)
        ts_obj.open()
        num_date = ts_obj.numDate
        length, width = ts_obj.length, ts_obj.width

        num_step = len(inps.stepFuncDate)

        # exclude dates
        date_flag = read_exclude_date(inps.excludeDate, ts_obj.dateList)[0]
        if inps.polyOrder > np.sum(date_flag):
            raise ValueError(
                "input poly order {} > number of acquisition {}! Reduce it!".format(
                    inps.polyOrder, np.sum(date_flag)))

        # 1.2 design matrix part 1 - time func for surface deformation
        G_defo = get_design_matrix4defo(inps)

        ## 2. prepare output

        # 2.1 metadata
        meta = dict(ts_obj.metadata)
        print('add/update the following configuration metadata to file:\n{}'.format(configKeys))
        for key in configKeys:
            meta[key_prefix + key] = str(vars(inps)[key])

        # 2.2 instantiate est. DEM error
        dem_err_file = 'demErr.h5'
        meta['FILE_TYPE'] = 'dem'
        meta['UNIT'] = 'm'
        ds_name_dict = {'dem': [np.float32, (length, width), None]}
        writefile.layout_hdf5(dem_err_file, ds_name_dict, metadata=meta)

        # 2.3 instantiate corrected time-series
        ts_cor_file = inps.outfile
        meta['FILE_TYPE'] = 'timeseries'
        writefile.layout_hdf5(ts_cor_file, metadata=meta, ref_file=inps.timeseries_file)

        # 2.4 instantiate residual phase time-series
        ts_res_file = os.path.join(os.path.dirname(inps.outfile), 'timeseriesResidual.h5')
        writefile.layout_hdf5(ts_res_file, metadata=meta, ref_file=inps.timeseries_file)

        ## 3. run the estimation and write to disk

        # 3.1 split ts_file into blocks to save memory
        # 1st dimension size: ts (obs / cor / res / step) + dem_err/inc_angle/rg_dist (+pbase)
        num_epoch = num_date * 3 + num_step + 3
        if inps.geom_file:
            geom_obj = geometry(inps.geom_file)
            geom_obj.open(print_msg=False)
            if 'bperp' in geom_obj.datasetNames:
                num_epoch += num_date

        # split in row/line direction based on the input memory limit
        num_box = int(np.ceil((num_epoch * length * width * 4) * 2.5 / (inps.maxMemory * 1024**3)))
        box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length),
                                               num_split=num_box,
                                               dimension='y')

        # 3.2 prepare the input arguments for *_patch()
        data_kwargs = {
            'G_defo': G_defo,
            'ts_file': inps.timeseries_file,
            'geom_file': inps.geom_file,
            'date_flag': date_flag,
            'phase_velocity': inps.phaseVelocity,
        }

        # 3.3 invert / write block-by-block
        for i, box in enumerate(box_list):
            box_wid = box[2] - box[0]
            box_len = box[3] - box[1]
            if num_box > 1:
                print('\n------- processing patch {} out of {} --------------'.format(i + 1, num_box))
                print('box width: {}'.format(box_wid))
                print('box length: {}'.format(box_len))

            # update box argument in the input data
            data_kwargs['box'] = box

            # invert
            if not inps.cluster:
                # non-parallel (the last returned item is not used here)
                delta_z, ts_cor, ts_res = correct_dem_error_patch(**data_kwargs)[:-1]

            else:
                # parallel
                print('\n\n------- start parallel processing using Dask -------')

                # initiate the output data
                delta_z = np.zeros((box_len, box_wid), dtype=np.float32)
                ts_cor = np.zeros((num_date, box_len, box_wid), dtype=np.float32)
                ts_res = np.zeros((num_date, box_len, box_wid), dtype=np.float32)

                # initiate dask cluster and client
                cluster_obj = cluster.DaskCluster(inps.cluster,
                                                  inps.numWorker,
                                                  config_name=inps.config)
                cluster_obj.open()

                try:
                    # run dask
                    delta_z, ts_cor, ts_res = cluster_obj.run(
                        func=correct_dem_error_patch,
                        func_data=data_kwargs,
                        results=[delta_z, ts_cor, ts_res])
                finally:
                    # close dask cluster and client, also on failure
                    cluster_obj.close()

                print('------- finished parallel processing -------\n\n')

            # write the block to disk
            # with 3D block in [z0, z1, y0, y1, x0, x1]
            # and  2D block in         [y0, y1, x0, x1]

            # DEM error - 2D
            block = [box[1], box[3], box[0], box[2]]
            writefile.write_hdf5_block(dem_err_file,
                                       data=delta_z,
                                       datasetName='dem',
                                       block=block)

            # corrected time-series - 3D
            block = [0, num_date, box[1], box[3], box[0], box[2]]
            writefile.write_hdf5_block(ts_cor_file,
                                       data=ts_cor,
                                       datasetName='timeseries',
                                       block=block)

            # residual time-series - 3D
            block = [0, num_date, box[1], box[3], box[0], box[2]]
            writefile.write_hdf5_block(ts_res_file,
                                       data=ts_res,
                                       datasetName='timeseries',
                                       block=block)

    finally:
        # roll back to the origial number of threads
        cluster.roll_back_num_threads(num_threads_dict)

    # time info
    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs.'.format(m, s))

    return dem_err_file, ts_cor_file, ts_res_file
def run_timeseries2time_func(inps):
    """Estimate time-function parameters and their STD from a time-series file.

    The input is processed block-by-block (split along y) to respect
    inps.maxMemory. For every block the time functions are fitted to the
    displacement time-series and the parameters / their STD are written to
    inps.outfile; optionally the fitting residual is written to inps.res_file.

    The STD is obtained from one of:
        option 1   - bootstrap resampling                 (inps.bootstrap)
        option 2.1 - propagation of the time-series
                     (co)variance                         (inps.ts_cov_file)
        option 2.3 - the fitting residual, assuming obs. errors that are
                     normally distributed in time         (default)

    Parameters: inps - object with the attributes read here: timeseries_file,
                       dateList, numDate, dropDate, ref_date, ref_yx, outfile,
                       save_res, res_file, ts_cov_file, bootstrap,
                       bootstrapCount, maxMemory and every name in configKeys.
                       NOTE: inps.ref_date is set to the 1st date in place if
                       neither the file nor inps provides a reference date.
    Returns:    inps.outfile - the output file of the time function parameters
    """
    # basic info
    atr = readfile.read_attribute(inps.timeseries_file)
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    num_date = inps.numDate
    dates = np.array(inps.dateList)
    seconds = atr.get('CENTER_LINE_UTC', 0)

    # Unit of the INPUT time-series. It must be taken from atr: atrV['UNIT'] is
    # overwritten with 'm/year' below, so testing atrV can never detect 'mm'.
    scale_mm2m = atr.get('UNIT') == 'mm'

    # use the 1st date as reference if not found, e.g. timeseriesResidual.h5 file
    if "REF_DATE" not in atr.keys() and not inps.ref_date:
        inps.ref_date = inps.dateList[0]
        print(
            'WARNING: No REF_DATE found in time-series file or input in command line.'
        )
        print(' Set "--ref-date {}" and continue.'.format(inps.dateList[0]))

    # get deformation model from parsers
    model, num_param = read_inps2model(inps)

    ## output preparation

    # time_func_param: attributes
    atrV = dict(atr)
    atrV['FILE_TYPE'] = 'velocity'
    atrV['UNIT'] = 'm/year'
    atrV['START_DATE'] = inps.dateList[0]
    atrV['END_DATE'] = inps.dateList[-1]
    atrV['DATE12'] = '{}_{}'.format(inps.dateList[0], inps.dateList[-1])
    if inps.ref_yx:
        atrV['REF_Y'] = inps.ref_yx[0]
        atrV['REF_X'] = inps.ref_yx[1]
    if inps.ref_date:
        atrV['REF_DATE'] = inps.ref_date

    # time_func_param: config parameter
    print('add/update the following configuration metadata:\n{}'.format(
        configKeys))
    for key in configKeys:
        atrV[key_prefix + key] = str(vars(inps)[key])

    # time_func_param: instantiate output file
    ds_name_dict, ds_unit_dict = model2hdf5_dataset(
        model, ds_shape=(length, width))[1:]
    writefile.layout_hdf5(inps.outfile,
                          metadata=atrV,
                          ds_name_dict=ds_name_dict,
                          ds_unit_dict=ds_unit_dict)

    # timeseries_res: attributes + instantiate output file
    if inps.save_res:
        atrR = dict(atr)
        # remove REF_DATE attribute
        atrR.pop('REF_DATE', None)
        # the residual is computed from the (possibly rescaled) data in meters
        if scale_mm2m:
            atrR['UNIT'] = 'm'

        # prepare ds_name_dict manually, instead of using ref_file, to support --ex option
        date_len = len(inps.dateList[0])
        ds_name_dict = {
            "date": [
                np.dtype(f'S{date_len}'),
                (num_date, ),
                # np.bytes_ instead of np.string_: the alias was removed in NumPy 2.0
                np.array(inps.dateList, dtype=np.bytes_),
            ],
            "timeseries": [np.float32, (num_date, length, width), None],
        }
        writefile.layout_hdf5(inps.res_file,
                              ds_name_dict=ds_name_dict,
                              metadata=atrR)

    ## estimation

    # calc number of box based on memory limit
    memoryAll = (num_date + num_param * 2 + 2) * length * width * 4
    if inps.bootstrap:
        memoryAll += inps.bootstrapCount * num_param * length * width * 4
    num_box = int(np.ceil(memoryAll * 3 / (inps.maxMemory * 1024**3)))
    box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length),
                                           num_split=num_box,
                                           dimension='y',
                                           print_msg=True)

    # value of the spatial reference pixel on every kept date:
    # identical for all blocks, so read it once instead of once per block
    ref_val = None
    if inps.ref_yx:
        ref_box = (inps.ref_yx[1], inps.ref_yx[0],
                   inps.ref_yx[1] + 1, inps.ref_yx[0] + 1)
        ref_val = readfile.read(inps.timeseries_file, box=ref_box)[0]
        ref_val = np.reshape(ref_val, -1)[inps.dropDate].reshape(-1, 1, 1)

    # loop for block-by-block IO
    for i, box in enumerate(box_list):
        box_wid = box[2] - box[0]
        box_len = box[3] - box[1]
        num_pixel = box_len * box_wid
        if num_box > 1:
            print('\n------- processing patch {} out of {} --------------'.
                  format(i + 1, num_box))
            print('box width: {}'.format(box_wid))
            print('box length: {}'.format(box_len))

        # initiate output
        m = np.zeros((num_param, num_pixel), dtype=dataType)
        m_std = np.zeros((num_param, num_pixel), dtype=dataType)

        # read input
        print('reading data from file {} ...'.format(inps.timeseries_file))
        ts_data = readfile.read(inps.timeseries_file, box=box)[0]

        # Drop the excluded dates FIRST: inps.dateList only holds the kept
        # dates, so an index into it is valid for the filtered array only.
        ts_data = ts_data[inps.dropDate, :, :]

        # referencing in time and space
        # for file w/o reference info. e.g. ERA5.h5
        if inps.ref_date:
            print('referecing to date: {}'.format(inps.ref_date))
            ref_ind = inps.dateList.index(inps.ref_date)
            # copy: the slice is a view into the array being modified in place
            ts_data -= ts_data[ref_ind].copy()

        if ref_val is not None:
            print('referencing to point (y, x): ({}, {})'.format(
                inps.ref_yx[0], inps.ref_yx[1]))
            ts_data -= ref_val

        ts_data = ts_data.reshape(inps.numDate, -1)
        if scale_mm2m:
            ts_data *= 1. / 1000.

        ts_cov = None
        if inps.ts_cov_file:
            # NOTE(review): ts_cov is used as read, without any unit scaling;
            # confirm its unit when the input time-series is in mm.
            print(
                f'reading time-series covariance matrix from file {inps.ts_cov_file} ...'
            )
            ts_cov = readfile.read(inps.ts_cov_file, box=box)[0]
            if ts_cov.ndim == 4:
                # full covariance matrix in 4D --> 3D
                if inps.numDate < ts_cov.shape[0]:
                    ts_cov = ts_cov[inps.dropDate, :, :, :]
                    ts_cov = ts_cov[:, inps.dropDate, :, :]
                ts_cov = ts_cov.reshape(inps.numDate, inps.numDate, -1)

            elif ts_cov.ndim == 3:
                # diagonal variance matrix in 3D --> 2D
                if inps.numDate < ts_cov.shape[0]:
                    ts_cov = ts_cov[inps.dropDate, :, :]
                ts_cov = ts_cov.reshape(inps.numDate, -1)

            ## set zero value to a fixed small value to avoid divide by zero
            #epsilon = 1e-5
            #ts_cov[ts_cov<epsilon] = epsilon

        # mask invalid pixels
        print('skip pixels with zero/nan value in all acquisitions')
        ts_stack = np.nanmean(ts_data, axis=0)
        mask = np.multiply(~np.isnan(ts_stack), ts_stack != 0.)
        del ts_stack

        #if ts_cov is not None:
        #    print('skip pxiels with nan STD value in any acquisition')
        #    num_std_nan = np.sum(np.isnan(ts_cov), axis=0)
        #    mask *= num_std_nan == 0
        #    del num_std_nan

        ts_data = ts_data[:, mask]
        num_pixel2inv = int(np.sum(mask))
        idx_pixel2inv = np.where(mask)[0]
        print('number of pixels to invert: {} out of {} ({:.1f}%)'.format(
            num_pixel2inv, num_pixel, num_pixel2inv / num_pixel * 100))

        # go to next if no valid pixel found
        if num_pixel2inv == 0:
            continue

        ### estimation / solve Gm = d
        print('estimating time functions via linalg.lstsq ...')

        if inps.bootstrap:
            ## option 1 - least squares with bootstrapping
            # Bootstrapping is a resampling method which can be used to estimate properties
            # of an estimator. The method relies on independently sampling the data set with
            # replacement.
            print(
                'estimating time function STD with bootstrap resampling ({} times) ...'
                .format(inps.bootstrapCount))

            # calc model of all bootstrap sampling
            rng = np.random.default_rng()
            m_boot = np.zeros((inps.bootstrapCount, num_param, num_pixel2inv),
                              dtype=dataType)
            prog_bar = ptime.progressBar(maxValue=inps.bootstrapCount)
            for i_boot in range(inps.bootstrapCount):
                # bootstrap resampling
                boot_ind = rng.choice(inps.numDate,
                                      size=inps.numDate,
                                      replace=True)
                boot_ind.sort()

                # estimation
                m_boot[i_boot] = time_func.estimate_time_func(
                    model=model,
                    date_list=dates[boot_ind].tolist(),
                    dis_ts=ts_data[boot_ind],
                    seconds=seconds)[1]

                prog_bar.update(i_boot + 1,
                                suffix='iteration {} / {}'.format(
                                    i_boot + 1, inps.bootstrapCount))
            prog_bar.close()

            # get mean/std among all bootstrap sampling
            m[:, mask] = m_boot.mean(axis=0).reshape(num_param, -1)
            m_std[:, mask] = m_boot.std(axis=0).reshape(num_param, -1)
            del m_boot

            # get design matrix to calculate the residual time series
            G = time_func.get_design_matrix4time_func(inps.dateList,
                                                      model=model,
                                                      ref_date=inps.ref_date,
                                                      seconds=seconds)

        else:
            ## option 2 - least squares with uncertainty propagation
            G, m[:, mask], e2 = time_func.estimate_time_func(
                model=model,
                date_list=inps.dateList,
                dis_ts=ts_data,
                seconds=seconds)

            ## Compute the covariance matrix for model parameters:
            #       G * m = d
            #       C_m_hat = G+ * C_d * G+.T
            #
            # For ordinary least squares estimation:
            #       G+ = (G.T * G)^-1 * G.T                       (option 2.1)
            #
            # For weighted least squares estimation:
            #       G+ = (G.T * C_d^-1 * G)^-1 * G.T * C_d^-1
            #    => C_m_hat = (G.T * C_d^-1 * G)^-1               (option 2.2)
            #
            # Assuming normality of the observation errors (in the time domain) with a variance of sigma^2
            # we have C_d = sigma^2 * I, then the above equation is simplified into:
            #       C_m_hat = sigma^2 * (G.T * G)^-1              (option 2.3)
            #
            # Based on the law of integrated expectation, we estimate the obs sigma^2 using
            # the OLS estimation residual as:
            #       e_hat = d - d_hat
            #    => sigma_hat^2 = (e_hat.T * e_hat) / N
            #    => sigma^2 = sigma_hat^2 * N / (N - P)           (option 2.4)
            #               = (e_hat.T * e_hat) / (N - P)
            # which is the equation (10) from Fattahi and Amelung (2015, JGR)

            if ts_cov is not None:
                # option 2.1 - linear propagation from time-series (co)variance matrix
                # TO DO: save the full covariance matrix of the time function parameters
                # only the STD is saved right now
                covar_flag = ts_cov.ndim == 3
                msg = 'estimating time function STD from time-serries '
                msg += 'covariance pixel-by-pixel ...' if covar_flag else 'variance pixel-by-pixel ...'
                print(msg)

                # calc the common pseudo-inverse matrix
                Gplus = linalg.pinv(G)

                # loop over each pixel
                # or use multidimension matrix multiplication
                # m_cov = Gplus @ ts_cov @ Gplus.T
                prog_bar = ptime.progressBar(maxValue=num_pixel2inv)
                for i_pix, idx in enumerate(idx_pixel2inv):
                    # cov: time-series -> time func
                    # (ts_cov is NOT masked, hence indexed with the block-wide idx)
                    if covar_flag:
                        ts_covi = ts_cov[:, :, idx]
                    else:
                        ts_covi = np.diag(ts_cov[:, idx])
                    m_cov = np.linalg.multi_dot([Gplus, ts_covi, Gplus.T])
                    m_std[:, idx] = np.sqrt(np.diag(m_cov))

                    prog_bar.update(i_pix + 1,
                                    every=200,
                                    suffix='{}/{} pixels'.format(
                                        i_pix + 1, num_pixel2inv))
                prog_bar.close()

            else:
                # option 2.3 - assume obs errors following normal dist. in time
                print(
                    'estimating time function STD from time-series fitting residual ...'
                )
                G_inv = linalg.inv(np.dot(G.T, G))
                m_var = e2.reshape(1, -1) / (num_date - num_param)
                m_std[:, mask] = np.sqrt(
                    np.dot(np.diag(G_inv).reshape(-1, 1), m_var))

                # option 2.4 - simplified form for linear velocity (without matrix linear algebra)
                # The STD can also be calculated using Eq. (10) from Fattahi and Amelung (2015, JGR)
                # ts_diff = ts_data - np.dot(G, m)
                # t_diff = G[:, 1] - np.mean(G[:, 1])
                # vel_std = np.sqrt(np.sum(ts_diff ** 2, axis=0) / np.sum(t_diff ** 2) / (num_date - 2))

        # write - time func params
        block = [box[1], box[3], box[0], box[2]]
        ds_dict = model2hdf5_dataset(model, m, m_std, mask=mask)[0]
        for ds_name, data in ds_dict.items():
            writefile.write_hdf5_block(inps.outfile,
                                       data=data.reshape(box_len, box_wid),
                                       datasetName=ds_name,
                                       block=block)

        # write - residual file
        if inps.save_res:
            block = [0, num_date, box[1], box[3], box[0], box[2]]
            # pixels that were not inverted stay NaN
            ts_res = np.full((num_date, box_len * box_wid),
                             np.nan,
                             dtype=np.float32)
            # only the inverted columns of m are needed for the prediction
            ts_res[:, mask] = ts_data - np.dot(G, m[:, mask])
            writefile.write_hdf5_block(inps.res_file,
                                       data=ts_res.reshape(
                                           num_date, box_len, box_wid),
                                       datasetName='timeseries',
                                       block=block)

    return inps.outfile
def diff_file(file1, file2, out_file=None, force=False, max_num_pixel=2e8):
    """calculate/write file1 - file2

    Three cases are handled, selected by the FILE_TYPE attribute of the inputs:
        timeseries  - (giant)timeseries : block-by-block differencing on the
                      shared dates, after re-referencing file2 to the
                      reference date / pixel of file1 if needed
        ifgramStack - ifgramStack       : differencing of the 1st common
                      dataset, zero values are kept as zero
        otherwise                       : file1 - file2[0] - file2[1] - ...

    Parameters: file1         - str, path of file1
                file2         - list of str, path of file2(s)
                                (a single str is accepted as a one-item list)
                out_file      - str, path of output file
                force         - bool, for time-series with different dates:
                                difference the shared dates only instead of
                                raising an exception
                max_num_pixel - float, maximum number of pixels for each block
    Returns:    out_file      - str, path of output file
    Raises:     ValueError    - out_file is missing for more than one file2, or
                                no common dataset between two ifgramStack files
                Exception     - mismatched file types / dates (without force),
                                or more than 2 time-series files
    """
    start_time = time.time()

    # a bare string would otherwise be indexed character by character below
    if isinstance(file2, str):
        file2 = [file2]

    if not out_file:
        fbase, fext = os.path.splitext(file1)
        if len(file2) > 1:
            raise ValueError(
                'Output file name is needed for more than 2 files input.')
        out_file = '{}_diff_{}{}'.format(
            fbase,
            os.path.splitext(os.path.basename(file2[0]))[0], fext)
    print('{} - {} --> {}'.format(file1, file2, out_file))

    # Read basic info (used by all branches below)
    atr1 = readfile.read_attribute(file1)
    k1 = atr1['FILE_TYPE']
    atr2 = readfile.read_attribute(file2[0])
    k2 = atr2['FILE_TYPE']
    print('input files are: {} and {}'.format(k1, k2))

    if k1 == 'timeseries':
        if k2 not in ['timeseries', 'giantTimeseries']:
            raise Exception(
                'Input multiple dataset files are not the same file type!')
        if len(file2) > 1:
            raise Exception(
                ('Only 2 files substraction is supported for time-series file,'
                 ' {} input.'.format(len(file2) + 1)))

        dateList1 = timeseries(file1).get_date_list()
        if k2 == 'timeseries':
            dateList2 = timeseries(file2[0]).get_date_list()
            unit_fac = 1.
        elif k2 == 'giantTimeseries':
            dateList2 = giantTimeseries(file2[0]).get_date_list()
            unit_fac = 0.001

        # check reference point
        ref_date, ref_y, ref_x = check_reference(atr1, atr2)
        # Compare against None rather than relying on truthiness: row/column 0
        # is a valid reference pixel but is falsy.
        # NOTE(review): assumes check_reference() returns None for ref_y/x when
        # no spatial re-referencing is needed - confirm against its definition.
        do_ref_yx = ref_y is not None and ref_x is not None

        # check dates shared by two timeseries files
        dates2 = set(dateList2)
        dateListShared = [d for d in dateList1 if d in dates2]
        # flag of the dates in file1 that are also in file2
        dateShared = np.array([d in dates2 for d in dateList1],
                              dtype=np.bool_)
        if dateListShared != dateList1:
            print('WARNING: {} does not contain all dates in {}'.format(
                file2, file1))
            if force:
                # keep the order of file1 so that the message is deterministic
                dateListEx = [d for d in dateList1 if d not in dates2]
                print(
                    'Continue and enforce the differencing for their shared dates only.'
                )
                print(
                    '\twith following dates are ignored for differencing:\n{}'.
                    format(dateListEx))
            else:
                raise Exception(
                    'To enforce the differencing anyway, use --force option.')

        # instantiate the output file
        writefile.layout_hdf5(out_file, ref_file=file1)

        # block-by-block IO
        length, width = int(atr1['LENGTH']), int(atr1['WIDTH'])
        num_box = int(np.ceil(len(dateList1) * length * width / max_num_pixel))
        box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length),
                                               num_split=num_box,
                                               dimension='y',
                                               print_msg=True)

        # value of file2 on the reference pixel of file1, read once for all blocks
        if do_ref_yx:
            ref_box = (ref_x, ref_y, ref_x + 1, ref_y + 1)
            ref_val = readfile.read(file2[0],
                                    datasetName=dateListShared,
                                    box=ref_box)[0] * unit_fac

        for i, box in enumerate(box_list):
            if num_box > 1:
                print('\n------- processing patch {} out of {} --------------'.
                      format(i + 1, num_box))
                print('box: {}'.format(box))

            # read data2 (consider different reference_date/pixel)
            print('read from file: {}'.format(file2[0]))
            data2 = readfile.read(
                file2[0], datasetName=dateListShared, box=box)[0] * unit_fac

            if do_ref_yx:
                print('* referencing data from {} to y/x: {}/{}'.format(
                    os.path.basename(file2[0]), ref_y, ref_x))
                data2 -= np.tile(ref_val.reshape(-1, 1, 1),
                                 (1, data2.shape[1], data2.shape[2]))

            if ref_date:
                print('* referencing data from {} to date: {}'.format(
                    os.path.basename(file2[0]), ref_date))
                ref_ind = dateListShared.index(ref_date)
                data2 -= np.tile(data2[ref_ind, :, :], (data2.shape[0], 1, 1))

            # read data1
            print('read from file: {}'.format(file1))
            data = readfile.read(file1, box=box)[0]

            # apply differencing
            mask = data == 0.
            data[dateShared] -= data2
            data[mask] = 0.  # Do not change zero phase value
            del data2

            # write the block
            block = [0, data.shape[0], box[1], box[3], box[0], box[2]]
            writefile.write_hdf5_block(out_file,
                                       data=data,
                                       datasetName=k1,
                                       block=block)

    elif k1 == 'ifgramStack' and k2 == 'ifgramStack':
        obj1 = ifgramStack(file1)
        obj1.open()
        obj2 = ifgramStack(file2[0])
        obj2.open()
        dsNames = list(set(obj1.datasetNames) & set(obj2.datasetNames))
        if len(dsNames) == 0:
            raise ValueError('no common dataset between two files!')
        # 1st common dataset, in the order of ifgramDatasetNames
        dsName = [name for name in ifgramDatasetNames if name in dsNames][0]

        # read data
        print('reading {} from file {} ...'.format(dsName, file1))
        data1 = readfile.read(file1, datasetName=dsName)[0]
        print('reading {} from file {} ...'.format(dsName, file2[0]))
        data2 = readfile.read(file2[0], datasetName=dsName)[0]

        # consider reference pixel
        if 'unwrapphase' in dsName.lower():
            print('referencing to pixel ({},{}) ...'.format(
                obj1.refY, obj1.refX))
            # copy: the slices are views into the arrays modified in the loop
            ref1 = data1[:, obj1.refY, obj1.refX].copy()
            ref2 = data2[:, obj2.refY, obj2.refX].copy()
            for i in range(data1.shape[0]):
                data1[i, :][data1[i, :] != 0.] -= ref1[i]
                data2[i, :][data2[i, :] != 0.] -= ref2[i]

        # operation and ignore zero values:
        # a zero in either input gives NaN in the difference, reset to zero below
        data1[data1 == 0] = np.nan
        data2[data2 == 0] = np.nan
        data = data1 - data2
        del data1, data2
        data[np.isnan(data)] = 0.

        # write to file
        dsDict = {dsName: data}
        writefile.write(dsDict, out_file=out_file, ref_file=file1)

    # Single dataset file
    else:
        data1 = readfile.read(file1)[0]
        data = np.array(data1, data1.dtype)
        for fname in file2:
            data2 = readfile.read(fname)[0]
            # difference in float32, then cast back to the data type of file1
            data = np.array(data, dtype=np.float32) - np.array(
                data2, dtype=np.float32)
            data = np.array(data, data1.dtype)
        print('writing >>> ' + out_file)
        writefile.write(data, out_file=out_file, metadata=atr1)

    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs'.format(m, s))

    return out_file