def _make_processed_dataset(filename, preprocess, root_dir,
                            sub_dir_in_processed, Tx_locations, Rx_locations,
                            nCx, nCy):
    pkl_name = os.path.join(root_dir, filename)
    data = read_pkl(pkl_name)
    # check that the data is a dict containing the
    # "resistance" and "resistivity_log10" keys
    if (not isinstance(data, dict)
            or data.get('resistance') is None
            or data.get('resistivity_log10') is None):
        raise Exception(
            'data is not a dict or dict does not contain essential keys')

    # preprocess
    for k, v in preprocess.items():
        if k == 'add_noise' and v.get('perform'):
            add_noise(data['resistance'], **v.get('kwargs'))
        elif k == 'log_transform' and v.get('perform'):
            log_transform(data['resistance'], **v.get('kwargs'))
        elif k == 'to_midpoint' and v.get('perform'):
            data['resistance'] = to_midpoint(
                data['resistance'], Tx_locations, Rx_locations)
        elif k == 'to_txrx' and v.get('perform'):
            data['resistance'] = to_txrx(
                data['resistance'], Tx_locations, Rx_locations)
        elif k == 'to_section' and v.get('perform'):
            data['resistivity_log10'] = to_section(
                data['resistivity_log10'], nCx, nCy)

    # save pickle in processed dir
    new_pkl_name = os.path.join(
        sub_dir_in_processed, re.sub(r'raw', r'processed', filename))
    write_pkl(data, new_pkl_name)
    return data
def read_dataset(input_file_path, target_file_path, read_dataset_info):
    """Read dataset from pickle files and preprocess it.

    Parameters
    ----------
    input_file_path : str, os.PathLike or pathlib.Path
        The path of the pickle file containing the resistance array.
    target_file_path : str, os.PathLike or pathlib.Path
        The path of the pickle file containing the log10 resistivity array.
    read_dataset_info : dict
        Auxiliary information for preprocessing: the `preprocess` dict,
        `Tx_locations`, `Rx_locations`, `nCx` and `nCy`.

    Returns
    -------
    resistance : numpy.ndarray
        The input data of the neural network.
    resistivity_log10 : numpy.ndarray
        The target data of the neural network.
    """
    # read data and assign
    resistance = read_pkl(input_file_path.numpy().decode('utf-8'))
    resistivity_log10 = read_pkl(target_file_path.numpy().decode('utf-8'))

    # parse read_dataset_info dictionary
    preprocess = read_dataset_info['preprocess']
    Tx_locations = read_dataset_info['Tx_locations']
    Rx_locations = read_dataset_info['Rx_locations']
    nCx = read_dataset_info['nCx']
    nCy = read_dataset_info['nCy']

    # preprocess
    for k, v in preprocess.items():
        if k == 'add_noise' and v.get('perform'):
            add_noise(resistance, **v.get('kwargs'))
        elif k == 'log_transform' and v.get('perform'):
            log_transform(resistance, **v.get('kwargs'))
        elif k == 'to_midpoint' and v.get('perform'):
            resistance = to_midpoint(resistance, Tx_locations, Rx_locations)
        elif k == 'to_txrx' and v.get('perform'):
            resistance = to_txrx(resistance, Tx_locations, Rx_locations)
        elif k == 'to_section' and v.get('perform'):
            resistivity_log10 = to_section(resistivity_log10, nCx, nCy)

    return resistance, resistivity_log10
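# Hedged usage sketch (not part of this module): the .numpy().decode('utf-8')
# calls above imply read_dataset receives string tensors, i.e. it is meant to
# be wrapped with tf.py_function inside a tf.data pipeline. The names below
# (input_paths, target_paths, info) are illustrative assumptions:
#
#   dataset = tf.data.Dataset.from_tensor_slices((input_paths, target_paths))
#   dataset = dataset.map(
#       lambda x, y: tf.py_function(
#           functools.partial(read_dataset, read_dataset_info=info),
#           inp=[x, y], Tout=[tf.float32, tf.float32]),
#       num_parallel_calls=tf.data.experimental.AUTOTUNE)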
def _forward_simulation(pkl_name, simulator):
    data = read_pkl(pkl_name)
    # convert the predicted log10 resistivity section back to a flat,
    # linear-scale resistivity vector (undo the vertical flip, then flatten)
    resistivity = np.flipud(
        np.power(10, data['predicted_resistivity_log10'])).flatten()
    # stop printing messages
    with contextlib.redirect_stdout(None):
        data['predicted_resistance'] = simulator.make_synthetic_data(
            resistivity, std=0, force=True)
    write_pkl(data, pkl_name)
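# Hedged usage sketch (paths are illustrative): _forward_simulation rewrites
# each prediction pickle in place, adding a 'predicted_resistance' key, so a
# driver loop could look like:
#
#   simulator = read_pkl(os.path.join(model_dir, 'simulator.pkl'))
#   for entry in os.scandir(os.path.join(model_dir, 'predictions')):
#       _forward_simulation(entry.path, simulator)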
def _process_resistivity(filename, save_resistivity_dir, processes,
                         to_float32, nCx, nCy):
    raw_resistivity = read_pkl(filename)
    pkl_name = os.path.basename(filename)
    save_resistivity_pkl = os.path.join(save_resistivity_dir, pkl_name)
    for process, kwargs in processes.items():
        if process == 'to_section':
            raw_resistivity = to_section(raw_resistivity, nCx, nCy)
    if to_float32:
        raw_resistivity = raw_resistivity.astype('float32')
    write_pkl(raw_resistivity, save_resistivity_pkl)
def _process_resistance(filename, save_resistance_dir, processes, to_float32,
                        Tx_locations, Rx_locations, nCx, nCy):
    raw_resistance = read_pkl(filename)
    pkl_name = os.path.basename(filename)
    save_resistance_pkl = os.path.join(save_resistance_dir, pkl_name)
    for process, kwargs in processes.items():
        if process == 'add_noise':
            add_noise(raw_resistance, **kwargs)
        elif process == 'log_transform':
            log_transform(raw_resistance, **kwargs)
        elif process == 'to_midpoint':
            raw_resistance = to_midpoint(
                raw_resistance, Tx_locations, Rx_locations)
        elif process == 'to_txrx':
            raw_resistance = to_txrx(
                raw_resistance, Tx_locations, Rx_locations)
    if to_float32:
        raw_resistance = raw_resistance.astype('float32')
    write_pkl(raw_resistance, save_resistance_pkl)
def get_data(self, temp_file_list):
    resistance = np.empty((len(temp_file_list), *self.input_shape))
    for i, file in enumerate(temp_file_list):
        data = read_pkl(file)
        if self.preprocess['to_midpoint']['perform']:
            resistance[i, ] = to_midpoint(
                data['resistance'], self.Tx_locations, self.Rx_locations)
        elif self.preprocess['to_txrx']['perform']:
            resistance[i, ] = to_txrx(
                data['resistance'], self.Tx_locations, self.Rx_locations)
        else:
            resistance[i, ] = data['resistance'].reshape(self.input_shape)
    for k, v in self.preprocess.items():
        if k == 'add_noise' and v.get('perform'):
            add_noise(resistance, **v.get('kwargs'))
        elif k == 'log_transform' and v.get('perform'):
            log_transform(resistance, **v.get('kwargs'))
    return resistance
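# Hedged sketch (an assumption about the surrounding class): get_data reads
# self.preprocess, self.input_shape and the electrode locations, which fits a
# keras.utils.Sequence-style batch generator whose __getitem__ slices a file
# list and delegates to get_data. self.file_list and self.batch_size are
# hypothetical attributes:
#
#   def __getitem__(self, idx):
#       batch = self.file_list[idx * self.batch_size:
#                              (idx + 1) * self.batch_size]
#       return self.get_data(batch)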
# parse config and setting
custom_NN = config['custom_NN']
dataset_rootdir = config['dataset_rootdir']
training_dir = os.path.join(dataset_rootdir, 'training')
validation_dir = os.path.join(dataset_rootdir, 'validation')
training_resistance_dir = os.path.join(
    training_dir, 'resistance', config['resistance_dirname'])
training_resistivity_dir = os.path.join(
    training_dir, 'resistivity', config['resistivity_dirname'])
validation_resistance_dir = os.path.join(
    validation_dir, 'resistance', config['resistance_dirname'])
validation_resistivity_dir = os.path.join(
    validation_dir, 'resistivity', config['resistivity_dirname'])
simulator_pkl = os.path.join(dataset_rootdir, 'simulator.pkl')
simulator = read_pkl(simulator_pkl)  # for physical simulation
save_model_dir = config['save_model_dir']
os.makedirs(save_model_dir, exist_ok=True)
save_weights_dir = os.path.join(save_model_dir, 'weights')
tb_log_dir = os.path.join(
    save_model_dir, 'logs', datetime.now().strftime("%Y%m%d-%H%M%S"))
os.makedirs(tb_log_dir, exist_ok=True)
pre_trained_weight_h5 = config['pre_trained_weights']  # resume training from these weights
trained_weight_h5 = os.path.join(
    save_weights_dir, 'trained_weight.h5')  # save trained weights to this file

# accelerate
enable_XLA = config['enable_XLA']
enable_mixed_float16 = config['enable_mixed_float16']

# hyper parameters
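# Hedged sketch (not from the original script): one way the two acceleration
# flags above are commonly honored in TF 2.x; the exact mechanism this project
# uses is an assumption here:
#
#   if enable_XLA:
#       tf.config.optimizer.set_jit(True)  # XLA JIT compilation
#   if enable_mixed_float16:
#       tf.keras.mixed_precision.set_global_policy('mixed_float16')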
def make_processed_dataset(config_file):
    """Preprocess raw dataset and save it to the processed directory.

    Parameters
    ----------
    config_file : str, pathlib.Path or dict
        The path to the configured yaml file
        or the dictionary for configuration.

    Returns
    -------
    None
    """
    config = read_config_file(config_file)
    dataset_dir = config['dataset_dir']
    to_float32 = config['save_as_float32']
    preprocess_resistance = config['preprocess']['resistance']
    preprocess_resistivity = config['preprocess']['resistivity']
    simulator_pkl = os.path.join(dataset_dir, 'simulator.pkl')

    simulator = read_pkl(simulator_pkl)
    # read nCx and nCy
    nCx = simulator.mesh.nCx  # number of cell center mesh in the x direction
    nCy = simulator.mesh.nCy  # number of cell center mesh in the z (y) direction
    # read Tx_locations and Rx_locations
    Tx_locations = simulator.urf.abmn_locations[:, :4]
    Rx_locations = simulator.urf.abmn_locations[:, 4:]

    for sub_dir in ('training', 'validation', 'testing'):
        resistance_dir = os.path.join(dataset_dir, sub_dir, 'resistance')
        resistivity_dir = os.path.join(dataset_dir, sub_dir, 'resistivity')
        raw_resistance_dir = os.path.join(resistance_dir, 'raw')
        raw_resistivity_dir = os.path.join(resistivity_dir, 'raw')
        raw_resistance_list = get_pkl_list(raw_resistance_dir)
        raw_resistivity_list = get_pkl_list(raw_resistivity_dir)

        # create resistance directories, one per preprocessing recipe
        save_resistance_dir_list = []
        for _, processes in preprocess_resistance.items():
            process_description_list = []
            for process, kwargs in processes.items():
                if process == 'add_noise':
                    process_description_list.append('[' + '_'.join([
                        f"{int(kwargs['scale'] * 100):0>3}%",
                        kwargs['noise_type'], 'noise']) + ']')
                elif process == 'log_transform':
                    process_description_list.append('[log_transform]')
                elif process == 'to_midpoint':
                    process_description_list.append('[midpoint]')
                elif process == 'to_txrx':
                    process_description_list.append('[txrx]')
            save_resistance_dir = os.path.join(
                resistance_dir, '_'.join(process_description_list))
            os.makedirs(save_resistance_dir, exist_ok=True)
            save_resistance_dir_list.append(save_resistance_dir)

        # create resistivity directories, one per preprocessing recipe
        save_resistivity_dir_list = []
        for _, processes in preprocess_resistivity.items():
            process_description_list = []
            for process, kwargs in processes.items():
                if process == 'to_section':
                    process_description_list.append('[section]')
            save_resistivity_dir = os.path.join(
                resistivity_dir, '_'.join(process_description_list))
            os.makedirs(save_resistivity_dir, exist_ok=True)
            save_resistivity_dir_list.append(save_resistivity_dir)

        # preprocess resistance (parallel version; a serial equivalent would
        # simply call _process_resistance on each file in raw_resistance_list)
        for _, processes in preprocess_resistance.items():
            save_resistance_dir = save_resistance_dir_list.pop(0)
            par = partial(_process_resistance,
                          save_resistance_dir=save_resistance_dir,
                          processes=processes,
                          to_float32=to_float32,
                          Tx_locations=Tx_locations,
                          Rx_locations=Rx_locations,
                          nCx=nCx, nCy=nCy)
            pool = mp.Pool(processes=mp.cpu_count(), maxtasksperchild=1)
            for data in tqdm(
                    pool.imap_unordered(par, raw_resistance_list),
                    desc=f'Preprocess data and save to {save_resistance_dir}',
                    total=len(raw_resistance_list)):
                pass
            pool.close()
            pool.join()

        # preprocess resistivity (parallel version; a serial equivalent would
        # simply call _process_resistivity on each file in raw_resistivity_list)
        for _, processes in preprocess_resistivity.items():
            save_resistivity_dir = save_resistivity_dir_list.pop(0)
            par = partial(_process_resistivity,
                          save_resistivity_dir=save_resistivity_dir,
                          processes=processes,
                          to_float32=to_float32,
                          nCx=nCx, nCy=nCy)
            pool = mp.Pool(processes=mp.cpu_count(), maxtasksperchild=1)
            for data in tqdm(
                    pool.imap_unordered(par, raw_resistivity_list),
                    desc=f'Preprocess data and save to {save_resistivity_dir}',
                    total=len(raw_resistivity_list)):
                pass
            pool.close()
            pool.join()

    print("IF YOU WANT TO GET THE RAW resistivity_log10, YOU SHOULD USE"
          + " `raw_resistivity_log10 = np.flipud(resistivity_log10).flatten()`")
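# Hedged usage sketch: the config keys below are inferred from the parsing at
# the top of make_processed_dataset; the recipe name ('version_1') and the
# per-process kwargs are illustrative assumptions, not the project's
# canonical YAML layout:
#
#   make_processed_dataset({
#       'dataset_dir': 'data/trial1',
#       'save_as_float32': True,
#       'preprocess': {
#           'resistance': {
#               'version_1': {
#                   'add_noise': {'scale': 0.05, 'noise_type': 'normal'},
#                   'log_transform': {},
#               },
#           },
#           'resistivity': {
#               'version_1': {'to_section': {}},
#           },
#       },
#   })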
from erinn.utils.io_utils import read_config_file, read_pkl
from erinn.utils.vis_utils import plot_result_synth

# TODO: Organize reusable code snippets into functions

FILEDIR = os.path.dirname(__file__)

# read config
config_file = os.path.join(FILEDIR, '..', '..', 'config', 'for_plot.yml')
config = read_config_file(config_file)

# parse config and setting
model_dir = os.path.join(FILEDIR, config['model_dir'])
save_figs_dir = os.path.join(config['save_figs_dir'])
predictions_dir = os.path.join(model_dir, 'predictions')
simulator_pkl = os.path.join(model_dir, 'simulator.pkl')
simulator = read_pkl(simulator_pkl)
num_figs = config["num_figs"]
if isinstance(num_figs, str):
    if num_figs == 'all':
        num_figs = np.inf  # use np.inf to save all figures
    else:
        raise ValueError('String input of "num_figs" only accepts "all"')
elif not isinstance(num_figs, int):
    raise TypeError('Input of "num_figs" only accepts "str" and "int" types')

os.makedirs(save_figs_dir, exist_ok=True)
iterator_pred = os.scandir(predictions_dir)
plot_result_synth(iterator_pred, num_figs, simulator, save_dir=save_figs_dir)
def plot_data(iterator, simulator, num_figs):
    SRCLOC = simulator.urf.abmn_locations[:, :4]
    RECLOC = simulator.urf.abmn_locations[:, 4:]
    active_idx = simulator.active_idx
    nCx = simulator.mesh.nCx
    nCy = simulator.mesh.nCy
    vectorCCx = simulator.mesh.vectorCCx
    vectorCCy = simulator.mesh.vectorCCy
    num_figs = 1 if num_figs < 1 else num_figs
    i = 1
    for file in iterator:
        data = read_pkl(file.path)
        print(data['resistance'].shape, data['resistivity_log10'].shape)
        resistance = data['resistance']
        resistivity = data['resistivity_log10']

        # plot resistance
        # txrx version
        fig, ax = plt.subplots(figsize=(16, 9))
        im = ax.imshow(
            to_txrx(resistance, SRCLOC, RECLOC, value=np.nan)[:, :, 0],
            origin='lower')
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        cbar = fig.colorbar(im, cax=cax)
        ax.set_xlabel('Rx_pair')
        ax.set_ylabel('Tx_pair')
        cbar.set_label(r'$\Delta V/I$')

        # midpoint version
        fig, ax = plt.subplots(figsize=(4, 3))
        im = ax.imshow(
            to_midpoint(resistance, SRCLOC, RECLOC, value=np.nan)[:, :, 0])
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        cbar = fig.colorbar(im, cax=cax)
        ax.set_xlabel('common midpoint')
        ax.set_ylabel('count')
        cbar.set_label(r'$\Delta V/I$')
        ax.set_aspect('auto', adjustable='box')

        # plot resistivity
        # imshow version
        fig, ax = plt.subplots()
        im = simulator.mesh.plotImage(resistivity[active_idx], ax=ax)
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        cbar = fig.colorbar(im[0], cax=cax)
        ax.set_xlabel('m')
        ax.set_ylabel('m')
        cbar.set_label(r'$\Omega \bullet m (log_{10})$')

        # contourf version
        fig, ax = plt.subplots()
        simulator.mesh.plotImage(resistivity[active_idx], ax=ax)
        im = ax.contourf(vectorCCx, vectorCCy,
                         resistivity[active_idx].reshape((nCy, nCx)))
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        cbar = fig.colorbar(im, cax=cax)
        ax.set_xlabel('m')
        ax.set_ylabel('m')
        cbar.set_label(r'$\Omega \bullet m (log_{10})$')

        plt.show()
        if i == num_figs:
            break
        else:
            i += 1
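# Hedged usage sketch (the paths are illustrative): plot_data reads file.path,
# so it expects an iterator of os.DirEntry objects such as os.scandir yields:
#
#   simulator = read_pkl('data/trial1/simulator.pkl')
#   with os.scandir('data/trial1/processed') as it:
#       plot_data(it, simulator, num_figs=3)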
                           'for_predict_resistivity.yml')
config = read_config_file(config_file)

# parse config and setting
custom_NN = config['custom_NN']
dataset_rootdir = os.path.join(FILEDIR, config['dataset_rootdir'])
testing_dir = os.path.join(dataset_rootdir, 'testing')
testing_resistance_dir = os.path.join(testing_dir, 'resistance',
                                      config['resistance_dirname'])
testing_resistivity_dir = os.path.join(testing_dir, 'resistivity',
                                       config['resistivity_dirname'])
raw_resistance_dir = os.path.join(testing_dir, 'resistance', "raw")
model_dir = config['model_dir']
simulator_pkl = os.path.join(FILEDIR, model_dir, 'simulator.pkl')
simulator = read_pkl(simulator_pkl)
weights_dir = os.path.join(FILEDIR, model_dir, 'weights')
trained_weights = os.path.join(FILEDIR, weights_dir, 'trained_weight.h5')
save_predictions_dir = os.path.join(FILEDIR, model_dir, 'predictions')
preprocess = config['preprocess']
gpus = config['num_gpu']

# Allow GPU memory growth and set visible GPUs
# References:
# https://www.tensorflow.org/guide/gpu
# https://qiita.com/studio_haneya/items/4dfaf2fb2ac44818e7e0
if tf.__version__.startswith('1.'):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)
    tf.keras.backend.set_session(session)
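# Hedged sketch (not in the original fragment): the TF 2.x counterpart of the
# TF 1.x memory-growth block above would use tf.config, e.g.:
#
#   else:
#       for gpu in tf.config.list_physical_devices('GPU'):
#           tf.config.experimental.set_memory_growth(gpu, True)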
from numba import njit

from erinn.utils.io_utils import read_pkl

FILEDIR = os.path.dirname(__file__)

# %%
workdir = os.path.join(FILEDIR, '..', 'ERI', 'template-python',
                       'scripts', 'preprocessing')
os.chdir(workdir)
resistance_pkl = os.path.join('..', '..', 'data', 'trial1', 'training',
                              'resistance', 'raw', '000001.pkl')
simulator_pkl = os.path.join('..', '..', 'data', 'trial1', 'simulator.pkl')
resistance = read_pkl(resistance_pkl)
simulator = read_pkl(simulator_pkl)
abmn_id = simulator.urf.abmn_id
num_electrode = len(
    np.unique(
        np.hstack(
            (simulator.urf.Tx_id.flatten(), simulator.urf.Rx_id.flatten()))))

# %%
# The columns of Index correspond to A/M, B/N and the electrode spacing
Index = np.array(
    sorted(list(combinations(np.arange(1, num_electrode + 1), 2)),
           key=lambda ab: ab[1] - ab[0]))
Index = np.hstack((Index, np.expand_dims(Index[:, 1] - Index[:, 0], axis=1)))
# np.expand_dims(x, axis=1) is equivalent to x[:, np.newaxis]
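# Worked example: for num_electrode == 4, the pairs sorted by electrode
# spacing (ab[1] - ab[0]) and augmented with the spacing column yield
#
#   Index = [[1 2 1]
#            [2 3 1]
#            [3 4 1]
#            [1 3 2]
#            [2 4 2]
#            [1 4 3]]
#
# i.e. rows are grouped by spacing, so pairs with the same separation are
# contiguous.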
def make_processed_dataset(config_file):
    """Preprocess raw dataset and save it to the processed directory.

    Parameters
    ----------
    config_file : str, pathlib.Path or dict
        The path to the configured yaml file
        or the dictionary for configuration.

    Returns
    -------
    None
    """
    config = read_config_file(config_file)
    raw_data_dir = config['raw_data_dir']
    save_processed_data_dir = config['save_processed_data_dir']
    preprocess = config['preprocess']
    simulator_pkl = os.path.join(raw_data_dir, 'simulator.pkl')
    save_simulator_pkl = os.path.join(save_processed_data_dir, 'simulator.pkl')
    do_preprocess = any(value['perform']
                        for action, value in preprocess.items())

    simulator = read_pkl(simulator_pkl)
    # read nCx and nCy
    nCx = simulator.mesh.nCx  # number of cell center mesh in the x direction
    nCy = simulator.mesh.nCy  # number of cell center mesh in the z (y) direction
    # read Tx_locations and Rx_locations
    Tx_locations = simulator.urf.abmn_locations[:, :4]
    Rx_locations = simulator.urf.abmn_locations[:, 4:]

    # expand simulator.config and save it
    simulator.config = {
        'generate': simulator.config,  # config for generating data
        'preprocess': config  # config for preprocessing data
    }
    os.makedirs(save_processed_data_dir, exist_ok=True)
    write_pkl(simulator, save_simulator_pkl)

    if do_preprocess:
        pattern_raw_pkl = re.compile(r'raw_data_\d{6}\.pkl')
        for root_dir, sub_dirs, files in os.walk(raw_data_dir):
            # keep only the pickle files that match the pattern
            files = list(filter(pattern_raw_pkl.match, files))
            # if the files list is empty, continue to the next directory
            if not files:
                continue
            # make sub directory
            sub_dir_in_processed = re.sub(
                raw_data_dir, save_processed_data_dir, root_dir)
            os.makedirs(sub_dir_in_processed, exist_ok=True)

            # parallel version (a serial equivalent would simply call
            # _make_processed_dataset on each filename in files)
            par = partial(_make_processed_dataset,
                          preprocess=preprocess,
                          root_dir=root_dir,
                          sub_dir_in_processed=sub_dir_in_processed,
                          Tx_locations=Tx_locations,
                          Rx_locations=Rx_locations,
                          nCx=nCx, nCy=nCy)
            pool = mp.Pool(processes=mp.cpu_count(), maxtasksperchild=1)
            for data in tqdm(
                    pool.imap_unordered(par, files),
                    desc=f'Preprocess data and save to {sub_dir_in_processed}',
                    total=len(files)):
                pass
            pool.close()
            pool.join()

    # show information about input / target tensor shape
    try:
        print("The shape of resistance (shape of NN input data): "
              + f"{data['resistance'].shape}")
        print("The shape of resistivity (shape of NN target data): "
              + f"{data['resistivity_log10'].shape}")
        print("IF YOU WANT TO GET THE RAW resistivity_log10, YOU SHOULD USE"
              + " `raw_resistivity_log10 = np.flipud(resistivity_log10).flatten()`")
    except NameError:
        pass  # no pickle files were processed
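# Hedged illustration of the note above (assumes to_section returns the
# (nCy, nCx) image obtained by flipping the flat vector vertically, as the
# contourf reshape elsewhere suggests): undo it with np.flipud + flatten:
#
#   section = to_section(resistivity_log10, nCx, nCy)
#   raw_resistivity_log10 = np.flipud(section).flatten()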