def _update_ann_data(self):
    '''Load ANN output data from the currently selected ANN analysis'''
    import yamlord

    from .config import paths, fnames

    path_output = _join(self.path_project, paths['ann'], self.current_ann)

    self.cfg_ann = yamlord.read_yaml(
        _join(path_output, fnames['cfg']['ann']))

    self.train = _rpickle(_join(path_output, fnames['ann']['train']))
    self.valid = _rpickle(_join(path_output, fnames['ann']['valid']))
    self.test = _rpickle(_join(path_output, fnames['ann']['test']))

    self.results_tune = _rpickle(_join(path_output, fnames['ann']['tune']))
    self.results_dataset = _rpickle(
        _join(path_output, fnames['ann']['dataset']))

    self.tune_cms = _rpickle(_join(path_output, fnames['ann']['cms_tune']))
    # The original assigned `cms_data` to `self.tune_cms` a second time,
    # overwriting the line above; store it on its own attribute instead.
    self.data_cms = _rpickle(_join(path_output, fnames['ann']['cms_data']))

    file_post = _join(self.path_project, paths['ann'], self.current_ann,
                      fnames['ann']['post'])
    self.post = yamlord.read_yaml(file_post)

    return None
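# Note: `_join` and `_rpickle` are used throughout but are not defined in
# this section. A minimal sketch of the module-level shorthands they are
# assumed to alias, based on how they are called above:
import os
import pandas

_join = os.path.join            # shorthand for joining path components
_rpickle = pandas.read_pickle   # shorthand for reading pickled dataframes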
def __init__(self, path_project, sgl_dur=2):
    '''Initialize the Analysis object

    Args
    ----
    path_project: str
        Path to project directory created with the
        `smartmove.create_project()` method.
    sgl_dur: int
        Duration of sub-glide splits in seconds (defaults to `2`)
    '''
    import os
    import yamlord

    from .config import fnames

    self.path_project = os.path.abspath(path_project)

    file_cfg_project = _join(self.path_project, fnames['cfg']['project'])
    self.cfg_project = yamlord.read_yaml(file_cfg_project)

    file_cfg_ann = _join(self.path_project, fnames['cfg']['ann'])
    file_cfg_glide = _join(self.path_project, fnames['cfg']['glide'])
    file_cfg_filt = _join(self.path_project, fnames['cfg']['filt'])
    file_cfg_experiments = _join(self.path_project,
                                 fnames['cfg']['exp_bounds'])

    self.cfg_ann = yamlord.read_yaml(file_cfg_ann)
    self.cfg_glide = yamlord.read_yaml(file_cfg_glide)
    self.cfg_filt = yamlord.read_yaml(file_cfg_filt)
    self.cfg_experiments = yamlord.read_yaml(file_cfg_experiments)

    self.sgl_dur = sgl_dur

    return None
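# Usage sketch (illustrative): create an `Analysis` object for an existing
# project directory. The import location of the class and the path below
# are assumptions for the example, not from the source.
import os
from smartmove import Analysis

a = Analysis(os.path.expanduser('~/smartmove_project'), sgl_dur=2)
print(a.cfg_project['meta']['created'])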
def run_ann(self, plots=False, debug=False):
    '''Perform the ANN analysis

    Args
    ----
    plots: bool
        Plots loss, accuracy, and error during training
    debug: bool
        Runs a single configuration of ANN hyperparameters
    '''
    import datetime
    import yamlord

    from . import ann
    from .config import paths, fnames

    # Define output directory
    now = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    self.current_ann = 'theanets_{}'.format(now)

    # Pre-process sub-glide data for ANN (compile, add `rho_mod`)
    sgls_all = ann.pre.process(self.path_project, self.cfg_project,
                               self.cfg_ann)

    # Run the ANN analysis
    ann.ann.run(self.path_project, self.current_ann, self.cfg_project,
                self.cfg_ann, sgls_all, plots=plots, debug=debug)

    # Reload `cfg_project` after ANN analysis additions in `ann.ann.run()`
    file_cfg_project = _join(self.path_project, fnames['cfg']['project'])
    self.cfg_project = yamlord.read_yaml(file_cfg_project)

    # Reload `cfg_ann` from the analysis output directory
    file_cfg_ann = _join(self.path_project, paths['ann'], self.current_ann,
                         fnames['cfg']['ann'])
    self.cfg_ann = yamlord.read_yaml(file_cfg_ann)

    # Post-process data
    self.post = ann.post.process(self.path_project, self.current_ann,
                                 self.cfg_ann)

    # Update ANN output directory list and load data
    self._update_ann_analyses()
    self._update_ann_data()

    return None
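# Usage sketch (illustrative), continuing the `Analysis` example above:
# `debug=True` runs a single hyperparameter configuration, which keeps
# the run short enough for a smoke test.
a.run_ann(plots=False, debug=True)
print(a.current_ann)  # e.g. 'theanets_20180101_120000'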
def load_lleo(path_data_tag, file_cal_acc, file_cal_prop):
    '''Load Little Leonardo (lleo) tag data for calculating body condition

    Args
    ----
    path_data_tag: str
        Path to datalogger data files
    file_cal_acc: str
        Path to accelerometer calibration file to use for calibrating
    file_cal_prop: str
        Path to propeller calibration file to use for calibrating

    Returns
    -------
    tag: pandas.DataFrame
        Dataframe with sensor data and calibrated sensor data
    dt_a: float
        Sampling interval of the accelerometer data (seconds)
    fs_a: float
        Sampling frequency of the accelerometer data (Hz)
    '''
    import os

    from pylleo import lleoio, lleocal
    import yamlord

    # Parse tag model and ID from the directory/experiment name
    experiment_id = os.path.split(path_data_tag)[1].replace('-', '')
    tag_model = experiment_id.split('_')[1]
    tag_id = int(experiment_id.split('_')[2])

    # Load calibration data
    cal_dict = yamlord.read_yaml(file_cal_acc)

    # Verify that the sensor ID of the data matches the calibration ID
    if str(cal_dict['tag_id']) != str(tag_id):
        raise ValueError('Data `tag_id` ({}) does not match calibration '
                         '`tag_id` ({})'.format(tag_id, cal_dict['tag_id']))

    # Load meta data
    meta = lleoio.read_meta(path_data_tag, tag_model, tag_id)

    # Load data
    sample_f = 1
    tag = lleoio.read_data(meta, path_data_tag, sample_f, overwrite=False)

    # Apply calibration to accelerometer data
    tag = lleocal.calibrate_acc(tag, cal_dict)

    # Calibrate propeller measurements to speed (m/s)
    tag = lleocal.calibrate_propeller(tag, file_cal_prop)

    # Linearly interpolate data
    tag.interpolate('linear', inplace=True)

    # Get the original sampling interval/frequency of the accelerometer
    dt_a = float(meta['parameters']['acceleration_x']['Interval(Sec)'])
    fs_a = 1 / dt_a

    return tag, dt_a, fs_a
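# Usage sketch (illustrative): the data directory name must follow the
# '<date>_<tag_model>_<tag_id>_...' convention parsed above; all paths
# here are hypothetical.
tag, dt_a, fs_a = load_lleo(
    '/data/20150101_W190PD3GT_34839_skinny_control',
    '/data/cal/cal_acc.yml',
    '/data/cal/cal_prop.yml')
print('Accelerometer sampling frequency: {} Hz'.format(fs_a))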
def create_project(path_project):
    """Generate a project directory with template configuration files

    Args
    ----
    path_project: str
        Path at which the project directory should be created
    """
    from collections import OrderedDict
    import datetime
    import importlib
    import os
    import shutil
    import yamlord

    from . import utils
    from .config import paths, fnames

    # Get the path to the installed `smartmove` package
    module = importlib.util.find_spec('smartmove')
    module_path = os.path.split(module.origin)[0]

    # Ensure the project directory exists before copying templates into it
    os.makedirs(path_project, exist_ok=True)

    # Copy configuration files from `smartmove/_templates/` to `path_project`
    fname_cfg_project = fnames['cfg']['project']
    fname_cfg_exp = fnames['cfg']['exp_bounds']
    fname_cfg_ann = fnames['cfg']['ann']
    fname_cfg_glide = fnames['cfg']['glide']
    fname_cfg_filt = fnames['cfg']['filt']

    for fname in [fname_cfg_project, fname_cfg_exp, fname_cfg_ann,
                  fname_cfg_glide, fname_cfg_filt]:
        src = os.path.join(module_path, '_templates', fname)
        dst = os.path.join(path_project, fname)
        shutil.copyfile(src, dst)

    # Add creation datetime and versions to `cfg_project`
    d = yamlord.read_yaml(os.path.join(path_project, fname_cfg_project))
    d['meta'] = OrderedDict()
    d.move_to_end('meta', last=False)
    d['meta']['created'] = datetime.datetime.now().strftime(
        '%Y-%m-%d %H:%M:%S')
    d['meta']['versions'] = utils.get_versions('smartmove')

    yamlord.write_yaml(d, os.path.join(path_project, fname_cfg_project))

    # Create project sub-paths if not existing
    for key in paths.keys():
        p = os.path.join(path_project, paths[key])
        if not os.path.isdir(p):
            os.makedirs(p, exist_ok=True)

    print('\nYour project directory has been created at {}.\n'
          'You must now copy your datalogger data to the `{}` directory, '
          'the body condition `.csv` files to the `{}` directory, and the '
          'CTD `.mat` file to the `{}` directory'.format(
              path_project, paths['tag'], paths['csv'], paths['ctd']))

    return None
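# Usage sketch (illustrative): the target path is a placeholder.
import smartmove

smartmove.create_project('/home/user/smartmove_project')
# Then copy the datalogger data, body-condition `.csv` files, and CTD
# `.mat` file into the sub-directories named in the printed message.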
def __init__(self, file):
    import logging

    import yamlord

    # Fetch YAML file as an ordered dict
    self.file = file
    self.data = yamlord.read_yaml(self.file)

    logging.debug(self.data)
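# Usage sketch (illustrative): assuming this initializer belongs to a
# small YAML-backed configuration wrapper; the class name `Config` and the
# file name are hypothetical.
cfg = Config('cfg_project.yml')
print(list(cfg.data.keys()))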
def make_all(path_project, path_analysis, studymap=True):
    '''Load data and generate all plots

    Args
    ----
    path_project: str
        Path to project directory created with the
        `smartmove.create_project()` method.
    path_analysis: str
        The directory name of the ANN analysis to produce plots for
    studymap: bool
        Switch for also generating the study area map (Default `True`)
    '''
    import os
    import pandas
    import yamlord

    from ..ann import pre
    from ..ann import utils_ann
    from ..config import paths, fnames
    from . import utils
    from . import latex

    # Create output path for plots
    path_plot = _join(path_project, 'paper/figures')
    os.makedirs(path_plot, exist_ok=True)

    # Define path to ANN analysis data
    path_output = _join(path_project, paths['ann'], path_analysis)

    # Load ANN configuration
    file_cfg_ann = _join(path_output, fnames['cfg']['ann'])
    cfg_ann = yamlord.read_yaml(file_cfg_ann)

    # Load experiment data
    file_field = _join(path_project, paths['csv'], fnames['csv']['field'])
    file_isotope = _join(path_project, paths['csv'],
                         fnames['csv']['isotope'])
    field, isotope = pre.add_rhomod(file_field, file_isotope)

    # Compile experiments, adding columns necessary for tables/figures
    exps_all = utils.compile_exp_data(path_project, field, cfg_ann)

    # Plot SGLs against `rho_mod`
    plot_sgls_tmbd(exps_all, path_plot=path_plot)

    # Plot confusion matrix
    file_cms_tune = _join(path_output, fnames['ann']['cms_tune'])
    cms_tune = pandas.read_pickle(file_cms_tune)
    cm_valid = cms_tune['validation']['cm']
    targets = cms_tune['targets']
    valid_targets = cms_tune['validation']['targets']
    target_ids = [i for i in range(len(targets))
                  if targets[i] in valid_targets]

    tick_labels = list()
    for i in range(len(valid_targets)):
        tick_labels.append('{}: {}<='.format(target_ids[i],
                                             valid_targets[i]))

    xlabel = r'Predicted $\rho_{mod}$ bin ($kg \cdot m^{-3}$)'
    ylabel = r'Observed $\rho_{mod}$ bin ($kg \cdot m^{-3}$)'
    utils_ann.plot_confusion_matrix(cm_valid, tick_labels, xlabel=xlabel,
                                    ylabel=ylabel, normalize=False,
                                    title='', cmap=None,
                                    xlabel_rotation=45,
                                    path_plot=path_plot)

    # Plot learning curve of ANN
    file_results_dataset = _join(path_output, fnames['ann']['dataset'])
    results_dataset = pandas.read_pickle(file_results_dataset)
    m = utils.last_monitors(results_dataset)
    plot_learning_curves(m, path_plot)

    if studymap:
        # Plot study area map
        plot_studyarea(path_plot)

    # Plot hyperparameter accuracy performance
    file_results_tune = _join(path_output, fnames['ann']['tune'])
    results_tune = pandas.read_pickle(file_results_tune)
    plot_ann_performance(cfg_ann, results_tune, path_plot)

    # Plot sub-glide heatmaps
    plot_sgl_histos(path_project, cfg_ann, path_plot)

    # Plot example sub-glide plot
    plot_sgl_highlight(path_project, cfg_ann, path_plot, clip_x=True)

    # Convert all `.eps` images to `.png`
    for fig in os.listdir(path_plot):
        if fig.endswith('.eps'):
            fname = os.path.splitext(fig)[0]
            latex.utils.pdf_to_img(path_plot, fname, in_ext='eps',
                                   out_ext='png', dpi='300')
            utils.crop_png(_join(path_plot, fname + '.png'))

    return None
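# Usage sketch (illustrative): generate all figures for an existing ANN
# analysis. The analysis directory name is a placeholder following the
# 'theanets_<timestamp>' pattern created by `run_ann()`.
make_all('/home/user/smartmove_project', 'theanets_20180101_120000',
         studymap=False)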
def _process_tag_data(path_project, cfg_project, cfg_glide, path_exp, tag,
                      fs_a, plots=True, debug=False):
    '''Process tag data for dive and glide identification

    Args
    ----
    path_project: str
        Parent path for the project
    cfg_project: OrderedDict
        Dictionary of configuration parameters for the current project
    cfg_glide: OrderedDict
        Dictionary of configuration parameters for glide identification
    path_exp: str
        Directory name of `tag` data being processed
    tag: pandas.DataFrame
        Data loaded from tag with associated sensors
    fs_a: float
        Sampling frequency (i.e. number of samples per second)
    plots: bool
        Switch for turning on plots (Default `True`). When activated, plots
        for reviewing signal processing will be displayed.
    debug: bool
        Switch for turning on debugging (Default `False`). When activated,
        values for `cutoff_frq` and `J` will be set to generic values and
        diagnostic plots of the `speed` parameter in `tag` will be
        displayed.

    Returns
    -------
    cfg: OrderedDict
        Dictionary of glide configuration parameters, updated with values
        derived from the data or from user input
    tag: pandas.DataFrame
        Data loaded from tag with associated sensors, with added fields
        from signal processing
    dives: pandas.DataFrame
        Start and stop indices and attributes for dive events in `tag`
        data, including: start_idx, stop_idx, dive_dur, depths_max,
        depths_max_idx, depths_mean, compr_mean.
    masks: pandas.DataFrame
        Boolean masks for slicing identified dives, glides, and sub-glides
        from the `tag` dataframe.
    exp_ind: numpy.ndarray
        Indices of the `tag` data to be analyzed
    '''
    from collections import OrderedDict
    import copy
    import numpy
    from os.path import join as _join
    import pandas
    import pyotelem
    from pyotelem.plots import plotdives, plotdsp
    import yamlord

    from . import utils_ctd
    from ..config import paths, fnames

    exp_idxs = [None, None]

    file_cfg_exp = _join(path_project, fnames['cfg']['exp_bounds'])

    cfg = copy.deepcopy(cfg_glide)

    try:
        cfg_exp = yamlord.read_yaml(file_cfg_exp)
    except FileNotFoundError:
        # No experiment bounds file yet; one is written below
        cfg_exp = OrderedDict()

    # 1 Select indices for analysis
    #--------------------------------------------------------------------
    print('* Select indices for analysis\n')

    if path_exp in cfg_exp:
        exp_idxs[0] = cfg_exp[path_exp]['start_idx']
        exp_idxs[1] = cfg_exp[path_exp]['stop_idx']
    else:
        # Plot accelerometer axes, depths, and propeller speed
        plotdives.plot_triaxial_depths_speed(tag)

        # Get user input for the start/stop indices of the analysis
        exp_idxs[0] = pyotelem.utils.recursive_input(
            'Analysis start index', int)
        exp_idxs[1] = pyotelem.utils.recursive_input(
            'Analysis stop index', int)

        cfg_exp[path_exp] = OrderedDict()
        cfg_exp[path_exp]['start_idx'] = exp_idxs[0]
        cfg_exp[path_exp]['stop_idx'] = exp_idxs[1]
        yamlord.write_yaml(cfg_exp, file_cfg_exp)

    # Create dataframe for storing masks for various views of the data
    masks = pandas.DataFrame(index=range(len(tag)), dtype=bool)

    # Create mask of values to be considered part of the analysis
    # (label-based slicing is inclusive, hence `exp_idxs[1] - 1`)
    masks['exp'] = False
    masks.loc[exp_idxs[0]:exp_idxs[1] - 1, 'exp'] = True

    # Create indices array `exp_ind` for analysis
    exp_ind = numpy.where(masks['exp'])[0]

    # 1.3 Calculate pitch, roll, and heading
    #--------------------------------------------------------------------
    print('* Calculate pitch, roll, heading\n')

    tag['p'], tag['r'], tag['h'] = pyotelem.dynamics.prh(
        tag['Ax_g'].values, tag['Ay_g'].values, tag['Az_g'].values)

    # 2 Define dives
    #--------------------------------------------------------------------
    print('* Define dives\n')

    dives, masks['dive'] = pyotelem.dives.finddives2(
        tag['depth'].values, cfg_glide['min_depth'])

    # 3.2.1 Determine `stroke_frq` fluking rate and cut-off frequency
    #--------------------------------------------------------------------
    print('* Get stroke frequency\n')

    # Calculate power spectrum of the accelerometer data in the animal
    # frame
    Ax_g = tag['Ax_g'][masks['exp']].values
    Az_g = tag['Az_g'][masks['exp']].values

    # NOTE change `stroke_ratio` here to modify the selection method.
    # Should be OK other than `t_max`; these values are too high
    if debug is False:
        cutoff_frq, stroke_frq, stroke_ratio = (
            pyotelem.glides.get_stroke_freq(
                Ax_g, Az_g, fs_a, cfg_glide['nperseg'],
                cfg_glide['peak_thresh'], stroke_ratio=None))
        # Store user input cutoff and stroke frequencies
        cfg['cutoff_frq'] = cutoff_frq
        cfg['stroke_frq'] = stroke_frq
        cfg['stroke_ratio'] = stroke_ratio
    else:
        cutoff_frq = 0.3
        cfg['cutoff_frq'] = cutoff_frq

    # 3.2.2 Separate low and high frequency signals
    #--------------------------------------------------------------------
    print('* Separate accelerometry to high and low-pass signals\n')

    order = 5
    cutoff_str = str(cfg['cutoff_frq'])
    for btype, suffix in zip(['low', 'high'], ['lf', 'hf']):
        b, a = pyotelem.dsp.butter_filter(cfg['cutoff_frq'], fs_a,
                                          order=order, btype=btype)
        for param in ['Ax_g', 'Ay_g', 'Az_g']:
            key = '{}_{}_{}'.format(param, suffix, cutoff_str)
            tag[key] = pyotelem.dsp.butter_apply(b, a, tag[param].values)

    # Plot low and high frequency accelerometer signals
    if plots is True:
        plotdsp.plot_lf_hf(tag['Ax_g'][masks['exp']],
                           tag['Ax_g_lf_' + cutoff_str][masks['exp']],
                           tag['Ax_g_hf_' + cutoff_str][masks['exp']],
                           title='x axis')

        plotdsp.plot_lf_hf(tag['Ay_g'][masks['exp']],
                           tag['Ay_g_lf_' + cutoff_str][masks['exp']],
                           tag['Ay_g_hf_' + cutoff_str][masks['exp']],
                           title='y axis')

        plotdsp.plot_lf_hf(tag['Az_g'][masks['exp']],
                           tag['Az_g_lf_' + cutoff_str][masks['exp']],
                           tag['Az_g_hf_' + cutoff_str][masks['exp']],
                           title='z axis')

    # 3.2.3 Calculate the smoothed pitch from the low-pass filtered
    # acceleration signal to avoid incorporating signals above the
    # stroking periods
    #--------------------------------------------------------------------
    print('* Calculate low-pass pitch, roll, heading\n')

    prh_lf = pyotelem.dynamics.prh(
        tag['Ax_g_lf_' + cutoff_str].values,
        tag['Ay_g_lf_' + cutoff_str].values,
        tag['Az_g_lf_' + cutoff_str].values)
    tag['p_lf'], tag['r_lf'], tag['h_lf'] = prh_lf

    # 4 Define precise descent and ascent phases
    #--------------------------------------------------------------------
    print('* Get precise indices of descents, ascents, phase and bottom\n')

    masks['des'], masks['asc'] = pyotelem.dives.get_des_asc2(
        tag['depth'].values, masks['dive'].values, tag['p_lf'].values,
        cfg['cutoff_frq'], fs_a, order=5)

    # Typecast `des` and `asc` columns to `bool`
    masks = masks.astype(bool)

    if plots is True:
        plotdives.plot_dives_pitch(tag['depth'][masks['exp']],
                                   masks['dive'][masks['exp']],
                                   masks['des'][masks['exp']],
                                   masks['asc'][masks['exp']],
                                   tag['p'][masks['exp']],
                                   tag['p_lf'][masks['exp']])

    # 8 Estimate seawater density around the tagged animal
    #--------------------------------------------------------------------
    print('* Estimate seawater density\n')

    # Study location and max depth to average salinities
    lon = cfg_project['experiment']['coords']['lon']
    lat = cfg_project['experiment']['coords']['lat']
    max_depth = cfg_project['experiment']['net_depth']

    # Read CTD data and estimate seawater density at tag temperatures
    fname_ctd = cfg_project['experiment']['fname_ctd']
    file_ctd_mat = _join(path_project, paths['ctd'], fname_ctd)

    t = tag['temperature'].values
    tag['dsw'] = utils_ctd.get_seawater_densities(file_ctd_mat, t, lon,
                                                  lat, max_depth)
    # 6.1 Extract strokes and glides using the heave,
    # high-pass filtered (HPF) acceleration signal, axis=3
    #--------------------------------------------------------------------
    # Two methods for estimating stroke frequency `stroke_frq`:
    # * from the body rotations (pry) using the magnetometer method
    # * from the dorso-ventral axis of the HPF acceleration signal.
    # For both methods, `t_max` and `J` need to be determined.

    # Choose a value for J based on a plot showing distribution of signals:
    # hpf-x: when detecting glides in the next step, use Ahf_Anlf() with
    #        axis=0
    # hpf-z: when detecting glides in the next step, use Ahf_Anlf() with
    #        axis=2
    print('* Get fluke signal threshold\n')

    if debug is False:
        # Plot PSDs of the high-pass filtered signals for J selection
        Ax_g_hf = tag['Ax_g_hf_' + cutoff_str][masks['exp']].values
        Az_g_hf = tag['Az_g_hf_' + cutoff_str][masks['exp']].values

        f_wx, Sx, Px, dpx = pyotelem.dsp.calc_PSD_welch(Ax_g_hf, fs_a,
                                                        nperseg=512)
        f_wz, Sz, Pz, dpz = pyotelem.dsp.calc_PSD_welch(Az_g_hf, fs_a,
                                                        nperseg=512)

        import matplotlib.pyplot as plt
        fig, (ax1, ax2) = plt.subplots(1, 2)
        ax1.plot(f_wx, Sx, label='hf-x PSD')
        ax1.plot(f_wz, Sz, label='hf-z PSD')
        ax1.legend(loc='upper right')
        ax2.plot(tag['datetimes'][masks['exp']], Ax_g_hf, label='hf-x')
        ax2.plot(tag['datetimes'][masks['exp']], Az_g_hf, label='hf-z')
        ax2.legend(loc='upper right')
        fig.autofmt_xdate()
        plt.show()

        # Get user selection for J - select one for both axes
        cfg['J'] = pyotelem.utils.recursive_input('J (fluke magnitude)',
                                                  float)
    else:
        cfg['J'] = 0.4

    return cfg, tag, dives, masks, exp_ind
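# Standalone sketch (illustrative) of the low-/high-pass split performed
# in `_process_tag_data` via pyotelem's Butterworth helpers, using only
# scipy. The sampling frequency and signal here are made up; the cutoff
# matches the debug-branch default above.
import numpy as np
from scipy import signal

fs_a = 16.0        # sampling frequency (Hz), illustrative
cutoff_frq = 0.3   # cutoff frequency (Hz), as in the debug branch
order = 5

t = np.arange(0, 60, 1 / fs_a)
# Slow body motion (0.1 Hz) plus a faster stroking-like signal (2 Hz)
acc = np.sin(2 * np.pi * 0.1 * t) + 0.3 * np.sin(2 * np.pi * 2.0 * t)

for btype in ('low', 'high'):
    # Normalize the cutoff by the Nyquist frequency (fs/2)
    b, a = signal.butter(order, cutoff_frq / (fs_a / 2), btype=btype)
    filtered = signal.filtfilt(b, a, acc)  # zero-phase filtering
    print(btype, filtered[:3])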
def make_all(path_project, path_analysis):
    '''Load data and generate all tables

    Args
    ----
    path_project: str
        Path to project directory created with the
        `smartmove.create_project()` method.
    path_analysis: str
        The directory name of the ANN analysis to produce tables for
    '''
    import numpy
    import os
    import pandas
    import yamlord

    from ..ann import pre
    from ..config import paths, fnames
    from . import utils
    from . import table_attributes

    # Load ANN configuration for the selected analysis
    file_cfg_ann = _join(path_project, paths['ann'], path_analysis,
                         fnames['cfg']['ann'])
    cfg_ann = yamlord.read_yaml(file_cfg_ann)

    # Load table titles and captions
    table_attrs = table_attributes.get_all()

    # Create table output directory
    path_table = _join(path_project, 'paper/tables')
    os.makedirs(path_table, exist_ok=True)

    # Load field and isotope data
    file_field = _join(path_project, paths['csv'], fnames['csv']['field'])
    file_isotope = _join(path_project, paths['csv'],
                         fnames['csv']['isotope'])
    field, isotope = pre.add_rhomod(file_field, file_isotope)

    # Replace animal names with numeric IDs
    animals = sorted(numpy.unique(isotope['animal']), reverse=True)
    for i, a in enumerate(animals, start=1):
        ind = numpy.where(isotope['animal'] == a)
        isotope.loc[isotope.index[ind], 'animal'] = i

    # Compile experiments, adding columns necessary for tables/figures
    exps_all = utils.compile_exp_data(path_project, field, cfg_ann)

    # Create isotope experiments table
    ignore_isotope = ['length', 'girth', 'contributer1', 'contributer2',
                      'notes']
    name_isotope = table_isotope.__name__
    # Ensure only rows with an ID are added to the table
    isotope = isotope[~numpy.isnan(isotope['id'])]
    isotope = utils.filter_dataframe(isotope, ignore_isotope)
    table_isotope(name_isotope, table_attrs[name_isotope], path_table,
                  isotope)

    # Create compiled experiment table
    name_exps = table_exps.__name__
    table_exps(name_exps, table_attrs[name_exps], path_table, exps_all)

    # Create ANN hyperparameter table
    name_ann_params = table_ann_params.__name__
    table_ann_params(name_ann_params, table_attrs[name_ann_params],
                     path_table, cfg_ann)

    # Create ANN target bin description table
    name_fdescr = table_ann_target_descr.__name__
    file_post = _join(path_project, paths['ann'], path_analysis,
                      fnames['ann']['post'])
    post = yamlord.read_yaml(file_post)
    data = utils.target_value_descr(post)
    table_ann_target_descr(name_fdescr, table_attrs[name_fdescr],
                           path_table, data)

    # Create input feature statistics table
    name_fstats = table_ann_feature_stats.__name__
    fname_sgls = fnames['ann']['sgls']
    file_sgls = _join(path_project, paths['ann'], path_analysis,
                      fname_sgls)
    sgls_all = pandas.read_pickle(file_sgls)
    sgls_all = sgls_all.dropna()
    feature_cols = ['abs_depth_change', 'dive_phase_int', 'mean_a',
                    'mean_depth', 'mean_pitch', 'mean_speed',
                    'mean_swdensity', 'total_depth_change',
                    'total_speed_change']
    input_stats = utils.input_feature_stats(sgls_all, feature_cols)
    table_ann_feature_stats(name_fstats, table_attrs[name_fstats],
                            path_table, input_stats)

    # Create target value statistics table
    name_tstats = table_ann_target_stats.__name__
    file_train = _join(path_project, paths['ann'], path_analysis,
                       fnames['ann']['train'])
    file_valid = _join(path_project, paths['ann'], path_analysis,
                       fnames['ann']['valid'])
    file_test = _join(path_project, paths['ann'], path_analysis,
                      fnames['ann']['test'])
    train = pandas.read_pickle(file_train)
    # The original read `file_train` for all three splits; read the
    # matching validation and test files instead.
    valid = pandas.read_pickle(file_valid)
    test = pandas.read_pickle(file_test)
    target_stats = utils.target_value_stats(train, valid, test)
    table_ann_target_stats(name_tstats, table_attrs[name_tstats],
                           path_table, target_stats)

    return None
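# Usage sketch (illustrative): generate the LaTeX tables for an existing
# ANN analysis, mirroring the figures module; the analysis directory name
# is a placeholder.
make_all('/home/user/smartmove_project', 'theanets_20180101_120000')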