Example 1
def create_project(path_project):
    """Generate project based on values in *d*."""
    from collections import OrderedDict
    import datetime
    import importlib
    import os
    import shutil
    import yamlord

    from . import utils
    from .config import paths, fnames

    # Get the installation path of the `smartmove` module
    module = importlib.util.find_spec('smartmove')
    module_path = os.path.split(module.origin)[0]

    # Copy configuration files from `smartmove/_templates/` to `path_project`
    fname_cfg_project = fnames['cfg']['project']
    fname_cfg_exp = fnames['cfg']['exp_bounds']
    fname_cfg_ann = fnames['cfg']['ann']
    fname_cfg_glide = fnames['cfg']['glide']
    fname_cfg_filt = fnames['cfg']['filt']
    for fname in [
            fname_cfg_project, fname_cfg_exp, fname_cfg_ann, fname_cfg_glide,
            fname_cfg_filt
    ]:
        src = os.path.join(module_path, '_templates', fname)
        dst = os.path.join(path_project, fname)
        shutil.copyfile(src, dst)

    # Add creation datetime and versions to `cfg_project`
    d = yamlord.read_yaml(os.path.join(path_project, fname_cfg_project))
    d['meta'] = OrderedDict()
    d.move_to_end('meta', last=False)
    d['meta']['created'] = datetime.datetime.now().strftime(
        '%Y-%m-%d %H:%M:%S')
    d['meta']['versions'] = utils.get_versions('smartmove')

    yamlord.write_yaml(d, os.path.join(path_project, fname_cfg_project))

    # Create project sub-directories if they do not already exist
    for key in paths.keys():
        os.makedirs(os.path.join(path_project, paths[key]), exist_ok=True)

    print('\nYour project directory has been created at {}.\n'
          'You must now copy your datalogger data to the `{}` directory, '
          'the body condition `.csv` files to the `{}` directory, and the CTD '
          '`.mat` file to the `{}` directory'.format(path_project,
                                                     paths['tag'],
                                                     paths['csv'],
                                                     paths['ctd']))

    return None
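A minimal invocation sketch (not from the source repository), assuming `create_project` is exposed at the top level of the `smartmove` package as the relative imports above suggest; the target path is hypothetical:

import os

import smartmove

# Hypothetical project directory; any writable path works
path_project = os.path.expanduser('~/smartmove_project')
os.makedirs(path_project, exist_ok=True)

# Copies the template configuration files and creates the project sub-directories
smartmove.create_project(path_project)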
Example 2
import errno
import logging
import os

import yamlord


def write_yaml_file(filename, d, overwrite=False):
    """Write dictionary `d` as YAML to `filename`, recursively creating the parent directory if necessary."""
    if not os.path.exists(os.path.dirname(filename)) and overwrite is False:
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    logging.debug("Writing yaml file {}".format(filename))
    logging.debug(d)
    yamlord.write_yaml(d, filename)
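A short usage sketch, assuming the function and imports above are in scope; the output path and settings dictionary are hypothetical:

from collections import OrderedDict

# The nested output directory is created on demand by write_yaml_file()
settings = OrderedDict([('run', OrderedDict([('seed', 42), ('n_iter', 100)]))])
write_yaml_file('output/config/settings.yml', settings)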
Example 3
    def write(self, file=None):
        if file is not None:
            self.file = file
        return yamlord.write_yaml(self.data, self.file)
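A minimal, hypothetical container class sketching how such a `write()` method might be used; the `data` and `file` attribute names follow the snippet above:

from collections import OrderedDict

import yamlord


class YamlDoc:
    """Hypothetical wrapper holding a dictionary and a default output path."""

    def __init__(self, data, file):
        self.data = data
        self.file = file

    def write(self, file=None):
        # Override and remember the output path if one is passed
        if file is not None:
            self.file = file
        return yamlord.write_yaml(self.data, self.file)


doc = YamlDoc(OrderedDict(version='1.0'), 'doc.yml')
doc.write()            # writes to the default path 'doc.yml'
doc.write('copy.yml')  # writes to 'copy.yml' and makes it the new default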
Example 4
def process(path_project, path_analysis, cfg_ann):
    from collections import OrderedDict
    import numpy
    import os
    from os.path import join as _join
    import pandas
    import pyotelem
    import yamlord

    from . import pre
    from ..config import paths, fnames

    print(path_analysis)
    path_output = _join(path_project, paths['ann'], path_analysis)

    file_field = _join(path_project, paths['csv'], fnames['csv']['field'])
    file_isotope = _join(path_project, paths['csv'], fnames['csv']['isotope'])
    field, isotope = pre.add_rhomod(file_field, file_isotope)

    # EXPERIMENT INPUT
    post = OrderedDict()
    post['input_exp'] = OrderedDict()

    # n experiments and animals
    post['n_field'] = len(field)
    post['n_animals'] = len(field['animal'].unique())

    # Min max values of rho_mod and % lipid for each seal
    post['exp'] = OrderedDict()
    post['iso'] = OrderedDict()
    for a in numpy.unique(field['animal']):
        # Field experiment values
        post['exp'][a] = OrderedDict()
        mask = field['animal'] == a
        post['exp'][a]['min_rhomod'] = field[mask]['rho_mod'].min()
        post['exp'][a]['max_rhomod'] = field[mask]['rho_mod'].max()

        # Isotope experiment values
        post['iso'][a] = OrderedDict()
        mask = isotope['animal'] == a.capitalize()
        post['iso'][a]['min_mass'] = isotope[mask]['mass_kg'].min()
        post['iso'][a]['max_mass'] = isotope[mask]['mass_kg'].max()

    # ANN CONFIG
    results = pandas.read_pickle(_join(path_output, fnames['ann']['tune']))

    post['ann'] = OrderedDict()

    # Number of network configurations
    post['ann']['n_configs'] = len(results)

    # Load training data
    file_train = _join(path_output, 'data_train.p')
    file_valid = _join(path_output, 'data_valid.p')
    file_test = _join(path_output, 'data_test.p')
    train = pandas.read_pickle(file_train)
    valid = pandas.read_pickle(file_valid)
    test = pandas.read_pickle(file_test)

    # Number of samples compiled, train, valid, test
    post['ann']['n'] = OrderedDict()
    post['ann']['n']['train'] = len(train[0])
    post['ann']['n']['valid'] = len(valid[0])
    post['ann']['n']['test'] = len(test[0])
    post['ann']['n']['all'] = len(train[0]) + len(valid[0]) + len(test[0])

    # percentage of compiled dataset in train, valid, test
    post['ann']['n']['perc_train'] = len(train[0]) / post['ann']['n']['all']
    post['ann']['n']['perc_valid'] = len(valid[0]) / post['ann']['n']['all']
    post['ann']['n']['perc_test'] = len(test[0]) / post['ann']['n']['all']

    # Total tuning time
    post['ann']['total_train_time'] = results['train_time'].sum()

    # POSTPROCESS VALUES
    # Best/worst classification accuracies
    mask_best = results['accuracy'] == results['accuracy'].max()
    best_idx = results['train_time'][mask_best].idxmin()

    mask_worst = results['accuracy'] == results['accuracy'].min()
    worst_idx = results['train_time'][mask_worst].idxmax()

    post['ann']['best_idx'] = best_idx
    post['ann']['worst_idx'] = worst_idx

    # Get min/max accuracy and training time for all configurations
    post['ann']['metrics'] = OrderedDict()
    for key in ['accuracy', 'train_time']:
        post['ann']['metrics'][key] = OrderedDict()
        post['ann']['metrics'][key]['max_idx'] = results[key].idxmax()
        post['ann']['metrics'][key]['min_idx'] = results[key].idxmin()

        post['ann']['metrics'][key]['max'] = results[key].max()
        post['ann']['metrics'][key]['min'] = results[key].min()

        post['ann']['metrics'][key]['best'] = results[key][best_idx]
        post['ann']['metrics'][key]['worst'] = results[key][worst_idx]

    # Optimal network results
    post['ann']['opt'] = OrderedDict()

    net = results['net'][best_idx]

    # Loop 10 times taking mean prediction time
    # Each loop, 100k iterations of timing
    file_test = _join(path_output, fnames['ann']['test'])
    test = pandas.read_pickle(file_test)
    features = numpy.expand_dims(test[0][0], axis=0)
    t_pred = time_prediction(net, features)
    post['ann']['opt']['t_pred'] = t_pred

    # Filesize of trained NN
    file_net_best = './net.tmp'
    pandas.to_pickle(net, file_net_best)
    st = os.stat(file_net_best)
    os.remove(file_net_best)
    post['ann']['opt']['trained_size'] = st.st_size / 1000  #kB

    # Fraction step between subsets of the data used in the dataset size test
    post['ann']['dataset'] = 'numpy.arange(0, 1, 0.03)[1:]'

    # Tuning confusion matrices (cms) from the optimal configuration:
    # one field per dataset `train`, `valid`, and `test`;
    # the first-level `targets` field applies to all datasets
    post['ann']['bins'] = OrderedDict()
    file_tune_cms = _join(path_output, fnames['ann']['cms_tune'])

    tune_cms = pandas.read_pickle(file_tune_cms)
    bins = tune_cms['targets']

    # Range of each bin, density, lipid percent
    bin_range = range(len(bins) - 1)

    rho_lo = numpy.array([bins[i] for i in bin_range])
    rho_hi = numpy.array([bins[i + 1] for i in bin_range])
    # Note density is converted from kg/m^3 to g/cm^3 for `dens2lip`
    lip_lo = pyotelem.physio_seal.dens2lip(rho_lo * 0.001)
    lip_hi = pyotelem.physio_seal.dens2lip(rho_hi * 0.001)

    # Generate bin ranges as strings
    fmt_bin = r'{:7.2f} <= rho_mod < {:7.2f}'
    fmt_lip = r'{:6.2f} >= lipid % > {:6.2f}'
    str_bin = [fmt_bin.format(lo, hi) for lo, hi in zip(rho_lo, rho_hi)]
    str_lip = [fmt_lip.format(lo, hi) for lo, hi in zip(lip_lo, lip_hi)]

    path_sgls = _join(path_output, fnames['ann']['sgls'])
    sgls_ann = pandas.read_pickle(path_sgls)

    post['ann']['bins']['values'] = list(bins)
    post['ann']['bins']['value_range'] = str_bin
    post['ann']['bins']['value_diff'] = list(numpy.diff(bins))

    # Note density is converted from kg/m^3 to g/cm^3 for `dens2lip`
    lipid_perc = pyotelem.physio_seal.dens2lip(bins * 0.001)
    post['ann']['bins']['lipid_perc'] = list(lipid_perc)
    post['ann']['bins']['lipid_range'] = str_lip
    post['ann']['bins']['lipid_diff'] = list(numpy.diff(lipid_perc))

    precision = calculate_precision(tune_cms['validation']['cm'])
    post['ann']['bins']['precision'] = [None] * len(bins)
    targets = tune_cms['validation']['targets']
    for i in range(len(bins)):
        if bins[i] in targets:
            post['ann']['bins']['precision'][i] = precision[bins[i] == targets]
        else:
            post['ann']['bins']['precision'][i] = 'None'

    # Save post processing results as YAML
    file_post = _join(path_output, fnames['ann']['post'])
    yamlord.write_yaml(post, file_post)

    return post
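`time_prediction` and `calculate_precision` are defined elsewhere in the module and not shown here. As an illustration only, a sketch of a prediction-timing helper matching the comment above (10 repeats, averaging the per-call time over 100k calls); the `net.predict` call is an assumption about the network object's API:

import timeit


def time_prediction(net, features, n_repeats=10, n_iter=100000):
    """Return the mean single-prediction time in seconds (hypothetical helper)."""
    timer = timeit.Timer(lambda: net.predict(features))
    # `repeat` returns the total time of `n_iter` calls, repeated `n_repeats` times
    times = timer.repeat(repeat=n_repeats, number=n_iter)
    return sum(times) / (n_repeats * n_iter)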
Example 5
def run(path_project,
        path_analysis,
        cfg_project,
        cfg_ann,
        sgls_all,
        plots=False,
        debug=False):
    '''
    Compile subglide data, tune network architecture and test dataset size

    Args
    ----
    cfg_project: OrderedDict
        Dictionary of configuration parameters for the current project
    cfg_ann: OrderedDict
        Dictionary of configuration parameters for the ANN
    debug: bool
        Switch for running a single network configuration
    plots: bool
        Switch for generating diagnostic plots after each network training

    Returns
    -------
    cfg: dict
        Dictionary of network configuration parameters used
    data: tuple
        Tuple collecting training, validation, and test sets. Also includes
        bin delineation values
    results: tuple
        Tuple collecting results dataframes and confusion matrices

    Note
    ----
    The validation set is split into `validation` and `test` sets: the first
    is used for initial comparisons of the various network configuration
    accuracies, and the second is kept as a clean test set for obtaining a
    true accuracy, since reusing the `validation` set can cause the routine
    to overfit to it.
    '''

    from collections import OrderedDict
    import climate
    import numpy
    import os
    import pandas
    import theano
    import yamlord

    from . import utils_ann

    from .utils_ann import ppickle
    from ..config import paths, fnames

    # Environment settings - logging, Theano, load configuration, set paths
    #---------------------------------------------------------------------------
    climate.enable_default_logging()
    theano.config.compute_test_value = 'ignore'

    # Configuration settings
    if debug is True:
        for key in cfg_ann['net_tuning'].keys():
            cfg_ann['net_tuning'][key] = [
                cfg_ann['net_tuning'][key][0],
            ]

    # Drop rows with missing values
    sgls_nonan = sgls_all.dropna()

    print('\nSplit and normalize input/output data')
    features = cfg_ann['net_all']['features']
    target = cfg_ann['net_all']['target']
    n_targets = cfg_ann['net_all']['n_targets']
    valid_frac = cfg_ann['net_all']['valid_frac']

    # Normalize input (features) and output (target)
    nsgls, bins = _normalize_data(sgls_nonan, features, target, n_targets)

    # Get indices of train, validation and test datasets
    ind_train, ind_valid, ind_test = _split_indices(nsgls, valid_frac)

    # Split dataframes into train, validation and test  (features, targets) tuples
    train, valid, test = _create_datasets(nsgls, ind_train, ind_valid,
                                          ind_test, features, target)
    print('train', len(train[0]), len(train[1]))
    print('valid', len(valid[0]), len(valid[1]))
    print('test', len(test[0]), len(test[1]))

    # Save information on input data to config
    cfg_ann['net_all']['targets'] = [float(b) for b in bins]

    # Tuning - find optimal network architecture
    #---------------------------------------------------------------------------
    print('\nTune network configuration')

    # Get a dict for each configuration permutation of the params in `net_tuning`
    configs = _get_configs(cfg_ann['net_tuning'])

    # Cycle through configurations storing configuration, net in `results_tune`
    n_features = len(cfg_ann['net_all']['features'])
    n_targets = cfg_ann['net_all']['n_targets']

    print('\nNumber of features: {}'.format(n_features))
    print('Number of targets: {}\n'.format(n_targets))

    results_tune, tune_accuracy, cms_tune = _tune_net(
        train,
        valid,
        test,
        bins,
        configs,
        n_features,
        n_targets,
        plots,
    )

    # Get neural net configuration with best accuracy
    best_config = get_best(results_tune, 'config')

    # Test effect of dataset size
    #---------------------------------------------------------------------------
    print('\nRun percentage of datasize tests')

    # Get randomly sorted and subsetted datasets to test effect of dataset_size
    # i.e. - a dataset with the first `subset_fraction` of samples.
    results_dataset, data_accuracy, cms_data = _test_dataset_size(
        best_config, train, valid, test, bins, n_features, n_targets, plots,
        debug)

    print('\nTest data accuracy (Configuration tuning): {}'.format(
        tune_accuracy))
    print(
        'Test data accuracy (Datasize test):        {}'.format(data_accuracy))

    # Save results and configuration to output directory
    #---------------------------------------------------------------------------

    # Create output directory if it does not exist
    path_output = os.path.join(path_project, paths['ann'], path_analysis)
    os.makedirs(path_output, exist_ok=True)

    # Save updated `cfg_ann` to output directory
    file_cfg_ann = os.path.join(path_output, fnames['cfg']['ann'])
    yamlord.write_yaml(cfg_ann, file_cfg_ann)

    # Compiled SGLs before NaN drop and normalization
    utils_ann.ppickle(sgls_all, os.path.join(path_output,
                                             fnames['ann']['sgls']))

    # Compiled SGLs after NaN drop and normalization
    utils_ann.ppickle(nsgls,
                      os.path.join(path_output, fnames['ann']['sgls_norm']))

    # Save output data to analysis output directory
    tune_fname = fnames['ann']['tune']
    datasize_fname = fnames['ann']['dataset']
    ppickle(results_tune, os.path.join(path_output, tune_fname))
    ppickle(results_dataset, os.path.join(path_output, datasize_fname))

    # Save train, validation, test datasets
    ppickle(train, os.path.join(path_output, fnames['ann']['train']))
    ppickle(valid, os.path.join(path_output, fnames['ann']['valid']))
    ppickle(test, os.path.join(path_output, fnames['ann']['test']))

    ppickle(cms_tune, os.path.join(path_output, fnames['ann']['cms_tune']))
    ppickle(cms_data, os.path.join(path_output, fnames['ann']['cms_data']))

    return cfg_ann, (train, valid, test), (results_tune, results_dataset,
                                           cms_tune, cms_data)
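Helpers such as `_get_configs` are internal to the module and not shown above. A plausible sketch, under the assumption that it expands the `net_tuning` parameter lists into every combination with `itertools.product`:

from collections import OrderedDict
import itertools


def _get_configs(net_tuning):
    """Expand a dict of parameter lists into all parameter combinations."""
    keys = list(net_tuning.keys())
    values = [net_tuning[k] for k in keys]
    return [OrderedDict(zip(keys, combo))
            for combo in itertools.product(*values)]


# Example: 2 x 2 = 4 configurations
configs = _get_configs(OrderedDict([('hidden_nodes', [10, 20]),
                                    ('learning_rate', [0.01, 0.1])]))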
Example 6
def _process_tag_data(path_project,
                      cfg_project,
                      cfg_glide,
                      path_exp,
                      tag,
                      fs_a,
                      plots=True,
                      debug=False):
    '''Signal process tag data and calculate derived parameters for glide identification

    Args
    ----
    path_project:
        Parent path for project
    cfg_project: OrderedDict
        Dictionary of configuration parameters for the current project
    cfg_glide: OrderedDict
        Dictionary of configuration parameters for glide identification
    path_exp: str
        Directory name of `tag` data being processed
    tag: pandas.DataFrame
        Data loaded from tag with associated sensors
    fs_a: float
        Sampling frequency (i.e. number of samples per second)
    plots: bool
        Switch for turning on plots (Default `True`). When activated, plots
        for reviewing signal processing will be displayed.
    debug: bool
        Switch for turning on debugging (Default `False`). When activated,
        values for `cutoff_frq` and `J` will be set to generic values and
        diagnostic plots of the `speed` parameter in `tag` will be displayed.

    Returns
    -------
    cfg: OrderedDict
    tag: pandas.DataFrame
        Data loaded from tag with associated sensors, with added fields from
        signal processing
    dives: pandas.DataFrame
        Start and stop indices and attributes for dive events in `tag` data,
        including: start_idx, stop_idx, dive_dur, depths_max, depths_max_idx,
        depths_mean, compr_mean.
    masks: pandas.DataFrame
        Boolean masks for slicing identified dives, glides, and sub-glides from
        the `tag` dataframe.
    exp_ind: OrderedDict
        Start and stop indices of `tag` data to be analyzed
    '''
    from collections import OrderedDict
    import numpy
    from os.path import join as _join
    import pandas
    import pyotelem
    from pyotelem.plots import plotdives, plotdsp
    import yamlord
    import copy

    from .. import utils
    from . import utils_ctd
    from ..config import paths, fnames

    exp_idxs = [None, None]

    file_cfg_exp = _join(path_project, fnames['cfg']['exp_bounds'])
    cfg = copy.deepcopy(cfg_glide)

    try:
        cfg_exp = yamlord.read_yaml(file_cfg_exp)
    except FileNotFoundError:
        cfg_exp = OrderedDict()

    # 1 Select indices for analysis
    #--------------------------------------------------------------------------
    print('* Select indices for analysis\n')

    if path_exp in cfg_exp:
        exp_idxs[0] = cfg_exp[path_exp]['start_idx']
        exp_idxs[1] = cfg_exp[path_exp]['stop_idx']
    else:
        # Plot accelerometer axes, depths, and propeller speed
        plotdives.plot_triaxial_depths_speed(tag)

        # Get indices user input - mask
        exp_idxs[0] = pyotelem.utils.recursive_input('Analysis start index',
                                                     int)
        exp_idxs[1] = pyotelem.utils.recursive_input('Analysis stop index',
                                                     int)

        cfg_exp[path_exp] = OrderedDict()
        cfg_exp[path_exp]['start_idx'] = exp_idxs[0]
        cfg_exp[path_exp]['stop_idx'] = exp_idxs[1]
        yamlord.write_yaml(cfg_exp, file_cfg_exp)

    # Create dataframe for storing masks for various views of the data
    masks = pandas.DataFrame(index=range(len(tag)), dtype=bool)

    # Create mask of values to be considered part of the analysis
    masks['exp'] = False
    masks['exp'][exp_idxs[0]:exp_idxs[1]] = True

    # Create indices array `exp_ind` for analysis
    exp_ind = numpy.where(masks['exp'])[0]

    # 1.3 Calculate pitch, roll, and heading
    #--------------------------------------------------------------------------
    print('* Calculate pitch, roll, heading\n')
    tag['p'], tag['r'], tag['h'] = pyotelem.dynamics.prh(
        tag['Ax_g'].values, tag['Ay_g'].values, tag['Az_g'].values)

    # 2 Define dives
    #--------------------------------------------------------------------------
    print('* Define dives\n')
    dives, masks['dive'] = pyotelem.dives.finddives2(tag['depth'].values,
                                                     cfg_glide['min_depth'])

    # 3.2.1 Determine `stroke_frq` fluking rate and cut-off frequency
    #--------------------------------------------------------------------------
    print('* Get stroke frequency\n')
    # calculate power spectrum of the accelerometer data at the whale frame
    Ax_g = tag['Ax_g'][masks['exp']].values
    Az_g = tag['Az_g'][masks['exp']].values

    # NOTE change `stroke_ratio` here to modify the selection method
    # should be OK other than t_max, these values are too high
    if debug is False:
        cutoff_frq, stroke_frq, stroke_ratio = pyotelem.glides.get_stroke_freq(
            Ax_g,
            Az_g,
            fs_a,
            cfg_glide['nperseg'],
            cfg_glide['peak_thresh'],
            stroke_ratio=None)
        # Store user input cutoff and stroke frequencies
        cfg['cutoff_frq'] = cutoff_frq
        cfg['stroke_frq'] = stroke_frq
        cfg['stroke_ratio'] = stroke_ratio
    else:
        cutoff_frq = 0.3
        cfg['cutoff_frq'] = cutoff_frq

    # 3.2.2 Separate low and high frequency signals
    #--------------------------------------------------------------------------
    print('* Separate accelerometry to high and low-pass signals\n')
    order = 5
    cutoff_str = str(cfg['cutoff_frq'])
    for btype, suffix in zip(['low', 'high'], ['lf', 'hf']):
        b, a = pyotelem.dsp.butter_filter(
            cfg['cutoff_frq'], fs_a, order=order, btype=btype)
        for param in ['Ax_g', 'Ay_g', 'Az_g']:
            key = '{}_{}_{}'.format(param, suffix, cutoff_str)
            tag[key] = pyotelem.dsp.butter_apply(b, a, tag[param].values)

    # Plot low and high frequency accelerometer signals
    if plots is True:
        plotdsp.plot_lf_hf(tag['Ax_g'][masks['exp']],
                           tag['Ax_g_lf_' + cutoff_str][masks['exp']],
                           tag['Ax_g_hf_' + cutoff_str][masks['exp']],
                           title='x axis')

        plotdsp.plot_lf_hf(tag['Ay_g'][masks['exp']],
                           tag['Ay_g_lf_' + cutoff_str][masks['exp']],
                           tag['Ay_g_hf_' + cutoff_str][masks['exp']],
                           title='y axis')

        plotdsp.plot_lf_hf(tag['Az_g'][masks['exp']],
                           tag['Az_g_lf_' + cutoff_str][masks['exp']],
                           tag['Az_g_hf_' + cutoff_str][masks['exp']],
                           title='z axis')

    # 3.2.3 Calculate the smooth pitch from the low pass filter acceleration
    #       signal to avoid incorporating signals above the stroking periods
    #--------------------------------------------------------------------------
    print('* Calculate low-pass pitch, roll, heading\n')
    prh_lf = pyotelem.dynamics.prh(
        tag['Ax_g_lf_' + cutoff_str].values,
        tag['Ay_g_lf_' + cutoff_str].values,
        tag['Az_g_lf_' + cutoff_str].values,
    )

    tag['p_lf'], tag['r_lf'], tag['h_lf'] = prh_lf

    # 4 Define precise descent and ascent phases
    #--------------------------------------------------------------------------
    print('* Get precise indices of descents, ascents, phase and bottom\n')
    masks['des'], masks['asc'] = pyotelem.dives.get_des_asc2(
        tag['depth'].values,
        masks['dive'].values,
        tag['p_lf'].values,
        cfg['cutoff_frq'],
        fs_a,
        order=5)
    # Typecast `des` and `asc` columns to `bool`
    masks = masks.astype(bool)
    if plots is True:
        plotdives.plot_dives_pitch(tag['depth'][masks['exp']],
                                   masks['dive'][masks['exp']],
                                   masks['des'][masks['exp']],
                                   masks['asc'][masks['exp']],
                                   tag['p'][masks['exp']],
                                   tag['p_lf'][masks['exp']])

    # 8 Estimate seawater density around the tagged animal
    #--------------------------------------------------------------------------
    print('* Estimate seawater density\n')

    # Study location and max depth to average salinities
    lon = cfg_project['experiment']['coords']['lon']
    lat = cfg_project['experiment']['coords']['lat']
    max_depth = cfg_project['experiment']['net_depth']

    # Read data
    fname_ctd = cfg_project['experiment']['fname_ctd']
    file_ctd_mat = _join(path_project, paths['ctd'], fname_ctd)

    t = tag['temperature'].values

    tag['dsw'] = utils_ctd.get_seawater_densities(file_ctd_mat, t, lon, lat,
                                                  max_depth)

    # 6.1 Extract strokes and glides using heave
    #     high-pass filtered (HPF) acceleration signal, axis=3
    #--------------------------------------------------------------------------
    # Two methods for estimating stroke frequency `stroke_frq`:
    # * from the body rotations (pry) using the magnetometer method
    # * from the dorso-ventral axis of the HPF acceleration signal.

    # For both methods, t_max and J need to be determined.

    # Choose a value for J based on a plot showing distribution of signals:
    #   hpf-x, when detecting glides in the next step use Ahf_Anlf() with axis=0
    #   hpf-z when detecting glides in the next step use Ahf_Anlf() with axis=2

    print('* Get fluke signal threshold\n')

    if debug is False:
        # Plot PSD for J selection
        Ax_g_hf = tag['Ax_g_hf_' + cutoff_str][masks['exp']].values
        Az_g_hf = tag['Az_g_hf_' + cutoff_str][masks['exp']].values

        f_wx, Sx, Px, dpx = pyotelem.dsp.calc_PSD_welch(Ax_g_hf,
                                                        fs_a,
                                                        nperseg=512)
        f_wz, Sz, Pz, dpz = pyotelem.dsp.calc_PSD_welch(Az_g_hf,
                                                        fs_a,
                                                        nperseg=512)

        import matplotlib.pyplot as plt
        fig, (ax1, ax2) = plt.subplots(1, 2)
        ax1.plot(f_wx, Sx, label='hf-x PSD')
        ax1.plot(f_wz, Sz, label='hf-z PSD')
        ax1.legend(loc='upper right')
        ax2.plot(tag['datetimes'][masks['exp']], Ax_g_hf, label='hf-x')
        ax2.plot(tag['datetimes'][masks['exp']], Az_g_hf, label='hf-z')
        ax2.legend(loc='upper right')

        fig.autofmt_xdate()
        plt.show()

        # Get user selection for J - select one for both axes
        cfg['J'] = pyotelem.utils.recursive_input('J (fluke magnitude)', float)
    else:
        cfg['J'] = 0.4

    return cfg, tag, dives, masks, exp_ind
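`pyotelem.dsp.butter_filter` and `butter_apply` are used above to split the acceleration into low- and high-frequency components. A rough SciPy-based sketch of what such helpers typically do (an assumption, not the pyotelem implementation):

from scipy import signal


def butter_filter(cutoff_frq, fs, order=5, btype='low'):
    # Design a Butterworth filter; the cutoff is normalized by the Nyquist frequency
    return signal.butter(order, cutoff_frq / (fs / 2.0), btype=btype)


def butter_apply(b, a, x):
    # Zero-phase filtering avoids shifting the signal in time
    return signal.filtfilt(b, a, x)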
Example 7
def run(path_project,
        cfg_project,
        cfg_glide,
        cfg_filt,
        sgl_dur,
        plots=True,
        debug=False):
    '''Run glide identification on data in configuration paths

    Args
    ----
    path_project:
        Parent path for project
    cfg_project: OrderedDict
        Dictionary of configuration parameters for the current project
    cfg_glide: OrderedDict
        Dictionary of configuration parameters for glide identification
    cfg_filt: OrderedDict
        Dictionary of configuration parameters for filtering sub-glides
    sgl_dur: int
        Duration of sub-glide splits (seconds)
    plots: bool
        Switch for turning on plots (Default `True`). When activated, plots
        for reviewing signal processing will be displayed.
    debug: bool
        Switch for turning on debugging (Default `False`). When activated,
        values for `cutoff_frq` and `J` will be set to generic values and
        diagnostic plots of the `speed` parameter in `tag` will be displayed.

    Attributes
    ----------
    cutoff_frq: float
        Cutoff frequency for separating low and high frequency signals
    stroke_frq: float
        Frequency at which maximum power is seen in the accelerometer PSD
    t_max: int
        Maximum duration allowable for a fluke stroke in seconds; it can be
        set as 1/`stroke_frq`
    J: float
        Magnitude threshold for detecting a fluke stroke (m/s2)

    Returns
    -------
    tag: pandas.DataFrame
        Data loaded from tag with associated sensors
    dives: pandas.DataFrame
        Start and stop indices and attributes for dive events in `tag` data,
        including: start_idx, stop_idx, dive_dur, depths_max, depths_max_idx,
        depths_mean, compr_mean.
    GL: ndarray, (n, 2)
        Start and stop indices of glide events in `tag` data
    sgls: pandas.DataFrame
        Contains sub-glide summary information of `tag` data
    '''
    from collections import OrderedDict
    import numpy
    import os
    from os.path import join as _join
    import pandas
    import pyotelem
    from pyotelem.plots import plotdynamics, plotglides
    import yamlord

    from ..config import paths, fnames
    from .. import utils
    from . import utils_lleo

    # Input filenames
    fname_cal = fnames['tag']['cal']
    fname_cal_prop = fnames['csv']['cal_prop']

    # Output filenames
    fname_cfg_glide = fnames['cfg']['glide']
    fname_cfg_filt = fnames['cfg']['filt']

    fname_dives = fnames['glide']['dives']
    fname_glide_ratio = fnames['glide']['glide_ratio']
    fname_mask_tag = fnames['glide']['mask_tag']
    fname_mask_tag_glides = fnames['glide']['mask_tag_glides']
    fname_sgls = fnames['glide']['sgls']
    fname_mask_tag_sgls = fnames['glide']['mask_tag_sgls']
    fname_mask_tag_filt = fnames['glide']['mask_tag_filt']
    fname_mask_sgls_filt = fnames['glide']['mask_sgls_filt']

    # Fields to ignore when concatenating output path names
    ignore = [
        'nperseg', 'peak_thresh', 'alpha', 'min_depth', 't_max',
        'last_modified'
    ]

    # Generate list of paths in tag data directory
    path_exps = list()
    for path_exp in os.listdir(_join(path_project, paths['tag'])):

        # Only process directories
        if os.path.isdir(_join(path_project, paths['tag'], path_exp)):

            path_exps.append(path_exp)

    # Get user selection of tag data paths to process
    path_exps = sorted(path_exps)
    msg = 'Path numbers to process:\n'
    process_ind = pyotelem.utils.get_dir_indices(msg, path_exps)

    # Process selected tag experiments
    for i in process_ind:
        path_exp = path_exps[i]
        fname_tag = fnames['tag']['data'].format(path_exp)

        # Get correct calibration path given tag ID number
        tag_model = path_exp.replace('-', '').split('_')[1].lower()
        tag_id = int(path_exp.split('_')[2])
        year = int(path_exp[:4])
        month = int(path_exp[4:6])
        path_cal_acc = cfg_project['cal'][tag_model][tag_id][year][month]

        print('Tag calibration file path: {}\n'.format(path_cal_acc))

        # Currently creating a new configuration for each exp
        path_cfg_glide = path_exp

        print('Processing: {}\n'.format(path_exp))

        # Run glide analysis

        # Output paths
        out_data = _join(path_project, paths['tag'], path_exp)
        os.makedirs(out_data, exist_ok=True)

        # LOAD DATA
        #----------
        # Tag sensor data linearly interpolated to the accelerometer sampling intervals
        path_data_tag = _join(path_project, paths['tag'], path_exp)
        file_cal_acc = _join(path_project, paths['tag'], path_cal_acc,
                             fname_cal)
        file_cal_prop = _join(path_project, paths['csv'], fname_cal_prop)

        tag, dt_a, fs_a = utils_lleo.load_lleo(path_data_tag, file_cal_acc,
                                               file_cal_prop)

        # Plot speed if debug is on
        if debug:
            exp_ind = range(len(tag))
            plotdynamics.plot_swim_speed(exp_ind, tag['speed'].values)

        # Signal process data, calculate derived data and find stroke frequencies
        cfg_glide_exp, tag, dives, masks, exp_ind = _process_tag_data(
            path_project,
            cfg_project,
            cfg_glide,
            path_exp,
            tag,
            fs_a,
            plots=plots,
            debug=debug)
        # Save data
        tag.to_pickle(_join(out_data, fname_tag))
        dives.to_pickle(_join(out_data, fname_dives))
        masks.to_pickle(_join(out_data, fname_mask_tag))

        # Find Glides
        #------------
        GL, masks = _process_glides(cfg_glide_exp,
                                    tag,
                                    fs_a,
                                    dives,
                                    masks,
                                    plots=plots,
                                    debug=debug)

        # Create output path from concatenating parameters in `cfg_glide_exp`
        dname_glide = utils.cat_path(cfg_glide_exp, ignore)
        out_glide = _join(path_project, paths['glide'], path_exp, dname_glide)
        os.makedirs(out_glide, exist_ok=True)

        # Save glide data to concatenated path
        masks['glides'].to_pickle(_join(out_glide, fname_mask_tag_glides))

        # Save glide analysis configuration
        cfg_glide_exp['last_modified'] = _now_str()
        file_cfg_glide_exp = _join(out_glide, fname_cfg_glide)
        yamlord.write_yaml(cfg_glide_exp, file_cfg_glide_exp)

        # SPLIT GLIDES TO SUB-GLIDES
        #--------------------------
        # Split into sub-glides, generate summary tables
        sgls, masks['sgls'] = _process_sgls(tag, fs_a, dives, GL, sgl_dur)

        # Create output path from passed `sgls` duration
        out_sgls = _join(out_glide, 'dur_{}'.format(sgl_dur))
        os.makedirs(out_sgls, exist_ok=True)

        # Save sgls data to path for passed `sgls` duration
        sgls.to_pickle(_join(out_sgls, fname_sgls))
        masks['sgls'].to_pickle(_join(out_sgls, fname_mask_tag_sgls))

        # FILTER AND PLOT SUB-GLIDES
        #--------------------------
        # Get masks of `tag` and `sgls` data for sgls matching constraints
        exp_ind = numpy.where(masks['exp'])[0]
        mask_tag_filt, mask_sgls_filt = utils.filter_sgls(
            len(tag), exp_ind, sgls, cfg_filt['pitch_thresh'],
            cfg_filt['min_depth'], cfg_filt['max_depth_delta'],
            cfg_filt['min_speed'], cfg_filt['max_speed'],
            cfg_filt['max_speed_delta'])

        # Plot filtered sgls
        plotglides.plot_sgls(
            masks['exp'], tag['depth'].values, mask_tag_filt, sgls,
            mask_sgls_filt, tag['Az_g_hf_' + str(cfg_glide_exp['cutoff_frq'])])

        # Create output path from concatenating parameters in `cfg_filt`
        dname_filt = utils.cat_path(cfg_filt, ignore)
        out_filt = _join(out_sgls, dname_filt)
        os.makedirs(out_filt, exist_ok=True)

        # Save filtered sgls data to concatenated path
        pandas.to_pickle(mask_tag_filt, _join(out_filt, fname_mask_tag_filt))
        pandas.to_pickle(mask_sgls_filt, _join(out_filt, fname_mask_sgls_filt))

        # Save symlink to data and masks in filter directory
        out_paths = [
            out_data, out_data, out_glide, out_glide, out_sgls, out_sgls,
            out_sgls
        ]
        sym_fnames = [
            fname_tag, fname_mask_tag, fname_cfg_glide, fname_mask_tag_glides,
            fname_cfg_filt, fname_mask_tag_sgls, fname_sgls
        ]
        for out_path, fname in zip(out_paths, sym_fnames):
            rel_path = os.path.relpath(_join(out_path, fname), out_filt)
            utils.symlink(rel_path, _join(out_filt, fname))

        # Save sub-glide analysis configuration
        cfg_filt['last_modified'] = _now_str()
        file_cfg_filt = _join(out_sgls, fname_cfg_filt)
        yamlord.write_yaml(cfg_filt, file_cfg_filt)

    return tag, dives, GL, sgls
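`_now_str` is a small helper defined elsewhere in the module; a minimal sketch, assuming it uses the same timestamp format written to `cfg_project['meta']['created']` in Example 1:

import datetime


def _now_str():
    """Return the current local time as 'YYYY-mm-dd HH:MM:SS'."""
    return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')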