def convert(npz_loc, db_name, save_location, overwrite=False):
    """
    Accepts an npz file location and saves its data to the database at the
    specified save_location

    PARAMETERS
    ----------
    npz_loc: string
        location and name of the npz file
    db_name: string
        database to save data to
    save_location: string
        save location in the database
    overwrite: boolean, Optional (Default: False)
        True to overwrite previous data
        NOTE: this will be triggered if data is being saved to the same hdf5
        group (folder), not necessarily the same key. In this case you will
        need to set it to True. Other data will not be erased, only data
        with the same keys will be overwritten
    """
    dat = DataHandler(db_name)
    npz = np.load(npz_loc)
    keys = npz.keys()
    new_data = {}
    for key in keys:
        new_data[key] = npz[key]
    dat.save(data=new_data, save_location=save_location, overwrite=overwrite)

    # load the saved data back to confirm the conversion succeeded
    keys = dat.get_keys(save_location)
    data = dat.load(parameters=keys, save_location=save_location)
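# A minimal usage sketch of convert(); the npz file name, database name, and
# save location below are hypothetical placeholders, not names from this repo.
convert(
    npz_loc='recording.npz',              # assumed example npz file
    db_name='experiments',                # assumed example database
    save_location='converted/recording',  # assumed example hdf5 group
    overwrite=True,
)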
def test_load(parameters, compare_to, key):
    dat = DataHandler('tests')
    save_location = 'test_loading'
    test_data = {'test_data': np.ones(3)}
    dat.save(data=test_data, save_location=save_location, overwrite=True)
    loaded = dat.load(parameters=parameters, save_location=save_location)
    assert np.all(loaded[key] == compare_to)
def test_rename_new_save_location_exists():
    old_save_location = 'test_rename'
    new_save_location = 'test_already_exists'
    with pytest.raises(Exception):
        dat = DataHandler('tests')
        # save data to rename / move
        dat.save(data={'float': 3.14}, save_location=old_save_location)
        # create data at new location
        dat.save(data={'float': 3.14}, save_location=new_save_location,
                 overwrite=True)
        # try to rename data onto the existing key
        dat.rename(old_save_location=old_save_location,
                   new_save_location=new_save_location,
                   delete_old=False)
def test_rename(old_save_location, new_save_location, delete_old, compare_to):
    dat = DataHandler('tests')
    # save data to rename / move
    dat.save(data={'float': 3.14}, save_location=old_save_location,
             overwrite=True)
    # make sure the new entry key is available
    dat.delete(save_location=new_save_location)
    # rename to the new key
    dat.rename(old_save_location=old_save_location,
               new_save_location=new_save_location,
               delete_old=delete_old)
    # check if the old location exists
    exists = dat.check_group_exists(location=old_save_location, create=False)
    assert exists == compare_to
    # check if the new location exists
    exists = dat.check_group_exists(location=new_save_location, create=False)
    assert exists is True
def load_and_process(interpolated_samples, parameters):
    dat = DataHandler("tests")
    loc = "fake_trajectory"
    steps = 147
    fake_traj_data = random_trajectories.generate(steps=steps, plot=False)
    dat.save(data=fake_traj_data, save_location=loc, overwrite=True)

    data = proc.load_and_process(
        db_name="tests",
        save_location=loc,
        parameters=parameters,
        interpolated_samples=interpolated_samples,
    )

    if interpolated_samples is None:
        interpolated_samples = steps

    for key in parameters:
        if key == "time":
            key = "cumulative_time"
        assert len(data[key]) == interpolated_samples
def test_rename_dataset():
    old_save_location = 'test_saving'
    new_save_location = 'test_saving_moved'
    dat = DataHandler('tests')
    # save data to rename / move
    dat.save(data={'float': 3.14}, save_location=old_save_location,
             overwrite=True)
    # make sure the new entry key is available
    dat.delete(save_location=new_save_location)
    dat.rename(old_save_location=old_save_location + '/int',
               new_save_location=new_save_location + '/int',
               delete_old=False)
    # check if the old location exists
    exists = dat.check_group_exists(location=old_save_location, create=False)
    assert exists is True
    # check if the new location exists
    exists = dat.check_group_exists(location=new_save_location, create=False)
    assert exists is True
class TrajectoryError():
    def __init__(self, db_name, time_derivative=0, interpolated_samples=100):
        '''
        PARAMETERS
        ----------
        db_name: string
            the name of the database to load data from
        interpolated_samples: positive int, Optional (Default=100)
            the number of samples to take (evenly) from the interpolated data
            if set to None, no interpolation or sampling will be done, the
            raw data will be returned
        time_derivative: int, Optional (Default: 0)
            0: position
            1: velocity
            2: acceleration
            3: jerk
        '''
        # instantiate our data processor
        self.dat = DataHandler(db_name)
        self.db_name = db_name
        self.time_derivative = time_derivative
        self.interpolated_samples = interpolated_samples

    def statistical_error(self, save_location, ideal=None, sessions=1,
                          runs=1, save_data=True, regen=False):
        '''
        Calls the calculate_error function to get the trajectory error for
        all runs and sessions specified at the save location, then calculates
        the mean error and confidence intervals

        PARAMETERS
        ----------
        save_location: string
            location of data in database
        ideal: string, Optional (Default: None)
            This tells the function what trajectory to calculate the error
            relative to
            None: use the saved filter data at save_location
            if string: key of Nx3 data in database to use
        sessions: int, Optional (Default: 1)
            the number of sessions to calculate error for
        runs: int, Optional (Default: 1)
            the number of runs in each session
        save_data: boolean, Optional (Default: True)
            True to save data, this saves the error for each session
        regen: boolean, Optional (Default: False)
            True to regenerate data
            False to load data if it exists
        '''
        if regen is False:
            exists = self.dat.check_group_exists(
                '%s/statistical_error_%i'
                % (save_location, self.time_derivative))
            if exists:
                ci_errors = self.dat.load(
                    parameters=[
                        'mean', 'upper_bound', 'lower_bound', 'ee_xyz',
                        'ideal_trajectory', 'time', 'time_derivative',
                        'read_location', 'error'
                    ],
                    save_location='%s/statistical_error_%i'
                    % (save_location, self.time_derivative))
                # use the length of the loaded mean as a boolean check that
                # data was actually saved at this location
                exists = len(ci_errors['mean'])
        else:
            exists = False

        if not exists:
            errors = []
            for session in range(sessions):
                session_error = []
                for run in range(runs):
                    print('%.3f processing complete...'
                          % (100 * ((run + 1) + (session * runs))
                             / (sessions * runs)), end='\r')
                    loc = ('%s/session%03d/run%03d'
                           % (save_location, session, run))
                    data = self.calculate_error(save_location=loc,
                                                ideal=ideal)
                    session_error.append(np.sum(data['error']))
                errors.append(session_error)

            ci_errors = proc.get_mean_and_ci(raw_data=errors)
            ci_errors['time_derivative'] = self.time_derivative

            if save_data:
                self.dat.save(
                    data=ci_errors,
                    save_location='%s/statistical_error_%i'
                    % (save_location, self.time_derivative),
                    overwrite=True)

        return ci_errors

    def calculate_error(self, save_location, ideal=None):
        '''
        Loads the ee_xyz data from save_location and compares it to ideal.
        If ideal is not passed in, it is assumed that a filtered path planner
        is saved in save_location under the key 'ideal_trajectory' and will
        be used as the reference for the error calculation. The data is
        loaded, interpolated, and differentiated. The two-norm error is
        returned.

        The following dict is returned
        data = {
            'ee_xyz': list of end-effector positions (n_timesteps, xyz),
            'ideal_trajectory': list of path planner positions
                shape of (n_timesteps, xyz),
            'time': list of timesteps (n_timesteps),
            'time_derivative': int, the order of differentiation applied,
            'read_location': string, the location the raw data was loaded
                from,
            'error': the two-norm error between the end-effector trajectory
                and the path planner followed that run
            }

        PARAMETERS
        ----------
        save_location: string
            location of data in database
        ideal: string, Optional (Default: None)
            This tells the function what trajectory to calculate the error
            relative to
            None: use the saved filter data at save_location
            if string: key of Nx3 data in database to use
        '''
        if ideal is None:
            ideal = 'ideal_trajectory'
        parameters = ['ee_xyz', 'time', ideal]

        # load and interpolate data
        data = proc.load_and_process(
            db_name=self.db_name,
            save_location=save_location,
            parameters=parameters,
            interpolated_samples=self.interpolated_samples)

        if ideal == 'ideal_trajectory':
            data['ideal_trajectory'] = data['ideal_trajectory'][:, :3]

        dt = np.sum(data['time']) / len(data['time'])

        # differentiate data
        if self.time_derivative > 0:
            # set the keys that can be differentiated to avoid errors
            differentiable_keys = ['ee_xyz', 'ideal_trajectory']
            if ideal is not None:
                differentiable_keys.append(ideal)

            for key in data:
                if key in differentiable_keys:
                    # differentiate the number of times specified by
                    # time_derivative
                    for _ in range(0, self.time_derivative):
                        data[key][:, 0] = np.gradient(data[key][:, 0], dt)
                        data[key][:, 1] = np.gradient(data[key][:, 1], dt)
                        data[key][:, 2] = np.gradient(data[key][:, 2], dt)

        data['time_derivative'] = self.time_derivative
        data['read_location'] = save_location
        data['error'] = np.linalg.norm(
            (data['ee_xyz'] - data[ideal]), axis=1)

        return data

    def plot(self, ax, save_location, step=-1, c=None, linestyle='--',
             label=None, loc=1, title='Trajectory Error to Path Planner'):
        data = self.dat.load(
            parameters=['mean', 'upper_bound', 'lower_bound'],
            save_location='%s/statistical_error_%i'
            % (save_location, self.time_derivative))
        vis.plot_mean_and_ci(
            ax=ax, data=data, c=c, linestyle=linestyle, label=label,
            loc=loc, title=title)
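# A brief usage sketch of TrajectoryError, assuming trajectory data has
# already been saved under 'my_test/session000/run000' style locations.
# The database name and save location below are hypothetical placeholders.
import matplotlib.pyplot as plt

traj_err = TrajectoryError(
    db_name='example_db',       # assumed example database
    time_derivative=1,          # 1 = velocity error
    interpolated_samples=100)

# compute mean error and confidence intervals over 1 session of 10 runs
ci = traj_err.statistical_error(
    save_location='my_test',    # assumed example save location
    sessions=1, runs=10)
print('mean error per session: ', ci['mean'])

# plot the mean error with its confidence interval
fig, ax = plt.subplots()
traj_err.plot(ax=ax, save_location='my_test', label='my_test')
plt.show()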
def test_save_error():
    dat = DataHandler('tests')
    with pytest.raises(Exception):
        dat.save(data={'bool': True}, save_location='test_saving',
                 overwrite=False)
def test_save_type_error():
    dat = DataHandler('tests')
    with pytest.raises(TypeError):
        dat.save(data=np.ones(10), save_location='test_saving',
                 overwrite=True)
def test_save(data, overwrite):
    save_location = 'test_saving'
    dat = DataHandler('tests')
    dat.save(data=data, save_location=save_location, overwrite=overwrite)
import numpy as np
import pytest

from abr_analyze.plotting import TrajectoryError
from abr_analyze.data_handler import DataHandler
from abr_analyze.utils import random_trajectories

dat = DataHandler('test')
save_location = 'traj_err_test/session000/run000'
# generate a random trajectory and an ideal
data = random_trajectories.generate(steps=100)
# generate another trajectory and ideal so we can test passing in a
# custom ideal
data_alt = random_trajectories.generate(steps=100)
# save the second ideal to the first data dict
data['alt_traj'] = data_alt['ideal_trajectory']
dat.save(data=data, save_location=save_location, overwrite=True)


@pytest.mark.parametrize('ideal', (None, 'ideal_trajectory', 'alt_traj'))
def test_calculate_error(ideal, save_location=save_location):
    dat = DataHandler('test')
    if ideal is None:
        ideal = 'ideal_trajectory'
    fake_data = dat.load(parameters=[ideal, 'ee_xyz'],
                         save_location=save_location)
    manual_error = np.linalg.norm(
        (fake_data['ee_xyz'] - fake_data[ideal]), axis=1)
    traj = TrajectoryError(db_name='test', time_derivative=0,
                           interpolated_samples=None)
    # (assumed check) the error returned by calculate_error should match the
    # manually calculated two-norm error
    data = traj.calculate_error(save_location=save_location, ideal=ideal)
    assert np.allclose(data['error'], manual_error)
def run(encoders, intercept_vals, input_signal, seed=1,
        db_name='intercepts_scan', save_name='example', notes='',
        analysis_fncs=None, **kwargs):
    '''
    Runs a scan for the proportion of neurons that are active over time

    PARAMETERS
    ----------
    encoders: array of floats (n_ensembles x n_neurons x n_input)
        the values that specify along what vector a neuron will be
        sensitive to
    intercept_vals: array of floats (n_intercepts to try x 3)
        the [left_bound, right_bound, mode] to pass on to the triangular
        intercept function in network_utils
    input_signal: array of floats (n_timesteps x n_inputs)
        the input signal that we want to check our network's response to
    seed: int
        the seed used for any randomization in the sim
    save_name: string, Optional (Default: 'example')
        the name to save the data under in the intercept_scan database
    notes: string, Optional (Default: '')
        any additional notes to save with the scan
    analysis_fncs: list of network_utils functions to apply to the spike
        trains; each function must accept the network, input_signal, and the
        filtered spike trains (pscs), and return a list of data and activity
    '''
    if not isinstance(analysis_fncs, list):
        analysis_fncs = [analysis_fncs]

    print('Input Signal Shape: ', np.asarray(input_signal).shape)

    loop_time = 0
    elapsed_time = 0
    for ii, intercept in enumerate(intercept_vals):
        start = timeit.default_timer()
        elapsed_time += loop_time
        print('%i/%i | ' % (ii+1, len(intercept_vals))
              + '%.2f%% Complete | ' % (ii/len(intercept_vals)*100)
              + '%.2f min elapsed | ' % (elapsed_time/60)
              + '%.2f min for last sim | ' % (loop_time/60)
              + '~%.2f min remaining...'
              % ((len(intercept_vals)-ii)*loop_time/60),
              end='\r')

        # create our intercept distribution from the intercept vals:
        # generates intercepts for a d-dimensional ensemble such that, given
        # a random uniform input (from the interior of the d-dimensional
        # ball), the probability of a neuron firing has the probability
        # density function given by rng.triangular(left, mode, right, size=n)
        np.random.seed(seed)
        triangular = np.random.triangular(
            # intercept_vals = [left, right, mode]
            left=intercept[0],
            right=intercept[1],
            mode=intercept[2],
            size=encoders.shape[1],
        )
        intercepts = nengo.dists.CosineSimilarity(
            encoders.shape[2] + 2).ppf(1 - triangular)
        intercept_list = intercepts.reshape((1, encoders.shape[1]))

        print()
        print(intercept)
        print(intercept_list)

        # create a network with the new intercepts
        network = signals.DynamicsAdaptation(
            n_input=encoders.shape[2],
            n_output=1,  # the number of outputs is irrelevant here
            n_neurons=encoders.shape[1],
            intercepts=intercept_list,
            seed=seed,
            encoders=encoders,
            **kwargs)

        # get the spike trains from the sim
        spike_trains = network_utils.get_activities(
            network=network, input_signal=input_signal, synapse=0.005)

        # loop through the analysis functions
        for func in analysis_fncs:
            func_name = func.__name__
            y, activity = func(network=network, input_signal=input_signal,
                               pscs=spike_trains)

            # get the number of active and inactive neurons
            num_active, num_inactive = (
                network_utils.n_neurons_active_and_inactive(
                    activity=activity))

            if ii == 0:
                dat = DataHandler(db_name)
                dat.save(
                    data={'total_intercepts': len(intercept_vals),
                          'notes': notes},
                    save_location='%s/%s' % (save_name, func_name),
                    overwrite=True)

            # not saving activity because it takes up a lot of disk space
            data = {'intercept_bounds': intercept[:2],
                    'intercept_mode': intercept[2],
                    'y': y,
                    'num_active': num_active,
                    'num_inactive': num_inactive,
                    'title': func_name}
            dat.save(data=data,
                     save_location='%s/%s/%05d' % (save_name, func_name, ii),
                     overwrite=True)

        loop_time = timeit.default_timer() - start
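# A hypothetical usage sketch of the run() scan above. The toy analysis
# function, encoder shapes, and intercept bounds are illustrative assumptions,
# not values taken from this repo.
def proportion_neurons_active(network, input_signal, pscs):
    # toy analysis function matching the assumed (network, input_signal,
    # pscs) -> (data, activity) interface; a neuron is treated as active when
    # its filtered activity exceeds a small threshold
    activity = np.asarray(pscs) > 1e-5
    proportion_active = np.mean(activity, axis=1)
    return proportion_active, activity

# assumed encoder layout: (n_ensembles, n_neurons, n_input)
encoders = nengo.dists.UniformHypersphere(surface=True).sample(1000, 2)
encoders = encoders.reshape((1, 1000, 2))
# each row is [left_bound, right_bound, mode] for the triangular distribution
intercept_vals = [[-0.5, 0.5, 0.0], [0.0, 0.9, 0.5], [-0.9, 0.0, -0.5]]
# fake input signal for illustration
input_signal = np.random.uniform(-1, 1, (500, 2))

run(encoders=encoders, intercept_vals=intercept_vals,
    input_signal=input_signal, save_name='proportion_active_scan',
    analysis_fncs=[proportion_neurons_active])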
def run(
    intercept_vals,
    input_signal,
    seed=1,
    db_name="intercepts_scan",
    save_name="example",
    notes="",
    encoders=None,
    analysis_fncs=None,
    network_class=None,
    network_ens_type=None,
    force_params=None,
    angle_params=None,
    means=None,
    variances=None,
    **kwargs
):
    """
    Runs a scan for the proportion of neurons that are active over time

    PARAMETERS
    ----------
    intercept_vals: array of floats (n_intercepts to try x 3)
        the [left_bound, right_bound, mode] to pass on to the triangular
        intercept function in network_utils
    input_signal: array of floats (n_timesteps x n_inputs)
        the input signal that we want to check our network's response to
    seed: int
        the seed used for any randomization in the sim
    save_name: string, Optional (Default: 'example')
        the name to save the data under in the intercept_scan database
    notes: string, Optional (Default: '')
        any additional notes to save with the scan
    encoders: array of floats (n_ensembles x n_neurons x n_input), Optional
        the encoder values for each neuron; used to infer the network
        dimensions when network_ens_type is None
    analysis_fncs: list of network_utils functions to apply to the spike
        trains; each function must accept the filtered spike trains (pscs),
        n_neurons, and n_ensembles, and return a list of data and activity
    network_class: class, Optional (Default: signals.DynamicsAdaptation)
        the network class to instantiate for each intercept distribution
    network_ens_type: string, Optional (Default: None)
        'force' or 'angle'; selects which ensemble and parameter dict to use
    force_params: dict, Optional
        parameters for the force ensembles ('n_neurons', 'n_ensembles',
        'n_input', 'tau_output'); the generated 'intercepts' are added here
    angle_params: dict, Optional
        parameters for the angle ensembles (same keys as force_params)
    means, variances: Optional
        scaling parameters passed through to the network class
    """
    if network_class is None:
        network_class = signals.DynamicsAdaptation

    if not isinstance(analysis_fncs, list):
        analysis_fncs = [analysis_fncs]

    if network_ens_type == "force":
        n_neurons = force_params["n_neurons"]
        n_ensembles = force_params["n_ensembles"]
        n_input = force_params["n_input"]
    elif network_ens_type == "angle":
        n_neurons = angle_params["n_neurons"]
        n_ensembles = angle_params["n_ensembles"]
        n_input = angle_params["n_input"]
    elif network_ens_type is None:
        n_neurons = encoders.shape[1]
        n_ensembles = encoders.shape[0]
        n_input = encoders.shape[2]

    print("Running intercepts scan on %s" % network_class.__name__)
    print("Input Signal Shape: ", np.asarray(input_signal).shape)

    loop_time = 0
    elapsed_time = 0
    for ii, intercept in enumerate(intercept_vals):
        start = timeit.default_timer()
        elapsed_time += loop_time
        print(
            "%i/%i | " % (ii + 1, len(intercept_vals))
            + "%.2f%% Complete | " % (ii / len(intercept_vals) * 100)
            + "%.2f min elapsed | " % (elapsed_time / 60)
            + "%.2f min for last sim | " % (loop_time / 60)
            + "~%.2f min remaining..."
            % ((len(intercept_vals) - ii) * loop_time / 60),
            end="\r",
        )

        # create our intercept distribution from the intercept vals
        triangular = np.random.triangular(
            left=intercept[0],
            right=intercept[1],
            mode=intercept[2],
            size=n_neurons * n_ensembles,
        )
        intercepts = nengo.dists.CosineSimilarity(n_input + 2).ppf(
            1 - triangular)
        # intercepts = nengo.dists.CosineSimilarity(1000 + 2).ppf(1 - triangular)
        intercepts = intercepts.reshape((n_ensembles, n_neurons))
        force_params["intercepts"] = intercepts

        # create a network with the new intercepts
        network = network_class(
            force_params=force_params,
            angle_params=angle_params,
            means=means,
            variances=variances,
            seed=seed,
        )
        if network_ens_type == "force":
            network_ens = network.force_ens
            synapse = force_params["tau_output"]
        elif network_ens_type == "angle":
            network_ens = network.angle_ens
            synapse = angle_params["tau_output"]

        # get the spike trains from the sim
        spike_trains = network_utils.get_activities(
            network=network,
            network_ens=network_ens,
            input_signal=input_signal,
            synapse=synapse,
        )

        # loop through the analysis functions
        for func in analysis_fncs:
            func_name = func.__name__
            y, activity = func(
                pscs=spike_trains, n_neurons=n_neurons,
                n_ensembles=n_ensembles
            )

            # get the number of active and inactive neurons
            num_active, num_inactive = (
                network_utils.n_neurons_active_and_inactive(
                    activity=activity))

            if ii == 0:
                dat = DataHandler(db_name)
                dat.save(
                    data={"total_intercepts": len(intercept_vals),
                          "notes": notes},
                    save_location="%s/%s" % (save_name, func_name),
                    overwrite=True,
                )

            # not saving activity because it takes up a lot of disk space
            data = {
                "intercept_bounds": intercept[:2],
                "intercept_mode": intercept[2],
                "y": y,
                "num_active": num_active,
                "num_inactive": num_inactive,
                "title": func_name,
            }
            dat.save(
                data=data,
                save_location="%s/%s/%05d" % (save_name, func_name, ii),
                overwrite=True,
            )

        loop_time = timeit.default_timer() - start
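# A small standalone sketch of the intercept-generation step used by both
# run() versions above, assuming nengo (with scipy) is installed. It draws a
# triangular distribution of target activity proportions and maps them to
# intercepts via the CosineSimilarity inverse CDF; the bounds and dimensions
# below are illustrative values.
import nengo
import numpy as np

n_neurons = 1000
n_input = 3

# desired proportion of uniformly distributed inputs each neuron should
# respond to, drawn from a triangular distribution over [left, right]
proportions = np.random.triangular(left=0.2, right=0.6, mode=0.4,
                                   size=n_neurons)

# map each proportion p to the intercept that makes a neuron with a random
# encoder fire for roughly a fraction p of uniform d-dimensional inputs
intercepts = nengo.dists.CosineSimilarity(n_input + 2).ppf(1 - proportions)
print(intercepts.min(), intercepts.max())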