Exemple #1
0
def save_analysis(destination,
                  recording,
                  modelspecs,
                  xfspec,
                  figures,
                  log,
                  add_tree_path=False):
    '''
    Write every file of one analysis underneath a common URI prefix.

    Arguments:
     destination    Base location to save under.
     recording      Only used (together with modelspecs and xfspec) to build
                    the tree path when add_tree_path is True.
     modelspecs     Iterable of modelspecs; saved as modelspec.NNNN.json.
     xfspec         Saved as xfspec.json; its URI is also recorded in the
                    metadata of every modelspec under the key 'xfspec'.
     figures        Iterable of figure data; saved as figure.NNNN.png.
     log            Log contents; saved as log.txt.
     add_tree_path  If True, the result of tree_path(...) is joined onto
                    destination before anything is saved.

    Returns
    A dict {'savepath': <prefix used, always ending in '/'>}.
    '''
    root = destination
    if add_tree_path:
        root = os.path.join(destination,
                            tree_path(recording, modelspecs, xfspec))

    # File names are appended by plain concatenation below, so the prefix
    # has to end with a slash.
    if root[-1] != '/':
        root += '/'

    # Needed up front because each modelspec stores a reference to it.
    xfspec_uri = root + 'xfspec.json'

    for index, modelspec in enumerate(modelspecs):
        set_modelspec_metadata(modelspec, 'xfspec', xfspec_uri)
        target = '{}modelspec.{:04d}.json'.format(root, index)
        save_resource(target, json=modelspec)

    for index, figure in enumerate(figures):
        target = '{}figure.{:04d}.png'.format(root, index)
        save_resource(target, data=figure)

    save_resource(root + 'log.txt', data=log)
    save_resource(xfspec_uri, json=xfspec)

    return {'savepath': root}
Exemple #2
0
def fit_module_sets(
        data,
        modelspec,
        cost_function=basic_cost,
        evaluator=ms.evaluate,
        segmentor=nems.segmentors.use_all_data,
        mapper=nems.fitters.mappers.simple_vector,
        metric=lambda data: nems.metrics.api.nmse(data, 'pred', 'resp'),
        fitter=coordinate_descent,
        fit_kwargs=None,
        metaname='fit_module_sets',
        module_sets=None,
        invert=False,
        tolerance=1e-4,
        max_iter=1000):
    '''
    Fit a modelspec by making a single pass over a list of module subsets.

    Required Arguments:
     data          A recording object
     modelspec     A modelspec object

    Optional Arguments:
     fitter        A function of (sigma, costfn) that tests various points,
                   in fitspace (i.e. sigmas) using the cost function costfn,
                   and hopefully returns a better sigma after some time.

     fit_kwargs    Extra keyword arguments handed on to the fit loop. The
                   dict is copied before 'tolerance' and 'max_iter' are
                   added, so neither the caller's dict nor a shared default
                   is modified. None (the default) means no extra arguments.

     module_sets   A nested list specifying which model indices should be fit.
                   Overall iteration will occur len(module_sets) many times.
                   ex: [[0], [1, 3], [0, 1, 2, 3]]
                   Defaults to one single-module set per module.

     invert        Boolean. Causes module_sets to specify the model indices
                   that should *not* be fit.

     tolerance     Stored in fit_kwargs under 'tolerance'.
     max_iter      Stored in fit_kwargs under 'max_iter'.

     metaname      Value recorded in the modelspec metadata under 'fitter'.

    Returns
    A list containing a single modelspec (a deep copy), which has the best
    parameters found by this fitter.
    '''
    if module_sets is None:
        module_sets = [[i] for i in range(len(modelspec))]

    # Copy before updating: the original mutated the dict it was given,
    # which leaked settings into the caller's dict and into the shared
    # default across calls.
    fit_kwargs = dict(fit_kwargs or {})
    fit_kwargs.update({'tolerance': tolerance, 'max_iter': max_iter})

    # Ensure that phi exists for all modules; choose prior mean if not found
    for i, m in enumerate(modelspec):
        if not m.get('phi'):
            log.debug('Phi not found for module, using mean of prior: %s', m)
            m = nems.priors.set_mean_phi([m])[0]  # Inits phi for 1 module
            modelspec[i] = m

    if invert:
        module_sets = _invert_subsets(modelspec, module_sets)

    ms.fit_mode_on(modelspec)
    start_time = time.time()

    # Fallback so that an empty module_sets returns the (unfitted) modelspec
    # instead of failing with a NameError further down.
    improved_modelspec = modelspec

    log.info("Fitting all subsets with tolerance: %.2E", tolerance)
    for subset in module_sets:
        improved_modelspec = _module_set_loop(subset, data, modelspec,
                                              cost_function, fitter, mapper,
                                              segmentor, evaluator, metric,
                                              fit_kwargs)

    elapsed_time = (time.time() - start_time)

    # TODO: Should this maybe be moved to a higher level
    # so it applies to ALL the fitters?
    ms.fit_mode_off(improved_modelspec)
    ms.set_modelspec_metadata(improved_modelspec, 'fitter', metaname)
    ms.set_modelspec_metadata(improved_modelspec, 'fit_time', elapsed_time)
    results = [copy.deepcopy(improved_modelspec)]

    return results
Exemple #3
0
def fit_iteratively(
    data,
    modelspec,
    cost_function=basic_cost,
    fitter=coordinate_descent,
    evaluator=ms.evaluate,
    segmentor=nems.segmentors.use_all_data,
    mapper=nems.fitters.mappers.simple_vector,
    metric=lambda data: nems.metrics.api.nmse(data, 'pred', 'resp'),
    metaname='fit_basic',
    fit_kwargs=None,
    module_sets=None,
    invert=False,
    tolerances=None,
    tol_iter=100,
    fit_iter=20,
):
    '''
    Repeatedly fit subsets of modules, once per tolerance level, until the
    error stops improving by at least that tolerance.

    Required Arguments:
     data          A recording object
     modelspec     A modelspec object

    Optional Arguments:
     fitter        A function of (sigma, costfn) that tests various points,
                   in fitspace (i.e. sigmas) using the cost function costfn,
                   and hopefully returns a better sigma after some time.
     mapper        A class that has two methods, pack and unpack, which define
                   the mapping between modelspecs and a fitter's fitspace.
     segmentor     A function that selects a subset of the data during the
                   fitting process. This is NOT the same as est/val data splits
     metric        A function of a Recording that returns an error value
                   that is to be minimized.
     cost_function Its `.error` attribute is read after every subset fit to
                   measure the error reduction.
     fit_kwargs    Extra keyword arguments handed on to the fit loop. The
                   dict is copied before 'tolerance' and 'max_iter' are
                   set, so the caller's dict is never modified. None (the
                   default) means no extra arguments.

     module_sets   A nested list specifying which model indices should be fit.
                   Overall iteration will occur len(module_sets) many times.
                   ex: [[0], [1, 3], [0, 1, 2, 3]]
                   Defaults to one single-module set per module.

     invert        Boolean. Documented as causing module_sets to specify the
                   model indices that should *not* be fit.
                   NOTE(review): not referenced in this function body.

     tolerances    Sequence of tolerances, processed in order.
                   Defaults to [1e-6].
     tol_iter      Maximum number of passes over module_sets per tolerance.
     fit_iter      Stored in fit_kwargs under 'max_iter'.

    Returns
    A list containing a single modelspec (a deep copy), which has the best
    parameters found by this fitter.
    '''
    if module_sets is None:
        module_sets = [[i] for i in range(len(modelspec))]

    if tolerances is None:
        tolerances = [1e-6]

    # Copy before updating: the original mutated the dict it was given,
    # which leaked settings into the caller's dict and into the shared
    # default across calls.
    fit_kwargs = dict(fit_kwargs or {})

    start_time = time.time()
    ms.fit_mode_on(modelspec)
    # Ensure that phi exists for all modules; choose prior mean if not found
    for i, m in enumerate(modelspec):
        if not m.get('phi'):
            log.debug('Phi not found for module, using mean of prior: %s', m)
            m = nems.priors.set_mean_phi([m])[0]  # Inits phi for 1 module
            modelspec[i] = m

    # Fallback so that empty tolerances/module_sets (or tol_iter <= 0)
    # return the unfitted modelspec instead of failing with a NameError.
    improved_modelspec = modelspec

    error = np.inf
    for tol in tolerances:
        log.info("Fitting all subsets with tolerance: %.2E", tol)
        fit_kwargs.update({'tolerance': tol, 'max_iter': fit_iter})
        error_reduction = np.inf
        i = 0

        # The loop condition sees the reduction achieved by the *last*
        # subset of the previous pass.
        while (error_reduction >= tol) and (i < tol_iter):
            for subset in module_sets:
                improved_modelspec = _module_set_loop(subset, data, modelspec,
                                                      cost_function, fitter,
                                                      mapper, segmentor,
                                                      evaluator, metric,
                                                      fit_kwargs)
                new_error = cost_function.error
                error_reduction = error - new_error
                error = new_error
                log.debug("Error reduction was: %.6E", error_reduction)
            i += 1

    elapsed_time = (time.time() - start_time)

    # TODO: Should this maybe be moved to a higher level
    # so it applies to ALL the fitters?
    ms.fit_mode_off(improved_modelspec)
    ms.set_modelspec_metadata(improved_modelspec, 'fitter', metaname)
    ms.set_modelspec_metadata(improved_modelspec, 'fit_time', elapsed_time)
    results = [copy.deepcopy(improved_modelspec)]

    return results
Exemple #4
0
def fit_basic(data,
              modelspec,
              fitter=scipy_minimize,
              cost_function=None,
              segmentor=nems.segmentors.use_all_data,
              mapper=nems.fitters.mappers.simple_vector,
              metric=lambda data: metrics.nmse(data, 'pred', 'resp'),
              metaname='fit_basic',
              fit_kwargs={},
              require_phi=True):
    '''
    Run one fit of all of modelspec's parameters against data.

    Required Arguments:
     data          A recording object
     modelspec     A modelspec object

    Optional Arguments:
     fitter        Called as fitter(sigma, cost_fn, **fit_kwargs); expected
                   to return an improved sigma (a point in fitspace).
     cost_function Function to minimise. None selects basic_cost from this
                   module. Its `counter` attribute is reset to 0 here.
     segmentor     Passed through to the cost function. Selects a subset of
                   the data during fitting; NOT an est/val data split.
     mapper        Called with the modelspec; must return a pair
                   (packer, unpacker) translating modelspec -> fitspace
                   point and fitspace point -> modelspec.
     metric        A function of a Recording that returns the error value
                   to be minimized; passed through to the cost function.
     metaname      Value recorded in the modelspec metadata under 'fitter'.
     fit_kwargs    Extra keyword arguments for the fitter.
     require_phi   If True, any module without a truthy 'phi' gets one from
                   nems.priors.set_mean_phi before fitting.

    Returns
    A list containing a single modelspec (a deep copy), which has the best
    parameters found by this fitter.
    '''
    t_begin = time.time()

    # Default to the cost function defined in this module.
    cost = basic_cost if cost_function is None else cost_function

    if require_phi:
        # Modules lacking phi are initialised in place from their prior.
        for idx, module in enumerate(modelspec):
            if module.get('phi'):
                continue
            log.debug('Phi not found for module, using mean of prior: %s',
                      module)
            modelspec[idx] = nems.priors.set_mean_phi([module])[0]

    ms.fit_mode_on(modelspec)

    # The mapper supplies both directions of the modelspec <-> fitspace
    # translation.
    to_sigma, from_sigma = mapper(modelspec)

    # Function used by the cost function to evaluate a modelspec on the data.
    run_model = nems.modelspec.evaluate

    cost.counter = 0

    # Bind every argument except sigma; sigma is the only thing the fitter
    # is allowed to vary.
    objective = partial(cost,
                        unpacker=from_sigma,
                        modelspec=modelspec,
                        data=data,
                        segmentor=segmentor,
                        evaluator=run_model,
                        metric=metric)

    # Starting point in fitspace, then the fitter's improved point.
    sigma_start = to_sigma(modelspec)
    sigma_best = fitter(sigma_start, objective, **fit_kwargs)
    fitted = from_sigma(sigma_best)

    duration = time.time() - t_begin

    # TODO: Should this maybe be moved to a higher level
    # so it applies to ALL the fitters?
    ms.fit_mode_off(fitted)
    ms.set_modelspec_metadata(fitted, 'fitter', metaname)
    ms.set_modelspec_metadata(fitted, 'fit_time', duration)

    # Callers expect a list of modelspecs even when there is only one.
    return [copy.deepcopy(fitted)]
Exemple #5
0
def fit_basic(data, modelspec,
              fitter=scipy_minimize, cost_function=None,
              segmentor=nems.segmentors.use_all_data,
              mapper=nems.fitters.mappers.simple_vector,
              metric=None,
              metaname='fit_basic', fit_kwargs={}, require_phi=True):
    '''
    Run one bounded fit of all of modelspec's parameters against data.

    Required Arguments:
     data          A recording object
     modelspec     A modelspec object (deep-copied; the argument itself is
                   not modified)

    Optional Arguments:
     fitter        Called as fitter(sigma, cost_fn, bounds=..., **fit_kwargs);
                   expected to return an improved sigma.
     cost_function Function to minimise. None selects basic_cost from this
                   module. Its `counter` attribute is reset to 0 here.
     mapper        Called with the modelspec; must return three functions
                   (packer, unpacker, pack_bounds).
     segmentor     A function that selects a subset of the data during the
                   fitting process. This is NOT the same as est/val data splits
     metric        A function of a Recording that returns an error value
                   that is to be minimized. None selects nmse between 'pred'
                   and modelspec.meta['output_name'] (default 'resp').
     metaname      Value recorded in the modelspec metadata under 'fitter'.
     fit_kwargs    Extra keyword arguments for the fitter (not modified).
     require_phi   If True, modules that have a 'prior' but no 'phi' get
                   phi from nems.priors.set_mean_phi before fitting.

    Returns
    A copy of the fitted modelspec (improved_modelspec.copy()). Only if the
    unpacker returns a plain `list` is the result instead a one-element list
    holding a deep copy of it.
    '''
    start_time = time.time()

    modelspec = copy.deepcopy(modelspec)
    output_name = modelspec.meta.get('output_name', 'resp')

    if metric is None:
        metric = lambda data: metrics.nmse(data, 'pred', output_name)

    if cost_function is None:
        # Use the cost function defined in this module by default
        cost_function = basic_cost

    if require_phi:
        # Ensure that phi exists for all modules;
        # choose prior mean if not found
        for i, m in enumerate(modelspec.modules):
            if ('phi' not in m.keys()) and ('prior' in m.keys()):
                log.debug('Phi not found for module, using mean of prior: %s', m)
                m = nems.priors.set_mean_phi([m])[0]  # Inits phi for 1 module
                modelspec[i] = m

    # apply mask to remove invalid portions of signals and allow fit to
    # only evaluate the model on the valid portion of the signals
    if 'mask' in data.signals.keys():
        log.info("Data len pre-mask: %d", data['mask'].shape[1])
        data = data.apply_mask()
        log.info("Data len post-mask: %d", data['mask'].shape[1])

    # turn on "fit mode". currently this serves one purpose, for normalization
    # parameters to be re-fit for the output of each module that uses
    # normalization. does nothing if normalization is not being used.
    ms.fit_mode_on(modelspec, data)

    # Create the mapper functions that translate to and from modelspecs:
    #    packer(modelspec) -> fitspace_point
    #    unpacker(fitspace_point) -> modelspec
    #    pack_bounds(modelspec) -> fitspace_bounds
    packer, unpacker, pack_bounds = mapper(modelspec)

    # A function to evaluate the modelspec on the data
    evaluator = nems.modelspec.evaluate

    cost_function.counter = 0

    # Freeze everything but sigma, since that's all the fitter should be
    # updating.
    cost_fn = partial(cost_function,
                      unpacker=unpacker, modelspec=modelspec,
                      data=data, segmentor=segmentor, evaluator=evaluator,
                      metric=metric)

    # get initial sigma value representing some point in the fit space,
    # and corresponding bounds for each value
    sigma = packer(modelspec)
    bounds = pack_bounds(modelspec)

    improved_sigma = fitter(sigma, cost_fn, bounds=bounds, **fit_kwargs)
    improved_modelspec = unpacker(improved_sigma)
    # Timing stops here so the two diagnostic cost evaluations below are
    # not counted as fit time.
    elapsed_time = (time.time() - start_time)

    start_err = cost_fn(sigma)
    final_err = cost_fn(improved_sigma)
    # The logged result now matches the printed expression (start - final);
    # previously the message showed "start - final" but reported
    # final - start.
    log.info("Delta error: %.06f - %.06f = %e",
             start_err, final_err, start_err - final_err)

    # TODO: Should this maybe be moved to a higher level
    # so it applies to ALL the fitters?
    ms.fit_mode_off(improved_modelspec)
    ms.set_modelspec_metadata(improved_modelspec, 'fitter', metaname)
    ms.set_modelspec_metadata(improved_modelspec, 'n_parms',
                              len(improved_sigma))
    if modelspec.fit_count == 1:
        # Single fit: store scalars.
        improved_modelspec.meta['fit_time'] = elapsed_time
        improved_modelspec.meta['loss'] = final_err
    else:
        # Several fits: one array slot per fit; the arrays are (re)created
        # when the first fit index is processed.
        fit_index = modelspec.fit_index
        if fit_index == 0:
            improved_modelspec.meta['fit_time'] = np.zeros(improved_modelspec.fit_count)
            improved_modelspec.meta['loss'] = np.zeros(improved_modelspec.fit_count)
        improved_modelspec.meta['fit_time'][fit_index] = elapsed_time
        improved_modelspec.meta['loss'][fit_index] = final_err

    # Exact type check kept on purpose: switching to isinstance() would
    # change the return shape if the modelspec type happens to subclass list.
    if type(improved_modelspec) is list:
        return [copy.deepcopy(improved_modelspec)]
    else:
        return improved_modelspec.copy()
Exemple #6
0
def fill_in_default_metadata(rec, modelspecs, IsReload=False, **context):
    '''
    Sets any uninitialized metadata to defaults that should help us
    find it in nems_db again. (fitter, recording, date, etc)

    Arguments:
     rec          Recording providing `name` and `uri` for the 'recording'
                  and 'recording_uri' defaults; a falsy rec yields 'None'.
     modelspecs   Iterable of modelspecs whose metadata is completed
                  through set_modelspec_metadata.
     IsReload     When True nothing is modified.
     context      Ignored.

    Only keys that are absent from a modelspec's metadata are written:
    'fitter', 'fit_time', 'recording', 'recording_uri', 'date', 'hostname'.

    Returns
    A dict {'modelspecs': modelspecs} (the same object that was passed in).
    '''
    if not IsReload:
        # Add metadata to help you reload this state later
        for modelspec in modelspecs:
            meta = get_modelspec_metadata(modelspec)
            if 'fitter' not in meta:
                set_modelspec_metadata(modelspec, 'fitter', 'None')
            if 'fit_time' not in meta:
                # Previously this wrote the 'fitter' key again, which both
                # clobbered a real fitter name and left 'fit_time' unset.
                set_modelspec_metadata(modelspec, 'fit_time', 'None')
            if 'recording' not in meta:
                recname = rec.name if rec else 'None'
                set_modelspec_metadata(modelspec, 'recording', recname)
            if 'recording_uri' not in meta:
                uri = rec.uri if rec and rec.uri else 'None'
                set_modelspec_metadata(modelspec, 'recording_uri', uri)
            if 'date' not in meta:
                set_modelspec_metadata(modelspec, 'date', iso8601_datestring())
            if 'hostname' not in meta:
                set_modelspec_metadata(modelspec, 'hostname',
                                       socket.gethostname())
    return {'modelspecs': modelspecs}
Exemple #7
0
def fit_population_iteratively(
        est, modelspec,
        cost_function=basic_cost,
        fitter=coordinate_descent, evaluator=ms.evaluate,
        segmentor=nems.segmentors.use_all_data,
        mapper=nems.fitters.mappers.simple_vector,
        metric=lambda data: nems.metrics.api.nmse(data, 'pred', 'resp'),
        metaname='fit_basic', fit_kwargs={},
        module_sets=None, invert=False, tolerances=None, tol_iter=50,
        fit_iter=10, IsReload=False, **context
        ):
    '''
    Iteratively fit a population model: at each tolerance level, alternate
    a fit over the shared module set with per-slice fits (one per row of
    the 'resp' signal) until the error stops improving by that tolerance.

    Required Arguments:
     est           A recording object (copied; not modified)
     modelspec     A modelspec object (deep-copied; not modified)

    Optional Arguments:
     TODO: need to deal with the fact that you can't pass functions in an
     xforms-friendly function.
     cost_function, fitter, evaluator, segmentor, mapper, module_sets, invert
                   Accepted for signature compatibility but NOT referenced
                   in the body: every fit call below hard-codes
                   scipy_minimize and analysis.fit_basic.
     metric        A function of a Recording that returns an error value
                   that is to be minimized.
     metaname      Value recorded in the modelspec metadata under 'fitter'.
     fit_kwargs    Extra keyword arguments for the fits; copied, never
                   modified.
     tolerances    Sequence of tolerances processed in order.
                   Defaults to [1e-4, 1e-5]. When the last module's 'fn'
                   contains 'nonlinearity', the first tolerance is run twice:
                   first without that module, then with it restored.
     tol_iter      Maximum outer iterations per tolerance.
     fit_iter      Stored in the fit kwargs under 'max_iter'.
     IsReload      When True nothing is fit and {} is returned.

    Returns
    {} if IsReload, otherwise a dict {'modelspec': <copy of fitted modelspec>}.

    Raises
    ValueError if the output nonlinearity that was set aside is not one of
    double_exponential, logistic_sigmoid or relu.
    '''

    if IsReload:
        return {}

    modelspec = copy.deepcopy(modelspec)
    data = est.copy()

    fit_set_all, fit_set_slice = _figure_out_mod_split(modelspec)

    if tolerances is None:
        tolerances = [1e-4, 1e-5]
    else:
        # Accept any sequence; list concatenation is used below.
        tolerances = list(tolerances)

    # apply mask to remove invalid portions of signals and allow fit to
    # only evaluate the model on the valid portion of the signals
    # then delete the mask signal so that it's not reapplied on each fit
    if 'mask' in data.signals.keys():
        log.info("Data len pre-mask: %d", data['mask'].shape[1])
        data = data.apply_mask()
        log.info("Data len post-mask: %d", data['mask'].shape[1])
        del data.signals['mask']

    start_time = time.time()
    ms.fit_mode_on(modelspec, data)

    error = np.inf

    slice_count = data['resp'].shape[0]
    step_size = 0.1
    if 'nonlinearity' in modelspec[-1]['fn']:
        skip_nl_first = True
        # Repeat the first tolerance: pass 0 runs without the output NL.
        # Slicing (instead of indexing) keeps an empty list empty.
        tolerances = tolerances[:1] + tolerances
    else:
        skip_nl_first = False

    # Fallback so that an empty tolerances list (or tol_iter <= 0) returns
    # the unfitted modelspec instead of failing with a NameError at the end.
    improved_modelspec = modelspec

    for toli, tol in enumerate(tolerances):

        log.info("Fitting subsets with tol: %.2E fit_iter %d tol_iter %d",
                 tol, fit_iter, tol_iter)
        # cd_kwargs (with step_size) is only needed if the slice fits are
        # switched to coordinate_descent; it is not passed to any fit call
        # at present.
        cd_kwargs = fit_kwargs.copy()
        cd_kwargs.update({'tolerance': tol, 'max_iter': fit_iter,
                          'step_size': step_size})
        sp_kwargs = fit_kwargs.copy()
        sp_kwargs.update({'tolerance': tol, 'max_iter': fit_iter})

        if (toli == 0) and skip_nl_first:
            log.info('skipping nl on first tolerance loop')
            # Keep the full model so the NL module can be restored later.
            saved_modelspec = copy.deepcopy(modelspec)
            saved_fit_set_slice = fit_set_slice.copy()
            modelspec.pop_module()
            fit_set_slice = fit_set_slice[:-1]

        inner_i = 0
        error_reduction = np.inf
        while (error_reduction >= tol) and (inner_i < tol_iter):

            log.info("(%d) Tol %.2e: Loop %d/%d (max)",
                     toli, tol, inner_i, tol_iter)
            improved_modelspec = copy.deepcopy(modelspec)

            for i, m in enumerate(modelspec):
                if i in fit_set_all:
                    log.info("%s: fitting", m['fn'])
                else:
                    log.info("%s: frozen", m['fn'])

            improved_modelspec = fit_population_channel_fast2(
                data, improved_modelspec, fit_set_all, fit_set_slice,
                analysis_function=analysis.fit_basic,
                metric=metric,
                fitter=scipy_minimize, fit_kwargs=sp_kwargs)

            for s in range(slice_count):
                log.info('Slice %d set %s', s, fit_set_slice)
                improved_modelspec = fit_population_slice(
                        data, improved_modelspec, slice=s,
                        fit_set=fit_set_slice,
                        analysis_function=analysis.fit_basic,
                        metric=metric,
                        fitter=scipy_minimize,
                        fit_kwargs=sp_kwargs)

            data = ms.evaluate(data, improved_modelspec)
            new_error = metric(data)
            error_reduction = error - new_error
            error = new_error
            log.info("tol=%.2E, iter=%d/%d: deltaE=%.6E",
                     tol, inner_i, tol_iter, error_reduction)
            inner_i += 1
            # Only adopt the candidate as the new starting point when it
            # actually lowered the error.
            if error_reduction > 0:
                modelspec = improved_modelspec

        log.info("Done with tol %.2E (i=%d, max_error_reduction %.7f)",
                 tol, inner_i, error_reduction)

        if (toli == 0) and skip_nl_first:
            log.info('Restoring NL module after first tol loop')
            modelspec.append(saved_modelspec[-1])

            fit_set_slice = saved_fit_set_slice
            nl_fn = saved_modelspec[-1]['fn']
            if 'double_exponential' in nl_fn:
                modelspec = init.init_dexp(data, modelspec)
            elif 'logistic_sigmoid' in nl_fn:
                modelspec = init.init_logsig(data, modelspec)
            elif 'relu' in nl_fn:
                # just keep initialized to zero
                pass
            else:
                # Exceptions do not apply %-formatting to extra arguments
                # the way logging does, so format explicitly.
                raise ValueError("Output NL %s not supported" % nl_fn)

            improved_modelspec = copy.deepcopy(modelspec)

            # NOTE(review): the log line names only the last (NL) index,
            # but the fit is run on the whole restored fit_set_slice --
            # confirm which one is intended.
            for s in range(slice_count):
                log.info('Slice %d set %s', s, [fit_set_slice[-1]])
                improved_modelspec = fit_population_slice(
                        data, improved_modelspec, slice=s,
                        fit_set=fit_set_slice,
                        analysis_function=analysis.fit_basic,
                        metric=metric,
                        fitter=scipy_minimize,
                        fit_kwargs=sp_kwargs)

            data = ms.evaluate(data, modelspec)
            old_error = metric(data)
            data = ms.evaluate(data, improved_modelspec)
            new_error = metric(data)
            log.info('Init NL fit error change %.5f-%.5f = %.5f',
                     old_error, new_error, old_error-new_error)
            modelspec = improved_modelspec

        else:
            step_size *= 0.25

    elapsed_time = (time.time() - start_time)

    # TODO: Should this maybe be moved to a higher level
    # so it applies to ALL the fitters?
    ms.fit_mode_off(improved_modelspec)
    ms.set_modelspec_metadata(improved_modelspec, 'fitter', metaname)
    ms.set_modelspec_metadata(improved_modelspec, 'fit_time', elapsed_time)

    return {'modelspec': improved_modelspec.copy()}
Exemple #8
0
def fit_iteratively(
    data,
    modelspec,
    cost_function=basic_cost,
    fitter=coordinate_descent,
    evaluator=ms.evaluate,
    segmentor=nems.segmentors.use_all_data,
    mapper=nems.fitters.mappers.simple_vector,
    metric=lambda data: nems.metrics.api.nmse(data, 'pred', 'resp'),
    metaname='fit_basic',
    fit_kwargs=None,
    module_sets=None,
    invert=False,
    tolerances=None,
    tol_iter=50,
    fit_iter=10,
):
    '''
    Repeatedly fit subsets of modules, once per tolerance level, until no
    subset improves the error by at least that tolerance.

    Required Arguments:
     data          A recording object
     modelspec     A modelspec object

    Optional Arguments:
     fitter        A function of (sigma, costfn) that tests various points,
                   in fitspace (i.e. sigmas) using the cost function costfn,
                   and hopefully returns a better sigma after some time.
     mapper        A class that has two methods, pack and unpack, which define
                   the mapping between modelspecs and a fitter's fitspace.
     segmentor     A function that selects a subset of the data during the
                   fitting process. This is NOT the same as est/val data splits
     metric        A function of a Recording that returns an error value
                   that is to be minimized.
     cost_function Its `.error` attribute is read after every subset fit to
                   measure the error reduction.
     fit_kwargs    Extra keyword arguments handed on to the fit loop. The
                   dict is copied before 'tolerance' and 'max_iter' are
                   set, so the caller's dict is never modified. None (the
                   default) means no extra arguments.

     module_sets   A nested list specifying which model indices should be fit.
                   Overall iteration will occur len(module_sets) many times.
                   ex: [[0], [1, 3], [0, 1, 2, 3]]
                   By default every module that has a 'prior' gets its own
                   set, except that a levelshift module directly following
                   a fir module is added to the most recent set.

     invert        Boolean. Documented as causing module_sets to specify the
                   model indices that should *not* be fit.
                   NOTE(review): not referenced in this function body.

     tolerances    Sequence of tolerances, processed in order.
                   Defaults to [1e-6].
     tol_iter      Maximum number of passes over module_sets per tolerance.
     fit_iter      Stored in fit_kwargs under 'max_iter'.

    Returns
    A list containing a single modelspec (a deep copy), which has the best
    parameters found by this fitter.
    '''
    if module_sets is None:
        module_sets = []
        for i, m in enumerate(modelspec):
            if 'prior' in m.keys():
                # `i > 0` stops modelspec[i - 1] from wrapping around to the
                # last module, and `module_sets` guards the [-1] access when
                # no set has been created yet.
                if ('levelshift' in m['fn'] and i > 0
                        and 'fir' in modelspec[i - 1]['fn']
                        and module_sets):
                    # group levelshift with preceding fir filter by default
                    module_sets[-1].append(i)
                else:
                    # otherwise just fit each module separately
                    module_sets.append([i])
        log.info('Fit sets: %s', module_sets)

    if tolerances is None:
        tolerances = [1e-6]

    # Copy before updating: the original mutated the dict it was given,
    # which leaked settings into the caller's dict and into the shared
    # default across calls.
    fit_kwargs = dict(fit_kwargs or {})

    # apply mask to remove invalid portions of signals and allow fit to
    # only evaluate the model on the valid portion of the signals
    if 'mask' in data.signals.keys():
        log.info("Data len pre-mask: %d", data['mask'].shape[1])
        data = data.apply_mask()
        log.info("Data len post-mask: %d", data['mask'].shape[1])

    start_time = time.time()
    ms.fit_mode_on(modelspec)
    # Ensure that phi exists for all modules; choose prior mean if not found
    for i, m in enumerate(modelspec):
        if ('phi' not in m.keys()) and ('prior' in m.keys()):
            m = nems.priors.set_mean_phi([m])[0]  # Inits phi for 1 module
            log.debug('Phi not found for module, using mean of prior: %s', m)
            modelspec[i] = m

    # Fallback so that empty tolerances/module_sets (or tol_iter <= 0)
    # return the unfitted modelspec instead of failing with a NameError.
    improved_modelspec = modelspec

    error = np.inf
    for tol in tolerances:
        log.info("Fitting subsets with tol: %.2E fit_iter %d tol_iter %d", tol,
                 fit_iter, tol_iter)
        fit_kwargs.update({'tolerance': tol, 'max_iter': fit_iter})
        max_error_reduction = np.inf
        i = 0

        while (max_error_reduction >= tol) and (i < tol_iter):
            # Track the best improvement achieved by any subset in this pass.
            max_error_reduction = 0
            for subset in module_sets:
                improved_modelspec = _module_set_loop(subset, data, modelspec,
                                                      cost_function, fitter,
                                                      mapper, segmentor,
                                                      evaluator, metric,
                                                      fit_kwargs)
                new_error = cost_function.error
                error_reduction = error - new_error
                error = new_error
                if error_reduction > max_error_reduction:
                    max_error_reduction = error_reduction
            log.info("tol=%.2E, iter=%d/%d: max deltaE=%.6E", tol, i, tol_iter,
                     max_error_reduction)
            i += 1
        # Report the value the label names; the original logged the last
        # subset's error_reduction here (unbound when module_sets is empty).
        log.info("Done with tol %.2E (i=%d, max_error_reduction %.7f)", tol, i,
                 max_error_reduction)

    elapsed_time = (time.time() - start_time)

    # TODO: Should this maybe be moved to a higher level
    # so it applies to ALL the fitters?
    ms.fit_mode_off(improved_modelspec)
    ms.set_modelspec_metadata(improved_modelspec, 'fitter', metaname)
    ms.set_modelspec_metadata(improved_modelspec, 'fit_time', elapsed_time)
    results = [copy.deepcopy(improved_modelspec)]

    return results
Exemple #9
0
def fit_pcnorm(modelspec,
               est: recording.Recording,
               metric=None,
               use_modelspec_init: bool = True,
               optimizer: str = 'adam',
               max_iter: int = 10000,
               early_stopping_steps: int = 5,
               tolerance: float = 5e-4,
               learning_rate: float = 1e-4,
               batch_size: typing.Union[None, int] = None,
               seed: int = 0,
               initializer: str = 'random_normal',
               freeze_layers: typing.Union[None, list] = None,
               epoch_name: str = "REFERENCE",
               n_pcs=2,
               **context):
    '''
    Fit a modelspec with scipy_minimize, by default using the pc_err metric
    built from principal components of the (resp - pred0) residual.

    Required Arguments:
     modelspec     A modelspec object. It is deep-copied; the caller's
                   object is not modified by the fit.
     est           A recording object. Must provide 'resp' and 'pred0'
                   signals and one or more 'mask_<condition>' signals.
                   If a 'mask' signal is present, est.apply_mask() is
                   applied before anything else is computed.

    Optional Arguments:
     metric        A function of a Recording that returns the error to be
                   minimized. If None, pc_err is used with the per-condition
                   PC statistics computed in this function.
     max_iter      Passed to the fitter as 'max_iter'.
     tolerance     Passed to the fitter as 'tolerance'.
     n_pcs         Number of principal components fit to the residual.

     use_modelspec_init, optimizer, early_stopping_steps, learning_rate,
     batch_size, seed, initializer, freeze_layers, epoch_name, **context
                   Accepted for signature compatibility but not used by
                   this function.

    Returns
     dictionary: {'modelspec': updated_modelspec, 'save_context': True}

    Raises
     ValueError    If no 'mask_<condition>' signal selects any samples.
    '''

    # Hard-coded
    cost_function = basic_cost
    fitter = scipy_minimize
    segmentor = nems.segmentors.use_all_data
    mapper = nems.fitters.mappers.simple_vector
    fit_kwargs = {'tolerance': tolerance, 'max_iter': max_iter}

    start_time = time.time()

    modelspec = copy.deepcopy(modelspec)

    # apply mask to remove invalid portions of signals and allow fit to
    # only evaluate the model on the valid portion of the signals
    if 'mask' in est.signals.keys():
        log.info("Data len pre-mask: %d", est['mask'].shape[1])
        est = est.apply_mask()
        log.info("Data len post-mask: %d", est['mask'].shape[1])

    # Condition names are whatever follows the "mask_" prefix of a signal.
    conditions = [
        k.split("_", 1)[1] for k in est.signals.keys()
        if k.startswith("mask_")
    ]
    # When more than two conditions exist and any of them ends in "_lg",
    # the plain "small"/"large" conditions are dropped.
    # NOTE(review): list.remove raises ValueError if either name is
    # missing -- confirm both are always present in that situation.
    if (len(conditions) > 2) and any(
            c.split("_")[-1] == 'lg' for c in conditions):
        conditions.remove("small")
        conditions.remove("large")

    group_idx = [est['mask_' + c].as_continuous()[0, :] for c in conditions]

    # Keep only the conditions whose mask selects at least one sample.
    cg_filtered = [(c, g) for c, g in zip(conditions, group_idx)
                   if g.sum() > 0]
    if not cg_filtered:
        # Without this guard, zip(*[]) fails below with an opaque
        # "not enough values to unpack" error.
        raise ValueError(
            "fit_pcnorm: no 'mask_<condition>' signal in est selects any "
            "samples; cannot compute per-condition PC statistics")
    conditions, group_idx = zip(*cg_filtered)

    for c, g in zip(conditions, group_idx):
        log.info(f"Data subset for {c} len {g.sum()}")

    resp = est['resp'].as_continuous()
    pred0 = est['pred0'].as_continuous()
    residual = resp - pred0

    # Principal axes of the residual, then the residual projected onto them.
    pca = PCA(n_components=n_pcs)
    pca.fit(residual.T)
    pc_axes = pca.components_

    pcproj = residual.T.dot(pc_axes.T).T

    # Std of each PC projection within each condition, and of resp overall.
    group_pc = [pcproj[:, idx].std(axis=1) for idx in group_idx]
    resp_std = resp.std(axis=1)

    if metric is None:
        metric = partial(pc_err,
                         pred_name='pred',
                         pred0_name='pred0',
                         group_idx=group_idx,
                         group_pc=group_pc,
                         pc_axes=pc_axes,
                         resp_std=resp_std)

    # turn on "fit mode". currently this serves one purpose, for normalization
    # parameters to be re-fit for the output of each module that uses
    # normalization. does nothing if normalization is not being used.
    ms.fit_mode_on(modelspec, est)

    # Create the mapper functions that translates to and from modelspecs.
    # It has three functions that, when defined as mathematical functions, are:
    #    .pack(modelspec) -> fitspace_point
    #    .unpack(fitspace_point) -> modelspec
    #    .bounds(modelspec) -> fitspace_bounds
    packer, unpacker, pack_bounds = mapper(modelspec)

    # A function to evaluate the modelspec on the data
    evaluator = nems.modelspec.evaluate

    # The evaluation counter lives on the cost function object itself
    # (here the shared basic_cost), so it is reset before each fit.
    my_cost_function = cost_function
    my_cost_function.counter = 0

    # Freeze everything but sigma, since that's all the fitter should be
    # updating.
    cost_fn = partial(my_cost_function,
                      unpacker=unpacker,
                      modelspec=modelspec,
                      data=est,
                      segmentor=segmentor,
                      evaluator=evaluator,
                      metric=metric,
                      display_N=1000)

    # get initial sigma value representing some point in the fit space,
    # and corresponding bounds for each value
    sigma = packer(modelspec)
    bounds = pack_bounds(modelspec)

    improved_sigma = fitter(sigma, cost_fn, bounds=bounds, **fit_kwargs)
    improved_modelspec = unpacker(improved_sigma)
    # Measured before the two diagnostic evaluations below, so fit_time
    # covers setup and the fit only.
    elapsed_time = (time.time() - start_time)

    start_err = cost_fn(sigma)
    final_err = cost_fn(improved_sigma)
    # The logged difference matches the "start - final" equation shown.
    log.info("Delta error: %.06f - %.06f = %e", start_err, final_err,
             start_err - final_err)

    # TODO: Should this maybe be moved to a higher level
    # so it applies to ALL the fittters?
    ms.fit_mode_off(improved_modelspec)
    # NOTE(review): the fitter tag is 'ccnorm' although this function is
    # fit_pcnorm -- left unchanged in case downstream code keys on it.
    ms.set_modelspec_metadata(improved_modelspec, 'fitter', 'ccnorm')
    ms.set_modelspec_metadata(improved_modelspec, 'fit_time', elapsed_time)
    ms.set_modelspec_metadata(improved_modelspec, 'n_parms',
                              len(improved_sigma))

    return {'modelspec': improved_modelspec.copy(), 'save_context': True}
Exemple #10
0
def fit_iteratively(
        data,
        modelspec,
        fitter=coordinate_descent,
        segmentor=nems.segmentors.use_all_data,
        mapper=nems.fitters.mappers.simple_vector,
        metric=lambda data: nems.metrics.api.nmse(data, 'pred', 'resp'),
        metaname='fit_basic',
        fit_kwargs={},
        module_sets=None,
        invert=False,
        tolerances=None,
        max_iter=100):
    '''
    Fit subsets of a modelspec's modules one after another, holding the
    remaining modules fixed, and repeat the sweep for each tolerance.

    Required Arguments:
     data          A recording object
     modelspec     A modelspec object (indexable sequence of modules).
                   Modules without phi are initialized in place from the
                   mean of their prior.

    Optional Arguments:
     fitter        A function of (sigma, costfn) that tests various points,
                   in fitspace (i.e. sigmas) using the cost function costfn,
                   and hopefully returns a better sigma after some time.
                   It is also passed tolerance, max_iter and **fit_kwargs.
     mapper        A function of a modelspec returning (packer, unpacker),
                   which define the mapping between modelspecs and a
                   fitter's fitspace.
     segmentor     A function that selects a subset of the data during the
                   fitting process. This is NOT the same as est/val data splits
     metric        A function of a Recording that returns an error value
                   that is to be minimized.
     metaname      Value stored under the 'fitter' metadata key.
     fit_kwargs    Extra keyword arguments forwarded to the fitter.

     module_sets   A nested list specifying which model indices should be fit.
                   One fit is run per entry, per tolerance; each fit starts
                   from the result of the previous one.
                   ex: [[0], [1, 3], [0, 1, 2, 3]]
                   Defaults to fitting each module on its own.

     invert        Boolean. Causes module_sets to specify the model indices
                   that should *not* be fit.

     tolerances    Sequence of tolerances; the full sweep over module_sets
                   is run once per tolerance. Defaults to [1e-6].
     max_iter      Passed to the fitter as 'max_iter' for every fit.

    Returns
    A list containing a single modelspec, which has the best parameters found
    by this fitter. If module_sets or tolerances is empty, the (unfitted)
    input modules are returned with the fit metadata attached.
    '''
    if module_sets is None:
        module_sets = [[i] for i in range(len(modelspec))]

    if tolerances is None:
        tolerances = [1e-6]

    start_time = time.time()

    # Ensure that phi exists for all modules; choose prior mean if not found
    for i, m in enumerate(modelspec):
        if not m.get('phi'):
            log.debug(
                'Phi not found for module, using mean of prior: {}'.format(m))
            m = nems.priors.set_mean_phi([m])[0]  # Inits phi for 1 module
            modelspec[i] = m

    # Create the mapper object that translates to and from modelspecs.
    # It has two methods that, when defined as mathematical functions, are:
    #    .pack(modelspec) -> fitspace_point
    #    .unpack(fitspace_point) -> modelspec
    packer, unpacker = mapper(modelspec)

    # A function to evaluate the modelspec on the data
    evaluator = ms.evaluate

    # get initial sigma value representing some point in the fit space
    sigma = packer(modelspec)

    def cost_function(sigma, unpacker, modelspec, data, evaluator,
                      metric, held_out):
        updated_spec = unpacker(sigma)
        # The segmentor takes a subset of the data for fitting each step
        # Intended use is for CV or random selection of chunks of the data
        data_subset = segmentor(data)

        # Put hold_out back in before evaluating. The unpacker was built
        # from the full modelspec, so its output is indexed by module
        # position (assumes one entry per module, as held_out has).
        recombined_spec = [
            updated_spec[i] if m is None else m
            for i, m in enumerate(held_out)
        ]

        updated_data_subset = evaluator(data_subset, recombined_spec)
        error = metric(updated_data_subset)
        log.debug("inside cost function, current error: %.06f", error)
        log.debug("\ncurrent sigma: %s", sigma)

        cost_function.counter += 1
        if cost_function.counter % 1000 == 0:
            log.info('Eval #%d. E=%.06f', cost_function.counter, error)
            log.debug("\n\nrecombined_spec was: %s", recombined_spec)

        return error

    # Working copy of the module list; replaced after every subset fit so
    # that each fit builds on the previous one. Also guarantees a defined
    # result when there is nothing to fit.
    current_spec = list(modelspec)
    recombined_modelspec = current_spec

    for tol in tolerances:
        log.info("Fitting all subsets with tolerance: %.2E", tol)
        for subset in module_sets:
            log.info("Fitting subset: %s", subset)
            mods = [m['fn'] for i, m in enumerate(current_spec)
                    if i in subset]
            log.info("%s\n", mods)
            if invert:
                # invert the indices
                subset = [
                    i for i in range(len(current_spec)) if i not in subset
                ]
                log.debug("Inverted subset: %s\n", subset)

            # remove hold_outs from modelspec: held_out has None at the
            # positions being fit and the frozen module everywhere else.
            fit_positions = set(subset)
            held_out = [
                None if i in fit_positions else m
                for i, m in enumerate(current_spec)
            ]
            subtracted_modelspec = [
                m for i, m in enumerate(current_spec) if i in fit_positions
            ]
            log.debug(
                "\n\nheld_out subset was: %s\n\nsubtracted_modelspec: %s",
                held_out, subtracted_modelspec)

            cost_function.counter = 0

            # Freeze everything but sigma, since that's all the fitter should be
            # updating.
            cost_fn = partial(cost_function,
                              unpacker=unpacker,
                              modelspec=subtracted_modelspec,
                              data=data,
                              evaluator=evaluator,
                              metric=metric,
                              held_out=held_out)

            # do fit
            improved_sigma = fitter(sigma,
                                    cost_fn,
                                    tolerance=tol,
                                    max_iter=max_iter,
                                    **fit_kwargs)
            improved_modelspec = unpacker(improved_sigma)

            # Take fitted modules from the fitter's result and restore the
            # held-out modules exactly as they were.
            recombined_modelspec = [
                improved_modelspec[i] if m is None else m
                for i, m in enumerate(held_out)
            ]
            log.debug("\n\nsubset: %s\nrecombined_modelspec: %s", subset,
                      recombined_modelspec)

            # Carry the result forward so the next subset/tolerance starts
            # from it instead of from the initial parameters.
            current_spec = recombined_modelspec
            sigma = packer(current_spec)

    elapsed_time = (time.time() - start_time)

    # TODO: Should this maybe be moved to a higher level
    # so it applies to ALL the fittters?
    ms.set_modelspec_metadata(recombined_modelspec, 'fitter', metaname)
    ms.set_modelspec_metadata(recombined_modelspec, 'fit_time', elapsed_time)
    results = [copy.deepcopy(recombined_modelspec)]

    return results
Exemple #11
0
def fit_basic(data,
              modelspec,
              fitter=scipy_minimize,
              segmentor=nems.segmentors.use_all_data,
              mapper=nems.fitters.mappers.simple_vector,
              metric=lambda data: nems.metrics.api.nmse(data, 'pred', 'resp'),
              metaname='fit_basic',
              fit_kwargs={}):
    '''
    Fit every parameter of a modelspec in a single call to the fitter.

    Required Arguments:
     data          A recording object
     modelspec     A modelspec object. Modules lacking phi are initialized
                   in place from the mean of their prior.

    Optional Arguments:
     fitter        A function of (sigma, costfn), also given **fit_kwargs,
                   that searches fitspace points (sigmas) with the cost
                   function and returns the sigma it settles on.
     segmentor     A function applied to the data on every cost evaluation
                   to pick the portion used for that evaluation. This is
                   NOT the same as est/val data splits.
     mapper        A function of a modelspec returning (packer, unpacker),
                   the two directions of the modelspec <-> fitspace mapping.
     metric        A function of a Recording that returns the error value
                   to be minimized.
     metaname      Value recorded under the 'fitter' metadata key.
     fit_kwargs    Extra keyword arguments forwarded to the fitter.

    Returns
    A list containing a single modelspec (a deep copy) holding the
    parameters returned by the fitter.
    '''

    t_start = time.time()

    # Any module without phi gets one from the mean of its prior.
    for idx, module in enumerate(modelspec):
        if module.get('phi'):
            continue
        log.debug('Phi not found for module, using mean of prior: %s', module)
        modelspec[idx] = nems.priors.set_mean_phi([module])[0]

    # packer:   modelspec -> fitspace_point
    # unpacker: fitspace_point -> modelspec
    packer, unpacker = mapper(modelspec)

    # Starting point of the search in fit space.
    sigma = packer(modelspec)

    def cost_function(sigma, unpacker, modelspec, data, evaluator, metric):
        # Rebuild a modelspec from sigma, evaluate it on the segment of
        # data chosen by the segmentor, and score the result.
        candidate_spec = unpacker(sigma)
        segment = segmentor(data)
        error = metric(evaluator(segment, candidate_spec))
        log.debug("inside cost function, current error: %.06f", error)
        log.debug("\ncurrent sigma: %s", sigma)

        # Progress report every 1000 evaluations.
        cost_function.counter += 1
        n_evals = cost_function.counter
        if not n_evals % 1000:
            log.info('Eval #%d. E=%.06f', n_evals, error)

        return error

    cost_function.counter = 0

    # Bind everything except sigma; sigma is the only thing the fitter varies.
    cost_fn = partial(cost_function,
                      unpacker=unpacker,
                      modelspec=modelspec,
                      data=data,
                      evaluator=nems.modelspec.evaluate,
                      metric=metric)

    fitted_spec = unpacker(fitter(sigma, cost_fn, **fit_kwargs))

    fit_seconds = time.time() - t_start

    # TODO: Should this maybe be moved to a higher level
    # so it applies to ALL the fittters?
    ms.set_modelspec_metadata(fitted_spec, 'fitter', metaname)
    ms.set_modelspec_metadata(fitted_spec, 'fit_time', fit_seconds)

    # Results are always packaged as a list of modelspecs, even for one.
    return [copy.deepcopy(fitted_spec)]