def improve_order_regularization(r,
                                 o,
                                 star_filename,
                                 tellurics_filename,
                                 training_data,
                                 training_results,
                                 validation_data,
                                 validation_results,
                                 verbose=True,
                                 plot=False,
                                 basename='',
                                 K_star=0,
                                 K_t=0,
                                 L1=True,
                                 L2=True,
                                 tellurics_template_fixed=False,
                                 fixed_tellurics_results_filename=None):
    """
    Use a validation scheme to determine the best regularization parameters for
    all model components in a given order r.
    Update files at star_filename, tellurics_filename with the best parameters.

    Parameters are tuned in this sequence: tellurics L2 template, star L2
    template, tellurics L1 template, star L1 template, then (if `K_star > 0`)
    star L2/L1 basis vectors, then (if `K_t > 0`) tellurics L2/L1 basis
    vectors.

    Parameters
    ----------
    r : int
        Order index passed to `wobble.Model` and used to index the per-order
        arrays of the results objects.
    o : int
        Index into the datasets of `star_filename` and `tellurics_filename`
        that is read for the starting values and overwritten with the tuned
        ones. When `tellurics_template_fixed` is set it also indexes the
        templates loaded from `fixed_tellurics_results_filename`.
    star_filename : str
        HDF5 file holding the regularization amplitudes for the star.
    tellurics_filename : str
        HDF5 file holding the regularization amplitudes for the tellurics.
    training_data, training_results
        Data/results pair used to build the training model.
    validation_data, validation_results
        Data/results pair used to build the validation model.
    verbose : bool (default `True`)
        Forwarded to the optimizer and to `improve_parameter`.
    plot : bool (default `False`)
        If true, call `plot_fit` for the initial and final fits and save a
        figure of the training and validation RVs.
    basename : str (default ``)
        Prefix of every plot file name written by this function.
    K_star : int (default `0`)
        `variable_bases` value for the star component.
    K_t : int (default `0`)
        `variable_bases` value for the tellurics component.
    L1 : bool (default `True`)
        Whether to tune the parameters whose name starts with "L1".
    L2 : bool (default `True`)
        Whether to tune the parameters whose name starts with "L2".
    tellurics_template_fixed : bool (default `False`)
        If true, the telluric template is held fixed: the training model takes
        it from `fixed_tellurics_results_filename` and the validation model
        takes it from `training_results`.
    fixed_tellurics_results_filename : str or None (default `None`)
        Results file providing the fixed telluric template. `None` selects the
        path that used to be hard-coded here. Only read when
        `tellurics_template_fixed` is true.
    """
    epoch_to_plot = 0  # epoch shown by every plot_fit call below

    training_model = wobble.Model(training_data, training_results, r)
    training_model.add_star('star', variable_bases=K_star)
    if tellurics_template_fixed:  # hackity hack hack
        if fixed_tellurics_results_filename is None:
            # legacy default, kept so existing callers behave as before
            fixed_tellurics_results_filename = (
                '/Users/mbedell/python/wobble/results/'
                'results_51peg_Kstar0_Kt0.hdf5')
        fixed_results = wobble.Results(
            filename=fixed_tellurics_results_filename)
        template_xs = np.copy(fixed_results.tellurics_template_xs[o])
        template_ys = np.copy(fixed_results.tellurics_template_ys[o])
        training_model.add_telluric('tellurics',
                                    rvs_fixed=True,
                                    template_fixed=True,
                                    variable_bases=K_t,
                                    template_xs=template_xs,
                                    template_ys=template_ys)
    else:
        training_model.add_telluric('tellurics',
                                    rvs_fixed=True,
                                    variable_bases=K_t)
    training_model.setup()
    training_model.optimize(niter=0, verbose=verbose, rv_uncertainties=False)

    if plot:
        plot_fit(r,
                 epoch_to_plot,
                 training_data,
                 training_results,
                 title='Initialization',
                 basename='{0}_init'.format(basename))

    validation_model = wobble.Model(validation_data, validation_results, r)
    validation_model.add_star('star',
                              variable_bases=K_star,
                              template_xs=training_results.star_template_xs[r]
                              )  # ensure templates are same size
    if tellurics_template_fixed:  # hackity hack hack
        validation_model.add_telluric(
            'tellurics',
            rvs_fixed=True,
            template_fixed=True,
            variable_bases=K_t,
            template_xs=training_results.tellurics_template_xs[r],
            template_ys=training_results.tellurics_template_ys[r])
    else:
        validation_model.add_telluric(
            'tellurics',
            rvs_fixed=True,
            variable_bases=K_t,
            template_xs=training_results.tellurics_template_xs[r])
    validation_model.setup()

    star_component = training_model.components[0]
    tellurics_component = training_model.components[1]

    # Each entry is (component, dataset name, tensor). The order of the
    # entries is the order in which the parameters are optimized. The dataset
    # name is spelled out because TF appends garbage to the end of the tensor
    # name. Plain lists are used so that NumPy is never asked to coerce tensor
    # objects into an array.
    tuning_plan = [
        ('tellurics', 'L2_template', tellurics_component.L2_template_tensor),
        ('star', 'L2_template', star_component.L2_template_tensor),
        ('tellurics', 'L1_template', tellurics_component.L1_template_tensor),
        ('star', 'L1_template', star_component.L1_template_tensor),
    ]
    if K_star > 0:
        tuning_plan += [
            ('star', 'L2_basis_vectors',
             star_component.L2_basis_vectors_tensor),
            ('star', 'L1_basis_vectors',
             star_component.L1_basis_vectors_tensor),
        ]
    if K_t > 0:
        tuning_plan += [
            ('tellurics', 'L2_basis_vectors',
             tellurics_component.L2_basis_vectors_tensor),
            ('tellurics', 'L1_basis_vectors',
             tellurics_component.L1_basis_vectors_tensor),
        ]

    # Single source of truth for which file belongs to which component.
    filename_for = {'star': star_filename, 'tellurics': tellurics_filename}

    regularization_dict = {}
    #o_init = max(0, o-1) # initialize from previous order, or if o=0 use defaults
    o_init = o  # always initialize from starting guess (TODO: decide which init is better)
    for component, name, tensor in tuning_plan:
        with h5py.File(filename_for[component], 'r') as f:
            regularization_dict[tensor] = np.copy(f[name][o_init])

    tune_norm = {'L1': L1, 'L2': L2}
    i = 0  # track order in which parameters are improved
    for component, name, tensor in tuning_plan:
        if not tune_norm.get(name[0:2]):
            continue
        i += 1
        regularization_dict[tensor] = improve_parameter(
            tensor,
            training_model,
            validation_model,
            regularization_dict,
            validation_data,
            validation_results,
            verbose=verbose,
            plot=plot,
            basename=basename + '_par{0}'.format(i))
        # persist immediately so finished parameters survive a later failure
        with h5py.File(filename_for[component], 'r+') as f:
            f[name][o] = np.copy(regularization_dict[tensor])

    if plot:
        # Re-evaluate the last parameter of the plan at its current value;
        # per the original author this is a hack to update the results
        # before they are plotted.
        last_tensor = tuning_plan[-1][2]
        test_regularization_value(last_tensor,
                                  regularization_dict[last_tensor],
                                  training_model,
                                  validation_model,
                                  regularization_dict,
                                  validation_data,
                                  validation_results,
                                  plot=False,
                                  verbose=False)
        plot_fit(r,
                 epoch_to_plot,
                 validation_data,
                 validation_results,
                 title='Final',
                 basename='{0}_final'.format(basename))

        # Mean-subtracted RVs (star RV + BERV) of both sets on one figure.
        fig = plt.figure()
        ax = fig.add_subplot(111)
        val_rvs = validation_results.star_rvs[r] + validation_results.bervs
        train_rvs = training_results.star_rvs[r] + training_results.bervs
        ax.plot(validation_results.dates, val_rvs - np.mean(val_rvs), 'r.')
        ax.plot(training_results.dates,
                train_rvs - np.mean(train_rvs),
                'k.',
                alpha=0.5)
        ax.set_ylabel('RV (m/s)')
        ax.set_xlabel('JD')
        fig.tight_layout()
        plt.savefig(basename + '_final_rvs.png')
        plt.close(fig)
Exemple #2
0
    orders = np.copy(data.orders)
    results = wobble.Results(data=data)
    
    print("data loaded")
    print("time elapsed: {0:.2f} min".format((time() - start_time)/60.0))
    elapsed_time = time() - start_time
    

    if plots:
        print("plots will be saved under directory: {0}".format(plot_dir))
        if not os.path.exists(plot_dir):
            os.makedirs(plot_dir)
    star_learning_rate = 0.1
    telluric_learning_rate = 0.01
    for r,o in enumerate(orders):
        model = wobble.Model(data, results, r)
        model.add_star('star', variable_bases=K_star, 
                        regularization_par_file=star_reg_file, 
                        learning_rate_template=star_learning_rate)
        model.add_telluric('tellurics', rvs_fixed=True, variable_bases=K_t, 
                            regularization_par_file=tellurics_reg_file, 
                            learning_rate_template=telluric_learning_rate)
        print("--- ORDER {0} ---".format(o))
        if plots:
            wobble.optimize_order(model, niter=niter, save_history=True, 
                                  basename=plot_dir+'history', movies=movies, epochs_to_plot=epochs) 
            fig, ax = plt.subplots(1, 1, figsize=(8,5))
            ax.plot(data.dates, results.star_rvs[r] + data.bervs - data.drifts - np.mean(results.star_rvs[r] + data.bervs), 
                    'k.', alpha=0.8, ms=4)
            ax.plot(data.dates, data.pipeline_rvs + data.bervs - np.mean(data.pipeline_rvs + data.bervs), 
                    'r.', alpha=0.5, ms=4)   
Exemple #3
0
def improve_order_regularization(o, star_filename, tellurics_filename,
                                 training_data, training_results,
                                 validation_data, validation_results,
                                 verbose=True, plot=False, basename='',
                                 K_star=0, K_t=0, L1=True, L2=True,
                                 tellurics_template_fixed=False):
    """
    Use a validation scheme to determine the best regularization parameters for
    all model components in a given order.
    Update files at star_filename, tellurics_filename with the best parameters.

    By default, this tunes in the following order:
            tellurics L2, star L2, tellurics L1, star L1.
    If `K_star > 0` the star basis-vector amplitudes (L2, then L1) follow, and
    if `K_t > 0` the tellurics basis-vector amplitudes (L2, then L1) come last.

    Parameters
    ----------
    o : int
        Index into `star_filename` and `tellurics_filename` to retrieve desired order.
    star_filename : str
        Filename containing regularization amplitudes for the star.
    tellurics_filename : str
        Filename containing regularization amplitudes for the tellurics.
    training_data : wobble.Data object
        Data to train template on (should be the majority of available data).
    training_results : wobble.Results object
        Results object corresponding to `training_data`.
    validation_data : wobble.Data object
        Data to use in assessing goodness-of-fit for template
        (should be a representative minority of the available data).
    validation_results : wobble.Results object
        Results object corresponding to `validation_data`.
    verbose : bool (default `True`)
        Toggle print statements and progress bars.
    plot : bool (default `False`)
        Generate and save plots of fits to validation data.
    basename : str (default ``)
        String to prepend to the names of saved plots (file path and base).
    K_star : int (default `0`)
        Number of variable basis vectors for the star.
    K_t : int (default `0`)
        Number of variable basis vectors for the tellurics.
    L1 : bool (default `True`)
        Whether to tune L1 amplitudes.
    L2 : bool (default `True`)
        Whether to tune L2 amplitudes.
    tellurics_template_fixed : bool (default `False`)
        Accepted for signature compatibility; this implementation does not
        read it.
    """
    r = 0 # assumes there is only one order in data & results objects
    epoch_to_plot = 0 # epoch shown by every plot_fit call below

    training_model = wobble.Model(training_data, training_results, r)
    training_model.add_star('star', variable_bases=K_star)
    training_model.add_telluric('tellurics', rvs_fixed=True, variable_bases=K_t)
    training_model.setup()
    training_model.optimize(niter=0, verbose=verbose, rv_uncertainties=False)

    if plot:
        plot_fit(r, epoch_to_plot, training_data, training_results,
                 title='Initialization', basename='{0}_init'.format(basename))

    validation_model = wobble.Model(validation_data, validation_results, r)
    validation_model.add_star('star', variable_bases=K_star,
                          template_xs=training_results.star_template_xs[r]) # ensure templates are same size
    validation_model.add_telluric('tellurics', rvs_fixed=True, variable_bases=K_t,
                          template_xs=training_results.tellurics_template_xs[r])
    validation_model.setup()

    star_component = training_model.components[0]
    tellurics_component = training_model.components[1]

    # Each entry is (component, dataset name, tensor); the order of the entries
    # is the order in which the parameters are optimized. The dataset name is
    # listed explicitly bc TF appends garbage to the end of the tensor name.
    # Plain lists are used so NumPy never has to coerce tensor objects.
    tuning_plan = [
        ('tellurics', 'L2_template', tellurics_component.L2_template_tensor),
        ('star', 'L2_template', star_component.L2_template_tensor),
        ('tellurics', 'L1_template', tellurics_component.L1_template_tensor),
        ('star', 'L1_template', star_component.L1_template_tensor),
    ]
    if K_star > 0:
        tuning_plan.extend([
            ('star', 'L2_basis_vectors', star_component.L2_basis_vectors_tensor),
            ('star', 'L1_basis_vectors', star_component.L1_basis_vectors_tensor),
        ])
    if K_t > 0:
        tuning_plan.extend([
            ('tellurics', 'L2_basis_vectors', tellurics_component.L2_basis_vectors_tensor),
            ('tellurics', 'L1_basis_vectors', tellurics_component.L1_basis_vectors_tensor),
        ])

    # Single source of truth for which file belongs to which component.
    filename_for = {'star': star_filename, 'tellurics': tellurics_filename}

    regularization_dict = {}
    #o_init = max(0, o-1) # initialize from previous order, or if o=0 use defaults
    o_init = o # always initialize from starting guess (TODO: decide which init is better)
    for component, name, tensor in tuning_plan:
        with h5py.File(filename_for[component], 'r') as f:
            regularization_dict[tensor] = np.copy(f[name][o_init])

    tune_norm = {'L1': L1, 'L2': L2}
    i = 0 # track order in which parameters are improved
    for component, name, tensor in tuning_plan:
        if not tune_norm.get(name[0:2]):
            continue
        i += 1
        regularization_dict[tensor] = improve_parameter(tensor, training_model, validation_model,
                                                     regularization_dict, validation_data, validation_results,
                                                     verbose=verbose,
                                                     plot=plot, basename=basename+'_par{0}'.format(i))
        # persist immediately so finished parameters survive a later failure
        with h5py.File(filename_for[component], 'r+') as f:
            f[name][o] = np.copy(regularization_dict[tensor])

    if plot:
        # Re-evaluate the last parameter of the plan at its current value;
        # per the original author this is a hack to update the results
        # before they are plotted.
        last_tensor = tuning_plan[-1][2]
        test_regularization_value(last_tensor, regularization_dict[last_tensor],
                                  training_model, validation_model, regularization_dict,
                                  validation_data, validation_results, plot=False, verbose=False)
        plot_fit(r, epoch_to_plot, validation_data, validation_results,
                 title='Final', basename='{0}_final'.format(basename))

        # Mean-subtracted RVs (star RV + BERV) of both sets on one figure.
        fig = plt.figure()
        ax = fig.add_subplot(111)
        val_rvs = validation_results.star_rvs[r] + validation_results.bervs
        train_rvs = training_results.star_rvs[r] + training_results.bervs
        ax.plot(validation_results.dates, val_rvs - np.mean(val_rvs), 'r.')
        ax.plot(training_results.dates, train_rvs - np.mean(train_rvs), 'k.', alpha=0.5)
        ax.set_ylabel('RV (m/s)')
        ax.set_xlabel('JD')
        fig.tight_layout()
        plt.savefig(basename+'_final_rvs.png')
        plt.close(fig)