def main():
    """
    This function contains example code that demonstrates how to use the
    functions defined in poly_fit_base for fitting polynomial curves to data.
    """
    # choose the number of data-points and sample a pair of vectors: the
    # input values and the corresponding target values
    N = 20
    degree = 1
    true_func = arbitrary_function_1
    inputs, targets = sample_data(N, true_func, seed=29)
    # convert the inputs we just sampled into a matrix where each row is a
    # vector of monomials of the corresponding input
    processed_inputs = expand_to_monomials(inputs, degree)
    # find the weights that fit the data in a least squares way
    weights = least_squares_weights(processed_inputs, targets)
    # use the weights to create a function that takes inputs and returns
    # predictions. In python, functions can be passed just like any other
    # object; those who know MATLAB might call this a function handle.
    linear_approx = construct_polynomial_approx(degree, weights)
    fig, ax, hs = plot_function_data_and_approximation(
        linear_approx, inputs, targets, true_func)
    #ax.legend(hs, ['true function', 'data', 'linear approx'])
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    fig.savefig("regression_linear.pdf", format="pdf")
    plt.show()
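

# The two helpers used above live in poly_fit_base, which is not shown here.
# A minimal sketch of how they might look, assuming the signatures match the
# calls above (the real implementations may differ, e.g. in the
# linear-algebra routine used):

import numpy as np

def expand_to_monomials(inputs, degree):
    """Convert a 1d array of inputs into an (N x (degree+1)) matrix whose
    columns are the monomials 1, x, x**2, ..., x**degree of those inputs."""
    expanded = [inputs**i for i in range(degree + 1)]
    return np.array(expanded).T

def least_squares_weights(processed_inputs, targets):
    """Find the weight vector minimising the sum of squared errors between
    processed_inputs @ weights and targets."""
    weights, _, _, _ = np.linalg.lstsq(processed_inputs, targets, rcond=None)
    return weights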
def main():
    """
    This function contains example code that demonstrates how to use
    cross-validation to evaluate the regularisation parameter, basis function
    scale and number of centres for radial basis function regression.
    """
    # choose the number of data-points and sample a pair of vectors: the
    # input values and the corresponding target values
    N = 500
    inputs, targets = sample_data(N, arbitrary_function_2, seed=1)
    # specify the centres and scale of some rbf basis functions
    default_centres = np.linspace(0, 1, 21)
    default_scale = 0.03
    default_reg_param = 0.08
    # get the cross-validation folds
    num_folds = 4
    folds = create_cv_folds(N, num_folds)
    # evaluate then plot the performance of different reg params
    evaluate_reg_param(inputs, targets, folds, default_centres, default_scale)
    # evaluate then plot the performance of different scales
    evaluate_scale(inputs, targets, folds, default_centres, default_reg_param)
    # evaluate then plot the performance of different numbers of basis
    # function centres
    evaluate_num_centres(
        inputs, targets, folds, default_scale, default_reg_param)
    plt.show()
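

# The create_cv_folds helper is assumed to return, for each fold, a pair of
# boolean filters selecting the training and test data-points. A minimal
# sketch under that assumption (the real helper, and the evaluate_* plotting
# helpers, are defined in the accompanying module):

import numpy as np

def create_cv_folds(N, num_folds):
    """Randomly partition N data-points into num_folds cross-validation
    folds, returning a list of (train_filter, test_filter) boolean arrays."""
    # assign each data-point a fold id, roughly evenly and in random order
    fold_ids = np.random.permutation(np.arange(N) % num_folds)
    folds = []
    for f in range(num_folds):
        test_filter = (fold_ids == f)
        train_filter = ~test_filter
        folds.append((train_filter, test_filter))
    return folds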
def main():
    """
    This function contains example code that demonstrates Bayesian linear
    regression with radial basis function features: it computes the posterior
    over the weights, plots samples from the posterior, and plots the
    predictive distribution.
    """
    # specify the centres of the rbf basis functions
    centres = np.linspace(0, 1, 9)
    # the width (analogous to standard deviation) of the basis functions
    scale = 0.1
    print("centres = %r" % (centres,))
    print("scale = %r" % (scale,))
    # create the feature mapping
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    # plot the basis functions themselves for reference
    display_basis_functions(feature_mapping)
    # choose the number of data-points to sample: inputs and targets
    N = 9
    # define the noise precision of our data
    beta = (1. / 0.1)**2
    inputs, targets = sample_data(
        N, arbitrary_function_1, noise=np.sqrt(1. / beta), seed=37)
    # now construct the design matrix for the inputs
    designmtx = feature_mapping(inputs)
    # the number of features is the width of this matrix
    M = designmtx.shape[1]
    # define a prior mean and covariance matrix
    m0 = np.zeros(M)
    alpha = 100
    S0 = alpha * np.identity(M)
    # find the posterior over weights
    mN, SN = calculate_weights_posterior(designmtx, targets, beta, m0, S0)
    # the posterior mean (also the MAP) gives the central prediction
    mean_approx = construct_feature_mapping_approx(feature_mapping, mN)
    fig, ax, lines = plot_function_data_and_approximation(
        mean_approx, inputs, targets, arbitrary_function_1)
    # now plot a number of samples from the posterior
    xs = np.linspace(0, 1, 101)
    print("mN = %r" % (mN,))
    for i in range(20):
        weights_sample = np.random.multivariate_normal(mN, SN)
        sample_approx = construct_feature_mapping_approx(
            feature_mapping, weights_sample)
        sample_ys = sample_approx(xs)
        line, = ax.plot(xs, sample_ys, 'm', linewidth=0.5)
    lines.append(line)
    ax.legend(lines, ['true function', 'data', 'mean approx', 'samples'])
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    fig.savefig("regression_bayesian_rbf.pdf", format="pdf")
    # now for the predictive distribution
    new_inputs = np.linspace(0, 1, 51)
    new_designmtx = feature_mapping(new_inputs)
    ys, sigma2Ns = predictive_distribution(new_designmtx, beta, mN, SN)
    print("(sigma2Ns**0.5).shape = %r" % ((sigma2Ns**0.5).shape,))
    print("np.sqrt(sigma2Ns).shape = %r" % (np.sqrt(sigma2Ns).shape,))
    print("ys.shape = %r" % (ys.shape,))
    fig, ax, lines = plot_function_and_data(
        inputs, targets, arbitrary_function_1)
    # plot the predictive mean with a one standard deviation band around it
    ax.plot(new_inputs, ys, 'r', linewidth=3)
    lower = ys - np.sqrt(sigma2Ns)
    upper = ys + np.sqrt(sigma2Ns)
    print("lower.shape = %r" % (lower.shape,))
    print("upper.shape = %r" % (upper.shape,))
    ax.fill_between(new_inputs, lower, upper, alpha=0.2, color='r')
    plt.show()
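

# The posterior and predictive computations above follow the standard
# Bayesian linear regression formulas: SN = (S0^-1 + beta * Phi^T Phi)^-1,
# mN = SN (S0^-1 m0 + beta * Phi^T t), and predictive variance
# 1/beta + phi(x)^T SN phi(x). A minimal sketch of the two helpers, assuming
# designmtx, targets, m0 and S0 are plain numpy arrays (the real helpers may
# use matrix types or a more numerically stable solve):

import numpy as np

def calculate_weights_posterior(designmtx, targets, beta, m0, S0):
    """Return the posterior mean mN and covariance SN over the weights."""
    S0_inv = np.linalg.inv(S0)
    SN = np.linalg.inv(S0_inv + beta * designmtx.T @ designmtx)
    mN = SN @ (S0_inv @ m0 + beta * designmtx.T @ targets)
    return mN, SN

def predictive_distribution(designmtx, beta, mN, SN):
    """Return the predictive mean and variance for each row of designmtx."""
    ys = designmtx @ mN
    # predictive variance = noise variance + weight-uncertainty contribution
    sigma2Ns = 1. / beta + np.sum((designmtx @ SN) * designmtx, axis=1)
    return ys, sigma2Ns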
def main():
    """
    This function contains example code that demonstrates how to fit radial
    basis function models to data with least squares and regularised least
    squares, and how to search over the regularisation parameter, basis
    function scale and number of centres.
    """
    # specify the centres of the rbf basis functions
    centres = np.linspace(0, 1, 7)
    # the width (analogous to standard deviation) of the basis functions
    scale = 0.15
    print("centres = %r" % (centres,))
    print("scale = %r" % (scale,))
    # plot the basis functions themselves for reference
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    datamtx = np.linspace(0, 1, 51)
    designmtx = feature_mapping(datamtx)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for colid in range(designmtx.shape[1]):
        ax.plot(datamtx, designmtx[:, colid])
    ax.set_xlim([0, 1])
    ax.set_xticks([0, 1])
    ax.set_yticks([0, 1])
    # choose the number of data-points and sample a pair of vectors: the
    # input values and the corresponding target values
    N = 20
    inputs, targets = sample_data(N, arbitrary_function_1, seed=37)
    # define the feature mapping for the data (a sketch of this helper and of
    # the weight-fitting helpers appears after this function)
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    # now construct the design matrix
    designmtx = feature_mapping(inputs)
    # find the weights that fit the data in a least squares way
    weights = ml_weights(designmtx, targets)
    # use the weights to create a function that takes inputs and returns
    # predictions. In python, functions can be passed just like any other
    # object; those who know MATLAB might call this a function handle.
    rbf_approx = construct_feature_mapping_approx(feature_mapping, weights)
    fig, ax, lines = plot_function_data_and_approximation(
        rbf_approx, inputs, targets, arbitrary_function_1)
    ax.legend(lines, ['true function', 'data', 'rbf approx'])
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    fig.savefig("regression_rbf.pdf", format="pdf")
    # for a single choice of regularisation strength we can plot the
    # approximating function
    reg_param = 10**-3
    reg_weights = regularised_ml_weights(designmtx, targets, reg_param)
    rbf_reg_approx = construct_feature_mapping_approx(
        feature_mapping, reg_weights)
    fig, ax, lines = plot_function_data_and_approximation(
        rbf_reg_approx, inputs, targets, arbitrary_function_1)
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    fig.savefig("regression_rbf_basis_functions_reg.pdf", format="pdf")
    # to find a good regularisation parameter, we can perform a parameter
    # search (a naive way to do this is to simply try a sequence of
    # reasonable values within a reasonable range)
    # sample some training and testing inputs
    train_inputs, train_targets = sample_data(
        N, arbitrary_function_1, seed=37)
    # we need to use a different seed for our test data, otherwise some of
    # our sampled points will be the same
    test_inputs, test_targets = sample_data(
        100, arbitrary_function_1, seed=82)
    # convert the raw inputs into feature vectors (construct design matrices)
    train_designmtx = feature_mapping(train_inputs)
    test_designmtx = feature_mapping(test_inputs)
    # now we're going to evaluate the train and test error for a sequence of
    # potential regularisation strengths, storing the results
    reg_params = np.logspace(-5, 1)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        # evaluate the train and test error for this regularisation parameter
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        # collect the errors
        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_errors, test_errors)
    ax.set_xscale('log')
    # we may also be interested in choosing the right number of centres, or
    # the right width/scale of the rbf functions.
    # Here we vary the width (scale) and evaluate the performance
    reg_param = 10**-3
    scales = np.logspace(-2, 0)
    train_errors = []
    test_errors = []
    for scale in scales:
        # we must construct the feature mapping anew for each scale
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)
        # evaluate the train and test error for this scale
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        # collect the errors
        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results
    fig, ax = plot_train_test_errors(
        "scale", scales, train_errors, test_errors)
    ax.set_xscale('log')
    # Here we vary the number of centres and evaluate the performance
    reg_param = 10**-3
    scale = 0.15
    n_centres_seq = np.arange(3, 20)
    train_errors = []
    test_errors = []
    for n_centres in n_centres_seq:
        # we must construct the feature mapping anew for each number of
        # centres
        centres = np.linspace(0, 1, n_centres)
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)
        # evaluate the train and test error for this number of centres
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        # collect the errors
        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results
    fig, ax = plot_train_test_errors(
        "Num. Centres", n_centres_seq, train_errors, test_errors)
    plt.show()
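

# For reference, a minimal sketch of the feature mapping and fitting helpers
# assumed by the function above. The Gaussian form of the basis functions,
# the ridge-style regularisation and the use of root mean squared error are
# assumptions; the real construct_rbf_feature_mapping, ml_weights,
# regularised_ml_weights and train_and_test in the accompanying module may
# differ in detail.

import numpy as np

def construct_rbf_feature_mapping(centres, scale):
    """Return a function mapping a 1d array of inputs to an (N x M) design
    matrix of Gaussian radial basis function activations, one per centre."""
    centres = np.asarray(centres).reshape(1, -1)
    def feature_mapping(inputs):
        inputs = np.asarray(inputs).reshape(-1, 1)
        return np.exp(-(inputs - centres)**2 / (2 * scale**2))
    return feature_mapping

def ml_weights(designmtx, targets):
    """Maximum likelihood (unregularised least squares) weights."""
    weights, _, _, _ = np.linalg.lstsq(designmtx, targets, rcond=None)
    return weights

def regularised_ml_weights(designmtx, targets, reg_param):
    """Regularised (ridge) least squares weights."""
    M = designmtx.shape[1]
    A = designmtx.T @ designmtx + reg_param * np.identity(M)
    return np.linalg.solve(A, designmtx.T @ targets)

def root_mean_squared_error(targets, predictions):
    """Root mean squared error between targets and predictions."""
    return np.sqrt(
        np.mean((np.asarray(targets) - np.asarray(predictions))**2))

def train_and_test(
        train_designmtx, train_targets, test_designmtx, test_targets,
        reg_param=None):
    """Fit weights on the training data (regularised if reg_param is given)
    and return the (train_error, test_error) pair of RMS errors."""
    if reg_param is None:
        weights = ml_weights(train_designmtx, train_targets)
    else:
        weights = regularised_ml_weights(
            train_designmtx, train_targets, reg_param)
    train_error = root_mean_squared_error(
        train_targets, train_designmtx @ weights)
    test_error = root_mean_squared_error(
        test_targets, test_designmtx @ weights)
    return train_error, test_error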