Example #1
0
def handle_image_generation(classifier, feature_set, imagepath, title=''):
    '''
    Train a classifier and return its scores on the train and test split.
    Save a contour image of its predictions if it is only trained on two features.

    :param classifier: A string or object describing a classifier.
    :param feature_set: A list of column names describing the feature set to train the model on.
    :param imagepath: The path to store the contour plot.
    :param title: The title of the plot. It is passed through ``str.format``
        with the keyword arguments ``train_score`` and ``test_score``; a title
        that cannot be formatted that way is used verbatim.
    :return: The train and test scores for the classifier.
    '''
    train_table, test_table = get_split_table()
    train_labels, test_labels = get_labels(train_table, test_table)
    classifier = fit(classifier, feature_set, train_table)
    train_score = classifier.score(train_table[feature_set], train_labels)
    test_score = classifier.score(test_table[feature_set], test_labels)

    if len(feature_set) == 2:
        fig = plt.figure()
        try:
            ax = visualize_confidence(classifier, train_table, *feature_set)
            plot_with_columns(train_table,
                              *feature_set,
                              ax=ax,
                              marker='+',
                              label='train')
            plot_with_columns(test_table, *feature_set, ax=ax, label='test')
            ax.legend()
            try:
                plot_title = title.format(train_score=train_score,
                                          test_score=test_score)
            except (KeyError, IndexError, ValueError):
                # KeyError: unknown named placeholder, IndexError: positional
                # placeholder, ValueError: malformed braces. In all three
                # cases fall back to the raw title instead of failing.
                plot_title = title
            ax.set_title(plot_title)
            fig.savefig(imagepath)
        finally:
            # pyplot keeps a reference to every figure it creates until the
            # figure is closed, so release it once the image is written.
            plt.close(fig)

    return train_score, test_score
Example #2
0
 def test_oseen(coreR, gamma, dist,xdrift,ydrift,u_conv,v_conv):
     """Fit a noisy synthetic velocity field and plot the comparison.

     A ``dist`` x ``dist`` grid over [-1, 1] x [-1, 1] is built, the field is
     generated with ``fitting.velocity_model`` centred on (0, 0) on the grid
     shifted by ``xdrift``/``ydrift``, ``u_conv``/``v_conv`` are added and
     Gaussian noise (standard deviation 0.3) is applied. ``fitting.fit`` is
     then run on the unshifted grid, and the fitted values, their relative
     errors and the correlation coefficient are printed before the result is
     handed to ``plot.plot_fit_test``.
     """
     print('coreR:',coreR,'Gamma',gamma,'xdrift',xdrift,
           'ydrift',ydrift,'u_conv',u_conv,'v_conv',v_conv)

     # Regular grid on which both the data and the fitted model are evaluated.
     axis_x = np.linspace(-1, 1, dist)
     axis_y = np.linspace(-1, 1, dist)
     grid_x, grid_y = np.meshgrid(axis_x, axis_y)
     centre_x, centre_y = 0.0, 0.0

     # Synthetic data: model on the drifted grid plus the u_conv/v_conv offset.
     clean_u, clean_v = fitting.velocity_model(
         coreR, gamma, centre_x, centre_y, u_conv, v_conv,
         grid_x + xdrift, grid_y + ydrift)
     shifted_u = clean_u + u_conv
     shifted_v = clean_v + v_conv
     # Noise is drawn for u first, then v.
     noisy_u = np.random.normal(shifted_u, 0.3)
     noisy_v = np.random.normal(shifted_v, 0.3)

     fitted = fitting.fit(coreR, gamma, grid_x, grid_y, centre_x, centre_y,
                          noisy_u, noisy_v, u_conv, v_conv, 0)
     for label, index, truth in (('coreR:', 0, coreR), ('gamma:', 1, gamma)):
         print(label, fitted[index], 'error(%):',
               (1 - fitted[index] / truth) * 100)
     print('x_real:', fitted[2])
     print('y_real:', fitted[3])

     u_model, v_model = fitting.velocity_model(
         fitted[0], fitted[1], fitted[2], fitted[3], fitted[4], fitted[5],
         grid_x, grid_y)
     corr = fitting.correlation_coef(noisy_u, noisy_v, u_model, v_model)
     print('correlation:', corr)
     print('---')
     plot.plot_fit_test(grid_x, grid_y, noisy_u, noisy_v, u_model, v_model,
                        fitted[2], fitted[3], fitted[0], fitted[1],
                        fitted[4], fitted[5], corr)
def segment(k,m, inciset, trainingset, radiographs, colors, leftout, mode = 0):
    """Run the landmark pipeline on the left-out radiograph.

    Steps: load and align the training landmarks, run the PCA, obtain an
    initial estimate through ``model.estimate`` and, for modes 0 and 1, fit
    that estimate and plot it.

    ``mode`` selects what is plotted: 0 also shows the alignment and PCA
    plots, 2 shows the initial estimate only. The mask produced by
    ``ui.plotfit`` is returned for modes 0 and 1; any other mode returns
    ``None``.
    """
    show_training_plots = mode == 0

    # Training images and the landmarks that belong to them.
    trainimgs = [radiographs[idx] for idx in trainingset]
    lmtrain, _ = landmarks.get(trainingset)

    # Alignment of all training landmarks.
    aligns, means = landmarks.align(lmtrain)
    if show_training_plots:
        ui.plotalign(colors, means, aligns)

    # PCA on the aligned landmarks.
    eva, evc = pca.pca(aligns, means)
    if show_training_plots:
        ui.plotpca(means,eva,evc)

    # Initial estimate on the left-out image (how it is obtained is decided
    # by model.estimate from the mode).
    target = radiographs[leftout]
    est, greymodels = model.estimate(k, m, inciset, means, trainimgs, lmtrain,
                                     target, colors, mode)
    if mode == 2:
        ui.plotinit(est, target, colors, leftout)

    if mode not in (0, 1):
        return None

    # Fit the initial estimate and return the plotted mask.
    X = fit.fit(est, inciset, eva, evc, means, greymodels, target, k, m, 3.0)
    return ui.plotfit(target, list(est), X, len(inciset), colors)
def main():
    """Train the selected classifier on two features and render the plot page.

    Reads ``dataset``, ``feature1``, ``feature2`` and ``classifier`` from the
    submitted form. If a field is missing, or the dataset has no known target
    column, the selection page is rendered again with an error message.
    Otherwise the data is split, the classifier is fitted, 5-fold
    cross-validation accuracies are computed on the train and test parts and
    ``plot.html`` is rendered with those values and the plot.
    """
    # Target (label) column for every dataset this view can handle.
    targets = {"static/diabetes.csv": "diabetes"}

    try:
        dataset = request.form["dataset"]
        feature1 = request.form["feature1"]
        feature2 = request.form["feature2"]
        classifier = request.form["classifier"]
    except KeyError:
        error = "Warning! Missing selections. Please select one dataset, two features from the dataset, and one classifier!"
        return render_template('select.html', error=error)

    # Previously an unknown dataset left `target` unbound and the view
    # crashed with UnboundLocalError further down.
    target = targets.get(dataset)
    if target is None:
        error = "Warning! Unknown dataset. Please select one of the available datasets!"
        return render_template('select.html', error=error)

    df = read(dataset)
    X_train, X_test, y_train, y_test = split_proportional(df, target)
    y_train = pd.get_dummies(y_train)
    y_test = pd.get_dummies(y_test)

    clf = fit(X_train, y_train, classifier, feature1, feature2)

    data_train = select_features(X_train, (feature1, feature2))
    data_test = select_features(X_test, (feature1, feature2))
    accuracy_train = np.mean(cross_val_score(clf, data_train, y_train, cv=5))
    accuracy_test = np.mean(cross_val_score(clf, data_test, y_test, cv=5))

    plot_data = build_plot(data_test, y_test, clf)

    return render_template('plot.html', accuracy_train=accuracy_train, accuracy_test=accuracy_test, plot_url=plot_data)
Example #5
0
def main():
    """Interactive driver: ask for a classifier and two features, then plot.

    The user is prompted until a valid classifier ("svc" or "knn") and two
    column names of the training data have been entered; entering 'q' at any
    prompt exits. The fitted classifier is then passed to ``plot_diabetes``
    and the resulting plot is shown.
    """
    def ask(prompt, is_valid, complaint):
        # Keep prompting until a non-empty valid answer is given.
        while True:
            answer = input(prompt).lower()
            if answer == "q":
                exit(0)
            if not is_valid(answer):
                print(complaint)
                continue
            if answer != "":
                return answer

    data_training, data_validation = data.extract_data("diabetes.csv")

    classifier = ask(
        "ENTER CLASSIFIER\nSVC or KNN?\n(Enter 'q' to exit)\n>",
        lambda name: name in ("svc", "knn"),
        "\n\nInput is not a valid classifier, try again..")

    feature_string = "ENTER {} FEATURE\npregnant\nglucose\npressure\ntriceps\ninsulin\nmass\npedigree\nage\n(Enter 'q' to exit)\n>"
    columns = list(data_training)

    def is_column(name):
        return name in columns

    bad_feature = "\n\nInput is not a valid feature, try again.."
    feature_x = ask("\n" + feature_string.format("FIRST"), is_column,
                    bad_feature)
    # The first choice is removed from the list shown in the second prompt.
    second_prompt = "\n" + feature_string.replace(
        feature_x + "\n", "").format("SECOND")
    feature_y = ask(second_prompt, is_column, bad_feature)

    trained = fitting.fit(data_training, data_validation, classifier,
                          feature_x, feature_y)
    figure = plot_diabetes(data_training, data_validation, trained,
                           feature_x, feature_y)
    figure.show()
Example #6
0
 def test_oseen(coreR, gamma, dist,xdrift,ydrift):
     """Fit a noisy synthetic velocity field and plot the correlation.

     The field is generated with ``fitting.velocity_model`` centred on (0, 0)
     on a ``dist`` x ``dist`` grid over [-1, 1] x [-1, 1] shifted by
     ``xdrift``/``ydrift``, with both convection terms fixed to 0.0, and
     Gaussian noise (standard deviation 0.3) is added. ``fitting.fit`` is run
     on the unshifted grid; the fitted values, their relative errors and the
     correlation coefficient are printed and passed on to ``plot.plot_corr``.
     """
     print('|*|coreR:',coreR,'Gamma',gamma,'xdrift',xdrift,'ydrift',ydrift,'|*|')

     axis_x = np.linspace(-1, 1, dist)
     axis_y = np.linspace(-1, 1, dist)
     grid_x, grid_y = np.meshgrid(axis_x, axis_y)
     centre_x, centre_y = 0.0, 0.0
     # Original author's note on u_conv: "flipped with v, fix later".
     u_conv, v_conv = 0.0, 0.0

     clean_u, clean_v = fitting.velocity_model(
         coreR, gamma, centre_x, centre_y, u_conv, v_conv,
         grid_x + xdrift, grid_y + ydrift)
     shifted_u = clean_u + u_conv
     shifted_v = clean_v + v_conv
     # Noise is drawn for u first, then v.
     noisy_u = np.random.normal(shifted_u, 0.3)
     noisy_v = np.random.normal(shifted_v, 0.3)

     fitted = fitting.fit(coreR, gamma, grid_x, grid_y, centre_x, centre_y,
                          noisy_u, noisy_v, u_conv, v_conv)
     for label, index, truth in (('coreR:', 0, coreR), ('gamma:', 1, gamma)):
         print(label, fitted[index], 'error(%):',
               (1 - fitted[index] / truth) * 100)
     print('fxCenter:', fitted[2])
     print('fyCenter:', fitted[3])

     # The model is re-evaluated with the fixed convection terms, not with
     # fitted[4] / fitted[5].
     model_u, model_v = fitting.velocity_model(
         fitted[0], fitted[1], fitted[2], fitted[3], u_conv, v_conv,
         grid_x, grid_y)
     corr = fitting.correlation_coef(noisy_u, noisy_v, model_u, model_v)
     print('correlation:', corr)
     print('---')
     plot.plot_corr(grid_x, grid_y, noisy_u, noisy_v, model_u, model_v,
                    fitted[0], corr)
Example #7
0
def scan_and_fit(l, step_size = step_size):
    """Scan a window centred on ``l``, fit the reading and plot the result.

    The window spans ``range_width`` around ``l``. The module-level fit
    parameters ``l0``, ``y0`` and ``a`` are seeded from the measured data
    before ``fit`` is called, and the data and the fitted curve are drawn on
    figure 1.

    :param l: Centre of the scan window.
    :param step_size: Step size handed to ``scan_and_read``; defaults to the
        module-level ``step_size`` at definition time.
    :return: The fitted ``l0()`` and ``gamma()`` values.
    """
    # Named scan_range so that the builtin range() is not shadowed.
    scan_range = r_[l-range_width/2, l+range_width/2]
    y = scan_and_read(scan_range,
                      step_time, step_size)

    x = linspace(scan_range[0], scan_range[1], len(y))
    ## Expected values for parameters
    l0.set(x[argmin(y)])
    y0.set(amax(y))
    a.set(amin(y)-amax(y))

    fit(f,parameters, x, y, 1,)
    figure(1)
    # NOTE(review): hold() was removed in Matplotlib 3.0; this code needs an
    # older Matplotlib (or a module-level replacement) to run.
    hold(False)
    # Raw string: "\l" is not a valid escape sequence in a normal literal.
    title(r"$\lambda_0 = %.4f$"%l0())
    xt = linspace(scan_range[0], scan_range[1], 100)
    plot(x,y,'o')
    hold(True)
    plot(xt, f(xt),'-')

    return l0(), gamma()
def plot(error=False):
    """Render ``show_plot.html`` for the default classifier and feature pair.

    The first entry of ``classifiers`` and the first two entries of
    ``features`` are used to fit, plot and score. ``error`` is forwarded to
    the template unchanged.
    """
    default_classifier = classifiers[0]
    x_feature, y_feature = features[0], features[1]

    fitted = fitting.fit(data_training, data_validation, default_classifier,
                         x_feature, y_feature)
    # Called for its plotting effect; the returned object is not used here.
    visualize.plot_diabetes(data_training, data_validation, fitted,
                            x_feature, y_feature)

    acc_train, acc_val = fitting.getAccuracy(
        data_training, data_validation, default_classifier, x_feature,
        y_feature)

    context = dict(error=error,
                   classifiers=classifiers,
                   features=features,
                   p1=acc_train,
                   p2=acc_val)
    return render_template('show_plot.html', **context)
def picture_prosess(features, targeted_column, classifier):
    """
    Fit a classifier on the diabetes data and report its accuracy, plus two
    scatter plots when exactly two features are selected.

    The data comes from data.diabetes_dataset() and the classifier from
    fitting.fit(). The plots produced by visualize.visualizer() are written
    into in-memory buffers and returned base64-encoded. According to the
    original author this avoids plots that were not refreshed when the
    selection changed on the web page without restarting it.

        args:
            features (list:String): list containing names of features(columns)
            targeted_column (String): name of column
            classifier (String): name of classifier

        returns:
            t_ac (float): accuracy score on the training set
            v_ac (float): accuracy score on the validation set
            img1 (string): base64 PNG of the training plot, or None
            img2 (string): base64 PNG of the validation plot, or None
    """
    def encode_png(plot_obj):
        # Save into memory and return the PNG bytes as base64 text.
        buf = BytesIO()
        plot_obj.savefig(buf, format="png")
        return base64.b64encode(buf.getbuffer()).decode("ascii")

    _, training_set, validation_set = data.diabetes_dataset()
    trained_classifier = fitting.fit(training_set, classifier, features,
                                     targeted_column)

    train_pred = trained_classifier.predict(training_set[features])
    t_ac = metrics.accuracy_score(training_set[targeted_column], train_pred)

    valid_pred = trained_classifier.predict(validation_set[features])
    v_ac = metrics.accuracy_score(validation_set[targeted_column], valid_pred)

    # Plots are only produced for a two-feature selection.
    if len(features) != 2:
        return t_ac, v_ac, None, None

    img1 = encode_png(visualize.visualizer(train_pred, training_set, features))
    img2 = encode_png(
        visualize.visualizer(valid_pred, validation_set, features))
    return t_ac, v_ac, img1, img2
Example #10
0
def render_metrics(model_names, features, svm_settings, knn_settings,
                   lda_settings):
    """Fit the requested models and return their rendered metrics.

    The models are fitted through ``fitting.fit`` on ``data/diabetes.csv``
    using ``features`` plus the ``diabetes`` column. The ``diabetes`` column
    of the returned control data is used as ground truth and removed from the
    control data before the metrics are computed.

    :param model_names: Names of the models to fit.
    :param features: Feature column names; this list is not modified.
    :param svm_settings: Settings forwarded to ``fitting.fit``.
    :param knn_settings: Settings forwarded to ``fitting.fit``.
    :param lda_settings: Settings forwarded to ``fitting.fit``.
    :return: The result of ``metrics_one`` for a single model, of
        ``metrics_multiple`` for several, or an error tuple when
        ``model_names`` is empty.
    """
    # Build a new list instead of appending to the caller's one: the in-place
    # append added another 'diabetes' entry on every call with the same list.
    columns = list(features) + ['diabetes']  # column with 0/1 or True/False
    models, training_data, target_data, control_data = fitting.fit(
        'data/diabetes.csv', model_names, columns, svm_settings, knn_settings,
        lda_settings)
    y_true = control_data['diabetes']
    control_data = control_data.drop(columns=['diabetes'])

    if len(model_names) == 1:
        return metrics_one(models, control_data, y_true)
    elif len(model_names) > 1:
        return metrics_multiple(models, control_data, y_true)
    else:
        return ('Something went wrong', 'error')
Example #11
0
 def test_oseen(core_radius, gamma, dist, xdrift, ydrift, u_advection,
                v_advection):
     """Fit a noisy synthetic velocity field and plot the comparison.

     The field is generated with ``fitting.velocity_model`` centred on (0, 0)
     on a ``dist`` x ``dist`` grid over [-1, 1] x [-1, 1] shifted by
     ``xdrift``/``ydrift``; ``u_advection``/``v_advection`` are added and
     Gaussian noise (standard deviation 0.3) is applied. ``fitting.fit`` is
     run on the unshifted grid; the fitted values, their relative errors and
     the correlation coefficient are printed and ``fitting.plot_fit`` is
     called with the result.
     """
     print('core_radius:', core_radius, 'Gamma', gamma, 'xdrift', xdrift,
           'ydrift', ydrift, 'u_advection', u_advection, 'v_advection',
           v_advection)

     axis_x = np.linspace(-1, 1, dist)
     axis_y = np.linspace(-1, 1, dist)
     grid_x, grid_y = np.meshgrid(axis_x, axis_y)
     centre_x, centre_y = 0.0, 0.0

     clean_u, clean_v = fitting.velocity_model(
         core_radius, gamma, centre_x, centre_y, u_advection, v_advection,
         grid_x + xdrift, grid_y + ydrift)
     shifted_u = clean_u + u_advection
     shifted_v = clean_v + v_advection
     # Noise is drawn for u first, then v.
     noisy_u = np.random.normal(shifted_u, 0.3)
     noisy_v = np.random.normal(shifted_v, 0.3)

     fitted = fitting.fit(core_radius, gamma, grid_x, grid_y, centre_x,
                          centre_y, noisy_u, noisy_v, u_advection,
                          v_advection, 0)
     reference = (('core_radius:', 0, core_radius), ('gamma:', 1, gamma))
     for label, index, truth in reference:
         print(label, fitted[index], 'error(%):',
               (1 - fitted[index] / truth) * 100)
     print('x_real:', fitted[2])
     print('y_real:', fitted[3])

     u_model, v_model = fitting.velocity_model(
         fitted[0], fitted[1], fitted[2], fitted[3], fitted[4], fitted[5],
         grid_x, grid_y)
     corr = fitting.correlation_coef(noisy_u, noisy_v, u_model, v_model)
     print('correlation:', corr)
     print('---')
     fitting.plot_fit(grid_x, grid_y, noisy_u, noisy_v, u_model, v_model,
                      fitted[2], fitted[3], fitted[0], fitted[1], fitted[4],
                      fitted[5], corr, 0, 0, '.', 0, 'png')
Example #12
0
def train(expr_in,
          vae_lr=1e-4,
          epochs=500,
          info_step=10,
          batch_size=50,
          latent_dim=2,
          f="nb",
          log=True,
          scale=True):
    """Preprocess an expression matrix and train a VAE on it.

    Preprocessing: negative entries are set to 0, then optionally
    ``log2(x + 1)`` is applied and every row is divided by its maximum. The
    caller's array is left untouched.

    Args:
        expr_in: 2-D array, one sample per row (``shape[0]`` samples of
            dimension ``shape[1]``).
        vae_lr: Learning rate of the Adam optimizer.
        epochs: Number of passes over the data.
        info_step: The losses are printed every ``info_step`` epochs.
        batch_size: Mini-batch size.
        latent_dim: Latent dimension handed to ``model.VAE``.
        f: Passed to ``fitting.fit`` and ``model.VAE``.
        log: Apply the ``log2(x + 1)`` transform.
        scale: Divide every row by its maximum.

    Returns:
        The trained ``model.VAE`` instance.
    """
    # Preprocessing happens on a private floating-point copy: the original
    # code clipped the caller's array in place, and with log=False the
    # in-place row scaling truncated integer input.
    expr_in = np.array(expr_in, copy=True)
    if not np.issubdtype(expr_in.dtype, np.floating):
        expr_in = expr_in.astype(float)
    expr_in[expr_in < 0] = 0.0

    if log:
        expr_in = np.log2(expr_in + 1)
    if scale:
        # Rows whose maximum is 0 contain only zeros at this point; they are
        # kept as zeros instead of becoming NaN through 0 / 0.
        row_max = expr_in.max(axis=1, keepdims=True)
        expr_in = np.divide(expr_in,
                            row_max,
                            out=np.zeros_like(expr_in),
                            where=row_max != 0)

    # Number of data samples
    n_sam = expr_in.shape[0]
    # Dimension of input data
    in_dim = expr_in.shape[1]
    # Build VAE model and its optimizer
    lmd = fitting.fit(expr_in, f)
    model_vae = model.VAE(in_dim=in_dim, latent_dim=latent_dim, f=f, lmd=lmd)
    optimizer_vae = tf.keras.optimizers.Adam(vae_lr)

    # Training
    for epoch in range(1, epochs + 1):
        # Minibatch for VAE training
        vae_train_set = tf.data.Dataset.from_tensor_slices(expr_in).shuffle(
            n_sam).batch(batch_size)
        # Batch training
        for vae_batch in vae_train_set:
            # Update VAE model
            rec_loss, kl_loss, rank_loss = update_model(
                model_vae, vae_batch, optimizer_vae, losses.vae_loss)
        # Print training info (losses of the last batch of the epoch)
        if epoch % info_step == 0:
            print("Epoch", epoch, " rec_loss: ", rec_loss.numpy(),
                  " kl_loss: ", kl_loss.numpy(), " rank_loss: ",
                  rank_loss.numpy())

    return model_vae
Example #13
0
def visualize(feature_1, feature_2, classifier):
    """
    6.3: Creates a scatter plot of diabetes data, displaying areas of predicted negative/positive result.

    Args:
        feature_1 (string): The first feature to plot by
        feature_2 (string): The second feature to plot by
        classifier: The classifier specification handed to ``fitting.fit``

    Returns:
        plt: the scatter plot
        float: the accuracy score on the training set
        float: the accuracy score on the validation set
    """

    trained_classifier, training_score, validation_score = fitting.fit(
        classifier, include_features=[feature_1, feature_2])
    plt = data.create_scatter_plot(feature_1, feature_2)

    X = data.data_frame[[feature_1, feature_2]].values
    y = data.data_frame['diabetes'].values
    step = 0.5

    # Mesh covering the data range with a margin of 1 on every side
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))
    Z = trained_classifier.predict(np.c_[xx.ravel(), yy.ravel()])

    Z = Z.reshape(xx.shape)
    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.coolwarm)

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())

    # Draw the samples on top of the mesh, first feature against second,
    # one colour per label. The previous ``plt.scatter(X, X)`` plotted the
    # feature matrix against itself and never used the labels.
    labels = np.unique(y)
    shades = plt.cm.coolwarm(np.linspace(0.0, 1.0, len(labels)))
    for label, shade in zip(labels, shades):
        members = y == label
        plt.scatter(X[members, 0],
                    X[members, 1],
                    color=tuple(shade),
                    edgecolors='k',
                    label=str(label))

    return plt, training_score, validation_score
def fitPredPlot(t: pd.DataFrame,
                v: pd.DataFrame,
                feature1: str,
                feature2: str,
                include_error: bool = False,
                cf="knn"):
    """Fit a classifier on two features, predict the validation data and plot.

    The validation frame is first drawn with ``scatterplot``. The classifier
    is then fitted on the training frame, the validation rows are predicted
    and the outcome is drawn with ``visualize_clf``. The accuracy is printed
    as well as returned.

    Args:
        t: dataframe training
        v: dataframe validation
        feature1: feature used for x
        feature2: feature used for y
        include_error: flag used to visualize prediction errors in the plot
        cf: The classifier to use for prediction and fitting.

    Returns:
        plt (matplotLib plot): The prediction plot
        acc: Accuracy score for the prediction
    """
    # Validation inputs, and the "neg"/"pos" labels encoded as 0/1.
    selected = [feature1, feature2]
    v_data = v[selected]
    v_target = v["diabetes"].replace({"neg": 0, "pos": 1})

    scatterplot(v, feature1, feature2)

    # Train on the training frame, predict the validation rows.
    clf = fit(t, feature1, feature2, classifier=cf, max_iter=5000)
    pred_target = clf.predict(v_data)

    figure = visualize_clf(feature1, feature2, v_data, v_target, pred_target,
                           include_error, clf)
    acc = metrics.accuracy_score(v_target, pred_target)
    print(f"Accuracy score for {cf}:{acc}")
    return figure, acc
Example #15
0
def val_changed():
    """Re-render the plot page for the classifier and features in the form.

    Reads ``classifiers``, ``x_features`` and ``y_features`` from the
    submitted form, fits and plots with that selection and computes the
    accuracies for the same selection. If an ``AssertionError`` is raised
    along the way, it is printed and the default plot is rendered with the
    error flag set.
    """
    classifier = request.form["classifiers"]
    x_feature = request.form["x_features"]
    y_feature = request.form["y_features"]
    try:
        # Called for its plotting effect; the returned object is not used.
        visualize.plot_diabetes(
            data_training, data_validation,
            fitting.fit(data_training, data_validation, classifier, x_feature,
                        y_feature), x_feature, y_feature)
        # Score the selection that was actually plotted. The accuracies were
        # previously computed for classifiers[0] / features[0..1], whatever
        # the user had chosen.
        acc_train, acc_val = fitting.getAccuracy(data_training,
                                                 data_validation,
                                                 classifier, x_feature,
                                                 y_feature)

    except AssertionError as e:
        print(e)
        return plot(error=True)
    return render_template('show_plot.html',
                           error=False,
                           classifiers=classifiers,
                           features=features,
                           p1=acc_train,
                           p2=acc_val)
Example #16
0
        y_min, y_max = X1.min() - 1, X1.max() + 1
        stepsize = 0.8
        xx, yy = np.meshgrid(
            np.arange(x_min, x_max, stepsize),
            np.arange(y_min, y_max, stepsize),
        )
        Z = classifer.predict(np.c_[xx.ravel(), yy.ravel()])
        train_pred = classifer.predict(test[features])
        acc = accuracy_score(test["diabetes"], train_pred)
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        title = "Decision surface of {}, Accuracy {:.2%}".format(
            classifer.__class__.__name__, acc)
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)
        ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors="k")
        ax.set_ylabel(features[1])
        ax.set_xlabel(features[0])
        ax.set_title(title)
        return fig
    else:
        print("Cant scatter plot unless there is two features")
        return None


if __name__ == "__main__":
    # Demo: fit a linear SVC on two features and show its decision surface.
    feaut = ["insulin", "glucose"]
    # The estimator is constructed directly; eval() of a constant string
    # resolved the same name and only hid the call from readers and tools.
    clf = ft.fit(feaut, SVC(kernel='linear'), "diabetes.csv")
    visualize(feaut, clf, "diabetes.csv")
    plt.show()
Example #17
0
            if e == 'pos':
                test.append(1)
            else:
                test.append(0)

        test = np.array(test)

        #plt.pcolormesh(np.expand_dims(x,0), np.expand_dims(y,1), test*np.eye(len(prediction)))
        """
        for idx, row in df.groupby('diabetes'):
            plt.scatter(row[feature_set[0]], row[feature_set[1]], c=[colors[r] for r in row['diabetes']], label=idx)

        plt.xlabel(feature_set[0])
        plt.ylabel(feature_set[1])
        plt.legend(title="Diabetes")

        return plt


if __name__ == '__main__':
    # Manual check: fit KNN on two features, predict the validation set and
    # show the resulting plot.
    targeted_column = 'diabetes'
    features = ['glucose','pressure']

    _, training_set, validation_set = data.diabetes_dataset()
    trained_classifier = fitting.fit(training_set, 'KNN', features,
                                     targeted_column)

    prediction = trained_classifier.predict(validation_set[features])
    # A separate list with the same two names is passed to the visualizer.
    visualizer(prediction, validation_set, list(features),
               trained_classifier).show(block=True)
Example #18
0
import fitting
import numpy as np

# Sample points and data lying exactly on the line 3*t + 4.
t = np.linspace(0, 100, 50)
data = 3*t + 4

# Fit parameters with their starting values.
m = fitting.Parameter(2)
b = fitting.Parameter(5)


def fit_func(x):
    """Evaluate the straight line with the current values of m and b."""
    return m()*x + b()


fitting.fit(fit_func, [m, b], data, t)

# Single-argument print() calls give the same output on Python 2 and 3;
# the former print statements were a syntax error on Python 3.
print("m: %f" % m())
print("b: %f" % b())

Example #19
0
def fit_data(data_dir, file_name, mode, fit_method):
    """Fit every spectrum of a dataset, pickle the results and plot them.

    The data is read through ``prepare_data`` from
    ``<data_dir>/<file_name>/<file_name>``. Every entry is fitted with
    ``fit`` and summarised with ``extract``; the tuple
    ``(rad, I, I_err, S, S_err, j)`` is pickled to ``<that name>_data.p``
    and the dispersion plot is saved as ``velocity.pdf`` next to it.

    Args:
        data_dir: Directory holding one sub-directory per dataset.
        file_name: Name of the dataset (sub-directory and file stem).
        mode: 'real' (binned real data), 'noiseless' (binned noiseless data)
            or 'select' (``num_noiseless`` entries picked from the noiseless
            data; ``num_noiseless`` is expected at module level). Any other
            value terminates the program through ``sys.exit``.
        fit_method: Passed to ``fit`` and ``extract``. 'gaussian' uses a
            minimum SNR of 40 for binning, anything else 80.
    """
    name = '{}/{}/{}'.format(data_dir, file_name, file_name)

    min_SNR = 40 if fit_method == 'gaussian' else 80
    many_plots = True

    # prepare_data() is given the flag as the string 'True' / 'False', so the
    # string form is kept and a real boolean is derived from it.
    noiseless_by_mode = {'real': 'False', 'noiseless': 'True',
                         'select': 'True'}
    if mode not in noiseless_by_mode:
        sys.exit('Invalid "mode" given by user')
    noiseless = noiseless_by_mode[mode]
    is_noiseless = noiseless == 'True'

    # Convert the datacubes to a list of dictionaries. Each dictionary contains information
    # (e.g. radius, spectrum) for a single spaxel.
    data, wavel, in_wavel = prepare_data(name, noiseless)

    if mode in ('real', 'noiseless'):
        # Bin the spaxels to a minimum SNR. Each element of the data list now
        # represents a single annular bin. For 'noiseless' the noiseless data
        # is binned with the 'real' errors.
        data = SNR_bin(data, min_SNR)
    else:
        # 'select': pick num_noiseless individual spaxels, equally spaced in
        # radius, from the noiseless data.
        data = reduce(data, num_noiseless)

    # Set spectrum error to 1 if using noiseless data (so it is irrelevant in
    # reduced chi fitting). This branch used to compare the string flag with
    # the boolean True and therefore never ran, and it indexed the list
    # `data` instead of the individual entry.
    if is_noiseless:
        for item in data:
            item['spec_err'] = np.ones(len(wavel))

    # Create dictionary of functions that are used to calculate second moment and partial derivatives of a Gauss-Hermite function.
    ghdict = gauss_moments()

    # THE FITTING
    for x in data:
        out, params, perr = fit(wavel, x['spec'], x['spec_err'], in_wavel,
                                fit_method)
        I, I_err, S, S_err = extract(out, fit_method, ghdict, in_wavel)
        x.update({
            'I': I,
            'I_err': I_err,
            'S': S,
            'S_err': S_err,
            'params': params,
            'perr': perr
        })

    # Convert fitted values in keys (and h4) to 1D arrays.
    keys = ['rad', 'S', 'S_err', 'S_mod', 'I', 'I_err', 'I_mod', 'j']
    rad, S, S_err, S_mod, I, I_err, I_mod, j = [
        np.asarray([x[attribute] for x in data]) for attribute in keys
    ]
    h4 = np.asarray([x['params']['h4'] for x in data])

    # For noiseless data set the error in the fitted velocity equal to '1.0' (so it is irrelevant in Reduced Chi fitting).
    if is_noiseless:
        S_err = np.ones(len(S_err))

    # Store the fitted values to the data using pickle - this is the completed data!
    main_data = rad, I, I_err, S, S_err, j
    with open("{0}_data.p".format(name), "wb") as handle:
        pickle.dump(main_data, handle)

    # BELOW IS JUST ANALYTICS

    # Calculate average difference between theoretical model and the data (as a fraction of the model).
    I_diff = (I - I_mod) / I_mod
    S_diff = (S - S_mod) / S_mod
    # Single-argument print() calls produce the same text on Python 2 and 3.
    print('Average absolute difference in dispersion: {}'.format(
        np.sum(S - S_mod) / len(S)))
    print('Average h4 is: {}'.format(np.sum(h4) / len(h4)))

    # Check what fraction of fitted values are within two sigma of the theoretical model (should be about 95%.)
    within_err = np.count_nonzero((S_mod > S - 2 * S_err) &
                                  (S_mod < S + 2 * S_err))
    print('Fraction within error is: {}'.format(
        within_err / float(len(rad))))

    # PLOTTING

    # Plot fitted dispersion with errorbars for real data, and without errorbars for noiseless data.
    plt.figure()
    if is_noiseless:
        plt.plot(rad, S, 'b-x')
    else:
        plt.errorbar(rad, S, yerr=S_err, ls='none')

    plt.plot(rad, S_mod, 'g-')
    plt.ylabel('Dispersion \n $kms^{-1}$')
    plt.xlim(0, rad[-1])
    plt.title(min_SNR)
    plt.savefig("{}/{}/velocity.pdf".format(data_dir, file_name))

    # Plot fractional difference in dispersion between theoretical model and fitted value.
    plt.figure()
    plt.plot(rad, S_diff, '.', markersize=2)
    plt.ylabel('Dispersion fractional difference')
    plt.xlabel('R / milliarcseconds')
    plt.xlim(0, rad[-1])
    # with Y = 0 line as a visual aid.
    Y = np.zeros(100)
    X = np.linspace(0, 1000, 100)
    plt.plot(X, Y)

    if many_plots:
        # Plot fractional difference in intensity between theoretical model and fitted value.
        plt.figure()
        plt.plot(rad, I_diff, '.', markersize=2)
        plt.plot(X, Y)
        plt.ylabel('Intensity fractional difference')
        plt.xlabel('R / milliarcseconds')
        plt.xlim(0, rad[-1])

        # Plot h4.
        plt.figure()
        plt.plot(rad, h4, 'g.', markersize=3)
        plt.plot(X, Y)
        plt.ylabel('h4')
        plt.xlabel('R / milliarcseconds')
        plt.xlim(0, rad[-1])

        # Plot intensity
        plt.figure()
        plt.plot(rad, I)
        # "\\odot" keeps the backslash for mathtext without relying on an
        # invalid escape sequence; "\n" is a real line break.
        plt.ylabel('Intensity \n $L_{\\odot} per pixel$')
        plt.xlim(0, rad[-1])
Example #20
0
def visualize(classifier, training_set, dataset, target, features):
    """Fit ``classifier`` on the training set and plot it over ``dataset``.

    The classifier is fitted on the same ``features`` and ``target`` that
    are handed to ``plot_dataset``. It used to be fitted on the hard-coded
    columns ["age", "pregnant"] / "diabetes" regardless of the arguments, so
    the plot and the model could disagree.

    :param classifier: Classifier specification passed to ``fit``.
    :param training_set: Data the classifier is fitted on.
    :param dataset: Data handed to ``plot_dataset``.
    :param target: Name of the target column.
    :param features: Names of the feature columns.
    """
    classifier = fit(training_set, classifier, features, target)
    plot_dataset(classifier, dataset, target, features)
Example #21
0
import data
import fitting
import matplotlib.pyplot as plt
import numpy as np

# Polynomial orders to try, and the losses recorded for each of them
scala = list(range(1, 4))
train_loss, validation_loss = [], []

# Order-k fit on a single variable (original author's note: for unknown
# reasons this only works up to the third order).
for i in scala:
    (trainset, trainresultset,
     validationset, varesultset) = data.createdata("Olympic.txt", i)
    arg = fitting.fit(trainset, trainresultset)

    # Prediction on the training data and its mean squared error
    predictresult = trainset.dot(arg)
    train_residual = trainresultset - predictresult
    train_loss.append((train_residual**2).mean())

    # Prediction on the validation data and its mean squared error
    predictresult = validationset.dot(arg)
    validation_residual = varesultset - predictresult
    validation_loss.append((validation_residual**2).mean())

print("trainning loss:\n", train_loss, '\n')
print("validation loss:\n", validation_loss, '\n')

# A list of plot styles (kept disabled)
#kind=[['b','o'],['r','*'],['g','^'],['k','+']]
# Plotting section
fig = plt.figure(1)
ax1, ax2 = (fig.add_subplot(2, 2, position) for position in (1, 2))
        g = sum([topcolors[i][1] for i in range(0,n)]) / n
        b = sum([topcolors[i][2] for i in range(0,n)]) / n

        color_wb = (r,g,b)
        points_to_fit.append(((0.5+cbox)*boxwidth, (0.5+rbox)*boxheight, lum(color_wb)))

        # may be able to reduce time by only saving into a rbox by cbox sized array of color vals
        for r in range(rbox*boxheight, (rbox+1)*boxheight):
            for c in range(cbox*boxwidth, (cbox+1)*boxwidth):
                wb_im[r][c] = color_wb

        profile_sum += queTimer.getAndReset()
        num_profiles += 1


# Fit the collected (x, y, value) points; the result is called below as
# fit_func(column, row).
fit_func = fitting.fit(points_to_fit)

# Running extrema of the fitted image.
# NOTE(review): min_val is initialised but never updated in the code visible
# here - confirm whether a minimum update is missing.
max_val = 0
min_val = 255

# Replace every pixel of the small image by the fitted value, repeated for
# the three channels, and track the largest value written.
for r in range(smaller_h):
    for c in range(smaller_w):
        wb_im[r][c] = [int(fit_func(c,r))] * 3
        if wb_im[r][c][0] > max_val:
            max_val = wb_im[r][c][0]

# Resize the fitted image to (width, height).
wb_im = array(Image.fromarray(wb_im).resize((width, height)))

figure()
imshow(wb_im)
figure()
Example #23
0
            ax.set_title(title)
        fig.savefig(imagepath)

    return train_score, test_score


if __name__ == '__main__':
    # Demo: train a randomly chosen classifier on two randomly chosen
    # numerical columns and show its confidence plot with both data splits.
    from data import get_numerical_columns
    from fitting import classifier_map
    train_table, test_table = get_split_table()
    train_labels, test_labels = get_labels(train_table, test_table)
    feature_options = get_numerical_columns()
    # replace=False: sampling with replacement (the default) could pick the
    # same column twice, which makes the two plot axes identical.
    feature_set = np.random.choice(feature_options, size=2, replace=False)
    fig = plt.figure()
    classifier_name = np.random.choice(list(classifier_map.keys()))
    classifier = fit(classifier_name, feature_set, train_table)
    train_score = classifier.score(train_table[feature_set], train_labels)
    test_score = classifier.score(test_table[feature_set], test_labels)
    ax = visualize_confidence(classifier, train_table, *feature_set)
    plot_with_columns(
        train_table,
        *feature_set,
        ax=ax,
        marker='+',
        label='train',
    )
    plot_with_columns(test_table, *feature_set, ax=ax, label='test')
    ax.set_title(
        f'{classifier_name}: Test {test_score:.5f} Train {train_score:.5f}')
    ax.legend()
    plt.show()
Example #24
0
def ransac(data,n,k,t,d,debug=False,return_all=False):
    """Fit model parameters to data using a RANSAC-style loop.

    Written from the pseudocode found at
    http://en.wikipedia.org/w/index.php?title=RANSAC&oldid=116358182

    The model is not passed in: the module-level ``fit`` function is used
    and is expected to return a ``(model, errors)`` pair.

    Args:
        data: 2-D array of observed data points, one per row.
        n: number of rows drawn for the initial fit in each iteration.
        k: maximum number of iterations.
        t: error threshold below which a row counts as an inlier.
        d: number of additional inliers that must be exceeded before a
            candidate is refitted and scored.
        debug: print error statistics for every iteration.
        return_all: also return the indices of the inliers of the best fit.

    Returns:
        The best model, or ``(best model, {'inliers': indices})`` when
        ``return_all`` is set.

    Raises:
        ValueError: if no iteration met the acceptance criterion.

    Reference pseudocode::

        iterations = 0
        bestfit = nil
        besterr = something really large
        while iterations < k {
            maybeinliers = n randomly selected values from data
            maybemodel = model parameters fitted to maybeinliers
            alsoinliers = empty set
            for every point in data not in maybeinliers {
                if point fits maybemodel with an error smaller than t
                     add point to alsoinliers
            }
            if the number of elements in alsoinliers is > d {
                % this implies that we may have found a good model
                % now test how good it is
                bettermodel = model parameters fitted to all points in
                              maybeinliers and alsoinliers
                thiserr = a measure of how well model fits these points
                if thiserr < besterr {
                    bestfit = bettermodel
                    besterr = thiserr
                }
            }
            increment iterations
        }
        return bestfit
    """
    iterations = 0
    bestfit = None
    besterr = numpy.inf
    best_inlier_idxs = None
    while iterations < k:
        maybe_idxs, test_idxs = random_partition(n,data.shape[0])
        maybeinliers = data[maybe_idxs,:]
        test_points = data[test_idxs]
        maybemodel,_ = fit(maybeinliers)
        # NOTE(review): the errors come from fitting the test points on their
        # own; maybemodel is not used to score them, unlike the reference
        # pseudocode. Confirm this is intended.
        _,test_err = fit(test_points)
        also_idxs = test_idxs[test_err < t] # select indices of rows with accepted points
        alsoinliers = data[also_idxs,:]
        if debug:
            # Single-argument print() calls give the same text on Python 2
            # and 3; the former print statements failed to parse on Python 3.
            print('test_err.min() {}'.format(test_err.min()))
            print('test_err.max() {}'.format(test_err.max()))
            print('numpy.mean(test_err) {}'.format(numpy.mean(test_err)))
            print('iteration %d:len(alsoinliers) = %d'%(
                iterations,len(alsoinliers)))
        if len(alsoinliers) > d:
            betterdata = numpy.concatenate( (maybeinliers, alsoinliers) )
            bettermodel,better_errs = fit(betterdata)

            thiserr = numpy.mean( better_errs )
            if thiserr < besterr:
                bestfit = bettermodel
                besterr = thiserr
                best_inlier_idxs = numpy.concatenate( (maybe_idxs, also_idxs) )
        iterations+=1
    if bestfit is None:
        raise ValueError("did not meet fit acceptance criteria")
    if return_all:
        return bestfit, {'inliers':best_inlier_idxs}
    else:
        return bestfit
Example #25
0
# Three simulation runs with different controller settings; their outputs
# are plotted together in figure 6.
t_sprung = 5
ufnc = stepfnc(t_sprung, 1)  # input function with step time, step height
t_max = 30
dt = 5e-3  # step size of the result vector

# Run 1 - plotted below with the label 'ungeregelt'
PID = [1, 1, 1, 1]  # parameters of the PID controller - T_i, T_d, T_n, K
t, b_out, S, IN, S_noise = Simulator(dt, t_max, ufnc, PT1**o, True, *PID,
                                     False)
y1 = b_out[S]

# Run 2 - plotted below with the label 'geregelt default'
PID = [0.5, 1, 1, 1]  # parameters of the PID controller - T_i, T_d, T_n, K
t, b_out, S, IN, S_noise = Simulator(dt, t_max, ufnc, PT1**o, False, *PID,
                                     False)
y2 = b_out[S]

# Run 3 - controller parameters derived from the fit result; plotted below
# with the label 'geregelt nach Reinisch'
[T_e, K_e], p2 = fit(G_abs, G_phi, w, f0, System)
PID = param([0, T_e, 0, 0, 0, 0, 0], K_e)
t, b_out, S, IN, S_noise = Simulator(dt, t_max, ufnc, PT1**o, False, *PID,
                                     False)
y3 = b_out[S]

figure(6)
plot(t, y1, 'b', label='ungeregelt')
plot(t, y2, '--g', label='geregelt default')
plot(t, y3, 'r', label='geregelt nach Reinisch')
legend(loc='lower right')

# OUTPUT ----------------------------------------------------------------------

#mp.rcParams.update({'font.size': 30})
#figure(3)