Example No. 1
def run_cost_analysis(alphas, cost_hist, dataset_title):
    """Run Cost analysis based on learnt values of theta."""
    min_cost = np.zeros(shape=(1, np.size(alphas)))
    if cost_hist is not None:
        fig = None
        subplot = None
        colors = get_colors(np.shape(alphas)[1])
        for index in range(0, np.shape(alphas)[1]):
            min_cost[0, index] = np.min(cost_hist[:, index])
            fig, subplot = \
                line_plot(np.reshape(range(1, np.shape(cost_hist)[0] + 1),
                                     newshape=(np.shape(cost_hist)[0], 1)),
                          cost_hist[:, index],
                          xlabel='Number Of Iterations',
                          ylabel='Cost J',
                          marker='x', markersize=2,
                          title=f'{dataset_title}\nConvergence Graph',
                          color=colors[index],
                          label=f'alpha={alphas[0, index]}',
                          linewidth=1,
                          fig=fig, subplot=subplot)
        util.pause('Program paused. Press enter to continue.')
        close_plot(fig)

        fig, subplot = \
            line_plot(alphas.transpose(), min_cost.transpose(),
                      xlabel='alpha',
                      ylabel='cost',
                      marker='x', markersize=2,
                      color='r',
                      title=f'{dataset_title}\nAlphas vs. Cost',
                      linewidth=2)
        util.pause('Program paused. Press enter to continue.')
        close_plot(fig)
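The get_colors helper used above is not shown in this listing. A minimal sketch, assuming it only needs to return one distinguishable color per alpha value (the name and arity come from the call above; the body is hypothetical):

import matplotlib.pyplot as plt
import numpy as np

def get_colors(num_colors):
    """Return num_colors visually distinct RGBA tuples from a colormap."""
    return list(plt.cm.viridis(np.linspace(0, 1, num_colors)))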
Example No. 2
 def plot_all(self):
     neval = []
     function_value = []
     for r in self.list_of_results:
         neval.append(r[3])
         function_value.append(r[2])
     plot.line_plot(neval,
                    function_value,
                    xaxis='Evaluations',
                    yaxis='f(x)')
Example No. 3
def run_gradient_descent(feature_matrix,
                         output_colvec,
                         num_examples,
                         num_features,
                         alpha,
                         num_iters,
                         fig,
                         subplot,
                         theta_colvec=None,
                         normal_eq=False,
                         debug=False):
    """Run Gradient Descent/Normal Equation.

    1) num_examples - number of training samples
    2) num_features - number of features
    3) feature_matrix - num_examples x (num_features + 1)
    4) output_colvec - num_examples x 1 col vector
    5) alpha - alpha value for gradient descent
    6) num_iters - number of iterations
    7) theta_colvec - (num_features + 1) x 1 col vector
                      initial values of theta
    8) normal_eq - use the closed-form normal equation instead of
                   gradient descent
    9) debug - print debug info
    """
    print('Running Gradient Descent ...')

    if theta_colvec is None:
        theta_colvec = np.zeros(shape=(num_features + 1, 1))

    cost_hist = None
    if normal_eq:
        theta_colvec = \
            normal_equation(feature_matrix, output_colvec)
        print(f'Theta found by normal equation : {theta_colvec}')
    else:
        theta_colvec, cost_hist = \
            gradient_descent(feature_matrix, output_colvec,
                             num_examples, num_features,
                             alpha, num_iters, theta_colvec,
                             debug=debug)
        print(f'Theta found by gradient descent: {theta_colvec}')

    if num_features == 1:
        line_plot(feature_matrix[:, 1],
                  feature_matrix @ theta_colvec,
                  marker='x',
                  label='Linear regression',
                  color='b',
                  markersize=2,
                  fig=fig,
                  subplot=subplot)
        util.pause('Program paused. Press enter to continue.')

    return theta_colvec, cost_hist
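normal_equation is defined elsewhere in the repository. A minimal sketch of the closed-form least-squares solution it presumably computes, using the pseudo-inverse for numerical stability (only the name and arguments come from the call above; the body is an assumption):

import numpy as np

def normal_equation(feature_matrix, output_colvec):
    """Closed-form least squares: theta = pinv(X'X) X'y."""
    return (np.linalg.pinv(feature_matrix.T @ feature_matrix)
            @ feature_matrix.T @ output_colvec)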
Example No. 4
 def restore_graph(self, drop_cols=None):
     """
     Show the dimensional simulated data as a figure.
     @drop_cols <list[str]>: the columns not to be shown
     """
     df = self.restore_df()
     if drop_cols is not None:
         df = df.drop(drop_cols, axis=1)
     line_plot(
         df,
         title=f"{self.name}: {', '.join(self.title_list)}",
         v=datetime.now(), h=self.total_population
     )
Example No. 5
 def predict_graph(self, step_n, name=None, excluded_cols=None):
     """
     Predict the values in the future and create a figure.
     @step_n <int>: the number of steps
     @name <str>: name of the area
     @excluded_cols <list[str]>: the excluded columns in the figure
     """
     if self.name is not None:
         name = self.name
     elif name is None:
         name = ""
     df = self.predict_df(step_n=step_n)
     if excluded_cols is not None:
         df = df.drop(excluded_cols, axis=1)
     r0 = self.param_dict["R0"]
     title = f"Prediction in {name} with {self.model.NAME} model: R0 = {r0}"
     line_plot(df, title, v=datetime.today(), h=self.total_population)
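The line_plot used in Examples 4 and 5 takes a DataFrame plus a title and optional vertical/horizontal marker lines, with keyword arguments matching the calls in Example No. 14 below. A minimal matplotlib sketch consistent with those calls (hypothetical; the notebook's actual utility function is not shown here):

import matplotlib.pyplot as plt

def line_plot(df, title, ylabel="Cases", v=None, h=None, math_scale=True):
    """Plot every column of df over its index; optionally draw a
    vertical line at v and a horizontal line at h."""
    ax = df.plot()
    if math_scale:
        ax.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    if v is not None:
        ax.axvline(x=v, color="black", linestyle=":")
    if h is not None:
        ax.axhline(y=h, color="black", linestyle=":")
    plt.show()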
Example No. 6
def main():
    hist = crawl()

    # split data
    train, test = train_test_split(hist, test_size=0.2)

    pd.plotting.register_matplotlib_converters()

    target_col = 'close'
    line_plot(train[target_col], test[target_col], 'training', 'test', title='')

    # random seed and hyperparameters for the LSTM model
    np.random.seed(42)
    window_len = 5
    test_size = 0.2
    zero_base = True
    lstm_neurons = 100
    epochs = 20
    batch_size = 32
    loss = 'mse'
    dropout = 0.2
    optimizer = 'adam'

    # train model
    train, test, X_train, X_test, y_train, y_test = prepare_data(
        hist, target_col, window_len=window_len, zero_base=zero_base, test_size=test_size)
    model = build_lstm_model(
        X_train, output_size=1, neurons=lstm_neurons, dropout=dropout, loss=loss,
        optimizer=optimizer)
    history = model.fit(
        X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1, shuffle=True)


    # Mean Absolute Error
    targets = test[target_col][window_len:]
    preds = model.predict(X_test).squeeze()
    print(f'MAE: {mean_absolute_error(y_test, preds)}')

    # plot and predict prices
    preds = test[target_col].values[:-window_len] * (preds + 1)
    preds = pd.Series(index=targets.index, data=preds)
    line_plot(targets, preds, 'actual', 'prediction', lw=3)
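The rescaling preds = test[target_col].values[:-window_len] * (preds + 1) only makes sense if prepare_data zero-bases each input window. A minimal sketch of that normalization, assuming the common convention of expressing every window relative to its first element (the helper name extract_windows is hypothetical):

import numpy as np

def extract_windows(series, window_len, zero_base=True):
    """Slice a 1-D array into overlapping windows; with zero_base, each
    window becomes w / w[0] - 1, which is what multiplying the model
    output by (preds + 1) times the window's base price undoes."""
    windows = []
    for i in range(len(series) - window_len):
        window = np.asarray(series[i:i + window_len], dtype=float)
        if zero_base:
            window = window / window[0] - 1
        windows.append(window)
    return np.array(windows)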
Example No. 7
from qual import qual_matrix
from plot import line_plot

import sys

algos = ["FSHD", "SHOT", "VFH", "ESF", "FPFH"]
files2 = sys.argv[2:]
files = []
for f in files2:
    #if f.find("freiburg2_desk")==-1 and f.find("freiburg2_large_no_loop")==-1:
    if f.find("pioneer") == -1:
        files.append(f)

cols = [[6, "avg. deviation [m]"], [3, "med. deviation [m]"], [7, "precision"],
        [9, "recall"], [11, "true negative rate"], [13, "accuracy"]]

for C in cols:
    gnuplot = line_plot(sys.argv[1] + "_" + str(C[0]))
    gnuplot.set_labels("search radius [m]", C[1])
    gnuplot.set_tics(0.1)
    gnuplot.add_user('set grid x y')

    for ft in algos:
        R = qual_matrix(files, ft, C[0])
        gnuplot.add_data(R, ft, 'linespoints')

    gnuplot.add_attr('lc rgb "black"')
    gnuplot.close()

for dist in [[0.3, 2, 1], [0.25, 2, 1], [0.35, 2, 1], [0.7, 2, 1], [0.8, 2, 1]]:
    gnuplot = line_plot(sys.argv[1] + "_pr_" + str(dist[0]))
    gnuplot.set_labels("search radius [m]", "re")
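Unlike the matplotlib helper in the earlier examples, line_plot here is clearly a gnuplot-script builder. A minimal hypothetical sketch covering only a few of the methods exercised above (the real plot.line_plot class is not shown in these snippets):

import subprocess

class line_plot:
    """Hypothetical sketch of the gnuplot wrapper used above."""

    def __init__(self, name):
        self.name = name
        self.header = ['set terminal png', f'set output "{name}.png"']
        self.series = []

    def set_labels(self, xlabel, ylabel):
        self.header += [f'set xlabel "{xlabel}"', f'set ylabel "{ylabel}"']

    def set_tics(self, step):
        self.header.append(f'set xtics {step}')

    def add_user(self, command):
        # Pass an arbitrary gnuplot command through verbatim.
        self.header.append(command)

    def add_data(self, rows, title, style):
        # Dump the rows to a data file and register it for the plot command.
        path = f'{self.name}_{title}.dat'
        with open(path, 'w') as f:
            f.writelines(' '.join(str(v) for v in row) + '\n' for row in rows)
        self.series.append(f'"{path}" title "{title}" with {style}')

    def close(self):
        script = '\n'.join(self.header + ['plot ' + ', '.join(self.series)])
        subprocess.run(['gnuplot'], input=script.encode(), check=False)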
Example No. 8
from plot import line_plot
import numpy as np
from constants import RESULT_DIR

makespan = np.loadtxt('makespan.txt')
complete_time = np.loadtxt('complete_time.txt')
all_episode_rewards = np.loadtxt('all_episode_rewards.txt')

# line_plot(np.arange(len(makespan)), makespan, "episode", "makespan", "Makespan")
for i in range(all_episode_rewards.shape[1]):
    line_plot(np.arange(len(makespan)), all_episode_rewards[:, i], "episode", "reward", f"rewards of machine {i}")
    # line_plot(np.arange(len(makespan)), complete_time[:, i], "episode", "complete time", f"complete time of machine {i}")
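In this script line_plot takes x/y data followed by axis labels and a title. A minimal matplotlib sketch matching that positional signature (hypothetical; the real plot.line_plot may also save the figure, e.g. under RESULT_DIR):

import matplotlib.pyplot as plt

def line_plot(x, y, xlabel, ylabel, title):
    """Draw y against x with labeled axes and a title."""
    fig, ax = plt.subplots()
    ax.plot(x, y)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    plt.show()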
Example No. 9
def run_logistic_regression(feature_matrix,
                            output_colvec,
                            num_examples,
                            num_features,
                            num_iters,
                            fig,
                            subplot,
                            theta_colvec=None,
                            debug=False,
                            uv_vals=None,
                            degree=None,
                            regularization_param=0):
    """Run Logistic Regression.

    1) num_examples - number of training samples
    2) num_features - number of features
    3) feature_matrix - num_examples x (num_features + 1)
    4) output_colvec - num_examples x 1 col vector
    5) num_iters - number of iterations
    6) theta_colvec - (num_features + 1) x 1 col vector
                      initial values of theta
    7) regularization_param - L2 regularization strength
    8) debug - print debug info

    alpha is selected internally by gradient_descent_alphas.
    """
    def get_z_values(u_val, v_val):
        return (util.add_features(u_val, v_val, degree) @ theta_colvec)[0, 0]

    print('Running Logistic Regression...')

    if theta_colvec is None:
        theta_colvec = np.zeros(shape=(num_features + 1, 1))

    theta_colvec, alpha, cost, thetas, alphas, cost_hist = \
        gradient_descent_alphas(feature_matrix, output_colvec,
                                num_examples, num_features,
                                num_iters, theta_colvec,
                                transform=sigmoid,
                                cost_func=cross_entropy,
                                regularization_param=regularization_param,
                                debug=debug, debug_print=debug)
    print(f'Theta found by gradient descent(alpha={alpha}, '
          f'cost={cost}) : {theta_colvec}')
    if debug:
        print(f'cost history : {cost_hist}')

    if num_features == 2:
        # With 2 features. Decision boundary is
        #            theta-0 + theta-1*x1 + theta-2*x2 >= 0
        # to draw a line we need 2 points.
        # take the min and max of the first feature
        # x2 = -1/theta-2*(theta-0 + theta-1*x1)
        xdata_colvec = np.reshape(
            [np.min(feature_matrix[:, 1]),
             np.max(feature_matrix[:, 1])],
            newshape=(2, 1))
        ydata_colvec = -(theta_colvec[0, 0] +
                         theta_colvec[1, 0] * xdata_colvec) / theta_colvec[2, 0]
        line_plot(xdata_colvec,
                  ydata_colvec,
                  marker='x',
                  label='Logistic regression',
                  color='r',
                  markersize=2,
                  fig=fig,
                  subplot=subplot,
                  linewidth=1)
        util.pause('Program paused. Press enter to continue.')
    elif num_features > 2 and degree:
        if not uv_vals:
            uv_vals = [0, 0]
            uv_vals[0] = np.linspace(-2, 2, 50)
            uv_vals[1] = np.linspace(-2, 2, 50)

        fig, subplot = contour_plot(uv_vals[0],
                                    uv_vals[1],
                                    get_z_values,
                                    levels=0,
                                    fig=fig,
                                    subplot=subplot)

    return theta_colvec, alpha, cost, thetas, alphas, cost_hist
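The sigmoid transform and cross_entropy cost passed to gradient_descent_alphas follow the standard logistic regression definitions. A minimal sketch (the exact signatures expected by the helper are assumptions):

import numpy as np

def sigmoid(z):
    """Logistic function, mapping any real value into (0, 1)."""
    return 1.0 / (1.0 + np.exp(-z))

def cross_entropy(predictions, output_colvec):
    """Mean binary cross-entropy:
    -1/m * sum(y*log(h) + (1 - y)*log(1 - h))."""
    m = output_colvec.shape[0]
    return -(output_colvec.T @ np.log(predictions) +
             (1 - output_colvec).T @ np.log(1 - predictions))[0, 0] / m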
Example No. 10
#!/usr/bin/python

from qual import qual_matrix, time_matrix
from plot import line_plot

import sys

#algos=["FSHD","SHOT","VFH","ESF","FPFH"]
algos = ["FSHD"]
date = sys.argv[2]
files = sys.argv[3:]
params = [2, 4, 8, 16, 32, 64, 128, 256]

gnuplotT = line_plot(sys.argv[1] + "-RADII-timing")
gnuplotT.set_labels("radii", "exec. time [s]")
gnuplotT.set_logscale('x', 2)

gnuplot = line_plot(sys.argv[1] + "-RADII")
gnuplot.set_labels("radii", "descriptor distance [L2]", "dim. of feature")
gnuplot.set_range('y', 0.5, 0.8)
gnuplot.set_logscale('x', 2)
gnuplot.set_logscale('x2', 2)
gnuplot.add_user('set grid x y2')
gnuplot.add_eq('x*32*32 axes x2y2')
for ft in algos:
    D = {}
    T = {}
    for param in params:
        print "Param: ", param
        subset_files = []
        for f in files:
Example No. 11
signal.signal(signal.SIGINT, signal_handler)

# set start time
start_time = time.time() - wall_t

for t in train_threads:
    t.start()

print('Press Ctrl+C to stop')
signal.pause()

# makespan = complete_time.max(axis=1)
# print("complete time: \n={}".format(complete_time))
# print("complete time for each episode: \n={}".format(makespan))

line_plot(np.arange(len(all_episode_rewards)), tuple(all_episode_rewards),
          "episode", "reward", "all_episode_rewards")

# np.savetxt('makespan.txt', makespan)
# np.savetxt('complete_time.txt', complete_time)
# np.savetxt('all_episode_rewards.txt', all_episode_rewards)

print('Now saving data. Please wait')

with open('all_episode_rewards.pickle', 'wb') as f:
    f.truncate()
    pickle.dump(all_episode_rewards, f)

for t in train_threads:
    t.join()

if not os.path.exists(CHECKPOINT_DIR):
Example No. 12
def run_cost_analysis(feature_matrix,
                      output_colvec,
                      num_features,
                      theta_colvec,
                      cost_hist,
                      dataset_title,
                      theta_vals=None):
    """Visualize Cost data using contour and sureface plots."""
    def get_z_values(theta0, theta1):
        return util.compute_cost(feature_matrix,
                                 output_colvec,
                                 np.reshape([theta0, theta1], newshape=(2, 1)),
                                 transform_func=identity,
                                 cost_func=mean_squared_error)

    if cost_hist is not None:
        fig, subplot = \
            line_plot(np.reshape(range(1, np.size(cost_hist) + 1),
                                 newshape=(np.size(cost_hist), 1)),
                      cost_hist,
                      xlabel='Number Of Iterations',
                      ylabel='Cost J',
                      marker='x', markersize=2,
                      title=f'{dataset_title}\nConvergence Graph',
                      color='b')
        util.pause('Program paused. Press enter to continue.')
        close_plot(fig)

    if num_features > 1:
        print('Detailed cost analysis is only supported for 1 feature!')
        return None, None

    if not theta_vals:
        theta_vals = [0, 0]
        theta_vals[0] = np.linspace(-10, 10, 100)
        theta_vals[1] = np.linspace(-1, 4, 100)

    fig, subplot = surface_plot(theta_vals[0],
                                theta_vals[1],
                                get_z_values,
                                title=dataset_title,
                                xlabel='theta_0',
                                ylabel='theta_1')
    util.pause('Program paused. Press enter to continue.')
    close_plot(fig)

    fig, subplot = contour_plot(theta_vals[0],
                                theta_vals[1],
                                get_z_values,
                                title=dataset_title,
                                levels=np.logspace(-2, 3, 20))
    fig, subplot = line_plot(theta_colvec[0],
                             theta_colvec[1],
                             marker='x',
                             color='r',
                             title=dataset_title,
                             fig=fig,
                             subplot=subplot)
    util.pause('Program paused. Press enter to continue.')
    return fig, subplot
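util.compute_cost with the identity transform and mean_squared_error presumably evaluates the standard linear regression cost J(theta) = 1/(2m) * sum((X @ theta - y)^2). A minimal sketch under that assumption, ignoring the pluggable transform_func/cost_func arguments:

import numpy as np

def compute_cost(feature_matrix, output_colvec, theta_colvec):
    """J(theta) = 1/(2m) * sum of squared residuals."""
    m = output_colvec.shape[0]
    residuals = feature_matrix @ theta_colvec - output_colvec
    return (residuals.T @ residuals)[0, 0] / (2 * m)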
Example No. 13
    residuals = [
        line[1] - mileage_predict.linear_prediction(line[0], theta0, theta1)
        for line in data
    ]
    mean_squared_error = sum([residual**2
                              for residual in residuals]) / len(residuals)
    return mean_squared_error


if __name__ == '__main__':

    theta = theta_reader()
    data = data_reader()

    # A lower number of iterations might work, but why change it?
    for count in range(0, 1000):
        theta = model_trainer(theta['theta0'], theta['theta1'], data)
        if count % 100 == 0:
            print('Count: %i' % count)
            print('Theta0: %f    –    Theta1: %f' %
                  (theta['theta0'], theta['theta1']))

    theta = theta_unscale(theta['theta0'], theta['theta1'], data)
    theta_writer(theta['theta0'], theta['theta1'])
    print('Mean Square Error: %f' %
          mse_calculator(theta['theta0'], theta['theta1'], data))

    plot.line_plot()
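mileage_predict.linear_prediction, referenced in the residual computation above, is presumably the univariate hypothesis h(x) = theta0 + theta1 * x. A one-line sketch under that assumption:

def linear_prediction(mileage, theta0, theta1):
    """Univariate linear regression hypothesis: theta0 + theta1 * x."""
    return theta0 + theta1 * mileage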
Example No. 14
ncov_df[data_cols] = ncov_df[data_cols].astype(int)
ncov_df = ncov_df.loc[:, ["Date", "Country", "Province", *data_cols]]
print(ncov_df.tail())
print(ncov_df.info())
print(ncov_df.describe(include="all").fillna("-"))
print(pd.DataFrame(ncov_df.isnull().sum()).T)
print(", ".join(ncov_df["Country"].unique().tolist()))

total_df = ncov_df.loc[ncov_df["Country"] != "China", :].groupby("Date").sum()
total_df[rate_cols[0]] = total_df["Deaths"] / total_df[data_cols].sum(axis=1)
total_df[rate_cols[1]] = total_df["Recovered"] / total_df[data_cols].sum(
    axis=1)
total_df[rate_cols[2]] = total_df["Deaths"] / (total_df["Deaths"] +
                                               total_df["Recovered"])
print(total_df.tail())
line_plot(total_df[data_cols], title="Cases over time (Total except China)")
line_plot(total_df[rate_cols],
          "Rate over time (Total except China)",
          ylabel="",
          math_scale=False)
total_df[rate_cols].plot.kde()
plt.title("Kernel density estimation of the rates (Total except China)")
plt.show()

population_date = "15Mar2020"
_dict = {
    "Global": "7,794,798,729",
    "China": "1,439,323,774",
    "Japan": "126,476,458",
    "South Korea": "51,269,182",
    "Italy": "60,461,827",