def run_cost_analysis(alphas, cost_hist, dataset_title):
    """Run cost analysis based on learnt values of theta."""
    min_cost = np.zeros(shape=(1, np.size(alphas)))
    if cost_hist is not None:
        fig = None
        subplot = None
        colors = get_colors(np.shape(alphas)[1])
        for index in range(0, np.shape(alphas)[1]):
            min_cost[0, index] = np.min(cost_hist[:, index])
            fig, subplot = \
                line_plot(np.reshape(range(1, np.shape(cost_hist)[0] + 1),
                                     newshape=(np.shape(cost_hist)[0], 1)),
                          cost_hist[:, index],
                          xlabel='Number Of Iterations',
                          ylabel='Cost J',
                          marker='x', markersize=2,
                          title=f'{dataset_title}\nConvergence Graph',
                          color=colors[index],
                          label=f'alpha={alphas[0, index]}',
                          linewidth=1,
                          fig=fig, subplot=subplot)
        util.pause('Program paused. Press enter to continue.')
        close_plot(fig)

    fig, subplot = \
        line_plot(alphas.transpose(), min_cost.transpose(),
                  xlabel='alpha', ylabel='cost',
                  marker='x', markersize=2, color='r',
                  title=f'{dataset_title}\nAlphas Vs Cost',
                  linewidth=2)
    util.pause('Program paused. Press enter to continue.')
    close_plot(fig)
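# get_colors is defined elsewhere in this project. A minimal sketch,
# assuming matplotlib is available: pull one distinct color per alpha
# value from a qualitative colormap (name and behavior are assumptions).
from matplotlib import cm

def get_colors(count):
    """Return `count` distinct RGBA colors (hypothetical helper sketch)."""
    return [cm.tab10(i % 10) for i in range(count)]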
def plot_all(self):
    neval = []
    function_value = []
    for r in self.list_of_results:
        neval.append(r[3])
        function_value.append(r[2])
    plot.line_plot(neval, function_value, xaxis='Evaluations', yaxis='f(x)')
def run_gradient_descent(feature_matrix, output_colvec, num_examples,
                         num_features, alpha, num_iters, fig, subplot,
                         theta_colvec=None, normal_eq=False, debug=False):
    """Run Gradient Descent/Normal Equation.

    1) num_examples - number of training samples
    2) num_features - number of features
    3) feature_matrix - num_examples x (num_features + 1)
    4) output_colvec - num_examples x 1 col vector
    5) alpha - alpha value for gradient descent
    6) num_iters - number of iterations
    7) theta_colvec - (num_features + 1) x 1 col vector
                      initial values of theta
    8) debug - print debug info
    """
    print('Running Gradient Descent ...')

    # 'not theta_colvec' is ambiguous for numpy arrays; test against None.
    if theta_colvec is None:
        theta_colvec = np.zeros(shape=(num_features + 1, 1))

    cost_hist = None
    if normal_eq:
        theta_colvec = normal_equation(feature_matrix, output_colvec)
        print(f'Theta found by normal equation : {theta_colvec}')
    else:
        theta_colvec, cost_hist = \
            gradient_descent(feature_matrix, output_colvec,
                             num_examples, num_features,
                             alpha, num_iters, theta_colvec,
                             debug=debug)
        print(f'Theta found by gradient descent: {theta_colvec}')

    if num_features == 1:
        line_plot(feature_matrix[:, 1], feature_matrix @ theta_colvec,
                  marker='x', label='Linear regression',
                  color='b', markersize=2,
                  fig=fig, subplot=subplot)
        util.pause('Program paused. Press enter to continue.')

    return theta_colvec, cost_hist
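# normal_equation and gradient_descent are imported from elsewhere in the
# project. Minimal numpy sketches of both, assuming the shapes documented
# above (closed form: theta = (X^T X)^-1 X^T y; batch update:
# theta -= alpha/m * X^T (X theta - y)):
def normal_equation(feature_matrix, output_colvec):
    """Closed-form least squares solution (hypothetical helper sketch)."""
    return np.linalg.pinv(feature_matrix.T @ feature_matrix) \
        @ feature_matrix.T @ output_colvec

def gradient_descent(feature_matrix, output_colvec, num_examples,
                     num_features, alpha, num_iters, theta_colvec,
                     debug=False):
    """Batch gradient descent with cost history (hypothetical sketch)."""
    cost_hist = np.zeros(shape=(num_iters, 1))
    for i in range(num_iters):
        errors = feature_matrix @ theta_colvec - output_colvec
        theta_colvec = theta_colvec - \
            (alpha / num_examples) * (feature_matrix.T @ errors)
        # half mean squared error of the current fit
        cost_hist[i, 0] = (errors.T @ errors)[0, 0] / (2 * num_examples)
    return theta_colvec, cost_hist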
def restore_graph(self, drop_cols=None):
    """
    Show the dimensional simulated data as a figure.

    @drop_cols <list[str]>: the columns not to be shown
    """
    df = self.restore_df()
    if drop_cols is not None:
        df = df.drop(drop_cols, axis=1)
    line_plot(
        df,
        title=f"{self.name}: {', '.join(self.title_list)}",
        v=datetime.now(),
        h=self.total_population
    )
def predict_graph(self, step_n, name=None, excluded_cols=None):
    """
    Predict the values in the future and create a figure.

    @step_n <int>: the number of steps
    @name <str>: name of the area
    @excluded_cols <list[str]>: the excluded columns in the figure
    """
    # Prefer the area name set on the instance; otherwise fall back to
    # the argument, or an empty string if neither is given.
    if self.name is not None:
        name = self.name
    else:
        name = str() if name is None else name
    df = self.predict_df(step_n=step_n)
    if excluded_cols is not None:
        df = df.drop(excluded_cols, axis=1)
    r0 = self.param_dict["R0"]
    title = f"Prediction in {name} with {self.model.NAME} model: R0 = {r0}"
    line_plot(df, title, v=datetime.today(), h=self.total_population)
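# The line_plot helper used by restore_graph/predict_graph takes a
# DataFrame, a title, and optional vertical (v) / horizontal (h) marker
# positions. Its definition is not in this excerpt; a minimal matplotlib
# sketch under those assumptions (parameter names are guesses from the
# call sites):
import matplotlib.pyplot as plt

def line_plot(df, title, v=None, h=None, ylabel="Cases", math_scale=True):
    """Plot every DataFrame column as a line (hypothetical sketch)."""
    ax = df.plot()                      # one line per column, index on x
    if math_scale:
        ax.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
    if v is not None:
        ax.axvline(x=v, color="black", linestyle=":")   # e.g. today's date
    if h is not None:
        ax.axhline(y=h, color="black", linestyle=":")   # e.g. population
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    plt.tight_layout()
    plt.show()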
def main():
    hist = crawl()

    # split data
    train, test = train_test_split(hist, test_size=0.2)
    pd.plotting.register_matplotlib_converters()
    target_col = 'close'
    line_plot(train[target_col], test[target_col], 'training', 'test', title='')

    # model hyperparameters, including the number of neurons in the LSTM layer
    np.random.seed(42)
    window_len = 5
    test_size = 0.2
    zero_base = True
    lstm_neurons = 100
    epochs = 20
    batch_size = 32
    loss = 'mse'
    dropout = 0.2
    optimizer = 'adam'

    # train model
    train, test, X_train, X_test, y_train, y_test = prepare_data(
        hist, target_col, window_len=window_len,
        zero_base=zero_base, test_size=test_size)
    model = build_lstm_model(
        X_train, output_size=1, neurons=lstm_neurons,
        dropout=dropout, loss=loss, optimizer=optimizer)
    history = model.fit(
        X_train, y_train, epochs=epochs,
        batch_size=batch_size, verbose=1, shuffle=True)

    # mean absolute error on the held-out windows
    targets = test[target_col][window_len:]
    preds = model.predict(X_test).squeeze()
    mae = mean_absolute_error(y_test, preds)
    print(f'MAE: {mae}')

    # undo the zero-base scaling, then plot actual vs predicted prices
    preds = test[target_col].values[:-window_len] * (preds + 1)
    preds = pd.Series(index=targets.index, data=preds)
    line_plot(targets, preds, 'actual', 'prediction', lw=3)
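# prepare_data and build_lstm_model are defined elsewhere. A minimal sketch
# of the windowing step implied by the zero_base flag above: each window is
# divided by its first row and shifted so it starts at 0, which is also why
# the predictions are rescaled with `values[:-window_len] * (preds + 1)`.
# Function name and signature are assumptions.
def extract_window_data(df, window_len=5, zero_base=True):
    """Slice df into overlapping windows (hypothetical helper sketch)."""
    windows = []
    for idx in range(len(df) - window_len):
        window = df[idx:idx + window_len].copy()
        if zero_base:
            window = window / window.iloc[0] - 1   # normalise to window start
        windows.append(window.values)
    return np.array(windows)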
from qual import qual_matrix
from plot import line_plot
import sys

algos = ["FSHD", "SHOT", "VFH", "ESF", "FPFH"]
files2 = sys.argv[2:]
files = []
for f in files2:
    # if f.find("freiburg2_desk") == -1 and f.find("freiburg2_large_no_loop") == -1:
    if f.find("pioneer") == -1:
        files.append(f)

cols = [[6, "avg. deviation [m]"], [3, "med. deviation [m]"],
        [7, "precision"], [9, "recall"],
        [11, "true negative rate"], [13, "accuracy"]]

for C in cols:
    gnuplot = line_plot(sys.argv[1] + "_" + str(C[0]))
    gnuplot.set_labels("search radius [m]", C[1])
    gnuplot.set_tics(0.1)
    gnuplot.add_user('set grid x y')

    for ft in algos:
        R = qual_matrix(files, ft, C[0])
        gnuplot.add_data(R, ft, 'linespoints')
        gnuplot.add_attr('lc rgb "black"')  # closing quote was missing

    gnuplot.close()

for dist in [[0.3, 2, 1], [0.25, 2, 1], [0.35, 2, 1], [0.7, 2, 1], [0.8, 2, 1]]:
    gnuplot = line_plot(sys.argv[1] + "_pr_" + str(dist[0]))
    gnuplot.set_labels("search radius [m]", "re")
from plot import line_plot
import numpy as np
from constants import RESULT_DIR

makespan = np.loadtxt('makespan.txt')
complete_time = np.loadtxt('complete_time.txt')
all_episode_rewards = np.loadtxt('all_episode_rewards.txt')

# line_plot(np.arange(len(makespan)), makespan, "episode", "makespan", "Makespan")
for i in range(all_episode_rewards.shape[1]):
    line_plot(np.arange(len(makespan)), all_episode_rewards[:, i],
              "episode", "reward", f"rewards of machine {i}")
    # line_plot(np.arange(len(makespan)), complete_time[:, i],
    #           "episode", "reward", f"rewards of machine {i}")
def run_logistic_regression(feature_matrix, output_colvec, num_examples,
                            num_features, num_iters, fig, subplot,
                            theta_colvec=None, debug=False,
                            uv_vals=None, degree=None,
                            regularization_param=0):
    """Run Logistic Regression.

    1) num_examples - number of training samples
    2) num_features - number of features
    3) feature_matrix - num_examples x (num_features + 1)
    4) output_colvec - num_examples x 1 col vector
    5) num_iters - number of iterations
    6) theta_colvec - (num_features + 1) x 1 col vector
                      initial values of theta
    7) debug - print debug info
    8) uv_vals - grid values for the decision-boundary contour
    9) degree - polynomial degree used by util.add_features
    10) regularization_param - lambda for regularization
    """
    def get_z_values(u_val, v_val):
        return (util.add_features(u_val, v_val, degree)
                @ theta_colvec)[0, 0]

    print('Running Logistic Regression...')

    # 'not theta_colvec' is ambiguous for numpy arrays; test against None.
    if theta_colvec is None:
        theta_colvec = np.zeros(shape=(num_features + 1, 1))

    cost_hist = None
    theta_colvec, alpha, cost, thetas, alphas, cost_hist = \
        gradient_descent_alphas(feature_matrix, output_colvec,
                                num_examples, num_features,
                                num_iters, theta_colvec,
                                transform=sigmoid,
                                cost_func=cross_entropy,
                                regularization_param=regularization_param,
                                debug=debug, debug_print=debug)
    print(f'Theta found by gradient descent(alpha={alpha}, '
          f'cost={cost}) : {theta_colvec}')
    if debug:
        print(f'cost history : {cost_hist}')

    if num_features == 2:
        # With 2 features, the decision boundary is
        #   theta_0 + theta_1*x1 + theta_2*x2 >= 0.
        # To draw the line we need 2 points: take the min and max of the
        # first feature, then x2 = -(theta_0 + theta_1*x1)/theta_2.
        xdata_colvec = np.reshape(
            [np.min(feature_matrix[:, 1]), np.max(feature_matrix[:, 1])],
            newshape=(2, 1))
        ydata_colvec = -(theta_colvec[0, 0] +
                         (theta_colvec[1, 0] * xdata_colvec)) / \
            theta_colvec[2, 0]
        line_plot(xdata_colvec, ydata_colvec,
                  marker='x', label='Logistic regression',
                  color='r', markersize=2,
                  fig=fig, subplot=subplot, linewidth=1)
        util.pause('Program paused. Press enter to continue.')
    elif num_features > 2 and degree:
        if not uv_vals:
            uv_vals = [0, 0]
            uv_vals[0] = np.linspace(-2, 2, 50)
            uv_vals[1] = np.linspace(-2, 2, 50)
        fig, subplot = contour_plot(uv_vals[0], uv_vals[1],
                                    get_z_values, levels=0,
                                    fig=fig, subplot=subplot)

    return theta_colvec, alpha, cost, thetas, alphas, cost_hist
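# sigmoid and cross_entropy are imported from elsewhere in the project. A
# minimal numpy sketch of both, assuming the column-vector shapes used above:
def sigmoid(z_vals):
    """Logistic transform 1 / (1 + e^-z) (hypothetical helper sketch)."""
    return 1 / (1 + np.exp(-z_vals))

def cross_entropy(predictions, output_colvec):
    """Mean cross-entropy -[y log(h) + (1-y) log(1-h)] (sketch)."""
    num_examples = np.shape(output_colvec)[0]
    return -(output_colvec.T @ np.log(predictions) +
             (1 - output_colvec).T @ np.log(1 - predictions))[0, 0] \
        / num_examples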
#!/usr/bin/python
from qual import qual_matrix, time_matrix
from plot import line_plot
import sys

# algos = ["FSHD", "SHOT", "VFH", "ESF", "FPFH"]
algos = ["FSHD"]
date = sys.argv[2]
files = sys.argv[3:]
params = [2, 4, 8, 16, 32, 64, 128, 256]

gnuplotT = line_plot(sys.argv[1] + "-RADII-timing")
gnuplotT.set_labels("radii", "exec. time [s]")
gnuplotT.set_logscale('x', 2)

gnuplot = line_plot(sys.argv[1] + "-RADII")
gnuplot.set_labels("radii", "descriptor distance [L2]", "dim. of feature")
gnuplot.set_range('y', 0.5, 0.8)
gnuplot.set_logscale('x', 2)
gnuplot.set_logscale('x2', 2)
gnuplot.add_user('set grid x y2')
gnuplot.add_eq('x*32*32 axes x2y2')

for ft in algos:
    D = {}
    T = {}
    for param in params:
        print("Param:", param)
        subset_files = []
        for f in files:
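# The plot.line_plot wrapper used in both scripts above is not part of this
# excerpt. A skeleton inferred purely from the call sites (every method body
# here is an assumption), buffering gnuplot commands for one figure:
class line_plot:
    def __init__(self, name):
        self.name = name
        self.cmds = ['set terminal pdf', 'set output "%s.pdf"' % name]

    def set_labels(self, xlabel, ylabel, y2label=None):
        self.cmds.append('set xlabel "%s"' % xlabel)
        self.cmds.append('set ylabel "%s"' % ylabel)
        if y2label is not None:
            self.cmds.append('set y2label "%s"' % y2label)

    def set_logscale(self, axis, base):
        self.cmds.append('set logscale %s %d' % (axis, base))

    def add_user(self, raw_cmd):
        # escape hatch for raw gnuplot commands, e.g. 'set grid x y'
        self.cmds.append(raw_cmd)

    def close(self):
        with open(self.name + '.gp', 'w') as script:
            script.write('\n'.join(self.cmds))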
signal.signal(signal.SIGINT, signal_handler)

# set start time
start_time = time.time() - wall_t

for t in train_threads:
    t.start()

print('Press Ctrl+C to stop')
signal.pause()

# makespan = complete_time.max(axis=1)
# print("complete time: \n{}".format(complete_time))
# print("complete time for each episode: \n{}".format(makespan))
line_plot(np.arange(len(all_episode_rewards)), tuple(all_episode_rewards),
          "episode", "reward", "all_episode_rewards")
# np.savetxt('makespan.txt', makespan)
# np.savetxt('complete_time.txt', complete_time)
# np.savetxt('all_episode_rewards.txt', all_episode_rewards)

print('Now saving data. Please wait')
with open('all_episode_rewards.pickle', 'wb') as f:
    f.truncate()
    pickle.dump(all_episode_rewards, f)

for t in train_threads:
    t.join()

if not os.path.exists(CHECKPOINT_DIR):
def run_cost_analysis(feature_matrix, output_colvec, num_features,
                      theta_colvec, cost_hist, dataset_title,
                      theta_vals=None):
    """Visualize cost data using contour and surface plots."""
    def get_z_values(theta0, theta1):
        return util.compute_cost(feature_matrix, output_colvec,
                                 np.reshape([theta0, theta1],
                                            newshape=(2, 1)),
                                 transform_func=identity,
                                 cost_func=mean_squared_error)

    if cost_hist is not None:
        fig, subplot = \
            line_plot(np.reshape(range(1, np.size(cost_hist) + 1),
                                 newshape=(np.size(cost_hist), 1)),
                      cost_hist,
                      xlabel='Number Of Iterations',
                      ylabel='Cost J',
                      marker='x', markersize=2,
                      title=f'{dataset_title}\nConvergence Graph',
                      color='b')
        util.pause('Program paused. Press enter to continue.')
        close_plot(fig)

    if num_features > 1:
        print('Detailed cost analysis is only supported for 1 feature!!')
        return None, None

    if not theta_vals:
        theta_vals = [0, 0]
        theta_vals[0] = np.linspace(-10, 10, 100)
        theta_vals[1] = np.linspace(-1, 4, 100)

    fig, subplot = surface_plot(theta_vals[0], theta_vals[1],
                                get_z_values, title=dataset_title,
                                xlabel='theta_0', ylabel='theta_1')
    util.pause('Program paused. Press enter to continue.')
    close_plot(fig)

    fig, subplot = contour_plot(theta_vals[0], theta_vals[1],
                                get_z_values, title=dataset_title,
                                levels=np.logspace(-2, 3, 20))
    fig, subplot = line_plot(theta_colvec[0], theta_colvec[1],
                             marker='x', color='r',
                             title=dataset_title,
                             fig=fig, subplot=subplot)
    util.pause('Program paused. Press enter to continue.')
    return fig, subplot
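# identity and mean_squared_error are passed into util.compute_cost above
# but defined elsewhere. Minimal numpy sketches of both, assuming the usual
# linear-regression cost J(theta) = 1/(2m) * sum((X @ theta - y)^2):
def identity(vals):
    """No-op transform used for plain linear regression (sketch)."""
    return vals

def mean_squared_error(predictions, output_colvec):
    """Half mean squared error cost (hypothetical helper sketch)."""
    num_examples = np.shape(output_colvec)[0]
    errors = predictions - output_colvec
    return (errors.T @ errors)[0, 0] / (2 * num_examples)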
def mse_calculator(theta0, theta1, data):
    residuals = [
        line[1] - mileage_predict.linear_prediction(line[0], theta0, theta1)
        for line in data
    ]
    mean_squared_error = sum(residual**2 for residual in residuals) / len(residuals)
    return mean_squared_error


if __name__ == '__main__':
    theta = theta_reader()
    data = data_reader()
    for count in range(0, 1000):
        # A lower number of iterations might work, but why change it?
        theta = model_trainer(theta['theta0'], theta['theta1'], data)
        if count % 100 == 0:
            print('Count: %i' % count)
            print('Theta0: %f - Theta1: %f' % (theta['theta0'], theta['theta1']))
    theta = theta_unscale(theta['theta0'], theta['theta1'], data)
    theta_writer(theta['theta0'], theta['theta1'])
    print('Mean Square Error: %f' %
          mse_calculator(theta['theta0'], theta['theta1'], data))
    plot.line_plot()
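# mileage_predict.linear_prediction is imported from a sibling module. Its
# hypothesis is implied by the residual computation above; a one-line sketch:
def linear_prediction(mileage, theta0, theta1):
    """Predicted price for a mileage: h(x) = theta0 + theta1 * x (sketch)."""
    return theta0 + theta1 * mileage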
ncov_df[data_cols] = ncov_df[data_cols].astype(int)
ncov_df = ncov_df.loc[:, ["Date", "Country", "Province", *data_cols]]
print(ncov_df.tail())
print(ncov_df.info())
ncov_df.describe(include="all").fillna("-")
pd.DataFrame(ncov_df.isnull().sum()).T
", ".join(ncov_df["Country"].unique().tolist())

total_df = ncov_df.loc[ncov_df["Country"] != "China", :].groupby("Date").sum()
total_df[rate_cols[0]] = total_df["Deaths"] / total_df[data_cols].sum(axis=1)
total_df[rate_cols[1]] = total_df["Recovered"] / total_df[data_cols].sum(axis=1)
total_df[rate_cols[2]] = total_df["Deaths"] / (total_df["Deaths"] +
                                               total_df["Recovered"])
total_df.tail()

line_plot(total_df[data_cols], title="Cases over time (Total except China)")
line_plot(total_df[rate_cols], "Rate over time (Total except China)",
          ylabel="", math_scale=False)

total_df[rate_cols].plot.kde()
plt.title("Kernel density estimation of the rates (Total except China)")
plt.show()

population_date = "15Mar2020"
_dict = {
    "Global": "7,794,798,729",
    "China": "1,439,323,774",
    "Japan": "126,476,458",
    "South Korea": "51,269,182",
    "Italy": "60,461,827",