def plot_training_history(training_history, loss_function="", show=True, save=False):
    fig = plt.figure(figsize=(16, 6))

    # Loss curves (left panel)
    loss_ax = fig.add_subplot(121)
    Print.data(list(training_history.history.keys()))
    loss_ax.plot(training_history.history['loss'], 'r', linewidth=3.0)
    loss_ax.plot(training_history.history['val_loss'], 'b', linewidth=3.0)
    loss_ax.legend(['Training Loss', 'Validation Loss'], fontsize=18)
    loss_ax.set_xlabel('Epochs', fontsize=16)
    loss_ax.set_ylabel('Loss', fontsize=16)
    loss_ax.set_title('Loss Curves: {}'.format(loss_function), fontsize=16)

    # Accuracy curves (right panel)
    acc_ax = fig.add_subplot(122)
    acc_ax.plot(training_history.history['acc'], 'r', linewidth=3.0)
    acc_ax.plot(training_history.history['val_acc'], 'b', linewidth=3.0)
    acc_ax.legend(['Training Accuracy', 'Validation Accuracy'], fontsize=18)
    acc_ax.set_xlabel('Epochs', fontsize=16)
    acc_ax.set_ylabel('Accuracy', fontsize=16)
    acc_ax.set_title('Accuracy Curves', fontsize=16)

    plt.tight_layout()

    if save:
        create_path_if_not_existing(Path.plots)
        fp = "/".join([Path.plots, save])
        plt.savefig(fp, format="png", dpi=400)
    if show:
        plt.show()
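
# --- Added usage sketch (illustrative, not part of the original module) ---
# plot_training_history expects a Keras-style History object whose `history`
# dict contains 'loss'/'val_loss' and 'acc'/'val_acc'. Note that newer Keras
# versions report 'accuracy'/'val_accuracy' instead of 'acc'/'val_acc', so the
# lookups above assume an older Keras. FakeHistory is a hypothetical stand-in
# so the helper can be exercised without training a model.
def _demo_plot_training_history():
    class FakeHistory:
        history = {
            'loss': [1.2, 0.8, 0.5, 0.4],
            'val_loss': [1.3, 0.9, 0.7, 0.65],
            'acc': [0.40, 0.60, 0.72, 0.80],
            'val_acc': [0.35, 0.55, 0.64, 0.70],
        }
    plot_training_history(FakeHistory(), loss_function="mse", show=True)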
def generate_reproduction(self):
    fn = self.filename("exp_set_reproduction", "md")
    Print.data(fn)
    fp = "/".join([self.path, fn])

    # Build a runnable snippet that reconstructs this experiment set
    code = "params = "
    code += json.dumps(self.exp_set.reproduction_params(), indent=4) + "\n\n"
    code += "exp_set = ExperimentSet(cv_splits={}, **params)\n".format(
        self.exp_set.cv_splits)
    code += "exp_set.multiprocessing = \"cv\"\n"
    code += "exp_set.run_experiments()"

    res = "# Experiment Set Reproduction\n"
    res += "## Code\n"
    res += "```\n"
    res += code + "\n"
    res += "```\n\n"
    res += "<!--- Figure in LaTeX\n"
    res += self.python_figure(code) + "\n"
    res += "--->\n"

    with open(fp, 'w+') as file:
        file.write(res)
def upload_recording_buffer(self):
    all_successful = True
    time_frames = list()
    with open(self.buffer_path) as infile:
        line_count = sum(1 for _ in infile)
    ch_count = len(ch_names)

    with open(self.buffer_path) as infile:
        for i, line in enumerate(tqdm(infile, total=line_count)):
            new_time_frame = TimeFrame.from_line(line)
            # Drop malformed time frames that are missing channel data
            if len(new_time_frame.sensor_data) != ch_count:
                Print.warning(
                    "Skipped TimeFrame with {} data points".format(
                        len(new_time_frame.sensor_data)))
                Print.data(new_time_frame.sensor_data)
                continue
            time_frames.append(new_time_frame.to_json())
            if (i + 1) % batch_size == 0:
                if not self.upload_batch(time_frames):
                    Print.failure("Failed to upload batch")
                    all_successful = False
                time_frames = list()

    # Flush the remaining partial batch and include it in the success check
    if time_frames and not self.upload_batch(time_frames):
        Print.failure("Failed to upload batch")
        all_successful = False
    return all_successful
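
# --- Added sketch (illustrative, not part of the original module) ---
# The batching pattern used in upload_recording_buffer, reduced to its core:
# flush every `batch_size` items, then flush the leftover partial batch after
# the loop so trailing items are not silently dropped. `upload` stands in for
# any callable that ships a batch.
def _demo_batched_upload(items, batch_size=3, upload=print):
    batch = []
    for i, item in enumerate(items):
        batch.append(item)
        if (i + 1) % batch_size == 0:
            upload(batch)
            batch = []
    if batch:
        upload(batch)  # final partial batch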
def create_classifier(self):
    if self.datasets is None:
        Print.info("Fetching dataset")
        self.datasets = list()
        ds = Session.full_dataset(window_length=self.window_length)
        ds = ds.reduced_dataset(self.dataset_type)
        ds = ds.normalize()
        ds.shuffle()
        self.datasets.append(ds)

    pipeline = self.create_pipeline()
    Print.data(pipeline)

    # Fit on a random split, report held-out accuracy, return the pipeline
    ds = self.datasets[0]
    ds_train, ds_test = ds.split_random()
    pipeline.fit(ds_train.X, ds_train.y)
    accuracy = pipeline.score(ds_test.X, ds_test.y)
    Print.info("Accuracy: {}".format(accuracy))
    return pipeline
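
# --- Added usage sketch (hypothetical, not part of the original module) ---
# create_classifier is a method, so it needs an owner object providing
# window_length, dataset_type, datasets and create_pipeline. The Trainer name
# below is an assumption for illustration; the actual class may differ.
#
#   trainer = Trainer(window_length=100, dataset_type=dataset_type)
#   clf = trainer.create_classifier()        # fits and scores the pipeline
#   predictions = clf.predict(feature_rows)  # sklearn-style pipeline API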
def plot_matrix(m, upscale_lowest_dim=True):
    # For very flat or very narrow matrices, repeat rows (or columns) until
    # the aspect ratio is within min_ratio, so the heatmap stays readable.
    if upscale_lowest_dim:
        min_ratio = 5
        h, w = np.shape(m)
        Print.pandas(m)
        row_height = 1 if h >= w / min_ratio else int(w / (min_ratio * h))
        col_width = 1 if w >= h / min_ratio else int(h / (min_ratio * w))
        Print.data(row_height)
        Print.data(col_width)
        res = np.zeros([h * row_height, w * col_width])
        Print.data(np.shape(res))
        if row_height > col_width:
            for i, row in enumerate(m):
                res[i * row_height:(i + 1) * row_height, :] = \
                    np.tile(row, (row_height, 1))
        elif col_width > row_height:
            for i, col in enumerate(m.T):
                res[:, i * col_width:(i + 1) * col_width] = \
                    np.tile(col, (col_width, 1)).T
        else:
            res = m
    else:
        res = m

    plt.imshow(res, interpolation='nearest', cmap=plt.cm.Blues)
    plt.colorbar()
    plt.show()
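
# --- Added demo (illustrative, not part of the original module) ---
# A 4x100 matrix is 25x wider than tall; with upscale_lowest_dim=True each of
# the 4 rows is repeated 5 times (int(100 / (5 * 4))), yielding a 20x100 image
# whose aspect ratio respects min_ratio.
def _demo_plot_matrix():
    m = np.random.rand(4, 100)
    plot_matrix(m, upscale_lowest_dim=True)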
def generate_detail(self):
    fn = self.filename("exp_set_detail", "md")
    Print.data(fn)
    fp = "/".join([self.path, fn])
    relevant_keys = list(set(self.exp_set.relevant_keys))

    res = "# Experiment Set Detail\n"
    res += "{}\n\n".format(datestamp_str(self.exp_set.init_time))
    res += "* **Runtime:** {}s\n".format(np.round(self.exp_set.run_time, 1))
    res += "* **Multiprocessing:** {}\n".format(self.exp_set.multiprocessing)
    res += "\n\n"

    if self.exp_set.description:
        res += "#### Description\n"
        res += self.exp_set.description + "\n"
    if self.exp_set.hypothesis:
        res += "#### Hypothesis\n"
        res += self.exp_set.hypothesis + "\n"
    res += "\n\n"

    res += "## Performance by configuration\n\n"
    for i, exp_report in enumerate(self.exp_reports):
        flat_params = flatten_dict(exp_report["raw_params"])
        res += "---\n\n"
        res += "### Entry {} accuracy: {}\n".format(
            i + 1, np.round(exp_report["accuracy"], DECIMALS))
        res += "* **Kappa:** {}\n".format(
            np.round(exp_report["kappa"], DECIMALS))
        res += "* **Average Experiment Time:** {}s\n".format(
            np.round(exp_report["time"]["exp"], 2))
        res += "* **Dataset type:** {}\n".format(exp_report["dataset_type"])
        res += "* **Dataset avg length:** {}\n".format(
            np.round(np.mean(exp_report["dataset_lengths"]), DECIMALS))
        # res += "* **Feature Vector Length:** {}\n".format(exp_report["feature_vector_length"])
        res += "* **CV Splits:** {}\n".format(exp_report["cv_splits"])
        res += "\n"
        res += "{}\n".format(np.round(exp_report["accuracies"], DECIMALS))

        res += "### Config\n"
        res += "**Relevant Parameters**\n\n"
        relevant_params = {
            key: flat_params[key]
            for key in relevant_keys if key in flat_params
        }
        params_df = pd.DataFrame([relevant_params])
        res += tabulate(params_df, tablefmt="pipe", headers="keys",
                        showindex=False) + "\n"

        res += "**All Parameters**\n\n"
        params_df = pd.DataFrame([flat_params])
        res += tabulate(params_df.round(DECIMALS), tablefmt="pipe",
                        headers="keys", showindex=False) + "\n"

        res += "### Details\n"
        res += "**Confusion Matrix**\n\n"
        c_matrix = exp_report["confusion_matrix"]
        class_names = exp_report["dataset_type"].labels
        c_matrix_df = pd.DataFrame(
            c_matrix,
            columns=["Pred: {}".format(l) for l in class_names],
            index=["__True: {}__".format(l) for l in class_names])
        res += tabulate(c_matrix_df, tablefmt="pipe", headers="keys",
                        showindex=True) + "\n"
        res += "<!---\nConfusion Matrix in LaTeX\n"
        res += tabulate(c_matrix_df, tablefmt="latex", headers="keys",
                        showindex=False) + "\n"
        res += "--->\n"

        # Formats the confusion matrix as raw numpy code for easy reuse
        res += "<!---\nConfusion Matrix Raw\n"
        res += "c_matrix = np.array({})\n".format(format_array(c_matrix))
        res += "class_names = {}\n".format(format_array(class_names))
        res += "--->\n"

        # res += "**Report**\n\n"
        # report = exp_report["report"]
        # report_df = pd.DataFrame.from_dict(report)
        # report_key = list(report.keys())[0]
        # index = ["__{}__".format(key) for key in report[report_key].keys()]
        # res += tabulate(report_df.round(DECIMALS), tablefmt="pipe", headers="keys", showindex=index) + "\n"

        res += "**Time**\n\n"
        time_df = pd.DataFrame([exp_report["time"]])
        res += tabulate(time_df.round(DECIMALS), tablefmt="pipe",
                        headers="keys", showindex=False) + "\n"

    with open(fp, 'w+') as file:
        file.write(res)
def generate_overview(self):
    fn = self.filename("exp_set_overview", "md")
    Print.data(fn)
    fp = "/".join([self.path, fn])
    relevant_keys = list(set(self.exp_set.relevant_keys))
    Print.data(relevant_keys)
    exp_summary = np.empty(
        shape=[len(self.exp_reports), 3 + len(relevant_keys)], dtype="U25")

    res = "# Experiment Set Overview\n"
    res += "## Performance by relevant params\n\n"
    param_performances = {
        param: self.param_performance(param)
        for param in self.all_relevant_params()
    }
    for param_name, param_vals in param_performances.items():
        res += "### {}\n\n".format(param_name)
        param_vals_list = sorted(
            list(param_vals.items()), key=lambda x: x[1], reverse=True)
        res += "\n".join([
            "* **{}:** {}".format(e[0], np.round(e[1], DECIMALS))
            for e in param_vals_list
        ])
        res += "\n\n"
    res += "\n\n"

    res += "## Performance Overview\n\n"
    for i, exp_report in enumerate(self.exp_reports):
        flat_params = flatten_dict(exp_report["raw_params"])
        relevant_params = np.empty(shape=[len(relevant_keys)], dtype="U25")
        for j, key in enumerate(relevant_keys):
            if key in flat_params:
                relevant_params[j] = flat_params[key]
            else:
                relevant_params[j] = "-"
        acc_string = "{}%".format(np.round(100 * exp_report["accuracy"], 1))
        kappa_string = "{}".format(np.round(exp_report["kappa"], 3))
        time_string = "{}s".format(np.round(exp_report["time"]["exp"], 2))
        exp_summary[i, :3] = [acc_string, kappa_string, time_string]
        exp_summary[i, 3:] = relevant_params

    df_perf1 = pd.DataFrame(
        exp_summary,
        columns=["Accuracy", "Kappa", "Avg Time"] + relevant_keys,
        copy=True)
    df_perf1.sort_values(by=["Accuracy"], axis=0, ascending=False,
                         inplace=True)
    res += tabulate(df_perf1, tablefmt="pipe", headers="keys",
                    showindex=False) + "\n"
    res += "<!---\nResults in LaTeX\n"
    res += tabulate(df_perf1, tablefmt="latex", headers="keys",
                    showindex=False) + "\n"
    res += "--->\n"

    with open(fp, 'w+') as file:
        file.write(res)
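
# --- Added usage sketch (hypothetical, not part of the original module) ---
# generate_overview, generate_detail and generate_reproduction all belong to a
# reporter object built around a finished ExperimentSet; the constructor call
# below is an assumption for illustration.
#
#   exp_set.run_experiments()
#   reporter = ExperimentSetReporter(exp_set)  # actual class name may differ
#   reporter.generate_overview()       # ranked summary across configurations
#   reporter.generate_detail()         # per-configuration tables and matrices
#   reporter.generate_reproduction()   # runnable snippet to re-create the set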
    yield dataset


if __name__ == '__main__':
    Print.start("Starting")
    sessions = Session.fetch_all()
    session = random.choice(sessions)
    n_channels = len(session.ch_names)
    session.fetch_timeframes()

    # Split the raw timeframe matrix into sensor channels and labels
    X = session.timeframes[:, :n_channels]
    y = session.timeframes[:, n_channels + 1]
    Print.data(np.mean(y))

    # Square the signal, append the labels as an extra column, normalize each
    # column to [0, 1], then transpose so channels become rows for plotting
    X_pow = X ** 2
    res = np.zeros([len(y), n_channels + 1])
    res[:, :n_channels] = X_pow
    res[:, n_channels] = y
    res = res / res.max(axis=0)
    res = res.T
    Print.data(np.mean(res[-1, :]))
    Print.data(np.shape(res))
    plot_matrix(res)