Code Example #1
    def generate_reproduction(self):
        fn = self.filename("exp_set_reproduction", "md")
        Print.data(fn)
        fp = "/".join([self.path, fn])

        res = "# Experiment Set Reproduction\n"

        res += "## Code\n"

        res += "```\n"
        code = "params = "
        code += json.dumps(self.exp_set.reproduction_params(),
                           indent=4) + "\n\n"
        code += "exp_set = ExperimentSet(cv_splits={},  **params)\n".format(
            self.exp_set.cv_splits)
        code += "exp_set.multiprocessing = \"cv\"\n"
        code += "exp_set.run_experiments()"
        res += code + "\n"
        res += "```\n\n"

        res += "<!--- Figure in LaTeX\n"
        res += self.python_figure(code) + "\n"
        res += "--->\n"

        with open(fp, 'w+') as file:
            file.write(res)
Code Example #2
def plot_training_history(training_history, loss_function="", show=True, save=False):
    """Plot loss/accuracy curves; `save` is False or a filename under Path.plots."""
    fig = plt.figure(figsize=(16, 6))
    loss_ax = fig.add_subplot(121)

    Print.data(list(training_history.history.keys()))

    loss_ax.plot(training_history.history['loss'], 'r', linewidth=3.0)
    loss_ax.plot(training_history.history['val_loss'], 'b', linewidth=3.0)
    loss_ax.legend(['Training Loss', 'Validation Loss'], fontsize=18)
    loss_ax.set_xlabel('Epochs', fontsize=16)
    loss_ax.set_ylabel('Loss', fontsize=16)
    loss_ax.set_title('Loss Curves: {}'.format(loss_function), fontsize=16)

    acc_ax = fig.add_subplot(122)
    # Keras 2.3+ renamed these history keys to 'accuracy' / 'val_accuracy'.
    acc_ax.plot(training_history.history['acc'], 'r', linewidth=3.0)
    acc_ax.plot(training_history.history['val_acc'], 'b', linewidth=3.0)
    acc_ax.legend(['Training Accuracy', 'Validation Accuracy'], fontsize=18)
    acc_ax.set_xlabel('Epochs', fontsize=16)
    acc_ax.set_ylabel('Accuracy', fontsize=16)
    acc_ax.set_title('Accuracy Curves', fontsize=16)

    plt.tight_layout()

    if save:
        create_path_if_not_existing(Path.plots)
        fp = "/".join([Path.plots, save])
        plt.savefig(fp, format="png", dpi=400)

    if show:
        plt.show()
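
Usage sketch (not from the project): plot_training_history only needs an object exposing a Keras-style `.history` dict, so a minimal stand-in demonstrates it; with a real model, `history = model.fit(...)` supplies the same structure. Assumes the project's Print helper is importable.

from types import SimpleNamespace

fake_history = SimpleNamespace(history={
    'loss': [0.9, 0.6, 0.4], 'val_loss': [1.0, 0.7, 0.5],
    'acc': [0.5, 0.7, 0.8], 'val_acc': [0.45, 0.65, 0.75],
})
plot_training_history(fake_history, loss_function="categorical_crossentropy")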
Code Example #3
    def run_multi(self):
        working_q = mp.Queue()
        output_q = mp.Queue()

        for i in range(len(self.datasets)):
            working_q.put(i)

        n_workers = np.min([mp.cpu_count(), self.cv_splits])

        print("")
        Print.info("Using {} workers".format(n_workers))
        processes = [
            mp.Process(target=self.worker,
                       args=(i, working_q, output_q, self.pipeline))
            for i in range(n_workers)
        ]

        for proc in processes:
            proc.start()

        for proc in processes:
            proc.join()

        # Drain completed reports; Empty is queue.Empty. Caveat: draining only
        # after join() risks deadlock if a child blocks in put() on a full pipe
        # (see the sketch below).
        while True:
            try:
                self.cv_reports.append(output_q.get_nowait())
            except Empty:
                break
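
Both run_multi variants (here and in Code Example #4) follow the standard multiprocessing fan-out: fill a work queue, spawn workers, drain an output queue. A self-contained sketch of the same pattern with illustrative names (not from the project), draining results before join() to avoid the pipe-buffer deadlock noted above:

import multiprocessing as mp
from queue import Empty

def square_worker(wid, work_q, out_q):
    # Pull jobs until the work queue is exhausted.
    while True:
        try:
            item = work_q.get_nowait()
        except Empty:
            break
        out_q.put((wid, item * item))  # stand-in for run_cv / exp.run

if __name__ == "__main__":
    n_jobs = 20
    work_q, out_q = mp.Queue(), mp.Queue()
    for i in range(n_jobs):
        work_q.put(i)

    n_workers = min(mp.cpu_count(), n_jobs)
    procs = [mp.Process(target=square_worker, args=(w, work_q, out_q))
             for w in range(n_workers)]
    for p in procs:
        p.start()

    # Drain before join so children never block on a full output pipe.
    results = [out_q.get() for _ in range(n_jobs)]
    for p in procs:
        p.join()
    print(len(results), "results collected")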
Code Example #4
    def run_multi(self, ds_collection):
        working_q = mp.Queue()
        output_q = mp.Queue()

        for exp_params in self.exp_params_list:
            working_q.put(exp_params)

        n_workers = np.min([mp.cpu_count(), len(self.exp_params_list)])

        Print.info("Using {} workers".format(n_workers))
        processes = [
            mp.Process(target=self.worker,
                       args=(i, working_q, output_q, self.cv_splits,
                             ds_collection)) for i in range(n_workers)
        ]

        for proc in processes:
            proc.start()

        for proc in processes:
            proc.join()

        while True:
            try:
                self.exp_reports.append(output_q.get_nowait())
            except Empty:
                break
Code Example #5
    @staticmethod
    def upload_batch(time_frames):
        url = URL.timeframes

        r = requests.post(url, json=time_frames)
        if r.status_code != 201:
            Print.failure("Failed to upload batch")
            return False
        return True
Code Example #6
    def run(self):
        is_successful = self.upload_recording_buffer()

        if is_successful:
            if not self.keep_buffer:
                os.remove(self.buffer_path)
        else:
            Print.failure("Failed to upload recording buffer")
Code Example #7
    def stop_recording(self):
        if self.state == State.RECORDING:
            self.state = State.IDLE

            uploader = Uploader(sys_manager.session.id)
            uploader.start()
        else:
            Print.warning("Cannot stop recording because it is not recording.")
Code Example #8
    def start_recording(self):
        if self.state == State.IDLE:
            sys_manager.session = Session.create()
            # create_path_if_not_existing('{}_{}'.format(Path.recording_buffer, sys_manager.session.id))

            self.state = State.RECORDING
        else:
            Print.warning("Is already recording.")
Code Example #9
    def create_experiment_params(self):
        Print.point("Generating Experiments")
        for key in param_grid.keys():
            if key not in self.params:
                self.params[key] = param_grid[key]

        exp_params_list = self.recurse_flatten(self.params)

        for params in exp_params_list:
            pipeline_items = params["preprocessor"].split(";")
            pipeline_items.append(params["classifier"])
            self.pipeline_items = list(
                set(self.pipeline_items + pipeline_items))

            for key, val in conditional_param_grid.items():
                if key in pipeline_items:
                    if isinstance(val, dict):
                        for val_key, val_val in val.items():
                            if key in self.params:
                                if val_key in self.params[key]:
                                    params[key][val_key] = self.params[key][
                                        val_key]
                                else:
                                    params[key][val_key] = val_val
                            else:
                                params[key] = val
                    else:
                        params[key] = self.params[
                            key] if key in self.params else val
                else:
                    if key in params:
                        del params[key]

        exp_params_list = self.recurse_flatten(exp_params_list)

        # Remove duplicate configurations while preserving order.
        out = []
        for v in exp_params_list:
            if v not in out:
                out.append(v)

        exp_params_list = out
        # Alternative (order-destroying) dedup via JSON serialization:
        # set_of_jsons = {json.dumps(d, sort_keys=True) for d in exp_params_list}
        # exp_params_list = [json.loads(t) for t in set_of_jsons]

        Print.start("")
        print(pd.DataFrame([flatten_dict(e) for e in exp_params_list]))
        print("\n\n")

        self.exp_params_list = exp_params_list
Code Example #10
def plot_matrix(m, upscale_lowest_dim=True):
    if upscale_lowest_dim:
        min_ratio = 5
        h, w = np.shape(m)

        Print.pandas(m)

        row_height = 1 if h >= w / min_ratio else int(w / (min_ratio * h))
        col_width = 1 if w >= h / min_ratio else int(h / (min_ratio * w))

        Print.data(row_height)
        Print.data(col_width)

        res = np.zeros([h * row_height, w * col_width])

        Print.data(np.shape(res))

        if row_height > col_width:
            for i, row in enumerate(m):
                res[i * row_height: (i + 1) * row_height, :] = np.tile(row, (row_height, 1))
        elif col_width > row_height:
            for i, col in enumerate(m.T):
                res[:, i * col_width: (i + 1) * col_width] = np.tile(col, (col_width, 1)).T
        else:
            res = m
    else:
        res = m

    cmap = plt.cm.Blues
    plt.imshow(res, interpolation='nearest', cmap=cmap)
    plt.colorbar()

    plt.show()
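
Usage sketch (assumes the project's Print helper is importable): a wide matrix takes the row-tiling branch, so each row is repeated vertically before imshow.

import numpy as np

m = np.arange(40, dtype=float).reshape(2, 20)
plot_matrix(m)  # h=2, w=20 -> row_height = int(20 / (5 * 2)) = 2, col_width = 1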
Code Example #11
    @classmethod
    def full_dataset_gen(cls, window_length, count=1, sessions=None):

        if sessions is None:
            Print.info("Fetching sessions")
            sessions = Session.fetch_all(only_real=True,
                                         include_timeframes=True)

        for _ in range(count):
            dataset = Dataset.empty()
            for session in sessions:
                windows = list(session.window_gen(window_length=window_length))
                dataset = dataset + session.dataset(windows=windows)

            yield dataset
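
Usage sketch: full_dataset_gen is a generator, so a single dataset spanning all real sessions can be materialized with next() (assumes this project's Session and Dataset classes are importable).

ds = next(Session.full_dataset_gen(window_length=10))
Print.data(ds.length)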
Code Example #12
    def worker(self, i, working_queue, output_q, pipeline):
        while True:
            try:
                ds_index = working_queue.get_nowait()
                ds = self.datasets[ds_index]

                Print.progress("Worker {} is doing a job".format(i))

                cv_report = self.run_cv(ds, pipeline)

                output_q.put(cv_report)
            except Empty:
                break

        return
Code Example #13
    @staticmethod
    def worker(i, working_queue, output_q, cv_splits, ds_collection):
        while True:
            try:
                exp_params = working_queue.get_nowait()
                exp = Experiment.from_params(exp_params)
                exp.cv_splits = cv_splits
                exp.set_datasets(ds_collection)

                Print.progress("{}: Running Experiment".format(i))
                exp.run()
                output_q.put(exp.report)
            except Empty:
                Print.info("Queue Empty")
                break

        return
Code Example #14
File: dataset.py Project: olavblj/thesis-experiments
    def trim_none_seconds(self, sample_trim, return_copy=False):
        if not self.original_order:
            Print.warning("Skipped trim_none_seconds since dataset was not in original order")
            if return_copy:
                return self.copy()
            return

        if isinstance(sample_trim, str):
            sample_trim = [float(t) for t in sample_trim.split(";")]

        relabel_seconds = sample_trim[0]
        remove_seconds = np.max(sample_trim)

        if remove_seconds == 0:
            if return_copy:
                return self.copy()
            return

        # Find label-change points; at each one store the action label involved
        # (max of the two adjacent labels; at an action/none boundary this is
        # the action label).
        change_points = []
        action_labels = []
        for i in range(1, self.length, 1):
            if self.y[i] != self.y[i - 1]:
                change_points.append(i)
                action_labels.append(np.max(self.y[i - 1:i + 1]))

        relabel_dist = int((SAMPLING_RATE * relabel_seconds) / self.window_length)
        remove_dist = int((SAMPLING_RATE * remove_seconds) / self.window_length)

        y_none_mask = self.y == 0

        relabel_mask = np.isin(self.range, points_around(change_points, relabel_dist))
        relabel_mask = np.logical_and(relabel_mask, y_none_mask)

        remove_mask = np.isin(self.range, points_around(change_points, remove_dist))
        remove_mask = np.logical_and(remove_mask, y_none_mask)
        remove_mask = np.logical_xor(remove_mask, relabel_mask)

        for i in self.range:
            if relabel_mask[i]:
                self.y[i] = action_labels[find_nearest(change_points, i, return_index=True)]

        keep_mask = np.invert(remove_mask)

        if return_copy:
            return self.copy(keep_mask)
        else:
            self.apply_mask(keep_mask)
Code Example #15
File: session.py Project: olavblj/thesis-pong
    @classmethod
    def create(cls):
        url = URL.sessions

        payload = dict(
            person=sys_manager.person.id,
            ch_names=ch_names,
            is_real_data=sys_manager.is_real_data
        )

        r = requests.post(url, data=payload)
        json_resp = r.json()

        obj = cls(**json_resp)

        if r.status_code == 201:
            Print.api("Created New Session ({})".format(obj.id))
        else:
            Print.failure("Something went wrong")

        return obj
Code Example #16
File: dataset.py Project: olavblj/thesis-experiments
    def split_random(self, include_val=False):
        if self.is_child:
            raise Print.build_except("Tried to split a child dataset.", self)

        split_ratio = np.asarray(self.split_ratio if include_val else [self.split_ratio[1]])
        splits = (self.length * split_ratio).astype(int)

        p = np.random.permutation(self.length)
        p_parts = np.split(p, splits)

        return [self.child_from_mask(p) for p in p_parts]
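
A worked sketch of the splitting arithmetic, assuming split_ratio holds cumulative fractions (e.g. something like (0.7, 0.85), so that self.split_ratio[1] alone yields a single train/test boundary); names here are illustrative:

import numpy as np

length = 10
split_ratio = np.asarray([0.85])               # include_val=False path
splits = (length * split_ratio).astype(int)    # -> array([8])
p = np.random.permutation(length)
train_idx, test_idx = np.split(p, splits)      # 8 vs 2 shuffled indices
print(train_idx, test_idx)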
Code Example #17
    def upload_recording_buffer(self):
        all_successful = True
        time_frames = list()
        with open(self.buffer_path) as f:
            line_count = sum(1 for _ in f)

        ch_count = len(ch_names)

        with open(self.buffer_path) as infile:
            for i, line in enumerate(tqdm(infile, total=line_count)):
                new_time_frame = TimeFrame.from_line(line)

                if not len(new_time_frame.sensor_data) == ch_count:
                    Print.warning(
                        "Skipped TimeFrame with {} data points".format(
                            len(new_time_frame.sensor_data)))
                    Print.data(new_time_frame.sensor_data)
                    continue

                time_frames.append(new_time_frame.to_json())

                if (i + 1) % batch_size == 0:
                    if not self.upload_batch(time_frames):
                        all_successful = False
                    time_frames = list()

            # Upload the final partial batch and record its result too.
            if time_frames and not self.upload_batch(time_frames):
                all_successful = False

        return all_successful
Code Example #18
def run_queue():
    fns = os.listdir(path)

    fns = [fn for fn in fns if "done" not in fn]
    fns.sort(key=pri_from_fn, reverse=True)

    fps = ["/".join([path, fn]) for fn in fns]

    for fp in fps:
        print("\n\n")
        Print.start("Running ExperimentSet ({})".format(name_from_fn(fp.split("/")[-1])))
        with open(fp, "r") as infile:
            json_data = json.load(infile)

            exp_set = ExperimentSet(cv_splits=8, **json_data)
            exp_set.multiprocessing = "cv"
            exp_set.run_experiments()

        tokens = fp.split(".")
        tokens[0] += "_done"
        new_fp = ".".join(tokens)
        os.rename(fp, new_fp)
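
The helpers pri_from_fn and name_from_fn are not shown in this snippet; a hypothetical sketch of one plausible filename convention ("<priority>_<name>.json"), for illustration only:

def pri_from_fn(fn):
    # Hypothetical: a leading integer encodes queue priority.
    return int(fn.split("_")[0])

def name_from_fn(fn):
    # Hypothetical: the rest of the filename, extension stripped.
    return fn.split("_", 1)[1].rsplit(".", 1)[0]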
Code Example #19
    def run_experiments(self, fast_datasets=False):
        time.sleep(1)
        start_run_time = time.time()

        ds_collection = DatasetCollection.from_params(self.params,
                                                      self.cv_splits,
                                                      fast=fast_datasets)

        if self.multiprocessing == "exp":
            self.run_multi(ds_collection)
        else:
            for i, exp_params in enumerate(
                    tqdm(self.exp_params_list, desc="Running Experiments")):
                exp = Experiment.from_params(exp_params)
                exp.cv_splits = self.cv_splits
                exp.index = i
                exp.set_datasets(ds_collection)

                exp.multiprocessing = (self.multiprocessing == "cv")

                exp.run()

                if (self.best_exp is None or exp.report["accuracy"] >
                        self.best_exp.report["accuracy"]):
                    Print.good("New best: {}".format(
                        np.round(exp.report["accuracy"], 3)))
                    self.best_exp = exp

                self.exp_reports.append(exp.report)

        self.run_time = time.time() - start_run_time
        self.generate_report()

        if self.save_best:
            import joblib  # sklearn.externals.joblib was removed in scikit-learn 0.23
            fp = Path.classifiers + '/' + "classifier1.pkl"

            joblib.dump(self.best_exp.pipeline, fp)
Code Example #20
File: person.py Project: olavblj/thesis-pong
    @classmethod
    def create_or_fetch(cls, name, age, gender):
        url = URL.persons

        r = requests.post(url, data=dict(name=name, age=age, gender=gender))
        print(r)
        json_resp = r.json()
        Print.json(json_resp)

        person = cls(**json_resp)

        if r.status_code == 201:
            Print.api("Created New Person: {})".format(person))
        elif r.status_code == 200:
            Print.api("Fetched Existing Person: {}".format(person))

        return person
Code Example #21
    def create_classifier(self):
        if self.datasets is None:
            Print.info("Fetching dataset")
            self.datasets = list()
            ds = Session.full_dataset(window_length=self.window_length)
            ds = ds.reduced_dataset(self.dataset_type)
            ds = ds.normalize()
            ds.shuffle()
            self.datasets.append(ds)

        pipeline = self.create_pipeline()
        Print.data(pipeline)

        ds = self.datasets[0]
        ds_train, ds_test = ds.split_random()

        pipeline.fit(ds_train.X, ds_train.y)
        accuracy = pipeline.score(ds_test.X, ds_test.y)
        Print.info("Accuracy: {}".format(accuracy))

        return pipeline
Code Example #22
    def run(self):
        print("\n\n")
        Print.time("Running Experiment {}".format(
            "" if self.index is None else self.index))

        start_time = time.time()

        try:
            if self.datasets is None:
                self.datasets = list()
                for i in tqdm(range(self.cv_splits), desc="Fetching Datasets"):
                    ds = Session.full_dataset(window_length=self.window_length)
                    ds = ds.reduced_dataset(self.dataset_type)
                    ds = ds.normalize()
                    ds.shuffle()
                    self.datasets.append(ds)

            if self.multiprocessing:
                self.run_multi()
            else:
                for ds in tqdm(self.datasets, desc="Cross validating"):
                    self.cv_reports.append(self.run_cv(ds))

            self.report["success"] = True
        except Exception as e:
            print("")
            Print.warning("Skipping experiment: {}".format(e))
            Print.ex(e)
            self.report["success"] = False
            return

        self.report = {**self.report, **avg_dict(self.cv_reports)}
        self.report["confusion_matrix"] = np.sum(
            [r["confusion_matrix"] for r in self.cv_reports], 0)

        self.report["time"]["exp"] = (time.time() - start_time)
        self.report["accuracies"] = [r["accuracy"] for r in self.cv_reports]
        self.report["cv_splits"] = self.cv_splits
        # self.report["feature_vector_length"] = self.feature_vector_length()
        self.report["dataset_lengths"] = [d.length for d in self.datasets]
Code Example #23
    def generate_overview(self):
        fn = self.filename("exp_set_overview", "md")
        Print.data(fn)
        fp = "/".join([self.path, fn])
        relevant_keys = list(set(self.exp_set.relevant_keys))
        Print.data(relevant_keys)
        exp_summary = np.empty(
            shape=[len(self.exp_reports), 3 + len(relevant_keys)], dtype="U25")

        res = "# Experiment Set Overview\n"

        res += "## Performance by relevant params\n\n"

        param_performances = {
            param: self.param_performance(param)
            for param in self.all_relevant_params()
        }

        for param_name, param_vals in param_performances.items():
            res += "### {}\n\n".format(param_name)

            param_vals_list = sorted(list(param_vals.items()),
                                     key=lambda x: x[1],
                                     reverse=True)

            res += "\n".join([
                "* **{}:** {}".format(e[0], np.round(e[1], DECIMALS))
                for e in param_vals_list
            ])
            res += "\n\n"

        res += "\n\n"

        res += "## Performance Overview\n\n"

        for i, exp_report in enumerate(self.exp_reports):
            flat_params = flatten_dict(exp_report["raw_params"])
            relevant_params = np.empty(shape=[len(relevant_keys)], dtype="U25")

            for j, key in enumerate(relevant_keys):
                if key in flat_params:
                    relevant_params[j] = flat_params[key]
                else:
                    relevant_params[j] = "-"

            acc_string = "{}%".format(np.round(100 * exp_report["accuracy"],
                                               1))
            kappa_string = "{}".format(np.round(exp_report["kappa"], 3))
            time_string = "{}s".format(np.round(exp_report["time"]["exp"], 2))

            exp_summary[i, :3] = [acc_string, kappa_string, time_string]
            exp_summary[i, 3:] = relevant_params

        df_perf1 = pd.DataFrame(exp_summary,
                                columns=["Accuracy", "Kappa", "Avg Time"] +
                                relevant_keys,
                                copy=True)
        # Note: "Accuracy" values are strings like "85.3%", so this sort is
        # lexicographic rather than numeric.
        df_perf1.sort_values(by=["Accuracy"],
                             axis=0,
                             ascending=False,
                             inplace=True)
        res += tabulate(
            df_perf1, tablefmt="pipe", headers="keys", showindex=False) + "\n"

        res += "<!---\nResults in LaTeX\n"
        res += tabulate(
            df_perf1, tablefmt="latex", headers="keys", showindex=False) + "\n"
        res += "--->\n"

        with open(fp, 'w+') as file:
            file.write(res)
Code Example #24
        if sessions is None:
            Print.info("Fetching sessions")
            sessions = Session.fetch_all(only_real=True,
                                         include_timeframes=True)

        for _ in range(count):
            dataset = Dataset.empty()
            for session in sessions:
                windows = list(session.window_gen(window_length=window_length))
                dataset = dataset + session.dataset(windows=windows)

            yield dataset


if __name__ == '__main__':
    Print.start("Starting")
    sessions = Session.fetch_all()

    session = random.choice(sessions)
    n_channels = len(session.ch_names)

    session.fetch_timeframes()

    X = session.timeframes[:, :n_channels]
    y = session.timeframes[:, n_channels + 1]

    Print.data(np.mean(y))

    X_pow = X**2

    res = np.zeros([len(y), n_channels + 1])
Code Example #25
    def exit_system(self):
        self.streamhandler.stop()
        time.sleep(1)
        Print.success("Successfully exited program.")
        sys.exit()
Code Example #26
    def generate_report(self):
        print("\n")
        Print.success("Generating Report")

        report = Report(self, self.exp_reports)
        report.generate()
Code Example #27
    def generate_detail(self):
        fn = self.filename("exp_set_detail", "md")
        Print.data(fn)
        fp = "/".join([self.path, fn])

        relevant_keys = list(set(self.exp_set.relevant_keys))

        res = "# Experiment Set Detail\n"
        res += "{}\n\n".format(datestamp_str(self.exp_set.init_time))
        res += "* **Runtime:** {}s\n".format(np.round(self.exp_set.run_time,
                                                      1))
        res += "* **Multiprocessing:** {}\n".format(
            self.exp_set.multiprocessing)
        res += "\n\n"
        if self.exp_set.description:
            res += "#### Description\n"
            res += self.exp_set.description + "\n"

        if self.exp_set.hypothesis:
            res += "#### Hypothesis\n"
            res += self.exp_set.hypothesis + "\n"

        res += "\n\n"
        res += "## Performance by configuration\n\n"

        for i, exp_report in enumerate(self.exp_reports):
            flat_params = flatten_dict(exp_report["raw_params"])

            res += "---\n\n"
            res += "### Entry {} accuracy: {}\n".format(
                i + 1, np.round(exp_report["accuracy"], DECIMALS))
            res += "* **Kappa:** {}\n".format(
                np.round(exp_report["kappa"], DECIMALS))
            res += "* **Average Experiment Time:** {}s\n".format(
                np.round(exp_report["time"]["exp"], 2))
            res += "* **Dataset type:** {}\n".format(
                exp_report["dataset_type"])
            res += "* **Dataset avg length:** {}\n".format(
                np.round(np.mean(exp_report["dataset_lengths"]), DECIMALS))
            # res += "* **Feature Vector Length:** {}\n".format(exp_report["feature_vector_length"])
            res += "* **CV Splits:** {}\n".format(exp_report["cv_splits"])
            res += "\n"

            res += "{}\n".format(np.round(exp_report["accuracies"], DECIMALS))

            res += "### Config\n"
            res += "**Relevant Parameters**\n\n"
            relevant_params = {
                key: flat_params[key]
                for key in relevant_keys if key in flat_params
            }
            params_df = pd.DataFrame([relevant_params])
            res += tabulate(
                params_df, tablefmt="pipe", headers="keys",
                showindex=False) + "\n"

            res += "**All Parameters**\n\n"
            params_df = pd.DataFrame([flat_params])
            res += tabulate(params_df.round(DECIMALS),
                            tablefmt="pipe",
                            headers="keys",
                            showindex=False) + "\n"

            res += "### Details\n"

            res += "**Confusion Matrix**\n\n"
            c_matrix = exp_report["confusion_matrix"]
            class_names = exp_report["dataset_type"].labels
            c_matrix_df = pd.DataFrame(
                c_matrix,
                columns=["Pred: {}".format(l) for l in class_names],
                index=["__True: {}__".format(l) for l in class_names])
            res += tabulate(
                c_matrix_df, tablefmt="pipe", headers="keys",
                showindex=True) + "\n"

            res += "<!---\nConfusion Matrix in LaTeX\n"
            res += tabulate(
                c_matrix_df, tablefmt="latex", headers="keys",
                showindex=False) + "\n"
            res += "--->\n"

            # Embed the raw confusion matrix as copy-pasteable Python.
            res += "<!---\nConfusion Matrix Raw\n"
            res += "c_matrix = np.array({})\n".format(format_array(c_matrix))
            res += "class_names = {}\n".format(format_array(class_names))
            res += "--->\n"

            # res += "**Report**\n\n"
            # report = exp_report["report"]
            # report_df = pd.DataFrame.from_dict(report)
            # report_key = list(report.keys())[0]
            # index = ["__{}__".format(key) for key in report[report_key].keys()]
            # res += tabulate(report_df.round(DECIMALS), tablefmt="pipe", headers="keys", showindex=index) + "\n"

            res += "**Time**\n\n"
            time_df = pd.DataFrame([exp_report["time"]])
            res += tabulate(time_df.round(DECIMALS),
                            tablefmt="pipe",
                            headers="keys",
                            showindex=False) + "\n"

        with open(fp, 'w+') as file:
            file.write(res)
Code Example #28
            res = np.vstack((cA, cH, cV, cD))

            self.sample_shape = np.shape(res)

        return self


if __name__ == '__main__':

    ds = list(Session.full_dataset_gen(window_length=10))[0]

    indices = []

    for class_idx in [0, 1]:
        for i, y in enumerate(ds.y):
            if y == class_idx:
                indices.append(i)
                break

    print(indices)

    wavelet = DWT(dim=1, wavelet='db1')
    wavelet.fit(ds.X)

    for i in indices:
        sample = ds.X[i]
        sample_t = wavelet._transform_sample(sample)

        Print.pandas(sample)
        Print.pandas(sample_t)