Example #1
0
    def load_data(self, id, test_size=DEFAULT_TEST_SIZE):
        """Load a dataset's features and labels and record its split sizes.

        Stores the dataset id, the output/image directories, the feature and
        label frames and the test size on the instance. It then splits the
        label vector the same way the training code does and writes the
        per-label "total size", "training size" and "testing size" counts
        into the dataset description before persisting it.

        Args:
            id: Identifier of the dataset to load.
            test_size: Forwarded to ``train_test_split`` as ``test_size``.
        """

        print("Loading dataset:", id, "\n...")

        self.dataset_id = id

        self.output_dir = config.KSN_OUTPUT_DIR_T.format(id=self.dataset_id)
        self.images_dir = config.KSN_IMAGES_DIR_T.format(id=self.dataset_id, agent=self.get_short_name())
        self.images_high_dir = config.KSN_IMAGES_HIGH_DIR_T.format(id=self.dataset_id, agent=self.get_short_name())

        self.features_df = io.load_topview_kinect_features(id)
        self.labels_df = io.load_topview_kinect_labels(id)
        self.test_size = test_size

        y_all = self.labels_df["label"].values
        y_train, y_test = cross_validation.train_test_split(y_all, test_size=self.test_size, random_state=DEFAULT_RAND)

        # update dataset.json
        dataset = io.load_topview_kinect_dataset(self.dataset_id)
        label_entries = dataset["labels"]

        # overwrite activities counts
        # np.bincount only returns max(label) + 1 bins, so a split that lacks
        # the highest-numbered classes would yield a list shorter than
        # `label_entries`; `minlength` pads the missing classes with zeros.
        # NOTE(review): counts are matched to labels by position, which
        # assumes label values are 0-based indices into dataset["labels"].
        n_labels = len(label_entries)
        total_sizes = np.bincount(y_all, minlength=n_labels).astype(int).tolist()
        training_sizes = np.bincount(y_train, minlength=n_labels).astype(int).tolist()
        testing_sizes = np.bincount(y_test, minlength=n_labels).astype(int).tolist()

        for entry, total, training, testing in zip(label_entries, total_sizes, training_sizes, testing_sizes):
            entry["total size"] = total
            entry["training size"] = training
            entry["testing size"] = testing

        # update dataset.json
        io.update_topview_kinect_dataset(dataset)
Example #2
0
    def visualize(self, id):
        """Render every figure for a dataset and register the selection plots.

        Loads the dataset, delegates to ``visualize_data`` and
        ``visualize_confusion_matrices``, then builds a per-classifier table of
        errors and timings for this agent and renders the classifier-selection
        time and error figures to every path returned by ``figure_paths``.
        Both figures are finally registered under ``dataset["figures"]`` and
        the dataset description is persisted.

        Args:
            id: Identifier of the dataset to visualize.

        Raises:
            StopIteration: If the dataset has no "ml" entry whose name matches
                ``self.get_name()``.
        """

        print("\nVisualizing results from", self.get_name(), "...")

        self.load_data(id)

        self.visualize_data()
        self.visualize_confusion_matrices()

        print("Visualizing naive classifiers selection ...")

        dataset = io.load_topview_kinect_dataset(id)

        ml_agent = next(agent for agent in dataset["ml"] if
                        agent["name"] == self.get_name())

        # Build the table in one go from records. The previous cell-by-cell
        # fill relied on `DataFrame.ix`, which no longer exists in current
        # pandas, and wrote strings/floats into integer-initialised columns.
        columns = ["classifier", "training error", "testing error",
                   "training time", "testing time"]
        rows = [
            {
                "classifier": classifier["name"],
                "training error": classifier["training error"],
                "testing error": classifier["testing error"],
                "training time": classifier["training time"],
                "testing time": classifier["testing time"],
            }
            for classifier in ml_agent["classifiers"]
        ]
        # Passing `columns` keeps the column order (and the columns themselves
        # when there are no classifiers at all).
        data = pd.DataFrame(rows, columns=columns)

        classifiers_time_filename = figs.CLASSIFIERS_SELECTION_TIME_FILENAME
        for f in self.figure_paths(classifiers_time_filename):
            viz.classifier_selection_time(data, f)

        classifiers_error_filename = figs.CLASSIFIERS_SELECTION_ERROR_FILENAME
        for f in self.figure_paths(classifiers_error_filename):
            viz.classifier_selection_error(data, f)

        # add figures to dataset.json (create the mapping if it is missing)
        figures = dataset.setdefault("figures", {})
        figures["naive classifiers selection time"] = {
            "title": figs.CLASSIFIERS_SELECTION_TIME_TITLE,
            "source": classifiers_time_filename
        }
        figures["naive classifiers selection error"] = {
            "title": figs.CLASSIFIERS_SELECTION_ERROR_TITLE,
            "source": classifiers_error_filename
        }

        # update dataset.json
        io.update_topview_kinect_dataset(dataset)
Example #3
0
    def apply_feature_map(self, feature_map):
        """Map the loaded features, split them and register the feature map.

        The feature map is stored on the instance and dumped with joblib, the
        loaded feature frame is passed through it to produce ``self.X`` (with
        ``self.y`` taken from the "label" column), and both are split into
        training and testing parts. The agent's entry in the dataset
        description is created if needed and its "feature maps" list is
        updated so it holds exactly one entry for this feature map.

        Args:
            feature_map: Object providing ``get_name``, ``get_short_name``,
                ``get_description`` and ``map``.
        """

        print("Applying feature map:", feature_map.get_name(), "\n...")

        self.feature_map = feature_map

        # persist the feature map next to the other artefacts of this agent
        pickle_path = config.KSN_FEATURE_MAP_PKL_T.format(
            id=self.dataset_id,
            agent=self.get_short_name(),
            feature_map=self.feature_map.get_short_name(),
        )
        joblib.dump(self.feature_map, pickle_path)

        # mapped design matrix and target vector
        self.X = self.feature_map.map(self.features_df).values
        self.y = self.labels_df["label"].values

        # training / testing partition
        parts = cross_validation.train_test_split(
            self.X, self.y, test_size=self.test_size, random_state=DEFAULT_RAND
        )
        self.X_train, self.X_test, self.y_train, self.y_test = parts

        # bring the dataset description up to date
        dataset = io.load_topview_kinect_dataset(self.dataset_id)
        if "ml" not in dataset:
            dataset["ml"] = []

        # look up this agent's entry, creating it on first use
        agent_name = self.get_name()
        ml_agent = None
        for candidate in dataset["ml"]:
            if candidate["name"] == agent_name:
                ml_agent = candidate
                break

        if ml_agent is None:
            ml_agent = {"name": self.get_name(), "feature maps": [], "classifiers": []}
            dataset["ml"].append(ml_agent)

        # drop any previous entry for this feature map, then add the new one
        map_name = self.feature_map.get_name()
        entries = [existing for existing in ml_agent["feature maps"] if existing["name"] != map_name]
        entries.append({"name": map_name, "description": self.feature_map.get_description()})
        ml_agent["feature maps"] = entries

        # write the description back
        io.update_topview_kinect_dataset(dataset)
Example #4
0
    def visualize_data(self):
        """Render the activity histogram and the stored confusion matrices.

        Reads the dataset description, draws the train/test histogram of the
        per-label sizes and registers it under ``dataset["figures"]``. Then,
        for every classifier of this agent that has a "cm training" entry,
        draws its training and testing confusion matrices and records both
        under the classifier's own "figures" mapping. The description is
        persisted at the end.
        """

        dataset = io.load_topview_kinect_dataset(self.dataset_id)

        print("\nVisualizing activity histogram...")

        # train/test shares expressed as percentages for the title and filename
        train = (1 - self.test_size) * 100
        test = 100 - train

        label_entries = dataset["labels"]
        histogram_columns = {
            "activity": [entry["name"] for entry in label_entries],
            "training size": [entry["training size"] for entry in label_entries],
            "total size": [entry["total size"] for entry in label_entries],
        }
        data = pd.DataFrame(histogram_columns)

        activities_title = figs.TRAIN_TEST_HIST_TITLE_T.format(train=train, test=test)
        activities_filename = figs.TRAIN_TEST_HIST_FILENAME_T.format(train=train, test=test)
        for target in self.figure_paths(activities_filename):
            viz.train_test_histogram(data, target)

        # register the histogram in dataset.json
        if "figures" not in dataset:
            dataset["figures"] = {}
        dataset["figures"]["activities"] = {"title": activities_title, "source": activities_filename}

        print("\nVisualizing confusion matrices...")

        labels = [entry["name"] for entry in dataset["labels"]]

        own_name = self.get_name()
        ml_agent = next(candidate for candidate in dataset["ml"] if candidate["name"] == own_name)

        for ml_classifier in ml_agent["classifiers"]:
            # classifiers without stored matrices have nothing to draw
            if "cm training" not in ml_classifier:
                continue

            name_parts = {
                "estimator": ml_classifier["short name"],
                "feature_map": ml_classifier["short feature map"],
            }

            # training confusion matrix
            training_cm_filename = figs.CONFUSION_MAT_TRAIN_FILENAME_T.format(**name_parts)
            for target in self.figure_paths(training_cm_filename, "result"):
                viz.results_confusion_matrix(ml_classifier["cm training"], labels, target)

            # testing confusion matrix
            testing_cm_filename = figs.CONFUSION_MAT_TEST_FILENAME_T.format(**name_parts)
            for target in self.figure_paths(testing_cm_filename, "result"):
                viz.results_confusion_matrix(ml_classifier["cm testing"], labels, target)

            # record both figures on the classifier entry
            classifier_figures = ml_classifier["figures"]
            classifier_figures["cm training"] = {
                "title": figs.CONFUSION_MAT_TRAIN_TITLE_T,
                "source": training_cm_filename,
            }
            classifier_figures["cm testing"] = {
                "title": figs.CONFUSION_MAT_TEST_TITLE_T,
                "source": testing_cm_filename,
            }

        # update dataset.json
        io.update_topview_kinect_dataset(dataset)
Example #5
0
    def learn(self, estimator_obj, cv_scores=None):
        """Fit an estimator, evaluate it and store the results.

        The model is fitted on the training split and evaluated on both
        splits (per-sample timings in milliseconds, error rates and
        row-normalized confusion matrices). It is then refitted on all data
        and dumped with joblib, so ``estimator_obj["model"]`` ends up trained
        on the full dataset. The result replaces any previous entry with the
        same name, description and feature map in this agent's "classifiers"
        list, and the dataset description is persisted.

        Confusion matrices always have one row/column per distinct value in
        ``self.y`` (sorted), regardless of which classes occur in a split.

        Args:
            estimator_obj: Mapping with "name", "description" and "model"
                entries; the model must provide ``fit``, ``predict`` and
                ``score``.
            cv_scores: Optional array of cross-validation scores; when given,
                ``1 - mean`` and the standard deviation are stored as well.

        Raises:
            StopIteration: If the dataset has no "ml" entry whose name matches
                ``self.get_name()``.
        """

        estimator_name = estimator_obj["name"]
        estimator_short_name = estimator_obj["name"].replace(" ", "")
        estimator_description = estimator_obj["description"]
        feature_map_name = self.feature_map.get_name()
        feature_map_short_name = self.feature_map.get_short_name()

        print("Learning", estimator_name, "{", estimator_description, "}\n...")

        def normalize_rows(cm):
            # Divide each row by its total; a row without any true samples
            # is left all-zero instead of turning into NaN through 0/0.
            row_totals = cm.sum(axis=1)[:, np.newaxis]
            return cm.astype("float") / np.where(row_totals == 0, 1, row_totals)

        # run classifier
        classifier = estimator_obj["model"]
        training_start_time = time.time()
        classifier.fit(self.X_train, self.y_train)
        training_time = (time.time() - training_start_time) * 1000
        training_time /= len(self.X_train)

        # predict training set
        y_train_pred = classifier.predict(self.X_train)

        # predict testing set
        testing_start_time = time.time()
        y_pred = classifier.predict(self.X_test)
        testing_time = (time.time() - testing_start_time) * 1000
        testing_time /= len(self.X_test)

        # compute confusion matrices
        # Pin the label set so both matrices share one shape and row order;
        # otherwise a class missing from a split silently shrinks the matrix.
        class_labels = np.unique(self.y)
        training_cm = metrics.confusion_matrix(self.y_train, y_train_pred, labels=class_labels)
        training_cm_normalized = normalize_rows(training_cm)
        testing_cm = metrics.confusion_matrix(self.y_test, y_pred, labels=class_labels)
        testing_cm_normalized = normalize_rows(testing_cm)

        # compute errors
        training_error = 1 - classifier.score(self.X_train, self.y_train)
        testing_error = 1 - classifier.score(self.X_test, self.y_test)

        # run classifier with all data and dump
        classifier.fit(self.X, self.y)
        joblib.dump(
            classifier,
            config.KSN_CLASSIFIER_PKL_T.format(
                id=self.dataset_id,
                agent=self.get_short_name(),
                estimator=estimator_short_name,
                feature_map=feature_map_short_name,
            ),
        )

        # update dataset.json
        dataset = io.load_topview_kinect_dataset(self.dataset_id)

        ml_agent = next(agent for agent in dataset["ml"] if agent["name"] == self.get_name())

        # update result: keep every stored entry that differs from this run in
        # name, description or feature map (loop variable deliberately does
        # not reuse the name of the fitted model above)
        ml_agent["classifiers"] = [
            entry
            for entry in ml_agent["classifiers"]
            if entry["name"] != estimator_name
            or entry["description"] != estimator_description
            or entry["feature map"] != feature_map_name
        ]

        result = {
            "name": estimator_name,
            "short name": estimator_short_name,
            "description": estimator_description,
            "feature map": feature_map_name,
            "short feature map": feature_map_short_name,
            "training time": training_time,
            "testing time": testing_time,
            "training error": training_error,
            "testing error": testing_error,
            "cm training": training_cm_normalized.tolist(),
            "cm testing": testing_cm_normalized.tolist(),
            "figures": {},
        }

        if cv_scores is not None:
            result["cv error mean"] = 1 - cv_scores.mean()
            result["cv error std"] = cv_scores.std()

        ml_agent["classifiers"].append(result)

        # update dataset.json
        io.update_topview_kinect_dataset(dataset)