def load_data(self, id, test_size=DEFAULT_TEST_SIZE):
    """Load a dataset's features and labels and refresh its per-label sizes.

    Stores on the instance the dataset id, the output/image directories
    derived from the ``config`` templates, the loaded feature and label
    frames and the requested test size. It then splits the label vector
    with the same ``test_size``/``random_state`` used elsewhere in this
    class and writes the resulting "total size", "training size" and
    "testing size" counts into every entry of ``dataset["labels"]``.

    Args:
        id: Identifier of the dataset to load. The name shadows the builtin
            but is kept so keyword callers keep working.
        test_size: Forwarded as ``test_size`` to ``train_test_split``.
    """
    print("Loading dataset:", id, "\n...")
    self.dataset_id = id
    self.output_dir = config.KSN_OUTPUT_DIR_T.format(id=self.dataset_id)
    self.images_dir = config.KSN_IMAGES_DIR_T.format(
        id=self.dataset_id, agent=self.get_short_name()
    )
    self.images_high_dir = config.KSN_IMAGES_HIGH_DIR_T.format(
        id=self.dataset_id, agent=self.get_short_name()
    )
    self.features_df = io.load_topview_kinect_features(id)
    self.labels_df = io.load_topview_kinect_labels(id)
    self.test_size = test_size

    y_all = self.labels_df["label"].values
    y_train, y_test = cross_validation.train_test_split(
        y_all, test_size=self.test_size, random_state=DEFAULT_RAND
    )

    # Load the stored dataset description whose activity counts get overwritten.
    dataset = io.load_topview_kinect_dataset(self.dataset_id)
    label_entries = dataset["labels"]

    # bincount only produces bins up to the largest value it sees, so a split
    # that lacks the highest-index labels would yield a list shorter than
    # ``label_entries``. ``minlength`` pads those bins with zero so the
    # per-label lookup below can never run past the end.
    n_labels = len(label_entries)
    total_sizes = np.bincount(y_all, minlength=n_labels).astype(int).tolist()
    training_sizes = np.bincount(y_train, minlength=n_labels).astype(int).tolist()
    testing_sizes = np.bincount(y_test, minlength=n_labels).astype(int).tolist()

    for label_idx, label_entry in enumerate(label_entries):
        label_entry["total size"] = total_sizes[label_idx]
        label_entry["training size"] = training_sizes[label_idx]
        label_entry["testing size"] = testing_sizes[label_idx]

    # Persist the refreshed counts.
    io.update_topview_kinect_dataset(dataset)
def visualize(self, id):
    """Render all figures for a dataset and register the selection plots.

    Loads the dataset, draws the data and confusion-matrix figures through
    ``visualize_data`` and ``visualize_confusion_matrices``, then plots the
    training/testing time and error of every classifier recorded for this
    agent and stores the two resulting figure entries in the dataset
    description.

    Args:
        id: Identifier of the dataset to visualize. The name shadows the
            builtin but is kept so keyword callers keep working.

    Raises:
        StopIteration: If the dataset has no "ml" entry named like this agent.
    """
    print("\nVisualizing results from", self.get_name(), "...")
    self.load_data(id)
    self.visualize_data()
    self.visualize_confusion_matrices()

    print("Visualizing naive classifiers selection ...")
    dataset = io.load_topview_kinect_dataset(id)
    ml_agent = next(agent for agent in dataset["ml"] if agent["name"] == self.get_name())

    # Build the frame in one go instead of pre-filling it with zeros and
    # patching cells through ``DataFrame.ix`` (removed from pandas). Passing
    # ``columns`` keeps the column order and yields an empty frame with the
    # right columns when no classifier has been recorded yet.
    columns = ["classifier", "training error", "testing error", "training time", "testing time"]
    records = [
        {
            "classifier": classifier["name"],
            "training error": classifier["training error"],
            "testing error": classifier["testing error"],
            "training time": classifier["training time"],
            "testing time": classifier["testing time"],
        }
        for classifier in ml_agent["classifiers"]
    ]
    data = pd.DataFrame(records, columns=columns)

    classifiers_time_filename = figs.CLASSIFIERS_SELECTION_TIME_FILENAME
    for f in self.figure_paths(classifiers_time_filename):
        viz.classifier_selection_time(data, f)

    classifiers_error_filename = figs.CLASSIFIERS_SELECTION_ERROR_FILENAME
    for f in self.figure_paths(classifiers_error_filename):
        viz.classifier_selection_error(data, f)

    # Register the figures; create the container if the stored dataset
    # description does not have one yet.
    figures = dataset.setdefault("figures", {})
    figures["naive classifiers selection time"] = {
        "title": figs.CLASSIFIERS_SELECTION_TIME_TITLE,
        "source": classifiers_time_filename,
    }
    figures["naive classifiers selection error"] = {
        "title": figs.CLASSIFIERS_SELECTION_ERROR_TITLE,
        "source": classifiers_error_filename,
    }

    # Persist the updated dataset description.
    io.update_topview_kinect_dataset(dataset)
def apply_feature_map(self, feature_map):
    """Apply a feature map to the loaded data and register it for this agent.

    The feature map is stored on the instance and dumped with ``joblib``.
    The mapped features and the labels are then split into training and
    testing sets using ``self.test_size``. Finally, the dataset description
    is updated so that this agent's "feature maps" list holds exactly one
    entry for the given map.

    Args:
        feature_map: Object exposing ``get_name``, ``get_short_name``,
            ``get_description`` and ``map``.
    """
    print("Applying feature map:", feature_map.get_name(), "\n...")
    self.feature_map = feature_map

    # Dump the feature map to the path built from the config template.
    pickle_path = config.KSN_FEATURE_MAP_PKL_T.format(
        id=self.dataset_id,
        agent=self.get_short_name(),
        feature_map=self.feature_map.get_short_name(),
    )
    joblib.dump(self.feature_map, pickle_path)

    # Mapped features and raw labels.
    mapped_df = self.feature_map.map(self.features_df)
    self.X = mapped_df.values
    self.y = self.labels_df["label"].values

    # Training/testing split.
    split = cross_validation.train_test_split(
        self.X, self.y, test_size=self.test_size, random_state=DEFAULT_RAND
    )
    self.X_train, self.X_test, self.y_train, self.y_test = split

    # Locate this agent in the stored dataset description, creating it if needed.
    dataset = io.load_topview_kinect_dataset(self.dataset_id)
    agents = dataset.setdefault("ml", [])
    agent_name = self.get_name()
    for candidate in agents:
        if candidate["name"] == agent_name:
            ml_agent = candidate
            break
    else:
        ml_agent = {"name": self.get_name(), "feature maps": list(), "classifiers": list()}
        agents.append(ml_agent)

    # Drop any previous entry for this feature map, then append the fresh one.
    map_name = self.feature_map.get_name()
    kept_maps = []
    for entry in ml_agent["feature maps"]:
        if entry["name"] != map_name:
            kept_maps.append(entry)
    kept_maps.append(
        {"name": self.feature_map.get_name(), "description": self.feature_map.get_description()}
    )
    ml_agent["feature maps"] = kept_maps

    # Persist the updated dataset description.
    io.update_topview_kinect_dataset(dataset)
def visualize_data(self):
    """Draw the activity histogram and the stored confusion matrices.

    Reads the dataset description for ``self.dataset_id``, plots the
    training/total sizes per activity, then plots the training and testing
    confusion matrix of every classifier of this agent that has a
    "cm training" entry. The generated figure titles and file names are
    written back into the dataset description.

    Raises:
        StopIteration: If the dataset has no "ml" entry named like this agent.
    """
    dataset = io.load_topview_kinect_dataset(self.dataset_id)
    label_entries = dataset["labels"]

    print("\nVisualizing activity histogram...")
    # Split percentages derived from the configured test size.
    train = (1 - self.test_size) * 100
    test = 100 - train

    histogram_columns = {
        "activity": [entry["name"] for entry in label_entries],
        "training size": [entry["training size"] for entry in label_entries],
        "total size": [entry["total size"] for entry in label_entries],
    }
    data = pd.DataFrame(histogram_columns)

    activities_title = figs.TRAIN_TEST_HIST_TITLE_T.format(train=train, test=test)
    activities_filename = figs.TRAIN_TEST_HIST_FILENAME_T.format(train=train, test=test)
    for path in self.figure_paths(activities_filename):
        viz.train_test_histogram(data, path)

    # Register the histogram, creating the figures container when absent.
    if "figures" not in dataset:
        dataset["figures"] = dict()
    dataset["figures"]["activities"] = {"title": activities_title, "source": activities_filename}

    print("\nVisualizing confusion matrices...")
    labels = [entry["name"] for entry in dataset["labels"]]
    agent_name_matches = (agent for agent in dataset["ml"] if agent["name"] == self.get_name())
    ml_agent = next(agent_name_matches)

    for ml_classifier in ml_agent["classifiers"]:
        # Only classifiers with stored confusion matrices can be drawn.
        if "cm training" in ml_classifier:
            # Training confusion matrix.
            training_cm_filename = figs.CONFUSION_MAT_TRAIN_FILENAME_T.format(
                estimator=ml_classifier["short name"],
                feature_map=ml_classifier["short feature map"],
            )
            for path in self.figure_paths(training_cm_filename, "result"):
                viz.results_confusion_matrix(ml_classifier["cm training"], labels, path)

            # Testing confusion matrix.
            testing_cm_filename = figs.CONFUSION_MAT_TEST_FILENAME_T.format(
                estimator=ml_classifier["short name"],
                feature_map=ml_classifier["short feature map"],
            )
            for path in self.figure_paths(testing_cm_filename, "result"):
                viz.results_confusion_matrix(ml_classifier["cm testing"], labels, path)

            # Record both figures on the classifier entry.
            # NOTE(review): the *_TITLE_T constants are stored without being
            # formatted - confirm whether that is intentional.
            classifier_figures = ml_classifier["figures"]
            classifier_figures["cm training"] = {
                "title": figs.CONFUSION_MAT_TRAIN_TITLE_T,
                "source": training_cm_filename,
            }
            classifier_figures["cm testing"] = {
                "title": figs.CONFUSION_MAT_TEST_TITLE_T,
                "source": testing_cm_filename,
            }

    # Persist the updated dataset description.
    io.update_topview_kinect_dataset(dataset)
def learn(self, estimator_obj, cv_scores=None):
    """Fit an estimator, evaluate it on both splits and record the result.

    The model is fitted on the training split, timed for fitting and for
    predicting the testing split (milliseconds per sample), and scored on
    both splits. Row-normalised confusion matrices are computed for both
    splits. The model is then refitted on all data and dumped with
    ``joblib``. Finally the agent's "classifiers" list in the dataset
    description gets a fresh result entry, replacing any previous entry with
    the same name, description and feature map.

    Args:
        estimator_obj: Mapping with the keys "name", "description" and
            "model"; the model must expose ``fit``, ``predict`` and ``score``.
        cv_scores: Optional array-like of cross-validation scores exposing
            ``mean()`` and ``std()``; when given, "cv error mean" and
            "cv error std" are added to the result.

    Raises:
        StopIteration: If the dataset has no "ml" entry named like this agent.
    """

    def normalize_rows(cm):
        # Divide each row by its total. Rows that sum to zero (a class with
        # no true samples in the split) are left as zeros instead of
        # producing NaN, which would end up in the stored results.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        safe_totals = np.where(row_totals == 0, 1, row_totals)
        return cm.astype("float") / safe_totals

    estimator_name = estimator_obj["name"]
    estimator_short_name = estimator_obj["name"].replace(" ", "")
    estimator_description = estimator_obj["description"]
    feature_map_name = self.feature_map.get_name()
    feature_map_short_name = self.feature_map.get_short_name()
    print("Learning", estimator_name, "{", estimator_description, "}\n...")

    # Fit on the training split; time is reported in ms per training sample.
    classifier = estimator_obj["model"]
    training_start_time = time.time()
    classifier.fit(self.X_train, self.y_train)
    training_time = (time.time() - training_start_time) * 1000
    training_time /= len(self.X_train)

    # Predict the training set.
    y_train_pred = classifier.predict(self.X_train)

    # Predict the testing set; time is reported in ms per testing sample.
    testing_start_time = time.time()
    y_pred = classifier.predict(self.X_test)
    testing_time = (time.time() - testing_start_time) * 1000
    testing_time /= len(self.X_test)

    # Pin the label set to every class present in the full data so both
    # matrices have the same shape and ordering even when a split happens to
    # miss a class.
    class_labels = np.unique(self.y)
    training_cm = metrics.confusion_matrix(self.y_train, y_train_pred, labels=class_labels)
    training_cm_normalized = normalize_rows(training_cm)
    testing_cm = metrics.confusion_matrix(self.y_test, y_pred, labels=class_labels)
    testing_cm_normalized = normalize_rows(testing_cm)

    # Errors are the complement of the model's own score on each split.
    training_error = 1 - classifier.score(self.X_train, self.y_train)
    testing_error = 1 - classifier.score(self.X_test, self.y_test)

    # Refit on all data and dump the final model.
    classifier.fit(self.X, self.y)
    joblib.dump(
        classifier,
        config.KSN_CLASSIFIER_PKL_T.format(
            id=self.dataset_id,
            agent=self.get_short_name(),
            estimator=estimator_short_name,
            feature_map=feature_map_short_name,
        ),
    )

    # Locate this agent in the stored dataset description.
    dataset = io.load_topview_kinect_dataset(self.dataset_id)
    ml_agent = next(agent for agent in dataset["ml"] if agent["name"] == self.get_name())

    # Drop any earlier result for the same estimator/description/feature map.
    # The loop variable is deliberately not called ``classifier`` so it can
    # never clobber the fitted model above.
    ml_agent["classifiers"] = [
        previous
        for previous in ml_agent["classifiers"]
        if previous["name"] != estimator_name
        or previous["description"] != estimator_description
        or previous["feature map"] != feature_map_name
    ]

    result = {
        "name": estimator_name,
        "short name": estimator_short_name,
        "description": estimator_description,
        "feature map": feature_map_name,
        "short feature map": feature_map_short_name,
        "training time": training_time,
        "testing time": testing_time,
        "training error": training_error,
        "testing error": testing_error,
        "cm training": training_cm_normalized.tolist(),
        "cm testing": testing_cm_normalized.tolist(),
        "figures": dict(),
    }
    if cv_scores is not None:
        result["cv error mean"] = 1 - cv_scores.mean()
        result["cv error std"] = cv_scores.std()
    ml_agent["classifiers"].append(result)

    # Persist the updated dataset description.
    io.update_topview_kinect_dataset(dataset)