def load_all_features(self, participants, X, calibrations, y, **kwargs):
    """Attach every requested feature to ``X`` and return the result.

    Each selected key is handed to ``self.attach_feature`` in turn; the
    value returned by one call is the ``X`` passed to the next.

    Args:
        participants, X, calibrations, y: forwarded unchanged to
            ``self.attach_feature`` (``X`` is replaced by each call's result).
        **kwargs:
            include: iterable of feature keys to attach, in iteration order.
                When missing or ``None`` it falls back to
                ``utils.all_features()``.
            exclude: container of feature keys to skip. When missing or
                ``None`` nothing is skipped.

    Returns:
        The result of the last ``self.attach_feature`` call, or ``X``
        unchanged when no key is selected.
    """
    include = kwargs.get('include')
    if include is None:
        # Resolve the default lazily: callers such as ``preprocess`` always
        # pass ``include``, so the full feature list is rarely needed.
        include = utils.all_features()

    exclude = kwargs.get('exclude')
    if exclude is None:
        exclude = ()

    for key in include:
        if key in exclude:
            continue
        X = self.attach_feature(participants, X, calibrations, y, key)
    return X
def plot_pca_coefficients(self, **kwargs):
    """Fit a PCA and plot its component coefficients per field.

    Calls ``self.reset()`` first, then ``self.compute_pca`` with the given
    keyword arguments. Two defaults are filled in when the caller did not
    supply them:

    * ``base_fields``: body fields + body orientation fields + all features
    * ``n_components``: ``.99``

    The plot options are the ``'pca_coefficients'`` entry of
    ``self.plotConfig`` overlaid with the (defaulted) keyword arguments, so
    caller-supplied values win on conflicts.
    """
    self.reset()

    default_fields = (
        utils.all_body_fields()
        + utils.all_body_orientation_fields()
        + utils.all_features()
    )
    kwargs.setdefault('base_fields', default_fields)
    kwargs.setdefault('n_components', .99)

    pca, field_names = self.compute_pca(**kwargs)
    coefficients = pd.DataFrame(pca.components_, columns=field_names)

    # Start from the stored plot configuration, then let kwargs override it.
    plot_options = dict(self.plotConfig.getConfig('pca_coefficients'))
    plot_options.update(kwargs)
    plotting.plot_pca_coefficients(coefficients, **plot_options)
def plot_extratrees(self, **kwargs):
    """Plot the feature ranking produced by ``find_best_features_extratree``.

    ``fields`` (keyword) selects the columns to preprocess; when omitted it
    defaults to body fields + body orientation fields + all features and is
    written back into ``kwargs`` so the ranking helper sees the same list.
    ``exclude_features`` (keyword, default ``[]``) is forwarded to
    ``self.preprocess``.

    Plot options come from the ``'extratrees'`` entry of ``self.plotConfig``
    overlaid with ``kwargs``.
    """
    default_fields = (
        utils.all_body_fields()
        + utils.all_body_orientation_fields()
        + utils.all_features()
    )
    selected = kwargs.setdefault('fields', default_fields)
    excluded = kwargs.get('exclude_features', [])

    participants, data, calibrations, targets = self.preprocess(
        features=selected, exclude_features=excluded
    )
    ranking = self.find_best_features_extratree(
        participants, data, calibrations, targets, **kwargs
    )

    # Stored plot configuration first, caller overrides second.
    plot_options = dict(self.plotConfig.getConfig('extratrees'))
    plot_options.update(kwargs)
    plotting.plot_extratrees(ranking, **plot_options)
def plot_selectKBest_chi2(self, **kwargs):
    """Plot the feature ranking produced by ``self.find_best_features``.

    ``fields`` (keyword) selects the columns to preprocess; when omitted it
    defaults to body fields + body orientation fields + all features and is
    written back into ``kwargs``. ``exclude_features`` (keyword, default
    ``[]``) is forwarded to ``self.preprocess``.

    Nothing is plotted when the ranking is empty. Otherwise the plot options
    are the ``'selectKBest'`` entry of ``self.plotConfig`` overlaid with
    ``kwargs``.
    """
    default_fields = (
        utils.all_body_fields()
        + utils.all_body_orientation_fields()
        + utils.all_features()
    )
    selected = kwargs.setdefault('fields', default_fields)
    excluded = kwargs.get('exclude_features', [])

    participants, data, calibrations, targets = self.preprocess(
        features=selected, exclude_features=excluded
    )
    ranking = self.find_best_features(
        participants, data, calibrations, targets, **kwargs
    )

    # An empty ranking leaves nothing to draw.
    if len(ranking) == 0:
        return

    plot_options = dict(self.plotConfig.getConfig('selectKBest'))
    plot_options.update(kwargs)
    plotting.plot_selectKBest_chi2(ranking, **plot_options)
def preprocess(self, participants=None, X=None, calibrations=None, y=None,
               features=None, **kwargs):
    """Build the feature matrix for the requested features.

    Args:
        participants, X, calibrations, y: input data. Ignored when
            ``use_preloaded`` is true (the default), in which case
            ``self.normalized`` is used instead.
        features: feature names to keep. Falls back to the ``include``
            keyword and then to ``utils.all_features()``.
        **kwargs:
            use_preloaded (default True): take the data from
                ``self.normalized``.
            use_provided (default True): only consulted when
                ``use_preloaded`` is false; if true the arguments are used
                as given, otherwise they go through ``self.normalize_data``.
            exclude_features (default ``[]``): column names dropped from
                the final selection.
            Everything in ``kwargs`` (with ``include`` set to the resolved
            feature list) is forwarded to ``self.normalize_data`` and
            ``self.load_all_features``.

    Side effects:
        Sets ``self.add_features`` to the list of selected column names.

    Returns:
        ``(participants, X, calibrations, y)`` where ``X`` is indexed by
        ``'cid'``, restricted to ``self.add_features`` and free of NaNs.
    """
    use_preloaded = kwargs.get('use_preloaded', True)
    use_provided = kwargs.get('use_provided', True)
    exclude_features = kwargs.get('exclude_features', [])

    # Resolve the feature list: explicit argument > 'include' kwarg > all.
    if features is None:
        features = kwargs.get('include', None)
    if features is None:
        features = utils.all_features()
    kwargs['include'] = features

    if use_preloaded:
        participants, X, calibrations, y = self.normalized
    elif not use_provided:
        participants, X, calibrations, y = self.normalize_data(
            participants, X, calibrations, y, **kwargs
        )
    # else: use the provided arguments untouched.

    X = self.load_all_features(participants, X, calibrations, y, **kwargs)
    if y is not None:
        X = self.attach_target(X, y)
    if self.use_dynamic_features:
        X = utils.only_endpoints(X)

    # A feature listed in ``self.multi_features`` expands to several
    # columns; any other name stands for a single column of the same name.
    expanded = [
        column
        for feature in features
        for column in self.multi_features.get(feature, [feature])
    ]
    self.add_features = [
        column for column in expanded if column not in exclude_features
    ]

    X = X.set_index('cid')[self.add_features]
    # Fill gaps with the column mean; columns whose mean is itself NaN
    # still hold NaNs afterwards and are zero-filled.
    X = X.fillna(X.mean())
    if X.isna().any().any():
        X = X.fillna(0)
    return participants, X, calibrations, y
def export_SelectKBest_chi2(point_model, path):
    """Write the top-10 rows of the feature ranking as a LaTeX table.

    The ranking comes from ``point_model.find_best_features`` run on
    ``point_model.normalized`` with body fields, body orientation fields and
    all features as ``fields``. The table is wrapped with ``pack_table`` and
    written to ``<path>content/pointing_movement/import/table_chi2.tex``.

    Note that ``path`` is used as a plain string prefix, so it has to end
    with a path separator.
    """
    top_k = 10
    out_file = path + 'content/pointing_movement/import/' + 'table_chi2.tex'

    participants, data, calibrations, targets = point_model.normalized
    candidate_fields = (
        utils.all_body_fields()
        + utils.all_body_orientation_fields()
        + utils.all_features()
    )
    ranking = point_model.find_best_features(
        participants, data, calibrations, targets,
        fields=candidate_fields, load_features=True
    )
    best = ranking.head(top_k)

    latex = pack_table(
        best.to_latex(escape=False, index=False),
        '$\\chi^2$ scores of the top %d features' % top_k,
        'tab:pointing_movement:chi2'
    )
    with open(out_file, 'w') as handle:
        handle.write(latex)
    print('exported SelectKBest chi2 latex table.')
def export_pca_components(point_model, path):
    """Write the summed absolute PCA coefficients per field as a LaTeX table.

    Resets ``point_model``, fits a PCA over body fields, body orientation
    fields and all features, rounds the component matrix to two decimals,
    sums the absolute coefficients per field and sorts them in descending
    order. The table is wrapped with ``pack_table`` and written to
    ``<path>content/pointing_movement/import/table_pca_components.tex``.

    Note that ``path`` is used as a plain string prefix, so it has to end
    with a path separator.
    """
    path += 'content/pointing_movement/import/'
    point_model.reset()
    base_fields = (
        utils.all_body_fields()
        + utils.all_body_orientation_fields()
        + utils.all_features()
    )
    # compute_pca returns ``(fitted_pca, fields)``. Only the field list is a
    # valid set of column labels; passing the whole tuple as ``columns``
    # (as this code used to) does not line up with ``components_``.
    pca, fields = point_model.compute_pca(base_fields=base_fields)
    components = pd.DataFrame(pca.components_, columns=fields).round(2)
    components = components.abs().sum().sort_values(ascending=False)

    path += 'table_pca_components.tex'
    table = components.to_latex(escape=False)
    # NOTE(review): the label below names the participants table, which
    # looks copy-pasted for a PCA table. Left unchanged because existing
    # documents may reference it -- confirm before renaming.
    latex = pack_table(
        table,
        'Overview of PCA Coefficients',
        'tab:data_collection:participants'
    )
    with open(path, 'w') as f:
        f.write(latex)
    print('exported PCA components latex table.')
def compute_pca(self, **kwargs):
    """Fit a PCA on the standardised feature matrix.

    Keyword Args:
        base_fields: field names always included
            (default ``utils.all_body_fields()``).
        n_components: forwarded to ``PCA`` (default ``'mle'``).
        include_features: feature names to add on top of ``base_fields``
            (default ``utils.all_features()``).
        exclude_features: feature names removed from ``include_features``
            (default ``[]``).
        load_features: when false only ``base_fields`` are used
            (default True).

    Side effects:
        Stores the fitted estimator in ``self.pca``. ``self.preprocess``
        is called, which updates ``self.add_features``.

    Returns:
        ``(self.pca, fields)`` where ``fields`` is the list of field names
        handed to ``self.preprocess``.
    """
    base_fields = kwargs.get('base_fields', utils.all_body_fields())
    n_components = kwargs.get('n_components', 'mle')
    exclude_features = kwargs.get('exclude_features', [])
    include_features = kwargs.get('include_features', utils.all_features())
    load_features = kwargs.get('load_features', True)

    fields = base_fields
    if load_features:
        excluded = set(exclude_features)
        # De-duplicate while keeping first-seen order. The previous
        # ``list(set(...))`` made the column order (and therefore the
        # returned labels) depend on string hash randomisation, so it
        # changed from one interpreter run to the next.
        seen = set()
        fields = []
        for name in [*include_features, *base_fields]:
            if name in excluded and name not in base_fields:
                continue
            if name not in seen:
                seen.add(name)
                fields.append(name)

    _, X, _, _ = self.preprocess(features=fields, include=fields)

    # Fill gaps with the column mean; zero-fill columns that are all-NaN.
    X = X.fillna(X.mean())
    if X.isna().any().any():
        X = X.fillna(0)

    # NOTE(review): the matrix columns follow ``self.add_features`` while
    # the caller receives ``fields``. These agree only if preprocess does
    # not expand any entry via ``multi_features`` -- confirm.
    X = X[self.add_features].values
    X = StandardScaler().fit_transform(X)

    pca = PCA(n_components=n_components)
    self.pca = pca.fit(X)
    return self.pca, fields
def export(point_model, path="./export"):
    """Render and save all figures below ``<path>/content``.

    Steps, in order:

    1. Create ``<path>/content/<section>/figures/generated`` for the
       pointing-movement, data-collection-study and application sections.
    2. For every target group of ``point_model.normalized`` (with targets
       attached) save a projection plot with and without the true target.
    3. Subtract the ``hmd.X`` / ``hmd.Y`` / ``hmd.Z`` columns from the x / y /
       z field groups and save the same two plots again, using projection
       fields that exclude ``'leftShoulder'``.
    4. Run every configured plotting method of ``point_model`` (PCA,
       correlation matrices, feature rankings, PCA coefficients, KDE plots,
       box plots, per-participant projections, count histograms).
    5. Save the target grid.
    """
    content_root = '%s/content' % path
    generated_suffix = "/figures/generated"

    def path_gen(folder):
        # Compose the output directory for one section and make sure it exists.
        target_dir = content_root + folder + generated_suffix
        utils.ensure_dir_exists(target_dir)
        return target_dir

    pointing_movement_path = path_gen("/pointing_movement")
    data_collection_path = path_gen("/data_collection_study")
    application_path = path_gen("/application")

    _, X, _, y = point_model.normalized
    X = point_model.attach_target(X, y)

    proj_defaults = {
        'save': True,
        'view_elev': 20,
        'view_azim': 50,
        'highlight_endpoint': False
    }

    def plot_each_target(file_template, hmd_relative=False, **options):
        # Save one projection per target group. The group key (a tuple)
        # fills the trailing placeholders of ``file_template``.
        def render(group):
            call_kwargs = {}
            if hmd_relative:
                call_kwargs['proj_fields'] = (
                    utils.x_fields(exclude=['leftShoulder']),
                    utils.y_fields(exclude=['leftShoulder']),
                    utils.z_fields(exclude=['leftShoulder'])
                )
            call_kwargs['save_path'] = (
                file_template % ((data_collection_path, ) + group.name)
            )
            call_kwargs.update(options)
            call_kwargs.update(proj_defaults)
            return plotting.plot_projection(group, group.name, **call_kwargs)

        X.groupby(utils.target_fields()).apply(render)

    plot_each_target("%s/plot_projection_grouped_%s_%s_%s.png")
    plot_each_target(
        "%s/plot_projection_grouped_%s_%s_%s_no_target.png",
        show_marker_labels=True,
        set_plot_limit=False,
        show_true_target=False
    )

    # Express all coordinates relative to the hmd columns, axis by axis.
    for axis_fields, hmd_column in (
        (utils.x_fields, 'hmd.X'),
        (utils.y_fields, 'hmd.Y'),
        (utils.z_fields, 'hmd.Z'),
    ):
        X[axis_fields()] = X[axis_fields()].sub(X[hmd_column], axis=0)

    plot_each_target(
        "%s/plot_projection_grouped_hmd_normalized_%s_%s_%s_no_target.png",
        hmd_relative=True,
        show_marker_labels=True,
        set_plot_limit=False,
        show_true_target=False
    )
    plot_each_target(
        "%s/plot_projection_grouped_hmd_normalized_%s_%s_%s.png",
        hmd_relative=True
    )

    def job(fn, **fn_args):
        # One entry of the plotting schedule: method name + its kwargs.
        return {'fn': fn, 'fn_args': fn_args}

    # Per-participant projections (participants 1..13, collection 70),
    # each once with and once without the true target.
    collection = 70
    projections = []
    for participant in range(1, 14):
        shared = {
            'plot_key': 'projection',
            'save': True,
            "participant": participant,
            "collection": collection,
            'data': 'normalized_all_snapshots'
        }
        projections.append({
            'fn': 'plot_config',
            'fn_args': {
                'save_path': (
                    "%s/plot_projection_%s_%s.png"
                    % (data_collection_path, participant, collection)
                ),
                **shared
            }
        })
        projections.append({
            'fn': 'plot_config',
            'fn_args': {
                'save_path': (
                    "%s/plot_projection_%s_%s_no_target.png"
                    % (data_collection_path, participant, collection)
                ),
                "show_marker_labels": True,
                "show_true_target": False,
                "set_plot_limit": False,
                **shared
            }
        })

    count_hist_keys = [
        'above_head',
        'above_hand',
        'indexfinger_body_position_x',
        'indexfinger_body_position_y',
        'indexfinger_body_position_z',
    ]
    count_hist = [
        job(
            'plot_config',
            save=True,
            plot_key="count_hist_plot_%s" % key,
            save_path=(
                "%s/plot_count_hist_%s.png" % (pointing_movement_path, key)
            )
        )
        for key in count_hist_keys
    ]

    # Every feature that does not get a count histogram gets a box plot.
    boxplots = [
        job(
            'plot_config',
            save=True,
            plot_key="boxplot_%s" % key,
            save_path="%s/boxplot_%s.png" % (pointing_movement_path, key)
        )
        for key in utils.all_features()
        if key not in count_hist_keys
    ]

    orientation_keys = [
        utils.field_to_orientation_key(field)
        for field in utils.flatten(
            [utils.body_orientation_field(part)
             for part in ['upperarm', 'hmd']]
        )
    ]
    # Each orientation key gets a KDE plot and an additional box plot.
    kde = []
    for key in orientation_keys:
        kde.append(job(
            'plot_config',
            save=True,
            plot_key="kde_%s" % key,
            save_path=pointing_movement_path + "/kde_%s.png" % key
        ))
        boxplots.append(job(
            'plot_config',
            save=True,
            plot_key="boxplot_%s" % key,
            save_path=pointing_movement_path + "/boxplot_%s.png" % key
        ))

    features = utils.all_features()

    def feature_keys():
        # Fresh list of the model's feature-function names on every use.
        return list(point_model.feature_functions.keys())

    # NOTE: a 'plot_pca' entry using the feature-function keys plus targets
    # as base_fields ("plot_pca_features.png") used to follow the second
    # PCA entry and is disabled.
    schedule = [
        # plot PCA
        job(
            'plot_pca',
            save=True, force=True,
            load_features=False,
            save_path="%s/plot_pca_base.png" % pointing_movement_path
        ),
        job(
            'plot_pca',
            save=True, force=True,
            include_features=features,
            save_path="%s/plot_pca_all.png" % pointing_movement_path
        ),
        job(
            'plot_pca',
            save=True, force=True,
            base_fields=features,
            save_path=(
                "%s/plot_pca_all_features.png" % pointing_movement_path
            )
        ),
        # plot correlation matrix
        job(
            'plot_correlation_matrix',
            save=True, force=True,
            title='Correlation matrix for raw data and targets',
            additional_fields=utils.target_fields(),
            save_path=(
                "%s/correlation_matrix_base.png" % pointing_movement_path
            )
        ),
        job(
            'plot_correlation_matrix',
            save=True, force=True,
            title=(
                'Correlation matrix for '
                'raw data, features and targets'
            ),
            additional_fields=features + utils.target_fields(),
            save_path=(
                "%s/correlation_matrix_all.png" % pointing_movement_path
            )
        ),
        job(
            'plot_correlation_matrix',
            save=True, force=True,
            title=(
                'Correlation matrix for '
                'raw orientation data and targets'
            ),
            base_fields=(
                utils.all_body_orientation_fields()
                + utils.target_fields()
            ),
            save_path=(
                "%s/correlation_matrix_orientations.png"
                % pointing_movement_path
            )
        ),
        job(
            'plot_correlation_matrix',
            save=True, force=True,
            title=(
                'Correlation matrix for '
                'all computed features and the targets'
            ),
            base_fields=feature_keys() + utils.target_fields(),
            include=feature_keys(),
            save_path=(
                "%s/correlation_matrix_all_features.png"
                % pointing_movement_path
            )
        ),
        job(
            'plot_correlation_matrix',
            save=True, force=True,
            title='Correlation matrix for features and targets',
            base_fields=features + utils.target_fields(),
            save_path=(
                "%s/correlation_matrix_features.png"
                % pointing_movement_path
            )
        ),
        # feature rankings
        job(
            'plot_extratrees',
            save=True,
            save_path="%s/plot_extratrees.png" % pointing_movement_path,
            xticks_rot=90
        ),
        job(
            'plot_extratrees',
            save=True,
            save_path=(
                "%s/plot_extratrees_all_features.png"
                % pointing_movement_path
            ),
            fields=feature_keys(),
            xticks_rot=90
        ),
        job(
            'plot_selectKBest_chi2',
            save=True,
            save_path=(
                "%s/plot_selectKBest_chi2.png" % pointing_movement_path
            ),
            xticks_rot=90
        ),
        job(
            'plot_selectKBest_chi2',
            save=True,
            save_path=(
                "%s/plot_selectKBest_chi2_all_features.png"
                % pointing_movement_path
            ),
            fields=feature_keys(),
            xticks_rot=90
        ),
        job(
            'plot_selectKBest_mutual_information',
            save=True,
            save_path=(
                "%s/plot_selectKBest_mi.png" % pointing_movement_path
            ),
            xticks_rot=90
        ),
        job(
            'plot_selectKBest_mutual_information',
            save=True,
            save_path=(
                "%s/plot_selectKBest_mi_all_features.png"
                % pointing_movement_path
            ),
            fields=feature_keys(),
            xticks_rot=90
        ),
        job(
            'plot_pca_coefficients',
            save=True,
            base_fields=features,
            save_path=(
                "%s/plot_pca_coefficients_features.png"
                % pointing_movement_path
            ),
            legend_kws={'ncol': 6}
        ),
    ] + kde + boxplots + projections + count_hist

    # Dispatch every scheduled plot to the method of the same name.
    for entry in schedule:
        plot_method = getattr(point_model, entry['fn'])
        plot_method(**entry['fn_args'])

    plotting.plot_target_grid(
        save=True,
        save_path=application_path + "/targets.png"
    )
def analyze_all_features(self, path='./feature_analysis', **kwargs):
    """Write a full feature analysis (statistics and plots) below ``path``.

    Loads all features onto ``self.normalized``, attaches the targets and
    then, in order:

    * writes ``describe.csv`` with summary statistics of all features,
    * calls ``self.analyze_feature`` for each feature (``kwargs`` forwarded),
    * saves five correlation matrices and two PCA coefficient plots,
    * plots the PCA without and with features,
    * optionally produces three pair plots.

    Keyword Args:
        base_pairplot (default False): also produce the pair plots.
        save (default True): forwarded as ``save`` to the PCA and pair
            plots. The correlation-matrix and PCA-coefficient plots are
            always saved.
    """
    base_pairplot = kwargs.get('base_pairplot', False)
    save = kwargs.get('save', True)
    utils.ensure_dir_exists(path)

    participants, X, calibrations, y = self.normalized
    featureX = self.attach_target(
        self.load_all_features(participants, X, calibrations, y), y
    )

    features = utils.all_features()
    featureX[features].describe().to_csv("%s/describe.csv" % path)

    for key in features:
        self.analyze_feature(
            participants, featureX, calibrations, y, key,
            path=path, **kwargs
        )

    def correlation(**options):
        # Correlation matrices are always recomputed and written to disk.
        self.plot_correlation_matrix(force=True, save=True, **options)

    correlation(
        additional_fields=features,
        save_path="%s/correlation_matrix.png" % path
    )
    correlation(
        additional_fields=features + utils.target_fields(),
        save_path="%s/correlation_matrix_with_targets.png" % path
    )
    correlation(
        base_fields=features + utils.target_fields(),
        save_path="%s/correlation_matrix_features.png" % path
    )
    correlation(
        include=list(self.feature_functions.keys()),
        base_fields=(
            list(self.feature_functions.keys()) + utils.target_fields()
        ),
        save_path=(
            "%s/correlation_matrix_all_features_with_targets.png" % path
        )
    )
    correlation(
        additional_fields=utils.target_fields(),
        save_path="%s/correlation_matrix_base.png" % path
    )

    self.plot_pca_coefficients(
        force=True, save=True,
        base_fields=utils.all_body_fields(),
        save_path="%s/pca_coefficients_base.png" % path
    )
    self.plot_pca_coefficients(
        force=True, save=True,
        base_fields=features,
        save_path="%s/pca_coefficients_features.png" % path
    )

    pca_base_file = "%s/pca_base.png" % path
    utils.ensure_dir_exists(pca_base_file, is_file=True)
    self.plot_pca(
        load_features=False, force=True, save=save, save_path=pca_base_file
    )

    pca_all_file = "%s/pca_all.png" % path
    utils.ensure_dir_exists(pca_all_file, is_file=True)
    self.plot_pca(
        include_features=features, force=True, save=save,
        save_path=pca_all_file
    )

    if not base_pairplot:
        return

    print('plotting pairplots:')
    print('\t start features')
    self.plot_pairplot(
        force=True,
        base_fields=features + utils.target_fields(),
        save=save,
        save_path="%s/pairplot_features.png" % path
    )
    print('\t start base')
    self.plot_pairplot(
        force=True, save=save,
        base_fields=utils.all_body_fields() + utils.target_fields(),
        save_path="%s/pairplot_base.png" % path
    )
    print('\t start orientations')
    self.plot_pairplot(
        force=True, save=save,
        base_fields=(
            utils.all_body_orientation_fields() + utils.target_fields()
        ),
        save_path="%s/pairplot_orientations.png" % path
    )
def export(point_model, **kwargs):
    """Run the complete export pipeline for ``point_model``.

    In order: figure export, statistics export, the feature analysis
    (without pair plots), a printout of all feature names and finally the
    machine-learning export. ``kwargs`` is forwarded to the three exporters
    but not to ``analyze_all_features``.
    """
    figures_export(point_model, **kwargs)
    stats_export(point_model, **kwargs)

    # Pair plots are skipped here.
    point_model.analyze_all_features(base_pairplot=False)

    feature_names = utils.all_features()
    print("features:", feature_names)

    ml_export(point_model, **kwargs)