def export(filename="export", **kwargs):
    """Write one CSV row per (pid, cid) collection to ``<filename>.csv``.

    The data comes from ``log(**kwargs)``. For every (pid, cid) group the
    last row of ``X`` is kept, joined with the participant table and merged
    with the target table ``y``. The result is indexed by ``collectionId``
    and sorted by the target fields before it is written.
    """
    participants, X, y = log(**kwargs)

    # Last recorded row of every collection, keyed by participant id.
    last_rows = X.groupby(['pid', 'cid']).tail(1).reset_index()
    with_participants = (
        last_rows
        .set_index("pid")
        .join(participants, how='outer')
        .reset_index()
        .rename(columns={'level_0': 'participantId'})
    )

    with_targets = with_participants.merge(
        y.reset_index(),
        left_on=['participantId', 'cid'],
        right_on=['pid', 'id'],
    )

    table = (
        with_targets
        .drop(columns=['id_x', 'id_y', 'pid'])
        .rename(columns={'cid': 'collectionId'})
        .set_index('collectionId')
        .sort_values(by=utils.target_fields())
    )

    exported_columns = (
        ['participantId']
        + utils.target_fields()
        + utils.participant_fields()
        + utils.all_body_fields()
        + utils.all_body_orientation_fields()
    )
    table[exported_columns].to_csv('%s.csv' % filename)
    print("exported to %s.csv" % filename)
def compute_pairplot(self, **kwargs):
    """Prepare the frame used for pair plots and store it on the instance.

    Keyword arguments:
        additional_fields: extra columns appended to the selection
            (default: none).
        base_fields: base columns (default: ``utils.all_body_fields()``).

    The target columns are collapsed into one tuple-valued ``target``
    column; the result is stored in ``self.pairplot_data``.
    """
    extra_fields = kwargs.get('additional_fields', [])
    base_fields = kwargs.get('base_fields', utils.all_body_fields())
    selection = base_fields + utils.target_fields() + extra_fields

    _, X, _, _ = self.preprocess(features=selection)

    def as_tuple(row):
        return tuple(row)

    target_columns = utils.target_fields()
    X['target'] = X[target_columns].apply(as_tuple, axis=1)
    self.pairplot_data = X.drop(columns=target_columns)
def kdeplot_feature(X, key, **kwargs):
    """Draw one KDE of column ``key`` per target on a 3 x 9 grid of axes.

    The row of a target's axes is the index of its first component in
    ``utils.get_horizontal_targets()``; the column is
    ``vertical_index * 3 + depth_index``.

    Keyword arguments:
        label_targets: label the lines of the current axes via ``labelLines``.
        figsize: figure size (default: module-level ``g_figsize``).
        xlim, ylim: when BOTH are given, every sub-plot is limited to them
            and annotated with its target tuple.
        title, ylabel, xlabel: texts for the overlay axes.
        Remaining keyword arguments are forwarded to ``plt_show_save``.
    """
    label_targets = kwargs.get('label_targets', False)
    figsize = kwargs.get('figsize', g_figsize)
    ylim = kwargs.get('ylim', None)
    xlim = kwargs.get('xlim', None)

    fig, axs = plt.subplots(3, 9, figsize=figsize, sharex='col',
                            sharey='row',
                            gridspec_kw={'hspace': 0, 'wspace': 0})

    def plot_kde(xs):
        # ``xs.name`` is the group key, i.e. the target tuple.
        tx, ty, tz = xs.name
        itx = utils.get_horizontal_targets().index(tx)
        ity = utils.get_vertical_targets().index(ty)
        itz = utils.get_depth_targets().index(tz)
        _ax = axs[itx, ity * 3 + itz]
        g = sns.kdeplot(xs[key], ax=_ax, shade=True)
        # A legend is not always created; only remove it when it exists
        # instead of failing on ``None``.
        legend = g.get_legend()
        if legend is not None:
            legend.remove()
        if ylim is not None and xlim is not None:
            _ax.text(xlim[0] - (xlim[0] * .05),
                     ylim[1] - (ylim[1] * .2),
                     str(xs.name))
            _ax.set_ylim(ylim)
            _ax.set_xlim(xlim)

    X[[key] + utils.target_fields()]\
        .groupby(utils.target_fields())\
        .apply(plot_kde)

    if label_targets:
        labelLines(plt.gca().get_lines(), fontsize=10, zorder=2.5)

    # Invisible axes spanning the whole figure; it only carries the shared
    # title and axis labels.
    fig.add_subplot(111, frameon=False)
    # hide tick and tick label of the big axis
    plt.tick_params(labelcolor='none', top=False, bottom=False,
                    left=False, right=False)
    plt.title(kwargs.get('title', 'kde plot of %s' % key))
    plt.ylabel(kwargs.get('ylabel', 'Density'))
    plt.xlabel(kwargs.get('xlabel', utils.orientation_to_readable(key)))
    plt_show_save(fig, **kwargs)
def find_best_features_extratree(self, participants, X, calibrations, y,
                                 **kwargs):
    """Rank features by ExtraTrees importance for predicting the target.

    Keyword arguments:
        fields: fields handed to ``load_all_features`` as ``include``
            (default: all body and body-orientation fields).
        load_features: when true, features are (re)computed through
            ``self.load_all_features`` and ``X`` is limited to
            ``self.add_features``.

    Returns:
        ``pd.Series`` of feature importances indexed by column name, sorted
        ascending.

    The caller's ``y`` frame is not modified; the class label is built in a
    local Series.
    """
    from sklearn import ensemble, preprocessing

    default_fields =\
        utils.all_body_fields() + utils.all_body_orientation_fields()
    fields = kwargs.get('fields', default_fields)
    load_features = kwargs.get('load_features', False)

    if load_features:
        X = self.load_all_features(participants, X, calibrations, y,
                                   include=fields)
        # NOTE(review): the column selection is assumed to belong to the
        # ``load_features`` branch (original indentation was lost) - confirm.
        X = X[self.add_features]

    columns = copy.deepcopy(X.columns)

    # Treat +/-inf like missing values and replace both with column means.
    with pd.option_context('mode.use_inf_as_na', True):
        X = X.fillna(X.mean())

    # One class per distinct target tuple, encoded as integers.
    target_labels = y[utils.target_fields()]\
        .apply(lambda xs: str(tuple(xs)), axis=1)
    encoded = preprocessing.LabelEncoder().fit_transform(target_labels)

    scaled = StandardScaler().fit_transform(X)

    model = ensemble.ExtraTreesClassifier()
    model.fit(scaled, encoded)

    feat_importances = pd.Series(model.feature_importances_, index=columns)
    return feat_importances.sort_values()
def plot_grouped_line(X, feature, **kwargs):
    """Plot ``feature`` as lines, one colour per target group.

    Keyword arguments:
        groupby_time: for the individual lines, group by time rounded to two
            decimals instead of by (pid, cid).
        plot_mean_line: draw the per-time mean of each target (default True).
        plot_all_lines: also draw the individual, faint lines.
        y: column used as x-axis of the individual lines (default 'time').
        Remaining keyword arguments go to ``get_fig_ax``,
        ``plt_title_label_lim_legend`` and ``plt_show_save``.
    """
    by_time = kwargs.get('groupby_time', False)
    draw_mean = kwargs.get('plot_mean_line', True)
    draw_all = kwargs.get('plot_all_lines', False)
    x_column = kwargs.get('y', 'time')
    fig, ax = get_fig_ax(**kwargs)

    for target, rows in X.groupby(utils.target_fields()):
        if draw_all:
            lines = (
                rows.round({'time': 2}).groupby(['time'])
                if by_time
                else rows.groupby(['pid', 'cid'])
            )
            lines.plot.line(
                y=feature,
                x=x_column,
                ax=ax,
                label=target,
                color="C%d" % get_target_color(target, return_index=True),
                alpha=.1,
            )
        if draw_mean:
            mean_by_time = rows.round({'time': 2}).groupby(['time']).mean()
            mean_by_time.plot.line(
                y=feature,
                ax=ax,
                label='%s' % str(target),
                color="C%d" % get_target_color(target, return_index=True),
                alpha=.6,
            )

    kwargs.update({
        'remove_legend_duplicates': True,
        'legend': True,
        'legend_kws': {'ncol': 3},
    })
    plt_title_label_lim_legend(**kwargs)
    plt_show_save(fig, **kwargs)
def attach_text(feature, key, ax):
    """Write the row's target as text at (time, feature[key]) on ``ax``."""
    target_x, target_y, target_z = feature[utils.target_fields()]
    caption = '%s, %s, %s' % (target_x, target_y, target_z)
    ax.text(feature['time'], feature[key], caption, fontsize=10)
def attach_target(self, X, y):
    """Return ``X`` with the target columns of ``y`` merged in.

    ``X`` is returned unchanged when it already has every target field.
    Otherwise rows are matched with a left merge on ``X.(cid, pid)`` ==
    ``y.(id, pid)``, the original index of ``X`` is restored and the target
    columns are cast to float.
    """
    target_fields = utils.target_fields()
    missing = [field for field in target_fields if field not in X.columns]
    if not missing:
        return X

    merged = X.reset_index().merge(
        y.reset_index(),
        right_on=['id', 'pid'],
        left_on=['cid', 'pid'],
        how="left",
    )
    merged = merged.drop(columns=['id_y'])
    merged = merged.rename(columns={'id_x': 'id', 'index_x': 'index'})
    Xs = merged.set_index('index')

    # Only present when both frames contributed an 'index' column.
    if 'index_y' in Xs.columns:
        Xs = Xs.drop(columns=['index_y'])

    utils.check_dimensions(X, Xs, additional_columns=3, name='target')

    for field in utils.target_fields():
        Xs[field] = Xs[field].astype(float)
    return Xs
def attach_mean(feature, key, ax, label_mean):
    """Draw a dotted horizontal line at the mean of ``feature[key]``.

    When ``label_mean`` is true the line is labelled with the pid and target
    of the first row. The colour is taken from the module-level ``colors``
    iterator. Returns the mean.
    """
    if label_mean:
        first_row = feature.iloc[0]
        tx, ty, tz = first_row[utils.target_fields()]
        label = "$\\overline{x}$,p:%d,t:(%s,%s,%s)" % (
            first_row.pid, tx, ty, tz)
    else:
        label = ""

    mean = feature[key].mean()
    ax.axhline(mean, ls=':', label=label, c=next(colors))
    return mean
def plot_truetarget(y, ax):
    """Mark the true target on the 3D axes ``ax`` and label it.

    ``y`` is either a 3-tuple or an object indexable by the target fields.
    The second and third components are swapped when plotting (second
    component goes to the third plot axis), while the text shows the
    components in their given order.
    """
    components = (
        y if isinstance(y, tuple) else y[utils.target_fields()].values
    )
    first, second, third = components

    ax.scatter(first, third, second, marker='x', c='r', s=20)
    caption = '%.2f, %.2f, %.2f' % (first, second, third)
    ax.text(first + offset, third + offset, second + offset, caption,
            fontsize=10)
def automatic_features(self, participants, X, calibrations, y, model, **kwargs):
    """Generate features with featuretools (``ft``) and cross-validate a model.

    A combined ``cid_pid`` key column is added to ``X`` (in place), an entity
    set is built from it, deep feature synthesis is run and the resulting
    matrix (missing values replaced by column medians) is passed to
    ``model(feature_matrix, y)``, on which
    ``better_kfold_cross_validation()`` is called.

    ``participants``, ``calibrations`` and ``kwargs`` are only referenced by
    commented-out code. Nothing is returned.
    """
    # X = self.attach_target(X, y)
    # X = self.load_all_features(participants, X, calibrations, y)
    # Unique row key "<cid>_<pid>" used as the entity index below.
    X['cid_pid'] = X.apply(lambda xs: "%s_%s" % (xs['cid'], xs['pid']), axis=1)
    # print(participants.head(), X.head(), y.head(), sep='\n')
    body_types = {
        k: ft.variable_types.Numeric for k in utils.all_body_fields()
    }
    # Built but not used: its only consumer in ``vt`` is commented out.
    target_types = {k: ft.variable_types.Id for k in utils.target_fields()}
    vt = {
        'cid_pid': ft.variable_types.Index,
        'id': ft.variable_types.Id,
        'cid': ft.variable_types.Id,
        'pid': ft.variable_types.Id,
        'time': ft.variable_types.Numeric,
        **body_types,
        # **target_types
    }
    es = ft.EntitySet(id="pointing_movement")
    es = es.entity_from_dataframe(entity_id='collections', dataframe=X, index='cid_pid', variable_types=vt)
    selected = ['hmd', 'indexfinger']
    # One normalized entity per field returned by utils.body_field for the
    # selected names.
    for f in utils.flatten(map(utils.body_field, selected)):
        es = es.normalize_entity(base_entity_id='collections', new_entity_id=f, index=f)
    # es.plot(to_file='./plot.png')
    feature_matrix, features_defs = ft.dfs(entityset=es, entities=es, target_entity="collections", verbose=1)
    # feature_matrix.to_csv('./export.csv')
    # feature_matrix = self.load_all_features(
    #     participants, feature_matrix, calibrations, y
    # )
    feature_matrix = feature_matrix.fillna(feature_matrix.median())
    modl = model(feature_matrix, y)
    # modl.gridsearch()
    modl.better_kfold_cross_validation()
def plot_violinplot(X, feature_key, **kwargs):
    """Draw a violin plot of ``X[feature_key]`` for every target.

    The target fields of each row are combined into one string label. The
    label lives in a local Series named ``target``; the caller's ``X`` is
    left untouched.

    Keyword arguments are forwarded to ``get_fig_ax``,
    ``plt_title_label_lim_legend`` (with ``xticks_rot`` forced to 90) and
    ``sns_show_save``.
    """
    fig, ax = get_fig_ax(**kwargs)

    # Named 'target' so the axis label matches a column of that name.
    target = X[utils.target_fields()]\
        .apply(lambda xs: str(tuple(xs)), axis=1)\
        .rename('target')

    sns.violinplot(x=target, y=X[feature_key], ax=ax)

    kwargs['xticks_rot'] = 90
    plt_title_label_lim_legend(**kwargs)
    sns_show_save(fig, **kwargs)
def plot_means(X, feature_key, **kwargs):
    """Plot the median of ``X[feature_key]`` for every target.

    Despite the function name the plotted statistic is the median (the
    line is labelled 'median'). The per-row target label is attached to a
    temporary copy, so the caller's ``X`` is not modified.

    Keyword arguments are forwarded to ``get_fig_ax``,
    ``plt_title_label_lim_legend`` (with ``xticks_rot`` forced to 90) and
    ``plt_show_save``.
    """
    fig, ax = get_fig_ax(**kwargs)

    target = X[utils.target_fields()]\
        .apply(lambda xs: str(tuple(xs)), axis=1)
    medians = X[[feature_key]]\
        .assign(target=target)\
        .groupby('target')[feature_key]\
        .median()

    plt.plot(medians.index, medians.values, marker='.', label='median')

    kwargs['xticks_rot'] = 90
    plt_title_label_lim_legend(**kwargs)
    plt_show_save(fig, **kwargs)
def boxplot_feature(features, key, **kwargs):
    """Draw a box plot of column ``key`` grouped by target (or ``groupby``).

    Keyword arguments:
        xlabel: x-axis label (default 'target').
        groupby: grouping columns (default: the target fields).
        rename: when true (default) the frame, ``key`` and the grouping
            columns are converted with the short field names first.
        Remaining keyword arguments go to ``get_fig_ax``,
        ``plt_title_label_lim_legend`` and ``plt_show_save``.
    """
    kwargs.setdefault('xlabel', 'target')
    group_columns = kwargs.get('groupby', utils.target_fields())

    if kwargs.get('rename', True):
        features = rename_fields(features, sort=False)
        key = utils.body_fields_short(key)
        group_columns = [
            utils.body_fields_short(column) for column in group_columns
        ]

    fig, ax = get_fig_ax(**kwargs)
    features.boxplot(by=group_columns, column=key, ax=ax, rot=90)
    # Clear the figure-level super title.
    plt.suptitle("")
    plt_title_label_lim_legend(**kwargs)
    plt_show_save(fig, **kwargs)
def attach_feat(feature, key, ax, **kwargs):
    """Plot ``feature[key]`` over ``feature['time']`` on ``ax``.

    Keyword arguments:
        line_type: 'line' (default) or 'scatter'.
        label_feature: label the artist with pid, cid and target of the
            first row (default False).
        scatter_size: marker size for scatter plots (default 50).

    A single-row frame is always drawn as a scatter point. The marker is
    taken from the module-level ``markers`` iterator on every call.
    """
    line_type = kwargs.get('line_type', 'line')
    label_feature = kwargs.get('label_feature', False)
    scatter_size = kwargs.get('scatter_size', 50)

    label = ""
    if label_feature:
        # pid/cid/target are only needed for the label, so they are only
        # looked up when a label is requested.
        first_row = feature.iloc[0]
        x, y, z = first_row[utils.target_fields()]
        label = "p:%d, c:%d, t:(%s,%s,%s)" % (
            first_row.pid, first_row.cid, x, y, z)

    marker = next(markers)
    if feature.shape[0] == 1 or line_type == 'scatter':
        ax.scatter(feature['time'], feature[key], label=label,
                   marker=marker, s=scatter_size)
    else:
        ax.plot(feature['time'], feature[key], label=label,
                marker=marker, markersize=4)
def plot_feature(features, key, **kwargs):
    """Plot column ``key`` over time for every (pid, cid) collection.

    Keyword arguments:
        plot_feature: draw the individual collections (default True).
        plot_target: write the target next to the last sample of each
            collection.
        plot_mean: draw a mean line per (pid, target); ``label_mean``
            controls whether these lines are labelled.
        plot_time_mean: draw, per pid, the mean over time rounded to two
            decimals.
        All keyword arguments are also forwarded to ``get_fig_ax``,
        ``attach_feat``, ``plt_title_label_lim_legend`` and
        ``plt_show_save``.
    """
    label_mean = kwargs.get('label_mean', False)
    show_feature = kwargs.get('plot_feature', True)
    show_target = kwargs.get('plot_target', False)
    show_mean = kwargs.get('plot_mean', False)
    show_time_mean = kwargs.get('plot_time_mean', False)

    features = rename_fields(features)
    key = utils.body_fields_short(key)
    fig, ax = get_fig_ax(**kwargs)

    # NOTE(review): the original indentation was lost; everything below is
    # assumed to run inside the style context - confirm.
    with plt.style.context(style_label):
        plot_columns = ['pid', 'cid', key, 'time'] + utils.target_fields()
        collections = features[plot_columns].groupby(['pid', 'cid'])

        if show_feature:
            collections.apply(lambda f: attach_feat(f, key, ax, **kwargs))

        if show_target:
            collections.tail(1).apply(
                lambda f: attach_text(f, key, ax), axis=1)

        if show_mean:
            features.groupby(['pid'] + utils.target_fields())\
                .apply(lambda f: attach_mean(f, key, ax, label_mean))

        if show_time_mean:
            decimals = 2
            features['time'] = features['time']\
                .apply(lambda x: round(x, decimals))
            mean_per_time = features[['pid', key, 'time']]\
                .groupby(['time', 'pid'])\
                .mean()
            mean_per_time.groupby(['pid'])\
                .apply(lambda f: attach_feature_mean(f, key, ax))

        plt_title_label_lim_legend(**kwargs)
        plt_show_save(fig, **kwargs)
def rename_fields(df, axis=1, targets=None, sort=False):
    """Rename labels to their short form and move target labels to the end.

    Args:
        df: ``pd.DataFrame`` or ``pd.Series``.
        axis: 0, 1 or ``'both'`` (both axes, index first).
        targets: labels that are placed last. When ``None`` they default to
            ``utils.target_fields()`` if ``df`` is a DataFrame with a
            ``'TrueTarget.X'`` column, otherwise to no labels.
        sort: sort the non-target labels.

    Returns:
        The renamed object. When the requested order differs from the
        current one the DATA is reordered together with its labels;
        previously only the labels were reassigned, which attached them to
        the wrong columns/rows.

    NOTE(review): other code in this file spells the field
    ``'trueTarget.X'`` (lower-case first letter) - confirm which spelling the
    default-target check should use.
    """
    if targets is None:
        # A Series has no ``columns``; treat it as having no target labels.
        has_targets = (
            isinstance(df, pd.DataFrame) and 'TrueTarget.X' in df.columns
        )
        targets = utils.target_fields() if has_targets else []
    else:
        targets = list(targets)

    axes = [0, 1] if axis == 'both' else [axis]

    for ax in axes:
        if isinstance(df, pd.Series):
            df = df.rename(utils.body_fields_short, axis=ax)
        else:
            df = df.rename(mapper=utils.body_fields_short, axis=ax)

        labels = df.columns if ax else df.index
        others = [label for label in labels if label not in targets]
        if sort:
            others = sorted(others)
        ordered = others + targets

        if ordered == list(labels):
            continue  # already in the requested order
        if ax == 1 and isinstance(df, pd.DataFrame):
            df = df.loc[:, ordered]
        elif ax == 0:
            df = df.loc[ordered]
    return df
def plot_count_hist(X, key, **kwargs):
    """Draw a stacked bar chart of the value counts of ``X[key]`` per target.

    The per-row target label is attached to a temporary two-column frame,
    so the caller's ``X`` is not modified.

    Keyword arguments are forwarded to ``get_fig_ax``,
    ``plt_title_label_lim_legend`` and ``plt_show_save``.
    """
    # NOTE(review): the original indentation was lost; the whole body is
    # assumed to run inside the style context - confirm.
    with plt.style.context(style_label):
        fig, ax = get_fig_ax(**kwargs)

        target = X[utils.target_fields()]\
            .apply(lambda xs: str(tuple(xs)), axis=1)

        X[[key]]\
            .assign(target=target)\
            .groupby(['target'])\
            .apply(lambda xs: xs[key].value_counts())\
            .unstack(level=-1)\
            .fillna(0)\
            .plot.bar(ax=ax, width=.8, stacked=True)

        plt_title_label_lim_legend(**kwargs)
        plt_show_save(fig, **kwargs)
def analyze_all_features(self, path='./feature_analysis', **kwargs):
    """Write descriptive statistics and analysis plots for all features.

    Args:
        path: output directory; created via ``utils.ensure_dir_exists``.

    Keyword arguments:
        base_pairplot: also create the three pair plots (default False).
        save: forwarded as ``save`` to the PCA and pair plots (default True).
            The correlation-matrix and PCA-coefficient plots are always
            called with ``save=True``.
        All keyword arguments are forwarded to ``self.analyze_feature``.
    """
    base_pairplot = kwargs.get('base_pairplot', False)
    save = kwargs.get('save', True)
    utils.ensure_dir_exists(path)
    participants, X, calibrations, y = self.normalized
    featureX = self.load_all_features(participants, X, calibrations, y)
    featureX = self.attach_target(featureX, y)
    features = utils.all_features()
    # Summary statistics of every feature column.
    describe_path = "%s/describe.csv" % path
    featureX[features].describe().to_csv(describe_path)
    # Per-feature analysis.
    for key in features:
        self.analyze_feature(participants, featureX, calibrations, y, key, path=path, **kwargs)
    # Correlation matrices for different field selections.
    self.plot_correlation_matrix(force=True, save=True, additional_fields=features, save_path="%s/correlation_matrix.png" % path)
    self.plot_correlation_matrix(
        force=True, save=True,
        additional_fields=features + utils.target_fields(),
        save_path="%s/correlation_matrix_with_targets.png" % path)
    self.plot_correlation_matrix(
        force=True, save=True,
        base_fields=features + utils.target_fields(),
        save_path="%s/correlation_matrix_features.png" % path)
    self.plot_correlation_matrix(
        force=True, save=True,
        include=list(self.feature_functions.keys()),
        base_fields=(list(self.feature_functions.keys()) + utils.target_fields()),
        save_path=("%s/correlation_matrix_all_features_with_targets.png" % path))
    self.plot_correlation_matrix(
        force=True, save=True,
        additional_fields=utils.target_fields(),
        save_path="%s/correlation_matrix_base.png" % path)
    # PCA coefficients for the body fields and for the features.
    self.plot_pca_coefficients(force=True, save=True, base_fields=utils.all_body_fields(), save_path="%s/pca_coefficients_base.png" % path)
    self.plot_pca_coefficients(
        force=True, save=True,
        base_fields=features,
        save_path="%s/pca_coefficients_features.png" % path)
    # PCA plots, without and with the features.
    save_path = "%s/pca_base.png" % path
    utils.ensure_dir_exists(save_path, is_file=True)
    self.plot_pca(load_features=False, force=True, save=save, save_path=save_path)
    save_path = "%s/pca_all.png" % path
    utils.ensure_dir_exists(save_path, is_file=True)
    self.plot_pca(include_features=features, force=True, save=save,
                  save_path=save_path)
    # Pair plots only on request.
    if base_pairplot:
        print('plotting pairplots:')
        print('\t start features')
        self.plot_pairplot(force=True, base_fields=features + utils.target_fields(), save=save, save_path="%s/pairplot_features.png" % path)
        print('\t start base')
        self.plot_pairplot(force=True, save=save, base_fields=utils.all_body_fields() + utils.target_fields(), save_path="%s/pairplot_base.png" % path)
        print('\t start orientations')
        self.plot_pairplot(
            force=True, save=save,
            base_fields=(utils.all_body_orientation_fields() + utils.target_fields()),
            save_path="%s/pairplot_orientations.png" % path)
def export(point_model, path="./export"):
    """Generate all figures below ``<path>/content/<topic>/figures/generated``.

    The function first draws the grouped 3D projections directly through
    ``plotting.plot_projection`` (raw and relative to the hmd position),
    then runs a list of plot steps, each a method name of ``point_model``
    plus its keyword arguments, and finally draws the target grid.

    NOTE(review): ``X`` is modified in place when it is made relative to the
    hmd; whether that also changes the data held by ``point_model`` depends
    on ``normalized``/``attach_target`` - confirm.
    """
    figure_base_path = '%s/content' % path
    path_end = "/figures/generated"

    def path_gen(folder):
        generated = figure_base_path + folder + path_end
        utils.ensure_dir_exists(generated)
        return generated

    def step(fn_name, **fn_args):
        # One entry of the plot list: method name plus keyword arguments.
        return {'fn': fn_name, 'fn_args': fn_args}

    def feature_names():
        # Fresh list on every call, like the original repeated expressions.
        return list(point_model.feature_functions.keys())

    pointing_movement_path = path_gen("/pointing_movement")
    data_collection_path = path_gen("/data_collection_study")
    application_path = path_gen("/application")

    p, X, c, y = point_model.normalized
    X = point_model.attach_target(X, y)

    proj_defaults = {
        'save': True,
        'view_elev': 20,
        'view_azim': 50,
        'highlight_endpoint': False
    }

    def plot_grouped(template, **options):
        # One projection figure per target; the file name gets the target.
        def plot_group(xs):
            save_path = template % ((data_collection_path, ) + xs.name)
            return plotting.plot_projection(
                xs, xs.name, save_path=save_path, **options, **proj_defaults)

        X.groupby(utils.target_fields()).apply(plot_group)

    no_target_options = {
        'show_marker_labels': True,
        'set_plot_limit': False,
        'show_true_target': False,
    }

    plot_grouped("%s/plot_projection_grouped_%s_%s_%s.png")
    plot_grouped("%s/plot_projection_grouped_%s_%s_%s_no_target.png",
                 **no_target_options)

    # Express every coordinate relative to the hmd position.
    for axis_fields, hmd_column in (
            (utils.x_fields(), 'hmd.X'),
            (utils.y_fields(), 'hmd.Y'),
            (utils.z_fields(), 'hmd.Z')):
        X[axis_fields] = X[axis_fields].sub(X[hmd_column], axis=0)

    fields_without_shoulder = (
        utils.x_fields(exclude=['leftShoulder']),
        utils.y_fields(exclude=['leftShoulder']),
        utils.z_fields(exclude=['leftShoulder'])
    )
    plot_grouped(
        "%s/plot_projection_grouped_hmd_normalized_%s_%s_%s_no_target.png",
        proj_fields=fields_without_shoulder, **no_target_options)
    plot_grouped(
        "%s/plot_projection_grouped_hmd_normalized_%s_%s_%s.png",
        proj_fields=fields_without_shoulder)

    # Single-collection projections for participants 1..13.
    projections = []
    collection = 70
    for participant in range(1, 14):
        defaults = {
            'plot_key': 'projection',
            'save': True,
            "participant": participant,
            "collection": collection,
            'data': 'normalized_all_snapshots'
        }
        projections.append(step(
            'plot_config',
            save_path=(
                "%s/plot_projection_%s_%s.png"
                % (data_collection_path, participant, collection)
            ),
            **defaults
        ))
        projections.append(step(
            'plot_config',
            save_path=(
                "%s/plot_projection_%s_%s_no_target.png"
                % (data_collection_path, participant, collection)
            ),
            show_marker_labels=True,
            show_true_target=False,
            set_plot_limit=False,
            **defaults
        ))

    _count_hist = [
        'above_head',
        'above_hand',
        'indexfinger_body_position_x',
        'indexfinger_body_position_y',
        'indexfinger_body_position_z',
    ]
    count_hist = [
        step(
            'plot_config',
            save=True,
            plot_key="count_hist_plot_%s" % k,
            save_path=(
                "%s/plot_count_hist_%s.png" % (pointing_movement_path, k)
            )
        )
        for k in _count_hist
    ]

    # Every feature without a count histogram gets a box plot.
    boxplots = [
        step(
            'plot_config',
            save=True,
            plot_key="boxplot_%s" % k,
            save_path="%s/boxplot_%s.png" % (pointing_movement_path, k)
        )
        for k in utils.all_features() if k not in _count_hist
    ]

    kde = []
    _kde = list(map(
        utils.field_to_orientation_key,
        utils.flatten(
            list(map(utils.body_orientation_field, ['upperarm', 'hmd']))
        )
    ))
    # NOTE(review): the original indentation was lost; the box plot of each
    # orientation key is assumed to be added inside this loop - confirm.
    for k in _kde:
        kde.append(step(
            'plot_config',
            save=True,
            plot_key="kde_%s" % k,
            save_path=pointing_movement_path + "/kde_%s.png" % k
        ))
        boxplots.append(step(
            'plot_config',
            save=True,
            plot_key="boxplot_%s" % k,
            save_path=pointing_movement_path + "/boxplot_%s.png" % k
        ))

    features = utils.all_features()
    fixed_steps = [
        # plot PCA
        step(
            'plot_pca',
            save=True, force=True,
            load_features=False,
            save_path="%s/plot_pca_base.png" % pointing_movement_path
        ),
        step(
            'plot_pca',
            save=True, force=True,
            include_features=features,
            save_path="%s/plot_pca_all.png" % pointing_movement_path
        ),
        step(
            'plot_pca',
            save=True, force=True,
            base_fields=features,
            save_path=(
                "%s/plot_pca_all_features.png" % pointing_movement_path
            )
        ),
        # plot correlation matrix
        step(
            'plot_correlation_matrix',
            save=True, force=True,
            title='Correlation matrix for raw data and targets',
            additional_fields=utils.target_fields(),
            save_path=(
                "%s/correlation_matrix_base.png" % pointing_movement_path
            )
        ),
        step(
            'plot_correlation_matrix',
            save=True, force=True,
            title='Correlation matrix for raw data, features and targets',
            additional_fields=features + utils.target_fields(),
            save_path=(
                "%s/correlation_matrix_all.png" % pointing_movement_path
            )
        ),
        step(
            'plot_correlation_matrix',
            save=True, force=True,
            title='Correlation matrix for raw orientation data and targets',
            base_fields=(
                utils.all_body_orientation_fields() + utils.target_fields()
            ),
            save_path=(
                "%s/correlation_matrix_orientations.png"
                % pointing_movement_path
            )
        ),
        step(
            'plot_correlation_matrix',
            save=True, force=True,
            title=(
                'Correlation matrix for all computed features and the targets'
            ),
            base_fields=feature_names() + utils.target_fields(),
            include=feature_names(),
            save_path=(
                "%s/correlation_matrix_all_features.png"
                % pointing_movement_path
            )
        ),
        step(
            'plot_correlation_matrix',
            save=True, force=True,
            title='Correlation matrix for features and targets',
            base_fields=features + utils.target_fields(),
            save_path=(
                "%s/correlation_matrix_features.png"
                % pointing_movement_path
            )
        ),
        # feature rankings
        step(
            'plot_extratrees',
            save=True,
            save_path="%s/plot_extratrees.png" % pointing_movement_path,
            xticks_rot=90
        ),
        step(
            'plot_extratrees',
            save=True,
            save_path=(
                "%s/plot_extratrees_all_features.png"
                % pointing_movement_path
            ),
            fields=feature_names(),
            xticks_rot=90
        ),
        step(
            'plot_selectKBest_chi2',
            save=True,
            save_path=(
                "%s/plot_selectKBest_chi2.png" % pointing_movement_path
            ),
            xticks_rot=90
        ),
        step(
            'plot_selectKBest_chi2',
            save=True,
            save_path=(
                "%s/plot_selectKBest_chi2_all_features.png"
                % pointing_movement_path
            ),
            fields=feature_names(),
            xticks_rot=90
        ),
        step(
            'plot_selectKBest_mutual_information',
            save=True,
            save_path="%s/plot_selectKBest_mi.png" % pointing_movement_path,
            xticks_rot=90
        ),
        step(
            'plot_selectKBest_mutual_information',
            save=True,
            save_path=(
                "%s/plot_selectKBest_mi_all_features.png"
                % pointing_movement_path
            ),
            fields=feature_names(),
            xticks_rot=90
        ),
        step(
            'plot_pca_coefficients',
            save=True,
            base_fields=features,
            save_path=(
                "%s/plot_pca_coefficients_features.png"
                % pointing_movement_path
            ),
            legend_kws={'ncol': 6}
        ),
    ]

    configs = [
        {
            'plotting': (
                fixed_steps + kde + boxplots + projections + count_hist
            )
        },
    ]
    for config in configs:
        for plot in config['plotting']:
            plot_method = getattr(point_model, plot['fn'])
            plot_method(**plot['fn_args'])

    plotting.plot_target_grid(
        save=True,
        save_path=application_path + "/targets.png"
    )
def find_best_features(self, participants, X, calibrations, y, **kwargs):
    """Score features with ``SelectKBest``.

    Keyword arguments:
        fields: fields handed to ``load_all_features`` as ``include``
            (default: all body and body-orientation fields).
        load_features: when true, features are (re)computed through
            ``self.load_all_features`` (which also receives all keyword
            arguments) and ``X`` is limited to ``self.add_features``.
        k: number of features to keep, or ``'all'`` (default).
        regression: choose the scoring mode, see below (default False).
        target_fields: target columns of ``y`` (default:
            ``utils.target_fields()``).

    Returns:
        * ``regression`` false: DataFrame with columns ``feature`` and
          ``p-value`` (chi2 test against the stringified target tuple),
          sorted by p-value and rounded to 5 decimals.
        * ``regression`` true: DataFrame of mutual-information scores, one
          column per target, indexed by feature name. Every target is
          aligned on its OWN selected features, so a feature that was not
          selected for a target is NaN in that column. For empty
          ``target_fields`` an empty DataFrame is returned.
    """
    default_fields =\
        utils.all_body_fields() + utils.all_body_orientation_fields()
    fields = kwargs.get('fields', default_fields)
    load_features = kwargs.get('load_features', False)
    k = kwargs.get('k', 'all')
    regression = kwargs.get('regression', False)
    target_fields = kwargs.get('target_fields', utils.target_fields())

    if load_features:
        X = self.load_all_features(participants, X, calibrations, y,
                                   include=fields, **kwargs)
        # NOTE(review): assumed to belong to the ``load_features`` branch
        # (original indentation was lost) - confirm.
        X = X[self.add_features]

    with pd.option_context('mode.use_inf_as_na', True):
        X = X.fillna(X.mean())
        # Columns without any usable value have no mean; fall back to 0.
        # Done inside the context so leftover +/-inf also count as missing.
        if X.isna().any().any():
            X = X.fillna(0)

    Xs = MinMaxScaler().fit_transform(X)
    y = y[target_fields]

    if not regression:
        labels = y.apply(lambda xs: str(tuple(xs)), axis=1)
        selector = SelectKBest(chi2, k=k)
        selector.fit(Xs, labels)
        support = selector.get_support(indices=True)
        col_names = X.columns[support].values
        pvalues = selector.pvalues_[support]
        ranked = sorted(zip(col_names, pvalues), key=lambda pair: pair[1])
        return pd.DataFrame(ranked, columns=['feature', 'p-value']).round(5)

    scores_by_target = {}
    for t in target_fields:
        selector = SelectKBest(mutual_info_regression, k=k)
        selector.fit(Xs, y[t])
        support = selector.get_support(indices=True)
        scores_by_target[t] = pd.Series(
            selector.scores_[support],
            index=X.columns[support].values,
        ).sort_index()
    return pd.DataFrame(scores_by_target)
def plot_projection(X, y, **kwargs):
    """Draw every row of ``X`` as a 3D poly-line through its body markers.

    The plot axes are labelled x, z, y: the z fields of a row go to the
    second plot axis and the y fields to the third.

    Args:
        X: frame with one row per snapshot; needs the projected fields and
            ``time`` (and ``pid``/``cid`` for the default title).
        y: target, either a tuple (used in the title) or an object accepted
            by ``plot_truetarget``.

    Keyword arguments:
        highlight_endpoint: draw rows whose time equals the last row's time
            as a thick red line (default True).
        show_marker_labels: label the markers of highlighted rows.
        show_true_target: mark the target via ``plot_truetarget``.
        set_plot_limit, xlim, ylim, zlim: axis limits.
        view_elev, view_azim: camera angles (``None`` keeps the default).
        color_by_target: colour the lines by their target.
        proj_fields: ``(x_fields, y_fields, z_fields)`` to project.
        mark: optional 3-tuple that is marked and labelled "HMD".
        Remaining keyword arguments go to ``get_fig_ax`` and
        ``plt_show_save``.
    """
    highlight_endpoint = kwargs.get('highlight_endpoint', True)
    show_marker_labels = kwargs.get('show_marker_labels', False)
    show_true_target = kwargs.get('show_true_target', True)
    set_plot_limit = kwargs.get('set_plot_limit', True)
    view_elev = kwargs.get('view_elev', 10)
    view_azim = kwargs.get('view_azim', 10)
    color_by_target = kwargs.get('color_by_target', False)
    xfs, yfs, zfs = kwargs.get(
        'proj_fields',
        (utils.x_fields(), utils.y_fields(), utils.z_fields()))
    xlim = kwargs.get('xlim', (-1.1, 1.1))
    ylim = kwargs.get('ylim', (-0.75, 3.6))
    zlim = kwargs.get('zlim', (0, 2.6))
    mark = kwargs.get('mark', None)

    kwargs['projection'] = '3d'
    fig, ax = get_fig_ax(**kwargs)
    ax.set_xlabel('x')
    ax.set_ylabel('z')
    ax.set_zlabel('y')
    if set_plot_limit:
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        ax.set_zlim(zlim)
    if view_elev is not None:
        ax.elev = view_elev
    if view_azim is not None:
        ax.azim = view_azim

    fields = utils.body_fields()

    target_colors = {}
    if color_by_target:
        _target_colors = [
            '#333333', '#ff0000', '#ff4000', '#ff8000', '#ffab00',
            '#ffbf00', '#ffff00', '#bfff00', '#80ff00', '#40ff00',
            '#00ff00', '#00ff80', '#00ffbf', '#00ffff', '#00bfff',
            '#0080ff', '#0040ff', '#0000ff', '#4000ff', '#8000ff',
            '#bf00ff', '#ff00ff', '#ff00bf', '#ff0080', '#ff0040',
            '#ff0000', '#000000',
        ]
        target_colors = {
            str(tuple(map(float, t.values()))): _target_colors[i]
            for i, t in enumerate(utils.get_targets())
        }

    last = X.iloc[-1]
    for _, r in X.iterrows():
        # Advance the shared marker cycle for every row, also for rows
        # that end up using a fixed marker.
        marker = next(markers)
        line_defaults = {'marker': marker, 'markersize': 5, 'alpha': 0.7}
        Xs, Ys, Zs = r[xfs], r[zfs], r[yfs]

        # NOTE(review): nesting of the label loop and the ``continue`` was
        # reconstructed from collapsed source - confirm.
        if highlight_endpoint and r['time'] == last['time']:
            ax.plot(Xs, Ys, Zs, linewidth=3, color='r', **line_defaults)
            if show_marker_labels:
                # Iterate by position; integer ``Series[i]`` lookups on a
                # label-indexed Series are deprecated in pandas.
                for i, (px, py, pz) in enumerate(zip(Xs, Ys, Zs)):
                    ax.text(px, py, pz, fields[i], fontsize=10)
            continue

        if color_by_target:
            # Build the key exactly like the colour table (plain floats) so
            # the lookup does not depend on how numpy scalars are printed.
            target = str(tuple(map(float, r[utils.target_fields()])))
            c = target_colors.get(target)
            ax.plot(Xs, Ys, Zs, linewidth=1, c=c, label=target, alpha=.4,
                    marker='o', markersize=3)
        else:
            ax.plot(Xs, Ys, Zs, linewidth=1, **line_defaults)

    if show_true_target:
        plot_truetarget(y, ax)

    if isinstance(mark, tuple) and len(mark) == 3:
        m, n, j = mark
        ax.scatter(m, n, j, c='r', s=50, edgecolors='k')
        ax.text(m - .03, n, j, "HMD", fontsize=12)

    if isinstance(y, tuple):
        plt.title('Participants pointing at target (%s, %s, %s)' % y)
    elif color_by_target:
        plt.title('Participants pointing color coded by targets')
        # Keep one legend entry per distinct label.
        handles, labels = plt.gca().get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        plt.legend(by_label.values(), by_label.keys())
    else:
        participant, collection = X.iloc[0].pid, X.iloc[0].cid
        plt.title('Participant %d, Collection %d'
                  % (participant, collection))

    plt_show_save(fig, **kwargs)
def plot_target_grid(**kwargs):
    """Draw the target positions in 3D, optionally coloured by a score.

    Keyword arguments:
        fscores: mapping from stringified target tuple to score. When it is
            empty the full grid of horizontal x vertical x depth targets is
            drawn in green.
        title, cmap, vmin, vmax, cbar_label: plot cosmetics.
        label_target: write the coordinates next to each target.
        cast_target_to_float: convert the key components to float before
            the score lookup.
        target_fields: fields the ``fscores`` keys consist of; a missing
            axis falls back to 0 (x), 1.49 (y) or 2.5 (z).
        Remaining keyword arguments go to ``get_fig_ax`` and
        ``plt_show_save``.
    """
    fscores = kwargs.get('fscores', {})
    title = kwargs.get('title', 'Target grid')
    cmap = kwargs.get('cmap', 'YlGn')
    label_target = kwargs.get('label_target', True)
    vmax = kwargs.get('vmax', 1)
    vmin = kwargs.get('vmin', None)
    cast_target_to_float = kwargs.get('cast_target_to_float', False)
    cbar_label = kwargs.get('cbar_label', '')
    kwargs['projection'] = '3d'
    target_fields = kwargs.get('target_fields', utils.target_fields())

    fig, ax = get_fig_ax(**kwargs)
    # Red cross at the origin.
    ax.scatter(0, 0, alpha=0.75, depthshade=False, edgecolors="k", c='r',
               s=100, marker='x')

    if len(fscores) > 0:
        axis_indices = [
            utils.list_get_or_default(target_fields, field, None)
            for field in ('trueTarget.X', 'trueTarget.Y', 'trueTarget.Z')
        ]
        x_index, y_index, z_index = axis_indices

        cs = []
        targets = []
        for raw_key in fscores:
            parsed = utils.parse_tuple(raw_key)
            _x = parsed[x_index] if x_index is not None else 0
            _y = parsed[y_index] if y_index is not None else 1.49
            _z = parsed[z_index] if z_index is not None else 2.5
            # Only the components that are part of the key are used to
            # rebuild it for the lookup.
            present = [
                value
                for value, index in zip([_x, _y, _z], axis_indices)
                if index is not None
            ]
            if cast_target_to_float:
                present = map(float, present)
            lookup_key = str(tuple(present))
            cs.append(float(fscores.get(lookup_key, 0)))
            targets.append((_x, _y, _z))

        x, y, z = zip(*targets)
        ss = ax.scatter(x, z, y, s=150, c=cs, depthshade=False, vmax=vmax,
                        cmap=cmap, edgecolors="k", vmin=vmin)
        cbar = fig.colorbar(ss)
        cbar.ax.set_ylabel(cbar_label, rotation=270)
    else:
        targets = [
            (h, v, d)
            for h in utils.get_horizontal_targets()
            for v in utils.get_vertical_targets()
            for d in utils.get_depth_targets()
        ]
        x, y, z = zip(*targets)
        ax.scatter(x, z, y, s=150, c='g', edgecolors="k")

    if label_target:
        offset_x, offset_z, offset_y = -.65, 0, .05
        for tx, ty, tz in zip(x, y, z):
            label = '(%s, %s, %s)' % (tx, ty, tz)
            ax.text(tx + offset_x, tz + offset_z, ty + offset_y, label,
                    fontsize=12)

    ax.set_xlim(-1.8, 1.8)
    ax.set_ylim(-.1, 3.6)
    ax.set_zlim(0, 2.99)
    ax.set_xlabel('x')
    ax.set_ylabel('z')
    ax.set_zlabel('y')
    ax.elev = 15
    ax.azim = -80
    plt.title(title)
    plt_show_save(fig, **kwargs)