def do_surveys():
    """Plot the NASA-TLX and survey results, overall and per component.

    Draws four figures through the ``figure`` context manager:
    ``tlx_results``, ``tlx_components``, ``survey_results`` and
    ``survey_components``. The data comes from the ``tlx`` and ``surveys``
    tables defined outside this function.
    """
    with figure("tlx_results", figsize=fig_size(0.44, 1)):
        sns.factorplot(x="experiment", y="tlx", data=tlx, kind="box")
        sns.swarmplot(x="experiment", y=r"tlx", data=tlx,
                      palette=cmap_complement, split=True)
        # Anchor the y axis at zero while keeping the automatic upper limit.
        plt.ylim(0, plt.ylim()[1])
        plt.ylabel("NASA-TLX weighted score")

    with figure("tlx_components", figsize=fig_size(0.44, 1)):
        components = ["mental", "physical", "temporal", "performance",
                      "effort", "frustration"]
        # Long format: one row per (user, experiment, order, component).
        molten = pd.melt(tlx, id_vars=["user", "experiment", "order"],
                         value_vars=components,
                         var_name="component", value_name="score")
        sns.barplot(x=r"component", y="score", hue="experiment", data=molten)
        # Abbreviated tick labels, in the same order as ``components``.
        plt.gca().set_xticklabels(["MD", "PD", "TD", "P", "E", "F"])
        plt.xlabel("NASA-TLX component")
        plt.ylabel("score")

    # The size is passed by keyword like every other ``figure`` call here.
    with figure("survey_results", figsize=fig_size(0.44, 1)):
        sns.factorplot(x="experiment", y="total", data=surveys, kind="box")
        sns.swarmplot(x="experiment", y=r"total", data=surveys,
                      palette=cmap_complement, split=True)
        plt.ylim(0, plt.ylim()[1])
        plt.ylabel("survey score")

    with figure("survey_components", figsize=fig_size(0.9, 0.5)):
        molten = pd.melt(surveys, id_vars=["user", "experiment", "order"],
                         value_vars=[r"orientation_understanding",
                                     r"orientation_control",
                                     r"position_understanding",
                                     r"position_control",
                                     r"spacial_understanding",
                                     r"spacial_control"],
                         var_name="question", value_name="rating")
        g = sns.barplot(x=r"rating", y=r"question", hue="experiment",
                        data=molten)
        sns.stripplot(x="rating", y=r"question", data=molten,
                      hue="experiment", split=True, palette=cmap_complement,
                      jitter=0.6, size=3)
        # Readable tick labels, in the same order as the melted questions.
        plt.gca().set_yticklabels(
            ["angle aware", "angle control", "position aware",
             "position control", "rel. pos. aware", "rel. pos. control"])
        # NOTE(review): the first two legend entries are skipped, presumably
        # because two hue-grouped layers produce duplicates - confirm.
        handles, labels = g.get_legend_handles_labels()
        plt.legend(handles[2:], labels[2:])
        plt.xlabel("rating")
        plt.title("Survey results")
def do_durations():
    """Plot run durations, first per experiment and then per run order.

    Produces the ``duration`` and ``duration_runs`` figures from the
    ``analyses`` table defined outside this function.
    """
    with figure("duration", figsize=fig_size(0.44, 1)):
        per_experiment = dict(x="experiment", y="duration", data=analyses)
        sns.factorplot(kind="box", **per_experiment)
        sns.swarmplot(split=True, palette=cmap_complement, **per_experiment)
        # Start the y axis at zero, keeping the automatic upper limit.
        _, upper = plt.ylim()
        plt.ylim(0, upper)
        plt.ylabel("duration (s)")

    with figure("duration_runs", figsize=fig_size(0.44, 1)):
        sns.factorplot(
            x="order",
            y="duration",
            hue="experiment",
            data=analyses,
            capsize=0.2,
        )
        _, upper = plt.ylim()
        plt.ylim(0, upper)
        plt.ylabel("duration (s)")
        plt.xlabel("run")
def _update_plot(self, axis, view):
    """Render ``view`` with the seaborn call selected by ``self.plot_type``.

    Plot types that are not handled here are passed on to the parent class
    implementation.
    """
    style = self._process_style(self.style[self.cyclic_index])
    kind = self.plot_type

    if kind == 'factorplot':
        # Only forward ``hue`` when the view defines a second x dimension.
        hue = {'hue': view.x2} if view.x2 else {}
        opts = dict(style, **hue)
        sns.factorplot(x=view.x, y=view.y, data=view.data, **opts)
    elif kind == 'regplot':
        sns.regplot(x=view.x, y=view.y, data=view.data, ax=axis, **style)
    elif kind == 'boxplot':
        # These two style keys are dropped before calling seaborn.
        for dropped in ('return_type', 'figsize'):
            style.pop(dropped, None)
        sns.boxplot(view.data[view.y], view.data[view.x], ax=axis, **style)
    elif kind == 'violinplot':
        if view.x:
            sns.violinplot(view.data[view.y], view.data[view.x],
                           ax=axis, **style)
        else:
            sns.violinplot(view.data, ax=axis, **style)
    elif kind == 'interact':
        sns.interactplot(view.x, view.x2, view.y,
                         data=view.data, ax=axis, **style)
    elif kind == 'corrplot':
        sns.corrplot(view.data, ax=axis, **style)
    elif kind == 'lmplot':
        sns.lmplot(x=view.x, y=view.y, data=view.data, ax=axis, **style)
    elif kind in ['pairplot', 'pairgrid', 'facetgrid']:
        # Style keys containing 'map' name grid methods to call afterwards;
        # they are removed from the constructor keywords.
        map_keys = [key for key in list(style.keys()) if 'map' in key]
        map_opts = [(key, style.pop(key)) for key in map_keys]

        if kind == 'pairplot':
            factory_name = 'pairplot'
        elif kind == 'pairgrid':
            factory_name = 'PairGrid'
        else:
            factory_name = 'FacetGrid'
        g = getattr(sns, factory_name)(view.data, **style)

        for opt, args in map_opts:
            # Resolve the plotting function in seaborn first, then pyplot.
            provider = sns if hasattr(sns, args[0]) else plt
            plot_fn = getattr(provider, args[0])
            getattr(g, opt)(plot_fn, *args[1:])

        # Swap the previously tracked figure for the current pyplot figure.
        plt.close(self.handles['fig'])
        self.handles['fig'] = plt.gcf()
    else:
        super(SNSFramePlot, self)._update_plot(axis, view)
def _update_plot(self, axis, view):
    """Draw ``view`` using the seaborn function named by ``self.plot_type``.

    Any plot type without a branch below is delegated to the parent class.
    For the grid-based types the previously tracked figure is closed only
    when ``self._close_figures`` is truthy.
    """
    style = self._process_style(self.style[self.cyclic_index])
    plot_type = self.plot_type

    if plot_type == 'factorplot':
        # ``hue`` is passed only when the view has a second x dimension.
        extra = {'hue': view.x2} if view.x2 else {}
        opts = dict(style, **extra)
        sns.factorplot(x=view.x, y=view.y, data=view.data, **opts)
    elif plot_type == 'regplot':
        sns.regplot(x=view.x, y=view.y, data=view.data, ax=axis, **style)
    elif plot_type == 'boxplot':
        # Discard the two style keys that are not forwarded to seaborn.
        for unwanted in ('return_type', 'figsize'):
            style.pop(unwanted, None)
        sns.boxplot(view.data[view.y], view.data[view.x], ax=axis, **style)
    elif plot_type == 'violinplot':
        if view.x:
            sns.violinplot(view.data[view.y], view.data[view.x],
                           ax=axis, **style)
        else:
            sns.violinplot(view.data, ax=axis, **style)
    elif plot_type == 'interact':
        sns.interactplot(view.x, view.x2, view.y,
                         data=view.data, ax=axis, **style)
    elif plot_type == 'corrplot':
        sns.corrplot(view.data, ax=axis, **style)
    elif plot_type == 'lmplot':
        sns.lmplot(x=view.x, y=view.y, data=view.data, ax=axis, **style)
    elif plot_type in ['pairplot', 'pairgrid', 'facetgrid']:
        # Pull every style entry whose key contains 'map' out of ``style``;
        # each one names a grid method to invoke after construction.
        selected = [name for name in list(style.keys()) if 'map' in name]
        map_opts = [(name, style.pop(name)) for name in selected]

        if plot_type == 'pairplot':
            constructor = 'pairplot'
        elif plot_type == 'pairgrid':
            constructor = 'PairGrid'
        else:
            constructor = 'FacetGrid'
        g = getattr(sns, constructor)(view.data, **style)

        for opt, args in map_opts:
            # Prefer a seaborn function; otherwise look the name up in pyplot.
            namespace = sns if hasattr(sns, args[0]) else plt
            plot_fn = getattr(namespace, args[0])
            getattr(g, opt)(plot_fn, *args[1:])

        if self._close_figures:
            plt.close(self.handles['fig'])
        self.handles['fig'] = plt.gcf()
    else:
        super(SNSFramePlot, self)._update_plot(axis, view)
def do_errors():
    """Plot RMS error and distance-from-target results.

    Draws three figures through the ``figure`` context manager: ``rms``
    (box plots per experiment), ``rms_runs`` (values per run order, split by
    experiment) and ``distance`` (box plots per experiment with a zero
    reference line). The data comes from the ``analyses`` table defined
    outside this function.
    """
    id_vars = ["user", "experiment", "order", "group"]
    rms_titles = ["RMS Error", "RMS Error in $x$", "RMS Error in $y$"]

    with figure("rms", figsize=fig_size(0.9, 0.4)):
        molten = pd.melt(analyses, id_vars=id_vars,
                         value_vars=["rms", "rms_x", "rms_y"])
        g = sns.factorplot(x="experiment", y="value", col="variable",
                           data=molten, kind="box")
        for index, title in enumerate(rms_titles):
            g.fig.axes[index].set_title(title)
        g.fig.axes[0].set_ylabel("error (m)")

    with figure("rms_runs", figsize=fig_size(0.9, 0.4)):
        # No trailing comma after the call: it would wrap the melted frame in
        # a 1-tuple, and that tuple would be passed to seaborn as ``data``.
        molten = pd.melt(analyses, id_vars=id_vars,
                         value_vars=["rms", "rms_x", "rms_y"])
        g = sns.factorplot(x="order", y="value", hue="experiment",
                           col="variable", data=molten, capsize=0.2)
        for index, title in enumerate(rms_titles):
            g.fig.axes[index].set_title(title)
        g.fig.axes[0].set_ylabel("error (m)")
        for index in range(3):
            g.fig.axes[index].set_xlabel("run")

    with figure("distance", figsize=fig_size(0.9, 0.4)):
        molten = pd.melt(analyses, id_vars=id_vars,
                         value_vars=[r"dist_err", r"x_err", r"y_err"])
        g = sns.factorplot(x="experiment", y="value", col="variable",
                           data=molten, kind="box")
        g.fig.axes[0].set_title("Distance from target")
        g.fig.axes[1].set_title("Distance from target in $x$")
        g.fig.axes[2].set_title("Distance from target in $y$")
        g.fig.axes[0].set_ylabel("distance (m)")
        # Zero reference line in each panel, drawn behind the boxes.
        for column in range(3):
            g.axes[0][column].axhline(0, color="black", linewidth=1,
                                      zorder=-1)
def extracted_features_method_classifier_polynomial_features(crossval=True, xpname='cross_validation_classifier',
                                                             oncleaned_data=False, learningmethod='xgboost'):
    """Train classifiers on a polynomial expansion of the extracted features.

    Args:
        crossval: When true, evaluate ``xgboost`` and ``rf`` on 5 stratified
            shuffle splits and write the per-split log losses to a CSV file
            plus a PDF plot. When false, train ``learningmethod`` on the whole
            training set and write predictions for the 20 test parts.
        xpname: Prefix of every output file name.
        oncleaned_data: Passed as ``clean`` to ``load_dataset`` for the test
            parts, and embedded in the output file names.
        learningmethod: ``'rf'``, ``'xgboost'`` or ``'DL'``. Only used when
            ``crossval`` is false.

    Raises:
        ValueError: If ``crossval`` is false and ``learningmethod`` is not one
            of the supported names.
    """
    pol = PolynomialFeatures()
    print('Loading Training Dataset')
    # Feature columns fed to the models; skew_q2vec / kur_q2vec are left out.
    col = [u'euclidean_distance',
           u'fuzz_token_sort_ratio',
           u'fuzz_partial_token_set_ratio', u'canberra_distance', u'skew_q1vec',
           u'kur_q1vec', u'norm_wmd_train', u'wmd_train',
           u'tfidf_word_match_train', u'fuzz_token_set_ratio',
           u'braycurtis_distance', u'fuzz_partial_ratio', u'minkowski_distance',
           u'fuzz_qratio', u'fuzz_wratio', u'cosine_distance',
           u'fuzz_partial_token_sort_ratio', u'jaccard_distance',
           u'word_match_train',
           ]
    # NOTE(review): hard-coded absolute path; the sibling functions use
    # load_dataset('train', clean=oncleaned_data) instead - confirm intent.
    df = pd.read_csv('/home/nacim/DATASET_KAGGLE/quora/train.csv')
    # Replace empty strings, NaN and +/-inf by 0 before casting to float32.
    Xtrain = load_dataset('train_extracted_features').replace('', 0).replace(np.nan, 0).replace(np.inf, 0).replace(
        -np.inf, 0).astype(np.float32)
    y = df['is_duplicate'].values
    Xtrain = np.array(Xtrain[col])
    Xtrain = pol.fit_transform(Xtrain)
    del df

    if crossval:
        sss = StratifiedShuffleSplit(y=y, n_iter=5, test_size=0.2, )
        result = pd.DataFrame()
        for train, test in sss:
            xtrain, xtest, ytrain, ytest = Xtrain[train], Xtrain[test], y[train], y[test]
            # Only the training fold is resampled.
            xtrain, ytrain = oversample(ssp.csr_matrix(xtrain), ytrain, p=0.165)
            s = pd.Series()
            # Uses its own loop variable so the ``learningmethod`` argument is
            # not overwritten.
            for method in ['xgboost', 'rf']:
                print(method)
                if method == 'xgboost':
                    gpu_params = {
                        'objective': 'binary:logistic',
                        'eval_metric': 'logloss',
                        'eta': 0.02,
                        'max_depth': 4,
                        'min_child_weight': 1,
                        'subsample': 0.8,
                        'colsample_bytree': 0.8,
                        # 'updater': 'grow_gpu',
                        'n_estimators': 300,
                        'scale_pos_weight': 1
                    }
                    D_training = xgboost.DMatrix(xtrain, label=ytrain)
                    D_validation = xgboost.DMatrix(xtest, label=ytest)
                    watchlist = [(D_training, 'training'), (D_validation, 'validation')]
                    bst = xgboost.train(gpu_params, D_training, 50000, watchlist, early_stopping_rounds=10000)
                    ypred = bst.predict(D_validation)
                else:
                    estimator = RandomForestClassifier(n_jobs=6, n_estimators=100)
                    estimator.fit(xtrain, ytrain)
                    ypred = estimator.predict_proba(xtest)
                # Record the loss for every classifier and keep going. The
                # debugging exit() calls used to stop the process here, so the
                # CSV and plot below were never written.
                loss = log_loss(ytest, ypred)
                print(loss)
                s[method] = loss
            result = result.append(s.T, ignore_index=True)

        result.to_csv('{0}_clean{1}.csv'.format(xpname, oncleaned_data))
        # Reshape to long format: one row per (split, classifier) log loss.
        final = pd.DataFrame()
        for c in result.columns:
            tmp = pd.DataFrame()
            tmp['logloss'] = result[c]
            tmp['classifier'] = c
            final = final.append(tmp, ignore_index=True)
        sns.factorplot(x='classifier', y='logloss', data=final)
        sns.plt.savefig('{0}_clean{1}.pdf'.format(xpname, oncleaned_data), bbox_inches='tight')
    else:
        print('Test submission')
        if learningmethod == 'rf':
            estimator = RandomForestClassifier(n_jobs=5, n_estimators=150)
        elif learningmethod == 'xgboost':
            estimator = XGBClassifier(nthread=4, n_estimators=400, max_depth=5)
        elif learningmethod == 'DL':
            model = Sequential()
            model.add(Dense(1024, input_dim=Xtrain.shape[1], kernel_initializer='normal', activation='sigmoid',
                            bias_initializer='random_normal'
                            ))
            model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
            model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['accuracy'])
            estimator = model
        else:
            raise ValueError('unknown learningmethod: %r' % (learningmethod,))

        if learningmethod == 'DL':
            y, Xtrain, _ = unison_shuffled_copies(y, Xtrain, np.zeros(len(y)))
            hist = estimator.fit(Xtrain, y, batch_size=1024, epochs=100, validation_split=0.2, shuffle=True,
                                 class_weight={0: 0.79264156344230219, 1: 1.3542873987525375},
                                 verbose=1,
                                 )
            plot_training(hist, 'training_NN_extracted_features___relu')
        else:
            # The estimator must be fitted before predict_proba is called.
            print('fitting a %s classifier' % learningmethod)
            estimator.fit(Xtrain, y)

        result = pd.DataFrame()
        for i in range(1, 21):
            print('read Test %s' % i)
            df = load_dataset('test_part%s_extracted_features' % i, clean=oncleaned_data).replace('', 0).replace(np.nan,
                                                                                                                0).replace(
                np.inf, 0).replace(-np.inf, 0).astype(np.float32)
            dtmp = load_dataset('test_part%s' % i, clean=oncleaned_data)
            test_ids = dtmp['test_id']
            del dtmp
            Xtest = np.array(df[col])
            # Reuse the expansion fitted on the training features.
            Xtest = pol.transform(Xtest)
            del df
            ypred = estimator.predict_proba(Xtest)
            resulttmp = pd.DataFrame()
            resulttmp['test_id'] = test_ids
            if learningmethod == 'DL':
                # The network has a single sigmoid output unit.
                resulttmp['is_duplicate'] = ypred.ravel()
            else:
                # Column 1 is the second class column of predict_proba.
                resulttmp['is_duplicate'] = ypred[:, 1]
            result = result.append(resulttmp, ignore_index=True)
        result.to_csv(xpname + '_clean_____relu{0}_{1}.csv'.format(oncleaned_data, learningmethod), index=False)
def from_question_representation_method_classifier(crossval=True, xpname='cross_validation_classifier',
                                                   learningmethod='xgboost', merge_method='concat'):
    """Train classifiers on merged question representations.

    Args:
        crossval: When true, evaluate ``rf`` and ``xgboost`` on one stratified
            shuffle split and write the log losses to a CSV file plus a PDF
            plot. When false, train ``learningmethod`` on the whole training
            set and write predictions for the 20 test parts.
        xpname: Prefix of every output file name.
        learningmethod: ``'rf'``, ``'svm_rbf'``, ``'svm_linear'`` or
            ``'xgboost'``. Only used when ``crossval`` is false.
        merge_method: Passed as ``method`` to ``load_questions_and_merge`` and
            embedded in the output file names.

    Raises:
        ValueError: If ``crossval`` is false and ``learningmethod`` is not one
            of the supported names.
    """
    df = load_dataset('train', clean=False)
    y = df['is_duplicate'].values
    del df
    dataset = 'train'
    Xtrain = load_questions_and_merge(dataset, method=merge_method)

    if crossval:
        sss = StratifiedShuffleSplit(y=y, n_iter=1, test_size=0.2, )
        result = pd.DataFrame()
        for train, test in sss:
            xtrain, xtest, ytrain, ytest = Xtrain[train], Xtrain[test], y[train], y[test]
            s = pd.Series()
            # Uses its own loop variable so the ``learningmethod`` argument is
            # not overwritten.
            for method in ['rf', 'xgboost']:
                print(method)
                if method == 'rf':
                    estimator = RandomForestClassifier(n_jobs=6, n_estimators=150)
                elif method == 'svm_rbf':
                    # probability=True is required for predict_proba.
                    estimator = SVC(kernel='rbf', probability=True)
                elif method == 'svm_linear':
                    estimator = SVC(kernel='linear', probability=True)
                elif method == 'xgboost':
                    estimator = XGBClassifier(nthread=6, n_estimators=300, max_depth=4)
                estimator.fit(xtrain, ytrain)
                ypred = estimator.predict_proba(xtest)
                loss = log_loss(ytest, ypred)
                print(loss)
                s[method] = loss
            result = result.append(s.T, ignore_index=True)

        result.to_csv('{0}_merge{1}.csv'.format(xpname, merge_method))
        # Reshape to long format: one row per (split, classifier) log loss.
        final = pd.DataFrame()
        for c in result.columns:
            tmp = pd.DataFrame()
            tmp['logloss'] = result[c]
            tmp['classifier'] = c
            final = final.append(tmp, ignore_index=True)
        sns.factorplot(x='classifier', y='logloss', data=final)
        sns.plt.savefig('{0}_merge{1}.pdf'.format(xpname, merge_method), bbox_inches='tight')
    else:
        print('Test submission')
        if learningmethod == 'rf':
            estimator = RandomForestClassifier(n_jobs=6, n_estimators=150)
        elif learningmethod == 'svm_rbf':
            # predict_proba is called below, so the SVMs must be classifiers
            # created with probability=True (SVR has no predict_proba).
            estimator = SVC(kernel='rbf', probability=True)
        elif learningmethod == 'svm_linear':
            estimator = SVC(kernel='linear', probability=True)
        elif learningmethod == 'xgboost':
            estimator = XGBClassifier(nthread=6, n_estimators=300, max_depth=4)
        else:
            raise ValueError('unknown learningmethod: %r' % (learningmethod,))
        print('fitting a %s classifier' % learningmethod)
        estimator.fit(Xtrain, y)

        result = pd.DataFrame()
        for i in range(1, 21):
            print('read Test %s' % i)
            Xtest = load_questions_and_merge('test_part%s' % (i), method=merge_method)
            dtmp = load_dataset('test_part%s' % i, clean=False)
            test_ids = dtmp['test_id']
            del dtmp
            ypred = estimator.predict_proba(Xtest)
            resulttmp = pd.DataFrame()
            resulttmp['test_id'] = test_ids
            # Column 1 is the second class column of predict_proba.
            resulttmp['is_duplicate'] = ypred[:, 1]
            result = result.append(resulttmp, ignore_index=True)
        result.to_csv(xpname + '_merge{0}_{1}.csv'.format(merge_method, learningmethod), index=False)
def extracted_features_method_classifier(crossval=True, xpname='cross_validation_classifier', oncleaned_data=False,
                                         learningmethod='xgboost'):
    """Train classifiers on the pre-extracted pairwise features.

    Args:
        crossval: When true, evaluate ``xgboost`` and ``rf`` on 5 stratified
            shuffle splits and write the per-split log losses to a CSV file
            plus a PDF plot. When false, train ``learningmethod`` on the whole
            training set and write predictions for the 20 test parts.
        xpname: Prefix of every output file name.
        oncleaned_data: Passed as ``clean`` to ``load_dataset`` and embedded in
            the output file names.
        learningmethod: ``'rf'``, ``'svm_rbf'``, ``'svm_linear'`` or
            ``'xgboost'``. Only used when ``crossval`` is false.

    Raises:
        ValueError: If ``crossval`` is false and ``learningmethod`` is not one
            of the supported names.
    """
    print('Loading Training Dataset')
    df = load_dataset('train', clean=oncleaned_data)
    # Replace empty strings, NaN and +/-inf by 0 before casting to float32.
    Xtrain = load_dataset('train_extracted_features').replace('', 0).replace(np.nan, 0).replace(np.inf, 0).replace(
        -np.inf, 0).astype(np.float32)
    Xtrain = np.array(Xtrain)
    y = df['is_duplicate'].values
    del df

    if crossval:
        sss = StratifiedShuffleSplit(y=y, n_iter=5, test_size=0.2, )
        result = pd.DataFrame()
        for train, test in sss:
            xtrain, xtest, ytrain, ytest = Xtrain[train], Xtrain[test], y[train], y[test]
            s = pd.Series()
            # Uses its own loop variable so the ``learningmethod`` argument is
            # not overwritten.
            for method in ['xgboost', 'rf']:
                print(method)
                if method == 'xgboost':
                    gpu_params = {
                        'objective': 'binary:logistic',
                        'eval_metric': 'logloss',
                        'eta': 0.01,
                        'max_depth': 9,
                        'min_child_weight': 1,
                        # 'updater': 'grow_gpu',
                        'n_estimators': 1000,
                        'scale_pos_weight': 1
                    }
                    D_training = xgboost.DMatrix(xtrain, label=ytrain)
                    D_validation = xgboost.DMatrix(xtest, label=ytest)
                    watchlist = [(D_training, 'training'), (D_validation, 'validation')]
                    bst = xgboost.train(gpu_params, D_training, 50000, watchlist, early_stopping_rounds=10000,
                                        verbose_eval=50)
                    ypred = bst.predict(D_validation)
                else:
                    if method == 'rf':
                        estimator = RandomForestClassifier(n_jobs=4, n_estimators=100)
                    elif method == 'svm_rbf':
                        # probability=True is required for predict_proba.
                        estimator = SVC(kernel='rbf', probability=True)
                    else:
                        estimator = SVC(kernel='linear', probability=True)
                    estimator.fit(xtrain, ytrain)
                    ypred = estimator.predict_proba(xtest)
                # Record the loss for every classifier and keep going. The
                # debugging exit() call used to stop the process here, so the
                # CSV and plot below were never written.
                loss = log_loss(ytest, ypred)
                print(loss)
                s[method] = loss
            result = result.append(s.T, ignore_index=True)

        result.to_csv('{0}_clean{1}.csv'.format(xpname, oncleaned_data))
        # Reshape to long format: one row per (split, classifier) log loss.
        final = pd.DataFrame()
        for c in result.columns:
            tmp = pd.DataFrame()
            tmp['logloss'] = result[c]
            tmp['classifier'] = c
            final = final.append(tmp, ignore_index=True)
        sns.factorplot(x='classifier', y='logloss', data=final)
        sns.plt.savefig('{0}_clean{1}.pdf'.format(xpname, oncleaned_data), bbox_inches='tight')
    else:
        print('Test submission')
        if learningmethod == 'rf':
            estimator = RandomForestClassifier(n_jobs=5, n_estimators=150)
        elif learningmethod == 'svm_rbf':
            # predict_proba is called below, so the SVMs must be classifiers
            # created with probability=True (SVR has no predict_proba).
            estimator = SVC(kernel='rbf', probability=True)
        elif learningmethod == 'svm_linear':
            estimator = SVC(kernel='linear', probability=True)
        elif learningmethod == 'xgboost':
            estimator = XGBClassifier(nthread=4, n_estimators=300, max_depth=4)
        else:
            raise ValueError('unknown learningmethod: %r' % (learningmethod,))
        print('fitting a %s classifier' % learningmethod)
        estimator.fit(Xtrain, y)

        result = pd.DataFrame()
        for i in range(1, 21):
            print('read Test %s' % i)
            df = load_dataset('test_part%s_extracted_features' % i, clean=oncleaned_data).replace('', 0).replace(np.nan,
                                                                                                                0).replace(
                np.inf, 0).replace(-np.inf, 0).astype(np.float32)
            dtmp = load_dataset('test_part%s' % i, clean=oncleaned_data)
            test_ids = dtmp['test_id']
            del dtmp
            Xtest = np.array(df)
            del df
            ypred = estimator.predict_proba(Xtest)
            resulttmp = pd.DataFrame()
            resulttmp['test_id'] = test_ids
            # Column 1 is the second class column of predict_proba.
            resulttmp['is_duplicate'] = ypred[:, 1]
            result = result.append(resulttmp, ignore_index=True)
        result.to_csv(xpname + '_clean{0}_{1}.csv'.format(oncleaned_data, learningmethod), index=False)
def naive_method_classifier(crossval=True, xpname='', learningmethod='rf', oncleaned_data=False, maxfeature=100):
    """Classify question pairs from the absolute difference of TF-IDF vectors.

    Args:
        crossval: When true, evaluate ``rf`` and ``xgboost`` on 5 stratified
            shuffle splits and write the per-split log losses to a CSV file
            plus a PDF plot. When false, train ``learningmethod`` on the whole
            training set and write predictions for the test set.
        xpname: Prefix of every output file name.
        learningmethod: ``'rf'``, ``'svm_rbf'``, ``'svm_linear'`` or
            ``'xgboost'``. Only used when ``crossval`` is false.
        oncleaned_data: Passed as ``clean`` to ``load_dataset`` and embedded in
            the output file names.
        maxfeature: ``max_features`` of the TF-IDF vectorizer.

    Raises:
        ValueError: If ``learningmethod`` is not one of the supported names.
    """

    def question_texts(frame):
        # Stack question1 above question2, so rows i and i + len(frame)
        # belong to the same pair.
        stacked = pd.concat([frame['question1'], frame['question2']], axis=0, ignore_index=True)
        return stacked.fillna('').values

    def pair_features(tfidf, n_pairs):
        # Element-wise |tfidf(question1) - tfidf(question2)| per pair.
        return (np.abs(tfidf[:n_pairs] - tfidf[n_pairs:])).toarray()

    def build_estimator(method, xgb_estimators):
        if method == 'rf':
            return RandomForestClassifier(n_jobs=4)
        # predict_proba is called on every estimator, so the SVMs must be
        # classifiers created with probability=True.
        if method == 'svm_rbf':
            return SVC(kernel='rbf', probability=True)
        if method == 'svm_linear':
            return SVC(kernel='linear', probability=True)
        if method == 'xgboost':
            return XGBClassifier(nthread=4, n_estimators=xgb_estimators, max_depth=4)
        raise ValueError('unknown learningmethod: %r' % (method,))

    print('Loading Training Dataset')
    df = load_dataset('train', clean=oncleaned_data)
    # Keep the fitted vectorizer so the test set is mapped onto the same
    # vocabulary.
    vectorizer = TfidfVectorizer(max_features=maxfeature, stop_words='english')
    tfidf = vectorizer.fit_transform(question_texts(df))
    X_tfidf = pair_features(tfidf, len(df))
    y = df['is_duplicate'].values

    if crossval:
        sss = StratifiedShuffleSplit(y=y, n_iter=5, test_size=0.2, )
        result = pd.DataFrame()
        for train, test in sss:
            xtrain, xtest, ytrain, ytest = X_tfidf[train], X_tfidf[test], y[train], y[test]
            s = pd.Series()
            # Uses its own loop variable so the ``learningmethod`` argument is
            # not overwritten.
            for method in ['rf', 'xgboost']:
                print(method)
                estimator = build_estimator(method, 350)
                estimator.fit(xtrain, ytrain)
                ypred = estimator.predict_proba(xtest)
                loss = log_loss(ytest, ypred)
                print(loss)
                s[method] = loss
            result = result.append(s.T, ignore_index=True)

        result.to_csv('{0}_clean{1}.csv'.format(xpname, oncleaned_data))
        # Reshape to long format: one row per (split, classifier) log loss.
        final = pd.DataFrame()
        for c in result.columns:
            tmp = pd.DataFrame()
            tmp['logloss'] = result[c]
            tmp['classifier'] = c
            final = final.append(tmp, ignore_index=True)
        sns.factorplot(x='classifier', y='logloss', data=final)
        sns.plt.savefig('{0}_clean{1}.pdf'.format(xpname, oncleaned_data), bbox_inches='tight')
    else:
        print('Test submission')
        # Build the estimator first so a bad method name fails before the
        # test set is loaded.
        estimator = build_estimator(learningmethod, 300)
        print('Load Test Dataset')
        df = load_dataset('test', clean=oncleaned_data)
        # transform, not fit_transform: a vectorizer re-fitted on the test
        # questions would produce columns that do not match the features the
        # classifier is trained on.
        tfidf = vectorizer.transform(question_texts(df))
        Xtest = pair_features(tfidf, len(df))
        print('fitting a %s classifier' % learningmethod)
        estimator.fit(X_tfidf, y)
        ypred = estimator.predict_proba(Xtest)
        result = pd.DataFrame()
        result['test_id'] = df['test_id']
        # Column 1 is the second class column of predict_proba.
        result['is_duplicate'] = ypred[:, 1]
        result.to_csv(xpname + '_clean{0}_{1}.csv'.format(oncleaned_data, learningmethod), index=False)
def do_movement():
    """Plot movement statistics per experiment and per run.

    Produces seven figures through the ``figure`` context manager:
    ``movement``, ``movement_x``, ``movement_y``, ``movement_back``,
    ``movement_runs``, ``movement_x_runs`` and ``movement_y_runs``. The data
    comes from the ``analyses`` table defined outside this function.
    """

    def melted(columns):
        # Long format: one row per identifier combination and listed column.
        return pd.melt(analyses,
                       id_vars=["user", "experiment", "order", "group"],
                       value_vars=columns)

    def decorate(grid, titles, runs_on_x=False):
        # Title each of the three panels, label the shared y axis, optionally
        # label every x axis, then start the y axis at zero.
        for position, title in enumerate(titles):
            grid.fig.axes[position].set_title(title)
        grid.fig.axes[0].set_ylabel("distance (m)")
        if runs_on_x:
            for position in range(3):
                grid.fig.axes[position].set_xlabel("run")
        _, upper = plt.ylim()
        plt.ylim(0, upper)

    overall_columns = ["path_length", "move_x", "move_y"]
    overall_titles = ["Path length", "Movement in $x$", "Movement in $y$"]
    x_columns = ["move_l", "move_r", "move_x"]
    x_titles = ["Movement left", "Movement right", "Movement in $x$"]
    y_columns = ["move_b", "move_f", "move_y"]
    y_titles = ["Movement backwards", "Movement forwards", "Movement in $y$"]

    # Box plots per experiment.
    with figure("movement", figsize=fig_size(0.9, 0.4)):
        molten = melted(overall_columns)
        g = sns.factorplot(x="experiment", y="value", col="variable",
                           data=molten, kind="box")
        decorate(g, overall_titles)

    with figure("movement_x"):
        molten = melted(x_columns)
        g = sns.factorplot(x="experiment", y="value", col="variable",
                           data=molten, kind="box")
        decorate(g, x_titles)

    with figure("movement_y"):
        molten = melted(y_columns)
        g = sns.factorplot(x="experiment", y="value", col="variable",
                           data=molten, kind="box")
        decorate(g, y_titles)

    with figure("movement_back"):
        backwards = dict(x="experiment", y="move_b", data=analyses)
        sns.factorplot(kind="box", **backwards)
        sns.swarmplot(split=True, palette=cmap_complement, **backwards)
        plt.ylabel("distance (m)")
        plt.title("Movement backwards")

    # The same quantities against run order, split by experiment.
    with figure("movement_runs", figsize=fig_size(0.9, 0.4)):
        molten = melted(overall_columns)
        g = sns.factorplot(x="order", y="value", col="variable",
                           data=molten, hue="experiment", capsize=0.2)
        decorate(g, overall_titles, runs_on_x=True)

    with figure("movement_x_runs"):
        molten = melted(x_columns)
        g = sns.factorplot(x="order", y="value", col="variable",
                           data=molten, hue="experiment")
        decorate(g, x_titles, runs_on_x=True)

    with figure("movement_y_runs"):
        molten = melted(y_columns)
        g = sns.factorplot(x="order", y="value", col="variable",
                           data=molten, hue="experiment")
        decorate(g, y_titles, runs_on_x=True)