def get_rmcorr_for_sensitivities_by_participant(
    sensitivities_by_participant: List[List[InterstimSensitivities]],
):
    """Summarise the distance/d-prime relationship across participants.

    The per-participant sensitivities are first flattened into a dataframe
    by ``_get_dataframe_for_sensitivities_by_participant``. Two analyses are
    then run on that dataframe:

    * ``ols`` is fitted with the formula ``dprime ~ C(subject) + distance``.
    * ``pg.rm_corr`` is run with ``distance`` as x, ``dprime`` as y and
      ``subject`` as the grouping column.

    Returns:
        A 4-tuple ``(slope, r, ci95, pval)`` where ``slope`` is the fitted
        ``distance`` parameter of the ``ols`` model and the remaining three
        values are the ``"r"``, ``"CI95%"`` and ``"pval"`` cells of the
        ``"rm_corr"`` row of the ``pg.rm_corr`` result.
    """
    frame = _get_dataframe_for_sensitivities_by_participant(
        sensitivities_by_participant
    )

    # C(subject) gives every subject its own intercept, so the "distance"
    # coefficient is a single slope shared by all subjects.
    fitted = ols("dprime ~ C(subject) + distance", data=frame).fit()
    stats = pg.rm_corr(frame, x="distance", y="dprime", subject="subject")

    slope = fitted.params["distance"]
    r_value = stats.at["rm_corr", "r"]
    ci95 = stats.at["rm_corr", "CI95%"]
    p_value = stats.at["rm_corr", "pval"]
    return slope, r_value, ci95, p_value
shapiro_samples.append([x for x in data_points.T]) shapiro_names.append(['{} {} {} {}'.format(metric_type, fb_type, k, th) for k in range(1, 16)]) fb_data_points.append(data_points[:, :]) ax = (axes[fb_typs_axes[fb_type]] if metric_type=='magnitude' else axes2[metric_types_ax[metric_type], fb_typs_axes[fb_type]]) if th == threshold or metric_type=='magnitude': ax.errorbar(np.arange(len(unique_blocks))+1, data_points.mean(0), 2*data_points.std(0) / np.sqrt(data_points.shape[0]), color=fb_typs_colors[fb_type], linewidth=0.75, elinewidth=0.75, linestyle='', marker='o', markersize=2, label=fb_type, zorder=100) ax.plot(np.arange(len(unique_blocks))+1, data_points.T, color=fb_typs_colors[fb_type], linewidth=0.5, linestyle='-', markersize=2, label=fb_type, alpha=0.25) df = metrics_df.query('metric_type=="{}" & fb_type=="{}"'.format(metric_type, fb_type)) df['k'] = df['k'].astype(int) s = r'$\rho_{CI95}$=' + rm_corr(df, 'k', 'metric', 'subj_id')['CI95%'].values[0] lim = metric_types_lims[metric_type] ax.text(1, lim[0] + (lim[1]-lim[0])*0.9, s, size=7) if fb_typs_axes[fb_type] == 0: ax.set_ylabel(metric_type) ax.set_ylim(*lim) ax.set_yticks([lim[0], 1., lim[1]]) stats = [] stats_extra = [] z_scores = [] ds = [] ps = []
def _rm_corr_r_and_p(df):
    """Run ``pg.rm_corr`` on columns x/y/subject and return ``(r, p_value)``."""
    result = pg.rm_corr(data=df, x='x', y='y', subject='subject')
    if isinstance(result, pd.DataFrame):
        # Current pingouin returns a one-row DataFrame; unpacking it directly
        # would iterate over column labels instead of the statistics.
        return float(result['r'].iloc[0]), float(result['pval'].iloc[0])
    # Legacy return shape: a plain (r, p, dof) tuple.
    r, p_value, _dof = result
    return r, p_value


def _single_subject_summary(biometric_source, sample_source, biometric,
                            metabolite):
    """Return ``(title, biometric_max, intensity_min, intensity_max)`` for one subject."""
    x = biometric_source.data[biometric]
    y = np.log2(biometric_source.data[metabolite])
    corr_df = pg.corr(x, y, method='skipped')
    coef = corr_df.iloc[0]['r']
    p_value = corr_df.iloc[0]['p-val']
    title = ("Daily average {} vs. log2(Avg. Int.) {}; "
             "Spearman's Rho: {}, p = {}").format(
                 biometric, metabolite, round(coef, 3), round(p_value, 5))
    intensities = list(sample_source.data[metabolite])
    return title, max(x), min(intensities), max(intensities)


def _add_subject_layers(plot, label, color, sample_source, biometric_source,
                        view, biometric, metabolite, bar_width):
    """Draw one subject's metabolite series and biometric step/bars on ``plot``."""
    # Metabolite time series on the primary y axis.
    plot.line('Datetime', metabolite, source=sample_source, color=color)
    plot.circle('Datetime', metabolite, source=sample_source, color=color,
                size=5, alpha=0.5, view=view, hover_color="black")

    # Biometric data on the secondary ("biometric_axis") y range.
    legend_title = "{} Daily Total {}".format(label, biometric)
    plot.step('Datetime', y=biometric, color=color, mode="center",
              line_dash="dashed", source=biometric_source,
              legend=legend_title, y_range_name="biometric_axis")
    plot.vbar('Datetime', top=biometric, fill_color=color, width=bar_width,
              line_color=None, alpha=0.3, source=biometric_source,
              y_range_name="biometric_axis")


def time_series_with_biometric_bar_plot(biometric_source_data_1,
                                        biometric_source_data_2,
                                        sample_source_data_1,
                                        sample_source_data_2, view_1, view_2,
                                        selection_dict):
    """Build a metabolite time-series figure overlaid with biometric bars.

    Args:
        biometric_source_data_1: Data source for subject 1 whose ``.data``
            mapping holds the ``biometric`` and ``metabolite`` columns and a
            ``'Datetime'`` column.
        biometric_source_data_2: Same, for subject 2.
        sample_source_data_1: Per-sample data source for subject 1 whose
            ``.data`` mapping holds the ``metabolite`` column and a
            ``'Datetime'`` column.
        sample_source_data_2: Same, for subject 2.
        view_1: View passed to the circle glyphs of subject 1.
        view_2: View passed to the circle glyphs of subject 2.
        selection_dict: Mapping with the keys ``'biometric'``,
            ``'metabolite'``, ``'user'`` (``"Both"``, ``"Subject1"`` or
            ``"Subject2"``) and ``'scale'``.

    Returns:
        The configured figure. Its title reports a repeated-measures
        correlation when ``user`` is ``"Both"`` and a skipped correlation
        otherwise.

    Raises:
        KeyError: If ``selection_dict`` lacks one of the required keys.
        ValueError: If ``selection_dict['user']`` is not a supported value.

    Note:
        ``tools`` is read from module scope.
    """
    # Parse selections.
    biometric = selection_dict['biometric']
    metabolite = selection_dict['metabolite']
    user = selection_dict['user']
    scale = selection_dict['scale']

    # Fail early and explicitly instead of hitting an unbound local later.
    if user not in ("Both", "Subject1", "Subject2"):
        raise ValueError(
            "Unsupported user selection {!r}; expected 'Both', 'Subject1' "
            "or 'Subject2'".format(user))

    start_time = pd.to_datetime('8/22/2018')
    end_time = pd.to_datetime('9/1/2018')

    # Set up data and relevant stats.
    if user == "Both":
        # list(...) rather than .tolist(): the latter caused a refresh error.
        x = list(biometric_source_data_1.data[biometric]) + list(
            biometric_source_data_2.data[biometric])
        y = list(np.log2(biometric_source_data_1.data[metabolite])) \
            + list(np.log2(biometric_source_data_2.data[metabolite]))
        subject = \
            ["Subject1"] * len(biometric_source_data_1.data[metabolite]) \
            + ["Subject2"] * len(biometric_source_data_2.data[metabolite])
        df = pd.DataFrame({
            'x': x,
            'y': y,
            'subject': subject,
        })
        r, p_value = _rm_corr_r_and_p(df)
        title = ("Daily total/average: {} vs. log2 (Avg. Int.) {}; "
                 "RM Corr : r = {}, p = {}").format(
                     biometric, metabolite, round(r, 3), round(p_value, 3))
        biometric_max = max(x)
        metabolite_intensities = list(sample_source_data_1.data[metabolite]) \
            + list(sample_source_data_2.data[metabolite])
        intensity_min = min(metabolite_intensities)
        intensity_max = max(metabolite_intensities)
    elif user == "Subject1":
        title, biometric_max, intensity_min, intensity_max = \
            _single_subject_summary(biometric_source_data_1,
                                    sample_source_data_1, biometric,
                                    metabolite)
    else:  # "Subject2"
        title, biometric_max, intensity_min, intensity_max = \
            _single_subject_summary(biometric_source_data_2,
                                    sample_source_data_2, biometric,
                                    metabolite)

    # Set up figure and formatting.
    plot = figure(
        title=title,
        tools=tools,
        x_axis_type="datetime",
        plot_width=800,
        plot_height=400,
        x_range=[start_time, end_time],
        y_range=[intensity_min, intensity_max],
    )

    # Secondary y range for the biometric, spanning 10%..110% of its maximum.
    biometric_range_start = biometric_max * 0.10
    biometric_range_end = biometric_max * 1.10
    plot.extra_y_ranges = {
        "biometric_axis": Range1d(start=biometric_range_start,
                                  end=biometric_range_end)
    }
    # Adding the second axis to the plot.
    plot.add_layout(LinearAxis(y_range_name="biometric_axis"), 'right')

    plot.xaxis.ticker = DaysTicker(days=np.arange(1, 59))
    plot.xaxis.formatter = DatetimeTickFormatter(
        hours=["%d %B %Y"],
        days=["%d %B %Y"],
        months=["%d %B %Y"],
        years=["%d %B %Y"],
    )
    plot.output_backend = "svg"
    plot.xaxis.axis_label = None
    plot.toolbar.logo = None
    plot.xaxis.major_label_orientation = pi / 4
    plot.xgrid.grid_line_color = None
    plot.ygrid.grid_line_color = None
    plot.outline_line_color = None
    plot.yaxis.axis_label = metabolite + " {}".format(scale)
    plot.yaxis[1].axis_label = biometric

    # Bar width must be given in milliseconds on a datetime axis:
    # https://stackoverflow.com/questions/45711567/categorical-y-axis-and-datetime-x-axis-with-bokeh-vbar-plot
    # 3600000 ms per hour * 24 hours = one day.
    millisecond_width = 3600000 * 24

    if user in ("Both", "Subject1"):
        _add_subject_layers(plot, "Subject 1", "red", sample_source_data_1,
                            biometric_source_data_1, view_1, biometric,
                            metabolite, millisecond_width)
    if user in ("Both", "Subject2"):
        _add_subject_layers(plot, "Subject 2", "blue", sample_source_data_2,
                            biometric_source_data_2, view_2, biometric,
                            metabolite, millisecond_width)

    # Light cycle formatting; this needs to come second for tool tips to
    # render.
    night_spans = []
    for day in pd.date_range(start=start_time, end=end_time):
        night_spans.append(Span(
            location=day,
            dimension='height',
            line_color='grey',
            # This should create a ~6 hr window around midnight, to simulate
            # the dark cycle during this time period.
            line_width=24,
            line_dash='solid',
            line_alpha=0.3))
    plot.renderers.extend(night_spans)

    return plot
def print_rm_corr(df_lab, df_wild):
    """Plot and print repeated-measures correlations for lab and wild data.

    For every comparison a ``pg.plot_rm_corr`` figure is created and the
    ``pg.rm_corr`` result is printed, always grouping by the
    ``'Participant'`` column.

    Args:
        df_lab: In-lab dataframe with the columns ``'MET (VM3)'``,
            ``'MET (Ainsworth)'``, ``'MET (Google Fit)'``, ``'estimation'``
            and ``'Participant'``.
        df_wild: In-wild dataframe with the columns ``'estimation'``,
            ``'MET (VM3)'`` and ``'Participant'``.
    """
    # (section heading, dataframe, ((label, x column, y column), ...))
    sections = (
        ('\nIN-LAB:', df_lab, (
            ('ActiGraph VM3 vs Ainsworth:', 'MET (VM3)', 'MET (Ainsworth)'),
            ('\nWRIST vs Ainsworth:', 'estimation', 'MET (Ainsworth)'),
            ('\nGoogle Fit vs Ainsworth:', 'MET (Google Fit)',
             'MET (Ainsworth)'),
            ('\nWRIST vs ActiGraph VM3:', 'estimation', 'MET (VM3)'),
        )),
        ('\nIN-WILD:', df_wild, (
            ('WRIST vs ActiGraph VM3:', 'estimation', 'MET (VM3)'),
        )),
    )

    print("\n\nGETTING RM_CORR:")
    for heading, frame, comparisons in sections:
        print(heading)
        for label, x_col, y_col in comparisons:
            print(label)
            # The plot call is kept for its figure side effect; its return
            # value is not used.
            pg.plot_rm_corr(data=frame, x=x_col, y=y_col,
                            subject='Participant')
            print(pg.rm_corr(data=frame, x=x_col, y=y_col,
                             subject='Participant'))
# load data into pandas dataframes data_samples, data_class, data_images = load_data( './plots/experiment_results.pickl') # with open('frames.pickl', 'wb') as f: # pickle.dump((data_samples, data_class, data_images),f) #with open('frames.pickl', 'rb') as f: # data_samples, data_class, data_images = pickle.load(f) print('\ncorrelation model uncertainty with error') for dataset in data_class['dataset'].unique(): data_set = data_class[data_class['dataset'] == dataset] for model in data_set['model'].unique(): model_data = data_set[data_set['model'] == model] corr_res = pg.rm_corr(model_data, x='correct', y='model_uncertainty', subject='image') print(dataset + "/" + model, float(corr_res['r']), float(corr_res['pval'])) print('\nged, p-values, each pair ged(model1) < ged(model2)') for dataset in data_images['dataset'].unique(): data_set = data_images[data_images['dataset'] == dataset] for modeli in data_set['model'].unique(): model_datai = data_set[data_set['model'] == modeli] for modelj in data_set['model'].unique(): if modeli == modelj: continue model_dataj = data_set[data_set['model'] == modelj] ged_res = pg.ttest(model_datai['ged'], model_dataj['ged'],