def has_epilepsy_in_loc(self, loc_name):
    """Return True if any SOZ electrode of this patient lies in *loc_name*.

    'Whole Brain' matches any SOZ electrode; otherwise the electrode's
    location tag at the granularity of *loc_name* must equal *loc_name*.
    Assumes each electrode's ``soz`` flag is already parsed.
    """
    from db_parsing import get_granularity
    if loc_name == 'Whole Brain':
        # Epilepsy anywhere in the brain: any SOZ electrode counts.
        return any(e.soz for e in self.electrodes)
    # Compare each SOZ electrode's tag at the matching granularity.
    gran = get_granularity(loc_name)
    return any(
        e.soz and getattr(e, e.loc_field_by_granularity(gran)) == loc_name
        for e in self.electrodes
    )
def plot_co_pse_auc(data_by_loc, saving_path):
    """Scatter PSE against per-type AUC for every location, add a linear fit,
    and save the figure to *saving_path*.

    Marker shape encodes the location granularity (2/3/5); colors come from
    color_for_scatter, and Hippocampus points get a dedicated legend label.
    """
    pse = []
    auc = []
    locations = []  # gathered alongside pse/auc but not plotted directly
    colors = []
    labels = []
    from db_parsing import get_granularity
    fig = plt.figure()
    defined_legends = set()  # legend labels already emitted (avoid duplicates)
    for loc in data_by_loc.keys():
        for type in HFO_TYPES:
            # Only plot types for which an AUC exists for this location.
            if type + '_AUC' in data_by_loc[loc].keys():
                pse.append(data_by_loc[loc]['PSE'])
                auc.append(data_by_loc[loc][type + '_AUC'])
                locations.append(loc)
                granularity = get_granularity(loc)
                colors.append(
                    color_for_scatter(granularity, type, loc == 'Hippocampus'))
                labels.append(type + '_in_loc' + str(granularity)
                              if loc != 'Hippocampus' else
                              'Hippocampus {0}'.format(type))
                # Marker shape encodes the granularity of the location tag.
                if granularity == 2:
                    marker = 'v'
                elif granularity == 3:
                    marker = '<'
                elif granularity == 5:
                    marker = '^'
                else:
                    raise ValueError('Granularity marker undefined')
                # Emit each legend label only once; repeats pass label=None.
                # (When label is None, adding None to the set is harmless.)
                label = None if labels[-1] in defined_legends else labels[-1]
                defined_legends.add(label)
                plt.scatter(
                    pse[-1],
                    auc[-1],
                    c=colors[-1],
                    label=label,
                    marker=marker,
                )
    axes = plt.gca()
    axes.legend(loc='lower right', prop={'size': 10})
    # Least-squares linear fit of AUC vs. PSE, drawn across the x-range.
    m, b = np.polyfit(pse, auc, 1)
    X_plot = np.linspace(axes.get_xlim()[0], axes.get_xlim()[1], 100)
    plt.plot(X_plot, m * X_plot + b, '-')
    plt.xlabel('Percentage of SOZ electrodes (PSE)')
    plt.ylabel('AUC ROC')
    plt.title('Percentage of SOZ electrodes and HFO rate baseline relation')
    plt.savefig(saving_path, bbox_inches='tight')
    plt.show()
    plt.close(fig)
def feature_statistical_tests(patients_dic,
                              location=None,
                              types=HFO_TYPES,
                              features=('duration', 'freq_pk', 'power_pk'),
                              saving_dir=FIG_SAVE_PATH[4]['dir']):
    """Compare SOZ vs non-SOZ distributions of event features per type.

    For every feature and event type, gathers the feature values of events
    on SOZ and non-SOZ electrodes (optionally restricted to *location*),
    runs three two-sample tests (Kolmogorov-Smirnov, Wilcoxon rank-sum,
    Mann-Whitney U) and plots both distributions via graphics.

    Parameters
    ----------
    patients_dic : dict mapping patient name -> Patient.
    location : location name, 'Whole Brain', or None for all electrodes.
    types : event types to analyze.
    features : event-info fields to test.  Features containing 'angle'
        are circular, so their sine and cosine are tested separately.
        (Default is a tuple, not a list: a mutable default would be
        shared and silently modifiable across calls.)
    saving_dir : directory where the distribution plots are written.
    """
    # --- structure initialization -------------------------------------
    feature_data = dict()
    stats = dict()
    for feature in features:
        if 'angle' in feature:
            # Circular feature: track sine and cosine components apart.
            f_keys = ('sin_' + feature, 'cos_' + feature)
        else:
            f_keys = (feature,)
        for key in f_keys:
            feature_data[key] = dict()
            stats[key] = dict()
            for t in types:
                feature_data[key][t] = {'soz': [], 'nsoz': []}

    # --- gather data ---------------------------------------------------
    # Resolve the location field lazily: get_granularity is only needed
    # (and only meaningful) for concrete location names, not for
    # None / 'Whole Brain'.
    whole_brain = location is None or location == 'Whole Brain'
    if not whole_brain:
        loc_field = 'loc{g}'.format(g=get_granularity(location))
    for p in patients_dic.values():
        if whole_brain:
            electrodes = p.electrodes
        else:
            electrodes = [
                e for e in p.electrodes if location == getattr(e, loc_field)
            ]
        for e in electrodes:
            soz_label = 'soz' if e.soz else 'nsoz'
            for t in types:
                for h in e.events[t]:
                    for f in features:
                        if 'angle' in f:
                            # Record the angle only when the companion
                            # entry (key without the '_angle' suffix,
                            # presumably a detection flag) is True.
                            if h.info[f[:-len('_angle')]] == True:
                                feature_data['sin_{f}'.format(
                                    f=f)][t][soz_label].append(
                                        mt.sin(h.info[f]))
                                feature_data['cos_{f}'.format(
                                    f=f)][t][soz_label].append(
                                        mt.cos(h.info[f]))
                        else:
                            feature_data[f][t][soz_label].append(h.info[f])

    # --- statistics and plotting ---------------------------------------
    # Hoisted out of the loops: these mappings are constant.
    # NOTE: scipy.stats.ranksums implements the Wilcoxon RANK-SUM test
    # (two independent samples), so it is labeled accordingly here.
    test_names = {
        'D': 'Kolmogorov-Smirnov test',
        'W': 'Wilcoxon rank-sum test',
        'U': 'Mann-Whitney U test'
    }
    test_func = {'D': ks_2samp, 'W': ranksums, 'U': mannwhitneyu}
    for f in features:
        if 'angle' in f:
            f_names = ['sin_{f}'.format(f=f), 'cos_{f}'.format(f=f)]
        else:
            f_names = [f]
        for t in types:
            for feat_name in f_names:
                data_soz = feature_data[feat_name][t]['soz']
                data_nsoz = feature_data[feat_name][t]['nsoz']
                if not data_soz or not data_nsoz:
                    # Two-sample tests need both samples non-empty.
                    print('There is no info for {f} with type {t}'.format(
                        f=feat_name, t=t))
                    continue
                stats[feat_name][t] = dict()
                for s_name, test_f in test_func.items():
                    result = dict()
                    result[s_name], result['pval'] = test_f(
                        data_soz, data_nsoz)
                    stats[feat_name][t][test_names[s_name]] = result
                graphics.plot_feature_distribution(data_soz,
                                                   data_nsoz,
                                                   feature=feat_name,
                                                   type=t,
                                                   stats=stats,
                                                   test_names=test_names,
                                                   saving_dir=saving_dir)
def region_info(patients_dic,
                event_types=EVENT_TYPES,
                flush=False,
                conf=None,
                location=None):
    """Aggregate electrode and event statistics for a brain region.

    Walks every electrode of every patient (restricted to *location* when
    given), counting SOZ electrodes, events per type, electrodes with
    missing coordinates or empty location tags, and computes the PSE
    (percentage of SOZ electrodes) plus the AUC-ROC of the per-electrode
    event rate as a SOZ predictor.

    Parameters
    ----------
    patients_dic : dict mapping patient name -> Patient.
    event_types : event types to count (collapsed for rates and AUC).
    flush : when True, flush each electrode's cached events first.
    conf : simulator sensitivity/specificity; stored untouched in the result.
    location : location name to restrict to, or None for all electrodes.

    Returns a dict with all aggregates (see the literal at the bottom).
    """
    print('Region info location {0}, types {1}.'.format(location, event_types))
    patients_with_epilepsy = set()
    elec_count_per_patient = []
    # Missing-data counters and the patients affected.  todo create dic
    elec_x_null, elec_y_null, elec_z_null = 0, 0, 0
    elec_cnt_loc2_empty, elec_cnt_loc3_empty, elec_cnt_loc5_empty = 0, 0, 0
    pat_with_x_null, pat_with_y_null, pat_with_z_null = set(), set(), set()
    pat_with_loc2_empty, pat_with_loc3_empty, pat_with_loc5_empty = set(), set(), set()
    soz_elec_count, elec_with_evt_count, event_count = 0, 0, 0
    counts = {type: 0 for type in event_types}  # NOTE: 'type' shadows the builtin
    event_rates, soz_labels, soz_rates, nsoz_rates = [], [], [], []
    if location is not None:
        # Keep only patients that actually have electrodes in the region.
        patients_dic = {
            p_name: p
            for p_name, p in patients_dic.items()
            if p.has_elec_in(loc=location)
        }
    pat_in_loc = dict()
    for p_name, p in patients_dic.items():
        if location is None:
            electrodes = p.electrodes
            pat_in_loc[p_name] = p  # was commented
        else:
            # Electrodes whose tag at this location's granularity matches.
            electrodes = [
                e for e in p.electrodes
                if getattr(e, 'loc{i}'.format(
                    i=get_granularity(location))) == location
            ]
            pat_in_loc[p_name] = Patient(p_name, p.age, electrodes)
        elec_count_per_patient.append(len(electrodes))
        # Guaranteed by the has_elec_in filter above when location is set.
        assert (len(electrodes) > 0)
        for e in electrodes:
            if flush:
                e.flush_cache(event_types)
            if e.soz:
                patients_with_epilepsy.add(p_name)
                soz_elec_count = soz_elec_count + 1
            # Track missing coordinates per axis.
            if e.x is None:
                elec_x_null += 1
                pat_with_x_null.add(p_name)
            if e.y is None:
                elec_y_null += 1
                pat_with_y_null.add(p_name)
            if e.z is None:
                elec_z_null += 1
                pat_with_z_null.add(p_name)
            # Track empty location tags per granularity level.
            if e.loc2 == 'empty':
                elec_cnt_loc2_empty += 1
                pat_with_loc2_empty.add(p_name)
            if e.loc3 == 'empty':
                elec_cnt_loc3_empty += 1
                pat_with_loc3_empty.add(p_name)
            if e.loc5 == 'empty':
                elec_cnt_loc5_empty += 1
                pat_with_loc5_empty.add(p_name)
            elec_evt_count = e.get_events_count(event_types)
            elec_with_evt_count = elec_with_evt_count + 1 if elec_evt_count > 0 else elec_with_evt_count
            event_count += elec_evt_count
            for type in event_types:
                counts[type] += e.get_events_count([type])
            evt_rate = e.get_events_rate(event_types)  # Measured in events/min
            event_rates.append(evt_rate)
            soz_labels.append(e.soz)
            if e.soz:
                soz_rates.append(evt_rate)
            else:
                nsoz_rates.append(evt_rate)
    elec_count = sum(elec_count_per_patient)
    pse = soz_elec_count / elec_count  # proportion of soz electrodes
    non_empty_elec_prop = elec_with_evt_count / elec_count
    try:
        # Baseline: how well the raw event rate separates SOZ electrodes.
        auc_roc = roc_auc_score(soz_labels, event_rates)
    except ValueError:
        # e.g. when all electrodes share the same SOZ label.
        auc_roc = None
    info = {
        'patients_dic_in_loc': pat_in_loc,
        'patient_count': len(list(patients_dic.keys())),
        'patients_with_epilepsy': len(patients_with_epilepsy),
        'elec_count': elec_count,
        'mean_elec_per_pat': np.mean(elec_count_per_patient),
        'soz_elec_count': soz_elec_count,
        'pse': round(100 * pse, 2),  # percentage of SOZ electrodes
        'pnee': round(100 * non_empty_elec_prop, 2),  # percentage of non empty elec
        'evt_count': event_count,  # Total count of all types
        'evt_count_per_type': counts,
        'AUC_ROC': auc_roc,  # Baseline performance in region for these types collapsed
        'conf': conf,  # Sensibility and specificity for the simulator
        'pat_with_x_null': pat_with_x_null,
        'pat_with_y_null': pat_with_y_null,
        'pat_with_z_null': pat_with_z_null,
        'pat_with_loc2_empty': pat_with_loc2_empty,
        'pat_with_loc3_empty': pat_with_loc3_empty,
        'pat_with_loc5_empty': pat_with_loc5_empty,
        'elec_x_null': elec_x_null,
        'elec_y_null': elec_y_null,
        'elec_z_null': elec_z_null,
        'elec_cnt_loc2_empty': elec_cnt_loc2_empty,
        'elec_cnt_loc3_empty': elec_cnt_loc3_empty,
        'elec_cnt_loc5_empty': elec_cnt_loc5_empty,
        'evt_rates': event_rates,  # events per minute for each electrode
        'soz_labels': soz_labels,  # SOZ label of each electrode
        'soz_rates': soz_rates,
        'nsoz_rates': nsoz_rates,
    }
    return info
def has_elec_in(self, loc):
    """Return True if this patient has at least one electrode tagged *loc*.

    The electrode attribute compared is 'loc<granularity>' where the
    granularity is derived from *loc* itself.
    """
    from db_parsing import get_granularity
    # Resolve the attribute name once rather than per electrode.
    field = 'loc{i}'.format(i=get_granularity(loc))
    return any(getattr(e, field) == loc for e in self.electrodes)
def plot_pse_hfo_rate_auc_table(data_by_loc, saving_path):
    """Render a plotly table ranking locations by their best per-type AUC.

    Each row carries granularity, location, PSE and the rounded AUC of
    every HFO type; rows are ordered by the maximum of the four AUCs
    (best first).  The figure is saved via orca and then displayed.
    """
    np.random.seed(1)
    rows = []
    for loc, data in data_by_loc.items():
        gran = get_granularity(loc)
        sorted_types = sorted(HFO_TYPES)
        row = [gran, loc, data['PSE']]
        row.extend(round(data[t + '_AUC'], 2) for t in sorted_types)
        # Indices 3:7 hold the per-type AUCs ['g','l','p','t1'...];
        # the appended max is the ranking key and is not displayed.
        row.append(max(row[3:7]))
        rows.append(tuple(row))
    # Rank by best AUC (last element), descending.
    rows.sort(reverse=True, key=lambda r: r[-1])
    col_colors = [color_by_gran(r[0]) for r in rows]
    bold = '<b>'
    header_values = [
        '{b}Granularity{b}'.format(b=bold),
        '{b}Location{b}'.format(b=bold),
        '{b}PSE{b}'.format(b=bold),
        '{b}{t}{b}'.format(b=bold, t=sorted_types[0]),
        '{b}{t}{b}'.format(b=bold, t=sorted_types[1]),
        '{b}{t}{b}'.format(b=bold, t=sorted_types[2]),
        '{b}{t}{b}'.format(b=bold, t=sorted_types[3]),
    ]
    n_cols = len(rows[0]) - 1  # drop the ranking column from the display
    cell_values = [[r[i] for r in rows] for i in range(n_cols)]
    cell_fill = [np.array(col_colors) for _ in range(n_cols)]
    fig = go.Figure(data=[
        go.Table(columnwidth=[100, 200, 100, 100, 100, 100, 100],
                 header=dict(values=header_values,
                             line_color='black',
                             fill_color='white',
                             align='left',
                             font=dict(color='black', size=10)),
                 cells=dict(values=cell_values,
                            fill_color=cell_fill,
                            align='left',
                            font=dict(color='black', size=10)))
    ])
    col_width = 90
    row_height = 50
    fig.update_layout(
        autosize=False,
        width=col_width * 7,
        # height=row_height * (len(rows) + 1),
        margin=dict(l=50, r=50, b=100, t=100, pad=4))
    orca_save(fig, saving_path)
    fig.show()
def plot_global_info_by_loc_table(data_by_loc, saving_path):
    """Render a per-location summary table (patients, SOZ patients,
    electrodes, PSE and event counts per type) with plotly, save it to
    *saving_path* via orca, then display it.

    Rows are colored by granularity and ordered by (granularity, location).
    """
    np.random.seed(1)
    sorted_types = sorted(HFO_TYPES + ['Spikes'])
    col_colors = []
    rows = []
    for loc, data in data_by_loc.items():
        row = []
        granularity = get_granularity(loc)
        row.append(granularity)
        row.append(loc)
        row.append(data['patient_count'])
        row.append(data['patients_with_epilepsy'])
        row.append(data['elec_count'])
        # row.append(data['soz_elec_count'])
        row.append(data['PSE'])
        for type in sorted_types:
            row.append(data[type + '_N'])
        rows.append(tuple(row))
    # Order by granularity, loc_name
    rows = sorted(rows, key=lambda x: (x[0], x[1]))
    for row in rows:
        col_colors.append(color_by_gran(row[0]))
    col_names = ['Location', '#Patients', '#SOZPatients', '#Elec', 'PSE'
                 ] + ['#{t}'.format(t=t) for t in sorted_types]
    # Column widths start at the header length and grow to fit the
    # widest cell in that column (granularity column excluded).
    col_width = [len(col_name) for col_name in col_names]
    for r in range(len(rows)):
        for c in range(1, len(rows[0])):  # granularity out
            if len(str(rows[r][c])) > col_width[c - 1]:
                col_width[c - 1] = len(str(rows[r][c]))
    print('\n Generating table...')
    print('Columns {0}'.format(col_names))
    print('Column widths: {0}'.format(col_width))
    fig = go.Figure(data=[
        go.Table(
            columnwidth=[col_width[i] for i in range(len(col_width))],
            header=dict(values=[
                '{b}{c}{b}'.format(b='<b>', c=col_names[i].ljust(col_width[i]))
                for i in range(len(col_names))
            ],
                        line_color='black',
                        fill_color='white',
                        align='left',
                        font=dict(color='black', size=12)),
            cells=dict(
                # Takes columns; the +1 skips granularity, which is not
                # printed as a column (it only drives the row color).
                values=[[r[i + 1] for r in rows]
                        for i in range(len(col_names))],
                fill_color=[
                    np.array(col_colors) for i in range(len(col_names))
                ],
                align='left',
                font=dict(color='black', size=12)))
    ])
    # Fixed per-row height keeps the table compact regardless of row count.
    row_height = 27
    header_h = 45
    table_h = header_h + row_height * len(rows)
    fig.update_layout(autosize=False,
                      height=table_h,
                      width=80 * len(col_names),
                      margin=dict(l=10, r=10, b=10, t=10, pad=2))
    orca_save(fig, saving_path)
    fig.show()