def assess_states(self, raw_path=None, downsample_rate=None, savestring='example',
                  threshold=65, raw_load=True, saved_path=None, make_pdfs=True):
    """Run the LDA + random-forest pipeline over a recording and report state predictions.

    raw_path        : directory handed to SeizureData when raw_load is True
    downsample_rate : unused here; kept so existing callers keep working
    savestring      : tag used for the pickled dataset and output file names
    threshold       : 'sureity' threshold (percent); winners below it are flagged as mixed
    raw_load        : load raw data (True) or a previously pickled dataset (False)
    saved_path      : pickle path, required when raw_load is False
    make_pdfs       : also plot the traces to pdf
    """
    # Bug fix: was hard-coded to '65', silently ignoring the `threshold` argument.
    # Kept as a string because _write_to_excel() passes it straight to xlsxwriter.
    self.threshold = str(threshold)
    self.savestring = savestring
    if raw_load:
        self.dataobj = SeizureData(raw_path, fs_dict=self.fs_dict)
        self.dataobj.load_data()
        # `with` guarantees the pickle file is closed (handle was leaked before).
        with open('../' + savestring + '_saved', 'wb') as f:
            pickle.dump(self.dataobj, f)
    else:
        assert saved_path is not None
        with open(saved_path, 'rb') as f:
            self.dataobj = pickle.load(f)

    self.norm_data = utils.normalise(self.dataobj.data_array)
    feature_obj = FeatureExtractor(self.norm_data)
    i_features = self.classifier.imputer.transform(feature_obj.feature_array)
    iss_features = self.classifier.std_scaler.transform(i_features)
    lda_iss_features = self.lda.transform(iss_features)
    np.set_printoptions(precision=3, suppress=True)

    self.pred_table = self.r_forest_lda.predict_proba(lda_iss_features) * 100
    self.preds = self.r_forest_lda.predict(lda_iss_features)
    # Bug fix: `lst[lst == 4] = 'Baseline'` on a plain list evaluates `lst == 4`
    # to False (index 0) and overwrote the FIRST prediction; the intent is to
    # relabel every class-4 prediction as 'Baseline'.
    self.predslist = ['Baseline' if p == 4 else p for p in self.preds]
    self.max_preds = np.max(self.pred_table, axis=1)
    # 1 where the winning class probability falls below the confidence threshold.
    self.threshold_for_mixed = np.where(self.max_preds < int(self.threshold), 1, 0)

    self._string_fun2()
    self._write_to_excel()
    if make_pdfs:
        self.plot_pdfs()
def raw_training_load():
    """Assemble the raw training array: selected rows of the 2015-06-16 batch
    stacked on top of the full original classified dataset.

    Returns np.vstack((data0616_unnorm, dataset301)) -- note both parts are the
    *unnormalised* arrays.

    NOTE(review): the normalised arrays and every label array built here
    (labels0616, the relabelled labels301, dataset129_*, labels129) are computed
    but never returned -- presumably notebook leftovers; confirm before relying
    on this function for labels.
    """
    ################# 'NEW data' ###################
    dirpath = '/Users/Jonathan/PhD/Seizure_related/20150616'
    _20150616dataobj = SeizureData(dirpath, amount_to_downsample=40)
    _20150616dataobj.load_data()
    _20150616data = _20150616dataobj.data_array
    _20150616labels = _20150616dataobj.label_colarray
    _20150616data_norm = normalise(_20150616data)
    print _20150616dataobj.filename_list.shape
    # materialise the filename array as a plain list so it can be sliced below
    _20150616dataobj.filenames_list = [
        _20150616dataobj.filename_list[i]
        for i in range(_20150616dataobj.filename_list.shape[0])
    ]
    for name in _20150616dataobj.filenames_list[0:20]:
        print name[-34:]
    # select out the stuff we want
    #inds = np.loadtxt('0901_400newdata.csv', delimiter=',')
    notebook_dir = '/Users/jonathan/PhD/Seizure_related/2015_08_PyRanalysis/'
    # column 0: row index into the 0616 arrays; column 1: corrected label
    inds = np.loadtxt(notebook_dir + '0616correctedintervals.csv', delimiter=',')
    data0616_unnorm = _20150616data[list(inds[:, 0])]
    data0616 = _20150616data_norm[list(inds[:, 0])]
    labels0616 = _20150616labels[list(inds[:, 0])]
    # overwrite the loaded labels with the corrected ones from the csv
    for i in range(data0616.shape[0]):
        labels0616[i] = inds[i, 1]
    ################## Original Data ####################
    dirpath = '/Users/Jonathan/PhD/Seizure_related/Classified'
    dataobj = SeizureData(dirpath, amount_to_downsample=20)
    dataobj.load_data()
    dataobj = relabel(dataobj)
    dataobj = reorder(dataobj)
    dataset301 = dataobj.data_array
    labels301 = dataobj.label_colarray
    # apply manually corrected event labels (row index, new label) pairs
    new_labels = np.loadtxt(notebook_dir + 'new_event_labels_28082015.csv', delimiter=',')
    for x in new_labels:
        labels301[x[0]] = x[1]
    # subset of rows judged 'perfect' -- column 0 holds the row indexes
    selection = np.loadtxt(notebook_dir + 'perfect_event_labels_28082015.csv', delimiter=',')
    indexes = list(selection[:, 0])
    dataset129_unnorm = dataset301[indexes, :]
    dataset129_norm = normalise(dataset129_unnorm)
    dataset301_norm = normalise(dataset301)
    labels129 = labels301[indexes]
    return np.vstack((data0616_unnorm, dataset301))
def raw_validation_load(): dirpath1 = '/Users/Jonathan/PhD/Seizure_related/batchSept_UC_20' testdataobj20 = SeizureData(dirpath1, amount_to_downsample=40) testdataobj20.load_data() datasettest20 = testdataobj20.data_array dirpath2 = '/Users/Jonathan/PhD/Seizure_related/batchSept_UC_40' testdataobj40 = SeizureData(dirpath2, amount_to_downsample=40) testdataobj40.load_data() datasettest40 = testdataobj40.data_array print datasettest40.shape, 'is correct data shape' datasettest = np.vstack([datasettest20, datasettest40]) return datasettest
def assess_states(self, raw_path=None, downsample_rate=None, savestring='example',
                  threshold=65, raw_load=True, saved_path=None, make_pdfs=True):
    """Predict a state for every file in a recording set and write the results out.

    raw_path        : directory given to SeizureData when raw_load is True
    downsample_rate : currently unused; retained for caller compatibility
    savestring      : tag for the pickled dataset and Excel/pdf output names
    threshold       : confidence ('sureity') threshold in percent
    raw_load        : True to load raw files, False to unpickle a saved dataset
    saved_path      : pickle path, required when raw_load is False
    make_pdfs       : whether to also render trace pdfs
    """
    # Fix: the `threshold` argument used to be discarded in favour of a
    # hard-coded '65'. Stored as a string for _write_to_excel()/xlsxwriter.
    self.threshold = str(threshold)
    self.savestring = savestring
    if raw_load:
        self.dataobj = SeizureData(raw_path, fs_dict=self.fs_dict)
        self.dataobj.load_data()
        # context manager closes the dump file (previously left open)
        with open('../' + savestring + '_saved', 'wb') as f:
            pickle.dump(self.dataobj, f)
    else:
        assert saved_path is not None
        with open(saved_path, 'rb') as f:
            self.dataobj = pickle.load(f)

    self.norm_data = utils.normalise(self.dataobj.data_array)
    feature_obj = FeatureExtractor(self.norm_data)
    i_features = self.classifier.imputer.transform(feature_obj.feature_array)
    iss_features = self.classifier.std_scaler.transform(i_features)
    lda_iss_features = self.lda.transform(iss_features)
    np.set_printoptions(precision=3, suppress=True)

    self.pred_table = self.r_forest_lda.predict_proba(lda_iss_features) * 100
    self.preds = self.r_forest_lda.predict(lda_iss_features)
    # Fix: the old `predslist[predslist == 4] = 'Baseline'` indexed the list
    # with False (i.e. 0), replacing only the first element; the intent is to
    # map every class-4 prediction to the 'Baseline' label.
    self.predslist = ['Baseline' if p == 4 else p for p in self.preds]
    self.max_preds = np.max(self.pred_table, axis=1)
    # flag 1 when the best class probability is below the threshold ("mixed")
    self.threshold_for_mixed = np.where(self.max_preds < int(self.threshold), 1, 0)

    self._string_fun2()
    self._write_to_excel()
    if make_pdfs:
        self.plot_pdfs()
def raw_validation_load(): dirpath1 = '/Users/Jonathan/PhD/Seizure_related/batchSept_UC_20' testdataobj20 = SeizureData(dirpath1,amount_to_downsample = 40) testdataobj20.load_data() datasettest20 = testdataobj20.data_array dirpath2 = '/Users/Jonathan/PhD/Seizure_related/batchSept_UC_40' testdataobj40 = SeizureData(dirpath2,amount_to_downsample = 40) testdataobj40.load_data() datasettest40 = testdataobj40.data_array print datasettest40.shape,'is correct data shape' datasettest = np.vstack([datasettest20,datasettest40]) return datasettest
def raw_training_load():
    """Build the raw (unnormalised) training array from two sources.

    Stacks the corrected subset of the 2015-06-16 batch on top of the original
    301-sample classified dataset and returns the result.

    NOTE(review): labels0616, the relabelled labels301, labels129 and the
    dataset129_*/normalised arrays are computed but play no part in the return
    value -- looks like exploratory leftovers; verify before removing.
    """
    ################# 'NEW data' ###################
    dirpath = '/Users/Jonathan/PhD/Seizure_related/20150616'
    _20150616dataobj = SeizureData(dirpath, amount_to_downsample = 40)
    _20150616dataobj.load_data()
    _20150616data = _20150616dataobj.data_array
    _20150616labels = _20150616dataobj.label_colarray
    _20150616data_norm = normalise(_20150616data)
    print _20150616dataobj.filename_list.shape
    # copy the filename array into a plain list for the preview slice below
    _20150616dataobj.filenames_list = [_20150616dataobj.filename_list[i]
                                       for i in range(_20150616dataobj.filename_list.shape[0])]
    for name in _20150616dataobj.filenames_list[0:20]:
        print name[-34:]
    # select out the stuff we want
    #inds = np.loadtxt('0901_400newdata.csv', delimiter=',')
    notebook_dir = '/Users/jonathan/PhD/Seizure_related/2015_08_PyRanalysis/'
    # csv columns: 0 = row index into the 0616 arrays, 1 = corrected label
    inds = np.loadtxt(notebook_dir +'0616correctedintervals.csv', delimiter=',')
    data0616_unnorm = _20150616data[list(inds[:,0])]
    data0616 = _20150616data_norm[list(inds[:,0])]
    labels0616 = _20150616labels[list(inds[:,0])]
    # replace the loaded labels with the corrected ones
    for i in range(data0616.shape[0]):
        labels0616[i] = inds[i,1]
    ################## Original Data ####################
    dirpath = '/Users/Jonathan/PhD/Seizure_related/Classified'
    dataobj = SeizureData(dirpath,amount_to_downsample = 20)
    dataobj.load_data()
    dataobj = relabel(dataobj)
    dataobj = reorder(dataobj)
    dataset301 = dataobj.data_array
    labels301 = dataobj.label_colarray
    # apply manual (row index, new label) corrections
    new_labels = np.loadtxt(notebook_dir+'new_event_labels_28082015.csv',delimiter= ',')
    for x in new_labels:
        labels301[x[0]] = x[1]
    # rows whose labels were judged 'perfect'; column 0 holds the indexes
    selection = np.loadtxt(notebook_dir+'perfect_event_labels_28082015.csv',delimiter= ',')
    indexes = list(selection[:,0])
    dataset129_unnorm = dataset301[indexes,:]
    dataset129_norm = normalise(dataset129_unnorm)
    dataset301_norm = normalise(dataset301)
    labels129 = labels301[indexes]
    return np.vstack((data0616_unnorm,dataset301))
class Predictor():
    """Predict brain-state labels for seizure recordings with a pickled classifier.

    Loads a trained classifier bundle (imputer, std scaler, LDA, random forests)
    from disk; `assess_states()` then runs the full pipeline over a data
    directory and writes per-file predictions to an Excel sheet.

    NOTE(review): relies on module-level imports elsewhere in the file
    (pickle, np, pd, utils, SeizureData, FeatureExtractor, plot_traces).
    """

    def __init__(self, clf_pickle_path=None, fs_dict_path='../pickled_fs_dictionary'):
        # Recordings to skip -- not referenced inside this class as shown;
        # presumably consumed by external driver code. TODO confirm.
        self.skipfiles = ('EX150515T11',
                          'EX180315T14',
                          'EX180515T4',
                          'EX200515T4.',)
        self.skip_dir = '/Volumes/LACIE SHARE/VM_data/All_Data_Jan_2016/PV_ChR2/'
        if clf_pickle_path is None:  # was `== None`
            clf_pickle_path = '../saved_clf'
        # `with` blocks close the pickle files (handles were leaked before).
        with open(fs_dict_path, 'rb') as fh:
            self.fs_dict = pickle.load(fh)
        with open(clf_pickle_path, 'rb') as fh:
            self.classifier = pickle.load(fh)
        self.r_forest = self.classifier.r_forest
        self.r_forest_lda = self.classifier.r_forest_lda
        self.lda = self.classifier.lda
        print(self.lda)

    def assess_states(self, raw_path=None, downsample_rate=None, savestring='example',
                      threshold=65, raw_load=True, saved_path=None, make_pdfs=True):
        """Classify every file under raw_path (or a pickled dataset) and report results.

        raw_path        : directory handed to SeizureData when raw_load is True
        downsample_rate : unused here; kept for caller compatibility
        savestring      : tag for the pickled dataset and Excel/pdf output names
        threshold       : 'sureity' threshold (percent); winners below it are flagged
        raw_load        : load raw data (True) or a previously pickled dataset (False)
        saved_path      : pickle path, required when raw_load is False
        make_pdfs       : also plot the traces to pdf
        """
        # Bug fix: was hard-coded to '65', ignoring the `threshold` argument.
        # Stored as a string because _write_to_excel() feeds it to xlsxwriter.
        self.threshold = str(threshold)
        self.savestring = savestring
        if raw_load:
            self.dataobj = SeizureData(raw_path, fs_dict=self.fs_dict)
            self.dataobj.load_data()
            with open('../' + savestring + '_saved', 'wb') as fh:
                pickle.dump(self.dataobj, fh)
        else:
            assert saved_path is not None
            with open(saved_path, 'rb') as fh:
                self.dataobj = pickle.load(fh)

        self.norm_data = utils.normalise(self.dataobj.data_array)
        feature_obj = FeatureExtractor(self.norm_data)
        i_features = self.classifier.imputer.transform(feature_obj.feature_array)
        iss_features = self.classifier.std_scaler.transform(i_features)
        lda_iss_features = self.lda.transform(iss_features)
        np.set_printoptions(precision=3, suppress=True)

        self.pred_table = self.r_forest_lda.predict_proba(lda_iss_features) * 100
        self.preds = self.r_forest_lda.predict(lda_iss_features)
        # Bug fix: `lst[lst == 4] = 'Baseline'` evaluated `lst == 4` to False
        # (index 0) and overwrote the FIRST entry; the intent is to relabel
        # every class-4 prediction as 'Baseline'.
        self.predslist = ['Baseline' if p == 4 else p for p in self.preds]
        self.max_preds = np.max(self.pred_table, axis=1)
        # 1 where the winning class probability falls below the threshold
        self.threshold_for_mixed = np.where(self.max_preds < int(self.threshold), 1, 0)

        self._string_fun2()
        self._write_to_excel()
        if make_pdfs:
            self.plot_pdfs()

    def plot_pdfs(self):
        """Plot the normalised traces, annotated with predictions, to pdf files."""
        plot_traces(self.norm_data,
                    self.preds,
                    savestring='/Volumes/LACIE SHARE/VM_data/All_Data_Jan_2016/pdfs/' + self.savestring,
                    prob_thresholds=self.threshold_for_mixed)

    def _string_fun2(self):
        """Parse filenames of the full 2016/01 vm dataset into self.nameframe.

        Expects names shaped like <prefix>X<date><ID>_<pulse time>_..._<file end>
        (lowercase 'x' fallback). TODO confirm the exact naming convention.
        """
        self.nameframe = pd.DataFrame(columns=['Date', 'ID', 'File Start', 'File End', 'Pulse Time'])
        for i, f in enumerate(self.dataobj.filename_list):
            f = f.split('/')[-1]
            try:
                date = f.split('X')[1].split('T')[0]
            except IndexError:
                # some files use a lowercase 'x' before the date
                date = f.split('x')[1].split('T')[0]
            t_start = '0'
            t_end = f.split('_')[-1]
            t_onset = f.split('_')[1]
            transmitter = f.split('_')[0].split(date)[-1]
            self.nameframe.loc[i] = [date, transmitter, t_start, t_end, t_onset]

    def _string_fun(self):
        """Parse the older bracketed filename convention into self.nameframe."""
        self.nameframe = pd.DataFrame(columns=['Date', 'ID', 'File Start', 'File End', 'Pulse Time'])
        for i, f in enumerate(self.dataobj.filename_list):
            f = f.split('/')[-1]
            s_brackets = f.split('[', 1)[1].split(']')[0]
            s_brackets_start = s_brackets.split('-')[0]
            t_end = s_brackets.split('-')[-1].split('s')[0]
            try:
                t_start = int(s_brackets_start)
            except ValueError:  # narrowed from bare `except:` -- only int() can fail here
                t_start = int(s_brackets_start.split(' ')[-1])
            date = f.split('X')[1].split('t')[0].strip('-')
            transmitter = f.split('_')[0].split('r')[-1]
            t_onset = float(f.split(']')[-1]) + t_start
            self.nameframe.loc[i] = [date, transmitter, t_start, t_end, t_onset]

    def _write_to_excel(self):
        """Write name metadata plus class probabilities to xlsx, flagging low-confidence rows."""
        sheet = pd.DataFrame(self.pred_table)
        pred = pd.DataFrame(self.predslist, columns=['Index'])
        max_preds = pd.DataFrame(self.max_preds)
        max_preds.columns = ['Max']
        sheet.columns = ['State1', 'State2', 'State3', 'Baseline']
        frames = [self.nameframe, sheet, max_preds, pred]
        vmsheet = pd.concat(frames, axis=1)
        print(vmsheet.head())
        writer = pd.ExcelWriter('/Volumes/LACIE SHARE/VM_data/All_Data_Jan_2016/' + self.savestring + '.xlsx',
                                engine='xlsxwriter')
        vmsheet.to_excel(writer, index=True, sheet_name='Pulse prediction')
        workbook = writer.book
        worksheet = writer.sheets['Pulse prediction']
        percent_fmt = workbook.add_format({'num_format': '0.00', 'bold': False})
        format1 = workbook.add_format({'bg_color': '#FFC7CE', 'font_color': '#9C0006'})
        worksheet.set_column('G:J', 12, percent_fmt)
        color_range = "K2:K{}".format(len(self.dataobj.filename_list) + 1)
        # Highlight rows whose max probability is <= threshold.
        # NOTE(review): self.threshold is a string; xlsxwriter passes it through
        # to Excel -- confirm the comparison is numeric, not lexicographic.
        worksheet.conditional_format(color_range, {'type': 'cell',
                                                   'criteria': '<=',
                                                   'value': self.threshold,
                                                   'format': format1})
        writer.save()
class Predictor():
    """Run a pickled state classifier over seizure recordings and export results.

    On construction it unpickles the trained classifier bundle (imputer,
    standard scaler, LDA and random forests); `assess_states()` then classifies
    a dataset and writes the predictions to Excel (and optionally pdfs).

    NOTE(review): depends on module-level names imported elsewhere in the file
    (pickle, np, pd, utils, SeizureData, FeatureExtractor, plot_traces).
    """

    def __init__(self, clf_pickle_path=None, fs_dict_path='../pickled_fs_dictionary'):
        # Recordings to skip; not used inside this class as visible here --
        # presumably read by external driver code. TODO confirm.
        self.skipfiles = (
            'EX150515T11',
            'EX180315T14',
            'EX180515T4',
            'EX200515T4.',
        )
        self.skip_dir = '/Volumes/LACIE SHARE/VM_data/All_Data_Jan_2016/PV_ChR2/'
        if clf_pickle_path is None:  # fixed `== None` comparison
            clf_pickle_path = '../saved_clf'
        # Context managers close the pickle files (previously leaked handles).
        with open(fs_dict_path, 'rb') as handle:
            self.fs_dict = pickle.load(handle)
        with open(clf_pickle_path, 'rb') as handle:
            self.classifier = pickle.load(handle)
        self.r_forest = self.classifier.r_forest
        self.r_forest_lda = self.classifier.r_forest_lda
        self.lda = self.classifier.lda
        print(self.lda)

    def assess_states(self, raw_path=None, downsample_rate=None, savestring='example',
                      threshold=65, raw_load=True, saved_path=None, make_pdfs=True):
        """Classify a dataset and write predictions to Excel (and optionally pdfs).

        raw_path        : directory given to SeizureData when raw_load is True
        downsample_rate : currently unused; retained for caller compatibility
        savestring      : tag for the pickled dataset and output file names
        threshold       : confidence ('sureity') threshold in percent
        raw_load        : True to load raw files, False to unpickle saved_path
        saved_path      : pickle path, required when raw_load is False
        make_pdfs       : whether to also render trace pdfs
        """
        # Fix: the `threshold` argument used to be discarded in favour of a
        # hard-coded '65'. Kept as str for _write_to_excel()/xlsxwriter.
        self.threshold = str(threshold)
        self.savestring = savestring
        if raw_load:
            self.dataobj = SeizureData(raw_path, fs_dict=self.fs_dict)
            self.dataobj.load_data()
            with open('../' + savestring + '_saved', 'wb') as handle:
                pickle.dump(self.dataobj, handle)
        else:
            assert saved_path is not None
            with open(saved_path, 'rb') as handle:
                self.dataobj = pickle.load(handle)

        self.norm_data = utils.normalise(self.dataobj.data_array)
        feature_obj = FeatureExtractor(self.norm_data)
        i_features = self.classifier.imputer.transform(feature_obj.feature_array)
        iss_features = self.classifier.std_scaler.transform(i_features)
        lda_iss_features = self.lda.transform(iss_features)
        np.set_printoptions(precision=3, suppress=True)

        self.pred_table = self.r_forest_lda.predict_proba(lda_iss_features) * 100
        self.preds = self.r_forest_lda.predict(lda_iss_features)
        # Fix: the old `predslist[predslist == 4] = 'Baseline'` indexed the
        # list with False (i.e. 0) and replaced only the first element; the
        # intent is to map every class-4 prediction to 'Baseline'.
        self.predslist = ['Baseline' if p == 4 else p for p in self.preds]
        self.max_preds = np.max(self.pred_table, axis=1)
        # flag 1 when the best class probability is below the threshold
        self.threshold_for_mixed = np.where(self.max_preds < int(self.threshold), 1, 0)

        self._string_fun2()
        self._write_to_excel()
        if make_pdfs:
            self.plot_pdfs()

    def plot_pdfs(self):
        """Render the normalised traces with their predictions to pdf files."""
        plot_traces(
            self.norm_data,
            self.preds,
            savestring='/Volumes/LACIE SHARE/VM_data/All_Data_Jan_2016/pdfs/' + self.savestring,
            prob_thresholds=self.threshold_for_mixed)

    def _string_fun2(self):
        """Build self.nameframe from the 2016/01 vm filename convention.

        Assumes names like <prefix>X<date><ID>_<pulse time>_..._<file end>,
        with a lowercase 'x' fallback -- TODO confirm against the data.
        """
        self.nameframe = pd.DataFrame(
            columns=['Date', 'ID', 'File Start', 'File End', 'Pulse Time'])
        for i, f in enumerate(self.dataobj.filename_list):
            f = f.split('/')[-1]
            try:
                date = f.split('X')[1].split('T')[0]
            except IndexError:
                # fall back to a lowercase 'x' date separator
                date = f.split('x')[1].split('T')[0]
            t_start = '0'
            t_end = f.split('_')[-1]
            t_onset = f.split('_')[1]
            transmitter = f.split('_')[0].split(date)[-1]
            self.nameframe.loc[i] = [date, transmitter, t_start, t_end, t_onset]

    def _string_fun(self):
        """Build self.nameframe from the older bracketed filename convention."""
        self.nameframe = pd.DataFrame(
            columns=['Date', 'ID', 'File Start', 'File End', 'Pulse Time'])
        for i, f in enumerate(self.dataobj.filename_list):
            f = f.split('/')[-1]
            s_brackets = f.split('[', 1)[1].split(']')[0]
            s_brackets_start = s_brackets.split('-')[0]
            t_end = s_brackets.split('-')[-1].split('s')[0]
            try:
                t_start = int(s_brackets_start)
            except ValueError:  # narrowed from bare `except:`; only int() can raise here
                t_start = int(s_brackets_start.split(' ')[-1])
            date = f.split('X')[1].split('t')[0].strip('-')
            transmitter = f.split('_')[0].split('r')[-1]
            t_onset = float(f.split(']')[-1]) + t_start
            self.nameframe.loc[i] = [date, transmitter, t_start, t_end, t_onset]

    def _write_to_excel(self):
        """Export metadata + class probabilities to xlsx; shade low-confidence rows."""
        sheet = pd.DataFrame(self.pred_table)
        pred = pd.DataFrame(self.predslist, columns=['Index'])
        max_preds = pd.DataFrame(self.max_preds)
        max_preds.columns = ['Max']
        sheet.columns = ['State1', 'State2', 'State3', 'Baseline']
        frames = [self.nameframe, sheet, max_preds, pred]
        vmsheet = pd.concat(frames, axis=1)
        print(vmsheet.head())
        writer = pd.ExcelWriter(
            '/Volumes/LACIE SHARE/VM_data/All_Data_Jan_2016/' + self.savestring + '.xlsx',
            engine='xlsxwriter')
        vmsheet.to_excel(writer, index=True, sheet_name='Pulse prediction')
        workbook = writer.book
        worksheet = writer.sheets['Pulse prediction']
        percent_fmt = workbook.add_format({'num_format': '0.00', 'bold': False})
        format1 = workbook.add_format({'bg_color': '#FFC7CE', 'font_color': '#9C0006'})
        worksheet.set_column('G:J', 12, percent_fmt)
        color_range = "K2:K{}".format(len(self.dataobj.filename_list) + 1)
        # Shade rows whose max probability is <= threshold.
        # NOTE(review): self.threshold is a string handed straight to
        # xlsxwriter -- verify Excel compares it numerically.
        worksheet.conditional_format(
            color_range, {
                'type': 'cell',
                'criteria': '<=',
                'value': self.threshold,
                'format': format1
            })
        writer.save()