def build_circadian_model():
    """Run the ``runCircadianModel.m`` MATLAB script through the system shell.

    The command is ``Constants.MATLAB_PATH`` followed by headless flags and a
    ``-r`` statement that runs the script (located under the project root)
    and then exits MATLAB. The shell's exit status is not inspected.
    """
    command_pieces = [
        Constants.MATLAB_PATH,
        ' -nodisplay -nosplash -nodesktop -r \"run(\'',
        str(utils.get_project_root()),
        '/source/preprocessing/time/clock_proxy/runCircadianModel.m\'); exit;\"',
    ]
    shell_command = ''.join(command_pieces)
    os.system(shell_command)
def test_build_activity_counts(self, mock_os):
    """Building activity counts should issue exactly one MATLAB shell command.

    ``mock_os`` is the mock handed in by the caller; the test asserts on its
    ``system`` attribute.
    """
    # NOTE(review): the expected command starts with the literal 'matlab',
    # while build_activity_counts prefixes Constants.MATLAB_PATH -- confirm
    # the two are meant to agree.
    command_pieces = [
        'matlab -nodisplay -nosplash -nodesktop -r \"run(\'',
        str(utils.get_project_root()),
        '/source/make_counts.m\'); exit;\"',
    ]
    expected_command = ''.join(command_pieces)

    ActivityCountService.build_activity_counts()

    mock_os.system.assert_called_once_with(expected_command)
def read_precleaned(subject_id):
    """Load a subject's pre-extracted PSG labels into a raw collection.

    Reads ``data/labels/<subject_id>_labeled_sleep.txt`` under the project
    root as a space-delimited file. The first column of the first row is the
    start timestamp; the second column of every row is an integer stage score.
    Row ``n`` (zero-based) is stamped ``start + n * 30`` and given index
    ``n + 1``.

    :param subject_id: subject identifier used to build the file name
    :return: ``PSGRawDataCollection`` holding one ``StageItem`` per row
    """
    labels_path = str(utils.get_project_root().joinpath(
        'data/labels/' + subject_id + '_labeled_sleep.txt'))
    rows_per_epoch = 1
    stage_items = []

    with open(labels_path, 'rt') as csv_file:
        reader = csv.reader(csv_file, delimiter=' ', quotechar='|')
        for row_number, row in enumerate(reader):
            if row_number == 0:
                # Only the first row's timestamp is read; later rows are
                # spaced from it in fixed 30-unit steps.
                start_time = float(row[0])
                timestamp = start_time
            else:
                timestamp = start_time + row_number * 30
            stage = PSGConverter.get_label_from_int(int(row[1]))
            epoch = Epoch(timestamp=timestamp,
                          index=1 + row_number // rows_per_epoch)
            stage_items.append(StageItem(epoch=epoch, stage=stage))

    return PSGRawDataCollection(subject_id=subject_id, data=stage_items)
def test_get_raw_file_path(self):
    """The motion service should resolve to ``data/motion/<id>_acceleration.txt``."""
    expected_path = utils.get_project_root().joinpath('data/motion/').joinpath(
        "subject1_acceleration.txt")

    actual_path = MotionService.get_raw_file_path('subject1')

    self.assertEqual(expected_path, actual_path)
class Constants(object):
    """Shared constants: classifier thresholds, time units, output folders and the MATLAB binary."""

    # WAKE_THRESHOLD = 0.3  # These values were used for scikit-learn 0.20.3, See:
    # REM_THRESHOLD = 0.35  # https://scikit-learn.org/stable/whats_new.html#version-0-21-0
    WAKE_THRESHOLD = 0.5  #
    # NOTE(review): the layout of the threshold comments above was lost; it
    # looks like REM_THRESHOLD is an active attribute -- confirm.
    REM_THRESHOLD = 0.35

    # Time-unit conversions, all expressed in seconds (per the names).
    EPOCH_DURATION_IN_SECONDS = 30
    SECONDS_PER_MINUTE = 60
    SECONDS_PER_DAY = 3600 * 24
    SECONDS_PER_HOUR = 3600

    VERBOSE = False

    # Output folders, resolved against the project root when the class is defined.
    CROPPED_FILE_PATH = utils.get_project_root().joinpath('outputs/cropped/')
    FEATURE_FILE_PATH = utils.get_project_root().joinpath('outputs/features/')
    FIGURE_FILE_PATH = utils.get_project_root().joinpath('outputs/figures/')

    # NOTE(review): meaning of this bound is not visible in this block -- confirm with its users.
    LOWER_BOUND = -0.2

    MATLAB_PATH = '/Applications/MATLAB_R2019a.app/bin/matlab'  # Replace with your MATLAB path
def test_get_raw_file_path(self):
    """The heart-rate service should resolve to ``data/heart_rate/<id>_heartrate.txt``."""
    expected_path = utils.get_project_root().joinpath(
        'data/heart_rate/').joinpath("subject1_heartrate.txt")

    actual_path = HeartRateService.get_raw_file_path('subject1')

    self.assertEqual(expected_path, actual_path)
def test_gets_root_directory(self, mock_path):
    """``get_project_root`` should return the grandparent of the object the mocked path factory yields."""
    path_to_return = "path/to/return"
    # Wire <mock_path()>.parent.parent to the sentinel value.
    mock_path.return_value.parent.parent = path_to_return

    self.assertEqual(utils.get_project_root(), path_to_return)
def build_circadian_model(subject_id, valid_epochs):
    """Build the circadian feature for a subject from its clock-proxy file.

    Looks for ``data/circadian_predictions/<subject_id>_clock_proxy.txt``
    under the project root and, when present, hands its comma-separated
    values to ``build_circadian_model_from_raw`` along with ``valid_epochs``.

    :return: the result of ``build_circadian_model_from_raw``, or ``None``
        when the prediction file does not exist
    """
    predictions_path = utils.get_project_root().joinpath(
        'data/circadian_predictions/' + subject_id + '_clock_proxy.txt')

    if not predictions_path.is_file():
        # No prediction file for this subject: nothing to build.
        return None

    raw_predictions = pd.read_csv(str(predictions_path), delimiter=',').values
    return TimeBasedFeatureService.build_circadian_model_from_raw(
        raw_predictions, valid_epochs)
def get_path_to_file(subject_id):
    """Locate the PSG file for a subject.

    Tries the Compumedics file first, then the Vitaport file, both under
    ``data/psg`` and both named with the subject id zero-padded to two
    characters.

    :return: the first existing path, or ``None`` when neither file exists
    """
    psg_dir = utils.get_project_root().joinpath('data/psg')
    padded_id = subject_id.zfill(2)

    # Order matters: Compumedics wins when both files are present.
    candidates = (
        psg_dir.joinpath('compumedics/AW0' + padded_id + '.TXT'),
        psg_dir.joinpath('vitaport/AW0' + padded_id + '011.txt'),
    )
    for candidate in candidates:
        if candidate.is_file():
            return candidate
    return None
def get_type_and_report(subject_id):
    """Find a subject's PSG report and the file type it implies.

    A PDF report maps to ``PSGFileType.Compumedics``; a sleep-log ``.docx``
    maps to ``PSGFileType.Vitaport``. The PDF is checked first.

    :return: ``(report_path, file_type)``, or ``None`` (not a tuple) when
        neither report exists
    """
    report_dir = utils.get_project_root().joinpath('data/reports')

    # Note the two naming schemes differ: the PDF pads the id to two
    # characters after 'AW0', the DOCX appends the raw id after 'AW00'.
    candidates = (
        (report_dir.joinpath('AW0' + subject_id.zfill(2) + '011_REPORT.pdf'),
         PSGFileType.Compumedics),
        (report_dir.joinpath('AW00' + subject_id + '011 Study Sleep Log.docx'),
         PSGFileType.Vitaport),
    )
    for report_file, file_type in candidates:
        if report_file.is_file():
            return report_file, file_type
    return None
def load_circadian_model(file_id):
    """Load the MESA clock-proxy output for ``file_id``.

    Reads ``data/mesa/clock_proxy/<file_id>_clock_proxy.out`` as
    comma-separated values and passes it through ``utils.remove_nans``.

    :return: the cleaned array, or ``None`` when the file is missing, has no
        rows, or has no rows left after cleaning
    """
    proxy_path = utils.get_project_root().joinpath(
        'data/mesa/clock_proxy/' + file_id + '_clock_proxy.out')
    if not proxy_path.is_file():
        return None

    values = pd.read_csv(str(proxy_path), delimiter=',').values
    if not np.shape(values)[0] > 0:
        return None

    values = utils.remove_nans(values)
    if not np.shape(values)[0] > 0:
        return None
    return values
def load_raw(file_id):
    """Expand the MESA NSRR sleep-stage annotations of ``file_id`` into a flat stage array.

    Parses ``mesa-sleep-<file_id>-nsrr.xml``, keeps the ``ScoredEvent``
    entries whose stage text is a known sleep stage, and repeats each stage
    number once per unit of its duration (presumably seconds -- confirm).

    :return: ``np.array`` of stage numbers
    """
    stage_codes = {
        'Stage 4 sleep|4': 4,
        'Stage 3 sleep|3': 3,
        'Stage 2 sleep|2': 2,
        'Stage 1 sleep|1': 1,
        'Wake|0': 0,
        'REM sleep|5': 5
    }

    def child_text(event, position):
        # Text content of the event's child node at the given position.
        return event.childNodes[position].childNodes[0].nodeValue

    annotation_path = (
        str(utils.get_project_root())
        + '/data/mesa/polysomnography/annotations-events-nsrr/mesa-sleep-'
        + file_id + '-nsrr.xml')
    scored_events = minidom.parse(annotation_path).getElementsByTagName(
        'ScoredEvent')

    stage_windows = []
    for event in scored_events:
        # Child 7 is duration, 5 is start, 3 is stage.
        duration_text = child_text(event, 7)
        start_text = child_text(event, 5)
        stage_text = child_text(event, 3)
        if stage_text in stage_codes:
            stage_windows.append(
                (stage_codes[stage_text], float(start_text),
                 float(duration_text)))

    # NOTE(review): an earlier comment here said the last PSG overflow entry
    # is ignored, but every window is expanded -- confirm which is intended.
    expanded_stages = []
    for stage_number, _start, duration in stage_windows:
        elapsed = 0
        while elapsed < duration:
            expanded_stages.append(stage_number)
            elapsed += 1

    return np.array(expanded_stages)
def load_raw(file_id):
    """Read the heart-rate channel of a MESA EDF recording.

    Opens ``mesa-sleep-<file_id>.edf``, reads the second-to-last signal as
    heart rate, and pairs every sample with its elapsed time
    (sample index divided by that signal's sample frequency).

    The reader is opened in a ``with`` block so the EDF file handle is
    released as soon as the signal has been read, including when reading fails.

    :param file_id: MESA file identifier used to build the EDF file name
    :return: ``HeartRateCollection`` whose data has columns
        ``[time, heart_rate]`` after ``utils.remove_nans``
    """
    edf_path = (str(utils.get_project_root())
                + '/data/mesa/polysomnography/edfs/mesa-sleep-'
                + file_id + '.edf')

    with pyedflib.EdfReader(edf_path) as edf_file:
        signal_labels = edf_file.getSignalLabels()
        # NOTE(review): the heart-rate channel is assumed to be the
        # second-to-last signal -- confirm against the EDF layout.
        hr_column = len(signal_labels) - 2
        sample_frequency = edf_file.getSampleFrequencies()[hr_column]
        heart_rate = edf_file.readSignal(hr_column)

    # Timestamps for the heart-rate samples.
    time_hr = np.arange(len(heart_rate)) / sample_frequency

    data = np.transpose(np.vstack((time_hr, heart_rate)))
    data = utils.remove_nans(data)
    return HeartRateCollection(subject_id=file_id, data=data)
def load_raw(file_id):
    """Load MESA actigraphy counts for ``file_id``, starting at the PSG alignment line.

    The overlap CSV supplies the actigraphy line at which the PSG recording
    begins. Rows of the actigraphy CSV at or after that line are kept, with
    elapsed time starting at 0 and advancing by 30 per kept row; blank counts
    become NaN and are then dropped by ``utils.remove_nans``.

    :return: ``ActivityCountCollection``; when no alignment row matches, its
        data is the placeholder ``np.array([[-1], [-1]])``
    """
    project_root = str(utils.get_project_root())

    # Find alignment line between PSG and actigraphy. No early exit: if
    # several rows match, the last one wins.
    line_align = -1
    with open(project_root +
              '/data/mesa/overlap/mesa-actigraphy-psg-overlap.csv'
              ) as overlap_file:
        overlap_rows = csv.reader(overlap_file, delimiter=',')
        next(overlap_file)  # skip the header line
        for overlap_row in overlap_rows:
            if int(overlap_row[0]) == int(file_id):
                line_align = int(overlap_row[1])

    if line_align == -1:  # If there was no alignment found
        return ActivityCountCollection(subject_id=file_id,
                                       data=np.array([[-1], [-1]]))

    activity = []
    elapsed_time = 0
    with open(project_root + '/data/mesa/actigraphy/mesa-sleep-' + file_id +
              '.csv') as actigraphy_file:
        actigraphy_rows = csv.reader(actigraphy_file, delimiter=',')
        next(actigraphy_file)  # skip the header line
        for actigraphy_row in actigraphy_rows:
            if int(actigraphy_row[1]) < line_align:
                continue
            count_text = actigraphy_row[4]
            count = np.nan if count_text == '' else float(count_text)
            activity.append([elapsed_time, count])
            elapsed_time += 30

    data = utils.remove_nans(np.array(activity))
    return ActivityCountCollection(subject_id=file_id, data=data)
def make_data_demo(subject_id="16", snippet=False):
    """Render a five-row overview figure of one subject's cropped data and save it as a PNG.

    Rows, top to bottom: motion, activity counts, heart rate, clock proxy and
    sleep stage. With ``snippet=False`` a wide (10x12) figure of the whole
    recording is written to ``data_validation_<subject_id>.png``, with gray
    rectangles marking the sample windows. With ``snippet=True`` a narrow
    (3x12) zoomed figure is written to ``data_validation_zoom_<subject_id>.png``.

    Nothing is plotted or saved unless all cleaned input files and the clock
    proxy file exist and the cleaned motion file is non-empty.

    :param subject_id: subject identifier used in every input and output file name
    :param snippet: when True, draw the zoomed-in variant
    """
    # Per-signal line colours and shared label font.
    hr_color = [0.8, 0.2, 0.1]
    motion_color = [0.3, 0.2, 0.8]
    circ_color = [0.9, 0.7, 0]
    psg_color = [0.1, 0.7, 0.1]
    font_size = 16
    font_name = "Arial"

    # Directory prefixes (with trailing slash) used for string-built file names.
    data_path = str(Constants.CROPPED_FILE_PATH) + '/'
    circadian_data_path = str(utils.get_project_root().joinpath(
        'data/circadian_predictions/')) + '/'
    output_path = str(Constants.FIGURE_FILE_PATH) + '/'

    if snippet is False:
        fig = plt.figure(figsize=(10, 12))
    else:
        fig = plt.figure(figsize=(3, 12))

    num_v_plots = 5
    fig.patch.set_facecolor('white')

    # Only proceed when every input exists and the motion file has content.
    if (os.path.isfile(data_path + subject_id + '_cleaned_hr.out')
            and os.path.isfile(data_path + subject_id + '_cleaned_motion.out')
            and os.path.isfile(data_path + subject_id + '_cleaned_psg.out')
            and os.path.isfile(data_path + subject_id + '_cleaned_counts.out')
            and os.stat(data_path + subject_id +
                        '_cleaned_motion.out').st_size > 0) and os.path.isfile(
                            circadian_data_path + subject_id +
                            '_clock_proxy.txt'):

        # Note the delimiters: hr/motion/psg are space-separated, counts and
        # the clock proxy are comma-separated.
        hr = np.genfromtxt(data_path + subject_id + '_cleaned_hr.out',
                           delimiter=' ')
        motion = np.genfromtxt(data_path + subject_id + '_cleaned_motion.out',
                               delimiter=' ')
        scores = np.genfromtxt(data_path + subject_id + '_cleaned_psg.out',
                               delimiter=' ')
        counts = np.genfromtxt(data_path + subject_id + '_cleaned_counts.out',
                               delimiter=',')
        circ_model = np.genfromtxt(circadian_data_path + subject_id +
                                   '_clock_proxy.txt',
                                   delimiter=',')

        # The time axis spans the first column of the PSG scores.
        min_time = min(scores[:, 0])
        max_time = max(scores[:, 0])
        dt = 60 * 60  # passed to tidy_data_plot; presumably the tick spacing -- confirm

        # The sample window starts 92% of the way through the recording.
        sample_point_fraction = 0.92
        sample_point = sample_point_fraction * (max_time - min_time) + min_time
        window_size = 10
        if snippet:
            # Zoom the shared time limits onto the motion sample window.
            min_time = sample_point
            max_time = sample_point + window_size

        # --- Row 1: motion (columns 1-3 plotted against column 0) ---
        ax = plt.subplot(num_v_plots, 1, 1)
        ax.plot(motion[:, 0], motion[:, 1], color=motion_color)
        ax.plot(motion[:, 0], motion[:, 2], color=[0.4, 0.2, 0.7])
        ax.plot(motion[:, 0], motion[:, 3], color=[0.5, 0.2, 0.6])

        plt.ylabel('Motion (g)', fontsize=font_size, fontname=font_name)
        DataPlotBuilder.tidy_data_plot(min_time, max_time, dt, ax)

        if snippet:
            # Boxed inset look: all four spines visible, no y label.
            ax.spines['bottom'].set_visible(True)
            ax.spines['left'].set_visible(True)
            ax.spines['top'].set_visible(True)
            ax.spines['right'].set_visible(True)

            ax.yaxis.label.set_visible(False)

            # Samples inside (sample_point, sample_point + window_size].
            inds = np.intersect1d(
                np.where(motion[:, 0] > sample_point)[0],
                np.where(motion[:, 0] <= sample_point + window_size)[0])
            # NOTE(review): the slice 1:3 covers columns 1 and 2 only, although
            # column 3 is plotted too -- confirm this is intended.
            y_min = np.amin(motion[inds, 1:3])
            plt.ylim(y_min - 0.005, y_min + 0.025)

            # Get rid of the ticks
            ax.set_xticks([])
            ax.yaxis.set_ticks_position("right")

            plt.ylabel('')
            plt.xlabel(str(window_size) + ' sec window',
                       fontsize=font_size,
                       fontname=font_name)
        else:
            y_min = -3.2
            y_max = 2.5
            plt.ylim(y_min, y_max)
            # Gray rectangle marking where the zoomed snippet is taken from.
            current_axis = plt.gca()
            current_axis.add_patch(
                Rectangle((sample_point, y_min),
                          window_size,
                          y_max - y_min,
                          alpha=0.7,
                          facecolor="gray"))

        # --- Row 2: activity counts (hidden in snippet mode) ---
        ax = plt.subplot(num_v_plots, 1, 2)

        ax.plot(counts[:, 0], counts[:, 1], color=[0.2, 0.2, 0.7])
        DataPlotBuilder.tidy_data_plot(min_time, max_time, dt, ax)
        plt.ylabel('Counts', fontsize=font_size, fontname=font_name)

        if snippet:
            plt.axis('off')
            plt.ylim(-1, -1)

        # --- Row 3: heart rate ---
        ax = plt.subplot(num_v_plots, 1, 3)
        ax.plot(hr[:, 0], hr[:, 1], color=hr_color)
        plt.ylabel('Heart rate (bpm)', fontsize=font_size, fontname=font_name)
        DataPlotBuilder.tidy_data_plot(min_time, max_time, dt, ax)

        # Heart rate uses a longer window than motion.
        # NOTE(review): in snippet mode min_time/max_time were already narrowed
        # above, so this sample point is computed relative to the motion
        # window rather than the full recording -- confirm this is intended.
        sample_point = sample_point_fraction * (max_time - min_time) + min_time
        window_size = 1200

        if snippet:
            min_time = sample_point
            max_time = sample_point + window_size
            DataPlotBuilder.tidy_data_plot(min_time, max_time, dt, ax)

            ax.spines['bottom'].set_visible(True)
            ax.spines['left'].set_visible(True)
            ax.spines['top'].set_visible(True)
            ax.spines['right'].set_visible(True)

            ax.yaxis.label.set_visible(False)

            ax.set_xticks([])
            ax.yaxis.set_ticks_position("right")

            plt.ylabel('')
            plt.xlabel(str(window_size) + ' sec window',
                       fontsize=font_size,
                       fontname=font_name)
            plt.ylim(35, 100)

        else:
            y_min = 40
            y_max = 130
            plt.ylim(y_min, y_max)
            current_axis = plt.gca()
            current_axis.add_patch(
                Rectangle((sample_point, y_min),
                          window_size,
                          y_max - y_min,
                          alpha=0.35,
                          facecolor="gray"))
            # NOTE(review): placement inferred -- repeats the limits set a few
            # lines above; confirm it belongs to this branch.
            plt.ylim(40, 130)

        # --- Row 4: clock proxy, plotted with its sign flipped ---
        ax = plt.subplot(num_v_plots, 1, 4)
        ax.plot(circ_model[:, 0], -circ_model[:, 1], color=circ_color)
        plt.ylabel('Clock Proxy', fontsize=font_size, fontname=font_name)
        DataPlotBuilder.tidy_data_plot(min_time, max_time, dt, ax)
        if snippet:
            plt.axis('off')
            plt.ylim(-1, -1)
        else:
            plt.ylim(.2, 1.2)

        # --- Row 5: hypnogram of relabeled sleep stages ---
        ax = plt.subplot(num_v_plots, 1, 5)

        relabeled_scores = DataPlotBuilder.convert_labels_for_hypnogram(
            scores[:, 1])
        ax.step(scores[:, 0], relabeled_scores, color=psg_color)
        plt.ylabel('Stage', fontsize=font_size, fontname=font_name)
        plt.xlabel('Time', fontsize=font_size, fontname=font_name)
        DataPlotBuilder.tidy_data_plot(min_time, max_time, dt, ax)
        ax.set_yticks([-4, -3, -2, -1, 0, 1])
        ax.set_yticklabels(['N4', 'N3', 'N2', 'N1', 'Wake', 'REM'])

        if snippet:
            plt.axis('off')
            plt.ylim(5, 5)
        else:
            plt.ylim(-5, 2)

        # Output name depends on the mode.
        if not snippet:
            plt.savefig(output_path + 'data_validation_' + subject_id + '.png',
                        bbox_inches='tight',
                        pad_inches=0.1,
                        dpi=300)
        else:
            plt.savefig(output_path + 'data_validation_zoom_' + subject_id +
                        '.png',
                        bbox_inches='tight',
                        pad_inches=0.1,
                        dpi=300)
        # NOTE(review): as laid out here the figure is only closed when the
        # inputs were found -- confirm whether it should always be closed.
        plt.close()
def get_raw_file_path(subject_id):
    """Return the path of the subject's raw heart-rate file, ``data/heart_rate/<subject_id>_heartrate.txt`` under the project root."""
    file_name = subject_id + '_heartrate.txt'
    return utils.get_project_root().joinpath('data/heart_rate/').joinpath(
        file_name)
def get_raw_file_path(subject_id):
    """Return the path of the subject's raw acceleration file, ``data/motion/<subject_id>_acceleration.txt`` under the project root."""
    relative_path = 'data/motion/' + subject_id + '_acceleration.txt'
    return utils.get_project_root().joinpath(relative_path)
def get_all_files():
    """List every path under the MESA polysomnography ``edfs`` directory whose name ends in ``edf``."""
    edf_pattern = str(
        utils.get_project_root()) + "/data/mesa/polysomnography/edfs/*edf"
    return glob.glob(edf_pattern)
class RawDataProcessor:
    """Crops raw PSG, motion and heart-rate data to their shared time span and finds the epochs usable afterwards."""

    BASE_FILE_PATH = utils.get_project_root().joinpath('outputs/cropped/')

    @staticmethod
    def crop_all(subject_id):
        """Load a subject's raw data, crop each stream to the common interval, and write the results.

        After writing, activity counts are built from the cropped motion data
        with the Python implementation (not MATLAB).
        """
        # Loads already extracted PSG data. PSGService.read_raw(subject_id)
        # is the alternative used to extract PSG details from the reports.
        psg = PSGService.read_precleaned(subject_id)
        motion = MotionService.load_raw(subject_id)
        heart_rate = HeartRateService.load_raw(subject_id)

        shared_interval = RawDataProcessor.get_intersecting_interval(
            [psg, motion, heart_rate])

        psg = PSGService.crop(psg, shared_interval)
        motion = MotionService.crop(motion, shared_interval)
        heart_rate = HeartRateService.crop(heart_rate, shared_interval)

        PSGService.write(psg)
        MotionService.write(motion)
        HeartRateService.write(heart_rate)

        ActivityCountService.build_activity_counts_without_matlab(
            subject_id, motion.data)

    @staticmethod
    def get_intersecting_interval(collection_list):
        """Return the interval from the latest start time to the earliest end time across the collections."""
        intervals = [
            collection.get_interval() for collection in collection_list
        ]
        latest_start = max(interval.start_time for interval in intervals)
        earliest_end = min(interval.end_time for interval in intervals)
        return Interval(start_time=latest_start, end_time=earliest_end)

    @staticmethod
    def get_valid_epochs(subject_id):
        """Return the scored PSG epochs that have both motion and heart-rate samples.

        An epoch qualifies when its timestamp appears among the floored motion
        timestamps and the floored heart-rate timestamps, and its stage is not
        ``SleepStage.unscored``.
        """
        psg = PSGService.load_cropped(subject_id)
        motion = MotionService.load_cropped(subject_id)
        heart_rate = HeartRateService.load_cropped(subject_id)

        # Epoch boundaries are measured from the first PSG epoch.
        first_timestamp = psg.data[0].epoch.timestamp
        motion_epochs = RawDataProcessor.get_valid_epoch_dictionary(
            motion.timestamps, first_timestamp)
        heart_rate_epochs = RawDataProcessor.get_valid_epoch_dictionary(
            heart_rate.timestamps, first_timestamp)

        return [
            stage_item.epoch for stage_item in psg.data
            if stage_item.epoch.timestamp in motion_epochs
            and stage_item.epoch.timestamp in heart_rate_epochs
            and stage_item.stage != SleepStage.unscored
        ]

    @staticmethod
    def get_valid_epoch_dictionary(timestamps, start_time):
        """Map each timestamp, floored to an ``Epoch.DURATION`` boundary counted from ``start_time``, to ``True``."""
        return {
            moment - np.mod(moment - start_time, Epoch.DURATION): True
            for moment in timestamps
        }
def build_activity_counts():
    """Run the ``make_counts.m`` MATLAB script through the system shell.

    The command is ``Constants.MATLAB_PATH`` followed by headless flags and a
    ``-r`` statement that runs the script (located under the project root)
    and then exits MATLAB. The shell's exit status is not inspected.
    """
    command_pieces = [
        Constants.MATLAB_PATH,
        ' -nodisplay -nosplash -nodesktop -r \"run(\'',
        str(utils.get_project_root()),
        '/source/make_counts.m\'); exit;\"',
    ]
    shell_command = ''.join(command_pieces)
    os.system(shell_command)