Ejemplo n.º 1
0
 def build_circadian_model():
     """Run the circadian-model MATLAB script in a headless MATLAB session.

     The shell command is assembled from ``Constants.MATLAB_PATH`` and the
     project root, then passed to ``os.system``. The exit status is not
     inspected.
     """
     command_parts = [
         Constants.MATLAB_PATH,
         ' -nodisplay -nosplash -nodesktop -r \"run(\'',
         str(utils.get_project_root()),
         '/source/preprocessing/time/clock_proxy/runCircadianModel.m\'); exit;\"',
     ]
     # MATLAB receives  "run('<script path>'); exit;"  as its -r argument.
     os.system(''.join(command_parts))
    def test_build_activity_counts(self, mock_os):
        """build_activity_counts must issue exactly one os.system call with the MATLAB command."""
        project_root = str(utils.get_project_root())
        matlab_command = ''.join([
            'matlab -nodisplay -nosplash -nodesktop -r \"run(\'',
            project_root,
            '/source/make_counts.m\'); exit;\"',
        ])

        ActivityCountService.build_activity_counts()

        mock_os.system.assert_called_once_with(matlab_command)
Ejemplo n.º 3
0
    def read_precleaned(subject_id):
        """Load the already-extracted sleep-stage labels for one subject.

        Reads ``data/labels/<subject_id>_labeled_sleep.txt`` under the project
        root as space-delimited rows. Column 0 of the first row is taken as
        the start time; column 1 of every row is the integer stage score.
        Row ``k`` (counting from 0) is stamped ``start_time + k * 30`` and
        given epoch index ``k + 1``.

        Returns:
            A ``PSGRawDataCollection`` with one ``StageItem`` per row.
        """
        label_path = str(utils.get_project_root().joinpath(
            'data/labels/' + subject_id + '_labeled_sleep.txt'))
        rows_per_epoch = 1
        stage_items = []

        with open(label_path, 'rt') as label_file:
            rows = csv.reader(label_file, delimiter=' ', quotechar='|')
            for row_number, row in enumerate(rows):
                if row_number == 0:
                    # Only the first row's timestamp is read; all later
                    # timestamps are derived from it.
                    start_time = float(row[0])
                    timestamp = start_time
                else:
                    timestamp = start_time + row_number * 30
                score = int(row[1])
                epoch = Epoch(timestamp=timestamp,
                              index=1 + row_number // rows_per_epoch)
                stage_items.append(
                    StageItem(epoch=epoch,
                              stage=PSGConverter.get_label_from_int(score)))

        return PSGRawDataCollection(subject_id=subject_id, data=stage_items)
    def test_get_raw_file_path(self):
        """The raw motion path is data/motion/<subject>_acceleration.txt under the project root."""
        expected_path = utils.get_project_root().joinpath(
            'data/motion/').joinpath("subject1_acceleration.txt")

        actual_path = MotionService.get_raw_file_path('subject1')

        self.assertEqual(expected_path, actual_path)
Ejemplo n.º 5
0
class Constants(object):
    """Shared configuration values: thresholds, time units, output paths."""

    # --- Thresholds ---------------------------------------------------------
    # With scikit-learn 0.20.3 the values WAKE_THRESHOLD = 0.3 and
    # REM_THRESHOLD = 0.35 were used; see
    # https://scikit-learn.org/stable/whats_new.html#version-0-21-0
    WAKE_THRESHOLD = 0.5
    REM_THRESHOLD = 0.35
    LOWER_BOUND = -0.2

    # --- Time units ---------------------------------------------------------
    EPOCH_DURATION_IN_SECONDS = 30
    SECONDS_PER_MINUTE = 60
    SECONDS_PER_HOUR = 3600
    SECONDS_PER_DAY = 24 * 3600

    # --- Output locations, all relative to the project root -----------------
    CROPPED_FILE_PATH = utils.get_project_root().joinpath('outputs/cropped/')
    FEATURE_FILE_PATH = utils.get_project_root().joinpath('outputs/features/')
    FIGURE_FILE_PATH = utils.get_project_root().joinpath('outputs/figures/')

    # --- Miscellaneous ------------------------------------------------------
    VERBOSE = False
    # Machine-specific: replace with the MATLAB executable path on your system.
    MATLAB_PATH = '/Applications/MATLAB_R2019a.app/bin/matlab'
Ejemplo n.º 6
0
    def test_get_raw_file_path(self):
        """The raw heart-rate path is data/heart_rate/<subject>_heartrate.txt under the project root."""
        expected_path = utils.get_project_root().joinpath(
            'data/heart_rate/').joinpath("subject1_heartrate.txt")

        actual_path = HeartRateService.get_raw_file_path('subject1')

        self.assertEqual(expected_path, actual_path)
Ejemplo n.º 7
0
    def test_gets_root_directory(self, mock_path):
        """get_project_root should hand back ``.parent.parent`` of the patched object's return value."""
        expected_root = "path/to/return"
        # Stub the two-levels-up attribute on whatever the patched callable returns.
        mock_path.return_value.parent.parent = expected_root

        self.assertEqual(utils.get_project_root(), expected_root)
Ejemplo n.º 8
0
    def build_circadian_model(subject_id, valid_epochs):
        """Build the circadian feature for a subject from its clock-proxy file.

        Looks for ``data/circadian_predictions/<subject_id>_clock_proxy.txt``
        under the project root. When the file exists, its comma-delimited
        contents are loaded and passed, together with ``valid_epochs``, to
        ``TimeBasedFeatureService.build_circadian_model_from_raw``.

        Returns:
            The result of ``build_circadian_model_from_raw``, or ``None`` when
            the clock-proxy file does not exist.
        """
        prediction_file = utils.get_project_root().joinpath(
            'data/circadian_predictions/' + subject_id + '_clock_proxy.txt')
        if not prediction_file.is_file():
            return None

        raw_model = pd.read_csv(str(prediction_file), delimiter=',').values
        return TimeBasedFeatureService.build_circadian_model_from_raw(
            raw_model, valid_epochs)
Ejemplo n.º 9
0
    def get_path_to_file(subject_id):
        """Locate the PSG text file for a subject.

        Checks, in order, ``data/psg/compumedics/AW0<id>.TXT`` and
        ``data/psg/vitaport/AW0<id>011.txt`` (``<id>`` zero-padded to two
        characters) and returns the first one that exists.

        Returns:
            The matching path, or ``None`` when neither file exists.
        """
        psg_dir = utils.get_project_root().joinpath('data/psg')
        padded_id = subject_id.zfill(2)
        candidates = (
            psg_dir.joinpath('compumedics/AW0' + padded_id + '.TXT'),
            psg_dir.joinpath('vitaport/AW0' + padded_id + '011.txt'),
        )
        for candidate in candidates:
            if candidate.is_file():
                return candidate
        return None
Ejemplo n.º 10
0
    def get_type_and_report(subject_id):
        """Find a subject's sleep report and the PSG file type it implies.

        A PDF report maps to ``PSGFileType.Compumedics``; a DOCX sleep log maps
        to ``PSGFileType.Vitaport``. The PDF is checked first.

        Returns:
            A ``(path, PSGFileType)`` tuple, or ``None`` when neither report
            file exists.
        """
        report_dir = utils.get_project_root().joinpath('data/reports')
        # NOTE(review): the PDF name zero-pads the id after 'AW0', while the
        # DOCX name prepends 'AW00' without padding; these differ for ids of
        # two or more characters - confirm against the actual file names.
        candidates = (
            (report_dir.joinpath('AW0' + subject_id.zfill(2) +
                                 '011_REPORT.pdf'),
             PSGFileType.Compumedics),
            (report_dir.joinpath('AW00' + subject_id +
                                 '011 Study Sleep Log.docx'),
             PSGFileType.Vitaport),
        )
        for report_file, file_type in candidates:
            if report_file.is_file():
                return report_file, file_type
        return None
    def load_circadian_model(file_id):
        """Load the clock-proxy output for a MESA file id.

        Reads ``data/mesa/clock_proxy/<file_id>_clock_proxy.out`` as
        comma-delimited data and passes it through ``utils.remove_nans``.

        Returns:
            The loaded array, or ``None`` when the file is missing, has no
            rows, or has no rows left after ``utils.remove_nans``.
        """
        proxy_file = utils.get_project_root().joinpath(
            'data/mesa/clock_proxy/' + file_id + '_clock_proxy.out')
        if not proxy_file.is_file():
            return None

        values = pd.read_csv(str(proxy_file), delimiter=',').values
        if np.shape(values)[0] > 0:
            values = utils.remove_nans(values)

        # Re-check the row count: the cleaning step may have changed it.
        return values if np.shape(values)[0] > 0 else None
Ejemplo n.º 12
0
    def load_raw(file_id):
        """Expand the scored sleep stages of a MESA annotation file into a flat array.

        Parses ``mesa-sleep-<file_id>-nsrr.xml`` from the annotations
        directory, keeps the ``ScoredEvent`` elements whose label is one of
        the known sleep stages, and repeats each stage number once per unit
        of its duration (a counter starting at 0 advances by 1 while it is
        below the duration).

        Returns:
            A NumPy array of stage numbers (0 = wake, 1-4 = stages 1-4,
            5 = REM) in document order.
        """
        stage_to_num = {
            'Wake|0': 0,
            'Stage 1 sleep|1': 1,
            'Stage 2 sleep|2': 2,
            'Stage 3 sleep|3': 3,
            'Stage 4 sleep|4': 4,
            'REM sleep|5': 5,
        }
        annotation_path = (
            str(utils.get_project_root()) +
            '/data/mesa/polysomnography/annotations-events-nsrr/mesa-sleep-' +
            file_id + '-nsrr.xml')
        scored_events = minidom.parse(annotation_path).getElementsByTagName(
            'ScoredEvent')

        stage_data = []
        for event in scored_events:
            children = event.childNodes
            # Child positions: 3 is stage, 5 is start, 7 is duration.
            duration = children[7].childNodes[0].nodeValue
            start = children[5].childNodes[0].nodeValue
            stage = children[3].childNodes[0].nodeValue

            if stage not in stage_to_num:
                continue
            stage_data.append(
                [stage_to_num[stage], float(start), float(duration)])

        # NOTE(review): an earlier comment said the last PSG overflow entry is
        # ignored, but every entry is expanded here - confirm which is intended.
        stages = []
        for stage_value, _start, duration in stage_data:
            elapsed = 0
            while elapsed < duration:
                stages.append(stage_value)
                elapsed += 1

        return np.array(stages)
    def load_raw(file_id):
        """Load the heart-rate signal of a MESA EDF recording.

        Opens ``mesa-sleep-<file_id>.edf`` from the EDF directory, reads the
        second-to-last signal, and pairs each sample with a timestamp equal to
        its sample index divided by that signal's sample frequency.

        Returns:
            A ``HeartRateCollection`` whose data has the timestamp in column 0
            and the signal value in column 1, after ``utils.remove_nans``.
        """
        edf_path = (str(utils.get_project_root()) +
                    '/data/mesa/polysomnography/edfs/mesa-sleep-' + file_id +
                    '.edf')

        # The reader keeps the EDF file open; using it as a context manager
        # releases the handle even if reading the signal raises.
        with pyedflib.EdfReader(edf_path) as edf_file:
            signal_labels = edf_file.getSignalLabels()
            # NOTE(review): assumes heart rate is always the second-to-last
            # channel - confirm against the EDF signal labels.
            hr_column = len(signal_labels) - 2
            sample_frequencies = edf_file.getSampleFrequencies()
            heart_rate = edf_file.readSignal(hr_column)
            sf = sample_frequencies[hr_column]

        # Timestamps for the heart-rate samples: sample index / sample frequency.
        time_hr = np.arange(len(heart_rate)) / sf

        data = np.transpose(np.vstack((time_hr, heart_rate)))
        data = utils.remove_nans(data)
        return HeartRateCollection(subject_id=file_id, data=data)
Ejemplo n.º 14
0
    def load_raw(file_id):
        """Load MESA actigraphy counts starting at the PSG alignment line.

        The overlap CSV is scanned for rows whose first column equals
        ``file_id`` (compared as integers); the second column of the last such
        row is the alignment line. Actigraphy rows whose second column is at
        or past that line are kept, each paired with an elapsed-time counter
        that starts at 0 and advances by 30 per kept row. An empty fifth
        column is recorded as NaN before ``utils.remove_nans`` is applied.

        Returns:
            An ``ActivityCountCollection``. When no alignment row exists its
            data is the placeholder array ``[[-1], [-1]]``.
        """
        project_root = str(utils.get_project_root())
        overlap_path = (project_root +
                        '/data/mesa/overlap/mesa-actigraphy-psg-overlap.csv')

        line_align = -1  # Stays -1 when no alignment row matches.
        with open(overlap_path) as overlap_file:
            overlap_rows = csv.reader(overlap_file, delimiter=',')
            next(overlap_file)  # Skip the first line of the file.
            for overlap_row in overlap_rows:
                if int(overlap_row[0]) == int(file_id):
                    line_align = int(overlap_row[1])

        if line_align == -1:
            return ActivityCountCollection(subject_id=file_id,
                                           data=np.array([[-1], [-1]]))

        activity = []
        elapsed = 0
        with open(project_root + '/data/mesa/actigraphy/mesa-sleep-' +
                  file_id + '.csv') as actigraphy_file:
            actigraphy_rows = csv.reader(actigraphy_file, delimiter=',')
            next(actigraphy_file)  # Skip the first line of the file.
            for actigraphy_row in actigraphy_rows:
                if int(actigraphy_row[1]) < line_align:
                    continue
                if actigraphy_row[4] == '':
                    count = np.nan
                else:
                    count = float(actigraphy_row[4])
                activity.append([elapsed, count])
                elapsed += 30

        return ActivityCountCollection(
            subject_id=file_id,
            data=utils.remove_nans(np.array(activity)))
Ejemplo n.º 15
0
    def make_data_demo(subject_id="16", snippet=False):
        """Plot one subject's cropped data streams as a five-row validation figure.

        Rows, top to bottom: motion (columns 1-3 against column 0), activity
        counts, heart rate, clock proxy (column 1 negated) and the PSG
        hypnogram. The figure is saved under ``Constants.FIGURE_FILE_PATH`` as
        ``data_validation_<subject_id>.png``, or as
        ``data_validation_zoom_<subject_id>.png`` when ``snippet`` is true.

        If any cropped input file or the clock-proxy file is missing, or the
        cropped motion file is empty, nothing is plotted or saved.

        Args:
            subject_id: Subject identifier used in the input and output file
                names.
            snippet: When true, draw a narrow figure zoomed onto short sample
                windows and hide the counts, clock-proxy and hypnogram axes.
                When false, draw the full recording and shade the sample
                windows on the motion and heart-rate rows.
        """
        hr_color = [0.8, 0.2, 0.1]
        motion_color = [0.3, 0.2, 0.8]
        circ_color = [0.9, 0.7, 0]
        psg_color = [0.1, 0.7, 0.1]
        font_size = 16
        font_name = "Arial"

        data_path = str(Constants.CROPPED_FILE_PATH) + '/'
        circadian_data_path = str(utils.get_project_root().joinpath(
            'data/circadian_predictions/')) + '/'
        output_path = str(Constants.FIGURE_FILE_PATH) + '/'

        if snippet is False:
            fig = plt.figure(figsize=(10, 12))
        else:
            fig = plt.figure(figsize=(3, 12))

        num_v_plots = 5
        fig.patch.set_facecolor('white')

        # Plot only when every input exists and the motion file is non-empty.
        if (os.path.isfile(data_path + subject_id + '_cleaned_hr.out') and
                os.path.isfile(data_path + subject_id + '_cleaned_motion.out')
                and os.path.isfile(data_path + subject_id + '_cleaned_psg.out')
                and os.path.isfile(data_path + subject_id +
                                   '_cleaned_counts.out') and
                os.stat(data_path + subject_id + '_cleaned_motion.out').st_size
                > 0) and os.path.isfile(circadian_data_path + subject_id +
                                        '_clock_proxy.txt'):

            hr = np.genfromtxt(data_path + subject_id + '_cleaned_hr.out',
                               delimiter=' ')
            motion = np.genfromtxt(data_path + subject_id +
                                   '_cleaned_motion.out',
                                   delimiter=' ')
            scores = np.genfromtxt(data_path + subject_id + '_cleaned_psg.out',
                                   delimiter=' ')
            counts = np.genfromtxt(data_path + subject_id +
                                   '_cleaned_counts.out',
                                   delimiter=',')
            circ_model = np.genfromtxt(circadian_data_path + subject_id +
                                       '_clock_proxy.txt',
                                       delimiter=',')

            # The x-range of every row is taken from the PSG timestamps.
            min_time = min(scores[:, 0])
            max_time = max(scores[:, 0])
            dt = 60 * 60

            sample_point_fraction = 0.92

            # Start of the sample window, 92% of the way through the range.
            sample_point = sample_point_fraction * (max_time -
                                                    min_time) + min_time
            window_size = 10
            if snippet:
                min_time = sample_point
                max_time = sample_point + window_size

            # Row 1: motion.
            ax = plt.subplot(num_v_plots, 1, 1)
            ax.plot(motion[:, 0], motion[:, 1], color=motion_color)
            ax.plot(motion[:, 0], motion[:, 2], color=[0.4, 0.2, 0.7])
            ax.plot(motion[:, 0], motion[:, 3], color=[0.5, 0.2, 0.6])
            plt.ylabel('Motion (g)', fontsize=font_size, fontname=font_name)
            DataPlotBuilder.tidy_data_plot(min_time, max_time, dt, ax)

            if snippet:
                ax.spines['bottom'].set_visible(True)
                ax.spines['left'].set_visible(True)
                ax.spines['top'].set_visible(True)
                ax.spines['right'].set_visible(True)

                ax.yaxis.label.set_visible(False)

                inds = np.intersect1d(
                    np.where(motion[:, 0] > sample_point)[0],
                    np.where(motion[:, 0] <= sample_point + window_size)[0])
                # NOTE(review): the slice 1:3 covers motion columns 1 and 2
                # only, although column 3 is plotted too - confirm intent.
                y_min = np.amin(motion[inds, 1:3])
                plt.ylim(y_min - 0.005, y_min + 0.025)

                # Get rid of the ticks
                ax.set_xticks([])
                ax.yaxis.set_ticks_position("right")

                plt.ylabel('')
                plt.xlabel(str(window_size) + ' sec window',
                           fontsize=font_size,
                           fontname=font_name)
            else:
                y_min = -3.2
                y_max = 2.5
                plt.ylim(y_min, y_max)
                # Shade the sample window on the full-length plot.
                current_axis = plt.gca()
                current_axis.add_patch(
                    Rectangle((sample_point, y_min),
                              window_size,
                              y_max - y_min,
                              alpha=0.7,
                              facecolor="gray"))

            # Row 2: activity counts.
            ax = plt.subplot(num_v_plots, 1, 2)
            ax.plot(counts[:, 0], counts[:, 1], color=[0.2, 0.2, 0.7])
            DataPlotBuilder.tidy_data_plot(min_time, max_time, dt, ax)
            plt.ylabel('Counts', fontsize=font_size, fontname=font_name)

            if snippet:
                plt.axis('off')
                plt.ylim(-1, -1)

            # Row 3: heart rate.
            ax = plt.subplot(num_v_plots, 1, 3)
            ax.plot(hr[:, 0], hr[:, 1], color=hr_color)
            plt.ylabel('Heart rate (bpm)',
                       fontsize=font_size,
                       fontname=font_name)
            DataPlotBuilder.tidy_data_plot(min_time, max_time, dt, ax)

            # In snippet mode min_time/max_time were already narrowed to the
            # motion window above, so this sample point is computed relative
            # to that window rather than to the full recording.
            sample_point = sample_point_fraction * (max_time -
                                                    min_time) + min_time
            window_size = 1200

            if snippet:
                min_time = sample_point
                max_time = sample_point + window_size
                DataPlotBuilder.tidy_data_plot(min_time, max_time, dt, ax)

                ax.spines['bottom'].set_visible(True)
                ax.spines['left'].set_visible(True)
                ax.spines['top'].set_visible(True)
                ax.spines['right'].set_visible(True)

                ax.yaxis.label.set_visible(False)

                ax.set_xticks([])
                ax.yaxis.set_ticks_position("right")

                plt.ylabel('')
                plt.xlabel(str(window_size) + ' sec window',
                           fontsize=font_size,
                           fontname=font_name)
                plt.ylim(35, 100)

            else:
                y_min = 40
                y_max = 130
                plt.ylim(y_min, y_max)
                current_axis = plt.gca()
                current_axis.add_patch(
                    Rectangle((sample_point, y_min),
                              window_size,
                              y_max - y_min,
                              alpha=0.35,
                              facecolor="gray"))
                plt.ylim(40, 130)

            # Row 4: clock proxy, plotted with its sign flipped.
            ax = plt.subplot(num_v_plots, 1, 4)
            ax.plot(circ_model[:, 0], -circ_model[:, 1], color=circ_color)
            plt.ylabel('Clock Proxy', fontsize=font_size, fontname=font_name)
            DataPlotBuilder.tidy_data_plot(min_time, max_time, dt, ax)
            if snippet:
                plt.axis('off')
                plt.ylim(-1, -1)
            else:
                plt.ylim(.2, 1.2)

            # Row 5: hypnogram.
            ax = plt.subplot(num_v_plots, 1, 5)

            relabeled_scores = DataPlotBuilder.convert_labels_for_hypnogram(
                scores[:, 1])
            ax.step(scores[:, 0], relabeled_scores, color=psg_color)
            plt.ylabel('Stage', fontsize=font_size, fontname=font_name)
            plt.xlabel('Time', fontsize=font_size, fontname=font_name)
            DataPlotBuilder.tidy_data_plot(min_time, max_time, dt, ax)
            ax.set_yticks([-4, -3, -2, -1, 0, 1])
            ax.set_yticklabels(['N4', 'N3', 'N2', 'N1', 'Wake', 'REM'])

            if snippet:
                plt.axis('off')
                plt.ylim(5, 5)
            else:
                plt.ylim(-5, 2)

            if not snippet:
                plt.savefig(output_path + 'data_validation_' + subject_id +
                            '.png',
                            bbox_inches='tight',
                            pad_inches=0.1,
                            dpi=300)
            else:
                plt.savefig(output_path + 'data_validation_zoom_' +
                            subject_id + '.png',
                            bbox_inches='tight',
                            pad_inches=0.1,
                            dpi=300)
            plt.close()
        else:
            # Nothing was drawn: release the empty figure so repeated calls
            # with missing inputs do not accumulate open figures.
            plt.close(fig)
Ejemplo n.º 16
0
 def get_raw_file_path(subject_id):
     """Return the path ``data/heart_rate/<subject_id>_heartrate.txt`` under the project root."""
     file_name = subject_id + '_heartrate.txt'
     return utils.get_project_root().joinpath('data/heart_rate/').joinpath(file_name)
Ejemplo n.º 17
0
 def get_raw_file_path(subject_id):
     """Return the path ``data/motion/<subject_id>_acceleration.txt`` under the project root."""
     relative_path = 'data/motion/' + subject_id + '_acceleration.txt'
     return utils.get_project_root().joinpath(relative_path)
Ejemplo n.º 18
0
 def get_all_files():
     """List every path in the MESA EDF directory whose name ends in ``edf``."""
     pattern = str(utils.get_project_root()) + "/data/mesa/polysomnography/edfs/*edf"
     return glob.glob(pattern)
Ejemplo n.º 19
0
class RawDataProcessor:
    """Crops raw PSG, motion and heart-rate data to a shared interval and finds usable epochs."""

    BASE_FILE_PATH = utils.get_project_root().joinpath('outputs/cropped/')

    @staticmethod
    def crop_all(subject_id):
        """Load a subject's three raw streams, crop them to their common interval and write them out.

        After the cropped collections are written, activity counts are built
        from the cropped motion data.
        """
        # Labels come from the already-extracted files;
        # PSGService.read_raw(subject_id) is the alternative that extracts the
        # PSG details from the reports.
        psg = PSGService.read_precleaned(subject_id)
        motion = MotionService.load_raw(subject_id)
        heart_rate = HeartRateService.load_raw(subject_id)

        shared_interval = RawDataProcessor.get_intersecting_interval(
            [psg, motion, heart_rate])

        psg = PSGService.crop(psg, shared_interval)
        motion = MotionService.crop(motion, shared_interval)
        heart_rate = HeartRateService.crop(heart_rate, shared_interval)

        PSGService.write(psg)
        MotionService.write(motion)
        HeartRateService.write(heart_rate)

        # Activity counts are built with Python here, not MATLAB.
        ActivityCountService.build_activity_counts_without_matlab(
            subject_id, motion.data)

    @staticmethod
    def get_intersecting_interval(collection_list):
        """Return the interval from the latest start time to the earliest end time of the collections."""
        intervals = [
            collection.get_interval() for collection in collection_list
        ]
        latest_start = max(interval.start_time for interval in intervals)
        earliest_end = min(interval.end_time for interval in intervals)
        return Interval(start_time=latest_start, end_time=earliest_end)

    @staticmethod
    def get_valid_epochs(subject_id):
        """Return the scored PSG epochs that have both motion and heart-rate samples.

        An epoch qualifies when its timestamp is a key in both epoch
        dictionaries (built relative to the first PSG epoch's timestamp) and
        its stage is not ``SleepStage.unscored``.
        """
        psg_collection = PSGService.load_cropped(subject_id)
        motion_collection = MotionService.load_cropped(subject_id)
        heart_rate_collection = HeartRateService.load_cropped(subject_id)

        first_timestamp = psg_collection.data[0].epoch.timestamp
        motion_epochs = RawDataProcessor.get_valid_epoch_dictionary(
            motion_collection.timestamps, first_timestamp)
        hr_epochs = RawDataProcessor.get_valid_epoch_dictionary(
            heart_rate_collection.timestamps, first_timestamp)

        return [
            item.epoch for item in psg_collection.data
            if item.epoch.timestamp in motion_epochs
            and item.epoch.timestamp in hr_epochs
            and item.stage != SleepStage.unscored
        ]

    @staticmethod
    def get_valid_epoch_dictionary(timestamps, start_time):
        """Map the epoch start of every timestamp to ``True``.

        Each timestamp is floored to the nearest multiple of
        ``Epoch.DURATION`` counted from ``start_time``.
        """
        sample_count = np.shape(timestamps)[0]
        epoch_dictionary = {}

        for time in (timestamps[position] for position in range(sample_count)):
            offset_into_epoch = np.mod(time - start_time, Epoch.DURATION)
            epoch_dictionary[time - offset_into_epoch] = True

        return epoch_dictionary
 def build_activity_counts():
     """Run the ``make_counts.m`` MATLAB script in a headless MATLAB session.

     The shell command is assembled from ``Constants.MATLAB_PATH`` and the
     project root, then passed to ``os.system``. The exit status is not
     inspected.
     """
     command_parts = [
         Constants.MATLAB_PATH,
         ' -nodisplay -nosplash -nodesktop -r \"run(\'',
         str(utils.get_project_root()),
         '/source/make_counts.m\'); exit;\"',
     ]
     os.system(''.join(command_parts))