Example #1
0
def format_source():
    """
    Run timestamp_source over every study, record and sensor.

    Studies and sensors come from the reference lookups; the records of a
    study are the entries of the 'source_path' column of its meta table.
    Each call uses the fixed format type 'source' and the segment 'All'.
    The value returned by timestamp_source is not used here.
    """

    print("begin format source")

    studies = retrieve_ref('study_list')
    # looked up in the same order as before, but not used in this function
    retrieve_ref('format_types')
    retrieve_ref('segment_list')
    sensors = retrieve_ref('sensor_list')

    for study in studies:
        records = list(retrieve_meta(study)['source_path'])

        for record in records:
            for sensor in sensors:
                timestamp_source(study, 'source', 'All', record, sensor)
Example #2
0
def timestamp_source_main():
    """
    Timestamp the TEMP sensor of every record in every study.

    For each study the number of records listed in the meta table is
    reported through record_to_summary under the label 'Records found',
    then timestamp_source is called once per record with the analysis type
    'timestamp'. The value returned by timestamp_source is not used here.
    """

    print("begin timestamp source")

    studies = retrieve_ref('study_list')
    # looked up but not used: only the TEMP sensor is processed here
    retrieve_ref('sensor_list')

    for study in studies:

        print('study = ' + str(study))

        records = list(retrieve_meta(study)['source_path'])

        # summarize what has been found so far
        record_to_summary(study, 'Records found', len(records))

        for record in records:
            timestamp_source('timestamp', study, record, 'TEMP')
def segment_data():
    """
    Cut every truncated sensor file into segments and save each one.

    For each study / record / sensor / segment the 'truncate' data is loaded
    again and trimmed on its 'timeMinutes' column:
      - first entry of segment_list: rows after timePreStudy are dropped
      - second entry: rows before timePreStudy or after timePostStudy are
        dropped
      - third entry: rows before timePostStudy are dropped
    Any other segment is written untrimmed. The result is saved to
    <study>/segment/<segment>/<record>/<sensor>.csv; missing folders are
    created one level at a time.
    """

    print("segment data")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')
    pre_study = retrieve_ref('timePreStudy')
    post_study = retrieve_ref('timePostStudy')

    for study in studies:
        records = list(retrieve_meta(study)['source_path'])

        for record in records:
            for sensor in sensors:
                for segment in segment_list:

                    frame = retrieve_analyzed(study, 'truncate', record,
                                              sensor)

                    if segment == segment_list[0]:
                        late_rows = frame[frame['timeMinutes'] > pre_study]
                        frame = frame.drop(late_rows.index)

                    if segment == segment_list[1]:
                        early_rows = frame[frame['timeMinutes'] < pre_study]
                        frame = frame.drop(early_rows.index)
                        late_rows = frame[frame['timeMinutes'] > post_study]
                        frame = frame.drop(late_rows.index)

                    if segment == segment_list[2]:
                        early_rows = frame[frame['timeMinutes'] < post_study]
                        frame = frame.drop(early_rows.index)

                    # walk down the folder tree, creating each missing level
                    folder = study
                    for part in ('segment', str(segment), record):
                        folder = os.path.join(folder, part)
                        if not os.path.isdir(folder):
                            os.mkdir(folder)

                    path = os.path.join(folder, sensor + ".csv")
                    frame.to_csv(path)

                    print('segments file saved: ' + str(path))
def segment_records():
    """
    Split every formatted record into study segments and save each one.

    For each study / record / format type / sensor the 'All' data is loaded
    once. Every segment is then cut from that full data on its
    'timeMinutes' column:
      - first entry of segment_list: rows after timePreStudy are dropped
      - second entry: rows before timePreStudy or after timePostStudy are
        dropped
      - third entry: rows before timePostStudy are dropped
    A segment matching none of these is written untrimmed. The result is
    saved to <study>/formatted/<format>/<record>/<segment>/<sensor>.csv.
    """

    print("begin segmenting records")

    study_list = retrieve_ref('study_list')
    format_types = retrieve_ref('format_types')
    segment_list = retrieve_ref('segment_list')
    sensor_list = retrieve_ref('sensor_list')

    timePreStudy = retrieve_ref('timePreStudy')
    timePostStudy = retrieve_ref('timePostStudy')

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        for record in source_path:

            # named format_type so the builtin format() is not shadowed
            for format_type in format_types:

                for sensor in sensor_list:

                    df_all = retrieve_analyzed(study, format_type, record,
                                               'All', sensor)

                    for segment in segment_list:

                        # Always start from the full record. DataFrame.drop
                        # returns a new frame, so df_all itself is never
                        # modified and each segment is cut independently of
                        # the segments handled before it.
                        df = df_all

                        if segment == segment_list[0]:
                            timeEnd = timePreStudy
                            df = df.drop(df[df['timeMinutes'] > timeEnd].index)

                        if segment == segment_list[1]:
                            timeBegin = timePreStudy
                            timeEnd = timePostStudy
                            df = df.drop(
                                df[df['timeMinutes'] < timeBegin].index)
                            df = df.drop(df[df['timeMinutes'] > timeEnd].index)

                        if segment == segment_list[2]:
                            timeBegin = timePostStudy
                            df = df.drop(
                                df[df['timeMinutes'] < timeBegin].index)

                        path = os.path.join(study, 'formatted', format_type,
                                            record, segment)
                        # makedirs also creates missing parent folders and
                        # does not fail when the folder already exists
                        os.makedirs(path, exist_ok=True)
                        file_path = os.path.join(path, sensor + ".csv")
                        df.to_csv(file_path)

                        print('dataframe saved for segments: ' +
                              str(file_path))
def find_paired_duration():
    """
    Compute the duration of every record and store it in the meta table.

    For each record the duration is (recordEnd - recordBegin) / 60 rounded
    to four decimals and written to a new 'recordDuration' column
    (presumably seconds converted to minutes - confirm the units of
    recordBegin / recordEnd). The updated meta table is saved once per
    study and printed.
    """

    print("begin find_paired_duration")

    for study in retrieve_ref('study_list'):

        meta = retrieve_meta(study)
        records = list(meta['source_path'])

        # start with an empty column, filled row by row below
        meta['recordDuration'] = [None] * len(records)

        for record in records:

            # index label of the first row that belongs to this record
            row = meta[meta['source_path'] == record].index.values[0]
            print('i = ' + str(row))

            begin = int(meta.loc[row, 'recordBegin'])
            print('recordBegin = ' + str(begin))

            end = int(meta.loc[row, 'recordEnd'])
            print('recordEnd = ' + str(end))

            duration = round((end - begin) / 60, 4)
            meta.loc[row, 'recordDuration'] = duration
            print('recordDuration = ' + str(duration))

        save_meta(study, meta)
        print('df_meta = ')
        print(meta)
def trim_record_to_max():
    """
    Store the length of every record, capped at max_record_time.

    For each record the formatted TEMP file
    <study>/formatted/source/<record>/All/TEMP.csv is read, the largest
    'timeMinutes' value is taken, limited to the max_record_time reference
    value and rounded to four decimals. The result goes into a new
    'recordLength' column of the meta table, which is saved once per study.
    """

    print("finding the end of the record")

    studies = retrieve_ref('study_list')
    # looked up but not used: only the TEMP sensor is read here
    retrieve_ref('sensor_list')
    length_cap = retrieve_ref('max_record_time')

    temp_file_name = 'TEMP' + ".csv"

    for study in studies:

        meta = retrieve_meta(study)
        records = list(meta['source_path'])
        meta['recordLength'] = [None] * len(records)

        for record in records:

            csv_path = os.path.join(study, 'formatted', 'source', record,
                                    'All', temp_file_name)
            minutes = list(pd.read_csv(csv_path)['timeMinutes'])

            # longest time seen in the record, never more than the cap
            length = round(min(max(minutes), length_cap), 4)

            row = meta[meta['source_path'] == record].index.values[0]
            meta.loc[row, 'recordLength'] = length

        # save the record length to meta file
        save_meta(study, meta)
def decide_inclusion():
    """
    Mark records as included or excluded by length and drop the excluded.

    Per study the meta table is sorted by 'recordLength'. A record whose
    length is below the min_record_time reference value is labelled
    'excluded', every other record 'included'. The labelled table is saved,
    then the excluded rows are removed, the table is re-sorted by
    'source_path' and saved again, so the file ends up holding only the
    included records.
    """

    print("begin decide inclusion")

    studies = retrieve_ref('study_list')
    # the next two lookups are kept although their values are not used
    retrieve_ref('sensor_list')
    retrieve_ref('max_record_time')
    shortest_allowed = retrieve_ref('min_record_time')

    for study in studies:

        meta = retrieve_meta(study).sort_values(by=['recordLength'])

        records_found = list(meta['source_path'])
        lengths = list(meta['recordLength'])

        meta['included'] = [
            'excluded' if length < shortest_allowed else 'included'
            for length in lengths
        ]
        save_meta(study, meta)

        excluded_rows = meta[meta['included'] == 'excluded'].index
        meta = meta.drop(excluded_rows).sort_values(by=['source_path'])
        save_meta(study, meta)

    print("completed decide inclusion")
def multiple_record_check():
    """
    Look for a second recording hidden after the truncated end of a record.

    For every record whose 'fullLength' exceeds 'truncatedLength' by more
    than 30, the TEMP 'source' data after truncatedLength + 5 is scanned for
    a rise of more than 3 between a measurement and the one 28 samples
    later. On the first such rise an extra entry is appended to the begin /
    end / source_path lists. After all records of a study are handled the
    meta file is overwritten with a table holding only 'source_path',
    'recordBegin' and 'recordEnd'; other columns of the loaded meta table
    are not carried over.

    NOTE(review): the thresholds 30, 5, 3 and 28 are undocumented magic
    numbers; presumably minutes / temperature units / samples - confirm.
    """

    print("begin multiple record check")

    study_list = retrieve_ref('study_list')
    # the following reference values are retrieved but not used below
    format_types = retrieve_ref('format_types')
    segment_list = retrieve_ref('segment_list')
    sensor_list = retrieve_ref('sensor_list')

    timePreStudy = retrieve_ref('timePreStudy')
    timePostStudy = retrieve_ref('timePostStudy')

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])
        # copies that grow when an additional record is detected
        source_path_new = list(df_meta['source_path'])
        timeBegin_list = list(df_meta['recordBegin'])
        timeEnd_list = list(df_meta['recordEnd'])

        for record in source_path:

            # index label of the first meta row for this record
            i = df_meta[df_meta['source_path'] == record].index.values[0]
            fullLength = float(df_meta.loc[i, 'fullLength'])
            truncatedLength = float(df_meta.loc[i, 'truncatedLength'])

            format_type = 'source'
            segment = 'All'
            sensor = 'TEMP'
            df = retrieve_analyzed(study, format_type, record, segment, sensor)

            # filled below but never read afterwards
            new_record_list = []

            # only records with a long tail after the truncated end are scanned
            if fullLength > truncatedLength + 30:

                # keep only the data after the truncated end plus a margin of 5
                df = df.drop(df[df['timeMinutes'] < truncatedLength + 5].index)

                # print('df = ')
                # print(df)

                # snapshots of the tail; they are NOT updated by the drops
                # performed inside the loop below
                timeUnix = list(df['timeUnix'])
                timeMinutes = list(df['timeMinutes'])
                measurements = list(df['measurement'])

                # note: this loop reuses the name i (the meta row label above)
                for i in range(len(measurements)):

                    # stay far enough from the end for the i + 28 look-ahead
                    if i < len(measurements) - 30:

                        # rise of more than 3 over 28 samples marks a new record
                        if measurements[i] + 3 < measurements[i + 28]:

                            print('new record found')

                            # discard everything before the detected rise
                            df = df.drop(
                                df[df['timeMinutes'] < timeMinutes[i +
                                                                   28]].index)

                            time_end = find_record_end_from_temp(df)
                            print('time_end = ' + str(time_end))

                            # this trimmed frame is not read again before the
                            # break below
                            df = df.drop(
                                df[df['timeMinutes'] > time_end].index)

                            # print('df = ')
                            # print(df)

                            # second '_'-separated part of the record name
                            wearable_name = record.split('_')
                            wearable_name = wearable_name[1]

                            # name built from the first timestamp of the tail
                            # snapshot and the wearable name
                            recordName = str(
                                str(int(timeUnix[0])) + '_' +
                                str(wearable_name))
                            print('recordName = ' + str(recordName))

                            new_record_list.append(recordName)

                            # NOTE(review): the original record name is
                            # appended here, not recordName - confirm intent
                            source_path_new.append(record)
                            timeBegin_list.append(int(timeUnix[0]))
                            print('timeUnix[0:20] = ')
                            print(timeUnix[0:20])
                            timeEnd = min(timeUnix)
                            print('timeEnd = ' + str(timeEnd))
                            # NOTE(review): the stored end is the smallest
                            # timestamp of the tail snapshot plus 60; time_end
                            # found above is not used for it - confirm intent
                            timeEnd = min(timeUnix) + 60
                            print('timeEnd = ' + str(timeEnd))
                            timeEnd_list.append(int(timeEnd))

                            # at most one additional record per source record
                            break

        # rebuild the meta table from the three lists only
        df_meta_new = pd.DataFrame()
        df_meta_new['source_path'] = source_path_new
        df_meta_new['recordBegin'] = timeBegin_list
        df_meta_new['recordEnd'] = timeEnd_list

        save_meta(study, df_meta_new)
def format_truncate():
    """
    Save a truncated copy of every formatted source file.

    For each study / record / sensor the 'source' data of the segment 'All'
    is loaded and the rows whose 'timeMinutes' lies after the record's
    'truncatedLength' (taken from the meta table) are dropped. The result
    is written to <study>/formatted/truncate/<record>/All/<sensor>.csv;
    missing folders are created one level at a time.
    """

    print("begin format source")

    studies = retrieve_ref('study_list')
    # looked up in the same order as before, but not used in this function
    retrieve_ref('format_types')
    retrieve_ref('segment_list')
    sensors = retrieve_ref('sensor_list')

    for study in studies:

        meta = retrieve_meta(study)
        records = list(meta['source_path'])

        for record in records:

            row = meta[meta['source_path'] == record].index.values[0]
            cutoff = meta.loc[row, 'truncatedLength']

            for sensor in sensors:

                frame = retrieve_analyzed(study, 'source', record, 'All',
                                          sensor)
                beyond_cutoff = frame[frame['timeMinutes'] > cutoff]
                frame = frame.drop(beyond_cutoff.index)

                # walk down to the output folder, creating missing levels
                folder = os.path.join(study)
                if not os.path.isdir(folder):
                    os.mkdir(folder)
                for part in (str('formatted'), str('truncate'), str(record),
                             str('All')):
                    folder = os.path.join(folder, part)
                    if not os.path.isdir(folder):
                        os.mkdir(folder)

                out_file = os.path.join(folder, sensor + ".csv")
                frame.to_csv(out_file)

                print('format truncate file saved = ' + str(out_file))
Example #10
0
def statisticSegments():
    """
    Calculate and save statistics for each record, sensor and segment.

    Per study a table with one row per record is written to
    <study>/analyzed/statistics/statisticsSegments.csv. For every
    sensor / segment pair it holds the columns
    <sensor>_<segment>_mean, _median, _pVariance, _stdev and
    _quan_10 ... _quan_90 (the 10th to 90th percentile), all computed from
    the 'measurement' column. A cell stays empty when the record has no
    measurements, or fewer than two in the case of stdev.

    NOTE(review): the segment is not passed to retrieve_analyzed, so every
    segment of a sensor currently receives the statistics of the same
    'truncate' data - confirm how per-segment data should be loaded.
    """

    print("begin statistical calculation")

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')

    analysis_type = 'truncate'

    # column suffix -> function; the suffix is used verbatim in column names
    statistic_functions = {
        'mean': statistics.mean,
        'median': statistics.median,
        'pVariance': statistics.pvariance,
        'stdev': statistics.stdev,
    }
    # percentiles; divided by 100 below because np.quantile expects 0..1
    quan_types = [10, 20, 30, 40, 50, 60, 70, 80, 90]

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        # build the empty table in one go: one column per statistic
        columns = {'source_path': source_path}
        for sensor in sensor_list:
            for segment in segment_list:
                prefix = str(sensor) + '_' + str(segment) + '_'
                for statis in statistic_functions:
                    columns[prefix + statis] = [None] * len(source_path)
                for quanNum in quan_types:
                    columns[prefix + 'quan_' + str(quanNum)] = (
                        [None] * len(source_path))
        dfStatistics = pd.DataFrame(columns)

        analyzed_path = os.path.join(study, 'analyzed', 'statistics')
        os.makedirs(analyzed_path, exist_ok=True)
        analyzed_file = os.path.join(analyzed_path, 'statisticsSegments.csv')
        print('analyzed_file = ' + str(analyzed_file))

        for record in source_path:

            # row of this record in the statistics table
            i = dfStatistics[dfStatistics['source_path'] ==
                             record].index.values[0]

            for sensor in sensor_list:
                for segment in segment_list:

                    df = retrieve_analyzed(study, analysis_type, record,
                                           sensor)
                    measurement = list(df['measurement'])

                    # nothing to compute; the statistics functions would raise
                    if not measurement:
                        continue

                    prefix = str(sensor) + '_' + str(segment) + '_'

                    for statis, function in statistic_functions.items():
                        # the sample standard deviation needs two data points
                        if statis == 'stdev' and len(measurement) < 2:
                            continue
                        dfStatistics.loc[i, prefix + statis] = function(
                            measurement)

                    # every percentile goes to its own column
                    for quanNum in quan_types:
                        colName = prefix + 'quan_' + str(quanNum)
                        dfStatistics.loc[i, colName] = np.quantile(
                            measurement, quanNum / 100)

        dfStatistics.to_csv(analyzed_file)

    print("end statistical calculation")
def statisticsCalculation():
    """
    Calculate and save summary statistics of every record per sensor.

    Per study a table with one row per record is written to
    <study>/analyzed/statistics/statistics.csv. For each sensor it holds
    <sensor>_mean, <sensor>_median, <sensor>_pvariance, <sensor>_stdev and
    <sensor>quan_10 ... <sensor>quan_90, all computed from the 'measurement'
    column of the 'truncate' data. The quantile column names have no
    underscore after the sensor name; they are left unchanged so that
    existing readers of the file keep working.

    A scatter plot of EDA_mean against HR_mean is saved to
    <study>/plot/analyzed/summary.png when both columns exist. Each study
    is drawn on its own figure, which is closed after saving.
    """

    print("begin statistical calculation")

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')

    analysis_type = 'truncate'
    quantile_percents = (10, 20, 30, 40, 50, 60, 70, 80, 90)

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        dfStatistics = pd.DataFrame()

        for sensor in sensor_list:

            dfMeanList, dfMedianList, dfPvariance, dfStdev = [], [], [], []
            # percent -> one value per record
            quantiles = {percent: [] for percent in quantile_percents}

            for record in source_path:

                df = retrieve_analyzed(study, analysis_type, record, sensor)

                measurement = list(df['measurement'])
                dfMeanList.append(statistics.mean(measurement))
                dfMedianList.append(statistics.median(measurement))
                dfPvariance.append(statistics.pvariance(measurement))
                dfStdev.append(statistics.stdev(measurement))

                for percent, values in quantiles.items():
                    # np.quantile expects a fraction between 0 and 1
                    values.append(np.quantile(measurement, percent / 100))

            dfStatistics[str(sensor) + '_mean'] = dfMeanList
            # the median is stored like the other statistics
            dfStatistics[str(sensor) + '_median'] = dfMedianList
            dfStatistics[str(sensor) + '_pvariance'] = dfPvariance
            dfStatistics[str(sensor) + '_stdev'] = dfStdev

            for percent, values in quantiles.items():
                dfStatistics[str(sensor) + 'quan_' + str(percent)] = values

        analyzed_path = os.path.join(study, 'analyzed', 'statistics')
        os.makedirs(analyzed_path, exist_ok=True)
        analyzed_file = os.path.join(analyzed_path, 'statistics.csv')
        print('analyzed_file = ' + str(analyzed_file))
        dfStatistics.to_csv(analyzed_file)

        # reports the last sensor of sensor_list, once per study
        print('statistical analysis for study / sensor complete: ' +
              str(study) + ' / ' + str(sensor))

        # the summary plot needs both sensors; skip it when one is missing
        if ('EDA_mean' not in dfStatistics.columns
                or 'HR_mean' not in dfStatistics.columns):
            print('summary plot skipped, EDA_mean or HR_mean missing: ' +
                  str(study))
            continue

        # a fresh figure keeps points of other studies and of figures left
        # open elsewhere out of this plot
        fig = plt.figure()
        plt.scatter(dfStatistics['EDA_mean'], dfStatistics['HR_mean'])
        plt.xlabel('EDA mean')
        plt.ylabel('HR mean')
        plot_path = os.path.join(study, 'plot', 'analyzed')
        os.makedirs(plot_path, exist_ok=True)
        plot_file = os.path.join(plot_path, 'summary' + '.png')
        plt.savefig(plot_file, bbox_inches='tight')
        plt.close(fig)
        print('saved statistics - ' + str(plot_file))

    print("end statistical calculation")
Example #12
0
def _plot_timestamp_panel(study, record, sensor, analysis_types, x_column,
                          x_label, title_type):
    """Scatter the given analysis types onto the current subplot and label it."""
    for analysis_type in analysis_types:
        df = retrieve_analyzed(study, analysis_type, record, sensor)
        valueColor = retrieve_ref_color(str('color_' + str(analysis_type)))
        plt.scatter(df[x_column], df['measurement'], color=valueColor,
                    label=str(analysis_type))
    plt.title(title_type + ' ' + record + ' ' + sensor)
    plt.xlabel(x_label)
    sensor_unit = retrieve_sensor_unit(sensor)
    plt.ylabel(str(sensor) + ' ( ' + str(sensor_unit) + ' )')
    plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2), loc='upper left')


def plot_timestamp():
    """
    Plot the TEMP data of every record at each processing stage.

    One figure with four stacked subplots is saved per record to
    <study>/plot/timestamp/<record>/TEMP.png:
      1. 'source' measurements against the 'count' column
      2. 'timestamp' measurements against 'timeMinutes'
      3. every analysis type of analysis_list overlaid against 'timeMinutes'
      4. 'truncate' measurements against 'timeMinutes'
    The figure is closed after saving so open figures do not pile up over
    many records.

    NOTE(review): subplot 2 is labelled 'Time (Unix)' although it plots the
    'timeMinutes' column - confirm which of the two is intended.
    """

    print("begin plotting timestamped data")

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    analysis_list = retrieve_ref('analysis_list')

    # only the temperature sensor is plotted
    sensor = 'TEMP'

    # the overlay subplot is titled with the last analysis type drawn, or
    # with 'timestamp' when the list is empty
    overlay_title = 'timestamp'
    for analysis_type in analysis_list:
        overlay_title = analysis_type

    # (analysis types, x column, x label, title) of the four subplots
    panels = [
        (['source'], 'count', 'Measurement Count - Before Timestamp',
         'source'),
        (['timestamp'], 'timeMinutes', 'Time (Unix)', 'timestamp'),
        (analysis_list, 'timeMinutes', 'Time (minutes)', overlay_title),
        (['truncate'], 'timeMinutes', 'Time (minutes)', 'truncate'),
    ]

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        for record in source_path:

            row_num, col_num = len(panels), 1
            row_width_mulp, col_width_mulp = 14, 5
            plot_width = col_num * row_width_mulp
            plot_height = row_num * col_width_mulp
            fig = plt.figure(figsize=(plot_width, plot_height))

            for plot_num, panel in enumerate(panels, start=1):
                analysis_types, x_column, x_label, title_type = panel
                plt.subplot(row_num, col_num, plot_num)
                _plot_timestamp_panel(study, record, sensor, analysis_types,
                                      x_column, x_label, title_type)

            # save the plot
            plot_path = os.path.join(study, 'plot', 'timestamp', record)
            os.makedirs(plot_path, exist_ok=True)
            plot_file = os.path.join(plot_path, sensor + '.png')
            plt.savefig(plot_file, bbox_inches='tight')
            # release the figure; one is created for every record
            plt.close(fig)

    print("completed plotting timestamped data")
def find_paired_end():
    """
    Find the end time of every record and store it in the meta table.

    The end of a record is the largest 'timeUnix' value of its 'truncate'
    TEMP data (segment 'All'). When the 'recordCoregistered' entry of a
    record is longer than the record name plus three characters it is read
    as a whitespace-separated list of record names, and the earliest end
    among those records is stored instead. The result goes into a new
    'recordEnd' column; the meta table is saved once per study and printed.
    """

    print("begin find_paired_end")

    study_list = retrieve_ref('study_list')

    format_type = 'truncate'
    sensor = 'TEMP'
    segment = 'All'

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        df_meta['recordEnd'] = [None] * len(source_path)

        # there could be two wearables - or one
        # one wearable was turned off before the other
        # check if the participant record has one or two wearables
        # if there are two find the earlier stop time and save to meta file
        for record in source_path:

            # the end of this record on its own: the largest timeUnix value
            df = retrieve_analyzed(study, format_type, record, segment, sensor)
            timeEndRecord = max(list(df['timeUnix']))

            # save that value in the dataframe
            i = df_meta[df_meta['source_path'] == record].index.values[0]
            df_meta.loc[i, 'recordEnd'] = int(timeEndRecord)

            recordCoregistered = df_meta.loc[i, 'recordCoregistered']

            if pd.isnull(recordCoregistered):
                print('no pair found')

            # longer than a single record name: more than one record listed
            elif len(recordCoregistered) > 3 + len(record):

                pairedEnds = []

                for pairedRecord in str(recordCoregistered).split():

                    # same data source as for the record itself
                    df = retrieve_analyzed(study, format_type, pairedRecord,
                                           segment, sensor)
                    pairedEnds.append(max(list(df['timeUnix'])))

                # the paired recording ends when the first wearable stops
                df_meta.loc[i, 'recordEnd'] = int(min(pairedEnds))

        save_meta(study, df_meta)
        print('df_meta = ')
        print(df_meta)
Example #14
0
def plot_segment():
    """
    Plot the segmented data of every record, one subplot per sensor.

    For each record a figure with one subplot per sensor is drawn. Every
    segment of segment_list except the last is loaded through
    retrieve_analyzed (with the segment name as analysis type) and
    scattered as 'measurement' against 'timeMinutes' in the segment's
    reference color. The figure is saved to
    <study>/plot/segment/<record>/<last sensor>.png and then closed so open
    figures do not pile up over many records.

    NOTE(review): the x axis is labelled 'Measurement Count - Before
    Timestamp' although 'timeMinutes' is plotted, and the file is named
    after the last sensor although all sensors are on the figure - confirm
    both.
    """

    print("plot segment data")

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')
    # retrieved but not used below
    timePreStudy = retrieve_ref('timePreStudy')
    timePostStudy = retrieve_ref('timePostStudy')

    for study in study_list:
        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        for record in source_path:

            # at least the usual six rows; more when there are more sensors,
            # otherwise plt.subplot would reject the seventh plot
            row_num, col_num, plot_num = max(6, len(sensor_list)), 1, 0
            row_width_mulp, col_width_mulp = 14, 5
            plot_width = col_num * row_width_mulp
            plot_height = row_num * col_width_mulp
            fig = plt.figure(figsize=(plot_width, plot_height))

            for sensor in sensor_list:

                # one subplot per sensor
                plot_num += 1
                plt.subplot(row_num, col_num, plot_num)

                for segment in segment_list[0:-1]:

                    analysis_type = segment
                    df = retrieve_analyzed(study, analysis_type, record,
                                           sensor)

                    valueColor = retrieve_ref_color(
                        str('color_' + str(segment)))
                    plt.scatter(df['timeMinutes'],
                                df['measurement'],
                                color=valueColor,
                                label=str(segment))
                    # set again for every segment, so the title ends up
                    # naming the last segment drawn
                    plt.title(analysis_type + ' ' + record + ' ' + sensor)
                    plt.xlabel('Measurement Count - Before Timestamp')
                    sensor_unit = retrieve_sensor_unit(sensor)
                    plt.ylabel(str(sensor) + ' ( ' + str(sensor_unit) + ' )')
                    plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2),
                               loc='upper left')

            # save the plot
            plot_path = os.path.join(study, 'plot', 'segment', record)
            os.makedirs(plot_path, exist_ok=True)
            plot_file = os.path.join(plot_path, sensor + '.png')
            plt.savefig(plot_file, bbox_inches='tight')
            # release the figure; one is created for every record
            plt.close(fig)
Example #15
0
def find_pairs():
    """
    Pair up records
    Note pairs in the meta file

    Record names are split on '_': the first field is parsed as an integer
    begin value and the second field is used as the wearable name.
    Two records of a study count as a pair when their begin values differ
    by less than 300 (presumably seconds - TODO confirm) and their wearable
    names differ.

    For each study the meta file is saved twice: once with every record,
    then again after rows sharing a recordBegin are reduced to the last
    one, the rows are sorted by recordBegin and the wearableName column is
    removed.
    """

    print("begin find_pairs")

    def first_row_label(frame, record_name):
        # index label of the first meta row whose source_path is record_name
        matching_rows = frame.loc[frame['source_path'] == record_name]
        return matching_rows.index.values[0]

    for study in retrieve_ref('study_list'):

        df_meta = retrieve_meta(study)
        print(df_meta)
        source_path = list(df_meta['source_path'])
        record_count = len(source_path)

        # columns that get filled in below
        df_meta['pairedRecord'] = [None] * record_count
        df_meta['recordCoregistered'] = source_path
        df_meta['recordBegin'] = [None] * record_count
        df_meta['wearableName'] = [None] * record_count

        # sort dataframe by the wearable name
        for record in source_path:
            wearable = str(record.split('_')[1])
            row = first_row_label(df_meta, record)
            df_meta.loc[row, 'wearableName'] = wearable
        df_meta = df_meta.sort_values(by='wearableName')

        # parse every record name once as (name, begin, wearable)
        parsed_records = []
        for record in source_path:
            fields = record.split('_')
            parsed_records.append((record, int(fields[0]), str(fields[1])))

        for name_a, begin_a, wearable_a in parsed_records:

            row = first_row_label(df_meta, name_a)
            # an unpaired record keeps its own begin value
            df_meta.loc[row, 'recordBegin'] = begin_a

            for name_b, begin_b, wearable_b in parsed_records:

                # not a pair: too far apart or recorded on the same wearable
                if abs(begin_a - begin_b) >= 300 or wearable_a == wearable_b:
                    continue

                # a later match overwrites an earlier one for the same row;
                # the pair shares the later of the two begin values
                df_meta.loc[row, 'pairedRecord'] = str(name_b)
                df_meta.loc[row, 'recordCoregistered'] = str(
                    str(name_a) + ' ' + str(name_b))
                df_meta.loc[row, 'recordBegin'] = max(begin_a, begin_b)

        # first save keeps one row per record
        save_meta(study, df_meta)

        # drop duplicated entries
        df_meta = df_meta.drop_duplicates(
            'recordBegin', keep='last').sort_values(by='recordBegin')
        del df_meta['wearableName']
        save_meta(study, df_meta)
        print('df_meta = ')
        print(df_meta)
Example #16
0
def plot_acc():
    """
    Plot the truncated ACC data of every record in every study

    One figure is drawn per record from the 'truncate' analysis of the
    ACC sensor:
        one subplot for each column whose name does not contain 'time'
        a last subplot with all of those columns on the same axes
    The figure is saved to <study>/plot/timestamp/<record>/ACC.png and is
    closed afterwards so figures do not pile up in memory
    """

    print("begin plotting acc data")

    study_list = retrieve_ref('study_list')

    sensor = 'ACC'
    analysis_type = 'truncate'

    # the figure is a single column of 5 stacked subplots
    row_num, col_num = 5, 1
    row_width_mulp, col_width_mulp = 20, 5
    plot_width, plot_height = col_num * row_width_mulp, row_num * col_width_mulp

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        for record in source_path:

            # load the data before opening a figure so a failed load does
            # not leave an empty figure behind
            df = retrieve_analyzed(study, analysis_type, record, sensor)
            value_names = [name for name in df.columns if 'time' not in name]

            # the x-axis limit is the same for every subplot of the record;
            # only needed when there is something to plot
            x_limit = None
            if value_names:
                x_limit = [0, 1.02 * max(list(df['timeMinutes']))]

            fig = plt.figure(figsize=(plot_width, plot_height))
            try:
                plot_num = 0

                # one subplot per measured column
                for name in value_names:
                    plot_num += 1
                    plt.subplot(row_num, col_num, plot_num)
                    colorScatter = retrieve_ref_color(
                        str('color_' + str(sensor) + '_' + str(name)))
                    plt.scatter(df['timeMinutes'],
                                df[name],
                                color=colorScatter,
                                label=str(name))
                    plt.title(analysis_type + ' ' + record + ' ' + sensor)
                    plt.xlabel('Time (Minutes)')
                    plt.ylabel(str(sensor + ' ' + name))
                    plt.xlim(x_limit)
                    plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2),
                               loc='upper left')

                # last subplot: all measured columns on the same axes
                plot_num += 1
                plt.subplot(row_num, col_num, plot_num)

                if value_names:
                    sensor_unit = retrieve_sensor_unit(sensor)

                for name in value_names:
                    colorScatter = retrieve_ref_color(
                        str('color_' + str(sensor) + '_' + str(name)))
                    plt.scatter(df['timeMinutes'],
                                df[name],
                                color=colorScatter,
                                label=str(name))
                    # title and y label are rewritten on every pass, so they
                    # end up naming the last plotted column
                    plt.title(analysis_type + ' ' + record + ' ' + sensor +
                              ' ' + name)
                    plt.xlabel('Time (Minutes)')
                    plt.ylabel(
                        str(sensor + ' ' + name + ' ( ' + str(sensor_unit) +
                            ' )'))
                    plt.xlim(x_limit)
                    plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2),
                               loc='upper left')

                # save the plot
                plot_path = os.path.join(study, 'plot', 'timestamp', record)
                os.makedirs(plot_path, exist_ok=True)
                plot_file = os.path.join(plot_path, sensor + '.png')
                plt.savefig(plot_file, bbox_inches='tight')
                print('saved plotted acc figure - ' + str(plot_file))
            finally:
                # pyplot keeps every figure alive until it is closed
                plt.close(fig)

    print("completed plotting acc data")