Example #1
0
def format_source():
    """
    Call timestamp_source for every record and sensor of every study.

    The study and sensor lists come from retrieve_ref; the records of a
    study are the entries of the 'source_path' column of its metadata.
    Every call uses format type 'source' and segment 'All'.
    Nothing is returned.
    """

    print("begin format source")

    studies = retrieve_ref('study_list')
    # these two references are looked up but not used below
    _format_types = retrieve_ref('format_types')
    _segments = retrieve_ref('segment_list')
    sensors = retrieve_ref('sensor_list')

    for study in studies:

        records = list(retrieve_meta(study)['source_path'])

        for record in records:
            for sensor in sensors:
                # the dataframe handed back by timestamp_source is not used here
                timestamp_source(study, 'source', 'All', record, sensor)
def segment_df(segment, df):
    """
    Trim a dataframe to the time window of the named segment.

    segment is compared against the first three entries of the
    'segment_list' reference:
      - entry 0: rows with timeMinutes above timePreStudy are dropped
      - entry 1: rows below timePreStudy or above timePostStudy are dropped
      - entry 2: rows below timePostStudy are dropped
    Any other segment name leaves the dataframe untouched.
    The (possibly trimmed) dataframe is returned.
    """

    segments = retrieve_ref('segment_list')
    pre_study_end = retrieve_ref('timePreStudy')
    post_study_begin = retrieve_ref('timePostStudy')

    # the three tests are independent on purpose: each one is always evaluated
    if segment == segments[0]:
        after_window = df['timeMinutes'] > pre_study_end
        df = df.drop(df[after_window].index)

    if segment == segments[1]:
        before_window = df['timeMinutes'] < pre_study_end
        df = df.drop(df[before_window].index)
        after_window = df['timeMinutes'] > post_study_begin
        df = df.drop(df[after_window].index)

    if segment == segments[2]:
        before_window = df['timeMinutes'] < post_study_begin
        df = df.drop(df[before_window].index)

    return df
def count_inflections():
    """
    Drive the inflection analysis for every study, sensor and record.

    Records are the entries of studies/<study>/formatted/clean and are
    processed in reverse listing order; the search ranges are processed in
    reverse order as well. The 'ACC' and 'BVP' sensors are skipped.

    For each record the whole-record ('All') inflection file is produced
    for every search range unless it already exists, then
    segment_inflections is called for every segment and search range, and
    finally the unique inflections are listed and plotted once more.
    """

    print('count inflections')

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    segments = retrieve_ref('segment_list')

    search_widths = [int(value) for value in retrieve_ref('searchRange')]
    search_widths.reverse()

    whole_record = 'All'

    for study in studies:

        format_type = 'clean'
        record_names = os.listdir(
            os.path.join('studies', study, 'formatted', format_type))
        record_names.reverse()

        for sensor in sensors:

            if sensor in ('ACC', 'BVP'):
                continue

            for record in record_names:

                for width in search_widths:

                    path = [
                        'studies', study, 'analyzed', 'inflections',
                        'all_times',
                        str(width), record, whole_record
                    ]
                    file = os.path.join(os.path.join(*path), sensor + ".csv")
                    print('inflection file = ' + file)

                    # an existing result is never recomputed
                    if os.path.isfile(file):
                        print('file already found')
                        continue

                    find_inflections(path, file, study, format_type, record,
                                     sensor, whole_record, width)
                    unique_inflections(study, format_type, record, sensor,
                                       whole_record)
                    plot_inflections(study, record, sensor, whole_record)

                for segment in segments:
                    for width in search_widths:
                        segment_inflections(study, record, sensor, segment,
                                            width)

                # refresh the whole-record summary after the segments are done
                unique_inflections(study, format_type, record, sensor,
                                   whole_record)
                plot_inflections(study, record, sensor, whole_record)
def segment_data():
    """
    Split every truncated record into its study segments and save each one.

    For every study, record (the 'source_path' column of the metadata),
    sensor and segment, the 'truncate' analysis is loaded with
    retrieve_analyzed and trimmed on its 'timeMinutes' column:
      - segment_list[0]: keep rows up to timePreStudy
      - segment_list[1]: keep rows between timePreStudy and timePostStudy
      - segment_list[2]: keep rows from timePostStudy onwards
    Any other segment is saved untrimmed.

    The result is written to <study>/segment/<segment>/<record>/<sensor>.csv;
    missing folders, including the study folder itself, are created.
    Nothing is returned.
    """

    print("segment data")

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')
    timePreStudy = retrieve_ref('timePreStudy')
    timePostStudy = retrieve_ref('timePostStudy')

    for study in study_list:
        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        for record in source_path:

            for sensor in sensor_list:

                for segment in segment_list:

                    # reload for every segment: the trimming below is destructive
                    analysis_type = 'truncate'
                    df = retrieve_analyzed(study, analysis_type, record,
                                           sensor)

                    if segment == segment_list[0]:
                        timeEnd = timePreStudy
                        df = df.drop(df[df['timeMinutes'] > timeEnd].index)

                    if segment == segment_list[1]:
                        timeBegin = timePreStudy
                        timeEnd = timePostStudy
                        df = df.drop(df[df['timeMinutes'] < timeBegin].index)
                        df = df.drop(df[df['timeMinutes'] > timeEnd].index)

                    if segment == segment_list[2]:
                        timeBegin = timePostStudy
                        df = df.drop(df[df['timeMinutes'] < timeBegin].index)

                    # create the whole folder chain in one step; exist_ok
                    # avoids the check-then-create race of isdir + mkdir
                    folder = os.path.join(study, 'segment', str(segment),
                                          record)
                    os.makedirs(folder, exist_ok=True)

                    path = os.path.join(folder, sensor + ".csv")
                    df.to_csv(path)

                    print('segments file saved: ' + str(path))
Example #5
0
def plot_regression():
    """
    Save one regression figure per study, sensor, degree and record.

    Records are the entries of studies/<study>/formatted/clean. Each figure
    is a single column of subplots:
      - one subplot per segment, drawn by plot_regression_segment
      - one subplot overlaying every segment except the last
      - one subplot drawn by plot_coefficient_bar
    The figure is written to
    studies/<study>/plotted/regression/<degree>/<record>/<sensor>.png
    and then closed, so figures do not pile up in memory while the loops
    run. Nothing is returned.
    """

    print('plotting regression')

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')

    degree_list = [int(x) for x in retrieve_ref('degree_list')]

    # figure size per subplot column / row
    width_per_col, height_per_row = 14, 5

    for study in study_list:

        format_type = 'clean'
        clean_path = os.path.join('studies', study, 'formatted', format_type)
        recordNames = os.listdir(clean_path)

        for sensor in sensor_list:

            for degree in degree_list:

                for record in recordNames:

                    row_num, col_num, plot_num = len(segment_list) + 2, 1, 0
                    plot_width = col_num * width_per_col
                    plot_height = row_num * height_per_row
                    fig = plt.figure(figsize=(plot_width, plot_height))

                    # one subplot per segment
                    for segment in segment_list:
                        plot_num += 1
                        plt.subplot(row_num, col_num, plot_num)
                        plot_regression_segment(study, record, segment,
                                                sensor, degree)

                    # overlay of every segment except the last one
                    plot_num += 1
                    plt.subplot(row_num, col_num, plot_num)
                    for segment in segment_list[:-1]:
                        plot_regression_segment(study, record, segment,
                                                sensor, degree)
                        plt.title(' ')

                    # coefficient bar chart
                    plot_num += 1
                    plt.subplot(row_num, col_num, plot_num)
                    plot_coefficient_bar(study, record, sensor, degree)
                    plt.title(' ')

                    path = [
                        'studies', study, 'plotted', 'regression',
                        str(degree), record
                    ]
                    path = build_path(path)
                    file = os.path.join(path, sensor + ".png")
                    plt.savefig(file, bbox_inches='tight')

                    # release the figure; pyplot keeps it alive otherwise
                    plt.close(fig)
                    print('plotted regression for ' + file)
def find_record_end_using_temp(df):
    """
    Locate the end of a record from a drop in the temperature signal.

    df needs the columns 'timeUnix', 'timeMinutes' and 'measurement'.
    Samples are scanned in order once the minimum record time has passed.
    A candidate end is a sample whose value is more than 2 above the value
    12 samples later (3 seconds at 4 samples per second).
      - With 5 minutes or less of data left, the candidate is accepted.
      - With more than 5 minutes left, the value must also stay lower
        100, 200 and 300 samples later (by 2, 2 and 3 respectively).
    The end is placed 12 samples before the accepted candidate. Without a
    candidate the end is the 12th sample from the end of the record.
    The end is capped at max_record_time minutes after the first sample.

    Returns the unix end time minus one, as an int.
    """

    max_record_time = float(retrieve_ref('max_record_time'))
    min_record_time = float(retrieve_ref('min_record_time'))

    unix_times = list(df['timeUnix'])
    minutes = list(df['timeMinutes'])
    readings = list(df['measurement'])

    # fallback when no drop is found
    end_unix = unix_times[-12]

    for i in range(len(readings) - 12):

        # the record has to reach the minimum length first
        if not (float(minutes[i]) > float(min_record_time) + minutes[0]):
            continue

        # look for a drop of more than 2 over the next 12 samples
        if not (float(readings[i]) - 2 > float(readings[i + 12])):
            continue

        if minutes[-1] - minutes[i] > 5:
            # plenty of data left: only accept a drop that stays low
            stays_low = (
                float(readings[i]) - 2 > float(readings[i + 100])
                and float(readings[i]) - 2 > float(readings[i + 200])
                and float(readings[i]) - 3 > float(readings[i + 300])
            )
            if not stays_low:
                continue

        end_unix = unix_times[i - 12]
        break

    # trim the record back to a maximum
    latest_allowed = unix_times[0] + 60 * max_record_time
    if end_unix - unix_times[0] > 60 * max_record_time:
        end_unix = latest_allowed

    return int(end_unix - 1)
Example #7
0
def format_source():
    """
    Timestamp the source measurements of every record and save them.

    For every study, each row of the metadata supplies a record name and
    the folder ('source_path') its raw sensor files live in. Every sensor
    file studies/<study>/source/<source_path>/<sensor>.csv is read, passed
    through build_timestamps and written, without truncation, to
    studies/<study>/formatted/source/<recordName>/All/<sensor>.csv.
    Nothing is returned.
    """

    print("begin format_source")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')

    format_type, segment = 'source', 'All'

    for study in studies:

        df_meta = retrieve_meta(study)
        print(df_meta)

        for record in list(df_meta['recordName']):

            row = df_meta[df_meta['recordName'] == record].index.values[0]
            record_source = df_meta.loc[row, 'source_path']
            # read from the metadata but not applied here (no truncation)
            _record_begin = df_meta.loc[row, 'recordBegin']
            _record_end = df_meta.loc[row, 'recordEnd']

            print('i = ' + str(row))
            print('record = ' + str(record))
            print('recordSource = ' + str(record_source))

            for sensor in sensors:

                source = os.path.join('studies', study, format_type,
                                      record_source, sensor + '.csv')
                df_timestamped = build_timestamps(pd.read_csv(source), sensor)

                folder = build_path([
                    'studies', study, 'formatted', format_type, record,
                    segment
                ])
                file = os.path.join(folder, sensor + ".csv")
                df_timestamped.to_csv(file)
                print('formatted source file = ' + str(file))
def segment_inflections():
    """
    Cut the whole-record inflection files into one file per segment.

    For every study, sensor, record (entries of <study>/formatted/clean),
    search range and segment other than 'All', the whole-record file
    <study>/analyzed/inflections/all_times/<range>/<record>/All/<sensor>.csv
    is read (if it exists), trimmed with segment_df and written to
    <study>/analyzed/inflections/all_times/<range>/<record>/<segment>/<sensor>.csv.

    The trimmed data goes to the segment's own folder so the 'All' file is
    never overwritten and every segment is cut from the full record.
    Nothing is returned.
    """

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')
    searchRange = retrieve_ref('searchRange')

    for study in study_list:

        format_type = 'clean'
        clean_path = os.path.join(study, 'formatted', format_type)
        recordNames = os.listdir(clean_path)

        for sensor in sensor_list:

            for record in recordNames:

                for search_range in searchRange:

                    for segment in segment_list:

                        if segment == 'All':
                            continue

                        # the whole-record file is the input
                        source_dir = build_path([
                            study, 'analyzed', 'inflections', 'all_times',
                            str(search_range), record, 'All'
                        ])
                        source_file = os.path.join(source_dir,
                                                   sensor + ".csv")

                        if not os.path.isfile(source_file):
                            continue

                        df = pd.read_csv(source_file)

                        # drop the index columns left by earlier to_csv calls
                        for colName in df.columns:
                            if 'Unnamed' in str(colName):
                                del df[colName]

                        df = segment_df(segment, df)

                        # write next to, not over, the whole-record file
                        target_dir = build_path([
                            study, 'analyzed', 'inflections', 'all_times',
                            str(search_range), record, str(segment)
                        ])
                        target_file = os.path.join(target_dir,
                                                   sensor + ".csv")

                        df.to_csv(target_file)
Example #9
0
def find_record_end_from_temp(df_timestamped):
    """
    Find the record end by searching for the dip in the temperature.

    df_timestamped needs the columns 'timeUnix', 'timeMinutes' and
    'measurement'. Samples earlier than min_record_time minutes after the
    first sample are skipped. The record ends at the first later sample
    whose value is more than 2 above the value 12 samples ahead. When no
    such dip exists, the record ends at the largest 'timeMinutes' value.

    Returns the end time in minutes as a float.
    Raises ValueError when the dataframe has no rows.
    """

    min_record_time = float(retrieve_ref('min_record_time'))

    print('df_timestamped = ')
    print(df_timestamped)

    timeUnix = list(df_timestamped['timeUnix'])
    timeMinutes = list(df_timestamped['timeMinutes'])
    measurements = list(df_timestamped['measurement'])

    # default: no dip found, the record runs to its last timestamp
    time_end = max(timeMinutes)
    timeEndUnix = max(timeUnix)

    earliest_end = min_record_time + float(timeMinutes[0])

    for i in range(len(measurements) - 12):

        # the record has to reach the minimum length before it may end
        if not (timeMinutes[i] > earliest_end):
            continue

        # if the temperature drops more than 2 degrees in 3 seconds
        # end the record
        if measurements[i] - 2 > measurements[i + 12]:
            time_end = timeMinutes[i]
            timeEndUnix = timeUnix[i]
            break

    time_end = float(time_end)

    print('time_end = ' + str(time_end))
    print('timeEndUnix = ' + str(timeEndUnix))

    return time_end
def find_record_end():
    """
    Record the begin time, end time and full length of every source record.

    For every study the TEMP sensor of each record is timestamped with
    timestamp_source, and three metadata columns are filled in:
      - recordBegin: the integer before the first '_' in the record name
      - recordEnd:   recordBegin + 60 * the largest 'timeMinutes' value
      - fullLength:  the largest 'timeMinutes' value, rounded to 4 places
    The metadata is saved per study, and find_temp_end() is called once
    all studies are done.
    """

    print("begin timestamp source")

    study_list = retrieve_ref('study_list')
    # the next three references are looked up but not used in the code below
    format_types = retrieve_ref('format_types')
    segment_list = retrieve_ref('segment_list')
    sensor_list = retrieve_ref('sensor_list')

    # only the TEMP sensor of the untrimmed source is timestamped here
    format_type = 'source'
    segment = 'All'
    sensor = 'TEMP'

    for study in study_list:

        print('study = ' + str(study))

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        # start with empty columns; they are filled row by row below
        df_meta['recordBegin'] = [None] * len(source_path)
        df_meta['recordEnd'] = [None] * len(source_path)
        df_meta['fullLength'] = [None] * len(source_path)

        # summarize what has been found so far
        record_to_summary(study, 'Records found', len(source_path))

        for record in source_path:
            # source = os.path.join(study, 'source', record, sensor + '.csv')
            df_timestamped = timestamp_source(study, format_type, segment, record, sensor)


            # Save the full length of the unedited record
            # i is the metadata row whose source_path matches this record
            i = df_meta[ df_meta['source_path']== record].index.values[0]
            # the record name is expected to start with an integer followed
            # by '_' (presumably the unix start time - confirm)
            recordSplit = record.split('_')
            df_meta.loc[i, 'recordBegin' ] = int(recordSplit[0])
            # minutes are converted to seconds before adding to the begin time
            df_meta.loc[i, 'recordEnd' ] = int(recordSplit[0]) + 60*(max(df_timestamped['timeMinutes']))
            df_meta.loc[i, 'fullLength' ] = round(max(df_timestamped['timeMinutes']) , 4)


        save_meta(study, df_meta)


    find_temp_end()

    """
Example #11
0
def format_truncate():
    """
    Cut every formatted source file down to its record window and save it.

    For every study, record ('recordName' in the metadata) and sensor, the
    file studies/<study>/formatted/source/<record>/All/<sensor>.csv is read
    and only the rows with recordBegin < timeUnix < recordEnd are kept.
    The result is written to
    studies/<study>/formatted/truncate/<record>/All/<sensor>.csv.

    An AssertionError is raised when no rows are left after trimming.
    Nothing is returned.
    """

    print("begin format_truncate")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')

    segment = 'All'

    for study in studies:

        df_meta = retrieve_meta(study)

        for record in list(df_meta['recordName']):

            row = df_meta[df_meta['recordName'] == record].index.values[0]
            window_begin = df_meta.loc[row, 'recordBegin']
            window_end = df_meta.loc[row, 'recordEnd']
            print('i = ' + str(row))

            for sensor in sensors:

                source = os.path.join('studies', study, 'formatted', 'source',
                                      record, segment, sensor + '.csv')
                df = pd.read_csv(source)

                # both bounds are exclusive
                after_begin = df['timeUnix'] > window_begin
                df = df[after_begin]
                before_end = df['timeUnix'] < window_end
                df = df[before_end]

                assert len(df['timeUnix']) > 0, 'during format truncate, dataframe empty'

                folder = build_path(['studies', study, 'formatted',
                                     'truncate', record, segment])
                file = os.path.join(folder, sensor + ".csv")
                df.to_csv(file)
                print('formatted truncated file = ' + str(file))
def define_record():
    """
    Set the begin, end and length of every record in the metadata.

    For every study and record ('source_path' in the metadata), the
    formatted TEMP source file is read and handed to
    find_record_end_using_temp to get the unix end time. Then:
      - recordBegin is the record's originalBegin
      - recordEnd is the end time found from the temperature
      - recordLength is (recordEnd - recordBegin) / 60, rounded to 4 places
    The metadata is saved once per study. Nothing is returned.
    """

    studies = retrieve_ref('study_list')
    # looked up but not used below
    _min_record_time = retrieve_ref('min_record_time')
    _max_record_time = retrieve_ref('max_record_time')

    format_type, segment, sensor = 'source', 'All', 'TEMP'

    for study in studies:

        # retrieve the list of records from the metadata
        df_meta = retrieve_meta(study)
        records = list(df_meta['source_path'])

        for column in ('recordBegin', 'recordEnd', 'recordLength'):
            df_meta[column] = [None] * len(records)

        for record in records:

            row = df_meta[df_meta['source_path'] == record].index.values[0]
            begin = df_meta.loc[row, 'originalBegin']
            # read from the metadata but not used in the calculation
            _original_end = df_meta.loc[row, 'originalEnd']
            _original_length = df_meta.loc[row, 'originalLength']

            source = os.path.join('studies', study, 'formatted', format_type,
                                  record, segment, sensor + '.csv')
            end = find_record_end_using_temp(pd.read_csv(source))
            length_minutes = (end - begin) / 60

            df_meta.loc[row, 'recordBegin'] = begin
            df_meta.loc[row, 'recordEnd'] = end
            df_meta.loc[row, 'recordLength'] = round(length_minutes, 4)

        # save the metadata file
        save_meta(study, df_meta)
Example #13
0
def pair_records():
    """
    Look for records of the same study that were worn at the same time.

    Two records are paired when their recordBegin values differ by less
    than 300, they are different records, and their wearableName differs.
    The 'coregisterRecords' column starts out as the record's own name and
    is replaced by '<recordA> <recordB>' when a pair is found; if several
    partners match, the last one checked is the one kept.
    The metadata is saved once per study. Nothing is returned.
    """

    studies = retrieve_ref('study_list')
    # looked up but not used below
    _sensors = retrieve_ref('sensor_list')

    for study in studies:

        df_meta = retrieve_meta(study)
        names = list(df_meta['recordName'])

        # add the wearable name and default every record to itself
        df_meta = add_wearableName(df_meta)
        df_meta['coregisterRecords'] = names

        for first in names:

            i = df_meta[df_meta['recordName'] == first].index.values[0]
            begin_first = df_meta.loc[i, 'recordBegin']
            wearable_first = df_meta.loc[i, 'wearableName']

            for second in names:

                j = df_meta[df_meta['recordName'] == second].index.values[0]
                begin_second = df_meta.loc[j, 'recordBegin']
                wearable_second = df_meta.loc[j, 'wearableName']

                is_pair = (abs(begin_first - begin_second) < 300
                           and first != second
                           and wearable_first != wearable_second)
                if not is_pair:
                    continue

                print('coregister record found for ' + first +
                      ' + ' + second)
                df_meta.loc[i, 'coregisterRecords'] = str(first + ' ' + second)

        save_meta(study, df_meta)
def define_original():
    """
    Record the original begin, end and length of every record.

    For every study and record ('source_path' in the metadata), the TEMP
    sensor is timestamped with timestamp_source. The smallest and largest
    'timeUnix' values become originalBegin and originalEnd, and their
    difference divided by 60, rounded to 4 places, becomes originalLength.
    'recordName' is set to the source path.

    The metadata is saved, then rows whose originalLength is below
    min_record_time are dropped and the metadata is saved again.
    Nothing is returned.
    """

    studies = retrieve_ref('study_list')
    min_record_time = retrieve_ref('min_record_time')

    format_type, segment, sensor = 'source', 'All', 'TEMP'

    for study in studies:

        # retrieve the list of records from the metadata
        df_meta = retrieve_meta(study)
        records = list(df_meta['source_path'])

        # add the columns that describe the original record
        df_meta['recordName'] = records
        for column in ('originalBegin', 'originalEnd', 'originalLength'):
            df_meta[column] = [None] * len(records)

        for record in records:

            df_timestamped = timestamp_source(study, format_type, segment,
                                              record, sensor)

            unix_times = list(df_timestamped['timeUnix'])
            begin = int(min(unix_times))
            end = int(max(unix_times))
            length_minutes = (end - begin) / 60

            row = df_meta[df_meta['source_path'] == record].index.values[0]

            df_meta.loc[row, 'originalBegin'] = begin
            df_meta.loc[row, 'originalEnd'] = end
            df_meta.loc[row, 'originalLength'] = round(length_minutes, 4)

        # save everything first, then save again without the short records
        save_meta(study, df_meta)
        too_short = df_meta['originalLength'] < min_record_time
        df_meta = df_meta.drop(df_meta[too_short].index)
        save_meta(study, df_meta)
Example #15
0
def clean_save():
    """
    Save a cleaned copy of every coregistered record.

    For every study, record ('recordName' in the metadata) and sensor, the
    file studies/<study>/formatted/coregister/<record>/All/<sensor>.csv is
    read, its minutes are reset with reset_minutes, columns whose name
    contains 'Unnamed' are removed, and the result is written to
    studies/<study>/formatted/clean/<record>/All/<sensor>.csv.
    Nothing is returned.
    """

    print("begin clean_save")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    # looked up but not used below
    _segments = retrieve_ref('segment_list')

    segment = 'All'

    for study in studies:

        df_meta = retrieve_meta(study)

        for record in list(df_meta['recordName']):

            row = df_meta[df_meta['recordName'] == record].index.values[0]
            print('i = ' + str(row))

            for sensor in sensors:

                source = os.path.join('studies', study, 'formatted',
                                      'coregister', record, segment,
                                      sensor + '.csv')
                df = reset_minutes(segment, pd.read_csv(source))

                # remove the index columns left over from earlier saves
                for column in list(df.columns):
                    if 'Unnamed' in column:
                        del df[column]

                folder = build_path(['studies', study, 'formatted', 'clean',
                                     record, segment])
                file = os.path.join(folder, sensor + ".csv")
                df.to_csv(file)
                print('formatted clean file = ' + str(file))
Example #16
0
def plot_coefficient_bar(study, record, sensor, degree):
    """
    Draw one bar per segment showing the record's regression coefficient.

    For every segment the regression table is loaded with
    retrieve_regression and the value in its last column, on the row of
    the given record, is read. The value may be a single number or a
    space-separated string of numbers; the first number is plotted at the
    segment's position on the x axis of the current matplotlib axes.

    Segments whose value is missing (NaN, None or the text 'None') are
    skipped. Nothing is returned.
    """

    segment_list = retrieve_ref('segment_list')

    for segment in segment_list:

        print('bar chart for segment = ' + str(segment))

        df_coef = retrieve_regression(study, segment, sensor, degree)
        print('df_coef = ')
        print(df_coef)

        i = df_coef[df_coef['recordName'] == record].index.values[0]
        print('i = ' + str(i))

        # the coefficient lives in the last column of the table
        coeff = df_coef.loc[i, df_coef.columns[-1]]
        print('coeff = ' + str(coeff))

        # isnull gives a boolean; test it directly instead of comparing it
        # with the text 'True', which never matches
        a = pd.isnull(coeff)
        print('a = ' + str(a))

        if a or str(coeff) == 'None':
            print('cell empty a = ' + str(a) + ' coeff = ')
            print(coeff)
            continue

        print('cell not empty a = ' + str(a) + ' coeff = ')
        print(coeff)

        try:
            coeff = [float(coeff)]
            print('try found coeff = ')
        except (TypeError, ValueError):
            # not a single number: expect space-separated numbers
            coeff = [float(x) for x in coeff.split(' ')]
            print('except found coeff = ')
        print(coeff)

        xx = [segment_list.index(segment)]
        yy = [coeff[0]]

        wearable_num = 1
        colorSegment = retrieve_ref_color_wearable_segment(
            wearable_num, segment)
        plt.bar(xx, yy, color=colorSegment)
        plt.xticks(range(len(segment_list)), segment_list)
Example #17
0
def reset_minutes(segment, df):
    """
    Shift the 'timeMinutes' column so that it starts at 0.

    The first value of the column is subtracted from every value. The
    dataframe is modified in place and also returned. A dataframe without
    rows is returned unchanged.

    segment is accepted for compatibility with existing callers and is
    not used.
    """

    minutes = df['timeMinutes']

    if len(minutes) > 0:
        # assign plain values so the result is written positionally,
        # whatever the index looks like
        df['timeMinutes'] = (minutes - minutes.iloc[0]).values

    return df
Example #18
0
def latex_report(comprehensive_report):
    """
    Append the text of the latex manuscript to the report file.

    comprehensive_report is the path of the report file; it is opened in
    append mode, so it is created when missing and existing content is
    kept. The text comes from code/latex/manuscript.txt, relative to the
    current working directory.

    Both files are closed when the function returns, also on error.
    Raises OSError when either file cannot be opened. Nothing is returned.
    """

    src = os.path.join('code', 'latex', 'manuscript' + '.txt')

    # the report is opened first, as before, so it exists even when the
    # manuscript is missing
    with open(comprehensive_report, 'a') as dst, open(src, 'r') as manuscript:
        dst.write(manuscript.read())
def decide_inclusion():
    """
    Mark every record as included or excluded by its length.

    For every study the metadata is sorted by 'recordLength'. A record
    whose length is below min_record_time is labelled 'excluded', every
    other record 'included'. The labelled metadata is saved, then the
    excluded rows are dropped, the rest is sorted by 'source_path' and
    the metadata is saved again. Nothing is returned.
    """

    print("begin decide inclusion")

    studies = retrieve_ref('study_list')
    # looked up but not used below
    _sensors = retrieve_ref('sensor_list')
    _max_record_time = retrieve_ref('max_record_time')
    min_record_time = retrieve_ref('min_record_time')

    for study in studies:

        df_meta = retrieve_meta(study).sort_values(by=['recordLength'])

        # read but not used below
        _records_found = list(df_meta['source_path'])
        lengths = list(df_meta['recordLength'])

        df_meta['included'] = [
            'excluded' if length < min_record_time else 'included'
            for length in lengths
        ]
        save_meta(study, df_meta)

        # second save: only the included records, in source path order
        excluded_rows = df_meta[df_meta['included'] == 'excluded'].index
        df_meta = df_meta.drop(excluded_rows).sort_values(by=['source_path'])
        save_meta(study, df_meta)

    print("completed decide inclusion")
def trim_record_to_max():
    """
    Store the length of every record, capped at the maximum record time.

    For every study and record ('source_path' in the metadata), the file
    <study>/formatted/source/<record>/All/TEMP.csv is read. The largest
    'timeMinutes' value, limited to max_record_time and rounded to 4
    places, is written to the 'recordLength' column of the metadata.
    The metadata is saved once per study. Nothing is returned.
    """

    print("finding the end of the record")

    studies = retrieve_ref('study_list')
    # looked up but not used below
    _sensors = retrieve_ref('sensor_list')
    max_record_time = retrieve_ref('max_record_time')

    sensor = 'TEMP'

    for study in studies:

        df_meta = retrieve_meta(study)
        records = list(df_meta['source_path'])
        df_meta['recordLength'] = [None] * len(records)

        for record in records:

            timestamped_file = os.path.join(study, 'formatted', 'source',
                                            record, 'All', sensor + ".csv")
            minutes = list(pd.read_csv(timestamped_file)['timeMinutes'])

            longest = max(minutes)
            capped = max_record_time if longest > max_record_time else longest

            row = df_meta[df_meta['source_path'] == record].index.values[0]
            df_meta.loc[row, 'recordLength'] = round(capped, 4)

        # save the record length to the metadata
        save_meta(study, df_meta)
def count_inflections():
    """
    Drive the inflection analysis for every study, sensor and record.

    Records are the entries of <study>/formatted/truncate. For every
    segment and search range segment_inflections is called; for the 'All'
    segment the inflections are first found and plotted.
    Nothing is returned.
    """

    print('begin counting inflections')

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    segments = retrieve_ref('segment_list')

    search_widths = [int(value) for value in retrieve_ref('searchRange')]

    for study in studies:

        format_type = 'truncate'
        record_names = os.listdir(
            os.path.join(study, 'formatted', format_type))

        for sensor in sensors:
            for record in record_names:
                for segment in segments:

                    is_whole_record = segment == 'All'

                    for width in search_widths:

                        # the whole record is analysed before it is segmented
                        if is_whole_record:
                            find_inflections(study, format_type, record,
                                             sensor, segment, int(width))
                            plot_inflections(study, record, sensor, segment)

                        segment_inflections(study, record, sensor, segment,
                                            width)

    print('completed counting inflections')
Example #22
0
def machineLearningBasic():
    """
    Print the statistics table of each study

    Reads <study>/analyzed/statistics/statistics.csv for every study,
    removes the columns whose name contains 'Unnamed'
    and prints what is left
    """

    print("begin machine learning basic")

    study_list = retrieve_ref('study_list')
    # retrieved as before; not used further in this function
    sensor_list = retrieve_ref('sensor_list')

    for study in study_list:
        analyzed_file = os.path.join(study, 'analyzed', 'statistics',
                                     'statistics.csv')
        df = pd.read_csv(analyzed_file)

        # collect the unwanted columns first, then drop them in one call
        unnamed_columns = [name for name in df.columns if 'Unnamed' in name]
        df = df.drop(columns=unnamed_columns)

        print(df)

    print("end machine learning basic")
def segment_records():
    """
    Split every formatted record into its study segments.

    For each study, record, format type and sensor the 'All' dataframe is
    loaded with retrieve_analyzed and trimmed on its 'timeMinutes' column:
      - segment_list[0]: rows up to timePreStudy
      - segment_list[1]: rows between timePreStudy and timePostStudy
      - segment_list[2]: rows from timePostStudy onward
      - any other segment name: the rows are left untrimmed
    Each result is written to
    <study>/formatted/<format>/<record>/<segment>/<sensor>.csv

    Takes no arguments and returns nothing; each saved path is printed.
    """

    print("begin segmenting records")

    study_list = retrieve_ref('study_list')
    format_types = retrieve_ref('format_types')
    segment_list = retrieve_ref('segment_list')
    sensor_list = retrieve_ref('sensor_list')

    timePreStudy = retrieve_ref('timePreStudy')
    timePostStudy = retrieve_ref('timePostStudy')

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        for record in source_path:

            # named format_type so the builtin format() is not shadowed
            for format_type in format_types:

                for sensor in sensor_list:

                    df_all = retrieve_analyzed(study, format_type, record,
                                               'All', sensor)

                    for segment in segment_list:

                        # Always start from the untouched 'All' frame.
                        # Trimming one shared variable would cut each segment
                        # out of the previous segment's leftovers.
                        df = df_all

                        if segment == segment_list[0]:
                            timeEnd = timePreStudy
                            df = df.drop(df[df['timeMinutes'] > timeEnd].index)

                        if segment == segment_list[1]:
                            timeBegin = timePreStudy
                            timeEnd = timePostStudy
                            df = df.drop(
                                df[df['timeMinutes'] < timeBegin].index)
                            df = df.drop(df[df['timeMinutes'] > timeEnd].index)

                        if segment == segment_list[2]:
                            timeBegin = timePostStudy
                            df = df.drop(
                                df[df['timeMinutes'] < timeBegin].index)

                        path = os.path.join(study, 'formatted', format_type,
                                            record, segment)
                        # also creates missing parent folders
                        os.makedirs(path, exist_ok=True)
                        file_path = os.path.join(path, sensor + ".csv")
                        df.to_csv(file_path)

                        print('dataframe saved for segments: ' +
                              str(file_path))
def count_inflections():
    """
    Run the inflection steps one after the other

    Order: find_inflections, list_unique_inflections,
    segment_inflections, plot_inflections
    """

    print('begin counting inflections')

    # the list handed back by retrieve_ref is reversed in place
    search_range = retrieve_ref('searchRange')
    search_range.reverse()

    # each step is called without arguments
    find_inflections()
    list_unique_inflections()
    segment_inflections()
    plot_inflections()

    print('completed counting inflections')
def segment_formatted(format_type):
    """
    Cut each clean record into the segments named in segment_list

    For every study / record / sensor the whole ('All') clean csv is read,
    trimmed with segment_df, and saved as
    studies/<study>/formatted/clean/<record>/<segment>/<sensor>.csv

    NOTE(review): the format_type argument is replaced by 'clean'
    before it is ever read - confirm this is intended
    """

    print("begin segment_formatted")

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    format_types = retrieve_ref('format_types')

    segment_list = retrieve_ref('segment_list')
    timePreStudy = retrieve_ref('timePreStudy')
    timePostStudy = retrieve_ref('timePostStudy')

    # the input is always the whole clean record
    format_type, segmentRef = 'clean', 'All'

    for study in study_list:

        df_meta = retrieve_meta(study)

        for record in list(df_meta['recordName']):

            # row label of the record in the meta file, printed for tracing
            matching_rows = df_meta[df_meta['recordName'] == record]
            i = matching_rows.index.values[0]
            print('i = ' + str(i))

            for sensor in sensor_list:

                for segment in segment_list:

                    source = os.path.join('studies', study, 'formatted',
                                          format_type, record, segmentRef,
                                          sensor + '.csv')
                    df_segmented = segment_df(segment, pd.read_csv(source))

                    target_folder = build_path([
                        'studies', study, 'formatted', format_type, record,
                        segment
                    ])
                    target_file = os.path.join(target_folder, sensor + ".csv")
                    df_segmented.to_csv(target_file)
                    print('segmented clean file = ' + str(target_file))
Example #26
0
def plot_cleaned():
    """
    Plot every study so the quality of the clean data can be checked
    Runs, per study: plot_source, plot_acc, plot_coregister, plot_segment
    """

    print("begin plotting the clean data ")

    for study_name in retrieve_ref('study_list'):
        plot_source(study_name)
        plot_acc(study_name)
        plot_coregister(study_name)
        plot_segment(study_name)

    print("completed plotting the clean data")
def find_paired_duration():
    """
    Find the duration of each record and store it in the meta file.

    For every study the meta dataframe is loaded and a 'recordDuration'
    column is filled with (recordEnd - recordBegin) / 60, rounded to 4
    decimals, before the meta file is saved again.
    NOTE(review): presumably recordBegin / recordEnd are Unix seconds, which
    would make the duration minutes - confirm.

    Takes no arguments and returns nothing; progress is printed.
    """

    print("begin find_paired_duration")

    study_list = retrieve_ref('study_list')

    for study in study_list:

        df_meta = retrieve_meta(study)

        # add empty column
        df_meta['recordDuration'] = [None] * len(df_meta)

        # Visit the rows by their index label instead of looking each one up
        # by its source_path: a lookup only ever finds the first match, so
        # rows sharing a source_path would be left without a duration.
        for i in df_meta.index:

            print('i = ' + str(i))

            recordBegin = int(df_meta.loc[i, 'recordBegin'])
            print('recordBegin = ' + str(recordBegin))

            recordEnd = int(df_meta.loc[i, 'recordEnd'])
            print('recordEnd = ' + str(recordEnd))

            recordDuration = round((recordEnd - recordBegin) / 60, 4)

            df_meta.loc[i, 'recordDuration'] = recordDuration

            print('recordDuration = ' + str(recordDuration))

        save_meta(study, df_meta)
        print('df_meta = ')
        print(df_meta)
def compile_formatted(format_type):
    """
    Save each segment of every clean record as its own csv

    For every study / record / sensor the whole ('All') clean csv is read,
    trimmed with segment_df, and written to
    studies/<study>/formatted/clean/<record>/<segment>/<sensor>.csv

    NOTE(review): despite the name, nothing is collapsed into a single csv
    here, and the format_type argument is replaced by 'clean' before it is
    ever read - confirm this is intended
    """

    print("begin compile_formatted")

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    format_types = retrieve_ref('format_types')

    segment_list = retrieve_ref('segment_list')
    timePreStudy = retrieve_ref('timePreStudy')
    timePostStudy = retrieve_ref('timePostStudy')

    # the input is always the whole clean record
    format_type, segmentRef = 'clean', 'All'

    for study in study_list:

        df_meta = retrieve_meta(study)

        for record in list(df_meta['recordName']):

            # row label of the record in the meta file, printed for tracing
            matching_rows = df_meta[df_meta['recordName'] == record]
            i = matching_rows.index.values[0]
            print('i = ' + str(i))

            for sensor in sensor_list:

                for segment in segment_list:

                    source = os.path.join('studies', study, 'formatted',
                                          format_type, record, segmentRef,
                                          sensor + '.csv')
                    df_segmented = segment_df(segment, pd.read_csv(source))

                    target_folder = build_path([
                        'studies', study, 'formatted', format_type, record,
                        segment
                    ])
                    target_file = os.path.join(target_folder, sensor + ".csv")
                    df_segmented.to_csv(target_file)
                    print('segmented clean file = ' + str(target_file))
Example #29
0
def study_figures(comprehensive_report):
    """
    Append every available figure to the comprehensive report.

    For each study, the record folders under
    studies/<study>/formatted/truncate are listed in sorted order. For each
    record, format type and sensor the matching .png is looked up under
    studies/<study>/plotted/ and, when the file exists, a LaTeX figure
    block (includegraphics + caption + clearpage) is appended to the report.

    Regression figures live in plotted/regression/<degree>/ and are selected
    with the pseudo format types 'regression0', 'regression1', 'regression2'.

    The caption numbers records starting at 1 ("record 1 of N").

    Args:
        comprehensive_report: path of the LaTeX file the figure blocks are
            appended to.

    Returns nothing; each figure found is printed.
    """

    # retrieve variables needed to find the figures
    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    # reverses, in place, the list handed back by retrieve_ref
    sensor_list.reverse()

    format_types = [
        'source', 'truncate', 'coregister', 'clean',
        'regression0', 'regression1', 'regression2',
    ]

    print('format_types = ')
    print(format_types)

    regression_prefix = 'regression'

    for study in study_list:

        source_path = os.path.join('studies', study, 'formatted', 'truncate')
        format_folders = sorted(os.listdir(source_path))
        record_count = len(format_folders)

        # start=1 so the caption reads "record 1 of N", never "record 0 of N"
        for record_number, record in enumerate(format_folders, start=1):

            for format_type in format_types:

                for sensor in sensor_list:

                    if format_type.startswith(regression_prefix):
                        # 'regression2' -> plotted/regression/2/...
                        degree = format_type[len(regression_prefix):]
                        source = os.path.join('studies', study, 'plotted',
                                              'regression', degree, record,
                                              sensor + '.png')
                    else:
                        source = os.path.join('studies', study, 'plotted',
                                              format_type, record,
                                              sensor + '.png')

                    if not os.path.isfile(source):
                        continue

                    print('path found: plot_file = ' + str(source))

                    caption = (str(study) + ' (record ' + str(record_number) +
                               ' of ' + str(record_count) + ') ' + sensor +
                               ' ' + format_type)

                    figure_block = [
                        '\n',
                        '\n',
                        '\\begin{figure}[ht]\n',
                        '\\includegraphics[width=\\textwidth]{' + source +
                        '}\n',
                        '\\caption{' + caption + '}\n',
                        '\\centering \n',
                        '\\end{figure} \n',
                        '\\clearpage \n',
                        '\n',
                    ]

                    # the context manager closes the report even if a write fails
                    with open(comprehensive_report, "a") as file:
                        file.writelines(figure_block)
def multiple_record_check():
    """
    Look for a second recording stored after the end of each record.

    For every study the meta file is read. For each source_path whose
    fullLength exceeds truncatedLength by more than 30, the
    'source' / 'All' / 'TEMP' dataframe is scanned, past truncatedLength + 5,
    for a rise of more than 3 in 'measurement' across 28 samples. The first
    such rise is reported as a new record and its source_path, begin and end
    are appended to the lists taken from the meta file.

    The meta file is then saved from a new dataframe that holds only
    source_path, recordBegin and recordEnd.

    NOTE(review): the lengths and 'timeMinutes' are presumably minutes and
    'timeUnix' Unix seconds - confirm against the code that writes them.

    Takes no arguments and returns nothing; progress is printed.
    """

    print("begin multiple record check")

    study_list = retrieve_ref('study_list')
    # the next five values are retrieved but not used below
    format_types = retrieve_ref('format_types')
    segment_list = retrieve_ref('segment_list')
    sensor_list = retrieve_ref('sensor_list')

    timePreStudy = retrieve_ref('timePreStudy')
    timePostStudy = retrieve_ref('timePostStudy')

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])
        # copies of the meta columns; newly found records are appended to them
        source_path_new = list(df_meta['source_path'])
        timeBegin_list = list(df_meta['recordBegin'])
        timeEnd_list = list(df_meta['recordEnd'])

        for record in source_path:

            # index label of the first meta row with this source_path
            i = df_meta[df_meta['source_path'] == record].index.values[0]
            fullLength = float(df_meta.loc[i, 'fullLength'])
            truncatedLength = float(df_meta.loc[i, 'truncatedLength'])

            # only the temperature sensor of the untrimmed source is inspected
            format_type = 'source'
            segment = 'All'
            sensor = 'TEMP'
            df = retrieve_analyzed(study, format_type, record, segment, sensor)

            # filled below but not read again in this function
            new_record_list = []

            # only look when more than 30 remain after the truncated record
            if fullLength > truncatedLength + 30:

                # skip the truncated record plus a margin of 5
                df = df.drop(df[df['timeMinutes'] < truncatedLength + 5].index)

                # print('df = ')
                # print(df)

                # NOTE: these lists are snapshots taken before the trims
                # below, so index 0 is the first sample after the margin
                timeUnix = list(df['timeUnix'])
                timeMinutes = list(df['timeMinutes'])
                measurements = list(df['measurement'])

                # NOTE: from here on i is a position in the lists above, no
                # longer the meta row label
                for i in range(len(measurements)):

                    # keep i + 28 inside the list
                    if i < len(measurements) - 30:

                        # a rise of more than 3 across 28 samples
                        if measurements[i] + 3 < measurements[i + 28]:

                            print('new record found')

                            # keep only the rows from the end of the rise on
                            df = df.drop(
                                df[df['timeMinutes'] < timeMinutes[i +
                                                                   28]].index)

                            time_end = find_record_end_from_temp(df)
                            print('time_end = ' + str(time_end))

                            df = df.drop(
                                df[df['timeMinutes'] > time_end].index)

                            # print('df = ')
                            # print(df)

                            # second '_'-separated field of the source path
                            # presumably the wearable name - TODO confirm
                            wearable_name = record.split('_')
                            wearable_name = wearable_name[1]

                            # NOTE(review): timeUnix[0] comes from the
                            # snapshot, not from sample i + 28 - confirm this
                            # is the intended begin time
                            recordName = str(
                                str(int(timeUnix[0])) + '_' +
                                str(wearable_name))
                            print('recordName = ' + str(recordName))

                            new_record_list.append(recordName)

                            # the new record is listed under the same
                            # source_path as the record it was found in
                            source_path_new.append(record)
                            timeBegin_list.append(int(timeUnix[0]))
                            print('timeUnix[0:20] = ')
                            print(timeUnix[0:20])
                            timeEnd = min(timeUnix)
                            print('timeEnd = ' + str(timeEnd))
                            # NOTE(review): the stored end is the smallest
                            # snapshot time + 60; time_end computed above is
                            # not stored - confirm
                            timeEnd = min(timeUnix) + 60
                            print('timeEnd = ' + str(timeEnd))
                            timeEnd_list.append(int(timeEnd))

                            # stop at the first rise found for this record
                            break

        # rebuilt from the three lists only: no other df_meta column is
        # copied into the saved frame
        df_meta_new = pd.DataFrame()
        df_meta_new['source_path'] = source_path_new
        df_meta_new['recordBegin'] = timeBegin_list
        df_meta_new['recordEnd'] = timeEnd_list

        save_meta(study, df_meta_new)