Beispiel #1
0
def main():
    """Import the distance measurements of the QS export file.

    The header line of the export is skipped. For every non-empty line one
    document (day, start, end, distance) is stored under
    ``raw_data/qs_export/distance``.
    """
    with open(PATH_TO_QS_EXPORT_FILE) as fp:
        reader = csv.reader(fp, delimiter=';')
        next(reader, None)  # skip the header line
        for row in tqdm(reader):
            if not row:
                continue

            # the reader splits on ';' only; the actual fields are the
            # comma-separated parts of the first column
            fields = row[0].split(',')
            start = dt.strptime(fields[0], '%d-%b-%Y %H:%M')
            end = dt.strptime(fields[1], '%d-%b-%Y %H:%M')

            document = {
                # midnight of the day on which the measurement started
                'date': dt(start.year, start.month, start.day),
                'start': start,
                'end': end,
                'distance': float(fields[2]),
            }
            path = ['raw_data', 'qs_export', 'distance']
            save_to_database(document, path=path)
Beispiel #2
0
def main():
    """Condense the raw QS export measurements into daily time series.

    For each category the entries of ``raw_data/qs_export/<category>`` are
    summed up per day. The heart rate categories are then divided by the
    number of entries of that day, i.e. they end up as daily averages.
    Each series is stored under
    ``stats/time series/daily/health/activity/<category>``.
    """
    categories = [
        'active calories',
        'distance',
        'cycling distance',
        'steps',
        'flights climbed',
        'heart rate',
        'heart rate at rest',
        # 'heart rate variability',
    ]
    # categories for which the daily mean is stored instead of the daily sum
    averaged_categories = (
        'heart rate', 'heart rate at rest', 'heart rate variability',
    )

    # one prepared (still empty) time series per category
    series = {
        name: prepare_dict(config.START_DATE, config.END_DATE)
        for name in categories
    }

    for name in categories:
        daily = series[name]
        samples_per_day = {}
        for entry in config.MDB['raw_data']['qs_export'][name].find():

            # cut the time of day off, only the day itself is the key
            day_string = entry['date'].strftime('%Y-%m-%d')
            day = dt.strptime(day_string, '%Y-%m-%d')
            value = entry[name]

            if daily[day] is None:
                # first measurement of this day
                daily[day] = value
                samples_per_day[day] = 1.
            else:
                daily[day] += value
                samples_per_day[day] += 1.

        if name in averaged_categories:
            # only days that received at least one measurement are listed
            for day, count in samples_per_day.items():
                daily[day] /= count

    # save to database
    for name, daily in series.items():
        days = sorted(daily)
        document = {'date': days, name: [daily[day] for day in days]}
        path = ['stats', 'time series', 'daily', 'health', 'activity', name]
        save_to_database(document, path=path)
Beispiel #3
0
def main():
    """Import the daily journal files into the database.

    Every regular, non-hidden file in ``PATH_TO_DAILY_LOGS`` is read and
    stored as one document under ``raw_data/daily_log``. The part of the
    file name in front of the first dot has to be a ``YYYY-MM-DD`` date.

    Raises:
        ValueError: if a file name does not start with a valid date.
    """
    for file_name in tqdm(sorted(os.listdir(PATH_TO_DAILY_LOGS))):
        # hidden files (e.g. '.DS_Store') are no journal entries
        if file_name.startswith('.'):
            continue
        file_path = os.path.join(PATH_TO_DAILY_LOGS, file_name)
        if os.path.isdir(file_path):  # skip directories
            continue

        with open(file_path) as fp:
            content = fp.readlines()

        date = dt.strptime(file_name.split('.')[0], '%Y-%m-%d')
        text = ''.join(content)

        document = {
            'date': date,
            'content': content,
            'nr of characters': len(text),
            # split() without argument splits on any run of whitespace, so
            # empty files, blank lines and double spaces add no words
            'nr of words': len(text.split()),
        }
        path = ['raw_data', 'daily_log']
        save_to_database(document, path=path)
Beispiel #4
0
def main():
    """Compute the correlation matrix of the daily datasets.

    The matrix is stored (as nested lists, together with today's date)
    under ``stats/correlations`` and plotted afterwards.
    """
    df = prepare_datasets({'resolution': 'daily'})
    correlation_matrix = get_correlation_matrix(df)

    # turn the rows of the matrix into plain lists for the database
    matrix_rows = [list(matrix_row) for matrix_row in correlation_matrix]
    today = dt.now().strftime('%Y-%m-%d')
    save_to_database(
        {'correlation matrix': matrix_rows, 'date': today},
        path=['stats', 'correlations'],
    )

    plot_correlation_matrix(df, correlation_matrix)
Beispiel #5
0
def main(cat, subcat):
    """Build daily 0/1 time series from the sleep notes of one subcategory.

    ``translate_notes[cat][subcat]`` maps each database key to the note
    texts that count as this key. For every key, each sleep cycle entry
    whose notes contain one of these texts is marked with 1. Entries
    without a match are marked with 0, but only once the first match has
    been seen while iterating; days before that keep the value
    ``prepare_dict`` gave them. Each series is stored under
    ``stats/time series/daily``.

    Args:
        cat: category, a key of ``translate_notes``.
        subcat: subcategory, a key of ``translate_notes[cat]``.

    Returns:
        None. Nothing is done if ``cat``/``subcat`` is unknown.
    """
    if cat not in translate_notes:
        return
    if subcat not in translate_notes[cat]:
        return
    translations_by_key = translate_notes[cat][subcat]

    # the raw data is the same for every key, so it is fetched only once
    entries = list(config.MDB['raw_data']['sleep cycle'].find({}))

    for db_key, translations in translations_by_key.items():
        # remove the results of earlier runs for THIS key (the name of the
        # sub-collection is the value of db_key, not the text 'db_key')
        collection = MDB['stats']['time series']['daily'][cat]
        collection[subcat][db_key].delete_many({})

        # prepared time series for this key
        series = prepare_dict(START_DATE, END_DATE)

        found_entry = False
        for entry in entries:
            if any(t in entry['sleep notes'] for t in translations):
                series[entry['date']] = 1
                found_entry = True
            elif found_entry:
                # explicit "no" only after the key has shown up once
                series[entry['date']] = 0

        # create document & save to database
        dates = sorted(series)
        document = {
            'dates': dates,
            'timestamps': [d.timestamp() for d in dates],
            'values': [series[d] for d in dates],
            'category': cat,
            'subcategory': subcat,
            'title': db_key,
            'resolution': 'daily',
        }
        path = ['stats', 'time series', 'daily']
        save_to_database(document, path=path)
Beispiel #6
0
def main():
    """Create daily 0/1 time series for weekend and season flags.

    For each key in ``db_keys`` a series from ``prepare_dict`` is filled
    with 1 on days matching the flag and 0 otherwise, then stored under
    ``stats/time series/daily``. Keys without a known flag are stored with
    the values ``prepare_dict`` returned.
    """
    # The checks use weekday()/month instead of strftime('%a'/'%b'): the
    # formatted names depend on the active locale, the numbers do not.
    indicators = {
        'whether it was weekend': lambda day: day.weekday() >= 5,
        'whether it was spring': lambda day: day.month in (3, 4, 5),
        'whether it was summer': lambda day: day.month in (6, 7, 8),
        'whether it was fall': lambda day: day.month in (9, 10, 11),
        'whether it was winter': lambda day: day.month in (12, 1, 2),
    }

    foo = {db_key: prepare_dict(START_DATE, END_DATE) for db_key in db_keys}
    for db_key in db_keys:
        series = foo[db_key]
        indicator = indicators.get(db_key)
        if indicator is not None:
            for date in series:
                series[date] = int(indicator(date))

        # create document & save to database
        dates = sorted(series)
        document = {
            'dates': dates,
            'timestamps': [d.timestamp() for d in dates],
            'values': [series[d] for d in dates],
            'category': 'various',
            'subcategory': 'seasons etc.',
            'title': db_key,
            'resolution': 'daily',
        }
        path = ['stats', 'time series', 'daily']
        save_to_database(document, path=path)
Beispiel #7
0
def main():
    """Import exported WhatsApp chats into the database.

    Every file in ``PATH_TO_WHATSAPP_DATA`` (except '.DS_Store') is read
    as one chat. Lines are expected to look like
    ``[dd.mm.yy, HH:MM:SS] sender: text``; lines not starting with '['
    are appended to the message before them. One document per message is
    stored under ``raw_data/whatsapp``.

    Returns:
        list: always empty. ``chat_history`` is never filled and is only
        returned to keep the interface unchanged.

    Raises:
        IndexError: if a message has no ']' or no ':' separator, or if a
            file starts with a line that does not begin with '['.
    """
    chat_history = []
    for chat_file in os.listdir(PATH_TO_WHATSAPP_DATA):
        if chat_file == '.DS_Store':
            continue
        chat_name = chat_file.split('.')[0]
        # a name ending in a digit 1-9 loses that digit and the character
        # in front of it (presumably numbered exports of the same chat)
        if chat_name[-1] in '123456789':
            chat_name = chat_name[:-2]

        path_to_chat_file = os.path.join(PATH_TO_WHATSAPP_DATA, chat_file)
        with open(path_to_chat_file) as fp:
            content = fp.readlines()

        # make sure one item corresponds to exactly one message
        messages = []
        for line in content:
            if line.startswith('['):
                messages.append(line)
            else:
                messages[-1] += '\n' + line

        for message in messages:
            # Split only at the FIRST ']' and the FIRST ':' after it, so
            # that text containing these characters (URLs, times, ...)
            # is stored completely instead of being cut off.
            head = message.split(']', 1)
            date_str = head[0][1:]
            timestamp = dt.strptime(date_str, '%d.%m.%y, %H:%M:%S').timestamp()
            body = head[1][1:].split(':', 1)
            message_content = body[1][1:]

            sender = body[0] if body[0] != 'vinc' else 'Vincent Mader'
            if sender == 'Vincent Mader':
                receiver = chat_name
            else:
                receiver = 'Vincent Mader'

            document = {
                'message_type': 'whatsapp',
                'chat_name': chat_name,
                'timestamp': timestamp,
                'sender': sender,
                'receiver': receiver,
                'message_content': message_content,
            }
            path = ['raw_data', 'whatsapp']
            save_to_database(document, path=path)

    return chat_history
Beispiel #8
0
def main():
    """Turn the raw sleep cycle entries into one daily series per key.

    The sub-collections of ``db_keys`` below
    ``stats/time series/daily/health/sleep analysis`` are emptied first.
    Then, for every key in ``db_keys``, the value of each raw entry is
    written to the entry's date and the resulting series is stored under
    ``stats/time series/daily``.
    """
    # empty the collections of the keys that are processed
    analysis = MDB['stats']['time series']['daily']['health']['sleep analysis']
    for key in db_keys:
        analysis[key].delete_many({})

    # one prepared time series per key
    series = {key: prepare_dict(START_DATE, END_DATE) for key in db_keys}

    # copy the raw values into the series
    for entry in MDB['raw_data']['sleep cycle'].find({}):
        for key in db_keys:
            day = entry['date']
            series[key][day] = entry[key]

    # create the documents & save them to the database
    for key in db_keys:
        daily = series[key]
        days = sorted(daily)
        save_to_database(
            {
                'dates': days,
                'timestamps': [day.timestamp() for day in days],
                'values': [daily[day] for day in days],
                'category': 'health',
                'subcategory': 'sleep analysis',
                'title': key,
                'resolution': 'daily',
            },
            path=['stats', 'time series', 'daily'],
        )
Beispiel #9
0
def main():
    """Import the Spotify streaming history into the database.

    All ``StreamingHistory*`` files in the sub-directories of
    ``<PRD>/spotify`` are read as JSON lists of datapoints. Every
    datapoint is stored once under ``raw_data/spotify/artist name``, even
    if it is contained in several export files.
    """
    PATH_TO_SPOTIFY_DATA = os.path.join(PRD, 'spotify')

    # Datapoints that were stored already. The whole datapoint is the key
    # because 'endTime' only has minute resolution: several distinct
    # (skipped) tracks can end within the same minute.
    seen_datapoints = set()

    for export_directory in sorted(os.listdir(PATH_TO_SPOTIFY_DATA)):
        if export_directory == '.DS_Store':
            continue
        path_to_export_directory = os.path.join(PATH_TO_SPOTIFY_DATA,
                                                export_directory)
        for export_file in os.listdir(path_to_export_directory):
            if not export_file.startswith('StreamingHistory'):
                continue
            path_to_export_file = os.path.join(path_to_export_directory,
                                               export_file)

            with open(path_to_export_file) as fp:
                content = json.load(fp)

            for datapoint in content:
                play_end_dt = dt.strptime(datapoint['endTime'],
                                          '%Y-%m-%d %H:%M')
                artist_name = datapoint['artistName']
                track_name = datapoint['trackName']
                ms_played = datapoint['msPlayed']

                key = (play_end_dt, artist_name, track_name, ms_played)
                if key in seen_datapoints:
                    continue
                seen_datapoints.add(key)

                document = {
                    'date': play_end_dt,
                    'artist name': artist_name,
                    'track name': track_name,
                    'time played [s]': ms_played / 1000.
                }
                path = ['raw_data', 'spotify', 'artist name']
                save_to_database(document, path=path)
Beispiel #10
0
def main():
    """Import the Facebook inbox into the database.

    For every directory in ``<PRD>/facebook/json/messages/inbox`` the file
    ``message.json`` is read. Chats with more than two participants are
    skipped, as are messages without a 'content' field. All other messages
    are stored as one document each under
    ``raw_data/facebook/chat_history``.
    """
    path_to_inbox = os.path.join(PRD, 'facebook/json/messages/inbox')

    for dir_name in os.listdir(path_to_inbox):

        path_to_chat = os.path.join(path_to_inbox, dir_name, 'message.json')
        with open(path_to_chat) as fp:
            content = json.load(fp)

        if len(content['participants']) > 2:  # TODO: handle group chats
            continue

        # TODO: make dict to translate into real names
        chat_id = dir_name.split('_')[0]

        for msg in content['messages']:
            sender = msg['sender_name']
            timestamp = int(msg['timestamp_ms']) / 1e3  # ms -> s
            # messages written by ME go to the chat partner (identified by
            # the chat id), everything else was sent to ME
            receiver = chat_id if sender == ME else ME

            if 'content' not in msg:  # message without text
                continue
            message_content = msg['content']

            document = {
                'message type': 'facebook',
                'chat name': chat_id,
                'timestamp': timestamp,
                'sender': sender,
                'receiver': receiver,
                'message content': message_content,
                'message length': len(message_content),
            }
            path = ['raw_data', 'facebook', 'chat_history']
            save_to_database(document, path=path)
Beispiel #11
0
def main():
    """Import the sleep measurements of the QS export file.

    The header line of the export is skipped. For every non-empty line two
    documents are stored: the time to fall asleep under
    ``raw_data/qs_export/time to fall asleep`` and the number of sleep
    cycles (with start and end) under
    ``raw_data/qs_export/nr of sleep cycles``.
    """
    with open(PATH_TO_QS_EXPORT_FILE) as fp:
        reader = csv.reader(fp, delimiter=';')
        next(reader, None)  # skip the header line
        for row in tqdm(reader):
            if not row:
                continue

            # the reader splits on ';' only; the actual fields are the
            # comma-separated parts of the first column
            fields = row[0].split(',')
            start = dt.strptime(fields[0], '%d-%b-%Y %H:%M')
            end = dt.strptime(fields[1], '%d-%b-%Y %H:%M')
            # midnight of the day on which the measurement started
            date = dt(start.year, start.month, start.day)
            time_to_fall_asleep = float(fields[4])
            nr_of_sleep_cycles = float(fields[5])

            # save to database
            document = {
                'date': date,
                'time to fall asleep': time_to_fall_asleep,
            }
            path = ['raw_data', 'qs_export', 'time to fall asleep']
            save_to_database(document, path=path)

            document = {
                'date': date,
                'start': start,
                'end': end,
                'nr of sleep cycles': nr_of_sleep_cycles,
            }
            path = ['raw_data', 'qs_export', 'nr of sleep cycles']
            save_to_database(document, path=path)
Beispiel #12
0
def _parse_percentage(text):
    """Drop the last character (presumably '%') and return value / 100."""
    return float(text[:-1]) / 100


def _build_sleep_document(row, mood_scale, notes_column, has_extra_columns):
    """Translate one row of a Sleep Cycle export into a database document.

    Args:
        row: list with the column values of one CSV line.
        mood_scale: maps the wake-up mood label of the export to -1/0/1.
        notes_column: index of the colon-separated sleep notes column.
        has_extra_columns: whether the row carries sleep regularity
            (column 3), air pressure (8), city (9) and movements per hour
            (10); if not, these values are stored as None.

    Returns:
        dict: the document to store.
    """
    start = dt.strptime(row[0], '%Y-%m-%d %H:%M:%S')
    end = dt.strptime(row[1], '%Y-%m-%d %H:%M:%S')
    notes = row[notes_column]
    return {
        'date': dt(start.year, start.month, start.day),
        'start': start,
        'end': end,
        'sleep quality': _parse_percentage(row[2]),
        'sleep regularity': (
            _parse_percentage(row[3]) if has_extra_columns else None
        ),
        # an empty mood column means that no mood was recorded
        'wake-up mood': mood_scale[row[4]] if row[4] else None,
        'air pressure': row[8] if has_extra_columns else None,
        'city': row[9] if has_extra_columns else None,
        'movements per hour': row[10] if has_extra_columns else None,
        # total_seconds() is the complete duration; the 'seconds'
        # attribute would leave out the 'days' part of the timedelta
        'hours in bed': (end - start).total_seconds() / 3600.,
        # an empty column or '0' means that there are no notes
        'sleep notes': notes.split(':') if notes and notes != '0' else [],
    }


def main():
    """Import the Sleep Cycle CSV exports into the database.

    Two export layouts are known and told apart by their file name:
    'sleepdata5.csv' (old layout) and 'sleepdata.csv' (new layout). Every
    non-empty row below the header line is stored as one document under
    ``raw_data/sleep cycle``. Files with any other name are skipped, as
    their column layout is unknown.
    """
    PATH_TO_SLEEP_CYCLE_EXPORTS = os.path.join(
        config.PATH_TO_RAW_DATA, 'sleep_cycle'
    )

    # file name -> (mood scale, notes column, extra columns available)
    export_formats = {
        'sleepdata5.csv': ({':)': 1, ':|': 0, ':(': -1}, 5, False),
        'sleepdata.csv': ({'Good': 1, 'OK': 0, 'Bad': -1}, 20, True),
    }

    for export in os.listdir(PATH_TO_SLEEP_CYCLE_EXPORTS):
        export_format = export_formats.get(export)
        if export_format is None:
            continue
        mood_scale, notes_column, has_extra_columns = export_format

        path_to_export = os.path.join(PATH_TO_SLEEP_CYCLE_EXPORTS, export)
        with open(path_to_export) as fp:
            reader = csv.reader(fp, delimiter=';')
            next(reader, None)  # skip the header line

            for row in reader:
                if not row:
                    continue

                # create document & save to database
                document = _build_sleep_document(
                    row, mood_scale, notes_column, has_extra_columns
                )
                path = ['raw_data', 'sleep cycle']
                save_to_database(document, path=path)