def _split_count_by_day(start, end, count):
    """Split ``count`` over every calendar day touched by ``start``..``end``.

    ``start`` and ``end`` are naive wall-clock datetimes. The first and last
    day receive a share proportional to the time spent in them; whatever is
    left is spread evenly over the full days in between. The shares always
    add up to ``count``.

    Returns a list of ``(date, int)`` pairs, one per calendar day.
    """
    first_day = start.date()
    last_day = end.date()
    # Same day (or a malformed record that ends before it starts): everything
    # belongs to the start day.
    if last_day <= first_day:
        return [(first_day, count)]

    # Seconds from the start to the first midnight after it. This is computed
    # from the start day, so it is never 0, even for a record that begins
    # exactly at midnight.
    day_start = start.replace(hour=0, minute=0, second=0, microsecond=0)
    till_midnight = (day_start + timedelta(days=1) - start).total_seconds()
    # Seconds from the last midnight up to the end.
    last_midnight = end.replace(hour=0, minute=0, second=0, microsecond=0)
    from_midnight = (end - last_midnight).total_seconds()
    # Whole calendar days strictly between the first and the last day. Using
    # dates (not the timedelta's day count) keeps this right when the end
    # time-of-day is earlier than the start time-of-day.
    mid_day_count = (last_day - first_day).days - 1
    total = till_midnight + 60 * 60 * 24 * mid_day_count + from_midnight

    count_before_midnight = round(till_midnight / total * count)
    if mid_day_count == 0:
        # Give the remainder to the last day so rounding cannot lose counts.
        return [(first_day, count_before_midnight),
                (last_day, count - count_before_midnight)]

    count_after_midnight = round(from_midnight / total * count)
    shares = [(first_day, count_before_midnight)]
    # Spread the rest evenly; the first ``extra`` middle days get one more.
    base, extra = divmod(count - count_before_midnight - count_after_midnight,
                         mid_day_count)
    for i in range(mid_day_count):
        mid_day = first_day + timedelta(days=i + 1)
        shares.append((mid_day, base + (1 if i < extra else 0)))
    shares.append((last_day, count_after_midnight))
    return shares


def generate_data_by_date(apple_data_type, dataset_name, data_type):
    """Sum the values of matching records per day of YEAR and save them.

    Walks the children of the module-level ``root``, keeps ``Record``
    elements whose ``type`` attribute equals ``apple_data_type`` and whose
    start date falls in ``YEAR``, and accumulates their integer ``value`` per
    calendar day. A record spanning several days is split across those days
    (see ``_split_count_by_day``); days outside ``YEAR`` are dropped.

    The result, a date-sorted list of ``{'date': 'M/D/YYYY', 'value': int}``
    dicts, is handed to ``_savedatasets.save_dataset`` together with
    ``_setup.NAME``, ``dataset_name`` and ``data_type``. Nothing is returned.
    """
    date_format = '%Y-%m-%d %H:%M:%S %z'
    totals = {}

    for child in root:
        attr = child.attrib

        # find the matching data type
        if child.tag != 'Record' or attr.get('type') != apple_data_type:
            continue

        start_date = datetime.strptime(attr['startDate'], date_format)
        # check year
        if start_date.year != YEAR:
            continue
        end_date = datetime.strptime(attr['endDate'], date_format)

        count = int(attr['value'])
        # Work in the wall-clock time recorded with each timestamp.
        shares = _split_count_by_day(start_date.replace(tzinfo=None),
                                     end_date.replace(tzinfo=None), count)
        for day, share in shares:
            if day.year == YEAR:
                totals[day] = totals.get(day, 0) + share

    # convert dict to array, ordered by date; the source data is often not
    # in date order
    data_of_year = [
        dict(date=day.strftime('%-m/%-d/%Y'), value=totals[day])
        for day in sorted(totals)
    ]
    # save data as json
    _savedatasets.save_dataset(data_of_year, _setup.NAME, dataset_name,
                               data_type)
import csv
from datetime import datetime
import _setup
import _savedatasets

YEAR = _setup.YEAR

# Daily usage totals keyed by the raw DATE column. Values are kept as integer
# hundredths so that summing many interval readings does not accumulate
# floating-point error.
day_dict = dict()

# The CSV must begin with the column-heading row: remove any lines that
# precede it in the exported file before running this script.
with open('data/pge_electric_interval_data.csv', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        date = row['DATE']
        # round (not truncate): float('0.29') * 100 is 28.999999999999996,
        # which int() would turn into 28
        usage = round(float(row['USAGE']) * 100)
        day_dict[date] = day_dict.get(date, 0) + usage

data_of_year = []
for day in day_dict:
    day_p = datetime.strptime(day, '%m/%d/%y')
    day_formatted = datetime.strftime(day_p, '%-m/%-d/%Y')
    # divide by 100 to get the sum back as a float
    data_of_year.append(dict(date=day_formatted, value=day_dict[day] / 100))

# save datasets
_savedatasets.save_dataset(data_of_year, _setup.PLACE, 'electricity-usage',
                           'electricity')
Example #3
0

def addDate(date):
    """Put a new entry for ``date`` with a value of 1 at the front of ``data_of_year``."""
    entry = {'date': date, 'value': 1}
    data_of_year.insert(0, entry)


def getScrobble(d):
    """Count one scrobble for the day of ``d``.

    Only the first entry of ``data_of_year`` is compared: if it already
    holds this day its value is incremented, otherwise a new entry is
    added in front of it via ``addDate``.
    """
    date = d.strftime('%-m/%-d/%-Y')
    if data_of_year and data_of_year[0]['date'] == date:
        data_of_year[0]['value'] += 1
        return
    addDate(date)


with open('data/' + _setup.LASTFM + '.csv', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        # The timestamps carry no offset. Parse them explicitly as UTC (as
        # the name utc_date indicates): calling astimezone() on a naive
        # datetime would treat it as the machine's local time instead.
        utc_date = datetime.strptime(row['date'] + ' +0000',
                                     '%d %b %Y %H:%M %z')
        date = utc_date.astimezone(DEFAULT_TIMEZONE)
        if date.year == YEAR:
            getScrobble(date)
        elif date.year < YEAR:
            # NOTE(review): stopping here presumably relies on the rows being
            # ordered newest first - confirm against the export.
            break

# save datasets
_savedatasets.save_dataset(data_of_year, _setup.LASTFM, 'lastfm', 'lastfm')
def get_tweet_info(d):
    """Count one tweet for the day of ``d``.

    Increments the first entry of ``data_of_year`` when it is for the same
    day; in every other case (including an empty list) a new entry is
    created through ``add_date``.
    """
    date = d.strftime('%-m/%-d/%-Y')
    same_day = bool(data_of_year) and data_of_year[0]['date'] == date
    if not same_day:
        add_date(date)
    else:
        data_of_year[0]['value'] += 1

# collect data first
tweets = []
# The exported tweet .js files must first be turned into JSON by removing
# the leading variable assignment; the resulting files are read below.
for file_name in ['tweet', 'tweet-part1']:
    with open('data/' + file_name + '.json') as file:
        data = json.load(file)
    for d in data:
        utc_date = datetime.strptime(d['created_at'],
                                     '%a %b %d %H:%M:%S %z %Y')
        date = utc_date.astimezone(DEFAULT_TIMEZONE)
        if date.year == YEAR:
            tweets.append(dict(date=date, id=d['id_str']))

# Process tweets from the highest id to the lowest. The ids are numeric
# strings, so compare them as integers: as plain strings, ids with a
# different number of digits would be ordered incorrectly.
for tweet in sorted(tweets, key=lambda x: int(x['id']), reverse=True):
    get_tweet_info(tweet['date'])

# save dataset
_savedatasets.save_dataset(data_of_year, _setup.TWITTER, 'twitter', 'twitter')
Example #5
0
                timestamp = dt.replace(tzinfo=pytz.utc).timestamp()
                dt_tz = datetime.fromtimestamp(timestamp).astimezone(pytz.utc).astimezone(DEFAULT_TIMEZONE)
                date = dt_tz.strftime('%-m/%-d/%Y')

                if current_date == '7/29/2018':
                    print(date, current_date, row['totalOdometerMeters'])
                if current_date == '7/30/2018':
                    print(date, current_date, row['totalOdometerMeters'])

                # get the very first odometer
                if first_odometer_of_date == 0 and dt_tz.year == YEAR:
                    first_odometer_of_date = int(row['totalOdometerMeters'])

                # date change
                if date != current_date and first_odometer_of_date > 0:
                    driving_distance = int(row['totalOdometerMeters']) - first_odometer_of_date

                    if date == '7/30/2018' or date == '7/31/2018':
                        print ('---', date, current_date, driving_distance)

                    if driving_distance > 0 and dt_tz.year == YEAR:
                        # meter to mile
                        data_of_year.append(dict(date=current_date, value= math.ceil(driving_distance * 0.000621371 * 100) / 100 ))

                    # reset for next day
                    current_date = date
                    first_odometer_of_date = int(row['totalOdometerMeters'])

# save datasets
_savedatasets.save_dataset(data_of_year, _setup.CAR, 'driving', 'driving')
                    # accumulate time spent at home location by day
                    by_date[today] = by_date[
                        today] + from_midnight if today in by_date.keys(
                        ) else from_midnight
                    by_date[yesterday] = by_date[
                        yesterday] + till_midnight if yesterday in by_date.keys(
                        ) else till_midnight

                # same date
                else:
                    duration = (current_time - prev_time).seconds
                    by_date[today] = by_date[
                        today] + duration if today in by_date.keys(
                        ) else duration

        elif year < YEAR:
            break

# Turn the per-day totals in by_date into a list of records. The totals are
# divided by 60 twice and kept to one decimal place; days that round to
# zero are left out. The list ends up in the reverse of by_date's order.
data_of_year = []
for d, total in by_date.items():
    hour = round(total / 60 / 60 * 10) / 10
    if hour > 0.0:
        data_of_year.append(dict(date=d, value=hour))
data_of_year.reverse()

print(data_of_year)
# save datasets
dataset_name = 'time-at-' + _setup.PLACE
_savedatasets.save_dataset(data_of_year, _setup.GOOGLE, dataset_name,
                           'location')
Example #7
0
            data_of_year[len(data_of_year) - 1]['value'] += duration
        else:
            add_date(date, duration)


with open('data/piano.json') as file:
    data = json.load(file)

# Each item is indexed as d[0] (a timestamp string) and d[1] ('start' or
# 'stop'). Items are visited together with their successor, so the last
# item is only ever looked at as a successor.
for i, (d, end) in enumerate(zip(data, data[1:])):
    # fallback length, in minutes, used when the start time is missing
    diff_in_min = 20
    kind = d[1]
    if i == 0 and kind == 'stop':
        # the log opens with a 'stop': its start was never recorded
        t = datetime.strptime(d[0], '%B %d, %Y at %I:%M%p')
        add_duration(t.strftime('%-m/%-d/%Y'), diff_in_min)
    elif kind == 'start':
        start_time = datetime.strptime(d[0], '%B %d, %Y at %I:%M%p')
        # a session is only counted when the very next item is its 'stop'
        if end[1] == 'stop':
            end_time = datetime.strptime(end[0], '%B %d, %Y at %I:%M%p')
            # duration in whole minutes, credited to the start day
            diff_in_min = (end_time - start_time).seconds // 60
            date = start_time.strftime('%-m/%-d/%Y')
            add_duration(date, diff_in_min)

# save datasets
_savedatasets.save_dataset(data_of_year, 'tanyoung', 'piano_practice', 'piano')