def load_data_from_local(filename=None, verbose=True):
    """Load bike data from a local file, returning None when loading fails.

    Args:
        filename: Name handed to ``resolve_filepath`` to obtain the path
            that is then passed to ``io.load_file``.
        verbose: When true, print an error message if loading fails.

    Returns:
        Whatever ``io.load_file`` returns, or None if it raised.
    """
    full_filename = resolve_filepath(filename)
    try:
        return io.load_file(full_filename)
    except Exception:
        # Loading is best-effort: callers get None instead of an error.
        # ``Exception`` (rather than a bare ``except:``) still lets
        # KeyboardInterrupt and SystemExit propagate.
        if verbose:
            print("ERROR: Bike data not loaded.")
        return None
def load_features():
    """Load the feature table and split it into features and target.

    Returns:
        A ``(features, y)`` pair: ``features`` is the loaded table without
        the answer columns ``fremont_bridge_nb``, ``fremont_bridge_sb`` and
        ``y``; ``y`` is the table's ``'y'`` column.
    """
    res = io.load_file('feature_file')
    # These columns are the answers, so they are kept out of the features.
    # NOTE(review): the earlier comment also listed 'date', but that column
    # was never dropped here -- confirm whether it should be.
    y_cols = ['fremont_bridge_nb', 'fremont_bridge_sb', 'y']
    feature_cols = res.columns.drop(y_cols)
    return res[feature_cols], res['y']
def pull_bike_data(bike_data=None, save_data=True, verbose=True):
    """Fetch the bike-counter dataset (or only its newest rows) and return it.

    One of three things happens, depending on ``bike_data`` and on the
    ``last_updated`` stamp stored in the info file:

    * ``bike_data`` is None: the dataset is downloaded (at most 50000 rows).
    * ``bike_data`` is given and the last check was before today: only rows
      whose ``date`` is later than the last row of ``bike_data`` are
      requested and appended.
    * otherwise nothing is requested.

    Afterwards the two bridge-count columns are converted to numeric
    dtypes, ``y`` is set to their sum, and the date features are expanded.

    Args:
        bike_data: Previously loaded DataFrame, or None to pull everything.
        save_data: When true, store the frame and the info record through
            ``io.save_file``.
        verbose: When true, print progress messages.

    Returns:
        The updated DataFrame.
    """
    database_date_format, date_format, url, dataset_identifier = io.get_params(
        'database_date_format', 'date_format', 'url', 'dataset_identifier')
    info = io.load_file("info_file")
    last_pulled = info.get('last_updated', None)
    if last_pulled is not None:
        last_pulled_dt = datetime.strptime(last_pulled, date_format)
    else:
        # Never pulled before: use the epoch so the staleness check passes.
        last_pulled_dt = datetime(1970, 1, 1)

    if bike_data is None:
        client = auth.get_soda_client(url)
        json_data = client.get(dataset_identifier=dataset_identifier,
                               content_type='json',
                               limit=50000)
        bike_data = pd.DataFrame(json_data)
        date = pd.to_datetime(bike_data['date'])
        # NOTE(review): this call passes the parsed Series while the other
        # call sites pass the column name 'date' -- confirm that
        # expand_datetime_features accepts both forms.
        expand_datetime_features(bike_data, date)
        info['last_updated'] = datetime.now().strftime(date_format)
        if verbose:
            print("New Data Pulled: {0} new rows of data".format(
                bike_data.shape[0]))
    elif last_pulled_dt.date() < datetime.today().date():
        if verbose:
            print("Last Checked database on {0}, Checking for New Data".format(
                last_pulled_dt))
        client = auth.get_soda_client(url)
        # Only ask for rows newer than the last one already held locally.
        latest_date = bike_data['date'].iloc[-1]
        new_json_data = client.get(
            dataset_identifier=dataset_identifier,
            content_type='json',
            limit=50000,
            where='date > "{0}"'.format(latest_date))
        new_bike_data = pd.DataFrame(new_json_data)
        info['last_updated'] = datetime.now().strftime(date_format)
        if 'date' in new_bike_data.columns:  # Means data was pulled from server
            expand_datetime_features(new_bike_data, 'date')
            # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
            bike_data = pd.concat([bike_data, new_bike_data],
                                  ignore_index=True)
            if verbose:
                print("New data pulled:\n{0} new rows of data\nTotal = {1}".format(
                    new_bike_data.shape[0], bike_data.shape[0]))
    else:
        if verbose:
            print("No new data")

    # pd.to_numeric only accepts 1-D input, so convert one column at a time.
    for count_col in ('fremont_bridge_sb', 'fremont_bridge_nb'):
        bike_data[count_col] = pd.to_numeric(bike_data[count_col])
    bike_data['y'] = bike_data['fremont_bridge_nb'].astype(
        float) + bike_data['fremont_bridge_sb'].astype(float)

    expand_datetime_features(bike_data, 'date')
    if save_data:
        io.save_file("data_file", bike_data)
        io.save_file("info_file", info)
    if verbose:
        print("Bike data is up-to-date.")
    return bike_data
# Example #4
# 0
from visual_modules import *
from computational_modules import *

import utils.io as io
import utils.alg as alg
import utils.gen as gen
import utils.features as features

# Let pandas use up to 320 characters per console line; with
# expand_frame_repr enabled, frames wider than that wrap across lines.
# NOTE(review): `pd` is not imported explicitly in the visible lines --
# presumably it comes from one of the star imports above; confirm.
desired_width = 320
pd.set_option("display.width", desired_width)
pd.set_option('expand_frame_repr', True)

# Project configuration, loaded once and passed to io.get_path below.
cfg = io.load_config_file()

# get paths
pwd = io.get_path('project_path', cfg=cfg)

# Data

# Whatever io.load_file returns for the 'feature_file' entry; the groupby
# below requires it to have 'day', 'month' and 'year' columns.
xdf = io.load_file('feature_file')

# Group rows by (day, month, year) and take the mean within each group.
xdf1 = xdf.groupby(['day', 'month', 'year']).mean()
def get_private_key(key_type='googlemaps_key', cfg=None):
    """Return the ``'private_key'`` entry of a stored key file.

    Args:
        key_type: Name passed to ``io.load_file`` to select the key file.
        cfg: Configuration passed to ``io.load_file``. When None (the
            default) it is loaded with ``io.load_config_file()`` at call
            time, so importing this module does no I/O and the
            configuration is not frozen at definition time.

    Returns:
        The value stored under ``'private_key'`` in the loaded key data.
    """
    if cfg is None:
        cfg = io.load_config_file()
    key_json = io.load_file(name=key_type, cfg=cfg)
    return key_json['private_key']
def get_client_id(key_type='googlemaps_key', cfg=None):
    """Return the ``'client_id'`` entry of a stored key file.

    Args:
        key_type: Name passed to ``io.load_file`` to select the key file.
        cfg: Configuration passed to ``io.load_file``. When None (the
            default) it is loaded with ``io.load_config_file()`` at call
            time, so importing this module does no I/O and the
            configuration is not frozen at definition time.

    Returns:
        The value stored under ``'client_id'`` in the loaded key data.
    """
    if cfg is None:
        cfg = io.load_config_file()
    key_json = io.load_file(name=key_type, cfg=cfg)
    return key_json['client_id']