def main():
    """Load amenity data, plot it, and emit summary statistics as markdown.

    Relies on module-level helpers: get_data, plot_amenities,
    amenity_IQR, to_markdown, and the matplotlib pyplot module (plt).
    """
    amenity_df = get_data()
    plot_amenities(amenity_df)
    plt.show()  # fixed: stray closing parenthesis was a SyntaxError

    stats = amenity_IQR(amenity_df)
    to_markdown(stats)  # tabulated form of stats table
Example #2
0
def get_datapoint():
    '''
        Retrieves datapoint from external server and inserts datapoint into MongoDB.

        Skips insertion when a document with the same object_id already
        exists. Relies on module-level names: requests, live_data (a Mongo
        collection), model, get_data, and parse_tickets.
    '''
    r = requests.get(
        'http://galvanize-case-study-on-fraud.herokuapp.com/data_point')
    data = r.json()

    # count_documents replaces the deprecated Cursor.count() (removed in
    # modern pymongo) and avoids fetching a cursor just to count it.
    if not live_data.count_documents({'object_id': data['object_id']}):
        data['prediction'] = model.predict(get_data(data))[0]
        data['potential_cost'] = parse_tickets(data['ticket_types'])[-1]
        live_data.insert_one(data)
        print('inserted datapoint')  # fixed: Python-2 print statement
def main():
    """Fit a linear baseline for event attendance and plot the results.

    Produces two figures: prediction error vs. actual attendance, and a
    horizontal bar chart of per-format attendance effects relative to the
    'Standard' format. Relies on module-level imports: data_prep,
    train_test_split, LinearRegression, np, plt.
    """
    (X, y, X_future, gp_data) = data_prep.get_data()

    # Splitting the data into Training and Test Sets
    X_train, X_test, y_train, y_test = train_test_split(X[:, :], y, test_size=0.2)

    # Fitting Multiple Linear Regression to the Training set
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)

    # Prediction values
    y_pred = regressor.predict(X_test).astype(int)
    y_delta = y_pred - y_test

    # Visualizing Our Model's accuracy
    plt.figure(1)
    plt.scatter(y_test, y_delta, color='red')
    plt.title('Baseline Accuracy (Linear Regression)')
    plt.xlabel('Actual Event Attendance')
    plt.ylabel('Error in Linear Prediction')

    # Determining Which Variables Carry Significance.
    # Columns 0-11 appear to be one-hot format indicators; map each back to
    # its format name via the first row where the indicator is set.
    format_weights = {'Block Constructed': 0.}
    for i in range(12):
        try:
            row = np.where(X[:, i] == 1)[0][0]
            format_weights[gp_data['Format'][row]] = regressor.coef_[i]
        except (IndexError, KeyError):  # fixed: was a bare except
            continue

    # Re-Center data on Standard For readability and sort
    for gp_format in format_weights:
        format_weights[gp_format] -= format_weights['Standard']
    format_weights.pop('Standard')
    # Sort (key, value) pairs once by weight instead of the original
    # O(n^2) value->key reverse lookup (which also mislabeled ties).
    pairs = sorted(format_weights.items(), key=lambda kv: kv[1])
    sorted_X = [value for _, value in pairs]
    sorted_y = [key for key, _ in pairs]

    # Create Bar Graphs
    plt.figure(2)
    index = np.arange(len(format_weights))
    plt.barh(index, list(sorted_X))
    plt.ylabel('New Grand Prix Format')
    plt.xlabel('Expected Change in Attendance')
    plt.title('Format Attendance Compared to Standard (Linear Approximation)')
    plt.yticks(index, sorted_y, fontsize=8, rotation=30)
    plt.show()
Example #4
0
def plot_result(filename):
    """Fit SVR models with three kernels, print their scores, and plot the
    rbf and linear predictions against the cross-validation targets.

    Relies on module-level imports: data_prep, SVR, plt.
    """
    df = data_prep.get_data(filename)
    [X, y] = data_prep.features(df)

    [X_train, X_test, X_cross] = data_prep.feature_scaling(X)
    [y_train, y_test, y_cross] = data_prep.data_set(y)
    ''' SVR with different kernels '''
    svr_lin = SVR(kernel='linear', C=1e3)
    svr_poly = SVR(kernel='poly', C=1e3, degree=2)
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.01)
    '''fitting model on training set '''
    # Hoist the repeated target-column lookup out of the three fit calls.
    y_target = y_train.loc[:, '1DayW'].values
    svr_rbf.fit(X_train.values, y_target)
    svr_lin.fit(X_train.values, y_target)
    svr_poly.fit(X_train.values, y_target)

    lin_score = svr_lin.score(X_cross, y_cross)
    poly_score = svr_poly.score(X_cross, y_cross)
    rbf_score = svr_rbf.score(X_cross, y_cross)

    print('Confidence score for linear kernel :', lin_score * 100)
    print('Confidence score for poly. kernel :', poly_score * 100)
    print('Confidence score for rbf kernel :', rbf_score * 100)

    print("\nPlotting graph for rbf kernel:")
    plt.scatter(y_cross.index, y_cross.values, color='black')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.plot(y_cross.index,
             svr_rbf.predict(X_cross),
             color='b',
             label='RBF Kernel SVR')
    plt.legend()  # fixed: labels were set but the legend was never drawn
    plt.show()

    print("\nPlotting graph for linear kernel:")
    plt.scatter(y_cross.index, y_cross.values, color='black')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.plot(y_cross.index,
             svr_lin.predict(X_cross),
             color='r',
             label='Linear Kernel SVR')
    plt.legend()  # fixed: see above
    plt.show()
def train(filename):
    """Fit a LinearRegression model and return cross-set predictions.

    Prints the model's confidence (score * 100) on the test and
    cross-validation sets, stores the cross-set predictions in a new
    'predictions' column on y_cross, and returns y_cross.
    Relies on module-level imports: data_prep and linear_model.
    """
    [X, y] = data_prep.features(data_prep.get_data(filename))
    [X_train, X_test, X_cross] = data_prep.feature_scaling(X)
    [y_train, y_test, y_cross] = data_prep.data_set(y)

    regressor = linear_model.LinearRegression()
    fitted = regressor.fit(X_train.values, y_train.values)  # train on the training split

    predictions = fitted.predict(X_test.values)  # test-set predictions (overwritten below)
    print("confidence on test set is ",
          regressor.score(X_test.values, y_test.values) * 100)

    predictions = fitted.predict(X_cross.values)
    print("confidence on cross validation set is ",
          regressor.score(X_cross.values, y_cross.values) * 100)
    y_cross['predictions'] = predictions

    return y_cross
Example #6
0
import pandas as pd
import numpy as np
import statsmodels.api as sm
from data_prep import get_data
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt

# Output PDF that will collect the figures for the two-wave BP model.
pdf = PdfPages("bp_model_2wave.pdf")

# Load two survey waves by year (presumably NHANES-style data — TODO confirm).
d1 = get_data(1999)
d2 = get_data(2015)

# Code year relative to 2000
d1["Year"] = -1
d2["Year"] = 15

# Stack the two waves row-wise into a single dataframe.
dx = pd.concat((d1, d2), axis=0)


def plot_fit_by_age(result, fml):
    # NOTE(review): this body looks like two unrelated snippets spliced
    # together by the scraper — matplotlib figure setup below, then torch
    # training-script code (from `filter = args.filter` onward) that
    # references names not defined here (args, seed, torch, os, sys,
    # get_default_device, responses_file, grades_file, part, val_size).
    # Verify against the original sources before relying on this function.

    # Create a dataframe in which all variables are at the reference
    # level
    da = dx.iloc[0:100, :].copy()
    da["RIDAGEYR"] = np.linspace(18, 80, 100)  # sweep age over 18..80
    da["RIDRETH1"] = "OH"  # presumably an ethnicity reference code — TODO confirm

    plt.figure(figsize=(8, 5))
    plt.clf()
    plt.axes([0.1, 0.1, 0.56, 0.8])  # leave room on the right (for a legend?)
    plt.grid(True)
    filter = args.filter  # NOTE(review): shadows the builtin `filter`

    torch.manual_seed(seed)

    # Save the command run
    if not os.path.isdir('CMDs'):
        os.mkdir('CMDs')
    with open('CMDs/train_filter.cmd', 'a') as f:
        f.write(' '.join(sys.argv) + '\n')

    # Get the device
    device = get_default_device()

    # Load the data
    input_ids, mask, labels, _ = get_data(responses_file,
                                          grades_file,
                                          part=part)
    print(mask.size())

    # filter data to only keep grades equal to or above the filter value
    filtered = labels >= filter
    inds = filtered.nonzero().squeeze()

    input_ids = input_ids[inds]
    mask = mask[inds]
    labels = labels[inds]
    print(mask.size())

    # split into training and validation sets
    input_ids_val = input_ids[:val_size]
    mask_val = mask[:val_size]
Example #8
0
import numpy as np
from keras.models import Model
from keras.layers import Input
from keras.layers.convolutional import Convolution2D
from keras import backend as K
import keras.optimizers as optimizers
import data_prep as DP

# Paired high- and low-resolution training data from the prep module.
HR_data, LR_data = DP.get_data()

# 'th' = channels-first (Theano-style) ordering, matching the
# Input(shape=(1, 33, 33)) used by the model below.
K.set_image_dim_ordering('th')
# Path to pretrained weights; empty by default — set before loading weights.
weights_path = ''


def SRCNN(n1=64, n2=32, f1=9, f2=1, f3=5, load_weights=False):
    """Build and compile the 3-layer SRCNN super-resolution model.

    Args:
        n1, n2: filter counts for the first two convolutional layers.
        f1, f2, f3: kernel sizes for the three convolutional layers.
        load_weights: when True, load weights from module-level weights_path.

    Returns:
        The compiled Keras Model.
    """
    inputs = Input(shape=(1, 33, 33))  # channels-first 1x33x33 patch
    x = Convolution2D(n1, (f1, f1),
                      activation='relu',
                      padding='valid',
                      name='level1')(inputs)
    x = Convolution2D(n2, (f2, f2),
                      activation='relu',
                      padding='valid',
                      name='level2')(x)
    out = Convolution2D(1, (f3, f3), padding='valid', name='output')(x)

    model = Model(inputs, out)
    adam = optimizers.Adam(lr=1e-3)
    model.compile(optimizer=adam, loss='mse')  #, metrics=[PSRNLoss])
    if load_weights:
        model.load_weights(weights_path)
    return model  # fixed: the built model was never returned
Example #9
0
#
# The different models are assessed by plotting fitted values for
# various subpopulations.  Since all of the regression functions are
# well over 3-dimensional, we can plot E[Y|X] against one component of
# X, while holding the other components of X equal to reference
# values.  For example, we can plot the fitted mean blood pressure as
# a function of age, for each sex, for each ethnic group, and for
# people with average BMI.

import numpy as np
import statsmodels.api as sm
from data_prep import get_data
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt

# Load the 2015 survey wave used by the plotting helpers below.
dx = get_data(2015)

def plot_fit_by_age(result, fml):

    # Create a dataframe in which all variables are at the reference
    # level
    da = dx.iloc[0:100, :].copy()
    da["RIDAGEYR"] = np.linspace(18, 80, 100)
    da["RIDRETH1"] = "OH"

    plt.figure(figsize=(8, 5))
    plt.clf()
    plt.axes([0.1, 0.1, 0.66, 0.8])
    plt.grid(True)

    for female in 0, 1:
Example #10
0
        'ss_train_voc/angelica/JPEGImages/',
        'ss_train_voc/courtney/JPEGImages/', 'ss_train_voc/olivia/JPEGImages/',
        'ss_train_voc/tim/JPEGImages/'
    ]
    train_masks_path = [
        'ss_train_voc/angelica/SegmentationClassPNG/',
        'ss_train_voc/courtney/SegmentationClassPNG/',
        'ss_train_voc/olivia/SegmentationClassPNG/',
        'ss_train_voc/tim/SegmentationClassPNG/'
    ]
    test_images_path = [
        'ss_test_voc/angelica/JPEGImages/', 'ss_test_voc/courtney/JPEGImages/',
        'ss_test_voc/olivia/JPEGImages/', 'ss_test_voc/tim/JPEGImages/'
    ]
    test_masks_path = [
        'ss_test_voc/angelica/SegmentationClassPNG/',
        'ss_test_voc/courtney/SegmentationClassPNG/',
        'ss_test_voc/olivia/SegmentationClassPNG/',
        'ss_test_voc/tim/SegmentationClassPNG/',
    ]

    train_imgs, train_msks = data_prep.get_data(train_images_path,
                                                train_masks_path)
    data_prep.display_images([train_imgs[117], train_msks[117]])
    test_imgs, test_msks = data_prep.get_data(test_images_path,
                                              test_masks_path)
    data_prep.display_images([test_imgs[0], test_msks[0]])

    # train_model(X={'train':train_imgs, 'test':test_imgs},
    #             y={'train':train_msks, 'test':test_msks})
Example #11
0
import os
import pandas as pd
import pickle
import tensorflow as tf

# Locate the csv source and the cached dataframe pickle in the working dir.
cwd = os.getcwd()
data_path = os.path.join(cwd, 'RR Historical Data.csv')
df_pkl_path = os.path.join(cwd, 'rr_df.pkl')

# Prefer the cached pickle; rebuild from the csv only when the cache is missing.
try:
    df = pd.read_pickle(df_pkl_path)
    # fixed: message previously reported the csv path instead of the
    # pickle path that was actually read
    print('Pre-saved dataframe pickle found.  Reading "{}"'.format(df_pkl_path))
except FileNotFoundError as e:
    print(e)
    # fixed typo in message: "Rebulding" -> "Rebuilding"
    print('Rebuilding dataframe from csv file "{}"'.format(data_path))
    df = data_prep.get_data(data_path)
    print('Saving dataframe to "{}"'.format(df_pkl_path))
    df.to_pickle(df_pkl_path)

# Reverse row order and reindex (presumably the csv is newest-first — TODO confirm).
df = df.iloc[::-1]
df = df.reset_index(drop=True)

# Held-out split currently disabled: train on everything.
#train_data = df[:-2]
train_data = df
#test_data = df[-2:]
#test_data = test_data.iloc[::-1]
#test_data = test_data.reset_index(drop=True)
print('train head')
print(train_data.head())
print('train tail')
print(train_data.tail())