def main():
    """Load amenity data, plot it, and print summary statistics.

    Relies on module-level helpers defined elsewhere in this file:
    get_data, plot_amenities, amenity_IQR, to_markdown, and
    matplotlib.pyplot as plt.
    """
    amenity_df = get_data()
    plot_amenities(amenity_df)
    # BUG FIX: original read `plt.show())` — stray closing paren
    # was a syntax error.
    plt.show()
    stats = amenity_IQR(amenity_df)
    # tabulated (markdown) form of the stats table
    to_markdown(stats)
def get_datapoint():
    """Retrieve one datapoint from the external server and insert it into MongoDB.

    Skips insertion when a document with the same object_id already exists.
    Relies on module-level globals: requests, live_data (a Mongo collection),
    model, get_data, parse_tickets.
    """
    r = requests.get(
        'http://galvanize-case-study-on-fraud.herokuapp.com/data_point')
    data = r.json()
    # Only insert if this object_id has not been seen before.
    # NOTE(review): Cursor.count() is deprecated in modern pymongo; consider
    # live_data.count_documents({'object_id': ...}) — left unchanged here to
    # avoid changing the driver-API requirements.
    if not live_data.find({'object_id': data['object_id']}).count():
        data['prediction'] = model.predict(get_data(data))[0]
        # parse_tickets returns a sequence; the last element is the cost
        data['potential_cost'] = parse_tickets(data['ticket_types'])[-1]
        live_data.insert_one(data)
        # BUG FIX: Python 2 print statement -> Python 3 print() call
        print('inserted datapoint')
def main():
    """Fit a linear attendance model and visualize error and per-format weights.

    Relies on module-level imports: data_prep, train_test_split,
    LinearRegression, np, plt.
    """
    (X, y, X_future, gp_data) = data_prep.get_data()

    # Splitting the data into Training and Test Sets (80/20)
    X_train, X_test, y_train, y_test = train_test_split(X[:, :], y,
                                                        test_size=0.2)

    # Fitting Multiple Linear Regression to the Training set
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)

    # Prediction values and residuals on the test set
    y_pred = regressor.predict(X_test).astype(int)
    y_delta = y_pred - y_test

    # Visualizing Our Model's accuracy
    plt.figure(1)
    plt.scatter(y_test, y_delta, color='red')
    plt.title('Baseline Accuracy (Linear Regression)')
    plt.xlabel('Actual Event Attendance')
    plt.ylabel('Error in Linear Prediction')

    # Determining Which Variables Carry Significance: map each of the first
    # 12 one-hot format columns to its learned coefficient.
    format_weights = {'Block Constructed': 0.}
    for i in range(12):
        try:
            fmt = gp_data['Format'][np.where(X[:, i] == 1)[0][0]]
            format_weights[fmt] = regressor.coef_[i]
        # BUG FIX: was a bare `except:`; only the expected "column never
        # active / label missing" failures should be skipped.
        except (IndexError, KeyError):
            continue

    # Re-center data on Standard for readability, then drop it.
    # BUG FIX: capture the Standard weight before the loop — the original
    # subtracted in place, so once 'Standard' was zeroed mid-iteration every
    # later entry had 0 subtracted instead of the true Standard weight.
    standard_weight = format_weights['Standard']
    for gp_format in format_weights:
        format_weights[gp_format] -= standard_weight
    format_weights.pop('Standard')

    # BUG FIX: the original sorted the values and then looked up the first
    # key with each value, so duplicate weights produced repeated labels.
    # Sorting the (name, weight) pairs keeps labels and values paired.
    pairs = sorted(format_weights.items(), key=lambda kv: kv[1])
    sorted_y = [name for name, _ in pairs]
    sorted_X = [weight for _, weight in pairs]

    # Create Bar Graphs
    plt.figure(2)
    index = np.arange(len(format_weights))
    plt.barh(index, list(sorted_X))
    plt.ylabel('New Grand Prix Format')
    plt.xlabel('Expected Change in Attendance')
    plt.title('Format Attendance Compared to Standard (Linear Approximation)')
    plt.yticks(index, sorted_y, fontsize=8, rotation=30)
    plt.show()
def plot_result(filename):
    """Fit SVR models with three kernels, report CV scores, and plot the
    rbf and linear fits against the actual prices.

    Relies on module-level imports: data_prep, SVR (sklearn.svm), plt.
    """
    df = data_prep.get_data(filename)
    [X, y] = data_prep.features(df)
    [X_train, X_test, X_cross] = data_prep.feature_scaling(X)
    [y_train, y_test, y_cross] = data_prep.data_set(y)

    # SVR with different kernels
    svr_lin = SVR(kernel='linear', C=1e3)
    svr_poly = SVR(kernel='poly', C=1e3, degree=2)
    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.01)

    # Fitting each model on the training set; all three share the same
    # '1DayW' target column, so extract it once.
    target = y_train.loc[:, '1DayW'].values
    svr_rbf.fit(X_train.values, target)
    svr_lin.fit(X_train.values, target)
    svr_poly.fit(X_train.values, target)

    # Score each model on the cross-validation set
    lin_score = svr_lin.score(X_cross, y_cross)
    poly_score = svr_poly.score(X_cross, y_cross)
    rbf_score = svr_rbf.score(X_cross, y_cross)
    print('Confidence score for linear kernel :', lin_score * 100)
    print('Confidence score for poly. kernel :', poly_score * 100)
    print('Confidence score for rbf kernel :', rbf_score * 100)

    print("\nPlotting graph for rbf kernel:")
    _plot_svr_fit(y_cross, svr_rbf.predict(X_cross), 'b', 'RBF Kernel SVR')

    print("\nPlotting graph for linear kernel:")
    _plot_svr_fit(y_cross, svr_lin.predict(X_cross), 'r', 'Linear Kernel SVR')


def _plot_svr_fit(y_cross, predictions, color, label):
    """Scatter the actual prices and overlay one model's predictions."""
    plt.scatter(y_cross.index, y_cross.values, color='black')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.plot(y_cross.index, predictions, color=color, label=label)
    plt.show()
def train(filename):
    """Fit a linear regression and report test/CV confidence scores.

    Returns y_cross with an added 'predictions' column.
    Relies on module-level imports: data_prep, linear_model (sklearn).
    """
    df = data_prep.get_data(filename)
    [X, y] = data_prep.features(df)
    [X_train, X_test, X_cross] = data_prep.feature_scaling(X)
    [y_train, y_test, y_cross] = data_prep.data_set(y)

    lm = linear_model.LinearRegression()
    # training model on training set
    model = lm.fit(X_train.values, y_train.values)

    # FIX: the original computed predictions on X_test and immediately
    # overwrote them — score() predicts internally, so that call was
    # dead work and has been removed.
    print("confidence on test set is ",
          lm.score(X_test.values, y_test.values) * 100)

    predictions = model.predict(X_cross.values)
    print("confidence on cross validation set is ",
          lm.score(X_cross.values, y_cross.values) * 100)

    y_cross['predictions'] = predictions
    return y_cross
import pandas as pd
import numpy as np
import statsmodels.api as sm
from data_prep import get_data
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt

# Multi-page PDF output target.
# NOTE(review): no pdf.savefig()/pdf.close() is visible in this chunk —
# presumably they occur later in the file; confirm.
pdf = PdfPages("bp_model_2wave.pdf")

# Two survey waves loaded via the project's data module.
d1 = get_data(1999)
d2 = get_data(2015)

# Code year relative to 2000
d1["Year"] = -1
d2["Year"] = 15

# Stack the two waves into one combined frame.
dx = pd.concat((d1, d2), axis=0)


def plot_fit_by_age(result, fml):
    # Create a dataframe in which all variables are at the reference
    # level
    da = dx.iloc[0:100, :].copy()
    # Sweep age over 18..80 while other covariates stay at reference values.
    da["RIDAGEYR"] = np.linspace(18, 80, 100)
    da["RIDRETH1"] = "OH"
    plt.figure(figsize=(8, 5))
    plt.clf()
    # Leave room on the right of the axes (presumably for a legend).
    plt.axes([0.1, 0.1, 0.56, 0.8])
    plt.grid(True)
    # NOTE(review): function body appears truncated at this chunk boundary;
    # the 'result' and 'fml' parameters are not yet used in the visible part.
# NOTE(review): this chunk is the interior of a training script — args, seed,
# responses_file, grades_file, part, val_size, torch, os, sys,
# get_default_device and get_data are all defined outside the visible region.

# Grade threshold used to filter the dataset.
# NOTE(review): the name 'filter' shadows the builtin of the same name.
filter = args.filter

torch.manual_seed(seed)

# Save the command run
if not os.path.isdir('CMDs'):
    os.mkdir('CMDs')
with open('CMDs/train_filter.cmd', 'a') as f:
    f.write(' '.join(sys.argv) + '\n')

# Get the device
device = get_default_device()

# Load the data
input_ids, mask, labels, _ = get_data(responses_file, grades_file, part=part)
print(mask.size())

# filter data to only keep grades equal to or above the filter value
filtered = labels >= filter
inds = filtered.nonzero().squeeze()
input_ids = input_ids[inds]
mask = mask[inds]
labels = labels[inds]
# Print size again so the amount of filtered-out data is visible in the log.
print(mask.size())

# split into training and validation sets
# (the first val_size examples become the validation split)
input_ids_val = input_ids[:val_size]
mask_val = mask[:val_size]
import numpy as np
from keras.models import Model
from keras.layers import Input
from keras.layers.convolutional import Convolution2D
from keras import backend as K
import keras.optimizers as optimizers
import data_prep as DP

# High- and low-resolution data from the project's data module.
HR_data, LR_data = DP.get_data()

# Channels-first image layout ('th' = Theano ordering); the (1, 33, 33)
# input shape below depends on this setting.
K.set_image_dim_ordering('th')

# Path to pretrained weights; empty by default — set before calling
# SRCNN(load_weights=True).
weights_path = ''


def SRCNN(n1=64, n2=32, f1=9, f2=1, f3=5, load_weights=False):
    # Three stacked convolutions (filter counts n1/n2/1, kernel sizes
    # f1/f2/f3) over a single-channel 33x33 input; 'valid' padding
    # shrinks the spatial size at each layer.
    inputs = Input(shape=(1, 33, 33))
    x = Convolution2D(n1, (f1, f1), activation='relu', padding='valid',
                      name='level1')(inputs)
    x = Convolution2D(n2, (f2, f2), activation='relu', padding='valid',
                      name='level2')(x)
    out = Convolution2D(1, (f3, f3), padding='valid', name='output')(x)
    model = Model(inputs, out)
    adam = optimizers.Adam(lr=1e-3)
    # MSE loss; the PSNR-style metric was left disabled in the original.
    model.compile(optimizer=adam, loss='mse')  # , metrics=[PSRNLoss])
    if load_weights:
        model.load_weights(weights_path)
    # NOTE(review): no 'return model' is visible — the function body may be
    # truncated at this chunk boundary.
# # The different models are assessed by plotting fitted values for # various subpopulations. Since all of the regression functions are # well over 3-dimensional, we can plot E[Y|X] against one component of # X, while holding the other components of X equal to reference # values. For example, we can plot the fitted mean blood pressure as # a function of age, for each sex, for each ethnic group, and for # people with average BMI. import numpy as np import statsmodels.api as sm from data_prep import get_data from matplotlib.backends.backend_pdf import PdfPages import matplotlib.pyplot as plt dx = get_data(2015) def plot_fit_by_age(result, fml): # Create a dataframe in which all variables are at the reference # level da = dx.iloc[0:100, :].copy() da["RIDAGEYR"] = np.linspace(18, 80, 100) da["RIDRETH1"] = "OH" plt.figure(figsize=(8, 5)) plt.clf() plt.axes([0.1, 0.1, 0.66, 0.8]) plt.grid(True) for female in 0, 1:
# NOTE(review): this chunk starts mid-list — the variable name and opening
# bracket of the first image-path list (train images, by symmetry with the
# lists below) lie before the visible region.
    'ss_train_voc/angelica/JPEGImages/',
    'ss_train_voc/courtney/JPEGImages/',
    'ss_train_voc/olivia/JPEGImages/',
    'ss_train_voc/tim/JPEGImages/'
]

# Ground-truth segmentation masks matching the training images, per subject.
train_masks_path = [
    'ss_train_voc/angelica/SegmentationClassPNG/',
    'ss_train_voc/courtney/SegmentationClassPNG/',
    'ss_train_voc/olivia/SegmentationClassPNG/',
    'ss_train_voc/tim/SegmentationClassPNG/'
]

# Held-out test images and masks for the same four subjects.
test_images_path = [
    'ss_test_voc/angelica/JPEGImages/',
    'ss_test_voc/courtney/JPEGImages/',
    'ss_test_voc/olivia/JPEGImages/',
    'ss_test_voc/tim/JPEGImages/'
]
test_masks_path = [
    'ss_test_voc/angelica/SegmentationClassPNG/',
    'ss_test_voc/courtney/SegmentationClassPNG/',
    'ss_test_voc/olivia/SegmentationClassPNG/',
    'ss_test_voc/tim/SegmentationClassPNG/',
]

# Load paired (image, mask) data via the project's data module, then
# spot-check one pair from each split visually.
train_imgs, train_msks = data_prep.get_data(train_images_path, train_masks_path)
data_prep.display_images([train_imgs[117], train_msks[117]])
test_imgs, test_msks = data_prep.get_data(test_images_path, test_masks_path)
data_prep.display_images([test_imgs[0], test_msks[0]])

# Training call left disabled in the original:
# train_model(X={'train':train_imgs, 'test':test_imgs},
#             y={'train':train_msks, 'test':test_msks})
import os
import pandas as pd
import pickle
import tensorflow as tf
# BUG FIX: data_prep.get_data is called below but data_prep was never imported.
import data_prep

cwd = os.getcwd()
data_path = os.path.join(cwd, 'RR Historical Data.csv')
df_pkl_path = os.path.join(cwd, 'rr_df.pkl')

# Prefer the cached pickle; rebuild from the csv only when it is missing.
try:
    df = pd.read_pickle(df_pkl_path)
    # BUG FIX: the message announced the pickle but formatted the csv path.
    print('Pre-saved dataframe pickle found. Reading "{}"'.format(df_pkl_path))
except FileNotFoundError as e:
    print(e)
    # BUG FIX: typo "Rebulding" -> "Rebuilding"
    print('Rebuilding dataframe from csv file "{}"'.format(data_path))
    df = data_prep.get_data(data_path)
    print('Saving dataframe to "{}"'.format(df_pkl_path))
    df.to_pickle(df_pkl_path)

# Reverse the row order into the opposite chronology (rows presumably
# arrive newest-first — confirm against the csv) and renumber the index.
df = df.iloc[::-1]
df = df.reset_index(drop=True)

# Hold-out split currently disabled: train on the full frame.
#train_data = df[:-2]
train_data = df
#test_data = df[-2:]
#test_data = test_data.iloc[::-1]
#test_data = test_data.reset_index(drop=True)

print('train head')
print(train_data.head())
print('train tail')
print(train_data.tail())