def test_AnscomeQuartet():
    '''! RABE p25 '''
    anscomeData = csv('../data/P025b.txt', sep='\t')
    Y1 = array(anscomeData['Y1'])
    X1 = array(anscomeData['X1'])
    Y2 = array(anscomeData['Y2'])
    X2 = array(anscomeData['X2'])
    Y3 = array(anscomeData['Y3'])
    X3 = array(anscomeData['X3'])
    Y4 = array(anscomeData['Y4'])
    X4 = array(anscomeData['X4'])

    lr1 = simpleLR(Y1, X1)
    lr2 = simpleLR(Y2, X2)
    lr3 = simpleLR(Y3, X3)
    lr4 = simpleLR(Y4, X4)

    cor1 = cor(Y1, X1)
    cor2 = cor(Y2, X2)
    cor3 = cor(Y3, X3)
    cor4 = cor(Y4, X4)

    assert abs(cor1 - cor2) < 0.0005
    assert abs(cor1 - cor3) < 0.0005
    assert abs(cor1 - cor4) < 0.0005

    assert abs(lr1['beta0Hat'] - lr2['beta0Hat']) < 0.005
    assert abs(lr1['beta0Hat'] - lr3['beta0Hat']) < 0.005
    assert abs(lr1['beta0Hat'] - lr4['beta0Hat']) < 0.005
def test_Parabola():
    '''! RABE p25 '''
    parabolaData = csv('../data/P025a.txt', sep='\t')
    Y = parabolaData['Y']
    X = parabolaData['X']
    assert cor(X, Y) == 0
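# The tests above and below rely on helper functions (csv, array, bar, cov,
# cor, simpleLR) that are not shown in this excerpt. A minimal sketch of what
# they might look like, assuming csv is an alias for pandas.read_csv and
# simpleLR fits ordinary least squares; the real helpers return more fields
# and may differ in detail.
import numpy as np
import pandas as pd

csv = pd.read_csv   # assumed alias
array = np.array    # assumed alias

def bar(v):
    # Sample mean, written x-bar in RABE.
    return np.mean(v)

def cov(y, x):
    # Sample covariance with the n-1 denominator.
    return np.sum((y - bar(y)) * (x - bar(x))) / (len(y) - 1)

def cor(y, x):
    # Pearson correlation coefficient.
    return cov(y, x) / (np.std(y, ddof=1) * np.std(x, ddof=1))

def simpleLR(y, x):
    # OLS estimates: beta1 = Sxy / Sxx, beta0 = ybar - beta1 * xbar.
    beta1 = np.sum((x - bar(x)) * (y - bar(y))) / np.sum((x - bar(x)) ** 2)
    beta0 = bar(y) - beta1 * bar(x)
    return {'beta0Hat': beta0, 'beta1Hat': beta1}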
import pandas as pd
from os.path import dirname, join


def load_titanic():
    """Load the Titanic dataset.

    Returns
    -------
    data : DataFrame
        DataFrame containing the Titanic dataset.
    """
    module_path = dirname(__file__)
    return pd.read_csv(join(module_path, 'titanic.csv'))
def test_ComputerRepairData():
    '''! RABE p27-42

    NOTE: the correlation test here doesn't match RABE. I believe this to
    be an error in RABE, not in my code.
    '''
    computerRepairData = csv('../data/P027.txt', sep='\t')
    Y = array(computerRepairData['Minutes'])
    X = array(computerRepairData['Units'])

    assert abs(bar(Y) - 97.21) < 0.005
    assert abs(bar(X) - 6) == 0
    assert abs(cov(Y, X) - 136) < 0.5
    assert abs(cor(Y, X) - 0.9936) < 0.0005

    # beta_1^0 == 12, beta_1^0 == 12, so beta00 needs to be set, while
    # beta10 does not.
    lr = simpleLR(Y, X, criticalT=2.18)
    # assert( abs(lr['SSR']) )

    assert abs(sum(X) - 84) < 0.5
    assert abs(sum(Y) - 1361) < 0.5
    # Deviations about a sample mean always sum to zero.
    assert abs(sum(Y - bar(Y))) < 1e-12
    assert abs(sum(X - bar(X))) < 1e-12
    assert abs(lr['SST'] - 27768.36) < 0.005
    assert abs(sum((X - bar(X))**2) - 114) < 0.5
    assert abs(sum((X - bar(X)) * (Y - bar(Y))) - 1768) < 0.005
    assert abs(lr['beta0Hat'] - 4.162) < 0.0005
    assert abs(lr['beta1Hat'] - 15.509) < 0.0005
    assert abs(lr['seBeta0Hat'] - 3.355) < 0.0005
    assert abs(lr['seBeta1Hat'] - 0.505) < 0.0005
    assert abs(lr['t0'] - 1.24) < 0.005
    assert abs(lr['t1'] - 30.71) < 0.005
    assert abs(lr['beta0HatPM'] - 2.18 * 3.355) < 0.0005
    assert abs(lr['beta1HatPM'] - 2.18 * 0.505) < 0.0005
    # R^2 identities: SSR/SST == 1 - SSE/SST == cor(Y, X)^2.
    assert abs(lr['SSR'] / lr['SST'] - (1 - lr['SSE'] / lr['SST'])) < 1e-14
    assert abs(lr['SSR'] / lr['SST'] - cor(Y, X)**2) < 1e-14
    assert abs(lr['SSR'] / lr['SST'] - .987) < 0.0005

    lrEstimate = simpleLREstimate(lr, 4, 0)
    assert abs(lrEstimate['seY0Hat'] - 5.67) < 0.005
    assert abs(lrEstimate['seMu0Hat'] - 1.76) < 0.005
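# simpleLREstimate itself is not shown in this excerpt. The two standard
# errors it asserts on are the standard simple-regression quantities: for a
# new observation at x0, se(y0Hat) = sigmaHat * sqrt(1 + 1/n + (x0-xbar)^2/Sxx),
# and for the mean response, se(mu0Hat) = sigmaHat * sqrt(1/n + (x0-xbar)^2/Sxx).
# A sketch under those assumptions, taking raw data rather than the lr dict
# the test passes; the real function's signature differs.
import numpy as np

def simpleLREstimate_sketch(y, x, x0):
    n = len(y)
    xbar = np.mean(x)
    Sxx = np.sum((x - xbar) ** 2)
    beta1 = np.sum((x - xbar) * (y - np.mean(y))) / Sxx
    beta0 = np.mean(y) - beta1 * xbar
    resid = y - (beta0 + beta1 * x)
    # Residual standard error with n - 2 degrees of freedom.
    sigmaHat = np.sqrt(np.sum(resid ** 2) / (n - 2))
    seY0Hat = sigmaHat * np.sqrt(1 + 1 / n + (x0 - xbar) ** 2 / Sxx)
    seMu0Hat = sigmaHat * np.sqrt(1 / n + (x0 - xbar) ** 2 / Sxx)
    return {'seY0Hat': seY0Hat, 'seMu0Hat': seMu0Hat}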
for x in mycursor:
    print(x)

#######################################
# Using sqlalchemy
#######################################
# pip install SQLAlchemy
# conda install -c anaconda sqlalchemy

import pandas as pd
import sqlalchemy

# connection string for sqlalchemy:
# mysql+mysqldb://<user>:<password>@<host>[:<port>]/<dbname>
engine = sqlalchemy.create_engine(
    'mysql+mysqlconnector://root:root@localhost:8889/')

# read the csv
data = pd.read_csv('file.txt')

# check that the column names match the MySQL table,
# then insert into the table
data.to_sql('customers', con=engine)

#######################################

#######################################
# Loading the table directly into MySQL
#######################################
## SQL
#LOAD DATA LOCAL INFILE '/file.csv'
#INTO TABLE customers
#FIELDS TERMINATED BY ','
#LINES TERMINATED BY '\n'
#IGNORE 1 ROWS  # header
#(name,address)
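# To verify the insert, the table can be read back with pandas.read_sql
# (assuming the engine above connects and 'customers' is the table that
# to_sql just created).
check = pd.read_sql('SELECT * FROM customers', con=engine)
print(check.head())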
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load training data set from CSV file
training_data_df = pd.read_csv("sales_data_training.csv")

# Load testing data set from CSV file
test_data_df = pd.read_csv("sales_data_test.csv")

# Data needs to be scaled to a small range like 0 to 1 for the neural
# network to work well.
scaler = MinMaxScaler(feature_range=(0, 1))

# Scale both the training inputs and outputs
scaled_training = scaler.fit_transform(training_data_df)

# Scale the test data with the same scaler: transform only, so the test
# set is never used to fit the scaling parameters.
scaled_testing = scaler.transform(test_data_df)

# Print out the adjustment that the scaler applied to the total_earnings column of data
print("Note: total_earnings values were scaled by multiplying by {:.10f} and adding {:.6f}"
      .format(scaler.scale_[8], scaler.min_[8]))

# Create new pandas DataFrame objects from the scaled data
scaled_training_df = pd.DataFrame(scaled_training,
                                  columns=training_data_df.columns.values)
scaled_testing_df = pd.DataFrame(scaled_testing,
                                 columns=test_data_df.columns.values)

# Save scaled data dataframes to new CSV files
scaled_training_df.to_csv("sales_data_training_scaled.csv", index=False)
scaled_testing_df.to_csv("sales_data_test_scaled.csv", index=False)
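# MinMaxScaler applies X_scaled = X * scale_ + min_ column by column, so a
# value in the scaled space can be mapped back to dollars by inverting that
# formula (column 8 is total_earnings here). A quick sanity check:
scaled_value = scaled_training_df.iloc[0, 8]
unscaled_value = (scaled_value - scaler.min_[8]) / scaler.scale_[8]
print("First total_earnings value, unscaled: {:.2f}".format(unscaled_value))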
import os
import pandas as pd
import numpy as np

os.chdir('f:/di____di/season_1/training_data/poi_data')

# on_bad_lines='skip' replaces the long-deprecated error_bad_lines=False
df = pd.read_table('poi_data_count', header=None, on_bad_lines='skip')
df = df.fillna('nx')

re = np.zeros([66, 26])
mx = 20
for x in range(0, 66):
    for y in range(1, 24):
        # .ix was removed from pandas; .iloc does positional indexing
        s = df.iloc[x, y]
        if s != 'nx':
            tp = int(s.split(':')[0])
            if tp > mx:
                mx = tp
            re[x][tp] = int(s.split(':')[1])

df = pd.read_csv('ddf1.csv')
"""
@author: Arsene Gasana
"""
import pandas as pd
import os
from datetime import datetime

version = '1_0'
main_dir = r'C:\Users\user\Desktop\Investigation data generation'
os.chdir(main_dir)

time = datetime.now()
datestring_for_file = '%04d%02d%02d%02d%02d%02d' % (
    time.year, time.month, time.day, time.hour, time.minute, time.second)

SCdata = pd.read_csv(
    r'C:\Users\user\Desktop\Investigation data generation\Season Clients Detailed_20200928-093633.csv'
)
VRdata = pd.read_csv(
    r'C:\Users\user\Desktop\Investigation data generation\Light_20200928-073312.csv'
)

# Build a district-qualified unique ID for each client in both data sets
SCdata.insert(0, 'UID',
              SCdata['DistrictName'] + '_' + SCdata['OAFID'].astype('str'))
VRdata.insert(0, 'UID',
              VRdata['District'] + '_' + VRdata['OAFID'].astype('str'))

ListOfSitesUID = [
    'Nyamagabe_Nkumbure B', 'Nyagatare_Mahoro', 'Gatsibo_Nyabisindu A'
]

for site in ListOfSitesUID:
    dir_name = '%s-%s-%s' % (site, version, datestring_for_file)
    os.chdir(main_dir)
if __name__ == "__main__":
    """
    python scripts/05_blend_predictions.py
    """

    # load test and leak
    test = load_data("test_clean")
    leak = load_data("is_leak")
    target = leak["meter_reading"].values

    # load predictions
    preds_matrix = [np.load(x) for x in glob.glob(f"{OUTPUT_PATH}/*.npy")]
    if len(glob.glob(f"{OUTPUT_PATH}/*.csv")) > 0:
        preds_matrix += [
            pd.read_csv(x).meter_reading.values
            for x in glob.glob(f"{OUTPUT_PATH}/*.csv")
        ]
    preds_matrix = np.vstack(preds_matrix).T
    preds_matrix[preds_matrix < 0] = 0

    # initialize data: fit the blend only on rows where the leaked target
    # is known
    X_train = preds_matrix[~np.isnan(target)]
    y_train = target[~np.isnan(target)]

    # correct site 0
    correction_indices = (test.site_id[~np.isnan(target)] == 0) & \
                         (test.meter[~np.isnan(target)] == 0)
    X_train[correction_indices] *= 0.2931
    y_train[correction_indices] *= 0.2931
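# The excerpt ends before the blend itself. A minimal sketch of one way to
# fit non-negative blend weights against the leaked targets by minimizing
# RMSLE with scipy; the weighting scheme in the full script may differ.
import numpy as np
from scipy.optimize import minimize

def rmsle(weights, preds, y):
    # Root mean squared log error of the weighted blend.
    blended = preds @ weights
    return np.sqrt(np.mean((np.log1p(blended) - np.log1p(y)) ** 2))

n_models = X_train.shape[1]
result = minimize(
    rmsle,
    x0=np.full(n_models, 1.0 / n_models),  # start from the simple average
    args=(X_train, y_train),
    bounds=[(0.0, 1.0)] * n_models,
    method="SLSQP",
)
blend = preds_matrix @ result.x  # blended predictions for all test rows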
import pandas as pd


def importCSV(file):
    # Read a headerless CSV, zero-fill missing values, and return the
    # transposed contents as a NumPy array.
    data = pd.read_csv(file, header=None)
    data = data.fillna(value=0).transpose().to_numpy()
    print(data.shape)
    return data
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pandas as pd

# pandas has no pd.csv(); the CSV reader is pd.read_csv(), which needs a
# file path ('data.csv' below is a placeholder).
df = pd.read_csv('data.csv')