def get_data(scaled=False):
    """Load the housing dataset and return a design matrix with a bias column.

    Drops rows with missing values, removes the categorical
    ``ocean_proximity`` column and the target column from the features,
    and optionally standardizes the features.

    Args:
        scaled: when True, standardize features with StandardScaler.

    Returns:
        Tuple ``(X_with_bias, y)`` where ``X_with_bias`` has a leading
        column of ones and ``y`` is a column vector of house values.
    """
    csv_path = data_loader.dataset_path("housing", "housing.csv")
    frame = pd.read_csv(csv_path).dropna()
    features = frame.drop(["ocean_proximity", "median_house_value"], axis=1)
    target = frame["median_house_value"]
    print("Housing data:")
    print(features.head())
    design = features.values
    n_rows = design.shape[0]
    if scaled:
        design = StandardScaler().fit_transform(design)
    # Prepend the intercept column of ones.
    with_bias = np.column_stack((np.ones((n_rows, 1)), design))
    labels = target.values.reshape(-1, 1)
    return with_bias, labels
# NOTE(review): this section appears to be the tail of a training function
# (presumably `retrain_model`, called below) — `saver`, `training_op`,
# `accuracy`, `X`, `y`, `X_val`, `y_val`, and `X_train` are defined elsewhere.
# Boolean placeholder so batch-norm/dropout layers can switch train/eval mode.
training = tf.placeholder_with_default(False, shape=(), name="training")
# Batch-norm moving-average update ops must be run alongside the train op.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
n_epochs = 15
batch_size = 100
with tf.Session() as sess:
    # Resume from the most recent checkpoint in the working directory.
    saver.restore(sess, tf.train.latest_checkpoint("./"))
    for epoch in range(n_epochs):
        # Sequential (non-shuffled) mini-batches; any remainder smaller than
        # batch_size at the end of the epoch is skipped.
        for batch_index in range(int(y_train.shape[0]/batch_size)):
            b_start, b_end = batch_size*batch_index, batch_size*batch_index + batch_size
            # NOTE(review): uses `X_train` (uppercase) while __main__ below
            # builds `x_train` (lowercase) — confirm which one is in scope.
            X_batch, y_batch = X_train[b_start: b_end], y_train[b_start: b_end]
            sess.run([training_op, extra_update_ops], feed_dict={training: True, X: X_batch, y: y_batch})
        # Training accuracy is measured on the LAST batch only, not the epoch.
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_val, y: y_val})
        print("Epoch: ", epoch, "training: ", acc_train, "val_acc", acc_val)

if __name__ == "__main__":
    # Load MNIST from a local cache path and flatten/normalize the images.
    image_path = data_loader.dataset_path("mnist", "mnist.npz")
    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data(path=image_path)
    n_features = 28*28
    # Flatten 28x28 images to 784-long vectors and scale pixels to [0, 1].
    x_train = x_train.reshape(-1, n_features)
    x_train = x_train/255
    x_test = x_test.reshape(-1, n_features)
    x_test = x_test/255
    #load_model_and_run(x_train, y_train, x_test,y_test)
    retrain_model(x_train, y_train, x_test,y_test)
import sys
import os

# Fix: pd and plt were used below but never imported in this file,
# which raises NameError at runtime.
import pandas as pd
import matplotlib.pyplot as plt

# Make the project root importable so the `src.` package resolves
# when this file is run directly as a script.
root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(root)

import src.load_data.loader as data_loader

if __name__ == '__main__':
    # Download the Montreal bike-path counts and plot all counters over time.
    plt.rcParams['figure.figsize'] = (15, 10)
    bike_url = 'https://raw.githubusercontent.com/jvns/pandas-cookbook/master/data/bikes.csv'
    dest_folder = 'bikes/'
    # Fetch data from github into the local dataset folder.
    data_loader.fetch_data(bike_url, dest_folder)
    data_path = data_loader.dataset_path(dest_folder, 'bikes.csv')
    print("Loading data from:\n" + data_path)
    # The CSV is semicolon-separated, latin-1 encoded, with day-first dates.
    df = pd.read_csv(data_path, sep=';', encoding='latin1',
                     parse_dates=['Date'], dayfirst=True, index_col='Date')
    print(df.head(10))
    print(df['Berri 1'].head(10))
    df.plot()
    plt.show()
def get_data():
    """Read the housing CSV and return an 80/20 (train, test) split.

    Returns:
        Tuple of two DataFrames: ``(train, test)``.
    """
    csv_file = data_loader.dataset_path("housing", "housing.csv")
    housing_df = pd.read_csv(csv_file)
    # Fixed seed keeps the split reproducible across runs.
    split_train, split_test = train_test_split(
        housing_df, test_size=0.2, random_state=42)
    return split_train, split_test
# NOTE(review): this prelude looks like the tail of an earlier example —
# `lin_reg`, `xline`, `X`, and `y` are defined elsewhere in the file.
ypred = lin_reg.predict(xline)
plt.scatter(X[:, 1], y)
plt.plot(xline[:, 1], ypred, color='red')
plt.ylim(0, 30)
plt.show()


def logit(x):
    """Sigmoid function: 1 / (1 + e**(-x)).

    Fix: the original used the truncated literal ``2.7182`` in place of e,
    which skews the curve; ``np.exp`` is exact and vectorizes over arrays.
    """
    return 1 / (1 + np.exp(-x))


if __name__ == "__main__":
    # Fit a 1-feature logistic regression (petal width -> is-virginica)
    # and plot class probabilities along with the raw instances.
    flowertype = "virginica"
    data_path = data_loader.dataset_path("iris", "iris.csv")
    df = pd.read_csv(data_path)
    X = df.values[:, 3:4]
    # Fix: np.int was removed in NumPy 1.24 — use the builtin int instead.
    y = (df.values[:, 4] == flowertype).astype(int)
    log_reg = LogisticRegression()
    log_reg.fit(X, y)
    xnew = np.linspace(0, 3, 100).reshape(-1, 1)
    y_proba = log_reg.predict_proba(xnew)
    plt.plot(xnew, y_proba[:, 1], label=flowertype)
    plt.plot(xnew, y_proba[:, 0], label="not " + flowertype)
    plt.scatter(X[y == True], np.zeros(X[y == True].shape), label=flowertype + " instance")
    plt.scatter(X[y == False], np.ones(X[y == False].shape), label="non " + flowertype + " instance")
import pandas as pd
import matplotlib.pyplot as plt
import __init__
import src.load_data.loader as data_loader

if __name__ == "__main__":
    # Total Berri-path riders per weekday, shown as a bar chart.
    path = data_loader.dataset_path("bikes", "bikes.csv")
    rides = pd.read_csv(path, sep=';', encoding='latin1',
                        parse_dates=['Date'], dayfirst=True, index_col='Date')
    berri = rides[['Berri 1']].copy()
    # Monday=0 ... Sunday=6, taken from the DatetimeIndex.
    berri.loc[:, 'weekday'] = berri.index.weekday
    per_weekday = berri.groupby('weekday').aggregate(sum)
    day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday']
    # Relabel the 0-6 index with readable day names for the plot.
    per_weekday.index = day_names
    per_weekday.plot(kind='bar')
    plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
import os

# Make the project root importable when running this file directly.
root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(root)
import src.load_data.loader as data_loader

if __name__ == "__main__":
    # Fraction of each borough's 311 calls that are street/sidewalk noise.
    data_path = data_loader.dataset_path('non-emergency_calls',
                                         '311-service-requests.csv')
    all_calls = pd.read_csv(data_path)
    is_noise = all_calls['Complaint Type'] == "Noise - Street/Sidewalk"
    noise_by_borough = all_calls[is_noise]['Borough'].value_counts()
    calls_by_borough = all_calls['Borough'].value_counts()
    print(noise_by_borough)
    print(calls_by_borough)
    # Element-wise ratio aligns on borough labels automatically.
    (noise_by_borough / calls_by_borough).plot(kind='bar')
    plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import __init__
import src.load_data.loader as data_loader

if __name__ == "__main__":
    # Plot monthly snow-day counts alongside monthly minimum temperature.
    local_path = data_loader.dataset_path("weather", "weather_2012.csv")
    weather_2012 = pd.read_csv(local_path, parse_dates=True, index_col='Date/Time')

    # 1.0 for each day whose description ever mentions "Snow", else 0.0.
    weather_description = weather_2012['Weather']
    is_snowing = weather_description.str.contains("Snow")
    is_snowing = is_snowing.astype(float).resample('D').max()
    # Fix: dropped the redundant second astype(float) — the series is already
    # float after the daily resample — and replaced .apply(np.*) with the
    # built-in .sum()/.max()/.min() aggregators.
    # NOTE(review): 'M'/'D' aliases assume an older pandas; pandas 2.2+
    # prefers 'ME' for month-end — confirm the pinned version.
    snow_totals = is_snowing.resample('M').sum()
    min_temperature = weather_2012['Temp (C)'].resample('M').min()
    min_temperature.name = 'Min Temp'
    snow_totals.name = 'Day\'s snowed'
    # (The original also computed an unused monthly average temperature;
    # it was never added to `stats`, so it has been removed.)
    stats = pd.concat([snow_totals, min_temperature], axis=1)
    print(stats)
    stats.plot(kind='bar', subplots=True)
    plt.axhline(0, color='black')
    # Leave room for the rotated month labels under the bars.
    plt.subplots_adjust(bottom=0.3)
    plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import __init__
import src.load_data.loader as data_loader

if __name__ == "__main__":
    # Median and mean temperature by hour of day from the 2012 weather data.
    url = 'https://raw.githubusercontent.com/jvns/pandas-cookbook/master/data/weather_2012.csv'
    dest_folder = 'weather/'
    # One-time download, if needed: data_loader.fetch_data(url, dest_folder)
    data_file = 'weather_2012.csv'
    local_path = data_loader.dataset_path(dest_folder, data_file)
    weather = pd.read_csv(local_path, index_col='Date/Time', parse_dates=True)
    temps = weather[[u'Temp (C)']].copy()
    print(temps.head())
    # Hour of day (0-23) pulled from the DatetimeIndex.
    temps.loc[:, 'Hour'] = weather.index.hour
    print(temps.head())
    # One figure per aggregator: median first, then mean.
    for reducer in (np.median, np.mean):
        temps.groupby('Hour').aggregate(reducer).plot()
        plt.show()
import pandas as pd
import __init__
import matplotlib.pyplot as plt
import src.load_data.loader as data_loader
import numpy as np

if __name__ == "__main__":
    # Clean the free-text 'Incident Zip' column of the NYC 311 dataset.
    path = data_loader.dataset_path("non-emergency_calls", "311-service-requests.csv")
    # Treat these sentinel strings as missing values on read; keep zips as
    # strings so leading zeros survive.
    na_values = ['NO CLUE', 'N/A', '0']
    requests = pd.read_csv(path, na_values=na_values, dtype={'Incident Zip': str})
    # Truncate ZIP+4 values (e.g. "11211-1234") to the 5-digit prefix.
    requests['Incident Zip'] = requests['Incident Zip'].str.slice(0, 5)
    # '00000' is a placeholder, not a real ZIP — mark it as missing.
    zero_zips = requests['Incident Zip'] == '00000'
    requests.loc[zero_zips, 'Incident Zip'] = np.nan
    unique_zips = requests['Incident Zip'].unique().astype(str)
    unique_zips.sort()
    #print(unique_zips)
    zips = requests['Incident Zip']
    # ZIPs beginning with 0 or 1 — presumably the NY-area prefixes — count
    # as "close"; everything else that is non-null counts as "far".
    is_close = zips.str.startswith('0') | zips.str.startswith('1')
    is_far = ~(is_close) & zips.notnull()
    print(zips[is_far].unique())
    print(zips[is_close].unique())
    # NOTE(review): this statement continues past the visible end of the file.
    print(requests[is_far][['Incident Zip', 'Descriptor',