Example #1
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import src.load_data.loader as data_loader


def get_data(scaled=False):
    url = data_loader.dataset_path("housing", "housing.csv")
    housing = pd.read_csv(url)
    housing = housing.dropna()
    # Drop the categorical column and the target to get the feature matrix.
    housing_train = housing.drop(["ocean_proximity", "median_house_value"],
                                 axis=1)
    housing_target = housing["median_house_value"]
    print("Housing data:")
    print(housing_train.head())
    x_train = housing_train.values
    m, n = x_train.shape
    if scaled:
        scaler = StandardScaler()
        x_train = scaler.fit_transform(x_train)
    # Prepend a column of ones so a bias term can be learned.
    x_train_plus_bias = np.c_[np.ones((m, 1)), x_train]
    y_train = housing_target.values.reshape(-1, 1)
    return x_train_plus_bias, y_train
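
A minimal usage sketch, not part of the original snippet: with the bias column already prepended, the closed-form normal equation gives the least-squares weights directly (np.linalg.pinv would be the numerically safer choice):

x, y = get_data(scaled=True)
# theta = (X^T X)^{-1} X^T y: one weight per column, including the bias.
theta = np.linalg.inv(x.T @ x) @ x.T @ y
print(theta)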
Example #2
    # `training` switches batch-norm layers between train and inference mode.
    training = tf.placeholder_with_default(False, shape=(), name="training")
    # Ops that update the batch-norm moving statistics; they must be run
    # together with the training op.
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    n_epochs = 15
    batch_size = 100

    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint("./"))
        for epoch in range(n_epochs):
            for batch_index in range(y_train.shape[0] // batch_size):
                b_start = batch_size * batch_index
                b_end = b_start + batch_size
                X_batch, y_batch = X_train[b_start:b_end], y_train[b_start:b_end]
                sess.run([training_op, extra_update_ops],
                         feed_dict={training: True, X: X_batch, y: y_batch})
            acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            acc_val = accuracy.eval(feed_dict={X: X_val, y: y_val})
            print("Epoch:", epoch, "train acc:", acc_train, "val acc:", acc_val)



if __name__ == "__main__":
    image_path = data_loader.dataset_path("mnist", "mnist.npz")
    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data(path=image_path)
    n_features = 28 * 28
    # Flatten the 28x28 images and scale pixel values to [0, 1].
    x_train = x_train.reshape(-1, n_features)
    x_train = x_train / 255
    x_test = x_test.reshape(-1, n_features)
    x_test = x_test / 255

    # load_model_and_run(x_train, y_train, x_test, y_test)
    retrain_model(x_train, y_train, x_test, y_test)
Example #3
import sys
import os
import pandas as pd
import matplotlib.pyplot as plt

root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(root)
import src.load_data.loader as data_loader


if __name__ == '__main__':
    plt.rcParams['figure.figsize'] = (15, 10)
    bike_url = 'https://raw.githubusercontent.com/jvns/pandas-cookbook/master/data/bikes.csv'
    dest_folder = 'bikes/'

    # fetch the data from GitHub
    data_loader.fetch_data(bike_url, dest_folder)

    data_path = data_loader.dataset_path(dest_folder, 'bikes.csv')
    print("Loading data from:\n" + data_path)
    df = pd.read_csv(data_path,
                     sep=';',
                     encoding='latin1',
                     parse_dates=['Date'],
                     dayfirst=True,
                     index_col='Date')
    print(df.head(10))

    print(df['Berri 1'].head(10))

    #df['Berri 1'].plot()
    #plt.show()
    df.plot()
    plt.show()




Example #4
import pandas as pd
from sklearn.model_selection import train_test_split
import src.load_data.loader as data_loader

def get_data():
    housing_path = data_loader.dataset_path("housing", "housing.csv")
    df = pd.read_csv(housing_path)
    # Reproducible 80/20 split.
    train, test = train_test_split(df, test_size=0.2, random_state=42)
    return train, test
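
A quick usage sketch (illustrative only, not from the original):

train, test = get_data()
print(len(train), len(test))  # test_size=0.2 gives roughly a 4:1 ratio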
Example #5
        ypred = lin_reg.predict(xline)
        plt.scatter(X[:, 1], y)
        plt.plot(xline[:, 1], ypred, color='red')
        plt.ylim(0, 30)
        plt.show()


def logit(x):
    # Despite its name this is the logistic (sigmoid) function, the
    # inverse of the logit; np.exp(-x) is exact where 2.7182 was not.
    return 1 / (1 + np.exp(-x))


if __name__ == "__main__":
    #url = "https://gist.githubusercontent.com/curran/a08a1080b88344b0c8a7/raw/d546eaee765268bf2f487608c537c05e22e4b221/iris.csv"
    #data_loader.fetch_data(url, "iris", "iris.csv")
    flowertype = "virginica"
    data_path = data_loader.dataset_path("iris", "iris.csv")
    df = pd.read_csv(data_path)
    X = df.values[:, 3:4]  # petal width, the single feature
    y = (df.values[:, 4] == flowertype).astype(int)
    log_reg = LogisticRegression()
    log_reg.fit(X, y)
    xnew = np.linspace(0, 3, 100).reshape(-1, 1)
    y_proba = log_reg.predict_proba(xnew)
    plt.plot(xnew, y_proba[:, 1], label=flowertype)
    plt.plot(xnew, y_proba[:, 0], label="not " + flowertype)
    plt.scatter(X[y == 1],
                np.zeros(X[y == 1].shape),
                label=flowertype + " instance")
    plt.scatter(X[y == 0],
                np.ones(X[y == 0].shape),
                label="non " + flowertype + " instance")
Example #6
import pandas as pd
import matplotlib.pyplot as plt
import __init__
import src.load_data.loader as data_loader

if __name__ == "__main__":
    csv_path = data_loader.dataset_path("bikes", "bikes.csv")
    bikes = pd.read_csv(csv_path,
                        sep=';',
                        encoding='latin1',
                        parse_dates=['Date'],
                        dayfirst=True,
                        index_col='Date')
    berri_bikes = bikes[['Berri 1']].copy()
    # Tag each row with its day of week (0 = Monday ... 6 = Sunday).
    berri_bikes.loc[:, 'weekday'] = berri_bikes.index.weekday
    weekday_counts = berri_bikes.groupby('weekday').sum()
    weekday_counts.index = [
        'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
        'Sunday'
    ]
    weekday_counts.plot(kind='bar')
    plt.show()
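
An equivalent sketch, assuming a pandas version that provides DatetimeIndex.day_name() (0.23+), which labels the rows directly from the index instead of renaming them afterwards:

    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday']
    weekday_counts = (berri_bikes
                      .groupby(berri_bikes.index.day_name())['Berri 1']
                      .sum()
                      .reindex(day_order))
    weekday_counts.plot(kind='bar')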
Example #7
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
import os
root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(root)
import src.load_data.loader as data_loader

if __name__ == "__main__":
    src_dir_name = 'non-emergency_calls'
    datafile_name = '311-service-requests.csv'
    data_path = data_loader.dataset_path(src_dir_name, datafile_name)
    complaints = pd.read_csv(data_path)

    noise_complaints = complaints[complaints['Complaint Type'] == "Noise - Street/Sidewalk"]
    noise_complaints_counts = noise_complaints['Borough'].value_counts()
    total_complaint_counts = complaints['Borough'].value_counts()
    print(noise_complaints_counts)
    print(total_complaint_counts)
    # Share of each borough's complaints that are street/sidewalk noise.
    (noise_complaints_counts / total_complaint_counts).plot(kind='bar')
    plt.show()


Example #8
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import __init__
import src.load_data.loader as data_loader

if __name__ == "__main__":
    local_path = data_loader.dataset_path("weather", "weather_2012.csv")
    weather_2012 = pd.read_csv(local_path,
                               parse_dates=True,
                               index_col='Date/Time')
    weather_description = weather_2012['Weather']
    is_snowing = weather_description.str.contains("Snow")
    # One value per day: 1.0 if it snowed at any hour, else 0.0.
    is_snowing = is_snowing.astype(float).resample('D').max()
    # Number of days with snow in each month.
    snow_totals = is_snowing.resample('M').sum()
    # snow_totals.plot(kind='bar')
    avg_temperature = weather_2012['Temp (C)'].resample('M').mean()
    min_temperature = weather_2012['Temp (C)'].resample('M').min()
    min_temperature.name = 'Min Temp'
    snow_totals.name = 'Days snowed'
    avg_temperature.name = 'Average Temp'
    stats = pd.concat([snow_totals, min_temperature], axis=1)
    print(stats)
    stats.plot(kind='bar', subplots=True)
    plt.axhline(0, color='black')

    plt.subplots_adjust(bottom=0.3)
    plt.show()
Example #9
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import __init__
import src.load_data.loader as data_loader

if __name__ == "__main__":
    url = 'https://raw.githubusercontent.com/jvns/pandas-cookbook/master/data/weather_2012.csv'
    dest_folder = 'weather/'
    #data_loader.fetch_data(url, dest_folder)
    data_file = 'weather_2012.csv'
    local_path = data_loader.dataset_path(dest_folder, data_file)
    weather_2012_final = pd.read_csv(local_path,
                                     index_col='Date/Time',
                                     parse_dates=True)
    temperatures = weather_2012_final[['Temp (C)']].copy()
    print(temperatures.head())
    temperatures.loc[:, 'Hour'] = weather_2012_final.index.hour
    print(temperatures.head())
    # Median, then mean, temperature for each hour of the day.
    temperatures.groupby('Hour').median().plot()
    plt.show()
    temperatures.groupby('Hour').mean().plot()
    plt.show()
Example #10
import pandas as pd
import __init__
import matplotlib.pyplot as plt
import src.load_data.loader as data_loader
import numpy as np

if __name__ == "__main__":
    path = data_loader.dataset_path("non-emergency_calls",
                                    "311-service-requests.csv")
    na_values = ['NO CLUE', 'N/A', '0']
    requests = pd.read_csv(path,
                           na_values=na_values,
                           dtype={'Incident Zip': str})

    # Truncate 9-digit "zip+4" values to the standard 5 digits.
    requests['Incident Zip'] = requests['Incident Zip'].str.slice(0, 5)

    zero_zips = requests['Incident Zip'] == '00000'
    requests.loc[zero_zips, 'Incident Zip'] = np.nan

    unique_zips = requests['Incident Zip'].unique().astype(str)
    unique_zips.sort()
    #print(unique_zips)

    zips = requests['Incident Zip']
    # Zip codes near NYC (the Northeast) start with 0 or 1; the rest are far away.
    is_close = zips.str.startswith('0') | zips.str.startswith('1')
    is_far = ~is_close & zips.notnull()

    print(zips[is_far].unique())
    print(zips[is_close].unique())

    print(requests[is_far][['Incident Zip', 'Descriptor',