Ejemplo n.º 1
0
def knnSimulate(param):
    """Simulate a train/test pair, fit k-NN on the training draw and score it.

    Parameters
    ----------
    param : mapping
        Keys read: 'n' and 'p' (coerced with ``int`` and forwarded to
        ``SimData.simulate2Group``), 'effect' (repeated ``p`` times to form
        the ``effect`` argument) and 'k' (coerced with ``int``; the number
        of neighbours).

    Returns
    -------
    OrderedDict
        In insertion order: 'p', 'k', 'train', 'test', 'resubPreds',
        'resubProbs', 'testPreds', 'testProbs', 'resubTable',
        'resubAccuracy', 'testTable', 'testAccuracy'.  The two tables are
        ``pd.crosstab`` results of predicted versus true labels; the two
        accuracies are the fraction of predictions equal to the true label.
    """
    n = int(param['n'])
    p = int(param['p'])

    def drawSample():
        # Build a fresh effect list for every draw, as the original calls did.
        return SimData.simulate2Group(n=n, p=p, effect=[param['effect']] * p)

    trainSet = drawSample()
    # Convert once so that fitting and prediction see the same array type.
    trainX = np.array(trainSet['x'])
    trainY = np.array(trainSet['y'])
    knnFit = KNeighborsClassifier(n_neighbors=int(param['k']))
    knnFit.fit(trainX, trainY)

    testSet = drawSample()
    testX = np.array(testSet['x'])
    testY = np.array(testSet['y'])

    out = OrderedDict()
    out['p'] = p
    out['k'] = int(param['k'])
    out['train'] = trainSet
    out['test'] = testSet
    out['resubPreds'] = knnFit.predict(trainX)
    out['resubProbs'] = knnFit.predict_proba(trainX)
    out['testPreds'] = knnFit.predict(testX)
    out['testProbs'] = knnFit.predict_proba(testX)

    # Accuracy is computed from the label vectors rather than from
    # diag(crosstab): crosstab only lists labels that actually occur, so if
    # the classifier never predicts one class the table is not square and
    # its diagonal no longer lines up with the correct predictions.
    out['resubTable'] = pd.crosstab(
        Series(out['resubPreds'], index=trainSet['y'].index),
        trainSet['y']
    )
    out['resubAccuracy'] = np.mean(out['resubPreds'] == trainY)
    out['testTable'] = pd.crosstab(
        Series(out['testPreds'], index=testSet['y'].index),
        testSet['y']
    )
    out['testAccuracy'] = np.mean(out['testPreds'] == testY)
    return out
Ejemplo n.º 2
0
def knnSimulate(param, nFold=5):
    """Simulate data, cross-validate a k-NN classifier and score a test draw.

    Parameters
    ----------
    param : mapping
        Keys read: 'n' and 'p' (coerced with ``int`` and forwarded to
        ``SimData.simulate2Group``), 'effect' (repeated ``p`` times to form
        the ``effect`` argument) and 'k' (coerced with ``int``; the number
        of neighbours).
    nFold : int, optional
        Passed as ``cv`` to ``cross_val_score``.  Defaults to 5.

    Returns
    -------
    OrderedDict
        In insertion order: 'p' and 'k' (copied from ``param`` without
        coercion), 'train', 'test', 'testPreds', 'testProbs', 'cvAccuracy'
        (mean of the cross-validation scores), 'testTable' (crosstab of
        predicted versus true test labels) and 'testAccuracy' (fraction of
        test predictions equal to the true label).
    """
    n = int(param['n'])
    p = int(param['p'])

    def drawSample():
        # Build a fresh effect list for every draw, as the original calls did.
        return SimData.simulate2Group(n=n, p=p, effect=[param['effect']] * p)

    trainSet = drawSample()
    # Convert once so cross-validation, fitting and prediction all work on
    # the same array type.
    trainX = np.array(trainSet['x'])
    trainY = np.array(trainSet['y'])

    knnClass = KNeighborsClassifier(n_neighbors=int(param['k']))
    cvAccs = cross_val_score(estimator=knnClass, X=trainX, y=trainY,
                             cv=nFold)
    knnClass.fit(trainX, trainY)

    testSet = drawSample()
    testX = np.array(testSet['x'])
    testY = np.array(testSet['y'])

    out = OrderedDict()
    out['p'] = param['p']
    out['k'] = param['k']
    out['train'] = trainSet
    out['test'] = testSet
    out['testPreds'] = knnClass.predict(testX)
    out['testProbs'] = knnClass.predict_proba(testX)
    out['cvAccuracy'] = np.mean(cvAccs)
    out['testTable'] = pandas.crosstab(
        Series(out['testPreds'], index=testSet['y'].index), testSet['y'])
    # Accuracy is computed from the label vectors rather than from
    # diag(crosstab): crosstab only lists labels that actually occur, so if
    # the classifier never predicts one class the table is not square and
    # its diagonal no longer lines up with the correct predictions.
    out['testAccuracy'] = np.mean(out['testPreds'] == testY)
    return out
Ejemplo n.º 3
0
def knnSimulate(param, nFold=5):
    """Simulate data, cross-validate a k-NN classifier and score a test draw.

    Parameters
    ----------
    param : mapping
        Keys read: 'n' and 'p' (coerced with ``int`` and forwarded to
        ``SimData.simulate2Group``), 'effect' (repeated ``p`` times to form
        the ``effect`` argument) and 'k' (coerced with ``int``; the number
        of neighbours).
    nFold : int, optional
        Passed as ``cv`` to ``cross_val_score``.  Defaults to 5.

    Returns
    -------
    OrderedDict
        In insertion order: 'p' and 'k' (copied from ``param`` without
        coercion), 'train', 'test', 'testPreds', 'testProbs', 'cvAccuracy'
        (mean of the cross-validation scores), 'testTable' (crosstab of
        predicted versus true test labels) and 'testAccuracy' (fraction of
        test predictions equal to the true label).
    """
    n = int(param['n'])
    p = int(param['p'])

    def drawSample():
        # Build a fresh effect list for every draw, as the original calls did.
        return SimData.simulate2Group(
            n = n,
            p = p,
            effect = [param['effect']] * p
        )

    trainSet = drawSample()
    # Convert once so cross-validation, fitting and prediction all work on
    # the same array type.
    trainX = np.array(trainSet['x'])
    trainY = np.array(trainSet['y'])

    knnClass = KNeighborsClassifier(n_neighbors=int(param['k']))
    cvAccs = cross_val_score(estimator = knnClass,
                             X = trainX,
                             y = trainY,
                             cv = nFold)
    knnClass.fit(trainX, trainY)

    testSet = drawSample()
    testX = np.array(testSet['x'])
    testY = np.array(testSet['y'])

    out = OrderedDict()
    out['p'] = param['p']
    out['k'] = param['k']
    out['train'] = trainSet
    out['test'] = testSet
    out['testPreds'] = knnClass.predict(testX)
    out['testProbs'] = knnClass.predict_proba(testX)
    out['cvAccuracy'] = np.mean(cvAccs)
    out['testTable'] = pandas.crosstab(
        Series(out['testPreds'], index=testSet['y'].index),
        testSet['y']
    )
    # Accuracy is computed from the label vectors rather than from
    # diag(crosstab): crosstab only lists labels that actually occur, so if
    # the classifier never predicts one class the table is not square and
    # its diagonal no longer lines up with the correct predictions.
    out['testAccuracy'] = np.mean(out['testPreds'] == testY)
    return out
Ejemplo n.º 4
0
def simulate(start_time, end_time, initial_stations, times):
    """Run the clock simulation ``times`` times and collect event counts.

    Each run prints its zero-based index, builds a new
    ``SimData.GlobalClock(start_time, end_time, initial_stations)`` and
    calls its ``clockAdvance()`` method.  Afterwards the lengths of the
    clock's ``bike_return_full``, ``ebike_return_full``,
    ``three_trip_error`` and ``demandlost`` attributes are recorded.

    Returns a 4-tuple of lists (one entry per run), in the attribute order
    given above.
    """
    tracked = ('bike_return_full', 'ebike_return_full',
               'three_trip_error', 'demandlost')
    counts = dict((name, []) for name in tracked)

    for run_index in range(times):
        print(run_index)
        clock = SimData.GlobalClock(start_time, end_time, initial_stations)
        clock.clockAdvance()
        # Read the attributes in the same order on every run.
        for name in tracked:
            counts[name].append(len(getattr(clock, name)))

    return tuple(counts[name] for name in tracked)
Ejemplo n.º 5
0
import numpy
import pandas
from pandas import DataFrame
from pandas import Series
import scipy
import scipy.cluster
import sklearn
import sklearn.cluster

import SimData

# Apply the "fivethirtyeight" matplotlib style sheet.
# NOTE(review): `plt` is not imported in the visible part of this file --
# confirm matplotlib.pyplot is imported above.
plt.style.use("fivethirtyeight")

# Draw one simulated data set and keep its 'x' and 'y' entries.
# NOTE(review): presumably 'x' holds the features and 'y' the group labels --
# confirm against SimData.simulate2Group.
simDat = SimData.simulate2Group(n=40, p=2, effect=[1, 0.75])
xsim = simDat['x']
ysim = simDat['y']

## -----------------------------------------------------------------
## k-means clustering
## -----------------------------------------------------------------
# Fit a 2-cluster k-means model on xsim, then get a cluster assignment for
# each row of that same data.
k2Clusterer = sklearn.cluster.KMeans(n_clusters=2)
kmSim = k2Clusterer.fit(xsim)
kmSimClusts = kmSim.predict(xsim)


def kmplot(xy):
    x = xy['x']
    y = xy['y']
    km = sklearn.cluster.KMeans(n_clusters=2).fit(x)
    plotdata = x.copy()
    clust = km.predict(x)
Ejemplo n.º 6
0
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import DataFrame
from pandas import Series
import plotnine as gg
import sklearn as sk
from sklearn.neighbors import KNeighborsClassifier

# Switch matplotlib to interactive mode.
plt.ion()

import SimData


# Simulate a training set and fit a 3-nearest-neighbour classifier on it.
x2_train = SimData.simulate2Group(n = 100,
                                  p = 2,
                                  effect = [1.25] * 2)
knnFit = KNeighborsClassifier(n_neighbors=3)
knnFit.fit(np.array(x2_train['x']), np.array(x2_train['y']))
# Predictions on the training data itself, indexed like the true labels so
# that crosstab pairs each prediction with its label.
knnResub = Series(knnFit.predict(x2_train['x']),
                  index = x2_train['y'].index)
# Sum of the diagonal of the predicted-vs-true table.  This is a bare
# expression: its value is only displayed in an interactive session and is
# discarded when the file runs as a script.
np.sum(np.diag(pd.crosstab(knnResub, x2_train['y'])))

# Repeat the same count on a second, independently simulated data set drawn
# with the same arguments.
x2_test = SimData.simulate2Group(n = 100,
                                 p = 2,
                                 effect = [1.25] * 2)
knnTest = Series(knnFit.predict(x2_test['x']),
                 index = x2_test['y'].index)
# Bare expression again; see the note on the training-set count.
np.sum(np.diag(pd.crosstab(knnTest, x2_test['y'])))

Ejemplo n.º 7
0
    return pandaized


@pandaize
def cross_val_score_pd(estimator, X, y, **kwargs):
    """Call scikit-learn's ``cross_val_score`` through the ``pandaize`` wrapper.

    ``estimator``, ``X``, ``y`` and any keyword arguments are forwarded
    unchanged, and the function's result is returned as-is.
    NOTE(review): ``pandaize`` is defined outside this block; presumably it
    adapts pandas inputs -- confirm against its definition.
    """
    try:
        # scikit-learn >= 0.18 ships cross_val_score in model_selection.
        from sklearn.model_selection import cross_val_score
    except ImportError:
        # sklearn.cross_validation was removed in scikit-learn 0.20; it is
        # kept only as a fallback for installations older than 0.18.
        from sklearn.cross_validation import cross_val_score
    return cross_val_score(estimator, X, y, **kwargs)


# Pipeline that first keeps the 10 features ranked best by f_regression and
# then classifies with 3 nearest neighbours.
fsKnnFitter = sklearn.pipeline.Pipeline([
    ('featsel',
     sklearn.feature_selection.SelectKBest(
         sklearn.feature_selection.f_regression, k=10)),
    ('classifier', sklearn.neighbors.KNeighborsClassifier(n_neighbors=3))
])

# Simulated data with 1000 features and an all-zero effect vector.
# NOTE(review): presumably a zero effect means the groups do not differ --
# confirm against SimData.simulate2Group.
simData = SimData.simulate2Group(n=40, p=1000, effect=[0] * 1000)
x = simData['x']
y = simData['y']

# Feature selection is fitted here on ALL of x and y, before any
# cross-validation split is made.
simSelBad = sklearn.feature_selection.SelectKBest(
    sklearn.feature_selection.f_regression, k=10).fit(x, y)
xbad = simSelBad.transform(x)
# Mean 5-fold cross-validation score of 3-NN on the pre-selected features.
# NOTE(review): `mean` is not defined in the visible part of the file, and
# sklearn.cross_validation was removed in scikit-learn 0.20
# (sklearn.model_selection provides cross_val_score in newer releases).
cvbad = mean(
    sklearn.cross_validation.cross_val_score(
        estimator=sklearn.neighbors.KNeighborsClassifier(n_neighbors=3),
        X=xbad,
        y=y,
        cv=5))

cvgood = mean(
    sklearn.cross_validation.cross_val_score(estimator=fsKnnFitter,
Ejemplo n.º 8
0
import numpy as np
import pandas as pd
from pandas import DataFrame
from pandas import Series
import scipy
import scipy.cluster
import seaborn as sns
import sklearn
import sklearn.cluster

import SimData

# Switch matplotlib to interactive mode and apply the "fivethirtyeight"
# style sheet.
# NOTE(review): `plt` is not imported in the visible part of this file --
# confirm matplotlib.pyplot is imported above.
plt.ion()
plt.style.use("fivethirtyeight")

# Draw one simulated data set and keep its 'x' and 'y' entries.
# NOTE(review): presumably 'x' holds the features and 'y' the group labels --
# confirm against SimData.simulate2Group.
simDat = SimData.simulate2Group(n=40, p=2, effect=[1, 0.75])
xsim = simDat['x']
ysim = simDat['y']


## -----------------------------------------------------------------
## k-means clustering
## -----------------------------------------------------------------
# Fit a 2-cluster k-means model on xsim, then get a cluster assignment for
# each row of that same data.
k2Clusterer = sklearn.cluster.KMeans(n_clusters=2)
kmSim = k2Clusterer.fit(xsim)
kmSimClusts = kmSim.predict(xsim)

def kmplot(xy):
    x = xy['x']
    y = xy['y']
    km = sklearn.cluster.KMeans(n_clusters=2).fit(x)
Ejemplo n.º 9
0
"""

import SimData
from datetime import timedelta as td
from datetime import datetime as time
# Simulation window: starts 2017-07-01 18:00 and runs for 75 weeks.
start_time = time(2017, 7, 1, hour=18)
end_time = start_time + td(weeks=75)

# Read the initial station table inside a `with` block so the file handle is
# closed deterministically instead of being left to the garbage collector.
# NOTE(review): eval() executes whatever the file contains.  If
# stations_initial.txt only ever holds a plain Python literal,
# ast.literal_eval would be the safer parser -- confirm the file format
# before switching.
with open("stations_initial.txt") as stations_file:
    initial_stations = eval(stations_file.read())

# Multipliers applied in turn to each station's 'ecap' value to set its
# 'ebike' count.
propotion_of_ebike_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

for num_ebike in propotion_of_ebike_list:
    for station in initial_stations.keys():
        initial_stations[station]['ebike'] = round(
            initial_stations[station]['ecap'] * num_ebike)
    gc = SimData.GlobalClock(start_time, end_time, initial_stations)
    print('propotion_ebike: ', num_ebike)
    gc.clockAdvance()
    w_demandlost = list(gc.week_demandlost.values())[2:]
    w_three_trip_error = list(gc.week_three_trip_error.values())[2:]
    w_bike_return_full = list(gc.week_bike_return_full.values())[2:]
    w_ebike_return_full = list(gc.week_ebike_return_full.values())[2:]
    w_average_SOC = list(gc.week_average_SOC.values())[2:]
    w_out_of_battery = list(gc.week_out_of_battery.values())[2:]
    w_num_etrip = list(gc.week_num_etrip.values())[2:]
    w_num_alltrip = list(gc.week_num_alltrip.values())[2:]
    x = range(0, len(w_demandlost))
    with open('simdata_ebike/propotion_of_ebike_' + str(num_ebike) + '.csv',
              'w') as f:
        f.write(
            'week,bike_return_error,ebike_return_error,lost_demand,three_error,out_of_battery,average_SOC,ebike_trips,all_trips\n'
Ejemplo n.º 10
0
    return pandaized

@pandaize
def cross_val_score_pd(estimator, X, y, **kwargs):
    """Call scikit-learn's ``cross_val_score`` through the ``pandaize`` wrapper.

    ``estimator``, ``X``, ``y`` and any keyword arguments are forwarded
    unchanged, and the function's result is returned as-is.
    NOTE(review): ``pandaize`` is defined outside this block; presumably it
    adapts pandas inputs -- confirm against its definition.
    """
    try:
        # scikit-learn >= 0.18 ships cross_val_score in model_selection.
        from sklearn.model_selection import cross_val_score
    except ImportError:
        # sklearn.cross_validation was removed in scikit-learn 0.20; it is
        # kept only as a fallback for installations older than 0.18.
        from sklearn.cross_validation import cross_val_score
    return cross_val_score(estimator, X, y, **kwargs)


# Pipeline that first keeps the 10 features ranked best by f_regression and
# then classifies with 3 nearest neighbours.
fsKnnFitter = sklearn.pipeline.Pipeline([
    ('featsel', sklearn.feature_selection.SelectKBest(
            sklearn.feature_selection.f_regression, k=10)),
    ('classifier', sklearn.neighbors.KNeighborsClassifier(
            n_neighbors=3))
])

# Simulated data with 1000 features and an all-zero effect vector.
# NOTE(review): presumably a zero effect means the groups do not differ --
# confirm against SimData.simulate2Group.
simData = SimData.simulate2Group(n=40, p=1000, effect=[0]*1000)
x = simData['x']
y = simData['y']

# Feature selection is fitted here on ALL of x and y, before any
# cross-validation split is made.
simSelBad = sklearn.feature_selection.SelectKBest(
        sklearn.feature_selection.f_regression, k=10).fit(x, y)
xbad = simSelBad.transform(x)
# Mean 5-fold cross-validation score of 3-NN on the pre-selected features.
# NOTE(review): `mean` is not defined in the visible part of the file, and
# sklearn.cross_validation was removed in scikit-learn 0.20
# (sklearn.model_selection provides cross_val_score in newer releases).
cvbad = mean(sklearn.cross_validation.cross_val_score(
    estimator = sklearn.neighbors.KNeighborsClassifier(n_neighbors=3),
    X = xbad,
    y = y,
    cv = 5
))

cvgood = mean(sklearn.cross_validation.cross_val_score(
    estimator = fsKnnFitter,
Ejemplo n.º 11
0
'''

Runner script for the e-bike simulation.

'''

import SimData
from datetime import timedelta as td
from datetime import datetime as time
# Simulation window: starts 2017-07-01 18:00 and runs for 20 weeks.
start_time = time(2017, 7, 1, hour=18)
end_time = start_time + td(weeks=20)
# Read the initial station table inside a `with` block so the file handle is
# closed deterministically instead of being left to the garbage collector.
# NOTE(review): eval() executes whatever the file contains.  If
# stations_initial.txt only ever holds a plain Python literal,
# ast.literal_eval would be the safer parser -- confirm the file format
# before switching.
with open("stations_initial.txt") as stations_file:
    initial_stations = eval(stations_file.read())

# Build the clock for the whole window and advance it.
gc = SimData.GlobalClock(start_time, end_time, initial_stations)

gc.clockAdvance()

# Disabled alternative: load a previously pickled clock instead of running
# the simulation above.
#import pickle
#gc=pickle.load(open('../gc_20weeks.pickle','rb'))

#-------analyze the result------------
# Second clock covering the single hour that follows the end of the main
# window.
time_hour = end_time
end_hour = time_hour + td(hours=1)

# NOTE(review): `stations_20weeks` is not defined anywhere in the visible
# code -- confirm where it comes from (possibly the station state at the end
# of the run above).
gc1 = SimData.GlobalClock(time_hour, end_hour, stations_20weeks)

for hours in range(24):
    gc1.clockAdvance()
    with open(
            str(gc1.end_time.strftime('%m-%d-%Y_%H_%M_%S')) + '_bike.csv',
            'w') as f: