-
Notifications
You must be signed in to change notification settings - Fork 0
/
divvydata.py
83 lines (69 loc) · 2.29 KB
/
divvydata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from urllib2 import urlopen
from pandas import DataFrame, Series, isnull, Panel, HDFStore
import matplotlib.pyplot as plt, numpy as np, json, MySQLdb
from datetime import datetime
from time import sleep
url = 'http://www.divvybikes.com/stations/json'
def strdatetime(string):
d,t,m = string.split()
d = d.split('-')
t = t.split(':')
if m == 'PM': t[0] = (int(t[0])+12) % 24
return datetime(*(map(int,d+t)))
def get_df():
web = urlopen(url).read()
data = json.loads(web)
timestamp = strdatetime(data[data.keys()[0]])
data = data[data.keys()[1]]
df = DataFrame(data)
df['timestamp'] = timestamp
return df, timestamp
class panel_data(object):
def connect(self,dlist):
self.conn = MySQLdb.Connect(*dlist)
self.cur = self.conn.cursor()
def close(self):
self.cur.close()
self.conn.close()
def add_df(self):
self.df,self.timestamp = get_df()
return self.timestamp,self.df
def transpose(self,id):
foo = pd.transpose(1,0,2)
return foo[id]
def scrape(self,n=30):
foo = []
for i in xrange(n):
print i
foo.append(self.add_df())
sleep(60)
self.pd = Panel(dict(foo))
def export(self):
store = HDFStore('hdf5/divvy.h5')
store['divvy'] = self.pd
store.close()
def from_sql(self):
self.cur.execute('SELECT * FROM divvy;')
data = self.cur.fetchall()
return DataFrame(np.array(data),columns=['id','location_id','latitude','longitude','timestamp','stationName','availableBikes','availableDocks'])
def to_sql(self):
df = self.df[['id','latitude','longitude','timestamp','stationName','availableBikes','availableDocks']]
for i in df.index.tolist():
self.cur.execute('INSERT INTO divvy (location_id,latitude,longitude,timestamp,stationName,availableBikes,availableDocks) VALUES (%s,%s,%s,%s,%s,%s,%s);',df.xs(i).tolist())
self.conn.commit()
def main():
run = panel_data()
run.scrape()
def anls():
store = HDFStore('hdf5/divvy.h5')
pd = store['divvy']
store.close()
df = reduce(lambda x,y: x.append(y),[pd[i] for i in pd.items])
df.index = df.timestamp
foo = map(lambda x: x[1],df.groupby('id'))
for i in range(len(foo)): foo[i]['diff'] = foo[i].availableBikes.diff()
for i in range(len(foo)): foo[i]['diff'].hist(range=[-5,5],bins=20)
plt.show()
if __name__ == '__main__':
main()
anls()