-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
235 lines (204 loc) · 8.55 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
import os
from google.appengine.ext.webapp import template
import cgi
import twitter
from datetime import datetime, timedelta
import stockquote
from google.appengine.api import users
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app
from google.appengine.ext import db
class Tweet(db.Model):
    """A single tweet stored from a Twitter search result.

    ``author`` is the human-readable screen name and ``user`` is the numeric
    user ID; the rest of the Twitter user record is not replicated here.
    """

    author = db.StringProperty()
    user = db.IntegerProperty()
    text = db.StringProperty(multiline=True)
    # Twitter's own status ID (entities are also keyed as "t<id>" by
    # PopulateDatabase).
    id = db.IntegerProperty()
    # Creation time as reported by Twitter.
    created_at = db.DateTimeProperty()
    # Converted from the status object's ``now`` timestamp.
    # NOTE(review): presumably the time the search was performed - confirm
    # against the twitter module.
    now = db.DateTimeProperty()
    # For retrieval: the words of the tweet text, normalised by the importer,
    # so that equality filters on ``keywords`` can find matching tweets.
    keywords = db.StringListProperty()
class Stock(db.Model):
    """A stock, represented by its ticker symbol."""

    ticker = db.StringProperty()
    # Search terms matched against ``Tweet.keywords`` to find tweets that
    # mention this stock.
    keywords = db.StringListProperty()
class StockQuote(db.Model):
    """One price observation for a Stock.

    Each quote points at its Stock through a reference property (not a
    datastore parent), which lets a historical series of values be kept.
    The quotes of a stock are reachable through ``stock.quotes``.
    """

    stock = db.ReferenceProperty(Stock, collection_name='quotes')
    value = db.FloatProperty()
    # When the quote was recorded.
    time = db.DateTimeProperty()
class TweetBag(db.Model):
    """Tweet volume for one Stock over one 5-minute period.

    Links a Stock with a set of Tweets by storing how many matched, plus a
    single example tweet. Bags of a stock are reachable via ``stock.volume``.
    """

    stock = db.ReferenceProperty(Stock, collection_name='volume')
    # One example tweet from the period; may be unset when nothing matched.
    sample_tweet = db.ReferenceProperty(Tweet, collection_name='bags')
    # Start of the 5-min period.
    time = db.DateTimeProperty()
    # Number of matching tweets counted in the period.
    count = db.IntegerProperty()
class MainPage(webapp.RequestHandler):
    """Front page: current quote plus the most recent stored tweets."""

    def get(self):
        """Render ``index.html`` for the hard-coded query "yahoo".

        Looks up the Stock whose keywords contain the query, fetches a live
        quote for its ticker and shows the newest stored tweets that carry
        the query as a keyword.

        Responds with 404 when no Stock lists the query among its keywords
        (for example on an empty datastore).
        """
        # The Twitter API is not called here; PopulateDatabase does that,
        # which also gives (some) control over rate limits. This handler only
        # reads what is already stored.
        query = "yahoo"
        count = 20  # otherwise it 'almost' re-searches

        # Query.get() returns None on an empty result, whereas indexing
        # fetch(1)[0] would raise IndexError.
        stock = Stock.all().filter('keywords =', query).get()
        if stock is None:
            self.error(404)
            self.response.out.write("No stock found for query '%s'" % query)
            return

        now_quote = stockquote.get_quote(stock.ticker.upper())

        tweet_query = Tweet.all().filter('keywords = ', query)
        statuses = tweet_query.order('-created_at').fetch(count)

        template_values = {
            'stock': now_quote,
            'ticker': stock.ticker,
            'query': query,
            'statuses': statuses,
            'count': count,
        }
        path = os.path.join(os.path.dirname(__file__), 'index.html')
        self.response.out.write(template.render(path, template_values))
class PopulateDatabase(webapp.RequestHandler):
    """Pull the latest search results for a query into the datastore.

    Keeps the database-intensive updates separate from display and
    processing. This sub-URL (eventually obfuscated) gets the latest search
    results and adds any tweet that is not stored yet.

    Open questions from the original author: whether this needs to be called
    explicitly for new results or should just add all tweets, and whether
    disk size becomes a problem.
    """

    def get(self, query):
        """Search Twitter for ``query`` and store the tweets not yet saved.

        Args:
            query: search string captured from the request URL.

        Writes a one-line plain-text summary (number inserted, query and
        elapsed time) to the response.
        """
        api = twitter.Api()
        started = datetime.now()
        statuses = api.Search(query)

        new_tweets = []
        for status in statuses:
            # since_id can't be used to skip known tweets because every
            # query would need its own ID, so look each tweet up by key.
            key_name = "t" + str(status.id)
            if Tweet.get_by_key_name(key_name):
                continue

            # The "+0000" offset has been the same on every tweet seen so
            # far, so it is matched literally rather than parsed.
            created_datetime = datetime.strptime(
                status.created_at, "%a, %d %b %Y %H:%M:%S +0000")
            now_datetime = datetime.fromtimestamp(status.now)

            new_tweets.append(Tweet(
                created_at=created_datetime,
                author=status.user.screen_name,
                user=status.user.id,
                text=status.text,
                id=status.id,
                now=now_datetime,
                keywords=self._keywords_from_text(status.text),
                key_name=key_name,
            ))

        # Skip the datastore call when there is nothing new to store.
        if new_tweets:
            db.put(new_tweets)

        elapsed = datetime.now() - started
        # Report through the response object rather than a bare print, which
        # bypasses the handler's response. Plain text, because the query
        # comes straight from the URL.
        self.response.headers['Content-Type'] = 'text/plain'
        self.response.out.write(
            "Inserted %s tweets matching %s in %s"
            % (len(new_tweets), query, elapsed))

    @staticmethod
    def _keywords_from_text(text):
        """Split tweet text into lowercased words for keyword retrieval.

        No term extraction is attempted: the text is split on whitespace and
        each word is lowercased. When a word ends in one of , . ! ? ; every
        trailing repetition of that one character is removed.
        """
        keywords = []
        for word in text.split():
            last_char = word[-1:]
            if last_char in (",", ".", "!", "?", ";"):
                word = word.rstrip(last_char)
            keywords.append(word.lower())
        return keywords
class PopulateStock(webapp.RequestHandler):
    """Record the current price of a ticker, creating its Stock if needed."""

    def get(self, ticker):
        """Store a quote for ``ticker`` or create the Stock entity.

        Args:
            ticker: ticker symbol captured from the request URL; it is used
                as given for the datastore key name and upper-cased for the
                quote lookup.

        Writes "Successful" after storing a quote for an existing Stock, or
        "Created" after creating a new Stock. No quote is stored on the
        request that creates the Stock.
        """
        stock_price = stockquote.get_quote(ticker.upper())
        existing = Stock.get_by_key_name(ticker)

        if not existing:
            # First request for this ticker: only register the stock.
            # The keyword list is the same fixed one for every ticker.
            Stock(
                ticker=ticker,
                key_name=ticker,
                keywords=["yahoo", "yhoo", "carol bartz"],
            ).put()
            self.response.out.write("Created")
            return

        StockQuote(
            stock=existing,
            value=float(stock_price),
            time=datetime.now(),
        ).put()
        self.response.out.write("Successful")
class UserStatuses(webapp.RequestHandler):
    """Show a Twitter user's timeline using the index template."""

    def get(self, username):
        """Fetch the timeline of ``username`` live and render ``index.html``.

        Args:
            username: Twitter user name captured from the request URL.
        """
        timeline = twitter.Api().GetUserTimeline(username)
        page = os.path.join(os.path.dirname(__file__), 'index.html')
        # Only 'statuses' is supplied to the template here.
        self.response.out.write(template.render(page, {'statuses': timeline}))
class ArchiveDeltas(webapp.RequestHandler):
    """Summarise one hour of stored tweets into 5-minute TweetBag records."""

    def get(self, time):
        """Create TweetBags for the hour ending ``time`` hours ago.

        Args:
            time: how many hours back in the past to go, as a string captured
                from the request URL. The hour covered runs from
                now - time - 1 hours up to now - time hours.

        The stock is fixed to "yhoo". Responds with 400 when ``time`` is not
        a whole number and with 404 when the stock does not exist yet.
        Nothing is written to the response on success.
        """
        ticker = "yhoo"

        try:
            hours_back = int(time)
        except ValueError:
            self.error(400)
            self.response.out.write("Expected a whole number of hours")
            return

        ticker_instance = Stock.get_by_key_name(ticker)
        if ticker_instance is None:
            self.error(404)
            self.response.out.write("Unknown stock '%s'" % ticker)
            return

        # Walk backwards from start_time to end_time in 5-minute steps.
        start_time = datetime.now() - timedelta(hours=hours_back)
        end_time = start_time - timedelta(hours=1)

        # Check we aren't already covering this time period: if bags exist
        # inside the hour, stop at the most recent one.
        latest_bag = (TweetBag.all()
                      .filter("stock = ", ticker_instance)
                      .filter("time >", end_time)
                      .filter("time <", start_time)
                      .order("-time")
                      .get())
        if latest_bag is not None:
            end_time = latest_bag.time

        inserts = []
        loop_time = start_time
        while loop_time > end_time:
            window_start = loop_time - timedelta(minutes=5)
            num_tweets = 0
            sample_tweet = None
            for keyword in ticker_instance.keywords:
                tweet_query = (Tweet.all()
                               .filter("keywords = ", keyword)
                               .filter("created_at > ", window_start)
                               .filter("created_at < ", loop_time))
                # Counts are summed per keyword, so a tweet that matches
                # several keywords is counted once for each of them.
                num_tweets += tweet_query.count()
                # Keep the first tweet found for any keyword, so the sample
                # does not depend on the last keyword alone and is always
                # defined, even with an empty keyword list.
                if sample_tweet is None:
                    sample_tweet = tweet_query.get()
            inserts.append(TweetBag(
                sample_tweet=sample_tweet,
                stock=ticker_instance,
                count=num_tweets,
                time=window_start,
            ))
            loop_time = window_start

        # Skip the datastore call when the period is already covered.
        if inserts:
            db.put(inserts)
class StockDisplay(webapp.RequestHandler):
    """Show stored quotes and tweet volume for the stock over a period."""

    def get(self, period):
        """Render ``stock.html`` for the last ``period`` hours.

        Args:
            period: number of hours to look back, as a string captured from
                the request URL; an empty string means 24.

        The stock is fixed to "yhoo". Responds with 400 when ``period`` is
        not a whole number and with 404 when the stock does not exist yet.
        """
        # Default
        ticker = "yhoo"
        if period == "":
            period = "24"

        try:
            hours = int(period)
        except ValueError:
            self.error(400)
            self.response.out.write("Expected a whole number of hours")
            return

        # Get last $period hours ticker data from database
        yesterday_now = datetime.now() - timedelta(hours=hours)
        ticker_instance = Stock.get_by_key_name(ticker)
        if ticker_instance is None:
            self.error(404)
            self.response.out.write("Unknown stock '%s'" % ticker)
            return

        # The original note expects 288 results for the default period
        # (presumably one quote every 5 minutes for 24 hours). Up to 1000 are
        # fetched to leave room for individual non-cron queries and longer
        # periods.
        stock_query = (StockQuote.all()
                       .filter("stock = ", ticker_instance.key())
                       .filter("time > ", yesterday_now)
                       .order("-time"))
        stockquotes = stock_query.fetch(1000)

        # Tweet volume comes from the pre-aggregated TweetBag records rather
        # than from querying Tweet per keyword on every page view.
        tweet_bag_query = (TweetBag.all()
                           .filter("stock = ", ticker_instance)
                           .filter("time >", yesterday_now)
                           .order("-time"))
        tweet_bag = tweet_bag_query.fetch(1000)

        template_values = {
            'tweets': tweet_bag,
            'stockquotes': stockquotes,
            'since': yesterday_now,
            'ticker': ticker.upper(),
        }
        path = os.path.join(os.path.dirname(__file__), 'stock.html')
        self.response.out.write(template.render(path, template_values))
# URL routing table. The text captured by each "(.*)" group is passed to the
# handler's get() as its positional argument.
_ROUTES = [
    ('/', MainPage),
    ('/populate/(.*)', PopulateDatabase),
    ('/check_stock/(.*)', PopulateStock),
    ('/user/(.*)', UserStatuses),
    ('/stock/(.*)', StockDisplay),
    ('/deltas/(.*)', ArchiveDeltas),
]

# NOTE(review): debug=True is kept as in the original; confirm whether it
# should stay enabled outside development.
application = webapp.WSGIApplication(_ROUTES, debug=True)
def main():
    """Run the module-level WSGI ``application`` with App Engine's runner."""
    run_wsgi_app(application)


# Script entry point: only runs when the module is executed directly.
if __name__ == "__main__":
    main()