/
marketmoodssin.py
194 lines (156 loc) · 5.37 KB
/
marketmoodssin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#marketmoodssin
#Market Psychology Analysis Tool
#With MySQL Database leverage
#
from __future__ import division
#setup
# Tunable thresholds used by get_relevant_words when filtering page tokens.
# A token must be at least this many characters long to be kept.
min_word_length = 2
# A token must occur at least this many times in the fetched text to be kept.
min_word_frequency = 4
# NOTE(review): not referenced anywhere else in this file -- confirm whether
# it is still needed.
default_search_iterations = 4
#Import statements
import urllib, re, pymysql, ystockquote, nltk, urllib2, json
from nltk import FreqDist
#Connect to Database
# Module-level connection and cursor shared by every query in this script.
db = pymysql.connect(host="localhost", user="root", passwd="", db="market_moods_db_backup")
cur = db.cursor()
#query the database for all company names
# Rows are read positionally further down: row[0] is used as the company
# name and row[1] as the ticker symbol.
cur.execute("SELECT * FROM companies")
companies = cur.fetchall()
# Maps a lower-cased word to a two-item list
# [sum of stock changes, number of occurrences] accumulated during this run.
single_day_valence_dict = {}
def get_company_urls(c_name):
    """Search the web for a company name and return the result URLs.

    Args:
        c_name: Company name to search for, as plain (unescaped) text.

    Returns:
        A list holding the ``visibleUrl`` field of every search result, in
        the order the service returned them.  The list is empty when the
        reply carries no result payload.

    Raises:
        urllib2.URLError: If the search request itself fails.
        ValueError: If the reply is not valid JSON.
    """
    # Percent-encode the whole name rather than only its spaces: characters
    # such as '&', '#' or '+' (e.g. "AT&T") would otherwise be interpreted
    # as query-string syntax and truncate or alter the search term.
    if isinstance(c_name, unicode):
        # urllib.quote works on byte strings; encode first so non-ASCII
        # names do not fail inside it.
        c_name = c_name.encode('utf-8')
    search_line = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=' + urllib.quote(c_name, safe='')

    response = urllib2.urlopen(search_line)
    try:
        search_results = json.loads(response.read())
    finally:
        # Release the socket even when reading or parsing fails.
        response.close()

    # NOTE(review): the service is understood to answer with
    # "responseData": null when a request is rejected or throttled --
    # confirm.  Either way, a missing payload now yields an empty list
    # instead of a TypeError/KeyError.
    response_data = search_results.get('responseData') or {}
    return [result['visibleUrl'] for result in response_data.get('results', [])]
def get_relevant_words(c_name):
    """Return the significant words found on the web pages for a company.

    The pages are the ones returned by ``get_company_urls(c_name)``.  Their
    HTML is stripped, tokenized, and filtered down to tokens that

    * are at least ``min_word_length`` characters long,
    * occur at least ``min_word_frequency`` times across all fetched pages,
    * are not English stop words (compared case-insensitively).

    Args:
        c_name: Company name to search for.

    Returns:
        A sorted list of the distinct tokens that pass the filters.  Case is
        preserved, so "Apple" and "apple" may both appear.
    """
    pages = []
    for url in get_company_urls(c_name):
        # The URLs come from the search result's 'visibleUrl' field and may
        # lack a scheme; urllib.urlopen would then treat the string as a
        # local file path, so default to http.
        if '://' not in url:
            url = 'http://' + url
        try:
            page = urllib.urlopen(url)
            try:
                html = page.read()
            finally:
                page.close()
        except IOError as err:
            # One unreachable page should not abort the whole analysis.
            print("Skipping " + url + ": " + str(err))
            continue
        pages.append(nltk.clean_html(html))

    # Tokenize the combined text and count how often each token occurs.
    tokens = nltk.wordpunct_tokenize(" ".join(pages))
    fdist = FreqDist(tokens)

    # A set makes the per-token stop-word test O(1) instead of a list scan.
    stopwords = set(nltk.corpus.stopwords.words('english'))
    words = sorted(
        w for w in set(tokens)
        if len(w) >= min_word_length
        and fdist[w] >= min_word_frequency
        and w.lower() not in stopwords
    )
    return words
def _parse_stock_change(raw_change):
    """Convert a quoted change string such as '+1.23' into a float.

    Args:
        raw_change: The change value as returned by the quote service.

    Returns:
        The signed change as a float, or None when the value is not a
        finite number (for example 'N/A').
    """
    try:
        change = float(raw_change)
    except (TypeError, ValueError):
        return None
    # float() also accepts 'nan' and 'inf'; neither is a usable change.
    if change != change or abs(change) == float('inf'):
        return None
    return change


# Main pass: for every company, fold today's stock change into the word
# valences, store a prediction for tomorrow, and score the prediction that
# was stored for today.
for row in companies:
    stock_name = row[0]
    stock_ticker = row[1]

    # Parse the change with its sign in one step.  A value without a leading
    # '+'/'-' keeps its real magnitude instead of being treated as 0.
    stock_change = _parse_stock_change(ystockquote.get_change(stock_ticker))
    if stock_change is None:
        print(stock_name + ": No Change in Stock Price")
        continue

    # Words are matched case-insensitively; duplicates that arise from
    # lower-casing are kept and counted, one entry per original token.
    words = [word.lower() for word in get_relevant_words(stock_name)]
    company_word_count = len(words)
    overall_company_valence = 0

    for word in words:
        # Words come from scraped web pages, so they are always passed as
        # bound parameters and never spliced into the SQL text.
        cur.execute("SELECT * FROM valences WHERE word = %s", (word,))
        wordrows = cur.fetchall()
        if wordrows:
            # Column 1 is read as the accumulated valence and column 2 as
            # the number of observations so far.
            valence = wordrows[0][1] + abs(stock_change)
            count = wordrows[0][2] + 1
        else:
            valence = abs(stock_change)
            count = 1
        average_valence = valence / count
        overall_company_valence = overall_company_valence + average_valence
        #update global valences
        cur.execute("CALL updatevalence(%s, %s, %s, %s)",
                    (word, valence, count, average_valence))

    #create single day valence list
    for word in words:
        day_valence, day_count = single_day_valence_dict.get(word, (0, 0))
        single_day_valence_dict[word] = [day_valence + stock_change, day_count + 1]

    #create prediction for tomorrow
    if company_word_count:
        overall_company_valence = overall_company_valence / company_word_count
    else:
        # No usable words were found for this company.
        overall_company_valence = 0.0
    cur.execute("CALL create_prediction(%s, %s, %s)",
                (stock_ticker, str(overall_company_valence), stock_name))

    #update movement table object with this information
    cur.execute("CALL fetch_movement_today(%s, %s, %s, %s)",
                (stock_ticker, 1, 1, 1))
    movement = cur.fetchall()
    if not movement:
        continue

    predicted_move = movement[0][2]
    try:
        difference = stock_change - predicted_move
    except TypeError:
        # The stored prediction is not numeric (e.g. NULL): fall back to a
        # neutral prediction and report no difference.
        predicted_move = 1
        difference = 0

    # NOTE(review): the original indentation was lost, so it is unclear
    # whether this guard belonged to the fallback branch above.  As written,
    # a real change of 0 is recorded as 1 -- confirm that this is intended.
    if stock_change == 0:
        stock_change = 1

    try:
        percentage = min(stock_change, predicted_move) / max(stock_change, predicted_move)
    except (TypeError, ZeroDivisionError):
        percentage = 0

    cur.execute("CALL evaluate_prediction(%s, %s, %s, %s, %s)",
                (stock_ticker, stock_change, difference, percentage, stock_name))
# Second pass: for every company, average the single-day valences of its
# words and store the result as the single-day prediction.
for row in companies:
    stock_name = row[0]
    stock_ticker = row[1]
    # get_relevant_words is the word-extraction helper defined in this file;
    # the name previously called here does not exist and raised NameError.
    words = get_relevant_words(stock_name)

    overall_company_valence = 0
    for word in words:
        valenceArray = single_day_valence_dict.get(word.lower())
        if valenceArray is None:
            # The word was not recorded during this run; it contributes
            # nothing to the sum but still counts in the divisor below.
            continue
        averageWordValence = valenceArray[0] / valenceArray[1]
        overall_company_valence = overall_company_valence + averageWordValence

    if words:
        overall_company_valence = overall_company_valence / len(words)
    else:
        # No words found: store a neutral prediction instead of failing
        # with a division by zero.
        overall_company_valence = 0.0

    #upload to database
    # Bound parameters keep quoting correct whatever the ticker contains.
    cur.execute("CALL create_single_day_prediction(%s, %s)",
                (stock_ticker, overall_company_valence))
#commit and close connection
#disable this when testing the script
# Commit the statements executed through the cursor during this run, then
# release the cursor and the connection.
db.commit()
cur.close()
db.close()
print "MarketMoods run complete"
# '\a' is the ASCII bell character; printing it signals the end of the run
# on terminals that honour it.
print('\a')