-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
159 lines (125 loc) · 5.67 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
from bs4 import BeautifulSoup
import requests
import stock_performance as sp
import pandas as pd
import logging
import create_db
import configparser
def _parse_screener_rows(table_elements, base_url, n_columns):
    """Extract stock rows and stock-page links from the screener table rows.

    Args:
        table_elements: iterable of BeautifulSoup ``<tr>`` tags.
        base_url: prefix prepended to each row's anchor ``href``.
        n_columns: number of leading ``<td>`` cells to read from every row.

    Returns:
        A ``(rows, links)`` tuple. ``rows`` holds one list of ``n_columns``
        cell texts per data row; ``links`` holds ``[symbol, url]`` pairs,
        one per distinct url.
    """
    rows = []
    links = []
    seen_symbols = set()
    seen_urls = set()
    for result in table_elements:
        data = result.find_all('td')
        # rows without any <td> cell (presumably the header row) carry no data
        if not data:
            continue
        # a short row cannot be unpacked into the expected columns; skip it
        # instead of letting an IndexError abort the whole scrape
        if len(data) < n_columns:
            logging.warning('Error getting "{}" data'.format(result))
            continue
        values = [cell.getText() for cell in data[:n_columns]]
        symbol = values[0]
        if symbol not in seen_symbols:
            seen_symbols.add(symbol)
            print('{} added to stocks_symbol_list'.format(symbol))
        rows.append(values)
        # the stock page link is the href of the first anchor in the row;
        # a row without an anchor (or without href) simply contributes no link
        anchor = result.find("a")
        href = anchor.get('href') if anchor is not None else None
        if not href:
            continue
        stock_url = base_url + href
        if stock_url not in seen_urls:
            seen_urls.add(stock_url)
            links.append([symbol, stock_url])
    return rows, links


def stocks_screener(command=None):
    """
    Get market data tables of stocks screener filter

    screener details (as encoded in the request url):
        stocks
        volume > 1M               (TradeVolMin=1000000)
        market capitalization > 100M  (MktCapMin=100; unit presumably millions)
        exchange = all [NYSE, NASDAQ, AMEX]
        industry = all

    The first table of the result page is parsed into a DataFrame which is
    appended to the 'stocks_screener' table through ``sp.df_to_db``.

    Args:
        command: ``None`` (default) additionally replaces the
            'stocks_links_list' database table with the scraped
            symbol/url pairs; ``'full'`` prints those pairs instead.

    Returns:
        None. When the page cannot be fetched, or contains no table, an
        error is reported and nothing is written to the database.
    """
    print('command = ', command)
    TABLE_NAME = 'stocks_screener'
    DB_NAME = 'market_scraper'
    REQUEST_TIMEOUT = 30  # seconds; without it a stalled server hangs forever
    name = 'stocks screener'
    exchange_list_options = ['all', 'NYSE', 'NASDAQ', 'AMEX']
    base_url = "https://www.marketwatch.com"
    # NOTE: the original literal read 'PriceDirEnable=falsePriceDir=Up'; the
    # missing '&' merged two query parameters into one and is restored here.
    url = 'https://www.marketwatch.com/tools/stockresearch/screener/results.asp?submit=Screen&Symbol=true&' \
          'Symbol=false&ChangePct=true&ChangePct=false&FiftyTwoWeekLow=false&' \
          'CompanyName=true&CompanyName=false&Volume=true&Volume=false&' \
          'PERatio=true&PERatio=false&Price=true&Price=false&LastTradeTime=false' \
          '&MarketCap=true&MarketCap=false&Change=true&Change=false&FiftyTwoWeekHigh=false' \
          '&MoreInfo=false&SortyBy=ChangePct&SortDirection=Descending&ResultsPerPage=Fifty&' \
          'TradesShareEnable=false&TradesShareMin=&TradesShareMax=&' \
          'PriceDirEnable=false&PriceDir=Up&PriceDirPct=&LastYearEnable=false&' \
          'LastYearAboveHigh=&TradeVolEnable=true&TradeVolMin=1000000&TradeVolMax=&' \
          'BlockEnable=false&BlockAmt=&BlockTime=&PERatioEnable=false&PERatioMin=&' \
          'PERatioMax=&MktCapEnable=true&MktCapMin=100&MktCapMax=&MovAvgEnable=false&' \
          'MovAvgType=Outperform&MovAvgTime=FiftyDay&MktIdxEnable=false&' \
          'MktIdxType=Outperform&MktIdxPct=&MktIdxExchange=&Exchange=All' \
          '&IndustryEnable=false&Industry=Accounting'
    # select exchange option (default = all)
    selected_exchange = exchange_list_options[0]
    url = url.replace("Exchange=All", "Exchange={}".format(selected_exchange))
    sp.url_check(url)

    # fetch the page; connection errors and timeouts are reported, not raised
    try:
        page = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        logging.exception('request to %s failed', url)
        print("Error reading page from url: {}".format(url))
        return

    # soup.find returns None when the page holds no <table> at all
    table = BeautifulSoup(page.text, 'html.parser').find('table')
    if table is None:
        logging.error('no table found in page %s', url)
        print("Error reading page from url: {}".format(url))
        return

    columns_list = ['symbol', 'company', 'last_price', 'price_change',
                    'change_percentage', 'volume', 'pe_ratio', 'market_cap']
    rows, stocks_links_list = _parse_screener_rows(
        table.find_all('tr'), base_url, len(columns_list))

    # creating a pd DataFrame from the 'rows' table
    df = pd.DataFrame(rows, columns=columns_list)
    df['date_time'] = pd.to_datetime('now')
    # prints df for tests
    print(df)

    # uploading df to the database; a failed upload is logged with its
    # traceback and the function carries on with the remaining output
    try:
        sp.df_to_db(DB_NAME, TABLE_NAME, df, option='append')
    except Exception:
        logging.exception('uploading df table to the database failed')
        print('uploading df table {} to the database failed'.format(TABLE_NAME))
    else:
        logging.info('df table {} uploaded to the database'.format(TABLE_NAME))
        print('df table {} uploaded to the database'.format(TABLE_NAME))

    print("==================")
    print(f"{name} OUTPUT")
    if command == 'full':
        print("Stocks Links List:")
        for link in stocks_links_list:
            print(link)
    if command is None:
        # creating a pd DataFrame from the 'stocks_links_list' table
        df_stocks_links_list = pd.DataFrame(stocks_links_list, columns=['symbol', 'url'])
        df_stocks_links_list['date_time'] = pd.to_datetime('now')
        # uploading df to the database
        sp.df_to_db(DB_NAME, 'stocks_links_list', df_stocks_links_list, 'replace')
    return
def main(command=None):
    """Run the scraping pipeline from start to finish.

    Calls, in order: ``create_db.create_db()``, ``stocks_screener(command)``,
    ``sp.stock_key_data()`` and ``sp.stock_profile()``.

    Args:
        command: forwarded unchanged to ``stocks_screener``.
    """
    # runs first; presumably prepares the database the later steps write to
    # (confirm in create_db)
    create_db.create_db()

    # screener step, then the two stock_performance steps
    stocks_screener(command=command)
    sp.stock_key_data()
    sp.stock_profile()
# Start the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()