forked from RogerSF/Glassdoor-Sentiment-Analyzer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fortune100scraper.py
59 lines (50 loc) · 1.59 KB
/
fortune100scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from BeautifulSoup import BeautifulSoup as Soup
import soupselect; soupselect.monkeypatch()
import urllib2
import MySQLdb
class Fortune100Scraper:
    """A convenient way to get the scraped company names into a database.

    The class holds no state; it only groups the scraping routine.
    """

    def __init__(self):
        # Nothing to set up: the scraper keeps no per-instance state.
        pass

    def scrapeFortune100(self):
        """Scrape company names from CNN Money and store them in MySQL.

        Downloads the 2012 Fortune Global 500 "full list" page, selects every
        link matching ``td.cnncol2 a`` and inserts the text of each link as a
        row of ``companies(name)``.  The number of rows is not capped here:
        one insert is attempted per matching link.

        Each insert is committed on its own, so a row rejected by the
        database is rolled back and skipped without undoing earlier rows.
        Failures while downloading the page or opening the connection are
        not caught and propagate to the caller.
        """
        url = 'http://money.cnn.com/magazines/fortune/global500/2012/full_list/index.html'
        soup = Soup(urllib2.urlopen(url))
        companies = soup.findSelect('td.cnncol2 a')

        # NOTE(review): connection settings (including the password) are
        # hard-coded; consider moving them to configuration.
        con = MySQLdb.connect(
            host='silo.cs.indiana.edu',
            user='harry',
            passwd='rutabega',
            db='glassdoor',
            port=14272
        )
        try:
            cursor = con.cursor()
            try:
                query = """INSERT INTO companies (name) VALUES (%s)"""
                for company in companies:
                    name = company.string
                    if name is None:
                        # The link has no single text child, so there is no
                        # name to store; binding None would insert NULL.
                        continue
                    try:
                        # Parameters must be a sequence: (name,) is a
                        # one-element tuple, whereas (name) is just the
                        # string itself.
                        cursor.execute(query, (name,))
                        con.commit()
                    except MySQLdb.Error:
                        # Best effort: drop this row and carry on.
                        con.rollback()
            finally:
                cursor.close()
        finally:
            # Always release the connection, even on unexpected errors.
            con.close()
def getCompanyList():
    """Retrieve the company names stored in the ``companies`` table.

    Returns:
        A list holding the ``name`` column of every row read, in the order
        the rows were fetched.  If the database reports an error while
        querying or fetching, the transaction is rolled back and the names
        read up to that point (possibly none) are returned.

    Failures while opening the connection are not caught and propagate to
    the caller.
    """
    # NOTE(review): connection settings (including the password) are
    # hard-coded; consider moving them to configuration.
    con = MySQLdb.connect(
        host='silo.cs.indiana.edu',
        user='harry',
        passwd='rutabega',
        db='glassdoor',
        port=14272
    )
    companiesList = []
    try:
        cursor = con.cursor()
        try:
            cursor.execute("""SELECT name FROM companies""")
            # fetchone() returns None once the result set is exhausted.
            for row in iter(cursor.fetchone, None):
                companiesList.append(row[0])
        except MySQLdb.Error:
            # Best effort: keep whatever was read before the failure.
            con.rollback()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even on unexpected errors.
        con.close()
    return companiesList