-
Notifications
You must be signed in to change notification settings - Fork 0
/
valchain.py
104 lines (88 loc) · 2.48 KB
/
valchain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from wikipedia import wikipedia
import sys
import requests
import lxml
from lxml import etree
import beautifulsoup
from wikipedia.wikipedia import WikipediaPage
# Strategy Value Chain Tool: search Wikipedia for a company name, then
# filter and score the hits to find likely company articles.
print('-------------------------------------')
print('| Strategy Value Chain Tool |')
print('-------------------------------------')
query = input("Company Search: ")
# Top-10 Wikipedia search hits for the query string.
resultlist = wikipedia.search(query, 10)
a = len(resultlist)  # total results; denominator for the progress meter
narrowlist = []      # results whose summary suggests a company
b = 0                # progress counter
print("Crawling for companies...")
# Keep only results whose one-sentence summary mentions a company.
for thing in resultlist:
    b = b + 1
    sys.stdout.write('\r')
    sys.stdout.write('%.0f%% complete' % (b / a * 100,))
    sys.stdout.flush()
    try:
        summary = wikipedia.summary(thing, 1)
        # BUG FIX: str.find() returns -1 (truthy!) on a miss and 0 (falsy)
        # for a match at position 0, so the old `if summary.find(...)` test
        # was inverted in practice. Use substring membership instead.
        if 'company' in summary or 'corporation' in summary:
            narrowlist.append(thing)
    except Exception:
        # wikipedia lib raises DisambiguationError/PageError for ambiguous
        # or missing titles; skip those results rather than abort the crawl.
        continue
print('\n')
print(narrowlist)
print("Scoring search results...")
a = len(narrowlist)  # denominator for the progress meter
dive = []            # titles that look like company names -> fetch in depth
consolation = []     # everything else
b = 0
# Name fragments that mark a title as a likely company article.
company_markers = ('Inc.', 'Company', 'Corporation', 'Co.', 'Group')
# Rank each title by likelihood of it being a company.
# BUG FIX: str.find() returns -1 (truthy) on a miss, so the old
# `if thing.find('Inc.'):` chain matched almost everything; use `in`.
for thing in narrowlist:
    b = b + 1
    sys.stdout.write('\r')
    sys.stdout.write('%.0f%% complete' % (b / a * 100,))
    sys.stdout.flush()
    if any(marker in thing for marker in company_markers):
        dive.append(thing)
    else:
        consolation.append(thing)
print('\n')
print("Digging for company info...")
a = len(dive)  # denominator for the progress meter
b = 0
c = []  # canonical Wikipedia URLs collected for the candidate companies
for thing in dive:
    b = b + 1
    sys.stdout.write('\r')
    sys.stdout.write('%.0f%% complete' % (b / a * 100,))
    sys.stdout.flush()
    # Resolve the article title to a page and record its canonical URL.
    page = wikipedia.page(str(thing))
    url2 = page.url
    c.append(url2)
    req = requests.get(url2)
    # BUG FIX: Wikipedia pages are HTML, not well-formed XML, so
    # etree.fromstring() raises XMLSyntaxError on them. Parse with the
    # lenient HTML parser instead.
    store = etree.HTML(req.text)
    # Infobox row 11 (row index is page-dependent; see scratch notes below).
    output = store.xpath('//*[@id="mw-content-text"]/div/table[1]/tbody/tr[11]')
    #print(output)
print('\n')
for thing in c:
    print(thing)
# Scratch notes: candidate XPath selectors for infobox fields.
# Row indices (tr[11], tr[16], tr[17]) vary from article to article, so
# these are page-specific and will need to be located dynamically:
#   '//*[@id="mw-content-text"]/div/table[1]/tbody/tr[17]/th'            -- field label
#   '//*[@id="mw-content-text"]/div/table[1]/tbody/tr[17]/td'            -- field value
#   '//*[@id="mw-content-text"]/div/table[1]/tbody/tr[11]/td/span/text()'
#   '//*[@id="mw-content-text"]/div/table[1]/tbody/tr[16]/td/span/text()'
# Planned fields to extract from the infobox:
#   type               -- e.g. "Public"
#   traded_as          -- e.g. "NYSE: EMN"; split at ':' into exchange + ticker
#   industry
#   revenue