# Pair every battle URI returned by the query with its provenance URL,
# using '' when the binding carries no provenance.
for result in results['results']['bindings']:
	# 'prov' may be missing from a binding altogether (unbound variables are
	# omitted from SPARQL JSON results), so look it up defensively instead of
	# indexing result['prov'] directly, which would raise KeyError.
	p = result.get('prov', {}).get('value', '')
	battlesWithProv.append((result['battle']['value'], p))

# Drop duplicate (battle, provenance) pairs. dict.fromkeys keeps first-seen
# order, so the download order below is reproducible from run to run.
battlesWithProv = list(dict.fromkeys(battlesWithProv))
print("{} instances identified.".format(len(battlesWithProv)))
print("Downloading wikipdia pages for each identified battle.")

# Download one HTML page per battle into rawData/html/battles/<name>.html.
# Failed downloads are collected in dlErrors as
# (battle URI, attempted URL, HTTP status code as a string).
dlErrors = []

with progress.Bar(expected_size=len(battlesWithProv)) as bar:
	for i, (b, p) in enumerate(battlesWithProv):
		name = uriToName(b)
		if p != '':
			# A provenance URL is available: percent-encode it while leaving
			# the URL delimiters ':/=?()' untouched.
			url = urllib.parse.quote(p.encode('utf-8'), ':/=?()')
		else:
			# No provenance: fall back to the English Wikipedia page named
			# after the battle.
			url = "https://en.wikipedia.org/wiki/{}".format(name)
		try:
			urllib.request.urlretrieve(url, "rawData/html/battles/{}.html".format(name))
		except urllib.error.HTTPError as err:
			# Record the status the server actually returned rather than
			# labelling every HTTP failure as a 404.
			dlErrors.append((b, url, str(err.code)))
		bar.show(i+1)

print("{} pages successfully downloaded.".format(len(battlesWithProv)-len(dlErrors)))
from bs4 import BeautifulSoup
from clint.textui import progress

# Load the first column of every row of the battle list, skipping the first
# row (presumably a header - confirm against the file that writes this CSV).
# newline='' lets the csv module handle line endings itself, so newlines
# embedded in quoted fields are parsed correctly.
with open("processedData/listBattles.csv", newline='') as f:
	rows = csv.reader(f, delimiter=',', quotechar='"')
	next(rows, None)  # discard the first row; an empty file yields no battles
	battles = [row[0] for row in rows]

# Accumulators filled while scanning the downloaded HTML files.
battlesWithCombatants = []
battlesWithoutCombatants = []
print("Identifying combatants for battles in downloaded HTML files...")
with progress.Bar(expected_size=len(battles)) as bar:
	for i, battle in enumerate(battles):
		n = uriToName(battle)
		s = BeautifulSoup(open("rawData/html/battles/{}.html".format(n)), 'lxml')
		address = s.find(text="Belligerents")
		try:
			tr = address.parent.parent.nextSibling.nextSibling
			td = tr.find_all('td')
			combatants = []
			for team, cell in enumerate(td):
				anchors = cell.find_all('a')
				for a in anchors:
					if not a.img:
						href = a.get('href')
						href = href[href.rfind('/')+1:]
						href = nameToUri(href)
						if 'File:' not in href and '#' not in href:
							combatants.append((battle, team, href))
# Append one (x value, y value) pair per binding in the query results.
battlesPartOf.extend(
    (binding["x"]["value"], binding["y"]["value"])
    for binding in results["results"]["bindings"]
)

# Build a graph with one vertex per distinct value found in battlesPartOf
# and one edge per (first, second) pair, then save it as GraphML.
g = Graph()
for prop_name in ("uri", "Label"):
    g.vertex_properties[prop_name] = g.new_vertex_property("string")
# Lookup table from URI to its vertex, so each URI is added only once.
g.nv = {}

for source_uri, target_uri in battlesPartOf:
    endpoints = []
    for uri in (source_uri, target_uri):
        if uri not in g.nv:
            # First time this URI is seen: create its vertex and fill in
            # both string properties.
            vertex = g.add_vertex()
            g.nv[uri] = vertex
            g.vp.uri[vertex] = uri
            g.vp.Label[vertex] = uriToName(uri)
        endpoints.append(g.nv[uri])

    # Edge runs from the first element of the pair to the second.
    g.add_edge(*endpoints)

g.save("graphs/battlesPartOf.graphml")

with open("processedData/battlesPartOf.csv", "w") as f:
    w = csv.writer(f, delimiter=",", quotechar='"', quoting=csv.QUOTE_NONNUMERIC)