import os
import urllib.request

import httplib2
from bs4 import BeautifulSoup
from graphviz import Graph, Source


def main(restricted=False):
    global styles  # module-level style dict; only needed if apply_styles() below is re-enabled
    #gc.set_debug(gc.DEBUG_LEAK)
    site = input("What site to crawl? ")
    maxDepth = int(input("Max depth? "))
    http = httplib2.Http()  # kept from the original script; not used in this function
    links = set()   # set of (source title, target title) pairs
    pages = set()   # set of page titles already seen
    #dot = Digraph(comment=site, format="png")
    dot = Graph(comment=site, format="png", engine="sfdp")
    dot.graph_attr["overlap"] = "true"  # a plain attribute assignment (dot.overlap = ...) has no effect on the output
    #dot.graph_attr.update(size="10000000,10000000")

    # Fetch the start page to get its title; fall back to the URL on failure.
    try:
        soup = BeautifulSoup(urllib.request.urlopen(site), "html.parser")
        pageTitle = soup.title.string
        pages.add(pageTitle)
        titles[site] = pageTitle  # titles is a module-level dict mapping URL -> title
        soup.decompose()
    except Exception as e:
        pageTitle = site
        print("Error: {0}".format(e))

    # Extract the base domain (the part between the first two dots),
    # e.g. "example" from "http://www.example.com".
    siteBase = ""
    try:
        pos1 = site.find(".")
        pos2 = site.find(".", pos1 + 1)
        siteBase = site[pos1 + 1:pos2]
    except Exception as e:
        print("Error: {0}".format(e))
    print(siteBase)

    crawlPage(site, pageTitle, maxDepth, pages, links, restricted, siteBase)
    #print(pages)
    #print(links)
    #for p in pages:
    #    print("Adding node: " + p)
    #    dot.node(p)

    # Add one edge per crawled link; graphviz creates the nodes implicitly.
    for l in links:
        try:
            #print("Adding edge: " + l[0] + " -> " + l[1])
            dot.edge(l[0], l[1])
        except Exception as e:
            print("Error: {0}".format(e))
    #print(dot)
    #dot = apply_styles(dot, styles)

    # Inject a graph-level "overlap = prism" attribute by splicing it into the
    # DOT source just after the opening brace, then rebuild a Source object.
    loc = str(dot).find("{") + 1
    dot = Source(str(dot)[0:loc] + "\n\tgraph [overlap = prism]\n" + str(dot)[loc:],
                 format="png", engine="sfdp")
    #print("-------------------")

    filename = r'C:\Users\Gabe\Miniconda3\MyScripts\test-crawler15'
    dot.save()
    try:
        os.remove(filename)  # drop a stale DOT source file from a previous run
    except Exception as e:
        print("Error: {0}".format(e))
    try:
        with open(filename + ".txt", "w") as outFile:
            outFile.write(str(dot))
    except Exception as e:
        print("Error: {0}".format(e))
    dot.render(filename, view=True)
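
# main() calls a crawlPage() helper that is defined elsewhere in the script.
# Below is a minimal sketch of what such a helper might look like, assuming it
# recursively follows <a href> links, records (parent title, child title) pairs
# in `links`, and, when `restricted` is True, only follows URLs that contain
# `siteBase`. The signature mirrors the call site above; the body is illustrative,
# not the original implementation.

import urllib.parse


def crawlPage(url, pageTitle, depth, pages, links, restricted, siteBase):
    """Recursively crawl `url`, adding (parent, child) title pairs to `links`."""
    if depth <= 0:
        return
    try:
        soup = BeautifulSoup(urllib.request.urlopen(url), "html.parser")
    except Exception as e:
        print("Error: {0}".format(e))
        return
    for anchor in soup.find_all("a", href=True):
        childUrl = urllib.parse.urljoin(url, anchor["href"])
        if restricted and siteBase not in childUrl:
            continue  # stay on the original site when restricted crawling is on
        try:
            childSoup = BeautifulSoup(urllib.request.urlopen(childUrl), "html.parser")
            childTitle = childSoup.title.string if childSoup.title else childUrl
            childSoup.decompose()
        except Exception as e:
            print("Error: {0}".format(e))
            continue
        links.add((pageTitle, childTitle))
        if childTitle not in pages:
            pages.add(childTitle)
            crawlPage(childUrl, childTitle, depth - 1, pages, links, restricted, siteBase)
    soup.decompose()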