def update(self): print "Deleting invalid links. This might take a while." ls = self.frame.get(TingyuChuanchunPengjhihLink) print "Found ", len(ls), " links." count = 0 for l in ls: if not is_valid(l.full_url): self.frame.delete(TingyuChuanchunPengjhihLink, l) count += 1 print "Deleting", count, "links." self.done = True
def update(self): print "Deleting invalid links. This might take a while." ls = self.frame.get(SkayaniEdwardc6ForsterjLink) print "Found ", len(ls), " links." count = 0 for l in ls: if not is_valid(l.full_url): self.frame.delete(SkayaniEdwardc6ForsterjLink, l) count += 1 print "Deleting", count, "links" self.done = True
def update(self): print "Deleting invalid links. This might take a while." ls = self.frame.get(EawolfeSantiadmTevinl1Link) print "Found ", len(ls), " links." count = 0 for l in ls: if not is_valid(l.full_url): self.frame.delete(EawolfeSantiadmTevinl1Link, l) count += 1 print "Deleting", count, "links" self.done = True
def update(self): print "Deleting invalid links. This might take a while." ls = self.frame.get(VadebonaAdesanyoKdmontenLink) print "Found ", len(ls), " links." count = 0 for l in ls: if not is_valid(l.full_url): self.frame.delete(VadebonaAdesanyoKdmontenLink, l) count += 1 print "Deleting", count, "links". self.done = True
def update(self): print "Deleting invalid links. This might take a while." ls = self.frame.get(Adityan1MonishppSkanade1Link) print "Found ", len(ls), " links." count = 0 for l in ls: if not is_valid(l.full_url): self.frame.delete(Adityan1MonishppSkanade1Link, l) count += 1 print "Deleting", count, "links." self.done = True
def update(self): print "Deleting invalid links. This might take a while." ls = self.frame.get(Dinhaq1Lochn2Jonat3Link) print "Found ", len(ls), " links." count = 0 for l in ls: if not is_valid(l.full_url): self.frame.delete(Dinhaq1Lochn2Jonat3Link, l) count += 1 print "Deleting", count, "links". self.done = True
def update(self): print "Deleting invalid links. This might take a while." ls = self.frame.get(Sanghuk2Newmanc1CarletozLink) print "Found ", len(ls), " links." count = 0 for l in ls: if not is_valid(l.full_url): self.frame.delete(Sanghuk2Newmanc1CarletozLink, l) count += 1 print "Deleting", count, "links." self.done = True
def update(self): print "Deleting invalid links. This might take a while." ls = self.frame.get(Yyuan13Jianl9DiyuegLink) print "Found ", len(ls), " links." count = 0 for l in ls: if not is_valid(l.full_url): self.frame.delete(Yyuan13Jianl9DiyuegLink, l) count += 1 print "Deleting", count, "links." self.done = True
def update(self): print "Deleting invalid links. This might take a while." ls = self.frame.get(Vuqt1Hoangt5MalayaLink) print "Found ", len(ls), " links." count = 0 for l in ls: if not is_valid(l.full_url): self.frame.delete(Vuqt1Hoangt5MalayaLink, l) count += 1 print "Deleting", count, "links." self.done = True
def update(self): print "Deleting invalid links. This might take a while." ls = self.frame.get(TianzelZiyangc2Zijianx1Link) print "Found ", len(ls), " links." count = 0 for l in ls: if not is_valid(l.full_url): self.frame.delete(TianzelZiyangc2Zijianx1Link, l) count += 1 print "Deleting", count, "links." self.done = True
# --- Ad-hoc smoke test for crawler_frame link extraction and analytics ---
url = "http://fano.ics.uci.edu/cites/"
url2 = "http://fano.ics.uci.edu/cites/Publication/"
redirect = "http://www.doorway.com"
url3 = "http://www.ics.uci.edu/dept/"

# Three fixtures: plain page, page with body text, and a redirect to url3.
# NOTE(review): assumes CopyData signature is (url, status, content, is_redirect)
# based on these call sites — confirm against the CopyData definition.
dataObj1 = CopyData(url, 200, "", False)
dataObj2 = CopyData(url2, 200, "nope", False)
dataObj3 = CopyData(redirect, 200, url3, True)

crawler_frame.extract_next_links([dataObj1, dataObj2, dataObj3])
statement = crawler_frame.is_valid("https://duttgroup.ics.uci.edu?query=hello")
print(statement)

# Fixed: file was opened and closed manually, leaking the handle if
# readlines() raised; a context manager guarantees closure.
with open("../successful_urls.txt", "r") as urlFile:
    urlList = urlFile.readlines()
print("Total URLs is " + str(len(urlList)))

# Feed every recorded URL into the analytics domain splitter, then dump
# the accumulated analytics state and write the report file.
for i in urlList:
    crawler_frame.analytics.splitDomains(i)

print(crawler_frame.analytics.mainDict)
print(crawler_frame.analytics.domains)
print(crawler_frame.analytics.maxUrl)
print(crawler_frame.analytics.MAXCOUNT)
print(crawler_frame.RETRIEVE_COUNTER)
crawler_frame.makeOutputFile("sampleAnalytics.txt", crawler_frame.analytics)