def verge_listen(self, event):
    """Event handler that swaps the current view for The Verge's headlines.

    Fetches the front page through ``grabheadline.grabfront``. When no links
    come back an error dialog is shown and the view is left untouched;
    otherwise the ``title``, ``main2`` and ``prev`` widgets are unpacked and
    a ``FrontPageList`` is constructed on ``self.currMaster``.

    ``event`` is accepted for the handler signature and is not used.
    """
    links, titles, categories = grabheadline.grabfront("https://www.theverge.com/")

    # An empty link list is treated as a failed fetch.
    if len(links) == 0:
        tk.messagebox.showerror(
            "Error",
            "Failed to Retrieve Website. No Internet Connection?",
        )
        return

    # Unpack the widgets of the current view, one after another.
    for attr_name in ("title", "main2", "prev"):
        getattr(self, attr_name).pack_forget()

    FrontPageList(
        self.currMaster,
        'The Verge',
        links,
        titles,
        categories,
        self.currPage,
    )
def guardian_listen(self, event):
    """Event handler that swaps the current view for The Guardian's headlines.

    Fetches the international front page through ``grabheadline.grabfront``.
    When no links come back an error dialog is shown and the view is left
    untouched; otherwise the ``title``, ``main1`` and ``next`` widgets are
    unpacked and a ``FrontPageList`` is constructed on ``self.currMaster``.

    ``event`` is accepted for the handler signature and is not used.
    """
    links, titles, categories = grabheadline.grabfront('https://www.theguardian.com/international')

    # An empty link list is treated as a failed fetch.
    if len(links) == 0:
        tk.messagebox.showerror(
            "Error",
            "Failed to Retrieve Website. No Internet Connection?",
        )
        return

    # Unpack the widgets of the current view, one after another.
    for attr_name in ("title", "main1", "next"):
        getattr(self, attr_name).pack_forget()

    FrontPageList(
        self.currMaster,
        'The Guardian',
        links,
        titles,
        categories,
        self.currPage,
    )
def huffington_listen(self, event):
    """Event handler that swaps the current view for Huffington Post headlines.

    Fetches the front page through ``grabheadline.grabfront``. When no links
    come back an error dialog is shown and the view is left untouched;
    otherwise the ``title``, ``main2`` and ``prev`` widgets are unpacked and
    a ``FrontPageList`` is constructed on ``self.currMaster``.

    ``event`` is accepted for the handler signature and is not used.
    """
    links, titles, categories = grabheadline.grabfront("http://huffpost.com")

    # An empty link list is treated as a failed fetch.
    if len(links) == 0:
        tk.messagebox.showerror(
            "Error",
            "Failed to Retrieve Website. No Internet Connection?",
        )
        return

    # Unpack the widgets of the current view, one after another.
    for attr_name in ("title", "main2", "prev"):
        getattr(self, attr_name).pack_forget()

    FrontPageList(
        self.currMaster,
        'Huffington Post',
        links,
        titles,
        categories,
        self.currPage,
    )
def latimes_listen(self, event):
    """Event handler that swaps the current view for Los Angeles Times headlines.

    Fetches the front page through ``grabheadline.grabfront``. When no links
    come back an error dialog is shown and the view is left untouched;
    otherwise the ``title``, ``main2`` and ``prev`` widgets are unpacked and
    a ``FrontPageList`` is constructed on ``self.currMaster``.

    ``event`` is accepted for the handler signature and is not used.
    """
    links, titles, categories = grabheadline.grabfront('https://latimes.com')

    # An empty link list is treated as a failed fetch.
    if len(links) == 0:
        tk.messagebox.showerror(
            "Error",
            "Failed to Retrieve Website. No Internet Connection?",
        )
        return

    # Unpack the widgets of the current view, one after another.
    for attr_name in ("title", "main2", "prev"):
        getattr(self, attr_name).pack_forget()

    FrontPageList(
        self.currMaster,
        'Los Angeles Times',
        links,
        titles,
        categories,
        self.currPage,
    )
def ap_listen(self, event):
    """Event handler that swaps the current view for Associated Press headlines.

    Fetches the front page through ``grabheadline.grabfront``. When no links
    come back an error dialog is shown and the view is left untouched;
    otherwise the ``title``, ``main1`` and ``next`` widgets are unpacked and
    a ``FrontPageList`` is constructed on ``self.currMaster``.

    ``event`` is accepted for the handler signature and is not used.
    """
    links, titles, categories = grabheadline.grabfront('https://apnews.com')

    # An empty link list is treated as a failed fetch.
    if len(links) == 0:
        tk.messagebox.showerror(
            "Error",
            "Failed to Retrieve Website. No Internet Connection?",
        )
        return

    # Unpack the widgets of the current view, one after another.
    for attr_name in ("title", "main1", "next"):
        getattr(self, attr_name).pack_forget()

    FrontPageList(
        self.currMaster,
        'Associated Press',
        links,
        titles,
        categories,
        self.currPage,
    )
def ny_listen(self, event):
    """Event handler that swaps the current view for New York Times headlines.

    The links, titles and categories come from the nytimes front page via
    ``grabheadline.grabfront`` (see grabheadline.py). When no links come back
    an error dialog is shown and the view is left untouched; otherwise the
    ``title``, ``main1`` and ``next`` widgets are unpacked and a
    ``FrontPageList`` is constructed on ``self.currMaster``.

    ``event`` is accepted for the handler signature and is not used.
    """
    links, titles, categories = grabheadline.grabfront('https://nytimes.com')

    # No links at all is taken to mean there is no Internet connection.
    if len(links) == 0:
        tk.messagebox.showerror(
            "Error",
            "Failed to Retrieve Website. No Internet Connection?",
        )
        return

    # Replace the front grid with the headline list of the clicked website:
    # first unpack the current widgets, one after another ...
    for attr_name in ("title", "main1", "next"):
        getattr(self, attr_name).pack_forget()

    # ... then build the generated headline list.
    FrontPageList(
        self.currMaster,
        'New York Times',
        links,
        titles,
        categories,
        self.currPage,
    )
def graball():
    """Fetch every listed front page and print a keyword frequency table.

    For each news source the front-page links are collected with
    ``grabheadline.grabfront``; each link is then passed to
    ``lxrTest.generateKeywords`` and the returned keys are tallied. The
    function prints, in order: the elapsed seconds per source, each link
    followed by the elapsed seconds for its keyword extraction, the
    ``(keyword, count)`` pairs sorted by descending count, and the total
    elapsed seconds. Nothing is returned.

    Only the links from ``grabfront`` are used; the titles and categories it
    returns are ignored here.
    """
    # Takes around 160-180s for 200 articles, way too slow
    # perf_counter is used for the durations because it is monotonic, unlike
    # the wall clock, which can be adjusted while the function is running.
    started = time.perf_counter()
    list_news = [
        'https://nytimes.com',
        'https://reuters.com',
        'https://bbc.com',
        'https://www.theguardian.com/international',
        'https://apnews.com',
        'https://latimes.com',
        'http://huffpost.com',
        'https://www.npr.org/',
    ]

    links = []
    for source in list_news:
        source_started = time.perf_counter()
        source_links, _titles, _categories = grabheadline.grabfront(source)
        links.extend(source_links)
        print(time.perf_counter() - source_started)

    keywords = {}
    for link in links:
        print(link)
        link_started = time.perf_counter()
        for key in lxrTest.generateKeywords(link):
            keywords[key] = keywords.get(key, 0) + 1
        print(time.perf_counter() - link_started)

    # The sort is stable, so keywords with equal counts keep first-seen order.
    sred = sorted(keywords.items(), key=lambda item: item[1], reverse=True)
    print(sred)
    print(time.perf_counter() - started)
def grabfront_wrapper(self, source, pos):
    """Fetch the front page of ``source`` and tag the result with ``pos``.

    Calls ``grabheadline.grabfront(source)``, prints the retrieved links and
    a completion message naming ``source``, and returns the tuple
    ``(pos, links, titles)``. The categories returned by ``grabfront`` are
    discarded.
    """
    links, titles, _categories = grabheadline.grabfront(source)
    print(links)
    print('[RESULT] FINISHED GRABBING from ' + source)
    return pos, links, titles
def grabfront_wrapper(source, pos):
    """Fetch the front page of ``source`` and tag the result with ``pos``.

    Calls ``grabheadline.grabfront(source)``, prints the retrieved links
    followed by ``pos``, and returns the tuple
    ``(pos, links, titles, categories)``.
    """
    links, titles, categories = grabheadline.grabfront(source)
    print(links)
    print(pos)
    return pos, links, titles, categories