Beispiel #1
0
    def verge_listen(self, event):
        """Open the headline list for The Verge.

        Fetches the front page through ``grabheadline.grabfront``. An empty
        link list is reported to the user as a retrieval error; otherwise the
        ``title``, ``main2`` and ``prev`` widgets are removed from the pack
        layout and a ``FrontPageList`` is created for the fetched headlines.

        ``event`` is accepted but not used (presumably the Tk event that
        triggered this handler).
        """
        links, titles, categories = grabheadline.grabfront("https://www.theverge.com/")

        # Nothing came back: tell the user and leave the current view untouched.
        if len(links) == 0:
            tk.messagebox.showerror("Error", "Failed to Retrieve Website. No Internet Connection?")
            return

        # Take the widgets of the current page out of the pack layout.
        self.title.pack_forget()
        self.main2.pack_forget()
        self.prev.pack_forget()

        # The new instance is not used any further here; presumably the
        # constructor builds the headline view itself (confirm in FrontPageList).
        FrontPageList(self.currMaster, 'The Verge', links, titles, categories, self.currPage)
Beispiel #2
0
    def guardian_listen(self, event):
        """Open the headline list for The Guardian (international edition).

        Fetches the front page through ``grabheadline.grabfront``. An empty
        link list is reported to the user as a retrieval error; otherwise the
        ``title``, ``main1`` and ``next`` widgets are removed from the pack
        layout and a ``FrontPageList`` is created for the fetched headlines.

        ``event`` is accepted but not used (presumably the Tk event that
        triggered this handler).
        """
        links, titles, categories = grabheadline.grabfront('https://www.theguardian.com/international')

        # Nothing came back: tell the user and leave the current view untouched.
        if len(links) == 0:
            tk.messagebox.showerror("Error", "Failed to Retrieve Website. No Internet Connection?")
            return

        # Take the widgets of the current page out of the pack layout.
        self.title.pack_forget()
        self.main1.pack_forget()
        self.next.pack_forget()

        # The new instance is not used any further here; presumably the
        # constructor builds the headline view itself (confirm in FrontPageList).
        FrontPageList(self.currMaster, 'The Guardian', links, titles, categories, self.currPage)
Beispiel #3
0
    def huffington_listen(self, event):
        """Open the headline list for the Huffington Post.

        Fetches the front page through ``grabheadline.grabfront``. An empty
        link list is reported to the user as a retrieval error; otherwise the
        ``title``, ``main2`` and ``prev`` widgets are removed from the pack
        layout and a ``FrontPageList`` is created for the fetched headlines.

        ``event`` is accepted but not used (presumably the Tk event that
        triggered this handler).
        """
        links, titles, categories = grabheadline.grabfront("http://huffpost.com")

        # Nothing came back: tell the user and leave the current view untouched.
        if len(links) == 0:
            tk.messagebox.showerror("Error", "Failed to Retrieve Website. No Internet Connection?")
            return

        # Take the widgets of the current page out of the pack layout.
        self.title.pack_forget()
        self.main2.pack_forget()
        self.prev.pack_forget()

        # The new instance is not used any further here; presumably the
        # constructor builds the headline view itself (confirm in FrontPageList).
        FrontPageList(self.currMaster, 'Huffington Post', links, titles, categories, self.currPage)
Beispiel #4
0
    def latimes_listen(self, event):
        """Open the headline list for the Los Angeles Times.

        Fetches the front page through ``grabheadline.grabfront``. An empty
        link list is reported to the user as a retrieval error; otherwise the
        ``title``, ``main2`` and ``prev`` widgets are removed from the pack
        layout and a ``FrontPageList`` is created for the fetched headlines.

        ``event`` is accepted but not used (presumably the Tk event that
        triggered this handler).
        """
        links, titles, categories = grabheadline.grabfront('https://latimes.com')

        # Nothing came back: tell the user and leave the current view untouched.
        if len(links) == 0:
            tk.messagebox.showerror("Error", "Failed to Retrieve Website. No Internet Connection?")
            return

        # Take the widgets of the current page out of the pack layout.
        self.title.pack_forget()
        self.main2.pack_forget()
        self.prev.pack_forget()

        # The new instance is not used any further here; presumably the
        # constructor builds the headline view itself (confirm in FrontPageList).
        FrontPageList(self.currMaster, 'Los Angeles Times', links, titles, categories, self.currPage)
Beispiel #5
0
    def ap_listen(self, event):
        """Open the headline list for the Associated Press.

        Fetches the front page through ``grabheadline.grabfront``. An empty
        link list is reported to the user as a retrieval error; otherwise the
        ``title``, ``main1`` and ``next`` widgets are removed from the pack
        layout and a ``FrontPageList`` is created for the fetched headlines.

        ``event`` is accepted but not used (presumably the Tk event that
        triggered this handler).
        """
        links, titles, categories = grabheadline.grabfront('https://apnews.com')

        # Nothing came back: tell the user and leave the current view untouched.
        if len(links) == 0:
            tk.messagebox.showerror("Error", "Failed to Retrieve Website. No Internet Connection?")
            return

        # Take the widgets of the current page out of the pack layout.
        self.title.pack_forget()
        self.main1.pack_forget()
        self.next.pack_forget()

        # The new instance is not used any further here; presumably the
        # constructor builds the headline view itself (confirm in FrontPageList).
        FrontPageList(self.currMaster, 'Associated Press', links, titles, categories, self.currPage)
Beispiel #6
0
    def ny_listen(self, event):
        """Open the headline list for the New York Times.

        Grabs the links, titles and categories from the nytimes front page
        (see grabheadline.py). An empty link list is reported to the user as
        a retrieval error; otherwise the ``title``, ``main1`` and ``next``
        widgets are removed from the pack layout and a ``FrontPageList`` is
        created for the fetched headlines.

        ``event`` is accepted but not used (presumably the Tk event that
        triggered this handler).
        """
        links, titles, categories = grabheadline.grabfront('https://nytimes.com')

        # No links to be grabbed is taken to mean there is no Internet
        # connection: tell the user and leave the current view untouched.
        if len(links) == 0:
            tk.messagebox.showerror("Error", "Failed to Retrieve Website. No Internet Connection?")
            return

        # Replace the GUI of the front grid with the generated headline list
        # of the clicked website (nytimes here).
        self.title.pack_forget()
        self.main1.pack_forget()
        self.next.pack_forget()

        # The new instance is not used any further here; presumably the
        # constructor builds the headline view itself (confirm in FrontPageList).
        FrontPageList(self.currMaster, 'New York Times', links, titles, categories, self.currPage)
Beispiel #7
0
def graball():
    """Count keyword frequencies across the front pages of several news sites.

    Collects article links from every site in the hard-coded source list via
    ``grabheadline.grabfront``, runs ``lxrTest.generateKeywords`` on each
    link, and prints the keywords ranked by how many times they were
    produced (most frequent first). The elapsed time of every front-page
    fetch, every keyword extraction and the whole run is printed as well.

    Returns nothing; all results go to stdout.
    """
    # Takes around 160-180s for 200 articles, way too slow
    from collections import Counter  # local import keeps this block self-contained

    run_start = time.time()
    list_news = ['https://nytimes.com', 'https://reuters.com', 'https://bbc.com',
                 'https://www.theguardian.com/international', 'https://apnews.com',
                 'https://latimes.com', 'http://huffpost.com', 'https://www.npr.org/']

    links = []
    for source in list_news:
        step_start = time.time()
        # Only the links are needed here; titles and categories are discarded.
        source_links, _titles, _categories = grabheadline.grabfront(source)
        links.extend(source_links)
        print(time.time() - step_start)

    keywords = Counter()
    for link in links:
        print(link)
        step_start = time.time()
        # Count key by key instead of calling Counter.update(): update() would
        # add the *values* if generateKeywords ever returned a mapping, whereas
        # iterating always counts each yielded key once.
        for key in lxrTest.generateKeywords(link):
            keywords[key] += 1
        print(time.time() - step_start)

    # most_common() sorts by count, descending; ties keep first-seen order.
    sred = keywords.most_common()
    print(sred)
    print(time.time() - run_start)
Beispiel #8
0
 def grabfront_wrapper(self, source, pos):
     """Fetch the front page of ``source`` and tag the result with ``pos``.

     Calls ``grabheadline.grabfront(source)``, prints the fetched links and a
     completion message, and returns ``(pos, links, titles)``. The categories
     returned by ``grabfront`` are dropped; ``pos`` is passed through
     unchanged.
     """
     links, titles, _categories = grabheadline.grabfront(source)
     print(links)
     print('[RESULT] FINISHED GRABBING from ' + source)
     outcome = (pos, links, titles)
     return outcome
Beispiel #9
0
def grabfront_wrapper(source, pos):
    """Fetch the front page of ``source`` and tag the result with ``pos``.

    Calls ``grabheadline.grabfront(source)``, prints the fetched links and
    then ``pos``, and returns ``(pos, links, titles, categories)``. ``pos``
    is passed through unchanged.
    """
    links, titles, categories = grabheadline.grabfront(source)
    print(links)
    print(pos)
    outcome = (pos, links, titles, categories)
    return outcome