Beispiel #1
0
    def findTermFrequencies(self):
        """
            Show a window listing the relative frequency of each term in the tweets.

            If the file named by self.TwitterKeysFile does not exist, an error
            dialog is shown and nothing else happens.  Otherwise self.count is
            incremented and, unless the window is already open, every line of
            self.TwitterStreamFile is parsed as one JSON object whose 'text'
            value is cleaned and split into terms.  Terms that account for more
            than 0.5% of all counted terms are put into the table together with
            their percentage (rounded to two decimals).

            Blank lines, lines that are not valid JSON and objects without a
            usable 'text' value are skipped.  Errors from open() propagate to
            the caller; in that case no window is created and the opened-flag
            is left untouched.

            Returns None.
        """
        global TermFrequenciesWindow

        if not os.path.isfile(self.TwitterKeysFile):
            tkMessageBox.showerror("ERROR", "Twitter API credentials not filled. Use the Set/Update Twitter Credentials button to do so.", parent = self.parent)
            return

        self.count += 1
        if self.termFrequenciesOpenedFlag:
            # the window is already on screen
            return

        # Patterns are compiled once instead of once per tweet.
        # Removes @mentions, every character that is not an ASCII letter,
        # digit, space or tab, and scheme://... URLs.
        cleanupPattern = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")
        leadingUrlPattern = re.compile(r'^https?:\/\/.*[\r\n]*', flags=re.MULTILINE)
        # NOTE: matches the letters "RT" anywhere, also inside longer words.
        retweetPattern = re.compile(r'RT')

        # Count the terms before any widget is created, so that a failure
        # while reading the file cannot leave an empty window behind.
        freqs = {}
        total = 0
        with open(self.TwitterStreamFile) as streamFile:
            for line in streamFile:
                if not line.strip():
                    continue
                try:
                    text = json.loads(line)['text']
                except (ValueError, KeyError, TypeError):
                    # invalid JSON, or a JSON value that has no 'text' entry
                    continue

                # json.loads normally hands back text that is already decoded;
                # only raw byte strings still need decoding.
                if isinstance(text, bytes):
                    text = text.decode('utf-8', 'replace')
                elif not isinstance(text, type(u'')):
                    # e.g. null or a number stored under 'text'
                    continue

                # clean the text
                text = ' '.join(cleanupPattern.sub("", text).split())
                text = leadingUrlPattern.sub('', text)
                text = retweetPattern.sub('', text)

                for term in text.split(' '):
                    # removing "RT" can leave empty pieces; isalnum() drops them
                    if term.isalnum():
                        freqs[term] = freqs.get(term, 0) + 1
                        total += 1

        # dictionary for storing table data; total is > 0 whenever freqs
        # has entries, so the division below is safe
        tableData = {}
        for term, occurrences in freqs.items():
            share = occurrences / float(total)
            if share > 0.005:
                tableData[uuid.uuid4()] = {'Term': term, 'Frequency (%)': str(round(share * 100, 2))}

        # set window opened
        self.termFrequenciesOpenedFlag = True

        def toggleFlag():
            # closing the window makes it possible to open it again
            self.termFrequenciesOpenedFlag = False
            TermFrequenciesWindow.destroy()

        # initialize window
        TermFrequenciesWindow = tk.Toplevel(self)
        TermFrequenciesWindow.minsize(500, 500)
        TermFrequenciesWindow.geometry("500x500+100+100")
        TermFrequenciesWindow.title("Term Frequencies (only > 0.5%)")
        TermFrequenciesWindow.config(bd=5)
        TermFrequenciesWindow.protocol("WM_DELETE_WINDOW", toggleFlag)

        # create a new TableModel for table data
        model = TableModel()

        # create a new TableCanvas for showing the table
        table = TableCanvas(TermFrequenciesWindow, model=model,
                             editable=False)
        table.createTableFrame()

        # insert and sort data in the table
        model.importDict(tableData)
        # sort in descending order
        model.setSortOrder(columnIndex = 1, reverse = 1)
        table.adjustColumnWidths()
        table.resizeColumn(0, 200)
        table.resizeColumn(1, 200)
        table.redrawTable()
Beispiel #2
0
    def findTermFrequencies(self):
        """
            Show a window listing the relative frequency of each term in the tweets.

            If the file named by self.TwitterKeysFile does not exist, an error
            dialog is shown and nothing else happens.  Otherwise self.count is
            incremented and, unless the window is already open, every line of
            self.TwitterStreamFile is parsed as one JSON object whose 'text'
            value is cleaned and split into terms.  Terms that account for more
            than 0.5% of all counted terms are put into the table together with
            their percentage (rounded to two decimals).

            Blank lines, lines that are not valid JSON and objects without a
            usable 'text' value are skipped.  Errors from open() propagate to
            the caller; in that case no window is created and the opened-flag
            is left untouched.

            Returns None.
        """
        global TermFrequenciesWindow

        if not os.path.isfile(self.TwitterKeysFile):
            tkMessageBox.showerror(
                "ERROR",
                "Twitter API credentials not filled. Use the Set/Update Twitter Credentials button to do so.",
                parent=self.parent)
            return

        self.count += 1
        if self.termFrequenciesOpenedFlag:
            # the window is already on screen
            return

        # Patterns are compiled once instead of once per tweet.
        # Removes @mentions, every character that is not an ASCII letter,
        # digit, space or tab, and scheme://... URLs.
        cleanupPattern = re.compile(
            r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")
        leadingUrlPattern = re.compile(r'^https?:\/\/.*[\r\n]*',
                                       flags=re.MULTILINE)
        # NOTE: matches the letters "RT" anywhere, also inside longer words.
        retweetPattern = re.compile(r'RT')

        # Count the terms before any widget is created, so that a failure
        # while reading the file cannot leave an empty window behind.
        freqs = {}
        total = 0
        with open(self.TwitterStreamFile) as streamFile:
            for line in streamFile:
                if not line.strip():
                    continue
                try:
                    text = json.loads(line)['text']
                except (ValueError, KeyError, TypeError):
                    # invalid JSON, or a JSON value that has no 'text' entry
                    continue

                # json.loads normally hands back text that is already decoded;
                # only raw byte strings still need decoding.
                if isinstance(text, bytes):
                    text = text.decode('utf-8', 'replace')
                elif not isinstance(text, type(u'')):
                    # e.g. null or a number stored under 'text'
                    continue

                # clean the text
                text = ' '.join(cleanupPattern.sub("", text).split())
                text = leadingUrlPattern.sub('', text)
                text = retweetPattern.sub('', text)

                for term in text.split(' '):
                    # removing "RT" can leave empty pieces; isalnum() drops them
                    if term.isalnum():
                        freqs[term] = freqs.get(term, 0) + 1
                        total += 1

        # dictionary for storing table data; total is > 0 whenever freqs
        # has entries, so the division below is safe
        tableData = {}
        for term, occurrences in freqs.items():
            share = occurrences / float(total)
            if share > 0.005:
                tableData[uuid.uuid4()] = {
                    'Term': term,
                    'Frequency (%)': str(round(share * 100, 2)),
                }

        # set window opened
        self.termFrequenciesOpenedFlag = True

        def toggleFlag():
            # closing the window makes it possible to open it again
            self.termFrequenciesOpenedFlag = False
            TermFrequenciesWindow.destroy()

        # initialize window
        TermFrequenciesWindow = tk.Toplevel(self)
        TermFrequenciesWindow.minsize(500, 500)
        TermFrequenciesWindow.geometry("500x500+100+100")
        TermFrequenciesWindow.title("Term Frequencies (only > 0.5%)")
        TermFrequenciesWindow.config(bd=5)
        TermFrequenciesWindow.protocol("WM_DELETE_WINDOW", toggleFlag)

        # create a new TableModel for table data
        model = TableModel()

        # create a new TableCanvas for showing the table
        table = TableCanvas(TermFrequenciesWindow,
                            model=model,
                            editable=False)
        table.createTableFrame()

        # insert and sort data in the table
        model.importDict(tableData)
        # sort in descending order
        model.setSortOrder(columnIndex=1, reverse=1)
        table.adjustColumnWidths()
        table.resizeColumn(0, 200)
        table.resizeColumn(1, 200)
        table.redrawTable()
 def adjustColumnWidths(self):
     """Store the model's column count in self.cols, then run TableCanvas.adjustColumnWidths."""
     columnCount = self.model.getColumnCount()
     self.cols = columnCount
     # explicit call to TableCanvas's implementation, made only after
     # self.cols has been refreshed
     TableCanvas.adjustColumnWidths(self)
 def adjustColumnWidths(self):
     """Refresh self.cols from the model before delegating to TableCanvas.adjustColumnWidths."""
     tableModel = self.model
     self.cols = tableModel.getColumnCount()
     # self.cols is assigned first; the TableCanvas implementation runs
     # afterwards on this same instance
     TableCanvas.adjustColumnWidths(self)