Exemple #1
0
 def __init__(self, gui, stream, mainDir):
     """Create the collaborators, subscribe to the ``model.*`` topics and start.

     After everything is wired up the constructor calls ``doPauseJob()``
     and then ``start()``.

     gui     -- stored on the instance
     stream  -- object whose ``__iter__()`` result is consumed by the model
     mainDir -- base directory; its "stream" and "user" sub-directories are
                handed to the resolved-tweet queue, and the URL resolver
                cache file lives inside "stream"
     """
     StoppableThread.__init__(self, "Model")

     # Iteration state and pause bookkeeping.
     self.__iter = stream.__iter__()
     self.__elem = None
     self.__gui = gui
     self.__softPause = True

     # Builders shared with the resolved-tweet queue.
     self.__urlBuilder = UrlBuilder()
     self.__userBuilder = UserBuilder()

     streamDir = os.path.join(mainDir, "stream")
     userDir = os.path.join(mainDir, "user")
     resolvedQueue = ResolvedTweetQueue(
         streamDir=streamDir,
         userDir=userDir,
         userBuilder=self.__userBuilder,
         urlBuilder=self.__urlBuilder,
     )
     self.__tweetResolvedListener = resolvedQueue

     cachePath = os.path.join(streamDir, "urlResolverCache.db2")
     self.__urlResolver = UrlResolverManager(cachePath, resolvedQueue)

     # Flags raised by the message handlers.
     self.__refreshGui = Event()
     self.__refreshStatusBar = Event()
     self.__showProbDist = Event()
     self.__probDistUrl = None

     # (handler, topic) pairs, subscribed in this exact order.
     subscriptions = (
         (self.onPauseJob, "model.pause"),
         (self.onResumeJob, "model.start"),
         (self.onRefreshGui, "model.refreshGui"),
         (self.onRefreshStatusBar, "model.refreshStatusBar"),
         (self.onProbDist, "model.prob_dist"),
         (self.onShowTreeMap, "model.showTreeMap"),
     )
     for handler, topic in subscriptions:
         Publisher.subscribe(handler, topic)

     self.doPauseJob()
     self.start()
Exemple #2
0
class Model(StoppableThread):
    """Worker thread that feeds tweet URLs from a stream into the URL resolver.

    Each ``runPart`` call takes one element from the stream iterator, builds
    ``TweetText`` objects for it (and for its retweeted status, if present)
    and queues their URLs on the ``UrlResolverManager``.  Requests published
    on the ``model.*`` topics are recorded by the ``on*`` handlers in
    ``Event`` flags / plain attributes and served later from
    ``_doSmthElse``.

    NOTE(review): the handlers are presumably invoked on the publisher's
    (GUI) thread while ``runPart``/``onPause`` run on this thread -- confirm
    against ``StoppableThread`` and ``Publisher``.
    """

    def __init__(self, gui, stream, mainDir):
        """Build the collaborators, subscribe to the ``model.*`` topics and start.

        ``doPauseJob()`` is called before ``start()``.

        gui     -- stored on the instance; not otherwise referenced in this class
        stream  -- object whose ``__iter__()`` result supplies the elements;
                   that iterator must also offer ``position()``, ``count()``,
                   ``currentFile()`` and ``filesCount()`` (used for the
                   status bar)
        mainDir -- base directory; "stream" and "user" sub-directories are
                   derived from it
        """
        StoppableThread.__init__(self, "Model")
        self.__iter = stream.__iter__()
        # Element currently being processed; kept across calls when runPart
        # bails out on Full so that it can be retried.
        self.__elem = None
        self.__gui = gui
        self.__softPause = True
        self.__urlBuilder = UrlBuilder()
        self.__userBuilder = UserBuilder()
        streamDir = os.path.join(mainDir, "stream")
        userDir = os.path.join(mainDir, "user")
        self.__tweetResolvedListener = ResolvedTweetQueue(
            streamDir=streamDir, userDir=userDir, userBuilder=self.__userBuilder, urlBuilder=self.__urlBuilder
        )
        self.__urlResolver = UrlResolverManager(
            os.path.join(streamDir, "urlResolverCache.db2"), self.__tweetResolvedListener
        )
        self.__refreshGui = Event()
        self.__refreshStatusBar = Event()
        self.__showProbDist = Event()
        self.__probDistUrl = None
        Publisher.subscribe(self.onPauseJob, "model.pause")
        Publisher.subscribe(self.onResumeJob, "model.start")
        Publisher.subscribe(self.onRefreshGui, "model.refreshGui")
        Publisher.subscribe(self.onRefreshStatusBar, "model.refreshStatusBar")
        Publisher.subscribe(self.onProbDist, "model.prob_dist")
        Publisher.subscribe(self.onShowTreeMap, "model.showTreeMap")
        self.doPauseJob()
        self.start()

    @staticmethod
    def _isSoftRequest(data):
        """Return ``data["soft"]`` when that key is present, else ``False``.

        ``data`` may be ``None`` or empty; both yield ``False``.
        """
        return data["soft"] if data and "soft" in data else False

    def onRefreshGui(self, msg):
        """Flag that the URL list should be sent to the GUI."""
        self.__refreshGui.set()

    def onRefreshStatusBar(self, msg):
        """Flag that status-bar data should be sent to the GUI."""
        self.__refreshStatusBar.set()

    def onPauseJob(self, msg):
        """Handle "model.pause": record whether the pause is soft, then pause.

        The soft flag is only updated while it is still ``True``; once a
        non-soft pause has been recorded it stays ``False``.
        """
        d = msg.data
        if self.__softPause:
            self.__softPause = self._isSoftRequest(d)
        self.doPauseJob()

    def onProbDist(self, msg):
        """Handle "model.prob_dist": remember the URL and flag the request.

        The URL is stored *before* the event is set so that a consumer that
        observes the flag never reads a not-yet-assigned URL.
        """
        self.__probDistUrl = msg.data
        self.__showProbDist.set()

    def onShowTreeMap(self, msg):
        """Handle "model.showTreeMap": open the listener's server URL in a browser."""
        import webbrowser

        webbrowser.open(self.__tweetResolvedListener.getServerUrl())

    def doPauseJob(self):
        """Pause this job and the resolver workers, then publish "model.paused"."""
        self.pauseJob()
        self.__urlResolver.pauseWorkers()
        Publisher.sendMessage("model.paused")

    def onResumeJob(self, msg):
        """Handle "model.start".

        A non-soft resume always continues the job; a soft resume
        (``msg.data["soft"]`` truthy) only continues it while the recorded
        pause is soft, otherwise the request is ignored.
        """
        d = msg.data
        softResume = self._isSoftRequest(d)
        userResume = not softResume
        if userResume or self.__softPause:
            logger.info("Continue job " + unicode(softResume) + unicode(self.__softPause))
            self.doContinueJob()
        else:
            logger.info("Ignore resume request")

    def doContinueJob(self):
        """Continue this job and the resolver workers, then publish "model.started"."""
        self.continueJob()
        self.__urlResolver.continueWorkers()
        Publisher.sendMessage("model.started")

    def atBegin(self):
        """Start the URL resolver and the resolved-tweet listener."""
        logger.info("Preparing model...")
        self.__urlResolver.start()
        logger.info("Start analyzing tweets")
        self.__tweetResolvedListener.start()

    def runPart(self):
        """Process one stream element and then serve pending GUI requests.

        Elements containing a ``u"text"`` key are turned into ``TweetText``
        objects and their URLs are queued on the resolver; a ``UrlException``
        is logged and the element is dropped.

        On ``Full`` the method returns without clearing the cached element,
        so the same element is processed again on the next call (URLs queued
        before the exception are therefore queued again).  ``Full`` is
        presumably raised by ``addUrlToQueue`` -- confirm.

        Raises ``NothingToDo`` when the stream iterator is exhausted.
        """
        try:
            # Reuse the element left over from an interrupted call, if any.
            s = self.__elem or self.__iter.next()
            self.__elem = s
            if u"text" in s:
                try:
                    retweeted = (
                        TweetText(s[u"retweeted_status"], self.__urlBuilder, self.__userBuilder, None)
                        if u"retweeted_status" in s
                        else None
                    )
                    if retweeted:
                        for url in retweeted.urls():
                            self.__urlResolver.addUrlToQueue(url)

                    tweet = TweetText(s, self.__urlBuilder, self.__userBuilder, retweeted.id() if retweeted else None)
                    for url in tweet.urls():
                        self.__urlResolver.addUrlToQueue(url)

                except UrlException as e:
                    logger.warn(u"Cannot build url: " + str(e))
            self._doSmthElse()
            self.__elem = None
        except Full:
            return
        except StopIteration:
            raise NothingToDo()

    def onPause(self):
        """Keep serving GUI requests while the job is paused."""
        self._doSmthElse()

    def _doSmthElse(self):
        """Serve the requests flagged by the ``on*`` handlers.

        Publishes "update.urls" and "update.statusBar" when the matching
        flags are set, and runs the text classifier for a pending
        probability-distribution request.
        """
        if self.__refreshGui.isSet():
            logger.info("Send data to GUI")
            self.__refreshGui.clear()
            data = {}
            data["urls"] = self.__tweetResolvedListener.finalUrls()
            Publisher.sendMessage("update.urls", data=data)
        if self.__showProbDist.isSet():
            # Clear the flag before reading the URL: a request that arrives
            # in between re-raises the flag instead of being wiped out.
            self.__showProbDist.clear()
            url = self.__probDistUrl
            self.__probDistUrl = None
            # The URL can be None (message without data, or already consumed
            # by a previous pass); skip instead of failing on url.getText().
            if url is not None:
                # NOTE(review): the returned distribution is not used here.
                TxtClassificatorWrapper.instance().probDist(url.getText())
        if self.__refreshStatusBar.isSet():
            self.__refreshStatusBar.clear()
            data = {}
            data["cache"] = self.__urlResolver.cacheHitRate()
            data["position"] = self.__iter.position()
            data["position_end"] = self.__iter.count()
            data["current_file_c"] = self.__iter.currentFile()
            data["last_file_c"] = self.__iter.filesCount()
            Publisher.sendMessage("update.statusBar", data=data)

    def stop(self):
        """Stop this thread, the URL resolver and the resolved-tweet listener."""
        StoppableThread.stop(self)
        self.__urlResolver.stop()
        self.__tweetResolvedListener.stop()
        # Log the threads that are still alive at this point.
        logger.info("Wait for thread: " + ",".join([t.name for t in threading.enumerate()]))