def __init__(self, mainDir, input, inlinedWebpageDir):
    """Load rows from a shelve database and start the URL downloader.

    Every value in the shelf is expected to be a mapping with "text" and
    "url" keys.  Rows that are not ignorable are wrapped in ``RowModel``
    and kept; the set of known classes starts with ``defaultClass()`` and
    grows with each non-empty class found for a row's URL.

    Args:
        mainDir: Stored on the instance; not used further in this method.
        input: Path handed to ``shelve.open``.  (The name shadows the
            builtin but is kept so keyword callers keep working.)
        inlinedWebpageDir: Directory that is created if missing, together
            with its "htmls" subdirectory.
    """
    self.__mainDir = mainDir
    self.__input = input
    self.__langId = LangDetect.instance()
    self.__inlinedWebpageDir = inlinedWebpageDir

    # Make sure the output directory and its "htmls" child both exist.
    htmlsDir = os.path.join(self.__inlinedWebpageDir, "htmls")
    for directory in (self.__inlinedWebpageDir, htmlsDir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    data = shelve.open(self.__input)
    try:
        self.__data = []
        self.__classes = set([self.defaultClass()])
        url2klass = self.__readKlassFile()
        logger.info("Read shelve...")
        for item in data.itervalues():
            text = item["text"]
            url = item["url"]
            klass = self.__getKlass(url2klass, url)
            if not self.__ignorable(text, url):
                self.__data.append(RowModel(url, text, klass, self))
            # NOTE(review): the class is recorded for every row that has
            # one, including ignored rows -- confirm this matches the
            # intended nesting of the original code.
            if klass:
                self.__classes.add(klass)
    finally:
        # The shelf is only needed while loading; close it so the
        # underlying database handle is not held for the object's lifetime.
        data.close()
    logger.info("Done %d", len(self.__data))

    Publisher.subscribe(self._onSave, "model.save")
    self.__downloader = UrlDownloaderController(self)
    self.__downloader.start()
def __init__(self):
    """Build and show the 'PyNews' main frame.

    Creates the wx application object, initialises the frame, attaches
    the "Analyze" and "View" menus, the URL grid and a status bar, shows
    the window, subscribes to the update/model topics and starts a timer
    that fires every 10 seconds.
    """
    # The application object is created before the frame is initialised.
    self.app = wx.App()
    wx.Frame.__init__(
        self,
        None,
        title='PyNews',
        pos=(150,150),
        size=(350,200),
    )

    # Menus are built and appended one at a time, in display order.
    menu_bar = wx.MenuBar()
    menu_builders = (
        (self.__buildMenuAnalyze, "&Analyze"),
        (self.__buildMenuView, "&View"),
    )
    for build_menu, label in menu_builders:
        menu_bar.Append(build_menu(), label)
    self.SetMenuBar(menu_bar)

    self.grid = UrlsGrid(self)
    self.CreateStatusBar()
    self.Show()

    self.timer = wx.Timer(self)

    # (handler, topic) pairs, subscribed in this order.
    subscriptions = (
        (self.updateUrls, "update.urls"),
        (self.updateStatusBar, "update.statusBar"),
        (self.onModelPaused, "model.paused"),
        (self.onModelStarted, "model.started"),
    )
    for handler, topic in subscriptions:
        Publisher.subscribe(handler, topic)

    self.Bind(wx.EVT_TIMER, self.onTimerEvent, self.timer)
    self.__paused = True

    # Timer period is given in milliseconds: 10 seconds.
    timer_period_ms = 1000 * 10
    self.timer.Start(timer_period_ms)
def __init__(self, gui, stream, mainDir):
    """Set up the "Model" thread, wire its collaborators and start it.

    Args:
        gui: Stored on the instance; not used further in this method.
        stream: Object whose ``__iter__()`` result is kept as the source
            of elements.
        mainDir: Base directory; its "stream" and "user" subdirectories
            are passed to the resolved-tweet queue, and the URL resolver
            cache file lives under "stream".
    """
    StoppableThread.__init__(self, "Model")

    self.__iter = stream.__iter__()
    self.__elem = None
    self.__gui = gui
    self.__softPause = True

    self.__urlBuilder = UrlBuilder()
    self.__userBuilder = UserBuilder()

    stream_dir = os.path.join(mainDir, "stream")
    user_dir = os.path.join(mainDir, "user")
    self.__tweetResolvedListener = ResolvedTweetQueue(
        streamDir=stream_dir,
        userDir=user_dir,
        userBuilder=self.__userBuilder,
        urlBuilder=self.__urlBuilder,
    )

    resolver_cache_path = os.path.join(stream_dir, "urlResolverCache.db2")
    self.__urlResolver = UrlResolverManager(
        resolver_cache_path,
        self.__tweetResolvedListener,
    )

    # Event flags, all initially unset.
    self.__refreshGui = Event()
    self.__refreshStatusBar = Event()
    self.__showProbDist = Event()
    self.__probDistUrl = None

    # (handler, topic) pairs, subscribed in this order.
    subscriptions = (
        (self.onPauseJob, "model.pause"),
        (self.onResumeJob, "model.start"),
        (self.onRefreshGui, "model.refreshGui"),
        (self.onRefreshStatusBar, "model.refreshStatusBar"),
        (self.onProbDist, "model.prob_dist"),
        (self.onShowTreeMap, "model.showTreeMap"),
    )
    for handler, topic in subscriptions:
        Publisher.subscribe(handler, topic)

    # doPauseJob() runs before the thread is started -- presumably so the
    # worker begins in a paused state; confirm against doPauseJob().
    self.doPauseJob()
    self.start()