コード例 #1
0
ファイル: wrhp.py プロジェクト: wmarsey/wrhp
    def run(self):
        if self.flags["launch"]:
            return self.launch()

        if self.flags["dbrepair"]:
            return self.dbrepair()

        while True:
            while True:
                print "---------FETCHING WIKIPEDIA PAGE------------"
                scraper = wk.WikiRevisionScrape(
                    title=self.params["title"],
                    pageid=self.params["pageid"],
                    domain=self.params["domain"],
                    scrapemin=self.params["scrapemin"],
                )

                if scraper.scrape():
                    pageid = scraper.getPageID()
                    title = scraper.getTitle()
                    domain = scraper.getDomain()
                    break
                elif self.params["domain"]:
                    return -1  ##if you asked but didnt get. terminate
                    ##instead of trying again

            print
            print "-----------------ANALYSING------------------"
            analyser = WikiAnalysis(title, pageid, domain)
            results = analyser.analyse()
            if not results:
                return -1

            if self.flags["plot"]:
                print
                print "--------------------PLOT--------------------"
                import wikiDataPlot as dpl

                plotter = dpl.WikiDataPlot(
                    os.path.abspath(self.params["plotpath"]) if self.params["plotpath"] else None
                )
                plotted = plotter.plot(title, pageid, domain)
                print len(plotted), "plotted"

            revidLog(title, pageid, domain)

            if not self.flags["trundle"]:
                break

        return 0
コード例 #2
0
    def run(self):
        if self.flags['launch']:
            return self.launch()

        if self.flags['dbrepair']:
            return self.dbrepair()

        while True:
            while True:
                print "---------FETCHING WIKIPEDIA PAGE------------"
                scraper = wk.WikiRevisionScrape(
                    title=self.params['title'],
                    pageid=self.params['pageid'],
                    domain=self.params['domain'],
                    scrapemin=self.params['scrapemin'])

                if scraper.scrape():
                    pageid = scraper.getPageID()
                    title = scraper.getTitle()
                    domain = scraper.getDomain()
                    break
                elif (self.params['domain']):
                    return -1  ##if you asked but didnt get. terminate
                    ##instead of trying again

            print
            print "-----------------ANALYSING------------------"
            analyser = WikiAnalysis(title, pageid, domain)
            results = analyser.analyse()
            if not results:
                return -1

            if self.flags['plot']:
                print
                print "--------------------PLOT--------------------"
                import wikiDataPlot as dpl
                plotter = dpl.WikiDataPlot(
                    os.path.abspath(self.params['plotpath']) if self.
                    params['plotpath'] else None)
                plotted = plotter.plot(title, pageid, domain)
                print len(plotted), "plotted"

            revidLog(title, pageid, domain)

            if not self.flags['trundle']:
                break

        return 0
コード例 #3
0
ファイル: wrhp.py プロジェクト: wmarsey/wrhp
    def dbrepair(self, delete=False, clear=False):
        """Clean up the database, or empty it when ``clear`` is truthy.

        As written, ``delete`` is overwritten with True on entry, so the
        cleanup branch always runs and the method always returns 0 from
        there. The re-scrape / re-analyse code below that branch is
        unreachable.

        delete -- currently ignored (see the override below).
        clear  -- when truthy call ``dtb.empty()``; otherwise
                  ``dtb.cleanup()``.

        Returns 0 after the cleanup. The ``return -1`` and the final
        ``return 0`` sit in the unreachable part.
        """
        import database as db

        dtb = db.WikiDatabase()
        # Only used by the unreachable analysis loop at the bottom, yet it
        # is queried on every call.
        fetch = dtb.getallfetched()

        # NOTE(review): this unconditionally overrides the `delete`
        # argument. It looks like a leftover debugging switch -- confirm
        # before removing, since run() calls dbrepair() with the defaults
        # and would then take the repair path instead of the cleanup path.
        delete = True

        if delete:
            print "cleaning incomplete entries from the database"
            if clear:
                dtb.empty()
            else:
                dtb.cleanup()
            return 0
        else:
            piddoms = dtb.getallscraped()

        # ---- Everything below is unreachable while `delete` is forced ----

        print "Checking", len(piddoms), "pageids for complete details"

        # NOTE(review): the loop variable `t` is never used; each iteration
        # builds the scraper from self.params instead, and the pageid /
        # title / domain read back are not used afterwards. Presumably the
        # scraper was meant to be built from `t` -- verify the row shape
        # returned by getallscraped() before changing this.
        for t in piddoms:
            scraper = wk.WikiRevisionScrape(
                pageid=self.params["pageid"], title=self.params["title"], domain=self.params["domain"], scrapemin=0
            )
            if scraper.scrape():
                pageid = scraper.getPageID()
                title = scraper.getTitle()
                domain = scraper.getDomain()
            else:
                continue

        print "Checking", len(fetch), "fetched entries for analyses"

        # Each fetched row is unpacked as the positional arguments of
        # WikiAnalysis; the first analysis with no results aborts with -1.
        for f in fetch:
            analyser = WikiAnalysis(*f)
            results = analyser.analyse()
            if not results:
                return -1
        return 0
コード例 #4
0
    def dbrepair(self, delete=False, clear=False):
        """Clean up the database, or empty it when ``clear`` is truthy.

        As written, ``delete`` is overwritten with True on entry, so the
        cleanup branch always runs and the method always returns 0 from
        there. The re-scrape / re-analyse code below that branch is
        unreachable.

        delete -- currently ignored (see the override below).
        clear  -- when truthy call ``dtb.empty()``; otherwise
                  ``dtb.cleanup()``.

        Returns 0 after the cleanup. The ``return -1`` and the final
        ``return 0`` sit in the unreachable part.
        """
        import database as db
        dtb = db.WikiDatabase()
        # Only used by the unreachable analysis loop at the bottom, yet it
        # is queried on every call.
        fetch = dtb.getallfetched()

        # NOTE(review): this unconditionally overrides the `delete`
        # argument. It looks like a leftover debugging switch -- confirm
        # before removing, since run() calls dbrepair() with the defaults
        # and would then take the repair path instead of the cleanup path.
        delete = True

        if delete:
            print "cleaning incomplete entries from the database"
            if clear:
                dtb.empty()
            else:
                dtb.cleanup()
            return 0
        else:
            piddoms = dtb.getallscraped()

        # ---- Everything below is unreachable while `delete` is forced ----

        print "Checking", len(piddoms), "pageids for complete details"

        # NOTE(review): the loop variable `t` is never used; each iteration
        # builds the scraper from self.params instead, and the pageid /
        # title / domain read back are not used afterwards. Presumably the
        # scraper was meant to be built from `t` -- verify the row shape
        # returned by getallscraped() before changing this.
        for t in piddoms:
            scraper = wk.WikiRevisionScrape(pageid=self.params['pageid'],
                                            title=self.params['title'],
                                            domain=self.params['domain'],
                                            scrapemin=0)
            if scraper.scrape():
                pageid = scraper.getPageID()
                title = scraper.getTitle()
                domain = scraper.getDomain()
            else:
                continue

        print "Checking", len(fetch), "fetched entries for analyses"

        # Each fetched row is unpacked as the positional arguments of
        # WikiAnalysis; the first analysis with no results aborts with -1.
        for f in fetch:
            analyser = WikiAnalysis(*f)
            results = analyser.analyse()
            if not results:
                return -1
        return 0