Example #1
def wiki_analyzer(language: str) -> None:
    """
	Analyzes a single language and prints the speed at which it's currently
	running at. Dumps the result into the database when the buffer fills up
	or when the user exits the program (or if it crashes).

	Arguments:
	language 		The language code for the database.
	"""
    running = True
    dbase = database.WikiDatabase(f'{language}wikidb')

    source_buffer = []
    target_buffer = []
    paths_buffer = []

    last_time = time.time()
    paths_added = 0

    try:
        while running:

            sources, targets, all_paths = analyze_path(dbase)
            source_buffer.extend(sources)
            target_buffer.extend(targets)
            paths_buffer.extend(all_paths)
            paths_added += 2

            if len(source_buffer) >= BUFFER_SIZE:
                dbase.dump_statistics(source_buffer, target_buffer,
                                      paths_buffer)
                source_buffer = []
                target_buffer = []
                paths_buffer = []

            if select.select([sys.stdin], [], [], 0.0)[0]:
                usr_input = input()
                if usr_input.lower() == 'q' or usr_input.lower() == 'quit':
                    running = False
                    if source_buffer:
                        dbase.dump_statistics(source_buffer, target_buffer,
                                              paths_buffer)

            if time.time() - last_time > 15:
                d_t = time.time() - last_time
                paths_per_min = round(paths_added / d_t * 60)
                print(f"\r{paths_per_min} paths / min ", end='')
                paths_added = 0
                last_time = time.time()
    finally:
        if running:
            dbase.dump_statistics(source_buffer, target_buffer, paths_buffer)
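For reference, here is a minimal sketch of how wiki_analyzer might be launched from the command line. The argument handling and the 'en' fallback are assumptions; the original module's entry point is not shown in this example.

# Hypothetical entry point; database, analyze_path and BUFFER_SIZE are assumed
# to be defined elsewhere in the same module, and sys is already imported there.
if __name__ == '__main__':
    lang = sys.argv[1] if len(sys.argv) > 1 else 'en'
    wiki_analyzer(lang)  # runs until 'q'/'quit' is typed or the process is interrupted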
Example #2
File: logger.py Project: wmarsey/wrhp
def revidLog(title, pageid, domain):
    import database as db
    d = db.WikiDatabase()

    ilogger.info("CALC SUMMARY FOR " + ", ".join([str(pageid), str(domain)]))
    weights = d.getrevidlog(pageid, domain)
    for w in weights:
        ilogger.info(" ")
        ilogger.info("-----" + str(w[:1]) + " / " + str(w[-1]) + "-----")
        ilogger.info(w[2:-2])
        ilogger.info(" TOTAL " + str(w[-2]))
    ilogger.info(" ")
    ilogger.info("END-----------------------------------")
    ilogger.info(" ")
Example #3
    def __init__(self, title="", pageid=None, domain=None, scrapemin=50):

        if not (title or pageid) or title == 'random':
            self.rand = True

        self.title = title
        if pageid:
            self.pageid = pageid

        if domain:
            self.api_domain = domain
            self.domainset = True

        self.scrapemin = scrapemin

        self.db = db.WikiDatabase()
        self.domains = self.langsreader()
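Judging from example #4, this constructor appears to belong to WikiRevisionScrape (imported there as wk). A sketch of how it might be instantiated, with purely illustrative values:

# Hypothetical calls; the pageid and domain values are placeholders.
scraper = wk.WikiRevisionScrape(title='random')                  # pick a random article
scraper = wk.WikiRevisionScrape(pageid=12345, domain='en', scrapemin=0)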
Example #4
    def dbrepair(self, delete=False, clear=False):
        import database as db
        dtb = db.WikiDatabase()
        fetch = dtb.getallfetched()

        if delete:
            print "cleaning incomplete entries from the database"
            if clear:
                dtb.empty()
            else:
                dtb.cleanup()
            return 0
        else:
            piddoms = dtb.getallscraped()

        print "Checking", len(piddoms), "pageids for complete details"

        for t in piddoms:
            scraper = wk.WikiRevisionScrape(pageid=self.params['pageid'],
                                            title=self.params['title'],
                                            domain=self.params['domain'],
                                            scrapemin=0)
            if scraper.scrape():
                pageid = scraper.getPageID()
                title = scraper.getTitle()
                domain = scraper.getDomain()
            else:
                continue

        print "Checking", len(fetch), "fetched entries for analyses"

        for f in fetch:
            analyser = WikiAnalysis(*f)
            results = analyser.analyse()
            if not results:
                return -1
        return 0
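A sketch of how the repair modes might be invoked, assuming the enclosing object is available as repairer (a hypothetical name):

repairer.dbrepair(delete=True)               # drop incomplete entries only
repairer.dbrepair(delete=True, clear=True)   # empty the database entirely
repairer.dbrepair()                          # verify scraped pages and re-run analyses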
Example #5
    def __init__(self, title, pageid, domain):
        self.title = title
        self.pageid = pageid
        self.domain = domain
        self.dtb = db.WikiDatabase()
Example #6
File: validator.py Project: wmarsey/wrhp
def fetchdatadump(flags, classnum):
    extension = '.pickle'
    dfile = BASEPATH + '/data/alldata' + str(classnum) + extension

    ##get data
    alldata = None
    dtb = db.WikiDatabase()

    if classnum == 0:
        print("Test: can we predict gradient from weights?")
        alldata = dtb.gettrainingdata1()
    elif classnum == 1:
        print("Test: can we predict gradient from weights and size?")
        alldata = dtb.gettrainingdata2()
    elif classnum == 2:
        print("Test: can we predict gradient from weights and time change?")
        alldata = dtb.gettrainingdata3()
    elif classnum == 3:
        print("Test: can we predict gradient from summed weights and size?")
        alldata = dtb.gettrainingdata4()
    elif classnum == 4:
        print("Test: can we predict gradient from weights and username edit count over the whole english wiki?")
        alldata = dtb.gettrainingdata5()
    elif classnum == 5:
        print("Test: can we predict gradient from weights and username edit count over the whole english wiki?")
        alldata = dtb.gettrainingdata6()
    elif classnum == 7:
        print("Test: can we predict gradient from weights? (classification)")
        alldata = dtb.gettrainingdata1()
    elif classnum == 8:
        print("Test: can we predict gradient from weights and size? (classification)")
        alldata = dtb.gettrainingdata2()
    elif classnum == 9:
        print("Test: can we predict gradient from weights and time change? (classification)")
        alldata = dtb.gettrainingdata3()
    elif classnum == 10:
        print("Test: can we predict gradient from summed weights and size? (classification)")
        alldata = dtb.gettrainingdata4()
    elif classnum == 11:
        print("Test: can we predict gradient from weights and username edit count over the whole english wiki? (classification)")
        alldata = dtb.gettrainingdata5()
    elif classnum == 12:
        print("Test: can we predict gradient from weights and username edit count over the whole english wiki? (classification)")
        alldata = dtb.gettrainingdata6()

    print "recieved", len(alldata), "cases"

    ##pick a random subgroup if asked
    if flags['clip']:
        print "picking", flags['clip'], "random entries"
        shuffle(alldata)
        alldata = alldata[:flags['clip']]

    print "splitting"
    weights, classifications = zip(*[[list(w[:-1]),\
                                          (0 if w[-1] < 0.5 else 1) \
                                          if classnum > 5 else w[-1]] \
                                         for w in alldata])
    for i in range(len(weights)):
        for v in range(len(weights[i])):
            weights[i][v] = float(weights[i][v])
    print "got", len(weights[0]), "weights"

    return weights, classifications
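For illustration, a possible call matching the signature above; the contents of flags are an assumption based on the flags['clip'] lookup, and classnum=1 simply selects the weights-and-size test:

# Hypothetical invocation; assumes db, BASEPATH and shuffle are importable as in the original module.
weights, classifications = fetchdatadump({'clip': 500}, classnum=1)
print(len(weights), "cases kept after clipping")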