def wiki_analyzer(language: str) -> None:
    """
    Analyzes a single language and prints the speed at which it is
    currently running. Dumps the result into the database when the
    buffer fills up or when the user exits the program (or if it
    crashes).

    Arguments:
        language  The language code for the database.
    """
    running = True
    dbase = database.WikiDatabase(f'{language}wikidb')
    source_buffer = []
    target_buffer = []
    paths_buffer = []
    last_time = time.time()
    paths_added = 0
    try:
        while running:
            sources, targets, all_paths = analyze_path(dbase)
            source_buffer.extend(sources)
            target_buffer.extend(targets)
            paths_buffer.extend(all_paths)
            paths_added += 2  # each analyze_path() call counts as two paths
            # Flush the buffers to the database once they fill up.
            if len(source_buffer) >= BUFFER_SIZE:
                dbase.dump_statistics(source_buffer, target_buffer, paths_buffer)
                source_buffer = []
                target_buffer = []
                paths_buffer = []
            # Poll stdin without blocking so 'q'/'quit' stops the loop cleanly.
            if select.select([sys.stdin], [], [], 0.0)[0]:
                usr_input = input()
                if usr_input.lower() in ('q', 'quit'):
                    running = False
                    if source_buffer:
                        dbase.dump_statistics(source_buffer, target_buffer, paths_buffer)
            # Report throughput roughly every 15 seconds.
            if time.time() - last_time > 15:
                d_t = time.time() - last_time
                paths_per_min = round(paths_added / d_t * 60)
                print(f"\r{paths_per_min} paths / min      ", end='')
                paths_added = 0
                last_time = time.time()
    finally:
        # On an abnormal exit (exception), flush whatever is still buffered.
        if running:
            dbase.dump_statistics(source_buffer, target_buffer, paths_buffer)
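# A minimal sketch of the module-level setup wiki_analyzer() relies on. The
# import location of analyze_path, the BUFFER_SIZE value, and the 'en' entry
# point below are assumptions for illustration, not part of the original module.
import select
import sys
import time

import database                   # the project's own database module
from paths import analyze_path    # hypothetical home of analyze_path

BUFFER_SIZE = 1000                # assumed flush threshold

if __name__ == '__main__':
    wiki_analyzer('en')           # type 'q' or 'quit' + Enter to stop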
def revidLog(title, pageid, domain):
    """Logs a per-revision weight summary for the given page."""
    import database as db
    d = db.WikiDatabase()
    ilogger.info("CALC SUMMARY FOR " + ", ".join([str(pageid), str(domain)]))
    weights = d.getrevidlog(pageid, domain)
    for w in weights:
        ilogger.info(" ")
        ilogger.info("-----" + str(w[:1]) + " / " + str(w[-1]) + "-----")
        ilogger.info(w[2:-2])
        ilogger.info("    TOTAL " + str(w[-2]))
        ilogger.info(" ")
    ilogger.info("END-----------------------------------")
    ilogger.info(" ")
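# revidLog() assumes a module-level logger named ilogger. A minimal sketch of
# that setup (the logger name and format here are assumptions):
import logging

logging.basicConfig(level=logging.INFO, format='%(message)s')
ilogger = logging.getLogger('wikianalysis')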
def __init__(self, title="", pageid=None, domain=None, scrapemin=50):
    # With no title/pageid (or an explicit 'random' title), scrape a random page.
    self.rand = not (title or pageid) or title == 'random'
    self.title = title
    if pageid:
        self.pageid = pageid
    self.domainset = False
    if domain:
        self.api_domain = domain
        self.domainset = True
    self.scrapemin = scrapemin
    self.db = db.WikiDatabase()
    self.domains = self.langsreader()
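# Hedged usage sketch for this constructor: the class name WikiRevisionScrape
# and the wk module alias are taken from dbrepair() below; the domain string
# format and the pageid value are assumptions for illustration.
scraper = wk.WikiRevisionScrape(title='random')              # random page
scraper = wk.WikiRevisionScrape(pageid=23862, domain='en',   # a specific page,
                                scrapemin=0)                 # no minimum size
if scraper.scrape():
    print(scraper.getTitle())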
def dbrepair(self, delete=False, clear=False):
    import database as db
    dtb = db.WikiDatabase()
    fetch = dtb.getallfetched()
    if delete:
        print("cleaning incomplete entries from the database")
        if clear:
            dtb.empty()
        else:
            dtb.cleanup()
        return 0
    piddoms = dtb.getallscraped()
    print("Checking", len(piddoms), "pageids for complete details")
    for pageid, domain in piddoms:
        # Assumes getallscraped() yields (pageid, domain) pairs; the scraper
        # resolves the title from the pageid on its own.
        scraper = wk.WikiRevisionScrape(pageid=pageid, domain=domain,
                                        scrapemin=0)
        if scraper.scrape():
            pageid = scraper.getPageID()
            title = scraper.getTitle()
            domain = scraper.getDomain()
        else:
            continue
    print("Checking", len(fetch), "fetched entries for analyses")
    for f in fetch:
        analyser = WikiAnalysis(*f)
        results = analyser.analyse()
        if not results:
            return -1
    return 0
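# Hedged usage sketch: dbrepair() is an instance method, so it is called on an
# instance of its owning class, which is not shown in this excerpt;
# MaintenanceTool below is a hypothetical stand-in for that class.
tool = MaintenanceTool()
tool.dbrepair(delete=True)    # prune incomplete rows (clear=True empties all)
tool.dbrepair()               # re-verify scraped pages and fetched analyses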
def __init__(self, title, pageid, domain):
    self.title = title
    self.pageid = pageid
    self.domain = domain
    self.dtb = db.WikiDatabase()
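# Hedged sketch: dbrepair() above constructs WikiAnalysis(*f) from each fetched
# row, so this constructor is assumed to receive (title, pageid, domain) in
# that order; the argument values below are illustrative only.
analyser = WikiAnalysis('Python (programming language)', 23862, 'en')
results = analyser.analyse()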
def fetchdatadump(flags, classnum):
    extension = '.pickle'
    dfile = BASEPATH + '/data/alldata' + str(classnum) + extension

    ## get data
    alldata = None
    dtb = db.WikiDatabase()
    if classnum == 0:
        print("Test: can we predict gradient from weights?")
        alldata = dtb.gettrainingdata1()
    elif classnum == 1:
        print("Test: can we predict gradient from weights and size?")
        alldata = dtb.gettrainingdata2()
    elif classnum == 2:
        print("Test: can we predict gradient from weights and time change?")
        alldata = dtb.gettrainingdata3()
    elif classnum == 3:
        print("Test: can we predict gradient from summed weights and size?")
        alldata = dtb.gettrainingdata4()
    elif classnum == 4:
        print("Test: can we predict gradient from weights and username edit count over the whole English wiki?")
        alldata = dtb.gettrainingdata5()
    elif classnum == 5:
        print("Test: can we predict gradient from weights and username edit count over the whole English wiki?")
        alldata = dtb.gettrainingdata6()
    elif classnum == 7:
        print("Test: can we predict gradient from weights? (classification)")
        alldata = dtb.gettrainingdata1()
    elif classnum == 8:
        print("Test: can we predict gradient from weights and size? (classification)")
        alldata = dtb.gettrainingdata2()
    elif classnum == 9:
        print("Test: can we predict gradient from weights and time change? (classification)")
        alldata = dtb.gettrainingdata3()
    elif classnum == 10:
        print("Test: can we predict gradient from summed weights and size? (classification)")
        alldata = dtb.gettrainingdata4()
    elif classnum == 11:
        print("Test: can we predict gradient from weights and username edit count over the whole English wiki? (classification)")
        alldata = dtb.gettrainingdata5()
    elif classnum == 12:
        print("Test: can we predict gradient from weights and username edit count over the whole English wiki? (classification)")
        alldata = dtb.gettrainingdata6()
    print("received", len(alldata), "cases")

    ## pick a random subgroup if asked
    if flags['clip']:
        print("picking", flags['clip'], "random entries")
        shuffle(alldata)
        alldata = alldata[:flags['clip']]

    print("splitting")
    # classnums above 5 are classification runs: binarize the target at 0.5;
    # otherwise keep the raw gradient as a regression target.
    weights, classifications = zip(*[[list(w[:-1]),
                                      (0 if w[-1] < 0.5 else 1)
                                      if classnum > 5 else w[-1]]
                                     for w in alldata])
    # Ensure every feature is a plain float.
    for i in range(len(weights)):
        for v in range(len(weights[i])):
            weights[i][v] = float(weights[i][v])
    print("got", len(weights[0]), "weights")
    return weights, classifications
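# Hedged usage sketch for fetchdatadump(): the shape of the flags dict is
# inferred from the body above, which only reads flags['clip'].
flags = {'clip': 500}                                  # keep 500 random rows
weights, classifications = fetchdatadump(flags, classnum=1)
print(len(weights), 'cases with', len(weights[0]), 'features each')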