def setup_logging():
    # Logging
    logger = logging.getLogger("infolog")
    logger.setLevel(logging.DEBUG)

    # Formatter
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

    # Stream
    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(formatter)
    ch.addFilter(LessThanFilter(logging.ERROR))
    ch.setLevel(logging.DEBUG)
    logger.addHandler(ch)

    # Error stream
    eh = logging.StreamHandler(sys.stderr)
    eh.setLevel(logging.ERROR)
    eh.setFormatter(formatter)
    logger.addHandler(eh)

    # Info log
    if config_loader.config["enable_log"]:
        il = logging.FileHandler(path.main() + "logs/info.log")
        il.setLevel(logging.DEBUG)
        il.addFilter(LessThanFilter(logging.ERROR))
        il.setFormatter(formatter)
        logger.addHandler(il)

        # Error log
        el = logging.FileHandler(path.main() + "logs/crashreport.log")
        el.setLevel(logging.ERROR)
        el.setFormatter(formatter)
        logger.addHandler(el)
def crashreport(*message):
    finalmessage = ""
    for l, mes in enumerate(message):
        finalmessage += str(mes)
        # Separate arguments with a space, except after the last one
        if l != len(message) - 1:
            finalmessage += " "

    time = datetime.datetime.now()
    line = str(time) + ' ' + finalmessage + "\n"

    with open(path.main() + "logs/crashreport.log", "a") as crashfile:
        crashfile.write(line)
def setupLogging():
    # Logging
    logger = logging.getLogger("infolog")
    logger.setLevel(logging.DEBUG)

    # Formatter
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

    # Stream
    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(formatter)
    ch.addFilter(LessThanFilter(logging.ERROR))
    ch.setLevel(logging.DEBUG)
    logger.addHandler(ch)

    # Error stream
    eh = logging.StreamHandler(sys.stderr)
    eh.setLevel(logging.ERROR)
    eh.setFormatter(formatter)
    logger.addHandler(eh)

    # Info log
    if cfgl.cur_conf["core"]["enable_log"]:
        il = logging.FileHandler(path.main() + "logs/info.log")
        il.setLevel(logging.DEBUG)
        il.addFilter(LessThanFilter(logging.ERROR))
        il.setFormatter(formatter)
        logger.addHandler(il)

        # Error log
        el = logging.FileHandler(path.main() + "logs/crashreport.log")
        el.setLevel(logging.ERROR)
        el.setFormatter(formatter)
        logger.addHandler(el)

    # Stable logger
    slogger = logging.getLogger("stablelog")
    slogger.setLevel(logging.DEBUG)

    # Info log
    isl = logging.FileHandler(path.main() + "logs/stable.log")
    isl.setLevel(logging.INFO)
    isl.setFormatter(formatter)

    # Add handlers
    slogger.addHandler(isl)
def load_config(filename=path.main() + "mws.json"):
    global config
    # TODO allow to set the config file from command line

    if not os.path.isfile(filename):
        with open(filename, "w") as config_f:
            json.dump(config, config_f, indent=2, separators=(',', ': '))
        return False

    with open(filename, "r") as config_f:
        temp_config = json.load(config_f)

    merge_config(config, temp_config)
    return True
def main():
    parser = optparse.OptionParser()
    parser.add_option('-n', '--name', action="store", dest="name",
                      help="grid job name", default=None)
    options, args = parser.parse_args()

    if options.name:
        command = "jstart -mem 500m -N " + options.name + " " + path.main() + "grid.sh"
        os.system(command)
    else:
        parser.print_help()
def checkForLocalUpdate():
    global cur_conf
    global cfg_ver

    for conf in glob.glob(path.main() + "conf/" + "*.json"):
        try:
            conf_f = open(conf, "r")
            conf_json = json.load(conf_f)
            conf_f.close()
        except:
            logger.error("failed to load config from %s, check crashreport for more info" % conf)
            logger.critical(traceback.format_exc())
            continue

        namestr = os.path.basename(conf)[:-5]

        if namestr not in cur_conf or cur_conf[namestr] != conf_json:
            cur_conf[namestr] = conf_json
            cfg_ver += 1
            logger.info("new local config \"%s\" loaded to core" % namestr)
class ThanatosTask:
    name = "archive_linker"

    # Time min/hour/day/month
    time = ["00/*/*/*"]

    # Template name
    template_name = "Käyttäjä:4shadowwBOT/linker"

    # Template Page
    template_page = None

    # Execute on start
    exeonstart = True

    # Html tags
    stag = "<div class=4linker>"
    etag = "</div>"

    comments = {
        "fi": ": päivitetty arkistolinkit",
    }

    db = TinyDB(path.main() + "core/db/taskdb/archiver_linker.json")

    def load_config(self, page, site):
        config = Config()
        for tpl in page.templatesWithParams():
            if tpl[0] == pywikibot.Page(site, self.template_name, ns=10):
                for param in tpl[1]:
                    item, value = param.split('=', 1)
                    if item == "archive":
                        config.archive = value
                    elif item == "style":
                        config.style = value
                    elif item == "linktext":
                        config.linktext = value
                break

        return config

    def template_title_regex(self, tpl_page):
        """
        Return a regex that matches to variations of the template title.

        It supports the transcluding variant as well as localized namespaces
        and case-insensitivity depending on the namespace.

        @param tpl_page: The template page
        @type tpl_page: Page
        """
        ns = tpl_page.site.namespaces[tpl_page.namespace()]
        marker = '?' if ns.id == 10 else ''
        title = tpl_page.title(withNamespace=False)
        if ns.case != 'case-sensitive':
            title = '[%s%s]%s' % (re.escape(title[0].upper()),
                                  re.escape(title[0].lower()),
                                  re.escape(title[1:]))
        else:
            title = re.escape(title)

        return re.compile(r'(?:(?:%s):)%s%s' % (u'|'.join(ns), marker, title))

    def getpages(self):
        site = pywikibot.Site()
        transclusion_page = pywikibot.Page(site, self.template_name, ns=10)
        self.template_page = transclusion_page
        return transclusion_page.getReferences(onlyTemplateInclusion=True,
                                               follow_redirects=False,
                                               namespaces=[])

    def get_links(self, page, site, config):
        # Collect existing archive subpages until the first missing one
        query = Query()
        links = []
        counter = 1
        while True:
            apage = pywikibot.Page(site, page.title() + "/" + config.archive.replace("%(counter)d", str(counter)))
            if apage.exists() and apage.text != "":
                links.append(apage.title())
            else:
                self.db.update({"counter": counter - 1}, query.name == page.title())
                break
            counter += 1

        return links

    def update_text(self, page, linktable):
        # Replace an existing link table, or insert a new one right after the template
        if self.stag in page.text:
            reg = self.stag + r".*?</div>"
            oldtable = re.findall(reg, page.text, re.DOTALL)
            page.text = page.text.replace(oldtable[0], self.stag + "\n" + linktable + self.etag)
        else:
            rx = re.compile(r'\{\{%s\s*?\n.*?\n\}\}' % (self.template_title_regex(self.template_page).pattern), re.DOTALL)
            match = rx.search(page.text).group(0)
            if match:
                match = match.split("\n")
                textlist = page.text.split("\n")
                x = 0
                placepos = 0
                for i in range(0, len(textlist)):
                    if match[x] in textlist[i]:
                        if x == len(match) - 1:
                            placepos = i
                            break
                        x += 1

                textlist[placepos] = textlist[placepos] + "\n" + self.stag + "\n" + linktable + self.etag
                page.text = '\n'.join(textlist)

    def link(self, page, site, config):
        query = Query()
        match = self.db.search(query.name == page.title())
        must_update = False

        if match == []:
            self.db.insert({"name": page.title(), "counter": 1})
            match = self.db.search(query.name == page.title())
            must_update = True
        else:
            apage = pywikibot.Page(site, page.title() + "/" + config.archive.replace("%(counter)d", str(match[0]["counter"] + 1)))
            if apage.exists() and apage.text != "":
                must_update = True

        if must_update:
            printlog("archive linker: checking links for", page)
            oldtext = page.text
            links = self.get_links(page, site, config)
            lg = LinkGenerator()
            linktable = lg.generate(config, links)
            self.update_text(page, linktable)
            newmatch = self.db.search(query.name == page.title())
            comment = create_comment.comment([self.comments[config.lang]])
            if oldtext != page.text:
                printlog("archive linker: updating links for", page)
                wikipedia_worker.savepage(page, page.text, comment)

    def run(self):
        pages = self.getpages()
        site = pywikibot.Site()
        #pages = [pywikibot.Page(site, "Keskustelu käyttäjästä:4shadoww")]
        for page in pages:
            if page.botMayEdit() and page.canBeEdited():
                printlog("archive linker: checking", page.title())
                try:
                    config = self.load_config(page, site)
                    self.link(page, site, config)
                except KeyboardInterrupt:
                    return
                except:
                    error = traceback.format_exc()
                    printlog("unknown error:\n" + error)
import json
import traceback
import sys
import glob
import os
import logging

logger = logging.getLogger("infolog")

# Import core modules
from core import path

# Load core config for startup
cur_conf = {}
try:
    core_config_f = open(path.main() + "core/config.json", "r")
    cur_conf["core"] = json.load(core_config_f)
    core_config_f.close()
except:
    logger.critical("failed to load core config")
    logger.critical("failed to startup")
    logger.critical(traceback.format_exc())
    sys.exit(1)

# Import core modules
from core import yapi

cfg_ver = 0


def updateConfigItems(holder, new):
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Import python modules
import os
import sys
import logging

# Import path tool
from core import path

# Append lib path
sys.path.append(path.main() + "core/lib/")

# Import core modules
from core import config_loader as cfgl
from core import worker
from core import session


class LessThanFilter(logging.Filter):
    def __init__(self, exclusive_maximum, name=""):
        super(LessThanFilter, self).__init__(name)
        self.max_level = exclusive_maximum

    def filter(self, record):
        return 1 if record.levelno < self.max_level else 0


def setupLogging():
    # Logging
import json
import traceback
import logging

from sseclient import SSEClient as EventSource

# Import core modules
from core import config_loader as cfgl
from core import rule_executor
from core import yapi
from core import path
from core import timelib

api = yapi.MWAPI
logger = logging.getLogger("infolog")

f_dict = open(path.main() + "core/dict.json")
dictionary = json.load(f_dict)
f_dict.close()


def shouldCheck(rev):
    # Check whether the revision should be checked at all
    revs = api.getRevision([rev["revision"]["new"]])
    if "badrevids" in revs["query"]:
        return False

    if api.stabilized(rev["title"]):
        return False

    if not api.reviewed(rev["title"]):
        return False
class ThanatosTask:
    comments = {
        "fi00": "arkistoi",
        "fi01": "keskustelun arkistoon",
        "fi01m": "keskustelua arkistoon",
        "fi02": "keskustelun sivulta",
        "fi02m": "keskustelua sivulta",
    }

    name = "archiver"

    # Time min/hour/day/month
    time = ["00/04/*/*", "00/12/*/*", "00/18/*/*", "00/00/*/*"]

    # Database
    db = TinyDB(path.main() + "core/db/taskdb/archiver.json")

    # Archiver config
    template_names = [
        "Käyttäjä:HarrivBOT/config",
        "Käyttäjä:4shadowwBOT/config"
    ]

    # Template page object
    template_page = None

    # Execute on start
    exeonstart = True

    ignore = []

    site = pywikibot.Site()

    def parse_mas_config(self, value):
        return int(value.replace("t", "").replace("T", "").replace("M", "").replace("m", "")
                   .replace("K", "").replace("k", "").replace("B", "").replace("b", ""))

    def load_config(self):
        config = Config()
        for tpl in self.dpage.page.templatesWithParams():
            if tpl[0] == pywikibot.Page(self.site, self.template_name, ns=10):
                for param in tpl[1]:
                    item, value = param.split('=', 1)
                    if item == "archive":
                        now = datetime.datetime.now()
                        if "%(year)d" in value:
                            value = value.replace("%(year)d", str(now.year))
                            config.using_year = True
                        elif "%(month)d" in value:
                            raise UnsupportedConfig("invalid archive param")
                        elif "%(monthname)s" in value:
                            raise UnsupportedConfig("invalid archive param")
                        elif "%(monthnameshort)s" in value:
                            raise UnsupportedConfig("invalid archive param")

                        config.archive = value.replace(self.dpage.page.title() + "/", "").replace("{{FULLPAGENAMEE}}/", "")

                    elif item == "algo":
                        if "old(" in value:
                            algo = re.findall(r"old\((.*?)\)", value)[0]
                            if int(algo.replace("d", "").replace("D", "")) > 0:
                                config.algo = algo
                        else:
                            if int(value.replace("d", "").replace("D", "")) > 0:
                                config.algo = value

                    elif item == "maxarchivesize":
                        try:
                            if int(self.parse_mas_config(value)) > 0:
                                if "t" in value or "T" in value:
                                    config.maxarchivesize = value
                                    config.threads = True
                                else:
                                    config.maxarchivesize = value
                                    config.threads = False
                        except ValueError:
                            printlog("invalid maxarchivesize")

                    elif item == "minthreadsleft":
                        try:
                            if int(value) >= 0:
                                config.minthreadsleft = int(value)
                        except ValueError:
                            printlog("invalid minthreadsleft")

                    elif item == "minthreadstoarchive":
                        try:
                            config.minthreadstoarchive = int(value)
                        except ValueError:
                            printlog("invalid minthreadstoarchive")

                    elif item == "archiveheader":
                        config.archiveheader = value

                    elif item == "counter":
                        try:
                            config.counter = int(value)
                        except ValueError:
                            printlog("invalid counter")

                    elif item == "key":
                        raise UnsupportedConfig("key")

                break

        return config

    def str2time(self, string):
        if string.endswith('d'):
            return datetime.timedelta(days=int(string[:-1]))
        elif string.endswith('h'):
            return datetime.timedelta(hours=int(string[:-1]))
        else:
            return datetime.timedelta(seconds=int(string))

    def str2bytes(self, string):
        factor = 0
        if "k" in string or "K" in string:
            factor = 1024
        if "m" in string or "M" in string:
            factor = 1048576
        string = string.replace("M", "").replace("m", "")
        string = string.replace("K", "").replace("k", "")
        string = string.replace("B", "").replace("b", "")
        return int(string) * factor

    def updatecounter(self, template, counter):
        template_and_params = textlib.extract_templates_and_params(template)
        for temp in template_and_params:
            if temp[0] == self.template_name and "counter" in temp[1]:
                temp[1]["counter"] = str(counter)
                return glue_template_and_params(temp)

    def getpages(self):
        transclusion_page = pywikibot.Page(self.site, self.template_name, ns=10)
        self.template_page = transclusion_page
        return transclusion_page.getReferences(onlyTemplateInclusion=True,
                                               follow_redirects=False,
                                               namespaces=[])

    def get_threads(self):
        self.dpage.threads = []
        start = 0
        cut = False
        ts = textlib.TimeStripper(site=self.site)
        for l in range(0, len(self.dpage.text)):
            thread_header = re.search('^== *([^=].*?) *== *$', self.dpage.text[l])
            if thread_header:
                if cut == True:
                    self.dpage.threads.append(Thread(self.dpage.text[start:l], ts))
                start = l
                cut = True
            elif len(self.dpage.text) - 1 == l:
                self.dpage.threads.append(Thread(self.dpage.text[start:l + 1], ts))

    def removefromlist(self, oldthread):
        confirmed = False
        i = 0
        startpos = None
        for l in range(0, len(self.dpage.text)):
            if i == len(oldthread):
                confirmed = True
                break
            if oldthread[i] == self.dpage.text[l]:
                if startpos == None:
                    startpos = l
                i += 1
            else:
                startpos = None
                i = 0

        for l in range(0, len(oldthread)):
            self.dpage.text.pop(startpos)

    def removeoldt(self):
        count = len(self.dpage.threads)
        if len(self.dpage.oldthreads) >= self.dpage.config.minthreadstoarchive:
            for thread in self.dpage.oldthreads:
                if count > self.dpage.config.minthreadsleft:
                    self.dpage.toarchive.append(thread)
                    self.removefromlist(thread.content)
                    count -= 1

    def addthread2archive(self, counter):
        x = 0
        if "%(counter)d" in self.dpage.config.archive:
            page = pywikibot.Page(self.site,
                                  self.dpage.page.title() + "/" + self.dpage.config.archive.replace("%(counter)d", str(counter)))
            using_counter = True
        else:
            page = pywikibot.Page(self.site,
                                  self.dpage.page.title() + "/" + self.dpage.config.archive)
            using_counter = False

        if not page.exists() or page.text == "":
            page.text += self.dpage.config.archiveheader

        archived = False

        for i in range(len(self.dpage.toarchive)):
            if (self.dpage.config.using_year
                    or not self.dpage.config.threads and len(page.text) < self.str2bytes(self.dpage.config.maxarchivesize)
                    or self.dpage.config.threads and self.threads_count(page.text) < self.parse_mas_config(self.dpage.config.maxarchivesize)):
                if '\n'.join(self.dpage.toarchive[0].content) in page.text:
                    self.dpage.toarchive.pop(0)
                else:
                    archived = True
                    if i == 0:
                        page.text += "\n\n"
                    page.text += '\n'.join(self.dpage.toarchive[0].content) + "\n"
                    self.dpage.toarchive.pop(0)
                    x += 1
            else:
                counter += 1
                return page, x, counter, archived

        return page, x, counter, archived

    def save2archive(self):
        archives = []
        usingdb = False

        if self.dpage.config.counter == None:
            printlog("archiver: counter method db")
            usingdb = True
            exet = Query()
            matches = self.db.search(exet.name == self.dpage.page.title())
            if matches == []:
                self.db.insert({"name": self.dpage.page.title(), "counter": 1})
                counter = 1
            else:
                counter = matches[0]["counter"]
        else:
            printlog("archiver: counter method wikipage")
            counter = self.dpage.config.counter

        while len(self.dpage.toarchive) > 0:
            data = self.addthread2archive(counter)
            if data[1] > 1:
                comment = create_comment.comment([
                    self.comments[config.lang + "00"] + " " + str(data[1]) + " " +
                    self.comments[config.lang + "02m"] + " [[" + self.dpage.page.title() + "]]"
                ])
            else:
                comment = create_comment.comment([
                    self.comments[config.lang + "00"] + " yhden " +
                    self.comments[config.lang + "02"] + " [[" + self.dpage.page.title() + "]]"
                ])

            if data[0].text != '\n'.join(self.dpage.text) and data[3]:
                archives.append(data[0].title())
                printlog("archiver: saving archive " + self.dpage.page.title() + "/" +
                         self.dpage.config.archive.replace("%(counter)d", str(counter)))
                wikipedia_worker.savepage(data[0], data[0].text, comment)
                counter = data[2]
            elif data[2] > counter:
                counter = data[2]

        if usingdb:
            self.db.update({"counter": counter}, exet.name == self.dpage.page.title())
        else:
            self.dpage.counter = counter

        strarchives = ""
        for i in range(len(archives)):
            strarchives += "[[" + archives[i] + "]]"
            if i != len(archives) - 1:
                strarchives += ", "

        return strarchives

    def archive(self):
        ac = len(self.dpage.toarchive)
        archives = self.save2archive()

        if ac > 1:
            comment = create_comment.comment([
                self.comments[config.lang + "00"] + " " + str(ac) + " " +
                self.comments[config.lang + "01m"] + " " + archives
            ])
        else:
            comment = create_comment.comment([
                self.comments[config.lang + "00"] + " yhden " +
                self.comments[config.lang + "01"] + " " + archives
            ])

        # Update counter
        if self.dpage.counter != None and self.dpage.counter != self.dpage.config.counter:
            printlog("archiver: have to update counter")
            rx = re.compile(r'\{\{%s\s*?\n.*?\n\}\}' % (template_title_regex(self.template_page).pattern), re.DOTALL)
            match = rx.search(self.dpage.page.text).group(0)
            newtemplate = self.updatecounter(match, self.dpage.counter)
            self.dpage.text = '\n'.join(self.dpage.text).replace(match, newtemplate).split("\n")

        printlog("archiver: saving page")
        wikipedia_worker.savepage(self.dpage.page, '\n'.join(self.dpage.text), comment)

    def shouldArchive(self):
        self.dpage.text = self.dpage.page.text.split("\n")
        oldtext = list(self.dpage.text)
        self.get_threads()

        now = datetime.datetime.utcnow().replace(tzinfo=TZoneUTC())

        for thread in self.dpage.threads:
            if thread.timestamp:
                if now - thread.timestamp > self.str2time(self.dpage.config.algo):
                    self.dpage.oldthreads.append(thread)

        self.dpage.oldthreads.sort(key=lambda t: t.timestamp)
        self.removeoldt()

        if len(self.dpage.toarchive) < self.dpage.config.minthreadstoarchive:
            printlog("archiver: not enough old threads")
            return False

        for t in self.dpage.toarchive:
            printlog("archiver: going to archive", t.content[0])

        if oldtext != self.dpage.text:
            return True

    def run(self):
        for template in self.template_names:
            self.template_name = template
            pages = self.getpages()
            #pages = [pywikibot.Page(self.site, "Keskustelu wikiprojektista:Urheilu")]
            #self.template_page = pywikibot.Page(self.site, self.template_name)
            for page in pages:
                if page.title() in self.ignore:
                    print("ignored", page.title())
                elif page.botMayEdit() and page.canBeEdited():
                    printlog("archiver: checking", page)
                    try:
                        self.dpage = DiscussionPage()
                        self.dpage.page = page
                        self.dpage.config = self.load_config()
                        self.dpage.counter = self.dpage.config.counter
                        if self.shouldArchive():
                            self.archive()
                    except KeyboardInterrupt:
                        return
                    except UnsupportedConfig:
                        printlog("archiver: skipped", page.title(), "because uc")
                    except:
                        error = traceback.format_exc()
                        crashreport(error)
                        printlog("unknown error:\n" + error)
import datetime
from core import config
from core import colors
from core import path
import sys

time = datetime.datetime.now()
logfilename = str(time)

if config.enable_log == True:
    logfile = open(path.main() + "logs/" + logfilename + ".log", "a")


def printlog(*message, end='\n'):
    finalmessage = ""
    for l, mes in enumerate(message):
        finalmessage += str(mes)
        # Separate arguments with a space, except after the last one
        if l != len(message) - 1:
            finalmessage += " "

    time = datetime.datetime.now()
    line = str(time) + ' ' + finalmessage + end

    if config.enable_log == True:
        logfile.write(line)
        logfile.flush()

    sys.stdout.write(line)


def log(*message, end='\n'):
    finalmessage = ""
    for l, mes in enumerate(message):
        finalmessage += str(mes)
        if l != len(message) - 1:
            finalmessage += " "