예제 #1
0
	def read_replace_log(self):
		"""Fetch the command page history and text for reading replacements.

		Looks up the newest timestamp stored in the replacer table and
		requests only command page revisions since that time (or the
		history without a lower bound when the table yields no row).

		If fetching the history or the page text raises StandardError, a
		warning is logged, the method sleeps for config['timeout'] and
		returns None.
		"""
		# FIXME: Make sqlite3 compatible
		# Only the table name is interpolated here; each doubled %%s is left
		# behind as a plain %s placeholder in the resulting statement.
		insert = """INSERT INTO %s (timestamp, old_image, new_image, 
			status, user, comment) VALUES (%%s, %%s, %%s,
			'pending', %%s, %%s)""" % self.config['replacer_table']
		
		page = wikipedia.Page(self.site, self.config['command_page'])
		
		# Get last revision date
		# The truthiness of execute()'s return value decides whether a row is
		# fetched (presumably a row count -- confirm for the driver in use).
		if self.cursor.execute("""SELECT timestamp FROM %s 
				ORDER BY timestamp DESC LIMIT 1""" % \
				self.config['replacer_table']):
			# Convert the stored timestamp with mw_timestamp() before it is
			# handed to the wiki as the `since` bound.
			since = mw_timestamp(self.cursor.fetchone()[0])
		else:
			since = None
			
		try:
			# At most 500 revisions are requested.
			revisions = page.fullVersionHistory(max = 500, since = since)
			# Fetch the page any way, to prevent editconflicts
			old_text = text = page.get()
		except StandardError, e:
			# Network error, not critical
			output(u'Warning! Unable to read replacement log.', False)
			output('%s: %s' % (e.__class__.__name__, str(e)), False)
			# time.sleep() returns None, so this waits and then returns None.
			return time.sleep(self.config['timeout'])
예제 #2
0
	def do(self, args):
		try:
			self.report(args)
		except Exception, e:
			output(u'A critical error during reporting has occured!')
			output('%s: %s' % (e.__class__.__name__, str(e)), False)
			traceback.print_exc(file = sys.stderr)
			self.exit()
			os.kill(0, signal.SIGTERM)
예제 #3
0
 def do(self, args):
     try:
         self.report(args)
     except Exception, e:
         output(u'A critical error during reporting has occured!', False)
         output('%s: %s' % (e.__class__.__name__, str(e)), False)
         traceback.print_exc(file = sys.stderr)
         sys.stderr.flush()
         self.exit()
         os.kill(0, signal.SIGTERM)
예제 #4
0
    def examine_revision_history(self, revisions, replacement, username):
        """ Find out who is to blame for a replacement

        Walks ``revisions`` in the order given and stops at the first
        revision whose text (``revision['*']``) contains the complete
        matched command, ``replacement.group(0)``.

        revisions   -- iterable of dicts with 'timestamp', 'user' and '*' keys
        replacement -- regular expression match object with three groups
        username    -- not used by this implementation

        Returns the tuple (db timestamp, strip_image(group 1),
        strip_image(group 2), revision user, group 3), or None after
        logging a warning when no revision contains the command.

        Side effect: self.first_revision is set to the integer timestamp
        of the blamed revision when it is still unset (0) or when the new
        timestamp is earlier than the stored one.
        """

        command = replacement.group(0)
        for revision in revisions:
            if command not in revision['*']:
                continue

            db_time = db_timestamp(revision['timestamp'])
            # Compare as integers. The stored value is always int(db_time);
            # db_timestamp() presumably returns a digit string, and in
            # Python 2 a string never compares as smaller than an int, so
            # the raw comparison could not detect an earlier revision.
            numeric_time = int(db_time)
            if not self.first_revision or numeric_time < self.first_revision:
                self.first_revision = numeric_time
            return (db_time, strip_image(replacement.group(1)),
                strip_image(replacement.group(2)),
                revision['user'], replacement.group(3))

        output('Warning! Could not find out who did %s' % repr(command),
            False)
        return None
예제 #5
0
	def examine_revision_history(self, revisions, replacement, username):
		"""Find out which revision introduced a replacement command.

		revisions   -- sequence of (timestamp, user, text) entries
		replacement -- regular expression match object with three groups
		username    -- revisions by this user are skipped (first entry
		               excepted, see below)

		If the first entry's text contains the complete matched command,
		it is blamed with the user reported as '<Unknown>'. Otherwise the
		remaining entries are searched in order for one that contains the
		command and was not made by ``username``.

		Returns the tuple (db timestamp, strip_image(group 1),
		strip_image(group 2), user, group 3), or None after logging a
		warning when nothing matches -- including when ``revisions`` is
		empty.
		"""
		command = replacement.group(0)

		# Guard against an empty history: indexing revisions[0] on an
		# empty sequence would raise IndexError instead of warning.
		if revisions and command in revisions[0][2]:
			return (db_timestamp(revisions[0][0]),
				strip_image(replacement.group(1)),
				strip_image(replacement.group(2)),
				'<Unknown>', replacement.group(3))

		for timestamp, user, text in revisions[1:]:
			if command in text and user != username:
				return (db_timestamp(timestamp),
					strip_image(replacement.group(1)),
					strip_image(replacement.group(2)),
					user, replacement.group(3))

		output('Warning! Could not find out who did %s' % repr(command),
			False)
		return None
예제 #6
0
	def examine_revision_history(self, revisions, replacement, username):
		"""Find out which revision introduced a replacement command.

		revisions   -- iterable of (timestamp, user, text) entries
		replacement -- regular expression match object with three groups
		username    -- revisions made by this user are skipped

		The entries are searched in the order given for the first one
		whose text contains the complete matched command and whose user
		differs from ``username``.

		Returns the tuple (db timestamp, strip_image(group 1),
		strip_image(group 2), user, group 3), or None after logging a
		warning when nothing matches.

		Side effect: self.first_revision is set to the integer timestamp
		of the blamed revision when it is still unset (0) or when the new
		timestamp is earlier than the stored one.
		"""
		# Disabled special case for the first entry, kept for reference:
		#if replacement.group(0) in revisions[0][2]:
		#	return (db_timestamp(revisions[0][0]),
		#		strip_image(replacement.group(1)),
		#		strip_image(replacement.group(2)),
		#		'<Unknown>', replacement.group(3))

		command = replacement.group(0)
		for timestamp, user, text in revisions:
			if command in text and user != username:
				db_time = db_timestamp(timestamp)
				# Compare as integers. The stored value is always
				# int(db_time); db_timestamp() presumably returns a digit
				# string, and in Python 2 a string never compares as
				# smaller than an int, so the raw comparison could not
				# detect an earlier revision.
				numeric_time = int(db_time)
				if not self.first_revision or \
						numeric_time < self.first_revision:
					self.first_revision = numeric_time
				return (db_time, strip_image(replacement.group(1)),
					strip_image(replacement.group(2)),
					user, replacement.group(3))

		output('Warning! Could not find out who did %s' % repr(command),
			False)
		return None
예제 #7
0
    def read_replace_log(self):
        """ The actual worker method

        Determines the newest timestamp stored in the replacer table,
        then fetches the command page's revision history since that time
        together with the page's current text.

        If fetching fails with any Exception other than SystemExit or
        KeyboardInterrupt, a warning is logged, the method sleeps for
        config['timeout'] and returns None.
        """

        # FIXME: Make sqlite3 compatible
        # Only the table name is interpolated here; each doubled %%s is left
        # behind as a plain %s placeholder in the resulting statement.
        insert = """INSERT INTO %s (timestamp, old_image, new_image,
            status, user, comment) VALUES (%%s, %%s, %%s,
            'pending', %%s, %%s)""" % self.config['replacer_table']

        page = wikipedia.Page(self.site, self.config['command_page'])

        # Get last revision date
        # The truthiness of execute()'s return value decides whether a row is
        # fetched (presumably a row count -- confirm for the driver in use).
        if self.cursor.execute("""SELECT timestamp FROM %s
                ORDER BY timestamp DESC LIMIT 1""" % \
                self.config['replacer_table']):
            since = mw_timestamp(self.cursor.fetchone()[0])
        else:
            since = None

        # With 'clean_list' enabled, look up the sysop account name that is
        # configured for this site's family and language; otherwise None.
        if self.config.get('clean_list', False):
            username = config.sysopnames[self.site.family.name][self.site.lang]
        else:
            username = None

        try:
            # Fetch revision history
            revisions = self.get_history(page.title(), since, username)
            # Fetch the page any way, to prevent editconflicts
            old_text = text = page.get()
        except (SystemExit, KeyboardInterrupt):
            # Never swallow an exit request or Ctrl-C.
            raise
        except Exception, e:
            # Network error, not critical
            output(u'Warning! Unable to read replacement log.', False)
            output('%s: %s' % (e.__class__.__name__, str(e)), False)
            #self.site.conn.close()
            #self.site.conn.connect()
            # time.sleep() returns None, so this waits and then returns None.
            return time.sleep(self.config['timeout'])
예제 #8
0
def main():
    """Script entry point: create the Replacer and run it.

    The Replacer instance is stored in the module-level name ``R``.
    SystemExit and KeyboardInterrupt are re-raised untouched; any other
    Exception is reported with its traceback on stderr. In every case
    'Exitting replacer' is logged and wikipedia.stopme() is called
    before the function is left.
    """
    global R

    import sys, traceback
    wikipedia.handleArgs()
    output(u'Running ' + __version__)

    # try/finally wraps try/except so the cleanup also runs when
    # SystemExit or KeyboardInterrupt is re-raised.
    try:
        try:
            # FIXME: Add support for single-process replacer.
            R = Replacer()
            output(u'This bot runs from: ' + str(R.site))
            R.start()
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception:
            # The exception object itself is not needed: print_exc()
            # reads the active exception on its own.
            output('A critical error has occured! Aborting!')
            traceback.print_exc(file = sys.stderr)
    finally:
        output('Exitting replacer')
        wikipedia.stopme()
예제 #9
0
class Replacer(object):
    """Reads image replacement commands from a wiki page into a database.

    Commands are template calls on the configured command page; each
    accepted command is inserted as a 'pending' row into the configured
    replacer table.
    """
    def __init__(self):
        """Load configuration, compile patterns, log in and open the database."""
        # NOTE: self.config is the config.CommonsDelinker object itself, so
        # the update() below modifies that shared object in place.
        self.config = config.CommonsDelinker
        self.config.update(getattr(config, 'Replacer', ()))
        # Matches {{<replace_template>|<old>|<new>}} with an optional
        # trailing |reason=<text>; groups 1-3 are old, new and reason.
        self.template = re.compile(r'\{\{%s\|([^|]*?)\|([^|]*?)(?:(?:\|reason\=(.*?))?)\}\}' % \
                self.config['replace_template'])
        # Pairs of case-insensitive patterns built from the first two items
        # of every configured 'disallowed_replacements' entry.
        self.disallowed_replacements = [(re.compile(i[0], re.I), re.compile(i[1], re.I))
            for i in self.config.get('disallowed_replacements', ())]

        self.site = wikipedia.getSite()
        self.site.forceLogin()

        self.database = connect_database()
        self.cursor = self.database.cursor()

        # 0 means "not set yet".
        self.first_revision = 0
        # An empty list when replacement reporting is enabled, None otherwise.
        if self.config.get('replacer_report_replacements', False):
            self.reporters = []
        else:
            self.reporters = None


    def read_replace_log(self):
        """ The actual worker method

        Fetches the command page and its history, stores every accepted
        replacement command in the database and, when 'clean_list' is
        enabled, removes the stored commands from the command page.

        Returns None. When the page cannot be read or contains {{stop}},
        the method sleeps for config['timeout'] before returning.
        """

        # FIXME: Make sqlite3 compatible
        # Only the table name is interpolated here; each doubled %%s is left
        # behind as a %s placeholder for cursor.execute(insert, res) below.
        insert = """INSERT INTO %s (timestamp, old_image, new_image,
            status, user, comment) VALUES (%%s, %%s, %%s,
            'pending', %%s, %%s)""" % self.config['replacer_table']

        page = wikipedia.Page(self.site, self.config['command_page'])

        # Get last revision date
        # The truthiness of execute()'s return value decides whether a row is
        # fetched (presumably a row count -- confirm for the driver in use).
        if self.cursor.execute("""SELECT timestamp FROM %s
                ORDER BY timestamp DESC LIMIT 1""" % \
                self.config['replacer_table']):
            since = mw_timestamp(self.cursor.fetchone()[0])
        else:
            since = None

        # With 'clean_list' enabled, look up the sysop account name that is
        # configured for this site's family and language; otherwise None.
        if self.config.get('clean_list', False):
            username = config.sysopnames[self.site.family.name][self.site.lang]
        else:
            username = None

        try:
            # Fetch revision history
            revisions = self.get_history(page.title(), since, username)
            # Fetch the page any way, to prevent editconflicts
            old_text = text = page.get()
        except (SystemExit, KeyboardInterrupt):
            # Never swallow an exit request or Ctrl-C.
            raise
        except Exception, e:
            # Network error, not critical
            output(u'Warning! Unable to read replacement log.', False)
            output('%s: %s' % (e.__class__.__name__, str(e)), False)
            #self.site.conn.close()
            #self.site.conn.connect()
            # time.sleep() returns None, so this waits and then returns None.
            return time.sleep(self.config['timeout'])

        # We're being killed
        # The check is case-insensitive because the text is lower-cased.
        if '{{stop}}' in text.lower():
            output(u'Found {{stop}} on command page. Not replacing anything.')
            return time.sleep(self.config['timeout'])

        # Sort oldest first
        revisions.sort(key = lambda rev: rev['timestamp'])

        # Find all commands
        replacements = self.template.finditer(text)

        remove_from_list = []
        count = 0
        for replacement in replacements:
            # Stop once the configured number of commands has been looked at;
            # the default of -1 never equals count, i.e. no limit.
            if count == self.config.get('replacer_rate_limit', -1): break
            # Find out who's to blame
            res = self.examine_revision_history(
                revisions, replacement, username)
            # Skip commands without a blamed revision, disallowed ones, and
            # those whose old and new names are identical.
            if res and self.allowed_replacement(replacement) and \
                    replacement.group(1) != replacement.group(2):
                # Insert replace command into database
                self.cursor.execute(insert, res)
                # Tag line for removal
                remove_from_list.append(replacement.group(0))
                output('Replacing %s by %s: %s' % replacement.groups())
            # Rejected commands are counted as well.
            count += 1

        # Save all replaces to database
        self.database.commit()

        if remove_from_list and self.config.get('clean_list', False):
            # Cleanup the command page
            # Retry until the save succeeds: after an edit conflict the page
            # is fetched again and the same removals are re-applied.
            while True:
                try:
                    for remove in remove_from_list:
                        text = text.replace(remove, u'')
                    # Kill the freaky CommonsDupes
                    text = text.replace('== Dummy section, heading can be deleted (using [http://tools.wikimedia.de/~magnus/commons_dupes.php CommonsDupes]) ==', '')
                    # Kill the freaky whitespace
                    text = text.replace('\r', '')
                    # Repeatedly replace three consecutive newlines by one
                    # until no such run is left.
                    while '\n\n\n' in text:
                        text = text.replace('\n\n\n', '\n')
                    # Save the page
                    page.put(text.strip(), comment = 'Removing images being processed')
                    return
                except wikipedia.EditConflict:
                    # Try again
                    text = page.get()
예제 #10
0
                    not_ok_items.append(u'[[:%s%s]]' % \
                        (namespace_name, page_title))
            else:
                not_ok_items.append(u'[[:%s:%s%s]]' % (site_prefix(site),
                    namespace_name, page_title))

        template = u'{{%s|new_image=%s|user=%s|comment=%s|not_ok=%s}}' % \
            (self.config['replacer_report_template'],
            new_image, user, comment,
            self.config.get('replacer_report_seperator', u', ').join(not_ok_items))
        page = wikipedia.Page(self.site, u'Image:' + old_image)

        try:
            text = page.get()
        except wikipedia.NoPage:
            output(u'Warning! Unable to report replacement to %s. Page does not exist!' % old_image)
            return
        except wikipedia.IsRedirectPage:
            output(u'Warning! %s is a redirect; not reporting replacement!' % old_image)
            return
        try:
            page.put(u'%s\n%s' % (template, text),
                comment = u'This image has been replaced by ' + new_image)
        except wikipedia.PageNotSaved, e:
            output(u'Warning! Unable to report replacement to %s.' % old_image, False)
            output('%s: %s' % (e.__class__.__name__, str(e)), False)
        except wikipedia.ServerError, e:
            output(u'Warning! Server error while reporting replacement to %s.' % old_image, False)
            output('%s: %s' % (e.__class__.__name__, str(e)), False)
            return self.report((old_image, new_image, user, comment, not_ok))
        else:
예제 #11
0
					not_ok_items.append(u'[[:%s%s]]' % \
						(namespace_name, page_title))
			else:
				not_ok_items.append(u'[[:%s:%s%s]]' % (site_prefix(site),
					namespace_name, page_title))
		
		template = u'{{%s|new_image=%s|user=%s|comment=%s|not_ok=%s}}' % \
			(self.config['replacer_report_template'],
			new_image, user, comment, 
			self.config.get('replacer_report_seperator', u', ').join(not_ok_items))
		page = wikipedia.Page(self.site, u'Image:' + old_image)
		
		try:
			text = page.get()
		except wikipedia.NoPage:
			output(u'Warning! Unable to report replacement to %s. Page does not exist!' % old_image)
			return
			
		try:
			page.put(u'%s\n%s' % (template, text), 
				comment = u'This image has been replaced by ' + new_image)
		except PageNotSaved, e:
			output(u'Warning! Unable to report replacement to %s.' % old_image, False)
			output('%s: %s' % (e.__class__.__name__, str(e)), False)
		else:
			output(u'Reporting replacement of %s by %s.' % \
				(old_image, new_image))
			

if __name__ == '__main__':
	import sys, traceback
예제 #12
0
				
		for timestamp, user, text in revisions[1:]:
			if replacement.group(0) in text and user != username:
				return (db_timestamp(timestamp), 
					strip_image(replacement.group(1)),
					strip_image(replacement.group(2)),
					user, replacement.group(3))
					
		output('Warning! Could not find out who did %s' % \
				repr(replacement.group(0)), False)
		return
			
	def start(self):
		"""Poll the replacement log forever.

		After each call to read_replace_log() the loop sleeps for twice
		config['timeout']. The method never returns on its own.
		"""
		while True:
			self.read_replace_log()
			# Replacer should not loop as often as delinker
			pause = self.config['timeout'] * 2
			time.sleep(pause)

if __name__ == '__main__':
	# Script entry point: run the Replacer until it stops, then always
	# shut pywikipedia down through wikipedia.stopme().
	import sys, cgitb
	try:
		try:
			# FIXME: Add support for single-process replacer.
			r = Replacer()
			r.start()
		except (SystemExit, KeyboardInterrupt):
			# Deliberate shutdown (exit request or Ctrl-C): leave
			# quietly, as the previous catch-all did.
			pass
		except Exception:
			# Report every other failure. The previous handler only
			# reported StandardError subclasses and silently dropped
			# any other exception class through a bare "except: pass".
			output('A critical error has occured! Aborting!')
			print >>sys.stderr, cgitb.text(sys.exc_info())
	finally:
		# Runs on every exit path, including exceptions that are not
		# caught above.
		wikipedia.stopme()