def find_undersized():
    """Print a wiki-markup list of uploads for which Geograph has a wider image.

    Iterates over ``RossUploads()``.  A page is skipped when:

    * ``tlgetone`` raises ``IndexError`` for the ``Geograph`` template;
    * the template's second parameter is not exactly ``"Ross Watson"``;
    * the local ``gridimage_size`` table has no row for the image; or
    * the recorded ``original_width`` is not greater than the width of the
      file currently on Commons.

    For each remaining page one bullet line is printed, containing a
    ``Special:Upload`` re-upload URL pre-filled with the Geograph full-size
    URL, the current and available dimensions, and a link to the page.
    """
    for item in RossUploads():
        tree = mwparserfromhell.parse(item.text)
        try:
            geograph_template = tlgetone(tree, ['Geograph'])
        except IndexError:
            # Presumably: no {{Geograph}} template on this page.
            continue
        gridimage_id = int(str(geograph_template.get(1).value))
        commons_author = str(geograph_template.get(2).value)
        if commons_author != "Ross Watson":
            continue
        c = geodb.cursor()
        c.execute(
            """ SELECT * FROM gridimage_size WHERE gridimage_id = ? """,
            (gridimage_id, ))
        row = c.fetchone()
        if row is None:
            # No size record for this image; without this check the
            # subscript below would raise TypeError and abort the report.
            continue
        # File info is only needed once the page is known to be a candidate.
        fi = item.latest_file_info
        if row['original_width'] <= fi.width:
            continue
        # Only look up the Geograph details for images that will actually be
        # reported; the result is needed solely to build the upload URL.
        gi = get_geograph_info(gridimage_id)
        uploadurl = "https://commons.wikimedia.org/w/index.php?" + urlencode(
            {
                'title': 'Special:Upload',
                'wpDestFile': item.title(underscore=True,
                                         withNamespace=False),
                'wpForReUpload': '1',
                'wpSourceType': 'url',
                'wpUploadFileURL': get_geograph_full_url(gridimage_id, gi),
                'wpUploadDescription':
                    'Higher-resolution version from Geograph'
            })
        print("* [%s %d × %d → %d × %d] %s" %
              (uploadurl, fi.width, fi.height,
               row['original_width'], row['original_height'],
               item.title(asLink=True, textlink=True)))
def add_creditline(t, line):
    """Insert *line* into the infobox found in *t*.

    Asserts that ``tlgetall`` finds no ``Credit line`` template in *t*.
    The infobox is whatever ``tlgetone(t, infoboxes)`` returns.  If it
    already has one of the parameters named in ``otherfieldses``, a space,
    *line* and a newline are appended to the value of the first such
    parameter; otherwise a new ``other fields`` parameter holding *line*
    is added.  Returns ``None``; *t* is modified in place.
    """
    existing = tlgetall(t, ['Credit line'])
    assert len(existing) == 0
    infobox = tlgetone(t, infoboxes)
    # First "other fields"-style parameter the infobox has, if any.
    present = next(
        (name for name in otherfieldses if infobox.has(name)), None)
    if present is None:
        infobox.add("other fields", line)
        return
    parameter = infobox.get(present)
    parameter.value.append(Text(" "))
    parameter.value.append(line)
    parameter.value.append(Text("\n"))
def is_original_title(self, page, title):
    """Return whether *title* matches the description in *page*'s original text.

    Returns ``False`` straight away when ``self.is_geographbot_upload(page)``
    is false.  Otherwise the description is read from the ``Information``
    template of ``self.get_original_tree(page)``: parameter 1 of the first
    node in the ``description`` value (presumably a language wrapper
    template -- confirm against GeographBot's output).  The result is true
    when that description equals *title*, equals *title* plus a full stop,
    or begins with either of those followed by a space.
    """
    # This heuristic depends on GeographBot's behaviour.
    if not self.is_geographbot_upload(page):
        return False
    original_tree = self.get_original_tree(page)
    information = tlgetone(original_tree, ["Information"])
    wrapper = information.get("description").value.get(0)
    description = str(wrapper.get(1).value)
    if description in (title, title + "."):
        return True
    return description.startswith((title + " ", title + ". "))
def can_add_creditline(t, line):
    """Return whether the credit line *line* may be added to *t*.

    The answer is ``False`` when *t* already contains a ``Credit line``
    template, when ``tlgetone`` raises ``IndexError`` or
    ``TooManyTemplates`` for the ``Geograph`` template, or when the
    canonicalised second parameter of that template differs from the
    canonicalised ``Author`` parameter of *line*.  Otherwise ``True``.
    """
    existing = tlgetall(t, ['Credit line'])
    if len(existing) > 0:
        # Already have a credit line
        return False
    try:
        geograph = tlgetone(t, ['Geograph'])
    except (IndexError, TooManyTemplates):
        return False
    template_author = geograph.get(2).value
    credit_author = line.get('Author').value
    # Don't add a credit line with wrong author
    return (canonicalise_name(template_author)
            == canonicalise_name(credit_author))
def get_object_location(tree):
    """Return the result of ``tlgetone`` for *tree* and the ``objtls`` names.

    Any exception raised by ``tlgetone`` propagates to the caller.
    """
    object_location = tlgetone(tree, objtls)
    return object_location
def get_location(tree):
    """Return the result of ``tlgetone`` for *tree* and the ``loctls`` names.

    Any exception raised by ``tlgetone`` propagates to the caller.
    """
    location = tlgetone(tree, loctls)
    return location
def treat_page(self):
    """Warn when the current page's infobox has several "other fields" parameters.

    Parses ``self.current_page.text``, takes the infobox returned by
    ``tlgetone`` for ``infoboxes``, and counts how many of the parameter
    names in ``otherfieldses`` it has.  If more than one is present, a
    warning naming the page is emitted through ``bot.warning``.
    """
    page = self.current_page
    wikicode = mwparserfromhell.parse(page.text)
    infobox = tlgetone(wikicode, infoboxes)
    present = [name for name in otherfieldses if infobox.has(name)]
    if len(present) <= 1:
        return
    bot.warning("excess other_fields in %s" % (page, ))
def process_page(self, page):
    """Replace *page*'s file with the full-size Geograph image, if eligible.

    The checks run in a fixed order and the first one that fails raises;
    only when all pass is ``self.replace_file`` called, followed by
    ``compare_revisions``.

    Raises:
        NotEligible: the bot may not edit the page; there is no
            ``{{Geograph}}`` template; no larger original is available;
            the aspect ratios differ; Geograph flags the images as
            different; the current dimensions match no Geograph size; the
            file history already contains an upload by
            "Geograph Update Bot"; the author or title disagrees with
            Geograph; or the SHA-1 of the Geograph image at the current
            size does not match the Commons file.
        BadTemplate: the ``{{Geograph}}`` template's first two parameters
            cannot be read (``ValueError`` or ``IndexError``).
        NotInGeographDatabase: the local database has no row for the image.
    """
    if not page.botMayEdit():
        raise NotEligible("bot forbidden from editing this page")
    tree = mwparserfromhell.parse(page.text)
    try:
        geograph_template = tlgetone(tree, ['Geograph'])
    except IndexError as err:
        raise NotEligible("No {{Geograph}} template") from err
    try:
        gridimage_id = int(str(geograph_template.get(1).value))
        commons_author = str(geograph_template.get(2).value)
    except (ValueError, IndexError) as err:
        raise BadTemplate("broken {{Geograph}} template") from err
    bot.log("Geograph ID is %d" % (gridimage_id, ))

    c = geodb.cursor()
    c.execute(
        """ SELECT * FROM gridimage_base NATURAL JOIN gridimage_size WHERE gridimage_id = ? """,
        (gridimage_id, ))
    row = c.fetchone()
    if row is None:
        raise NotInGeographDatabase("Geograph ID %d not in database" %
                                    (gridimage_id, ))
    gwidth = row['width']
    gheight = row['height']
    original_width = row['original_width']
    original_height = row['original_height']
    original_diff = row['original_diff']
    if original_width == 0:
        raise NotEligible("no high-res version available")

    fi = page.latest_file_info
    bot.log("%d × %d version available" % (original_width, original_height))
    bot.log("current Commons version is %d × %d" % (fi.width, fi.height))
    if fi.width >= original_width and fi.height >= original_height:
        raise NotEligible("no higher-resolution version on Geograph")
    if not aspect_ratios_match(fi.width, fi.height,
                               original_width, original_height):
        raise NotEligible("aspect ratios of images differ")
    if (fi.width, fi.height) == (gwidth, gheight):
        # Same size as the image recorded in gridimage_base: trust the
        # database's flag on whether the original differs.
        if original_diff == 'yes':
            raise NotEligible("Geograph says pictures are different")
    elif max(fi.width, fi.height) not in (800, 1024):
        # Otherwise only files whose longest side is 800 or 1024 px are
        # accepted (presumably the other sizes Geograph serves -- confirm).
        raise NotEligible("dimensions do not match any Geograph image")

    if any(ofi.user == "Geograph Update Bot"
           for ofi in page.get_file_history().values()):
        raise NotEligible("file already uploaded by me")

    geograph_info = get_geograph_info(gridimage_id)
    if (canonicalise_name(geograph_info['author_name']) !=
            canonicalise_name(commons_author)):
        raise NotEligible(
            "author does not match Geograph (%s vs. %s)" %
            (repr(commons_author), repr(geograph_info['author_name'])))

    try:
        credit_line = tlgetone(tree, ['Credit line'])
    except IndexError:
        # tlgetone found no credit line to compare; skip the title check.
        pass
    else:
        commons_title = ''.join(
            str(x) for x in credit_line.get('Other').value.filter_text()
        ).strip()
        bot.log("Title on Commons: %s" % (commons_title, ))
        if (canonicalise_name(commons_title) !=
                canonicalise_name(geograph_info['title'])):
            raise NotEligible(
                "title does not match Geograph (%s vs. %s)" %
                (repr(commons_title), repr(geograph_info['title'])))

    # Final safeguard: the Commons file must be byte-identical (by SHA-1)
    # to the data Geograph returns for the same longest-side size.
    longest_side = max(fi.width, fi.height)
    geograph_image = get_geograph_size(gridimage_id, geograph_info,
                                       longest_side)
    if hashlib.sha1(geograph_image).hexdigest() != fi.sha1:
        raise NotEligible("SHA-1 does not match Geograph %d px image." %
                          (longest_side, ))
    bot.log("Image matches. Update possible.")
    self.replace_file(page,
                      get_geograph_full_url(gridimage_id, geograph_info))
    compare_revisions(self.site, parameters=dict(titles=page.title()))