def main(give_url, image_url, desc):
    """Run the bot.

    Collect image links, ask the operator for every image whether it should
    be uploaded, gather a description and categories for each accepted image
    and upload it with ``UploadRobot``.

    @param give_url: URL to harvest image links from; when empty the
        operator is prompted for one
    @param image_url: flag requesting URL-range mode (``$`` placeholder in
        the URL); currently discarded, see the note in the body
    @param desc: text appended to every image description; when empty the
        operator is prompted for it
    """
    url = give_url
    # NOTE(review): the caller-supplied flag is overwritten here, so every
    # ``if image_url`` branch below is unreachable. Left untouched because
    # callers may rely on the current behaviour - confirm the intent.
    image_url = ''
    if url == '':
        if image_url:
            url = pywikibot.input(u"What URL range should I check "
                                  u"(use $ for the part that is changeable)")
        else:
            url = pywikibot.input(u"From what URL should I get the images?")

    if image_url:
        minimum = 1
        maximum = 99
        answer = pywikibot.input(
            u"What is the first number to check (default: 1)")
        if answer:
            minimum = int(answer)
        answer = pywikibot.input(
            u"What is the last number to check (default: 99)")
        if answer:
            maximum = int(answer)

    if not desc:
        basicdesc = pywikibot.input(
            u"What text should be added at the end of "
            u"the description of each image from this url?")
    else:
        basicdesc = desc

    if image_url:
        ilinks = [url.replace("$", str(i))
                  for i in range(minimum, maximum + 1)]
    else:
        ilinks = get_imagelinks(url)

    for image in ilinks:
        # A declined image is simply skipped. The former
        # ``elif answer == 's'`` branch read a variable that is never bound
        # on this path and crashed as soon as the operator answered "no".
        if not pywikibot.input_yn('Include image %s?' % image,
                                  default=False, automatic_quit=False):
            continue

        image_desc = pywikibot.input(u"Give the description of this image:")
        categories = []
        while True:
            cat = pywikibot.input(u"Specify a category (or press enter to "
                                  u"end adding categories)")
            if not cat.strip():
                break
            if ":" in cat:
                # Already carries a namespace prefix.
                categories.append(u"[[%s]]" % cat)
            else:
                categories.append(u"[[%s:%s]]"
                                  % (mysite.namespace(14), cat))
        image_desc += ("\r\n\r\n" + basicdesc + "\r\n\r\n"
                       + "\r\n".join(categories))
        uploadBot = UploadRobot(image, description=image_desc)
        uploadBot.run()
def complete_desc_and_upload(url, pagetitle, image_description, author, date,
                             categories, fileId):
    """Upload one file to Commons and record its id as uploaded.

    Builds an ``{{Information}}`` description, uploads *url* through
    ``UploadRobot`` without interactive checks, appends *fileId* to
    ``ids_uploaded.txt`` and then sleeps 30 seconds to throttle uploads.

    @param url: file source handed to ``UploadRobot``
    @param pagetitle: destination file name
    @param image_description: text for the ``Description`` field
    @param author: author name; also used for the
        ``Photographs by <author>`` category
    @param date: value of the ``Date`` field; ``None`` selects
        ``{{Upload date}}``
    @param categories: wikitext appended after the template
    @param fileId: identifier appended to ``ids_uploaded.txt``
    """
    # complete this once if applies to all files
    description = (
        u"{{Information |Description = " + image_description
        + " |Source = {{Institution:Estonian Museum of Natural History}}"
        + " |Author = " + author
        + " |Date = " + (date if date is not None else '{{Upload date}}')
        + " |Permission = {{cc-by-sa-4.0}} |other_fields = {{EMNH geo}} }}\n "
        + categories
        + " [[Category:Photographs by " + author + "]] "
    )

    # True: skip double-checking/editing the destination filename.
    keepFilename = True
    # False: skip double-checking/editing the description (bot mode).
    verifyDescription = False
    targetSite = pywikibot.getSite('commons', 'commons')

    bot = UploadRobot(url, description=description, useFilename=pagetitle,
                      keepFilename=keepFilename,
                      verifyDescription=verifyDescription,
                      targetSite=targetSite)
    bot.run()

    # Remember the id; the context manager closes the log file even when
    # the write fails.
    with open("ids_uploaded.txt", "a") as uploaded_ids:
        uploaded_ids.write('{}\n'.format(fileId))

    # We wait to not upload too quickly
    time.sleep(30)
def run_bot(give_url, image_url, desc):
    """Run the bot.

    Collect image links, ask the operator for every image whether it should
    be uploaded, gather a description and categories for each accepted image
    and upload it with ``UploadRobot``.

    @param give_url: URL to harvest image links from; when empty the
        operator is prompted for one
    @param image_url: flag requesting URL-range mode (``$`` placeholder in
        the URL); currently discarded, see the note in the body
    @param desc: text appended to every image description; when empty the
        operator is prompted for it
    """
    url = give_url
    # NOTE(review): the caller-supplied flag is overwritten here, so every
    # ``if image_url`` branch below is unreachable. Left untouched because
    # callers may rely on the current behaviour - confirm the intent.
    image_url = ''
    if url == '':
        if image_url:
            url = pywikibot.input(u"What URL range should I check "
                                  u"(use $ for the part that is changeable)")
        else:
            url = pywikibot.input(u"From what URL should I get the images?")

    if image_url:
        minimum = 1
        maximum = 99
        answer = pywikibot.input(
            u"What is the first number to check (default: 1)")
        if answer:
            minimum = int(answer)
        answer = pywikibot.input(
            u"What is the last number to check (default: 99)")
        if answer:
            maximum = int(answer)

    if not desc:
        basicdesc = pywikibot.input(
            u"What text should be added at the end of "
            u"the description of each image from this url?")
    else:
        basicdesc = desc

    if image_url:
        ilinks = [url.replace("$", str(number))
                  for number in range(minimum, maximum + 1)]
    else:
        ilinks = get_imagelinks(url)

    for image in ilinks:
        include = pywikibot.input_yn('Include image %s?' % image,
                                     default=False, automatic_quit=False)
        if not include:
            # Nothing to do for a declined image. The old
            # ``elif answer == 's'`` test referenced a name that is unbound
            # on this path and raised instead of moving on.
            continue

        image_desc = pywikibot.input(u"Give the description of this image:")
        categories = []
        while True:
            cat = pywikibot.input(u"Specify a category (or press enter to "
                                  u"end adding categories)")
            if not cat.strip():
                break
            if ":" in cat:
                # Already carries a namespace prefix.
                categories.append(u"[[%s]]" % cat)
            else:
                categories.append(u"[[%s:%s]]"
                                  % (mysite.namespace(14), cat))
        image_desc += ("\r\n\r\n" + basicdesc + "\r\n\r\n"
                       + "\r\n".join(categories))
        uploadBot = UploadRobot(image, description=image_desc)
        uploadBot.run()
def test_png_url(self):
    """Test uploading a png from url using upload.py."""
    png_url = ('https://upload.wikimedia.org/'
               'wikipedia/commons/f/fc/MP_sounds.png')
    uploader = UploadRobot(url=[png_url], target_site=self.get_site(),
                           **self.params)
    uploader.run()
def update_article(article):
    """Refresh the cover image and the homepage entry for *article*.

    Every ``<script>`` element of the article page is searched with the
    module-level ``pattern``. For each hit the referenced picture is
    downloaded, passed through ``qiniu_upload``, written to a
    timestamp-named file, uploaded with ``UploadRobot`` and stored as
    ``article["cover"]``. Afterwards the homepage text is rebuilt and saved
    and *article* is dumped to ``lastupdated.json``.
    """
    response = requests.get(article["link"], headers=header)
    scripts = BeautifulSoup(response.text, 'lxml').find_all("script")

    for script in scripts:
        found = re.search(pattern, str(script), flags=re.M | re.I)
        if found is None:
            continue

        cover_url = found.groups()[0]
        article["cover"] = cover_url
        print(cover_url)

        # Fetch the raw picture into a fixed scratch file.
        local_file = 'pic.jpeg'
        downloaded = requests.get(cover_url, headers=header)
        with open(local_file, 'wb') as raw_out:
            raw_out.write(downloaded.content)

        # The processed picture is stored under a timestamp-based name.
        key = "{}.jpeg".format(int(time.time()))
        pic_cropped = qiniu_upload(local_file, key)
        with open(key, 'wb') as cropped_out:
            cropped_out.write(pic_cropped)

        UploadRobot(url=[key],
                    description="Biz article cover uploaded by bot",
                    keepFilename=True,
                    verifyDescription=False,
                    ignoreWarning=True).run()
        article["cover"] = key

    page.text = template_homepage.format(pic=article["cover"],
                                         link=article["link"],
                                         title=article["title"],
                                         author=article["author"])
    page.save("biz update")

    with open('lastupdated.json', 'w') as state_file:
        state_file.write(json.dumps(article, ensure_ascii=False))
def upload_to_commons(plot, file_name, file_desc, desc_template, year, tag):
    """Render *plot* to a PNG and upload it to Commons unless already there.

    The image is written to ``<parent of this module's folder>/images`` and
    is always removed again, whether the upload ran, was skipped or failed.

    @param plot: object offering ``get_figure().savefig`` or, failing that,
        ``savefig`` itself
    @param file_name: name of the temporary image and of the uploaded file
    @param file_desc: third value substituted into *desc_template*
    @param desc_template: format string receiving *tag*, *year*,
        *file_desc* and today's date (``YYYY-MM-DD``)
    @param year: second value substituted into *desc_template*
    @param tag: first value substituted into *desc_template*
    """
    current_folder = os.path.realpath(
        os.path.abspath(
            os.path.split(inspect.getfile(inspect.currentframe()))[0]))
    folder_parts = current_folder.split(os.sep)
    parent_folder = os.sep.join(folder_parts[0:-1])
    image_path = os.path.join(parent_folder, 'images', file_name)

    save_options = dict(transparent=False, bbox_inches='tight',
                        pad_inches=0, dpi=200)
    try:
        plot.get_figure().savefig(image_path, **save_options)
    except AttributeError:
        # *plot* does not expose ``get_figure``; save it directly.
        plot.savefig(image_path, **save_options)

    try:
        # Skip the upload when a file with the same hash is on Commons.
        if not is_commons_file(get_hash(image_path)):
            bot = UploadRobot([image_path],
                              description=desc_template.format(
                                  tag, year, file_desc,
                                  datetime.now().strftime("%Y-%m-%d")),
                              useFilename=file_name,
                              keepFilename=True,
                              verifyDescription=False,
                              ignoreWarning=True,
                              targetSite=commons_site)
            bot.run()
    finally:
        # Never leave the rendered image behind, even on a failed upload.
        os.remove(image_path)
def treat(self, photo):
    """
    Process each page.

    Duplicates already present on the wiki are looked up first; when any
    exist the upload is skipped and the first duplicate is returned.
    Otherwise the photo is downloaded, handed to ``UploadRobot`` as
    pre-fetched content and uploaded to ``self.site``; the title used for
    the upload is returned.
    """
    existing = photo.findDuplicateImages()
    if existing:
        pywikibot.output('Skipping duplicate of {!r}'.format(existing))
        return existing[0]

    title = photo.getTitle(self.titlefmt)
    page_text = photo.getDescription(self.pagefmt)

    uploader = UploadRobot(url=photo.URL,
                           description=page_text,
                           use_filename=title,
                           keep_filename=True,
                           verify_description=False,
                           target_site=self.site)
    # Hand over the downloaded bytes so the robot does not fetch them again.
    uploader._contents = photo.downloadPhoto().getvalue()
    uploader._retrieved = True
    uploader.run()
    return title
def uploadWorldCaseDataMap():
    """Upload ``WORLD_MAP_DATA_FILE`` non-interactively with a CC0 note."""
    upload_options = {
        'description': '{{cc0}} COVID-19 case data by country',
        'verifyDescription': False,
        'ignoreWarning': True,
        'always': True,
    }
    UploadRobot(WORLD_MAP_DATA_FILE, **upload_options).run()
def test_png_list(self):
    """Test uploading a list of pngs using upload.py."""
    # Every file below the images directory, in os.walk order.
    image_list = [
        os.path.join(folder, file_name)
        for folder, _subdirs, file_names in os.walk(join_images_path())
        for file_name in file_names
    ]
    uploader = UploadRobot(url=image_list, target_site=self.get_site(),
                           **self.params)
    uploader.run()
def processImage(self, fields):
    """Work on a single image.

    Builds the new description, uploads the file to Commons without
    interactive checks, then tags the local page and replaces its usage.
    """
    new_description = self.buildNewImageDescription(fields)
    pywikibot.output(new_description)

    image_page = fields.get('imagepage')
    target_name = fields.get('filename')

    uploader = UploadRobot(url=image_page.fileUrl(),
                           description=new_description,
                           useFilename=target_name,
                           keepFilename=True,
                           verifyDescription=False,
                           ignoreWarning=True,
                           targetSite=pywikibot.Site('commons', 'commons'))
    uploader.run()

    self.tagNowcommons(image_page, target_name)
    self.replaceUsage(image_page, target_name)
def upload(filepath):
    """Upload the pronunciation recording at *filepath* to Commons.

    Language and phrase are read from the file's metadata and determine the
    target file name and the edit summary.
    """
    tags = extract_metadata(filepath)
    page_text = make_description(tags)
    lang_and_phrase = (tags['LANGUAGE'], tags['TITLE'])

    uploader = UploadRobot(
        url=[filepath],
        description=page_text,
        useFilename='%s-%s.flac' % lang_and_phrase,
        keepFilename=False,
        verifyDescription=True,
        summary='Bot: Upload %s pronunciation of “%s”' % lang_and_phrase,
        targetSite=pywikibot.getSite('commons', 'commons'))
    uploader.run()
def uploadToCommons(filename, desc, file_url):
    """Download *file_url* and upload it to Commons unless it is a duplicate.

    The downloaded bytes are stored in ``TEMP_FILE`` (replacing any earlier
    copy) and checked with ``findDuplicateImage`` before uploading.

    @param filename: destination file name on Commons
    @param desc: description text for the file page
    @param file_url: URL the file is downloaded from
    @return: ``(upFile, duplicate)`` - the result of ``UploadRobot.run()``
        (``None`` when the upload was skipped) and the value returned by
        ``findDuplicateImage``
    """
    targetSite = pywikibot.Site('commons', 'commons')
    upFile = None

    if os.path.exists(TEMP_FILE):
        os.remove(TEMP_FILE)

    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(file_url) as response:
        img_data = response.read()
    with open(TEMP_FILE, 'wb') as handler:
        handler.write(img_data)

    duplicate = findDuplicateImage(BytesIO(img_data))
    if duplicate:
        pywikibot.output(u'Found duplicate at %s' % (duplicate))
    else:
        bot = UploadRobot(TEMP_FILE, description=desc, useFilename=filename,
                          keepFilename=True, verifyDescription=False,
                          targetSite=targetSite)
        upFile = bot.run()
    return (upFile, duplicate)
def complete_desc_and_upload(filename, pagetitle, desc, date, categories,
                             source, author, wpid, qid):
    """Build the pathway file description and upload *filename* to Commons.

    The generated wikitext is printed before the upload. Section headings
    and template parameters are each placed on their own line, because
    MediaWiki only recognises ``== heading ==`` markup when it occupies a
    whole line.

    @param filename: local file (or URL) to upload
    @param pagetitle: destination file name on Commons
    @param desc: English description text
    @param date: value of the ``Date`` field
    @param categories: wikitext appended at the end of the description
    @param source: value of the ``Source`` field
    @param author: value of the ``Author`` field
    @param wpid: pathway id used in the interactive-viewer link
    @param qid: Wikidata item id for ``{{On Wikidata}}``
    """
    description = u"\n".join([
        u"== [https://tools.wmflabs.org/pathway-viewer?id=" + wpid
        + u" Interactive View] ==",
        u"Link above goes to an interactive viewer that allows pan and zoom.",
        u"",
        u"=={{int:filedesc}}==",
        u"{{Information",
        u"|Description = {{en|1=" + desc + u"}}",
        u"|Source = " + source,
        u"|Author = " + author,
        u"|Date = " + date,
        u"|Permission = CC0",
        u"|other_versions =",
        u"}}",
        u"",
        u"=={{int:license-header}}==",
        u"{{cc-zero}}",
        u"",
        u"==More Information==",
        u"{{On Wikidata|" + qid + u"}}",
        u"{{current}}",
        categories,
    ])

    print("")
    print(description)
    print("")

    url = [filename]
    # True: skip double-checking/editing the destination filename.
    keepFilename = True
    # False: skip double-checking/editing the description (bot mode).
    verifyDescription = False
    targetSite = pywikibot.getSite("commons", "commons")

    bot = UploadRobot(
        url,
        description=description,
        useFilename=pagetitle,
        keepFilename=keepFilename,
        verifyDescription=verifyDescription,
        targetSite=targetSite,
    )
    bot.run()
def complete_desc_and_upload(filename, pagetitle, desc, date, categories):
    """
    Uploads image to commons

    Example:
        complete_desc_and_upload("test.jpg", "test.jpg", "testpage", "",
                                 "[[Category:Test]]")

    Keyword arguments:
        filename -- filename to use
        pagetitle -- title of page to use
        desc -- description to use
        date -- date to use (not read by this function)
        categories -- categories to use (not read by this function)

    <filename> ::= {<char>}
    <pagetitle> ::= {<char>}
    <desc> ::= {<char>}
    <date> ::= {<char>}
    <categories> ::= {<char>}

    Returns:
        Nothing
    """
    url = [filename]
    # True: skip double-checking/editing the destination filename.
    keepFilename = True
    # False: skip double-checking/editing the description (bot mode).
    verifyDescription = False

    try:
        targetSite = pywikibot.Site()
    except Exception as e:
        # The default site could not be resolved: report why and fall back
        # to Commons. ``targetSite`` is still unbound at this point, so it
        # must not be printed here (doing so raised UnboundLocalError and
        # defeated the fallback).
        print(str(e))
        targetSite = pywikibot.Site('commons', 'commons')

    bot = UploadRobot(url, description=desc, useFilename=pagetitle,
                      keepFilename=keepFilename,
                      verifyDescription=verifyDescription,
                      targetSite=targetSite, summary='Created artwork')
    bot.run()
def treat(self, photo):
    """Process each page.

    Returns the first existing duplicate when there is one, otherwise
    uploads the photo to ``self.site`` and returns the title used.
    """
    found = photo.findDuplicateImages()
    if found:
        pywikibot.output(u"Skipping duplicate of %r" % found)
        return found[0]

    title = photo.getTitle(self.titlefmt)
    page_text = photo.getDescription(self.pagefmt)

    robot_options = dict(url=photo.URL,
                         description=page_text,
                         useFilename=title,
                         keepFilename=True,
                         verifyDescription=False,
                         targetSite=self.site)
    uploader = UploadRobot(**robot_options)
    # Pre-load the file content so the robot does not download it itself.
    uploader._contents = photo.downloadPhoto().getvalue()
    uploader._retrieved = True
    uploader.run()
    return title
def complete_artwork_desc_and_upload(filename, pagetitle, desc, date,
                                     categories):
    """Upload an artwork image to beta Commons and set its description.

    After the upload the file page is overwritten with the generated
    ``{{Artwork}}`` description. Headings and template parameters are each
    placed on their own line, because MediaWiki only recognises
    ``== heading ==`` markup when it occupies a whole line.

    @param filename: local file (or URL) to upload
    @param pagetitle: destination file name; also the title of the file
        page that is rewritten afterwards
    @param desc: English description text
    @param date: value of the ``Date`` field
    @param categories: wikitext appended after the licence section
    """
    # complete this once if applies to all files
    description = u"\n".join([
        u"{{Artwork",
        u"|Description = {{en|1=" + desc + u"}}",
        u"|Source = [[Statens Museeum for Kunst]]",
        u"|Author =",
        u"|Date = " + date,
        u"|Permission =",
        u"|other_versions =",
        u"}}",
        u"",
        u"=={{int:license-header}}==",
        u"{{PD-old-70}}",
        u"",
        categories,
        u"",
    ])

    url = [filename]
    # True: skip double-checking/editing the destination filename.
    keepFilename = True
    # False: skip double-checking/editing the description (bot mode).
    verifyDescription = False
    targetSite = pywikibot.Site('beta', 'commons')

    bot = UploadRobot(url, description=description, useFilename=pagetitle,
                      keepFilename=keepFilename,
                      verifyDescription=verifyDescription,
                      targetSite=targetSite)
    bot.run()

    # The file was uploaded under ``pagetitle`` (useFilename), so that is
    # the page to edit - not the local source file name.
    page = pywikibot.Page(targetSite, 'File:' + pagetitle)
    page.text = description
    page.save('Replacing description')  # Saves the page
def transferImage(self, sourceImagePage):
    """
    Download image and its description, and upload it to another site.

    Nothing is uploaded when the description page is missing or is a
    redirect. After a successful upload to Commons the source file is
    deleted when a sysop account is configured for the source site;
    otherwise, or when deletion does not succeed, the nowCommons template
    is appended to the source page if one is known for its language.
    """
    sourceSite = sourceImagePage.site
    url = sourceImagePage.fileUrl().encode('utf-8')
    pywikibot.output(u"URL should be: %s" % url)

    # localize the text that should be printed on the image description page
    try:
        description = sourceImagePage.get()
        # try to translate license templates
        site_pair = (sourceSite.sitename, self.targetSite.sitename)
        if site_pair in licenseTemplates:
            for source_name, target_name in licenseTemplates[
                    site_pair].items():
                replacement = '{{%s}}' % target_name
                matcher = re.compile('{{%s}}' % source_name)
                description = textlib.replaceExcept(
                    description, matcher, replacement,
                    ['comment', 'math', 'nowiki', 'pre'])

        description = i18n.twtranslate(
            self.targetSite, 'imagetransfer-file_page_message',
            {'site': sourceSite, 'description': description})
        description += '\n\n'
        description += sourceImagePage.getFileVersionHistoryTable()
        # add interwiki link
        if sourceSite.family == self.targetSite.family:
            description += u'\r\n\r\n{0}'.format(sourceImagePage)
    except pywikibot.NoPage:
        pywikibot.output(
            'Image does not exist or description page is empty.')
        return
    except pywikibot.IsRedirectPage:
        pywikibot.output('Image description page is redirect.')
        return

    bot = UploadRobot(url=url, description=description,
                      targetSite=self.targetSite,
                      urlEncoding=sourceSite.encoding(),
                      keepFilename=self.keep_name,
                      verifyDescription=not self.keep_name,
                      ignoreWarning=self.ignore_warning)
    # try to upload
    targetFilename = bot.run()

    # Only a successful upload to Commons triggers the follow-up work.
    if not (targetFilename
            and self.targetSite.family.name == 'commons'
            and self.targetSite.code == 'commons'):
        return

    reason = i18n.twtranslate(sourceSite, 'imagetransfer-nowcommons_notice')
    family_name = sourceSite.family.name

    # try to delete the original image if we have a sysop account
    if (family_name in config.sysopnames
            and sourceSite.lang in config.sysopnames[family_name]):
        if sourceImagePage.delete(reason):
            return

    if (sourceSite.lang in nowCommonsTemplate
            and family_name in config.usernames
            and sourceSite.lang in config.usernames[family_name]):
        # add the nowCommons template.
        pywikibot.output(u'Adding nowCommons template to %s'
                         % sourceImagePage.title())
        tagged_text = (sourceImagePage.get() + '\n\n'
                       + nowCommonsTemplate[sourceSite.lang] % targetFilename)
        sourceImagePage.put(tagged_text, summary=reason)
def run(self):
    """Run the bot.

    Fetches a Commons description from CommonsHelper, uploads the file
    under ``self.newname`` to ``self.image_repo`` and, once the target page
    exists, tags the local page with NowCommons, replaces usages when the
    name changed and deletes the local page when ``self.delete_after_done``
    is set.
    """
    tosend = {
        'language': self.imagePage.site.lang.encode('utf-8'),
        'image': self.imagePage.title(with_ns=False).encode('utf-8'),
        'newname': self.newname.encode('utf-8'),
        'project': self.imagePage.site.family.name.encode('utf-8'),
        'username': '',
        'commonsense': '1',
        'remove_categories': '1',
        'ignorewarnings': '1',
        'doit': 'Uitvoeren'
    }

    pywikibot.output(tosend)
    CH = pageTextPost('https://commonshelper.toolforge.org/index.php',
                      tosend)
    pywikibot.output('Got CH desc.')

    # The generated description is the content of the first <textarea>.
    tablock = CH.split('<textarea ')[1].split('>')[0]
    CH = CH.split('<textarea ' + tablock + '>')[1].split('</textarea>')[0]
    # Decode the HTML entity; replacing '×' by itself did nothing.
    CH = CH.replace('&times;', '×')
    CH = self.fixAuthor(CH)
    pywikibot.output(CH)

    # I want every picture to be tagged with the bottemplate so i can check
    # my contributions later.
    CH = ('\n\n{{BotMoveToCommons|%s.%s|year={{subst:CURRENTYEAR}}'
          '|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}'
          % (self.imagePage.site.lang, self.imagePage.site.family.name)
          + CH)

    if self.category:
        CH = CH.replace(
            '{{subst:Unc}} <!-- Remove this line once you have '
            'added categories -->', '')
        CH += '[[Category:' + self.category + ']]'

    bot = UploadRobot(url=self.imagePage.get_file_url(), description=CH,
                      use_filename=self.newname, keep_filename=True,
                      verify_description=False, ignore_warning=True,
                      target_site=self.image_repo)
    bot.run()

    # Should check if the image actually was uploaded
    if not pywikibot.Page(self.image_repo,
                          'Image:' + self.newname).exists():
        return

    # Get a fresh copy, force to get the page so we don't run into edit
    # conflicts
    imtxt = self.imagePage.get(force=True)

    # Remove the move to commons templates
    if self.imagePage.site.lang in moveToCommonsTemplate:
        for moveTemplate in moveToCommonsTemplate[
                self.imagePage.site.lang]:
            imtxt = re.sub(r'(?i)\{\{' + moveTemplate + r'[^\}]*\}\}',
                           '', imtxt)

    # add {{NowCommons}}
    if self.imagePage.site.lang in nowCommonsTemplate:
        addTemplate = nowCommonsTemplate[
            self.imagePage.site.lang] % self.newname
    else:
        addTemplate = nowCommonsTemplate['_default'] % self.newname

    commentText = i18n.twtranslate(
        self.imagePage.site,
        'commons-file-now-available',
        {'localfile': self.imagePage.title(with_ns=False),
         'commonsfile': self.newname})

    pywikibot.showDiff(self.imagePage.get(), imtxt + addTemplate)
    self.imagePage.put(imtxt + addTemplate, comment=commentText)

    self.gen = pagegenerators.FileLinksGenerator(self.imagePage)
    self.preloadingGen = pagegenerators.PreloadingGenerator(self.gen)

    moveSummary = i18n.twtranslate(
        self.imagePage.site,
        'commons-file-moved',
        {'localfile': self.imagePage.title(with_ns=False),
         'commonsfile': self.newname})

    # If the image is uploaded under a different name, replace all
    # instances
    if self.imagePage.title(with_ns=False) != self.newname:
        imagebot = ImageRobot(
            generator=self.preloadingGen,
            oldImage=self.imagePage.title(with_ns=False),
            newImage=self.newname,
            summary=moveSummary, always=True, loose=True)
        imagebot.run()

    # If the user want to delete the page and
    # the user has sysops privilege, delete the page, otherwise
    # it will be marked for deletion.
    if self.delete_after_done:
        self.imagePage.delete(moveSummary, False)
def run(self):
    """Run the bot.

    Fetches a Commons description from CommonsHelper, uploads the file
    under ``self.newname`` to Commons and, once the target page exists,
    tags the local page with NowCommons and replaces usages when the name
    changed.
    """
    tosend = {'language': self.imagePage.site.lang.encode('utf-8'),
              'image': self.imagePage.title(
                  withNamespace=False).encode('utf-8'),
              'newname': self.newname.encode('utf-8'),
              'project': self.imagePage.site.family.name.encode('utf-8'),
              'username': '',
              'commonsense': '1',
              'remove_categories': '1',
              'ignorewarnings': '1',
              'doit': 'Uitvoeren'
              }

    tosend = urlencode(tosend)
    pywikibot.output(tosend)
    CH = pageTextPost('http://tools.wmflabs.org/commonshelper/index.php',
                      tosend)
    pywikibot.output('Got CH desc.')

    # The generated description is the content of the first <textarea>.
    tablock = CH.split('<textarea ')[1].split('>')[0]
    CH = CH.split('<textarea ' + tablock + '>')[1].split('</textarea>')[0]
    # Decode the HTML entity; replacing u'×' by itself did nothing.
    CH = CH.replace(u'&times;', u'×')
    CH = self.fixAuthor(CH)
    pywikibot.output(CH)

    # I want every picture to be tagged with the bottemplate so i can check
    # my contributions later.
    CH = ('\n\n{{BotMoveToCommons|' + self.imagePage.site.lang +
          '.' + self.imagePage.site.family.name +
          '|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}'
          '|day={{subst:CURRENTDAY}}}}' + CH)

    if self.category:
        CH = CH.replace('{{subst:Unc}} <!-- Remove this line once you have '
                        'added categories -->', '')
        CH += u'[[Category:' + self.category + u']]'

    bot = UploadRobot(url=self.imagePage.fileUrl(), description=CH,
                      useFilename=self.newname, keepFilename=True,
                      verifyDescription=False, ignoreWarning=True,
                      targetSite=pywikibot.Site('commons', 'commons'))
    bot.run()

    # Should check if the image actually was uploaded
    if not pywikibot.Page(pywikibot.Site('commons', 'commons'),
                          u'Image:' + self.newname).exists():
        return

    # Get a fresh copy, force to get the page so we dont run into edit
    # conflicts
    imtxt = self.imagePage.get(force=True)

    # Remove the move to commons templates
    if self.imagePage.site.lang in moveToCommonsTemplate:
        for moveTemplate in moveToCommonsTemplate[
                self.imagePage.site.lang]:
            imtxt = re.sub(r'(?i)\{\{' + moveTemplate + r'[^\}]*\}\}',
                           '', imtxt)

    # add {{NowCommons}}
    if self.imagePage.site.lang in nowCommonsTemplate:
        addTemplate = nowCommonsTemplate[
            self.imagePage.site.lang] % self.newname
    else:
        addTemplate = nowCommonsTemplate['_default'] % self.newname

    commentText = i18n.twtranslate(
        self.imagePage.site,
        'commons-file-now-available',
        {'localfile': self.imagePage.title(withNamespace=False),
         'commonsfile': self.newname})

    pywikibot.showDiff(self.imagePage.get(), imtxt + addTemplate)
    self.imagePage.put(imtxt + addTemplate, comment=commentText)

    self.gen = pagegenerators.FileLinksGenerator(self.imagePage)
    self.preloadingGen = pagegenerators.PreloadingGenerator(self.gen)

    # If the image is uploaded under a different name, replace all
    # instances
    if self.imagePage.title(withNamespace=False) != self.newname:
        moveSummary = i18n.twtranslate(
            self.imagePage.site,
            'commons-file-moved',
            {'localfile': self.imagePage.title(withNamespace=False),
             'commonsfile': self.newname})

        imagebot = image.ImageRobot(
            generator=self.preloadingGen,
            oldImage=self.imagePage.title(withNamespace=False),
            newImage=self.newname,
            summary=moveSummary, always=True, loose=True)
        imagebot.run()
def transferImage(self, sourceImagePage):
    """
    Download image and its description, and upload it to another site.

    The upload is skipped when the description page does not exist or is a
    redirect. When the file reached Commons, the source file is deleted if
    a sysop account is configured; if that is not possible or does not
    succeed, the nowCommons template is added to the source page where a
    template and a user account exist for its language.
    """
    sourceSite = sourceImagePage.site
    url = sourceImagePage.fileUrl().encode('utf-8')
    pywikibot.output(u"URL should be: %s" % url)

    # localize the text that should be printed on the image description page
    try:
        text = sourceImagePage.get()
        # try to translate license templates
        pair = (sourceSite.sitename, self.targetSite.sitename)
        if pair in licenseTemplates:
            protected = ['comment', 'math', 'nowiki', 'pre']
            for old_name, new_name in licenseTemplates[pair].items():
                text = textlib.replaceExcept(
                    text,
                    re.compile('{{%s}}' % old_name),
                    '{{%s}}' % new_name,
                    protected)

        message_params = {'site': sourceSite, 'description': text}
        text = i18n.twtranslate(self.targetSite,
                                'imagetransfer-file_page_message',
                                message_params)
        text += '\n\n'
        text += sourceImagePage.getFileVersionHistoryTable()
        # add interwiki link
        if sourceSite.family == self.targetSite.family:
            text += u'\r\n\r\n{0}'.format(sourceImagePage)
    except pywikibot.NoPage:
        pywikibot.output(
            'Image does not exist or description page is empty.')
    except pywikibot.IsRedirectPage:
        pywikibot.output('Image description page is redirect.')
    else:
        uploader = UploadRobot(url=url, description=text,
                               targetSite=self.targetSite,
                               urlEncoding=sourceSite.encoding(),
                               keepFilename=self.keep_name,
                               verifyDescription=not self.keep_name,
                               ignoreWarning=self.ignore_warning)
        # try to upload
        targetFilename = uploader.run()

        if (targetFilename
                and self.targetSite.family.name == 'commons'
                and self.targetSite.code == 'commons'):
            # upload to Commons was successful
            reason = i18n.twtranslate(sourceSite,
                                      'imagetransfer-nowcommons_notice')
            lang = sourceSite.lang
            family = sourceSite.family.name

            # try to delete the original image if we have a sysop account
            if family in config.sysopnames \
                    and lang in config.sysopnames[family]:
                if sourceImagePage.delete(reason):
                    return

            if lang in nowCommonsTemplate \
                    and family in config.usernames \
                    and lang in config.usernames[family]:
                # add the nowCommons template.
                pywikibot.output(u'Adding nowCommons template to %s'
                                 % sourceImagePage.title())
                sourceImagePage.put(
                    sourceImagePage.get() + '\n\n'
                    + nowCommonsTemplate[lang] % targetFilename,
                    summary=reason)
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Every file is uploaded to Commons with a fixed Hebrew-pronunciation
    description; free-text description arguments are collected but replaced
    by that template. ``-summary:`` sets the upload summary (default
    ``bot upload``).

    @param args: command line arguments
    @type args: list of unicode
    @return: ``False`` when ``-always`` mode cannot run; otherwise ``None``
    """
    url = u''
    description = []
    summary = None
    keepFilename = False
    always = False
    useFilename = None
    verifyDescription = True
    aborts = set()
    ignorewarn = set()
    chunk_size = 0
    chunk_size_regex = r'^-chunked(?::(\d+(?:\.\d+)?)[ \t]*(k|ki|m|mi)?b?)?$'
    chunk_size_regex = re.compile(chunk_size_regex, re.I)
    # Multipliers for every suffix the regex above can capture.
    chunk_units = {'k': 1000, 'm': 1000000, 'ki': 1 << 10, 'mi': 1 << 20}
    recursive = False

    # process all global bot args
    # returns a list of non-global args, i.e. args for upload.py
    for arg in pywikibot.handle_args(args):
        if not arg:
            continue
        if arg == '-always':
            keepFilename = True
            always = True
            verifyDescription = False
        elif arg == '-recursive':
            recursive = True
        elif arg.startswith('-keep'):
            keepFilename = True
        elif arg.startswith('-filename:'):
            useFilename = arg[10:]
        elif arg.startswith('-summary'):
            summary = arg[9:]
        elif arg.startswith('-noverify'):
            verifyDescription = False
        elif arg.startswith('-abortonwarn'):
            if len(arg) > len('-abortonwarn:') and aborts is not True:
                aborts.add(arg[len('-abortonwarn:'):])
            else:
                aborts = True
        elif arg.startswith('-ignorewarn'):
            if len(arg) > len('-ignorewarn:') and ignorewarn is not True:
                ignorewarn.add(arg[len('-ignorewarn:'):])
            else:
                ignorewarn = True
        elif arg.startswith('-chunked'):
            match = chunk_size_regex.match(arg)
            if not match:
                pywikibot.error('Chunk size parameter is not valid.')
            elif match.group(1):  # number was in there
                base = float(match.group(1))
                if match.group(2):  # suffix too
                    factor = chunk_units[match.group(2).lower()]
                else:
                    factor = 1
                chunk_size = math.trunc(base * factor)
            else:
                chunk_size = 1 << 20  # default to 1 MiB
        elif url == u'':
            url = arg
        else:
            description.append(arg)

    # NOTE(review): description arguments are discarded; every upload gets
    # the template below. Each heading and template parameter sits on its
    # own line so that MediaWiki renders the headings.
    # curly brackets need to double in formatted string
    description = u'\n'.join([
        u'=={{{{int:filedesc}}}}==',
        u'{{{{Information',
        u'|description={{{{en|1=Native Israeli pronunciation of this '
        u'Hebrew word}}}}',
        u'|date={0}',
        u'|source={{{{own}}}}',
        u'|author=[[User:{1}|{1}]]',
        u'|permission=',
        u'|other versions=',
        u'}}}}',
        u'',
        u'=={{{{int:license-header}}}}==',
        u'{{{{self|cc-zero}}}}',
        u'',
        u'[[Category:Hebrew pronunciation]]',
    ]).format(date.today(), config.usernames['commons']['commons'])

    while not ("://" in url or os.path.exists(url)):
        if not url:
            error = 'No input filename given.'
        else:
            error = 'Invalid input filename given.'
            if not always:
                error += ' Try again.'
        if always:
            url = None
            break
        else:
            pywikibot.output(error)
            url = pywikibot.input(u'URL, file or directory where files are now:')

    if always and ((aborts is not True and ignorewarn is not True) or
                   not description or url is None):
        additional = ''
        missing = []
        if url is None:
            missing += ['filename']
            additional = error + ' '
        # ``description`` is a string here, so test for emptiness.
        if not description:
            missing += ['description']
        if aborts is not True and ignorewarn is not True:
            additional += ('Either -ignorewarn or -abortonwarn must be '
                           'defined for all codes. ')
        additional += 'Unable to run in -always mode'
        suggest_help(missing_parameters=missing, additional_text=additional)
        return False

    if os.path.isdir(url):
        file_list = []
        for root, subdirs, files in os.walk(url):
            if not recursive:
                # Do not visit any subdirectories
                subdirs[:] = []
            file_list.extend(os.path.join(root, name) for name in files)
        url = file_list
    else:
        url = [url]

    bot = UploadRobot(url, description=description, useFilename=useFilename,
                      keepFilename=keepFilename,
                      verifyDescription=verifyDescription,
                      aborts=aborts, ignoreWarning=ignorewarn,
                      chunk_size=chunk_size, always=always,
                      # Honour -summary; it used to be parsed and ignored.
                      summary=summary or "bot upload",
                      targetSite=pywikibot.Site('commons', 'commons'))
    bot.run()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: str
    @return: ``False`` when the arguments are inconsistent or ``-always``
        mode cannot run; otherwise ``None``
    """
    url = ''
    description = []
    summary = None
    keep_filename = False
    always = False
    use_filename = None
    filename_prefix = None
    verify_description = True
    aborts = set()
    ignorewarn = set()
    chunk_size = 0
    recursive = False
    description_file = None

    # process all global bot args
    # returns a list of non-global args, i.e. args for upload.py
    local_args = pywikibot.handle_args(args)
    for option in local_args:
        arg, _, value = option.partition(':')
        if arg == '-always':
            keep_filename = True
            always = True
            verify_description = False
        elif arg == '-recursive':
            recursive = True
        elif arg == '-keep':
            keep_filename = True
        elif arg == '-filename':
            use_filename = value
        elif arg == '-prefix':
            filename_prefix = value
        elif arg == '-summary':
            summary = value
        elif arg == '-noverify':
            verify_description = False
        elif arg == '-abortonwarn':
            if value and aborts is not True:
                aborts.add(value)
            else:
                aborts = True
        elif arg == '-ignorewarn':
            if value and ignorewarn is not True:
                ignorewarn.add(value)
            else:
                ignorewarn = True
        elif arg == '-chunked':
            match = CHUNK_SIZE_REGEX.match(option)
            chunk_size = get_chunk_size(match)
        elif arg == '-descfile':
            description_file = value
        elif not url:
            # The first free argument is the source, the rest is description.
            url = option
        else:
            description.append(option)

    description = ' '.join(description)

    if description_file:
        if description:
            pywikibot.error('Both a description and a -descfile were '
                            'provided. Please specify only one of those.')
            return False
        with codecs.open(description_file,
                         encoding=pywikibot.config.textfile_encoding) as f:
            description = f.read().replace('\r\n', '\n')

    while not ('://' in url or os.path.exists(url)):
        if not url:
            error = 'No input filename given.'
        else:
            error = 'Invalid input filename given.'
            if not always:
                error += ' Try again.'
        if always:
            url = None
            break
        else:
            pywikibot.output(error)
            url = pywikibot.input('URL, file or directory where files are now:')

    if always and (aborts is not True and ignorewarn is not True
                   or not description or url is None):
        additional = ''
        missing = []
        if url is None:
            missing += ['filename']
            additional = error + ' '
        # ``description`` is always a string at this point, so emptiness -
        # not ``None`` - is what marks it as missing.
        if not description:
            missing += ['description']
        if aborts is not True and ignorewarn is not True:
            additional += ('Either -ignorewarn or -abortonwarn must be '
                           'defined for all codes. ')
        additional += 'Unable to run in -always mode'
        suggest_help(missing_parameters=missing, additional_text=additional)
        return False

    if os.path.isdir(url):
        file_list = []
        for root, subdirs, files in os.walk(url):
            if not recursive:
                # Do not visit any subdirectories
                subdirs[:] = []
            file_list.extend(os.path.join(root, name) for name in files)
        url = file_list
    else:
        url = [url]

    bot = UploadRobot(url, description=description, useFilename=use_filename,
                      keepFilename=keep_filename,
                      verifyDescription=verify_description, aborts=aborts,
                      ignoreWarning=ignorewarn, chunk_size=chunk_size,
                      always=always, summary=summary,
                      filename_prefix=filename_prefix)
    bot.run()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    The first local argument that is not a recognised option is used as the
    URL, file or directory to upload from; every further unrecognised
    argument is joined with spaces to form the description.

    @param args: command line arguments
    @type args: list of unicode
    @return: False if the arguments cannot be used, otherwise None
    """
    url = u''
    description = []
    summary = None
    keepFilename = False
    always = False
    useFilename = None
    verifyDescription = True
    aborts = set()
    ignorewarn = set()
    chunk_size = 0
    chunk_size_regex = re.compile(
        r'^-chunked(?::(\d+(?:\.\d+)?)[ \t]*(k|ki|m|mi)?b?)?$', re.I)
    # Multipliers for the unit suffixes accepted by chunk_size_regex.
    chunk_multipliers = {
        'k': 1000,
        'm': 1000000,
        'ki': 1 << 10,
        'mi': 1 << 20,
    }
    recursive = False

    # process all global bot args
    # returns a list of non-global args, i.e. args for upload.py
    for arg in pywikibot.handle_args(args):
        if not arg:
            continue
        if arg == '-always':
            keepFilename = True
            always = True
            verifyDescription = False
        elif arg == '-recursive':
            recursive = True
        elif arg.startswith('-keep'):
            keepFilename = True
        elif arg.startswith('-filename:'):
            useFilename = arg[10:]
        elif arg.startswith('-summary'):
            summary = arg[9:]
        elif arg.startswith('-noverify'):
            verifyDescription = False
        elif arg.startswith('-abortonwarn'):
            # A bare option (no code) switches to "all warnings" (True);
            # once that is set, individual codes are no longer collected.
            if len(arg) > len('-abortonwarn:') and aborts is not True:
                aborts.add(arg[len('-abortonwarn:'):])
            else:
                aborts = True
        elif arg.startswith('-ignorewarn'):
            if len(arg) > len('-ignorewarn:') and ignorewarn is not True:
                ignorewarn.add(arg[len('-ignorewarn:'):])
            else:
                ignorewarn = True
        elif arg.startswith('-chunked'):
            match = chunk_size_regex.match(arg)
            if not match:
                pywikibot.error('Chunk size parameter is not valid.')
            elif match.group(1):  # number was in there
                base = float(match.group(1))
                suffix = match.group(2)  # optional unit
                multiplier = chunk_multipliers[suffix.lower()] if suffix else 1
                chunk_size = math.trunc(base * multiplier)
            else:
                chunk_size = 1 << 20  # default to 1 MiB
        elif url == u'':
            url = arg
        else:
            description.append(arg)

    description = u' '.join(description)

    while not ("://" in url or os.path.exists(url)):
        if not url:
            error = 'No input filename given.'
        else:
            error = 'Invalid input filename given.'
            if not always:
                error += ' Try again.'
        if always:
            # Never prompt in -always mode; report the problem below.
            url = None
            break
        pywikibot.output(error)
        url = pywikibot.input(u'URL, file or directory where files are now:')

    if always and ((aborts is not True and ignorewarn is not True) or
                   not description or url is None):
        additional = ''
        missing = []
        if url is None:
            missing += ['filename']
            additional = error + ' '
        # description is always a string here, so test for emptiness; the
        # former "is None" comparison could never be true.
        if not description:
            missing += ['description']
        if aborts is not True and ignorewarn is not True:
            additional += ('Either -ignorewarn or -abortonwarn must be '
                           'defined for all codes. ')
        additional += 'Unable to run in -always mode'
        suggest_help(missing_parameters=missing, additional_text=additional)
        return False

    if os.path.isdir(url):
        file_list = []
        for dirpath, dirnames, filenames in os.walk(url):
            if not recursive:
                # Do not visit any subdirectories
                dirnames[:] = []
            file_list.extend(os.path.join(dirpath, name)
                             for name in filenames)
        url = file_list
    else:
        url = [url]

    bot = UploadRobot(url, description=description, useFilename=useFilename,
                      keepFilename=keepFilename,
                      verifyDescription=verifyDescription,
                      aborts=aborts, ignoreWarning=ignorewarn,
                      chunk_size=chunk_size, always=always,
                      summary=summary)
    bot.run()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    The first local argument that is not a recognised option is used as the
    URL, file or directory to upload from; every further unrecognised
    argument is joined with spaces to form the description.

    @param args: command line arguments
    @type args: list of unicode
    @return: False if the arguments cannot be used, otherwise None
    """
    url = u''
    description = []
    summary = None
    keepFilename = False
    always = False
    useFilename = None
    filename_prefix = None
    verifyDescription = True
    aborts = set()
    ignorewarn = set()
    chunk_size = 0
    chunk_size_regex = re.compile(
        r'^-chunked(?::(\d+(?:\.\d+)?)[ \t]*(k|ki|m|mi)?b?)?$', re.I)
    recursive = False

    # process all global bot args
    # returns a list of non-global args, i.e. args for upload.py
    local_args = pywikibot.handle_args(args)
    for option in local_args:
        arg, _, value = option.partition(':')
        if arg == '-always':
            keepFilename = True
            always = True
            verifyDescription = False
        elif arg == '-recursive':
            recursive = True
        elif arg == '-keep':
            keepFilename = True
        elif arg == '-filename':
            useFilename = value
        elif arg == '-prefix':
            filename_prefix = value
        elif arg == '-summary':
            summary = value
        elif arg == '-noverify':
            verifyDescription = False
        elif arg == '-abortonwarn':
            # A bare option (no code) switches to "all warnings" (True);
            # once that is set, individual codes are no longer collected.
            if value and aborts is not True:
                aborts.add(value)
            else:
                aborts = True
        elif arg == '-ignorewarn':
            if value and ignorewarn is not True:
                ignorewarn.add(value)
            else:
                ignorewarn = True
        elif arg == '-chunked':
            match = chunk_size_regex.match(option)
            chunk_size = get_chunk_size(match)
        elif option:
            # Use the complete option rather than the part before ':'.
            # Requiring an empty value here silently discarded every
            # positional argument containing a colon, e.g. 'http://...'.
            if not url:
                url = option
            else:
                description.append(option)

    description = u' '.join(description)

    while not ("://" in url or os.path.exists(url)):
        if not url:
            error = 'No input filename given.'
        else:
            error = 'Invalid input filename given.'
            if not always:
                error += ' Try again.'
        if always:
            # Never prompt in -always mode; report the problem below.
            url = None
            break
        pywikibot.output(error)
        url = pywikibot.input(u'URL, file or directory where files are now:')

    if always and ((aborts is not True and ignorewarn is not True) or
                   not description or url is None):
        additional = ''
        missing = []
        if url is None:
            missing += ['filename']
            additional = error + ' '
        # description is always a string here, so test for emptiness; the
        # former "is None" comparison could never be true.
        if not description:
            missing += ['description']
        if aborts is not True and ignorewarn is not True:
            additional += ('Either -ignorewarn or -abortonwarn must be '
                           'defined for all codes. ')
        additional += 'Unable to run in -always mode'
        suggest_help(missing_parameters=missing, additional_text=additional)
        return False

    if os.path.isdir(url):
        file_list = []
        for dirpath, dirnames, filenames in os.walk(url):
            if not recursive:
                # Do not visit any subdirectories
                dirnames[:] = []
            file_list.extend(os.path.join(dirpath, name)
                             for name in filenames)
        url = file_list
    else:
        url = [url]

    bot = UploadRobot(url, description=description, useFilename=useFilename,
                      keepFilename=keepFilename,
                      verifyDescription=verifyDescription,
                      aborts=aborts, ignoreWarning=ignorewarn,
                      chunk_size=chunk_size, always=always,
                      summary=summary,
                      filename_prefix=filename_prefix)
    bot.run()
def test_png(self):
    """Run UploadRobot on the MP_sounds.png test image."""
    # Arguments are prepared in the same order the call evaluates them.
    image_paths = [join_images_path('MP_sounds.png')]
    site = self.get_site()
    UploadRobot(url=image_paths, target_site=site, **self.params).run()
def main():
    """Upload the freely licensed photos of Flickr albums to 15mpedia.

    Options are read from sys.argv:

    --flickrset:   an album URL, a '/people/' URL, or (when the value
                   contains no '://') the path of a text file with one
                   album URL per line. Required.
    --categories:  ';'-separated category names ('_' stands for a space).
    --tags:        ';'-separated extra tags ('_' stands for a space).

    For a '/people/' URL the album URLs of that user are printed and the
    script exits. Otherwise every photo of every album is looked up through
    the Flickr API, photos whose license id is not in the accepted list are
    skipped, and the rest are uploaded with UploadRobot unless the target
    file page already exists.
    """
    site = pywikibot.Site('15mpedia', '15mpedia')
    flickrapilimit = 500
    flickrapikey = getflickrapikey()  # do not share key
    flickrseturl = ""
    categories = []
    tags = []

    # load parameters
    for arg in sys.argv[1:]:
        if arg.startswith('--flickrset:'):
            # --flickrset:http://www.flickr.com/photos/15mmalagacc/sets/72157629844179358/
            flickrseturl = arg[12:]
        elif arg.startswith('--categories:'):
            # --categories:"15M_en_Madrid;Ocupa_el_Congreso"
            categories = [re.sub('_', ' ', category)
                          for category in arg[13:].split(';')]
        elif arg.startswith('--tags:'):
            # --tags:"15M;Acampada_Sol"
            tags = [re.sub('_', ' ', tag) for tag in arg[7:].split(';')]

    if not flickrseturl:
        print('Provide --flickrset: parameter. Example: --flickrset:https://www.flickr.com/photos/15mmalagacc/sets/72157629844179358/')
        sys.exit()
    # --categories: is optional (a check that made it mandatory was disabled).

    flickrseturls = []
    if '://' in flickrseturl:
        if '/people/' in flickrseturl:
            # Only list the albums of this user, then stop.
            raw = getURL(url=flickrseturl)
            flickruser = flickrseturl.split('/people/')[1].split('/')[0].strip('/')
            flickruserid = re.findall(r'"nsid":"([^"]+)"', raw)[0]
            apiquery = 'https://api.flickr.com/services/rest/?method=flickr.photosets.getList&api_key=%s&user_id=%s&format=json&nojsoncallback=1' % (flickrapikey, flickruserid)
            jsonset3 = json.loads(getURL(url=apiquery))
            if "photosets" not in jsonset3:
                print("ERROR: API key caducada o invalida?")
                sys.exit()
            flickrseturls = ["https://www.flickr.com/photos/%s/albums/%s" % (flickruser, x["id"])
                             for x in jsonset3["photosets"]["photoset"]]
            print('\n'.join(flickrseturls))
            sys.exit()
        else:
            flickrseturls = [flickrseturl]
    else:
        # The value is a file with one album URL per line. Close it even if
        # reading fails, and drop blank lines, which cannot be parsed below.
        with open(flickrseturl, 'r') as f:
            flickrseturls = [line for line in f.read().strip().splitlines()
                             if line.strip()]
    print('Loaded', len(flickrseturls), 'photosets')

    for flickrseturl in flickrseturls:
        flickrseturl = flickrseturl.replace('/sets/', '/albums/')
        flickruser = flickrseturl.split('/photos/')[1].split('/albums/')[0].strip()
        flickrsetid = flickrseturl.split('/albums/')[1].split('/')[0].strip('/').strip()
        raw = getURL(url=flickrseturl)
        m = re.findall(r'"albumId":"%s","nsid":"([^"]+?)"' % (flickrsetid), raw)
        if not m:
            print("No se encontro flickruserid")
            sys.exit()
        flickruserid = m[0]

        # load set metadata
        apiquery = 'https://api.flickr.com/services/rest/?method=flickr.photosets.getPhotos&api_key=%s&photoset_id=%s&user_id=%s&per_page=%s&format=json&nojsoncallback=1' % (flickrapikey, flickrsetid, flickruserid, flickrapilimit)
        jsonset = json.loads(getURL(url=apiquery))
        if "photoset" not in jsonset:
            print("ERROR: API key caducada o invalida?")
            sys.exit()
        flickrsetname = jsonset["photoset"]["title"]
        # Some user names contain spaces, so the user is taken from the
        # album URL instead of jsonset["photoset"]["ownername"].
        photoids = [photo["id"] for photo in jsonset["photoset"]["photo"]]
        pages = int(jsonset["photoset"]["pages"])
        # Page 1 was fetched above; the range is empty for one-page albums.
        for page in range(2, pages + 1):
            apiquery2 = apiquery + '&page=' + str(page)
            jsonset2 = json.loads(getURL(url=apiquery2))
            photoids += [photo["id"] for photo in jsonset2["photoset"]["photo"]]
        print('There are', len(photoids), 'images in the set', flickrsetid, 'by', flickruser)

        # load images metadata
        for photoid in photoids:
            apiquery = 'https://api.flickr.com/services/rest/?method=flickr.photos.getInfo&api_key=%s&photo_id=%s&format=json&nojsoncallback=1' % (flickrapikey, photoid)
            jsonphoto = json.loads(getURL(url=apiquery))
            photo = jsonphoto["photo"]

            # check license, if not free, do not download later
            photolicense = photo["license"]
            if photolicense not in ["1", "2", "3", "4", "5", "6", "9", "10"]:
                print('Skiping', photoid, 'which is not Creative Commons or Public Domain')
                continue

            photometadata = {
                'title': unquote(photo["title"]["_content"].strip() if "title" in photo else ''),
                'description': unquote(photo["description"]["_content"].strip() if "description" in photo else ''),
                'date-taken': photo["dates"].get("taken") or '',
                'license': photolicense,
                'coordinates': [photo["location"]["latitude"], photo["location"]["longitude"]] if "location" in photo else '',
                'localfilename': '%s - %s - %s.jpg' % (flickruser, flickrsetid, photoid),
                'photourl': "https://www.flickr.com/photos/%s/%s/" % (flickruser, photoid),
                'tags': [tag["raw"] for tag in photo["tags"]["tag"]] + tags,
            }
            # Collapse each run of newlines (and surrounding spaces) into
            # one blank line.
            photometadata['description'] = re.sub(r' *\n+ *', r'\n\n', photometadata['description'])
            if 'has uploaded' in photometadata['description']:
                photometadata['description'] = ''
            photofullres = 'https://farm%s.staticflickr.com/%s/%s_%s_o_d.jpg' % (photo["farm"], photo["server"], photo["id"], photo["originalsecret"])
            print(photoid)
            print(photometadata)
            print(photofullres)

            cats = ''
            if categories:
                cats = '\n\n%s' % ('\n'.join(['[[Categoría:%s]]' % (category) for category in categories]))
            output = generateInfobox(photoid, photometadata, cats, flickrseturl, flickrsetname, flickruser)

            # https://www.mediawiki.org/wiki/Manual:Pywikibot/upload.py
            aborts = set()
            # Duplicates are handled by the page.exists() check below.
            ignorewarn = {'duplicate'}
            summary = "BOT - Subiendo imagen %s" % (photometadata['photourl'])

            page = pywikibot.Page(site, "File:%s" % (photometadata['localfilename']))
            if page.exists():
                print("Ya existe")
                continue

            bot = UploadRobot(photofullres, description=output,
                              useFilename=photometadata['localfilename'],
                              keepFilename=True, verifyDescription=False,
                              aborts=aborts, ignoreWarning=ignorewarn,
                              summary=summary, targetSite=site)
            bot.run()