def process_all_files(base_dir=MAIN_DIR, xml_dir=XML_DIR):
    """
    Load every xml file in a directory and dump the processed data as json.

    Each .xml file found directly in the xml directory is loaded with
    load_xml and wrapped in an InfoEntry. Files which raise an exception
    while being loaded, and files whose obj_id has already been encountered,
    are reported through pywikibot.output and skipped. The output() of the
    remaining entries is written to processed_lido.json in base_dir, keyed
    by obj_id.

    :param base_dir: Path to the main working directory.
    :param xml_dir: Directory holding the xml files; joined onto base_dir.
    :raises common.MyError: if either of the two directories is not an
        existing directory.
    """
    # Both the working directory and the xml directory must exist
    xml_path = os.path.join(base_dir, xml_dir)
    for directory in (base_dir, xml_path):
        if os.path.isdir(directory):
            continue
        raise common.MyError(
            u'The provided directory was not a valid directory: %s'
            % directory)

    # Collect the xml files (sub-directories are not searched)
    xml_files = prep.find_files(
        path=xml_path, file_exts=('.xml', ), subdir=False)
    pywikibot.output("Found %d .xml files" % len(xml_files))

    processed = {}
    for xml_file in xml_files:
        try:
            entry = InfoEntry(load_xml(xml_file))
        except Exception as e:
            # best effort: report the broken file and carry on with the rest
            pywikibot.output(
                u"Encountered error while processing %s: %s"
                % (os.path.basename(xml_file), e))
            continue

        obj_id = entry.obj_id
        if obj_id in processed:
            # the first file encountered for an object wins
            pywikibot.output(
                u"Multiple files for same object: %s, %s, %s"
                % (obj_id, entry.source_file,
                   processed[obj_id]['source_file']))
            continue
        processed[obj_id] = entry.output()

    out_file = os.path.join(base_dir, u'processed_lido.json')
    common.open_and_write_file(out_file, processed, as_json=True)
    pywikibot.output(
        "Created %s with %d entries" % (out_file, len(processed)))
def rename(base_dir, sub_cat, in_filename, log_file='move.log'):
    """
    Identify any files to replace and rename them to their commons names.

    The csv file maps local filenames (the "filnamn" column) to the url of
    the corresponding file on Commons (the "PhoSystematikS" column). Every
    .tif file found directly in base_dir which has a valid entry in the csv
    is moved into sub_cat under its Commons filename. Files without a
    (valid) csv entry are left in place, and every mismatch is recorded in
    the log file.

    :param base_dir: Path to directory in which replacement image files are
        found.
    :param sub_cat: The name of the subdirectory into which processed files
        should be moved.
    :param in_filename: The photoAll.csv file filtered to only contain the
        files to replace.
    :param log_file: The name of the log file to be created (in base_dir).
    """
    # Load indata
    in_filename = common.modify_path(base_dir, in_filename)
    header_check = u'PhoId|PhoObjId|PhoBeschreibungM|PhoAufnahmeortS|' \
                   u'PhoSwdS|MulId|filnamn|AdrVorNameS|AdrNameS|PhoSystematikS'
    data = csv.csv_file_to_dict(
        in_filename, "filnamn", header_check,
        keep=('PhoSystematikS', 'filnamn'),
        delimiter='|', codec='utf-16')

    # reformat the commons filenames
    # Iterate over a snapshot since the values are replaced in the loop.
    url_prefix = u'https://commons.wikimedia.org/wiki/File:'
    for k, v in list(data.items()):
        if v['PhoSystematikS'].startswith(url_prefix):
            data[k] = v['PhoSystematikS'][len(url_prefix):]
        else:
            pywikibot.output("error in indatafile: %s, %s" % (k, v))
            # Mark the entry as unusable so that the raw csv row is never
            # mistaken for a target filename further down.
            data[k] = None

    # find candidate files
    candidates = prep.find_files(base_dir, ('.tif', ), subdir=False)

    # rename the files
    sub_cat = common.modify_path(base_dir, sub_cat)
    log_file = common.modify_path(base_dir, log_file)
    common.create_dir(sub_cat)
    log = []
    for candidate in candidates:
        base_name = os.path.basename(candidate)
        if base_name not in data:
            log.append('%s not found in csv file' % base_name)
            continue

        commons_name = data.pop(base_name)
        if commons_name is None:
            # the csv entry had no usable commons url, leave the file as is
            log.append('%s has no valid commons name in csv file' % base_name)
            continue
        commons_name = common.modify_path(sub_cat, commons_name)
        os.rename(candidate, commons_name)

    # whatever is left in data had no matching file in base_dir
    for k in data:
        log.append('%s not found on disk' % k)

    common.open_and_write_file(log_file, '\n'.join(log), codec='utf-8')
    pywikibot.output(u'Created %s' % log_file)
def upload_all(base_dir, sub_dir=u'Uploaded', log_file='upload.log',
               verbose=True):
    """
    Upload the renamed files.

    We cannot just use uploader.up_all since there are no corresponding
    .info files.

    Every .tif file found directly in base_dir is uploaded to Commons under
    its own filename. After the upload attempt the file is moved to the
    sub_dir directory, or to its "_errors"/"_warnings" sibling when the
    upload result contains an error or a warning. The log entry of each
    upload is appended to the log file.

    :param base_dir: Path to the directory containing the files to upload.
    :param sub_dir: Name of the directory (in base_dir) to which
        successfully uploaded files are moved.
    :param log_file: Name of the log file (in base_dir) to append to.
    :param verbose: Whether to also output the log entry of each upload.
    """
    commons = pywikibot.Site('commons', 'commons')
    commons.login()
    upload_comment = u'Source image improved by the institution #LSH'

    # create target directories if they don't exist
    done_dir = common.modify_path(base_dir, sub_dir)
    error_dir = u'%s_errors' % done_dir
    warnings_dir = u'%s_warnings' % done_dir
    common.create_dir(done_dir)
    common.create_dir(error_dir)
    common.create_dir(warnings_dir)

    # logfile
    # The with block closes the log even if an upload or a move raises.
    logfile = common.modify_path(base_dir, log_file)
    with codecs.open(logfile, 'a', 'utf-8') as flog:
        # find candidate files
        media_files = prep.find_files(base_dir, ('.tif', ), subdir=False)
        for media_file in media_files:
            file_name = os.path.basename(media_file)
            result = uploader.upload_single_file(
                file_name, media_file, upload_comment, commons,
                overwrite_page_exists=True)

            if result.get('error'):
                target_dir = error_dir
            elif result.get('warning'):
                target_dir = warnings_dir
            else:
                target_dir = done_dir

            if verbose:
                pywikibot.output(result.get('log'))
            flog.write(u'%s\n' % result.get('log'))
            os.rename(media_file, common.modify_path(target_dir, file_name))
            # flush per file so the log is up to date if a later upload fails
            flog.flush()

    pywikibot.output(u'Created %s' % logfile)
def upload_all(base_dir, sub_dir=u'Uploaded', log_file='upload.log',
               verbose=True):
    """
    Upload the renamed files.

    We cannot just use uploader.up_all since there are no corresponding
    .info files.

    Every .tif file found directly in base_dir is uploaded to Commons under
    its own filename. After the upload attempt the file is moved to the
    sub_dir directory, or to its "_errors"/"_warnings" sibling when the
    upload result contains an error or a warning. The log entry of each
    upload is appended to the log file.

    :param base_dir: Path to the directory containing the files to upload.
    :param sub_dir: Name of the directory (in base_dir) to which
        successfully uploaded files are moved.
    :param log_file: Name of the log file (in base_dir) to append to.
    :param verbose: Whether to also output the log entry of each upload.
    """
    commons = pywikibot.Site('commons', 'commons')
    commons.login()
    upload_comment = u'Source image improved by the institution #LSH'

    # create target directories if they don't exist
    done_dir = common.modify_path(base_dir, sub_dir)
    error_dir = u'%s_errors' % done_dir
    warnings_dir = u'%s_warnings' % done_dir
    common.create_dir(done_dir)
    common.create_dir(error_dir)
    common.create_dir(warnings_dir)

    # logfile
    # The with block closes the log even if an upload or a move raises.
    logfile = common.modify_path(base_dir, log_file)
    with codecs.open(logfile, 'a', 'utf-8') as flog:
        # find candidate files
        media_files = prep.find_files(base_dir, ('.tif', ), subdir=False)
        for media_file in media_files:
            file_name = os.path.basename(media_file)
            result = uploader.upload_single_file(
                file_name, media_file, upload_comment, commons,
                overwrite_page_exists=True)

            if result.get('error'):
                target_dir = error_dir
            elif result.get('warning'):
                target_dir = warnings_dir
            else:
                target_dir = done_dir

            if verbose:
                pywikibot.output(result.get('log'))
            flog.write(u'%s\n' % result.get('log'))
            os.rename(media_file, common.modify_path(target_dir, file_name))
            # flush per file so the log is up to date if a later upload fails
            flog.flush()

    pywikibot.output(u'Created %s' % logfile)
def up_all(in_path, cutoff=None, target='Uploaded', file_exts=None,
           verbose=False, test=False, target_site=None, chunked=True):
    """
    Upload all matched files in the supplied directory.

    Media (image) files and metadata files with the expected extension .info
    should be in the same directory. Metadata files should contain the
    entirety of the desired description page (in wikitext).

    Moves each file to one of the target folders after processing. Media
    files without a matching .info file are logged and left in place, and
    nothing is moved when running in test mode.

    @param in_path: path to directory with files to upload
    @param cutoff: number of files to upload (defaults to all). Files which
        are skipped because they lack an .info file do not count.
    @param target: sub-directory for uploaded files (defaults to "Uploaded")
    @param file_exts: tuple of allowed file extensions (defaults to FILE_EXTS)
    @param verbose: whether to output confirmation after each upload
    @param test: set to True to test but not upload
    @param target_site: pywikibot.Site to which file should be uploaded,
        defaults to Commons.
    @param chunked: Whether to do chunked uploading or not.
    @raise SystemExit: if in_path is not a directory
    """
    # set defaults unless overridden
    file_exts = file_exts or FILE_EXTS
    target_site = target_site or pywikibot.Site('commons', 'commons')
    target_site.login()

    # Verify in_path
    if not os.path.isdir(in_path):
        pywikibot.output('The provided in_path was not a valid '
                         'directory: %s' % in_path)
        # Same exception and exit status as the exit() helper, but without
        # relying on the site module having injected that helper.
        raise SystemExit()

    # create target directories if they don't exist
    done_dir = os.path.join(in_path, target)
    error_dir = '%s_errors' % done_dir
    warnings_dir = '%s_warnings' % done_dir
    common.create_dir(done_dir)
    common.create_dir(error_dir)
    common.create_dir(warnings_dir)

    # logfile
    flog = common.LogFile(in_path, '¤uploader.log')

    try:
        # find all content files
        found_files = prepUpload.find_files(
            path=in_path, file_exts=file_exts, subdir=False)
        counter = 1
        for f in found_files:
            if cutoff and counter > cutoff:
                break

            # verify that there is a matching info file
            info_file = '%s.info' % os.path.splitext(f)[0]
            base_name = os.path.basename(f)
            base_info_name = os.path.basename(info_file)
            if not os.path.exists(info_file):
                flog.write_w_timestamp(
                    '{0}: Found multimedia file without info'.format(
                        base_name))
                continue

            # prepare upload
            txt = common.open_and_read_file(info_file)

            if test:
                pywikibot.output('Test upload "%s" with the following '
                                 'description:\n%s\n' % (base_name, txt))
                counter += 1
                continue
            # stop here if testing

            result = upload_single_file(
                base_name, f, txt, target_site,
                upload_if_badprefix=True, chunked=chunked)
            if result.get('error'):
                target_dir = error_dir
            elif result.get('warning'):
                target_dir = warnings_dir
            else:
                target_dir = done_dir

            if verbose:
                pywikibot.output(result.get('log'))
            flog.write_w_timestamp(result.get('log'))

            # the media file and its info file are always moved together
            os.rename(f, os.path.join(target_dir, base_name))
            os.rename(info_file, os.path.join(target_dir, base_info_name))
            counter += 1
    finally:
        # Close the log even if an upload or a move raised, so that entries
        # written so far are (presumably) not lost.
        pywikibot.output(flog.close_and_confirm())