コード例 #1
0
def process_all_files(base_dir=MAIN_DIR, xml_dir=XML_DIR):
    """
    Identify all xml files in a directory, load the data and process.

    Each xml file found is loaded through load_xml and wrapped in an
    InfoEntry. A file which raises an error during this step is reported and
    skipped, as is any file whose object id has already been encountered.
    The collected output is written as json to "processed_lido.json" in
    base_dir.

    :param base_dir: Path to the main directory, in which the output file is
        created.
    :param xml_dir: Path to the directory holding the xml files. It is joined
        onto base_dir.
    :raises common.MyError: If base_dir or the joined xml directory is not an
        existing directory.
    """
    # Check directories
    xml_dir = os.path.join(base_dir, xml_dir)
    for directory in (base_dir, xml_dir):
        if not os.path.isdir(directory):
            raise common.MyError(
                u'The provided directory was not a valid directory: %s' %
                directory)

    # Find candidate files
    found_files = prep.find_files(path=xml_dir,
                                  file_exts=('.xml', ),
                                  subdir=False)
    pywikibot.output("Found %d .xml files" % len(found_files))

    data = {}
    for xml_file in found_files:
        # Best effort: a single broken file must not abort the whole run,
        # so any error is reported and the file is skipped.
        try:
            test = InfoEntry(load_xml(xml_file))
        except Exception as e:
            pywikibot.output(u"Encountered error while processing %s: %s" %
                             (os.path.split(xml_file)[-1], e))
            continue

        # Membership is tested on the dict itself; going through .keys()
        # builds a new list for every file under Python 2.
        if test.obj_id in data:
            # the first file encountered for an object is the one kept
            pywikibot.output(u"Multiple files for same object: %s, %s, %s" %
                             (test.obj_id, test.source_file,
                              data[test.obj_id]['source_file']))
            continue
        data[test.obj_id] = test.output()

    out_file = os.path.join(base_dir, u'processed_lido.json')
    common.open_and_write_file(out_file, data, as_json=True)
    pywikibot.output("Created %s with %d entries" % (out_file, len(data)))
コード例 #2
0
ファイル: upload_dupes.py プロジェクト: lokal-profil/LSH
def rename(base_dir, sub_cat, in_filename, log_file='move.log'):
    """
    Identify any files to replace and rename them to their commons names.

    The commons name of a file is taken from the PhoSystematikS column, which
    is expected to hold the url of the file page on Commons. A row where this
    is not the case is reported, and the corresponding file (if present) is
    left where it is with a note added to the log.

    :param base_dir: Path to directory in which replacement image files are
        found.
    :param sub_cat: The name of the subdirectory into which processed files
        should be moved.
    :param in_filename: The photoAll.csv file filtered to only contain the
        files to replace.
    :param log_file: The name of the log file to be created (in base_dir).
    """
    # Load indata
    in_filename = common.modify_path(base_dir, in_filename)
    header_check = u'PhoId|PhoObjId|PhoBeschreibungM|PhoAufnahmeortS|' \
                   u'PhoSwdS|MulId|filnamn|AdrVorNameS|AdrNameS|PhoSystematikS'
    data = csv.csv_file_to_dict(in_filename,
                                "filnamn",
                                header_check,
                                keep=('PhoSystematikS', 'filnamn'),
                                delimiter='|',
                                codec='utf-16')

    # reformat the commons filenames
    # A separate mapping is built rather than overwriting data in place.
    # Rows without a valid url are stored as None; otherwise the whole row
    # would later be handed on as if it were the target file name.
    url_prefix = u'https://commons.wikimedia.org/wiki/File:'
    commons_names = {}
    for k, v in data.items():
        url = v['PhoSystematikS']
        if url.startswith(url_prefix):
            commons_names[k] = url[len(url_prefix):]
        else:
            pywikibot.output("error in indatafile: %s, %s" % (k, v))
            commons_names[k] = None

    # find candidate files
    candidates = prep.find_files(base_dir, ('.tif', ), subdir=False)

    # rename the files
    sub_cat = common.modify_path(base_dir, sub_cat)
    log_file = common.modify_path(base_dir, log_file)
    common.create_dir(sub_cat)
    log = []

    for candidate in candidates:
        base_name = os.path.basename(candidate)
        if base_name not in commons_names:
            log.append('%s not found in csv file' % base_name)
            continue

        # pop so that whatever remains afterwards is known to be missing
        # from disk
        commons_name = commons_names.pop(base_name)
        if commons_name is None:
            # no usable target name, leave the file in place
            log.append('%s has no valid commons name in csv file' % base_name)
            continue

        os.rename(candidate, common.modify_path(sub_cat, commons_name))

    for k in commons_names:
        log.append('%s not found on disk' % k)

    common.open_and_write_file(log_file, '\n'.join(log), codec='utf-8')
    pywikibot.output(u'Created %s' % log_file)
コード例 #3
0
ファイル: upload_dupes.py プロジェクト: lokal-profil/LSH
def upload_all(base_dir,
               sub_dir=u'Uploaded',
               log_file='upload.log',
               verbose=True):
    """
    Upload the renamed files.

    We cannot just use uploader.up_all since  there are no corresponding .info
    files.

    Every .tif file found directly in base_dir is uploaded to Commons under
    its own file name and then moved to one of three directories depending
    on whether the upload result holds an error, a warning or neither.

    :param base_dir: Path to the directory containing the files to upload.
    :param sub_dir: Name of the subdirectory (of base_dir) to which
        successfully uploaded files are moved. Files with errors or warnings
        are moved to sibling directories with the suffixes "_errors" and
        "_warnings".
    :param log_file: The name of the log file (in base_dir). New entries are
        appended to any pre-existing file.
    :param verbose: Whether to also output the log entry of each upload.
    """
    commons = pywikibot.Site('commons', 'commons')
    commons.login()

    upload_comment = u'Source image improved by the institution #LSH'

    # create target directories if they don't exist
    done_dir = common.modify_path(base_dir, sub_dir)
    error_dir = u'%s_errors' % done_dir
    warnings_dir = u'%s_warnings' % done_dir
    common.create_dir(done_dir)
    common.create_dir(error_dir)
    common.create_dir(warnings_dir)

    # logfile
    # Opened as a context manager so that the handle is closed even if an
    # upload or a rename raises part way through the run.
    logfile = common.modify_path(base_dir, log_file)
    with codecs.open(logfile, 'a', 'utf-8') as flog:
        # find candidate files
        media_files = prep.find_files(base_dir, ('.tif', ), subdir=False)
        for media_file in media_files:
            file_name = os.path.basename(media_file)
            result = uploader.upload_single_file(file_name,
                                                 media_file,
                                                 upload_comment,
                                                 commons,
                                                 overwrite_page_exists=True)

            if result.get('error'):
                target_dir = error_dir
            elif result.get('warning'):
                target_dir = warnings_dir
            else:
                target_dir = done_dir
            if verbose:
                pywikibot.output(result.get('log'))

            # Flush before moving the file so that the outcome of the upload
            # is on record even if the move fails.
            flog.write(u'%s\n' % result.get('log'))
            flog.flush()
            os.rename(media_file, common.modify_path(target_dir, file_name))

    pywikibot.output(u'Created %s' % logfile)
コード例 #4
0
ファイル: upload_dupes.py プロジェクト: lokal-profil/LSH
def rename(base_dir, sub_cat, in_filename, log_file='move.log'):
    """
    Identify any files to replace and rename them to their commons names.

    The commons name of a file is taken from the PhoSystematikS column, which
    is expected to hold the url of the file page on Commons. A row where this
    is not the case is reported, and the corresponding file (if present) is
    left where it is with a note added to the log.

    :param base_dir: Path to directory in which replacement image files are
        found.
    :param sub_cat: The name of the subdirectory into which processed files
        should be moved.
    :param in_filename: The photoAll.csv file filtered to only contain the
        files to replace.
    :param log_file: The name of the log file to be created (in base_dir).
    """
    # Load indata
    in_filename = common.modify_path(base_dir, in_filename)
    header_check = u'PhoId|PhoObjId|PhoBeschreibungM|PhoAufnahmeortS|' \
                   u'PhoSwdS|MulId|filnamn|AdrVorNameS|AdrNameS|PhoSystematikS'
    data = csv.csv_file_to_dict(in_filename, "filnamn", header_check,
                                keep=('PhoSystematikS', 'filnamn'),
                                delimiter='|', codec='utf-16')

    # reformat the commons filenames
    # A separate mapping is built rather than overwriting data in place.
    # Rows without a valid url are stored as None; otherwise the whole row
    # would later be handed on as if it were the target file name.
    url_prefix = u'https://commons.wikimedia.org/wiki/File:'
    commons_names = {}
    for k, v in data.items():
        url = v['PhoSystematikS']
        if url.startswith(url_prefix):
            commons_names[k] = url[len(url_prefix):]
        else:
            pywikibot.output("error in indatafile: %s, %s" % (k, v))
            commons_names[k] = None

    # find candidate files
    candidates = prep.find_files(base_dir, ('.tif', ), subdir=False)

    # rename the files
    sub_cat = common.modify_path(base_dir, sub_cat)
    log_file = common.modify_path(base_dir, log_file)
    common.create_dir(sub_cat)
    log = []

    for candidate in candidates:
        base_name = os.path.basename(candidate)
        if base_name not in commons_names:
            log.append('%s not found in csv file' % base_name)
            continue

        # pop so that whatever remains afterwards is known to be missing
        # from disk
        commons_name = commons_names.pop(base_name)
        if commons_name is None:
            # no usable target name, leave the file in place
            log.append('%s has no valid commons name in csv file' % base_name)
            continue

        os.rename(candidate, common.modify_path(sub_cat, commons_name))

    for k in commons_names:
        log.append('%s not found on disk' % k)

    common.open_and_write_file(log_file, '\n'.join(log), codec='utf-8')
    pywikibot.output(u'Created %s' % log_file)
コード例 #5
0
ファイル: upload_dupes.py プロジェクト: lokal-profil/LSH
def upload_all(base_dir, sub_dir=u'Uploaded', log_file='upload.log',
               verbose=True):
    """
    Upload the renamed files.

    We cannot just use uploader.up_all since  there are no corresponding .info
    files.

    Every .tif file found directly in base_dir is uploaded to Commons under
    its own file name and then moved to one of three directories depending
    on whether the upload result holds an error, a warning or neither.

    :param base_dir: Path to the directory containing the files to upload.
    :param sub_dir: Name of the subdirectory (of base_dir) to which
        successfully uploaded files are moved. Files with errors or warnings
        are moved to sibling directories with the suffixes "_errors" and
        "_warnings".
    :param log_file: The name of the log file (in base_dir). New entries are
        appended to any pre-existing file.
    :param verbose: Whether to also output the log entry of each upload.
    """
    commons = pywikibot.Site('commons', 'commons')
    commons.login()

    upload_comment = u'Source image improved by the institution #LSH'

    # create target directories if they don't exist
    done_dir = common.modify_path(base_dir, sub_dir)
    error_dir = u'%s_errors' % done_dir
    warnings_dir = u'%s_warnings' % done_dir
    common.create_dir(done_dir)
    common.create_dir(error_dir)
    common.create_dir(warnings_dir)

    # logfile
    # Opened as a context manager so that the handle is closed even if an
    # upload or a rename raises part way through the run.
    logfile = common.modify_path(base_dir, log_file)
    with codecs.open(logfile, 'a', 'utf-8') as flog:
        # find candidate files
        media_files = prep.find_files(base_dir, ('.tif', ), subdir=False)
        for media_file in media_files:
            file_name = os.path.basename(media_file)
            result = uploader.upload_single_file(
                file_name, media_file, upload_comment, commons,
                overwrite_page_exists=True)

            if result.get('error'):
                target_dir = error_dir
            elif result.get('warning'):
                target_dir = warnings_dir
            else:
                target_dir = done_dir
            if verbose:
                pywikibot.output(result.get('log'))

            # Flush before moving the file so that the outcome of the upload
            # is on record even if the move fails.
            flog.write(u'%s\n' % result.get('log'))
            flog.flush()
            os.rename(media_file, common.modify_path(target_dir, file_name))

    pywikibot.output(u'Created %s' % logfile)
コード例 #6
0
def up_all(in_path,
           cutoff=None,
           target='Uploaded',
           file_exts=None,
           verbose=False,
           test=False,
           target_site=None,
           chunked=True):
    """
    Upload all matched files in the supplied directory.

    Media (image) files and metadata files with the expected extension .info
    should be in the same directory. Metadata files should contain the entirety
    of the desired description page (in wikitext).

    Moves each file to one of the target folders after processing. A media
    file without a matching .info file is logged and left in place, and does
    not count towards the cutoff.

    Terminates the program (by raising SystemExit) if in_path is not a
    directory.

    @param in_path: path to directory with files to upload
    @param cutoff: number of files to upload (defaults to all)
    @param target: sub-directory for uploaded files (defaults to "Uploaded")
    @param file_exts: tuple of allowed file extensions (defaults to FILE_EXTS)
    @param verbose: whether to output confirmation after each upload
    @param test: set to True to test but not upload
    @param target_site: pywikibot.Site to which file should be uploaded,
        defaults to Commons.
    @param chunked: Whether to do chunked uploading or not.
    """
    # set defaults unless overridden
    file_exts = file_exts or FILE_EXTS
    target_site = target_site or pywikibot.Site('commons', 'commons')
    target_site.login()

    # Verify in_path
    if not os.path.isdir(in_path):
        pywikibot.output('The provided in_path was not a valid '
                         'directory: %s' % in_path)
        # Raised directly as the exit() helper is only present when the site
        # module has been loaded. The effect is otherwise the same.
        raise SystemExit()

    # create target directories if they don't exist
    done_dir = os.path.join(in_path, target)
    error_dir = '%s_errors' % done_dir
    warnings_dir = '%s_warnings' % done_dir
    common.create_dir(done_dir)
    common.create_dir(error_dir)
    common.create_dir(warnings_dir)

    # logfile
    flog = common.LogFile(in_path, '¤uploader.log')

    # The log is finalised in the finally clause so that it is not left open
    # should an upload or a rename raise part way through the run.
    try:
        # find all content files
        found_files = prepUpload.find_files(path=in_path,
                                            file_exts=file_exts,
                                            subdir=False)
        counter = 1
        for f in found_files:
            if cutoff and counter > cutoff:
                break
            # verify that there is a matching info file
            info_file = '%s.info' % os.path.splitext(f)[0]
            base_name = os.path.basename(f)
            base_info_name = os.path.basename(info_file)
            if not os.path.exists(info_file):
                flog.write_w_timestamp(
                    '{0}: Found multimedia file without info'.format(
                        base_name))
                continue

            # prepare upload
            txt = common.open_and_read_file(info_file)

            # stop here if testing
            if test:
                pywikibot.output('Test upload "%s" with the following '
                                 'description:\n%s\n' % (base_name, txt))
                counter += 1
                continue

            result = upload_single_file(base_name,
                                        f,
                                        txt,
                                        target_site,
                                        upload_if_badprefix=True,
                                        chunked=chunked)
            if result.get('error'):
                target_dir = error_dir
            elif result.get('warning'):
                target_dir = warnings_dir
            else:
                target_dir = done_dir
            if verbose:
                pywikibot.output(result.get('log'))

            # the media file and its info file are kept together
            flog.write_w_timestamp(result.get('log'))
            os.rename(f, os.path.join(target_dir, base_name))
            os.rename(info_file, os.path.join(target_dir, base_info_name))
            counter += 1
    finally:
        # presumably closes the log and returns a confirmation message;
        # verify against common.LogFile
        pywikibot.output(flog.close_and_confirm())