Example #1
def up_all(in_path,
           cutoff=None,
           target='Uploaded',
           file_exts=None,
           verbose=False,
           test=False,
           target_site=None,
           chunked=True):
    """
    Upload all matched files in the supplied directory.

    Media (image) files and metadata files with the expected extension .info
    should be in the same directory. Metadata files should contain the entirety
    of the desired description page (in wikitext).

    Moves each file to one of the target folders after processing.

    @param in_path: path to directory with files to upload
    @param cutoff: number of files to upload (defaults to all)
    @param target: sub-directory for uploaded files (defaults to "Uploaded")
    @param file_exts: tuple of allowed file extensions (defaults to FILE_EXTS)
    @param verbose: whether to output confirmation after each upload
    @param test: set to True to test but not upload
    @param target_site: pywikibot.Site to which file should be uploaded,
        defaults to Commons.
    @param chunked: Whether to do chunked uploading or not.
    """
    # set defaults unless overridden
    file_exts = file_exts or FILE_EXTS
    target_site = target_site or pywikibot.Site('commons', 'commons')
    target_site.login()

    # Verify in_path
    if not os.path.isdir(in_path):
        pywikibot.output('The provided in_path was not a valid '
                         'directory: %s' % in_path)
        raise SystemExit

    # create target directories if they don't exist
    done_dir = os.path.join(in_path, target)
    error_dir = '%s_errors' % done_dir
    warnings_dir = '%s_warnings' % done_dir
    common.create_dir(done_dir)
    common.create_dir(error_dir)
    common.create_dir(warnings_dir)

    # logfile
    flog = common.LogFile(in_path, '¤uploader.log')

    # find all content files
    found_files = prepUpload.find_files(path=in_path,
                                        file_exts=file_exts,
                                        subdir=False)
    counter = 1
    for f in found_files:
        if cutoff and counter > cutoff:
            break
        # verify that there is a matching info file
        info_file = '%s.info' % os.path.splitext(f)[0]
        base_name = os.path.basename(f)
        base_info_name = os.path.basename(info_file)
        if not os.path.exists(info_file):
            flog.write_w_timestamp(
                '{0}: Found multimedia file without info'.format(base_name))
            continue

        # prepare upload
        txt = common.open_and_read_file(info_file)

        # stop here if testing
        if test:
            pywikibot.output('Test upload "%s" with the following '
                             'description:\n%s\n' % (base_name, txt))
            counter += 1
            continue

        target_dir = None
        result = upload_single_file(base_name,
                                    f,
                                    txt,
                                    target_site,
                                    upload_if_badprefix=True,
                                    chunked=chunked)
        if result.get('error'):
            target_dir = error_dir
        elif result.get('warning'):
            target_dir = warnings_dir
        else:
            target_dir = done_dir
        if verbose:
            pywikibot.output(result.get('log'))

        flog.write_w_timestamp(result.get('log'))
        os.rename(f, os.path.join(target_dir, base_name))
        os.rename(info_file, os.path.join(target_dir, base_info_name))
        counter += 1

    pywikibot.output(flog.close_and_confirm())
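
# A minimal usage sketch for up_all (the "uploader" module name and the
# directory layout are assumptions, not taken from the source):
#
# Hypothetical batch: ./batch/vase.tif plus its wikitext description page
# in ./batch/vase.info. Dry run first, then the real upload to Commons.
import uploader

uploader.up_all('./batch', cutoff=10, test=True)     # dry run, output only
uploader.up_all('./batch', cutoff=10, verbose=True)  # real upload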
Example #2
    def load_data(self, in_file):
        """Load the data file as raw text."""
        return common.open_and_read_file(in_file, as_json=False)
Example #3
def main(*args):
    """Command line entry-point."""
    usage = (
        'Usage:\n'
        '\tpython uploader.py -in_path:PATH -dir:PATH -cutoff:NUM\n'
        '\t-in_path:PATH path to the directory containing the media files or '
        'to the make_info output file if "-type" is set to url\n'
        '\t-type:STRING the type of upload to make. Must be either "FILES" '
        'or "URL". Defaults to FILES (optional)\n'
        '\t-dir:PATH specifies the path to the directory containing a '
        'user_config.py file (optional)\n'
        '\t-cutoff:NUM stop the upload after the specified number of files '
        '(optional)\n'
        '\t-confirm Whether to output a confirmation after each upload '
        'attempt (optional)\n'
        '\t-test Whether to do a mock upload, simply outputting to the '
        'command line (optional)\n'
        '\t-nochunk Whether to turn off chunked uploading; this is slow '
        'and does not support files > 100 MB (optional, type:FILES only)\n'
        '\t-only:PATH to file containing list of urls to upload, skipping all '
        'others. One entry per line. (optional, type:URL only)\n'
        '\t-skip:PATH to file containing list of urls to skip, uploading all '
        'others. Can be combined with "-only" for further filtering, e.g. '
        '"-only:<list of vase images> -skip:<list of blue images>" to get '
        'non-blue vase images. One entry per line. (optional, type:URL only)\n'
        '\t-ignore_warnings Whether to ignore warnings when uploading '
        '(optional, type:URL only)\n'
        '\tExample:\n'
        '\tpython uploader.py -in_path:../diskkopia -cutoff:100\n')
    cutoff = None
    in_path = None
    test = False
    confirm = False
    chunked = True
    typ = 'files'
    only = None
    skip = None
    ignore_warnings = False

    # Load pywikibot args and handle local args
    for arg in pywikibot.handle_args(args):
        option, sep, value = arg.partition(':')
        if option == '-cutoff':
            if common.is_pos_int(value):
                cutoff = int(value)
        elif option == '-in_path':
            in_path = value
        elif option == '-test':
            test = True
        elif option == '-confirm':
            confirm = True
        elif option == '-nochunk':
            chunked = False
        elif option == '-ignore_warnings':
            ignore_warnings = True
        elif option == '-type':
            if value.lower() == 'url':
                typ = 'url'
            elif value.lower() not in ('url', 'files'):
                pywikibot.output(usage)
                return
        elif option == '-only':
            only = common.trim_list(
                common.open_and_read_file(value).split('\n'))
        elif option == '-skip':
            skip = common.trim_list(
                common.open_and_read_file(value).split('\n'))
        elif option == '-usage':
            pywikibot.output(usage)
            return

    if in_path:
        if typ == 'files':
            up_all(in_path,
                   cutoff=cutoff,
                   test=test,
                   verbose=confirm,
                   chunked=chunked)
        elif typ == 'url':
            up_all_from_url(in_path,
                            cutoff=cutoff,
                            only=only,
                            skip=skip,
                            test=test,
                            ignore_warnings=ignore_warnings,
                            verbose=confirm)
    else:
        pywikibot.output(usage)
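
# Hedged command-line invocations of main() above (paths are invented; the
# flags themselves come from the usage string):
#
# Upload at most 100 local files, confirming each attempt:
#   python uploader.py -in_path:./batch -cutoff:100 -confirm
# Upload from a make_info json file, filtering by url lists:
#   python uploader.py -in_path:./info.json -type:URL \
#       -only:./keep_urls.txt -skip:./drop_urls.txt -ignore_warnings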
Example #4
def up_all_from_url(info_path,
                    cutoff=None,
                    target='upload_logs',
                    file_exts=None,
                    verbose=False,
                    test=False,
                    target_site=None,
                    ignore_warnings=False,
                    only=None,
                    skip=None):
    """
    Upload all images provided as urls in a make_info json file.

    Media (image) files and metadata files with the expected extension .info
    should be in the same directory. Metadata files should contain the entirety
    of the desired description page (in wikitext).

    Outputs separate logfiles for files triggering errors, warnings and
    successes, so that these can be used in later runs.

    @param info_path: path to the make_info json file
    @param cutoff: number of files to upload (defaults to all)
    @param target: sub-directory for log files (defaults to "upload_logs")
    @param file_exts: tuple of allowed file extensions (defaults to FILE_EXTS)
    @param verbose: whether to output confirmation after each upload
    @param test: set to True to test but not upload
    @param target_site: pywikibot.Site to which file should be uploaded,
        defaults to Commons.
    @param ignore_warnings: ignore warnings when uploading.
    @param only: list of urls to upload, if provided all others will be skipped
    @param skip: list of urls to skip, all others will be uploaded
    """
    # set defaults unless overridden
    file_exts = file_exts or FILE_EXTS
    target_site = target_site or pywikibot.Site('commons', 'commons')
    target_site.login()

    # load info file
    info_datas = common.open_and_read_file(info_path, as_json=True)

    # create target directory if it doesn't exist
    output_dir = os.path.join(os.path.dirname(info_path), target)
    common.create_dir(output_dir)

    # create all log files
    logs = {
        'success': common.LogFile(output_dir, 'success.log'),
        'warning': common.LogFile(output_dir, 'warnings.log'),
        'error': common.LogFile(output_dir, 'errors.log'),
        'general': common.LogFile(output_dir, 'uploader.log')
    }

    # shortcut to the general/verbose logfile
    flog = logs['general']

    # filtering based on entries in only/skip
    kill_list = set()
    if only:
        kill_list |= set(info_datas.keys()) - set(only)  # difference
    if skip:
        kill_list |= set(info_datas.keys()) & set(skip)  # intersection
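    # worked example (hypothetical urls): with keys {a, b, c},
    # only=[a, b] kills {c} and skip=[b] kills {b}, leaving just {a}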
    for key in kill_list:
        del info_datas[key]
    flog.write_w_timestamp('{} files remain to upload after filtering'.format(
        len(info_datas)))

    counter = 1
    for url, data in info_datas.items():
        if cutoff and counter > cutoff:
            break

        # verify that the file extension is ok
        try:
            ext = verify_url_file_extension(url, file_exts)
        except common.MyError as e:
            flog.write_w_timestamp(e)
            continue

        # verify that info and output filenames are provided
        if not data['info']:
            flog.write_w_timestamp(
                '{url}: Found url missing the info field (at least)'.format(
                    url=url))
            continue
        elif not data['filename']:
            flog.write_w_timestamp(
                '{url}: Found url missing the output filename'.format(url=url))
            continue

        # prepare upload
        txt = make_info_page(data)
        filename = '{filename}{ext}'.format(filename=data['filename'], ext=ext)

        # stop here if testing
        if test:
            pywikibot.output(
                'Test upload "{filename}" from "{url}" with the following '
                'description:\n{txt}\n'.format(filename=filename,
                                               url=url,
                                               txt=txt))
            counter += 1
            continue

        result = upload_single_file(filename,
                                    url,
                                    txt,
                                    target_site,
                                    ignore_all_warnings=ignore_warnings,
                                    upload_if_badprefix=True)
        if result.get('error'):
            logs['error'].write(url)
        elif result.get('warning'):
            logs['warning'].write(url)
        else:
            logs['success'].write(url)
        if verbose:
            pywikibot.output(result.get('log'))

        flog.write_w_timestamp(result.get('log'))
        counter += 1

    for log in logs.values():
        pywikibot.output(log.close_and_confirm())
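
# A sketch of a direct call to up_all_from_url (file names and the url list
# are hypothetical; the keyword arguments exist in the signature above):
only = common.trim_list(
    common.open_and_read_file('keep_urls.txt').split('\n'))
up_all_from_url('make_info_output.json',
                cutoff=50,
                only=only,
                ignore_warnings=True,
                test=True)  # dry run; drop test=True to really upload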
Example #5
    def load_mappings(self, update_mappings):
        """
        Update mapping files, load these and package appropriately.

        :param update_mappings: whether to first download the latest mappings
        """
        socken_file = os.path.join(MAPPINGS_DIR, 'socken.json')
        kommun_file = os.path.join(MAPPINGS_DIR, 'kommun.json')
        countries_file = os.path.join(MAPPINGS_DIR, 'countries_for_cats.json')
        tags_file = os.path.join(MAPPINGS_DIR, 'tags.json')
        primary_classes_file = os.path.join(
            MAPPINGS_DIR, 'primary_classes.json')
        photographer_file = os.path.join(MAPPINGS_DIR, 'photographers.json')
        kmb_files_file = os.path.join(MAPPINGS_DIR, 'kmb_files.json')
        commonscat_file = os.path.join(MAPPINGS_DIR, 'commonscat.json')
        church_file = os.path.join(MAPPINGS_DIR, 'churches.json')
        photographer_page = 'Institution:Riksantikvarieämbetet/KMB/creators'

        if update_mappings:
            query_props = {'P373': 'commonscat'}
            self.mappings['socken'] = KMBInfo.query_to_lookup(
                KMBInfo.build_query('P777', optional_props=query_props.keys()),
                props=query_props)
            self.mappings['kommun'] = KMBInfo.query_to_lookup(
                KMBInfo.build_query('P525', optional_props=query_props.keys()),
                props=query_props)
            self.mappings['photographers'] = self.get_photographer_mapping(
                photographer_page)
            self.mappings['kmb_files'] = self.get_existing_kmb_files()
            self.mappings['commonscat'] = {'bbr': {}, 'fmis': {}}
            KMBInfo.get_commonscat_from_heritage(
                'se-bbr', limit=1000,
                data=self.mappings['commonscat']['bbr'])
            KMBInfo.get_commonscat_from_heritage(
                'se-fornmin', limit=1000,
                data=self.mappings['commonscat']['fmis'])
            self.load_wikidata_bbr_fmis_commonscat()

            # dump to mappings
            common.open_and_write_file(
                socken_file, self.mappings['socken'], as_json=True)
            common.open_and_write_file(
                kommun_file, self.mappings['kommun'], as_json=True)
            common.open_and_write_file(
                photographer_file, self.mappings['photographers'],
                as_json=True)
            common.open_and_write_file(
                kmb_files_file, self.mappings['kmb_files'], as_json=True)
            common.open_and_write_file(
                commonscat_file, self.mappings['commonscat'], as_json=True)
        else:
            self.mappings['socken'] = common.open_and_read_file(
                socken_file, as_json=True)
            self.mappings['kommun'] = common.open_and_read_file(
                kommun_file, as_json=True)
            self.mappings['photographers'] = common.open_and_read_file(
                photographer_file, as_json=True)
            self.mappings['kmb_files'] = common.open_and_read_file(
                kmb_files_file, as_json=True)
            self.mappings['commonscat'] = common.open_and_read_file(
                commonscat_file, as_json=True)

        self.mappings['countries'] = common.open_and_read_file(
            countries_file, as_json=True)
        self.mappings['churches'] = common.open_and_read_file(
            church_file, as_json=True)
        self.mappings['tags'] = common.open_and_read_file(
            tags_file, as_json=True)
        self.mappings['primary_classes'] = common.open_and_read_file(
            primary_classes_file, as_json=True)

        pywikibot.output('Loaded all mappings')
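
# Hedged usage of load_mappings (the KMBInfo constructor and the initial
# self.mappings dict are assumptions; only the method itself is in the source):
kmb = KMBInfo()      # hypothetical constructor
kmb.mappings = {}    # the method fills this dict
kmb.load_mappings(update_mappings=False)  # load cached json mappings
socken_lookup = kmb.mappings['socken']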
Example #6
def load_list(filename=None):
    """Load json list."""
    filename = filename or LIST_FILE
    return common.open_and_read_file(filename, as_json=True)