def __init__(self, **options):
    super(GlassInfo, self).__init__(**options)
    self.batch_cat = "{}: {}".format(BATCH_CAT, BATCH_DATE)
    self.commons = pywikibot.Site('commons', 'commons')
    self.wikidata = pywikibot.Site('wikidata', 'wikidata')
    self.log = common.LogFile('', LOGFILE)
    self.category_cache = []
def __init__(self, options): """Initialise a harvester object for a DigitaltMuseum harvest.""" if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR) # Create directory for cache if needed self.data = {} # data container for harvested info self.settings = options self.log = common.LogFile('', self.settings.get('harvest_log_file')) self.log.write_w_timestamp('Harvester started...') self.exhibition_cache = {} # cache for exhibition dimu-code, as it's
def __init__(self, **options): """Initialise a make_info object.""" batch_date = options.get('batch_label') or BATCH_DATE batch_cat = options.get('base_meta_cat') or BATCH_CAT super(KMBInfo, self).__init__(batch_cat, batch_date, **options) self.commons = pywikibot.Site('commons', 'commons') self.wikidata = pywikibot.Site('wikidata', 'wikidata') self.category_cache = {} # cache for category_exists() self.photographer_cache = {} self.log = common.LogFile('', LOGFILE)
def __init__(self, **options): """Initialise a make_info object.""" batch_date = common.pop(options, 'batch_label') or BATCH_DATE batch_cat = common.pop(options, 'base_meta_cat') or BATCH_CAT super(SMVKInfo, self).__init__(batch_cat, batch_date, **options) self.commons = pywikibot.Site('commons', 'commons') self.wikidata = pywikibot.Site('wikidata', 'wikidata') self.category_cache = {} # cache for category_exists() self.wikidata_cache = {} # cache for Wikidata results self.log = common.LogFile('', LOGFILE) self.log.write_w_timestamp('Make info started...') self.pd_year = datetime.now().year - 70
def __init__(self, **options): """Initialise a make_info object.""" self.b_settings = self.load_batch_settings(options) super(GLAMInfo, self).__init__(self.b_settings["batch_cat"], self.b_settings["batch_date"], **options) self.commons = pywikibot.Site('commons', 'commons') self.wikidata = pywikibot.Site('wikidata', 'wikidata') self.category_cache = {} # cache for category_exists() self.wikidata_cache = {} # cache for Wikidata results self.log = common.LogFile( '', self.b_settings.get("makeinfo_log_file" or LOGFILE)) self.log.write_w_timestamp('Make info started...') self.pd_year = datetime.now().year - 70
def run(start=None, end=None):
    """Get parsed data for whole kmb hitlist and store as json."""
    log = common.LogFile('', LOGFILE)
    hitlist = load_list()
    if start or end:
        hitlist = hitlist[start:end]
    data = {}
    total_count = len(hitlist)
    for count, kmb in enumerate(hitlist):
        data[kmb] = kmb_wrapper(kmb, log)
        time.sleep(THROTTLE)
        if count % 100 == 0:
            pywikibot.output(
                '{time:s} - {count:d} of {total:d} parsed'.format(
                    time=time.strftime('%H:%M:%S'), count=count,
                    total=total_count))
    output_blob(data)
    pywikibot.output(log.close_and_confirm())
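# Usage sketch (assumption, not from the source): restrict a trial run to the
# first 100 hitlist entries via the optional start/end slice; calling run()
# with no arguments parses the whole list.
run(start=0, end=100)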
def __init__(self, options): """Initialise an mapping updater for a SMVK dataset.""" self.settings = options parser = CsvParser(**self.settings) self.log = common.LogFile('', self.settings.get('mapping_log_file')) self.log.write_w_timestamp('Updater started...') self.mappings = load_mappings( update_mappings=True, mappings_dir=self.settings.get('mappings_dir')) data = parser.load_data(self.settings.get('data_file')) # load archive card data to ensure formatting is still valid archive_data = parser.load_archive_data( self.settings.get('archive_file')) self.people_to_map = Counter() self.ethnic_to_map = Counter() self.places_to_map = OrderedDict() self.keywords_to_map = Counter() self.expedition_to_match = set() self.museum_to_match = set() self.external_to_parse = set() self.parse_data(data) self.parse_archive_data(archive_data) # validate hard coded mappings for ext_id in self.external_to_parse: utils.parse_external_id(ext_id) for expedition in self.expedition_to_match: if expedition not in self.mappings.get('expeditions'): pywikibot.warning( '{} must be added to expeditions.json'.format(expedition)) museum_mapping = self.mappings.get('museums') for museum, type in self.museum_to_match: if museum not in museum_mapping: pywikibot.warning( '{} must be added to museum.json'.format(museum)) elif type not in museum_mapping.get(museum).get('known_types'): pywikibot.warning( 'The "{}" type for {} must be added the Wikimedia link ' 'templates and to museum.json'.format(type, museum)) self.dump_to_wikifiles()
def __init__(self, options): """Initialise an mapping updater for a DigitaltMuseum harvest.""" self.settings = options self.log = common.LogFile('', self.settings.get('mapping_log_file')) self.log.write_w_timestamp('Updater started...') self.mappings = load_mappings( update_mappings=True, mappings_dir=self.settings.get('mappings_dir')) harvest_data = load_harvest_data(self.settings.get('harvest_file')) self.kulturnav_hits = load_kulturnav_data() self.people_to_map = {} self.places_to_map = OrderedDict() self.subjects_to_map = Counter() self.parse_harvest_data(harvest_data) self.check_and_remove_code_place_entries() self.dump_to_wikifiles()
def get_data():
    """Get parsed data for given keywords and store as json files."""
    log = common.LogFile('', LOGFILE)
    settings = load_settings()
    keywords = settings["keywords"]
    api_key = settings["api_key"]
    for keyword in keywords:
        print("[{}] : fetching data.".format(keyword))
        filename = "results_{0}.json".format(keyword)
        results = {}
        hits_limit = 500
        start_at = 1
        counter = 0
        while True:
            url = create_url(keyword, hits_limit, start_at, api_key)
            records = get_records_from_url(url)
            total_results = get_total_hits(records)
            records = split_records(records)
            records_on_page = len(records)
            if records_on_page == 0:
                break
            for record in records:
                counter += 1
                id_no = extract_id_number(record)
                processed_dict = {'ID': id_no, 'problem': []}
                processed_record = parse_record(record, processed_dict, log)
                if id_no not in results:
                    results[id_no] = processed_record
                if counter % 100 == 0:
                    print("Processed {} out of {}".format(
                        counter, total_results))
            start_at += hits_limit
            time.sleep(THROTTLE)
        print("[{}] : fetched {} records to {}.".format(
            keyword, len(results), filename))
        save_data(results, filename)
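# Illustrative sketch (assumption) of the settings returned by
# load_settings(), based only on the two keys read above; the real settings
# file may contain more options and these values are placeholders.
example_settings = {
    'keywords': ['kyrka', 'runsten'],   # hypothetical search keywords
    'api_key': 'YOUR-API-KEY'           # placeholder, not a real key
}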
def up_all_from_url(info_path, cutoff=None, target='upload_logs',
                    file_exts=None, verbose=False, test=False,
                    target_site=None, only=None, skip=None):
    """
    Upload all images provided as urls in a make_info json file.

    Media (image) files and metadata files with the expected extension
    .info should be in the same directory. Metadata files should contain
    the entirety of the desired description page (in wikitext).

    Outputs separate logfiles for files triggering errors, warnings (and
    successes) so that these can be used in later runs.

    @param info_path: path to the make_info json file
    @param cutoff: number of files to upload (defaults to all)
    @param target: sub-directory for log files (defaults to "upload_logs")
    @param file_exts: tuple of allowed file extensions (defaults to FILE_EXTS)
    @param verbose: whether to output confirmation after each upload
    @param test: set to True to test but not upload
    @param target_site: pywikibot.Site to which the file should be uploaded,
        defaults to Commons.
    @param only: list of urls to upload, if provided all others are skipped
    @param skip: list of urls to skip, all others are uploaded
    """
    # set defaults unless overridden
    file_exts = file_exts or FILE_EXTS
    target_site = target_site or pywikibot.Site('commons', 'commons')
    target_site.login()

    # load info file
    info_datas = common.open_and_read_file(info_path, as_json=True)

    # create target directory if it doesn't exist
    output_dir = os.path.join(os.path.dirname(info_path), target)
    common.create_dir(output_dir)

    # create all log files
    logs = {
        'success': common.LogFile(output_dir, 'success.log'),
        'warning': common.LogFile(output_dir, 'warnings.log'),
        'error': common.LogFile(output_dir, 'errors.log'),
        'general': common.LogFile(output_dir, 'uploader.log')
    }
    # shortcut to the general/verbose logfile
    flog = logs['general']

    # filtering based on entries in only/skip
    kill_list = set()
    if only:
        kill_list |= set(info_datas.keys()) - set(only)  # difference
    if skip:
        kill_list |= set(info_datas.keys()) & set(skip)  # intersection
    for key in kill_list:
        del info_datas[key]
    flog.write_w_timestamp(
        '{} files remain to upload after filtering'.format(len(info_datas)))

    counter = 1
    for url, data in info_datas.items():
        if cutoff and counter > cutoff:
            break

        # verify that the file extension is ok
        try:
            ext = verify_url_file_extension(url, file_exts)
        except common.MyError as e:
            flog.write_w_timestamp(e)
            continue

        # verify that info and output filenames are provided
        if not data['info']:
            flog.write_w_timestamp(
                '{url}: Found url missing the info field (at least)'.format(
                    url=url))
            continue
        elif not data['filename']:
            flog.write_w_timestamp(
                '{url}: Found url missing the output filename'.format(
                    url=url))
            continue

        # prepare upload
        txt = make_info_page(data)
        filename = '{filename}{ext}'.format(
            filename=data['filename'], ext=ext)

        if test:
            pywikibot.output(
                'Test upload "{filename}" from "{url}" with the following '
                'description:\n{txt}\n'.format(
                    filename=filename, url=url, txt=txt))
            counter += 1
            continue  # stop here if testing

        result = upload_single_file(
            filename, url, txt, target_site, upload_if_badprefix=True)
        if result.get('error'):
            logs['error'].write(url)
        elif result.get('warning'):
            logs['warning'].write(url)
        else:
            logs['success'].write(url)
        if verbose:
            pywikibot.output(result.get('log'))
        flog.write_w_timestamp(result.get('log'))
        counter += 1

    for log in logs.values():
        pywikibot.output(log.close_and_confirm())
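# Illustrative sketch (assumption) of a make_info json entry as consumed
# above: each key is a source url and each value must provide at least
# 'info' (the full description page in wikitext) and 'filename' (the target
# name without extension; the extension is derived from the url). The url,
# name and wikitext below are hypothetical.
example_info_datas = {
    'http://example.org/images/1234.jpg': {           # hypothetical url
        'filename': 'Example object 1234',             # hypothetical name
        'info': '{{Information|description=...}}'      # wikitext description
    }
}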
def up_all(in_path, cutoff=None, target='Uploaded', file_exts=None,
           verbose=False, test=False, target_site=None, chunked=True):
    """
    Upload all matched files in the supplied directory.

    Media (image) files and metadata files with the expected extension
    .info should be in the same directory. Metadata files should contain
    the entirety of the desired description page (in wikitext).

    Moves each file to one of the target folders after processing.

    @param in_path: path to directory with files to upload
    @param cutoff: number of files to upload (defaults to all)
    @param target: sub-directory for uploaded files (defaults to "Uploaded")
    @param file_exts: tuple of allowed file extensions (defaults to FILE_EXTS)
    @param verbose: whether to output confirmation after each upload
    @param test: set to True to test but not upload
    @param target_site: pywikibot.Site to which the file should be uploaded,
        defaults to Commons.
    @param chunked: whether to do chunked uploading or not.
    """
    # set defaults unless overridden
    file_exts = file_exts or FILE_EXTS
    target_site = target_site or pywikibot.Site('commons', 'commons')
    target_site.login()

    # verify in_path
    if not os.path.isdir(in_path):
        pywikibot.output('The provided in_path was not a valid '
                         'directory: %s' % in_path)
        exit()

    # create target directories if they don't exist
    done_dir = os.path.join(in_path, target)
    error_dir = '%s_errors' % done_dir
    warnings_dir = '%s_warnings' % done_dir
    common.create_dir(done_dir)
    common.create_dir(error_dir)
    common.create_dir(warnings_dir)

    # logfile
    flog = common.LogFile(in_path, '¤uploader.log')

    # find all content files
    found_files = prepUpload.find_files(
        path=in_path, file_exts=file_exts, subdir=False)

    counter = 1
    for f in found_files:
        if cutoff and counter > cutoff:
            break

        # verify that there is a matching info file
        info_file = '%s.info' % os.path.splitext(f)[0]
        base_name = os.path.basename(f)
        base_info_name = os.path.basename(info_file)
        if not os.path.exists(info_file):
            flog.write_w_timestamp(
                '{0}: Found multimedia file without info'.format(base_name))
            continue

        # prepare upload
        txt = common.open_and_read_file(info_file)

        if test:
            pywikibot.output('Test upload "%s" with the following '
                             'description:\n%s\n' % (base_name, txt))
            counter += 1
            continue  # stop here if testing

        target_dir = None
        result = upload_single_file(
            base_name, f, txt, target_site,
            upload_if_badprefix=True, chunked=chunked)
        if result.get('error'):
            target_dir = error_dir
        elif result.get('warning'):
            target_dir = warnings_dir
        else:
            target_dir = done_dir
        if verbose:
            pywikibot.output(result.get('log'))
        flog.write_w_timestamp(result.get('log'))
        os.rename(f, os.path.join(target_dir, base_name))
        os.rename(info_file, os.path.join(target_dir, base_info_name))
        counter += 1

    pywikibot.output(flog.close_and_confirm())
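# Usage sketch (assumption, not from the source): a dry run over a local
# directory, limited to the first five matched files; the path is
# hypothetical and dropping test=True would perform the actual uploads.
up_all('/path/to/batch_dir', cutoff=5, test=True)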