コード例 #1
0
    def _ocr_callback(self, cmd_uri, parser_func=None, ocr_tool=None):
        """OCR callback function.

        Fetch cmd_uri, parse the JSON response and hand the OCR text to
        parser_func.

        @param cmd_uri: URI of the OCR service request (mandatory).
        @param parser_func: optional callable applied to the OCR text on
            success; defaults to the identity function.
        @param ocr_tool: OCR backend that produced cmd_uri; must be one of
            self._OCR_METHODS.
        @return: tuple (error, text [error description in case of error]).
        @raise ValueError: if cmd_uri is empty.
        @raise TypeError: if parser_func is not callable or ocr_tool is not
            a known OCR method.
        """
        def identity(x):
            return x

        if not cmd_uri:
            raise ValueError('Parameter cmd_uri is mandatory.')

        if parser_func is None:
            parser_func = identity

        if not callable(parser_func):
            raise TypeError('Keyword parser_func must be callable.')

        if ocr_tool not in self._OCR_METHODS:
            raise TypeError(
                "ocr_tool must be in %s, not '%s'." %
                (self._OCR_METHODS, ocr_tool))

        # wrong link fail with Exceptions
        retry = 0
        while retry < 5:
            pywikibot.debug('{0}: get URI {1!r}'.format(ocr_tool, cmd_uri),
                            _logger)
            try:
                response = http.fetch(cmd_uri)
            except requests.exceptions.ReadTimeout as e:
                retry += 1
                pywikibot.warning('ReadTimeout %s: %s' % (cmd_uri, e))
                pywikibot.warning('retrying in %s seconds ...' % (retry * 5))
                time.sleep(retry * 5)
            except Exception as e:
                pywikibot.error('"%s": %s' % (cmd_uri, e))
                return (True, e)
            else:
                pywikibot.debug('{0}: {1}'.format(ocr_tool, response.text),
                                _logger)
                break
        else:
            # BUG FIX: exhausting all retries previously fell through to
            # json.loads() below with `response` unbound, raising NameError.
            return (True, 'ReadTimeout: retries exhausted for %s' % cmd_uri)

        data = json.loads(response.text)

        if ocr_tool == self._PHETOOLS:  # phetools
            assert 'error' in data, 'Error from phetools: %s' % data
            assert data['error'] in [0, 1, 2, 3], (
                'Error from phetools: %s' % data)
            error, _text = bool(data['error']), data['text']
        else:  # googleOCR
            if 'error' in data:
                error, _text = True, data['error']
            else:
                error, _text = False, data['text']

        if error:
            pywikibot.error('OCR query %s: %s' % (cmd_uri, _text))
            return (error, _text)
        else:
            return (error, parser_func(_text))
コード例 #2
0
def output_country_report(rows, report_page, max_images=1000):
    """
    Output a gallery of images without id.

    @param rows: list of (image, id, template) or (image, ) tuples.
    @param report_page: pywikibot.Page where report will be outputted.
    @param max_images: the max number of images to report to a page. Defaults
        to 1000.
    """
    # FIXME create this page. Different name?
    central_page = ':c:Commons:Monuments database/Images without id'
    text = common.instruction_header(central_page)
    total = len(rows)

    if not rows:
        text += common.done_message(central_page, 'images without id')
    else:
        gallery_body = '\n'.join(
            format_gallery_row(*row) for row in rows[:max_images])
        text += u'<gallery>\n{}\n</gallery>'.format(gallery_body)

    if total > max_images:
        # The gallery was capped; leave a trace in the page source and in
        # the edit summary.
        text += (
            u'\n<!-- Maximum number of images reached: {0}, '
            u'total of images without id: {1} -->'.format(max_images, total))
        comment = (
            u'Images without an id: {0} (gallery maximum reached), '
            u'total of images without id: {1}'.format(max_images, total))
    else:
        comment = u'Images without an id: {0}'.format(total)

    pywikibot.debug(text, _logger)
    common.save_to_wiki_or_local(
        report_page, comment, text, minorEdit=False)
コード例 #3
0
def process_monument(params, source, countryconfig, conn, cursor, source_page,
                     header_defaults, unknown_fields):
    """Process a single instance of a monument row template."""
    title = source_page.title(True)

    # Seed every configured field, preferring header-level defaults, and
    # record the source of information (permalink).
    contents = {'source': source}
    for field_conf in countryconfig.get('fields'):
        src_key = field_conf.get('source')
        contents[src_key] = header_defaults.get(src_key, '')
    contents['title'] = title

    for param in params:
        (field, value) = extract_elements_from_template_param(param)

        # Skip empty field names outright.
        if not field.strip():
            continue
        if field in contents:
            contents[field] = value
        else:
            # FIXME: Include more information where it went wrong
            pywikibot.debug(
                'Found unknown field on page {0} : ({1}: {2})'.format(
                    title, field, value),
                _logger)
            # Tally how often each unknown field appears per source page.
            unknown_fields.setdefault(field, Counter())[source_page] += 1

    if countryconfig.get('truncate'):
        # If we truncate we don't have to check for primkey
        # (it's a made up one).
        update_monument(
            contents, source, countryconfig, conn, cursor, source_page)
    elif isinstance(countryconfig.get('primkey'), tuple):
        # Composite primkey: only update when every part is filled.
        if all(contents.get(lookup_source_field(part, countryconfig))
               for part in countryconfig.get('primkey')):
            update_monument(
                contents, source, countryconfig, conn, cursor, source_page)
    elif contents.get(lookup_source_field(countryconfig.get('primkey'),
                                          countryconfig)):
        # Single primkey present and filled.
        update_monument(
            contents, source, countryconfig, conn, cursor, source_page)
    else:
        raise NoPrimkeyException
コード例 #4
0
ファイル: common.py プロジェクト: amCap1712/mb2wikidatabot
    def add_mbid_claim_to_item(self, item, mbid):
        """
        Add a claim with property ``self.property_id`` and value `mbid` to
        `item`, then call `self.donefunc` with `mbid` to signal completion.

        :type mbid: str
        :type item: pywikibot.ItemPage
        """
        claim = wp.Claim(const.WIKIDATA_DATASITE, self.property_id)
        claim.setTarget(mbid)
        wp.debug(u"Adding property {pid}, value {mbid} to {title}".format
                 (pid=self.property_id, mbid=mbid, title=item.title()),
                 layer="")
        if wp.config.simulate:
            wp.output("Simulation, no property has been added")
            return
        try:
            item.addClaim(claim, True)
        except wp.UserBlocked:
            # A block is fatal for the bot run. `exit` is the interactive
            # site builtin and may be absent in embedded interpreters;
            # raising SystemExit is the robust equivalent.
            wp.error("I have been blocked")
            raise SystemExit(1)
        except wp.Error as e:
            wp.warning(e)
            return
        else:
            wp.debug("Adding the source Claim", layer="")
            claim.addSources([const.MUSICBRAINZ_CLAIM, const.RETRIEVED_CLAIM],
                             bot=True)
            self.donefunc(mbid)
コード例 #5
0
def makeStatistics(totals):
    """Make statistics on the number of indexed images and put on Commons.

    @param totals: dict mapping country codes to per-country result dicts
        with keys totalImages, tracked_images, commonsTemplate and
        commonsTrackerCategory.
    """
    site = pywikibot.Site('commons', 'commons')
    page = pywikibot.Page(
        site, u'Commons:Monuments database/Indexed images/Statistics')

    title_column = [
        'country', ('images', 'total'), 'tracked',
        ('template', 'tracker template'), ('cat', 'tracker category')
    ]
    table = StatisticsTable(title_column, ('images', 'tracked'))

    # BUG FIX: dict.iteritems() is Python-2-only; items() works on both.
    for (countrycode, countryresults) in sorted(totals.items()):
        table.add_row({
            'country': countrycode,
            'images': countryresults.get('totalImages'),
            'tracked': countryresults.get('tracked_images'),
            'template': u'{{tl|%s}}' % countryresults.get('commonsTemplate'),
            'cat': u'[[:Category:{cat}|{cat}]]'.format(
                cat=countryresults.get('commonsTrackerCategory'))
        })

    text = table.to_wikitext()

    comment = (
        u'Updating indexed image statistics. '
        u'Total indexed images: {}'.format(table.get_sum('tracked')))
    pywikibot.debug(text, _logger)
    common.save_to_wiki_or_local(page, comment, text)
コード例 #6
0
def output_country_report(unused_images, report_page, max_images=1000):
    """
    Format and output the unused images data for a single country.

    @param unused_images: the output of group_unused_images
    @param report_page: pywikibot.Page to which the report should be written
    @param max_images: the max number of images to report to a page. Defaults
        to 1000. Note that actual number of images may be slightly higher in
        order to ensure all candidates for a given monument id are presented.
    @return: dict with totals for 'images', 'pages' and 'ids'.
    """
    # People can add a /header template for with more info
    central_page = ':c:Commons:Monuments database/Unused images'
    text = common.instruction_header(central_page)
    total_pages = 0
    total_ids = 0
    totalImages = 0

    if not unused_images:
        text += common.done_message(central_page, 'unused images')
    else:
        # BUG FIX: dict.iteritems() does not exist on Python 3; items()
        # behaves the same on both versions.
        for source_page, value in unused_images.items():
            total_pages += 1
            if totalImages < max_images:
                text += u'=== {0} ===\n'.format(source_page)
                text += u'<gallery>\n'
                for monument_id, candidates in value.items():
                    total_ids += 1
                    if totalImages < max_images:
                        for candidate in candidates:
                            text += u'File:{0}|{1}\n'.format(
                                candidate, monument_id)
                    totalImages += len(candidates)
                text += u'</gallery>\n'
            else:
                # Past the cap: keep counting for the totals only.
                for monument_id, candidates in value.items():
                    total_ids += 1
                    totalImages += len(candidates)

    if totalImages >= max_images:
        text += (
            u'<!-- Maximum number of images reached: {0}, '
            u'total of unused images: {1} -->\n'.format(
                max_images, totalImages))
        comment = (
            u'Images to be used in monument lists: '
            u'{0} (gallery maximum reached), '
            u'total of unused images: {1}'.format(
                max_images, totalImages))
    else:
        comment = u'Images to be used in monument lists: {0}'.format(
            totalImages)

    pywikibot.debug(text, _logger)
    common.save_to_wiki_or_local(report_page, comment, text, minorEdit=False)

    return {
        'images': totalImages,
        'pages': total_pages,
        'ids': total_ids
    }
コード例 #7
0
ファイル: threadedhttp.py プロジェクト: azatoth/pywikipedia
    def request(self, uri, method="GET", body=None, headers=None,
                max_redirects=None, connection_type=None):
        """Start an HTTP request.

        @param uri: The uri to retrieve
        @param method: (optional) The HTTP method to use. Default is 'GET'
        @param body: (optional) The request body. Default is no body.
        @param headers: (optional) Additional headers to send. Defaults
               include C{connection: keep-alive}, C{user-agent} and
               C{content-type}.
        @param max_redirects: (optional) The maximum number of redirects to
               use for this request. The class instance's max_redirects is
               default
        @param connection_type: (optional) see L{httplib2.Http.request}

        @return: (response, content) tuple

        """
        if max_redirects is None:
            max_redirects = self.max_redirects
        if headers is None:
            headers = {}
        # Prepare headers
        headers.pop('cookie', None)
        req = DummyRequest(uri, headers)
        self.cookiejar.lock.acquire()
        try:
            self.cookiejar.add_cookie_header(req)
        finally:
            self.cookiejar.lock.release()
        headers = req.headers

        # Wikimedia squids: add connection: keep-alive to request headers
        # unless overridden
        headers['connection'] = headers.pop('connection', 'keep-alive')

        # determine connection pool key and fetch connection
        (scheme, authority, request_uri, defrag_uri) = httplib2.urlnorm(
                                                        httplib2.iri2uri(uri))
        conn_key = scheme+":"+authority

        connection = self.connection_pool.pop_connection(conn_key)
        if connection is not None:
            self.connections[conn_key] = connection

        # Redirect hack: we want to regulate redirects
        follow_redirects = self.follow_redirects
        self.follow_redirects = False
        pywikibot.debug(u"%r" % (
                            (uri.replace("%7C","|"), method, body,
                            headers, max_redirects,
                            connection_type),),
                        _logger)
        try:
            (response, content) = httplib2.Http.request(
                                    self, uri, method, body, headers,
                                    max_redirects, connection_type)
        except Exception, e: # what types?
            # return exception instance to be retrieved by the calling thread
            return e
コード例 #8
0
ファイル: common.py プロジェクト: mineo/mb2wikidatabot
    def add_mbid_claim_to_item(self, item, mbid):
        """Attach property ``self.property_id`` with value `mbid` to `item`.

        Calls `self.donefunc` with `mbid` once the claim and its sources
        have been added.

        :type mbid: str
        :type item: pywikibot.ItemPage
        """
        mbid_claim = wp.Claim(const.WIKIDATA_DATASITE, self.property_id)
        mbid_claim.setTarget(mbid)
        wp.debug(
            u"Adding property {pid}, value {mbid} to {title}".format(
                pid=self.property_id, mbid=mbid, title=item.title()),
            layer="")
        if wp.config.simulate:
            wp.output("Simulation, no property has been added")
            return
        try:
            item.addClaim(mbid_claim, True)
        except wp.UserBlocked:
            wp.error("I have been blocked")
            exit(1)
        except wp.Error as err:
            wp.warning(err)
        else:
            wp.debug("Adding the source Claim", layer="")
            mbid_claim.addSources(
                [const.MUSICBRAINZ_CLAIM, const.RETRIEVED_CLAIM], bot=True)
            self.donefunc(mbid)
コード例 #9
0
ファイル: logging.py プロジェクト: harej/reports_bot
def _disable_pywikibot_logging():
    """Tells Pywikibot to not log messages below WARNING level to stderr."""
    import pywikibot

    # Emit one debug message first: this wakes up Pywikibot's logging
    # interface so a later logging call does not override the level we set.
    pywikibot.debug("Disabling routine logging", "logging")
    pywiki_logger = getLogger("pywiki")
    pywiki_logger.setLevel("WARNING")
コード例 #10
0
ファイル: throttle.py プロジェクト: xaster-Kies/pywikibot
    def checkMultiplicity(self):
        """Count running processes for site and set process_multiplicity."""
        global pid
        mysite = self.mysite
        pywikibot.debug('Checking multiplicity: pid = {pid}'.format(pid=pid),
                        _logger)
        with self.lock:
            processes = []
            my_pid = pid or 1  # start at 1 if global pid not yet set
            count = 1
            # open throttle.log
            try:
                f = open(self.ctrlfilename, 'r')
            except IOError:
                if pid:
                    raise
            else:
                # BUG FIX: use a context manager so the file is closed even
                # if parsing raises (previously f.close() could be skipped).
                with f:
                    now = time.time()
                    # Iterate the file directly instead of readlines() to
                    # avoid materializing the whole log.
                    for line in f:
                        # parse line; format is "pid timestamp site"
                        try:
                            line = line.split(' ')
                            this_pid = int(line[0])
                            ptime = int(line[1].split('.')[0])
                            this_site = line[2].rstrip()
                        except (IndexError, ValueError):
                            # Sometimes the file gets corrupted
                            # ignore that line
                            continue
                        if now - ptime > self.releasepid:
                            continue  # process has expired, drop from file
                        if now - ptime <= self.dropdelay \
                           and this_site == mysite \
                           and this_pid != pid:
                            count += 1
                        if this_site != self.mysite or this_pid != pid:
                            processes.append({
                                'pid': this_pid,
                                'time': ptime,
                                'site': this_site
                            })
                        if not pid and this_pid >= my_pid:
                            my_pid = this_pid + 1  # next unused process id

            if not pid:
                pid = my_pid
            self.checktime = time.time()
            processes.append({
                'pid': pid,
                'time': self.checktime,
                'site': mysite
            })
            processes.sort(key=lambda p: (p['pid'], p['site']))
            with suppress(IOError), open(self.ctrlfilename, 'w') as f:
                for p in processes:
                    f.write(FORMAT_LINE.format_map(p))
            self.process_multiplicity = count
            pywikibot.log(
                'Found {} {} processes running, including this one.'.format(
                    count, mysite))
コード例 #11
0
ファイル: logentries.py プロジェクト: Zeffar/Elobot
 def __eq__(self, other):
     """Compare if self is equal to other."""
     if isinstance(other, LogEntry):
         return self.logid() == other.logid() and self.site == other.site
     # Non-LogEntry operands are never equal; note it for debugging.
     pywikibot.debug("'{0}' cannot be compared with '{1}'"
                     .format(type(self).__name__, type(other).__name__),
                     _logger)
     return False
コード例 #12
0
def p(n):
    """Load and return the data of item *n* from the enwiki data repository.

    @param n: item identifier to load.
    """
    pywikibot.debug('going to load %s.' % n, _logger)
    site = pywikibot.Site('en', "wikipedia")
    repo = site.data_repository()

    # BUG FIX: Python 2 `print` statement replaced with the function-call
    # form, valid on both Python 2 and 3 for a single argument.
    print('SD going to load %s' % n)
    return pywikibot.ItemPage(repo, n).get()
コード例 #13
0
 def __eq__(self, other):
     """Compare if self is equal to other."""
     if not isinstance(other, LogEntry):
         # Different types never compare equal; log why for debugging.
         message = "'{0}' cannot be compared with '{1}'".format(
             type(self).__name__, type(other).__name__)
         pywikibot.debug(message, _logger)
         return False
     same_logid = self.logid() == other.logid()
     return same_logid and self.site == other.site
コード例 #14
0
def make_statistics(statistics):
    """
    Output the overall results of the bot as a nice wikitable.

    @param statistics: list of per dataset statistic dicts where the allowed
        keys are: config, totals, report page and cmt.
    """
    site = pywikibot.Site('commons', 'commons')
    page = pywikibot.Page(
        site, u'Commons:Monuments database/Images without id/Statistics')

    title_column = OrderedDict([
        ('code', 'country'),
        ('lang', '[[:en:List of ISO 639-1 codes|lang]]'),
        ('total_with_id', 'Total monuments with suggested id'),
        ('total_without_id', 'Total monuments without suggested id'),
        # ('total_added', 'Total templates automatically added'),
        ('Report page', None),
        ('Commons template', None)
    ])
    numeric_columns = [col for col in title_column if col.startswith('total_')]
    table = StatisticsTable(title_column, numeric_columns)

    for entry in statistics:
        config = entry.get('config')
        totals = entry.get('totals', {})

        # Prefer the harvested totals; fall back to the stored comment.
        with_id_cell = totals.get('with_id') if totals else entry.get('cmt')

        commons_template = None
        if config.get('commonsTemplate'):
            commons_template = u'{{tl|%s}}' % (
                config.get('commonsTemplate'), )

        report_link = None
        if entry.get('report_page'):
            report_link = entry.get('report_page').title(
                as_link=True, with_ns=False, insite=site)

        table.add_row({
            'code': config.get('country'),
            'lang': config.get('lang'),
            'total_with_id': with_id_cell,
            'total_without_id': totals.get('without_id'),
            # 'total_added': totals.get('added'),
            'Report page': report_link,
            'Commons template': commons_template})

    text = table.to_wikitext()

    comment = (
        u'Updating images without id statistics. Total of {total_with_id} '
        u'images with suggested ids and {total_without_id} without.'.format(
            **table.get_sum()))
    pywikibot.debug(text, _logger)
    common.save_to_wiki_or_local(page, comment, text)
コード例 #15
0
def makeStatistics(statistics):
    """Output the overall results of the bot as a nice wikitable."""
    site = pywikibot.Site('commons', 'commons')
    page = pywikibot.Page(
        site,
        u'Commons:Monuments database/Missing commonscat links/Statistics')

    title_column = OrderedDict([
        ('code', 'country'),
        ('lang', None),
        ('total', None),
        ('report_page', 'page'),
        ('row template', None),
        ('Commons template', None)
    ])
    table = StatisticsTable(title_column, ('total', ))

    for entry in statistics:
        config = entry.get('config')

        # The 'total' cell holds the category count, or the stored comment
        # when no count is available.
        total_cell = entry.get('total_cats')
        if total_cell is None:
            total_cell = entry.get('cmt')

        template_link = None
        if config.get('type') != 'sparql':
            template_link = common.get_template_link(
                entry.get('lang'),
                config.get('project', u'wikipedia'),
                config.get('rowTemplate'),
                site)

        commons_link = None
        if config.get('commonsTemplate'):
            commons_link = u'{{tl|%s}}' % (
                config.get('commonsTemplate'), )

        report_link = None
        if entry.get('report_page'):
            report_link = entry.get('report_page').title(
                as_link=True, with_ns=False, insite=site)

        table.add_row({
            'code': entry.get('code'),
            'lang': entry.get('lang'),
            'total': total_cell,
            'report_page': report_link,
            'row template': template_link,
            'Commons template': commons_link})

    text = table.to_wikitext()

    comment = (
        u'Updating missing commonscat links statistics. '
        u'Total missing links: {total_cats}'.format(
            total_cats=table.get_sum('total')))
    pywikibot.debug(text, _logger)
    common.save_to_wiki_or_local(page, comment, text)
コード例 #16
0
ファイル: api.py プロジェクト: anrao91/pywikibot-core
    def __init__(self, **kwargs):
        """Constructor.

        @keyword site: site this request is directed at; defaults to
            pywikibot.Site() when not given.
        @keyword mime_params: MIME parameters for the request; if given,
            the 'mime' keyword may not contradict it.
        @keyword mime: whether to use MIME encoding (default False).
        @keyword throttle: whether the request is subject to throttling
            (default True).
        @keyword max_retries: maximum number of retries; defaults to
            pywikibot.config.max_retries.
        @keyword retry_wait: wait between retries; defaults to
            pywikibot.config.retry_wait.
        @keyword action: mandatory API action; all remaining keywords are
            passed to self.update() as request parameters.
        @raise ValueError: if 'mime' disagrees with 'mime_params', or if
            'action' is missing.
        """
        try:
            self.site = kwargs.pop("site")
        except KeyError:
            self.site = pywikibot.Site()
        if 'mime_params' in kwargs:
            self.mime_params = kwargs.pop('mime_params')
            # mime may not be different from mime_params
            # NOTE(review): self.mime is read here before any assignment in
            # this method — presumably a property derived from mime_params
            # elsewhere in the class; confirm.
            if 'mime' in kwargs and kwargs.pop('mime') != self.mime:
                raise ValueError('If mime_params is set, mime may not differ '
                                 'from it.')
        else:
            self.mime = kwargs.pop('mime', False)
        self.throttle = kwargs.pop('throttle', True)
        self.max_retries = kwargs.pop("max_retries", pywikibot.config.max_retries)
        self.retry_wait = kwargs.pop("retry_wait", pywikibot.config.retry_wait)
        self.params = {}
        if "action" not in kwargs:
            raise ValueError("'action' specification missing from Request.")
        self.update(**kwargs)
        self._warning_handler = None
        # Actions that imply database updates on the server, used for various
        # things like throttling or skipping actions when we're in simulation
        # mode
        self.write = self.params["action"] in (
            "edit", "move", "rollback", "delete", "undelete",
            "protect", "block", "unblock", "watch", "patrol",
            "import", "userrights", "upload", "emailuser",
            "createaccount", "setnotificationtimestamp",
            "filerevert", "options", "purge", "revisiondelete",
            "wbeditentity", "wbsetlabel", "wbsetdescription",
            "wbsetaliases", "wblinktitles", "wbsetsitelink",
            "wbcreateclaim", "wbremoveclaims", "wbsetclaimvalue",
            "wbsetreference", "wbremovereferences"
        )
        # MediaWiki 1.23 allows assertion for any action,
        # whereas earlier WMF wikis and others used an extension which
        # could only allow assert for action=edit.
        #
        # When we can't easily check whether the extension is loaded,
        # to avoid cyclic recursion in the Pywikibot codebase, assume
        # that it is present, which will cause a API warning emitted
        # to the logging (console) if it is not present, but will not
        # otherwise be a problem.
        # This situation is only tripped when one of the first actions
        # on the site is a write action and the extension isn't installed.
        if ((self.write and LV(self.site.version()) >= LV("1.23")) or
                (self.params['action'] == 'edit' and
                 self.site.has_extension('AssertEdit'))):
            pywikibot.debug(u"Adding user assertion", _logger)
            self.params["assert"] = "user"  # make sure user is logged in

        if (self.site.protocol() == 'http' and (config.use_SSL_always or (
                self.params["action"] == "login" and config.use_SSL_onlogin))
                and self.site.family.name in config.available_ssl_project):
            self.site = EnableSSLSiteWrapper(self.site)
コード例 #17
0
ファイル: common.py プロジェクト: mineo/mb2wikidatabot
 def fix_redirect(self, gid, old, new):
     """Fix the redirect *old* by replacing it with *new* for *gid*.

     :param gid str:
     :param old str:
     :param new str:
     """
     wp.debug("Fixing the redirect from %s to %s" % (old, new), layer="")
     edit_note = self.edit_note % (old, new)
     self.client.edit_url(gid, old, new, edit_note)
     self._performed_edit()
コード例 #18
0
ファイル: common.py プロジェクト: amCap1712/mb2wikidatabot
 def fix_redirect(self, gid, old, new):
     """Edit the URL for *gid*, replacing the redirect *old* with *new*.

     :param gid str:
     :param old str:
     :param new str:
     """
     message = "Fixing the redirect from %s to %s" % (old, new)
     wp.debug(message, layer="")
     self.client.edit_url(gid, old, new, self.edit_note % (old, new))
     self._performed_edit()
コード例 #19
0
def make_statistics(statistics):
    """Output the overall results for unknown fields as a nice wikitable."""
    site = pywikibot.Site('commons', 'commons')
    page = pywikibot.Page(
        site, 'Commons:Monuments database/Unknown fields/Statistics')

    title_column = OrderedDict([
        ('code', 'country'),
        ('lang', None),
        ('total_fields', 'Total fields'),
        ('total_usages', 'Total usage of fields'),
        ('total_pages', 'Total pages containing fields'),
        ('report_page', 'Report page'),
        ('row_template', 'Row template'),
        ('header_template', 'Header template')
    ])
    numeric_columns = [col for col in title_column if col.startswith('total')]
    table = StatisticsTable(title_column, numeric_columns)

    for entry in statistics:
        if not entry:
            # sparql harvests don't generate statistics
            continue
        config = entry.get('config')
        project = config.get('project', u'wikipedia')

        row_link = common.get_template_link(
            config.get('lang'), project, config.get('rowTemplate'), site)
        header_link = common.get_template_link(
            config.get('lang'), project, config.get('headerTemplate'), site)
        report_link = entry.get('report_page').title(
            as_link=True, with_ns=False, insite=site)

        table.add_row({
            'code': config.get('country'),
            'lang': config.get('lang'),
            'total_fields': entry.get('total_fields'),
            'total_usages': entry.get('total_usages'),
            'total_pages': entry.get('total_pages'),
            'report_page': report_link,
            'row_template': row_link,
            'header_template': header_link
        })

    text = table.to_wikitext()

    comment = (
        'Updating unknown fields statistics. Total of {total_fields} '
        'unknown fields used {total_usages} times on {total_pages} different '
        'pages.'.format(**table.get_sum()))
    pywikibot.debug(text, _logger)
    common.save_to_wiki_or_local(page, comment, text)
コード例 #20
0
ファイル: threadedhttp.py プロジェクト: azatoth/pywikipedia
    def __init__(self, maxnum=5):
        """Initialize an empty connection pool.

        @param maxnum: Maximum number of connections kept per identifier;
                       connections added beyond this limit are dropped.
        """
        pywikibot.debug(u"Creating connection pool.", _logger)
        self.maxnum = maxnum
        self.lock = threading.Lock()
        self.connections = {}
コード例 #21
0
ファイル: threadedhttp.py プロジェクト: dtbinh/code
    def __init__(self, maxnum=5):
        """Set up the pool with no pooled connections.

        @param maxnum: Maximum number of connections per identifier;
                       excess connections added to the pool are dropped.
        """
        pywikibot.debug(u"Creating connection pool.", _logger)
        self.connections = dict()
        self.maxnum = maxnum
        self.lock = threading.Lock()
コード例 #22
0
ファイル: logentries.py プロジェクト: dtbinh/code
 def _createFromData(self, logdata):
     """
     Check for logtype from data, and create the correct LogEntry.

     @param logdata: log entry data dict as received from the API.
     @raise Error: if *logdata* has no 'type' key.
     """
     try:
         logtype = logdata['type']
         return LogEntryFactory._getEntryClass(logtype)(logdata)
     except KeyError:
         # BUG FIX: logdata is a dict; concatenating it onto a unicode
         # string raised TypeError and masked the intended Error below.
         pywikibot.debug(u"API log entry received:\n%s" % (logdata,),
                         _logger)
         raise Error("Log entry has no 'type' key")
コード例 #23
0
 def _createFromData(self, logdata):
     """
     Check for logtype from data, and create the correct LogEntry.

     @param logdata: log entry data dict as received from the API.
     @raise Error: when *logdata* carries no 'type' key.
     """
     try:
         logtype = logdata['type']
         return LogEntryFactory._getEntryClass(logtype)(logdata)
     except KeyError:
         # BUG FIX: the original added the dict directly onto a string,
         # which raises TypeError instead of logging; format it instead.
         pywikibot.debug(u"API log entry received:\n{0}".format(logdata),
                         _logger)
         raise Error("Log entry has no 'type' key")
コード例 #24
0
ファイル: common.py プロジェクト: reosarevok/mb2wikidatabot
    def process_result(self, result):
        """Process one result row from the MusicBrainz query.

        Resolves the wiki link to a Wikidata item and, unless the item
        already carries property ``self.property_id`` with the entity's
        MBID, adds the claim via add_mbid_claim_to_item().

        @param result: tuple of (entity_gid, url_gid, wikipage, rel_id,
            link_type_id).
        """
        entity_gid, url_gid, wikipage, rel_id, link_type_id = result
        wp.debug("» {wp} https://musicbrainz.org/{entitytype}/{gid}".format(
            entitytype=self._current_entity_type.replace("_", "-"),
            wp=wikipage,
            gid=entity_gid),
                 layer="")
        try:
            itempage = get_wikidata_itempage_from_wikilink(wikipage)
        except wp.exceptions.SiteDefinitionError:
            wp.warning("{page} no supported family".format(page=wikipage))
            return
        except wp.exceptions.InvalidTitleError as e:
            wp.error(
                "Bad or invalid title received while processing {page}".format(
                    page=wikipage))
            wp.exception(e, tb=True)
            return
        except SkipPage as e:
            wp.warning("{page} is being skipped because: {reason}".format(
                page=wikipage, reason=e))
            return
        except IsRedirectPage as e:
            wp.debug("{page} is a redirect".format(page=wikipage), layer="")
            if self.can_edit:
                self.fix_redirect(url_gid, e.old, e.new)
            return
        except ValueError as e:
            wp.output(e)
            return
        except PageGone:
            # The page no longer exists; mark the relationship as ended.
            if self.can_edit:
                self.end_removed(rel_id, link_type_id, entity_gid, url_gid,
                                 self._current_entity_type, wikipage)
            return
        if itempage is None:
            wp.debug(
                u"There's no wikidata page for {mbid}".format(mbid=entity_gid),
                layer="")
            return

        # Skip when the claim is already present (case-insensitive property
        # id comparison).
        if any((key.lower() == self.property_id.lower()
                and claim.target == entity_gid)
               for key, claims in itempage.claims.items() for claim in claims):
            wp.debug(
                u"{page} already has property {pid} with value {mbid}".format(
                    page=wikipage, mbid=entity_gid, pid=self.property_id),
                layer="")
            self.donefunc(entity_gid)
            return

        wp.debug("{mbid} is not linked in Wikidata".format(mbid=entity_gid),
                 layer="")
        self.add_mbid_claim_to_item(itempage, entity_gid)
コード例 #25
0
    def storecookiedata(self, data: str) -> None:
        """
        Write the raw cookie data to the local ``pywikibot.lwp`` file.

        @param data: The raw data as returned by getCookie()
        """
        # THIS IS OVERRIDDEN IN data/api.py
        cookie_path = config.datafilepath('pywikibot.lwp')
        pywikibot.debug('Storing cookies to {}'.format(cookie_path), _logger)
        with open(cookie_path, 'w') as cookie_file:
            cookie_file.write(data)
コード例 #26
0
def categorizeImage(
        countrycode, lang, commonsTemplateName, commonsCategoryBase,
        commonsCatTemplates, page, conn, cursor, harvest_type):
    """Replace the base monument category on an image page with specific ones.

    Looks up the monument identifier found on ``page`` in the monuments
    database (via ``conn``/``cursor``) and, when categories can be derived,
    delegates the actual edit to
    ``replace_default_cat_with_new_categories_in_image``.

    Returns False when the page is skipped for any reason; otherwise
    returns the result of the replacement call (or None when no new
    categories were found).
    """
    pywikibot.log(u'Working on: %s' % page.title())
    commonsTemplate = _get_commons_template(commonsTemplateName)
    currentcats = list(page.categories())
    # NOTE(review): membership test compares commonsCategoryBase against
    # page.categories() items — presumably both are Category pages or
    # comparable titles; confirm against _get_commons_template/callers.
    if commonsCategoryBase not in currentcats:
        pywikibot.log(u'%s category not found at: %s. Someone probably already categorized it.' % (
            commonsCategoryBase, page.title()))
        return False

    if u'Wikipedia image placeholders for cultural heritage monuments' in currentcats:
        pywikibot.log(u'%s in %s is a placeholder, skipping it.' % (
            page.title(), commonsCategoryBase))
        return False

    templates = page.templates()
    if commonsTemplate not in templates:
        pywikibot.log(u'%s template not found at: %s' % (
            commonsTemplate, page.title()))
        return False

    try:
        monumentId = get_monument_id(page, commonsTemplate)
    except NoMonumentIdentifierFoundException:
        pywikibot.warning(u'Didn\'t find a valid monument identifier at: %s' % (
            page.title(),))
        return False

    monData = getMonData(countrycode, lang, monumentId, conn, cursor)
    if not monData:
        # Retry with the id coerced to int: the database may store numeric
        # identifiers while the template supplied a string.
        try:
            monumentId = int(monumentId)
            monData = getMonData(countrycode, lang, monumentId, conn, cursor)
        except ValueError:
            pywikibot.debug(
                u'Can\'t convert %s to an integer' % (monumentId,), _logger)

    if not monData:
        # Triage as log since there are plenty of valid reasons for this
        pywikibot.log(
            u'Monument with id %s not in monuments database' % (monumentId, ))
        return False

    (newcats, categorisation_method) = get_new_categories(monumentId, monData, lang, commonsCatTemplates, harvest_type)

    # See if one of the three options worked
    if newcats:
        comment = u'Adding categories based on [[Template:%s]] with identifier %s (method %s)' % (
            commonsTemplateName, monumentId, categorisation_method)
        return replace_default_cat_with_new_categories_in_image(
            page, commonsCategoryBase, newcats, comment, verbose=True)
    else:
        pywikibot.log(u'Categories not found for %s' % page.title())
コード例 #27
0
 def __missing__(self, key):
     """Log the missing key and raise HiddenKeyError or KeyError."""
     pywikibot.debug('API log entry received:\n' + repr(self), _logger)
     # The key is 'hidden' when the API response carries the matching
     # *hidden flag for it; the user then lacks view permission.
     restricted = (
         (key in ('ns', 'title', 'pageid', 'logpage', 'params', 'action')
          and 'actionhidden' in self)
         or (key == 'comment' and 'commenthidden' in self)
         or (key == 'user' and 'userhidden' in self))
     if restricted:
         raise HiddenKeyError(
             "Log entry ({0}) has a hidden '{1}' key and you don't have "
             'permission to view it.'.format(self._type, key))
     raise KeyError("Log entry (%s) has no '%s' key" % (self._type, key))
コード例 #28
0
ファイル: version.py プロジェクト: djff/pywikibot-core
def getversiondict():
    """Get version info for the package.

    @return:
        - tag (name for the repository),
        - rev (current revision identifier),
        - date (date of current revision),
        - hash (git hash for the current revision)
    @rtype: C{dict} of four C{str}
    """
    global cache
    # Version detection is expensive; reuse the module-level cache.
    if cache:
        return cache

    _program_dir = _get_program_dir()
    exceptions = {}

    # Try each detection backend in preference order; first success wins.
    for vcs_func in (getversion_git,
                     getversion_svn_setuptools,
                     getversion_svn,
                     getversion_nightly,
                     getversion_package):
        try:
            (tag, rev, date, hsh) = vcs_func(_program_dir)
        except Exception as e:
            exceptions[vcs_func] = e
        else:
            break
    else:
        # nothing worked; version unknown (but suppress exceptions)
        # the value is most likely '$Id' + '$', it means that
        # pywikibot was imported without using version control at all.
        tag, rev, date, hsh = (
            '', '-1 (unknown)', '0 (unknown)', '(unknown)')

    # git and svn can silently fail, as it may be a nightly.
    # Only warn loudly when even the package fallback failed.
    if getversion_package in exceptions:
        warn('Unable to detect version; exceptions raised:\n%r'
             % exceptions, UserWarning)
    elif exceptions:
        pywikibot.debug('version algorithm exceptions:\n%r'
                        % exceptions, _logger)

    # NOTE(review): 'basestring' only exists on Python 2 (or via a compat
    # shim defined elsewhere in this file) — NameError on plain Python 3.
    if isinstance(date, basestring):
        datestring = date
    elif isinstance(date, time.struct_time):
        datestring = time.strftime('%Y/%m/%d, %H:%M:%S', date)
    else:
        warn('Unable to detect package date', UserWarning)
        datestring = '-2 (unknown)'

    cache = dict(tag=tag, rev=rev, date=datestring, hsh=hsh)
    return cache
コード例 #29
0
ファイル: version.py プロジェクト: emijrp/pywikibot-core
def getversiondict():
    """Get version info for the package.

    @return:
        - tag (name for the repository),
        - rev (current revision identifier),
        - date (date of current revision),
        - hash (git hash for the current revision)
    @rtype: C{dict} of four C{str}
    """
    global cache
    # Serve the memoised result when a previous call already detected it.
    if cache:
        return cache

    _program_dir = _get_program_dir()
    exceptions = {}

    # First backend that succeeds provides the version tuple.
    for vcs_func in (getversion_git,
                     getversion_svn_setuptools,
                     getversion_nightly,
                     getversion_svn,
                     getversion_package):
        try:
            (tag, rev, date, hsh) = vcs_func(_program_dir)
        except Exception as e:
            exceptions[vcs_func] = e
        else:
            break
    else:
        # nothing worked; version unknown (but suppress exceptions)
        # the value is most likely '$Id' + '$', it means that
        # pywikibot was imported without using version control at all.
        tag, rev, date, hsh = (
            '', '-1 (unknown)', '0 (unknown)', '(unknown)')

    # git and svn can silently fail, as it may be a nightly.
    # A failing package fallback is the only case worth a user warning.
    if getversion_package in exceptions:
        warn('Unable to detect version; exceptions raised:\n%r'
             % exceptions, UserWarning)
    elif exceptions:
        pywikibot.debug('version algorithm exceptions:\n%r'
                        % exceptions, _logger)

    # NOTE(review): 'basestring' implies Python 2 or a compat alias defined
    # elsewhere in this file — verify before running under Python 3.
    if isinstance(date, basestring):
        datestring = date
    elif isinstance(date, time.struct_time):
        datestring = time.strftime('%Y/%m/%d, %H:%M:%S', date)
    else:
        warn('Unable to detect package date', UserWarning)
        datestring = '-2 (unknown)'

    cache = dict(tag=tag, rev=rev, date=datestring, hsh=hsh)
    return cache
コード例 #30
0
ファイル: logentries.py プロジェクト: Zeffar/Elobot
 def __missing__(self, key):
     """Debug when the key is missing."""
     pywikibot.debug(u"API log entry received:\n" + repr(self),
                     _logger)
     # Map the requested key to the API flag that would hide it.
     if key in ('ns', 'title', 'pageid', 'logpage', 'params', 'action'):
         flag = 'actionhidden'
     elif key == 'comment':
         flag = 'commenthidden'
     elif key == 'user':
         flag = 'userhidden'
     else:
         flag = None
     if flag is not None and flag in self:
         raise HiddenKeyError(
             "Log entry ({0}) has a hidden '{1}' key and you don't have "
             'permission to view it.'.format(self._type, key))
     raise KeyError("Log entry (%s) has no '%s' key" % (self._type, key))
コード例 #31
0
    def storecookiedata(self, data):
        """
        Store cookie data.

        The argument data is the raw data, as returned by getCookie().

        Returns nothing.
        """
        # THIS IS OVERRIDDEN IN data/api.py
        lwp_path = config.datafilepath('pywikibot.lwp')
        pywikibot.debug('Storing cookies to %s' % lwp_path, _logger)
        with open(lwp_path, 'w') as lwp_file:
            lwp_file.write(data)
コード例 #32
0
 def __iter__(self):
     """Yield JSON-decoded 'message' events passing self.streamfilter.

     Stops after self._total items when a limit was set; reconnects to
     the event source (resuming from the last seen event id) on
     connection errors.
     """
     n = 0
     event = None
     ignore_first_empty_warning = True
     while self._total is None or n < self._total:
         # Lazily (re)create the SSE source; it is deleted on errors below.
         if not hasattr(self, 'source'):
             self.source = EventSource(**self.sse_kwargs)
             # sseclient >= 0.0.18 is required for eventstreams (T184713)
             # we don't have a version string inside but the instance
             # variable 'chunk_size' was newly introduced with 0.0.18
             if not hasattr(self.source, 'chunk_size'):
                 warning(
                     'You may not have the right sseclient version;\n'
                     'sseclient >= 0.0.18 is required for eventstreams.\n'
                     "Install it with 'pip install \"sseclient>=0.0.18\"'")
         try:
             event = next(self.source)
         except (ProtocolError, socket.error, httplib.IncompleteRead) as e:
             warning('Connection error: {0}.\n'
                     'Try to re-establish connection.'.format(e))
             del self.source
             # Resume the stream from the last event seen, if any.
             if event is not None:
                 self.sse_kwargs['last_id'] = event.id
             continue
         if event.event == 'message':
             if event.data:
                 try:
                     element = json.loads(event.data)
                 except ValueError as e:
                     warning(
                         'Could not load json data from\n{0}\n{1}'.format(
                             event, e))
                 else:
                     if self.streamfilter(element):
                         n += 1
                         yield element
             elif not ignore_first_empty_warning:
                 warning('Empty message found.')
             else:
                 # Suppress the warning for the very first empty message.
                 ignore_first_empty_warning = False
         elif event.event == 'error':
             warning('Encountered error: {0}'.format(event.data))
         else:
             warning('Unknown event {0} occurred.'.format(event.event))
     else:
         # while/else: only reached when the loop condition turned false,
         # i.e. the item limit was exceeded.
         debug(
             '{0}: Stopped iterating due to '
             'exceeding item limit.'.format(self.__class__.__name__),
             _logger)
     del self.source
コード例 #33
0
ファイル: threadedhttp.py プロジェクト: azatoth/pywikipedia
 def __del__(self):
     """Destructor to close all connections in the pool."""
     self.lock.acquire()
     try:
         pywikibot.debug(
             u"Closing connection pool (%s connections)"
             % len(self.connections), _logger)
         for pooled in self.connections.values():
             for connection in pooled:
                 connection.close()
     except AttributeError:
         # Module globals (logger etc.) may already be gone when the
         # interpreter is shutting down.
         pass
     finally:
         self.lock.release()
コード例 #34
0
ファイル: logentries.py プロジェクト: KaiCode2/pywikibot-core
    def _createFromData(self, logdata):
        """
        Check for logtype from data, and creates the correct LogEntry.

        @param logdata: log entry data
        @type logdata: dict
        @rtype: LogEntry
        @raise Error: if logdata has no 'type' key
        """
        try:
            logtype = logdata["type"]
        except KeyError:
            # Format the dict instead of concatenating it: str + dict
            # would raise TypeError and mask the real problem.
            pywikibot.debug("API log entry received:\n{0}".format(logdata),
                            _logger)
            raise Error("Log entry has no 'type' key")
        # Instantiate outside the try block so a KeyError raised while
        # building the entry is not mistaken for a missing 'type' key.
        return LogEntryFactory._getEntryClass(logtype)(logdata, self._site)
コード例 #35
0
ファイル: common.py プロジェクト: reosarevok/mb2wikidatabot
 def fix_redirect(self, gid, old, new):
     """Point the stored URL entry from *old* to *new* via the client.

     :param gid str:
     :param old str:
     :param new str:
     """
     if wp.config.simulate:
         wp.output(
             "Simulation, not fixing the redirect from %s to %s"
             % (old, new))
         return
     client = self.client
     if client is not None:
         wp.debug("Fixing the redirect from %s to %s" % (old, new),
                  layer="")
         client.edit_url(gid, old, new, self.edit_note % (old, new))
コード例 #36
0
    def set_maximum_items(self, value: int):
        """
        Set the maximum number of items to be retrieved from the stream.

        If not called, most queries will continue as long as there is
        more data to be retrieved from the stream.

        @param value: The value of maximum number of items to be retrieved
            in total to set.
        """
        # None means "no limit": leave the current setting untouched.
        if value is None:
            return
        self._total = int(value)
        debug('{}: Set limit (maximum_items) to {}.'
              .format(self.__class__.__name__, self._total), _logger)
コード例 #37
0
    def storecookiedata(self, data):
        """
        Store cookie data.

        The argument data is the raw data, as returned by getCookie().

        Returns nothing.
        """
        # THIS IS OVERRIDDEN IN data/api.py
        filename = config.datafilepath("pywikibot.lwp")
        pywikibot.debug(u"Storing cookies to %s" % filename, _logger)
        # Context manager guarantees the file is closed even when the
        # write raises — the previous open()/close() pair leaked on error.
        with open(filename, "w") as f:
            f.write(data)
コード例 #38
0
    def storecookiedata(self, data):
        """
        Store cookie data.

        The argument data is the raw data, as returned by getCookie().

        Returns nothing.
        """
        # THIS IS OVERRIDDEN IN data/api.py
        target = config.datafilepath('pywikibot.lwp')
        pywikibot.debug('Storing cookies to %s' % target, _logger)
        with open(target, 'w') as stream:
            stream.write(data)
コード例 #39
0
    def _createFromData(self, logdata: dict):
        """
        Check for logtype from data, and creates the correct LogEntry.

        @param logdata: log entry data
        @rtype: LogEntry
        """
        try:
            entry_type = logdata['type']
        except KeyError:
            pywikibot.debug('API log entry received:\n{0}'.format(logdata),
                            _logger)
            raise Error("Log entry has no 'type' key")
        entry_class = LogEntryFactory.get_entry_class(entry_type)
        return entry_class(logdata, self._site)
コード例 #40
0
ファイル: common.py プロジェクト: jc86035/mb2wikidatabot
 def fix_redirect(self, gid, old, new):
     """Ask the client to edit the URL entry from *old* to *new*.

     :param gid str:
     :param old str:
     :param new str:
     """
     if wp.config.simulate:
         message = ("Simulation, not fixing the redirect from %s to %s"
                    % (old, new))
         wp.output(message)
         return
     if self.client is None:
         return
     wp.debug("Fixing the redirect from %s to %s" % (old, new), layer="")
     self.client.edit_url(gid, old, new, self.edit_note % (old, new))
コード例 #41
0
 def __del__(self):
     """Destructor to close all connections in the pool."""
     self.lock.acquire()
     try:
         count = len(self.connections)
         pywikibot.debug(u"Closing connection pool (%s connections)"
                         % count,
                         _logger)
         for connection_list in self.connections.values():
             for conn in connection_list:
                 conn.close()
     except (AttributeError, TypeError):
         # Happens at interpreter shutdown when the logger (or other
         # module globals) have already been torn down.
         pass
     finally:
         self.lock.release()
コード例 #42
0
ファイル: threadedhttp.py プロジェクト: azatoth/pywikipedia
 def run(self):
     """Serve HTTP requests from the queue until the shutdown sentinel."""
     # Each queue item is either an HttpRequest object or None
     # (the signal to shut down this thread).
     pywikibot.debug(u"Thread started, waiting for requests.", _logger)
     while True:
         request = self.queue.get()
         if request is None:
             pywikibot.debug(u"Shutting down thread.", _logger)
             return
         try:
             request.data = self.http.request(*request.args,
                                              **request.kwargs)
         finally:
             # Always release the caller's lock, even if the request fails.
             if request.lock:
                 request.lock.release()
コード例 #43
0
ファイル: logentries.py プロジェクト: CCXXXI/pywikibot
    def _create_from_data(self, logdata: Dict[str, Any]) -> LogEntry:
        """
        Check for logtype from data, and creates the correct LogEntry.

        :param logdata: log entry data
        """
        try:
            entry_type = logdata['type']
        except KeyError:
            pywikibot.debug('API log entry received:\n{}'.format(logdata),
                            _logger)
            raise Error("Log entry has no 'type' key")

        cls = LogEntryFactory.get_entry_class(entry_type)
        return cls(logdata, self._site)
コード例 #44
0
ファイル: threadedhttp.py プロジェクト: dtbinh/code
 def run(self):
     """Process queued HTTP requests; stop on the None sentinel."""
     # Queue items are HttpRequest objects; None shuts the thread down.
     pywikibot.debug(u"Thread started, waiting for requests.", _logger)
     while True:
         work = self.queue.get()
         if work is None:
             break
         try:
             work.data = self.http.request(*work.args, **work.kwargs)
         finally:
             if work.lock:
                 work.lock.release()
     pywikibot.debug(u"Shutting down thread.", _logger)
コード例 #45
0
ファイル: threadedhttp.py プロジェクト: dtbinh/code
    def _follow_redirect(self, uri, method, body, headers, response, content,
                         max_redirects):
        """Internal function to follow a redirect received by L{request}.

        Validates the redirect response, fixes up relative Location
        headers, caches permanent (301) redirects, and re-issues the
        request against the new location with a decremented redirect
        budget.  Raises when no Location header is present or the
        redirect limit is exhausted.
        """
        (scheme, authority, absolute_uri,
         defrag_uri) = httplib2.urlnorm(httplib2.iri2uri(uri))
        # Cache key is the defragmented URI; no cache means no key.
        if self.cache:
            cachekey = defrag_uri
        else:
            cachekey = None

        # Pick out the location header and basically start from the beginning
        # remembering first to strip the ETag header and decrement our 'depth'
        # (300 Multiple Choices is the only redirect status allowed to
        # omit Location).
        if "location" not in response and response.status != 300:
            raise httplib2.RedirectMissingLocation(
                "Redirected but the response is missing a Location: header.",
                response, content)
        # Fix-up relative redirects (which violate an RFC 2616 MUST)
        if "location" in response:
            location = response['location']
            (scheme, authority, path, query,
             fragment) = httplib2.parse_uri(location)
            # No authority means the Location was relative; resolve it
            # against the original request URI.
            if authority is None:
                response['location'] = httplib2.urlparse.urljoin(uri, location)
                pywikibot.debug(
                    u"Relative redirect: changed [%s] to [%s]" %
                    (location, response['location']), _logger)
        # 301 on a safe method is cacheable as a permanent redirect.
        if response.status == 301 and method in ["GET", "HEAD"]:
            response['-x-permanent-redirect-url'] = response['location']
            if "content-location" not in response:
                response['content-location'] = absolute_uri
            httplib2._updateCache(headers, response, content, self.cache,
                                  cachekey)

        # Conditional-request headers must not leak into the follow-up.
        headers.pop('if-none-match', None)
        headers.pop('if-modified-since', None)

        if "location" in response:
            location = response['location']
            # 303 See Other forces the follow-up to GET (pre-ternary
            # and/or idiom); otherwise keep the original method.
            redirect_method = (
                (response.status == 303) and
                (method not in ["GET", "HEAD"])) and "GET" or method
            return self.request(location,
                                redirect_method,
                                body=body,
                                headers=headers,
                                max_redirects=max_redirects - 1)
        else:
            raise httplib2.RedirectLimit(
                "Redirected more times than redirection_limit allows.",
                response, content)
コード例 #46
0
ファイル: logentries.py プロジェクト: Zeffar/Elobot
    def _createFromData(self, logdata):
        """
        Check for logtype from data, and creates the correct LogEntry.

        @param logdata: log entry data
        @type logdata: dict
        @rtype: LogEntry
        """
        if 'type' in logdata:
            return LogEntryFactory.get_entry_class(
                logdata['type'])(logdata, self._site)
        pywikibot.debug('API log entry received:\n{0}'.format(logdata),
                        _logger)
        raise Error("Log entry has no 'type' key")
コード例 #47
0
    def set_maximum_items(self, value):
        """
        Set the maximum number of items to be retrieved from the stream.

        If not called, most queries will continue as long as there is
        more data to be retrieved from the stream.

        @param value: The value of maximum number of items to be retrieved
            in total to set.
        @type value: int
        """
        # A None value keeps the stream unlimited.
        if value is None:
            return

        self._total = int(value)
        debug('{0}: Set limit (maximum_items) to {1}.'
              .format(self.__class__.__name__, self._total), _logger)
コード例 #48
0
ファイル: threadedhttp.py プロジェクト: azatoth/pywikipedia
    def _follow_redirect(self, uri, method, body, headers, response,
                         content, max_redirects):
        """Internal function to follow a redirect received by L{request}.

        Normalizes the target URI, repairs relative Location headers,
        caches permanent (301) redirects for safe methods, and re-issues
        the request with a decremented redirect budget.  Raises when no
        Location header is present or the redirect limit is exhausted.
        """
        (scheme, authority, absolute_uri, defrag_uri) = httplib2.urlnorm(
                                                          httplib2.iri2uri(uri))
        if self.cache:
            cachekey = defrag_uri
        else:
            cachekey = None

        # Pick out the location header and basically start from the beginning
        # remembering first to strip the ETag header and decrement our 'depth'
        if "location" not in response and response.status != 300:
            raise httplib2.RedirectMissingLocation(
                "Redirected but the response is missing a Location: header.",
                response, content)
        # Fix-up relative redirects (which violate an RFC 2616 MUST)
        if "location" in response:
            location = response['location']
            (scheme, authority, path, query, fragment) = httplib2.parse_uri(
                                                                    location)
            # Use identity comparison with None (PEP 8); '== None' relied
            # on __eq__ and is the non-idiomatic spelling.
            if authority is None:
                response['location'] = httplib2.urlparse.urljoin(uri, location)
                pywikibot.debug(u"Relative redirect: changed [%s] to [%s]"
                                     % (location, response['location']),
                                _logger)
        if response.status == 301 and method in ["GET", "HEAD"]:
            response['-x-permanent-redirect-url'] = response['location']
            if "content-location" not in response:
                response['content-location'] = absolute_uri
            httplib2._updateCache(headers, response, content, self.cache,
                                  cachekey)

        # Drop conditional headers so the follow-up is unconditional.
        headers.pop('if-none-match', None)
        headers.pop('if-modified-since', None)

        if "location" in response:
            location = response['location']
            # 303 See Other forces GET for non-safe methods.
            redirect_method = ((response.status == 303) and
                               (method not in ["GET", "HEAD"])
                               ) and "GET" or method
            return self.request(location, redirect_method, body=body,
                                headers=headers,
                                max_redirects=max_redirects - 1)
        else:
            raise RedirectLimit(
                "Redirected more times than redirection_limit allows.",
                response, content)
コード例 #49
0
ファイル: common.py プロジェクト: mineo/mb2wikidatabot
    def process_result(self, result):
        """Link one MusicBrainz entity to its Wikidata item, if not done yet.

        :param result: (entity_gid, url_gid, wikipage) tuple as produced
            by the query machinery — TODO confirm exact producer.
        """
        entity_gid, url_gid, wikipage = result
        wp.debug("» {wp} https://musicbrainz.org/{entitytype}/{gid}".format(
            entitytype=self._current_entity_type.replace("_", "-"),
            wp=wikipage,
            gid=entity_gid
        ), layer="")
        try:
            itempage = get_wikidata_itempage_from_wikilink(wikipage)
        except wp.NoSuchSite:
            wp.warning("{page} no supported family".format(page=wikipage))
            return
        except (wp.BadTitle, wp.InvalidTitle) as e:
            wp.error("Bad or invalid title received while processing {page}".format(page=wikipage))
            wp.exception(e, tb=True)
            return
        except SkipPage as e:
            wp.warning("{page} is being skipped because: {reason}".format(page=wikipage,
                                                                          reason=e))
            return
        except IsRedirectPage as e:
            # Redirects can be repaired in place when editing is allowed.
            wp.debug("{page} is a redirect".format(page=wikipage), layer="")
            if self.can_edit:
                self.fix_redirect(url_gid, e.old, e.new)
            return
        except ValueError as e:
            wp.output(e)
            return

        if itempage is None:
            wp.debug(u"There's no wikidata page for {mbid}".format(mbid=entity_gid),
                     layer="")
            return

        # Skip when any existing claim of self.property_id already targets
        # this MBID (case-insensitive property comparison).
        if any((key.lower() == self.property_id.lower() and
               claim.target == entity_gid)
               for key, claims in itempage.claims.items() for claim in claims):
            wp.debug(u"{page} already has property {pid} with value {mbid}".
                     format(page=wikipage,
                            mbid=entity_gid,
                            pid=self.property_id), layer="")
            self.donefunc(entity_gid)
            return

        wp.debug("{mbid} is not linked in Wikidata".format(
                  mbid=entity_gid), layer="")
        self.add_mbid_claim_to_item(itempage, entity_gid)
コード例 #50
0
ファイル: api.py プロジェクト: dtbinh/code
    def set_query_increment(self, value):
        """Set the maximum number of items to be retrieved per API query.

        If not called, the default is to ask for "max" items and let the
        API decide how many to send.
        """
        requested = int(value)
        # Clamp to the server-side ceiling when one is known.
        if self.api_limit is None:
            self.query_limit = requested
        else:
            self.query_limit = min(self.api_limit, requested)
        pywikibot.debug(
            u"%s: Set query_limit to %i." %
            (self.__class__.__name__, self.query_limit), _logger)
コード例 #51
0
ファイル: api.py プロジェクト: dtbinh/code
    def update_limit(self):
        """Set query limit for self.module based on api response."""
        for mod in self.module.split('|'):
            for param in self._modules[mod].get("parameters", []):
                if param["name"] != "limit":
                    continue
                # Logged-in users with 'apihighlimits' get the larger cap.
                high = self.site.logged_in() and self.site.has_right(
                    'apihighlimits')
                self.api_limit = int(param["highmax"] if high
                                     else param["max"])
                if self.prefix is None:
                    self.prefix = self._modules[mod]["prefix"]
                pywikibot.debug(
                    u"%s: Set query_limit to %i." %
                    (self.__class__.__name__, self.api_limit), _logger)
                return
コード例 #52
0
ファイル: api.py プロジェクト: wpoa/wiki-imports
    def update_limit(self):
        """Set query limit for self.module based on api response."""
        for mod in self.module.split('|'):
            params = self._modules[mod].get("parameters", [])
            for param in params:
                if param["name"] != "limit":
                    continue
                # Pick the high ceiling only for privileged sessions.
                if self.site.logged_in() and self.site.has_right('apihighlimits'):
                    limit_key = "highmax"
                else:
                    limit_key = "max"
                self.api_limit = int(param[limit_key])
                if self.prefix is None:
                    self.prefix = self._modules[mod]["prefix"]
                pywikibot.debug(u"%s: Set query_limit to %i."
                                % (self.__class__.__name__,
                                   self.api_limit),
                                _logger)
                return
コード例 #53
0
def getversiondict():
    """Get version info for the package.

    :return:
        - tag (name for the repository),
        - rev (current revision identifier),
        - date (date of current revision),
        - hash (git hash for the current revision)
    :rtype: ``dict`` of four ``str``
    """
    program_dir = _get_program_dir()
    exceptions = {}

    # Try each version-control backend in turn; the first that succeeds
    # wins.  The for/else only runs when every backend raised.
    for vcs_func in (getversion_git, getversion_svn, getversion_nightly,
                     getversion_package):
        try:
            tag, rev, date, hsh = vcs_func(program_dir)
            break
        except Exception as e:
            exceptions[vcs_func] = e
    else:
        # nothing worked; version unknown (but suppress exceptions)
        # the value is most likely '$Id' + '$', it means that
        # pywikibot was imported without using version control at all.
        tag, rev, date, hsh = ('', '-1 (unknown)', '0 (unknown)', '(unknown)')
        warn(
            'Unable to detect version; exceptions raised:\n{!r}'.format(
                exceptions), UserWarning)
        exceptions = None

    # Git and SVN can silently fail, as it may be a nightly.
    if exceptions:
        pywikibot.debug(
            'version algorithm exceptions:\n{!r}'.format(exceptions), _logger)

    if isinstance(date, time.struct_time):
        datestring = time.strftime('%Y/%m/%d, %H:%M:%S', date)
    elif isinstance(date, str):
        datestring = date
    else:
        warn('Unable to detect package date', UserWarning)
        datestring = '-2 (unknown)'

    return {'tag': tag, 'rev': rev, 'date': datestring, 'hsh': hsh}
コード例 #54
0
    def __missing__(self, key):
        """Debug when the key is missing.

        HiddenKeyError is raised when the user does not have permission.
        KeyError is raised otherwise.

        It also logs debugging information when a key is missing.
        """
        pywikibot.debug('API log entry received:\n' + repr(self), _logger)
        # A field may be withheld by the API when the matching *hidden
        # flag is present in the entry.
        suppressed = (
            (key in {'action', 'logpage', 'ns', 'pageid', 'params', 'title'}
             and 'actionhidden' in self)
            or (key == 'comment' and 'commenthidden' in self)
            or (key == 'user' and 'userhidden' in self))
        if suppressed:
            raise HiddenKeyError(
                "Log entry ({}) has a hidden '{}' key and you don't have "
                'permission to view it.'.format(self['type'], key))
        raise KeyError("Log entry ({}) has no '{}' key".format(
            self['type'], key))
コード例 #55
0
    def is_wikisource_author_page(self, title):
        """Initialise author_ns if site family is 'wikisource' else pass.

        :param title: page title to test against the author namespace
        :return: True if title is in the author namespace, None otherwise
        """
        if self.site.family.name != 'wikisource':
            return

        author_ns = 0
        try:
            author_ns = self.site.family.authornamespaces[self.site.lang][0]
        except (AttributeError, KeyError):
            pass
        if not author_ns:
            # No author namespace for this language: nothing can match.
            # (The original fell through with author_ns_prefix unbound
            # and raised UnboundLocalError in the debug call below.)
            return
        author_ns_prefix = self.site.namespace(author_ns)
        pywikibot.debug('Author ns: {0}; name: {1}'
                        .format(author_ns, author_ns_prefix), _logger)
        if title.find(author_ns_prefix + ':') == 0:
            author_page_name = title[len(author_ns_prefix) + 1:]
            verbose_output('Found author ' + author_page_name)
            return True
コード例 #56
0
    def pop_connection(self, identifier):
        """Get a connection from identifier's connection pool.

        @param identifier: The pool identifier
        @return: A connection object if found, None otherwise

        """
        # 'with' releases the lock even if pop() raises, replacing the
        # manual acquire/try/finally pattern of the original.
        with self.lock:
            pool = self.connections.get(identifier)
            if pool:
                pywikibot.debug(u"Retrieved connection from '%s' pool."
                                % identifier,
                                _logger)
                return pool.pop()
            return None
コード例 #57
0
    def run(self):
        """Serve queued requests until a shutdown sentinel arrives."""
        # The Queue item is expected to either an HttpRequest object
        # or None (to shut down the thread)
        pywikibot.debug(u"Thread started, waiting for requests.", _logger)
        while True:
            request = self.queue.get()
            if request is None:
                pywikibot.debug(u"Shutting down thread.", _logger)
                return

            # This needs to be set per request, however it is only used
            # the first time the pooled connection is created.
            self.http.disable_ssl_certificate_validation = \
                request.kwargs.pop('disable_ssl_certificate_validation',
                                   False)
            try:
                request.data = self.http.request(*request.args,
                                                 **request.kwargs)
            finally:
                # Wake the caller that is waiting on this request.
                if request.lock:
                    request.lock.release()
コード例 #58
0
    def is_wikisource_author_page(self, title):
        """Initialise author_ns if site family is 'wikisource' else pass.

        :param title: page title to test against the author namespace
        :return: True if title is in the author namespace, None otherwise
        """
        if self.site.family.name != 'wikisource':
            return

        author_ns = 0
        try:
            author_ns = self.site.family.authornamespaces[self.site.lang][0]
        except (AttributeError, KeyError):
            # Narrowed from a bare 'except:' which also swallowed
            # KeyboardInterrupt/SystemExit; only the lookup failures
            # (missing attribute or language key) are expected here.
            pass
        if not author_ns:
            # No author namespace for this language: nothing can match.
            # (The original fell through with author_ns_prefix unbound
            # and raised UnboundLocalError in the debug call below.)
            return
        author_ns_prefix = self.site.namespace(author_ns)
        pywikibot.debug(u'Author ns: %d; name: %s'
                        % (author_ns, author_ns_prefix), _logger)
        if title.find(author_ns_prefix + ':') == 0:
            if pywikibot.config.verbose_output:
                author_page_name = title[len(author_ns_prefix) + 1:]
                pywikibot.output(u'Found author %s' % author_page_name)
            return True
コード例 #59
0
ファイル: common.py プロジェクト: reosarevok/mb2wikidatabot
 def end_removed(self, rel_id, link_type_id, entity_gid, url_gid,
                 entitytype, wikipage):
     """Remove the relationship pointing at a no-longer-existing page.

     :param rel_id str:
     :param link_type_id str:
     :param entity_gid str:
     :param url_gid str:
     :param entitytype str:
     """
     url_entity = {'type': 'url', 'gid': url_gid, 'url': wikipage}
     other_entity = {'type': entitytype, 'gid': entity_gid}
     # Order the two endpoints alphabetically by entity type, so the
     # 'url' entity takes whichever slot its name sorts into.
     if entitytype < 'url':
         entity0, entity1 = other_entity, url_entity
     else:
         entity0, entity1 = url_entity, other_entity
     wp.debug("Removing non existing page %s" % (wikipage), layer="")
     self.client.edit_relationship(rel_id, entity0, entity1, link_type_id,
                                   {}, {}, {}, True,
                                   self.removed_edit_note % (wikipage),
                                   False)
     self._performed_edit()
コード例 #60
0
    def checkMultiplicity(self):
        """Count running processes for site and set process_multiplicity."""
        global pid
        mysite = self.mysite
        pywikibot.debug('Checking multiplicity: pid = {pid}'.format(pid=pid),
                        _logger)
        with self.lock:
            processes = []
            used_pids = set()
            count = 1

            now = time.time()
            for proc in self._read_file(raise_exc=True):
                used_pids.add(proc.pid)
                if now - proc.time > self.releasepid:
                    continue  # process has expired, drop from file
                if now - proc.time <= self.dropdelay \
                   and proc.site == mysite \
                   and proc.pid != pid:
                    count += 1
                if proc.site != self.mysite or proc.pid != pid:
                    processes.append(proc)

            free_pid = (i for i in itertools.count(start=1)
                        if i not in used_pids)
            if not pid:
                pid = next(free_pid)

            self.checktime = time.time()
            processes.append(
                ProcEntry(module_id=self._module_hash(),
                          pid=pid,
                          time=self.checktime,
                          site=mysite))
            self.modules = Counter(p.module_id for p in processes)

            self._write_file(sorted(processes, key=lambda p: p.pid))

            self.process_multiplicity = count
            pywikibot.log(
                'Found {} {} processes running, including this one.'.format(
                    count, mysite))