Esempio n. 1
0
    def is_wikisource_author_page(self, title):
        """Return True if title is an Author: page on a wikisource site.

        @param title: page title to test.
        @type  title: string
        @return: True when the title starts with the local author
            namespace prefix; None otherwise (also None for non-wikisource
            families or languages without an author namespace).
        """
        if self.site.family.name != 'wikisource':
            return

        # Not every language defines an author namespace in the family
        # file; fall back to 0 (main namespace) meaning "none".
        author_ns = 0
        try:
            author_ns = self.site.family.authornamespaces[self.site.lang][0]
        except (AttributeError, KeyError, IndexError):
            pass
        if not author_ns:
            # No author namespace configured: nothing can match, and the
            # prefix would be undefined below (was a NameError before).
            return
        author_ns_prefix = self.site.namespace(author_ns)
        pywikibot.debug(u'Author ns: %d; name: %s'
                        % (author_ns, author_ns_prefix))
        if title.startswith(author_ns_prefix + ':'):
            # Verbose reporting belongs to the matched case (it previously
            # ran only for titles that were NOT author pages).
            if pywikibot.verbose:
                author_page_name = title[len(author_ns_prefix) + 1:]
                pywikibot.output(u'Found author %s' % author_page_name)
            return True
        return
Esempio n. 2
0
    def is_wikisource_author_page(self, title):
        """Return True if title is an Author: page on a wikisource site.

        @param title: page title to test.
        @type  title: string
        @return: True when the title starts with the local author
            namespace prefix; None otherwise (also None for non-wikisource
            families or languages without an author namespace).
        """
        if self.site.family.name != 'wikisource':
            return

        # Not every language defines an author namespace in the family
        # file; fall back to 0 (main namespace) meaning "none".
        author_ns = 0
        try:
            author_ns = self.site.family.authornamespaces[self.site.lang][0]
        except (AttributeError, KeyError, IndexError):
            pass
        if not author_ns:
            # No author namespace configured: nothing can match, and the
            # prefix would be undefined below (was a NameError before).
            return
        author_ns_prefix = self.site.namespace(author_ns)
        pywikibot.debug(u'Author ns: %d; name: %s' %
                        (author_ns, author_ns_prefix))
        if title.startswith(author_ns_prefix + ':'):
            # Verbose reporting belongs to the matched case (it previously
            # ran only for titles that were NOT author pages).
            if pywikibot.verbose:
                author_page_name = title[len(author_ns_prefix) + 1:]
                pywikibot.output(u'Found author %s' % author_page_name)
            return True
        return
Esempio n. 3
0
    def subTemplate(self, content, param):
        """Substitute the template tags in content according to param.

           @param content: Content with tags to substitute.
           @type  content: string
           @param param: Param with data how to substitute tags.
           @type  param: dict

           Returns a tuple (content, substed_tags, metadata) containing
           the new content with tags substituted, a list of those tags
           and a dict of metadata (timestamps, selected url response
           headers) gathered while processing.
        """

        substed_tags = []  # DRTRIGON-73
        metadata     = {'mw-signature': u'~~~~',
                        'mw-timestamp': u'~~~~~',}  # DRTRIGON-132

        # 0.2.) check for 'simple' mode and get additional params
        if param['simple']:
            p = self.site.getExpandedString(param['simple'])
            param.update(pywikibot.extract_templates_and_params(p)[0][1])

        # 0.5.) check cron/date
        if param['cron']:
            # [min] [hour] [day of month] [month] [day of week]
            # (date supported only, thus [min] and [hour] dropped)
            if not (param['cron'][0] == '@'):
                param['cron'] = '* * ' + param['cron']
            entry = crontab.CronTab(param['cron'])
            # find the delay from midnight (does not return 0.0 - but next)
            delay = entry.next(datetime.datetime.now().replace(hour=0,
                                                               minute=0,
                                                               second=0,
                                                               microsecond=0)- \
                               datetime.timedelta(microseconds=1))

            pywikibot.output(u'CRON delay for execution: %.3f (<= %i)'
                             % (delay, self._bot_config['CRONMaxDelay']))

            # not yet due: bail out without touching the content
            if not (delay <= self._bot_config['CRONMaxDelay']):
                return (content, substed_tags, metadata)

        # 1.) getUrl or wiki text
        # (security: check url not to point to a local file on the server,
        #  e.g. 'file://' - same as used in xsalt.py)
        # only the URL schemes listed below are accepted; everything else
        # (notably 'file://') returns the content unchanged
        secure = False
        for item in [u'http://', u'https://',
                     u'mail://', u'local://', u'wiki://']:
            secure = secure or (param['url'][:len(item)] == item)
        param['zip'] = ast.literal_eval(param['zip'])
        if not secure:
            return (content, substed_tags, metadata)
        if   (param['url'][:7] == u'wiki://'):
            url = param['url'][7:].strip('[]')              # enable wiki-links
            if ast.literal_eval(param['expandtemplates']):  # DRTRIGON-93 (only with 'wiki://')
                external_buffer = pywikibot.Page(self.site,
                                                 url).get(expandtemplates=True)
            else:
                external_buffer = self.load( pywikibot.Page(self.site, url) )
        elif (param['url'][:7] == u'mail://'):              # DRTRIGON-101
            url = param['url'].replace(u'{{@}}', u'@')     # e.g. nlwiki
            mbox = SubsterMailbox(
              pywikibot.config.datafilepath(self._bot_config['data_path'],
                                            self._bot_config['mbox_file'], ''))
            external_buffer = mbox.find_data(url)
            mbox.close()
        elif (param['url'][:8] == u'local://'):             # DRTRIGON-131
            if (param['url'][8:] == u'cache/state_bots'):
                # filename hard-coded
                d = shelve.open(pywikibot.config.datafilepath('cache',
                                                              'state_bots'))
                # pformat/literal_eval round-trip yields a plain-dict dump
                # of the shelve contents
                external_buffer = pprint.pformat(
                    ast.literal_eval(pprint.pformat(d)))
                d.close()
            else:
                external_buffer = u'n/a'
        else:
            # consider using 'expires', 'last-modified', 'etag' in order to
            # make the updating data requests more efficient! use those stored
            # on page, if the user placed them, else use the conventional mode.
            # http://www.diveintopython.net/http_web_services/etags.html
            f_url, external_buffer = http.request(self.site, param['url'],
                                                  no_hostname = True,
                                                  back_response = True)
            headers = f_url.headers # same like 'f_url.info()'
            #if param['zip']:
            if ('text/' not in headers['content-type']):
                pywikibot.output(u'Source is of non-text content-type, '
                                 u'using raw data instead.')
                external_buffer = f_url.read()
            del f_url               # free some memory (no need to keep copy)

            # propagate interesting response headers into the metadata dict
            for h in ['content-length', 'date', 'last-modified', 'expires']:
                if h in headers:
                    metadata['url-%s' % h] = headers[h]

        # some intermediate processing (unzip, xlsx2csv, ...)
        if param['zip']:    # 'application/zip', ...
            # zip == True selects the first member, an int N the (N-1)th
            fileno          = 0 if (param['zip'] == True) else (param['zip']-1)
            external_buffer = self.unzip(external_buffer, fileno)
        if param['xlsx']:   # 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
            external_buffer = self.xlsx2csv(external_buffer, param['xlsx'])
        if param['ods']:    # 'application/vnd.oasis.opendocument.spreadsheet'
            external_buffer = self.ods2csv(external_buffer, param['ods'])

        if not ast.literal_eval(param['beautifulsoup']):    # DRTRIGON-88
            # 2.) regexp
            #for subitem in param['regex']:
            subitem = param['regex']
            regex = re.compile(subitem, re.S | re.I)

            # 3.) subst in content
            external_data = regex.search(external_buffer)

            external_data_dict = {}
            if external_data:    # not None
                external_data = external_data.groups()

                pywikibot.output(u'Groups found by regex: %i'
                                 % len(external_data))

                # DRTRIGON-114: Support for named groups in regexs
                if regex.groupindex:
                    for item in regex.groupindex:
                        external_data_dict[u'%s-%s' % (param['value'], item)] = external_data[regex.groupindex[item]-1]
                elif (len(external_data) == 1):
                    external_data_dict = {param['value']: external_data[0]}
                else:
                    external_data_dict = {param['value']: str(external_data)}
            pywikibot.debug( str(external_data_dict) )

            # SECURITY NOTE(review): eval() runs wiki-editable text - code
            # injection risk for anyone able to edit the template config
            param['postproc'] = eval(param['postproc'])
            # should be secured as given below, but needs code changes in wiki too
            #param['postproc'] = ast.literal_eval(param['postproc'])
            for value in external_data_dict:
                external_data = external_data_dict[value]

                # 4.) postprocessing
                func  = param['postproc'][0]    # needed by exec call of self._code
                DATA  = [ external_data ]       #
                args  = param['postproc'][1:]   #
                scope = {}                      # (scope to run in)
                scope.update( locals() )        # (add DATA, *args, ...)
                scope.update( globals() )       # (add imports and else)
                if func:
                    # runs user-configured postproc code; the result is
                    # passed back through the mutable DATA list in scope
                    exec(self._code + (self._bot_config['CodeTemplate'] % func), scope, scope)
                    external_data = DATA[0]
                pywikibot.debug( external_data )

                # 5.) subst content
                (content, tags) = self.subTag(content, value, external_data, int(param['count']))
                substed_tags += tags
        else:
            # DRTRIGON-105: Support for multiple BS template configurations
            value = param['value']
            if value:
                value += u'-'

            # DRTRIGON-88: Enable Beautiful Soup power for Subster
            BS_tags = self.get_BS_regex(value).findall(content)

            pywikibot.output(u'BeautifulSoup tags found by regex: %i' % len(BS_tags))

            prev_content = content

            BS = BeautifulSoup.BeautifulSoup(external_buffer)
            for item in BS_tags:
                # SECURITY NOTE(review): evaluates an attribute path taken
                # from the wiki page against the parsed soup - injection risk
                external_data = eval('BS.%s' % item[1])
                external_data = self._BS_regex_str%{'var1':value+'BS:'+item[1],'var2':value,'cont':external_data}
                content = content.replace(item[0], external_data, 1)

            # mark the BS tag as substituted only if anything actually changed
            if (content != prev_content):
                substed_tags.append(value+'BS')

        metadata['bot-timestamp'] = pywikibot.Timestamp.now().isoformat(' ')

        return (content, substed_tags, metadata)
Esempio n. 4
0
 def process_children(obj, current_user):
     """Scan the children of obj for a user, keeping the first one found.

     @param obj: parsed node whose .children are examined.
     @param current_user: user found so far; kept if already set.
     @return: the first truthy result of process_node over the children,
         or the incoming current_user (possibly None) if none is found.
     """
     pywikibot.debug(u'parsing node: %s' % obj)
     for c in obj.children:
         temp = process_node(c, current_user)
         if temp and not current_user:
             current_user = temp
     # Previously the discovered user was silently discarded (implicit
     # None return); hand it back so callers can actually use it.
     return current_user
Esempio n. 5
0
    def subTemplate(self, content, param):
        """Substitute the template tags in content according to param.

           @param content: Content with tags to substitute.
           @type  content: string
           @param param: Param with data how to substitute tags.
           @type  param: dict

           Returns a tuple (content, substed_tags, metadata) containing
           the new content with tags substituted, a list of those tags
           and a dict of metadata (timestamps, selected url response
           headers) gathered while processing.
        """

        substed_tags = []  # DRTRIGON-73
        metadata     = {'mw-signature': u'~~~~',
                        'mw-timestamp': u'~~~~~',}  # DRTRIGON-132

        # 0.2.) check for 'simple' mode and get additional params
        if param['simple']:
            p = self.site.getExpandedString(param['simple'])
            param.update(pywikibot.extract_templates_and_params(p)[0][1])

        # 0.5.) check cron/date
        if param['cron']:
            # [min] [hour] [day of month] [month] [day of week]
            # (date supported only, thus [min] and [hour] dropped)
            if not (param['cron'][0] == '@'):
                param['cron'] = '* * ' + param['cron']
            entry = crontab.CronTab(param['cron'])
            # find the delay from midnight (does not return 0.0 - but next)
            delay = entry.next(datetime.datetime.now().replace(hour=0,
                                                               minute=0,
                                                               second=0,
                                                               microsecond=0)- \
                               datetime.timedelta(microseconds=1))

            pywikibot.output(u'CRON delay for execution: %.3f (<= %i)'
                             % (delay, self._bot_config['CRONMaxDelay']))

            # not yet due: bail out without touching the content
            if not (delay <= self._bot_config['CRONMaxDelay']):
                return (content, substed_tags, metadata)

        # 1.) getUrl or wiki text
        # (security: check url not to point to a local file on the server,
        #  e.g. 'file://' - same as used in xsalt.py)
        # only the URL schemes listed below are accepted; everything else
        # (notably 'file://') returns the content unchanged
        secure = False
        for item in [u'http://', u'https://',
                     u'mail://', u'local://', u'wiki://']:
            secure = secure or (param['url'][:len(item)] == item)
        param['zip'] = ast.literal_eval(param['zip'])
        if not secure:
            return (content, substed_tags, metadata)
        if   (param['url'][:7] == u'wiki://'):
            url = param['url'][7:].strip('[]')              # enable wiki-links
            if ast.literal_eval(param['expandtemplates']):  # DRTRIGON-93 (only with 'wiki://')
                external_buffer = pywikibot.Page(self.site,
                                                 url).get(expandtemplates=True)
            else:
                external_buffer = self.load( pywikibot.Page(self.site, url) )
        elif (param['url'][:7] == u'mail://'):              # DRTRIGON-101
            url = param['url'].replace(u'{{@}}', u'@')     # e.g. nlwiki
            mbox = SubsterMailbox(
              pywikibot.config.datafilepath(self._bot_config['data_path'],
                                            self._bot_config['mbox_file'], ''))
            external_buffer = mbox.find_data(url)
            mbox.close()
        elif (param['url'][:8] == u'local://'):             # DRTRIGON-131
            if (param['url'][8:] == u'cache/state_bots'):
                # filename hard-coded
                d = shelve.open(pywikibot.config.datafilepath('cache',
                                                              'state_bots'))
                # pformat/literal_eval round-trip yields a plain-dict dump
                # of the shelve contents
                external_buffer = pprint.pformat(
                    ast.literal_eval(pprint.pformat(d)))
                d.close()
            else:
                external_buffer = u'n/a'
        else:
            # consider using 'expires', 'last-modified', 'etag' in order to
            # make the updating data requests more efficient! use those stored
            # on page, if the user placed them, else use the conventional mode.
            # http://www.diveintopython.net/http_web_services/etags.html
            f_url, external_buffer = http.request(self.site, param['url'],
                                                  no_hostname = True,
                                                  back_response = True)
            headers = f_url.headers # same like 'f_url.info()'
            #if param['zip']:
            if ('text/' not in headers['content-type']):
                pywikibot.output(u'Source is of non-text content-type, '
                                 u'using raw data instead.')
                external_buffer = f_url.read()
            del f_url               # free some memory (no need to keep copy)

            # propagate interesting response headers into the metadata dict
            for h in ['content-length', 'date', 'last-modified', 'expires']:
                if h in headers:
                    metadata['url-%s' % h] = headers[h]

        # some intermediate processing (unzip, xlsx2csv, ...)
        if param['zip']:    # 'application/zip', ...
            # zip == True selects the first member, an int N the (N-1)th
            fileno          = 0 if (param['zip'] == True) else (param['zip']-1)
            external_buffer = self.unzip(external_buffer, fileno)
        if param['xlsx']:   # 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
            external_buffer = self.xlsx2csv(external_buffer, param['xlsx'])
        if param['ods']:    # 'application/vnd.oasis.opendocument.spreadsheet'
            external_buffer = self.ods2csv(external_buffer, param['ods'])

        if not ast.literal_eval(param['beautifulsoup']):    # DRTRIGON-88
            # 2.) regexp
            #for subitem in param['regex']:
            subitem = param['regex']
            regex = re.compile(subitem, re.S | re.I)

            # 3.) subst in content
            external_data = regex.search(external_buffer)

            external_data_dict = {}
            if external_data:    # not None
                external_data = external_data.groups()

                pywikibot.output(u'Groups found by regex: %i'
                                 % len(external_data))

                # DRTRIGON-114: Support for named groups in regexs
                if regex.groupindex:
                    for item in regex.groupindex:
                        external_data_dict[u'%s-%s' % (param['value'], item)] = external_data[regex.groupindex[item]-1]
                elif (len(external_data) == 1):
                    external_data_dict = {param['value']: external_data[0]}
                else:
                    external_data_dict = {param['value']: str(external_data)}
            pywikibot.debug( str(external_data_dict) )

            # SECURITY NOTE(review): eval() runs wiki-editable text - code
            # injection risk for anyone able to edit the template config
            param['postproc'] = eval(param['postproc'])
            # should be secured as given below, but needs code changes in wiki too
            #param['postproc'] = ast.literal_eval(param['postproc'])
            for value in external_data_dict:
                external_data = external_data_dict[value]

                # 4.) postprocessing
                func  = param['postproc'][0]    # needed by exec call of self._code
                DATA  = [ external_data ]       #
                args  = param['postproc'][1:]   #
                scope = {}                      # (scope to run in)
                scope.update( locals() )        # (add DATA, *args, ...)
                scope.update( globals() )       # (add imports and else)
                if func:
                    # runs user-configured postproc code; the result is
                    # passed back through the mutable DATA list in scope
                    exec(self._code + (self._bot_config['CodeTemplate'] % func), scope, scope)
                    external_data = DATA[0]
                pywikibot.debug( external_data )

                # 5.) subst content
                (content, tags) = self.subTag(content, value, external_data, int(param['count']))
                substed_tags += tags
        else:
            # DRTRIGON-105: Support for multiple BS template configurations
            value = param['value']
            if value:
                value += u'-'

            # DRTRIGON-88: Enable Beautiful Soup power for Subster
            BS_tags = self.get_BS_regex(value).findall(content)

            pywikibot.output(u'BeautifulSoup tags found by regex: %i' % len(BS_tags))

            prev_content = content

            BS = BeautifulSoup.BeautifulSoup(external_buffer)
            for item in BS_tags:
                # SECURITY NOTE(review): evaluates an attribute path taken
                # from the wiki page against the parsed soup - injection risk
                external_data = eval('BS.%s' % item[1])
                external_data = self._BS_regex_str%{'var1':value+'BS:'+item[1],'var2':value,'cont':external_data}
                content = content.replace(item[0], external_data, 1)

            # mark the BS tag as substituted only if anything actually changed
            if (content != prev_content):
                substed_tags.append(value+'BS')

        metadata['bot-timestamp'] = pywikibot.Timestamp.now().isoformat(' ')

        return (content, substed_tags, metadata)
Esempio n. 6
0
 def process_children(obj, current_user):
     """Run process_node over each child of obj, noting the first user seen.

     @param obj: parsed node whose .children are walked.
     @param current_user: user found so far; left untouched once set.
     """
     pywikibot.debug(u'parsing node: %s' % obj)
     for child in obj.children:
         found = process_node(child, current_user)
         if found and not current_user:
             current_user = found