コード例 #1
0
def main():
    """Run the transcoded-page upload bot over one page of a fixed index.

    An ``IndexPage`` is built for a hard-coded index title on English
    Wikisource, one page generator is requested per page number in
    ``range(6, 7)`` (i.e. page 6 only), and the chained generators are
    handed to ``ubot.UploadTranscodedPageBot``.
    """
    # Other index titles that were tried here previously:
    #   "מפתח:ספר_השרשים.pdf", "Index:ספר_השרשים.pdf"
    index_title = "Index:Catholic_Encyclopedia,_volume_17.djvu"
    wikisource = pywikibot.Site('en', 'wikisource')
    index = IndexPage(wikisource, index_title)

    # NOTE(review): the page numbers were apparently meant to come from an
    # sqlite query ("SELECT DISTINCT page ... ORDER BY page" on example.db);
    # the fixed range below stands in for it.
    # TODO: clarify what `content` means and what to pass as `filter_ql`.
    # TODO: page_gen was reported to raise here ('qualityN
    # prp-pagequality-N" or class="new"'); a local patch (proffread.patch)
    # was added to work around it -- confirm it is still required.
    page_generators = [
        index.page_gen(start=number, end=number, filter_ql=None, content=False)
        for number in range(6, 7)
    ]
    pages = itertools.chain.from_iterable(page_generators)

    pywikibot.output('\nUploading text to %s\n' % index.title(asLink=True))

    uploader = ubot.UploadTranscodedPageBot(pages, site=index.site)
    uploader.run()
コード例 #2
0
    def test_page_gen(self, key):
        """Check IndexPage.page_gen limit validation and quality filtering.

        @param key: key into ``self.sites`` selecting the test data set
        """
        site_data = self.sites[key]
        num, title_num, _label = site_data['get_label']

        index_page = IndexPage(self.site, site_data['index'])
        expected_title = site_data['page'].format(title_num)
        expected_page = ProofreadPage(self.site, expected_title)

        # Invalid (start, end) pairs must be rejected with ValueError.
        for bad_start, bad_end in ((-1, 2), (1, -1), (2, 1)):
            self.assertRaises(
                ValueError, index_page.page_gen, bad_start, bad_end)

        # Allowing quality levels 0-4 yields exactly the expected page.
        pages = list(index_page.page_gen(num, num, filter_ql=range(5)))
        self.assertEqual(pages, [expected_page])

        # Restricting to quality level 0 yields nothing for this page.
        pages = list(index_page.page_gen(num, num, filter_ql=[0]))
        self.assertEqual(pages, [])
コード例 #3
0
    def test_page_gen(self, key):
        """Verify start/end checks and filter_ql handling of page_gen.

        @param key: key into ``self.sites`` selecting the test data set
        """
        entry = self.sites[key]
        num, title_num, _unused_label = entry['get_label']

        index_page = IndexPage(self.site, entry['index'])
        proofread_page = ProofreadPage(
            self.site, entry['page'].format(title_num))

        # A negative start, a negative end and start > end are all errors.
        with self.assertRaises(ValueError):
            index_page.page_gen(-1, 2)
        with self.assertRaises(ValueError):
            index_page.page_gen(1, -1)
        with self.assertRaises(ValueError):
            index_page.page_gen(2, 1)

        # With quality levels 0-4 accepted, the single page is returned.
        all_levels = index_page.page_gen(num, num, filter_ql=range(5))
        self.assertEqual(list(all_levels), [proofread_page])

        # With only quality level 0 accepted, the result is empty.
        level_zero_only = index_page.page_gen(num, num, filter_ql=[0])
        self.assertEqual(list(level_zero_only), [])
コード例 #4
0
def main():
    """Add the category returned by getCategoryNeeded to proofread pages.

    Walks the index pages in category "Indizeak euskaraz" on multilingual
    Wikisource (skipping titles listed in ``done``).  For every existing
    page of each index, the category returned by
    ``getCategoryNeeded(page.quality_level)`` is inserted into a
    ``<noinclude>`` section unless the page text already contains it; any
    string from ``categories`` is stripped from the text first.

    Pages on which the regular expression finds no ``<noinclude>`` section
    are skipped with a message instead of raising IndexError.
    """
    site = pywikibot.Site("mul", "wikisource")
    arts = pywikibot.Category(site, u"Indizeak euskaraz").articles(
        recurse=1, reverse=True, content=False)

    for art in arts:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(art.title())
        if art.title() in done:
            continue

        indexPage = IndexPage(art)
        try:
            pages = indexPage.page_gen(only_existing=True, content=True)
        except ValueError:
            # page_gen rejected this index; move on to the next one.
            continue

        for page in pages:
            if not page.exists():
                continue
            print(page)
            cat = getCategoryNeeded(page.quality_level)
            oldText = page.text
            print(cat)
            if cat in oldText:
                # Already categorised correctly; nothing to do.
                continue

            # Drop every previously used category string.
            newText = oldText
            for oldCat in categories:
                newText = newText.replace(oldCat, "")
            print(newText)

            # Each match is an opening <noinclude> tag plus its (non-empty)
            # content, without the closing tag.
            headerFooter = re.findall(
                r"(<noinclude>(?:[\S\s]+?))(?:<\/noinclude>)", newText)
            if not headerFooter:
                # Previously this fell through to headerFooter[1] and
                # crashed with IndexError.
                pywikibot.output(
                    u"Skipping {0}: no <noinclude> section found".format(
                        page.title()))
                continue
            if len(headerFooter) == 1:
                # Only one section has content, so fill the empty one.
                newText = newText.replace(
                    u"<noinclude></noinclude>",
                    u"<noinclude>{0}</noinclude>".format(cat))
            else:
                # Append the category to the content of the second section.
                footer = headerFooter[1]
                newText = newText.replace(
                    footer, u"{0}\n{1}".format(footer, cat))

            pywikibot.showDiff(oldText, newText)
            page.put(newText, comment=u'Added category {0}'.format(cat),
                     minorEdit=True)
コード例 #5
0
def main(*args):
    """
    Parse the command line and run UploadTextBot on the selected pages.

    If args is an empty list, sys.argv is used.

    Returns False when the -index argument is missing, the site lacks the
    ProofreadPage extension, or the index page does not exist.

    @param args: command line arguments
    @type args: list of unicode
    """
    index = None
    pages = '1-'
    options = {}

    # Options that are still accepted but only trigger a deprecation warning.
    deprecated_messages = {
        '-showdiff': 'The usage of -showdiff option',
        '-force': 'The usage of -force option',
    }

    # Each argument has the form "-name" or "-name:value".
    for raw_arg in pywikibot.handle_args(args):
        name, _, value = raw_arg.partition(':')
        if name in deprecated_messages:
            issue_deprecation_warning(deprecated_messages[name], None, 0)
        elif name == '-index':
            index = value
        elif name == '-pages':
            pages = value
        elif name == '-summary':
            options['summary'] = value
        elif name == '-always':
            options['always'] = True
        else:
            pywikibot.output('Unknown argument %s' % name)

    # An index title is required.
    if not index:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return False

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error('Site %s must have ProofreadPage extension.' % site)
        return False

    index_page = IndexPage(site, index)
    if not index_page.exists():
        pywikibot.error("Page %s doesn't exist." % index_page)
        return False

    # Turn "a-b,c,d-" into a list of (start, end) tuples.  A missing start
    # means 1, a missing end after '-' means the last page of the index,
    # and a bare number selects that single page.
    intervals = []
    for chunk in pages.split(','):
        first, dash, last = chunk.partition('-')
        first = int(first) if first else 1
        if dash:
            last = int(last) if last else index_page.num_pages
        else:
            last = first
        intervals.append((first, last))

    # One generator per interval, requested in ascending order.
    generators = [
        index_page.page_gen(start=first, end=last, filter_ql=[1],
                            content=False)
        for first, last in sorted(intervals)
    ]
    page_stream = itertools.chain.from_iterable(generators)

    pywikibot.output(
        '\nUploading text to %s\n' % index_page.title(asLink=True))

    uploader = UploadTextBot(page_stream, site=index_page.site, **options)
    uploader.run()
コード例 #6
0
def main(*args):
    """
    Parse the command line and run UploadTextBot on the selected pages.

    If args is an empty list, sys.argv is used.

    Returns early (None) when -index is missing, when -force is given
    without -ocr, when the site lacks the ProofreadPage extension, or when
    the index page does not exist.

    @param args: command line arguments
    @type args: str
    """
    index = None
    pages = '1-'
    options = {}

    # Switches that simply set the named option to True.
    boolean_flags = {
        '-showdiff': 'showdiff',
        '-force': 'force',
        '-always': 'always',
    }

    # Each argument has the form "-name" or "-name:value".
    for raw_arg in pywikibot.handle_args(args):
        name, _, value = raw_arg.partition(':')
        if name in boolean_flags:
            options[boolean_flags[name]] = True
        elif name == '-index':
            index = value
        elif name == '-pages':
            pages = value
        elif name == '-summary':
            options['summary'] = value
        elif name == '-ocr':
            # An empty value falls back to 'phetools'.
            options['ocr'] = value or 'phetools'
        elif name == '-threads':
            options['threads'] = int(value)
        else:
            pywikibot.output('Unknown argument ' + name)

    # An index title is required.
    if not index:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return

    # '-force' is only meaningful together with '-ocr'.
    if 'force' in options and 'ocr' not in options:
        pywikibot.error("'-force' can be used with '-ocr' option only.")
        return

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error(
            'Site {} must have ProofreadPage extension.'.format(site))
        return

    index_page = IndexPage(site, index)
    if not index_page.exists():
        pywikibot.error("Page {} doesn't exist.".format(index_page))
        return

    # Turn "a-b,c,d-" into a list of (start, end) tuples.  A missing start
    # means 1, a missing end after '-' means the last page of the index,
    # and a bare number selects that single page.
    intervals = []
    for chunk in pages.split(','):
        first, dash, last = chunk.partition('-')
        first = int(first) if first else 1
        if dash:
            last = int(last) if last else index_page.num_pages
        else:
            last = first
        intervals.append((first, last))

    # One generator per interval, requested in ascending order.
    generators = [
        index_page.page_gen(start=first, end=last, filter_ql=[1],
                            content=True)
        for first, last in sorted(intervals)
    ]
    page_stream = itertools.chain.from_iterable(generators)

    pywikibot.output('\nUploading text to {}\n'.format(
        index_page.title(as_link=True)))

    uploader = UploadTextBot(page_stream, site=index_page.site, **options)
    uploader.run()
コード例 #7
0
def main(*args):
    """
    Parse the command line and run UploadTextBot on the selected pages.

    If args is an empty list, sys.argv is used.

    Returns False when -index is missing, when -force is given without
    -ocr, when the site lacks the ProofreadPage extension, or when the
    index page does not exist.

    @param args: command line arguments
    @type args: list of unicode
    """
    index = None
    pages = '1-'
    options = {}

    # Switches that simply set the named option to True.
    boolean_flags = {
        '-showdiff': 'showdiff',
        '-ocr': 'ocr',
        '-force': 'force',
        '-always': 'always',
    }

    # Each argument has the form "-name" or "-name:value".
    for raw_arg in pywikibot.handle_args(args):
        name, _, value = raw_arg.partition(':')
        if name in boolean_flags:
            options[boolean_flags[name]] = True
        elif name == '-index':
            index = value
        elif name == '-pages':
            pages = value
        elif name == '-summary':
            options['summary'] = value
        else:
            pywikibot.output('Unknown argument %s' % name)

    # An index title is required.
    if not index:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return False

    # '-force' is only meaningful together with '-ocr'.
    if 'force' in options and 'ocr' not in options:
        pywikibot.error("'-force' can be used with '-ocr' option only.")
        return False

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error('Site %s must have ProofreadPage extension.' % site)
        return False

    index_page = IndexPage(site, index)
    if not index_page.exists():
        pywikibot.error("Page %s doesn't exist." % index_page)
        return False

    # Turn "a-b,c,d-" into a list of (start, end) tuples.  A missing start
    # means 1, a missing end after '-' means the last page of the index,
    # and a bare number selects that single page.
    intervals = []
    for chunk in pages.split(','):
        first, dash, last = chunk.partition('-')
        first = int(first) if first else 1
        if dash:
            last = int(last) if last else index_page.num_pages
        else:
            last = first
        intervals.append((first, last))

    # One generator per interval, requested in ascending order.
    generators = [
        index_page.page_gen(start=first, end=last, filter_ql=[1],
                            content=False)
        for first, last in sorted(intervals)
    ]
    page_stream = itertools.chain.from_iterable(generators)

    pywikibot.output(
        '\nUploading text to %s\n' % index_page.title(asLink=True))

    uploader = UploadTextBot(page_stream, site=index_page.site, **options)
    uploader.run()