def crawlerRC():
    """Feed the title of every recent-changes page to modification().

    NOTE(review): a second ``crawlerRC`` defined immediately below shadows
    this one (it adds a namespace filter), so as written this version is
    dead code — confirm which definition is intended before removing either.
    """
    changes = pagegenerators.RecentchangesPageGenerator()
    preloaded = pagegenerators.PreloadingGenerator(changes, 100)
    for page in preloaded:
        modification(page.title())
def crawlerRC():
    """Feed recent-changes titles in namespace 1 (presumably Talk) to
    modification().

    This redefinition shadows the ``crawlerRC`` defined just above it.
    """
    recent = pagegenerators.RecentchangesPageGenerator()
    for page in pagegenerators.PreloadingGenerator(recent, 100):
        # Skip everything outside namespace 1.
        if page.namespace() != 1:
            continue
        modification(page.title())
def main():
    """Command-line entry point: parse arguments, build a page generator,
    and hand the resulting (preloaded) generator to run().

    Recognized arguments (first match of -page / -encat / -newcatfile /
    -recentcat / -newcat wins, because each of those branches ends with
    ``break`` and stops argument parsing):
      -autotitle / -autotext   set flags (read elsewhere — not used below)
      -page[:Title]            work on a single page (prompts if no title)
      -except:X                collect exclusion patterns
      -template:X              remember a template name
      -encat:X                 category name, stripped of namespace prefixes
      -newcatfile:F            read wiki links from file F
      -recentcat[:N]           recent-changes categories (namespace 14)
      -newcat[:N]              newly created categories (namespace 14)
      -namespace:N             restrict the final generator to namespace N
      -summary:S               edit summary for the framework
      anything else            forwarded to pagegenerators.GeneratorFactory
    """
    # Disable the framework's write throttling entirely.
    # NOTE(review): setDelay() is called with no argument — presumably it
    # resets the delay to the framework default; confirm against the
    # wikipedia/pywikibot-compat throttle API.
    wikipedia.config.put_throttle = 0
    wikipedia.put_throttle.setDelay()
    summary_commandline, gen, template = None, None, None
    namespaces, PageTitles, exceptions = [], [], []
    encat, newcatfile = '', ''
    autoText, autoTitle = False, False
    recentcat, newcat = False, False
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg == '-autotitle':
            autoTitle = True
        elif arg == '-autotext':
            autoText = True
        elif arg.startswith('-page'):
            # Bare "-page" (length 5) means no title was given: prompt.
            if len(arg) == 5:
                PageTitles.append(
                    wikipedia.input(u'Which page do you want to chage?'))
            else:
                # Assumes the form "-page:Title" (skips the 6-char prefix).
                PageTitles.append(arg[6:])
            # Only the first -page argument is honored.
            break
        elif arg.startswith('-except:'):
            exceptions.append(arg[8:])
        elif arg.startswith('-template:'):
            template = arg[10:]
        elif arg.startswith('-encat:'):
            # Strip English and Persian category-namespace prefixes.
            encat = arg[7:].replace(u'Category:', u'').replace(
                u'category:', u'').replace(u'رده:', u'')
            break
        elif arg.startswith('-newcatfile:'):
            newcatfile = arg[12:]
            break
        elif arg.startswith('-recentcat'):
            # Drop the colon so "-recentcat:N" becomes "-recentcatN" and
            # the count (if any) starts at index 10.
            arg = arg.replace(':', '')
            if len(arg) == 10:
                genfa = pagegenerators.RecentchangesPageGenerator()
            else:
                genfa = pagegenerators.RecentchangesPageGenerator(
                    number=int(arg[10:]))
            genfa = pagegenerators.DuplicateFilterPageGenerator(genfa)
            # Namespace 14 = categories.
            genfa = pagegenerators.NamespaceFilterPageGenerator(genfa, [14])
            preloadingGen = pagegenerators.PreloadingGenerator(genfa, 60)
            recentcat = True
            break
        elif arg.startswith('-newcat'):
            arg = arg.replace(':', '')
            # Bare "-newcat" defaults to the 100 newest category pages.
            if len(arg) == 7:
                genfa = pagegenerators.NewpagesPageGenerator(
                    100, False, None, 14)
            else:
                genfa = pagegenerators.NewpagesPageGenerator(
                    int(arg[7:]), False, None, 14)
            preloadingGen = pagegenerators.PreloadingGenerator(genfa, 60)
            newcat = True
            break
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        elif arg.startswith('-summary:'):
            wikipedia.setAction(arg[9:])
            summary_commandline = True
        else:
            # Fall through to the generic generator-factory arguments.
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator
    if PageTitles:
        # Explicit page titles override any factory-built generator.
        pages = [
            wikipedia.Page(wikipedia.getSite(), PageTitle)
            for PageTitle in PageTitles
        ]
        gen = iter(pages)
    if recentcat:
        # For each recently changed category, run() over its members on
        # the 'fa' (Persian) wiki, then exit.
        for workpage in preloadingGen:
            workpage = workpage.title()
            cat = catlib.Category(wikipedia.getSite('fa'), workpage)
            gent = pagegenerators.CategorizedPageGenerator(cat)
            run(gent)
        wikipedia.stopme()
        sys.exit()
    if newcat:
        run(preloadingGen)
    if newcatfile:
        # NOTE(review): the file handle is never closed, and run() here
        # receives a list of raw "[[...]]" strings rather than Page
        # objects — presumably run() accepts both; confirm at run()'s
        # definition.
        text2 = codecs.open(newcatfile, 'r', 'utf8')
        text = text2.read()
        linken = re.findall(ur'\[\[.*?\]\]', text, re.S)
        run(linken)
        wikipedia.stopme()
        sys.exit()
    if not gen:
        # Nothing to do: no generator was built from the arguments.
        wikipedia.stopme()
        sys.exit()
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
    run(preloadingGen)