def moincms(wikibase, outputdir, pattern):
    '''
    Render every page listed in the wiki's RDF feed that is not up to date.

    wikibase - URL that is requested with an Accept header of RDF_IMT to get
        the page listing; also the base used to re-absolutize each page URI
    outputdir - handed unchanged to node.factory for every page
    pattern - optional regular expression source; when truthy it is compiled
        and only pages whose relative path satisfies pattern.match are handled

    Raises KeyError when the feed response has no ORIG_BASE_HEADER header.
    Returns None.
    '''
    if pattern:
        pattern = re.compile(pattern)

    req = urllib2.Request(wikibase, headers={'Accept': RDF_IMT})
    resp = urllib2.urlopen(req)
    try:
        original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
        feed = bindery.parse(resp)
    finally:
        # Release the connection even if the header lookup or parse fails
        resp.close()

    process_list = []
    for item in feed.RDF.channel.items.Seq.li:
        uri = split_fragment(item.resource)[0]
        if original_wiki_base:
            # Deal with the wrapped URI: express it relative to the base the
            # wiki reported, then re-anchor it on the base we were given
            relative = relativize(uri, original_wiki_base.rstrip('/') + '/').lstrip('/')
            uri = absolutize(relative, wikibase)
        else:
            # Header present but empty: `relative` used to be left unbound
            # here (NameError below).
            # NOTE(review): assumes feed URIs are then already rooted at
            # wikibase -- confirm against the server behaviour.
            relative = relativize(uri, wikibase.rstrip('/') + '/').lstrip('/')

        if pattern and not pattern.match(relative):
            continue

        n = node.factory(uri, relative, outputdir)
        if not n.up_to_date():
            process_list.append(n)

    # Process nodes needing update according to priority (highest first)
    for n in sorted(process_list, key=attrgetter('PRIORITY'), reverse=True):
        n.render()
    return
def moin2atomentries(wikibase, outputdir, rewrite, pattern):
    '''
    Write an output file for each page in the wiki's RDF feed whose revision
    date differs from the one recorded in its existing output file.

    wikibase - URL that is requested with an Accept header of RDF_IMT to get
        the page listing
    outputdir - directory in which the per-page output files are created
    rewrite - optional string; when truthy, its length is the number of
        leading characters dropped from each feed URI to get the relative
        path, and its occurrences in the URI are replaced by wikibase before
        the page is fetched
    pattern - optional regular expression source; when truthy it is compiled
        and only pages whose relative path satisfies pattern.match are handled

    Progress messages are printed to sys.stderr.  Returns None.
    '''
    # The original took len(rewrite) unconditionally, which raised TypeError
    # for rewrite=None even though the loop below treats rewrite as optional.
    # NOTE(review): falling back to len(wikibase) assumes un-rewritten feed
    # URIs start with wikibase -- confirm against the feed.
    prefix_len = len(rewrite) if rewrite else len(wikibase)
    if pattern:
        pattern = re.compile(pattern)

    req = urllib2.Request(wikibase, headers={'Accept': RDF_IMT})
    with closing(urllib2.urlopen(req)) as resp:
        feed = bindery.parse(resp)

    for item in feed.RDF.channel.items.Seq.li:
        uri = split_fragment(item.resource)[0]
        relative = uri[prefix_len:]
        print >> sys.stderr, uri, relative
        if pattern and not pattern.match(relative):
            continue
        if rewrite:
            uri = uri.replace(rewrite, wikibase)

        # Fetch the page itself, asking for the DOCBOOK_IMT representation
        req = urllib2.Request(uri, headers={'Accept': DOCBOOK_IMT})
        with closing(urllib2.urlopen(req)) as resp:
            page = bindery.parse(resp)

        entrydate = dateparse(unicode(page.article.articleinfo.revhistory.revision.date))
        if entrydate.tzinfo is None:
            # Naive timestamps get DEFAULT_TZ so the comparison below never
            # mixes naive and aware datetimes
            entrydate = entrydate.replace(tzinfo=DEFAULT_TZ)

        output = os.path.join(outputdir, OUTPUTPATTERN % pathsegment(relative))
        if os.access(output, os.R_OK):
            # An output file already exists: skip the page when its recorded
            # update time equals the page's revision date
            lastrev = dateparse(unicode(bindery.parse(output).entry.updated))
            if lastrev.tzinfo is None:
                lastrev = lastrev.replace(tzinfo=DEFAULT_TZ)
            if entrydate == lastrev:
                print >> sys.stderr, 'Not updated. Skipped...'
                continue

        print >> sys.stderr, 'Writing to ', output
        # Separate name for the file object so `output` keeps meaning the path
        with open(output, 'w') as outfile:
            handle_page(uri, page, outputdir, relative, outfile)
    return
def moincms(wikibase, outputdir, pattern):
    '''
    Render every page listed in the wiki's RDF feed that is not up to date.

    wikibase - URL that is requested with an Accept header of RDF_IMT to get
        the page listing; also the base used to re-absolutize each page URI
    outputdir - handed unchanged to node.factory for every page
    pattern - optional regular expression source; when truthy it is compiled
        and only pages whose relative path satisfies pattern.match are handled

    Raises KeyError when the feed response has no ORIG_BASE_HEADER header.
    Returns None.
    '''
    if pattern:
        pattern = re.compile(pattern)

    req = urllib2.Request(wikibase, headers={'Accept': RDF_IMT})
    resp = urllib2.urlopen(req)
    try:
        original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
        feed = bindery.parse(resp)
    finally:
        # Release the connection even if the header lookup or parse fails
        resp.close()

    process_list = []
    for item in feed.RDF.channel.items.Seq.li:
        uri = split_fragment(item.resource)[0]
        if original_wiki_base:
            # Deal with the wrapped URI: express it relative to the base the
            # wiki reported, then re-anchor it on the base we were given
            relative = relativize(uri, original_wiki_base.rstrip('/') + '/').lstrip('/')
            uri = absolutize(relative, wikibase)
        else:
            # Header present but empty: `relative` used to be left unbound
            # here (NameError below).
            # NOTE(review): assumes feed URIs are then already rooted at
            # wikibase -- confirm against the server behaviour.
            relative = relativize(uri, wikibase.rstrip('/') + '/').lstrip('/')

        if pattern and not pattern.match(relative):
            continue

        n = node.factory(uri, relative, outputdir)
        if not n.up_to_date():
            process_list.append(n)

    # Process nodes needing update according to priority (highest first)
    for n in sorted(process_list, key=attrgetter('PRIORITY'), reverse=True):
        n.render()
    return
def execute(top=None): ''' Sample request: curl -F "pattern=wiki/path" -F "wiki=http://localhost:8880/moin/foo/" "http://*****:*****@class="navigation"]//@href'): link = navchild.xml_value #print >> sys.stderr, 'LINK:', link #uri = split_fragment(item.resource)[0] #relative = uri[wikibase_len:] #print >> sys.stderr, uri, relative #if rewrite: # uri = uri.replace(rewrite, wikibase) rest_uri = wrapped_uri(original_wiki_base, link) #print >> sys.stderr, 'rest uri:', rest_uri items.append(freemix(rest_uri, opener).render()) return json.dumps({'items': items}, indent=4)