Example #1
File: exhibit.py Project: dpla/zen
def prep(items, schema=None, strict=False):
    '''
    Prep a raw JSON set of items so it's more useful for SIMILE Exhibit

    schema is a description in code of the expectations for items, including
    the transformations to be applied to keys or values

    import string
    from amara.lib.util import mcompose, first_item
    from zen import exhibit

    PIPELINES = { u'atom:entry': {
        u"type": None,
        u"author": None,
        u"updated": None,
        #u"title": mcompose(first_item, string.strip),
        u"title": None,
        u"alternate_link": first_item,
        u"summary": (first_item, exhibit.REQUIRE),
        u"content": None,
        u"published": None,
        u"id": None, #Note: ID is always automatically required
        u"label": None,
    }, u'atom:feed': None }
    prepped = exhibit.prep(items, schema=PIPELINES)
    '''
    remove_list = []
    for item in items:
        #print item
        if u'id' not in item:
            raise ValueError('Missing ID')
        if schema:
            match = schema.get(first_item(item.get(u'type')))
            if strict and match is None:
                remove_list.append(item)
                continue
            schema_for_item = match or {}
            #print schema_for_item
            for key in schema_for_item.keys():
                #Extract the unit transforms for the entry key and entry value
                if isinstance(schema_for_item[key], tuple):
                    value_unit, key_unit = schema_for_item[key]
                else:
                    value_unit, key_unit = schema_for_item[key], None
                #import sys; print >> sys.stderr, (key, value_unit, key_unit)
                if key_unit and key_unit is REQUIRE:
                    if key not in item:
                        raise ValueError('Missing required field: %s' % key)
                if key in item:
                    #result = pipeline_stage(schema_for_item[key], item[key]).next()
                    value = pipeline_stage(value_unit, item[key])
                    #FIXME: Now supports either REQUIRE *or* transformation, and not both. Maybe make a 3-tuple
                    if key_unit and key_unit is not REQUIRE:
                        new_key = pipeline_stage(key_unit, key)
                        del item[key]
                        key = new_key
                    item[key] = value
    for item in remove_list:
        items.remove(item)
    return items
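
A usage sketch mirroring the docstring above (it assumes the zen and amara packages from dpla/zen are importable; the item data is invented for illustration):

from amara.lib.util import first_item
from zen import exhibit

PIPELINES = {
    u'atom:entry': {
        u'title': None,                             #keep the value unchanged
        u'alternate_link': first_item,              #transform: take the first list element
        u'summary': (first_item, exhibit.REQUIRE),  #transform the value and require the key
    },
    u'atom:feed': None,
}

items = [{
    u'id': u'entry-1',
    u'type': [u'atom:entry'],
    u'title': u'Hello world',
    u'alternate_link': [u'http://example.org/1'],
    u'summary': [u'First entry'],
}]
prepped = exhibit.prep(items, schema=PIPELINES)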
Example #2
def rss2translate(url=None, format=None):
    """Convert RSS 2.0 feed to Atom or RSS 1.0
    
    Sample request:
    * curl "http://localhost:8880/akara.rss2translate?url=http://feeds.delicious.com/v2/rss/recent"

    This is a demo and is not meant as an industrial-strength converter.
    """
    # Support content negotiation in addition to the query parameter
    if not format:
        accepted_imts = request.environ.get('HTTP_ACCEPT', '').split(',')
        imt = first_item(dropwhile(lambda x: '*' in x, accepted_imts))
        if imt == RDF_IMT:
            format = 'rss1'
        else:
            format = 'atom'
    
    if not url:
        raise AssertionError("The 'url' query parameter is mandatory.")

    import feedparser # From http://www.feedparser.org/
    feed = feedparser.parse(url)
    
    # Note: bad URLs might mean the feed doesn't have headers
    logger.debug('Feed info: ' + repr((url, feed.version, feed.encoding, feed.headers.get('Content-type'))))

    updated = getattr(feed.feed, 'updated_parsed', None)
    if updated:
        #updated_parsed is a 9-field time.struct_time; only year..second map onto datetime
        updated = datetime(*updated[:6]).isoformat()
    
    f = atomtools.feed(title=feed.feed.title, updated=updated, id=feed.feed.link)
    for e in feed.entries:
        updated = getattr(e, 'updated_parsed', None)
        if updated:
            #updated_parsed is a 9-field time.struct_time; only year..second map onto datetime
            updated = datetime(*updated[:6]).isoformat()
        links = [
            #FIXME: self?
            (e.link, u'alternate'),
        ]
        f.append(
            e.link,
            e.title,
            updated = updated,
            summary=e.description,
            #e.author_detail.name
            #authors=authors,
            links=links,
        )

    if format == 'atom':
        result = f.xml_encode()
        response.add_header("Content-Type", ATOM_IMT)
    else:
        result = f.rss1format()
        response.add_header("Content-Type", RDF_IMT)
    return result
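
A self-contained sketch of the updated_parsed conversion used above; feedparser exposes updated_parsed as a time.struct_time, and time.gmtime() stands in for it here purely for illustration:

import time
from datetime import datetime

parsed = time.gmtime()                   #stands in for feed.feed.updated_parsed
iso = datetime(*parsed[:6]).isoformat()  #only the year..second fields map onto datetime
print(iso)                               #e.g. '2009-07-14T12:34:56'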
Example #3
def requested_imt(environ):
    # Choose a preferred media type from the Accept header, using application/json as presumed
    # default, and stripping out any wildcard types and type parameters
    #
    # FIXME Ideally, this should use the q values and pick the best media type, rather than
    # just picking the first non-wildcard type.  Perhaps: http://code.google.com/p/mimeparse/
    accepted_imts = []
    accept_header = environ.get('HTTP_ACCEPT')
    if accept_header:
        accepted_imts = [mt.split(';')[0] for mt in accept_header.split(',')]
    accepted_imts.append('application/json')
    #if logger: logger.debug('accepted_imts: ' + repr(accepted_imts))
    imt = first_item(dropwhile(lambda x: '*' in x, accepted_imts))
    return imt
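
A self-contained sketch of the same Accept-header reduction; the first_item stub below only mimics what amara.lib.util.first_item is assumed to do (return the first element of an iterable, or a default when it is exhausted):

from itertools import dropwhile

def first_item(seq, default=None):
    #stand-in for amara.lib.util.first_item (assumed behaviour)
    return next(iter(seq), default)

environ = {'HTTP_ACCEPT': 'text/html;q=0.9, */*;q=0.1, application/rdf+xml'}
accepted_imts = [mt.split(';')[0] for mt in environ.get('HTTP_ACCEPT', '').split(',')]
accepted_imts.append('application/json')
imt = first_item(dropwhile(lambda x: '*' in x, accepted_imts))
print(imt)  #'text/html': the first non-wildcard type wins; q values are ignored, as the FIXME notes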
Example #4
def requested_imt(environ):
    # Choose a preferred media type from the Accept header, using application/json as presumed
    # default, and stripping out any wildcard types and type parameters
    #
    # FIXME Ideally, this should use the q values and pick the best media type, rather than
    # just picking the first non-wildcard type.  Perhaps: http://code.google.com/p/mimeparse/
    accepted_imts = []
    accept_header = environ.get('HTTP_ACCEPT')
    if accept_header :
        accepted_imts = [ mt.split(';')[0] for mt in accept_header.split(',') ]
    accepted_imts.append('application/json')
    #if logger: logger.debug('accepted_imts: ' + repr(accepted_imts))
    imt = first_item(dropwhile(lambda x: '*' in x, accepted_imts))
    return imt
Example #5
File: atom_zen.py Project: dpla/zen
def atom_moin(body, ctype, maxcount=None, folder=None, feed=None):
    #Sample query:
    #curl --request POST "http://localhost:8880/atom.moin?feed=http://bitworking.org/news/feed/&maxcount=10&folder=foo091023"
    #You can set ...&maxcount=100 or whatever number, if you like
    maxcount = int(maxcount if maxcount else DEFAULT_MAX)

    H = httplib2.Http('.cache')
    if USER:
        H.add_credentials(USER, PASSWD)

    #Prepare the envelope for the output (POST response)
    w = structencoder()
    output = w.cofeed(ROOT(E_CURSOR(u'updates', {u'feed': feed})))
    logger.debug('Feed: ' + feed)
    
    entries = atomtools.ejsonize(feed)
    for entry in islice(entries, 0, maxcount):
        try:
            logger.debug('ENTRY: ' + repr(entry))
            aid = entry[u'label']
            slug = atomtools.slug_from_title(aid)
            #logger.debug('GRIPPO' + repr((id,)))
            dest = folder + '/' + slug
            chunks = [ ' title:: ' + entry[u'title'] ]
            chunks.append(' last changed:: ' + entry[u'updated'])
            chunks.append(' link:: ' + (first_item(entry[u'link']) or ''))

            if u'summary' in entry: chunks.append('= Summary =\n' + entry[u'summary'])
            if u'content_src' in entry: chunks.append('= Content =\n' + entry[u'content_src'])
            if u'content_text' in entry: chunks.append('= Content =\n' + entry[u'content_text'])
            #logger.debug("Result IDs: " + ids)
            if u'categories' in entry:
                chunks.append(u'= Categories =')
                for categories in entry[u'categories']:
                    chunks.append(' * ' + categories)

            chunks.append(' id:: ' + entry[u'id'])
            chunks.append('= akara:metadata =\n akara:type:: http://purl.org/com/zepheira/zen/resource/webfeed\n')

            url = absolutize(dest, MOINBASE)
            headers = {'Content-Type' : 'text/plain'}
            resp, content = H.request(url, "PUT", body='\n'.join(chunks).encode('utf-8'), headers=headers)
            logger.debug("Result: " + repr((resp, content)))
            output.send(E(u'update', {u'entry-id': entry[u'id'], u'page': url}))
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception, e:
            logger.info('Exception handling Entry page: ' + repr(e))
            output.send(E(u'failure', {u'entry-id': entry[u'id']}))
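
A self-contained sketch of how one ejsonized entry is turned into a MoinMoin page body; the entry values are invented and only the chunk assembly from the loop above is reproduced:

entry = {
    u'id': u'urn:uuid:1234', u'label': u'an-example-post',
    u'title': u'An example post', u'updated': u'2009-10-23T12:00:00Z',
    u'link': [u'http://example.org/post'], u'summary': u'A short summary.',
}
chunks = [' title:: ' + entry[u'title']]
chunks.append(' last changed:: ' + entry[u'updated'])
chunks.append(' link:: ' + (entry[u'link'][0] if entry[u'link'] else ''))
if u'summary' in entry: chunks.append('= Summary =\n' + entry[u'summary'])
chunks.append(' id:: ' + entry[u'id'])
chunks.append('= akara:metadata =\n akara:type:: http://purl.org/com/zepheira/zen/resource/webfeed\n')
print('\n'.join(chunks))  #this body would be PUT to absolutize(folder + '/' + slug, MOINBASE)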
Example #6
def get_page(environ, start_response):
    #logger.debug('get_page: ' + repr((environ['SCRIPT_NAME'], environ['PATH_INFO'])))
    req_headers = copy_headers_to_dict(environ,exclude=['HTTP_ACCEPT_ENCODING'])
    wiki_id, base, opener, original_page, wrapped_wiki_base = target(environ)
    page = environ['PATH_INFO'].lstrip('/')
    check_auth(environ, start_response, base, opener, req_headers)
    upstream_handler = None
    status = httplib.OK
    params = cgi.parse_qs(environ['QUERY_STRING'])
    #Note: probably a better solution here: http://code.google.com/p/mimeparse/
    accepted_imts = environ.get('HTTP_ACCEPT', '').split(',')
    #logger.debug('accepted_imts: ' + repr(accepted_imts))
    imt = first_item(dropwhile(lambda x: '*' in x, accepted_imts))
    #logger.debug('imt: ' + repr(imt))
    params_for_moin = {}
    cache_max_age = CACHE_MAX_AGE # max-age of this response. If set to None, it will not be used
    if NO_CACHE_PATHS and first_item(dropwhile(lambda x: x not in page, NO_CACHE_PATHS)):
        cache_max_age = None

    if 'rev' in params:
        #XXX: Not compatible with search
        #params_for_moin = {'rev' : params['rev'][0], 'action': 'recall'}
        params_for_moin = {'rev' : params['rev'][0]}
    if 'search' in params:
        searchq = params['search'][0]
        query = urllib.urlencode({'value' : searchq, 'action': 'fullsearch', 'context': '180', 'fullsearch': 'Text'})
        #?action=fullsearch&context=180&value=foo&=Text
        url = absolutize('?'+query, base)
        request = urllib2.Request(url, None, req_headers)
        ctype = moin.RDF_IMT
        cache_max_age = None
    #elif 'action' in params and params['action'][0] == 'recall':
    elif moin.HTML_IMT in environ.get('HTTP_ACCEPT', ''):
        params = urllib.urlencode(params_for_moin)
        url = absolutize(page+'?'+params, base)
        request = urllib2.Request(url, None, req_headers)
        ctype = moin.HTML_IMT
    elif moin.RDF_IMT in environ.get('HTTP_ACCEPT', ''):
        #FIXME: Make unique flag optional
        #url = base + '/RecentChanges?action=rss_rc&unique=1&ddiffs=1'
        url = absolutize('RecentChanges?action=rss_rc&unique=1&ddiffs=1', base)
        #print >> sys.stderr, (url, base, '/RecentChanges?action=rss_rc&unique=1&ddiffs=1', )
        request = urllib2.Request(url, None, req_headers)
        ctype = moin.RDF_IMT
    elif moin.ATTACHMENTS_IMT in environ.get('HTTP_ACCEPT', ''):
        url = absolutize(page + '?action=AttachFile', base)
        request = urllib2.Request(url, None, req_headers)
        ctype = moin.ATTACHMENTS_IMT
        def upstream_handler():
            #Sigh.  Sometimes you have to break some Tag soup eggs to make a RESTful omelette
            with closing(opener.open(request)) as resp:
                rbody = resp.read()
            doc = htmlparse(rbody)
            raise_embedded_error(doc)
            attachment_nodes = doc.xml_select(u'//*[contains(@href, "action=AttachFile") and contains(@href, "do=view")]')
            targets = []
            for node in attachment_nodes:
                target = [ param.split('=', 1)[1] for param in node.href.split(u'&') if param.startswith('target=') ][0]
                targets.append(target)
            output = structencoder(indent=u"yes")
            output.feed(
            ROOT(
                E((u'attachments'),
                    (E(u'attachment', {u'href': unicode(t)}) for t in targets)
                )
            ))
            return output.read(), ctype
    #Notes on use of URI parameters - http://markmail.org/message/gw6xbbvx4st6bksw
    elif ';attachment=' in page:
        page, attachment = page.split(';attachment=', 1)
        url = absolutize(page + '?action=AttachFile&do=get&target=' + attachment, base)
        request = urllib2.Request(url, None, req_headers)
        def upstream_handler():
            with closing(opener.open(request)) as resp:
                rbody = resp.read()
            return rbody, dict(resp.info())['content-type']
    #
    elif ';history' in page:
        cache_max_age = None
        page, discard = page.split(';history', 1)
        ctype = moin.XML_IMT
        def upstream_handler():
            revs = scrape_page_history(page, base, opener, req_headers)
            output = structencoder(indent=u"yes")
            output.feed(
            ROOT(
                E((u'history'),
                    (E(u'rev', {u'id': unicode(r['rev']), u'editor': unicode(r['editor']), u'date': unicode(r['date']).replace(' ', 'T')}) for r in revs)
                )
            ))
            return output.read(), ctype
    elif imt:
        params_for_moin.update({'mimetype': imt})
        params = urllib.urlencode(params_for_moin)
        url = absolutize(page, base) + '?' + params
        request = urllib2.Request(url, None, req_headers)
        ctype = moin.DOCBOOK_IMT
    else:
        params_for_moin.update({'action': 'raw'})
        params = urllib.urlencode(params_for_moin)
        url = absolutize(page, base) + '?' + params
        request = urllib2.Request(url, None, req_headers)
        ctype = moin.WIKITEXT_IMT
    try:
        if upstream_handler:
            rbody, ctype = upstream_handler()
        else:
            with closing(opener.open(request)) as resp:
                rbody = resp.read()
        
        #headers = {moin.ORIG_BASE_HEADER: base}
        #moin_base = absolutize(wiki_id, base)
        moin_base_info = base + ' ' + wrapped_wiki_base + ' ' + original_page
        response_headers = [("Content-Type", ctype),
                            ("Vary", "Accept"),
                            (moin.ORIG_BASE_HEADER, moin_base_info)]
        if cache_max_age:
            response_headers.append(("Cache-Control","max-age="+str(cache_max_age)))

        start_response(status_response(status), response_headers)
        return rbody
    except urllib2.URLError, e:
        if e.code == 401:
            raise HTTPAuthorizationError(url=request.get_full_url())
        if e.code == 403:
            raise MoinMustAuthenticateError(url=request.get_full_url(),target=wiki_id)
        if e.code == 404:
            raise MoinNotFoundError(fronturl=request_uri(environ),backurl=url)
        else:
            raise UnexpectedResponseError(url=url,code=e.code,error=str(e))
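
A self-contained sketch of the ';attachment=' URL convention handled above (the page value is invented):

page = 'FrontPage;attachment=report.pdf'
if ';attachment=' in page:
    page, attachment = page.split(';attachment=', 1)
    upstream = page + '?action=AttachFile&do=get&target=' + attachment
    print(upstream)  #FrontPage?action=AttachFile&do=get&target=report.pdf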
Example #7
def read_contentdm(site, collection=None, query=None, limit=None, logger=logging, proxy=None, cachedir='/tmp/.cache'):
    '''
    A generator of CDM records
    First generates header info

    >>> from zen.contentdm import read_contentdm
    >>> results = read_contentdm('http://digital.library.louisville.edu/cdm4/', collection='/jthom', query=None, limit=None)
    >>> results.next()
    {'basequeryurl': 'http://digital.library.louisville.edu/cdm4/results.php?CISOOP1=any&CISOROOT=%2Fjthom&CISOBOX1=&CISOFIELD1=CISOSEARCHALL'}
    >>> results.next()
    {u'Title': u'60 years in darkness.  ', u'Object_Type': u'Negatives, ', u'Source': u"4 x 5 in. b&w safety negative. Item no. 1979.33.1026 in the Jean Thomas, The Traipsin' Woman, Collection, University of Louisville Photographic Archives. ", u'Collection': u"Jean Thomas, The Traipsin' Woman, Collection, ",...}

    The first yielded value is global metadata; the  second is the record
    for the first item  in the collection/query, and so on until all the items
    are returned, or the limit reached.

    If you want to see the debug messages, just do (before calling read_contentdm for the first time):

    >>> import logging; logging.basicConfig(level=logging.DEBUG)

    for a nice-sized collection to try:
    >>> read_contentdm('http://digital.library.louisville.edu/cdm4/', collection='/maps')

    Auburn theater collection:

    >>> read_contentdm('http://content.lib.auburn.edu', collection='/theatre01')
    >>> read_contentdm('http://content.lib.auburn.edu', collection='/football')

    i.e.: http://digital.library.louisville.edu/cdm4/browse.php?CISOROOT=/maps

    See also:

    * /cdm4/browse.php?CISOROOT=/football (51 items)

    >>> results = read_contentdm('http://digital.library.louisville.edu/cdm4/', collection='/jthom', query=None, limit=None, proxy="http://*****:*****")
    '''
    #(setup elided in this excerpt) the omitted lines build the cdmsite helper, the seen_links and
    #seen_ids sets, and resultsdoc, the parsed first page of results, whose item links are selected
    #with u'//form[@name="searchResultsForm"]//a[starts-with(@href, "item_viewer.php")]'

    def follow_pagination(doc):
        #e.g. of page 1: http://digital.library.louisville.edu/cdm4/browse.php?CISOROOT=/afamoh
        #e.g. of page 2: http://digital.library.louisville.edu/cdm4/browse.php?CISOROOT=/afamoh&CISOSTART=1,21
        page_start = 1
        while True:
            items = doc.xml_select(u'//a[contains(@href, "item_viewer.php") or contains(@href, "document.php")]')
            #items = list(items)
            #for i in items: yield i
            for i in items:
                #logger.debug("item: {0}".format(i.title.encode('utf-8')))
                yield i
            next = [ l.href for l in doc.xml_select(u'//a[@class="res_submenu"]') if int(l.href.split(u',')[-1]) > page_start ]
            if not next:
                #e.g. http://vilda.alaska.edu/ uses yet another pattern with just @class=submenu links *sigh*
                next = [ l.href for l in doc.xml_select(u'//a[@class="submenu"]') if u'CISOSTART' in l.href and int(l.href.split(u',')[-1]) > page_start ]
                if not next:
                    break
            page_start = int(next[0].split(u',')[-1])
            url = absolutize(next[0], site)

            resp, doc = cdmsite.index_page(url, "Next page URL: {0}")
        return

    items = follow_pagination(resultsdoc)

    at_least_one = False
    count = 0
    for it in items:
        at_least_one = True
        pageuri = absolutize(it.href, site)
        if pageuri in seen_links:
            continue
        seen_links.add(pageuri)
        entry = {}
        logger.debug("Processing item URL: {0}".format(pageuri))
        (scheme, netloc, path, query, fragment) = split_uri_ref(pageuri)
        entry['domain'] = netloc
        params = parse_qs(query)
        entry['cdm-coll'] = params['CISOROOT'][0].strip('/').split('/')[0]
        entry['id'] = params['CISOPTR'][0]
        logger.debug("Item id: {0}".format(entry['id']))
        if entry['id'] in seen_ids:
            continue
        seen_ids.add(entry['id'])
        entry['link'] = unicode(pageuri)
        entry['local_link'] = '#' + entry['id']

        resp, page, cachekey, cached = cdmsite.item_page(pageuri)

        if cached:
            entry = cached
        else:
            image = first_item(page.xml_select(u'//td[@class="tdimage"]//img'))
            if image:
                imageuri = absolutize(image.src, site)
                entry['imageuri'] = imageuri
                try:
                    entry['thumbnail'] = absolutize(dict(it.xml_parent.a.img.xml_attributes.items())[None, u'src'], site)
                except AttributeError:
                    logger.debug("No thumbnail")
            #entry['thumbnail'] = DEFAULT_RESOLVER.normalize(it.xml_parent.a.img.src, root)
            #fields = page.xml_select(u'//tr[td[@class="tdtext"]]')
            #fields = page.xml_select(u'//table[@class="metatable"]/tr')
            fields = chain(page.xml_select(u'//tr[td[@class="tdtext"]]'), page.xml_select(u'//table[@class="metatable"]//tr'))
            for f in fields:
                #key = unicode(f.td[0].span.b).replace(' ', '_')
                key = UNSUPPORTED_IN_EXHIBITKEY.sub(u'_', U(f.xml_select(u'td[1]//b')))
                #logger.debug("{0}".format(key))
                value = u''.join(CONTENT.dispatch(f.td[1]))
                #value = u''.join(CONTENT.dispatch(f.xml_select(u'td[2]')))
                entry[key] = unicode(value)
            if u'Title' in entry:
                #logger.debug("{0}".format(entry['Title']))
                entry['label'] = entry['Title']
            else:
                entry['label'] = u'[NO LABEL AVAILABLE]'
            if u"Location_Depicted" in entry:
                locations = entry[u"Location_Depicted"].split(u', ')
                #locations = [ l.replace(' (', ', ').replace(')', '').replace(' ', '+') for l in locations if l.strip() ]
                locations = [ l.replace(' (', ', ').replace(')', '').replace('.', '') for l in locations if l.strip() ]
                #print >> sys.stderr, "LOCATIONS", repr(locations)
                entry[u"Locations_Depicted"] = locations
            if u"Date_Original" in entry:
                entry[u"Estimated_Original_Date"] = entry[u"Date_Original"].strip().replace('-', '5').replace('?', '') 
            entry[u"Subject"] = [ s for s in entry.get(u"Subject", u'').split(', ') if s.strip() ]
            if cachedir:
                try:
                    json_stream = open(os.path.join(cachedir, cachekey+'.extract.js'), 'w')
                    json.dump(entry, json_stream)
                except (IOError, ValueError):
                    pass

        yield entry
        count += 1
        if limit and count >= limit:
            logger.debug("Limit reached")
            break
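
A minimal driver sketch following the docstring (it assumes the zen package is installed and the CONTENTdm site is reachable):

from zen.contentdm import read_contentdm

results = read_contentdm('http://digital.library.louisville.edu/cdm4/', collection='/jthom', limit=3)
header = results.next()         #first yield: global metadata (the basequeryurl)
for record in results:          #remaining yields: one dict per item, up to the limit
    print(record.get(u'label'))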
Example #8
def get_page(environ, start_response):
    #logger.debug('get_page: ' + repr((environ['SCRIPT_NAME'], environ['PATH_INFO'])))
    req_headers = copy_headers_to_dict(environ,
                                       exclude=['HTTP_ACCEPT_ENCODING'])
    wiki_id, base, opener, original_page, wrapped_wiki_base = target(environ)
    page = environ['PATH_INFO'].lstrip('/')
    check_auth(environ, start_response, base, opener, req_headers)
    upstream_handler = None
    status = httplib.OK
    params = cgi.parse_qs(environ['QUERY_STRING'])
    #Note: probably a better solution here: http://code.google.com/p/mimeparse/
    accepted_imts = environ.get('HTTP_ACCEPT', '').split(',')
    #logger.debug('accepted_imts: ' + repr(accepted_imts))
    imt = first_item(dropwhile(lambda x: '*' in x, accepted_imts))
    #logger.debug('imt: ' + repr(imt))
    params_for_moin = {}
    cache_max_age = CACHE_MAX_AGE  # max-age of this response. If set to None, it will not be used
    if NO_CACHE_PATHS and first_item(
            dropwhile(lambda x: x not in page, NO_CACHE_PATHS)):
        cache_max_age = None

    if 'rev' in params:
        #XXX: Not compatible with search
        #params_for_moin = {'rev' : params['rev'][0], 'action': 'recall'}
        params_for_moin = {'rev': params['rev'][0]}
    if 'search' in params:
        searchq = params['search'][0]
        query = urllib.urlencode({
            'value': searchq,
            'action': 'fullsearch',
            'context': '180',
            'fullsearch': 'Text'
        })
        #?action=fullsearch&context=180&value=foo&=Text
        url = absolutize('?' + query, base)
        request = urllib2.Request(url, None, req_headers)
        ctype = moin.RDF_IMT
        cache_max_age = None
    #elif 'action' in params and params['action'][0] == 'recall':
    elif moin.HTML_IMT in environ.get('HTTP_ACCEPT', ''):
        params = urllib.urlencode(params_for_moin)
        url = absolutize(page + '?' + params, base)
        request = urllib2.Request(url, None, req_headers)
        ctype = moin.HTML_IMT
    elif moin.RDF_IMT in environ.get('HTTP_ACCEPT', ''):
        #FIXME: Make unique flag optional
        #url = base + '/RecentChanges?action=rss_rc&unique=1&ddiffs=1'
        url = absolutize('RecentChanges?action=rss_rc&unique=1&ddiffs=1', base)
        #print >> sys.stderr, (url, base, '/RecentChanges?action=rss_rc&unique=1&ddiffs=1', )
        request = urllib2.Request(url, None, req_headers)
        ctype = moin.RDF_IMT
    elif moin.ATTACHMENTS_IMT in environ.get('HTTP_ACCEPT', ''):
        url = absolutize(page + '?action=AttachFile', base)
        request = urllib2.Request(url, None, req_headers)
        ctype = moin.ATTACHMENTS_IMT

        def upstream_handler():
            #Sigh.  Sometimes you have to break some Tag soup eggs to make a RESTful omelette
            with closing(opener.open(request)) as resp:
                rbody = resp.read()
            doc = htmlparse(rbody)
            raise_embedded_error(doc)
            attachment_nodes = doc.xml_select(
                u'//*[contains(@href, "action=AttachFile") and contains(@href, "do=view")]'
            )
            targets = []
            for node in attachment_nodes:
                target = [
                    param.split('=', 1)[1] for param in node.href.split(u'&')
                    if param.startswith('target=')
                ][0]
                targets.append(target)
            output = structencoder(indent=u"yes")
            output.feed(
                ROOT(
                    E((u'attachments'),
                      (E(u'attachment', {u'href': unicode(t)})
                       for t in targets))))
            return output.read(), ctype
    #Notes on use of URI parameters - http://markmail.org/message/gw6xbbvx4st6bksw
    elif ';attachment=' in page:
        page, attachment = page.split(';attachment=', 1)
        url = absolutize(
            page + '?action=AttachFile&do=get&target=' + attachment, base)
        request = urllib2.Request(url, None, req_headers)

        def upstream_handler():
            with closing(opener.open(request)) as resp:
                rbody = resp.read()
            return rbody, dict(resp.info())['content-type']
    #
    elif ';history' in page:
        cache_max_age = None
        page, discard = page.split(';history', 1)
        ctype = moin.XML_IMT

        def upstream_handler():
            revs = scrape_page_history(page, base, opener, req_headers)
            output = structencoder(indent=u"yes")
            output.feed(
                ROOT(
                    E((u'history'), (E(
                        u'rev', {
                            u'id': unicode(r['rev']),
                            u'editor': unicode(r['editor']),
                            u'date': unicode(r['date']).replace(' ', 'T')
                        }) for r in revs))))
            return output.read(), ctype
    elif imt:
        params_for_moin.update({'mimetype': imt})
        params = urllib.urlencode(params_for_moin)
        url = absolutize(page, base) + '?' + params
        request = urllib2.Request(url, None, req_headers)
        ctype = moin.DOCBOOK_IMT
    else:
        params_for_moin.update({'action': 'raw'})
        params = urllib.urlencode(params_for_moin)
        url = absolutize(page, base) + '?' + params
        request = urllib2.Request(url, None, req_headers)
        ctype = moin.WIKITEXT_IMT
    try:
        if upstream_handler:
            rbody, ctype = upstream_handler()
        else:
            with closing(opener.open(request)) as resp:
                rbody = resp.read()

        #headers = {moin.ORIG_BASE_HEADER: base}
        #moin_base = absolutize(wiki_id, base)
        moin_base_info = base + ' ' + wrapped_wiki_base + ' ' + original_page
        response_headers = [("Content-Type", ctype), ("Vary", "Accept"),
                            (moin.ORIG_BASE_HEADER, moin_base_info)]
        if cache_max_age:
            response_headers.append(
                ("Cache-Control", "max-age=" + str(cache_max_age)))

        start_response(status_response(status), response_headers)
        return rbody
    except urllib2.URLError, e:
        if e.code == 401:
            raise HTTPAuthorizationError(url=request.get_full_url())
        if e.code == 403:
            raise MoinMustAuthenticateError(url=request.get_full_url(),
                                            target=wiki_id)
        if e.code == 404:
            raise MoinNotFoundError(fronturl=request_uri(environ), backurl=url)
        else:
            raise UnexpectedResponseError(url=url, code=e.code, error=str(e))
Example #9
def rule(self, node):
    #Pull the optional ak:resource child; without it there is no resource for this rule
    resource = None
    context_elem = first_item(node.xml_select(u'ak:resource'))
    if context_elem:
        resource = U(context_elem.select)
    self.rules.append((U(node.context), resource, []))
    list(chain(*imap(self.dispatch, node.xml_children)))