Code example #1
def pubmed_adapter(search=None, id=None):
    '''
    Sample queries:
    #curl "http://localhost:8880/pubmed?"
    curl "http://localhost:8880/pubmed?search=stem+cells"
    curl "http://localhost:8880/pubmed?id=19358275"
    '''
    #FIXME: How do we handle no search or id param?  Just serve up the latest entries?  Or error as below?
    #assert_(not(search and id), msg="The 'search' or 'id' query parameter is mandatory.")
    if search:
        #search = first_item(search)
        #reldate: only search for last N days
        #query = urllib.urlencode({'db' : NCBI_DB, 'term': query, 'reldate': '60', 'datetype': 'edat', 'retmax': DEFAULT_MAX_RESULTS, 'usehistory': 'y'})
        query = urllib.urlencode({'term': search, 'db' : NCBI_DB, 'datetype': 'edat', 'retmax': DEFAULT_MAX_RESULTS, 'usehistory': 'y'})
        search_url = NCBI_SEARCH_PATTERN + query
        logger.debug("Term search URL: " + search_url)
        doc = bindery.parse(search_url, standalone=True)
        search_terms = search
        ids = ( unicode(i) for i in doc.eSearchResult.IdList.Id )
        ids = ','.join(ids)
        self_link = '/pubmed?search='+search
    else:
        ids = first_item(id)  #the id query parameter supplies the comma-separated PMID list
        #fulltext = fulltext[0] if fulltext else u'no'
        #if fulltext == 'yes':
        search_terms = ids
        self_link = '/pubmed?id='+ids
    query = urllib.urlencode({'db' : NCBI_DB, 'id': ids, 'retmode': 'xml'})
    search_url = NCBI_ARTICLE_ACCESS_PATTERN + query
    logger.debug("ID search URL: " + search_url)
    alt_link = search_url
    doc = bindery.parse(search_url, standalone=True, model=PUBMED_MODEL)
    #doc = bindery.parse(open('/Users/uche/tmp/efetch.fcgi.html'), standalone=True, model=PUBMED_MODEL)
    metadata, first_id = metadata_dict(generate_metadata(doc))
    return atom_results(doc, metadata, self_link, alt_link, search_terms)
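
Example #1 is a two-step NCBI E-utilities client: esearch resolves a search term into PMIDs, then efetch retrieves article XML for those PMIDs. A minimal sketch of just that URL construction; the endpoint constants here are assumptions standing in for the module's NCBI_SEARCH_PATTERN and NCBI_ARTICLE_ACCESS_PATTERN:

#Sketch of the esearch -> efetch URL construction; endpoints are assumed values.
import urllib

ESEARCH = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?'
EFETCH = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'

def esearch_url(term, retmax=100):
    #step 1: resolve a search term into PMIDs
    return ESEARCH + urllib.urlencode({'db': 'pubmed', 'term': term,
                                       'datetype': 'edat', 'retmax': retmax,
                                       'usehistory': 'y'})

def efetch_url(ids):
    #step 2: fetch article XML for a comma-separated PMID list
    return EFETCH + urllib.urlencode({'db': 'pubmed', 'id': ids, 'retmode': 'xml'})

print esearch_url('stem cells')
print efetch_url('19358275')
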
Code example #2
File: oai.py Project: eldios/ingestion
    def list_records(self, set="", resumption_token = ""):
        '''
        List records. Use either the resumption token or set id.
        '''
        if resumption_token:
            params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token}
        else:
            params = {'verb' : 'ListRecords', 'metadataPrefix': 'oai_dc', 'set': set}
        qstr = urllib.urlencode(params)
        url = self.root + '?' + qstr
        self.logger.debug('OAI request URL: {0}'.format(url))
        start_t = time.time()
        resp, content = self.h.request(url)
        retrieved_t = time.time()
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))
        doc = bindery.parse(url, model=OAI_LISTRECORDS_MODEL)

        records, first_id = metadata_dict(generate_metadata(doc),
                                          nesteddict=False)
        for id_, props in records:
            for k, v in props.iteritems():
                props[k] = [ U(item) for item in v ]
        if (doc.OAI_PMH.ListRecords is not None) and (doc.OAI_PMH.ListRecords.resumptionToken is not None):
            resumption_token = U(doc.OAI_PMH.ListRecords.resumptionToken)
        else:
            resumption_token = ''
        return {'records' : records, 'resumption_token' : resumption_token}
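
Callers of list_records are expected to loop until the repository returns an empty resumption token. A hedged driver for the method above, where client stands for an instance of this class:

#Harvest loop: follow resumptionToken until the repository sends an empty one.
def harvest_all(client, set_id):
    token = ''
    all_records = []
    while True:
        result = client.list_records(set=set_id, resumption_token=token)
        all_records.extend(result['records'])
        token = result['resumption_token']
        if not token:
            break
    return all_records
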
Code example #3
File: moin.py Project: mredar/akara
 def factory(rest_uri, moin_link=None, opener=None):
     opener = opener or urllib2.build_opener()
     logger.debug('rest_uri: ' + rest_uri)
     req = urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT})
     resp = opener.open(req)
     doc = bindery.parse(resp, standalone=True, model=MOIN_DOCBOOK_MODEL)
     original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
     #self.original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
     #amara.xml_print(self.content_cache)
     metadata, first_id = metadata_dict(generate_metadata(doc))
     metadata = metadata[first_id]
     akara_type = U(metadata[u'ak-type'])
     logger.debug('Type: ' + akara_type)
     try:
         #Older Moin CMS resource types are implemented by registration to the global node.NODES
         cls = node.NODES[akara_type]
     except KeyError:
         #Newer Moin CMS resource types are implemented by discovery of a URL,
         #to which a POST request executes the desired action
         return node.ENDPOINTS and (rest_uri, akara_type,
                                    node.ENDPOINTS[akara_type], doc,
                                    metadata, original_wiki_base)
     else:
         instance = cls(rest_uri,
                        moin_link,
                        opener,
                        cache=(doc, metadata, original_wiki_base))
         return instance
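
The try/except in this factory is a two-tier dispatch: the ak-type is looked up in the node.NODES registry first, and only on KeyError does it fall back to the node.ENDPOINTS table; the `node.ENDPOINTS and (...)` guard makes the fallback falsy when no endpoints are registered at all. A toy rendering of the pattern with hypothetical registries:

#Toy registry-first, endpoint-fallback dispatch; NODES and ENDPOINTS are
#hypothetical stand-ins for node.NODES / node.ENDPOINTS.
NODES = {u'http://example.org/ak-type/page': object}
ENDPOINTS = {u'http://example.org/ak-type/action': u'http://example.org/do-it'}

def dispatch(akara_type):
    try:
        return ('class', NODES[akara_type])
    except KeyError:
        #falsy when no endpoint table is configured, as in the factory above
        return ENDPOINTS and ('endpoint', ENDPOINTS[akara_type])

print dispatch(u'http://example.org/ak-type/page')
print dispatch(u'http://example.org/ak-type/action')
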
Code example #4
File: oai.py Project: dpla/zen
    def search(self, term):
        #NOTE: excerpt is truncated; dspace_id is presumably derived from term upstream
        qstr = urllib.urlencode({'verb' : 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': dspace_id})
        url = DSPACE_OAI_ENDPOINT + '?' + qstr
        logger.debug('DSpace URL: ' + str(url))
        #keywords = [ (k.strip(), JOVE_TAG) for k in unicode(row.xml_select(u'string(.//*[@class="keywords"])')).split(',') ]

        doc = bindery.parse(url, model=OAI_MODEL)
        #print >> sys.stderr, list(generate_metadata(doc))
        resources, first_id = metadata_dict(generate_metadata(doc), nesteddict=False)
        record = doc.OAI_PMH

        resource = resources[first_id]
Code example #5
File: oaitools.py Project: mredar/akara
def atomize_oai_record(endpoint=None, id=None):
    '''
    endpoint - the OAI request URL, e.g. http://dspace.mit.edu/oai/request
    id - the article ID, e.g. oai:dspace.mit.edu:1721.1/5451
    
    Sample request:
    curl "http://localhost:8880/akara.oai.atom?endpoint=http://dspace.mit.edu/oai/request&id=oai:dspace.mit.edu:1721.1/5451"
    '''
    if endpoint is None:
        raise ValueError('endpoint required')
    if id is None:
        raise ValueError('id required')
    qstr = urllib.urlencode({
        'verb': 'GetRecord',
        'metadataPrefix': 'oai_dc',
        'identifier': id
    })
    url = endpoint + '?' + qstr
    doc = bindery.parse(url, model=OAI_MODEL)
    resources, first_id = metadata_dict(generate_metadata(doc))
    #print resources
    f = feed(ATOM_ENVELOPE)
    #f = feed(ATOM_ENVELOPE, title=resources['title'], id=resources['id'])
    #f.source.feed.xml_append(E((ATOM_NAMESPACE, u'link'), {u'rel': u'self', u'type': u'application/atom+xml', u'href': self_link.decode('utf-8')}))
    #f.source.feed.xml_append(E((ATOM_NAMESPACE, u'link'), {u'rel': u'search', u'type': u'application/opensearchdescription+xml', u'href': u'http://kds-kci.zepheira.com/sciencedirect.discovery'}))
    #f.source.feed.xml_append(E((ATOM_NAMESPACE, u'link'), {u'rel': u'alternate', u'type': u'text/xml', u'href': alt_link.decode('utf-8')}))
    #f.source.feed.xml_append(E((OPENSEARCH_NAMESPACE, u'Query'), {u'role': u'request', u'searchTerms': search_terms.decode('utf-8')}))
    #maxarticles = DEFAULT_MAX_RESULTS
    maxarticles = 3
    for record in islice(doc.OAI_PMH, 0, maxarticles):
        resource = resources[id]
        print resource
        authors = [(a, None, None) for a in resource[u'creator']]
        links = [
            (unicode(resource['handle']), u'alternate'),
        ]
        #categories = [ (unicode(k), SD_NS+u'authorKeyword') for k in authkw(article) ]
        #elements = [
        #    E((SD_NS, u'sd:journal-cover'), unicode(article.journalCover).strip() if hasattr(article, 'journalCover') else DEFAULT_ICON),
        #    E((SD_NS, u'sd:journal-name'), unicode(article.journalName)),
        #]
        f.append(
            id,
            unicode(resource['title'][0]),
            updated=unicode(resource['date'][0]),
            summary=unicode(resource['description'][0]),
            authors=authors,
            links=links,
            #categories=categories,
            #elements=elements,
        )

    return f.source.xml_encode('xml-indent')
Code example #6
File: moincms.py Project: dpla/akara
 def __init__(self, rest_uri, opener):
     self.rest_uri = rest_uri
     self.opener = opener
     #from node.factory
     req = urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT})
     print >> sys.stderr, 'rest_uri: ', rest_uri
     with closing(opener.open(req)) as resp:
         doc = bindery.parse(resp, standalone=True, model=MOIN_DOCBOOK_MODEL)
         original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
         #self.original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
         #amara.xml_print(self.content_cache)
     metadata = metadata_dict(generate_metadata(doc))
     self.cache = (doc, metadata, original_wiki_base)
     return
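
Unlike the factory in example #3, this constructor wraps the HTTP response in contextlib.closing, so the socket is released even if bindery.parse raises. The pattern in isolation (the URL is a placeholder):

#Minimal illustration of the closing() pattern used above.
from contextlib import closing
import urllib2

req = urllib2.Request('http://example.org/', headers={'Accept': 'text/html'})
with closing(urllib2.urlopen(req)) as resp:
    data = resp.read()  #resp.close() runs on exit, whether or not this raises
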
Code example #7
File: oaitools.py Project: dpla/akara
def atomize_oai_record(endpoint=None, id=None):
    '''
    endpoint - the OAI request URL, e.g. http://dspace.mit.edu/oai/request
    id - the article ID, e.g. oai:dspace.mit.edu:1721.1/5451
    
    Sample request:
    curl "http://localhost:8880/akara.oai.atom?endpoint=http://dspace.mit.edu/oai/request&id=oai:dspace.mit.edu:1721.1/5451"
    '''
    if endpoint is None:
        raise ValueError('endpoint required')
    if id is None:
        raise ValueError('id required')
    qstr = urllib.urlencode({'verb' : 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': id})
    url = endpoint + '?' + qstr
    doc = bindery.parse(url, model=OAI_MODEL)
    resources, first_id = metadata_dict(generate_metadata(doc))
    #print resources
    f = feed(ATOM_ENVELOPE)
    #f = feed(ATOM_ENVELOPE, title=resources['title'], id=resources['id'])
    #f.source.feed.xml_append(E((ATOM_NAMESPACE, u'link'), {u'rel': u'self', u'type': u'application/atom+xml', u'href': self_link.decode('utf-8')}))
    #f.source.feed.xml_append(E((ATOM_NAMESPACE, u'link'), {u'rel': u'search', u'type': u'application/opensearchdescription+xml', u'href': u'http://kds-kci.zepheira.com/sciencedirect.discovery'}))
    #f.source.feed.xml_append(E((ATOM_NAMESPACE, u'link'), {u'rel': u'alternate', u'type': u'text/xml', u'href': alt_link.decode('utf-8')}))
    #f.source.feed.xml_append(E((OPENSEARCH_NAMESPACE, u'Query'), {u'role': u'request', u'searchTerms': search_terms.decode('utf-8')}))
    #maxarticles = DEFAULT_MAX_RESULTS
    maxarticles = 3
    for record in islice(doc.OAI_PMH, 0, maxarticles):
        resource = resources[id]
        print resource
        authors = [ (a, None, None) for a in resource[u'creator'] ]
        links = [
            (unicode(resource['handle']), u'alternate'),
        ]
        #categories = [ (unicode(k), SD_NS+u'authorKeyword') for k in authkw(article) ]
        #elements = [
        #    E((SD_NS, u'sd:journal-cover'), unicode(article.journalCover).strip() if hasattr(article, 'journalCover') else DEFAULT_ICON),
        #    E((SD_NS, u'sd:journal-name'), unicode(article.journalName)),
        #]
        f.append(
            id,
            unicode(resource['title'][0]),
            updated=unicode(resource['date'][0]),
            summary=unicode(resource['description'][0]),
            authors=authors,
            links=links,
            #categories=categories,
            #elements=elements,
        )

    return f.source.xml_encode('xml-indent')
Code example #8
File: oai.py Project: amber-reichert/ingestion
    def get_record(self, id):
        params = {'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': id}
        qstr = urllib.urlencode(params)
        url = self.root + '?' + qstr
        self.logger.debug('OAI request URL: {0}'.format(url))
        start_t = time.time()
        resp, content = self.h.request(url)
        retrieved_t = time.time()
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))
        doc = bindery.parse(url, model=OAI_GETRECORD_MODEL)

        record, rid = metadata_dict(generate_metadata(doc), nesteddict=False)
        for id_, props in (record if isinstance(record, list) else [record]):
            for k, v in props.iteritems():
                props[k] = [ U(item) for item in v ]

        return {'record' : record}
Code example #9
File: oai.py Project: amber-reichert/ingestion
    def list_records(self, set="", resumption_token="", metadataPrefix=""):
        '''
        List records. Use either the resumption token or set id.
        '''
        error = None

        if resumption_token:
            params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token}
        else:
            params = {'verb' : 'ListRecords', 'metadataPrefix': metadataPrefix, 'set': set}
        qstr = urllib.urlencode(params)
        url = self.root + '?' + qstr
        self.logger.debug('OAI request URL: {0}'.format(url))
        start_t = time.time()
        resp, content = self.h.request(url)
        retrieved_t = time.time()
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))

        resumption_token = ''
        if metadataPrefix in ["mods", "marc", "untl"]:
            xml_content = XML_PARSE(content)
            records = []
            error = getprop(xml_content, "OAI-PMH/error/#text", True)
            if error is None:
                for record in xml_content["OAI-PMH"]["ListRecords"]["record"]:
                    id = record["header"]["identifier"]
                    if "null" not in id:
                        records.append((id, record))
                if "resumptionToken" in xml_content["OAI-PMH"]["ListRecords"]:
                    resumption_token = xml_content["OAI-PMH"]["ListRecords"]["resumptionToken"]
                    if isinstance(resumption_token, dict):
                        resumption_token = resumption_token.get("#text", "")
        else:
            doc = bindery.parse(url, model=LISTRECORDS_MODELS[metadataPrefix])
            records, first_id = metadata_dict(generate_metadata(doc),
                                              nesteddict=False)

            for id_, props in records:
                for k, v in props.iteritems():
                    props[k] = [ U(item) for item in v ]
            if (doc.OAI_PMH.ListRecords is not None) and (doc.OAI_PMH.ListRecords.resumptionToken is not None):
                resumption_token = U(doc.OAI_PMH.ListRecords.resumptionToken)

        return {'records': records, 'resumption_token': resumption_token,
                'error': error}
Code example #10
File: oai.py Project: dpla/zen
    def list_records(self, set):
        '''
        List records for the given set id.
        '''
        #e.g. http://dspace.mit.edu/oai/request?verb=ListRecords&metadataPrefix=oai_dc&set=hdl_1721.1_18193
        qstr = urllib.urlencode({'verb' : 'ListRecords', 'metadataPrefix': 'oai_dc', 'set': set})
        url = self.root + '?' + qstr
        self.logger.debug('OAI request URL: {0}'.format(url))
        start_t = time.time()
        resp, content = self.h.request(url)
        retrieved_t = time.time()
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))
        doc = bindery.parse(url, model=OAI_LISTRECORDS_MODEL)
        #print >> sys.stderr, list(generate_metadata(doc))
        records, first_id = metadata_dict(generate_metadata(doc),
                                          nesteddict=False)
        for id_, props in records:
            for k, v in props.iteritems():
                props[k] = [ U(item) for item in v ]

        return records
Code example #11
File: moincms.py Project: dpla/akara
 def factory(rest_uri, relative, outputdir):
     req = urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT})
     resp = urllib2.urlopen(req)
     doc = bindery.parse(resp, standalone=True, model=MOIN_DOCBOOK_MODEL)
     original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
     #self.original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
     #amara.xml_print(self.content_cache)
     output = os.path.join(outputdir, relative)
     parent_dir = os.path.split(output)[0]
     try:
         os.makedirs(parent_dir)
     except OSError:
         pass
     metadata, first_id = metadata_dict(generate_metadata(doc))
     metadata = metadata[first_id]
     akara_type = first_item(first_item(metadata[u'ak-type']))
     #import sys; print >> sys.stderr, 'GRIPPO', akara_type.xml_value
     cls = node.NODES[akara_type.xml_value]
     instance = cls(rest_uri, relative, outputdir, cache=(doc, metadata, original_wiki_base))
     return instance
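
The bare `except OSError: pass` around os.makedirs also swallows real failures such as permission errors. A stricter variant (a sketch, not what this factory does) tolerates only "directory already exists":

#Sketch of a stricter makedirs guard than the bare except above.
import errno
import os

def ensure_dir(path):
    try:
        os.makedirs(path)
    except OSError, e:
        if e.errno != errno.EEXIST:  #re-raise anything but "already exists"
            raise
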
Code example #12
    def list_records(self, set="", resumption_token="", metadataPrefix=""):
        '''
        List records. Use either the resumption token or set id.
        '''
        if resumption_token:
            params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token}
        else:
            params = {'verb' : 'ListRecords', 'metadataPrefix': metadataPrefix, 'set': set}
        qstr = urllib.urlencode(params)
        url = self.root + '?' + qstr
        self.logger.debug('OAI request URL: {0}'.format(url))
        start_t = time.time()
        resp, content = self.h.request(url)
        retrieved_t = time.time()
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))

        if metadataPrefix == "mods" or metadataPrefix == "marc":
            xml_content = XML_PARSE(content)
            records = []
            for record in xml_content["OAI-PMH"]["ListRecords"]["record"]:
                id = record["header"]["identifier"]
                if "null" not in id:
                    records.append((id, record))
            if "resumptionToken" in xml_content["OAI-PMH"]["ListRecords"]:
                resumption_token = xml_content["OAI-PMH"]["ListRecords"]["resumptionToken"]
            else:
                resumption_token = ''
        else:
            doc = bindery.parse(url, model=LISTRECORDS_MODELS[metadataPrefix])
            records, first_id = metadata_dict(generate_metadata(doc),
                                              nesteddict=False)

            for id_, props in records:
                for k, v in props.iteritems():
                    props[k] = [ U(item) for item in v ]
            if (doc.OAI_PMH.ListRecords is not None) and (doc.OAI_PMH.ListRecords.resumptionToken is not None):
                resumption_token = U(doc.OAI_PMH.ListRecords.resumptionToken)
            else:
                resumption_token = ''

        return {'records' : records, 'resumption_token' : resumption_token}
Code example #13
    def test_metadata_extraction(self):
        """Test metadata extraction"""
        model = schematron_model(MODEL_A)
        doc = bindery.parse(INSTANCE_A_1, model=model)
        metadata = generate_metadata(doc)
        EXPECTED_MD = [(u'ep', u'place', u'Hailey,ID'),
                       (u'tse', u'place', u'Stamford,CT'),
                       (u'tse', u'opus', u'r2e0e3e5'),
                       (u'r2e0e3e5', u'title', u'The Wasteland'),
                       (u'tse', u'tag', u'old possum'),
                       (u'tse', u'tag', u'poet'),
                       (u'lh', u'place', u'Harlem,NY'),
                       (u'lh', u'tag', u'poet'),
                       (u'co', u'place', u'Idoto,Anambra'),
                       (u'co', u'opus', u'r2e0e7e5'),
                       (u'r2e0e7e5', u'title', u"Heaven's Gate"),
                       (u'co', u'tag', u'biafra'),
                       (u'co', u'tag', u'poet')]

        #print list(metadata)
        meta_list = normalize_generated_ids(list(metadata))
        self.assertEqual(meta_list, normalize_generated_ids(EXPECTED_MD))
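
The r2e0e3e5-style values are machine-generated resource ids and can differ between runs, so the test presumably maps them to stable placeholders before comparing; the real behavior of normalize_generated_ids is an assumption here. A toy version of the idea:

#Toy stand-in for normalize_generated_ids: map each generated id to a stable
#placeholder in order of first appearance, so two runs compare equal.
import re

def normalize_ids(triples, pattern=r'^r[0-9e]+$'):
    mapping = {}
    def norm(value):
        if re.match(pattern, value):
            return mapping.setdefault(value, u'id%d' % len(mapping))
        return value
    return [(norm(s), p, norm(o)) for s, p, o in triples]

print normalize_ids([(u'tse', u'opus', u'r2e0e3e5'),
                     (u'r2e0e3e5', u'title', u'The Wasteland')])
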
Code example #14
File: moin.py Project: dpla/akara
 def factory(rest_uri, moin_link=None, opener=None):
     opener = opener or urllib2.build_opener()
     logger.debug('rest_uri: ' + rest_uri)
     req = urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT})
     resp = opener.open(req)
     doc = bindery.parse(resp, standalone=True, model=MOIN_DOCBOOK_MODEL)
     original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
     #self.original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
     #amara.xml_print(self.content_cache)
     metadata, first_id = metadata_dict(generate_metadata(doc))
     metadata = metadata[first_id]
     akara_type = U(metadata[u'ak-type'])
     logger.debug('Type: ' + akara_type)
     try:
         #Older Moin CMS resource types are implemented by registration to the global node.NODES
         cls = node.NODES[akara_type]
     except KeyError:
         #Newer Moin CMS resource types are implemented by discovery of a URL,
         #to which a POST request executes the desired action
         return node.ENDPOINTS and (rest_uri, akara_type, node.ENDPOINTS[akara_type], doc, metadata, original_wiki_base)
     else:
         instance = cls(rest_uri, moin_link, opener, cache=(doc, metadata, original_wiki_base))
         return instance
Code example #15
def dspace_adapter(search=None, id=None):
    '''
    Sample queries:
    curl "http://*****:*****"
    '''
        #...doc.xml_select(u'//*[@class="result_table"]//*[@class="article_title"]'):
        for li in islice(doc.xml_select(u'//*[@id="'+RESULTS_DIV+'"]//*[@class="artifact-description"]/..'), 0, maxarticles):
            row = li.xml_parent.xml_parent
            title = li.xml_select(u'.//*[@class="artifact-title"]')[0]
            rel_id = title.a.href.partition(u'/handle/')[2]
            dspace_id = DSPACE_ID_BASE + rel_id
            alt_link = DSPACE_ARTICLE_BASE + u'1721.1/7488'
            #Do not quote.  DSpace doesn't like that
            #alt_link = DSPACE_ARTICLE_BASE + urllib.quote(u'1721.1/7488', '')
            title = unicode(title)
            summary = unicode(row.xml_select(u'string(.//*[@class="summary"])'))
            updated = unicode(row.xml_select(u'string(.//*[@class="date"])')).strip().partition(u'Published: ')[2]
            #updated = time.strptime(updated, "%m/%d/%Y %H:%M:%S") #2/11/2008 2:20:00 AM
            authors = [ (name.strip(), None, None) for name in unicode(row.xml_select(u'string(.//*[@class="author"]//b)')).split(';') ]

            #Retrieve the DSpace page
            qstr = urllib.urlencode({'verb' : 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': dspace_id})
            url = DSPACE_OAI_ENDPOINT + '?' + qstr
            print >> sys.stderr, url
            #keywords = [ (k.strip(), JOVE_TAG) for k in unicode(row.xml_select(u'string(.//*[@class="keywords"])')).split(',') ]

            doc = bindery.parse(url, model=OAI_MODEL)
            #print >> sys.stderr, list(generate_metadata(doc))
            resources, first_id = metadata_dict(generate_metadata(doc))
            record = doc.OAI_PMH

            resource = resources[first_id]

            authors = [ (a, None, None) for a in resource[u'creator'] ]
            links = [
                (DSPACE_ARTICLE_BASE + rel_id, u'alternate'),
                (u'dspace?id=' + dspace_id, u'self'),
            ]
            elements = [
                E((ATOM_NAMESPACE, u'content'), {u'src': alt_link}),
            ]
            f.append(
                dspace_id,
                U(resource['title']),
                updated=U(resource['date']),
                summary=U(resource['description']),
                authors=authors,
                links=links,
                #categories=categories,
                elements=elements,
            )

        #FIXME: indent
        return f.xml_encode()
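
Example #15 scrapes the DSpace HTML results only to recover a handle, then goes back to the OAI-PMH interface for clean Dublin Core metadata. The second-hop URL construction in isolation; the endpoint and id base are assumed values modeled on the MIT instance used elsewhere in these examples:

#Sketch: turn a DSpace handle into an OAI-PMH GetRecord URL.
import urllib

DSPACE_OAI_ENDPOINT = 'http://dspace.mit.edu/oai/request'
DSPACE_ID_BASE = 'oai:dspace.mit.edu:'

def oai_record_url(handle):
    qstr = urllib.urlencode({'verb': 'GetRecord',
                             'metadataPrefix': 'oai_dc',
                             'identifier': DSPACE_ID_BASE + handle})
    return DSPACE_OAI_ENDPOINT + '?' + qstr

print oai_record_url('1721.1/5451')
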