Example #1
def pubmed_adapter(search=None, id=None):
    '''
    Sample queries:
    #curl "http://localhost:8880/pubmed?"
    curl "http://localhost:8880/pubmed?search=stem+cells"
    curl "http://localhost:8880/pubmed?id=19358275"
    '''
    #FIXME: How do we handle no search or id param?  Just serve up the latest entries?  Or error as below?
    #assert_(not(search and id), msg="Specify either the 'search' or the 'id' query parameter, not both.")
    if search:
        #search = first_item(search)
        #reldate: only search for last N days
        #query = urllib.urlencode({'db' : NCBI_DB, 'term': query, 'reldate': '60', 'datetype': 'edat', 'retmax': DEFAULT_MAX_RESULTS, 'usehistory': 'y'})
        query = urllib.urlencode({'term': search, 'db' : NCBI_DB, 'datetype': 'edat', 'retmax': DEFAULT_MAX_RESULTS, 'usehistory': 'y'})
        search_url = NCBI_SEARCH_PATTERN + query
        logger.debug("Term search URL: " + search_url)
        doc = bindery.parse(search_url, standalone=True)
        search_terms = search
        ids = ( unicode(i) for i in doc.eSearchResult.IdList.Id )
        ids = ','.join(ids)
        self_link = '/pubmed?search='+search
    else:
        ids = first_item(id)
        #fulltext = fulltext[0] if fulltext else u'no'
        #if fulltext == 'yes':
        search_terms = ids
        self_link = '/pubmed?id='+ids
    query = urllib.urlencode({'db' : NCBI_DB, 'id': ids, 'retmode': 'xml'})
    search_url = NCBI_ARTICLE_ACCESS_PATTERN + query
    logger.debug("ID search URL: " + search_url)
    alt_link = search_url
    doc = bindery.parse(search_url, standalone=True, model=PUBMED_MODEL)
    #doc = bindery.parse(open('/Users/uche/tmp/efetch.fcgi.html'), standalone=True, model=PUBMED_MODEL)
    metadata, first_id = metadata_dict(generate_metadata(doc))
    return atom_results(doc, metadata, self_link, alt_link, search_terms)
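pubmed_adapter drives NCBI's two-step E-utilities flow: an esearch call resolves the query to PubMed IDs, then an efetch call retrieves the full records for those IDs. A minimal standalone sketch of the URL construction, assuming the standard E-utilities endpoints (the module's NCBI_SEARCH_PATTERN and NCBI_ARTICLE_ACCESS_PATTERN constants are not shown in this excerpt):

import urllib

#Assumed endpoints; the adapter's constants presumably point at these
ESEARCH_URL = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?'
EFETCH_URL = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'

def esearch_url(term, retmax=100):
    #Step 1: resolve a search term to PubMed IDs (read eSearchResult/IdList/Id
    #from the response)
    return ESEARCH_URL + urllib.urlencode({'db': 'pubmed', 'term': term, 'retmax': retmax})

def efetch_url(ids):
    #Step 2: fetch the full records for a comma-separated ID list as XML
    return EFETCH_URL + urllib.urlencode({'db': 'pubmed', 'id': ','.join(ids), 'retmode': 'xml'})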
Example #2
    def list_records(self, set="", resumption_token = ""):
        '''
        List records. Use either the resumption token or set id.
        '''
        if resumption_token:
            params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token}
        else:
            params = {'verb' : 'ListRecords', 'metadataPrefix': 'oai_dc', 'set': set}
        qstr = urllib.urlencode(params)
        url = self.root + '?' + qstr
        self.logger.debug('OAI request URL: {0}'.format(url))
        start_t = time.time()
        resp, content = self.h.request(url)
        retrieved_t = time.time()
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))
        doc = bindery.parse(url, model=OAI_LISTRECORDS_MODEL)

        records, first_id = metadata_dict(generate_metadata(doc),
                                          nesteddict=False)
        for id_, props in records:
            for k, v in props.iteritems():
                props[k] = [ U(item) for item in v ]
        if (doc.OAI_PMH.ListRecords is not None) and (doc.OAI_PMH.ListRecords.resumptionToken is not None):
            resumption_token = U(doc.OAI_PMH.ListRecords.resumptionToken)
        else:
            resumption_token = ''
        return {'records' : records, 'resumption_token' : resumption_token}
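OAI-PMH pages large result sets, so list_records is meant to be called repeatedly, feeding each response's resumption token back in until the server returns an empty one. A sketch of that loop, assuming `client` is an instance of the class above:

def harvest_set(client, set_id):
    #Yield (id, properties) pairs across every page of the set
    token = ''
    while True:
        result = client.list_records(set=set_id, resumption_token=token)
        for id_, props in result['records']:
            yield id_, props
        token = result['resumption_token']
        if not token:
            break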
Example #3
File: moin.py Project: mredar/akara
def factory(rest_uri, moin_link=None, opener=None):
    opener = opener or urllib2.build_opener()
    logger.debug('rest_uri: ' + rest_uri)
    req = urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT})
    resp = opener.open(req)
    doc = bindery.parse(resp, standalone=True, model=MOIN_DOCBOOK_MODEL)
    original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
    #self.original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
    #amara.xml_print(self.content_cache)
    metadata, first_id = metadata_dict(generate_metadata(doc))
    metadata = metadata[first_id]
    akara_type = U(metadata[u'ak-type'])
    logger.debug('Type: ' + akara_type)
    try:
        #Older Moin CMS resource types are implemented by registration to the global node.NODES
        cls = node.NODES[akara_type]
    except KeyError:
        #Newer Moin CMS resource types are implemented by discovery of a URL,
        #to which a POST request executes the desired action
        return node.ENDPOINTS and (rest_uri, akara_type,
                                   node.ENDPOINTS[akara_type], doc,
                                   metadata, original_wiki_base)
    else:
        instance = cls(rest_uri,
                       moin_link,
                       opener,
                       cache=(doc, metadata, original_wiki_base))
        return instance
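The try/except above is a two-level dispatch: node.NODES maps a Moin resource type to a handler class (the older style), while node.ENDPOINTS maps a type to a URL that performs the action when POSTed to (the newer style). A hypothetical illustration of the registries' shape, not the actual akara node module:

class node:
    #akara_type -> resource class; factory() instantiates these directly
    NODES = {}
    #akara_type -> action URL; factory() hands these back for a later POST
    ENDPOINTS = {}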
Example #4
File: oai.py Project: dpla/zen
    def search(self, term):
        #assumption: `term` is the item's OAI identifier; the code deriving
        #dspace_id is not part of this excerpt
        dspace_id = term
        qstr = urllib.urlencode({'verb' : 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': dspace_id})
        url = DSPACE_OAI_ENDPOINT + '?' + qstr
        logger.debug('DSpace URL: ' + str(url))
        #keywords = [ (k.strip(), JOVE_TAG) for k in unicode(row.xml_select(u'string(.//*[@class="keywords"])')).split(',') ]

        doc = bindery.parse(url, model=OAI_MODEL)
        #print >> sys.stderr, list(generate_metadata(doc))
        resources, first_id = metadata_dict(generate_metadata(doc), nesteddict=False)
        record = doc.OAI_PMH

        resource = resources[first_id]
Example #5
def atomize_oai_record(endpoint=None, id=None):
    '''
    endpoint - the OAI request URL, e.g. http://dspace.mit.edu/oai/request
    id, e.g. the article ID, e.g. oai:dspace.mit.edu:1721.1/5451
    
    Sample request:
    curl "http://localhost:8880/akara.oai.atom?endpoint=http://dspace.mit.edu/oai/request&id=oai:dspace.mit.edu:1721.1/5451"
    '''
    if endpoint is None:
        raise ValueError('endpoint required')
    if id is None:
        raise ValueError('id required')
    qstr = urllib.urlencode({
        'verb': 'GetRecord',
        'metadataPrefix': 'oai_dc',
        'identifier': id
    })
    url = endpoint + '?' + qstr
    doc = bindery.parse(url, model=OAI_MODEL)
    resources, first_id = metadata_dict(generate_metadata(doc))
    #print resources
    f = feed(ATOM_ENVELOPE)
    #f = feed(ATOM_ENVELOPE, title=resources['title'], id=resources['id'])
    #f.source.feed.xml_append(E((ATOM_NAMESPACE, u'link'), {u'rel': u'self', u'type': u'application/atom+xml', u'href': self_link.decode('utf-8')}))
    #f.source.feed.xml_append(E((ATOM_NAMESPACE, u'link'), {u'rel': u'search', u'type': u'application/opensearchdescription+xml', u'href': u'http://kds-kci.zepheira.com/sciencedirect.discovery'}))
    #f.source.feed.xml_append(E((ATOM_NAMESPACE, u'link'), {u'rel': u'alternate', u'type': u'text/xml', u'href': alt_link.decode('utf-8')}))
    #f.source.feed.xml_append(E((OPENSEARCH_NAMESPACE, u'Query'), {u'role': u'request', u'searchTerms': search_terms.decode('utf-8')}))
    #maxarticles = DEFAULT_MAX_RESULTS
    maxarticles = 3
    for record in islice(doc.OAI_PMH, 0, maxarticles):
        resource = resources[id]
        print resource
        authors = [ (a, None, None) for a in resource[u'creator'] ]
        links = [
            (unicode(resource['handle']), u'alternate'),
        ]
        #categories = [ (unicode(k), SD_NS+u'authorKeyword') for k in authkw(article) ]
        #elements = [
        #    E((SD_NS, u'sd:journal-cover'), unicode(article.journalCover).strip() if hasattr(article, 'journalCover') else DEFAULT_ICON),
        #    E((SD_NS, u'sd:journal-name'), unicode(article.journalName)),
        #]
        f.append(
            id,
            unicode(resource['title'][0]),
            updated=unicode(resource['date'][0]),
            summary=unicode(resource['description'][0]),
            authors=authors,
            links=links,
            #categories=categories,
            #elements=elements,
        )

    return f.source.xml_encode('xml-indent')
Example #6
def __init__(self, rest_uri, opener):
    self.rest_uri = rest_uri
    self.opener = opener
    #from node.factory
    req = urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT})
    print >> sys.stderr, 'rest_uri: ', rest_uri
    with closing(opener.open(req)) as resp:
        doc = bindery.parse(resp, standalone=True, model=MOIN_DOCBOOK_MODEL)
        original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
        #self.original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
        #amara.xml_print(self.content_cache)
    metadata = metadata_dict(generate_metadata(doc))
    self.cache = (doc, metadata, original_wiki_base)
    return
Example #7
    def get_record(self, id):
        params = {'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': id}
        qstr = urllib.urlencode(params)
        url = self.root + '?' + qstr
        self.logger.debug('OAI request URL: {0}'.format(url))
        start_t = time.time()
        resp, content = self.h.request(url)
        retrieved_t = time.time()
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))
        doc = bindery.parse(url, model=OAI_GETRECORD_MODEL)

        record, rid = metadata_dict(generate_metadata(doc), nesteddict=False)
        for id_, props in (record if isinstance(record, list) else [record]):
            for k, v in props.iteritems():
                props[k] = [ U(item) for item in v ]

        return {'record' : record}
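A usage sketch for get_record, assuming `client` is an instance of the class above and the argument is an OAI identifier as elsewhere in these examples:

def fetch_record(client, oai_id):
    #the returned record is a sequence of (id, props) pairs whose property
    #values have been normalized to lists of unicode strings
    return client.get_record(oai_id)['record']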
Example #8
    def list_records(self, set="", resumption_token="", metadataPrefix=""):
        '''
        List records. Use either the resumption token or set id.
        '''
        error = None

        if resumption_token:
            params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token}
        else:
            params = {'verb' : 'ListRecords', 'metadataPrefix': metadataPrefix, 'set': set}
        qstr = urllib.urlencode(params)
        url = self.root + '?' + qstr
        self.logger.debug('OAI request URL: {0}'.format(url))
        start_t = time.time()
        resp, content = self.h.request(url)
        retrieved_t = time.time()
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))

        resumption_token = ''
        if metadataPrefix in ["mods", "marc", "untl"]:
            xml_content = XML_PARSE(content)
            records = []
            error = getprop(xml_content, "OAI-PMH/error/#text", True)
            if error is None:
                for record in xml_content["OAI-PMH"]["ListRecords"]["record"]:
                    id = record["header"]["identifier"]
                    if "null" not in id:
                        records.append((id, record))
                if "resumptionToken" in xml_content["OAI-PMH"]["ListRecords"]:
                    resumption_token = xml_content["OAI-PMH"]["ListRecords"]["resumptionToken"]
                    if isinstance(resumption_token, dict):
                        resumption_token = resumption_token.get("#text", "")
        else:
            doc = bindery.parse(url, model=LISTRECORDS_MODELS[metadataPrefix])
            records, first_id = metadata_dict(generate_metadata(doc),
                                              nesteddict=False)

            for id_, props in records:
                for k, v in props.iteritems():
                    props[k] = [ U(item) for item in v ]
            if (doc.OAI_PMH.ListRecords is not None) and (doc.OAI_PMH.ListRecords.resumptionToken is not None):
                resumption_token = U(doc.OAI_PMH.ListRecords.resumptionToken)

        return {'records': records, 'resumption_token': resumption_token,
                'error': error}
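Both branches return the same {'records', 'resumption_token', 'error'} shape, and metadataPrefix still has to accompany a resumption token so the right parse path is chosen. A harvesting sketch under those assumptions, with `client` an instance of the class above:

def harvest_mods(client, set_id):
    #page through a set with the 'mods' prefix, stopping on an OAI error
    #(e.g. noRecordsMatch) or an exhausted resumption token
    records = []
    token = ''
    while True:
        result = client.list_records(set=set_id, resumption_token=token,
                                     metadataPrefix='mods')
        if result['error'] is not None:
            break
        records.extend(result['records'])
        token = result['resumption_token']
        if not token:
            break
    return records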
Example #9
File: oai.py Project: dpla/zen
    def list_records(self, set):
        '''
        List records for the given set id.
        '''
        #e.g. http://dspace.mit.edu/oai/request?verb=ListRecords&metadataPrefix=oai_dc&set=hdl_1721.1_18193
        qstr = urllib.urlencode({'verb' : 'ListRecords', 'metadataPrefix': 'oai_dc', 'set': set})
        url = self.root + '?' + qstr
        self.logger.debug('OAI request URL: {0}'.format(url))
        start_t = time.time()
        resp, content = self.h.request(url)
        retrieved_t = time.time()
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))
        doc = bindery.parse(url, model=OAI_LISTRECORDS_MODEL)
        #print >> sys.stderr, list(generate_metadata(doc))
        records, first_id = metadata_dict(generate_metadata(doc),
                                          nesteddict=False)
        for id_, props in records:
            for k, v in props.iteritems():
                props[k] = [ U(item) for item in v ]

        return records
Example #10
def factory(rest_uri, relative, outputdir):
    req = urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT})
    resp = urllib2.urlopen(req)
    doc = bindery.parse(resp, standalone=True, model=MOIN_DOCBOOK_MODEL)
    original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
    #self.original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
    #amara.xml_print(self.content_cache)
    output = os.path.join(outputdir, relative)
    parent_dir = os.path.split(output)[0]
    try:
        os.makedirs(parent_dir)
    except OSError:
        pass
    metadata, first_id = metadata_dict(generate_metadata(doc))
    metadata = metadata[first_id]
    akara_type = first_item(first_item(metadata[u'ak-type']))
    #import sys; print >> sys.stderr, 'GRIPPO', akara_type.xml_value
    cls = node.NODES[akara_type.xml_value]
    instance = cls(rest_uri, relative, outputdir, cache=(doc, metadata, original_wiki_base))
    return instance
Example #11
    def list_records(self, set="", resumption_token="", metadataPrefix=""):
        '''
        List records. Use either the resumption token or set id.
        '''
        if resumption_token:
            params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token}
        else:
            params = {'verb' : 'ListRecords', 'metadataPrefix': metadataPrefix, 'set': set}
        qstr = urllib.urlencode(params)
        url = self.root + '?' + qstr
        self.logger.debug('OAI request URL: {0}'.format(url))
        start_t = time.time()
        resp, content = self.h.request(url)
        retrieved_t = time.time()
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))

        if metadataPrefix == "mods" or metadataPrefix == "marc":
            xml_content = XML_PARSE(content)
            records = []
            for record in xml_content["OAI-PMH"]["ListRecords"]["record"]:
                id = record["header"]["identifier"]
                if "null" not in id:
                    records.append((id, record))
            if "resumptionToken" in xml_content["OAI-PMH"]["ListRecords"]:
                resumption_token = xml_content["OAI-PMH"]["ListRecords"]["resumptionToken"]
            else:
                resumption_token = ''
        else:
            doc = bindery.parse(url, model=LISTRECORDS_MODELS[metadataPrefix])
            records, first_id = metadata_dict(generate_metadata(doc),
                                              nesteddict=False)

            for id_, props in records:
                for k, v in props.iteritems():
                    props[k] = [ U(item) for item in v ]
            if (doc.OAI_PMH.ListRecords is not None) and (doc.OAI_PMH.ListRecords.resumptionToken is not None):
                resumption_token = U(doc.OAI_PMH.ListRecords.resumptionToken)
            else:
                resumption_token = ''

        return {'records' : records, 'resumption_token' : resumption_token}
Example #12
    def test_metadata_extraction(self):
        """Test metadata extraction"""
        model = schematron_model(MODEL_A)
        doc = bindery.parse(INSTANCE_A_1, model=model)
        metadata = generate_metadata(doc)
        EXPECTED_MD = [(u'ep', u'place', u'Hailey,ID'),
                       (u'tse', u'place', u'Stamford,CT'),
                       (u'tse', u'opus', u'r2e0e3e5'),
                       (u'r2e0e3e5', u'title', u'The Wasteland'),
                       (u'tse', u'tag', u'old possum'),
                       (u'tse', u'tag', u'poet'),
                       (u'lh', u'place', u'Harlem,NY'),
                       (u'lh', u'tag', u'poet'),
                       (u'co', u'place', u'Idoto,Anambra'),
                       (u'co', u'opus', u'r2e0e7e5'),
                       (u'r2e0e7e5', u'title', u"Heaven's Gate"),
                       (u'co', u'tag', u'biafra'),
                       (u'co', u'tag', u'poet')]

        #print list(metadata)
        meta_list = normalize_generated_ids(list(metadata))
        self.assertEqual(meta_list, normalize_generated_ids(EXPECTED_MD))
Example #13
def dspace_adapter(search=None, id=None):
    '''
    Sample queries:
    curl "http://*****"
    '''
    #(the code that fetches the search-results page and sets up the Atom feed
    # is missing from this excerpt; it defines doc, f, RESULTS_DIV and maxarticles)
    for row in doc.xml_select(u'//*[@class="result_table"]//*[@class="article_title"]'):
        for li in islice(doc.xml_select(u'//*[@id="'+RESULTS_DIV+'"]//*[@class="artifact-description"]/..'), 0, maxarticles):
            row = li.xml_parent.xml_parent
            title = li.xml_select(u'.//*[@class="artifact-title"]')[0]
            rel_id = title.a.href.partition(u'/handle/')[2]
            dspace_id = DSPACE_ID_BASE + rel_id
            alt_link = DSPACE_ARTICLE_BASE + u'1721.1/7488'
            #Do not quote.  DSpace doesn't like that
            #alt_link = DSPACE_ARTICLE_BASE + urllib.quote(u'1721.1/7488', '')
            title = unicode(title)
            summary = unicode(row.xml_select(u'string(.//*[@class="summary"])'))
            updated = unicode(row.xml_select(u'string(.//*[@class="date"])')).strip().partition(u'Published: ')[2]
            #updated = time.strptime(updated, "%m/%d/%Y %H:%M:%S") #2/11/2008 2:20:00 AM
            authors = [ (name.strip(), None, None) for name in unicode(row.xml_select(u'string(.//*[@class="author"]//b)')).split(';') ]

            #Retrieve the DSpace page
            qstr = urllib.urlencode({'verb' : 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': dspace_id})
            url = DSPACE_OAI_ENDPOINT + '?' + qstr
            print >> sys.stderr, url
            #keywords = [ (k.strip(), JOVE_TAG) for k in unicode(row.xml_select(u'string(.//*[@class="keywords"])')).split(',') ]

            doc = bindery.parse(url, model=OAI_MODEL)
            #print >> sys.stderr, list(generate_metadata(doc))
            resources, first_id = metadata_dict(generate_metadata(doc))
            record = doc.OAI_PMH

            resource = resources[first_id]

            authors = [ (a, None, None) for a in resource[u'creator'] ]
            links = [
                (DSPACE_ARTICLE_BASE + rel_id, u'alternate'),
                (u'dspace?id=' + dspace_id, u'self'),
            ]
            elements = [
                E((ATOM_NAMESPACE, u'content'), {u'src': alt_link}),
            ]
            f.append(
                dspace_id,
                U(resource['title']),
                updated=U(resource['date']),
                summary=U(resource['description']),
                authors=authors,
                links=links,
                #categories=categories,
                elements=elements,
            )

        #FIXME: indent
        return f.xml_encode()