Exemple #1
0
    def list_records(self, set="", resumption_token = ""):
        '''
        Fetch one page of an OAI-PMH ListRecords response.

        set - value sent as the 'set' parameter (with metadataPrefix oai_dc)
              when no resumption token is given
        resumption_token - when non-empty it is sent on its own and set is
                           ignored

        Returns a dict holding 'records' (the first result of metadata_dict,
        with every property's values passed through U) and
        'resumption_token' ('' when the response carries none).
        '''
        if not resumption_token:
            params = {'verb' : 'ListRecords', 'metadataPrefix': 'oai_dc', 'set': set}
        else:
            params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token}
        url = self.root + '?' + urllib.urlencode(params)
        self.logger.debug('OAI request URL: {0}'.format(url))

        # The request is timed only so the duration can be logged.
        # NOTE(review): `content` is not used below; the parser is handed the
        # URL rather than the fetched body - confirm this is intended.
        started = time.time()
        resp, content = self.h.request(url)
        elapsed = time.time() - started
        self.logger.debug('Retrieved in {0}s'.format(elapsed))

        doc = bindery.parse(url, model=OAI_LISTRECORDS_MODEL)
        records, first_id = metadata_dict(generate_metadata(doc),
                                          nesteddict=False)

        # Rewrite each property's value list in place, item by item, through U.
        for _, props in records:
            for key in props.keys():
                props[key] = [ U(entry) for entry in props[key] ]

        # Default to '' unless the response has a ListRecords element that
        # itself carries a resumptionToken.
        next_token = ''
        listing = doc.OAI_PMH.ListRecords
        if listing is not None and listing.resumptionToken is not None:
            next_token = U(listing.resumptionToken)
        return {'records' : records, 'resumption_token' : next_token}
Exemple #2
0
 def factory(rest_uri, moin_link=None, opener=None):
     '''
     Fetch a resource and return the object that handles its Akara type.

     rest_uri - URL requested with an Accept header of DOCBOOK_IMT
     moin_link - passed through to the node class constructor
     opener - opener used for the request; one is built when none is given

     The type is read from the u'ak-type' entry of the first resource's
     metadata. When that type is registered in node.NODES, an instance of the
     registered class is returned. Otherwise the result is node.ENDPOINTS
     itself when it is empty/false, or else the tuple
     (rest_uri, akara_type, endpoint, doc, metadata, original_wiki_base).
     '''
     if not opener:
         opener = urllib2.build_opener()
     logger.debug('rest_uri: ' + rest_uri)
     request = urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT})
     resp = opener.open(request)
     doc = bindery.parse(resp, standalone=True, model=MOIN_DOCBOOK_MODEL)
     response_headers = dict(resp.info())
     original_wiki_base = response_headers[ORIG_BASE_HEADER]
     all_metadata, first_id = metadata_dict(generate_metadata(doc))
     metadata = all_metadata[first_id]
     akara_type = U(metadata[u'ak-type'])
     logger.debug('Type: ' + akara_type)
     try:
         #Older Moin CMS resource types are implemented by registration to the global node.NODES
         cls = node.NODES[akara_type]
     except KeyError:
         #Newer Moin CMS resource types are implemented by discovery of a URL,
         #to which a POST request executes the desired action
         if not node.ENDPOINTS:
             return node.ENDPOINTS
         return (rest_uri, akara_type,
                 node.ENDPOINTS[akara_type], doc,
                 metadata, original_wiki_base)
     # Registered type: hand the already-fetched pieces over as the cache.
     cached = (doc, metadata, original_wiki_base)
     return cls(rest_uri, moin_link, opener, cache=cached)
def pubmed_adapter(search=None, id=None):
    '''
    Serve PubMed articles selected either by search term or by article ID.

    search - term sent to the NCBI search URL; the IDs found are then fetched
    id - article ID(s) to fetch directly, used only when search is empty.
         It is sent unchanged as the 'id' query value (presumably one ID or
         a comma-separated string of IDs - TODO confirm against callers)

    Returns the result of atom_results for the fetched article document.
    Raises ValueError when neither search nor id is supplied.

    Sample queries:
    curl "http://localhost:8880/pubmed?search=stem+cells"
    curl "http://localhost:8880/pubmed?id=19358275"
    '''
    if search:
        #reldate: only search for last N days
        #query = urllib.urlencode({'db' : NCBI_DB, 'term': query, 'reldate': '60', 'datetype': 'edat', 'retmax': DEFAULT_MAX_RESULTS, 'usehistory': 'y'})
        query = urllib.urlencode({'term': search, 'db' : NCBI_DB, 'datetype': 'edat', 'retmax': DEFAULT_MAX_RESULTS, 'usehistory': 'y'})
        search_url = NCBI_SEARCH_PATTERN + query
        logger.debug("Term search URL: " + search_url)
        doc = bindery.parse(search_url, standalone=True)
        search_terms = search
        # Collapse the IDs found by the search into one comma-separated value
        # for the article fetch below.
        ids = ','.join(unicode(i) for i in doc.eSearchResult.IdList.Id)
        self_link = '/pubmed?search='+search
    elif id:
        # `ids` used to be read here without ever being assigned (its
        # assignment was commented out), so every id-only request failed
        # with a NameError.
        ids = id
        search_terms = ids
        self_link = '/pubmed?id='+ids
    else:
        # Fail with an explicit message instead of an unbound-variable error.
        raise ValueError("You must specify either the 'search' or the 'id' query parameter.")
    query = urllib.urlencode({'db' : NCBI_DB, 'id': ids, 'retmode': 'xml'})
    search_url = NCBI_ARTICLE_ACCESS_PATTERN + query
    logger.debug("ID search URL: " + search_url)
    alt_link = search_url
    doc = bindery.parse(search_url, standalone=True, model=PUBMED_MODEL)
    # first_id is returned by metadata_dict but not needed here.
    metadata, first_id = metadata_dict(generate_metadata(doc))
    return atom_results(doc, metadata, self_link, alt_link, search_terms)
Exemple #4
0
Fichier : oai.py Projet : dpla/zen
    def search(self, term):
        '''
        Request one record from DSPACE_OAI_ENDPOINT with the OAI-PMH GetRecord
        verb (metadataPrefix oai_dc) and parse the response.

        NOTE(review): `term` is not used in the visible body, and `dspace_id`
        is not defined in it - presumably a module-level name or an omission;
        confirm. The body ends without a return and leaves `record` and
        `resource` unused, so this snippet may be truncated.
        '''
        qstr = urllib.urlencode({'verb' : 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': dspace_id})
        url = DSPACE_OAI_ENDPOINT + '?' + qstr
        logger.debug('DSpace URL: ' + str(url))
        #keywords = [ (k.strip(), JOVE_TAG) for k in unicode(row.xml_select(u'string(.//*[@class="keywords"])')).split(',') ]

        # The parser is given the URL itself, together with the OAI model.
        doc = bindery.parse(url, model=OAI_MODEL)
        #print >> sys.stderr, list(generate_metadata(doc))
        resources, first_id = metadata_dict(generate_metadata(doc), nesteddict=False)
        record = doc.OAI_PMH

        # Index the metadata by the first resource ID returned alongside it.
        resource = resources[first_id]
Exemple #5
0
    def search(self, term):
        '''
        Build a GetRecord request (oai_dc) against DSPACE_OAI_ENDPOINT, parse
        the response with OAI_MODEL and pick out the first resource.

        NOTE(review): the visible body never reads `term` and never defines
        `dspace_id` - presumably it comes from an enclosing scope; verify.
        Nothing is returned and `record` / `resource` are unused, which
        suggests the rest of the method is missing from this snippet.
        '''
        qstr = urllib.urlencode({'verb' : 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': dspace_id})
        url = DSPACE_OAI_ENDPOINT + '?' + qstr
        logger.debug('DSpace URL: ' + str(url))
        #keywords = [ (k.strip(), JOVE_TAG) for k in unicode(row.xml_select(u'string(.//*[@class="keywords"])')).split(',') ]

        # bindery.parse receives the URL, not a previously fetched body.
        doc = bindery.parse(url, model=OAI_MODEL)
        #print >> sys.stderr, list(generate_metadata(doc))
        resources, first_id = metadata_dict(generate_metadata(doc), nesteddict=False)
        record = doc.OAI_PMH

        # first_id comes from metadata_dict and keys into its first result.
        resource = resources[first_id]
Exemple #6
0
    def get_record(self, id):
        '''
        Fetch a single record with the OAI-PMH GetRecord verb (oai_dc).

        id - value sent as the 'identifier' request parameter

        Returns {'record': record}, where record is the first result of
        metadata_dict with every property's values passed through U.
        '''
        params = {'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': id}
        qstr = urllib.urlencode(params)
        url = self.root + '?' + qstr
        self.logger.debug('OAI request URL: {0}'.format(url))
        # NOTE(review): `content` is not used below; the parser is handed the
        # URL rather than the fetched body - confirm this is intended.
        start_t = time.time()
        resp, content = self.h.request(url)
        retrieved_t = time.time()
        # This used to read `'...{0}s',format(...)`: the comma called the
        # builtin format() and passed its result as a logging argument to a
        # message with no %-placeholder, so the elapsed time was never logged.
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))
        doc = bindery.parse(url, model=OAI_GETRECORD_MODEL)

        record, rid = metadata_dict(generate_metadata(doc), nesteddict=False)
        # A non-list result is wrapped so the same loop serves both shapes.
        for id_, props in (record if isinstance(record, list) else [record]):
            for k, v in props.iteritems():
                props[k] = [ U(item) for item in v ]

        return {'record' : record}
Exemple #7
0
    def get_record(self, id):
        '''
        Retrieve one record by identifier through OAI-PMH GetRecord (oai_dc).

        id - sent as the 'identifier' parameter of the request

        Returns a dict with a single key, 'record', holding the first result
        of metadata_dict after each property's values went through U.
        '''
        params = {'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': id}
        qstr = urllib.urlencode(params)
        url = self.root + '?' + qstr
        self.logger.debug('OAI request URL: {0}'.format(url))
        # The request is timed only so the duration can be logged.
        # NOTE(review): the fetched `content` is unused and the parser is
        # given the URL instead - verify that this is deliberate.
        start_t = time.time()
        resp, content = self.h.request(url)
        retrieved_t = time.time()
        # Fixed `,format(` -> `.format(`: with the comma the duration became
        # a stray logging argument for a message without any %-placeholder
        # and was never rendered.
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))
        doc = bindery.parse(url, model=OAI_GETRECORD_MODEL)

        record, rid = metadata_dict(generate_metadata(doc), nesteddict=False)
        # Iterate a list result directly; wrap anything else in a list first.
        for id_, props in (record if isinstance(record, list) else [record]):
            for k, v in props.iteritems():
                props[k] = [ U(item) for item in v ]

        return {'record' : record}
Exemple #8
0
    def list_records(self, set="", resumption_token="", metadataPrefix=""):
        '''
        Fetch one page of an OAI-PMH ListRecords response.

        set - sent as the 'set' parameter when no resumption token is given
        resumption_token - when non-empty it is sent on its own
        metadataPrefix - sent with set; it also selects how the response is
                         read: "mods", "marc" and "untl" go through
                         XML_PARSE, anything else through the bindery model
                         in LISTRECORDS_MODELS

        Returns a dict with 'records', the next 'resumption_token' ('' when
        there is none) and 'error' (None unless the XML_PARSE path found an
        OAI-PMH error text).
        '''
        error = None

        if not resumption_token:
            params = {'verb' : 'ListRecords', 'metadataPrefix': metadataPrefix, 'set': set}
        else:
            params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token}
        url = self.root + '?' + urllib.urlencode(params)
        self.logger.debug('OAI request URL: {0}'.format(url))

        # The request is timed only so the duration can be logged.
        started = time.time()
        resp, content = self.h.request(url)
        elapsed = time.time() - started
        self.logger.debug('Retrieved in {0}s'.format(elapsed))

        resumption_token = ''
        if metadataPrefix not in ["mods", "marc", "untl"]:
            # Bindery path: the parser is given the URL and the model
            # registered for this prefix.
            doc = bindery.parse(url, model=LISTRECORDS_MODELS[metadataPrefix])
            records, first_id = metadata_dict(generate_metadata(doc),
                                            nesteddict=False)

            for _, props in records:
                for key in props.keys():
                    props[key] = [ U(entry) for entry in props[key] ]

            listing = doc.OAI_PMH.ListRecords
            if listing is not None and listing.resumptionToken is not None:
                resumption_token = U(listing.resumptionToken)
        else:
            # Mapping path: the fetched body is parsed into nested mappings.
            parsed = XML_PARSE(content)
            records = []
            error = getprop(parsed, "OAI-PMH/error/#text", True)
            if error is None:
                listing = parsed["OAI-PMH"]["ListRecords"]
                # Records whose identifier contains "null" are left out.
                for entry in listing["record"]:
                    identifier = entry["header"]["identifier"]
                    if "null" in identifier:
                        continue
                    records.append((identifier, entry))
                if "resumptionToken" in listing:
                    resumption_token = listing["resumptionToken"]
                    # A mapping-valued token is reduced to its "#text".
                    if isinstance(resumption_token, dict):
                        resumption_token = resumption_token.get("#text", "")

        return {'records': records, 'resumption_token': resumption_token,
                'error': error}
Exemple #9
0
Fichier : oai.py Projet : dpla/zen
    def list_records(self, set):
        '''
        Retrieve the oai_dc records of one OAI-PMH set.

        set - value sent as the 'set' parameter of the ListRecords request

        Returns the first result of metadata_dict, with every property's
        values passed through U.
        '''
        #e.g. http://dspace.mit.edu/oai/request?verb=ListRecords&metadataPrefix=oai_dc&set=hdl_1721.1_18193
        params = {'verb' : 'ListRecords', 'metadataPrefix': 'oai_dc', 'set': set}
        url = self.root + '?' + urllib.urlencode(params)
        self.logger.debug('OAI request URL: {0}'.format(url))

        # The request is timed only so the duration can be logged.
        started = time.time()
        resp, content = self.h.request(url)
        elapsed = time.time() - started
        self.logger.debug('Retrieved in {0}s'.format(elapsed))

        doc = bindery.parse(url, model=OAI_LISTRECORDS_MODEL)
        #print >> sys.stderr, list(generate_metadata(doc))
        records, first_id = metadata_dict(generate_metadata(doc),
                                          nesteddict=False)
        # Rewrite each property's value list in place through U.
        for _, props in records:
            for key in props.keys():
                props[key] = [ U(entry) for entry in props[key] ]

        return records
Exemple #10
0
    def list_records(self, set="", resumption_token="", metadataPrefix=""):
        '''
        List records. Use either the resumption token or set id.

        set - sent as the 'set' parameter when no resumption token is given
        resumption_token - when non-empty it is sent on its own
        metadataPrefix - sent with set; "mods" and "marc" responses are read
                         through XML_PARSE, any other prefix through the
                         bindery model in LISTRECORDS_MODELS

        Returns a dict with 'records' and the next 'resumption_token' ('' when
        the response carries none).
        '''
        if resumption_token:
            params = {'verb' : 'ListRecords', 'resumptionToken': resumption_token}
        else:
            params = {'verb' : 'ListRecords', 'metadataPrefix': metadataPrefix, 'set': set}
        qstr = urllib.urlencode(params)
        url = self.root + '?' + qstr
        self.logger.debug('OAI request URL: {0}'.format(url))
        start_t = time.time()
        resp, content = self.h.request(url)
        retrieved_t = time.time()
        self.logger.debug('Retrieved in {0}s'.format(retrieved_t - start_t))

        # From here on the name holds the token for the NEXT page.
        resumption_token = ''
        if metadataPrefix in ("mods", "marc"):
            xml_content = XML_PARSE(content)
            records = []
            # Records whose identifier contains "null" are left out.
            for record in xml_content["OAI-PMH"]["ListRecords"]["record"]:
                identifier = record["header"]["identifier"]
                if "null" not in identifier:
                    records.append((identifier, record))
            if "resumptionToken" in xml_content["OAI-PMH"]["ListRecords"]:
                resumption_token = xml_content["OAI-PMH"]["ListRecords"]["resumptionToken"]
                # The parsed token can be a mapping instead of plain text
                # (presumably when the element carries attributes - TODO
                # confirm against XML_PARSE). Reduce it to its "#text" so the
                # caller always gets a value it can send back as-is.
                if isinstance(resumption_token, dict):
                    resumption_token = resumption_token.get("#text", "")
        else:
            doc = bindery.parse(url, model=LISTRECORDS_MODELS[metadataPrefix])
            records, first_id = metadata_dict(generate_metadata(doc),
                                            nesteddict=False)

            for id_, props in records:
                for k, v in props.iteritems():
                    props[k] = [ U(item) for item in v ]
            if (doc.OAI_PMH.ListRecords is not None) and (doc.OAI_PMH.ListRecords.resumptionToken is not None):
                resumption_token = U(doc.OAI_PMH.ListRecords.resumptionToken)

        return {'records' : records, 'resumption_token' : resumption_token}
Exemple #11
0
 def factory(rest_uri, moin_link=None, opener=None):
     '''
     Resolve rest_uri to the handler for its Akara resource type.

     rest_uri - URL requested with an Accept header of DOCBOOK_IMT
     moin_link - forwarded to the node class constructor
     opener - opener used for the request; built on demand when not given

     The resource type is the u'ak-type' entry in the metadata of the first
     resource. A type found in node.NODES yields an instance of that class.
     Any other type yields node.ENDPOINTS itself when it is empty/false, and
     otherwise the tuple
     (rest_uri, akara_type, endpoint, doc, metadata, original_wiki_base).
     '''
     opener = urllib2.build_opener() if not opener else opener
     logger.debug('rest_uri: ' + rest_uri)
     resp = opener.open(urllib2.Request(rest_uri, headers={'Accept': DOCBOOK_IMT}))
     doc = bindery.parse(resp, standalone=True, model=MOIN_DOCBOOK_MODEL)
     original_wiki_base = dict(resp.info())[ORIG_BASE_HEADER]
     by_id, first_id = metadata_dict(generate_metadata(doc))
     metadata = by_id[first_id]
     akara_type = U(metadata[u'ak-type'])
     logger.debug('Type: ' + akara_type)
     try:
         #Older Moin CMS resource types are implemented by registration to the global node.NODES
         node_class = node.NODES[akara_type]
     except KeyError:
         #Newer Moin CMS resource types are implemented by discovery of a URL,
         #to which a POST request executes the desired action
         endpoints = node.ENDPOINTS
         if endpoints:
             return (rest_uri, akara_type, endpoints[akara_type], doc, metadata, original_wiki_base)
         return endpoints
     # Registered type: pass the already-fetched pieces along as the cache.
     return node_class(rest_uri, moin_link, opener, cache=(doc, metadata, original_wiki_base))