コード例 #1
0
ファイル: parser.py プロジェクト: Cheshire-Grampa/cheshire3
 def process_document(self, session, doc):
     """Parse the raw data of ``doc`` with lxml and return an LxmlRecord.

     The raw data is wrapped in a ``StringIO`` stream and parsed using
     ``self.parser``. The new record's ``byteCount`` is set to the length
     of the raw data, and ``self._copyData`` is called with the document
     and the record before the record is returned.
     """
     raw = doc.get_raw(session)
     stream = StringIO.StringIO(raw)
     tree = etree.parse(stream, self.parser)
     record = LxmlRecord(tree)
     record.byteCount = len(raw)
     self._copyData(doc, record)
     return record
コード例 #2
0
ファイル: parser.py プロジェクト: Cheshire-Grampa/cheshire3
 def process_document(self, session, doc):
     """Build an LxmlRecord from the raw content of ``doc``.

     The content returned by ``doc.get_raw(session)`` is parsed by lxml
     (with ``self.parser``) from an in-memory stream. ``byteCount`` on the
     resulting record is the length of that content.
     """
     content = doc.get_raw(session)
     parsed = etree.parse(StringIO.StringIO(content), self.parser)
     result = LxmlRecord(parsed)
     result.byteCount = len(content)
     # Hand the source document and the new record to the shared helper
     self._copyData(doc, result)
     return result
コード例 #3
0
ファイル: parser.py プロジェクト: Cheshire-Grampa/cheshire3
 def process_document(self, session, doc):
     """Parse the raw data of ``doc`` and return it as an LxmlRecord.

     If lxml raises ``AssertionError`` on the first attempt, the data is
     decoded as UTF-8 and parsed a second time.
     """
     def _parse(text):
         # lxml is given a stream here; input must be string or stream
         return etree.parse(StringIO.StringIO(text), self.parser)

     raw = doc.get_raw(session)
     try:
         tree = _parse(raw)
     except AssertionError:
         raw = raw.decode('utf8')
         tree = _parse(raw)
     record = LxmlRecord(tree)
     # NOTE(review): after the fallback above this is the length of the
     # decoded text rather than of the original data - confirm intended.
     record.byteCount = len(raw)
     self._copyData(doc, record)
     return record
コード例 #4
0
ファイル: parser.py プロジェクト: Cheshire-Grampa/cheshire3
 def process_document(self, session, doc):
     """Return an LxmlRecord parsed from the raw content of ``doc``.

     Parsing is attempted on the content as returned by ``doc.get_raw``;
     on ``AssertionError`` the content is decoded from UTF-8 and the parse
     is retried once. ``byteCount`` is the length of whichever form of the
     content was last parsed.
     """
     content = doc.get_raw(session)
     try:
         # Input to the parser must be a string or a stream
         stream = StringIO.StringIO(content)
         parsed = etree.parse(stream, self.parser)
     except AssertionError:
         content = content.decode('utf8')
         stream = StringIO.StringIO(content)
         parsed = etree.parse(stream, self.parser)
     result = LxmlRecord(parsed)
     result.byteCount = len(content)
     self._copyData(doc, result)
     return result
コード例 #5
0
ファイル: record.py プロジェクト: Cheshire-Grampa/cheshire3
 def process_xpath(self, session, q, map=None):
     """Evaluate ``q`` as XPath, falling back to SPARQL if that fails.

     Args:
         session: session object passed through to the delegates.
         q: the query to evaluate.
         map: optional mapping passed through to the delegates; a fresh
             empty dict is used when omitted.

     Returns:
         The result of ``LxmlRecord.process_xpath``, or of
         ``self.process_sparql`` when lxml raises ``XPathEvalError``.
     """
     # A ``{}`` default would be a single dict shared by every call (and by
     # whatever the delegates do with it), so create one per call instead.
     if map is None:
         map = {}
     if self.dom is None:
         # presumably populates self.dom - verify against get_xml
         self.get_xml(session)
     try:
         return LxmlRecord.process_xpath(self, session, q, map)
     except etree.XPathEvalError:
         return self.process_sparql(session, q, map)
コード例 #6
0
ファイル: oai_utils.py プロジェクト: tanmoydeb07/cheshire3
def getRecord(baseUrl, metadataPrefix, identifier):
    """Fetch a single record from an OAI-PMH server using GetRecord.

    Returns a ``(header, record, None)`` tuple: the header built from the
    first record's ``oai:header`` element, and an LxmlRecord wrapping the
    first child of its ``oai:metadata`` element.

    If the response cannot be parsed, the request URL and the response
    data are written to stderr and the exception is re-raised.
    """
    oai_ns = {'oai': NS_OAIPMH}
    query = urllib.urlencode({
        'verb': "GetRecord",
        'metadataPrefix': metadataPrefix,
        'identifier': identifier
    })
    url = "{0}?{1}".format(baseUrl, query)
    data = fetch_data(url)
    try:
        tree = etree.fromstring(data)
    except:
        # Dump request and response for debugging, then propagate
        for text in (url, data):
            sys.stderr.write(text + '\n')
        sys.stderr.flush()
        raise
    header_el = tree.xpath('//oai:record[1]/oai:header',
                           namespaces=oai_ns)[0]
    header = headerFromLxmlElement(header_el)
    metadata_el = tree.xpath('//oai:record[1]/oai:metadata/*',
                             namespaces=oai_ns)[0]
    serialized = etree.tostring(metadata_el)
    record = LxmlRecord(metadata_el,
                        xml=serialized,
                        docId=identifier,
                        byteCount=len(serialized))
    return (header, record, None)
コード例 #7
0
ファイル: testIndex.py プロジェクト: tanmoydeb07/cheshire3
 def _get_test_records(self):
     """Yield five small LxmlRecords with docIds 0 through 4.

     Each record has a ``<title>`` and a ``<content>`` element whose text
     includes the record's number.
     """
     template = ('<record>'
                 '<title>Title {0}</title>'
                 '<content>Record {0} content.</content>'
                 '</record>')
     for num in range(5):
         node = etree.XML(template.format(num))
         yield LxmlRecord(node, docId=num)
コード例 #8
0
    def _processResult(self, session, data):
        """Turn XML output from an external program into a metadata dict.

        The XML in ``data`` is parsed into an LxmlRecord, then each entry
        of ``self.sources`` is applied to it to fill in one key of the
        returned dictionary.

        A key is set to a list when its source produces more than one
        item, to the single item's key when it produces exactly one, and
        to the source's ``default`` (when not None) otherwise.
        """
        try:
            tree = etree.fromstring(data)
        except AssertionError:
            data = data.decode('utf8')
            tree = etree.fromstring(data)
        except etree.XMLSyntaxError:
            logger = session.logger
            if logger is not None:
                # Log the offending data at debug level (10)
                logger.log_lvl(session, 10, data)
            raise
        record = LxmlRecord(tree)
        record.byteCount = len(data)

        metadata = {}
        for key, src in self.sources.iteritems():
            selector, processor, preprocessor = src['source']
            if preprocessor is not None:
                # NOTE(review): ``record`` is rebound, so later sources
                # also see the preprocessed record - confirm intended.
                record = preprocessor.process(session, record)
            if selector is None:
                processed = processor.process(session, record)
            else:
                raw_items = selector.process_record(session, record)
                processed = processor.process(session, raw_items)

            count = len(processed)
            if count > 1:
                values = metadata[key] = []
                ordered = sorted([(val['proxLoc'], k)
                                  for k, val in processed.iteritems()])
                # One entry per proxLoc item, in sorted order
                for locations, term in ordered:
                    for _ in locations:
                        values.append(term)
            elif count == 1:
                metadata[key] = processed.keys()[0]
            elif src['default'] is not None:
                metadata[key] = src['default']

        return metadata
コード例 #9
0
ファイル: preParser.py プロジェクト: ReinSi/cheshire3
 def _processResult(self, session, data):
     """Parse XML from an external program and return a metadata dict.

     ``data`` is parsed with lxml (retrying once on ``AssertionError``
     after decoding it from UTF-8) and wrapped in an LxmlRecord. Every
     ``(key, source)`` pair in ``self.sources`` then contributes at most
     one entry to the returned dictionary.

     An ``XMLSyntaxError`` from the first parse attempt is logged at
     debug level, if the session has a logger, and re-raised.
     """
     try:
         parsed = etree.fromstring(data)
     except AssertionError:
         data = data.decode('utf8')
         parsed = etree.fromstring(data)
     except etree.XMLSyntaxError:
         if session.logger is not None:
             # 10 is the debug level
             session.logger.log_lvl(session, 10, data)
         raise
     rec = LxmlRecord(parsed)
     rec.byteCount = len(data)

     result = {}
     for name, source in self.sources.iteritems():
         (selector, extractor, prep) = source['source']
         if prep is not None:
             # The preprocessed record replaces ``rec`` for this and all
             # following sources.
             rec = prep.process(session, rec)
         if selector is not None:
             extracted = extractor.process(
                 session, selector.process_record(session, rec))
         else:
             extracted = extractor.process(session, rec)

         size = len(extracted)
         if size > 1:
             result[name] = []
             pairs = [(val['proxLoc'], k) for k, val in extracted.iteritems()]
             for prox, item in sorted(pairs):
                 # Repeat the item once for each of its proxLoc entries
                 for _ in prox:
                     result[name].append(item)
         elif size == 1:
             result[name] = extracted.keys()[0]
         elif source['default'] is not None:
             result[name] = source['default']

     return result
コード例 #10
0
ファイル: oai_utils.py プロジェクト: tanmoydeb07/cheshire3
def listRecords(baseUrl,
                metadataPrefix,
                set=None,
                from_=None,
                until=None,
                cursor=0,
                batch_size=10):
    """Return (Header, metadata, about) tuples from an OAI-PMH server.

    Issues a ListRecords request and follows resumption tokens until
    ``batch_size`` records have been collected or the server has no more.

    Args:
        baseUrl: base URL of the OAI-PMH server.
        metadataPrefix: metadata format to request.
        set: optional set to restrict the request to.
        from_: optional lower bound; sent as ``str(from_)``.
        until: optional upper bound; sent as ``str(until)``.
        cursor: number of leading records to skip before collecting.
        batch_size: stop as soon as this many records are collected.

    Returns:
        A list of ``(header, record, None)`` tuples, at most
        ``batch_size`` long.

    Raises:
        Whatever ``etree.fromstring`` raises for an unparseable response
        (after printing the request URL and the response data).
    """
    oai_ns = {'oai': NS_OAIPMH}
    args = {'verb': "ListRecords", 'metadataPrefix': metadataPrefix}
    if set is not None:
        args['set'] = set
    if from_ is not None:
        args['from'] = str(from_)
    if until is not None:
        args['until'] = str(until)
    url = "{0}?{1}".format(baseUrl, urllib.urlencode(args))
    data = fetch_data(url)
    records = []
    seen = 0
    while data is not None:
        try:
            tree = etree.fromstring(data)
        except:
            # Show what was requested and received, then propagate
            print(url)
            print(data)
            raise
        for recEl in tree.xpath('//oai:record', namespaces=oai_ns):
            if seen < cursor:
                seen += 1
                continue
            # Paths must be relative to this record: an absolute '//...'
            # searches the whole response and always finds the first
            # record's header/metadata.
            hEl = recEl.xpath('./oai:header', namespaces=oai_ns)[0]
            header = headerFromLxmlElement(hEl)
            mdEl = recEl.xpath('./oai:metadata/*', namespaces=oai_ns)[0]
            recString = etree.tostring(mdEl)
            rec = LxmlRecord(mdEl,
                             xml=recString,
                             docId=header.identifier(),
                             byteCount=len(recString))
            records.append((header, rec, None))
            seen += 1
            if len(records) >= batch_size:
                return records

        resTok = tree.xpath('string(//oai:resumptionToken)',
                            namespaces=oai_ns)
        if not resTok:
            break
        # resumptionToken is an exclusive argument in OAI-PMH: it may only
        # be combined with the verb, not with the original arguments.
        url = "{0}?verb=ListRecords&resumptionToken={1}".format(
            baseUrl, cgi_encode(resTok))
        data = fetch_data(url)

    return records
コード例 #11
0
 def _get_testDataAndExpected(self):
     """Yield a (record, expected patterns) pair for a namespaced document.

     The record wraps a ``<div>`` in a default namespace; the second item
     is a list holding one compiled regular expression.
     """
     # Namespaced example
     record = LxmlRecord(etree.XML("""
         <div xmlns="http.cheshire3.org/schemas/tests">
             <hr/>
             <p>
                 Some text.
             </p>
             <hr/>
         </div>"""))
     expected = re.compile("""^<c3:component.*?>
             <p>
                 Some text.
             </p>
             </c3:component>$""", re.LOCALE)
     yield (record, [expected])
コード例 #12
0
 def _get_testData(self):
     """Yield a 1-tuple holding an LxmlRecord of a small menu document.

     The document is a ``<menu>`` with four ``<meal>`` elements, each
     containing a different combination of empty child elements.
     """
     menu = etree.XML("""
     <menu>
         <meal>
             <egg/>
             <bacon/>
         </meal>
         <meal>
             <egg/>
             <sausage/>
             <bacon/>
         </meal>
         <meal>
             <egg/>
             <spam/>
         </meal>
         <meal>
             <egg/>
             <bacon/>
             <spam/>
         </meal>
     </menu>
     """)
     yield (LxmlRecord(menu), )
コード例 #13
0
 def _submit_userLxml(self, id, userNode):
     """Wrap ``userNode`` in an LxmlRecord and store it under ``id``.

     The record is stored in ``userStore`` and the store is committed
     immediately, both using the ``session`` from the enclosing scope.
     """
     user_record = LxmlRecord(userNode)
     user_record.id = id
     # Store, then commit straight away
     userStore.store_record(session, user_record)
     userStore.commit_storing(session)
コード例 #14
0
 def _get_test_recs(self):
     """Yield one LxmlRecord per item from ``self._get_test_data()``.

     Each item becomes the text of a ``<data>`` element; the record also
     carries the serialized element and its length as ``byteCount``.
     """
     for text in self._get_test_data():
         element = etree.Element('data')
         element.text = text
         serialized = etree.tostring(element)
         yield LxmlRecord(element,
                          xml=serialized,
                          byteCount=len(serialized))
コード例 #15
0
 def _get_testDataAndExpected(self):
     """Yield (record, expected patterns) pairs for four ``<div>`` fixtures.

     Each pair is an LxmlRecord built from an inline XML document and a
     list holding one compiled regular expression. The expressions are
     anchored with ``^``/``$`` and include the fixture's exact whitespace,
     so the literals below are whitespace-sensitive.
     """
     # Simple example: two <hr/> siblings around a <p>
     yield (LxmlRecord(
         etree.XML("""
         <div>
             <hr/>
             <p>
                 Some text.
             </p>
             <hr/>
         </div>""")), [
             re.compile(
                 """^<c3:?component.*?><hr></hr>
             <p>
                 Some text.
             </p>
             <hr></hr></c3:?component>$""", re.LOCALE)
         ])
     # Simple example with tail text after the <p>; the expected pattern
     # includes that tail text
     yield (LxmlRecord(
         etree.XML("""
         <div>
             <hr/>
             <p>
                 Some text.
             </p> With tail text.
             <hr/>
         </div>""")), [
             re.compile(
                 """^<c3:?component.*?><hr></hr>
             <p>
                 Some text.
             </p> With tail text.
             <hr></hr></c3:?component>$""", re.LOCALE)
         ])
     # Example where endNode is not a sibling of startNode
     # (the second <hr/> is nested inside the <p>).
     # Tail text should be excluded now
     yield (LxmlRecord(
         etree.XML("""
         <div>
             <hr/>
             <p>
                 Some text.
                 <hr/>
             </p> With tail text.
         </div>""")), [
             re.compile(
                 """^<c3:?component.*?><hr></hr>
             <p>
                 Some text.
                 <hr></hr></p></c3:?component>$""", re.LOCALE)
         ])
     # Example where endNode is sibling of startNode ancestor
     # (the first <hr/> is inside the <p>, the second follows the <p>)
     yield (LxmlRecord(
         etree.XML("""
         <div>
             <p>Some text.
                 <hr/>
             </p> With tail text
             <hr/>
         </div>""")), [
             re.compile(
                 """^<c3:?component.*?><p><hr></hr>
             </p> With tail text
             <hr></hr></c3:?component>$""", re.LOCALE)
         ])