def testUnimplemented(self):
     """Size, iteration and bulk accessors must raise NotImplementedError."""
     store = LuceneKeyValueStore(join(self.tempdir, 'kv'))
     store['1'] = 'aap'
     # Each operation is wrapped in a lambda so that the attribute lookup
     # and the call both happen inside assertRaises.
     unsupportedOperations = (
         lambda: len(store),
         lambda: iter(store),
         lambda: store.items(),
         lambda: store.keys(),
         lambda: store.values(),
     )
     for operation in unsupportedOperations:
         self.assertRaises(NotImplementedError, operation)
    def testSetDeleteGet(self):
        """Values can be overwritten, deleted and read back; missing keys raise KeyError."""
        store = LuceneKeyValueStore(join(self.tempdir, 'kv'))
        store['1'] = 'aap'
        store['1'] = 'noot'  # second write to the same key must replace the first
        store['2'] = 'mies'
        del store['2']
        self.assertEqual('noot', store['1'])
        self.assertEqual('noot', store.get('1'))

        # A deleted key behaves like a key that was never set.
        self.assertRaises(KeyError, lambda: store['2'])
        try:
            store['3']
            self.fail()
        except KeyError as e:
            # NOTE(review): this repr form (with trailing comma) is what
            # Python 2 / Python < 3.7 produce; newer interpreters print
            # "KeyError('3')" -- confirm the targeted interpreter.
            self.assertEqual("KeyError('3',)", repr(e))
예제 #3
0
파일: plein.py 프로젝트: seecr/meresco-rdf
 def __init__(self, directory, storageLabel, oaiAddRecordLabel, rdfxsdUrl=None, name=None):
     """Set up the observable and open the fragment administration store.

     directory         -- base directory; the key-value store is opened in
                          its 'fragmentAdmin' subdirectory
     storageLabel      -- label stored for later use as self._storageLabel
     oaiAddRecordLabel -- label stored for later use as self._oaiAddRecordLabel
     rdfxsdUrl         -- optional URL; any falsy value is stored as ''
     name              -- passed through to Observable.__init__
     """
     Observable.__init__(self, name=name)
     self._storageLabel = storageLabel
     self._oaiAddRecordLabel = oaiAddRecordLabel
     self._rdfxsdUrl = rdfxsdUrl if rdfxsdUrl else ''
     fragmentAdminPath = join(directory, 'fragmentAdmin')
     self._fragmentAdmin = LuceneKeyValueStore(fragmentAdminPath)
예제 #4
0
파일: plein.py 프로젝트: seecr/meresco-rdf
class Plein(Observable):
    """Keep one merged RDF document per subject URI, fed by many records.

    Every added record is split into fragments (one per child element of
    rdf:RDF carrying rdf:about, plus one per rdf:Statement). A fragment is
    identified by its hash. The key-value store self._fragmentAdmin holds
    two kinds of entries:

    * recordId     -> space-separated encoded fragments of that record
    * fragmentHash -> number of records currently containing that fragment
                      (stored as a string)

    When a fragment's count goes from 0 to 1 or from 1 to 0, the RDF stored
    for its URI is rebuilt and pushed to the observers addressed through
    storageLabel and oaiAddRecordLabel.
    """

    def __init__(self, directory, storageLabel, oaiAddRecordLabel, rdfxsdUrl=None, name=None):
        """Open the fragment administration in <directory>/fragmentAdmin.

        A falsy rdfxsdUrl is stored as the empty string.
        """
        Observable.__init__(self, name=name)
        self._fragmentAdmin = LuceneKeyValueStore(join(directory, 'fragmentAdmin'))
        self._storageLabel = storageLabel
        self._oaiAddRecordLabel = oaiAddRecordLabel
        self._rdfxsdUrl = rdfxsdUrl or ''

    def add(self, identifier, lxmlNode, oaiArgs=None, **kwargs):
        """Register the fragments found in lxmlNode for record `identifier`.

        This is a generator; the ValueError below is raised on first
        iteration, not at call time. Extra keyword arguments are accepted
        and ignored.

        Raises ValueError when lxmlNode has no '/rdf:RDF' root.
        """
        rdfNode = xpathFirst(lxmlNode, '/rdf:RDF')
        if rdfNode is None:
            raise ValueError("Expected lxmlNode with xpath '/rdf:RDF'")
        rdfNode = ElementTree(rdfNode)
        yield self._add(recordId=str(identifier), fragments=self._extractFragments(rdfNode), oaiArgs=oaiArgs)

    def delete(self, identifier):
        """Remove record `identifier`; handled as an add without fragments."""
        yield self._delete(recordId=str(identifier))

    def commit(self):
        """Commit the fragment administration store."""
        self._fragmentAdmin.commit()

    def close(self):
        """Close the fragment administration store."""
        self._fragmentAdmin.close()

    def handleShutdown(self):
        """Announce the shutdown on stdout, flush it, and close the store."""
        # Parenthesised single-argument print: same output on Python 2
        # (print statement) and Python 3 (print function).
        print('handle shutdown: saving plein')
        from sys import stdout
        stdout.flush()
        self.close()

    def _extractFragments(self, lxmlNode):
        """Return {fragment.hash: _Fragment} for all fragments in lxmlNode.

        Fragments with equal hashes collapse into one entry (last one wins).
        """
        fragments = {}
        for (fragmentNode, uri) in self._findFragmentNodesWithAboutUris(lxmlNode):
            fragment = _Fragment(uri, lxmltostringUtf8(self._normalizeRdfDescription(fragmentNode)))
            fragments[fragment.hash] = fragment
        return fragments

    def _findFragmentNodesWithAboutUris(self, lxmlNode):
        """Yield (node, uri) pairs for every fragment node in lxmlNode.

        First all children with an rdf:about attribute (uri = that
        attribute), then all rdf:Statement children (uri = the
        rdf:resource of their rdf:subject).
        """
        for descriptionNode in xpath(lxmlNode, "*[@rdf:about]"):
            uri = str(descriptionNode.attrib[curieToTag("rdf:about")])
            yield descriptionNode, uri
        for statementNode in xpath(lxmlNode, "rdf:Statement"):
            uri = str(xpathFirst(statementNode, 'rdf:subject/@rdf:resource'))
            yield statementNode, uri

    def _normalizeRdfDescription(self, descriptionNode):
        """Return a stand-alone, normalised copy of descriptionNode.

        Nodes whose tag is in CANONICAL_DESCRIPTION_TAGS or is
        rdf:Description are returned as re-parsed copies. Any other typed
        node is rewritten into an rdf:Description with the same attributes
        and namespace map, an rdf:type child pointing at the original tag,
        and deep copies of the original children.
        """
        # Serialise and re-parse to detach the node from its document
        # before cleaning up namespace declarations.
        descriptionNode = XML(lxmltostringUtf8(descriptionNode).strip())
        cleanup_namespaces(descriptionNode)
        if descriptionNode.tag in CANONICAL_DESCRIPTION_TAGS:
            return descriptionNode
        rdfDescriptionTag = '{%(rdf)s}Description' % namespaces
        if descriptionNode.tag == rdfDescriptionTag:
            return descriptionNode

        def _tag2Type(tag):
            # '{namespace}local' -> 'namespacelocal'
            return tag.replace('{', '').replace('}', '')

        descriptionElement = Element(rdfDescriptionTag,
            attrib=descriptionNode.attrib,
            nsmap=descriptionNode.nsmap,
        )
        SubElement(descriptionElement,
            '{%(rdf)s}type' % namespaces,
            attrib={
                '{%(rdf)s}resource' % namespaces: _tag2Type(descriptionNode.tag)
            }
        )
        # Iterating the element visits its direct children; this replaces
        # the deprecated getchildren().
        for childElement in descriptionNode:
            descriptionElement.append(deepcopy(childElement))
        return descriptionElement

    def _add(self, recordId, fragments, oaiArgs=None):
        """Apply the fragment set of one record and update affected URIs.

        recordId  -- record identifier (string)
        fragments -- {hash: _Fragment} now belonging to the record; an
                     empty dict removes the record
        oaiArgs   -- optional keyword arguments for addOaiRecord; when
                     'metadataFormats' is absent it is filled in from
                     self._metadataFormats()

        Generator. The administration in self._fragmentAdmin is only
        updated after all affected URIs have been processed.
        """
        # Work on a copy so the caller's dict is never modified.
        oaiArgs = dict(oaiArgs) if oaiArgs else {}
        if 'metadataFormats' not in oaiArgs:
            oaiArgs['metadataFormats'] = self._metadataFormats()
        uriUpdates, recordCountUpdates = self._determineUpdates(recordId, fragments)
        for uri, changes in uriUpdates.items():
            uriFragments = self._newFragmentsForUri(uri, changes)
            if not uriFragments:
                # Nothing left for this URI: delete it downstream.
                yield self.all[self._oaiAddRecordLabel].delete(identifier=uri)
                self.call[self._storageLabel].deleteData(identifier=uri, name='rdf')
            else:
                self.call[self._oaiAddRecordLabel].addOaiRecord(identifier=uri, **oaiArgs)
                data = RDF_TEMPLATE % ''.join(fragment.data for fragment in uriFragments.values())
                self.call[self._storageLabel].addData(identifier=uri, name='rdf', data=data)
            yield Yield

        self._registerFragmentsForRecord(recordId, fragments.values())
        for fragmentHash, recordCount in recordCountUpdates.items():
            self._setFragmentRecordCount(fragmentHash, recordCount)

    def _delete(self, recordId, oaiArgs=None):
        """Remove a record by adding it with an empty fragment set."""
        yield self._add(recordId, fragments={}, oaiArgs=oaiArgs)

    def _metadataFormats(self):
        """Return the default metadataFormats value used for addOaiRecord."""
        return [('rdf', self._rdfxsdUrl, namespaces.rdf)]

    def _determineUpdates(self, recordId, fragments):
        """Compare the record's stored fragments with the new ones.

        Returns (uriUpdates, recordCountUpdates):

        uriUpdates         -- {uri: {'remove': [hash, ...], 'add': [_Fragment, ...]}}
                              listing only fragments whose record count
                              drops to 0 or rises to 1
        recordCountUpdates -- {fragmentHash: newCount} for every fragment
                              the record lost or gained
        """
        uriUpdates = defaultdict(lambda: defaultdict(list))
        recordCountUpdates = {}

        existingRecordFragments = self._fragmentsForRecord(recordId)
        for fragment in existingRecordFragments:
            # `fragments` is keyed by hash, so test the dict directly
            # rather than a list of its keys.
            if fragment.hash in fragments:
                continue
            newCount = recordCountUpdates[fragment.hash] = self._fragmentRecordCount(fragment.hash) - 1
            if newCount == 0:
                uriUpdates[fragment.uri]['remove'].append(fragment.hash)

        existingRecordFragmentHashes = set(fragment.hash for fragment in existingRecordFragments)
        for fragmentHash, fragment in fragments.items():
            if fragmentHash in existingRecordFragmentHashes:
                continue
            newCount = recordCountUpdates[fragmentHash] = self._fragmentRecordCount(fragmentHash) + 1
            if newCount == 1:
                uriUpdates[fragment.uri]['add'].append(fragment)
        return uriUpdates, recordCountUpdates

    def _newFragmentsForUri(self, uri, changes):
        """Return {hash: _Fragment} for `uri` after applying `changes`.

        Starts from the RDF currently stored for the URI (when a
        non-deleted record exists), drops the hashes in changes['remove']
        and adds the fragments in changes['add']. A hash to remove that is
        not present is reported on stderr and otherwise ignored.
        """
        uriFragments = {}
        oaiRecord = self.call.getRecord(identifier=uri)
        if oaiRecord is not None and not oaiRecord.isDeleted:
            data = self.call.getData(identifier=oaiRecord.identifier, name='rdf')
            uriFragments = self._extractFragments(XML(data))
        for fragmentHash in changes['remove']:
            try:
                del uriFragments[fragmentHash]
            except KeyError:
                # Best effort: warn and continue with the other changes.
                sys.stderr.write('Warning: hash %s for %s was not found in uriFragments.' % (repr(fragmentHash), repr(uri)) + '\n')
                sys.stderr.flush()
        for fragment in changes['add']:
            uriFragments[fragment.hash] = fragment
        return uriFragments

    def _fragmentsForRecord(self, recordId):
        """Return the list of _Fragment objects stored for recordId.

        Returns an empty list when nothing is stored. Values still
        containing '|' are converted with fixEncodedFragments and written
        back before decoding.
        """
        fragments = []
        encodedFragments = self._fragmentAdmin.get(recordId)
        if encodedFragments:
            # Backwards compatible mode
            if '|' in encodedFragments:
                encodedFragments = fixEncodedFragments(encodedFragments)
                print("Fixing fragments for '{0}'".format(recordId))
                from sys import stdout
                stdout.flush()
                self._fragmentAdmin[recordId] = encodedFragments
            # /Backwards compatible mode
            fragments = [
                _Fragment.fromEncodedString(entry) for entry in encodedFragments.split(' ')
            ]
        return fragments

    def _registerFragmentsForRecord(self, recordId, fragments):
        """Store the record's fragments, or delete its entry when none remain."""
        encodedFragments = ' '.join(fragment.asEncodedString() for fragment in fragments)
        if encodedFragments:
            self._fragmentAdmin[recordId] = encodedFragments
        else:
            del self._fragmentAdmin[recordId]

    def _fragmentRecordCount(self, fragmentHash):
        """Return the stored record count for fragmentHash (0 when absent)."""
        return int(self._fragmentAdmin.get(fragmentHash, 0))

    def _setFragmentRecordCount(self, fragmentHash, count):
        """Store the record count as a string; a count of 0 deletes the entry."""
        if count == 0:
            del self._fragmentAdmin[fragmentHash]
        else:
            self._fragmentAdmin[fragmentHash] = str(count)
 def testCommit(self):
     """A value written before commit() can still be read afterwards."""
     storePath = join(self.tempdir, 'kv')
     keyValueStore = LuceneKeyValueStore(storePath)
     keyValueStore['1'] = 'aap'
     keyValueStore.commit()
     self.assertEqual('aap', keyValueStore['1'])
 def testAllStringified(self):
     """Keys and values are stored as strings: 1 and '1' address the same entry."""
     store = LuceneKeyValueStore(join(self.tempdir, 'kv'))
     store[1] = None
     # assertEqual rather than the deprecated assertEquals alias,
     # matching testCommit.
     self.assertEqual('None', store[1])
     self.assertEqual('None', store['1'])