Example #1
0
class InventoryHarvester(DguHarvesterBase):
    '''
    Harvesting of LGA Inventories from a single XML document provided at a
    URL.
    '''
    implements(IHarvester)

    # NOTE(review): not referenced in the code visible here; presumably the
    # key under which a dataset's inventory identifier is stored - confirm.
    IDENTIFIER_KEY = 'inventory_identifier'

    def info(self):
        '''
        Returns a descriptor with information about the harvester.

        :returns: dict with the keys "name", "title" and "description"
        '''
        return {
            "name": "inventory",
            "title": "Inventory XML",
            "description":
            "Dataset metadata published according to the Inventory XML format: http://schemas.opendata.esd.org.uk/Inventory with XSD: https://github.com/datagovuk/ckanext-dgu-local/blob/master/ckanext/dgulocal/data/inventory.xsd"
        }

    def gather_stage(self, harvest_job):
        '''
        Fetches the single inventory document containing all of the
        datasets to be created/modified.

        The document is downloaded from ``harvest_job.source.url`` and
        passed to ``InventoryDocument``. If the download fails or the
        document is rejected with ``InventoryXmlError``, a gather error is
        saved against the job and the method returns early.

        :param harvest_job: HarvestJob object
        :returns: None when the document cannot be fetched or parsed.
            NOTE(review): the intent is to return a list of HarvestObject
            ids, but the code visible here stops once the document has been
            parsed and never builds that list - confirm against the rest of
            the method.
        '''
        # Function-scope imports are kept as in the original. Several of
        # these names are not used in the visible part of this method;
        # presumably later code relies on them - confirm before removing.
        from ckanext.harvest.model import (HarvestJob, HarvestObject,
                                           HarvestObjectExtra as HOExtra,
                                           HarvestGatherError)

        from ckanext.dgulocal.lib.geo import get_boundary
        from ckan import model

        self.last_run = None

        log.debug('Resolving source: %s', harvest_job.source.url)
        try:
            # NOTE(review): no timeout is passed, so a server that accepts
            # the connection but never answers would block this call.
            req = requests.get(harvest_job.source.url)
            # raise_for_status() returns nothing useful; it is called only
            # so that 4xx/5xx responses raise and are reported below.
            req.raise_for_status()
        except requests.exceptions.RequestException as e:
            # e.g. requests.exceptions.ConnectionError
            self._save_gather_error(
                'Failed to get content from URL: %s Error:%s %s' %
                (harvest_job.source.url, e.__class__.__name__, e), harvest_job)
            return None

        try:
            doc = InventoryDocument(req.content)
        except InventoryXmlError as e:
            self._save_gather_error(
                'Failed to parse or validate the XML document: %s %s' %
                (e.__class__.__name__, e), harvest_job)
            return None
Example #2
0
def _get_inventory_doc(inventory_xml_filename):
    '''
    Loads an inventory XML file from the ``data`` directory that sits next
    to this module and wraps its contents in an InventoryDocument.

    :param inventory_xml_filename: name of the file inside the ``data``
        directory
    :returns: InventoryDocument built from the file's contents
    '''
    path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data'))
    filepath = os.path.join(path, inventory_xml_filename)
    # Read inside a context manager so the file handle is closed even if
    # reading fails, instead of being left for the garbage collector.
    with open(filepath, 'r') as xml_file:
        xml_content = xml_file.read()
    return InventoryDocument(xml_content)