Ejemplo n.º 1
0
 def _save_object_error(self, message, obj, stage=u'Fetch'):
     '''
     Record an error against a harvest object and log it.

     A HarvestObjectError is built from ``message``, ``obj`` and ``stage``
     (``u'Fetch'`` unless the caller passes another stage) and saved;
     afterwards ``message`` is sent to ``log.error``.
     '''
     error_fields = {'message': message, 'object': obj, 'stage': stage}
     harvest_error = HarvestObjectError(**error_fields)
     harvest_error.save()
     log.error(message)
Ejemplo n.º 2
0
 def _save_object_error(self, message, obj, stage=u'Fetch'):
     '''
     Persist a HarvestObjectError for ``obj``, then log ``message``.

     ``stage`` is stored on the error record and defaults to ``u'Fetch'``.
     The message is logged at error level only after the record is saved.
     '''
     failure = HarvestObjectError(
         message=message,
         object=obj,
         stage=stage
     )
     failure.save()
     log.error(message)
Ejemplo n.º 3
0
 def _save_object_error(self, message, obj, stage=u"Fetch", line=None):
     """
     Save a HarvestObjectError for ``obj``, retrying once after a rollback.

     The error record is built from ``message``, ``obj``, ``stage`` and
     ``line``. If the first ``save()`` raises InvalidRequestError, the
     session is rolled back and the save is attempted a second time; an
     exception from that second attempt propagates to the caller.
     """
     err = HarvestObjectError(message=message, object=obj, stage=stage, line=line)
     try:
         err.save()
     except InvalidRequestError:
         # No name is bound here: the exception value was never used, and
         # the old ``except X, e`` spelling is a syntax error on Python 3.
         Session.rollback()
         err.save()
Ejemplo n.º 4
0
    def test_error_mail_sent_with_object_error(self, mock_mailer_mail_recipient):
        """Check that an error mail goes out when a harvest object has an error.

        Creates a harvest object for the job, attaches a HarvestObjectError to
        it, fetches the source status and calls ``send_error_mail``. The status
        must report exactly one errored object and the mocked mail function
        must have been called.
        """
        context, harvest_source, harvest_job = self._create_harvest_source_and_job_if_not_existing()
        source_id = harvest_source['id']
        job_id = harvest_job['id']

        # Create the harvest object that the error will be attached to.
        create_harvest_object = toolkit.get_action('harvest_object_create')
        harvest_object = create_harvest_object(context, {
            'guid': 'guid',
            'content': 'content',
            'job_id': job_id,
            'extras': {'a key': 'a value'},
            'source_id': source_id,
        })
        stored_object = HarvestObject.get(harvest_object['id'])

        # Attach a HarvestObjectError to the stored object.
        HarvestObjectError(
            message='HarvestObjectError occured: %s' % job_id,
            object=stored_object,
        ).save()

        show_status = toolkit.get_action('harvest_source_show_status')
        status = show_status(context, {'id': source_id})

        send_error_mail(context, source_id, status)

        assert_equal(1, status['last_job']['stats']['errored'])
        assert mock_mailer_mail_recipient.called
Ejemplo n.º 5
0
    def import_stage(self, harvest_object):
        '''
        Save the harvested package to CKAN.

        Loads the package dict from ``harvest_object.content`` (JSON), removes
        its ``action`` and ``resources`` entries, refreshes its extras through
        ``self.update_extras`` and, when the action is ``'create'``, calls the
        ``package_create`` action.

        Returns False when ``package_create`` fails for any reason other than
        an "already in use" error; a HarvestObjectError is saved before
        returning. An "already in use" failure switches the local action to
        ``'update'`` instead.
        '''
        logger.info('Importing {}'.format(harvest_object.id))
        self.set_paths()

        package_dict = json.loads(harvest_object.content)
        action = package_dict.pop('action')
        extras = package_dict.get('extras', [])

        extras = self.update_extras(extras, harvest_object)
        package_dict['extras'] = extras
        # The pop also strips 'resources' from the dict sent to CKAN.
        # NOTE(review): ``resources`` is not used in the visible part of this
        # method - confirm against the full source.
        resources = package_dict.pop('resources', [])

        # Save (create or update) to CKAN.
        # The base class function ._create_or_update_package does not seem
        # useful for dealing with resources.
        user_name = self._get_user_name()
        context = {'model': model, 'session': model.Session, 'user': user_name}

        if action == 'create':
            try:
                pkg = p.toolkit.get_action('package_create')(context, package_dict)
            except Exception as e:
                logger.error('Error creating package {}: {}'.format(str(e), package_dict))
                # TODO: this should not happen
                # Substring test instead of ``find(...) > 0`` so a message
                # that starts with the phrase (index 0) is also recognised.
                if 'already in use' in str(e):
                    # NOTE(review): nothing visible here acts on 'update'.
                    action = 'update'
                else:
                    msg = 'Import CREATE error. pkg name: {}. \n\tError: {}'.format(package_dict.get('name', 'unnamed'), e)
                    harvest_object_error = HarvestObjectError(message=msg, object=harvest_object)
                    harvest_object_error.save()
                    return False
Ejemplo n.º 6
0
 def _save_object_error(self, message, obj, stage=u'Fetch', line=None):
     '''
     Save a HarvestObjectError for ``obj``, retrying once after a rollback.

     The error record carries ``message``, ``obj``, ``stage`` and ``line``.
     When the first ``save()`` raises InvalidRequestError, the session is
     rolled back and ``save()`` is called again; an exception from that
     second attempt is not caught here.
     '''
     err = HarvestObjectError(message=message,
                              object=obj,
                              stage=stage,
                              line=line)
     try:
         err.save()
     except InvalidRequestError:
         # The exception value was never used, so no name is bound; this
         # also avoids the Python-2-only ``except X, e`` syntax.
         Session.rollback()
         err.save()
Ejemplo n.º 7
0
    def is_part_of_to_package_id(self, ipo, harvest_object):
        """ Find the parent dataset for an external isPartOf identifier.

            Searches for packages whose ``identifier`` extra equals ``ipo`` and
            that are flagged as collection metadata, then returns the first
            result that carries ``ipo`` as an identifier and whose harvest
            source id equals ``harvest_object.source.id``.

            If no result qualifies, a HarvestObjectError is recorded and the
            harvest object is put in the "ERROR" state (both best-effort), and
            ParentNotHarvestedException is raised so the object can be re-run
            later.
            """
        ps = p.toolkit.get_action('package_search')
        query = 'extras_identifier:{} AND extras_collection_metadata:true'.format(
            ipo)
        results = ps(self.context(), {"fq": query})
        log.info('Package search results {}'.format(results))

        # Even with a single hit we must make sure it is the parent we need:
        # identifiers may collide, so the harvest source is checked as well.
        if results['count'] > 0:
            datasets = results['results']
            harvest_source = harvest_object.source

            for dataset in datasets:
                extras = dataset.get('extras', [])
                identifiers = [
                    extra['value'] for extra in extras
                    if extra['key'] == 'identifier'
                ]
                if ipo not in identifiers:
                    log.error('BAD SEARCH for {}:{}'.format(ipo, identifiers))
                    continue

                dataset_harvest_source_id = self.get_harvest_source_id(
                    dataset['id'])

                if harvest_source.id == dataset_harvest_source_id:
                    log.info('Parent dataset identified correctly')
                    return dataset
                else:
                    log.info('{} not found at {} for {}'.format(
                        harvest_source.id, dataset_harvest_source_id, ipo))

        # We have zero results, or only bad ones.
        msg = 'Parent identifier not found: "{}"'.format(ipo)
        log.error(msg)
        try:
            harvest_object_error = HarvestObjectError(message=msg,
                                                      object=harvest_object)
            harvest_object_error.save()
            harvest_object.state = "ERROR"
            harvest_object.save()
        except Exception as e:
            # Recording the failure stays best-effort, but it is now logged
            # rather than silently dropped, and KeyboardInterrupt/SystemExit
            # are no longer swallowed as they were by the bare ``except``.
            log.error('Unable to record missing parent "{}": {}'.format(
                ipo, e))
        raise ParentNotHarvestedException(
            'Unable to find parent dataset. Raising error to allow re-run later'
        )
Ejemplo n.º 8
0
def _get_xml_url_content(xml_url, urlopen_timeout, harvest_object):
    """Fetch ``xml_url`` and return the response if its body parses as XML.

    The URL is requested with ``urlopen_timeout`` as the timeout and the
    response content is parsed with ``ET.XML`` purely as a validity check.

    Returns the response object on success. On any failure an 'Import'
    stage HarvestObjectError is saved for ``harvest_object``, a warning is
    logged, and ``''`` is returned. If saving that error raises
    StaleDataError, only a warning is logged and ``''`` is returned.

    Previously a ``return ''`` inside ``finally`` replaced the successful
    return value and discarded every in-flight exception, so the function
    always returned ``''`` and the StaleDataError handler could never run.
    """
    generic_msg = '%s: %s. From external XML content at %s'
    try:
        try:
            r = requests.get(xml_url, timeout=urlopen_timeout)
            ET.XML(r.content)  # test for valid xml
            return r
        except (ET.ParseError, requests.exceptions.Timeout) as e:
            msg = generic_msg % (type(e).__name__, str(e), xml_url)
        except requests.exceptions.TooManyRedirects as e:
            # The exception itself is formatted; the old ``e.code`` lookup
            # raised AttributeError inside the handler.
            msg = 'HTTP too many redirects: %s' % e
        except requests.exceptions.RequestException as e:
            msg = 'HTTP request exception: %s' % e
        except Exception as e:
            msg = generic_msg % (type(e).__name__, str(e), xml_url)

        # Only reached when one of the handlers above produced a message.
        log.warn(msg)
        err = HarvestObjectError(message=msg,
                                 object=harvest_object,
                                 stage='Import')
        err.save()
        return ''
    except StaleDataError as e:
        log.warn('Harvest object %s is stail. Error object not created. %s' %
                 (harvest_object.id, str(e)))
        return ''
Ejemplo n.º 9
0
 def _save_object_error(self, message, obj, stage=u'Fetch'):
     '''
     Store a HarvestObjectError for ``obj`` and log ``message`` as an error.

     ``stage`` is saved on the error record; it defaults to ``u'Fetch'``.
     '''
     error_record = HarvestObjectError(
         stage=stage,
         object=obj,
         message=message
     )
     error_record.save()
     log.error(message)