Example 1
def temporal_from_literal(text):
    '''
    Parse a temporal coverage from a literal, i.e. either:
    - an ISO date range
    - a single ISO date period (a year or a month)
    '''
    if text.count('/') == 1:
        # This is an ISO date range as recommended by Gov.uk
        # http://guidance.data.gov.uk/dcat_fields.html
        start, end = text.split('/')
        return db.DateRange(
            start=parse_dt(start).date(),
            end=parse_dt(end).date()
        )
    else:
        separators = text.count('-')
        if separators == 0:
            # this is a year
            return db.DateRange(
                start=date(int(text), 1, 1),
                end=date(int(text), 12, 31)
            )
        elif separators == 1:
            # this is a month
            dt = parse_dt(text).date()
            return db.DateRange(
                start=dt.replace(day=1),
                end=dt.replace(day=calendar.monthrange(dt.year, dt.month)[1])
            )
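For reference, a hypothetical set of calls covering the three literal forms (assuming parse_dt is dateutil.parser.parse and db.DateRange holds plain dates):

temporal_from_literal('2014-01-01/2014-12-31')  # ISO range  -> DateRange(2014-01-01, 2014-12-31)
temporal_from_literal('2014')                   # bare year  -> DateRange(2014-01-01, 2014-12-31)
temporal_from_literal('2014-06')                # year-month -> DateRange(2014-06-01, 2014-06-30)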
Example 2
def temporal_from_resource(resource):
    '''
    Parse a temporal coverage from an RDF class/resource, i.e. either:
    - a `dct:PeriodOfTime` with schema.org `startDate` and `endDate` properties
    - an inline gov.uk Time Interval value
    - a URI reference to the gov.uk Time Interval ontology
      http://reference.data.gov.uk/
    '''
    if isinstance(resource.identifier, URIRef):
        # Fetch remote ontology if necessary
        g = Graph().parse(str(resource.identifier))
        resource = g.resource(resource.identifier)
    if resource.value(SCHEMA.startDate):
        return db.DateRange(start=resource.value(SCHEMA.startDate).toPython(),
                            end=resource.value(SCHEMA.endDate).toPython())
    elif resource.value(SCV.min):
        return db.DateRange(start=resource.value(SCV.min).toPython(),
                            end=resource.value(SCV.max).toPython())
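A minimal sketch of the inline schema.org case, assuming SCHEMA is an rdflib Namespace for http://schema.org/ (using a blank node sidesteps the remote-fetch branch):

from datetime import date
from rdflib import BNode, Graph, Literal, Namespace

SCHEMA = Namespace('http://schema.org/')

g = Graph()
node = BNode()
g.add((node, SCHEMA.startDate, Literal(date(2014, 1, 1))))
g.add((node, SCHEMA.endDate, Literal(date(2014, 12, 31))))

# resource.identifier is a BNode, not a URIRef, so no remote ontology is fetched
temporal_from_resource(g.resource(node))  # -> DateRange(2014-01-01, 2014-12-31)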
Example 3
    def process_formdata(self, valuelist):
        if valuelist and valuelist[0]:
            start, end = valuelist[0].split(' - ')
            self.data = db.DateRange(
                start=parse(start, yearfirst=True).date(),
                end=parse(end, yearfirst=True).date(),
            )
        else:
            self.data = None
Example 4
    def test_initial_values(self):
        Fake, FakeForm = self.factory()
        dr = db.DateRange(start=date.today() - timedelta(days=1),
                          end=date.today())

        fake = Fake(daterange=dr)
        form = FakeForm(None, fake)
        expected = ' - '.join([to_iso_date(dr.start), to_iso_date(dr.end)])
        self.assertEqual(form.daterange._value(), expected)
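to_iso_date itself is not shown in these snippets; a plausible one-liner matching how it is used here (an assumption, not the actual implementation):

def to_iso_date(value):
    '''Render a date as an ISO 8601 string, e.g. date(2014, 1, 1) -> '2014-01-01'.'''
    return value.isoformat()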
Example 5
    def process_formdata(self, valuelist):
        if valuelist and valuelist[0]:
            value = valuelist[0]
            if isinstance(value, str):
                start, end = value.split(' - ')
                self.data = db.DateRange(
                    start=parse(start, yearfirst=True).date(),
                    end=parse(end, yearfirst=True).date(),
                )
            elif 'start' in value and 'end' in value:
                self.data = db.DateRange(
                    start=parse(value['start'], yearfirst=True).date(),
                    end=parse(value['end'], yearfirst=True).date(),
                )
            else:
                raise validators.ValidationError(
                    _('Unable to parse date range'))
        else:
            self.data = None
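The field thus accepts two wire formats; a hypothetical illustration (field stands for a bound form field of this class):

field.process_formdata(['2014-01-01 - 2014-12-31'])  # string form from the widget
field.process_formdata([{'start': '2014-01-01',
                         'end': '2014-12-31'}])      # dict form from a JSON payload
# Any other shape raises validators.ValidationError.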
Example 6
    def test_temporal_coverage(self):
        start = faker.past_date(start_date='-30d')
        end = faker.future_date(end_date='+30d')
        temporal_coverage = db.DateRange(start=start, end=end)
        dataset = DatasetFactory(temporal_coverage=temporal_coverage)

        d = dataset_to_rdf(dataset)

        pot = d.value(DCT.temporal)

        assert pot.value(RDF.type).identifier == DCT.PeriodOfTime
        assert pot.value(SCHEMA.startDate).toPython() == start
        assert pot.value(SCHEMA.endDate).toPython() == end
Example 7
    def test_with_valid_dates(self):
        Fake, FakeForm = self.factory()
        start = date.today() - timedelta(days=1)
        end = date.today()
        expected = ' - '.join([to_iso_date(start), to_iso_date(end)])

        fake = Fake()
        form = FakeForm(MultiDict({'daterange': expected}))

        form.validate()
        self.assertEqual(form.errors, {})

        form.populate_obj(fake)

        self.assertEqual(fake.daterange, db.DateRange(start=start, end=end))
Example 8
    def test_with_valid_dates_from_json(self):
        Fake, FakeForm = self.factory()
        start = date.today() - timedelta(days=1)
        end = date.today()

        fake = Fake()
        form = FakeForm.from_json({
            'daterange': {
                'start': to_iso_date(start),
                'end': to_iso_date(end),
            }
        })

        form.validate()
        self.assertEqual(form.errors, {})

        form.populate_obj(fake)

        self.assertEqual(fake.daterange, db.DateRange(start=start, end=end))
Example 9
    def test_daterange_before_1900(self):
        '''Daterange filter should display the range in an adaptive format'''
        g.lang_code = 'en'
        iso2date = lambda s: date(*[int(v) for v in s.split('-')])
        dr = lambda s, e: db.DateRange(start=iso2date(s), end=iso2date(e))

        specs = (
            (dr('1234-02-01', '1234-02-01'), '1234/02/01'),
            (dr('1232-01-01', '1232-01-31'), '1232/01'),
            (dr('1232-01-01', '1232-01-14'), '1232/01/01 to 1232/01/14'),
            (dr('1232-01-01', '1232-03-31'), '1232/01 to 1232/03'),
            (dr('1232-01-01', '1232-02-29'), '1232/01 to 1232/02'),
            (dr('1232-01-01', '1232-12-31'), '1232'),
            (dr('1232-01-01', '1234-12-31'), '1232 to 1234'),
            (dr('1232-02-02', '1234-12-25'), '1232/02/02 to 1234/12/25'),
        )
        for given, expected in specs:
            self.assertEqual(
                render_template_string('{{given|daterange}}', given=given),
                expected)
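A minimal sketch of a filter satisfying the spec table above (the real filter is presumably locale-aware; this assumes the plain YYYY/MM/DD rendering shown for 'en'):

import calendar

def daterange_filter(dr):
    '''Collapse a DateRange to the coarsest precision that still renders it exactly.'''
    start, end = dr.start, dr.end
    last_day = calendar.monthrange(end.year, end.month)[1]
    full_month = start.day == 1 and end.day == last_day
    full_year = full_month and start.month == 1 and end.month == 12
    if full_year:
        fmt = '{0.year}'
    elif full_month:
        fmt = '{0.year}/{0.month:02d}'
    else:
        fmt = '{0.year}/{0.month:02d}/{0.day:02d}'
    left, right = fmt.format(start), fmt.format(end)
    return left if left == right else '{0} to {1}'.format(left, right)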
Example 10
    def process(self, item):
        response = self.get_action('package_show', id=item.remote_id)
        data = self.validate(response['result'], schema)

        # Fix the remote_id: use the real ID instead of the unstable name
        item.remote_id = data['id']

        # Skip if no resource
        if not len(data.get('resources', [])):
            msg = 'Dataset {0} has no record'.format(item.remote_id)
            raise HarvestSkipException(msg)

        dataset = self.get_dataset(item.remote_id)

        # Core attributes
        if not dataset.slug:
            dataset.slug = data['name']
        dataset.title = data['title']
        dataset.description = data['notes']
        dataset.license = License.objects(id=data['license_id']).first()
        # dataset.license = license or License.objects.get(id='notspecified')
        dataset.tags = [t['name'] for t in data['tags'] if t['name']]

        dataset.created_at = data['metadata_created']
        dataset.last_modified = data['metadata_modified']

        dataset.extras['ckan:name'] = data['name']

        temporal_start, temporal_end = None, None
        spatial_geom = None

        for extra in data['extras']:
            # GeoJSON representation (Polygon or Point)
            if extra['key'] == 'spatial':
                spatial_geom = json.loads(extra['value'])
            # Textual representation of the extent / location
            elif extra['key'] == 'spatial-text':
                log.debug('spatial-text value not handled')
                print('spatial-text', extra['value'])
            # Linked Data URI representing the place name
            elif extra['key'] == 'spatial-uri':
                log.debug('spatial-uri value not handled')
                print('spatial-uri', extra['value'])
            # Update frequency
            elif extra['key'] == 'frequency':
                print('frequency', extra['value'])
            # Temporal coverage start
            elif extra['key'] == 'temporal_start':
                print('temporal_start', extra['value'])
                temporal_start = daterange_start(extra['value'])
                continue
            # Temporal coverage end
            elif extra['key'] == 'temporal_end':
                print('temporal_end', extra['value'])
                temporal_end = daterange_end(extra['value'])
                continue
            # else:
            #     print extra['key'], extra['value']
            dataset.extras[extra['key']] = extra['value']

        if spatial_geom:
            dataset.spatial = SpatialCoverage()
            if spatial_geom['type'] == 'Polygon':
                coordinates = [spatial_geom['coordinates']]
            elif spatial_geom['type'] == 'MultiPolygon':
                coordinates = spatial_geom['coordinates']
            else:
                raise HarvestException('Unsupported spatial geometry')
            dataset.spatial.geom = {
                'type': 'MultiPolygon',
                'coordinates': coordinates
            }

        if temporal_start and temporal_end:
            dataset.temporal_coverage = db.DateRange(
                start=temporal_start,
                end=temporal_end,
            )

        # Remote URL
        if data.get('url'):
            dataset.extras['remote_url'] = data['url']

        # Resources
        for res in data['resources']:
            if res['resource_type'] not in ALLOWED_RESOURCE_TYPES:
                continue
            try:
                resource = get_by(dataset.resources, 'id', UUID(res['id']))
            except Exception:
                log.error('Unable to parse resource ID %s', res['id'])
                continue
            if not resource:
                resource = Resource(id=res['id'])
                dataset.resources.append(resource)
            resource.title = res.get('name', '') or ''
            resource.description = res.get('description')
            resource.url = res['url']
            resource.filetype = ('api' if res['resource_type'] == 'api'
                                 else 'remote')
            resource.format = res.get('format')
            resource.mime = res.get('mimetype')
            resource.hash = res.get('hash')
            resource.created = res['created']
            resource.modified = res['last_modified']
            resource.published = resource.published or resource.created

        return dataset
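daterange_start and daterange_end are used by the harvesters but never shown; a plausible sketch of their contract, assuming plain ISO YYYY[-MM[-DD]] inputs (an assumption, not the actual udata implementation):

import calendar
from datetime import date

def daterange_start(value):
    '''Coerce a possibly partial ISO date to the first day it can denote.'''
    parts = [int(p) for p in str(value).split('-')]
    year = parts[0]
    month = parts[1] if len(parts) > 1 else 1
    day = parts[2] if len(parts) > 2 else 1
    return date(year, month, day)

def daterange_end(value):
    '''Coerce a possibly partial ISO date to the last day it can denote.'''
    parts = [int(p) for p in str(value).split('-')]
    year = parts[0]
    if len(parts) == 1:
        return date(year, 12, 31)                                      # '2014'
    month = parts[1]
    if len(parts) == 2:
        return date(year, month, calendar.monthrange(year, month)[1])  # '2014-06'
    return date(year, month, parts[2])                                 # full date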
Example 11
def dr(start, end, **kwargs):
    return db.DateRange(start=iso2date(start), end=iso2date(end), **kwargs)
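With the iso2date helper from Example 9 in scope, usage reads:

dr('2014-01-01', '2014-12-31')  # -> db.DateRange(start=date(2014, 1, 1), end=date(2014, 12, 31))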
Example 12
    def process(self, item):
        response = self.get(item.remote_id)
        encoding = chardet.detect(response.content)['encoding']
        xml = self.parse_xml(response.content.decode(encoding))
        metadata = xml['metadata']

        # Resolve the remote id from the metadata
        item.remote_id = metadata['id']
        dataset = self.get_dataset(metadata['id'])

        dataset.title = metadata['title']
        dataset.frequency = FREQUENCIES.get(metadata['frequency'], 'unknown')
        dataset.description = metadata['notes']
        dataset.private = metadata['private']
        dataset.tags = sorted(set(metadata['tags']))

        if metadata.get('license_id'):
            dataset.license = License.objects.get(id=metadata['license_id'])

        if (metadata.get('temporal_coverage_from')
                and metadata.get('temporal_coverage_to')):
            dataset.temporal_coverage = db.DateRange(
                start=metadata['temporal_coverage_from'],
                end=metadata['temporal_coverage_to'])

        if (metadata.get('territorial_coverage_code')
                or metadata.get('territorial_coverage_granularity')):
            dataset.spatial = SpatialCoverage()

            if metadata.get('territorial_coverage_granularity'):
                dataset.spatial.granularity = GRANULARITIES.get(
                    metadata['territorial_coverage_granularity'])

            if metadata.get('territorial_coverage_code'):
                dataset.spatial.zones = [
                    ZONES[metadata['territorial_coverage_code']]
                ]

        dataset.resources = []
        cle = get_by(metadata['resources'], 'format', 'cle')
        for row in metadata['resources']:
            if row['format'] == 'cle':
                continue
            else:
                resource = Resource(title=row['name'],
                                    description=(row['description'] + '\n\n' +
                                                 SSL_COMMENT).strip(),
                                    filetype='remote',
                                    url=row['url'],
                                    format=row['format'])
                if resource.format == 'csv' and cle:
                    resource.checksum = Checksum(type='sha256',
                                                 value=self.get(
                                                     cle['url']).text)
                if row.get('last_modified'):
                    resource.modified = row['last_modified']
                dataset.resources.append(resource)

        if metadata.get('author'):
            dataset.extras['author'] = metadata['author']
        if metadata.get('author_email'):
            dataset.extras['author_email'] = metadata['author_email']
        if metadata.get('maintainer'):
            dataset.extras['maintainer'] = metadata['maintainer']
        if metadata.get('maintainer_email'):
            dataset.extras['maintainer_email'] = metadata['maintainer_email']
        for extra in metadata['extras']:
            dataset.extras[extra['key']] = extra['value']

        return dataset
Example 13
    def process(self, item):
        response = self.get_action('package_show', id=item.remote_id)
        data = self.validate(response['result'], self.schema)

        if isinstance(data, list):
            data = data[0]

        # Fix the remote_id: use the real ID instead of the unstable name
        item.remote_id = data['id']

        # Skip if no resource
        if not len(data.get('resources', [])):
            msg = 'Dataset {0} has no record'.format(item.remote_id)
            raise HarvestSkipException(msg)

        dataset = self.get_dataset(item.remote_id)

        # Core attributes
        if not dataset.slug:
            dataset.slug = data['name']
        dataset.title = data['title']
        dataset.description = parse_html(data['notes'])

        # Detect license
        default_license = dataset.license or License.default()
        dataset.license = License.guess(data['license_id'],
                                        data['license_title'],
                                        default=default_license)

        dataset.tags = [t['name'] for t in data['tags'] if t['name']]

        dataset.created_at = data['metadata_created']
        dataset.last_modified = data['metadata_modified']

        dataset.extras['ckan:name'] = data['name']

        temporal_start, temporal_end = None, None
        spatial_geom, spatial_zone = None, None

        for extra in data['extras']:
            key = extra['key']
            value = extra['value']
            if value is None or (isinstance(value, str) and not value.strip()):
                # Skip empty extras
                continue
            elif key == 'spatial':
                # GeoJSON representation (Polygon or Point)
                spatial_geom = json.loads(value)
            elif key == 'spatial-text':
                # Textual representation of the extent / location
                qs = GeoZone.objects(db.Q(name=value) | db.Q(slug=value))
                qs = qs.valid_at(datetime.now())
                if qs.count() == 1:
                    spatial_zone = qs.first()
                else:
                    dataset.extras['ckan:spatial-text'] = value
                    log.debug('spatial-text value not handled: %s', value)
            elif key == 'spatial-uri':
                # Linked Data URI representing the place name
                dataset.extras['ckan:spatial-uri'] = value
                log.debug('spatial-uri value not handled: %s', value)
            elif key == 'frequency':
                # Update frequency
                freq = frequency_from_rdf(value)
                if freq:
                    dataset.frequency = freq
                elif value in UPDATE_FREQUENCIES:
                    dataset.frequency = value
                else:
                    dataset.extras['ckan:frequency'] = value
                    log.debug('frequency value not handled: %s', value)
            # Temporal coverage start
            elif key == 'temporal_start':
                temporal_start = daterange_start(value)
            # Temporal coverage end
            elif key == 'temporal_end':
                temporal_end = daterange_end(value)
            else:
                dataset.extras[key] = value

        if spatial_geom or spatial_zone:
            dataset.spatial = SpatialCoverage()

        if spatial_zone:
            dataset.spatial.zones = [spatial_zone]

        if spatial_geom:
            if spatial_geom['type'] == 'Polygon':
                coordinates = [spatial_geom['coordinates']]
            elif spatial_geom['type'] == 'MultiPolygon':
                coordinates = spatial_geom['coordinates']
            else:
                raise HarvestException('Unsupported spatial geometry')
            dataset.spatial.geom = {
                'type': 'MultiPolygon',
                'coordinates': coordinates
            }

        if temporal_start and temporal_end:
            dataset.temporal_coverage = db.DateRange(
                start=temporal_start,
                end=temporal_end,
            )

        # Remote URL
        dataset.extras['remote_url'] = self.dataset_url(data['name'])
        if data.get('url'):
            try:
                url = uris.validate(data['url'])
            except uris.ValidationError:
                dataset.extras['ckan:source'] = data['url']
            else:
                # use declared `url` as `remote_url` if any
                dataset.extras['remote_url'] = url

        # Resources
        for res in data['resources']:
            if res['resource_type'] not in ALLOWED_RESOURCE_TYPES:
                continue
            try:
                resource = get_by(dataset.resources, 'id', UUID(res['id']))
            except Exception:
                log.error('Unable to parse resource ID %s', res['id'])
                continue
            if not resource:
                resource = Resource(id=res['id'])
                dataset.resources.append(resource)
            resource.title = res.get('name', '') or ''
            resource.description = parse_html(res.get('description'))
            resource.url = res['url']
            resource.filetype = 'remote'
            resource.format = res.get('format')
            resource.mime = res.get('mimetype')
            resource.hash = res.get('hash')
            resource.created = res['created']
            resource.modified = res['last_modified']
            resource.published = resource.published or resource.created

        return dataset
Example 14
def dr(start, end):
    return db.DateRange(start=iso2date(start), end=iso2date(end))
Example 15
    def process(self, item):
        response = self.get_action('package_show', id=item.remote_id)
        data = self.validate(response['result'], self.schema)

        if isinstance(data, list):
            data = data[0]

        # Fix the remote_id: use the real ID instead of the unstable name
        item.remote_id = data['id']

        # Skip if no resource
        if not len(data.get('resources', [])):
            msg = 'Dataset {0} has no record'.format(item.remote_id)
            raise HarvestSkipException(msg)

        dataset = self.get_dataset(item.remote_id)

        # Core attributes
        if not dataset.slug:
            dataset.slug = data['name']
        dataset.title = data['title']
        dataset.description = parse_html(data['notes'])

        # Detect Org
        organization_acronym = data['organization']['name']
        orgObj = Organization.objects(acronym=organization_acronym).first()
        if orgObj:
            #print 'Found %s' % orgObj.acronym
            dataset.organization = orgObj
        else:
            orgObj = Organization()
            orgObj.acronym = organization_acronym
            orgObj.name = data['organization']['title']
            orgObj.description = data['organization']['description']
            orgObj.save()
            #print 'Created %s' % orgObj.acronym

            dataset.organization = orgObj

        # Detect license
        default_license = self.harvest_config.get('license', License.default())
        dataset.license = License.guess(data['license_id'],
                                        data['license_title'],
                                        default=default_license)

        dataset.tags = [t['name'] for t in data['tags'] if t['name']]

        dataset.tags.append(urlparse(self.source.url).hostname)

        dataset.created_at = data['metadata_created']
        dataset.last_modified = data['metadata_modified']

        dataset.frequency = 'unknown'
        dataset.extras['ckan:name'] = data['name']

        temporal_start, temporal_end = None, None
        spatial_geom = None

        for extra in data['extras']:
            # GeoJSON representation (Polygon or Point)
            if extra['key'] == 'spatial':
                spatial_geom = json.loads(extra['value'])
            # Textual representation of the extent / location
            elif extra['key'] == 'spatial-text':
                log.debug('spatial-text value not handled')
            # Linked Data URI representing the place name
            elif extra['key'] == 'spatial-uri':
                log.debug('spatial-uri value not handled')
            # Update frequency
            elif extra['key'] == 'frequency':
                print('frequency', extra['value'])
            # Temporal coverage start
            elif extra['key'] == 'temporal_start':
                temporal_start = daterange_start(extra['value'])
                continue
            # Temporal coverage end
            elif extra['key'] == 'temporal_end':
                temporal_end = daterange_end(extra['value'])
                continue
            dataset.extras[extra['key']] = extra['value']

        # We don't want the harvester to infer spatial coverage;
        # only explicitly configured geozones are applied
        if self.harvest_config.get('geozones', False):
            dataset.spatial = SpatialCoverage()
            dataset.spatial.zones = []
            for zone in self.harvest_config.get('geozones'):
                geo_zone = GeoZone.objects.get(id=zone)
                dataset.spatial.zones.append(geo_zone)
        #
        # if spatial_geom:
        #     dataset.spatial = SpatialCoverage()
        #     if spatial_geom['type'] == 'Polygon':
        #         coordinates = [spatial_geom['coordinates']]
        #     elif spatial_geom['type'] == 'MultiPolygon':
        #         coordinates = spatial_geom['coordinates']
        #     else:
        #         HarvestException('Unsupported spatial geometry')
        #     dataset.spatial.geom = {
        #         'type': 'MultiPolygon',
        #         'coordinates': coordinates
        #     }

        if temporal_start and temporal_end:
            dataset.temporal_coverage = db.DateRange(
                start=temporal_start,
                end=temporal_end,
            )

        # Remote URL
        if data.get('url'):
            try:
                url = uris.validate(data['url'])
            except uris.ValidationError:
                dataset.extras['remote_url'] = self.dataset_url(data['name'])
                dataset.extras['ckan:source'] = data['url']
            else:
                dataset.extras['remote_url'] = url

        dataset.extras['harvest:name'] = self.source.name

        current_resources = [
            str(resource.id) for resource in dataset.resources
        ]
        fetched_resources = []
        # Resources
        for res in data['resources']:
            if res['resource_type'] not in ALLOWED_RESOURCE_TYPES:
                continue

            # Ignore invalid resources
            try:
                url = uris.validate(res['url'])
            except uris.ValidationError:
                continue

            try:
                resource = get_by(dataset.resources, 'id', UUID(res['id']))
            except Exception:
                log.error('Unable to parse resource ID %s', res['id'])
                continue

            fetched_resources.append(str(res['id']))
            if not resource:
                resource = Resource(id=res['id'])
                dataset.resources.append(resource)
            resource.title = res.get('name', '') or ''
            resource.description = parse_html(res.get('description'))
            resource.url = res['url']
            resource.filetype = 'remote'
            resource.format = res.get('format')
            resource.mime = res.get('mimetype')
            resource.hash = res.get('hash')
            resource.created = res['created']
            resource.modified = res['last_modified']
            resource.published = resource.published or resource.created

        # Clean up old resources removed from source
        for resource_id in current_resources:
            if resource_id not in fetched_resources:
                try:
                    resource = get_by(dataset.resources, 'id',
                                      UUID(resource_id))
                except Exception:
                    log.error('Unable to parse resource ID %s', resource_id)
                    continue
                else:
                    if resource and not self.dryrun:
                        dataset.resources.remove(resource)

        return dataset
Example 16
    def remote_datasets(self):
        response = self.get('package_list')
        for name in response['result']:
            details = self.get('package_show', {'id': name})['result']
            dataset = self.get_harvested(Dataset, details['id'])

            # Core attributes
            dataset.slug = details['name']
            dataset.title = details['title']
            dataset.description = details.get('notes', 'No description')
            dataset.license = License.objects(
                id=details['license_id']).first() or License.objects.get(
                    id='notspecified')
            dataset.tags = [tag['name'].lower() for tag in details['tags']]

            dataset.frequency = self.map('frequency', details) or 'unknown'
            dataset.created_at = parse(details['metadata_created'])
            dataset.last_modified = parse(details['metadata_modified'])

            if any_field(details, 'territorial_coverage',
                         'territorial_coverage_granularity'):
                coverage = TerritorialCoverage(
                    codes=[
                        code.strip() for code in details.get(
                            'territorial_coverage', '').split(',')
                        if code.strip()
                    ],
                    granularity=self.map('territorial_coverage_granularity',
                                         details),
                )
                dataset.extras['territorial_coverage'] = coverage
                try:
                    dataset.spatial = territorial_to_spatial(dataset)
                except Exception as e:
                    print('Error while processing spatial coverage for '
                          '{0}:'.format(dataset.title), e)

            if all_field(details, 'temporal_coverage_from',
                         'temporal_coverage_to'):
                try:
                    dataset.temporal_coverage = db.DateRange(
                        start=daterange_start(
                            details.get('temporal_coverage_from')),
                        end=daterange_end(details.get('temporal_coverage_to')),
                    )
                except Exception:
                    log.error(
                        'Unable to parse temporal coverage for dataset %s',
                        details['id'])

            # Organization
            if details.get('organization'):
                dataset.organization = self.get_harvested(
                    Organization, details['organization']['id'], False)
            else:
                # Need to fetch user from roles
                roles = self.get('roles_show',
                                 {'domain_object': name})['result']['roles']
                for role in roles:
                    if role['role'] == 'admin' and role['context'] == 'Package':
                        dataset.owner = self.get_harvested(
                            User, role['user_id'])
                        break

            # Supplier
            if details.get('supplier_id'):
                dataset.supplier = self.get_harvested(Organization,
                                                      details['supplier_id'],
                                                      False)

            # Remote URL
            if details.get('url'):
                dataset.extras['remote_url'] = details['url']

            # Extras
            if 'extras' in details:
                extra_mapping = self.harvester.mapping.get('from_extras', {})
                for extra in details['extras']:
                    if extra['key'] in self.harvester.mapping:
                        value = self.harvester.mapping[extra['key']].get(
                            extra['value'])
                    else:
                        value = extra['value']
                    if extra['key'] in extra_mapping:
                        setattr(dataset, extra_mapping[extra['key']], value)
                    else:
                        dataset.extras[extra['key']] = value

            # Resources
            for res in details['resources']:
                try:
                    resource = get_by(dataset.resources, 'id', UUID(res['id']))
                except Exception:
                    log.error('Unable to parse resource %s', res['id'])
                    continue
                if not resource:
                    resource = Resource(id=res['id'])
                    dataset.resources.append(resource)
                resource.title = res.get('name', '') or ''
                resource.url = res['url']
                resource.description = res.get('description')
                resource.format = res.get('format')
                resource.hash = res.get('hash')
                resource.created = parse(res['created'])
                resource.modified = parse(res['revision_timestamp'])
                resource.published = resource.published or resource.created
            yield dataset

            if dataset.id:
                followers = self.get('dataset_follower_list',
                                     {'id': name})['result']
                for follower in followers:
                    user = self.get_harvested(User, follower['id'], False)
                    if user:
                        follow, created = FollowDataset.objects.get_or_create(
                            follower=user, following=dataset)