# Example 1
class ResourceApiAttrs(ObjectAttrs, HighlightObjectMixin):
    """JSON:API attributes schema for a single resource object.

    NOTE(review): the ``is_enabled`` feature-flag checks in the class body run
    at import time, so toggling those flags only takes effect after a process
    restart. The flag checks inside the counter lambdas, by contrast, are
    evaluated on every dump.
    """
    title = TranslatedStr()
    description = TranslatedStr()
    category = fields.Str()
    format = fields.Str()
    # Exposed as 'media_type' while reading the model's 'type' attribute —
    # presumably to avoid the reserved JSON:API member name (see link below).
    media_type = fields.Str(
        attribute='type'
    )  # https://jsonapi.org/format/#document-resource-object-fields
    visualization_types = ListWithoutNoneStrElement(fields.Str())
    # Counter fields switch to the precomputed values when the new date
    # counters flag is on; checked per object at dump time.
    downloads_count =\
        fields.Function(
            lambda obj: obj.computed_downloads_count if is_enabled('S16_new_date_counters.be') else obj.downloads_count)
    openness_score = fields.Integer()
    views_count =\
        fields.Function(
            lambda obj: obj.computed_views_count if is_enabled('S16_new_date_counters.be') else obj.views_count)
    modified = fields.DateTime()
    created = fields.DateTime()
    verified = fields.DateTime()
    data_date = fields.Date()
    file_url = fields.Str()
    file_size = fields.Integer()
    csv_file_url = fields.Str()
    csv_file_size = fields.Integer()
    jsonld_file_url = fields.Str()
    jsonld_file_size = fields.Integer()
    jsonld_download_url = fields.Str()
    download_url = fields.Str()
    csv_download_url = fields.Str()
    link = fields.Str()
    data_special_signs = fields.Nested(SpecialSignSchema,
                                       data_key='special_signs',
                                       many=True)
    is_chart_creation_blocked = fields.Bool()
    # Feature-flagged fields: present on the schema only when the flag was
    # enabled at import time.
    if is_enabled('S35_high_value_data.be'):
        has_high_value_data = fields.Boolean()
    if is_enabled('S37_resources_admin_region_data.be'):
        regions = fields.Method('get_regions')
    if is_enabled('S40_new_file_model.be'):
        files = fields.Method('get_files')

    class Meta:
        relationships_schema = ResourceApiRelationships
        object_type = 'resource'
        api_path = 'resources'
        url_template = '{api_url}/resources/{ident}'
        model = 'resources.Resource'

    def get_regions(self, res):
        """Dump the resource's regions, preferring a prefetched ``all_regions``."""
        return RegionSchema(many=True).dump(
            getattr(res, 'all_regions', res.regions))

    def get_files(self, res):
        """Dump the resource's files, preferring a prefetched ``all_files``."""
        return ResourceFileSchema(many=True).dump(
            getattr(res, 'all_files', res.files))
# Example 2
class ResourceCSVMetadataSerializer(schemas.ExtSchema):
    """Serializes one resource into a flat CSV metadata row.

    Column headers come from the (translated) ``data_key`` of each field;
    ``Meta.ordered = True`` makes the declaration order the column order.
    """
    frontend_absolute_url = fields.Url(data_key=_('Resource URL'))
    title = TranslatedStr(data_key=_('Resource title'), default='')
    description = TranslatedStr(data_key=_('Resource description'))
    created = fields.DateTime(data_key=_('Resource created'), format='iso8601')
    data_date = fields.Date(data_key=_('Data date'))
    openness_score = fields.Int(data_key=_('Openness score'))
    resource_type = fields.Function(lambda obj: obj.get_type_display(),
                                    data_key=_('Type'))
    format = fields.Str(data_key=_('File format'), default='')
    file_size = fields.Function(lambda obj: sizeof_fmt(obj.file_size)
                                if obj.file_size else '',
                                data_key=_('File size'))
    views_count = fields.Int(attribute='computed_views_count',
                             data_key=_("Resource views count"))
    downloads_count = fields.Int(attribute='computed_downloads_count',
                                 data_key=_("Resource downloads count"))
    has_table = fields.Function(lambda obj: _('YES')
                                if obj.has_table else _('NO'),
                                data_key=_('Table'))
    # Fix: the 'Chart'/'Map' labels were swapped — has_chart was exported
    # under the 'Map' header and has_map under 'Chart'.
    has_chart = fields.Function(lambda obj: _('YES')
                                if obj.has_chart else _('NO'),
                                data_key=_('Chart'))
    has_map = fields.Function(lambda obj: _('YES') if obj.has_map else _('NO'),
                              data_key=_('Map'))
    download_url = fields.Url(data_key=_('Download URL'))
    data_special_signs = fields.Nested(SpecialSignSchema,
                                       data_key=_('special signs'),
                                       many=True)

    @ma.post_dump(pass_many=False)
    def prepare_nested_data(self, data, **kwargs):
        """Flatten the nested special-signs list into one string and strip
        HTML markup from the title/description columns.

        Returns the mutated ``data`` dict (marshmallow post_dump contract).
        """
        # Guard against a missing key: ''.join(None) would raise TypeError.
        special_signs = data.get(_('special signs')) or []
        signs_str = '\n'.join([
            '{name_label}: {name}, {symbol_label}: "{symbol}", {desc_label}: {desc}'
            .format(name=sign['name'],
                    name_label=_('name'),
                    symbol=sign['symbol'],
                    symbol_label=_('symbol'),
                    desc=sign['description'],
                    desc_label=_('description')) for sign in special_signs
        ])
        data[_('special signs')] = signs_str
        values_with_html = [_('Resource title'), _('Resource description')]
        for attribute in values_with_html:
            # Only strip columns that actually made it into the dump.
            if attribute in data:
                data[attribute] = strip_tags(data[attribute])
        return data

    class Meta:
        ordered = True
class InstitutionXMLSerializer(ExtSchema):
    """XML serializer for an institution (organization).

    The two ``*_count`` fields are read from the serializer ``context``, so
    callers must pass ``published_datasets_count`` and
    ``published_resources_count`` when instantiating this schema.
    """
    id = fields.Integer()
    url = fields.Url(attribute='frontend_absolute_url')
    type = fields.Function(
        lambda organization: organization.get_institution_type_display())
    title = TranslatedStr()
    abbreviation = fields.Str()
    epuap = fields.Str()
    website = fields.Url()
    created = fields.DateTime(format='iso8601')
    modified = fields.DateTime(format='iso8601')

    # Address data.
    postal_code = fields.Str()
    city = fields.Str()
    street = fields.Str()
    street_number = fields.Str()
    street_type = fields.Str()
    flat_number = fields.Str()

    # Contact data; 'tel' is exported under the 'phone_number' key.
    email = fields.Str()
    tel = fields.Str(data_key='phone_number')

    regon = fields.Str()

    published_datasets_count = fields.Method('get_published_datasets_count')
    published_resources_count = fields.Method('get_published_resources_count')

    def get_published_datasets_count(self, organization):
        """Return the precomputed datasets count supplied via context."""
        return self.context['published_datasets_count']

    def get_published_resources_count(self, organization):
        """Return the precomputed resources count supplied via context."""
        return self.context['published_resources_count']
# Example 4
class DatasetApiAttrs(ObjectAttrs, HighlightObjectMixin):
    """JSON:API attributes schema for a dataset object.

    NOTE(review): the ``is_enabled`` checks in the class body run at import
    time; the checks inside the counter lambdas run per dumped object.
    """
    title = TranslatedStr()
    slug = TranslatedStr()
    notes = TranslatedStr()
    categories = fields.Nested(DatasetCategoryAttr, many=True)
    category = fields.Nested(DatasetCategoryAttr, many=False)
    formats = fields.List(fields.String())
    types = fields.List(fields.String())
    keywords = KeywordsList(TranslatedStr())
    openness_scores = fields.List(fields.Int())
    # License / terms-of-use flags and texts.
    license_chosen = fields.Integer()
    license_condition_db_or_copyrighted = fields.String()
    license_condition_personal_data = fields.String()
    license_condition_modification = fields.Boolean()
    license_condition_original = fields.Boolean()
    license_condition_responsibilities = fields.String()
    license_condition_source = fields.Boolean()
    license_condition_timestamp = fields.Boolean()
    license_name = fields.String()
    license_description = fields.String()
    update_frequency = TransUpdateFreqField()
    # Counters switch to precomputed values under the new date counters flag.
    views_count =\
        fields.Function(
            lambda obj: obj.computed_views_count if is_enabled('S16_new_date_counters.be') else obj.views_count)
    downloads_count =\
        fields.Function(
            lambda obj: obj.computed_downloads_count if is_enabled('S16_new_date_counters.be') else obj.downloads_count)
    url = fields.String()
    followed = fields.Boolean()
    modified = fields.DateTime()
    resource_modified = fields.DateTime()
    created = fields.DateTime()
    verified = fields.DateTime()
    visualization_types = ListWithoutNoneStrElement(fields.Str())
    source = fields.Nested(SourceSchema)
    image_url = fields.Str()
    image_alt = TranslatedStr()
    # Feature-flagged fields (flag evaluated at import time).
    if is_enabled('S35_high_value_data.be'):
        has_high_value_data = fields.Boolean()
    if is_enabled('S37_resources_admin_region_data.be'):
        regions = fields.Nested(RegionSchema, many=True)

    class Meta:
        relationships_schema = DatasetApiRelationships
        object_type = 'dataset'
        url_template = '{api_url}/datasets/{ident}'
        model = 'datasets.Dataset'
# Example 5
class ResourceXMLSerializer(schemas.ExtSchema):
    """XML serializer for a resource; purely declarative field mapping."""
    id = fields.Integer()
    access_url = fields.Url(attribute='frontend_absolute_url')
    title = TranslatedStr()
    description = TranslatedStr()
    openness_score = fields.Integer()
    format = fields.Str()
    # Counts always come from the precomputed attributes here.
    views_count = fields.Int(attribute='computed_views_count')
    downloads_count = fields.Int(attribute='computed_downloads_count')
    created = fields.DateTime(format='iso8601')
    data_date = fields.Date()
    type = fields.Function(lambda resource: resource.get_type_display())
    # Human-readable size; empty string when file_size is falsy.
    file_size = fields.Function(lambda obj: sizeof_fmt(obj.file_size)
                                if obj.file_size else '')

    visualization_types = ListWithoutNoneStrElement(fields.Str())
    download_url = fields.Str()
    data_special_signs = fields.Nested(SpecialSignSchema,
                                       data_key='special_signs',
                                       many=True)
# Example 6
class DatasetXMLSerializer(ExtSchema):
    """XML serializer for a dataset, including its organization and
    published resources as nested dumps."""
    id = fields.Integer()
    url = fields.Url(attribute='frontend_absolute_url')
    title = TranslatedStr()
    notes = TranslatedStr()
    # Tag names for the currently active language (tags_pl / tags_en / ...).
    keywords = fields.Function(lambda dataset: (
        tag.name for tag in getattr(dataset, f'tags_{get_language()}')))
    categories = fields.Nested(DatasetCategoryAttr, many=True)
    update_frequency = TransUpdateFreqField()
    created = fields.DateTime()
    verified = fields.DateTime()
    views_count = fields.Int(attribute='computed_views_count')
    downloads_count = fields.Int(attribute='computed_downloads_count')
    published_resources_count = fields.Int(
        attribute='published_resources__count')
    license = fields.Str(attribute='license_name')
    conditions = fields.Method('get_conditions')
    organization = fields.Method('get_organization')
    resources = fields.Method('get_resources')

    source = fields.Nested(SourceXMLSchema)

    def get_conditions(self, dataset):
        """Build the terms-of-use text: a translated preamble followed by one
        line per applicable license condition.

        NOTE(review): the preamble is emitted even when every term is empty —
        confirm that is intended.
        """
        conditions = _(
            'This dataset is public information, it can be reused under the following conditions: '
        )
        # Boolean conditions contribute their model field's verbose name;
        # text conditions contribute their own value. Empty entries dropped.
        terms = [
            str(
                dataset._meta.get_field(
                    'license_condition_modification').verbose_name)
            if dataset.license_condition_modification else '',
            str(
                dataset._meta.get_field('license_condition_source').
                verbose_name) if dataset.license_condition_source else '',
            dataset.license_condition_db_or_copyrighted,
            dataset.license_condition_personal_data
        ]
        return conditions + '\n'.join([term for term in terms if term])

    def get_organization(self, dataset):
        """Dump the dataset's organization, passing the precomputed counts
        the institution serializer expects in its context."""
        context = {
            'published_datasets_count':
            dataset.organization_published_datasets__count,
            'published_resources_count':
            dataset.organization_published_resources__count,
        }
        return InstitutionXMLSerializer(many=False, context=context).dump(
            dataset.organization)

    def get_resources(self, dataset):
        """Dump all published resources of the dataset."""
        return ResourceXMLSerializer(many=True).dump(
            dataset.published_resources)
class InstitutionCSVMetadataSerializer(ExtSchema):
    """Serializes an organization into flat CSV metadata columns.

    Column headers are the translated ``data_key`` values; ``ordered = True``
    makes declaration order the column order. The two count fields are read
    from the serializer context (both keys are required).
    """
    organization_url = fields.Url(attribute='frontend_absolute_url',
                                  data_key=_('Organization URL'))
    organization_type = fields.Function(
        lambda obj: obj.get_institution_type_display(),
        data_key=_('Institution type'))
    organization_title = TranslatedStr(attribute='title', data_key=_('Name'))
    organization_abbr_title = TranslatedStr(attribute='abbreviation',
                                            data_key=_('Abbreviation'),
                                            default='')
    organization_regon = fields.Str(data_key=_('REGON'), attribute='regon')
    organization_epuap = fields.Str(attribute='epuap',
                                    data_key=_('EPUAP'),
                                    default='')
    organization_website = fields.Url(attribute='website',
                                      data_key=_('Website'))
    organization_created = fields.DateTime(attribute='created',
                                           data_key=_('Organization created'),
                                           format='iso8601')
    organization_modified = fields.DateTime(
        attribute='modified',
        data_key=_('Organization modified'),
        format='iso8601')
    organization_datasets_count = fields.Method(
        'get_published_datasets_count', data_key=_('Number of datasets'))
    organization_resources_count = fields.Method(
        'get_published_resources_count',
        data_key=_('Number of organization resources'))
    # Address columns.
    organization_postal_code = fields.Str(attribute='postal_code',
                                          data_key=_('Postal code'))
    organization_city = fields.Str(attribute='city', data_key=_('City'))
    organization_street_type = fields.Str(attribute='street_type',
                                          data_key=_('Street type'))
    organization_street = fields.Str(attribute='street', data_key=_('Street'))
    organization_street_number = fields.Str(attribute='street_number',
                                            data_key=_('Street number'))
    organization_flat_number = fields.Str(attribute='flat_number',
                                          data_key=_('Flat number'))
    organization_email = fields.Email(attribute='email', data_key=_('Email'))
    organization_phone_number = fields.Str(attribute='tel',
                                           data_key=_('Phone'))

    class Meta:
        ordered = True

    def get_published_datasets_count(self, organization):
        """Return the precomputed datasets count supplied via context."""
        return self.context['published_datasets_count']

    def get_published_resources_count(self, organization):
        """Return the precomputed resources count supplied via context."""
        return self.context['published_resources_count']
# Example 8
class DatasetResourcesCSVSerializer(CSVSerializer):
    """CSV serializer producing one row per (dataset, published resource)
    pair, with the dataset's and organization's metadata repeated on each
    resource row.
    """
    dataset_url = fields.Url(attribute='frontend_absolute_url',
                             data_key=_('Dataset URL'))
    dataset_title = TranslatedStr(attribute='title', data_key=_('Title'))
    dataset_description = TranslatedStr(attribute='notes', data_key=_('Notes'))
    # Comma-joined tag names for the active language (tags_pl / tags_en ...).
    dataset_keywords = fields.Function(lambda obj: ', '.join(
        (tag.name for tag in getattr(obj, f'tags_{get_language()}'))),
                                       data_key=_('Tag'))
    dataset_categories = fields.Function(lambda obj: ', '.join(
        (category.title_i18n for category in obj.categories.all())),
                                         data_key=_('Category'))
    dataset_update_frequency = fields.Str(attribute='frequency_display',
                                          data_key=_('Update frequency'))
    dataset_created = fields.DateTime(attribute='created',
                                      data_key=_('Dataset created'),
                                      format='iso8601')
    dataset_verified = fields.DateTime(attribute='verified',
                                       data_key=_('Dataset verified'),
                                       format='iso8601')
    views_count = fields.Int(attribute='computed_views_count',
                             data_key=_("Dataset views count"))
    downloads_count = fields.Int(attribute='computed_downloads_count',
                                 data_key=_("Dataset downloads count"))
    dataset_resources_count = fields.Int(
        attribute='published_resources__count', data_key=_('Number of data'))
    dataset_conditions = fields.Method('get_dataset_conditions',
                                       data_key=_('Terms of use'))
    dataset_license = fields.Str(attribute='license_name',
                                 data_key=_('License'))
    dataset_source = fields.Nested(SourceXMLSchema,
                                   attribute='source',
                                   data_key=_('source'))
    # These two are flattened away by unpack_nested_data below.
    organization = fields.Method('get_organization')
    resources = fields.Nested(ResourceCSVMetadataSerializer,
                              many=True,
                              attribute='published_resources')

    @ma.post_dump(pass_many=True)
    def unpack_nested_data(self, data, many, **kwargs):
        """Explode each dataset record into one row per resource, merging the
        organization columns into every row.

        NOTE(review): a dataset with an empty ``resources`` list contributes
        no rows at all — confirm that is intended.
        """
        new_result_data = []
        for record in data:
            resources = record.pop('resources')
            organization = record.pop('organization')
            record.update(**organization)
            for resource in resources:
                tmp_record = record.copy()
                tmp_record.update(**resource)
                new_result_data.append(tmp_record)
        return new_result_data

    def get_dataset_conditions(self, dataset):
        """Build the terms-of-use text: a translated preamble followed by one
        line per applicable license condition (empty entries dropped)."""
        conditions = _(
            'This dataset is public information, it can be reused under the following conditions: '
        )
        terms = [
            str(
                dataset._meta.get_field(
                    'license_condition_modification').verbose_name)
            if dataset.license_condition_modification else '',
            str(
                dataset._meta.get_field('license_condition_source').
                verbose_name) if dataset.license_condition_source else '',
            dataset.license_condition_db_or_copyrighted,
            dataset.license_condition_personal_data
        ]
        return conditions + '\n'.join([term for term in terms if term])

    @ma.post_dump(pass_many=False)
    def prepare_nested_data(self, data, **kwargs):
        """Flatten the nested source dict into one labelled string and strip
        HTML from the notes column. Runs before unpack_nested_data
        (pass_many=False hooks run per item, before pass_many=True hooks)."""
        source = data.get(_('source'))
        if source:
            source_str =\
                '{title_label}: {title}, {url_label}: {url},' \
                ' {last_import_label}: {last_import}, {frequency_label}: {frequency}'.format(
                    title=source['title'], title_label=_('name'),
                    url=source['url'], url_label=_('url'),
                    last_import=source['last_import_timestamp'], last_import_label=_('last import timestamp'),
                    frequency=source['update_frequency'], frequency_label=_('Update frequency')
                )
            data[_('source')] = source_str
        data[_('Notes')] = strip_tags(data[_('Notes')])
        return data

    def get_organization(self, dataset):
        """Dump the dataset's organization with the precomputed counts the
        institution serializer expects in its context."""
        context = {
            'published_datasets_count':
            dataset.organization_published_datasets__count,
            'published_resources_count':
            dataset.organization_published_resources__count,
        }
        return InstitutionCSVMetadataSerializer(
            many=False, context=context).dump(dataset.organization)

    def get_csv_headers(self):
        """Return the CSV header row, expanding the 'organization' and
        'resources' pseudo-fields into their nested schemas' headers so the
        header order matches the rows built by unpack_nested_data."""
        result = []
        for field_name, field in self.fields.items():
            if field_name == 'organization':
                org_headers = [
                    org_field.data_key for org_field_name, org_field in
                    InstitutionCSVMetadataSerializer().fields.items()
                ]
                result.extend(org_headers)
            elif field_name == 'resources':
                res_headers = [
                    res_field.data_key for res_field_name, res_field in
                    field.schema.fields.items()
                ]
                result.extend(res_headers)
            else:
                # Fall back to the attribute name when no data_key is set.
                header = field.data_key or field_name
                result.append(header)
        return result

    class Meta:
        ordered = True
# Example 9
class DatasetRDFResponseSchema(ProfilesMixin, RDFResponseSchema):
    """RDF (DCAT) response schema for datasets.

    Dump pipeline: pre_dump hooks extract pagination info and re-fetch the
    datasets from the DB; the per-item post_dump turns each dataset dict into
    RDF triples; the pass_many post_dump assembles the final graph.
    """
    identifier = ma.fields.Function(lambda ds: ds.frontend_absolute_url)
    id = ma.fields.Str()
    frontend_absolute_url = ma.fields.Str()
    title_pl = ma.fields.Str(attribute='title_translated.pl')
    title_en = ma.fields.Str(attribute='title_translated.en')
    notes_pl = ma.fields.Str(attribute='notes_translated.pl')
    notes_en = ma.fields.Str(attribute='notes_translated.en')
    status = ma.fields.Str()
    created = ma.fields.DateTime()
    modified = ma.fields.DateTime()
    landing_page = fields.Function(lambda ds: ds.frontend_absolute_url)
    version = ma.fields.Str()
    tags = rdf_fields.Tags(ma.fields.Str())
    resources = ma.fields.Function(resources_dump)
    organization = ma.fields.Function(organization_dump)
    categories = ma.fields.Function(categories_dump)
    update_frequency = DcatUpdateFrequencyField()
    license = ma.fields.Function(lambda ds: ds.license_link)
    # Feature-flagged field (flag evaluated at import time).
    if is_enabled('S38_dcat_spatial_data.be'):
        spatial = ma.fields.Nested(RDFRegionSchema,
                                   many=True,
                                   attribute='regions')

    @staticmethod
    def _from_path(es_resp, path):
        """Follow a dotted attribute path on an ES response object; return
        None if any step is missing."""
        try:
            obj = es_resp
            for step in path.split('.'):
                obj = getattr(obj, step)
            return obj
        except AttributeError:
            return None

    @ma.pre_dump(pass_many=True)
    def extract_pagination(self, data, many, **kwargs):
        """Populate pagination links/counters in the serializer context from
        the ES response and the request's cleaned query params.

        Only runs its logic for list responses (self.many). Returns ``data``
        unchanged.
        """
        request = self.context['request'] if 'request' in self.context else None
        # NOTE(review): assumes request.context.cleaned_data holds the parsed
        # query params — confirm against the request wrapper.
        cleaned_data = dict(getattr(request.context, 'cleaned_data',
                                    {})) if request else {}

        def _get_page_link(page_number):
            # Mutates cleaned_data['page'] before rebuilding the query string.
            cleaned_data['page'] = page_number
            return '{}{}?{}'.format(settings.API_URL, request.path,
                                    builder.build(cleaned_data))

        if self.many:
            page, per_page = cleaned_data.get('page', 1), cleaned_data.get(
                'per_page', 20)
            self.context['count'] = self._from_path(data, 'hits.total')
            self.context['per_page'] = per_page

            items_count = self._from_path(data, 'hits.total')
            if page > 1:
                self.context['first_page'] = _get_page_link(1)
                self.context['prev_page'] = _get_page_link(page - 1)
            if items_count:
                # Cap at 10000 — presumably the ES max_result_window; confirm.
                max_count = min(items_count, 10000)
                # Ceiling division: add one page for a partial last page.
                off = 1 if max_count % per_page else 0
                last_page = max_count // per_page + off
                if last_page > 1:
                    self.context['last_page'] = _get_page_link(last_page)
                if page * per_page < max_count:
                    self.context['next_page'] = _get_page_link(page + 1)

        return data

    @ma.pre_dump(pass_many=True)
    def prepare_datasets(self, data, many, **kwargs):
        """For list responses, record the catalog's modification date and
        swap the ES hits for the corresponding DB queryset."""
        self.context['dataset_refs'] = []
        if self.many:
            self.context['catalog_modified'] = self._from_path(
                data, 'aggregations.catalog_modified.value_as_string')
            dataset_ids = [x.id for x in data]
            data = Dataset.objects.filter(pk__in=dataset_ids)
        return data

    @ma.post_dump(pass_many=False)
    def prepare_graph_triples(self, data, **kwargs):
        """Turn one dumped dataset dict into RDF triples, remembering its
        URL in context for the catalog-level references."""
        self.context['dataset_refs'].append(data['frontend_absolute_url'])
        dataset = self.get_rdf_class_for_model(model=Dataset)()
        return dataset.to_triples(data, self.include_nested_triples)

    @ma.post_dump(pass_many=True)
    def prepare_graph(self, data, many, **kwargs):
        """Assemble the final RDF graph from the per-dataset triples; for
        list responses also add catalog and pagination triples."""
        graph = ExtendedGraph(ordered=True)
        self.add_bindings(graph=graph)

        # When many == True we are serializing the whole catalog.
        if many:
            triples = []
            # For the catalog, data is a list of triple lists — flatten it.
            for _triples in data:
                triples.extend(_triples)

            self.add_pagination_bindings(graph=graph)
            paged_collection = HYDRAPagedCollection()
            triples.extend(paged_collection.to_triples(self.context))
            catalog = self.get_rdf_class_for_catalog()()
            triples.extend(catalog.to_triples(self.context))
        else:
            triples = data
        for triple in triples:
            graph.add(triple)
        return graph

    class Meta:
        model = 'datasets.Dataset'