Exemple #1
0
 def test_multiple_spaces(self):
     """Slugifying a string with two consecutive spaces yields one dash."""
     slug_field = db.SlugField()
     slugified = slug_field.slugify('a  b')
     assert slugified == 'a-b'
Exemple #2
0
class SlugTester(db.Document):
    """Test document with a ``slug`` field populated from ``title``.

    Inheritance is enabled through ``meta`` so the document can be
    subclassed.
    """
    title = db.StringField()
    slug = db.SlugField(populate_from='title')
    meta = {'allow_inheritance': True}
Exemple #3
0
class SlugUpdateTester(db.Document):
    """Test document whose ``slug`` is declared with ``update=True``.

    NOTE(review): presumably ``update=True`` makes the slug follow later
    changes of ``title`` -- confirm against ``db.SlugField``.
    """
    title = db.StringField()
    slug = db.SlugField(populate_from='title', update=True)
Exemple #4
0
class User(WithMetrics, UserMixin, db.Document):
    """User account document.

    Holds identity and credential fields, profile information, the
    tracking fields expected by Flask-Security, and helpers to compute
    metrics, build URLs and anonymize the account on deletion.
    """
    slug = db.SlugField(max_length=255,
                        required=True,
                        populate_from='fullname')
    email = db.StringField(max_length=255, required=True, unique=True)
    password = db.StringField()
    active = db.BooleanField()
    # A callable default guarantees each document gets its own list instead
    # of sharing one mutable ``[]`` instance declared at class level.
    roles = db.ListField(db.ReferenceField(Role), default=list)

    first_name = db.StringField(max_length=255, required=True)
    last_name = db.StringField(max_length=255, required=True)

    avatar_url = db.URLField()
    avatar = db.ImageField(fs=avatars,
                           basename=default_image_basename,
                           thumbnails=AVATAR_SIZES)
    website = db.URLField()
    about = db.StringField()

    prefered_language = db.StringField()

    apikey = db.StringField()

    created_at = db.DateTimeField(default=datetime.now, required=True)

    # The field below is required for Flask-security
    # when SECURITY_CONFIRMABLE is True
    confirmed_at = db.DateTimeField()

    # The 5 fields below are required for Flask-security
    # when SECURITY_TRACKABLE is True
    last_login_at = db.DateTimeField()
    current_login_at = db.DateTimeField()
    last_login_ip = db.StringField()
    current_login_ip = db.StringField()
    login_count = db.IntField()

    deleted = db.DateTimeField()
    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    # Class-level signals; ``pre_save``/``post_save`` below relay to them.
    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()
    on_delete = Signal()

    meta = {
        'indexes': ['-created_at', 'slug', 'apikey'],
        'ordering': ['-created_at']
    }

    __search_metrics__ = Object(
        properties={
            'datasets': Integer(),
            'reuses': Integer(),
            'followers': Integer(),
            'views': Integer()
        })

    __metrics_keys__ = [
        'datasets',
        'reuses',
        'following',
        'followers',
    ]

    def __str__(self):
        """Return the user's full name."""
        return self.fullname

    @property
    def fullname(self):
        """First and last name joined by a space, missing parts skipped."""
        return ' '.join((self.first_name or '', self.last_name or '')).strip()

    @cached_property
    def organizations(self):
        """Organizations having this user as member and no ``deleted`` value."""
        from udata.core.organization.models import Organization
        return Organization.objects(members__user=self, deleted__exists=False)

    @property
    def sysadmin(self):
        """Whether the user has the ``admin`` role."""
        return self.has_role('admin')

    def url_for(self, *args, **kwargs):
        """Build the ``users.show`` URL for this user.

        Extra positional and keyword arguments are forwarded to ``url_for``.
        """
        return url_for('users.show', user=self, *args, **kwargs)

    display_url = property(url_for)

    @property
    def external_url(self):
        """Same as ``display_url`` but built with ``_external=True``."""
        return self.url_for(_external=True)

    @property
    def visible(self):
        """Truthy when the user is active and has datasets or reuses."""
        count = self.metrics.get('datasets', 0) + self.metrics.get('reuses', 0)
        return count > 0 and self.active

    @cached_property
    def resources_availability(self):
        """Return the percentage of availability for resources."""
        # Flatten the per-organization availability lists and keep only the
        # real booleans (unknown states are reported with other values).
        availabilities = [
            availability
            for availability in chain.from_iterable(
                org.check_availability() for org in self.organizations)
            if isinstance(availability, bool)
        ]
        if availabilities:
            # Trick will work because it's a sum() of booleans.
            return round(100. * sum(availabilities) / len(availabilities), 2)
        # Nothing could be checked: everything is considered OK.
        return 100

    @cached_property
    def datasets_org_count(self):
        """Return the number of datasets of user's organizations."""
        from udata.models import Dataset  # Circular imports.
        return sum(
            Dataset.objects(organization=org).visible().count()
            for org in self.organizations)

    @cached_property
    def followers_org_count(self):
        """Return the number of followers of user's organizations."""
        from udata.models import Follow  # Circular imports.
        return sum(
            Follow.objects(following=org).count()
            for org in self.organizations)

    @property
    def datasets_count(self):
        """Return the number of datasets of the user."""
        return self.metrics.get('datasets', 0)

    @property
    def followers_count(self):
        """Return the number of followers of the user."""
        return self.metrics.get('followers', 0)

    def generate_api_key(self):
        """Create a new API key and store it in ``apikey`` (not saved).

        The key is a payload holding the user id and the current time,
        signed with the application ``SECRET_KEY``.
        """
        s = JSONWebSignatureSerializer(current_app.config['SECRET_KEY'])
        byte_str = s.dumps({
            'user': str(self.id),
            'time': time(),
        })
        self.apikey = byte_str.decode()

    def clear_api_key(self):
        """Remove the API key from the instance (not saved)."""
        self.apikey = None

    @classmethod
    def get(cls, id_or_slug):
        """Fetch a user by slug, falling back on an id lookup (or 404)."""
        obj = cls.objects(slug=id_or_slug).first()
        return obj or cls.objects.get_or_404(id=id_or_slug)

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Relay the pre-save hook to the ``before_save`` signal."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Relay the post-save hook to ``after_save`` then, depending on the
        ``created`` keyword, to ``on_create`` or ``on_update``.
        """
        cls.after_save.send(document)
        if kwargs.get('created'):
            cls.on_create.send(document)
        else:
            cls.on_update.send(document)

    @cached_property
    def json_ld(self):
        """schema.org ``Person`` description of the user as a dict.

        Optional keys (description, image, url) are only present when the
        matching field is set.
        """

        result = {
            '@type': 'Person',
            '@context': 'http://schema.org',
            'name': self.fullname,
        }

        if self.about:
            result['description'] = mdstrip(self.about)

        if self.avatar_url:
            result['image'] = self.avatar_url

        if self.website:
            result['url'] = self.website

        return result

    def mark_as_deleted(self):
        """Anonymize the account and detach it from related documents.

        Personal fields are blanked or replaced by placeholders, the user is
        removed from its organizations' members, its discussion messages are
        replaced by ``'DELETED'``, its follow relations are removed and a
        notification mail is sent.
        """
        # Copy taken before anonymization; it is the mail recipient below.
        copied_user = copy(self)
        self.email = '{}@deleted'.format(self.id)
        self.slug = 'deleted'
        self.password = None
        self.active = False
        self.first_name = 'DELETED'
        self.last_name = 'DELETED'
        self.avatar = None
        self.avatar_url = None
        self.website = None
        self.about = None
        self.extras = None
        self.apikey = None
        self.deleted = datetime.now()
        self.save()
        for organization in self.organizations:
            organization.members = [
                member for member in organization.members
                if member.user != self
            ]
            organization.save()
        for discussion in Discussion.objects(discussion__posted_by=self):
            for message in discussion.discussion:
                if message.posted_by == self:
                    message.content = 'DELETED'
            discussion.save()
        Follow.objects(follower=self).delete()
        Follow.objects(following=self).delete()
        mail.send(_('Account deletion'), copied_user, 'account_deleted')

    def count_datasets(self):
        """Refresh and save the ``datasets`` metric (visible owned datasets)."""
        from udata.models import Dataset
        self.metrics['datasets'] = Dataset.objects(
            owner=self).visible().count()
        self.save()

    def count_reuses(self):
        """Refresh and save the ``reuses`` metric (visible owned reuses)."""
        from udata.models import Reuse
        self.metrics['reuses'] = Reuse.objects(owner=self).visible().count()
        self.save()

    def count_followers(self):
        """Refresh and save the ``followers`` metric."""
        from udata.models import Follow
        self.metrics['followers'] = Follow.objects(
            until=None).followers(self).count()
        self.save()

    def count_following(self):
        """Refresh and save the ``following`` metric."""
        from udata.models import Follow
        self.metrics['following'] = Follow.objects.following(self).count()
        self.save()
Exemple #5
0
class Organization(WithMetrics, BadgeMixin, db.Datetimed, db.Document):
    """Organization document with members, teams and membership requests."""
    name = db.StringField(max_length=255, required=True)
    acronym = db.StringField(max_length=128)
    slug = db.SlugField(max_length=255,
                        required=True,
                        populate_from='name',
                        update=True)
    description = db.StringField(required=True)
    url = db.StringField()
    image_url = db.StringField()
    logo = db.ImageField(fs=avatars,
                         basename=default_image_basename,
                         thumbnails=LOGO_SIZES)

    members = db.ListField(db.EmbeddedDocumentField(Member))
    teams = db.ListField(db.EmbeddedDocumentField(Team))
    requests = db.ListField(db.EmbeddedDocumentField(MembershipRequest))
    badges = db.ListField(db.EmbeddedDocumentField(OrganizationBadge))

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    deleted = db.DateTimeField()

    meta = {
        'allow_inheritance': True,
        'indexes': ['-created_at', 'slug'],
        'ordering': ['-created_at'],
        'queryset_class': OrganizationQuerySet,
    }

    def __str__(self):
        """Return the organization name, or an empty string when unset."""
        return self.name or ''

    __unicode__ = __str__

    # Class-level signals; ``pre_save``/``post_save`` below relay to them.
    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Relay the pre-save hook to the ``before_save`` signal."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Relay the post-save hook to ``after_save`` then, depending on the
        ``created`` keyword, to ``on_create`` or ``on_update``.
        """
        cls.after_save.send(document)
        if kwargs.get('created'):
            cls.on_create.send(document)
        else:
            cls.on_update.send(document)

    @property
    def display_url(self):
        """URL of the ``organizations.show`` endpoint for this organization."""
        return url_for('organizations.show', org=self)

    @property
    def external_url(self):
        """Same as ``display_url`` but built with ``_external=True``."""
        return url_for('organizations.show', org=self, _external=True)

    @property
    def pending_requests(self):
        """Membership requests whose status is ``'pending'``."""
        return [r for r in self.requests if r.status == 'pending']

    @property
    def refused_requests(self):
        """Membership requests whose status is ``'refused'``."""
        return [r for r in self.requests if r.status == 'refused']

    @property
    def accepted_requests(self):
        """Membership requests whose status is ``'accepted'``."""
        return [r for r in self.requests if r.status == 'accepted']

    @property
    def public_service(self):
        """Whether the organization holds both the ``PUBLIC_SERVICE`` and
        ``CERTIFIED`` badges.
        """
        badges_kind = [badge.kind for badge in self.badges]
        return PUBLIC_SERVICE in badges_kind and CERTIFIED in badges_kind

    def member(self, user):
        """Return the membership entry for ``user``, or ``None``."""
        for member in self.members:
            if member.user == user:
                return member
        return None

    def is_member(self, user):
        """Whether ``user`` is a member of the organization."""
        return self.member(user) is not None

    def is_admin(self, user):
        """Whether ``user`` is a member with the ``'admin'`` role."""
        member = self.member(user)
        return member is not None and member.role == 'admin'

    def pending_request(self, user):
        """Return the pending membership request of ``user``, or ``None``."""
        for request in self.requests:
            if request.user == user and request.status == 'pending':
                return request
        return None

    @classmethod
    def get(cls, id_or_slug):
        """Fetch an organization by slug, falling back on an id lookup
        (or 404).
        """
        obj = cls.objects(slug=id_or_slug).first()
        return obj or cls.objects.get_or_404(id=id_or_slug)

    def by_role(self, role):
        """Return the list of members having the given ``role``.

        A real list is returned (rather than a lazy ``filter`` object) so
        the result can be measured, indexed and iterated more than once on
        both Python 2 and Python 3.
        """
        return [member for member in self.members if member.role == role]
Exemple #6
0
class Dataset(WithMetrics, BadgeMixin, db.Owned, db.Document):
    """Dataset document: metadata, embedded resources, quality indicators
    and metric counters.
    """
    created_at = DateTimeField(verbose_name=_('Creation date'),
                               default=datetime.now,
                               required=True)
    last_modified = DateTimeField(verbose_name=_('Last modification date'),
                                  default=datetime.now,
                                  required=True)
    title = db.StringField(required=True)
    acronym = db.StringField(max_length=128)
    # /!\ do not set directly the slug when creating or updating a dataset
    # this will break the search indexation
    slug = db.SlugField(max_length=255,
                        required=True,
                        populate_from='title',
                        update=True,
                        follow=True)
    description = db.StringField(required=True, default='')
    license = db.ReferenceField('License')

    tags = db.TagListField()
    resources = db.ListField(db.EmbeddedDocumentField(Resource))

    private = db.BooleanField(default=False)
    frequency = db.StringField(choices=list(UPDATE_FREQUENCIES.keys()))
    frequency_date = db.DateTimeField(verbose_name=_('Future date of update'))
    temporal_coverage = db.EmbeddedDocumentField(db.DateRange)
    spatial = db.EmbeddedDocumentField(SpatialCoverage)

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    featured = db.BooleanField(required=True, default=False)

    deleted = db.DateTimeField()
    archived = db.DateTimeField()

    def __str__(self):
        """Return the dataset title, or an empty string when unset."""
        return self.title or ''

    __badges__ = {
        PIVOTAL_DATA: _('Pivotal data'),
    }

    __search_metrics__ = Object(properties={
        'reuses': Integer(),
        'followers': Integer(),
        'views': Integer(),
    })

    __metrics_keys__ = [
        'discussions',
        'issues',
        'reuses',
        'followers',
        'views',
    ]

    meta = {
        'indexes': [
            '-created_at',
            'slug',
            'resources.id',
            'resources.urlhash',
        ] + db.Owned.meta['indexes'],
        'ordering': ['-created_at'],
        'queryset_class':
        DatasetQuerySet,
    }

    # Named signals; ``pre_save``/``post_save`` below relay to them.
    before_save = signal('Dataset.before_save')
    after_save = signal('Dataset.after_save')
    on_create = signal('Dataset.on_create')
    on_update = signal('Dataset.on_update')
    before_delete = signal('Dataset.before_delete')
    after_delete = signal('Dataset.after_delete')
    on_delete = signal('Dataset.on_delete')
    on_archive = signal('Dataset.on_archive')
    on_resource_added = signal('Dataset.on_resource_added')

    verbose_name = _('dataset')

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Relay the pre-save hook to the ``before_save`` signal."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Dispatch the post-save hook to the dataset signals.

        Nothing is sent when ``'post_save'`` is listed in the ``ignores``
        keyword. Otherwise ``after_save`` is sent, then ``on_create`` or
        ``on_update`` depending on ``created``, then ``on_delete`` and
        ``on_archive`` when the matching date is set, and finally
        ``on_resource_added`` when a ``resource_added`` id is given.
        """
        if 'post_save' in kwargs.get('ignores', []):
            return
        cls.after_save.send(document)
        if kwargs.get('created'):
            cls.on_create.send(document)
        else:
            cls.on_update.send(document)
        if document.deleted:
            cls.on_delete.send(document)
        if document.archived:
            cls.on_archive.send(document)
        if kwargs.get('resource_added'):
            cls.on_resource_added.send(document,
                                       resource_id=kwargs['resource_added'])

    def clean(self):
        """Replace a legacy frequency identifier by its current equivalent."""
        if self.frequency in LEGACY_FREQUENCIES:
            self.frequency = LEGACY_FREQUENCIES[self.frequency]

    def url_for(self, *args, **kwargs):
        """Build the ``datasets.show`` URL for this dataset.

        Extra positional and keyword arguments are forwarded to ``url_for``.
        """
        return url_for('datasets.show', dataset=self, *args, **kwargs)

    display_url = property(url_for)

    @property
    def is_visible(self):
        """Boolean negation of ``is_hidden``."""
        return not self.is_hidden

    @property
    def is_hidden(self):
        """Truthy when the dataset has no resource, is private, deleted or
        archived. The value is not necessarily a boolean.
        """
        return (len(self.resources) == 0 or self.private or self.deleted
                or self.archived)

    @property
    def full_title(self):
        """Title followed by the acronym in parentheses when there is one."""
        if not self.acronym:
            return self.title
        return '{title} ({acronym})'.format(**self._data)

    @property
    def external_url(self):
        """Same as ``display_url`` but built with ``_external=True``."""
        return self.url_for(_external=True)

    @property
    def image_url(self):
        """Logo URL of the organization, else avatar URL of the owner.

        Returns ``None`` when the dataset has neither.
        """
        if self.organization:
            return self.organization.logo.url
        elif self.owner:
            return self.owner.avatar.url

    @property
    def frequency_label(self):
        """Label of the frequency, falling back on the ``'unknown'`` one."""
        return UPDATE_FREQUENCIES.get(self.frequency or 'unknown',
                                      UPDATE_FREQUENCIES['unknown'])

    def check_availability(self):
        """Check if resources from that dataset are available.

        Return a list of (boolean or 'unknown')
        """
        # Only check remote resources; the list is empty when there is none.
        return [
            resource.check_availability()
            for resource in self.resources
            if resource.filetype == 'remote'
        ]

    @property
    def last_update(self):
        """Latest ``published`` date among resources, or ``last_modified``
        when the dataset has no resource.
        """
        if self.resources:
            return max(resource.published for resource in self.resources)
        else:
            return self.last_modified

    @property
    def next_update(self):
        """Compute the next expected update date,

        given the frequency and last_update.
        Return None if the frequency is not handled.
        """
        # Expected delay between two updates for each handled frequency.
        # Keys must match the stored frequency identifiers exactly.
        deltas = {
            'daily': timedelta(days=1),
            'weekly': timedelta(weeks=1),
            'fortnighly': timedelta(weeks=2),
            'monthly': timedelta(weeks=4),
            'bimonthly': timedelta(weeks=4 * 2),
            'quarterly': timedelta(weeks=13),
            'biannual': timedelta(weeks=26),
            'annual': timedelta(weeks=52),
            'biennial': timedelta(weeks=52 * 2),
            'triennial': timedelta(weeks=52 * 3),
            'quinquennial': timedelta(weeks=52 * 5),
        }
        delta = deltas.get(self.frequency)
        if delta is None:
            return
        return self.last_update + delta

    @cached_property
    def quality(self):
        """Return a dict filled with metrics related to the inner

        quality of the dataset:

            * number of tags
            * description length
            * and so on

        The dict is empty for a dataset without id; otherwise it always
        holds a ``score`` key computed by ``compute_quality_score``.
        """
        from udata.models import Discussion  # noqa: Prevent circular imports
        result = {}
        if not self.id:
            # Quality is only relevant on saved Datasets
            return result
        if self.frequency != 'unknown':
            result['frequency'] = self.frequency
        if self.next_update:
            # Negated so the value is negative while the next update is
            # still in the future.
            result['update_in'] = -(self.next_update - datetime.now()).days
        if self.tags:
            result['tags_count'] = len(self.tags)
        if self.description:
            result['description_length'] = len(self.description)
        if self.resources:
            result['has_resources'] = True
            result['has_only_closed_or_no_formats'] = all(
                resource.closed_or_no_format for resource in self.resources)
            result['has_unavailable_resources'] = not all(
                self.check_availability())
        discussions = Discussion.objects(subject=self)
        if discussions:
            result['discussions'] = len(discussions)
            result['has_untreated_discussions'] = not all(
                discussion.person_involved(self.owner)
                for discussion in discussions)
        result['score'] = self.compute_quality_score(result)
        return result

    def compute_quality_score(self, quality):
        """Compute the score related to the quality of that dataset.

        ``quality`` is the dict built by the ``quality`` property. Each
        criterion adds or removes ``UNIT`` points; the result is never
        negative.
        """
        score = 0
        UNIT = 2
        if 'update_in' in quality:
            # TODO: should be related to frequency.
            if quality['update_in'] < 0:
                score += UNIT
            else:
                score -= UNIT
        if 'tags_count' in quality:
            if quality['tags_count'] > 3:
                score += UNIT
        if 'description_length' in quality:
            if quality['description_length'] > 100:
                score += UNIT
        if 'has_resources' in quality:
            if quality['has_only_closed_or_no_formats']:
                score -= UNIT
            else:
                score += UNIT
            if quality['has_unavailable_resources']:
                score -= UNIT
            else:
                score += UNIT
        if 'discussions' in quality:
            if quality['has_untreated_discussions']:
                score -= UNIT
            else:
                score += UNIT
        if score < 0:
            return 0
        return score

    @classmethod
    def get(cls, id_or_slug):
        """Fetch a dataset by slug, falling back on an id lookup (or 404)."""
        obj = cls.objects(slug=id_or_slug).first()
        return obj or cls.objects.get_or_404(id=id_or_slug)

    def add_resource(self, resource):
        '''Perform an atomic prepend for a new resource'''
        resource.validate()
        self.update(
            __raw__={
                '$push': {
                    'resources': {
                        '$each': [resource.to_mongo()],
                        '$position': 0
                    }
                }
            })
        self.reload()
        # ``update()`` bypasses ``save()``, so the signal is sent by hand.
        post_save.send(self.__class__,
                       document=self,
                       resource_added=resource.id)

    def update_resource(self, resource):
        '''Perform an atomic update for an existing resource'''
        index = self.resources.index(resource)
        data = {'resources__{index}'.format(index=index): resource}
        self.update(**data)
        self.reload()
        # ``update()`` bypasses ``save()``, so the signal is sent by hand.
        post_save.send(self.__class__, document=self)

    @property
    def community_resources(self):
        """Community resources attached to this dataset.

        Evaluates to an empty list when the dataset has no id or when the
        query matches nothing.
        """
        return self.id and CommunityResource.objects.filter(dataset=self) or []

    @cached_property
    def json_ld(self):
        """schema.org ``Dataset`` description of the dataset as a dict.

        Description, license and author are only present when available.
        """
        result = {
            '@context':
            'http://schema.org',
            '@type':
            'Dataset',
            '@id':
            str(self.id),
            'alternateName':
            self.slug,
            'dateCreated':
            self.created_at.isoformat(),
            'dateModified':
            self.last_modified.isoformat(),
            'url':
            url_for('datasets.show', dataset=self, _external=True),
            'name':
            self.title,
            'keywords':
            ','.join(self.tags),
            'distribution': [resource.json_ld for resource in self.resources],
            # Theses values are not standard
            'contributedDistribution':
            [resource.json_ld for resource in self.community_resources],
            'extras':
            [get_json_ld_extra(*item) for item in self.extras.items()],
        }

        if self.description:
            result['description'] = mdstrip(self.description)

        if self.license and self.license.url:
            result['license'] = self.license.url

        # The organization takes precedence over the owner as author.
        if self.organization:
            author = self.organization.json_ld
        elif self.owner:
            author = self.owner.json_ld
        else:
            author = None

        if author:
            result['author'] = author

        return result

    @property
    def views_count(self):
        """Return the ``views`` metric, 0 when missing."""
        return self.metrics.get('views', 0)

    def count_discussions(self):
        """Refresh and save the ``discussions`` metric (``closed=None``)."""
        from udata.models import Discussion
        self.metrics['discussions'] = Discussion.objects(subject=self,
                                                         closed=None).count()
        self.save()

    def count_issues(self):
        """Refresh and save the ``issues`` metric (``closed=None``)."""
        from udata.models import Issue
        self.metrics['issues'] = Issue.objects(subject=self,
                                               closed=None).count()
        self.save()

    def count_reuses(self):
        """Refresh and save the ``reuses`` metric (visible reuses)."""
        from udata.models import Reuse
        self.metrics['reuses'] = Reuse.objects(datasets=self).visible().count()
        self.save()

    def count_followers(self):
        """Refresh and save the ``followers`` metric."""
        from udata.models import Follow
        self.metrics['followers'] = Follow.objects(
            until=None).followers(self).count()
        self.save()
Exemple #7
0
class Team(db.EmbeddedDocument):
    """Embedded document describing a named team and its member users."""
    name = db.StringField(required=True)
    slug = db.SlugField(
        max_length=255,
        required=True,
        populate_from='name',
        update=True,
        unique=False,
    )
    description = db.StringField()

    members = db.ListField(db.ReferenceField('User'))
Exemple #8
0
class Dataset(WithMetrics, BadgeMixin, db.Document):
    """Dataset document: metadata, embedded resources and quality
    indicators.
    """
    created_at = DateTimeField(verbose_name=_('Creation date'),
                               default=datetime.now,
                               required=True)
    last_modified = DateTimeField(verbose_name=_('Last modification date'),
                                  default=datetime.now,
                                  required=True)
    title = db.StringField(max_length=255, required=True)
    slug = db.SlugField(max_length=255,
                        required=True,
                        populate_from='title',
                        update=True)
    description = db.StringField(required=True, default='')
    license = db.ReferenceField('License')

    tags = db.TagListField()
    resources = db.ListField(db.EmbeddedDocumentField(Resource))

    private = db.BooleanField()
    owner = db.ReferenceField('User', reverse_delete_rule=db.NULLIFY)
    organization = db.ReferenceField('Organization',
                                     reverse_delete_rule=db.NULLIFY)
    # Materialize the keys: on Python 3 ``dict.keys()`` is a view that
    # cannot be indexed, while a list behaves the same on every version.
    frequency = db.StringField(choices=list(UPDATE_FREQUENCIES.keys()))
    frequency_date = db.DateTimeField(verbose_name=_('Future date of update'))
    temporal_coverage = db.EmbeddedDocumentField(db.DateRange)
    spatial = db.EmbeddedDocumentField(SpatialCoverage)

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    featured = db.BooleanField(required=True, default=False)

    deleted = db.DateTimeField()

    def __str__(self):
        """Return the dataset title, or an empty string when unset."""
        return self.title or ''

    __unicode__ = __str__

    __badges__ = {
        PIVOTAL_DATA: _('Pivotal data'),
    }

    meta = {
        'allow_inheritance':
        True,
        'indexes': [
            '-created_at',
            'slug',
            'organization',
            'resources.id',
            'resources.urlhash',
        ],
        'ordering': ['-created_at'],
        'queryset_class':
        DatasetQuerySet,
    }

    # Named signals; ``pre_save``/``post_save`` below relay to them.
    before_save = signal('Dataset.before_save')
    after_save = signal('Dataset.after_save')
    on_create = signal('Dataset.on_create')
    on_update = signal('Dataset.on_update')
    before_delete = signal('Dataset.before_delete')
    after_delete = signal('Dataset.after_delete')
    on_delete = signal('Dataset.on_delete')

    verbose_name = _('dataset')

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Relay the pre-save hook to the ``before_save`` signal."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Relay the post-save hook to ``after_save`` then, depending on the
        ``created`` keyword, to ``on_create`` or ``on_update``.
        """
        cls.after_save.send(document)
        if kwargs.get('created'):
            cls.on_create.send(document)
        else:
            cls.on_update.send(document)

    def url_for(self, *args, **kwargs):
        """Build the ``datasets.show`` URL for this dataset.

        Extra positional and keyword arguments are forwarded to ``url_for``.
        """
        return url_for('datasets.show', dataset=self, *args, **kwargs)

    display_url = property(url_for)

    @property
    def external_url(self):
        """Same as ``display_url`` but built with ``_external=True``."""
        return self.url_for(_external=True)

    @property
    def image_url(self):
        """Logo URL of the organization, else avatar URL of the owner.

        Returns ``None`` when the dataset has neither.
        """
        if self.organization:
            return self.organization.logo.url
        elif self.owner:
            return self.owner.avatar.url

    @property
    def frequency_label(self):
        """Label of the frequency, falling back on the ``'unknown'`` one."""
        return UPDATE_FREQUENCIES.get(self.frequency or 'unknown',
                                      UPDATE_FREQUENCIES['unknown'])

    def check_availability(self):
        """Check if resources from that dataset are available.

        Return a list of booleans.
        """
        # Only check remote resources.
        remote_resources = [
            resource for resource in self.resources
            if resource.filetype == 'remote'
        ]
        if not remote_resources:
            # Nothing to check: skip the group lookup below entirely.
            return []
        # First, we try to retrieve all data from the group (slug).
        error, response = check_url_from_group(self.slug)
        if error:
            # The group is unknown, the check will be performed by resource.
            return [
                resource.check_availability(self.slug)
                for resource in remote_resources
            ]
        else:
            return [
                int(url_infos['status']) == 200
                for url_infos in response['urls']
            ]

    @property
    def last_update(self):
        """Latest ``published`` date among resources, or ``last_modified``
        when the dataset has no resource.
        """
        if self.resources:
            return max(resource.published for resource in self.resources)
        else:
            return self.last_modified

    @property
    def next_update(self):
        """Compute the next expected update date,

        given the frequency and last_update.
        Return None if the frequency is not handled.
        """
        # Expected delay between two updates for each handled frequency.
        # Keys must match the stored frequency identifiers exactly.
        deltas = {
            'daily': timedelta(days=1),
            'weekly': timedelta(weeks=1),
            'fortnighly': timedelta(weeks=2),
            'monthly': timedelta(weeks=4),
            'bimonthly': timedelta(weeks=4 * 2),
            'quarterly': timedelta(weeks=13),
            'biannual': timedelta(weeks=26),
            'annual': timedelta(weeks=52),
            'biennial': timedelta(weeks=52 * 2),
            'triennial': timedelta(weeks=52 * 3),
            'quinquennial': timedelta(weeks=52 * 5),
        }
        delta = deltas.get(self.frequency)
        if delta is None:
            return
        return self.last_update + delta

    @cached_property
    def quality(self):
        """Return a dict filled with metrics related to the inner

        quality of the dataset:

            * number of tags
            * description length
            * and so on

        The dict always holds a ``score`` key computed by
        ``compute_quality_score``.
        """
        result = {}
        if self.next_update:
            result['frequency'] = self.frequency
            # Negated so the value is negative while the next update is
            # still in the future.
            result['update_in'] = -(self.next_update - datetime.now()).days
        if self.tags:
            result['tags_count'] = len(self.tags)
        if self.description:
            result['description_length'] = len(self.description)
        if self.resources:
            result['has_resources'] = True
            result['has_only_closed_formats'] = all(
                resource.closed_format for resource in self.resources)
            result['has_unavailable_resources'] = not all(
                self.check_availability())
        discussions = DatasetDiscussion.objects(subject=self.id)
        if discussions:
            result['discussions'] = len(discussions)
            result['has_untreated_discussions'] = not all(
                discussion.person_involved(self.owner)
                for discussion in discussions)
        result['score'] = self.compute_quality_score(result)
        return result

    def compute_quality_score(self, quality):
        """Compute the score related to the quality of that dataset.

        ``quality`` is the dict built by the ``quality`` property. Each
        criterion adds or removes ``UNIT`` points; the result is never
        negative.
        """
        score = 0
        UNIT = 2
        # ``quality`` sets 'frequency' and 'update_in' together.
        if 'frequency' in quality:
            # TODO: should be related to frequency.
            if quality['update_in'] < 0:
                score += UNIT
            else:
                score -= UNIT
        if 'tags_count' in quality:
            if quality['tags_count'] > 3:
                score += UNIT
        if 'description_length' in quality:
            if quality['description_length'] > 100:
                score += UNIT
        if 'has_resources' in quality:
            if quality['has_only_closed_formats']:
                score -= UNIT
            else:
                score += UNIT
            if quality['has_unavailable_resources']:
                score -= UNIT
            else:
                score += UNIT
        if 'discussions' in quality:
            if quality['has_untreated_discussions']:
                score -= UNIT
            else:
                score += UNIT
        if score < 0:
            return 0
        return score

    @classmethod
    def get(cls, id_or_slug):
        """Fetch a dataset by slug, falling back on an id lookup (or 404)."""
        obj = cls.objects(slug=id_or_slug).first()
        return obj or cls.objects.get_or_404(id=id_or_slug)

    def add_resource(self, resource):
        '''Perform an atomic prepend for a new resource'''
        resource.validate()
        self.update(
            __raw__={
                '$push': {
                    'resources': {
                        '$each': [resource.to_mongo()],
                        '$position': 0
                    }
                }
            })
        self.reload()
        # ``update()`` bypasses ``save()``, so the signal is sent by hand.
        post_save.send(self.__class__, document=self)

    def update_resource(self, resource):
        '''Perform an atomic update for an existing resource'''
        index = self.resources.index(resource)
        data = {'resources__{index}'.format(index=index): resource}
        self.update(**data)
        self.reload()
        # ``update()`` bypasses ``save()``, so the signal is sent by hand.
        post_save.send(self.__class__, document=self)

    @property
    def community_resources(self):
        """Community resources attached to this dataset.

        Evaluates to an empty list when the dataset has no id or when the
        query matches nothing.
        """
        return self.id and CommunityResource.objects.filter(dataset=self) or []
Exemple #9
0
class License(db.Document):
    '''License document, with helpers to guess a license from free text.'''
    # We need to declare id explicitly since we do not use the default
    # value set by Mongo.
    id = db.StringField(primary_key=True)
    created_at = db.DateTimeField(default=datetime.now, required=True)
    title = db.StringField(required=True)
    alternate_titles = db.ListField(db.StringField())
    slug = db.SlugField(required=True, populate_from='title')
    url = db.URLField()
    alternate_urls = db.ListField(db.URLField())
    maintainer = db.StringField()
    flags = db.ListField(db.StringField())

    active = db.BooleanField()

    def __str__(self):
        return self.title

    @classmethod
    def guess(cls, *strings, **kwargs):
        '''
        Try to guess a license from a list of strings.

        Strings are tried in order with `guess_one`
        and the first license found is returned.

        Accept a `default` keyword argument which will be
        the default fallback license.
        '''
        for string in strings:
            found = cls.guess_one(string)
            if found:
                return found
        return kwargs.get('default')

    @classmethod
    def guess_one(cls, text):
        '''
        Try to guess license from a string.

        Try to exact match on identifier, slugified title or URL
        and fallback on edit distance ranking (after slugification),
        first against the slugs then against the alternate titles.

        Return the license found or `None` when nothing matches
        or when the fuzzy match is ambiguous.
        '''
        if not text:
            return
        text = text.strip().lower()  # Stored identifiers are lower case
        if not text:
            # Whitespace-only input: there is nothing to match on and an
            # empty slug would be "close" to any very short slug.
            return
        slug = cls.slug.slugify(text)  # Use slug as it normalizes the string
        license = cls.objects(
            db.Q(id__iexact=text) | db.Q(slug=slug) | db.Q(url__iexact=text)
            | db.Q(alternate_urls__iexact=text)).first()
        if license is None:
            # Try to single match with a low Damerau-Levenshtein distance
            candidates = [
                candidate for candidate in cls.objects
                if rdlevenshtein(candidate.slug, slug) <= MAX_DISTANCE
            ]
            # If there is more than one match, we cannot determine
            # which one is closer to safely choose between candidates
            if len(candidates) == 1:
                license = candidates[0]
        if license is None:
            # Same fuzzy match, this time on slugified alternate titles.
            # A license is kept at most once, whatever the number of its
            # alternate titles matching: many close titles on a single
            # license are not an ambiguity.
            candidates = [
                candidate for candidate in cls.objects
                if any(
                    rdlevenshtein(cls.slug.slugify(alt), slug) <= MAX_DISTANCE
                    for alt in candidate.alternate_titles)
            ]
            # Only distinct licenses make the match ambiguous
            if len(candidates) == 1:
                license = candidates[0]
        return license

    @classmethod
    def default(cls):
        '''Fetch the license identified by `DEFAULT_LICENSE['id']`, if any.'''
        return cls.objects(id=DEFAULT_LICENSE['id']).first()
Exemple #10
0
class Reuse(db.Datetimed, WithMetrics, BadgeMixin, db.Owned, db.Document):
    '''
    Reuse document.

    A reuse points to an `url` (stored with its hash in the unique
    `urlhash` field) and references a list of datasets.
    '''
    title = db.StringField(required=True)
    slug = db.SlugField(
        max_length=255, required=True, populate_from='title', update=True)
    description = db.StringField(required=True)
    # Give the field a real list: on Python 3 `dict.keys()` is a live view
    # which is neither a list nor a tuple and cannot be indexed.
    type = db.StringField(required=True, choices=list(REUSE_TYPES))
    url = db.StringField(required=True)
    urlhash = db.StringField(required=True, unique=True)
    image_url = db.StringField()
    image = db.ImageField(
        fs=images, basename=default_image_basename, max_size=IMAGE_MAX_SIZE,
        thumbnails=IMAGE_SIZES)
    datasets = db.ListField(
        db.ReferenceField('Dataset', reverse_delete_rule=db.PULL))
    tags = db.TagListField()
    # badges = db.ListField(db.EmbeddedDocumentField(ReuseBadge))

    private = db.BooleanField()

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    featured = db.BooleanField()
    deleted = db.DateTimeField()

    def __unicode__(self):
        return self.title or ''

    # No badge kind is declared for reuses
    __badges__ = {}

    meta = {
        'indexes': ['-created_at', 'urlhash'] + db.Owned.meta['indexes'],
        'ordering': ['-created_at'],
        'queryset_class': ReuseQuerySet,
    }

    # Class-level signals, emitted from `pre_save` and `post_save` below
    # (except `before_delete` and `after_delete` which are not sent
    # from this class body)
    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()
    on_delete = Signal()

    verbose_name = _('reuse')

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        '''
        Emit `before_save` for `document`.

        NOTE(review): the signature is the one of a signal receiver;
        where it is connected is not visible from this class.
        '''
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        '''
        Emit the after-save signals for `document`.

        `after_save` is always sent, then `on_create` or `on_update`
        depending on the `created` keyword, and `on_delete` too whenever
        the document has a `deleted` value.
        '''
        cls.after_save.send(document)
        if kwargs.get('created'):
            cls.on_create.send(document)
        else:
            cls.on_update.send(document)
        if document.deleted:
            cls.on_delete.send(document)

    def url_for(self, *args, **kwargs):
        '''
        Build the URL of the `reuses.show` endpoint for this reuse.

        Extra arguments are forwarded to the module-level `url_for`.
        '''
        return url_for('reuses.show', reuse=self, *args, **kwargs)

    display_url = property(url_for)

    @property
    def external_url(self):
        '''Same as `display_url` but built with `_external=True`.'''
        return self.url_for(_external=True)

    @property
    def type_label(self):
        '''Label registered in `REUSE_TYPES` for this reuse `type`.'''
        return REUSE_TYPES[self.type]

    def clean(self):
        '''Auto populate urlhash from url'''
        # Hash again when there is no hash yet or when the URL changed
        if not self.urlhash or 'url' in self._get_changed_fields():
            self.urlhash = hash_url(self.url)
        super(Reuse, self).clean()

    @classmethod
    def get(cls, id_or_slug):
        '''
        Fetch a reuse from its slug or, failing that, from its id
        (through `get_or_404`).
        '''
        obj = cls.objects(slug=id_or_slug).first()
        return obj or cls.objects.get_or_404(id=id_or_slug)

    @classmethod
    def url_exists(cls, url):
        '''Tell whether a reuse is already stored with the hash of `url`.'''
        urlhash = hash_url(url)
        return cls.objects(urlhash=urlhash).count() > 0

    @cached_property
    def json_ld(self):
        '''
        JSON-LD (schema.org `CreativeWork`) representation, as a dict.

        `description` and `author` are only present when available;
        the author is the organization if any, otherwise the owner.
        '''
        result = {
            '@context': 'http://schema.org',
            '@type': 'CreativeWork',
            'alternateName': self.slug,
            'dateCreated': self.created_at.isoformat(),
            'dateModified': self.last_modified.isoformat(),
            'url': url_for('reuses.show', reuse=self, _external=True),
            'name': self.title,
            'isBasedOnUrl': self.url,
        }

        if self.description:
            result['description'] = mdstrip(self.description)

        if self.organization:
            author = self.organization.json_ld
        elif self.owner:
            author = self.owner.json_ld
        else:
            author = None

        if author:
            result['author'] = author

        return result
Exemple #11
0
class Organization(WithMetrics, BadgeMixin, db.Datetimed, db.Document):
    '''Organization with its members, teams and membership requests.'''
    name = db.StringField(required=True)
    acronym = db.StringField(max_length=128)
    slug = db.SlugField(max_length=255,
                        required=True,
                        populate_from='name',
                        update=True)
    description = db.StringField(required=True)
    url = db.StringField()
    image_url = db.StringField()
    logo = db.ImageField(fs=avatars,
                         basename=default_image_basename,
                         max_size=LOGO_MAX_SIZE,
                         thumbnails=LOGO_SIZES)

    members = db.ListField(db.EmbeddedDocumentField(Member))
    teams = db.ListField(db.EmbeddedDocumentField(Team))
    requests = db.ListField(db.EmbeddedDocumentField(MembershipRequest))

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    zone = db.StringField()
    extras = db.ExtrasField()

    deleted = db.DateTimeField()

    meta = {
        'indexes': ['-created_at', 'slug'],
        'ordering': ['-created_at'],
        'queryset_class': OrganizationQuerySet,
    }

    def __unicode__(self):
        return self.name or ''

    # Badge kinds an organization can receive, with their labels
    __badges__ = {
        PUBLIC_SERVICE: _('Public Service'),
        CERTIFIED: _('Certified'),
    }

    # Class-level signals, emitted from `pre_save` and `post_save` below
    # (except `before_delete` and `after_delete` which are not sent
    # from this class body)
    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        '''
        Emit `before_save` for `document`.

        NOTE(review): the signature is the one of a signal receiver;
        where it is connected is not visible from this class.
        '''
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        '''
        Emit `after_save` then `on_create` or `on_update`
        depending on the `created` keyword.
        '''
        cls.after_save.send(document)
        if kwargs.get('created'):
            cls.on_create.send(document)
        else:
            cls.on_update.send(document)

    def url_for(self, *args, **kwargs):
        '''
        Build the URL of the `organizations.show` endpoint
        for this organization.

        Extra arguments are forwarded to the module-level `url_for`.
        '''
        return url_for('organizations.show', org=self, *args, **kwargs)

    display_url = property(url_for)

    @property
    def external_url(self):
        '''Same as `display_url` but built with `_external=True`.'''
        return self.url_for(_external=True)

    @property
    def pending_requests(self):
        '''Membership requests whose status is `pending`.'''
        return [r for r in self.requests if r.status == 'pending']

    @property
    def refused_requests(self):
        '''Membership requests whose status is `refused`.'''
        return [r for r in self.requests if r.status == 'refused']

    @property
    def accepted_requests(self):
        '''Membership requests whose status is `accepted`.'''
        return [r for r in self.requests if r.status == 'accepted']

    @property
    def certified(self):
        '''Whether one of the badges is of the `CERTIFIED` kind.'''
        return any(b.kind == CERTIFIED for b in self.badges)

    @property
    def public_service(self):
        '''
        Whether the organization holds both the `CERTIFIED`
        and the `PUBLIC_SERVICE` badges.
        '''
        is_public_service = any(b.kind == PUBLIC_SERVICE for b in self.badges)
        return self.certified and is_public_service

    def member(self, user):
        '''Return the first member entry of `user`, `None` if there is none.'''
        for member in self.members:
            if member.user == user:
                return member
        return None

    def is_member(self, user):
        '''Whether `user` has a member entry in this organization.'''
        return self.member(user) is not None

    def is_admin(self, user):
        '''Whether `user` is a member with the `admin` role.'''
        member = self.member(user)
        return member is not None and member.role == 'admin'

    def pending_request(self, user):
        '''
        Return the first pending membership request of `user`,
        `None` if there is none.
        '''
        for request in self.requests:
            if request.user == user and request.status == 'pending':
                return request
        return None

    @classmethod
    def get(cls, id_or_slug):
        '''
        Fetch an organization from its slug or, failing that,
        from its id (through `get_or_404`).
        '''
        obj = cls.objects(slug=id_or_slug).first()
        return obj or cls.objects.get_or_404(id=id_or_slug)

    def by_role(self, role):
        '''
        List the members holding the given `role`.

        A real list is returned rather than the result of `filter()`:
        on Python 3 the latter is a one-shot iterator which is always
        truthy and has no length.
        '''
        return [member for member in self.members if member.role == role]

    def check_availability(self):
        '''
        Chain the `check_availability()` results of the first 20 visible
        datasets of this organization into a single iterator.
        '''
        from udata.models import Dataset  # Circular imports.
        # Performances: only check the first 20 datasets for now.
        return chain(*[
            dataset.check_availability()
            for dataset in Dataset.objects(organization=self).visible()[:20]
        ])

    @cached_property
    def json_ld(self):
        '''
        JSON-LD (schema.org) representation, as a dict.

        The type is `GovernmentOrganization` for a public service and
        `Organization` otherwise. `description` and `logo` are only
        present when available.
        '''
        type_ = 'GovernmentOrganization' if self.public_service \
                else 'Organization'

        result = {
            '@context': 'http://schema.org',
            '@type': type_,
            'alternateName': self.slug,
            'url': url_for('organizations.show', org=self, _external=True),
            'name': self.name,
        }

        if self.description:
            result['description'] = mdstrip(self.description)

        logo = self.logo(external=True)
        if logo:
            result['logo'] = logo

        return result
Exemple #12
0
 def test_is_stripped(self):
     # Surrounding whitespace should not end up in the slug
     slugified = db.SlugField().slugify('  ab  ')
     self.assertEqual(slugified, 'ab')
Exemple #13
0
 def test_custom_separator(self):
     # The `separator` given to the field should join the words
     slugified = db.SlugField(separator='+').slugify('a b')
     self.assertEqual(slugified, 'a+b')
Exemple #14
0
 def test_lower_case_false(self):
     # With `lower_case=False` the case should be left untouched
     slugified = db.SlugField(lower_case=False).slugify('AbC')
     self.assertEqual(slugified, 'AbC')
Exemple #15
0
 def test_lower_case_default(self):
     # Without any option the slug should be lower-cased
     slugified = db.SlugField().slugify('ABC')
     self.assertEqual(slugified, 'abc')
Exemple #16
0
 def test_multiple_spaces(self):
     # Consecutive spaces should collapse into a single separator
     slugified = db.SlugField().slugify('a  b')
     self.assertEqual(slugified, 'a-b')