class Dataset(WithMetrics, BadgeMixin, db.Owned, db.Document):
    """Dataset document: descriptive metadata plus embedded resources.

    Besides the persisted fields, the class exposes lifecycle signals
    dispatched from the ``pre_save``/``post_save`` hooks, URL helpers,
    quality indicators and a JSON-LD serialization.
    """

    created_at = DateTimeField(verbose_name=_('Creation date'),
                               default=datetime.now, required=True)
    last_modified = DateTimeField(verbose_name=_('Last modification date'),
                                  default=datetime.now, required=True)
    title = db.StringField(required=True)
    acronym = db.StringField(max_length=128)
    slug = db.SlugField(max_length=255, required=True,
                        populate_from='title', update=True, follow=True)
    description = db.StringField(required=True, default='')
    license = db.ReferenceField('License')

    tags = db.TagListField()
    resources = db.ListField(db.EmbeddedDocumentField(Resource))

    private = db.BooleanField()
    frequency = db.StringField(choices=UPDATE_FREQUENCIES.keys())
    frequency_date = db.DateTimeField(verbose_name=_('Future date of update'))
    temporal_coverage = db.EmbeddedDocumentField(db.DateRange)
    spatial = db.EmbeddedDocumentField(SpatialCoverage)

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    featured = db.BooleanField(required=True, default=False)

    deleted = db.DateTimeField()

    def __unicode__(self):
        """Return the title, or an empty string when it is not set."""
        return self.title or ''

    __badges__ = {
        PIVOTAL_DATA: _('Pivotal data'),
    }

    meta = {
        'indexes': [
            '-created_at',
            'slug',
            'resources.id',
            'resources.urlhash',
        ] + db.Owned.meta['indexes'],
        'ordering': ['-created_at'],
        'queryset_class': DatasetQuerySet,
    }

    before_save = signal('Dataset.before_save')
    after_save = signal('Dataset.after_save')
    on_create = signal('Dataset.on_create')
    on_update = signal('Dataset.on_update')
    before_delete = signal('Dataset.before_delete')
    after_delete = signal('Dataset.after_delete')
    on_delete = signal('Dataset.on_delete')
    on_resource_added = signal('Dataset.on_resource_added')

    verbose_name = _('dataset')

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Relay the pre-save hook to the ``before_save`` signal."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Dispatch the class-level signals once a document is saved.

        Nothing is sent when ``'post_save'`` is listed in the ``ignores``
        keyword argument.
        """
        if 'post_save' in kwargs.get('ignores', []):
            return
        cls.after_save.send(document)
        # Creation and update are mutually exclusive notifications
        lifecycle = cls.on_create if kwargs.get('created') else cls.on_update
        lifecycle.send(document)
        if document.deleted:
            cls.on_delete.send(document)
        added_id = kwargs.get('resource_added')
        if added_id:
            cls.on_resource_added.send(document, resource_id=added_id)

    def clean(self):
        """Run the parent cleaning then map a legacy frequency value."""
        super(Dataset, self).clean()
        current = self.frequency
        if current in LEGACY_FREQUENCIES:
            self.frequency = LEGACY_FREQUENCIES[current]

    def url_for(self, *args, **kwargs):
        """Build the ``datasets.show`` URL for this dataset."""
        return url_for('datasets.show', dataset=self, *args, **kwargs)

    display_url = property(url_for)

    @property
    def is_visible(self):
        """Boolean negation of ``is_hidden``."""
        return not self.is_hidden

    @property
    def is_hidden(self):
        """Truthy when there is no resource, or when private or deleted."""
        if len(self.resources) == 0:
            return True
        return self.private or self.deleted

    @property
    def full_title(self):
        """Title, suffixed with the acronym in parentheses when present."""
        if self.acronym:
            return '{title} ({acronym})'.format(**self._data)
        return self.title

    @property
    def external_url(self):
        """Same as ``url_for`` with ``_external=True``."""
        return self.url_for(_external=True)

    @property
    def image_url(self):
        """Organization logo URL, else owner avatar URL, else ``None``."""
        if self.organization:
            return self.organization.logo.url
        if self.owner:
            return self.owner.avatar.url

    @property
    def frequency_label(self):
        """Label of the frequency, falling back on the 'unknown' entry."""
        fallback_key = 'unknown'
        return UPDATE_FREQUENCIES.get(self.frequency or fallback_key,
                                      UPDATE_FREQUENCIES[fallback_key])

    def check_availability(self):
        """Check if resources from that dataset are available.

        Return a list of (boolean or 'unknown')
        """
        # Only remote resources are checked; none of them gives an empty list
        return [
            candidate.check_availability()
            for candidate in self.resources
            if candidate.filetype == 'remote'
        ]

    @property
    def last_update(self):
        """Latest ``published`` among resources, else ``last_modified``."""
        if not self.resources:
            return self.last_modified
        return max(item.published for item in self.resources)

    @property
    def next_update(self):
        """Compute the next expected update date.

        The date is ``last_update`` shifted by a period derived from the
        frequency. Return None if the frequency is not handled.
        """
        periods = {
            'daily': timedelta(days=1),
            'weekly': timedelta(weeks=1),
            'fortnighly': timedelta(weeks=2),
            'monthly': timedelta(weeks=4),
            'bimonthly': timedelta(weeks=4 * 2),
            'quarterly': timedelta(weeks=52 / 4),
            'biannual': timedelta(weeks=52 / 2),
            'annual': timedelta(weeks=52),
            'biennial': timedelta(weeks=52 * 2),
            'triennial': timedelta(weeks=52 * 3),
            'quinquennial': timedelta(weeks=52 * 5),
        }
        period = periods.get(self.frequency)
        if period is None:
            return
        return self.last_update + period

    @cached_property
    def quality(self):
        """Return a dict of metrics related to the dataset inner quality.

        Keys are only present when the matching information exists
        (frequency, tags, description, resources, discussions). An unsaved
        dataset gives an empty dict; otherwise ``score`` is always set.
        """
        from udata.models import Discussion  # noqa: Prevent circular imports
        report = {}
        if not self.id:
            # Quality is only relevant on saved Datasets
            return report
        if self.next_update:
            report['frequency'] = self.frequency
            report['update_in'] = -(self.next_update - datetime.now()).days
        if self.tags:
            report['tags_count'] = len(self.tags)
        if self.description:
            report['description_length'] = len(self.description)
        if self.resources:
            report['has_resources'] = True
            report['has_only_closed_or_no_formats'] = all(
                item.closed_or_no_format for item in self.resources)
            report['has_unavailable_resources'] = not all(
                self.check_availability())
        threads = Discussion.objects(subject=self)
        if threads:
            report['discussions'] = len(threads)
            report['has_untreated_discussions'] = not all(
                thread.person_involved(self.owner) for thread in threads)
        report['score'] = self.compute_quality_score(report)
        return report

    def compute_quality_score(self, quality):
        """Compute the score related to the quality of that dataset.

        Each criterion found in ``quality`` adds or removes a fixed unit;
        the result is never negative.
        """
        UNIT = 2
        score = 0
        if 'frequency' in quality:
            # TODO: should be related to frequency.
            score += UNIT if quality['update_in'] < 0 else -UNIT
        if 'tags_count' in quality and quality['tags_count'] > 3:
            score += UNIT
        if ('description_length' in quality
                and quality['description_length'] > 100):
            score += UNIT
        if 'has_resources' in quality:
            score += (-UNIT if quality['has_only_closed_or_no_formats']
                      else UNIT)
            score += -UNIT if quality['has_unavailable_resources'] else UNIT
        if 'discussions' in quality:
            score += -UNIT if quality['has_untreated_discussions'] else UNIT
        return max(score, 0)

    @classmethod
    def get(cls, id_or_slug):
        """Fetch by slug first, then by id through ``get_or_404``."""
        found = cls.objects(slug=id_or_slug).first()
        if found:
            return found
        return cls.objects.get_or_404(id=id_or_slug)

    def add_resource(self, resource):
        '''Perform an atomic prepend for a new resource'''
        resource.validate()
        prepend = {
            '$push': {
                'resources': {
                    '$each': [resource.to_mongo()],
                    '$position': 0
                }
            }
        }
        self.update(__raw__=prepend)
        self.reload()
        # The raw update bypasses save(): emit post_save by hand
        post_save.send(self.__class__, document=self,
                       resource_added=resource.id)

    def update_resource(self, resource):
        '''Perform an atomic update for an existing resource'''
        position = self.resources.index(resource)
        field = 'resources__{index}'.format(index=position)
        self.update(**{field: resource})
        self.reload()
        post_save.send(self.__class__, document=self)

    @property
    def community_resources(self):
        """Community resources of this dataset, or an empty list.

        An unsaved dataset (falsy id) or a falsy query result both give
        ``[]``.
        """
        if not self.id:
            return []
        return CommunityResource.objects.filter(dataset=self) or []

    @cached_property
    def json_ld(self):
        """Build the schema.org ``Dataset`` JSON-LD dict for this dataset."""
        result = {
            '@context': 'http://schema.org',
            '@type': 'Dataset',
            '@id': str(self.id),
            'alternateName': self.slug,
            'dateCreated': self.created_at.isoformat(),
            'dateModified': self.last_modified.isoformat(),
            'url': url_for('datasets.show', dataset=self, _external=True),
            'name': self.title,
            'keywords': ','.join(self.tags),
            'distribution': [item.json_ld for item in self.resources],
            # These values are not standard
            'contributedDistribution': [
                item.json_ld for item in self.community_resources
            ],
            'extras': [get_json_ld_extra(*pair)
                       for pair in self.extras.items()],
        }

        if self.description:
            result['description'] = mdstrip(self.description)

        if self.license and self.license.url:
            result['license'] = self.license.url

        # The organization takes precedence over the owner as author
        publisher = self.organization or self.owner
        author = publisher.json_ld if publisher else None
        if author:
            result['author'] = author

        return result
class Reuse(db.Datetimed, WithMetrics, BadgeMixin, db.Owned, db.Document):
    """Reuse document: a work, identified by its URL, based on datasets.

    Exposes lifecycle signals dispatched from the ``pre_save``/``post_save``
    hooks, URL helpers, a JSON-LD serialization and metrics counters.
    """

    title = db.StringField(required=True)
    slug = db.SlugField(max_length=255, required=True,
                        populate_from='title', update=True, follow=True)
    description = db.StringField(required=True)
    type = db.StringField(required=True, choices=list(REUSE_TYPES))
    url = db.StringField(required=True)
    # Hash of ``url`` (see ``clean``); declared unique
    urlhash = db.StringField(required=True, unique=True)
    image_url = db.StringField()
    image = db.ImageField(fs=images, basename=default_image_basename,
                          max_size=IMAGE_MAX_SIZE, thumbnails=IMAGE_SIZES)
    datasets = db.ListField(
        db.ReferenceField('Dataset', reverse_delete_rule=db.PULL))
    tags = db.TagListField()
    topic = db.StringField(required=True, choices=list(REUSE_TOPICS))
    # badges = db.ListField(db.EmbeddedDocumentField(ReuseBadge))

    private = db.BooleanField()

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    featured = db.BooleanField()
    deleted = db.DateTimeField()

    def __str__(self):
        """Return the title, or an empty string when it is not set."""
        return self.title or ''

    __badges__ = {}

    __metrics_keys__ = [
        'discussions',
        'datasets',
        'followers',
        'views',
    ]

    meta = {
        'indexes': [
            '$title',
            'created_at',
            'last_modified',
            'metrics.datasets',
            'metrics.followers',
            'metrics.views',
            'urlhash'
        ] + db.Owned.meta['indexes'],
        'ordering': ['-created_at'],
        'queryset_class': ReuseQuerySet,
    }

    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()
    on_delete = Signal()

    verbose_name = _('reuse')

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Relay the pre-save hook to the ``before_save`` signal."""
        # Emit before_save
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Dispatch the class-level signals once a document is saved.

        Nothing is sent when ``'post_save'`` is listed in the ``ignores``
        keyword argument.
        """
        if 'post_save' in kwargs.get('ignores', []):
            return
        cls.after_save.send(document)
        if kwargs.get('created'):
            cls.on_create.send(document)
        else:
            cls.on_update.send(document)
        if document.deleted:
            cls.on_delete.send(document)

    def url_for(self, *args, **kwargs):
        """Resolve the reuse URL through ``endpoint_for``."""
        return endpoint_for('reuses.show', 'api.reuse',
                            reuse=self, *args, **kwargs)

    display_url = property(url_for)

    @property
    def is_visible(self):
        """Boolean negation of ``is_hidden``."""
        return not self.is_hidden

    @property
    def is_hidden(self):
        """Truthy when there is no dataset, or when private or deleted."""
        return len(self.datasets) == 0 or self.private or self.deleted

    @property
    def external_url(self):
        """Same as ``url_for`` with ``_external=True``."""
        return self.url_for(_external=True)

    @property
    def type_label(self):
        """Label registered in ``REUSE_TYPES`` for this reuse type."""
        return REUSE_TYPES[self.type]

    @property
    def topic_label(self):
        """Label registered in ``REUSE_TOPICS`` for this reuse topic."""
        return REUSE_TOPICS[self.topic]

    def clean(self):
        '''Auto populate urlhash from url'''
        # The docstring has to be the first statement: it used to follow the
        # super() call, where it was only a discarded string expression.
        super(Reuse, self).clean()
        if not self.urlhash or 'url' in self._get_changed_fields():
            self.urlhash = hash_url(self.url)

    @classmethod
    def get(cls, id_or_slug):
        """Fetch by slug first, then by id through ``get_or_404``."""
        obj = cls.objects(slug=id_or_slug).first()
        return obj or cls.objects.get_or_404(id=id_or_slug)

    @classmethod
    def url_exists(cls, url):
        """Tell whether a reuse with the same URL hash is already stored."""
        urlhash = hash_url(url)
        return cls.objects(urlhash=urlhash).count() > 0

    @cached_property
    def json_ld(self):
        """Build the schema.org ``CreativeWork`` JSON-LD dict."""
        result = {
            '@context': 'http://schema.org',
            '@type': 'CreativeWork',
            'alternateName': self.slug,
            'dateCreated': self.created_at.isoformat(),
            'dateModified': self.last_modified.isoformat(),
            'url': endpoint_for('reuses.show', 'api.reuse',
                                reuse=self, _external=True),
            'name': self.title,
            'isBasedOnUrl': self.url,
        }

        if self.description:
            result['description'] = mdstrip(self.description)

        # The organization takes precedence over the owner as author
        if self.organization:
            author = self.organization.json_ld
        elif self.owner:
            author = self.owner.json_ld
        else:
            author = None

        if author:
            result['author'] = author

        return result

    @property
    def views_count(self):
        """Value of the ``views`` metric, 0 when missing."""
        return self.metrics.get('views', 0)

    def count_datasets(self):
        """Store the number of linked datasets in the metrics and save.

        The save is flagged so that ``post_save`` sends no signal.
        """
        self.metrics['datasets'] = len(self.datasets)
        self.save(signal_kwargs={'ignores': ['post_save']})

    def count_discussions(self):
        """Store the count of discussions with ``closed=None`` and save."""
        from udata.models import Discussion
        self.metrics['discussions'] = Discussion.objects(
            subject=self, closed=None).count()
        self.save()

    def count_followers(self):
        """Store the count of follows with ``until=None`` and save."""
        from udata.models import Follow
        self.metrics['followers'] = Follow.objects(
            until=None).followers(self).count()
        self.save()
class Reuse(db.Datetimed, WithMetrics, BadgeMixin, db.Document):
    """Reuse document: a work, identified by its URL, based on datasets.

    Exposes lifecycle signals dispatched from the ``pre_save``/``post_save``
    hooks and a few URL helpers.
    """

    title = db.StringField(max_length=255, required=True)
    slug = db.SlugField(max_length=255, required=True,
                        populate_from='title', update=True)
    description = db.StringField(required=True)
    type = db.StringField(required=True, choices=REUSE_TYPES.keys())
    url = db.StringField(required=True)
    urlhash = db.StringField(required=True, unique=True)
    image_url = db.StringField()
    image = db.ImageField(fs=images, basename=default_image_basename,
                          max_size=IMAGE_MAX_SIZE, thumbnails=IMAGE_SIZES)
    datasets = db.ListField(
        db.ReferenceField('Dataset', reverse_delete_rule=db.PULL))
    tags = db.TagListField()
    # badges = db.ListField(db.EmbeddedDocumentField(ReuseBadge))

    private = db.BooleanField()
    owner = db.ReferenceField('User', reverse_delete_rule=db.NULLIFY)
    organization = db.ReferenceField('Organization',
                                     reverse_delete_rule=db.NULLIFY)

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    featured = db.BooleanField()
    deleted = db.DateTimeField()

    def __str__(self):
        """Return the title, or an empty string when it is not set."""
        return self.title or ''

    __unicode__ = __str__

    __badges__ = {}

    meta = {
        'allow_inheritance': True,
        'indexes': ['-created_at', 'owner', 'urlhash'],
        'ordering': ['-created_at'],
        'queryset_class': ReuseQuerySet,
    }

    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()
    on_delete = Signal()

    verbose_name = _('reuse')

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Relay the pre-save hook to the ``before_save`` signal."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Send ``after_save`` then either ``on_create`` or ``on_update``."""
        cls.after_save.send(document)
        lifecycle = cls.on_create if kwargs.get('created') else cls.on_update
        lifecycle.send(document)

    def url_for(self, *args, **kwargs):
        """Build the ``reuses.show`` URL for this reuse."""
        return url_for('reuses.show', reuse=self, *args, **kwargs)

    display_url = property(url_for)

    @property
    def external_url(self):
        """Same as ``url_for`` with ``_external=True``."""
        return self.url_for(_external=True)

    @property
    def type_label(self):
        """Label registered in ``REUSE_TYPES`` for this reuse type."""
        return REUSE_TYPES[self.type]

    def clean(self):
        '''Auto populate urlhash from url'''
        # Hash when missing or whenever the URL itself has changed
        stale = not self.urlhash or 'url' in self._get_changed_fields()
        if stale:
            self.urlhash = hash_url(self.url)
        super(Reuse, self).clean()

    @classmethod
    def get(cls, id_or_slug):
        """Fetch by slug first, then by id through ``get_or_404``."""
        found = cls.objects(slug=id_or_slug).first()
        if found:
            return found
        return cls.objects.get_or_404(id=id_or_slug)

    @classmethod
    def url_exists(cls, url):
        """Tell whether a reuse with the same URL hash is already stored."""
        return cls.objects(urlhash=hash_url(url)).count() > 0
class User(db.Document, WithMetrics, UserMixin):
    """User account document with profile, login tracking and API key.

    Exposes lifecycle signals dispatched from the ``pre_save``/``post_save``
    hooks.
    """

    slug = db.SlugField(max_length=255, required=True,
                        populate_from='fullname')
    email = db.StringField(max_length=255, required=True)
    password = db.StringField()
    active = db.BooleanField()
    roles = db.ListField(db.ReferenceField(Role), default=[])

    first_name = db.StringField(max_length=255, required=True)
    last_name = db.StringField(max_length=255, required=True)

    avatar_url = db.URLField()
    avatar = db.ImageField(fs=avatars, basename=default_image_basename,
                           thumbnails=AVATAR_SIZES)
    website = db.URLField()
    about = db.StringField()

    prefered_language = db.StringField()

    apikey = db.StringField()

    created_at = db.DateTimeField(default=datetime.now, required=True)
    confirmed_at = db.DateTimeField()
    last_login_at = db.DateTimeField()
    current_login_at = db.DateTimeField()
    last_login_ip = db.StringField()
    current_login_ip = db.StringField()
    login_count = db.IntField()

    deleted = db.DateTimeField()
    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()
    on_delete = Signal()

    meta = {
        'allow_inheritance': True,
        'indexes': ['-created_at', 'slug', 'apikey'],
        'ordering': ['-created_at']
    }

    def __str__(self):
        """Return the full name."""
        return self.fullname

    __unicode__ = __str__

    @property
    def fullname(self):
        """First and last names joined by a space, then stripped."""
        parts = (self.first_name or '', self.last_name or '')
        return ' '.join(parts).strip()

    @cached_property
    def organizations(self):
        """Organizations having this user among their members."""
        from udata.core.organization.models import Organization
        return Organization.objects(members__user=self)

    @property
    def sysadmin(self):
        """Whether the user has the ``admin`` role."""
        return self.has_role('admin')

    @property
    def display_url(self):
        """URL of the ``users.show`` endpoint for this user."""
        return url_for('users.show', user=self)

    @property
    def visible(self):
        """True when the datasets and reuses metrics sum above zero."""
        datasets = self.metrics.get('datasets', 0)
        reuses = self.metrics.get('reuses', 0)
        return (datasets + reuses) > 0

    def generate_api_key(self):
        """Sign the user id and current time and store it as the API key."""
        serializer = JSONWebSignatureSerializer(
            current_app.config['SECRET_KEY'])
        payload = {
            'user': str(self.id),
            'time': time(),
        }
        self.apikey = serializer.dumps(payload)

    def clear_api_key(self):
        """Reset the API key to ``None``."""
        self.apikey = None

    @classmethod
    def get(cls, id_or_slug):
        """Fetch by slug first, then by id through ``get_or_404``."""
        found = cls.objects(slug=id_or_slug).first()
        if found:
            return found
        return cls.objects.get_or_404(id=id_or_slug)

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Relay the pre-save hook to the ``before_save`` signal."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Send ``after_save`` then either ``on_create`` or ``on_update``."""
        cls.after_save.send(document)
        lifecycle = cls.on_create if kwargs.get('created') else cls.on_update
        lifecycle.send(document)
class Organization(WithMetrics, BadgeMixin, db.Datetimed, db.Document):
    """Organization document with members, teams and membership requests.

    Exposes lifecycle signals dispatched from the ``pre_save``/``post_save``
    hooks, membership helpers, a JSON-LD serialization and metrics counters.
    """

    name = db.StringField(required=True)
    acronym = db.StringField(max_length=128)
    slug = db.SlugField(max_length=255, required=True,
                        populate_from='name', update=True, follow=True)
    description = db.StringField(required=True)
    url = db.StringField()
    image_url = db.StringField()
    logo = db.ImageField(fs=avatars, basename=default_image_basename,
                         max_size=LOGO_MAX_SIZE, thumbnails=LOGO_SIZES)

    members = db.ListField(db.EmbeddedDocumentField(Member))
    teams = db.ListField(db.EmbeddedDocumentField(Team))
    requests = db.ListField(db.EmbeddedDocumentField(MembershipRequest))

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    zone = db.StringField()
    extras = db.ExtrasField()

    deleted = db.DateTimeField()

    meta = {
        'indexes': ['-created_at', 'slug'],
        'ordering': ['-created_at'],
        'queryset_class': OrganizationQuerySet,
    }

    def __str__(self):
        """Return the name, or an empty string when it is not set."""
        return self.name or ''

    __badges__ = {
        PUBLIC_SERVICE: _('Public Service'),
        CERTIFIED: _('Certified'),
    }

    __search_metrics__ = Object(properties={
        'datasets': Integer(),
        'reuses': Integer(),
        'followers': Integer(),
        'views': Integer(),
    })

    __metrics_keys__ = [
        'datasets',
        'members',
        'reuses',
        'followers',
        'views',
    ]

    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Relay the pre-save hook to the ``before_save`` signal."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Send ``after_save`` then either ``on_create`` or ``on_update``."""
        cls.after_save.send(document)
        lifecycle = cls.on_create if kwargs.get('created') else cls.on_update
        lifecycle.send(document)

    def url_for(self, *args, **kwargs):
        """Build the ``organizations.show`` URL for this organization."""
        return url_for('organizations.show', org=self, *args, **kwargs)

    display_url = property(url_for)

    @property
    def external_url(self):
        """Same as ``url_for`` with ``_external=True``."""
        return self.url_for(_external=True)

    @property
    def pending_requests(self):
        """Membership requests whose status is ``pending``."""
        return [req for req in self.requests if req.status == 'pending']

    @property
    def refused_requests(self):
        """Membership requests whose status is ``refused``."""
        return [req for req in self.requests if req.status == 'refused']

    @property
    def accepted_requests(self):
        """Membership requests whose status is ``accepted``."""
        return [req for req in self.requests if req.status == 'accepted']

    @property
    def certified(self):
        """Whether one of the badges is of the ``CERTIFIED`` kind."""
        return any(badge.kind == CERTIFIED for badge in self.badges)

    @property
    def public_service(self):
        """Whether the organization is both certified and public service."""
        has_badge = any(badge.kind == PUBLIC_SERVICE for badge in self.badges)
        return self.certified and has_badge

    def member(self, user):
        """Return the first membership entry of ``user``, else ``None``."""
        matches = (entry for entry in self.members if entry.user == user)
        return next(matches, None)

    def is_member(self, user):
        """Whether ``user`` has a membership entry."""
        return self.member(user) is not None

    def is_admin(self, user):
        """Whether ``user`` is a member with the ``admin`` role."""
        entry = self.member(user)
        return entry is not None and entry.role == 'admin'

    def pending_request(self, user):
        """Return the first pending request of ``user``, else ``None``."""
        matches = (
            req for req in self.requests
            if req.user == user and req.status == 'pending'
        )
        return next(matches, None)

    @classmethod
    def get(cls, id_or_slug):
        """Fetch by slug first, then by id through ``get_or_404``."""
        found = cls.objects(slug=id_or_slug).first()
        if found:
            return found
        return cls.objects.get_or_404(id=id_or_slug)

    def by_role(self, role):
        """Filter the members on the given role."""
        return filter(lambda entry: entry.role == role, self.members)

    def check_availability(self):
        """Chain the availability results of the organization datasets."""
        from udata.models import Dataset  # Circular imports.
        # Performances: only check the first 20 datasets for now.
        sample = Dataset.objects(organization=self).visible()[:20]
        per_dataset = [dataset.check_availability() for dataset in sample]
        return chain(*per_dataset)

    @cached_property
    def json_ld(self):
        """Build the schema.org JSON-LD dict for this organization.

        The type is ``GovernmentOrganization`` for a public service and
        ``Organization`` otherwise.
        """
        if self.public_service:
            type_ = 'GovernmentOrganization'
        else:
            type_ = 'Organization'

        result = {
            '@context': 'http://schema.org',
            '@type': type_,
            '@id': str(self.id),
            'alternateName': self.slug,
            'url': url_for('organizations.show', org=self, _external=True),
            'name': self.name,
            'dateCreated': self.created_at.isoformat(),
            'dateModified': self.last_modified.isoformat()
        }

        if self.description:
            result['description'] = mdstrip(self.description)

        logo_url = self.logo(external=True)
        if logo_url:
            result['logo'] = logo_url

        return result

    @property
    def views_count(self):
        """Value of the ``views`` metric, 0 when missing."""
        return self.metrics.get('views', 0)

    def count_members(self):
        """Store the number of members in the metrics and save."""
        self.metrics['members'] = len(self.members)
        self.save()

    def count_datasets(self):
        """Store the count of visible datasets in the metrics and save."""
        from udata.models import Dataset
        owned = Dataset.objects(organization=self)
        self.metrics['datasets'] = owned.visible().count()
        self.save()

    def count_reuses(self):
        """Store the count of reuses in the metrics and save."""
        from udata.models import Reuse
        self.metrics['reuses'] = Reuse.objects(organization=self).count()
        self.save()

    def count_followers(self):
        """Store the count of follows with ``until=None`` and save."""
        from udata.models import Follow
        active = Follow.objects(until=None)
        self.metrics['followers'] = active.followers(self).count()
        self.save()
class User(WithMetrics, UserMixin, db.Document):
    """User account document with profile, login tracking and API key.

    Exposes lifecycle signals dispatched from the ``pre_save``/``post_save``
    hooks, statistics over the user organizations, a JSON-LD serialization
    and an anonymizing deletion.
    """

    slug = db.SlugField(max_length=255, required=True,
                        populate_from='fullname')
    email = db.StringField(max_length=255, required=True, unique=True)
    password = db.StringField()
    active = db.BooleanField()
    roles = db.ListField(db.ReferenceField(Role), default=[])

    first_name = db.StringField(max_length=255, required=True)
    last_name = db.StringField(max_length=255, required=True)

    avatar_url = db.URLField()
    avatar = db.ImageField(fs=avatars, basename=default_image_basename,
                           thumbnails=AVATAR_SIZES)
    website = db.URLField()
    about = db.StringField()

    prefered_language = db.StringField()

    apikey = db.StringField()

    created_at = db.DateTimeField(default=datetime.now, required=True)

    # The field below is required for Flask-security
    # when SECURITY_CONFIRMABLE is True
    confirmed_at = db.DateTimeField()

    # The 5 fields below are required for Flask-security
    # when SECURITY_TRACKABLE is True
    last_login_at = db.DateTimeField()
    current_login_at = db.DateTimeField()
    last_login_ip = db.StringField()
    current_login_ip = db.StringField()
    login_count = db.IntField()

    deleted = db.DateTimeField()
    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()
    on_delete = Signal()

    meta = {
        'indexes': ['-created_at', 'slug', 'apikey'],
        'ordering': ['-created_at']
    }

    def __str__(self):
        """Return the full name."""
        return self.fullname

    __unicode__ = __str__

    @property
    def fullname(self):
        """First and last names joined by a space, then stripped."""
        return ' '.join((self.first_name or '', self.last_name or '')).strip()

    @cached_property
    def organizations(self):
        """Organizations with this user as member and no ``deleted`` value."""
        from udata.core.organization.models import Organization
        return Organization.objects(members__user=self, deleted__exists=False)

    @property
    def sysadmin(self):
        """Whether the user has the ``admin`` role."""
        return self.has_role('admin')

    def url_for(self, *args, **kwargs):
        """Build the ``users.show`` URL for this user."""
        return url_for('users.show', user=self, *args, **kwargs)

    display_url = property(url_for)

    @property
    def external_url(self):
        """Same as ``url_for`` with ``_external=True``."""
        return self.url_for(_external=True)

    @property
    def visible(self):
        """Truthy for an active user with at least one dataset or reuse."""
        count = self.metrics.get('datasets', 0) + self.metrics.get('reuses', 0)
        return count > 0 and self.active

    @cached_property
    def resources_availability(self):
        """Return the percentage of availability for resources.

        Only boolean results are taken into account: an availability check
        may also yield a non-boolean marker such as ``'unknown'``, which
        cannot be summed and used to raise a ``TypeError`` here.
        Return 0 when there is no boolean result at all.
        """
        # Flatten the per-organization results, keeping the booleans only.
        availabilities = [
            availability
            for org in self.organizations
            for availability in org.check_availability()
            if isinstance(availability, bool)
        ]
        if availabilities:
            # Trick will work because it's a sum() of booleans.
            return round(100. * sum(availabilities) / len(availabilities), 2)
        else:
            return 0

    @cached_property
    def datasets_org_count(self):
        """Return the number of datasets of user's organizations."""
        from udata.models import Dataset  # Circular imports.
        return sum(
            Dataset.objects(organization=org).visible().count()
            for org in self.organizations)

    @cached_property
    def followers_org_count(self):
        """Return the number of followers of user's organizations."""
        from udata.models import Follow  # Circular imports.
        return sum(
            Follow.objects(following=org).count()
            for org in self.organizations)

    @property
    def datasets_count(self):
        """Return the number of datasets of the user."""
        return self.metrics.get('datasets', 0)

    @property
    def followers_count(self):
        """Return the number of followers of the user."""
        return self.metrics.get('followers', 0)

    def generate_api_key(self):
        """Sign the user id and current time and store it as the API key."""
        s = JSONWebSignatureSerializer(current_app.config['SECRET_KEY'])
        self.apikey = s.dumps({
            'user': str(self.id),
            'time': time(),
        })

    def clear_api_key(self):
        """Reset the API key to ``None``."""
        self.apikey = None

    @classmethod
    def get(cls, id_or_slug):
        """Fetch by slug first, then by id through ``get_or_404``."""
        obj = cls.objects(slug=id_or_slug).first()
        return obj or cls.objects.get_or_404(id=id_or_slug)

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Relay the pre-save hook to the ``before_save`` signal."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Send ``after_save`` then either ``on_create`` or ``on_update``."""
        cls.after_save.send(document)
        if kwargs.get('created'):
            cls.on_create.send(document)
        else:
            cls.on_update.send(document)

    @cached_property
    def json_ld(self):
        """Build the schema.org ``Person`` JSON-LD dict for this user."""
        result = {
            '@type': 'Person',
            '@context': 'http://schema.org',
            'name': self.fullname,
        }

        if self.about:
            result['description'] = mdstrip(self.about)

        if self.avatar_url:
            result['image'] = self.avatar_url

        if self.website:
            result['url'] = self.website

        return result

    def mark_as_deleted(self):
        """Anonymize the account, detach it and notify the user by mail.

        Personal fields are blanked and saved, the user is removed from the
        members of its organizations, its discussion messages are replaced
        by ``'DELETED'`` and its follow relationships are deleted.
        """
        # Shallow copy taken before the fields are blanked, for the mail
        copied_user = copy(self)
        self.email = '{}@deleted'.format(self.id)
        self.password = None
        self.active = False
        self.first_name = 'DELETED'
        self.last_name = 'DELETED'
        self.avatar = None
        self.avatar_url = None
        self.website = None
        self.about = None
        self.deleted = datetime.now()
        self.save()
        for organization in self.organizations:
            organization.members = [
                member for member in organization.members
                if member.user != self
            ]
            organization.save()
        for discussion in Discussion.objects(discussion__posted_by=self):
            for message in discussion.discussion:
                if message.posted_by == self:
                    message.content = 'DELETED'
            discussion.save()
        Follow.objects(follower=self).delete()
        Follow.objects(following=self).delete()

        mail.send(_('Account deletion'), copied_user, 'account_deleted')
class Dataset(WithMetrics, db.Datetimed, db.Document):
    """Dataset document: descriptive metadata plus embedded resources.

    Exposes lifecycle signals dispatched from the ``pre_save``/``post_save``
    hooks, URL helpers and atomic resource insertion.
    """

    title = db.StringField(max_length=255, required=True)
    slug = db.SlugField(max_length=255, required=True,
                        populate_from='title', update=True)
    description = db.StringField(required=True, default='')
    license = db.ReferenceField('License')

    tags = db.ListField(db.StringField())
    resources = db.ListField(db.EmbeddedDocumentField(Resource))
    community_resources = db.ListField(db.EmbeddedDocumentField(Resource))

    private = db.BooleanField()
    owner = db.ReferenceField('User', reverse_delete_rule=db.NULLIFY)
    organization = db.ReferenceField('Organization',
                                     reverse_delete_rule=db.NULLIFY)
    supplier = db.ReferenceField('Organization',
                                 reverse_delete_rule=db.NULLIFY)

    frequency = db.StringField(choices=UPDATE_FREQUENCIES.keys())
    temporal_coverage = db.EmbeddedDocumentField(db.DateRange)
    spatial = db.EmbeddedDocumentField(SpatialCoverage)

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    featured = db.BooleanField(required=True, default=False)

    deleted = db.DateTimeField()

    def __str__(self):
        """Return the title, or an empty string when it is not set."""
        return self.title or ''

    __unicode__ = __str__

    meta = {
        'allow_inheritance': True,
        'indexes': [
            '-created_at',
            'slug',
            'organization',
            'supplier',
            'resources.id',
            'resources.urlhash',
        ],
        'ordering': ['-created_at'],
        'queryset_class': DatasetQuerySet,
    }

    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()
    on_delete = Signal()

    verbose_name = _('dataset')

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Relay the pre-save hook to the ``before_save`` signal."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Send ``after_save`` then either ``on_create`` or ``on_update``."""
        cls.after_save.send(document)
        lifecycle = cls.on_create if kwargs.get('created') else cls.on_update
        lifecycle.send(document)

    @property
    def display_url(self):
        """URL of the ``datasets.show`` endpoint for this dataset."""
        return url_for('datasets.show', dataset=self)

    @property
    def external_url(self):
        """Same endpoint as ``display_url`` with ``_external=True``."""
        return url_for('datasets.show', dataset=self, _external=True)

    @property
    def image_url(self):
        """Organization logo URL, else owner avatar URL, else ``None``."""
        if self.organization:
            return self.organization.logo.url
        if self.owner:
            return self.owner.avatar.url

    @property
    def frequency_label(self):
        """Label of the frequency, falling back on the 'unknown' entry."""
        fallback_key = 'unknown'
        return UPDATE_FREQUENCIES.get(self.frequency or fallback_key,
                                      UPDATE_FREQUENCIES[fallback_key])

    @classmethod
    def get(cls, id_or_slug):
        """Fetch by slug first, then by id through ``get_or_404``."""
        found = cls.objects(slug=id_or_slug).first()
        if found:
            return found
        return cls.objects.get_or_404(id=id_or_slug)

    def add_resource(self, resource):
        '''Perform an atomic prepend for a new resource'''
        prepend = {
            '$push': {
                'resources': {
                    '$each': [resource.to_mongo()],
                    '$position': 0
                }
            }
        }
        self.update(__raw__=prepend)
        # Refresh the instance after the raw update
        self.reload()

    def add_community_resource(self, resource):
        '''Perform an atomic prepend for a new community resource'''
        prepend = {
            '$push': {
                'community_resources': {
                    '$each': [resource.to_mongo()],
                    '$position': 0
                }
            }
        }
        self.update(__raw__=prepend)
        # Refresh the instance after the raw update
        self.reload()
class Organization(WithMetrics, BadgeMixin, db.Datetimed, db.Document):
    """Organization document with members, teams and membership requests.

    Exposes lifecycle signals dispatched from the ``pre_save``/``post_save``
    hooks and membership helpers.
    """

    name = db.StringField(max_length=255, required=True)
    acronym = db.StringField(max_length=128)
    slug = db.SlugField(max_length=255, required=True,
                        populate_from='name', update=True)
    description = db.StringField(required=True)
    url = db.StringField()
    image_url = db.StringField()
    logo = db.ImageField(fs=avatars, basename=default_image_basename,
                         thumbnails=LOGO_SIZES)

    members = db.ListField(db.EmbeddedDocumentField(Member))
    teams = db.ListField(db.EmbeddedDocumentField(Team))
    requests = db.ListField(db.EmbeddedDocumentField(MembershipRequest))

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    zone = db.StringField()
    extras = db.ExtrasField()

    deleted = db.DateTimeField()

    meta = {
        'allow_inheritance': True,
        'indexes': ['-created_at', 'slug'],
        'ordering': ['-created_at'],
        'queryset_class': OrganizationQuerySet,
    }

    def __str__(self):
        """Return the name, or an empty string when it is not set."""
        return self.name or ''

    __unicode__ = __str__

    __badges__ = {
        PUBLIC_SERVICE: _('Public Service'),
        CERTIFIED: _('Certified'),
    }

    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Relay the pre-save hook to the ``before_save`` signal."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Send ``after_save`` then either ``on_create`` or ``on_update``."""
        cls.after_save.send(document)
        lifecycle = cls.on_create if kwargs.get('created') else cls.on_update
        lifecycle.send(document)

    def url_for(self, *args, **kwargs):
        """Build the ``organizations.show`` URL for this organization."""
        return url_for('organizations.show', org=self, *args, **kwargs)

    display_url = property(url_for)

    @property
    def external_url(self):
        """Same as ``url_for`` with ``_external=True``."""
        return self.url_for(_external=True)

    @property
    def pending_requests(self):
        """Membership requests whose status is ``pending``."""
        return [req for req in self.requests if req.status == 'pending']

    @property
    def refused_requests(self):
        """Membership requests whose status is ``refused``."""
        return [req for req in self.requests if req.status == 'refused']

    @property
    def accepted_requests(self):
        """Membership requests whose status is ``accepted``."""
        return [req for req in self.requests if req.status == 'accepted']

    @property
    def public_service(self):
        """Whether both public service and certified badges are present."""
        kinds = [item.kind for item in self.badges]
        return PUBLIC_SERVICE in kinds and CERTIFIED in kinds

    def member(self, user):
        """Return the first membership entry of ``user``, else ``None``."""
        matches = (entry for entry in self.members if entry.user == user)
        return next(matches, None)

    def is_member(self, user):
        """Whether ``user`` has a membership entry."""
        return self.member(user) is not None

    def is_admin(self, user):
        """Whether ``user`` is a member with the ``admin`` role."""
        entry = self.member(user)
        return entry is not None and entry.role == 'admin'

    def pending_request(self, user):
        """Return the first pending request of ``user``, else ``None``."""
        matches = (
            req for req in self.requests
            if req.user == user and req.status == 'pending'
        )
        return next(matches, None)

    @classmethod
    def get(cls, id_or_slug):
        """Fetch by slug first, then by id through ``get_or_404``."""
        found = cls.objects(slug=id_or_slug).first()
        if found:
            return found
        return cls.objects.get_or_404(id=id_or_slug)

    def by_role(self, role):
        """Filter the members on the given role."""
        return filter(lambda entry: entry.role == role, self.members)

    def check_availability(self):
        """Chain the availability results of the organization datasets."""
        from udata.models import Dataset  # Circular imports.
        # Performances: only check the first 20 datasets for now.
        sample = Dataset.objects(organization=self).visible()[:20]
        per_dataset = [dataset.check_availability() for dataset in sample]
        return chain(*per_dataset)
class User(db.Document, WithMetrics, UserMixin):
    """User account document.

    The ``pre_save``/``post_save`` class methods relay save events to the
    ``before_save``, ``after_save``, ``on_create`` and ``on_update`` signals
    declared on the class. The ``*_delete`` signals are only declared here.
    """

    slug = db.SlugField(
        max_length=255, required=True, populate_from='fullname')
    email = db.StringField(max_length=255, required=True)
    password = db.StringField()
    active = db.BooleanField()
    roles = db.ListField(db.ReferenceField(Role), default=[])

    first_name = db.StringField(max_length=255, required=True)
    last_name = db.StringField(max_length=255, required=True)

    avatar_url = db.URLField()
    avatar = db.ImageField(
        fs=avatars, basename=default_image_basename, thumbnails=AVATAR_SIZES)
    website = db.URLField()
    about = db.StringField()

    prefered_language = db.StringField()

    apikey = db.StringField()

    created_at = db.DateTimeField(default=datetime.now, required=True)
    confirmed_at = db.DateTimeField()
    last_login_at = db.DateTimeField()
    current_login_at = db.DateTimeField()
    last_login_ip = db.StringField()
    current_login_ip = db.StringField()
    login_count = db.IntField()

    deleted = db.DateTimeField()
    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()
    on_delete = Signal()

    meta = {
        'allow_inheritance': True,
        'indexes': ['-created_at', 'slug', 'apikey'],
        'ordering': ['-created_at']
    }

    def __str__(self):
        return self.fullname

    __unicode__ = __str__

    @property
    def fullname(self):
        """First and last names joined by a space, outer blanks stripped."""
        first = self.first_name or ''
        last = self.last_name or ''
        return ' '.join((first, last)).strip()

    @cached_property
    def organizations(self):
        """Organizations having a member entry that references this user."""
        from udata.core.organization.models import Organization

        return Organization.objects(members__user=self)

    @property
    def sysadmin(self):
        """Whether the user holds the ``'admin'`` role."""
        return self.has_role('admin')

    def url_for(self, *args, **kwargs):
        """Build the URL of the ``users.show`` endpoint."""
        # Inside the method body ``url_for`` resolves to the module-level
        # function, not to this method.
        return url_for('users.show', user=self, *args, **kwargs)

    display_url = property(url_for)

    @property
    def external_url(self):
        """Same as ``display_url`` but requested with ``_external=True``."""
        return self.url_for(_external=True)

    @property
    def visible(self):
        """True when the datasets and reuses metrics add up to more than 0."""
        datasets = self.metrics.get('datasets', 0)
        reuses = self.metrics.get('reuses', 0)
        return (datasets + reuses) > 0

    @cached_property
    def resources_availability(self):
        """Return the percentage of availability for resources."""
        per_org = [org.check_availability() for org in self.organizations]
        # Flatten the per-organization results into a single list.
        availabilities = list(chain.from_iterable(per_org))
        if not availabilities:
            return 0
        # Trick will work because it's a sum() of booleans.
        ratio = 100. * sum(availabilities) / len(availabilities)
        return round(ratio, 2)

    @cached_property
    def datasets_org_count(self):
        """Return the number of datasets of user's organizations."""
        from udata.models import Dataset  # Circular imports.

        total = 0
        for org in self.organizations:
            total += Dataset.objects(organization=org).visible().count()
        return total

    @cached_property
    def followers_org_count(self):
        """Return the number of followers of user's organizations."""
        from udata.models import FollowOrg  # Circular imports.

        total = 0
        for org in self.organizations:
            total += FollowOrg.objects(following=org).count()
        return total

    @property
    def datasets_count(self):
        """Return the number of datasets of the user."""
        return self.metrics.get('datasets', 0)

    @property
    def followers_count(self):
        """Return the number of followers of the user."""
        return self.metrics.get('followers', 0)

    def generate_api_key(self):
        """Store a freshly signed token in ``apikey``.

        The payload holds the user id as a string and the current time; it
        is signed with the application's ``SECRET_KEY``. The document is not
        saved here.
        """
        serializer = JSONWebSignatureSerializer(
            current_app.config['SECRET_KEY'])
        payload = {
            'user': str(self.id),
            'time': time(),
        }
        self.apikey = serializer.dumps(payload)

    def clear_api_key(self):
        """Reset ``apikey`` to None (the document is not saved here)."""
        self.apikey = None

    @classmethod
    def get(cls, id_or_slug):
        """Fetch by slug first, then by id through ``get_or_404``."""
        found = cls.objects(slug=id_or_slug).first()
        if found:
            return found
        return cls.objects.get_or_404(id=id_or_slug)

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Emit ``before_save`` for ``document``."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Emit ``after_save``, then ``on_create`` or ``on_update``.

        ``on_create`` is chosen when ``kwargs['created']`` is truthy.
        """
        cls.after_save.send(document)
        follow_up = cls.on_create if kwargs.get('created') else cls.on_update
        follow_up.send(document)
class Dataset(WithMetrics, BadgeMixin, db.Datetimed, db.Document):
    """Dataset document: metadata plus an embedded list of resources.

    The ``pre_save``/``post_save`` class methods relay save events to the
    ``before_save``, ``after_save``, ``on_create`` and ``on_update`` signals
    declared on the class. The ``*_delete`` signals are only declared here.
    """

    title = db.StringField(max_length=255, required=True)
    slug = db.SlugField(
        max_length=255, required=True, populate_from='title', update=True)
    description = db.StringField(required=True, default='')
    license = db.ReferenceField('License')

    tags = db.ListField(db.StringField())
    resources = db.ListField(db.EmbeddedDocumentField(Resource))

    private = db.BooleanField()
    owner = db.ReferenceField('User', reverse_delete_rule=db.NULLIFY)
    organization = db.ReferenceField(
        'Organization', reverse_delete_rule=db.NULLIFY)

    frequency = db.StringField(choices=UPDATE_FREQUENCIES.keys())
    frequency_date = db.DateTimeField(
        verbose_name=_('Future date of update'))
    temporal_coverage = db.EmbeddedDocumentField(db.DateRange)
    spatial = db.EmbeddedDocumentField(SpatialCoverage)

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    featured = db.BooleanField(required=True, default=False)

    deleted = db.DateTimeField()

    def __str__(self):
        return self.title or ''

    __unicode__ = __str__

    # Badge kinds that can be attached to a dataset, with their labels.
    __badges__ = {
        PIVOTAL_DATA: _('Pivotal data'),
    }

    meta = {
        'allow_inheritance': True,
        'indexes': [
            '-created_at',
            'slug',
            'organization',
            'resources.id',
            'resources.urlhash',
        ],
        'ordering': ['-created_at'],
        'queryset_class': DatasetQuerySet,
    }

    before_save = signal('Dataset.before_save')
    after_save = signal('Dataset.after_save')
    on_create = signal('Dataset.on_create')
    on_update = signal('Dataset.on_update')
    before_delete = signal('Dataset.before_delete')
    after_delete = signal('Dataset.after_delete')
    on_delete = signal('Dataset.on_delete')

    verbose_name = _('dataset')

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Emit ``before_save`` for ``document``."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Emit ``after_save``, then ``on_create`` or ``on_update``.

        ``on_create`` is chosen when ``kwargs['created']`` is truthy.
        """
        cls.after_save.send(document)
        if kwargs.get('created'):
            cls.on_create.send(document)
        else:
            cls.on_update.send(document)

    def url_for(self, *args, **kwargs):
        """Build the URL of the ``datasets.show`` endpoint."""
        # Inside the method body ``url_for`` resolves to the module-level
        # function, not to this method.
        return url_for('datasets.show', dataset=self, *args, **kwargs)

    display_url = property(url_for)

    @property
    def external_url(self):
        """Same as ``display_url`` but requested with ``_external=True``."""
        return self.url_for(_external=True)

    @property
    def image_url(self):
        """URL of the organization logo, else of the owner avatar.

        Return None when the dataset has neither organization nor owner.
        """
        if self.organization:
            return self.organization.logo.url
        if self.owner:
            return self.owner.avatar.url
        return None

    @property
    def frequency_label(self):
        """Label for the frequency, falling back on the 'unknown' entry."""
        return UPDATE_FREQUENCIES.get(self.frequency or 'unknown',
                                      UPDATE_FREQUENCIES['unknown'])

    def check_availability(self):
        """Check if resources from that dataset are available.

        Return a list of booleans.
        """
        # Only check remote resources.
        remote_resources = [
            resource for resource in self.resources
            if resource.filetype == 'remote'
        ]
        if not remote_resources:
            return []

        # First, we try to retrieve all data from the group (slug).
        error, response = check_url_from_group(self.slug)
        if error:
            # The group is unknown, the check will be performed by resource.
            return [
                resource.check_availability(self.slug)
                for resource in remote_resources
            ]
        return [
            int(url_infos['status']) == 200
            for url_infos in response['urls']
        ]

    @property
    def last_update(self):
        """Latest resource publication date, else ``last_modified``."""
        if self.resources:
            return max(resource.published for resource in self.resources)
        return self.last_modified

    @property
    def next_update(self):
        """Compute the next expected update date, given the frequency and
        last_update.

        Return None if the frequency is not handled.
        """
        # Periods are approximated: a month is 4 weeks, a year is 52 weeks.
        # NOTE(review): the 'fortnighly' spelling is kept as found; confirm
        # it matches the key declared in UPDATE_FREQUENCIES.
        periods = {
            'daily': timedelta(days=1),
            'weekly': timedelta(weeks=1),
            'fortnighly': timedelta(weeks=2),
            'monthly': timedelta(weeks=4),
            'bimonthly': timedelta(weeks=4 * 2),
            'quarterly': timedelta(weeks=52 / 4),
            'biannual': timedelta(weeks=52 / 2),
            'annual': timedelta(weeks=52),
            'biennial': timedelta(weeks=52 * 2),
            'triennial': timedelta(weeks=52 * 3),
            'quinquennial': timedelta(weeks=52 * 5),
        }
        delta = periods.get(self.frequency)
        if delta is None:
            return None
        # ``last_update`` is only evaluated for handled frequencies.
        return self.last_update + delta

    @cached_property
    def quality(self):
        """Return a dict filled with metrics related to the inner
        quality of the dataset:

            * number of tags
            * description length
            * and so on

        Keys are only present when the matching information exists; the
        ``score`` key is always set.
        """
        result = {}

        # Evaluate the property once: each access rescans the resources.
        next_update = self.next_update
        if next_update is not None:
            result['frequency'] = self.frequency
            # Negative while the next expected update is at least a full
            # day ahead, zero when it is due within the next 24 hours.
            result['update_in'] = -(next_update - datetime.now()).days

        if self.tags:
            result['tags_count'] = len(self.tags)

        if self.description:
            result['description_length'] = len(self.description)

        if self.resources:
            result['has_resources'] = True
            result['has_only_closed_formats'] = all(
                resource.closed_format for resource in self.resources)
            result['has_unavailable_resources'] = not all(
                self.check_availability())

        discussions = DatasetDiscussion.objects(subject=self.id)
        if discussions:
            result['discussions'] = len(discussions)
            result['has_untreated_discussions'] = not all(
                discussion.person_involved(self.owner)
                for discussion in discussions)

        result['score'] = self.compute_quality_score(result)
        return result

    def compute_quality_score(self, quality):
        """Compute the score related to the quality of that dataset.

        ``quality`` is the dict built by the ``quality`` property. Each
        criterion adds or removes ``UNIT`` points; the result is never
        negative.
        """
        score = 0
        UNIT = 2

        if 'frequency' in quality:
            # TODO: should be related to frequency.
            # NOTE(review): an update due within the next 24 hours yields
            # ``update_in == 0`` and is therefore penalised; confirm whether
            # ``<= 0`` was intended.
            if quality['update_in'] < 0:
                score += UNIT
            else:
                score -= UNIT

        if 'tags_count' in quality and quality['tags_count'] > 3:
            score += UNIT

        if ('description_length' in quality
                and quality['description_length'] > 100):
            score += UNIT

        if 'has_resources' in quality:
            if quality['has_only_closed_formats']:
                score -= UNIT
            else:
                score += UNIT
            if quality['has_unavailable_resources']:
                score -= UNIT
            else:
                score += UNIT

        if 'discussions' in quality:
            if quality['has_untreated_discussions']:
                score -= UNIT
            else:
                score += UNIT

        return max(score, 0)

    @classmethod
    def get(cls, id_or_slug):
        """Fetch by slug first, then by id through ``get_or_404``."""
        obj = cls.objects(slug=id_or_slug).first()
        return obj or cls.objects.get_or_404(id=id_or_slug)

    def add_resource(self, resource):
        '''Perform an atomic prepend for a new resource'''
        self.update(__raw__={
            '$push': {
                'resources': {
                    '$each': [resource.to_mongo()],
                    '$position': 0
                }
            }
        })
        self.reload()
        # ``update`` bypasses ``save``, so the signal is sent by hand.
        post_save.send(self.__class__, document=self)

    def update_resource(self, resource):
        '''Perform an atomic update for an existing resource'''
        index = self.resources.index(resource)
        data = {'resources__{index}'.format(index=index): resource}
        self.update(**data)
        self.reload()
        # ``update`` bypasses ``save``, so the signal is sent by hand.
        post_save.send(self.__class__, document=self)

    @property
    def community_resources(self):
        """Community resources attached to this dataset.

        Return an empty list while the dataset has no id, otherwise the
        ``CommunityResource`` queryset filtered on this dataset. The
        queryset is returned as is, without being evaluated for truthiness.
        """
        if not self.id:
            return []
        return CommunityResource.objects.filter(dataset=self)
class Organization(WithMetrics, db.Datetimed, db.Document):
    """Organization document with members, teams and membership requests.

    The ``pre_save``/``post_save`` class methods relay save events to the
    ``before_save``, ``after_save``, ``on_create`` and ``on_update`` signals
    declared on the class. ``before_delete`` and ``after_delete`` are only
    declared here; nothing in this class emits them.
    """

    name = db.StringField(max_length=255, required=True)
    acronym = db.StringField(max_length=128)
    slug = db.SlugField(
        max_length=255, required=True, populate_from='name', update=True)
    description = db.StringField(required=True)
    url = db.StringField()
    image_url = db.StringField()
    logo = db.ImageField(
        fs=avatars, basename=default_image_basename, thumbnails=LOGO_SIZES)

    members = db.ListField(db.EmbeddedDocumentField(Member))
    teams = db.ListField(db.EmbeddedDocumentField(Team))
    requests = db.ListField(db.EmbeddedDocumentField(MembershipRequest))

    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    deleted = db.DateTimeField()

    # TODO: Extract into extension
    public_service = db.BooleanField()

    meta = {
        'allow_inheritance': True,
        'indexes': ['-created_at', 'slug'],
        'ordering': ['-created_at'],
        'queryset_class': OrganizationQuerySet,
    }

    def __str__(self):
        return self.name or ''

    __unicode__ = __str__

    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Emit ``before_save`` for ``document``."""
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        """Emit ``after_save``, then ``on_create`` or ``on_update``.

        ``on_create`` is chosen when ``kwargs['created']`` is truthy.
        """
        cls.after_save.send(document)
        follow_up = cls.on_create if kwargs.get('created') else cls.on_update
        follow_up.send(document)

    @property
    def display_url(self):
        """URL of the ``organizations.show`` endpoint."""
        endpoint = 'organizations.show'
        return url_for(endpoint, org=self)

    @property
    def external_url(self):
        """Same endpoint as ``display_url``, with ``_external=True``."""
        endpoint = 'organizations.show'
        return url_for(endpoint, org=self, _external=True)

    def _requests_with_status(self, status):
        """Return the membership requests whose status equals ``status``."""
        return [req for req in self.requests if req.status == status]

    @property
    def pending_requests(self):
        """Membership requests with the ``'pending'`` status."""
        return self._requests_with_status('pending')

    @property
    def refused_requests(self):
        """Membership requests with the ``'refused'`` status."""
        return self._requests_with_status('refused')

    @property
    def accepted_requests(self):
        """Membership requests with the ``'accepted'`` status."""
        return self._requests_with_status('accepted')

    def member(self, user):
        """Return the first member entry matching ``user``, or None."""
        matching = (entry for entry in self.members if entry.user == user)
        return next(matching, None)

    def is_member(self, user):
        """Tell whether ``user`` has a member entry."""
        return self.member(user) is not None

    def is_admin(self, user):
        """Tell whether ``user`` is a member with the ``'admin'`` role."""
        entry = self.member(user)
        if entry is None:
            return False
        return entry.role == 'admin'

    def pending_request(self, user):
        """Return the first pending request made by ``user``, or None."""
        matching = (
            req for req in self.requests
            if req.user == user and req.status == 'pending'
        )
        return next(matching, None)

    @classmethod
    def get(cls, id_or_slug):
        """Fetch by slug first, then by id through ``get_or_404``."""
        found = cls.objects(slug=id_or_slug).first()
        if found:
            return found
        return cls.objects.get_or_404(id=id_or_slug)

    def by_role(self, role):
        """Filter the members down to those holding ``role``."""
        def holds_role(entry):
            return entry.role == role

        return filter(holds_role, self.members)