Example #1
0
class OrgUnit(object):
    '''
    Simple mixin holding common fields for all organization units.

    Only field declarations live here. The class derives from ``object``,
    so it is presumably meant to be combined with a document class
    (to be confirmed against the classes using this mixin).
    '''
    # Mandatory display name, capped at 255 characters
    name = db.StringField(max_length=255, required=True)
    # Mandatory slug populated from `name`; `update=True` presumably keeps it
    # in sync when the name changes (confirm in `db.SlugField`)
    slug = db.SlugField(max_length=255, required=True, populate_from='name', update=True)
    # Mandatory free text, no maximum length declared
    description = db.StringField(required=True)
    # Optional URLs, each capped at 255 characters
    url = db.URLField(max_length=255)
    image_url = db.URLField(max_length=255)
    # Optional dictionary field
    extras = db.DictField()
Example #2
0
 def pre_validate(self, form):
     '''
     Check that the submitted data, if any, is a valid URL.

     The check is delegated to a throwaway `db.URLField`; its
     `db.ValidationError` is converted into a form-level
     `validators.ValidationError` carrying a translated message.

     :param form: the form being validated (unused here)
     :returns: True when there is no data or when the data is a valid URL
     :raises validators.ValidationError: when the data is not a valid URL
     '''
     value = self.data
     if not value:
         # Nothing was submitted, so there is nothing to reject
         return True
     try:
         db.URLField().validate(value)
     except db.ValidationError:
         raise validators.ValidationError(_('Invalid URL'))
     return True
Example #3
0
class Post(db.Datetimed, db.Document):
    '''
    A post: textual content, an illustration, credits and related objects.
    '''
    name = db.StringField(max_length=255, required=True)
    # The slug is populated from `name`
    slug = db.SlugField(
        max_length=255, required=True, populate_from='name', update=True)
    headline = db.StringField()
    content = db.StringField(required=True)
    image_url = db.StringField()
    image = db.ImageField(
        fs=images, basename=default_image_basename, thumbnails=IMAGE_SIZES)

    credit_to = db.StringField()
    credit_url = db.URLField()

    tags = db.ListField(db.StringField())
    # References are declared by name and use the `db.PULL` delete rule
    datasets = db.ListField(
        db.ReferenceField('Dataset', reverse_delete_rule=db.PULL))
    reuses = db.ListField(
        db.ReferenceField('Reuse', reverse_delete_rule=db.PULL))

    owner = db.ReferenceField('User')
    private = db.BooleanField()

    # Default ordering: descending `created_at`
    meta = {'ordering': ['-created_at']}

    def __unicode__(self):
        '''Return the post name, or an empty string when it has none.'''
        return self.name if self.name else ''

    @property
    def display_url(self):
        '''URL built for the `posts.show` endpoint with this post.'''
        endpoint = 'posts.show'
        return url_for(endpoint, post=self)
Example #4
0
class License(db.Document):
    '''
    A license that can be looked up or guessed from free text.
    '''
    # We need to declare id explicitly since we do not use the default
    # value set by Mongo.
    id = db.StringField(primary_key=True)
    created_at = db.DateTimeField(default=datetime.now, required=True)
    title = db.StringField(required=True)
    slug = db.SlugField(required=True, populate_from='title')
    url = db.URLField()
    maintainer = db.StringField()
    flags = db.ListField(db.StringField())

    active = db.BooleanField()

    def __unicode__(self):
        '''Return the license title.'''
        return self.title

    @classmethod
    def guess(cls, *strings, **kwargs):
        '''
        Try to guess a license from a list of strings.

        Strings are tried in order and the first successful guess wins.

        Accept a `default` keyword argument which will be
        the default fallback license.

        :param strings: candidate texts, each passed to `guess_one`
        :returns: the first guessed license, else `default`, else None
        '''
        for string in strings:
            license = cls.guess_one(string)
            if license:
                return license
        return kwargs.get('default')

    @classmethod
    def guess_one(cls, text):
        '''
        Try to guess license from a string.

        Try to exact match on identifier then slugified title
        and fallback on edit distance ranking (after slugification)

        :param text: the text to match; empty or whitespace-only
            values are never matched
        :returns: the matching license or None
        '''
        if not text:
            return
        text = text.strip().lower()  # Stored identifiers are lower case
        if not text:
            # Whitespace-only input: without this guard the queries below
            # would run with empty values and the fuzzy step would compare
            # every slug against an empty string.
            return
        qs = cls.objects
        slug = cls.slug.slugify(text)  # Use slug as it normalize string
        license = qs(db.Q(id=text) | db.Q(slug=slug) | db.Q(url=text)).first()
        if license is None:
            # Try to single match with a low Damerau-Levenshtein distance
            candidates = [
                candidate for candidate in cls.objects
                if rdlevenshtein(candidate.slug, slug) <= MAX_DISTANCE
            ]
            # If there is more than one match, we cannot determine
            # which one is closer to safely choose between candidates
            if len(candidates) == 1:
                license = candidates[0]
        return license

    @classmethod
    def default(cls):
        '''Return the license whose id is `DEFAULT_LICENSE['id']`, if any.'''
        return cls.objects(id=DEFAULT_LICENSE['id']).first()
Example #5
0
class Post(db.Datetimed, db.Document):
    '''
    A post: textual content, an illustration, credits and related objects.
    '''
    name = db.StringField(max_length=255, required=True)
    # The slug is populated from `name`
    slug = db.SlugField(
        max_length=255,
        required=True,
        populate_from='name',
        update=True,
        follow=True,
    )
    headline = db.StringField()
    content = db.StringField(required=True)
    image_url = db.StringField()
    image = db.ImageField(
        fs=images, basename=default_image_basename, thumbnails=IMAGE_SIZES)

    credit_to = db.StringField()
    credit_url = db.URLField()

    tags = db.ListField(db.StringField())
    # References are declared by name and use the `db.PULL` delete rule
    datasets = db.ListField(
        db.ReferenceField('Dataset', reverse_delete_rule=db.PULL))
    reuses = db.ListField(
        db.ReferenceField('Reuse', reverse_delete_rule=db.PULL))

    owner = db.ReferenceField('User')
    published = db.DateTimeField()

    # Both dates are indexed in descending order; queries go through
    # the dedicated `PostQuerySet`
    meta = {
        'ordering': ['-created_at'],
        'indexes': ['-created_at', '-published'],
        'queryset_class': PostQuerySet,
    }

    verbose_name = _('post')

    def __str__(self):
        '''Return the post name, or an empty string when it has none.'''
        return self.name if self.name else ''

    def url_for(self, *args, **kwargs):
        '''
        Build the `posts.show` URL for this post.

        Extra positional and keyword arguments are forwarded to the
        module-level `url_for`.
        '''
        return url_for('posts.show', *args, post=self, **kwargs)

    @property
    def display_url(self):
        '''URL of this post, built without extra arguments.'''
        return self.url_for()

    @property
    def external_url(self):
        '''URL of this post, built with `_external=True`.'''
        return self.url_for(_external=True)

    def count_discussions(self):
        '''Do nothing and return None.'''
        # There are no metrics on Post to store discussions count
        return None
Example #6
0
class License(db.Document):
    '''
    A license document keyed by an explicit string identifier.
    '''
    # String primary key declared explicitly on the document
    id = db.StringField(primary_key=True)
    # Mandatory creation date; the `now` callable itself (not its result)
    # is given as default
    created_at = db.DateTimeField(default=datetime.datetime.now, required=True)
    title = db.StringField(required=True)
    # Mandatory slug populated from `title`
    slug = db.SlugField(required=True, populate_from='title')
    url = db.URLField()
    maintainer = db.StringField()
    # List of plain strings
    flags = db.ListField(db.StringField())

    active = db.BooleanField()

    def __unicode__(self):
        '''Return the license title.'''
        return self.title
Example #7
0
class License(db.Document):
    '''
    A license document keyed by an explicit string identifier.
    '''
    # We need to declare id explicitly since we do not use the default
    # value set by Mongo.
    id = db.StringField(primary_key=True)
    # Mandatory creation date; the `now` callable itself (not its result)
    # is given as default
    created_at = db.DateTimeField(default=datetime.now, required=True)
    title = db.StringField(required=True)
    # Mandatory slug populated from `title`
    slug = db.SlugField(required=True, populate_from='title')
    url = db.URLField()
    maintainer = db.StringField()
    # List of plain strings
    flags = db.ListField(db.StringField())

    active = db.BooleanField()

    def __unicode__(self):
        '''Return the license title.'''
        return self.title
Example #8
0
class URLTester(db.Document):
    '''Minimal document exposing a single `db.URLField` with default options.'''
    url = db.URLField()
Example #9
0
class User(db.Document, WithMetrics, UserMixin):
    '''
    A user account: identity, credentials, login tracking data
    and the signals sent around its persistence.
    '''
    # The slug is populated from the computed `fullname`
    slug = db.SlugField(
        max_length=255, required=True, populate_from='fullname')
    email = db.StringField(max_length=255, required=True)
    password = db.StringField()
    active = db.BooleanField()
    roles = db.ListField(db.ReferenceField(Role), default=[])

    first_name = db.StringField(max_length=255, required=True)
    last_name = db.StringField(max_length=255, required=True)

    avatar_url = db.URLField()
    avatar = db.ImageField(
        fs=avatars, basename=default_image_basename, thumbnails=AVATAR_SIZES)
    website = db.URLField()
    about = db.StringField()

    prefered_language = db.StringField()

    apikey = db.StringField()

    # Account lifecycle and login tracking
    created_at = db.DateTimeField(default=datetime.now, required=True)
    confirmed_at = db.DateTimeField()
    last_login_at = db.DateTimeField()
    current_login_at = db.DateTimeField()
    last_login_ip = db.StringField()
    current_login_ip = db.StringField()
    login_count = db.IntField()

    deleted = db.DateTimeField()
    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    # Class-level signals; only the save-related ones are sent from this class
    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()
    on_delete = Signal()

    meta = {
        'allow_inheritance': True,
        'indexes': ['-created_at', 'slug', 'apikey'],
        'ordering': ['-created_at'],
    }

    def __str__(self):
        '''Return the user full name.'''
        return self.fullname

    __unicode__ = __str__

    @property
    def fullname(self):
        '''First and last names joined by a space, missing parts skipped.'''
        first = self.first_name or ''
        last = self.last_name or ''
        return ' '.join([first, last]).strip()

    @cached_property
    def organizations(self):
        '''Queryset of the organizations having this user as a member.'''
        from udata.core.organization.models import Organization
        return Organization.objects(members__user=self)

    @property
    def sysadmin(self):
        '''Whether the user has the `admin` role.'''
        return self.has_role('admin')

    @property
    def display_url(self):
        '''URL built for the `users.show` endpoint with this user.'''
        endpoint = 'users.show'
        return url_for(endpoint, user=self)

    @property
    def visible(self):
        '''True when the `datasets` and `reuses` metrics sum above zero.'''
        datasets = self.metrics.get('datasets', 0)
        reuses = self.metrics.get('reuses', 0)
        return datasets + reuses > 0

    def generate_api_key(self):
        '''
        Store a new API key on the user (the user is not saved).

        The key is the user id and the current time, signed with the
        application `SECRET_KEY`.
        '''
        serializer = JSONWebSignatureSerializer(
            current_app.config['SECRET_KEY'])
        payload = {
            'user': str(self.id),
            'time': time(),
        }
        self.apikey = serializer.dumps(payload)

    def clear_api_key(self):
        '''Remove the API key from the user (the user is not saved).'''
        self.apikey = None

    @classmethod
    def get(cls, id_or_slug):
        '''
        Fetch a user by slug first, then by identifier.

        The identifier lookup goes through `get_or_404`.
        '''
        found = cls.objects(slug=id_or_slug).first()
        if found:
            return found
        return cls.objects.get_or_404(id=id_or_slug)

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        '''Send `before_save` for the document.'''
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        '''
        Send `after_save`, then `on_create` or `on_update` depending
        on the `created` keyword argument.
        '''
        cls.after_save.send(document)
        signal = cls.on_create if kwargs.get('created') else cls.on_update
        signal.send(document)
Example #10
0
class User(WithMetrics, UserMixin, db.Document):
    '''
    A user account: identity, credentials, login tracking data,
    aggregated figures and the signals sent around its persistence.
    '''
    # The slug is populated from the computed `fullname`
    slug = db.SlugField(
        max_length=255, required=True, populate_from='fullname')
    email = db.StringField(max_length=255, required=True, unique=True)
    password = db.StringField()
    active = db.BooleanField()
    roles = db.ListField(db.ReferenceField(Role), default=[])

    first_name = db.StringField(max_length=255, required=True)
    last_name = db.StringField(max_length=255, required=True)

    avatar_url = db.URLField()
    avatar = db.ImageField(
        fs=avatars, basename=default_image_basename, thumbnails=AVATAR_SIZES)
    website = db.URLField()
    about = db.StringField()

    prefered_language = db.StringField()

    apikey = db.StringField()

    created_at = db.DateTimeField(default=datetime.now, required=True)

    # The field below is required for Flask-security
    # when SECURITY_CONFIRMABLE is True
    confirmed_at = db.DateTimeField()

    # The 5 fields below are required for Flask-security
    # when SECURITY_TRACKABLE is True
    last_login_at = db.DateTimeField()
    current_login_at = db.DateTimeField()
    last_login_ip = db.StringField()
    current_login_ip = db.StringField()
    login_count = db.IntField()

    deleted = db.DateTimeField()
    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    # Class-level signals; only the save-related ones are sent from this class
    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()
    on_delete = Signal()

    meta = {
        'indexes': ['-created_at', 'slug', 'apikey'],
        'ordering': ['-created_at'],
    }

    def __str__(self):
        '''Return the user full name.'''
        return self.fullname

    __unicode__ = __str__

    @property
    def fullname(self):
        '''First and last names joined by a space, missing parts skipped.'''
        first = self.first_name or ''
        last = self.last_name or ''
        return ' '.join([first, last]).strip()

    @cached_property
    def organizations(self):
        '''Queryset of the non-deleted organizations this user belongs to.'''
        from udata.core.organization.models import Organization
        return Organization.objects(members__user=self, deleted__exists=False)

    @property
    def sysadmin(self):
        '''Whether the user has the `admin` role.'''
        return self.has_role('admin')

    def url_for(self, *args, **kwargs):
        '''
        Build the `users.show` URL for this user.

        Extra positional and keyword arguments are forwarded to the
        module-level `url_for`.
        '''
        return url_for('users.show', *args, user=self, **kwargs)

    display_url = property(url_for)

    @property
    def external_url(self):
        '''URL of this user, built with `_external=True`.'''
        return self.url_for(_external=True)

    @property
    def visible(self):
        '''
        Truthy when the `datasets` and `reuses` metrics sum above zero
        and the account is active.
        '''
        datasets = self.metrics.get('datasets', 0)
        reuses = self.metrics.get('reuses', 0)
        return datasets + reuses > 0 and self.active

    @cached_property
    def resources_availability(self):
        """Return the percentage of availability for resources."""
        # One flat list gathering the checks of every organization
        checks = list(chain.from_iterable(
            org.check_availability() for org in self.organizations))
        if not checks:
            return 0
        # Summing works as a count because the checks are booleans
        return round(100. * sum(checks) / len(checks), 2)

    @cached_property
    def datasets_org_count(self):
        """Return the number of datasets of user's organizations."""
        from udata.models import Dataset  # Circular imports.
        total = 0
        for org in self.organizations:
            total += Dataset.objects(organization=org).visible().count()
        return total

    @cached_property
    def followers_org_count(self):
        """Return the number of followers of user's organizations."""
        from udata.models import Follow  # Circular imports.
        total = 0
        for org in self.organizations:
            total += Follow.objects(following=org).count()
        return total

    @property
    def datasets_count(self):
        """Return the number of datasets of the user."""
        return self.metrics.get('datasets', 0)

    @property
    def followers_count(self):
        """Return the number of followers of the user."""
        return self.metrics.get('followers', 0)

    def generate_api_key(self):
        '''
        Store a new API key on the user (the user is not saved).

        The key is the user id and the current time, signed with the
        application `SECRET_KEY`.
        '''
        serializer = JSONWebSignatureSerializer(
            current_app.config['SECRET_KEY'])
        payload = {
            'user': str(self.id),
            'time': time(),
        }
        self.apikey = serializer.dumps(payload)

    def clear_api_key(self):
        '''Remove the API key from the user (the user is not saved).'''
        self.apikey = None

    @classmethod
    def get(cls, id_or_slug):
        '''
        Fetch a user by slug first, then by identifier.

        The identifier lookup goes through `get_or_404`.
        '''
        found = cls.objects(slug=id_or_slug).first()
        if found:
            return found
        return cls.objects.get_or_404(id=id_or_slug)

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        '''Send `before_save` for the document.'''
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        '''
        Send `after_save`, then `on_create` or `on_update` depending
        on the `created` keyword argument.
        '''
        cls.after_save.send(document)
        signal = cls.on_create if kwargs.get('created') else cls.on_update
        signal.send(document)

    @cached_property
    def json_ld(self):
        '''
        Dictionary describing the user as a schema.org `Person`.

        `description`, `image` and `url` are only present when the
        matching attribute is set.
        '''
        document = {
            '@type': 'Person',
            '@context': 'http://schema.org',
            'name': self.fullname,
        }
        if self.about:
            document['description'] = mdstrip(self.about)
        if self.avatar_url:
            document['image'] = self.avatar_url
        if self.website:
            document['url'] = self.website
        return document

    def mark_as_deleted(self):
        '''
        Anonymize the account, detach it from related objects
        and send the deletion mail.

        The personal fields are overwritten and the user saved, then the
        user is removed from its organizations, its discussion messages
        are blanked and its follows (both directions) are deleted.
        '''
        # Copy taken before anonymization and used as the mail recipient
        copied_user = copy(self)

        self.email = '{}@deleted'.format(self.id)
        self.password = None
        self.active = False
        self.first_name = self.last_name = 'DELETED'
        self.avatar = None
        self.avatar_url = None
        self.website = None
        self.about = None
        self.deleted = datetime.now()
        self.save()

        for organization in self.organizations:
            remaining = [
                membership for membership in organization.members
                if membership.user != self
            ]
            organization.members = remaining
            organization.save()

        for discussion in Discussion.objects(discussion__posted_by=self):
            own_messages = (
                message for message in discussion.discussion
                if message.posted_by == self
            )
            for message in own_messages:
                message.content = 'DELETED'
            discussion.save()

        Follow.objects(follower=self).delete()
        Follow.objects(following=self).delete()
        mail.send(_('Account deletion'), copied_user, 'account_deleted')
Example #11
0
class License(db.Document):
    '''
    A license that can be looked up or guessed from free text
    (identifier, title, URL or one of their alternates).
    '''
    # We need to declare id explicitly since we do not use the default
    # value set by Mongo.
    id = db.StringField(primary_key=True)
    created_at = db.DateTimeField(default=datetime.now, required=True)
    title = db.StringField(required=True)
    alternate_titles = db.ListField(db.StringField())
    slug = db.SlugField(required=True, populate_from='title')
    url = db.URLField()
    alternate_urls = db.ListField(db.URLField())
    maintainer = db.StringField()
    flags = db.ListField(db.StringField())

    active = db.BooleanField()

    def __str__(self):
        '''Return the license title.'''
        return self.title

    @classmethod
    def guess(cls, *strings, **kwargs):
        '''
        Try to guess a license from a list of strings.

        Strings are tried in order and the first successful guess wins.

        Accept a `default` keyword argument which will be
        the default fallback license.

        :param strings: candidate texts, each passed to `guess_one`
        :returns: the first guessed license, else `default`, else None
        '''
        for string in strings:
            license = cls.guess_one(string)
            if license:
                return license
        return kwargs.get('default')

    @staticmethod
    def _single_match(scored):
        '''
        Return the only license scored within `MAX_DISTANCE`.

        :param scored: iterable of `(license, distance)` pairs; the same
            license may appear several times
        :returns: the license when exactly one distinct license is close
            enough, None when there is none or more than one
        '''
        # Key on the primary key so a license scored several times
        # (one pair per alternate title) only counts once
        matches = {}
        for candidate, distance in scored:
            if distance <= MAX_DISTANCE:
                matches[candidate.id] = candidate
        # If there is more than one match, we cannot determine
        # which one is closer to safely choose between candidates
        if len(matches) == 1:
            return next(iter(matches.values()))
        return None

    @classmethod
    def guess_one(cls, text):
        '''
        Try to guess license from a string.

        The following strategies are tried in order, stopping at the
        first match:

        1. exact match on identifier, slug, URL or alternate URLs
        2. if the text is a valid URL, match on host and path only
           (ignoring scheme and trailing slash)
        3. single fuzzy match on `slug`
        4. single fuzzy match on `title`
        5. single fuzzy match on slugified `alternate_titles`

        Fuzzy matches use a Damerau-Levenshtein distance bounded by
        `MAX_DISTANCE`.

        :param text: the text to match; empty or whitespace-only
            values are never matched
        :returns: the matching license or None
        '''
        if not text:
            return
        text = text.strip().lower()  # Stored identifiers are lower case
        if not text:
            # Whitespace-only input: without this guard the queries below
            # would run with empty values and the fuzzy steps would compare
            # every license against an empty string.
            return
        qs = cls.objects
        slug = cls.slug.slugify(text)  # Use slug as it normalize string
        license = qs(
            db.Q(id__iexact=text) | db.Q(slug=slug) | db.Q(url__iexact=text)
            | db.Q(alternate_urls__iexact=text)).first()

        if license is None:
            # If we're dealing with an URL, let's try some specific stuff
            # like getting rid of trailing slash and scheme mismatch
            try:
                url = validate_url(text)
            except ValidationError:
                pass
            else:
                parsed = urlparse(url)
                path = parsed.path.rstrip('/')
                query = f'{parsed.netloc}{path}'
                license = qs(
                    db.Q(url__icontains=query)
                    | db.Q(alternate_urls__contains=query)).first()

        if license is None:
            # Try to single match `slug` with a low Damerau-Levenshtein distance
            license = cls._single_match(
                (candidate, rdlevenshtein(candidate.slug, slug))
                for candidate in cls.objects)

        if license is None:
            # Try to match `title` with a low Damerau-Levenshtein distance
            license = cls._single_match(
                (candidate, rdlevenshtein(candidate.title, text))
                for candidate in cls.objects)

        if license is None:
            # Try to single match `alternate_titles` with a low
            # Damerau-Levenshtein distance. A license is scored once per
            # alternate title; `_single_match` counts it only once so that
            # several close titles of one license are not seen as ambiguous.
            license = cls._single_match(
                (candidate, rdlevenshtein(cls.slug.slugify(title), slug))
                for candidate in cls.objects
                for title in candidate.alternate_titles)
        return license

    @classmethod
    def default(cls):
        '''Return the license whose id is `DEFAULT_LICENSE['id']`, if any.'''
        return cls.objects(id=DEFAULT_LICENSE['id']).first()
Example #12
0
class PublicURLTester(db.Document):
    '''Minimal document exposing a single `db.URLField` built with `public=True`.'''
    # NOTE(review): `public=True` presumably restricts accepted URLs to
    # public ones -- confirm in the `db.URLField` implementation
    url = db.URLField(public=True)
Example #13
0
class User(db.Document, WithMetrics, UserMixin):
    '''
    A user account: identity, credentials, login tracking data,
    aggregated figures and the signals sent around its persistence.
    '''
    # The slug is populated from the computed `fullname`
    slug = db.SlugField(
        max_length=255, required=True, populate_from='fullname')
    email = db.StringField(max_length=255, required=True)
    password = db.StringField()
    active = db.BooleanField()
    roles = db.ListField(db.ReferenceField(Role), default=[])

    first_name = db.StringField(max_length=255, required=True)
    last_name = db.StringField(max_length=255, required=True)

    avatar_url = db.URLField()
    avatar = db.ImageField(
        fs=avatars, basename=default_image_basename, thumbnails=AVATAR_SIZES)
    website = db.URLField()
    about = db.StringField()

    prefered_language = db.StringField()

    apikey = db.StringField()

    # Account lifecycle and login tracking
    created_at = db.DateTimeField(default=datetime.now, required=True)
    confirmed_at = db.DateTimeField()
    last_login_at = db.DateTimeField()
    current_login_at = db.DateTimeField()
    last_login_ip = db.StringField()
    current_login_ip = db.StringField()
    login_count = db.IntField()

    deleted = db.DateTimeField()
    ext = db.MapField(db.GenericEmbeddedDocumentField())
    extras = db.ExtrasField()

    # Class-level signals; only the save-related ones are sent from this class
    before_save = Signal()
    after_save = Signal()
    on_create = Signal()
    on_update = Signal()
    before_delete = Signal()
    after_delete = Signal()
    on_delete = Signal()

    meta = {
        'allow_inheritance': True,
        'indexes': ['-created_at', 'slug', 'apikey'],
        'ordering': ['-created_at'],
    }

    def __str__(self):
        '''Return the user full name.'''
        return self.fullname

    __unicode__ = __str__

    @property
    def fullname(self):
        '''First and last names joined by a space, missing parts skipped.'''
        first = self.first_name or ''
        last = self.last_name or ''
        return ' '.join([first, last]).strip()

    @cached_property
    def organizations(self):
        '''Queryset of the organizations having this user as a member.'''
        from udata.core.organization.models import Organization
        return Organization.objects(members__user=self)

    @property
    def sysadmin(self):
        '''Whether the user has the `admin` role.'''
        return self.has_role('admin')

    def url_for(self, *args, **kwargs):
        '''
        Build the `users.show` URL for this user.

        Extra positional and keyword arguments are forwarded to the
        module-level `url_for`.
        '''
        return url_for('users.show', *args, user=self, **kwargs)

    display_url = property(url_for)

    @property
    def external_url(self):
        '''URL of this user, built with `_external=True`.'''
        return self.url_for(_external=True)

    @property
    def visible(self):
        '''True when the `datasets` and `reuses` metrics sum above zero.'''
        datasets = self.metrics.get('datasets', 0)
        reuses = self.metrics.get('reuses', 0)
        return datasets + reuses > 0

    @cached_property
    def resources_availability(self):
        """Return the percentage of availability for resources."""
        # One flat list gathering the checks of every organization
        checks = list(chain.from_iterable(
            org.check_availability() for org in self.organizations))
        if not checks:
            return 0
        # Summing works as a count because the checks are booleans
        return round(100. * sum(checks) / len(checks), 2)

    @cached_property
    def datasets_org_count(self):
        """Return the number of datasets of user's organizations."""
        from udata.models import Dataset  # Circular imports.
        total = 0
        for org in self.organizations:
            total += Dataset.objects(organization=org).visible().count()
        return total

    @cached_property
    def followers_org_count(self):
        """Return the number of followers of user's organizations."""
        from udata.models import FollowOrg  # Circular imports.
        total = 0
        for org in self.organizations:
            total += FollowOrg.objects(following=org).count()
        return total

    @property
    def datasets_count(self):
        """Return the number of datasets of the user."""
        return self.metrics.get('datasets', 0)

    @property
    def followers_count(self):
        """Return the number of followers of the user."""
        return self.metrics.get('followers', 0)

    def generate_api_key(self):
        '''
        Store a new API key on the user (the user is not saved).

        The key is the user id and the current time, signed with the
        application `SECRET_KEY`.
        '''
        serializer = JSONWebSignatureSerializer(
            current_app.config['SECRET_KEY'])
        payload = {
            'user': str(self.id),
            'time': time(),
        }
        self.apikey = serializer.dumps(payload)

    def clear_api_key(self):
        '''Remove the API key from the user (the user is not saved).'''
        self.apikey = None

    @classmethod
    def get(cls, id_or_slug):
        '''
        Fetch a user by slug first, then by identifier.

        The identifier lookup goes through `get_or_404`.
        '''
        found = cls.objects(slug=id_or_slug).first()
        if found:
            return found
        return cls.objects.get_or_404(id=id_or_slug)

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        '''Send `before_save` for the document.'''
        cls.before_save.send(document)

    @classmethod
    def post_save(cls, sender, document, **kwargs):
        '''
        Send `after_save`, then `on_create` or `on_update` depending
        on the `created` keyword argument.
        '''
        cls.after_save.send(document)
        signal = cls.on_create if kwargs.get('created') else cls.on_update
        signal.send(document)
Example #14
0
class ResourceMixin(object):
    '''
    Mixin declaring the fields and helpers shared by resources:
    location (`url`), file metadata, dates and a JSON-LD representation.
    '''
    id = db.AutoUUIDField(primary_key=True)
    title = db.StringField(verbose_name="Title", required=True)
    description = db.StringField()
    filetype = db.StringField(choices=RESOURCE_TYPES.keys(),
                              default='file',
                              required=True)
    url = db.URLField(required=True)
    # Hash of `url`, maintained by `clean()`
    urlhash = db.StringField()
    checksum = db.EmbeddedDocumentField(Checksum)
    format = db.StringField()
    mime = db.StringField()
    filesize = db.IntField()  # `size` is a reserved keyword for mongoengine.
    extras = db.ExtrasField()

    created_at = db.DateTimeField(default=datetime.now, required=True)
    modified = db.DateTimeField(default=datetime.now, required=True)
    published = db.DateTimeField(default=datetime.now, required=True)
    deleted = db.DateTimeField()

    def clean(self):
        '''Run the parent cleaning, then refresh `urlhash` if needed.'''
        super(ResourceMixin, self).clean()
        # Hash again when there is no hash yet or when the URL changed
        if not self.urlhash or 'url' in self._get_changed_fields():
            self.urlhash = hash_url(self.url)

    @property
    def closed_format(self):
        """Return True if the specified format is in CLOSED_FORMATS."""
        return self.format and self.format.lower() in CLOSED_FORMATS

    def check_availability(self):
        '''
        Return the check status from extras if any.

        NB: `unknown` will evaluate to True in the aggregate checks using
        `all([])` (dataset, organization, user).
        '''
        return self.extras.get('check:available', 'unknown')

    @property
    def latest(self):
        '''
        Permanent link to the latest version of this resource.

        If this resource is updated and `url` changes, this property won't.
        '''
        return url_for('datasets.resource', id=self.id, _external=True)

    @cached_property
    def json_ld(self):
        '''
        Dictionary describing the resource as a schema.org `DataDownload`.

        Optional entries (download statistics, format, size, mime type,
        description and checksum) are only present when the matching
        data is set.
        '''
        result = {
            '@type': 'DataDownload',
            '@id': str(self.id),
            'url': self.latest,
            'name': self.title or _('Nameless resource'),
            'contentUrl': self.url,
            'dateCreated': self.created_at.isoformat(),
            'dateModified': self.modified.isoformat(),
            'datePublished': self.published.isoformat(),
            'extras':
            [get_json_ld_extra(*item) for item in self.extras.items()],
        }

        if 'views' in self.metrics:
            result['interactionStatistic'] = {
                '@type': 'InteractionCounter',
                'interactionType': {
                    '@type': 'DownloadAction',
                },
                'userInteractionCount': self.metrics['views']
            }

        if self.format:
            result['encodingFormat'] = self.format

        if self.filesize:
            result['contentSize'] = self.filesize

        if self.mime:
            result['fileFormat'] = self.mime

        if self.description:
            result['description'] = mdstrip(self.description)

        # These 2 values are not standard
        if self.checksum:
            # No trailing comma here: it would store a 1-tuple
            # instead of the checksum value itself
            result['checksum'] = self.checksum.value
            result['checksumType'] = self.checksum.type or 'sha1'

        return result
Example #15
0
 class Fake(db.Document):
     # Minimal document: a single `db.URLField` with default options
     url = db.URLField()
Example #16
0
class PrivateURLTester(db.Document):
    '''Minimal document exposing a single `db.URLField` built with `private=True`.'''
    # NOTE(review): `private=True` presumably allows private URLs
    # -- confirm in the `db.URLField` implementation
    url = db.URLField(private=True)
Example #17
0
class ResourceMixin(object):
    '''
    Mixin declaring the fields and helpers shared by resources:
    location (`url`), file metadata, dates, link checking helpers
    and a JSON-LD representation.
    '''
    id = db.AutoUUIDField(primary_key=True)
    title = db.StringField(verbose_name="Title", required=True)
    description = db.StringField()
    filetype = db.StringField(
        choices=RESOURCE_FILETYPES.keys(),
        default='file',
        required=True,
    )
    type = db.StringField(
        choices=RESOURCE_TYPES.keys(),
        default='main',
        required=True,
    )
    url = db.URLField(required=True)
    # Hash of `url`, maintained by `clean()`
    urlhash = db.StringField()
    checksum = db.EmbeddedDocumentField(Checksum)
    format = db.StringField()
    mime = db.StringField()
    filesize = db.IntField()  # `size` is a reserved keyword for mongoengine.
    extras = db.ExtrasField()

    created_at = db.DateTimeField(default=datetime.now, required=True)
    modified = db.DateTimeField(default=datetime.now, required=True)
    published = db.DateTimeField(default=datetime.now, required=True)
    deleted = db.DateTimeField()

    def clean(self):
        '''Run the parent cleaning, then refresh `urlhash` if needed.'''
        super(ResourceMixin, self).clean()
        if self.urlhash and 'url' not in self._get_changed_fields():
            # A hash exists and the URL did not change: keep it
            return
        self.urlhash = hash_url(self.url)

    @cached_property  # Accessed at least 2 times in front rendering
    def preview_url(self):
        '''Preview URL as computed by `get_preview_url` for this resource.'''
        return get_preview_url(self)

    @property
    def closed_or_no_format(self):
        """
        Return True if the specified format is in CLOSED_FORMATS or
        no format has been specified.
        """
        if not self.format:
            return True
        return self.format.lower() in CLOSED_FORMATS

    def check_availability(self):
        '''
        Return the check status from extras if any.

        NB: `unknown` will evaluate to True in the aggregate checks using
        `all([])` (dataset, organization, user).
        '''
        return self.extras.get('check:available', 'unknown')

    def need_check(self):
        '''Does the resource needs to be checked against its linkchecker?

        We check unavailable resources often, unless they go over the
        threshold. Available resources are checked less and less frequently
        based on their historical availability.

        Returns True when the status is unknown, when no usable check date
        is stored or when the last check is older than the computed delay.
        '''
        config = current_app.config
        min_cache_duration = config.get('LINKCHECKING_MIN_CACHE_DURATION')
        max_cache_duration = config.get('LINKCHECKING_MAX_CACHE_DURATION')
        ko_threshold = config.get('LINKCHECKING_UNAVAILABLE_THRESHOLD')

        count_availability = self.extras.get('check:count-availability', 1)
        is_available = self.check_availability()
        if is_available == 'unknown':
            return True

        if is_available or count_availability > ko_threshold:
            # The delay grows with the availability count, up to the maximum
            delta = min(min_cache_duration * count_availability,
                        max_cache_duration)
        else:
            delta = min_cache_duration

        check_date = self.extras.get('check:date')
        if not check_date:
            return True

        limit_date = datetime.now() - timedelta(minutes=delta)
        if not isinstance(check_date, datetime):
            # The date may be stored in a form that needs parsing
            try:
                check_date = parse_dt(check_date)
            except (ValueError, TypeError):
                return True
        return not (check_date >= limit_date)

    @property
    def latest(self):
        '''
        Permanent link to the latest version of this resource.

        If this resource is updated and `url` changes, this property won't.
        '''
        return url_for('datasets.resource', id=self.id, _external=True)

    @cached_property
    def json_ld(self):
        '''
        Dictionary describing the resource as a schema.org `DataDownload`.

        Optional entries (download statistics, format, size, mime type
        and description) are only present when the matching data is set.
        '''
        document = {
            '@type': 'DataDownload',
            '@id': str(self.id),
            'url': self.latest,
            'name': self.title or _('Nameless resource'),
            'contentUrl': self.url,
            'dateCreated': self.created_at.isoformat(),
            'dateModified': self.modified.isoformat(),
            'datePublished': self.published.isoformat(),
            'extras': [
                get_json_ld_extra(key, value)
                for key, value in self.extras.items()
            ],
        }

        if 'views' in self.metrics:
            counter = {
                '@type': 'InteractionCounter',
                'interactionType': {
                    '@type': 'DownloadAction',
                },
                'userInteractionCount': self.metrics['views']
            }
            document['interactionStatistic'] = counter

        # Plain optional values, copied as-is when set
        optional = (
            ('encodingFormat', self.format),
            ('contentSize', self.filesize),
            ('fileFormat', self.mime),
        )
        for key, value in optional:
            if value:
                document[key] = value

        if self.description:
            document['description'] = mdstrip(self.description)

        return document
Example #18
0
class ResourceMixin(object):
    '''
    Mixin declaring the fields and helpers shared by resources:
    location (`url`), file metadata, dates, availability checking
    and a JSON-LD representation.
    '''
    id = db.AutoUUIDField(primary_key=True)
    title = db.StringField(verbose_name="Title", required=True)
    description = db.StringField()
    filetype = db.StringField(choices=RESOURCE_TYPES.keys(),
                              default='file',
                              required=True)
    url = db.URLField(required=True)
    # Hash of `url`, maintained by `clean()`
    urlhash = db.StringField()
    checksum = db.EmbeddedDocumentField(Checksum)
    format = db.StringField()
    mime = db.StringField()
    filesize = db.IntField()  # `size` is a reserved keyword for mongoengine.
    extras = db.ExtrasField()

    created_at = db.DateTimeField(default=datetime.now, required=True)
    modified = db.DateTimeField(default=datetime.now, required=True)
    published = db.DateTimeField(default=datetime.now, required=True)
    deleted = db.DateTimeField()

    def clean(self):
        '''Run the parent cleaning, then refresh `urlhash` if needed.'''
        super(ResourceMixin, self).clean()
        # Hash again when there is no hash yet or when the URL changed
        if not self.urlhash or 'url' in self._get_changed_fields():
            self.urlhash = hash_url(self.url)

    @property
    def closed_format(self):
        """Return True if the specified format is in CLOSED_FORMATS.

        A resource without format is not considered closed.
        """
        if not self.format:
            # `format` is optional: avoid calling `.lower()` on None
            return False
        return self.format.lower() in CLOSED_FORMATS

    def check_availability(self, group):
        """Check if a resource is reachable against a Croquemort server.

        Only `remote` resources are actually checked; they are reported
        unavailable on error, on a response without status or on a
        status in the server error range.

        Return a boolean.
        """
        if self.filetype == 'remote':
            # We perform a quick check for performances matters.
            error, response = check_url_from_cache(self.url, group)
            if error or 'status' not in response:
                return False
            elif int(response['status']) >= httplib.INTERNAL_SERVER_ERROR:
                return False
            else:
                return True
        else:
            return True  # We consider that API cases (types) are OK.

    @property
    def is_available(self):
        '''Availability of the resource, checked without any group.'''
        return self.check_availability(group=None)

    @property
    def latest(self):
        '''
        Permanent link to the latest version of this resource.

        If this resource is updated and `url` changes, this property won't.
        '''
        return url_for('datasets.resource', id=self.id, _external=True)

    @cached_property
    def json_ld(self):
        '''
        Dictionary describing the resource as a schema.org `DataDownload`.

        Optional entries (download statistics, format, size, mime type,
        description and checksum) are only present when the matching
        data is set.
        '''
        result = {
            '@type': 'DataDownload',
            '@id': str(self.id),
            'url': self.latest,
            'name': self.title or _('Nameless resource'),
            'contentUrl': self.url,
            'dateCreated': self.created_at.isoformat(),
            'dateModified': self.modified.isoformat(),
            'datePublished': self.published.isoformat(),
        }

        if 'views' in self.metrics:
            result['interactionStatistic'] = {
                '@type': 'InteractionCounter',
                'interactionType': {
                    '@type': 'DownloadAction',
                },
                'userInteractionCount': self.metrics['views']
            }

        if self.format:
            result['encodingFormat'] = self.format

        if self.filesize:
            result['contentSize'] = self.filesize

        if self.mime:
            result['fileFormat'] = self.mime

        if self.description:
            result['description'] = mdstrip(self.description)

        # These 2 values are not standard
        if self.checksum:
            # No trailing comma here: it would store a 1-tuple
            # instead of the checksum value itself
            result['checksum'] = self.checksum.value
            result['checksumType'] = self.checksum.type or 'sha1'

        return result