Beispiel #1
0
def migrate(dry=True):
    """Give every Pointer with a short ``_id`` a new ObjectId-based ``_id``.

    Selects the documents in ``database.pointer`` whose ``_id`` is at most
    five characters long, copies the old value to ``_legacy_id`` and assigns
    ``str(ObjectId())`` as the new ``_id``. Each pointer is handled inside
    its own ``TokuTransaction``.

    :param bool dry: When true (the default) nothing is saved and no backref
        is removed; the new ids exist only on the in-memory objects. The same
        log lines are still emitted and the pointer is still counted.
    """
    migrated = 0
    pointers_with_invalid_backrefs = []
    # Only the _id is fetched here; the full object is loaded per document.
    pointers = database.pointer.find({'$where': 'this._id.length <= 5'},
                                     {'_id': True})
    total = pointers.count()
    for i, doc in enumerate(pointers):
        pointer = Pointer.load(doc['_id'])
        with TokuTransaction():
            old_id = pointer._id
            logger.info('({}/{}) Preparing to migrate Pointer {}'.format(
                i + 1, total, old_id))
            pointer._legacy_id = old_id
            pointer._id = str(ObjectId())
            try:
                if not dry:
                    pointer.save()
            except ValueError:
                # A ValueError from save() is treated as an orphaned pointer:
                # drop the invalid backref, remember the id, and save again.
                # ``logger.warning`` replaces the deprecated ``warn`` alias.
                logger.warning(
                    'Removing backref for orphaned pointer: {}'.format(old_id))
                if not dry:
                    remove_invalid_backref(pointer)
                    pointers_with_invalid_backrefs.append(old_id)
                    pointer.save()
            logger.info('Successfully migrated Pointer {} _id to {}'.format(
                old_id, pointer._id))
            migrated += 1
    logger.info('Successfully migrated {} pointers'.format(migrated))
    logger.info('Removed invalid backrefs on {} pointers: {}'.format(
        len(pointers_with_invalid_backrefs), pointers_with_invalid_backrefs))
Beispiel #2
0
class NodeLicenseRecord(StoredObject):
    """Record pairing a ``nodelicense`` reference with a year and holders.

    ``name``, ``text`` and ``id`` are read through from the referenced
    ``node_license``; each is ``None`` when no license is set.
    """

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    node_license = fields.ForeignField('nodelicense', required=True)
    # Kept as a string on purpose so that ranges such as 2012-2015 fit.
    year = fields.StringField()
    copyright_holders = fields.StringField(list=True)

    @property
    def name(self):
        """``name`` of the referenced license, or ``None`` without one."""
        if not self.node_license:
            return None
        return self.node_license.name

    @property
    def text(self):
        """``text`` of the referenced license, or ``None`` without one."""
        if not self.node_license:
            return None
        return self.node_license.text

    @property
    def id(self):
        """``id`` of the referenced license, or ``None`` without one."""
        if not self.node_license:
            return None
        return self.node_license.id

    def to_json(self):
        """Return ``serialize_node_license_record`` applied to this record."""
        serialized = serialize_node_license_record(self)
        return serialized

    def copy(self):
        """Save and return a new record with the same license, year and holders."""
        field_values = {
            'node_license': self.node_license,
            'year': self.year,
            'copyright_holders': self.copyright_holders,
        }
        duplicate = NodeLicenseRecord(**field_values)
        duplicate.save()
        return duplicate
Beispiel #3
0
class ArchiveTarget(StoredObject):
    """Stores the results of archiving a single addon."""

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # addon_short_name of the addon this target refers to
    name = fields.StringField()

    # Starts out as ARCHIVER_INITIATED
    status = fields.StringField(default=ARCHIVER_INITIATED)

    # <dict> representation of a website.archiver.AggregateStatResult
    # Format: {
    #     'target_id': <str>,
    #     'target_name': <str>,
    #     'targets': <list>(StatResult | AggregateStatResult),
    #     'num_files': <int>,
    #     'disk_usage': <float>,
    # }
    stat_result = fields.DictionaryField()
    errors = fields.StringField(list=True)

    def __repr__(self):
        """Show the class name together with ``_id``, ``name`` and ``status``."""
        template = '<{0}(_id={1}, name={2}, status={3})>'
        values = (self.__class__.__name__, self._id, self.name, self.status)
        return template.format(*values)
Beispiel #4
0
class NotificationDigest(StoredObject):
    """Stored record with a user id, timestamp, event, message and node lineage.

    NOTE(review): the name suggests these rows are collected into digest
    notifications -- confirm against the code that reads this model.
    """
    # Primary key: a new ObjectId rendered as a string unless one is supplied.
    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))
    # Stored as a plain string, not as a ForeignField.
    user_id = fields.StringField()
    timestamp = fields.DateTimeField()
    event = fields.StringField()
    message = fields.StringField()
    # List of strings; presumably node ids along the lineage -- verify.
    node_lineage = fields.StringField(list=True)
Beispiel #5
0
class PreprintProvider(StoredObject):
    """Stored description of a preprint provider: name, artwork and links."""

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))
    name = fields.StringField(required=True)
    # File name of the logo; see ``logo_path`` for the static path built from it.
    logo_name = fields.StringField()
    description = fields.StringField()
    # File name of the banner; see ``banner_path``.
    banner_name = fields.StringField()
    external_url = fields.StringField()

    def get_absolute_url(self):
        """Return ``absolute_api_v2_url`` followed by ``preprint_providers/<_id>``.

        NOTE(review): ``absolute_api_v2_url`` is itself built from
        ``/preprint_providers/<_id>/``, so that segment is appended a second
        time here. Left unchanged because callers may rely on the current
        value -- confirm whether the duplication is intended.
        """
        return '{}preprint_providers/{}'.format(self.absolute_api_v2_url, self._id)

    @property
    def absolute_api_v2_url(self):
        """API v2 URL produced by ``api_v2_url`` for this provider's path."""
        path = '/preprint_providers/{}/'.format(self._id)
        return api_v2_url(path)

    @property
    def logo_path(self):
        """Static path of the logo image, or ``None`` when no logo is set."""
        if self.logo_name:
            return '/static/img/preprint_providers/{}'.format(self.logo_name)
        return None

    @property
    def banner_path(self):
        """Static path of the banner image, or ``None`` when no banner is set.

        Uses ``banner_name``; the previous version tested and returned
        ``logo_name``, so it always mirrored ``logo_path``.
        """
        if self.banner_name:
            return '/static/img/preprint_providers/{}'.format(self.banner_name)
        return None
Beispiel #6
0
class NotificationDigest(StoredObject):
    """Stored record with a user id, send type, timestamp, event, message and
    node lineage.

    NOTE(review): the name suggests these rows are collected into digest
    notifications -- confirm against the code that reads this model.
    """
    # Primary key: a new ObjectId rendered as a string unless one is supplied.
    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))
    # Indexed plain string, not a ForeignField.
    user_id = fields.StringField(index=True)
    timestamp = fields.DateTimeField()
    # Indexed; values are checked by validate_subscription_type.
    send_type = fields.StringField(index=True, validate=validate_subscription_type)
    event = fields.StringField()
    message = fields.StringField()
    # List of strings; presumably node ids along the lineage -- verify.
    node_lineage = fields.StringField(list=True)
Beispiel #7
0
class NodeLicense(StoredObject):
    """Stored license definition: an identifier, a name, the license text and
    a list of property strings.
    """

    # Primary key: a new ObjectId rendered as a string unless one is supplied.
    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # Separate from ``_id``: required, unique and not editable.
    id = fields.StringField(required=True, unique=True, editable=False)
    # Required and unique.
    name = fields.StringField(required=True, unique=True)
    text = fields.StringField(required=True)
    # List of strings; NOTE(review): presumably names of values a license
    # record must supply (e.g. year) -- confirm against the callers.
    properties = fields.StringField(list=True)
Beispiel #8
0
class ApiOAuth2Scope(StoredObject):
    """
    Store information about recognized OAuth2 scopes. Only scopes registered
    under this database model can be requested by third parties.
    """
    # Primary key: a new ObjectId rendered as a string unless one is supplied.
    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))
    # Required, unique and indexed.
    name = fields.StringField(unique=True, required=True, index=True)
    description = fields.StringField(required=True)
    # Indexed flag, true by default; nothing in this class ever clears it.
    is_active = fields.BooleanField(
        default=True, index=True)  # TODO: Add mechanism to deactivate a scope?
Beispiel #9
0
class ExternalAccount(StoredObject):
    """An account on an external service.

    By design this object neither knows nor should know which other objects
    are associated with it. Keep it as thin as possible: it holds only the
    fields that must be stored in the database.

    ``provider`` acts as a de facto foreign key to an ``ExternalProvider``
    object, because providers themselves are not stored in the database.
    """
    # Compound unique index over (provider, provider_id).
    __indices__ = [
        {
            'key_or_list': [
                ('provider', pymongo.ASCENDING),
                ('provider_id', pymongo.ASCENDING),
            ],
            'unique': True,
        }
    ]
    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # OAuth credentials; one or both of the next two fields should be set.
    #   OAuth1: usually the "oauth_token"
    #   OAuth2: usually the "access_token"
    oauth_key = fields.StringField()

    #   OAuth1: usually the "oauth_token_secret"
    #   OAuth2: not used
    oauth_secret = fields.StringField()

    # OAuth2 only
    refresh_token = fields.StringField()
    expires_at = fields.DateTimeField()
    scopes = fields.StringField(list=True, default=lambda: list())

    # The `name` of the service; allows querying for only the accounts on a
    # particular provider
    provider = fields.StringField(required=True)
    # The proper 'name' of the service; needed for account serialization
    provider_name = fields.StringField(required=True)

    # Unique, persistent ID on the remote service
    provider_id = fields.StringField()

    # The user's name on the external service
    display_name = fields.StringField()
    # Link to the user's profile on the external service
    profile_url = fields.StringField()

    def __repr__(self):
        """Show the account as ``provider/provider_id``."""
        template = '<ExternalAccount: {}/{}>'
        return template.format(self.provider, self.provider_id)
Beispiel #10
0
class Subject(StoredObject):
    """A piece of subject text linked to parent and child subjects."""

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))
    text = fields.StringField(required=True)
    # Both relations point back at this same model.
    parents = fields.ForeignField('subject', list=True)
    children = fields.ForeignField('subject', list=True)

    @property
    def absolute_api_v2_url(self):
        """API v2 URL produced by ``api_v2_url`` for ``taxonomies/<_id>/``."""
        path = 'taxonomies/{}/'.format(self._id)
        return api_v2_url(path)

    @property
    def child_count(self):
        """Number of entries in ``children``."""
        children = self.children
        return len(children)

    def get_absolute_url(self):
        """Return ``absolute_api_v2_url``."""
        url = self.absolute_api_v2_url
        return url
Beispiel #11
0
class NodeLicense(StoredObject):
    """Stored license definition: an identifier, a name, the license text and
    a list of property strings.
    """

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # ``unique=False`` on ``id`` and ``name`` skips modular-odm's uniqueness
    # implementation, depending on MongoDB's instead (the decorator will
    # install the proper index), so that we can kludge a non-racey upsert in
    # ensure_licenses.
    id = fields.StringField(required=True, unique=False, editable=False)
    name = fields.StringField(required=True, unique=False)
    text = fields.StringField(required=True)
    properties = fields.StringField(list=True)
Beispiel #12
0
class ArchiveJob(StoredObject):
    """Tracks one archive run from ``src_node`` to ``dst_node``.

    The job keeps one ``ArchiveTarget`` per archivable addon (see
    ``set_targets``) and derives its own ``status`` and ``done`` flag from
    the statuses of those targets (see ``update_target``).
    """

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # whether or not the ArchiveJob is complete (success or fail)
    done = fields.BooleanField(default=False)
    # whether or not emails have been sent for this ArchiveJob
    sent = fields.BooleanField(default=False)
    status = fields.StringField(default=ARCHIVER_INITIATED)
    datetime_initiated = fields.DateTimeField(default=datetime.datetime.utcnow)

    dst_node = fields.ForeignField('node', backref='active')
    src_node = fields.ForeignField('node')
    initiator = fields.ForeignField('user')

    target_addons = fields.ForeignField('archivetarget', list=True)

    # This field is used for stashing embargo URLs while still in the app context
    # Format: {
    #     'view': <str> url,
    #     'approve': <str> url,
    #     'disapprove': <str> url,
    # }
    meta = fields.DictionaryField()

    def __repr__(self):
        return (
            '<{ClassName}(_id={self._id}, done={self.done}, '
            ' status={self.status}, src_node={self.src_node}, dst_node={self.dst_node})>'
        ).format(ClassName=self.__class__.__name__, self=self)

    @property
    def children(self):
        """``archive_job`` of every primary node in ``dst_node.nodes``."""
        return [
            node.archive_job for node in self.dst_node.nodes if node.primary
        ]

    @property
    def parent(self):
        """``archive_job`` of ``dst_node.parent_node``, or ``None`` without a parent."""
        parent_node = self.dst_node.parent_node
        return parent_node.archive_job if parent_node else None

    @property
    def success(self):
        """True when ``status`` equals ARCHIVER_SUCCESS."""
        return self.status == ARCHIVER_SUCCESS

    @property
    def pending(self):
        """True while any target is neither ARCHIVER_SUCCESS nor ARCHIVER_FAILURE."""
        finished_statuses = (ARCHIVER_SUCCESS, ARCHIVER_FAILURE)
        # Test the condition itself rather than the truthiness of the
        # matching target objects.
        return any(
            target.status not in finished_statuses
            for target in self.target_addons
        )

    def info(self):
        """Return the ``(src_node, dst_node, initiator)`` triple."""
        return self.src_node, self.dst_node, self.initiator

    def target_info(self):
        """Return one dict per target with its name, status, stat_result and errors."""
        return [{
            'name': target.name,
            'status': target.status,
            'stat_result': target.stat_result,
            'errors': target.errors
        } for target in self.target_addons]

    def archive_tree_finished(self):
        """Return True when this job and every job below it has no pending target.

        A job without children is finished as soon as it is not pending.
        The previous version returned the *number* of finished children,
        which was truthy as soon as a single child subtree was finished.
        """
        if self.pending:
            return False
        # all() of an empty sequence is True, which covers the leaf case.
        return all(child.archive_tree_finished() for child in self.children)

    def _fail_above(self):
        """Mark the parent ArchiveJob, if there is one, as failed and save it.

        Only the immediate parent (the job of ``dst_node.parent_node``) is
        updated here; this method does not walk further up.
        """
        parent = self.parent
        if parent:
            parent.status = ARCHIVER_FAILURE
            parent.save()

    def _post_update_target(self):
        """Checks for success or failure if the ArchiveJob on self.dst_node
        is finished
        """
        # A job already marked as failed is left alone.
        if self.status == ARCHIVER_FAILURE:
            return
        if self.pending:
            return
        self.done = True
        has_failed_target = any(
            target.status in ARCHIVER_FAILURE_STATUSES
            for target in self.target_addons
        )
        if has_failed_target:
            self.status = ARCHIVER_FAILURE
            self._fail_above()
        else:
            self.status = ARCHIVER_SUCCESS
        self.save()

    def get_target(self, addon_short_name):
        """Return the first target named ``addon_short_name``, or ``None``."""
        return next(
            (target for target in self.target_addons
             if target.name == addon_short_name),
            None
        )

    def _set_target(self, addon_short_name):
        """Create, save and attach a target for the name unless one exists."""
        if self.get_target(addon_short_name):
            return
        target = ArchiveTarget(name=addon_short_name)
        target.save()
        self.target_addons.append(target)

    def set_targets(self):
        """Attach a target for every archivable storage addon of ``src_node``.

        Addons listed in ``settings.ADDONS_ARCHIVABLE`` with the value
        ``'none'`` are skipped, as are addons that are missing, incomplete,
        not a ``StorageAddonBase`` or that report archive errors. The
        ``dataverse`` addon yields two targets, ``-draft`` and
        ``-published``. The job is saved afterwards.
        """
        short_names = []
        candidates = [
            self.src_node.get_addon(name)
            for name in settings.ADDONS_ARCHIVABLE
            if settings.ADDONS_ARCHIVABLE[name] != 'none'
        ]
        for addon in candidates:
            if not addon or not addon.complete or not isinstance(
                    addon, StorageAddonBase):
                continue
            # ``archive_errors`` is optional; when present it is called and
            # any truthy result excludes the addon.
            archive_errors = getattr(addon, 'archive_errors', None)
            if archive_errors and archive_errors():
                continue
            short_name = addon.config.short_name
            if short_name == 'dataverse':
                short_names.append(short_name + '-draft')
                short_names.append(short_name + '-published')
            else:
                short_names.append(short_name)
        for short_name in short_names:
            self._set_target(short_name)
        self.save()

    def update_target(self,
                      addon_short_name,
                      status,
                      stat_result=None,
                      errors=None):
        """Record a new status for one target, then re-evaluate the job.

        :param addon_short_name: name of the target to update
        :param status: new status stored on the target
        :param stat_result: stored on the target; ``{}`` when falsy
        :param errors: stored on the target; ``[]`` when falsy
        :raises AttributeError: if no target with that name exists, because
            ``get_target`` then returns ``None``
        """
        stat_result = stat_result or {}
        errors = errors or []

        target = self.get_target(addon_short_name)
        target.status = status
        target.errors = errors
        target.stat_result = stat_result
        target.save()
        self._post_update_target()
Beispiel #13
0
class Sanction(StoredObject):
    """Sanction class is a generic way to track approval states"""
    # Tell modularodm not to attach backends
    _meta = {
        'abstract': True,
    }

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # Neither approved nor cancelled
    UNAPPROVED = 'unapproved'
    # Has approval
    APPROVED = 'approved'
    # Rejected by at least one person
    REJECTED = 'rejected'
    # Embargo has been completed
    COMPLETED = 'completed'

    state = fields.StringField(default=UNAPPROVED,
                               validate=validators.choice_in((
                                   UNAPPROVED,
                                   APPROVED,
                                   REJECTED,
                                   COMPLETED,
                               )))

    DISPLAY_NAME = 'Sanction'
    # SHORT_NAME must correspond with the associated foreign field to query against,
    # e.g. Node.find_one(Q(sanction.SHORT_NAME, 'eq', sanction))
    SHORT_NAME = 'sanction'

    APPROVAL_NOT_AUTHORIZED_MESSAGE = 'This user is not authorized to approve this {DISPLAY_NAME}'
    APPROVAL_INVALID_TOKEN_MESSAGE = 'Invalid approval token provided for this {DISPLAY_NAME}.'
    REJECTION_NOT_AUTHORIZED_MESSAGE = 'This user is not authorized to reject this {DISPLAY_NAME}'
    # Misspelled original name, kept as an alias so existing references keep working.
    REJECTION_NOT_AUTHORIZED_MESSAEGE = REJECTION_NOT_AUTHORIZED_MESSAGE
    REJECTION_INVALID_TOKEN_MESSAGE = 'Invalid rejection token provided for this {DISPLAY_NAME}.'

    # Controls whether or not the Sanction needs unanimous approval or just a single approval
    ANY = 'any'
    UNANIMOUS = 'unanimous'
    mode = UNANIMOUS

    initiation_date = fields.DateTimeField(
        auto_now_add=datetime.datetime.utcnow)
    # Expiration date-- Sanctions in the UNAPPROVED state that are older than their end_date
    # are automatically made ACTIVE by a daily cron job
    # Use end_date=None for a non-expiring Sanction
    end_date = fields.DateTimeField(default=None)

    # Sanction subclasses must have an initiated_by field
    # initiated_by = fields.ForeignField('user', backref='initiated')

    # Expanded: Dictionary field mapping admin IDs to their approval status and relevant tokens:
    # {
    #   'b3k97': {
    #     'has_approved': False,
    #     'approval_token': 'Pew7wj1Puf7DENUPFPnXSwa1rf3xPN',
    #     'rejection_token': 'TwozClTFOic2PYxHDStby94bCQMwJy'}
    # }
    approval_state = fields.DictionaryField()

    def __repr__(self):
        return '<Sanction(end_date={self.end_date!r}) with _id {self._id!r}>'.format(
            self=self)

    @property
    def is_pending_approval(self):
        """True when ``state`` is UNAPPROVED."""
        return self.state == Sanction.UNAPPROVED

    @property
    def is_approved(self):
        """True when ``state`` is APPROVED."""
        return self.state == Sanction.APPROVED

    @property
    def is_rejected(self):
        """True when ``state`` is REJECTED."""
        return self.state == Sanction.REJECTED

    def approve(self, user):
        """Abstract; subclasses must override.

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(
            "Sanction subclasses must implement an approve method.")

    def reject(self, user):
        """Abstract; subclasses must override.

        :raises NotImplementedError: always, in this base class
        """
        # The message previously named the approve method (copy-paste slip).
        raise NotImplementedError(
            "Sanction subclasses must implement a reject method.")

    def _on_reject(self, user):
        """Callback for rejection of a Sanction

        :param User user:
        """
        raise NotImplementedError(
            'Sanction subclasses must implement an #_on_reject method')

    def _on_complete(self, user):
        """Callback for when a Sanction has approval and enters the ACTIVE state

        :param User user:
        """
        raise NotImplementedError(
            'Sanction subclasses must implement an #_on_complete method')

    def forcibly_reject(self):
        """Set ``state`` to REJECTED; the object is not saved here."""
        self.state = Sanction.REJECTED
Beispiel #14
0
class ApiOAuth2PersonalToken(StoredObject):
    """Information for user-created personal access tokens

    This collection is also used by CAS to create the master list of available tokens.
    Any changes made to field names in this model must be echoed in the CAS implementation.
    """
    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # Name of the field being `token_id` is a CAS requirement.
    # This is the actual value of the token that's used to authenticate
    token_id = fields.StringField(default=functools.partial(random_string,
                                                            length=70),
                                  unique=True)

    owner = fields.ForeignField('User', index=True, required=True)

    name = fields.StringField(required=True, index=True)

    # This field is a space delimited list of scopes, e.g. "osf.full_read osf.full_write"
    scopes = fields.StringField(required=True)

    is_active = fields.BooleanField(default=True, index=True)

    def deactivate(self, save=False):
        """
        Deactivate an ApiOAuth2PersonalToken

        Does not delete the database record, but hides this instance from API

        :param bool save: when true, save the object after clearing ``is_active``
        :return: ``True``
        :raises cas.CasHTTPError: if revoking the token fails with any code
            other than 400; ``is_active`` is then left untouched
        """
        client = cas.get_client()
        try:
            client.revoke_tokens({'token': self.token_id})
        except cas.CasHTTPError as e:
            if e.code != 400:
                # Bare ``raise`` keeps the original traceback intact.
                raise
            # 400: token hasn't been used yet, so not created in cas

        self.is_active = False

        if save:
            self.save()
        return True

    @property
    def url(self):
        """Site-relative settings URL for this token."""
        return '/settings/tokens/{}/'.format(self._id)

    @property
    def absolute_url(self):
        """``url`` joined onto ``settings.DOMAIN``."""
        return urlparse.urljoin(settings.DOMAIN, self.url)

    # Properties used by Django and DRF "Links: self" field
    @property
    def absolute_api_v2_url(self):
        """API v2 URL produced by ``api_v2_url`` for ``/tokens/<_id>/``."""
        path = '/tokens/{}/'.format(self._id)
        return api_v2_url(path)

    # used by django and DRF
    def get_absolute_url(self):
        """Return ``absolute_api_v2_url``."""
        return self.absolute_api_v2_url
Beispiel #15
0
class ApiOAuth2Application(StoredObject):
    """Registration and key for user-created OAuth API applications

    CAS also reads this collection to build its master list of available
    applications, so any change to a field name in this model must be echoed
    in the CAS implementation.
    """
    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # Client ID and secret. The ID lives in its own field so that its format
    # is not restricted to database internals.
    # uuid4 is not *guaranteed* unique, but a collision is very unlikely.
    client_id = fields.StringField(default=lambda: uuid.uuid4().hex,
                                   unique=True,
                                   index=True)
    client_secret = fields.StringField(default=generate_client_secret)

    # Set to False if application is deactivated
    is_active = fields.BooleanField(default=True, index=True)

    owner = fields.ForeignField('User', index=True, required=True)

    # User-specified application descriptors
    name = fields.StringField(
        index=True,
        required=True,
        validate=[string_required, MaxLengthValidator(200)])
    description = fields.StringField(required=False,
                                     validate=MaxLengthValidator(1000))

    date_created = fields.DateTimeField(auto_now_add=True, editable=False)

    home_url = fields.StringField(required=True, validate=URLValidator())
    callback_url = fields.StringField(required=True, validate=URLValidator())

    def deactivate(self, save=False):
        """
        Deactivate an ApiOAuth2Application

        The database record stays; all tokens are revoked and ``is_active``
        is cleared, which hides this instance from the API. Saves only when
        ``save`` is true. Returns ``True``.
        """
        cas_client = cas.get_client()
        # If revoking raises (a CasHttpError on failed deletion), the flag
        # below is never cleared.
        cas_client.revoke_application_tokens(self.client_id,
                                             self.client_secret)

        self.is_active = False

        if save:
            self.save()
        return True

    def reset_secret(self, save=False):
        """
        Reset the secret of an ApiOAuth2Application

        Revokes all tokens first, then stores a newly generated secret.
        Saves only when ``save`` is true. Returns ``True``.
        """
        cas_client = cas.get_client()
        cas_client.revoke_application_tokens(self.client_id,
                                             self.client_secret)
        self.client_secret = generate_client_secret()

        if save:
            self.save()
        return True

    @property
    def url(self):
        """Site-relative settings URL, keyed by ``client_id``."""
        template = '/settings/applications/{}/'
        return template.format(self.client_id)

    @property
    def absolute_url(self):
        """``url`` joined onto ``settings.DOMAIN``."""
        return urlparse.urljoin(settings.DOMAIN, self.url)

    # Properties used by Django and DRF "Links: self" field
    @property
    def absolute_api_v2_url(self):
        """API v2 URL produced by ``api_v2_url`` for ``/applications/<client_id>/``."""
        api_path = '/applications/{}/'.format(self.client_id)
        return api_v2_url(api_path)

    # used by django and DRF
    def get_absolute_url(self):
        """Return ``absolute_api_v2_url``."""
        return self.absolute_api_v2_url
Beispiel #16
0
def get_object_id():
    """Return a newly created ``ObjectId`` rendered as a string."""
    fresh_id = ObjectId()
    return str(fresh_id)
Beispiel #17
0
class PreprintProvider(StoredObject):
    """Stored description of a preprint provider.

    Holds branding, contact and social fields, plus the subjects and
    licenses the provider accepts.
    """

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))
    name = fields.StringField(required=True)
    # File name of the logo; see ``logo_path``.
    logo_name = fields.StringField()
    header_text = fields.StringField()
    description = fields.StringField()
    domain = fields.StringField()
    # File name of the banner; see ``banner_path``.
    banner_name = fields.StringField()
    external_url = fields.StringField()
    email_contact = fields.StringField()
    email_support = fields.StringField()
    example = fields.StringField()
    access_token = EncryptedStringField()
    advisory_board = fields.StringField()
    social_twitter = fields.StringField()
    social_facebook = fields.StringField()
    social_instagram = fields.StringField()
    # List of rules. As read by the properties below, ``rule[0]`` is a
    # sequence of Subject ids and ``rule[1]`` is a flag that, when truthy,
    # also admits subjects whose parent is the last id in ``rule[0]``.
    subjects_acceptable = fields.DictionaryField(list=True, default=lambda: [])
    licenses_acceptable = fields.ForeignField('NodeLicense',
                                              list=True,
                                              default=lambda: [])

    @property
    def top_level_subjects(self):
        """Subjects at the top of the accepted hierarchy.

        Without any rule this is the query for all subjects that have no
        parents; otherwise it is a list of the loaded subjects named by the
        first id of each rule, de-duplicated (order not guaranteed).
        """
        if not self.subjects_acceptable:
            return Subject.find(Q('parents', 'eq', []))
        top_ids = {rule[0][0] for rule in self.subjects_acceptable}
        return [Subject.load(subject_id) for subject_id in top_ids]

    @property
    def all_subjects(self):
        """Query for every subject admitted by ``subjects_acceptable``.

        Every id listed in a rule is matched directly. When the rule's flag
        is truthy, children of the rule's last subject are matched as well,
        and for a single-id rule so are the children of those children.
        With no rules at all, the unfiltered ``Subject.find()`` is returned.
        """
        queries = []
        for rule in self.subjects_acceptable:
            subject_ids, include_children = rule[0], rule[1]
            if include_children:
                last_subject = Subject.load(subject_ids[-1])
                queries.append(Q('parents', 'eq', last_subject))
                if len(subject_ids) == 1:
                    # Descend one more level below the children of a
                    # top-level subject.
                    for child in Subject.find(Q('parents', 'eq', last_subject)):
                        queries.append(Q('parents', 'eq', child))
            for subject_id in subject_ids:
                queries.append(Q('_id', 'eq', subject_id))
        if not queries:
            return Subject.find()
        # reduce() over a single query returns that query unchanged.
        return Subject.find(reduce(lambda x, y: x | y, queries))

    def get_absolute_url(self):
        """Return ``absolute_api_v2_url`` followed by ``preprint_providers/<_id>``.

        NOTE(review): ``absolute_api_v2_url`` is itself built from
        ``/preprint_providers/<_id>/``, so that segment is appended a second
        time here. Left unchanged because callers may rely on the current
        value -- confirm whether the duplication is intended.
        """
        return '{}preprint_providers/{}'.format(self.absolute_api_v2_url,
                                                self._id)

    @property
    def absolute_api_v2_url(self):
        """API v2 URL produced by ``api_v2_url`` for this provider's path."""
        path = '/preprint_providers/{}/'.format(self._id)
        return api_v2_url(path)

    @property
    def logo_path(self):
        """Static path of the logo image, or ``None`` when no logo is set."""
        if self.logo_name:
            return '/static/img/preprint_providers/{}'.format(self.logo_name)
        return None

    @property
    def banner_path(self):
        """Static path of the banner image, or ``None`` when no banner is set.

        Uses ``banner_name``; the previous version tested and returned
        ``logo_name``, so it always mirrored ``logo_path``.
        """
        if self.banner_name:
            return '/static/img/preprint_providers/{}'.format(self.banner_name)
        return None