Ejemplo n.º 1
0
class OsfStorageFileVersion(StoredObject):
    """One stored version of an OsfStorageFileNode.

    Holds the backend location of the version's content together with
    the metadata (size, content type, dates) recorded for it.
    """

    _id = fields.StringField(
        primary=True,
        default=lambda: str(bson.ObjectId()),
    )
    creator = fields.ForeignField('user', required=True)

    # Creation date of this version record; this is the date shown to users.
    date_created = fields.DateTimeField(auto_now_add=True)

    # Everything needed to locate the file on the storage backend, e.g.
    # {
    #     'service': 'cloudfiles',  # required
    #     'container': 'osf',       # required
    #     'object': '20c53b',       # required
    #     'worker_url': '127.0.0.1',
    #     'worker_host': 'upload-service-1',
    # }
    location = fields.DictionaryField(validate=utils.validate_location)

    # Raw metadata taken from the upload service response, e.g.
    # {
    #     'size': 1024,                            # required
    #     'content_type': 'text/plain',            # required
    #     'date_modified': '2014-11-07T20:24:15',  # required
    #     'md5': 'd077f2',
    # }
    # NOTE(review): update_metadata() below reads the keys 'contentType'
    # and 'modified', not 'content_type' / 'date_modified' -- confirm which
    # schema the upload service actually sends.
    metadata = fields.DictionaryField()

    size = fields.IntegerField()
    content_type = fields.StringField()
    # Modification date reported by the third-party backend. It is not shown
    # to users because it can predate the upload when the file already
    # existed on the backend.
    date_modified = fields.DateTimeField()

    @property
    def location_hash(self):
        """The 'object' entry of ``location``."""
        return self.location['object']

    def is_duplicate(self, other):
        """Return True when ``other`` has the same location hash as this version."""
        mine = self.location_hash
        theirs = other.location_hash
        return mine == theirs

    def update_metadata(self, metadata):
        """Merge ``metadata`` into the stored metadata and refresh derived fields.

        Sets ``content_type`` (None when 'contentType' is absent), ``size``
        and ``date_modified`` from the merged metadata, then saves.

        :param dict metadata: new metadata entries to merge in
        :raises errors.MissingFieldError: if the merged metadata lacks
            'size' or 'modified'
        """
        self.metadata.update(metadata)
        self.content_type = self.metadata.get('contentType')
        try:
            self.size = self.metadata['size']
            modified = self.metadata['modified']
            self.date_modified = parse_date(modified, ignoretz=True)
        except KeyError as missing:
            raise errors.MissingFieldError(str(missing))
        self.save()
Ejemplo n.º 2
0
class Conference(StoredObject):
    """A conference (meeting) that accepts submissions, keyed by its endpoint."""

    #: Determines the email address for submission and the OSF url
    # Example: If endpoint is spsp2014, then the submission email addresses
    # are built from it and the OSF url will be osf.io/view/spsp2014
    endpoint = fields.StringField(primary=True, required=True, unique=True)
    #: Full name, e.g. "SPSP 2014"
    name = fields.StringField(required=True)
    info_url = fields.StringField(required=False, default=None)
    logo_url = fields.StringField(required=False, default=None)
    location = fields.StringField(required=False, default=None)
    start_date = fields.DateTimeField(default=None)
    end_date = fields.DateTimeField(default=None)
    active = fields.BooleanField(required=True)
    admins = fields.ForeignField('user', list=True, required=False, default=None)
    #: Whether to make submitted projects public
    public_projects = fields.BooleanField(required=False, default=True)
    poster = fields.BooleanField(default=True)
    talk = fields.BooleanField(default=True)
    # field_names are used to customize the text on the conference page, the categories
    # of submissions, and the email address to send material to.
    # The default hands out a *copy* of DEFAULT_FIELD_NAMES so that editing
    # one conference's field_names can never mutate the shared module-level
    # defaults (and, through them, every other conference).
    field_names = fields.DictionaryField(default=lambda: dict(DEFAULT_FIELD_NAMES))

    # Cached number of submissions
    num_submissions = fields.IntegerField(default=0)

    @classmethod
    def get_by_endpoint(cls, endpoint, active=True):
        """Look up a conference by endpoint, ignoring case.

        :param str endpoint: endpoint identifier to search for
        :param bool active: when true, only match conferences whose
            ``active`` flag is True
        :return: the matching Conference
        :raises ConferenceError: if the lookup raises ModularOdmException
        """
        query = Q('endpoint', 'iexact', endpoint)
        if active:
            query &= Q('active', 'eq', True)
        try:
            return Conference.find_one(query)
        except ModularOdmException:
            raise ConferenceError('Endpoint {0} not found'.format(endpoint))
Ejemplo n.º 3
0
class ArchiveTarget(StoredObject):
    """Result record for archiving one addon."""

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # addon_short_name of the addon being archived
    name = fields.StringField()

    status = fields.StringField(default=ARCHIVER_INITIATED)
    # dict form of a website.archiver.AggregateStatResult:
    # {
    #     'target_id': <str>,
    #     'target_name': <str>,
    #     'targets': <list>(StatResult | AggregateStatResult),
    #     'num_files': <int>,
    #     'disk_usage': <float>,
    # }
    stat_result = fields.DictionaryField()
    errors = fields.StringField(list=True)

    def __repr__(self):
        """Debug representation showing the class name, _id, name and status."""
        template = '<{0}(_id={1}, name={2}, status={3})>'
        class_name = self.__class__.__name__
        return template.format(class_name, self._id, self.name, self.status)
Ejemplo n.º 4
0
class OsfStorageFileVersion(StoredObject):
    """A version record for an OSF Storage file.

    Stores the backend location of the content plus the metadata fields
    (size, content type, modification date) parsed from the upload
    service response.
    """

    _id = oid_primary_key
    creator = fields.ForeignField('user', required=True)

    date_created = fields.DateTimeField(auto_now_add=True)

    # Dictionary specifying all information needed to locate file on backend
    # {
    #     'service': 'cloudfiles',  # required
    #     'container': 'osf',       # required
    #     'object': '20c53b',       # required
    #     'worker_url': '127.0.0.1',
    #     'worker_host': 'upload-service-1',
    # }
    location = fields.DictionaryField(validate=validate_location)

    # Dictionary containing raw metadata from upload service response
    # {
    #     'size': 1024,                            # required
    #     'content_type': 'text/plain',            # required
    #     'date_modified': '2014-11-07T20:24:15',  # required
    #     'md5': 'd077f2',
    # }
    metadata = fields.DictionaryField()

    size = fields.IntegerField()
    content_type = fields.StringField()
    date_modified = fields.DateTimeField()

    @property
    def location_hash(self):
        """The 'object' entry of ``location``."""
        return self.location['object']

    def is_duplicate(self, other):
        """Return True when ``other`` has the same location hash as this version."""
        return self.location_hash == other.location_hash

    def update_metadata(self, metadata):
        """Merge ``metadata`` into the stored metadata and set the parsed fields.

        For every ``(key, parser)`` pair in ``metadata_fields`` the value
        ``metadata[key]`` is run through ``parser`` and assigned to the
        attribute of the same name; the record is then saved.

        :param dict metadata: metadata from the upload service response
        :raises errors.MissingFieldError: if ``metadata`` lacks one of the
            keys in ``metadata_fields``; the error names the missing key
        """
        self.metadata.update(metadata)
        for key, parser in metadata_fields.iteritems():
            try:
                value = metadata[key]
            except KeyError:
                # Name the missing key so the failure can be diagnosed.
                raise errors.MissingFieldError(key)
            setattr(self, key, parser(value))
        self.save()
Ejemplo n.º 5
0
class BadgesUserSettings(AddonUserSettingsBase):
    """User-level settings for the badges add-on."""

    # e.g. {'id': '12345', 'reason': '<why the badge was revoked>'}
    revocation_list = fields.DictionaryField()

    @property
    def can_award(self):
        """True if this user has any badges or any system badges exist."""
        if self.badges:
            return True
        return len(Badge.get_system_badges()) > 0

    @property
    def badges(self):
        """Badges created by this user, followed by system badges not created by it."""
        own_badges = list(Badge.find(Q('creator', 'eq', self._id)))
        system_badges = [
            badge for badge in Badge.get_system_badges()
            if badge.creator != self
        ]
        return own_badges + system_badges

    @property
    def issued(self):
        """All assertions on this user's badges that were awarded by this user."""
        return [
            assertion
            for badge in self.badges
            for assertion in badge.assertions
            if assertion.awarder == self
        ]

    def get_badges_json(self):
        """Return the ``to_json()`` form of every badge in ``badges``."""
        serialized = []
        for badge in self.badges:
            serialized.append(badge.to_json())
        return serialized

    def get_badges_json_simple(self):
        """Return a list of ``{'value': <badge id>, 'text': <badge name>}`` dicts."""
        options = []
        for badge in self.badges:
            options.append({'value': badge._id, 'text': badge.name})
        return options

    def to_json(self, user):
        """Extend the parent's JSON with a 'badges' entry."""
        ret = super(BadgesUserSettings, self).to_json(user)
        ret['badges'] = self.get_badges_json()
        return ret

    def to_openbadge(self):
        """Return a dict with the owner's full name and username (as email)."""
        owner = self.owner
        ret = {
            'name': owner.fullname,
            'email': owner.username,
        }
        # Placeholder for later, when organizations get worked on
        # if self.description:
        #     ret['description'] = self.description,
        # if self.image:
        #     ret['image'] = self.image,
        # if self.url:
        #     ret['url'] = self.url
        # if self.revocation_list:
        #     ret['revocationList'] = self.revocation_list
        return ret

    def issued_json(self):
        """Return the ``to_json()`` form of every assertion in ``issued``."""
        return [assertion.to_json() for assertion in self.issued]
Ejemplo n.º 6
0
class Session(StoredObject):
    """A stored session with an arbitrary ``data`` dictionary."""

    _id = fields.StringField(
        primary=True,
        default=lambda: str(ObjectId()),
    )
    date_created = fields.DateTimeField(auto_now_add=True)
    date_modified = fields.DateTimeField(auto_now=True)
    data = fields.DictionaryField()

    @property
    def is_authenticated(self):
        """True when the session data contains an 'auth_user_id' key."""
        session_data = self.data
        return 'auth_user_id' in session_data
Ejemplo n.º 7
0
class Conference(StoredObject):
    """A conference (meeting) that accepts submissions, keyed by its endpoint."""

    #: Determines the email address for submission and the OSF url
    # Example: If endpoint is spsp2014, then the submission email addresses
    # are built from it and the OSF url will be osf.io/view/spsp2014
    endpoint = fields.StringField(primary=True, required=True, unique=True)
    #: Full name, e.g. "SPSP 2014"
    name = fields.StringField(required=True)
    info_url = fields.StringField(required=False, default=None)
    logo_url = fields.StringField(required=False, default=None)
    active = fields.BooleanField(required=True)
    admins = fields.ForeignField(
        'user', list=True, required=False, default=None
    )
    #: Whether to make submitted projects public
    public_projects = fields.BooleanField(required=False, default=True)
    poster = fields.BooleanField(default=True)
    talk = fields.BooleanField(default=True)
    # field_names customize the text on the conference page, the categories
    # of submissions, and the email address to send material to. The lambda
    # builds a fresh dict on every call.
    field_names = fields.DictionaryField(
        default=lambda: {
            'submission1': 'poster',
            'submission2': 'talk',
            'submission1_plural': 'posters',
            'submission2_plural': 'talks',
            'meeting_title_type': 'Posters & Talks',
            'add_submission': 'poster or talk',
            'mail_subject': 'Presentation title',
            'mail_message_body': 'Presentation abstract (if any)',
            'mail_attachment': 'Your presentation file (e.g., PowerPoint, PDF, etc.)',
        }
    )

    # Cached number of submissions
    num_submissions = fields.IntegerField(default=0)

    @classmethod
    def get_by_endpoint(cls, endpoint, active=True):
        """Look up a conference by endpoint, ignoring case.

        :param str endpoint: endpoint identifier to search for
        :param bool active: when true, only match conferences whose
            ``active`` flag is True
        :return: the matching Conference
        :raises ConferenceError: if the lookup raises ModularOdmException
        """
        criteria = Q('endpoint', 'iexact', endpoint)
        if active:
            criteria &= Q('active', 'eq', True)
        try:
            conference = Conference.find_one(criteria)
        except ModularOdmException:
            raise ConferenceError('Endpoint {0} not found'.format(endpoint))
        return conference
Ejemplo n.º 8
0
class TrashedFileNode(StoredObject):
    """Holding area for FileNodes that have been deleted."""
    _id = fields.StringField(primary=True)

    last_touched = fields.DateTimeField()
    history = fields.DictionaryField(list=True)
    versions = fields.ForeignField('FileVersion', list=True)

    node = fields.ForeignField('node', required=True)
    parent = fields.AbstractForeignField(default=None)

    is_file = fields.BooleanField(default=True)
    provider = fields.StringField(required=True)

    name = fields.StringField(required=True)
    path = fields.StringField(required=True)
    materialized_path = fields.StringField(required=True)

    checkout = fields.AbstractForeignField('User')
    deleted_by = fields.AbstractForeignField('User')
    deleted_on = fields.DateTimeField(auto_now_add=True)
    tags = fields.ForeignField('Tag', list=True)

    @property
    def deep_url(self):
        """URL of the 'addon_deleted_file' view for this trashed file.

        Lets deleted files resolve to a view that can show a friendly
        error message and http.GONE.
        """
        owner_node = self.node
        return owner_node.web_url_for('addon_deleted_file', trashed_id=self._id)

    def restore(self, recursive=True, parent=None):
        """Rebuild a FileNode from this record, then remove this record.

        :param bool recursive: also restore trashed nodes whose parent is
            this object, re-parenting them onto the restored node
        :param parent: node to restore under; when omitted the stored
            parent is used
        :return: the restored node
        :raises ValueError: if the restored node has no parent
        :raises KeyExistsException:
        """
        stored = self.to_storage()
        # Deletion bookkeeping does not belong on the restored node.
        for trash_only_key in ('deleted_on', 'deleted_by'):
            stored.pop(trash_only_key)

        if parent:
            stored['parent'] = parent._id
        elif stored['parent']:
            # parent is an AbstractForeignField, so it is stored as a tuple;
            # the first item is used as the parent value.
            stored['parent'] = stored['parent'][0]

        node_class = FileNode.resolve_class(self.provider, int(self.is_file))
        restored = node_class(**stored)
        if not restored.parent:
            raise ValueError('No parent to restore to')
        restored.save()

        if recursive:
            children = TrashedFileNode.find(Q('parent', 'eq', self))
            for child in children:
                child.restore(recursive=recursive, parent=restored)

        TrashedFileNode.remove_one(self)
        return restored
Ejemplo n.º 9
0
class QueuedMail(StoredObject):
    """An email queued to be sent to a user at a given time."""

    _id = fields.StringField(
        primary=True,
        default=lambda: str(bson.ObjectId()),
    )
    user = fields.ForeignField('User', index=True, required=True)
    to_addr = fields.StringField()
    send_at = fields.DateTimeField(index=True, required=True)

    # Name of the mail type (template + presend); must be a key of
    # queue_mail_types.
    email_type = fields.StringField(index=True, required=True)

    # Variables used to populate the mako template and to hold information
    # used by presends. Example:
    # self.data = {
    #    'nid' : 'ShIpTo',
    #    'fullname': 'Florence Welch',
    # }
    data = fields.DictionaryField()
    sent_at = fields.DateTimeField(index=True)

    def send_mail(self):
        """Send this queued email if its presend check and the user allow it.

        Runs the mail type's presend, builds the Mail object and adds
        'osf_url' to ``data``. The email is sent only when the presend
        result is truthy, the user is active and the user is subscribed to
        the help mailing list; otherwise this queued mail is removed.

        :return: True if the email was sent, False if it was removed instead
        """
        mail_struct = queue_mail_types[self.email_type]
        presend = mail_struct['presend'](self)
        mail = Mail(
            mail_struct['template'],
            subject=mail_struct['subject'],
            categories=mail_struct.get('categories', None),
        )
        self.data['osf_url'] = settings.DOMAIN

        should_send = (
            presend
            and self.user.is_active
            and self.user.osf_mailing_lists.get(settings.OSF_HELP_LIST)
        )
        if not should_send:
            self.__class__.remove_one(self)
            return False

        recipient = self.to_addr or self.user.username
        send_mail(recipient, mail, mimetype='html', **(self.data or {}))
        self.sent_at = datetime.utcnow()
        self.save()
        return True

    def find_sent_of_same_type_and_user(self):
        """Find already-sent emails with this email's type and user.

        Emails that are still queued (``sent_at`` is None) are excluded.

        :return: the query result for those emails
        """
        same_type = Q('email_type', 'eq', self.email_type)
        same_user = Q('user', 'eq', self.user)
        already_sent = Q('sent_at', 'ne', None)
        return self.__class__.find(same_type & same_user & already_sent)
Ejemplo n.º 10
0
class Session(StoredObject):
    """A stored session whose ``data`` always carries a 'history' entry."""

    _id = fields.StringField(
        primary=True,
        default=lambda: str(ObjectId()),
    )
    date_created = fields.DateTimeField(auto_now_add=True)
    date_modified = fields.DateTimeField(auto_now=True)
    data = fields.DictionaryField()

    def __init__(self, *args, **kwargs):
        """Initialize the record and make sure ``data['history']`` exists."""
        super(Session, self).__init__(*args, **kwargs)
        # Start with an empty history unless one is already present.
        self.data.setdefault('history', [])
Ejemplo n.º 11
0
class TrashedFileNode(StoredObject):
    """Holding area for FileNodes that have been deleted."""
    _id = fields.StringField(primary=True)

    last_touched = fields.DateTimeField()
    history = fields.DictionaryField(list=True)
    versions = fields.ForeignField('FileVersion', list=True)

    node = fields.ForeignField('node', required=True)
    parent = fields.AbstractForeignField(default=None)

    is_file = fields.BooleanField(default=True)
    provider = fields.StringField(required=True)

    name = fields.StringField(required=True)
    path = fields.StringField(required=True)
    materialized_path = fields.StringField(required=True)

    checkout = fields.AbstractForeignField('User')
    deleted_by = fields.AbstractForeignField('User')
    deleted_on = fields.DateTimeField(auto_now_add=True)

    @property
    def deep_url(self):
        """URL of the 'addon_deleted_file' view for this trashed file.

        Lets deleted files resolve to a view that can show a friendly
        error message and http.GONE.
        """
        owner_node = self.node
        return owner_node.web_url_for('addon_deleted_file', trashed_id=self._id)

    def restore(self):
        """Rebuild a FileNode from this record, then remove this record.

        :return: the restored node
        :raises KeyExistsException:
        """
        stored = self.to_storage()
        # Deletion bookkeeping does not belong on the restored node.
        for trash_only_key in ('deleted_on', 'deleted_by'):
            stored.pop(trash_only_key)

        node_class = FileNode.resolve_class(self.provider, int(self.is_file))
        restored = node_class(**stored)
        restored.save()

        TrashedFileNode.remove_one(self)
        return restored
Ejemplo n.º 12
0
class DropboxUserSettings(AddonUserSettingsBase):
    """Per-user Dropbox settings, including the OAuth access token."""

    dropbox_id = fields.StringField(required=False)
    access_token = fields.StringField(required=False)
    dropbox_info = fields.DictionaryField(required=False)

    # TODO(sloria): The `user` param is unnecessary for AddonUserSettings
    def to_json(self, user=None):
        """Serialize these settings to a dict.

        The result is the parent's serialization for the settings owner
        (the ``user`` argument is ignored) plus a 'has_auth' flag. Its
        keys and values are made available as variables in
        dropbox_user_settings.mako.
        """
        serialized = super(DropboxUserSettings, self).to_json(self.owner)
        serialized['has_auth'] = self.has_auth
        return serialized

    @property
    def has_auth(self):
        """True when an access token is stored."""
        token = self.access_token
        return bool(token)

    def delete(self, save=True):
        """Clear credentials and node authorizations, then delete via the parent."""
        self.clear()
        super(DropboxUserSettings, self).delete(save)

    def clear(self):
        """Drop the stored credentials and deauthorize every linked node.

        :return: this settings object
        """
        self.dropbox_id = None
        self.access_token = None
        for linked_node_settings in self.dropboxnodesettings__authorized:
            linked_node_settings.deauthorize(Auth(self.owner))
            linked_node_settings.save()
        return self

    def __repr__(self):
        template = u'<DropboxUserSettings(user={self.owner.username!r})>'
        return template.format(self=self)
Ejemplo n.º 13
0
class DropboxNodeSettings(AddonNodeSettingsBase):
    """Node-level Dropbox add-on settings: linked user settings and folder."""

    user_settings = fields.ForeignField(
        'dropboxusersettings',
        backref='authorized',
    )

    folder = fields.StringField(default=None)

    #: Information saved at the time of registration
    #: Note: This is unused right now
    registration_data = fields.DictionaryField()

    @property
    def display_name(self):
        """Label of the form '<add-on full name>: <folder>'."""
        return '{0}: {1}'.format(self.config.full_name, self.folder)

    @property
    def complete(self):
        """True when the node is authorized and a folder has been chosen."""
        if not self.has_auth:
            return False
        return self.folder is not None

    @property
    def has_auth(self):
        """Whether an access token is associated with this node."""
        return bool(self.user_settings and self.user_settings.has_auth)

    def find_or_create_file_guid(self, path):
        """Get or create the DropboxFile for ``path`` inside the linked folder."""
        relative_path = path.lstrip('/')
        full_path = os.path.join(self.folder, relative_path)
        return DropboxFile.get_or_create(self.owner, clean_path(full_path))

    def set_folder(self, folder, auth):
        """Select ``folder`` for this node and log the selection."""
        self.folder = folder
        # Record the change in the node's log
        logger_ = DropboxNodeLogger(node=self.owner, auth=auth)
        logger_.log(action='folder_selected', save=True)

    def set_user_auth(self, user_settings):
        """Import a user's Dropbox authentication and create a NodeLog.

        :param DropboxUserSettings user_settings: The user settings to link.
        """
        self.user_settings = user_settings
        owner_auth = Auth(user_settings.owner)
        logger_ = DropboxNodeLogger(node=self.owner, auth=owner_auth)
        logger_.log(action='node_authorized', save=True)

    # TODO: Is this used? If not, remove this and perhaps remove the 'deleted' field
    def delete(self, save=True):
        """Deauthorize without logging, then delete via the parent class."""
        self.deauthorize(add_log=False)
        super(DropboxNodeSettings, self).delete(save)

    def deauthorize(self, auth=None, add_log=True):
        """Remove user authorization from this node and log the event."""
        node = self.owner
        # Remember the folder before clearing it so the log can mention it.
        previous_folder = self.folder

        self.folder = None
        self.user_settings = None

        if not add_log:
            return
        logger_ = DropboxNodeLogger(node=node, auth=auth)
        logger_.log(
            action='node_deauthorized',
            extra={'folder': previous_folder},
            save=True,
        )

    def serialize_waterbutler_credentials(self):
        """Return ``{'token': <access token>}``.

        :raises exceptions.AddonError: if the node is not authorized
        """
        if not self.has_auth:
            raise exceptions.AddonError('Addon is not authorized')
        return {'token': self.user_settings.access_token}

    def serialize_waterbutler_settings(self):
        """Return ``{'folder': <folder>}``.

        :raises exceptions.AddonError: if no folder is configured
        """
        if not self.folder:
            raise exceptions.AddonError('Folder is not configured')
        return {'folder': self.folder}

    def create_waterbutler_log(self, auth, action, metadata):
        """Add a 'dropbox_<action>' log entry to the node for ``metadata['path']``."""
        joined = os.path.join(self.folder, metadata['path'])
        cleaned_path = clean_path(joined)
        url = self.owner.web_url_for(
            'addon_view_or_download_file',
            path=cleaned_path,
            provider='dropbox',
        )
        log_params = {
            'project': self.owner.parent_id,
            'node': self.owner._id,
            'path': cleaned_path,
            'folder': self.folder,
            'urls': {
                'view': url,
                'download': url + '?action=download'
            },
        }
        self.owner.add_log(
            'dropbox_{0}'.format(action),
            auth=auth,
            params=log_params,
        )

    def __repr__(self):
        template = u'<DropboxNodeSettings(node_id={self.owner._primary_key!r})>'
        return template.format(self=self)

    ##### Callback overrides #####

    def before_register_message(self, node, user):
        """Return warning text to display if user auth will be copied to a
        registration.
        """
        category = node.project_or_component
        if not (self.user_settings and self.user_settings.has_auth):
            return None
        return (
            u'The contents of Dropbox add-ons cannot be registered at this time; '
            u'the Dropbox folder linked to this {category} will not be included '
            u'as part of this registration.').format(category=category)

    # backwards compatibility
    before_register = before_register_message

    def before_fork_message(self, node, user):
        """Return warning text to display if user auth will be copied to a
        fork.
        """
        category = node.project_or_component
        authorized_by_user = (
            self.user_settings and self.user_settings.owner == user
        )
        if authorized_by_user:
            return (
                u'Because you have authorized the Dropbox add-on for this '
                '{category}, forking it will also transfer your authentication token to '
                'the forked {category}.').format(category=category)

        return (
            u'Because the Dropbox add-on has been authorized by a different '
            'user, forking it will not transfer authentication token to the forked '
            '{category}.').format(category=category)

    # backwards compatibility
    before_fork = before_fork_message

    def before_remove_contributor_message(self, node, removed):
        """Return warning text to display if removed contributor is the user
        who authorized the Dropbox addon
        """
        if not (self.user_settings and self.user_settings.owner == removed):
            return None
        category = node.project_or_component
        name = removed.fullname
        return (
            u'The Dropbox add-on for this {category} is authenticated by {name}. '
            'Removing this user will also remove write access to Dropbox '
            'unless another contributor re-authenticates the add-on.'
        ).format(category=category, name=name)

    # backwards compatibility
    before_remove_contributor = before_remove_contributor_message

    # Note: Registering Dropbox content is disabled for now; leaving this code
    # here in case we enable registrations later on.
    # @jmcarp
    # def after_register(self, node, registration, user, save=True):
    #     """After registering a node, copy the user settings and save the
    #     chosen folder.
    #
    #     :return: A tuple of the form (cloned_settings, message)
    #     """
    #     clone, message = super(DropboxNodeSettings, self).after_register(
    #         node, registration, user, save=False
    #     )
    #     # Copy user_settings and add registration data
    #     if self.has_auth and self.folder is not None:
    #         clone.user_settings = self.user_settings
    #         clone.registration_data['folder'] = self.folder
    #     if save:
    #         clone.save()
    #     return clone, message

    def after_fork(self, node, fork, user, save=True):
        """After forking, copy user settings if the user is the one who authorized
        the addon.

        :return: A tuple of the form (cloned_settings, message)
        """
        clone, _ = super(DropboxNodeSettings, self).after_fork(
            node=node, fork=fork, user=user, save=False
        )

        if self.user_settings and self.user_settings.owner == user:
            clone.user_settings = self.user_settings
            message = ('Dropbox authorization copied to forked {cat}.').format(
                cat=fork.project_or_component)
        else:
            settings_url = fork.web_url_for('node_setting')
            message = (
                u'Dropbox authorization not copied to forked {cat}. You may '
                'authorize this fork on the <a href="{url}">Settings</a> '
                'page.').format(url=settings_url,
                                cat=fork.project_or_component)
        if save:
            clone.save()
        return clone, message

    def after_remove_contributor(self, node, removed, auth=None):
        """If the removed contributor was the user who authorized the Dropbox
        addon, remove the auth credentials from this node.
        Return the message text that will be displayed to the user.
        """
        if not (self.user_settings and self.user_settings.owner == removed):
            return None

        self.user_settings = None
        self.save()

        message = (
            u'Because the Dropbox add-on for {category} "{title}" was authenticated '
            u'by {user}, authentication information has been deleted.'
        ).format(category=node.category_display,
                 title=node.title,
                 user=removed.fullname)

        # Only point to the settings page when someone other than the
        # removed contributor (or nobody identifiable) triggered the removal.
        if not auth or auth.user != removed:
            url = node.web_url_for('node_setting')
            message += (
                u' You can re-authenticate on the <a href="{url}">Settings</a> page.'
            ).format(url=url)
        return message

    def after_delete(self, node, user):
        """Deauthorize (with a log entry) on behalf of ``user`` and save."""
        self.deauthorize(Auth(user=user), add_log=True)
        self.save()
Ejemplo n.º 14
0
class AddonOAuthUserSettingsBase(AddonUserSettingsBase):
    """Abstract base for user settings of add-ons that authenticate via OAuth.

    Tracks, per node, which of the user's external accounts the node has
    been granted permission to use.
    """

    _meta = {
        'abstract': True,
    }

    # Keeps track of what nodes have been given permission to use external
    #   accounts belonging to the user.
    oauth_grants = fields.DictionaryField()
    # example:
    # {
    #     '<Node._id>': {
    #         '<ExternalAccount._id>': {
    #             <metadata>
    #         },
    #     }
    # }
    #
    # metadata here is the specific to each addon.

    # The existence of this property is used to determine whether or not
    #   an addon instance is an "OAuth addon" in
    #   AddonModelMixin.get_oauth_addons().
    oauth_provider = None

    serializer = serializer.OAuthAddonSerializer

    @property
    def has_auth(self):
        """True when the user has at least one external account for this provider."""
        return bool(self.external_accounts)

    @property
    def external_accounts(self):
        """The user's list of ``ExternalAccount`` instances for this provider"""
        return [
            x for x in self.owner.external_accounts
            if x.provider == self.oauth_provider.short_name
        ]

    def delete(self, save=True):
        """Revoke access for every external account, then delete via the parent."""
        for account in self.external_accounts:
            # NOTE(review): ``auth`` is not passed here; presumably the
            # must_be_logged_in decorator supplies it -- confirm.
            self.revoke_oauth_access(account, save=False)
        super(AddonOAuthUserSettingsBase, self).delete(save=save)

    def grant_oauth_access(self, node, external_account, metadata=None):
        """Give a node permission to use an ``ExternalAccount`` instance.

        :param node: node receiving the grant
        :param external_account: account the node may use
        :param dict metadata: optional key/value pairs stored on the grant
        :raises PermissionsError: if the owner does not own the account
        """
        # ensure the user owns the external_account
        if external_account not in self.owner.external_accounts:
            raise PermissionsError()

        metadata = metadata or {}

        # create an entry for the node, if necessary
        if node._id not in self.oauth_grants:
            self.oauth_grants[node._id] = {}

        # create an entry for the external account on the node, if necessary
        if external_account._id not in self.oauth_grants[node._id]:
            self.oauth_grants[node._id][external_account._id] = {}

        # update the metadata with the supplied values
        for key, value in metadata.iteritems():
            self.oauth_grants[node._id][external_account._id][key] = value

        self.save()

    @must_be_logged_in
    def revoke_oauth_access(self, external_account, auth, save=True):
        """Revoke all access to an ``ExternalAccount``.

        TODO: This should accept node and metadata params in the future, to
            allow fine-grained revocation of grants. That's not yet been needed,
            so it's not yet been implemented.
        """
        for node in self.get_nodes_with_oauth_grants(external_account):
            try:
                addon_settings = node.get_addon(external_account.provider, deleted=True)
            except AttributeError:
                # No associated addon settings despite oauth grant
                pass
            else:
                addon_settings.deauthorize(auth=auth)

        if User.find(Q('external_accounts', 'eq', external_account._id)).count() == 1:
            # Only this user is using the account, so revoke remote access as well.
            self.revoke_remote_oauth_access(external_account)

        for key in self.oauth_grants:
            self.oauth_grants[key].pop(external_account._id, None)
        if save:
            self.save()

    def revoke_remote_oauth_access(self, external_account):
        """ Makes outgoing request to remove the remote oauth grant
        stored by third-party provider.

        Individual addons must override this method, as it is addon-specific behavior.
        Not all addon providers support this through their API, but those that do
        should also handle the case where this is called with an external_account
        with invalid credentials, to prevent a user from being unable to disconnect
        an account.
        """
        pass

    def verify_oauth_access(self, node, external_account, metadata=None):
        """Verify that access has been previously granted.

        If metadata is not provided, this checks only if the node can access the
        account. This is suitable to check to see if the node's addon settings
        is still connected to an external account (i.e., the user hasn't revoked
        it in their user settings pane).

        If metadata is provided, this checks to see that all key/value pairs
        have been granted. This is suitable for checking access to a particular
        folder or other resource on an external provider.
        """

        metadata = metadata or {}

        # ensure the grant exists
        try:
            grants = self.oauth_grants[node._id][external_account._id]
        except KeyError:
            return False

        # Verify every key/value pair is in the grants dict
        for key, value in metadata.iteritems():
            if key not in grants or grants[key] != value:
                return False

        return True

    def get_nodes_with_oauth_grants(self, external_account):
        """Yield non-deleted nodes that hold a grant for ``external_account``.

        Node ids that no longer load (``Node.load`` returns None) are
        skipped instead of raising AttributeError.
        """
        for node_id, grants in self.oauth_grants.iteritems():
            if external_account._id not in grants:
                continue
            node = Node.load(node_id)
            if node is not None and not node.is_deleted:
                yield node

    def get_attached_nodes(self, external_account):
        """Yield granted nodes whose add-on settings use ``external_account``."""
        for node in self.get_nodes_with_oauth_grants(external_account):
            if node is None:
                continue
            node_settings = node.get_addon(self.oauth_provider.short_name)

            if node_settings is None:
                continue

            if node_settings.external_account == external_account:
                yield node

    def merge(self, user_settings):
        """Merge `user_settings` into this instance

        Grants present only on ``user_settings`` are copied over; existing
        entries on this instance win on conflict. Afterwards the other
        settings' grants are emptied and node settings that pointed at it
        are re-pointed to this instance.

        :raises TypeError: if ``user_settings`` is of a different class
        """
        if user_settings.__class__ is not self.__class__:
            raise TypeError('Cannot merge different addons')

        for node_id, data in user_settings.oauth_grants.iteritems():
            if node_id not in self.oauth_grants:
                self.oauth_grants[node_id] = data
            else:
                node_grants = user_settings.oauth_grants[node_id].iteritems()
                for ext_acct, meta in node_grants:
                    if ext_acct not in self.oauth_grants[node_id]:
                        self.oauth_grants[node_id][ext_acct] = meta
                    else:
                        # Iterate key/value pairs; iterating the dict itself
                        # would yield only keys and break the unpacking.
                        for k, v in meta.iteritems():
                            if k not in self.oauth_grants[node_id][ext_acct]:
                                self.oauth_grants[node_id][ext_acct][k] = v

        user_settings.oauth_grants = {}
        user_settings.save()

        try:
            config = settings.ADDONS_AVAILABLE_DICT[
                self.oauth_provider.short_name
            ]
            Model = config.settings_models['node']
        except KeyError:
            pass
        else:
            connected = Model.find(Q('user_settings', 'eq', user_settings))
            for node_settings in connected:
                node_settings.user_settings = self
                node_settings.save()

        self.save()

    def to_json(self, user):
        """Extend the parent's JSON with the serialized 'accounts'."""
        ret = super(AddonOAuthUserSettingsBase, self).to_json(user)

        ret['accounts'] = self.serializer(
            user_settings=self
        ).serialized_accounts

        return ret

    #############
    # Callbacks #
    #############

    def on_delete(self):
        """When the user deactivates the addon, clear auth for connected nodes.
        """
        super(AddonOAuthUserSettingsBase, self).on_delete()
        nodes = [Node.load(node_id) for node_id in self.oauth_grants.keys()]
        for node in nodes:
            if node is None:
                # Grant refers to a node that no longer loads; nothing to clear.
                continue
            node_addon = node.get_addon(self.oauth_provider.short_name)
            if node_addon and node_addon.user_settings == self:
                node_addon.clear_auth()
class SpamMixin(StoredObject):
    """Mixin to add to objects that can be marked as spam.

    Stores a ``spam_status``, the data that was submitted to the spam-check
    client, and abuse reports keyed by reporting user.  ``report_abuse``
    reads ``self.user``, which this mixin does not define, and ``check_spam``
    is left for concrete classes to implement.
    """

    _meta = {'abstract': True}

    # # Node fields that trigger an update to search on save
    # SPAM_UPDATE_FIELDS = {
    #     'spam_status',
    # }
    spam_status = fields.IntegerField(default=SpamStatus.UNKNOWN, index=True)
    spam_pro_tip = fields.StringField(default=None)
    # Data representing the original spam indication
    # - author: author name
    # - author_email: email of the author
    # - content: data flagged
    # - headers: request headers
    #   - Remote-Addr: ip address from request
    #   - User-Agent: user agent from request
    #   - Referer: referrer header from request (the key keeps the HTTP
    #     header's own spelling, "Referer")
    spam_data = fields.DictionaryField(default=dict)
    date_last_reported = fields.DateTimeField(default=None, index=True)

    # Reports is a dict of reports keyed on reporting user
    # Each report is a dictionary including:
    #  - date: date reported
    #  - retracted: if a report has been retracted
    #  - category: What type of spam does the reporter believe this is
    #  - text: Comment on the comment
    reports = fields.DictionaryField(default=dict, validate=_validate_reports)

    def flag_spam(self):
        """Mark the object as FLAGGED, but only if its status is UNKNOWN.

        Objects already FLAGGED, SPAM or HAM keep their status.  Nothing is
        saved here.
        """
        if self.spam_status == SpamStatus.UNKNOWN:
            self.spam_status = SpamStatus.FLAGGED

    def remove_flag(self, save=False):
        """Return a FLAGGED object to UNKNOWN once no active report remains.

        Does nothing (and does not save) when the object is not FLAGGED or
        when at least one report is explicitly marked ``retracted: False``.
        Reports without a ``retracted`` key count as retracted.

        :param save: Save changes
        """
        if self.spam_status != SpamStatus.FLAGGED:
            return
        for report in self.reports.values():
            if not report.get('retracted', True):
                return
        self.spam_status = SpamStatus.UNKNOWN
        if save:
            self.save()

    @property
    def is_spam(self):
        """True when the status is exactly SPAM."""
        return self.spam_status == SpamStatus.SPAM

    @property
    def is_spammy(self):
        """True when the status is FLAGGED or SPAM."""
        return self.spam_status in [SpamStatus.FLAGGED, SpamStatus.SPAM]

    def report_abuse(self, user, save=False, **kwargs):
        """Report object is spam or other abuse of OSF

        Flags the object, stores the report under the reporter's ``_id``
        (replacing any earlier report by the same user) and records the
        report date in ``date_last_reported``.

        :param user: User submitting report
        :param save: Save changes
        :param kwargs: Should include category and message; merged into the
            stored report, so they can override ``date`` and ``retracted``
        :raises ValueError: if user is reporting self
        """
        if user == self.user:
            raise ValueError('User cannot report self.')
        self.flag_spam()
        date = datetime.utcnow()
        report = {'date': date, 'retracted': False}
        report.update(kwargs)
        if 'text' not in report:
            report['text'] = None
        self.reports[user._id] = report
        # Read back from the report: kwargs may have supplied their own date.
        self.date_last_reported = report['date']
        if save:
            self.save()

    def retract_report(self, user, save=False):
        """Retract last report by user

        Only marks the last report as retracted because there could be
        history in how the object is edited that requires a user
        to flag or retract even if object is marked as HAM.

        :param user: User retracting
        :param save: Save changes
        :raises ValueError: if the user has no report on this object
        """
        if user._id in self.reports:
            if not self.reports[user._id]['retracted']:
                self.reports[user._id]['retracted'] = True
                self.remove_flag()
        else:
            raise ValueError('User has not reported this content')
        if save:
            self.save()

    def _spam_submission_kwargs(self):
        """Build the client keyword arguments from the stored ``spam_data``.

        Shared by ``confirm_ham`` and ``confirm_spam`` so both submit the
        same payload.  ``Remote-Addr`` is required (``KeyError`` if absent);
        ``User-Agent`` and ``Referer`` may be missing.
        """
        headers = self.spam_data['headers']
        return {
            'user_ip': headers['Remote-Addr'],
            'user_agent': headers.get('User-Agent'),
            'referrer': headers.get('Referer'),
            'comment_content': self.spam_data['content'],
            'comment_author': self.spam_data['author'],
            'comment_author_email': self.spam_data['author_email'],
        }

    def confirm_ham(self, save=False):
        """Set the status to HAM, notifying the spam client when relevant.

        :param save: Save changes
        """
        # Not every class using the mixin runs a spam check, so spam_data may
        # be empty.  Only submit ham when the object was incorrectly treated
        # as spam, i.e. it is currently FLAGGED or SPAM.
        if settings.SPAM_CHECK_ENABLED and self.spam_data and self.spam_status in [
                SpamStatus.FLAGGED, SpamStatus.SPAM
        ]:
            client = _get_client()
            client.submit_ham(**self._spam_submission_kwargs())
            logger.info('confirm_ham update sent')
        self.spam_status = SpamStatus.HAM
        if save:
            self.save()

    def confirm_spam(self, save=False):
        """Set the status to SPAM, notifying the spam client when relevant.

        :param save: Save changes
        """
        # Not every class using the mixin runs a spam check, so spam_data may
        # be empty.  Only submit spam when the object had not been caught,
        # i.e. it is currently UNKNOWN or HAM.
        if settings.SPAM_CHECK_ENABLED and self.spam_data and self.spam_status in [
                SpamStatus.UNKNOWN, SpamStatus.HAM
        ]:
            client = _get_client()
            client.submit_spam(**self._spam_submission_kwargs())
            logger.info('confirm_spam update sent')
        self.spam_status = SpamStatus.SPAM
        if save:
            self.save()

    @abc.abstractmethod
    def check_spam(self, saved_fields, request_headers, save=False):
        """Must return is_spam"""
        pass

    def do_check_spam(self, author, author_email, content, request_headers):
        """Ask the spam client about ``content`` and record the outcome.

        Returns ``False`` immediately for HAM objects and ``True`` for
        objects that are already FLAGGED or SPAM, without calling the client.
        Otherwise the client's verdict is returned, the submitted data is
        kept in ``spam_data`` and the object is flagged when the verdict is
        spam.  Nothing is saved here.

        :param author: Author name sent to the client
        :param author_email: Author email sent to the client
        :param content: Content to check
        :param request_headers: Mapping that must contain ``Remote-Addr`` and
            may contain ``User-Agent`` and ``Referer``
        :returns: The client's is-spam verdict, or the short-circuit value
        """
        if self.spam_status == SpamStatus.HAM:
            return False
        if self.is_spammy:
            return True

        client = _get_client()
        remote_addr = request_headers['Remote-Addr']
        user_agent = request_headers.get('User-Agent')
        referer = request_headers.get('Referer')
        is_spam, pro_tip = client.check_comment(
            user_ip=remote_addr,
            user_agent=user_agent,
            referrer=referer,
            comment_content=content,
            comment_author=author,
            comment_author_email=author_email)
        self.spam_pro_tip = pro_tip
        # Keep exactly what was submitted so confirm_ham / confirm_spam can
        # resend it later.
        self.spam_data['headers'] = {
            'Remote-Addr': remote_addr,
            'User-Agent': user_agent,
            'Referer': referer,
        }
        self.spam_data['content'] = content
        self.spam_data['author'] = author
        self.spam_data['author_email'] = author_email
        if is_spam:
            self.flag_spam()
        return is_spam
Ejemplo n.º 16
0
class SpamMixin(StoredObject):
    """Mixin for stored objects that users can report as spam.

    Keeps a ``spam_status`` (one of the integer constants below) and the
    abuse reports keyed by reporting user.  ``report_abuse`` reads
    ``self.user``, which this mixin does not define.
    """

    _meta = {'abstract': True}

    # Values stored in ``spam_status``
    UNKNOWN = 0
    FLAGGED = 1
    SPAM = 2
    HAM = 4

    spam_status = fields.IntegerField(default=UNKNOWN, index=True)

    # ``reports`` maps the reporting user's id to a report dictionary with:
    #  - date: date reported
    #  - retracted: if a report has been retracted
    #  - category: What type of spam does the reporter believe this is
    #  - text: Comment on the comment
    reports = fields.DictionaryField(default=dict, validate=validate_reports)

    def flag_spam(self, save=False):
        """Move an UNKNOWN object to FLAGGED; other statuses are kept.

        :param save: Save changes (saves even when the status did not change)
        """
        status_is_unknown = self.spam_status == self.UNKNOWN
        if status_is_unknown:
            self.spam_status = self.FLAGGED
        if save:
            self.save()

    def remove_flag(self, save=False):
        """Return a FLAGGED object to UNKNOWN once no active report remains.

        Nothing happens (and nothing is saved) when the object is not FLAGGED
        or when some report is explicitly marked ``retracted: False``.

        :param save: Save changes
        """
        if self.spam_status == self.FLAGGED:
            has_active_report = any(
                not entry.get('retracted', True)
                for entry in self.reports.values()
            )
            if not has_active_report:
                self.spam_status = self.UNKNOWN
                if save:
                    self.save()

    def confirm_ham(self, save=False):
        """Set the status to HAM.

        :param save: Save changes
        """
        self.spam_status = self.HAM
        if not save:
            return
        self.save()

    def confirm_spam(self, save=False):
        """Set the status to SPAM.

        :param save: Save changes
        """
        self.spam_status = self.SPAM
        if not save:
            return
        self.save()

    @property
    def is_spam(self):
        """True when the status is exactly SPAM."""
        return self.spam_status == self.SPAM

    def report_abuse(self, user, save=False, **kwargs):
        """Record a report that this object is spam or other abuse of OSF.

        The object is flagged and the report is stored under the reporter's
        ``_id``, replacing any earlier report by the same user.

        :param user: User submitting report
        :param save: Save changes
        :param kwargs: Should include category and message; merged into the
            stored report
        :raises ValueError: if user is reporting self
        """
        if user == self.user:
            raise ValueError('User cannot report self.')
        self.flag_spam()
        entry = dict(date=datetime.utcnow(), retracted=False)
        entry.update(kwargs)
        entry.setdefault('text', None)
        self.reports[user._id] = entry
        if save:
            self.save()

    def retract_report(self, user, save=False):
        """Retract the report stored for ``user``.

        Only marks the last report as retracted because there could be
        history in how the object is edited that requires a user
        to flag or retract even if object is marked as HAM.

        :param user: User retracting
        :param save: Save changes
        :raises ValueError: if the user has no report on this object
        """
        if user._id not in self.reports:
            raise ValueError('User has not reported this content')
        if not self.reports[user._id]['retracted']:
            self.reports[user._id]['retracted'] = True
            self.remove_flag()
        if save:
            self.save()
Ejemplo n.º 17
0
class StoredFileNode(StoredObject):
    """The storage backend for FileNode objects.

    This class should generally not be used or created manually, as FileNode
    contains all the helpers required.  A FileNode wraps a StoredFileNode to
    provide a usable abstraction layer.
    """

    # Two non-unique compound indices: one including ``path``, one without.
    __indices__ = [
        {
            'unique': False,
            'key_or_list': [
                ('path', pymongo.ASCENDING),
                ('node', pymongo.ASCENDING),
                ('is_file', pymongo.ASCENDING),
                ('provider', pymongo.ASCENDING),
            ],
        },
        {
            'unique': False,
            'key_or_list': [
                ('node', pymongo.ASCENDING),
                ('is_file', pymongo.ASCENDING),
                ('provider', pymongo.ASCENDING),
            ],
        },
    ]

    _id = fields.StringField(primary=True,
                             default=lambda: str(bson.ObjectId()))

    # The last time the touch method was called on this FileNode
    last_touched = fields.DateTimeField()
    # A list of dictionaries sorted by the 'modified' key: the raw output of
    # the metadata request, deduped by etag.  Entries are added regardless of
    # whether they can be pinned to a version.
    history = fields.DictionaryField(list=True)
    # A concrete version of a FileNode, must have an identifier
    versions = fields.ForeignField('FileVersion', list=True)

    node = fields.ForeignField('Node', required=True)
    parent = fields.ForeignField('StoredFileNode', default=None)

    is_file = fields.BooleanField(default=True)
    provider = fields.StringField(required=True)

    name = fields.StringField(required=True)
    path = fields.StringField(required=True)
    materialized_path = fields.StringField(required=True)

    # The User that has this file "checked out"
    # Should only be used for OsfStorage
    checkout = fields.AbstractForeignField('User')

    # Tags for a file, currently only used for osfStorage
    tags = fields.ForeignField('Tag', list=True)

    @property
    def pk(self):
        """Alias of ``_id``, for Django compatibility."""
        return self._id

    # TODO Find a better way
    @property
    def node_id(self):
        """The ``_id`` of the owning node, for Django compatibility."""
        owner = self.node
        return owner._id

    @property
    def deep_url(self):
        """The ``deep_url`` of the wrapped FileNode."""
        file_node = self.wrapped()
        return file_node.deep_url

    def wrapped(self):
        """Wrap self in a FileNode subclass.

        The subclass is resolved from ``provider`` and ``is_file`` (passed as
        an int) and instantiated with this stored object.
        """
        node_class = FileNode.resolve_class(self.provider, int(self.is_file))
        return node_class(self)

    def get_guid(self, create=False):
        """Attempt to find a Guid that points to this object.

        :param create: Generate a new Guid when none exists
        :rtype: Guid
        :returns: The first matching Guid, a newly generated one when
            ``create`` is true, otherwise ``None``
        """
        try:
            # Sometimes multiple GUIDs exist for a single object; the first
            # one is used.
            return Guid.find(Q('referent', 'eq', self))[0]
        except IndexError:
            pass
        if create:
            return Guid.generate(self)
        return None
Ejemplo n.º 18
0
class User(GuidStoredObject, AddonModelMixin):

    # User fields that trigger an update to the search engine on save
    SEARCH_UPDATE_FIELDS = {
        'fullname',
        'given_name',
        'middle_names',
        'family_name',
        'suffix',
        'merged_by',
        'date_disabled',
        'date_confirmed',
        'jobs',
        'schools',
        'social',
    }

    # TODO: Add SEARCH_UPDATE_NODE_FIELDS, for fields that should trigger a
    #   search update for all nodes to which the user is a contributor.

    SOCIAL_FIELDS = {
        'orcid': u'http://orcid.com/{}',
        'github': u'http://github.com/{}',
        'scholar': u'http://scholar.google.com/citation?user={}',
        'twitter': u'http://twitter.com/{}',
        'personal': u'{}',
        'linkedIn': u'https://www.linkedin.com/profile/view?id={}',
        'impactStory': u'https://impactstory.org/{}',
        'researcherId': u'http://researcherid.com/rid/{}',
    }

    # This is a GuidStoredObject, so this will be a GUID.
    _id = fields.StringField(primary=True)

    # The primary email address for the account.
    # This value is unique, but multiple "None" records exist for:
    #   * unregistered contributors where an email address was not provided.
    # TODO: Update mailchimp subscription on username change in user.save()
    username = fields.StringField(required=False, unique=True, index=True)

    # Hashed. Use `User.set_password` and `User.check_password`
    password = fields.StringField()

    fullname = fields.StringField(required=True, validate=string_required)

    # user has taken action to register the account
    is_registered = fields.BooleanField(index=True)

    # user has claimed the account
    # TODO: This should be retired - it always reflects is_registered.
    #   While a few entries exist where this is not the case, they appear to be
    #   the result of a bug, as they were all created over a small time span.
    is_claimed = fields.BooleanField(default=False, index=True)

    # a list of strings - for internal use
    system_tags = fields.StringField(list=True)

    # security emails that have been sent
    # TODO: This should be removed and/or merged with system_tags
    security_messages = fields.DictionaryField()
    # Format: {
    #   <message label>: <datetime>
    #   ...
    # }

    # user was invited (as opposed to registered unprompted)
    is_invited = fields.BooleanField(default=False, index=True)

    # Per-project unclaimed user data:
    # TODO: add validation
    unclaimed_records = fields.DictionaryField(required=False)
    # Format: {
    #   <project_id>: {
    #       'name': <name that referrer provided>,
    #       'referrer_id': <user ID of referrer>,
    #       'token': <token used for verification urls>,
    #       'email': <email the referrer provided or None>,
    #       'claimer_email': <email the claimer entered or None>,
    #       'last_sent': <timestamp of last email sent to referrer or None>
    #   }
    #   ...
    # }

    # Time of last sent notification email to newly added contributors
    # Format : {
    #   <project_id>: {
    #       'last_sent': time.time()
    #   }
    #   ...
    # }
    contributor_added_email_records = fields.DictionaryField(default=dict)

    # The user into which this account was merged
    merged_by = fields.ForeignField('user',
                                    default=None,
                                    backref='merged',
                                    index=True)

    # verification key used for resetting password
    verification_key = fields.StringField()

    # confirmed emails
    #   emails should be stripped of whitespace and lower-cased before appending
    # TODO: Add validator to ensure an email address only exists once across
    # all User's email lists
    emails = fields.StringField(list=True)

    # email verification tokens
    #   see also ``unconfirmed_emails``
    email_verifications = fields.DictionaryField(default=dict)
    # Format: {
    #   <token> : {'email': <email address>,
    #              'expiration': <datetime>}
    # }

    # email lists to which the user has chosen a subscription setting
    mailing_lists = fields.DictionaryField()
    # Format: {
    #   'list1': True,
    #   'list2': False,
    #    ...
    # }

    # the date this user was registered
    # TODO: consider removal - this can be derived from date_registered
    date_registered = fields.DateTimeField(auto_now_add=dt.datetime.utcnow,
                                           index=True)

    # watched nodes are stored via a list of WatchConfigs
    watched = fields.ForeignField("WatchConfig", list=True, backref="watched")

    # list of users recently added to nodes as a contributor
    recently_added = fields.ForeignField("user",
                                         list=True,
                                         backref="recently_added")

    # Attached external accounts (OAuth)
    external_accounts = fields.ForeignField("externalaccount",
                                            list=True,
                                            backref="connected")

    # CSL names
    given_name = fields.StringField()
    middle_names = fields.StringField()
    family_name = fields.StringField()
    suffix = fields.StringField()

    # Employment history
    jobs = fields.DictionaryField(list=True, validate=validate_history_item)
    # Format: {
    #     'title': <position or job title>,
    #     'institution': <institution or organization>,
    #     'department': <department>,
    #     'location': <location>,
    #     'startMonth': <start month>,
    #     'startYear': <start year>,
    #     'endMonth': <end month>,
    #     'endYear': <end year>,
    #     'ongoing': <boolean>
    # }

    # Educational history
    schools = fields.DictionaryField(list=True, validate=validate_history_item)
    # Format: {
    #     'degree': <degree>,
    #     'institution': <institution or organization>,
    #     'department': <department>,
    #     'location': <location>,
    #     'startMonth': <start month>,
    #     'startYear': <start year>,
    #     'endMonth': <end month>,
    #     'endYear': <end year>,
    #     'ongoing': <boolean>
    # }

    # Social links
    social = fields.DictionaryField(validate=validate_social)
    # Format: {
    #     'personal': <personal site>,
    #     'twitter': <twitter id>,
    # }

    # hashed password used to authenticate to Piwik
    piwik_token = fields.StringField()

    # date the user last logged in via the web interface
    date_last_login = fields.DateTimeField()

    # date the user first successfully confirmed an email address
    date_confirmed = fields.DateTimeField(index=True)

    # When the user was disabled.
    date_disabled = fields.DateTimeField(index=True)

    # when comments for a node were last viewed
    comments_viewed_timestamp = fields.DictionaryField()
    # Format: {
    #   'node_id': 'timestamp'
    # }

    # timezone for user's locale (e.g. 'America/New_York')
    timezone = fields.StringField(default='Etc/UTC')

    # user language and locale data (e.g. 'en_US')
    locale = fields.StringField(default='en_US')

    _meta = {'optimistic': True}

    def __repr__(self):
        """Return a debugging representation with the username and id."""
        template = '<User({0!r}) with id {1!r}>'
        return template.format(self.username, self._id)

    def __str__(self):
        """Return the full name encoded as ASCII.

        Characters that cannot be encoded are substituted by the ``replace``
        error handler.
        """
        ascii_name = self.fullname.encode('ascii', 'replace')
        return ascii_name

    # ``__unicode__`` shares the ASCII-only implementation above.
    __unicode__ = __str__

    # For compatibility with Django auth
    @property
    def pk(self):
        """Primary-key alias that returns ``_id``."""
        primary_key = self._id
        return primary_key

    @property
    def email(self):
        """Return ``username``, which holds the account's primary email."""
        primary_email = self.username
        return primary_email

    def is_authenticated(self):
        """Always ``True``; needed for Django compatibility."""
        return True

    def is_anonymous(self):
        """Always ``False``: a ``User`` instance is never anonymous."""
        return False

    @property
    def absolute_api_v2_url(self):
        """URL produced by ``absolute_reverse`` for ``users:user-detail``.

        The user's ``pk`` is passed as the ``user_id`` URL keyword argument.
        """
        # Imported lazily to avoid a circular dependency
        from api.base.utils import absolute_reverse
        url_kwargs = {'user_id': self.pk}
        return absolute_reverse('users:user-detail', kwargs=url_kwargs)

    # used by django and DRF
    def get_absolute_url(self):
        """Return ``absolute_api_v2_url``."""
        api_url = self.absolute_api_v2_url
        return api_url

    @classmethod
    def create_unregistered(cls, fullname, email=None):
        """Build a new invited, unregistered user.

        The CSL name fields are guessed from ``fullname``.  The user is
        returned without being saved here.

        :param str fullname: Full name of the user
        :param str email: Stored as ``username``; may be ``None``
        :returns: The new user instance
        """
        attributes = dict(
            username=email,
            fullname=fullname,
            is_invited=True,
            is_registered=False,
        )
        new_user = cls(**attributes)
        new_user.update_guessed_names()
        return new_user

    @classmethod
    def create(cls, username, password, fullname):
        """Build a user with guessed CSL names and a hashed password.

        The user is returned without being saved here.

        :param str username: Stored as ``username``
        :param str password: Raw password, hashed via ``set_password``
        :param str fullname: Full name of the user
        :returns: The new user instance
        """
        new_user = cls(username=username, fullname=fullname)
        new_user.update_guessed_names()
        new_user.set_password(password)
        return new_user

    @classmethod
    def create_unconfirmed(cls, username, password, fullname, do_confirm=True):
        """Create a new user who has begun registration but still needs to
        verify their primary email address (username).

        An email verification token is added for ``username`` and the user
        is marked as not registered.  ``do_confirm`` is accepted but not used
        by this method.

        :returns: The new user instance
        """
        pending_user = cls.create(username, password, fullname)
        pending_user.add_unconfirmed_email(username)
        pending_user.is_registered = False
        return pending_user

    @classmethod
    def create_confirmed(cls, username, password, fullname):
        """Create a user that is already registered, claimed and confirmed.

        ``date_confirmed`` is copied from the new user's ``date_registered``.

        :returns: The new user instance
        """
        confirmed_user = cls.create(username, password, fullname)
        confirmed_user.is_registered = True
        confirmed_user.is_claimed = True
        confirmed_user.date_confirmed = confirmed_user.date_registered
        return confirmed_user

    @classmethod
    def from_cookie(cls, cookie, secret=None):
        """Attempt to load a user from their signed cookie.

        :param cookie: The signed cookie value
        :param secret: Signing key; defaults to ``settings.SECRET_KEY``
        :returns: None if a user cannot be loaded else User
        """
        if not cookie:
            return None

        signing_key = secret or settings.SECRET_KEY
        signer = itsdangerous.Signer(signing_key)
        try:
            session_id = signer.unsign(cookie)
        except itsdangerous.BadSignature:
            # The cookie was not signed with this key
            return None

        stored_session = Session.load(session_id)
        if stored_session is not None:
            return cls.load(stored_session.data.get('auth_user_id'))
        return None

    def get_or_create_cookie(self, secret=None):
        """Return a signed cookie for this user's session.

        Uses the most recently modified session whose ``auth_user_id`` is
        this user; when there is none, a new session is created and saved.

        :param str secret: The key to sign the cookie with; defaults to
            ``settings.SECRET_KEY``
        :returns: The signed cookie
        """
        signing_key = secret or settings.SECRET_KEY
        by_user = Q('data.auth_user_id', 'eq', self._id)
        latest = Session.find(by_user).sort('-date_modified').limit(1)

        if latest.count() > 0:
            active_session = latest[0]
        else:
            session_data = {
                'auth_user_id': self._id,
                'auth_user_username': self.username,
                'auth_user_fullname': self.fullname,
            }
            active_session = Session(data=session_data)
            active_session.save()

        return itsdangerous.Signer(signing_key).sign(active_session._id)

    def update_guessed_names(self):
        """Update the CSL name fields inferred from the full name."""
        guessed = utils.impute_names(self.fullname)
        # (attribute on the user, key in the imputed-names result)
        field_to_key = (
            ('given_name', 'given'),
            ('middle_names', 'middle'),
            ('family_name', 'family'),
            ('suffix', 'suffix'),
        )
        for field_name, key in field_to_key:
            setattr(self, field_name, guessed[key])

    def register(self, username, password=None):
        """Register the user under ``username``.

        Sets the username (adding it to ``emails`` if missing), optionally
        sets the password, marks the user registered and claimed, stamps
        ``date_confirmed`` with the current UTC time, updates search and
        emits the ``user_confirmed`` signal.  Nothing is saved here.

        :param str username: The username to register
        :param str password: Raw password; left unchanged when falsy
        :returns: self
        """
        self.username = username
        if password:
            self.set_password(password)
        if username not in self.emails:
            self.emails.append(username)
        self.is_registered = self.is_claimed = True
        self.date_confirmed = dt.datetime.utcnow()

        self.update_search()
        self.update_search_nodes()

        # Let listeners know that a user has confirmed
        signals.user_confirmed.send(self)

        return self

    def add_unclaimed_record(self, node, referrer, given_name, email=None):
        """Add a new project entry in the unclaimed records dictionary.

        Any existing record for the same node is replaced.

        :param Node node: Node this unclaimed user was added to.
        :param User referrer: User who referred this user.
        :param str given_name: The full name that the referrer gave for this user.
        :param str email: The given email address; stored lower-cased and
            stripped, or ``None`` when falsy.
        :raises PermissionsError: if the referrer cannot edit the node
        :returns: The added record
        """
        if not node.can_edit(user=referrer):
            raise PermissionsError(
                'Referrer does not have permission to add a contributor '
                'to project {0}'.format(node._primary_key))

        clean_email = email.lower().strip() if email else None
        record = {
            'name': given_name,
            'referrer_id': referrer._primary_key,
            'token': generate_confirm_token(),
            'email': clean_email
        }
        self.unclaimed_records[node._primary_key] = record
        return record

    def display_full_name(self, node=None):
        """Return the full name as it would display in a contributor list for
        a given node.

        When ``node`` is given and the user has an unclaimed record for it,
        that record's ``name`` is returned; otherwise ``fullname`` is.

        NOTE: Unclaimed users may have a different name for different nodes.
        """
        record = None
        if node:
            record = self.unclaimed_records.get(node._primary_key, None)
        if record:
            return record['name']
        return self.fullname

    @property
    def is_active(self):
        """Whether the user is active.

        The user must be registered, have a password, not be merged, not be
        disabled and be confirmed.  The conditions are evaluated in that
        order and the result of the ``and`` chain is returned as-is.

        :return: bool
        """
        return (
            self.is_registered and
            self.password is not None and
            not self.is_merged and
            not self.is_disabled and
            self.is_confirmed
        )

    def get_unclaimed_record(self, project_id):
        """Get an unclaimed record for a given project_id.

        :param project_id: Key into ``unclaimed_records``
        :raises: ValueError if there is no record for the given project.
        :returns: The stored record
        """
        try:
            return self.unclaimed_records[project_id]
        except KeyError:
            # Callers expect ValueError rather than KeyError
            message = (
                'No unclaimed record for user {self._id} on node {project_id}'
            ).format(self=self, project_id=project_id)
            raise ValueError(message)

    def get_claim_url(self, project_id, external=False):
        """Return the URL that an unclaimed user should use to claim their
        account on the given project.

        :param project_id: The project ID for the unclaimed record
        :param bool external: Prefix the URL with ``settings.DOMAIN`` instead
            of ``/``
        :raises: ValueError if a record doesn't exist for the given project ID
        :rtype: str
        :returns: The claim URL, including the record's token
        """
        uid = self._primary_key
        base_url = settings.DOMAIN if external else '/'
        token = self.get_unclaimed_record(project_id)['token']
        url_template = '{base_url}user/{uid}/{project_id}/claim/?token={token}'
        return url_template.format(
            base_url=base_url,
            uid=uid,
            project_id=project_id,
            token=token,
        )

    def set_password(self, raw_password):
        """Store the hash of ``raw_password`` as this user's password."""
        hashed = generate_password_hash(raw_password)
        self.password = hashed

    def check_password(self, raw_password):
        """Return a boolean of whether ``raw_password`` was correct.

        Always ``False`` when either the stored password or ``raw_password``
        is empty.
        """
        if self.password and raw_password:
            return check_password_hash(self.password, raw_password)
        return False

    @property
    def csl_given_name(self):
        """Return the given name followed by the initial of each middle name.

        ``middle_names`` is split on runs of whitespace and the first
        character of every piece is appended, space-separated, after
        ``given_name``.
        """
        parts = [self.given_name]
        if self.middle_names:
            # re.split yields empty strings when the value starts or ends
            # with whitespace; skip them so taking the first character of a
            # piece cannot raise IndexError.
            parts.extend(
                each[0]
                for each in re.split(r'\s+', self.middle_names)
                if each
            )
        return ' '.join(parts)

    @property
    def csl_name(self):
        """Return a dict with the ``family`` and ``given`` name parts.

        ``given`` comes from ``csl_given_name``.
        """
        return dict(
            family=self.family_name,
            given=self.csl_given_name,
        )

    # TODO: This should not be on the User object.
    def change_password(self, raw_old_password, raw_new_password,
                        raw_confirm_password):
        """Change the password for this user to the hash of ``raw_new_password``.

        All three values are stripped (``None`` counts as empty) before being
        validated.  Every problem found is collected, and the password is only
        changed when there are none.

        :param raw_old_password: The current password
        :param raw_new_password: The desired password (6 to 256 characters)
        :param raw_confirm_password: Must equal the new password
        :raises ChangePasswordError: with the list of problems found
        """
        old_pw = (raw_old_password or '').strip()
        new_pw = (raw_new_password or '').strip()
        confirm_pw = (raw_confirm_password or '').strip()

        issues = []

        # The old password must be valid and differ from the new one
        if not self.check_password(old_pw):
            issues.append('Old password is invalid')
        elif old_pw == new_pw:
            issues.append('Password cannot be the same')

        # Blank values take precedence over the length checks
        if not (old_pw and new_pw and confirm_pw):
            issues.append('Passwords cannot be blank')
        elif len(new_pw) < 6:
            issues.append('Password should be at least six characters')
        elif len(new_pw) > 256:
            issues.append('Password should not be longer than 256 characters')

        if new_pw != confirm_pw:
            issues.append('Password does not match the confirmation')

        if issues:
            raise ChangePasswordError(issues)
        self.set_password(new_pw)

    def _set_email_token_expiration(self, token, expiration=None):
        """Set the expiration date for the given email token.

        :param str token: The email token to set the expiration for.
        :param datetime expiration: Datetime at which to expire the token. If
            falsy, the token expires ``settings.EMAIL_TOKEN_EXPIRATION`` hours
            after the current UTC time. Passing a value is only used for
            testing purposes.
        :returns: The expiration that was stored
        """
        if not expiration:
            now = dt.datetime.utcnow()
            expiration = now + dt.timedelta(
                hours=settings.EMAIL_TOKEN_EXPIRATION)
        self.email_verifications[token]['expiration'] = expiration
        return expiration

    def add_unconfirmed_email(self, email, expiration=None):
        """Add an email verification token for a given email.

        The address is lower-cased and stripped first.  If it is already
        waiting for confirmation, its old token is replaced by a new one.

        :param str email: The address to confirm
        :param datetime expiration: Passed to ``_set_email_token_expiration``
        :raises ValueError: if the email is already confirmed for this user
        :returns: The new token
        """
        # TODO: This is technically not compliant with RFC 822, which requires
        #       that case be preserved in the "local-part" of an address. From
        #       a practical standpoint, the vast majority of email servers do
        #       not preserve case.
        #       ref: https://tools.ietf.org/html/rfc822#section-6
        email = email.lower().strip()

        already_confirmed = email in self.emails
        if already_confirmed:
            raise ValueError("Email already confirmed to this user.")

        utils.validate_email(email)

        # Refresh the token when the address is already pending
        if email in self.unconfirmed_emails:
            self.remove_unconfirmed_email(email)

        new_token = generate_confirm_token()

        # email_verifications may be None
        if not self.email_verifications:
            self.email_verifications = {}

        self.email_verifications[new_token] = {'email': email}
        self._set_email_token_expiration(new_token, expiration=expiration)
        return new_token

    def remove_unconfirmed_email(self, email):
        """Remove the first verification token found for an unconfirmed email.

        :param str email: The address whose token should be removed
        :returns: ``True`` if a token was removed, ``False`` otherwise
        """
        for token, info in self.email_verifications.iteritems():
            if info.get('email') != email:
                continue
            # Returning right after the deletion ends the iteration, so the
            # dictionary is never iterated after being modified.
            del self.email_verifications[token]
            return True

        return False

    def remove_email(self, email):
        """Remove a confirmed email and emit ``user_email_removed``.

        Nothing happens when the address is not among the confirmed emails.

        :param str email: The address to remove
        :raises PermissionsError: if the address is the username
        """
        if email == self.username:
            raise PermissionsError("Can't remove primary email")
        if email not in self.emails:
            return
        self.emails.remove(email)
        signals.user_email_removed.send(self, email=email)

    @signals.user_email_removed.connect
    def _send_email_removal_confirmations(self, email):
        """Send ``REMOVED_EMAIL`` mails after an email address was removed.

        Connected to the ``user_email_removed`` signal.  One mail goes to the
        username and names the removed address as the alternate address; a
        second goes to the removed address and names the username as the
        primary address.

        :param str email: The address that was removed
        """
        alternate_note = 'alternate email address ({})'.format(email)
        mails.send_mail(
            to_addr=self.username,
            mail=mails.REMOVED_EMAIL,
            user=self,
            removed_email=email,
            security_addr=alternate_note,
        )

        primary_note = 'primary email address ({})'.format(self.username)
        mails.send_mail(
            to_addr=email,
            mail=mails.REMOVED_EMAIL,
            user=self,
            removed_email=email,
            security_addr=primary_note,
        )

    def get_confirmation_token(self, email, force=False):
        """Return the confirmation token for a given email.

        :param str email: Email to get the token for (matched
            case-insensitively).
        :param bool force: If an expired token exists for the given email,
            generate a new token (via ``add_unconfirmed_email``), save the
            user and return the new token.

        :raises: ExpiredTokenError if trying to access a token that is expired
            and force=False.
        :raises: KeyError if there is no token for the email.
        """
        # TODO: Refactor "force" flag into User.get_or_add_confirmation_token
        # ``email_verifications`` may be None; that simply means there is no
        # token, which is reported as KeyError below.
        verifications = self.email_verifications or {}
        # Iterate over a snapshot: the ``force`` branch mutates the dict.
        for token, info in list(verifications.items()):
            if info['email'].lower() != email.lower():
                continue
            # Old records will not have an expiration key. If it's missing,
            # assume the token is expired
            expiration = info.get('expiration')
            is_expired = not expiration or expiration < dt.datetime.utcnow()
            if not is_expired:
                return token
            if not force:
                raise ExpiredTokenError(
                    'Token for email "{0}" is expired'.format(email))
            new_token = self.add_unconfirmed_email(email)
            self.save()
            return new_token
        raise KeyError('No confirmation token for email "{0}"'.format(email))

    def get_confirmation_url(self, email, external=True, force=False):
        """Build the URL a user visits to confirm ``email``.

        :param str email: Email whose confirmation token goes into the URL.
        :param bool external: Prefix the URL with ``settings.DOMAIN`` when
            true, otherwise with ``'/'``.
        :param bool force: Passed through to ``get_confirmation_token``.
        :raises: ExpiredTokenError if trying to access a token that is expired.
        :raises: KeyError if there is no token for the email.
        """
        if external:
            base = settings.DOMAIN
        else:
            base = '/'
        token = self.get_confirmation_token(email, force=force)
        url_template = "{0}confirm/{1}/{2}/"
        return url_template.format(base, self._primary_key, token)

    def _get_unconfirmed_email_for_token(self, token):
        """Return the unconfirmed email that ``token`` was issued for.

        :param str token: Email verification token.
        :raises: exceptions.InvalidTokenError if the token is not among this
            user's email verifications.
        :raises: exceptions.ExpiredTokenError if the token carries an
            expiration date that has already passed.
        :return: The ``'email'`` value stored with the token.
        """
        pending = self.email_verifications
        if token not in pending:
            raise exceptions.InvalidTokenError()

        record = pending[token]
        # Not all tokens are guaranteed to have expiration dates; a token
        # without one is accepted.
        has_expiration = 'expiration' in record
        if has_expiration and record['expiration'] < dt.datetime.utcnow():
            raise exceptions.ExpiredTokenError()

        return record['email']

    def verify_claim_token(self, token, project_id):
        """Check a claim token against the unclaimed record for a node.

        :param token: Claim token to check.
        :param project_id: Id of the node on which the user was added as an
            unregistered contributor.
        :return: ``True`` if the record's token equals ``token``; ``False`` if
            it differs or if there is no unclaimed record for ``project_id``.
        """
        try:
            unclaimed = self.get_unclaimed_record(project_id)
        except ValueError:
            # No unclaimed record for given pid
            return False
        else:
            return unclaimed['token'] == token

    def confirm_email(self, token, merge=False):
        """Confirm the email address associated with the token.

        :param str token: Email verification token previously issued to this
            user.
        :param bool merge: When another account has already confirmed this
            email, merge that account into this one instead of raising.
        :raises: Whatever ``_get_unconfirmed_email_for_token`` raises for an
            unknown or expired token.
        :raises: exceptions.MergeConfirmedRequiredError if another account has
            confirmed this email and ``merge`` is false.
        :return: ``True`` once the email is confirmed and the user is saved.
        """
        email = self._get_unconfirmed_email_for_token(token)

        # Look for an account that has already confirmed this email. Such an
        # account is merged into this one only when ``merge`` is set;
        # otherwise the confirmation is aborted with an error.
        try:
            user_to_merge = User.find_one(Q('emails', 'iexact', email))
        except NoResultsFound:
            user_to_merge = None

        if user_to_merge and merge:
            self.merge_user(user_to_merge)
        elif user_to_merge:
            raise exceptions.MergeConfirmedRequiredError(
                'Merge requires confirmation',
                user=self,
                user_to_merge=user_to_merge,
            )

        # If another user has this email as its username, get it
        try:
            unregistered_user = User.find_one(
                Q('username', 'eq', email) & Q('_id', 'ne', self._id))
        except NoResultsFound:
            unregistered_user = None

        if unregistered_user:
            self.merge_user(unregistered_user)
            self.save()
            # NOTE(review): this assignment is not followed by a save of
            # ``unregistered_user`` in this method; ``merge_user`` already
            # clears the merged user's username and saves it -- confirm this
            # line is redundant.
            unregistered_user.username = None

        if email not in self.emails:
            self.emails.append(email)

        # Complete registration if primary email
        if email.lower() == self.username.lower():
            self.register(self.username)
            self.date_confirmed = dt.datetime.utcnow()
        # Revoke token
        del self.email_verifications[token]

        # TODO: We can't assume that all unclaimed records are now claimed.
        # Clear unclaimed records, so user's name shows up correctly on
        # all projects
        self.unclaimed_records = {}
        self.save()

        # Re-index the nodes this user contributes to.
        self.update_search_nodes()

        return True

    @property
    def unconfirmed_emails(self):
        """List the email addresses that have a pending verification token."""
        pending = self.email_verifications
        # The email_verifications field can be None (or empty).
        if not pending:
            return []
        return [record['email'] for record in pending.values()]

    def update_search_nodes(self):
        """Refresh the search entry of every node this user contributed to.

        Calls ``update_search`` on each node in ``self.node__contributed``.
        Needed to add self to contributor lists in search upon registration
        or claiming.
        """
        for contributed_node in self.node__contributed:
            contributed_node.update_search()

    def update_search_nodes_contributors(self):
        """Bulk update the contributor name in search for this user's nodes.

        Passes ``self.visible_contributor_to`` to
        ``search.update_contributors``.
        """
        # Imported here rather than at module level.
        from website.search import search as search_backend
        visible_nodes = self.visible_contributor_to
        search_backend.update_contributors(visible_nodes)

    @property
    def is_confirmed(self):
        """Whether this user has a (truthy) confirmation date."""
        return True if self.date_confirmed else False

    @property
    def social_links(self):
        """Map each filled-in social key to its formatted link.

        Entries of ``self.social`` with an empty value, or with no (truthy)
        template in ``self.SOCIAL_FIELDS``, are left out.
        """
        links = {}
        for key, val in self.social.items():
            if not val:
                continue
            template = self.SOCIAL_FIELDS.get(key)
            if template:
                links[key] = template.format(val)
        return links

    @property
    def biblio_name(self):
        """Bibliographic form of the name: ``"Surname, G. M."``.

        Initials are taken from the space-separated parts of the given and
        middle names whose first character matches ``\\w``. When the family
        name equals the combined given names string, the family name alone is
        returned.
        """
        given_names = self.given_name + ' ' + self.middle_names
        surname = self.family_name
        if surname == given_names:
            return surname
        initials = []
        for name in given_names.split(' '):
            if name and re.search(r'\w', name[0], re.I):
                initials.append(name[0].upper() + '.')
        return u'{0}, {1}'.format(surname, ' '.join(initials))

    @property
    def given_name_initial(self):
        """
        The user's preferred initialization of their given name.

        Some users with common names may choose to distinguish themselves from
        their colleagues in this way. For instance, there could be two
        well-known researchers in a single field named "Robert Walker".
        "Walker, R" could then refer to either of them. "Walker, R.H." could
        provide easy disambiguation.

        NOTE: The internal representation for this should never end with a
              period. "R" and "R.H" would be correct in the prior case, but
              "R.H." would not.

        The current implementation returns the first character of
        ``given_name``, or an empty string when ``given_name`` is empty.
        """
        # Slice instead of indexing so an empty given name yields '' rather
        # than raising IndexError.
        return self.given_name[:1]

    @property
    def url(self):
        """Site-relative URL built from the user's primary key."""
        return '/{0}/'.format(self._primary_key)

    @property
    def api_url(self):
        """Site-relative v1 API profile URL for this user."""
        return '/api/v1/profile/{}/'.format(self._primary_key)

    @property
    def absolute_url(self):
        """``self.url`` joined onto ``settings.DOMAIN``."""
        domain = settings.DOMAIN
        return urlparse.urljoin(domain, self.url)

    @property
    def display_absolute_url(self):
        """Absolute URL without the ``http:``/``https:`` scheme or outer slashes.

        Returns ``None`` when ``absolute_url`` is ``None``.
        """
        url = self.absolute_url
        if url is None:
            return None
        without_scheme = re.sub(r'https?:', '', url)
        return without_scheme.strip('/')

    @property
    def deep_url(self):
        """Site-relative ``/profile/<primary key>/`` URL for this user."""
        return '/profile/{0}/'.format(self._primary_key)

    @property
    def gravatar_url(self):
        """Gravatar URL for this user, from ``filters.gravatar``.

        Requested over SSL at the ``GRAVATAR_SIZE_ADD_CONTRIBUTOR`` size from
        settings.
        """
        avatar_size = settings.GRAVATAR_SIZE_ADD_CONTRIBUTOR
        return filters.gravatar(self, use_ssl=True, size=avatar_size)

    def get_activity_points(self, db=None):
        """Return the user's total activity count from ``analytics``.

        :param db: Database handle to query; when falsy,
            ``framework.mongo.database`` is used.
        """
        if not db:
            db = framework.mongo.database
        return analytics.get_total_activity_count(self._primary_key, db=db)

    @property
    def is_disabled(self):
        """Whether or not this account has been disabled.

        Abstracts ``User.date_disabled``: the account counts as disabled as
        soon as that date is set.

        :return: bool
        """
        return not (self.date_disabled is None)

    @is_disabled.setter
    def is_disabled(self, val):
        """Disable (truthy ``val``) or re-enable (falsy ``val``) the account.

        Disabling stamps ``date_disabled`` with the current UTC time;
        re-enabling resets it to ``None``.
        """
        self.date_disabled = dt.datetime.utcnow() if val else None

    @property
    def is_merged(self):
        """Whether or not this account has been merged into another account.

        True as soon as ``merged_by`` is set.
        """
        return not (self.merged_by is None)

    @property
    def profile_url(self):
        """Site-relative profile URL built from the user's ``_id``."""
        return '/{0}/'.format(self._id)

    @property
    def contributor_to(self):
        """Generator over contributed nodes, minus deleted and dashboard nodes."""
        return (
            contributed for contributed in self.node__contributed
            if not contributed.is_deleted and not contributed.is_dashboard
        )

    @property
    def visible_contributor_to(self):
        """Generator over ``contributor_to`` nodes that list this user as visible."""
        return (
            contributed for contributed in self.contributor_to
            if self._id in contributed.visible_contributor_ids
        )

    def get_summary(self, formatter='long'):
        """Return a small dict summarising this user.

        :param str formatter: Key into ``name_formatters`` selecting how the
            display name is rendered.
        :return: dict with the keys ``user_fullname``, ``user_profile_url``,
            ``user_display_name`` and ``user_is_claimed``.
        """
        summary = {
            'user_fullname': self.fullname,
            'user_profile_url': self.profile_url,
        }
        summary['user_display_name'] = name_formatters[formatter](self)
        summary['user_is_claimed'] = self.is_claimed
        return summary

    def save(self, *args, **kwargs):
        """Normalise the username, save, then refresh search and Piwik state.

        The username is lower-cased and stripped (or set to ``None`` when
        falsy) before delegating to the parent ``save``. The user and
        contributor search entries are refreshed when the parent's return
        value intersects ``SEARCH_UPDATE_FIELDS`` and the user is confirmed.
        When ``settings.PIWIK_HOST`` is set and the user has no
        ``piwik_token``, ``piwik_tasks.update_user`` is called.

        :return: Whatever the parent ``save`` returned.
        """
        # TODO: Update mailchimp subscription on username change
        # Avoid circular import
        from framework.analytics import tasks as piwik_tasks

        if self.username:
            self.username = self.username.lower().strip()
        else:
            self.username = None

        saved = super(User, self).save(*args, **kwargs)

        if self.SEARCH_UPDATE_FIELDS.intersection(saved) and self.is_confirmed:
            self.update_search()
            self.update_search_nodes_contributors()

        if settings.PIWIK_HOST and not self.piwik_token:
            piwik_tasks.update_user(self._id)

        return saved

    def update_search(self):
        """Push this user to the search backend.

        A ``SearchUnavailableError`` is logged and otherwise ignored, so an
        unavailable search service does not propagate to the caller.
        """
        from website import search as search_pkg
        try:
            search_pkg.search.update_user(self)
        except search_pkg.exceptions.SearchUnavailableError as err:
            logger.exception(err)
            log_exception()

    @classmethod
    def find_by_email(cls, email):
        """Return the user that has confirmed ``email``, wrapped in a list.

        :param str email: Address to look up (exact match against the
            ``emails`` field).
        :return: A one-element list holding the matching user, or an empty
            list when no user has that address.
        :rtype: list
        """
        try:
            user = cls.find_one(Q('emails', 'eq', email))
        except NoResultsFound:
            # Only "no such user" maps to an empty result. Other failures
            # (backend errors, KeyboardInterrupt, ...) must not be reported
            # as "email not in use", so they propagate.
            return []
        return [user]

    def serialize(self, anonymous=False):
        """Return a dict describing this user.

        Every value except ``registered`` is passed through
        ``utils.privacy_info_handle`` together with ``anonymous``.

        :param bool anonymous: Forwarded to ``utils.privacy_info_handle``.
        :return: dict with ``id``, ``fullname``, ``registered``, ``url`` and
            ``api_url``.
        """
        handle = utils.privacy_info_handle
        return {
            'id': handle(self._primary_key, anonymous),
            'fullname': handle(self.fullname, anonymous, name=True),
            'registered': self.is_registered,
            'url': handle(self.url, anonymous),
            'api_url': handle(self.api_url, anonymous),
        }

    ###### OSF-Specific methods ######

    def watch(self, watch_config):
        """Start watching a node.

        Saves ``watch_config`` and appends it to this user's ``watched``
        list. The user itself is not saved here.

        :param watch_config: The WatchConfig to add.
        :raises: ValueError if the config's node is already being watched.
        """
        already_watched = [config.node for config in self.watched]
        if watch_config.node in already_watched:
            raise ValueError('Node is already being watched.')
        watch_config.save()
        self.watched.append(watch_config)

    def unwatch(self, watch_config):
        """Stop watching a node.

        Finds the first entry of ``self.watched`` whose node has the same
        ``_id`` as ``watch_config``'s node and removes it through its class's
        ``remove_one``.

        :param watch_config: The WatchConfig identifying the node to unwatch.
        :raises: ValueError if the node is not being watched.
        """
        for config in self.watched:
            if watch_config.node._id != config.node._id:
                continue
            config.__class__.remove_one(config)
            return None
        raise ValueError('Node not being watched.')

    def is_watching(self, node):
        """Return whether or not this user is watching ``node``.

        Compares ``node._id`` with the ids of the nodes in ``self.watched``.
        """
        watched_ids = {config.node._id for config in self.watched}
        return node._id in watched_ids

    def get_recent_log_ids(self, since=None):
        """Return a generator of recent log ids from the watched nodes.

        :param since: A datetime specifying the oldest time to retrieve logs
            from. If ``None``, defaults to 60 days before now. Must be a
            tz-aware datetime because PyMongo's generation times are tz-aware.

        :rtype: generator of log ids (strings)
        """
        collected = []
        # Timezone-aware "now", used for the default 60-day window
        now_utc = dt.datetime.utcnow().replace(tzinfo=pytz.utc)
        if since:
            since_date = since
        else:
            since_date = now_utc - dt.timedelta(days=60)

        for config in self.watched:
            # The creation time is read straight from the log id (the first
            # 4 bytes of Mongo's ObjectId encode it), which avoids loading
            # each Log object just to look at its date field.
            fresh_ids = []
            for log_id in config.node.logs._to_primary_keys():
                created = bson.ObjectId(log_id).generation_time
                if created > since_date and log_id not in collected:
                    fresh_ids.append(log_id)
            # Log ids in reverse chronological order
            collected = _merge_into_reversed(collected, fresh_ids)
        return (l_id for l_id in collected)

    def get_daily_digest_log_ids(self):
        """Return a generator of log ids generated in the past day
        (starting at UTC 00:00).
        """
        # Today's midnight in UTC, as a tz-aware datetime
        midnight = dt.datetime.utcnow().replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.utc)
        return self.get_recent_log_ids(since=midnight)

    @property
    def can_be_merged(self):
        """Whether ``merge_user`` could fully merge this user.

        True only when every addon returned by ``get_addons`` reports
        ``can_be_merged`` (and therefore also when there are no addons).
        """
        for addon in self.get_addons():
            if not addon.can_be_merged:
                return False
        return True

    def merge_user(self, user):
        """Merge another user into this account.

        This account takes over the other user's tags, profile data (only
        where this account has none), unclaimed records, security messages,
        comment timestamps, emails, pending email verifications, watched
        nodes, external accounts, addons, contributed nodes and created
        nodes. If both users are contributors on the same node, this account
        keeps the greater of the two permission values and becomes visible if
        exactly one of the two was visible. The merged user is then stripped
        of its username, password and verification key, marked as
        ``merged_by`` this account, and saved.

        Note: this method does not save ``self``; only ``user``, the touched
        nodes and the addon settings are saved here.

        :param user: A User object to be merged.
        :raises: exceptions.MergeConflictError if ``user.can_be_merged`` is
            false.
        """
        # Fail if the other user has conflicts.
        if not user.can_be_merged:
            raise exceptions.MergeConflictError("Users cannot be merged")
        # Move over the other user's attributes
        # TODO: confirm
        for system_tag in user.system_tags:
            if system_tag not in self.system_tags:
                self.system_tags.append(system_tag)

        # Either flag being set on one of the accounts carries over.
        self.is_claimed = self.is_claimed or user.is_claimed
        self.is_invited = self.is_invited or user.is_invited

        # copy over profile only if this user has no profile info
        if user.jobs and not self.jobs:
            self.jobs = user.jobs

        if user.schools and not self.schools:
            self.schools = user.schools

        if user.social and not self.social:
            self.social = user.social

        # Combine unclaimed records; on a key clash this account's entry wins
        # because it is applied last.
        unclaimed = user.unclaimed_records.copy()
        unclaimed.update(self.unclaimed_records)
        self.unclaimed_records = unclaimed
        # - unclaimed records should be connected to only one user
        user.unclaimed_records = {}

        # Same precedence for security messages: this account's entries win.
        security_messages = user.security_messages.copy()
        security_messages.update(self.security_messages)
        self.security_messages = security_messages

        for key, value in user.mailing_lists.iteritems():
            # subscribe to each list if either user was subscribed
            subscription = value or self.mailing_lists.get(key)
            signals.user_merged.send(self,
                                     list_name=key,
                                     subscription=subscription)

            # clear subscriptions for merged user
            signals.user_merged.send(user, list_name=key, subscription=False)

        # Keep the most recent "comments viewed" timestamp per node.
        for node_id, timestamp in user.comments_viewed_timestamp.iteritems():
            if not self.comments_viewed_timestamp.get(node_id):
                self.comments_viewed_timestamp[node_id] = timestamp
            elif timestamp > self.comments_viewed_timestamp[node_id]:
                self.comments_viewed_timestamp[node_id] = timestamp

        # NOTE(review): no de-duplication here -- an address present on both
        # accounts ends up in ``self.emails`` twice; confirm that is intended.
        self.emails.extend(user.emails)
        user.emails = []

        # Carry over pending verifications, except the one for the merged
        # user's own username and any token this account already has.
        for k, v in user.email_verifications.iteritems():
            email_to_confirm = v['email']
            if k not in self.email_verifications and email_to_confirm != user.username:
                self.email_verifications[k] = v
        user.email_verifications = {}

        # FOREIGN FIELDS
        for watched in user.watched:
            if watched not in self.watched:
                self.watched.append(watched)
        user.watched = []

        for account in user.external_accounts:
            if account not in self.external_accounts:
                self.external_accounts.append(account)
        user.external_accounts = []

        # - addons
        # Note: This must occur before the merged user is removed as a
        #       contributor on the nodes, as an event hook is otherwise fired
        #       which removes the credentials.
        for addon in user.get_addons():
            user_settings = self.get_or_add_addon(addon.config.short_name)
            user_settings.merge(addon)
            user_settings.save()

        # - projects where the user was a contributor
        for node in user.node__contributed:
            # Skip dashboard node
            if node.is_dashboard:
                continue
            # if both accounts are contributor of the same project
            if node.is_contributor(self) and node.is_contributor(user):
                # Keep whichever stored permission value compares greater.
                # NOTE(review): this relies on ``>`` ordering of the stored
                # permission values -- confirm their type orders as intended.
                if node.permissions[user._id] > node.permissions[self._id]:
                    permissions = node.permissions[user._id]
                else:
                    permissions = node.permissions[self._id]
                node.set_permissions(user=self, permissions=permissions)

                # If exactly one of the two was visible, make this account
                # visible.
                visible1 = self._id in node.visible_contributor_ids
                visible2 = user._id in node.visible_contributor_ids
                if visible1 != visible2:
                    node.set_visible(user=self,
                                     visible=True,
                                     log=True,
                                     auth=Auth(user=self))

            else:
                # Only the merged user contributed: add this account with the
                # merged user's permissions and visibility.
                node.add_contributor(
                    contributor=self,
                    permissions=node.get_permissions(user),
                    visible=node.get_visible(user),
                    log=False,
                )

            try:
                node.remove_contributor(
                    contributor=user,
                    auth=Auth(user=self),
                    log=False,
                )
            except ValueError:
                logger.error('Contributor {0} not in list on node {1}'.format(
                    user._id, node._id))
            node.save()

        # - projects where the user was the creator
        for node in user.node__created:
            node.creator = self
            node.save()

        # finalize the merge

        remove_sessions_for_user(user)

        # - username is set to None so the resultant user can set it primary
        #   in the future.
        user.username = None
        user.password = None
        user.verification_key = None
        user.merged_by = self

        user.save()

    def get_projects_in_common(self, other_user, primary_keys=True):
        """Return the projects both users contribute to.

        :param other_user: The user to compare against.
        :param bool primary_keys: When true, return the shared projects'
            primary keys; otherwise return the project objects themselves.
        :return: A set holding the shared projects (or their primary keys).
        """
        if not primary_keys:
            own_projects = set(self.node__contributed)
            return own_projects.intersection(other_user.node__contributed)

        own_keys = set(self.node__contributed._to_primary_keys())
        return own_keys.intersection(
            other_user.node__contributed._to_primary_keys())

    def n_projects_in_common(self, other_user):
        """Return how many projects both users are contributors for."""
        shared_keys = self.get_projects_in_common(other_user, primary_keys=True)
        return len(shared_keys)
Ejemplo n.º 19
0
class MailRecord(StoredObject):
    """Stored object pairing a free-form ``data`` dictionary with a list of
    references to other stored objects.
    """
    # Primary key; defaults to a freshly generated ObjectId rendered as str.
    _id = fields.StringField(primary=True, default=lambda: str(bson.ObjectId()))
    # Arbitrary dictionary payload.
    # NOTE(review): presumably the mail message contents -- confirm.
    data = fields.DictionaryField()
    # List of abstract foreign references (no target schema is declared here).
    records = fields.AbstractForeignField(list=True)
Ejemplo n.º 20
0
class EmailApprovableSanction(TokenApprovableSanction):
    """Abstract token-approvable sanction that notifies users by email.

    The ``*_TEMPLATE`` class attributes and the ``_*_context`` /
    ``_email_template_context`` / ``_notify_initiator`` methods are the
    customisation points: the versions defined here yield empty URLs, an
    empty mail context, or raise ``NotImplementedError``.
    """

    # Tell modularodm not to attach backends
    _meta = {
        'abstract': True,
    }

    # Mail templates; while left as None the matching _notify_* method
    # raises NotImplementedError.
    AUTHORIZER_NOTIFY_EMAIL_TEMPLATE = None
    NON_AUTHORIZER_NOTIFY_EMAIL_TEMPLATE = None

    # Format strings for the per-user view/approve/reject URLs.
    VIEW_URL_TEMPLATE = ''
    APPROVE_URL_TEMPLATE = ''
    REJECT_URL_TEMPLATE = ''

    # A flag to conditionally run a callback on complete
    notify_initiator_on_complete = fields.BooleanField(default=False)
    # Persistent copy of the urls, for use outside of a request context.
    # An entry is written for each user in ``add_authorizer``, keyed by the
    # user's id:
    # {
    #   'abcde': {
    #     'view': [VIEW_URL],
    #     'approve': [APPROVAL_URL],
    #     'reject': [REJECT_URL],
    #   }
    # }
    stashed_urls = fields.DictionaryField(default=dict)

    @staticmethod
    def _format_or_empty(template, context):
        """Format ``template`` with ``context``; a falsy context yields ''."""
        return template.format(**context) if context else ''

    def _view_url(self, user_id, node):
        """Return the view URL for ``user_id`` on ``node`` ('' without context)."""
        context = self._view_url_context(user_id, node)
        return self._format_or_empty(self.VIEW_URL_TEMPLATE, context)

    def _view_url_context(self, user_id, node):
        """Context for ``VIEW_URL_TEMPLATE``; ``None`` in this class."""
        return None

    def _approval_url(self, user_id):
        """Return the approval URL for ``user_id`` ('' without context)."""
        context = self._approval_url_context(user_id)
        return self._format_or_empty(self.APPROVE_URL_TEMPLATE, context)

    def _approval_url_context(self, user_id):
        """Context for ``APPROVE_URL_TEMPLATE``; ``None`` in this class."""
        return None

    def _rejection_url(self, user_id):
        """Return the rejection URL for ``user_id`` ('' without context)."""
        context = self._rejection_url_context(user_id)
        return self._format_or_empty(self.REJECT_URL_TEMPLATE, context)

    def _rejection_url_context(self, user_id):
        """Context for ``REJECT_URL_TEMPLATE``; ``None`` in this class."""
        return None

    def _send_approval_request_email(self, user, template, context):
        """Mail ``template`` to ``user.username`` with ``context`` as kwargs."""
        mails.send_mail(user.username, template, user=user, **context)

    def _email_template_context(self, user, node, is_authorizer=False):
        """Template context for the notification mails; empty in this class."""
        return {}

    def _notify_authorizer(self, authorizer, node):
        """Send the authorizer notification mail.

        :raises: NotImplementedError if no authorizer template is configured.
        """
        context = self._email_template_context(
            authorizer, node, is_authorizer=True)
        template = self.AUTHORIZER_NOTIFY_EMAIL_TEMPLATE
        if not template:
            raise NotImplementedError
        self._send_approval_request_email(authorizer, template, context)

    def _notify_non_authorizer(self, user, node):
        """Send the non-authorizer notification mail.

        :raises: NotImplementedError if no non-authorizer template is
            configured.
        """
        context = self._email_template_context(user, node)
        template = self.NON_AUTHORIZER_NOTIFY_EMAIL_TEMPLATE
        if not template:
            raise NotImplementedError
        self._send_approval_request_email(user, template, context)

    def add_authorizer(self, user, node, **kwargs):
        """Register ``user`` as authorizer, stash their URLs, and save."""
        super(EmailApprovableSanction,
              self).add_authorizer(user, node, **kwargs)
        urls = {
            'view': self._view_url(user._id, node),
            'approve': self._approval_url(user._id),
            'reject': self._rejection_url(user._id)
        }
        self.stashed_urls[user._id] = urls
        self.save()

    def _notify_initiator(self):
        """Notify the initiator; not implemented in this class."""
        raise NotImplementedError

    def _on_complete(self, *args):
        """Completion callback: notify the initiator if the flag is set."""
        if not self.notify_initiator_on_complete:
            return
        self._notify_initiator()
Ejemplo n.º 21
0
class Article(StoredObject):
    """Stored record for a single article.

    Instances are built from a PubMed record (see ``from_record``) and can
    carry references to scraped HTML/PDF documents.
    """

    _id = fields.StringField(default=make_oid)

    # Original PubMed record, stored as-is by ``from_record``.
    record = fields.DictionaryField()
    _lrecord = fields.DictionaryField()
    # Publication date, set by ``update_date`` from the record's 'DP' entry.
    date = fields.DateTimeField(index=True)
    pmid = fields.StringField(index=True)
    doi = fields.StringField(index=True)
    # Set by ``get_institution`` from the record's 'AD' entry.
    place = fields.StringField()

    authors = fields.ForeignField('Author', list=True, backref='wrote')

    # Publisher information, filled in by ``scrape``.
    publisher = fields.StringField()
    publisher_url = fields.StringField()

    # References to the scraped documents.
    pubmed_html = fields.ForeignField('HTMLDocument', backref='scraped')
    publisher_html = fields.ForeignField('HTMLDocument', backref='scraped')
    publisher_pdf = fields.ForeignField('PDFDocument', backref='scraped')
    # Names of the document types that passed ``verify``.
    verified = fields.StringField(list=True)

    # NOTE(review): a DictionaryField given a schema name ('Tag') looks
    # unusual next to the ForeignField declarations above -- confirm intent.
    tags = fields.DictionaryField('Tag', list=True)

    date_last_scraped = fields.DateTimeField(index=True)
    date_last_tagged = fields.DateTimeField(index=True)

    def __repr__(self):
        """Short debugging representation showing the PMID and DOI."""
        template = '<Article: PMID={0}, DOI={1}>'
        return template.format(self.pmid, self.doi)

    def _get_doi_openurl(self, save=True):
        """Look up DOI using CrossRef's OpenURL service. Pass enough
        information for unambiguous resolution.

        Sets ``self.doi`` when the response contains a non-empty ``doi``
        element; otherwise the article is left untouched.

        :param bool save: Save record if DOI is found
        :raises: Exception if the title (record key ``'TI'``), the first
            author's last name or the publication year is unavailable.
        """
        # Check required fields
        try:
            title = self.record['TI']
            aulast = self.authors[0].last
            year = self.date.year
        except Exception:
            # ``except Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not turned into a generic
            # error.
            raise Exception(
                'Article must include title, first author, and year.')

        # Note: OpenURL requests tend to get dropped; must set manual timeout,
        # else requests may never terminate
        data = requests.get(OPENURL_URL,
                            params={
                                'atitle': title,
                                'aulast': aulast,
                                'year': year,
                                'noredirect': 'true',
                                'pid': EMAIL_ADDR,
                            },
                            timeout=10)

        # Rename namespace declarations before parsing.
        # NOTE(review): presumably so the bare ``doi`` selector below matches
        # despite XML namespaces -- confirm.
        data_parsed = PyQuery(data.content.replace(' xmlns:',
                                                   ' xmlnamespace:'))
        doi = data_parsed('doi').text()
        if doi:
            self.doi = doi
            if save:
                self.save()

    def update_date(self):
        """Set ``self.date`` by parsing the record's 'DP' entry."""
        raw_date = self.record['DP']
        self.date = parse_publication_date(raw_date)

    @classmethod
    def from_record(cls, record, doi=None):
        """Create instance of Article from a PubMed record.

        :param dict record: PubMed record from pubtools; must contain
            ``'PMID'`` and ``'DP'``, may contain ``'FAU'``
        :param str doi: Optional DOI. When given it is stored on the article;
            when omitted a best-effort CrossRef lookup is attempted instead.
        :return: Created (and saved) article
        """
        article = cls()

        # Store original record
        article.record = record

        # Add authors
        # FAU -> Full Author
        for author_name in record.get('FAU', []):
            human = HumanName(author_name)
            try:
                author = Author.find_one(
                    Q('last', 'eq', human.last) & Q('first', 'eq', human.first)
                    & Q('middle', 'eq', human.middle)
                    & Q('suffix', 'eq', human.suffix))
            except Exception:
                # No usable match: create the author. ``Exception`` (not a
                # bare ``except``) so interpreter-exit signals still
                # propagate.
                author = Author(
                    last=human.last,
                    first=human.first,
                    middle=human.middle,
                    suffix=human.suffix,
                )
                author.save()
            article.authors.append(author)

        # Add date
        article.update_date()

        # Add PMID
        article.pmid = record['PMID']

        if doi is None:
            # Get DOI from CrossRef; a failed lookup must not prevent the
            # article from being created.
            try:
                article._get_doi_openurl(save=False)
            except Exception:
                logger.info('Could not look up DOI for article')
        else:
            # A caller-supplied DOI was previously discarded; store it.
            article.doi = doi

        article.save()

        return article

    @classmethod
    def from_pmid(cls, pmid):
        """Download the PubMed record for ``pmid`` and build an article from it."""
        downloaded = pubtools.download_pmids([pmid])
        first_record = downloaded[0]
        return cls.from_record(first_record)

    @classmethod
    def from_doi(cls, doi):
        """Create an article from a DOI.

        Not implemented: the body is a placeholder, so this currently returns
        ``None``.
        """
        pass

    def _get_filepath(self, document_type):
        """Build the path of the stored file for one document type.

        The file is named ``<pmid>.<extension>`` and lives in the save
        directory configured for ``document_type``.

        :param str document_type: Document type (html, pdf, pmc)
        :return str: Path to file
        """
        directory = SAVE_DIRS[document_type]
        filename = '{}.{}'.format(self.pmid, EXTENSIONS[document_type])
        return os.path.join(directory, filename)

    def _add_document(self, document_type, save=True):
        """Point the matching document field at the stored file.

        An existing document record for the file path is reused; otherwise a
        new one is created and saved.

        :param str document_type: Document type (html, pdf, pmc)
        :param bool save: Save record after update
        :raises: ValueError if the file does not exist on disk
        """
        filepath = self._get_filepath(document_type)
        if not os.path.exists(filepath):
            raise ValueError('File does not exist')

        mapping = DOCUMENT_MAP[document_type]
        document_class = mapping['class']
        matches = document_class.find(Q('filepath', 'eq', filepath))
        if matches.count():
            document = matches[0]
        else:
            document = document_class(
                filepath=filepath,
                document_type=document_type,
            )
            document.save()

        setattr(self, mapping['field'], document)
        if save:
            self.save()

    def _remove_document(self, document_type):
        """Clear a document field and delete its record and stored files.

        Does nothing when the field is empty. The article itself is not
        saved here.

        :param str document_type: Document type (html, pdf, pmc)
        """
        document_attr = DOCUMENT_MAP[document_type]['field']
        document = getattr(self, document_attr)
        if not document:
            return

        setattr(self, document_attr, None)
        document.remove_one(document)
        # Delete the stored file and its extract, when present on disk.
        for path_attr in ('filepath', 'extract_filepath'):
            path = getattr(document, path_attr)
            if path and os.path.exists(path):
                os.remove(path)

    def get_institution(self, save=True):
        """Derive ``self.place`` from the record's affiliation ('AD') entry.

        Nothing happens when the record has no affiliation.

        :param bool save: Save record after update
        """
        affiliation = self.record.get('AD')
        if affiliation is not None:
            # Resolves to the module-level ``get_institution`` helper, not
            # to this method.
            self.place = get_institution(affiliation)
            if save:
                self.save()

    def scrape(self, scraper=None, document_types=None, overwrite=False):
        """Fetch and save documents, then add to document fields.

        :param Scrape scraper: Article scraper; created if `None`
        :param list document_types: Document types to scrape; may include
            'pmc', 'html', and 'pdf'. The list is not modified.
        :param bool overwrite: Overwrite existing files
        """
        # Get default arguments
        scraper = scraper or SCRAPE_CLASS(**SCRAPE_KWARGS)
        # Work on a private copy. Entries are dropped below, and removing
        # them from the list being iterated skipped the following entry and
        # mutated the caller's list (or the module-level DOCUMENT_TYPES).
        document_types = list(document_types or DOCUMENT_TYPES)

        # Update scraped date
        self.date_last_scraped = datetime.datetime.utcnow()
        self.save()

        if not overwrite:
            # Skip files if document field and associated file already exist
            fetch_types = []
            for document_type in document_types:
                document = getattr(self, DOCUMENT_MAP[document_type]['field'])
                filepath = self._get_filepath(document_type)
                file_exists = bool(filepath) and os.path.exists(filepath)
                if document and file_exists:
                    # Document is complete; leave it out of the fetch list
                    continue
                if document:
                    # Document field exists, but file is missing; remove
                    # document and keep in fetch list
                    self._remove_document(document_type)
                elif file_exists:
                    # Document file exists, but field is empty; create
                    # document field pointing to file (still fetched, as
                    # before)
                    self._add_document(document_type, save=False)
                fetch_types.append(document_type)
            document_types = fetch_types
        else:
            # Delete existing documents fields and files
            for document_type in DOCUMENT_TYPES:
                self._remove_document(document_type)

        # Scrape files
        try:
            info = scraper.scrape(
                pmid=self.pmid,
                doi=self.doi,
                fetch_types=document_types,
            )
        except ScrapeError:
            logger.info('Could not scrape article')
            return

        # Save scraped files
        saved = info.save(self.pmid, save_dirs=SAVE_DIRS)

        # Create document objects and add to self
        for document_type in saved:
            self._add_document(document_type, save=False)

        # Add publisher information
        self.publisher = info.publisher
        self.publisher_url = info.pub_link

        self.save()

    def verify(self, threshold=VERIFY_THRESHOLD, overwrite=False, save=True):
        """Verify referenced documents and record which ones passed.

        ``self.verified`` is replaced by the names of the document types whose
        field is set and whose ``verify`` call returns a truthy value.

        :param float threshold: Verification threshold
        :param bool overwrite: Overwrite existing verification info
        :param bool save: Save record after update
        """
        passed = []
        for name, field in DOCUMENT_TYPES_TO_FIELDS.iteritems():
            document = getattr(self, field)
            if document is None:
                continue
            if document.verify(threshold, overwrite=overwrite):
                passed.append(name)
        self.verified = passed

        if save:
            self.save()

    def tag(self, tag_groups=None, overwrite=False, save=True):
        """Add tags to article.

        Re-verifies the documents (without saving), then runs every tag group
        over the cleaned text of each verified document. Tags already present
        gain per-document-type 'context', 'group' and 'span' entries; unseen
        tags are appended.

        :param list tag_groups: List of TagGroup objects; defaults to all
            groups in `pattern.tag_groups`
        :param bool overwrite: Overwrite existing tags
        :param bool save: Save record after update
        :return list: New or modified extracted tags
        """
        tag_groups = tag_groups or pattern.tag_groups.values()

        if overwrite:
            self.tags = []
            existing_tags = []
        else:
            # Wrap stored dictionaries so they can be compared with the
            # tags produced by the tagger below
            existing_tags = [tagger.Tag(tag) for tag in self.tags]

        new_tags = []

        # Refresh `self.verified`; the record is saved (if requested) at the
        # end of this method instead
        self.verify(save=False)

        for document_type in self.verified:

            document_field = DOCUMENT_TYPES_TO_FIELDS[document_type]
            document = getattr(self, document_field)

            # Skip this document type if document not set
            if document is None:
                continue

            doc = document.read()

            # Skip this document type if document text is empty
            if not doc:
                continue

            # Clean document text
            doc = clean(doc)

            for tag_group in tag_groups:

                # Extract tags
                tags = tagger.tag(tag_group, doc)

                for tag in tags:

                    # Build context documents, keyed by document type
                    context_data = {document_type: tag['context']}
                    group_data = {document_type: tag['group']}
                    span_data = {document_type: tag['span']}

                    # Update existing tag with context
                    if tag in existing_tags:
                        idx = existing_tags.index(tag)
                        # Only add data for document types not yet recorded
                        # on this tag
                        if document_type not in existing_tags[idx]['context']:
                            existing_tags[idx]['context'].update(context_data)
                            existing_tags[idx]['group'].update(group_data)
                            existing_tags[idx]['span'].update(span_data)
                            new_tags.append(existing_tags[idx])
                    # Create new tag in database
                    else:
                        tag['context'] = context_data
                        tag['group'] = group_data
                        tag['span'] = span_data
                        existing_tags.append(tag)
                        new_tags.append(tag)

        # Cast tags to dictionaries for ODM compatibility
        self.tags = [dict(tag) for tag in existing_tags]

        # Update tagged date
        self.date_last_tagged = datetime.datetime.utcnow()

        if save:
            self.save()

        return new_tags

    def clear_tags(self, labels, save=True):
        """Drop every tag whose 'label' is one of the given labels.

        :param list labels: Labels of tags to delete
        :param bool save: Save record after update
        """
        kept = []
        for tag in self.tags:
            if tag['label'] in labels:
                continue
            kept.append(tag)
        self.tags = kept

        if save:
            self.save()
Ejemplo n.º 22
0
class AddonGitHubNodeSettings(AddonNodeSettingsBase):

    # GitHub account name and repository name this node is linked to; both
    # are interpolated into github.com URLs by `repo_url`
    user = fields.StringField()
    repo = fields.StringField()
    # ID and shared secret of the web hook registered by `add_hook`
    hook_id = fields.StringField()
    hook_secret = fields.StringField()

    # User-level add-on settings whose credentials are used for GitHub calls
    user_settings = fields.ForeignField('addongithubusersettings',
                                        backref='authorized')

    registration_data = fields.DictionaryField()

    @property
    def has_auth(self):
        """True when user settings are attached and they report auth."""
        user_settings = self.user_settings
        if not user_settings:
            return False
        return bool(user_settings.has_auth)

    @property
    def complete(self):
        """Truthy when the add-on is authorized and a user/repo pair is set."""
        authorized = self.has_auth
        linked = self.repo is not None and self.user is not None
        return authorized and linked

    def find_or_create_file_guid(self, path):
        """Return the result of `GithubGuidFile.get_or_create` for `path`.

        :param str path: File path, looked up together with the owning node
        """
        node = self.owner
        return GithubGuidFile.get_or_create(node=node, path=path)

    def authorize(self, user_settings, save=False):
        """Attach `user_settings` to this node and log the authorization.

        :param user_settings: User-level GitHub settings to attach; the log
            entry is attributed to their owner
        :param bool save: Save settings after update
        """
        self.user_settings = user_settings

        node = self.owner
        log_params = {
            'project': node.parent_id,
            'node': node._id,
        }
        node.add_log(
            action='github_node_authorized',
            params=log_params,
            auth=Auth(user_settings.owner),
        )

        if save:
            self.save()

    def deauthorize(self, auth=None, log=True, save=False):
        """Delete the hook, clear user/repo/user settings, optionally log.

        :param auth: Passed through to the node log entry
        :param bool log: Add a 'github_node_deauthorized' log to the node
        :param bool save: Save settings after update
        """
        # The hook must go first: deleting it needs the credentials and
        # user/repo values that are cleared right after
        self.delete_hook(save=False)
        self.user = None
        self.repo = None
        self.user_settings = None

        if log:
            node = self.owner
            node.add_log(
                action='github_node_deauthorized',
                params={
                    'project': node.parent_id,
                    'node': node._id,
                },
                auth=auth,
            )

        if save:
            self.save()

    def delete(self, save=False):
        """Run the base-class delete, then deauthorize without logging.

        :param bool save: Save settings once, after both steps
        """
        super(AddonGitHubNodeSettings, self).delete(save=False)
        self.deauthorize(log=False, save=False)
        if save:
            self.save()

    @property
    def repo_url(self):
        """github.com URL of the linked repo, or None if user/repo unset."""
        if not (self.user and self.repo):
            return None
        return 'https://github.com/{0}/{1}/'.format(self.user, self.repo)

    @property
    def short_url(self):
        """'<user>/<repo>' for the linked repo, or None if either is unset."""
        if not (self.user and self.repo):
            return None
        parts = (self.user, self.repo)
        return '/'.join(parts)

    @property
    def is_private(self):
        """The `private` attribute of the linked repo, fetched from GitHub."""
        connection = GitHub.from_settings(self.user_settings)
        repo = connection.repo(user=self.user, repo=self.repo)
        return repo.private

    # TODO: Delete me and replace with serialize_settings / Knockout
    def to_json(self, user):
        """Serialize the add-on settings for rendering to `user`.

        Extends the base-class serialization with GitHub-specific keys. The
        repo list ('repo_names') is only included when `user` is the one who
        authorized this node.

        :param User user: User the settings are rendered for
        :return dict: Serialized settings
        :raises GitHubError: If listing repos fails for any reason other
            than rejected credentials
        """
        ret = super(AddonGitHubNodeSettings, self).to_json(user)
        user_settings = user.get_addon('github')
        ret.update({
            'user_has_auth': user_settings and user_settings.has_auth,
            'is_registration': self.owner.is_registration,
        })
        if self.user_settings and self.user_settings.has_auth:
            valid_credentials = False
            owner = self.user_settings.owner
            if user_settings and user_settings.owner == owner:
                connection = GitHub.from_settings(user_settings)
                # TODO: Fetch repo list client-side
                # Since /user/repos excludes organization repos to which the
                # current user has push access, we have to make extra requests to
                # find them
                valid_credentials = True
                try:
                    repos = itertools.chain.from_iterable(
                        (connection.repos(), connection.my_org_repos()))
                    repo_names = [
                        '{0} / {1}'.format(repo.owner.login, repo.name)
                        for repo in repos
                    ]
                except GitHubError as error:
                    # Only rejected credentials are handled here. Any other
                    # failure is re-raised as-is; previously it left
                    # `repo_names` unbound and surfaced as an
                    # UnboundLocalError that hid the real error.
                    if error.code != http.UNAUTHORIZED:
                        raise
                    repo_names = []
                    valid_credentials = False
                ret.update({'repo_names': repo_names})
            ret.update({
                'node_has_auth':
                True,
                'github_user':
                self.user or '',
                'github_repo':
                self.repo or '',
                'github_repo_full_name':
                '{0} / {1}'.format(self.user, self.repo),
                'auth_osf_name':
                owner.fullname,
                'auth_osf_url':
                owner.url,
                'auth_osf_id':
                owner._id,
                'github_user_name':
                self.user_settings.github_user_name,
                'github_user_url':
                'https://github.com/{0}'.format(
                    self.user_settings.github_user_name),
                'is_owner':
                owner == user,
                'valid_credentials':
                valid_credentials,
                'addons_url':
                web_url_for('user_addons'),
            })
        return ret

    def serialize_waterbutler_credentials(self):
        """Return the OAuth token dict handed to WaterButler.

        :return dict: ``{'token': <oauth access token>}``
        :raises AddonError: If the add-on is not complete or no repo is set
        """
        if self.complete and self.repo:
            return {'token': self.user_settings.oauth_access_token}
        raise exceptions.AddonError('Addon is not authorized')

    def serialize_waterbutler_settings(self):
        """Return the owner/repo pair handed to WaterButler.

        :return dict: ``{'owner': <github user>, 'repo': <repo name>}``
        :raises AddonError: If the add-on is not complete
        """
        if self.complete:
            return {
                'owner': self.user,
                'repo': self.repo,
            }
        raise exceptions.AddonError('Repo is not configured')

    def create_waterbutler_log(self, auth, action, metadata):
        """Add a 'github_<action>' log entry to the owning node.

        :param auth: Passed through to the node log entry
        :param str action: Action name; prefixed with 'github_'
        :param dict metadata: Must contain 'path'; when a non-empty 'extra'
            is present its ``['commit']['sha']`` is used to build view and
            download URLs pinned to that commit
        """
        path = metadata['path']
        node = self.owner

        url = node.web_url_for('addon_view_or_download_file',
                               path=path,
                               provider='github')

        extra = metadata.get('extra')
        if extra:
            sha = extra['commit']['sha']
            urls = {
                'view': '{0}?ref={1}'.format(url, sha),
                'download': '{0}?action=download&ref={1}'.format(url, sha)
            }
        else:
            # No commit information available: log without file links
            sha = None
            urls = {}

        github_params = {
            'user': self.user,
            'repo': self.repo,
            'sha': sha,
        }
        node.add_log(
            'github_{0}'.format(action),
            auth=auth,
            params={
                'project': node.parent_id,
                'node': node._id,
                'path': path,
                'urls': urls,
                'github': github_params,
            },
        )

    #############
    # Callbacks #
    #############

    def before_page_load(self, node, user):
        """Build alert messages about a node/repo privacy mismatch.

        Always returns a list: it is empty when the user is not a
        contributor, the add-on is not configured or authorized, the repo
        cannot be fetched, or node and repo privacy agree.

        :param Node node:
        :param User user:
        :return list: Alert messages (possibly empty)
        """
        messages = []

        # Quit if not contributor
        if not node.is_contributor(user):
            return messages

        # Quit if not configured
        if self.user is None or self.repo is None:
            return messages

        # Quit if no user authorization
        if self.user_settings is None:
            return messages

        connect = GitHub.from_settings(self.user_settings)

        try:
            repo = connect.repo(self.user, self.repo)
        except (ApiError, GitHubError):
            # Repo could not be fetched; nothing to warn about
            return messages

        node_permissions = 'public' if node.is_public else 'private'
        repo_permissions = 'private' if repo.private else 'public'
        if repo_permissions != node_permissions:
            message = (
                'Warnings: This OSF {category} is {node_perm}, but the GitHub '
                'repo {user} / {repo} is {repo_perm}.'.format(
                    category=node.project_or_component,
                    node_perm=node_permissions,
                    repo_perm=repo_permissions,
                    user=self.user,
                    repo=self.repo,
                ))
            if repo_permissions == 'private':
                message += (
                    ' Users can view the contents of this private GitHub '
                    'repository through this public project.')
            else:
                message += (
                    ' The files in this GitHub repo can be viewed on GitHub '
                    '<a href="https://github.com/{user}/{repo}/">here</a>.'
                ).format(
                    user=self.user,
                    repo=self.repo,
                )
            messages.append(message)

        # Return the list on every path so callers can iterate unconditionally
        return messages

    # TODO: Rename to before_remove_contributor_message
    def before_remove_contributor(self, node, removed):
        """Warn when the contributor being removed authorized this add-on.

        :param Node node:
        :param User removed:
        :return str: Alert message, or None if `removed` did not authorize
            the add-on
        """
        authorizer_removed = (self.user_settings
                              and self.user_settings.owner == removed)
        if not authorizer_removed:
            return None

        template = (
            'The GitHub add-on for this {category} is authenticated '
            'by {user}. Removing this user will also remove write access '
            'to GitHub unless another contributor re-authenticates. You '
            'can download the contents of this repository before removing '
            'this contributor <a href="{url}">here</a>.')
        return template.format(
            category=node.project_or_component,
            user=removed.fullname,
            url=node.api_url + 'github/tarball/')

    def after_remove_contributor(self, node, removed, auth=None):
        """Drop the authorization when its owner is removed from the node.

        :param Node node:
        :param User removed:
        :param auth: When its user is `removed` the re-authentication hint
            is left out of the message
        :return str: Alert message, or None if `removed` did not authorize
            the add-on
        """
        authorizer_removed = (self.user_settings
                              and self.user_settings.owner == removed)
        if not authorizer_removed:
            return None

        # Delete OAuth tokens
        self.user_settings = None
        self.save()

        parts = [
            (
                u'Because the GitHub add-on for {category} "{title}" was authenticated '
                u'by {user}, authentication information has been deleted.'
            ).format(category=node.category_display,
                     title=node.title,
                     user=removed.fullname)
        ]

        if not auth or auth.user != removed:
            settings_url = node.web_url_for('node_setting')
            parts.append(
                (
                    u' You can re-authenticate on the <a href="{url}">Settings</a> page.'
                ).format(url=settings_url))

        return u''.join(parts)

    def after_set_privacy(self, node, permissions):
        """Mirror a node privacy change onto the linked GitHub repo.

        Does nothing (returns None) unless `github_settings.SET_PRIVACY` is
        enabled.

        :param Node node:
        :param str permissions: The repo is made private when this equals
            'private', public otherwise
        :return str: Alert message describing success or failure
        """
        if not github_settings.SET_PRIVACY:
            return

        connect = GitHub.from_settings(self.user_settings)

        make_private = permissions == 'private'
        data = connect.set_privacy(self.user, self.repo, make_private)

        succeeded = data is not None and 'errors' not in data
        if succeeded:
            return ('GitHub repo {user}::{repo} made {perm}.'.format(
                user=self.user,
                repo=self.repo,
                perm=permissions,
            ))

        # The change failed: look up what the repo's privacy actually is
        repo = connect.repo(self.user, self.repo)
        if repo is None:
            current_privacy = 'unknown'
        elif repo.private:
            current_privacy = 'private'
        else:
            current_privacy = 'public'
        return ('Could not set privacy for repo {user}::{repo}. '
                'Current privacy status is {perm}.'.format(
                    user=self.user,
                    repo=self.repo,
                    perm=current_privacy,
                ))

    def before_fork(self, node, user):
        """Tell the forking user whether authorization will be transferred.

        :param Node node:
        :param User user:
        :return str: Alert message
        """
        forker_authorized = (self.user_settings
                             and self.user_settings.owner == user)
        if forker_authorized:
            template = (
                'Because you have authenticated the GitHub add-on for this '
                '{cat}, forking it will also transfer your authorization to '
                'the forked {cat}.')
        else:
            template = (
                'Because this GitHub add-on has been authenticated by a different '
                'user, forking it will not transfer authentication to the forked '
                '{cat}.')
        return template.format(cat=node.project_or_component)

    def after_fork(self, node, fork, user, save=True):
        """Clone the settings onto a fork, copying auth for the forking user.

        :param Node node: Original node
        :param Node fork: Forked node
        :param User user: User creating fork
        :param bool save: Save settings after callback
        :return tuple: Tuple of cloned settings and alert message
        """
        # The base class builds the clone; its message is replaced below
        parent = super(AddonGitHubNodeSettings, self)
        clone, _ = parent.after_fork(node, fork, user, save=False)

        # Copy authentication if authenticated by forking user
        forker_authorized = (self.user_settings
                             and self.user_settings.owner == user)
        if forker_authorized:
            clone.user_settings = self.user_settings
            message = 'GitHub authorization copied to forked {cat}.'.format(
                cat=fork.project_or_component)
        else:
            template = (
                'GitHub authorization not copied to forked {cat}. You may '
                'authorize this fork on the <a href={url}>Settings</a> '
                'page.')
            message = template.format(cat=fork.project_or_component,
                                      url=fork.url + 'settings/')

        if save:
            clone.save()

        return clone, message

    def before_register(self, node, user):
        """Warn that GitHub content is left out of registrations.

        :param Node node:
        :param User user:
        :return str: Alert message, or None when the add-on has no auth
        """
        category = node.project_or_component
        if not (self.user_settings and self.user_settings.has_auth):
            return None
        template = (
            u'The contents of GitHub add-ons cannot be registered at this time; '
            u'the GitHub repository linked to this {category} will not be included '
            u'as part of this registration.')
        return template.format(category=category)

    def before_make_public(self, node):
        """Warn when a node about to go public links to a private repo.

        :param Node node:
        :return str: Alert message, or None if the repo is not private or
            looking it up raised NotFoundError
        """
        try:
            repo_is_private = self.is_private
        except NotFoundError:
            return None

        if not repo_is_private:
            return None

        template = (
            'This {cat} is connected to a private GitHub repository. Users '
            '(other than contributors) will not be able to see the '
            'contents of this repo unless it is made public on GitHub.'
        )
        return template.format(cat=node.project_or_component)

    def after_delete(self, node, user):
        """Deauthorize (logged and saved) on behalf of the deleting user.

        :param Node node:
        :param User user: User wrapped in the `Auth` passed to `deauthorize`
        """
        auth = Auth(user=user)
        self.deauthorize(auth, log=True, save=True)

    #########
    # Hooks #
    #########

    # TODO: Should Events be added here?
    # TODO: Move hook logic to service
    def add_hook(self, save=True):
        """Register a web hook on the linked repo and remember its id/secret.

        Does nothing without user settings. The hook id and secret are only
        stored when the GitHub client returns a truthy hook.

        :param bool save: Save settings after a hook was created
        """
        if not self.user_settings:
            return

        connect = GitHub.from_settings(self.user_settings)
        secret = utils.make_hook_secret()

        # Callback URL: <hook_domain> + <node api url>/github/hook/
        hook_path = os.path.join(self.owner.api_url, 'github', 'hook/')
        config = {
            'url': urlparse.urljoin(hook_domain, hook_path),
            'content_type': github_settings.HOOK_CONTENT_TYPE,
            'secret': secret,
        }
        hook = connect.add_hook(self.user, self.repo, 'web', config)

        if not hook:
            return

        self.hook_id = hook.id
        self.hook_secret = secret
        if save:
            self.save()

    def delete_hook(self, save=True):
        """Delete the registered web hook from the linked repo.

        :param bool save: Save settings after the hook id was cleared
        :return bool: Hook was deleted
        """
        if not (self.user_settings and self.hook_id):
            return False

        connection = GitHub.from_settings(self.user_settings)
        try:
            response = connection.delete_hook(self.user, self.repo,
                                              self.hook_id)
        except (GitHubError, NotFoundError):
            return False

        if not response:
            return False

        self.hook_id = None
        if save:
            self.save()
        return True
Ejemplo n.º 23
0
class User(GuidStoredObject, AddonModelMixin):

    redirect_mode = 'proxy'

    # User fields that trigger an update to the search engine on save
    SEARCH_UPDATE_FIELDS = {
        'fullname',
        'given_name',
        'middle_names',
        'family_name',
        'suffix',
        'merged_by',
        'date_disabled',
        'jobs',
        'schools',
        'social',
    }

    # URL templates for social profile links, keyed by service name; the
    # `{}` placeholder takes the user's identifier on that service
    # (presumably the matching entry of the `social` field -- confirm
    # against the caller).
    SOCIAL_FIELDS = {
        # ORCID iDs resolve under orcid.org, not orcid.com
        'orcid': 'http://orcid.org/{}',
        'github': 'http://github.com/{}',
        # Google Scholar profiles are served from /citations
        'scholar': 'http://scholar.google.com/citations?user={}',
        'twitter': 'http://twitter.com/{}',
        'personal': '{}',
        'linkedIn': 'https://www.linkedin.com/profile/view?id={}',
        'impactStory': 'https://impactstory.org/{}',
        'researcherId': 'http://researcherid.com/rid/{}',
    }

    _id = fields.StringField(primary=True)

    # NOTE: In the OSF, username is an email
    # May be None for unregistered contributors
    username = fields.StringField(required=False, unique=True, index=True)
    # Password hash; written by `set_password`
    password = fields.StringField()
    fullname = fields.StringField(required=True, validate=string_required)
    is_registered = fields.BooleanField()

    # TODO: Migrate unclaimed users to the new style, then remove this attribute
    # Note: No new users should be created where is_claimed is False.
    #   As of 9 Sep 2014, there were 331 legacy unclaimed users in the system.
    #   When those users are migrated to the new style, this attribute should be
    #   removed.
    is_claimed = fields.BooleanField()

    # Tags for internal use
    system_tags = fields.StringField(list=True)

    # Per-project unclaimed user data:
    # Format: {
    #   <project_id>: {
    #       'name': <name that referrer provided>,
    #       'referrer_id': <user ID of referrer>,
    #       'token': <token used for verification urls>,
    #       'email': <email the referrer provided or None>,
    #       'last_sent': <timestamp of last email sent to referrer or None>
    #   }
    #   ...
    # }
    # TODO: add validation
    unclaimed_records = fields.DictionaryField(required=False)
    # The user who merged this account
    merged_by = fields.ForeignField('user', default=None, backref="merged")
    # Verification key used for resetting password
    verification_key = fields.StringField()
    emails = fields.StringField(list=True)
    # Email verification tokens
    # Format: {
    #   <token>: {'email': <email address>,
    #             'expiration': <datetime>}
    # }
    email_verifications = fields.DictionaryField()

    # Format: {
    #   'list1': True,
    #   'list2': False,
    #    ...
    # }
    mailing_lists = fields.DictionaryField()

    aka = fields.StringField(list=True)
    date_registered = fields.DateTimeField(auto_now_add=dt.datetime.utcnow)
    # Watched nodes are stored via a list of WatchConfigs
    watched = fields.ForeignField("WatchConfig", list=True, backref="watched")

    # Recently added contributors stored via a list of users
    recently_added = fields.ForeignField("user",
                                         list=True,
                                         backref="recently_added")

    # CSL names; filled in by `update_guessed_names`
    given_name = fields.StringField()
    middle_names = fields.StringField()
    family_name = fields.StringField()
    suffix = fields.StringField()

    # Employment history
    # Format: {
    #     'title': <position or job title>,
    #     'institution': <institution or organization>,
    #     'department': <department>,
    #     'location': <location>,
    #     'startMonth': <start month>,
    #     'startYear': <start year>,
    #     'endMonth': <end month>,
    #     'endYear': <end year>,
    #     'ongoing': <boolean>
    # }
    jobs = fields.DictionaryField(list=True, validate=validate_history_item)

    # Educational history
    # Format: {
    #     'degree': <degree>,
    #     'institution': <institution or organization>,
    #     'department': <department>,
    #     'location': <location>,
    #     'startMonth': <start month>,
    #     'startYear': <start year>,
    #     'endMonth': <end month>,
    #     'endYear': <end year>,
    #     'ongoing': <boolean>
    # }
    schools = fields.DictionaryField(list=True, validate=validate_history_item)

    # Social links
    # Format: {
    #     'personal': <personal site>,
    #     'twitter': <twitter id>,
    # }
    social = fields.DictionaryField(validate=validate_social)

    api_keys = fields.ForeignField('apikey', list=True, backref='keyed')

    piwik_token = fields.StringField()

    date_last_login = fields.DateTimeField()

    # Set when the user confirms; see `register` and `create_confirmed`
    date_confirmed = fields.DateTimeField()

    # When the user was disabled.
    date_disabled = fields.DateTimeField()

    # Format: {
    #   'node_id': 'timestamp'
    # }
    comments_viewed_timestamp = fields.DictionaryField()

    _meta = {'optimistic': True}

    def __repr__(self):
        return '<User({0!r}) with id {1!r}>'.format(self.username, self._id)

    @classmethod
    def create_unregistered(cls, fullname, email=None):
        """Build (without saving) a new unregistered user.

        The email, when given, becomes both the username and the first entry
        of ``emails``.

        NOTE(review): the earlier docstring listed DuplicateEmailError, but
        nothing in this method raises it; presumably it can surface when the
        returned user is saved -- confirm.

        :param str fullname: Full name; also used to guess the CSL names
        :param str email: Optional email address
        :return: The new, unsaved user
        """
        new_user = cls(username=email, fullname=fullname)
        new_user.update_guessed_names()
        if email:
            new_user.emails.append(email)
        new_user.is_registered = False
        return new_user

    @classmethod
    def create(cls, username, password, fullname):
        """Build (without saving) a user with guessed names and a password.

        :param str username: Username (an email address in the OSF)
        :param str password: Raw password; stored via `set_password`
        :param str fullname: Full name
        :return: The new, unsaved user
        """
        new_user = cls(username=username, fullname=fullname)
        new_user.update_guessed_names()
        new_user.set_password(password)
        return new_user

    @classmethod
    def create_unconfirmed(cls, username, password, fullname, do_confirm=True):
        """Build a user who started registering but has yet to verify
        their primary email address (the username).

        An email verification token is added for `username` and the user is
        marked as not registered. `do_confirm` is accepted but not used here.
        """
        pending = cls.create(username, password, fullname)
        pending.add_email_verification(username)
        pending.is_registered = False
        return pending

    @classmethod
    def create_confirmed(cls, username, password, fullname):
        """Build a user that is already registered, claimed and confirmed.

        The confirmation date is copied from the registration date.
        """
        confirmed = cls.create(username, password, fullname)
        confirmed.is_registered = confirmed.is_claimed = True
        confirmed.date_confirmed = confirmed.date_registered
        return confirmed

    def update_guessed_names(self):
        """Refresh the CSL name fields from `utils.impute_names(fullname)`."""
        parsed = utils.impute_names(self.fullname)
        # (attribute on the user, key in the imputed-names dictionary)
        name_parts = (
            ('given_name', 'given'),
            ('middle_names', 'middle'),
            ('family_name', 'family'),
            ('suffix', 'suffix'),
        )
        for attribute, key in name_parts:
            setattr(self, attribute, parsed[key])

    def register(self, username, password=None):
        """Mark the user as registered, claimed and confirmed.

        :param str username: New username; appended to ``emails`` if missing
        :param str password: Optional raw password to set
        :return: This user
        """
        self.username = username
        if password:
            self.set_password(password)

        known_emails = self.emails
        if username not in known_emails:
            known_emails.append(username)

        self.is_registered = self.is_claimed = True
        self.date_confirmed = dt.datetime.utcnow()

        # Push the new state to the search index
        self.update_search()
        self.update_search_nodes()

        # Emit signal that a user has confirmed
        signals.user_confirmed.send(self)

        return self

    def add_unclaimed_record(self, node, referrer, given_name, email=None):
        """Add a new project entry in the unclaimed records dictionary.

        Any existing record for the same node is replaced.

        :param Node node: Node this unclaimed user was added to.
        :param User referrer: User who referred this user.
        :param str given_name: The full name that the referrer gave for this user.
        :param str email: The given email address; stored lowercased and
            stripped, or as None when empty.
        :raises PermissionsError: If the referrer cannot edit the node.
        :returns: The added record
        """
        if not node.can_edit(user=referrer):
            raise PermissionsError(
                'Referrer does not have permission to add a contributor '
                'to project {0}'.format(node._primary_key))

        clean_email = email.lower().strip() if email else None
        record = {
            'name': given_name,
            'referrer_id': referrer._primary_key,
            'token': generate_confirm_token(),
            'email': clean_email
        }
        self.unclaimed_records[node._primary_key] = record
        return record

    def display_full_name(self, node=None):
        """Return the name to show for this user in a node's contributor list.

        When the user has an unclaimed record for `node`, the name stored in
        that record wins; otherwise ``fullname`` is returned.

        NOTE: Unclaimed users may have a different name for different nodes.
        """
        if not node:
            return self.fullname

        record = self.unclaimed_records.get(node._primary_key, None)
        if record:
            return record['name']
        return self.fullname

    @property
    def is_active(self):
        """Whether the account can be used.

        Truthy only for a registered user with a password who is not merged,
        not disabled, and confirmed.

        :return: bool
        """
        has_credentials = self.is_registered and self.password is not None
        return (has_credentials
                and not self.is_merged
                and not self.is_disabled
                and self.is_confirmed())

    def get_unclaimed_record(self, project_id):
        """Look up the unclaimed record stored for `project_id`.

        :raises: ValueError if there is no record for the given project.
        """
        records = self.unclaimed_records
        try:
            return records[project_id]
        except KeyError:  # reraise as ValueError
            message = (
                'No unclaimed record for user {self._id} on node {project_id}'
            ).format(self=self, project_id=project_id)
            raise ValueError(message)

    def get_claim_url(self, project_id, external=False):
        """Return the URL an unclaimed user should use to claim their
        account for the given project.

        :param project_id: The project ID for the unclaimed record
        :param bool external: Prefix the URL with ``settings.DOMAIN`` instead
            of '/'
        :raises: ValueError if a record doesn't exist for the given project ID
        :rtype: str
        :returns: The claim URL, including the record's token
        """
        prefix = settings.DOMAIN if external else '/'
        record = self.get_unclaimed_record(project_id)
        return '{base_url}user/{uid}/{project_id}/claim/?token={token}'.format(
            base_url=prefix,
            uid=self._primary_key,
            project_id=project_id,
            token=record['token'],
        )

    def set_password(self, raw_password):
        """Store the hash of ``raw_password`` as this user's password."""
        hashed = generate_password_hash(raw_password)
        self.password = hashed

    def check_password(self, raw_password):
        """Return a boolean of whether ``raw_password`` was correct.

        Always False when no password is stored or none is supplied.
        """
        if self.password and raw_password:
            return check_password_hash(self.password, raw_password)
        return False

    def change_password(self, raw_old_password, raw_new_password,
                        raw_confirm_password):
        """Change the password for this user to the hash of ``raw_new_password``.

        All three values are stripped of surrounding whitespace (None counts
        as empty) before validation. Every failed check is collected so the
        caller sees all problems at once.

        :raises ChangePasswordError: With the list of issues, if any
        """
        old_pw = (raw_old_password or '').strip()
        new_pw = (raw_new_password or '').strip()
        confirm_pw = (raw_confirm_password or '').strip()

        issues = []

        # Old password: must be right, and must differ from the new one
        if not self.check_password(old_pw):
            issues.append('Old password is invalid')
        elif old_pw == new_pw:
            issues.append('Password cannot be the same')

        # Completeness and minimum length
        if not (old_pw and new_pw and confirm_pw):
            issues.append('Passwords cannot be blank')
        elif len(new_pw) < 6:
            issues.append('Password should be at least six characters')

        # Confirmation
        if new_pw != confirm_pw:
            issues.append('Password does not match the confirmation')

        if issues:
            raise ChangePasswordError(issues)
        self.set_password(new_pw)

    def _set_email_token_expiration(self, token, expiration=None):
        """Set the expiration date for given email token.

        :param str token: The email token to set the expiration for.
        :param datetime expiration: Datetime at which to expire the token. If ``None``, the
            token will expire after ``settings.EMAIL_TOKEN_EXPIRATION`` hours. This is only
            used for testing purposes.
        """
        expiration = expiration or (dt.datetime.utcnow() + dt.timedelta(
            hours=settings.EMAIL_TOKEN_EXPIRATION))
        self.email_verifications[token]['expiration'] = expiration
        return expiration

    def add_email_verification(self, email, expiration=None):
        """Create a verification token for ``email`` and register it.

        The email is stored lower-cased; the expiration is set through
        ``_set_email_token_expiration``.

        :param str email: Email address to verify.
        :param datetime expiration: Optional explicit expiration.
        :returns: The newly generated token.
        """
        token = generate_confirm_token()
        record = {'email': email.lower()}
        self.email_verifications[token] = record
        self._set_email_token_expiration(token, expiration=expiration)
        return token

    def get_confirmation_token(self, email, force=False):
        """Return the confirmation token for a given email.

        An unexpired token is returned whenever one exists, regardless of
        the order in which tokens are stored. Only when every token for the
        email has expired is the ``force`` flag consulted.

        :param str email: Email to get the token for (matched
            case-insensitively).
        :param bool force: If only expired tokens exist for the given email,
            discard them, generate a new token, save the user and return the
            new token.

        :raises: ExpiredTokenError if trying to access a token that is expired and force=False.
        :raises: KeyError if there no token for the email.
        """
        wanted = email.lower()
        now = dt.datetime.utcnow()
        expired_tokens = []
        for token, info in self.email_verifications.items():
            if info['email'].lower() != wanted:
                continue
            if info['expiration'] < now:
                expired_tokens.append(token)
            else:
                return token

        if not expired_tokens:
            raise KeyError('No confirmation token for email "{0}"'.format(email))
        if not force:
            raise ExpiredTokenError(
                'Token for email "{0}" is expired'.format(email))

        # Drop the stale entries before minting a replacement; otherwise a
        # later lookup could hit an expired token first and either raise or
        # generate yet another token.
        for token in expired_tokens:
            del self.email_verifications[token]
        new_token = self.add_email_verification(email)
        self.save()
        return new_token

    def get_confirmation_url(self, email, external=True, force=False):
        """Build the confirmation URL for ``email``.

        :param str email: Email whose confirmation token goes in the URL.
        :param bool external: Prefix with ``settings.DOMAIN`` when true,
            otherwise build a site-relative URL.
        :param bool force: Passed through to ``get_confirmation_token``.
        :raises: ExpiredTokenError if trying to access a token that is expired.
        :raises: KeyError if there is no token for the email.
        """
        if external:
            base = settings.DOMAIN
        else:
            base = '/'
        token = self.get_confirmation_token(email, force=force)
        url_template = "{0}confirm/{1}/{2}/"
        return url_template.format(base, self._primary_key, token)

    def verify_confirmation_token(self, token):
        """Tell whether ``token`` is a known, unexpired confirmation token
        for this user.

        :rtype: bool
        """
        if token not in self.email_verifications:
            return False
        expires_at = self.email_verifications[token]['expiration']
        return expires_at > dt.datetime.utcnow()

    def verify_claim_token(self, token, project_id):
        """Tell whether ``token`` matches the claim token stored in this
        user's unclaimed record for ``project_id``.

        :rtype: bool
        :returns: ``False`` when there is no unclaimed record for the project.
        """
        try:
            unclaimed = self.get_unclaimed_record(project_id)
        except ValueError:
            # No unclaimed record exists for this project id.
            return False
        else:
            return unclaimed['token'] == token

    def confirm_email(self, token):
        """Confirm the email address associated with ``token``.

        On success the email is appended to ``emails``, the token is revoked,
        all unclaimed records are cleared, the user is saved and the search
        index is refreshed. If the email is the username, registration is
        completed as well.

        :rtype: bool
        :returns: ``True`` if the token was valid, ``False`` otherwise.
        """
        if not self.verify_confirmation_token(token):
            return False

        confirmed_email = self.email_verifications[token]['email']
        self.emails.append(confirmed_email)

        # Confirming the primary email completes registration.
        is_primary = confirmed_email.lower() == self.username.lower()
        if is_primary:
            self.register(self.username)
            self.date_confirmed = dt.datetime.utcnow()

        # The token is single-use.
        del self.email_verifications[token]

        # Clearing unclaimed records makes the user's name show up correctly
        # on all projects.
        self.unclaimed_records = {}
        self.save()

        # Search must be updated manually because the fullname field has not
        # changed.
        self.update_search()
        self.update_search_nodes()
        return True

    def update_search_nodes(self):
        """Refresh the search index of every node this user contributes to.

        Needed to add self to contributor lists in search upon registration
        or claiming.
        """
        contributed_nodes = self.node__contributed
        for contributed in contributed_nodes:
            contributed.update_search()

    def is_confirmed(self):
        """Tell whether this user has a confirmation date set.

        :rtype: bool
        """
        if self.date_confirmed:
            return True
        return False

    @property
    def social_links(self):
        """Map each filled-in social key to its URL.

        Entries of ``self.social`` with an empty value, or whose key has no
        (non-empty) template in ``SOCIAL_FIELDS``, are skipped; the rest are
        rendered by formatting the template with the value.

        :rtype: dict
        """
        links = {}
        for key, val in self.social.items():
            if not val:
                continue
            if not self.SOCIAL_FIELDS.get(key):
                continue
            links[key] = self.SOCIAL_FIELDS[key].format(val)
        return links

    @property
    def biblio_name(self):
        """Bibliographic form of the user's name: ``"Surname, G. M."``.

        Initials are taken from the given and middle names; name parts whose
        first character is not a word character are skipped. When no initial
        can be derived (e.g. the user only has a family name), just the
        surname is returned rather than ``"Surname, "`` with a dangling
        separator.
        """
        given_names = self.given_name + ' ' + self.middle_names
        surname = self.family_name
        if surname == given_names:
            return surname
        initials = [
            name[0].upper() + '.' for name in given_names.split(' ')
            if name and re.search(r'\w', name[0], re.I)
        ]
        if not initials:
            # Nothing to put after the comma.
            return surname
        return u'{0}, {1}'.format(surname, ' '.join(initials))

    @property
    def given_name_initial(self):
        """
        The user's preferred initialization of their given name.

        Some users with common names may choose to distinguish themselves from
        their colleagues in this way. For instance, there could be two
        well-known researchers in a single field named "Robert Walker".
        "Walker, R" could then refer to either of them. "Walker, R.H." could
        provide easy disambiguation.

        NOTE: The internal representation for this should never end with a
              period. "R" and "R.H" would be correct in the prior case, but
              "R.H." would not.

        NOTE: The current implementation simply returns the first character
              of ``given_name``, or an empty string when ``given_name`` is
              empty.
        """
        # Slicing instead of indexing: an empty given name yields '' rather
        # than raising IndexError.
        return self.given_name[:1]

    @property
    def url(self):
        """Site-relative URL for this user, built from the primary key."""
        primary_key = self._primary_key
        return '/{}/'.format(primary_key)

    @property
    def api_url(self):
        """Site-relative v1 API profile URL for this user."""
        primary_key = self._primary_key
        return '/api/v1/profile/{0}/'.format(primary_key)

    @property
    def absolute_url(self):
        """``self.url`` joined onto ``settings.DOMAIN``."""
        domain = settings.DOMAIN
        return urlparse.urljoin(domain, self.url)

    @property
    def display_absolute_url(self):
        """Absolute URL with the ``http:``/``https:`` scheme removed and
        leading/trailing slashes stripped, for display purposes.

        :returns: The display string, or ``None`` if ``absolute_url`` is
            ``None``.
        """
        full_url = self.absolute_url
        if full_url is None:
            return None
        without_scheme = re.sub(r'https?:', '', full_url)
        return without_scheme.strip('/')

    @property
    def deep_url(self):
        """Site-relative ``/profile/<primary key>/`` URL for this user."""
        primary_key = self._primary_key
        return '/profile/{}/'.format(primary_key)

    @property
    def gravatar_url(self):
        """Gravatar URL for this user, requested over SSL at the size
        configured in ``settings.GRAVATAR_SIZE_ADD_CONTRIBUTOR``.
        """
        avatar_size = settings.GRAVATAR_SIZE_ADD_CONTRIBUTOR
        return filters.gravatar(self, use_ssl=True, size=avatar_size)

    def get_activity_points(self, db=None):
        """Return this user's total activity count from analytics.

        :param db: Database handle to query; when falsy,
            ``framework.mongo.database`` is used.
        """
        if not db:
            db = framework.mongo.database
        return analytics.get_total_activity_count(self._primary_key, db=db)

    @property
    def is_disabled(self):
        """Whether or not this account has been disabled.

        Abstracts ``User.date_disabled``: the account counts as disabled
        whenever that date is set.

        :return: bool
        """
        disabled_on = self.date_disabled
        return disabled_on is not None

    @is_disabled.setter
    def is_disabled(self, val):
        """Disable (truthy ``val``) or re-enable (falsy ``val``) the account.

        Disabling stamps ``date_disabled`` with the current UTC time;
        enabling clears it.
        """
        self.date_disabled = dt.datetime.utcnow() if val else None

    @property
    def is_merged(self):
        """Whether or not this account has been merged into another account
        (i.e. ``merged_by`` is set).
        """
        merged_into = self.merged_by
        return merged_into is not None

    @property
    def profile_url(self):
        """Site-relative profile URL for this user, built from ``_id``."""
        user_id = self._id
        return '/{}/'.format(user_id)

    def get_summary(self, formatter='long'):
        """Return a small dict summarizing this user.

        :param str formatter: Key into ``name_formatters`` selecting how the
            display name is rendered.
        :rtype: dict
        """
        format_name = name_formatters[formatter]
        summary = {}
        summary['user_fullname'] = self.fullname
        summary['user_profile_url'] = self.profile_url
        summary['user_display_name'] = format_name(self)
        summary['user_is_claimed'] = self.is_claimed
        return summary

    def save(self, *args, **kwargs):
        """Normalize the username, persist the user, then run post-save hooks.

        The username is lower-cased and stripped (or set to ``None`` when
        empty). After saving, the search index is refreshed if any of
        ``SEARCH_UPDATE_FIELDS`` was among the saved fields and the user is
        confirmed, and a Piwik user is created when Piwik is configured and
        this user has no Piwik token yet. A Piwik failure is logged, not
        raised.

        :returns: Whatever the parent ``save`` returned.
        """
        if self.username:
            self.username = self.username.lower().strip()
        else:
            self.username = None

        saved_fields = super(User, self).save(*args, **kwargs)

        search_fields_changed = self.SEARCH_UPDATE_FIELDS.intersection(
            saved_fields)
        if search_fields_changed and self.is_confirmed():
            self.update_search()

        if settings.PIWIK_HOST and not self.piwik_token:
            try:
                piwik.create_user(self)
            except (piwik.PiwikException, ValueError):
                logger.error("Piwik user creation failed: " + self._id)
        return saved_fields

    def update_search(self):
        """Push this user to the search index.

        If search is unavailable the error is logged and reported through
        ``log_exception`` instead of being raised.
        """
        # Imported locally, as in the original (presumably to avoid a
        # circular import -- confirm before moving to module level).
        from website import search as search_pkg
        try:
            search_pkg.search.update_user(self)
        except search_pkg.exceptions.SearchUnavailableError as err:
            logger.exception(err)
            log_exception()

    @classmethod
    def find_by_email(cls, email):
        """Find the user whose ``emails`` list contains ``email``.

        :param str email: Email address to look up.
        :rtype: list
        :returns: A one-element list with the matching user, or an empty
            list when the lookup fails for any reason.
        """
        try:
            user = cls.find_one(Q('emails', 'eq', email))
        except Exception:
            # Best-effort lookup: any query failure is reported as "no
            # users". Unlike a bare ``except:``, this lets
            # KeyboardInterrupt and SystemExit propagate.
            return []
        return [user]

    def serialize(self, anonymous=False):
        """Return a dict representation of this user.

        Identifying values (id, fullname, urls) are passed through
        ``utils.privacy_info_handle`` together with the ``anonymous`` flag.

        :param bool anonymous: Forwarded to ``utils.privacy_info_handle``.
        :rtype: dict
        """
        handle = utils.privacy_info_handle
        serialized = {}
        serialized['id'] = handle(self._primary_key, anonymous)
        serialized['fullname'] = handle(self.fullname, anonymous, name=True)
        serialized['registered'] = self.is_registered
        serialized['url'] = handle(self.url, anonymous)
        serialized['api_url'] = handle(self.api_url, anonymous)
        return serialized

    ###### OSF-Specific methods ######

    def watch(self, watch_config):
        """Start watching a node: save ``watch_config`` and append it to this
        user's ``watched`` list. The user itself is not saved here.

        :param watch_config: The WatchConfig to add.
        :raises: ValueError if the config's node is already watched.
        """
        already_watched = [existing.node for existing in self.watched]
        if watch_config.node in already_watched:
            raise ValueError('Node is already being watched.')
        watch_config.save()
        self.watched.append(watch_config)
        return None

    def unwatch(self, watch_config):
        """Stop watching a node: find the watched config whose node has the
        same ``_id`` as ``watch_config.node`` and remove it via its class's
        ``remove_one``.

        :param watch_config: The WatchConfig identifying the node to unwatch.
        :raises: ValueError if the node is not currently being watched.
        """
        for existing in self.watched:
            if watch_config.node._id != existing.node._id:
                continue
            existing.__class__.remove_one(existing)
            return None
        raise ValueError('Node not being watched.')

    def is_watching(self, node):
        """Return whether or not this user is watching ``node``, compared by
        node ``_id``.

        :rtype: bool
        """
        watched_ids = {watched.node._id for watched in self.watched}
        return node._id in watched_ids

    def get_recent_log_ids(self, since=None):
        '''Return a generator over the ids of recent logs on watched nodes.

        :param since: A datetime specifying the oldest time to retrieve logs
        from. If ``None``, defaults to 60 days before now. Must be a tz-aware
        datetime because PyMongo's generation times are tz-aware.

        :rtype: generator of log ids (strings)
        '''
        # Timezone-aware "now", used for the default 60-day window.
        now_utc = dt.datetime.utcnow().replace(tzinfo=pytz.utc)
        cutoff = since or (now_utc - dt.timedelta(days=60))

        collected = []
        for watch_config in self.watched:
            # An ObjectId embeds its creation time, so each log's timestamp
            # can be read from the id without loading the Log object.
            fresh_ids = []
            for log_id in watch_config.node.logs._to_primary_keys():
                created = bson.ObjectId(log_id).generation_time
                if created > cutoff and log_id not in collected:
                    fresh_ids.append(log_id)
            # Log ids in reverse chronological order
            collected = _merge_into_reversed(collected, fresh_ids)
        return (l_id for l_id in collected)

    def get_daily_digest_log_ids(self):
        '''Return a generator of log ids generated since the start of the
        current UTC day (00:00 UTC).
        '''
        today = dt.datetime.utcnow()
        # Hour, minute and second default to 0, i.e. midnight.
        start_of_day = dt.datetime(
            today.year, today.month, today.day, tzinfo=pytz.utc)
        return self.get_recent_log_ids(since=start_of_day)

    def merge_user(self, user, save=False):
        """Merge another user into this account.

        This account inherits ``user``'s emails, replaces ``user`` as a
        contributor (same permissions and visibility) on every node ``user``
        contributed to, and becomes the creator of every node ``user``
        created. ``user`` is then marked as merged by this account and saved.

        :param user: A User object to be merged.
        :param bool save: Whether to also save this account afterwards.
        """
        # Inherit emails
        self.emails.extend(user.emails)

        # Take over the merged user's contributor slots.
        for contributed in user.node__contributed:
            contributed.add_contributor(
                contributor=self,
                permissions=contributed.get_permissions(user),
                visible=contributed.get_visible(user),
                log=False,
            )
            try:
                contributed.remove_contributor(
                    contributor=user,
                    auth=Auth(user=self),
                    log=False,
                )
            except ValueError:
                logger.error('Contributor {0} not in list on node {1}'.format(
                    user._id, contributed._id))
            contributed.save()

        # Take over the nodes the merged user created.
        for created in user.node__created:
            created.creator = self
            created.save()

        user.merged_by = self
        user.save()
        if save:
            self.save()
        return None

    def get_projects_in_common(self, other_user, primary_keys=True):
        """Return the set of "shared projects" (projects that both users are
        contributors for).

        :param other_user: The user to compare contributions with.
        :param bool primary_keys: When true, the set holds the projects'
            primary keys; otherwise it holds the project objects.
        :rtype: set
        """
        if primary_keys:
            mine = set(self.node__contributed._to_primary_keys())
            theirs = other_user.node__contributed._to_primary_keys()
        else:
            mine = set(self.node__contributed)
            theirs = other_user.node__contributed
        return mine.intersection(theirs)

    def n_projects_in_common(self, other_user):
        """Return the number of "shared projects" (projects that both users
        are contributors for).

        :rtype: int
        """
        shared_keys = self.get_projects_in_common(other_user,
                                                  primary_keys=True)
        return len(shared_keys)
Ejemplo n.º 24
0
class PreprintService(GuidStoredObject):
    """Preprint record linking a ``Node`` to a ``PreprintProvider``.

    The primary file, article DOI and orphan flag are read from the node;
    this model adds the provider, publication state and subjects. Mutating
    methods require the acting user to have ADMIN permission on the node.
    """

    _id = fields.StringField(primary=True)
    date_created = fields.DateTimeField(auto_now_add=True)
    date_modified = fields.DateTimeField(auto_now=True)
    provider = fields.ForeignField('PreprintProvider', index=True)
    node = fields.ForeignField('Node', index=True)
    is_published = fields.BooleanField(default=False, index=True)
    date_published = fields.DateTimeField()

    # This is a list of lists of Subject ids. MODM doesn't do schema
    # validation for DictionaryFields, but would unsuccessfully attempt
    # to validate the schema for a list of lists of ForeignFields.
    #
    # Format: [[root_subject._id, ..., child_subject._id], ...]
    subjects = fields.DictionaryField(list=True)

    @property
    def primary_file(self):
        """The node's preprint file, or ``None`` when there is no node."""
        if not self.node:
            return
        return self.node.preprint_file

    @property
    def article_doi(self):
        """The node's preprint article DOI, or ``None`` when there is no node."""
        if not self.node:
            return
        return self.node.preprint_article_doi

    @property
    def is_preprint_orphan(self):
        """The node's orphan flag, or ``None`` when there is no node."""
        if not self.node:
            return
        return self.node.is_preprint_orphan

    @property
    def deep_url(self):
        # Required for GUID routing
        return '/preprints/{}/'.format(self._primary_key)

    @property
    def url(self):
        """Site-relative URL for this preprint."""
        return '/{}/'.format(self._id)

    @property
    def absolute_url(self):
        """``self.url`` joined onto ``settings.DOMAIN``."""
        return urlparse.urljoin(settings.DOMAIN, self.url)

    @property
    def absolute_api_v2_url(self):
        """API v2 URL for this preprint, built by ``api_v2_url``."""
        path = '/preprints/{}/'.format(self._id)
        return api_v2_url(path)

    def get_subjects(self):
        """Return the subjects as a list of hierarchies.

        Each hierarchy is a list of ``{'id': ..., 'text': ...}`` dicts.
        Subject ids that fail to load are skipped, and hierarchies left
        empty are omitted.
        """
        ret = []
        for subj_list in self.subjects:
            subj_hierarchy = []
            for subj_id in subj_list:
                subj = Subject.load(subj_id)
                if subj:
                    subj_hierarchy.append({'id': subj_id, 'text': subj.text})
            if subj_hierarchy:
                ret.append(subj_hierarchy)
        return ret

    def set_subjects(self, preprint_subjects, auth, save=False):
        """Replace the preprint's subjects.

        :param preprint_subjects: Iterable of subject-id hierarchies; empty
            hierarchies are ignored.
        :param auth: Auth of the acting user; must be an admin on the node.
        :param bool save: Whether to save the preprint afterwards.
        :raises: PermissionsError if the user is not an admin.
        """
        if not self.node.has_permission(auth.user, ADMIN):
            raise PermissionsError('Only admins can change a preprint\'s subjects.')

        # Validate everything before touching ``self.subjects`` so a failing
        # hierarchy cannot leave the preprint with partially replaced
        # subjects.
        new_subjects = []
        for subj_list in preprint_subjects:
            subj_hierarchy = list(subj_list)
            if subj_hierarchy:
                validate_subject_hierarchy(subj_hierarchy)
                new_subjects.append(subj_hierarchy)
        self.subjects = new_subjects

        if save:
            self.save()

    def set_primary_file(self, preprint_file, auth, save=False):
        """Set the node's preprint file.

        :param preprint_file: A ``StoredFileNode``, or an object exposing one
            as ``stored_object``. It must belong to this preprint's node and
            to the ``osfstorage`` provider.
        :param auth: Auth of the acting user; must be an admin on the node.
        :param bool save: Whether to save the preprint and node afterwards.
        :raises: PermissionsError if the user is not an admin.
        :raises: ValueError if the file is not valid for this preprint.
        """
        if not self.node.has_permission(auth.user, ADMIN):
            raise PermissionsError('Only admins can change a preprint\'s primary file.')

        if not isinstance(preprint_file, StoredFileNode):
            preprint_file = preprint_file.stored_object

        if preprint_file.node != self.node or preprint_file.provider != 'osfstorage':
            raise ValueError('This file is not a valid primary file for this preprint.')

        # there is no preprint file yet! This is the first time!
        if not self.node.preprint_file:
            self.node.preprint_file = preprint_file
        elif preprint_file != self.node.preprint_file:
            # if there was one, check if it's a new file; only a replacement
            # is logged.
            self.node.preprint_file = preprint_file
            self.node.add_log(
                action=NodeLog.PREPRINT_FILE_UPDATED,
                params={},
                auth=auth,
                save=False,
            )

        if save:
            self.save()
            self.node.save()

    def set_published(self, published, auth, save=False):
        """Publish the preprint (a published preprint cannot be unpublished).

        Publishing requires a valid primary file on the node, a provider and
        at least one subject. It stamps ``date_published``, logs the event
        on the node and makes the node public if it is not already.

        :param bool published: The desired publication state.
        :param auth: Auth of the acting user; must be an admin on the node.
        :param bool save: Whether to save the node and preprint afterwards.
        :raises: PermissionsError if the user is not an admin.
        :raises: ValueError on an unpublish attempt or an unmet precondition.
        """
        if not self.node.has_permission(auth.user, ADMIN):
            raise PermissionsError('Only admins can publish a preprint.')

        if self.is_published and not published:
            raise ValueError('Cannot unpublish preprint.')

        if published:
            # Check every precondition before mutating state, so a failed
            # publish attempt does not leave ``is_published`` set to True.
            preprint_file = self.node.preprint_file
            if not (preprint_file and preprint_file.node == self.node):
                raise ValueError('Preprint node is not a valid preprint; cannot publish.')
            if not self.provider:
                raise ValueError('Preprint provider not specified; cannot publish.')
            if not self.subjects:
                raise ValueError('Preprint must have at least one subject to be published.')

        self.is_published = published

        if published:
            self.date_published = datetime.datetime.utcnow()
            self.node._has_abandoned_preprint = False

            self.node.add_log(action=NodeLog.PREPRINT_INITIATED, params={}, auth=auth, save=False)

            if not self.node.is_public:
                self.node.set_privacy(
                    self.node.PUBLIC,
                    auth=None,
                    log=True
                )

        if save:
            self.node.save()
            self.save()

    def save(self, *args, **kwargs):
        """Save the preprint and, if the parent ``save`` reported saved
        fields, enqueue the ``on_preprint_updated`` task.

        :returns: Whatever the parent ``save`` returned.
        """
        saved_fields = super(PreprintService, self).save(*args, **kwargs)
        if saved_fields:
            enqueue_task(on_preprint_updated.s(self._id))
        return saved_fields
Ejemplo n.º 25
0
class FileVersion(StoredObject):
    """A version of an OsfStorageFileNode. Contains information
    about where the file is located, hashes and datetimes.
    """

    _id = fields.StringField(primary=True,
                             default=lambda: str(bson.ObjectId()))

    creator = fields.ForeignField('user')

    identifier = fields.StringField(required=True)

    # Date version record was created. This is the date displayed to the user.
    date_created = fields.DateTimeField(auto_now_add=True)

    # Dictionary specifying all information needed to locate file on backend
    # {
    #     'service': 'cloudfiles',  # required
    #     'container': 'osf',       # required
    #     'object': '20c53b',       # required
    #     'worker_url': '127.0.0.1',
    #     'worker_host': 'upload-service-1',
    # }
    location = fields.DictionaryField(default=None,
                                      validate=utils.validate_location)

    # Dictionary containing raw metadata from upload service response
    # {
    #     'size': 1024,                            # required
    #     'content_type': 'text/plain',            # required
    #     'date_modified': '2014-11-07T20:24:15',  # required
    #     'md5': 'd077f2',
    # }
    metadata = fields.DictionaryField()

    size = fields.IntegerField()
    content_type = fields.StringField()
    # Date file modified on third-party backend. Not displayed to user, since
    # this date may be earlier than the date of upload if the file already
    # exists on the backend
    date_modified = fields.DateTimeField()

    @property
    def location_hash(self):
        """The ``'object'`` entry of ``location``."""
        return self.location['object']

    @property
    def archive(self):
        """The ``'archive'`` entry of ``metadata``, or ``None`` if absent."""
        return self.metadata.get('archive')

    def is_duplicate(self, other):
        """Tell whether ``other`` has the same location hash as this version."""
        return self.location_hash == other.location_hash

    def update_metadata(self, metadata, save=True):
        """Merge ``metadata`` into the stored metadata and refresh the
        derived ``size``, ``content_type`` and ``date_modified`` attributes.

        :param dict metadata: Metadata entries to merge in.
        :param bool save: Whether to save this version afterwards.
        """
        self.metadata.update(metadata)
        # Metadata has no defined structure, so attributes are only set when
        # their keys are present; anything missing from this callback is
        # expected to arrive in a later one.
        self.size = self.metadata.get('size', self.size)
        self.content_type = self.metadata.get('contentType', self.content_type)
        if self.metadata.get('modified') is not None:
            # TODO: handle the timezone here; the user that updates the file
            # may see an incorrect version
            self.date_modified = parse_date(self.metadata['modified'],
                                            ignoretz=True)

        if save:
            self.save()

    def _find_matching_archive(self, save=True):
        """Find another version with the same sha256 as this file.
        If found, copy its vault name and glacier id; no need to create
        additional backups.

        :param bool save: Whether to save this version after copying.
        :returns: True if an archive is known for this file (already present
            or copied from a match), otherwise False.
        """
        if 'sha256' not in self.metadata:
            return False  # Don't bother searching for nothing

        if 'vault' in self.metadata and 'archive' in self.metadata:
            # Shouldn't ever happen, but we already have an archive
            return True  # We've found ourself

        qs = self.__class__.find(
            Q('_id', 'ne', self._id) & Q('metadata.vault', 'ne', None)
            & Q('metadata.archive', 'ne', None)
            & Q('metadata.sha256', 'eq', self.metadata['sha256'])).limit(1)
        if qs.count() < 1:
            return False
        other = qs[0]
        try:
            # Read both values before writing either, so a missing key
            # cannot leave this version with only half of the archive info.
            vault = other.metadata['vault']
            archive = other.metadata['archive']
        except KeyError:
            return False
        self.metadata['vault'] = vault
        self.metadata['archive'] = archive
        if save:
            self.save()
        return True
Ejemplo n.º 26
0
class ArchiveJob(StoredObject):
    """Tracks the archiving of ``src_node`` into ``dst_node``, with one
    ``ArchiveTarget`` per addon being archived.
    """

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # whether or not the ArchiveJob is complete (success or fail)
    done = fields.BooleanField(default=False)
    # whether or not emails have been sent for this ArchiveJob
    sent = fields.BooleanField(default=False)
    status = fields.StringField(default=ARCHIVER_INITIATED)
    datetime_initiated = fields.DateTimeField(default=datetime.datetime.utcnow)

    dst_node = fields.ForeignField('node', backref='active')
    src_node = fields.ForeignField('node')
    initiator = fields.ForeignField('user')

    target_addons = fields.ForeignField('archivetarget', list=True)

    # This field is used for stashing embargo URLs while still in the app context
    # Format: {
    #     'view': <str> url,
    #     'approve': <str> url,
    #     'disapprove': <str> url,
    # }
    meta = fields.DictionaryField()

    def __repr__(self):
        return (
            '<{ClassName}(_id={self._id}, done={self.done}, '
            ' status={self.status}, src_node={self.src_node}, dst_node={self.dst_node})>'
        ).format(ClassName=self.__class__.__name__, self=self)

    @property
    def children(self):
        """Archive jobs of the primary child nodes of ``dst_node``."""
        return [
            node.archive_job for node in self.dst_node.nodes if node.primary
        ]

    @property
    def parent(self):
        """Archive job of ``dst_node``'s parent node, or ``None``."""
        parent_node = self.dst_node.parent_node
        return parent_node.archive_job if parent_node else None

    @property
    def success(self):
        """Whether this job's status is ``ARCHIVER_SUCCESS``."""
        return self.status == ARCHIVER_SUCCESS

    @property
    def pending(self):
        """Whether any target is neither successful nor failed yet."""
        return any(
            target.status not in (ARCHIVER_SUCCESS, ARCHIVER_FAILURE)
            for target in self.target_addons
        )

    def info(self):
        """Return ``(src_node, dst_node, initiator)``."""
        return self.src_node, self.dst_node, self.initiator

    def target_info(self):
        """Return one summary dict (name, status, stat_result, errors) per
        target addon.
        """
        return [{
            'name': target.name,
            'status': target.status,
            'stat_result': target.stat_result,
            'errors': target.errors
        } for target in self.target_addons]

    def archive_tree_finished(self):
        """Tell whether this job and every job below it have no pending
        targets.

        :rtype: bool
        """
        if self.pending:
            return False
        # Every child subtree must be finished. The previous implementation
        # returned the *count* of finished children, which was truthy as
        # soon as any single child had finished. ``all`` of an empty
        # sequence is True, which covers the no-children case.
        return all(child.archive_tree_finished() for child in self.children)

    def _fail_above(self):
        """Mark the ArchiveJob attached to the parent node (if any) as failed.

        NOTE(review): only the immediate parent is updated here; confirm
        whether failure is meant to propagate to further ancestors.
        """
        parent = self.parent
        if parent:
            parent.status = ARCHIVER_FAILURE
            parent.save()

    def _post_update_target(self):
        """Checks for success or failure if the ArchiveJob on self.dst_node
        is finished
        """
        if self.status == ARCHIVER_FAILURE:
            return
        if not self.pending:
            self.done = True
            any_failed = any(
                target.status in ARCHIVER_FAILURE_STATUSES
                for target in self.target_addons
            )
            if any_failed:
                self.status = ARCHIVER_FAILURE
                self._fail_above()
            else:
                self.status = ARCHIVER_SUCCESS
            self.save()

    def get_target(self, addon_short_name):
        """Return the first target named ``addon_short_name``, or ``None``."""
        for addon in self.target_addons:
            if addon.name == addon_short_name:
                return addon
        return None

    def _set_target(self, addon_short_name):
        """Create, save and attach a target for ``addon_short_name`` unless
        one already exists.
        """
        if self.get_target(addon_short_name):
            return
        target = ArchiveTarget(name=addon_short_name)
        target.save()
        self.target_addons.append(target)

    def set_targets(self):
        """Create a target for every archivable storage addon on ``src_node``
        and save the job.

        An addon qualifies when it is configured as archivable (not
        ``'none'``), is present and complete, is a ``StorageAddonBase``, and
        reports no archive errors. Dataverse gets two targets (draft and
        published).
        """
        addons = []
        candidates = [
            self.src_node.get_addon(name)
            for name in settings.ADDONS_ARCHIVABLE
            if settings.ADDONS_ARCHIVABLE[name] != 'none'
        ]
        for addon in candidates:
            if not addon or not addon.complete or not isinstance(
                    addon, StorageAddonBase):
                continue
            archive_errors = getattr(addon, 'archive_errors', None)
            if not archive_errors or not archive_errors():
                if addon.config.short_name == 'dataverse':
                    addons.append(addon.config.short_name + '-draft')
                    addons.append(addon.config.short_name + '-published')
                else:
                    addons.append(addon.config.short_name)
        for addon in addons:
            self._set_target(addon)
        self.save()

    def update_target(self,
                      addon_short_name,
                      status,
                      stat_result=None,
                      errors=None):
        """Record a new status for the named target, then re-evaluate the
        job's overall state.

        :param str addon_short_name: Name of the target to update.
        :param status: New status for the target.
        :param stat_result: Stat result to store; defaults to ``{}``.
        :param errors: Errors to store; defaults to ``[]``.
        """
        stat_result = stat_result or {}
        errors = errors or []

        # NOTE(review): ``get_target`` returns None for an unknown name, in
        # which case the attribute assignment below raises AttributeError.
        target = self.get_target(addon_short_name)
        target.status = status
        target.errors = errors
        target.stat_result = stat_result
        target.save()
        self._post_update_target()
Ejemplo n.º 27
0
class Sanction(StoredObject):
    """Sanction class is a generic way to track approval states"""
    # Tell modularodm not to attach backends
    _meta = {
        'abstract': True,
    }

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # Neither approved nor cancelled
    UNAPPROVED = 'unapproved'
    # Has approval
    APPROVED = 'approved'
    # Rejected by at least one person
    REJECTED = 'rejected'
    # Embargo has been completed
    COMPLETED = 'completed'

    state = fields.StringField(default=UNAPPROVED,
                               validate=validators.choice_in((
                                   UNAPPROVED,
                                   APPROVED,
                                   REJECTED,
                                   COMPLETED,
                               )))

    DISPLAY_NAME = 'Sanction'
    # SHORT_NAME must correspond with the associated foreign field to query against,
    # e.g. Node.find_one(Q(sanction.SHORT_NAME, 'eq', sanction))
    SHORT_NAME = 'sanction'

    APPROVAL_NOT_AUTHORIZED_MESSAGE = 'This user is not authorized to approve this {DISPLAY_NAME}'
    APPROVAL_INVALID_TOKEN_MESSAGE = 'Invalid approval token provided for this {DISPLAY_NAME}.'
    REJECTION_NOT_AUTHORIZED_MESSAGE = 'This user is not authorized to reject this {DISPLAY_NAME}'
    # Misspelled name kept as an alias for existing callers.
    REJECTION_NOT_AUTHORIZED_MESSAEGE = REJECTION_NOT_AUTHORIZED_MESSAGE
    REJECTION_INVALID_TOKEN_MESSAGE = 'Invalid rejection token provided for this {DISPLAY_NAME}.'

    # Controls whether or not the Sanction needs unanimous approval or just a single approval
    ANY = 'any'
    UNANIMOUS = 'unanimous'
    mode = UNANIMOUS

    initiation_date = fields.DateTimeField(
        auto_now_add=datetime.datetime.utcnow)
    # Expiration date -- Sanctions in the UNAPPROVED state that are older than their end_date
    # are automatically made ACTIVE by a daily cron job
    # Use end_date=None for a non-expiring Sanction
    end_date = fields.DateTimeField(default=None)

    # Sanction subclasses must have an initiated_by field
    # initiated_by = fields.ForeignField('user', backref='initiated')

    # Expanded: Dictionary field mapping admin IDs to their approval status and relevant tokens:
    # {
    #   'b3k97': {
    #     'has_approved': False,
    #     'approval_token': 'Pew7wj1Puf7DENUPFPnXSwa1rf3xPN',
    #     'rejection_token': 'TwozClTFOic2PYxHDStby94bCQMwJy'}
    # }
    approval_state = fields.DictionaryField()

    def __repr__(self):
        return '<Sanction(end_date={self.end_date!r}) with _id {self._id!r}>'.format(
            self=self)

    @property
    def is_pending_approval(self):
        """Whether the sanction is in the UNAPPROVED state."""
        return self.state == Sanction.UNAPPROVED

    @property
    def is_approved(self):
        """Whether the sanction is in the APPROVED state."""
        return self.state == Sanction.APPROVED

    @property
    def is_rejected(self):
        """Whether the sanction is in the REJECTED state."""
        return self.state == Sanction.REJECTED

    def approve(self, user):
        """Approve the sanction on behalf of ``user``; must be overridden."""
        raise NotImplementedError(
            "Sanction subclasses must implement an approve method.")

    def reject(self, user):
        """Reject the sanction on behalf of ``user``; must be overridden."""
        # The message previously named the ``approve`` method by mistake.
        raise NotImplementedError(
            "Sanction subclasses must implement a reject method.")

    def _on_reject(self, user):
        """Callback for rejection of a Sanction

        :param User user:
        """
        raise NotImplementedError(
            'Sanction subclasses must implement an #_on_reject method')

    def _on_complete(self, user):
        """Callback for when a Sanction has approval and enters the ACTIVE state

        :param User user:
        """
        raise NotImplementedError(
            'Sanction subclasses must implement an #_on_complete method')

    def forcibly_reject(self):
        """Set the state to REJECTED without any checks (does not save)."""
        self.state = Sanction.REJECTED
Ejemplo n.º 28
0
class DropboxNodeSettings(StorageAddonBase, AddonOAuthNodeSettingsBase):
    """Node-level settings for the Dropbox add-on.

    Records which Dropbox folder is linked to a node and implements the
    node lifecycle callbacks (register, fork, contributor removal, delete).
    """

    oauth_provider = DropboxProvider
    serializer = DropboxSerializer

    # Linked Dropbox folder; None until a folder has been selected.
    folder = fields.StringField(default=None)

    #: Information saved at the time of registration
    #: Note: This is unused right now
    registration_data = fields.DictionaryField()

    _folder_data = None

    # Cache for the provider instance built lazily by the ``api`` property.
    _api = None

    @property
    def api(self):
        """Authenticated ExternalProvider instance, created on first access."""
        if self._api is None:
            self._api = DropboxProvider(self.external_account)
        return self._api

    @property
    def folder_id(self):
        """Identifier of the linked folder (the same value as ``folder``)."""
        return self.folder

    @property
    def folder_name(self):
        """Tail of ``os.path.split`` on the linked folder ('' if none is set)."""
        return os.path.split(self.folder or '')[1]

    @property
    def folder_path(self):
        """Path of the linked folder (the same value as ``folder``)."""
        return self.folder

    @property
    def display_name(self):
        """Label of the form ``<addon full name>: <folder>``."""
        # A unicode template keeps non-ASCII folder names from raising
        # UnicodeEncodeError when formatted under Python 2.
        return u'{0}: {1}'.format(self.config.full_name, self.folder)

    def clear_settings(self):
        """Forget the linked folder."""
        self.folder = None

    def fetch_folder_name(self):
        """Return the linked folder as stored (no remote lookup is made)."""
        return self.folder

    def set_folder(self, folder, auth):
        """Link ``folder`` to this node and log the selection.

        :param folder: value to store in ``folder``
        :param auth: accepted for interface compatibility; not used here
        """
        self.folder = folder
        # Add log to node
        self.nodelogger.log(action="folder_selected", save=True)

    # TODO: Is this used? If not, remove this and perhaps remove the 'deleted' field
    def delete(self, save=True):
        """Deauthorize without logging, then defer to the parent ``delete``."""
        self.deauthorize(add_log=False)
        super(DropboxNodeSettings, self).delete(save)

    def deauthorize(self, auth=None, add_log=True):
        """Remove user authorization from this node and log the event.

        :param auth: accepted for interface compatibility; not used here
        :param bool add_log: whether to write a ``node_deauthorized`` log
        """
        # Capture the folder before clearing so the log can still report it.
        folder = self.folder
        self.clear_settings()

        if add_log:
            extra = {'folder': folder}
            self.nodelogger.log(action="node_deauthorized",
                                extra=extra,
                                save=True)

        self.clear_auth()

    def serialize_waterbutler_credentials(self):
        """Return the credentials dict handed to WaterButler.

        :raises exceptions.AddonError: if the addon is not authorized
        """
        if not self.has_auth:
            raise exceptions.AddonError('Addon is not authorized')
        return {'token': self.external_account.oauth_key}

    def serialize_waterbutler_settings(self):
        """Return the settings dict handed to WaterButler.

        :raises exceptions.AddonError: if no folder is configured
        """
        if not self.folder:
            raise exceptions.AddonError('Folder is not configured')
        return {'folder': self.folder}

    def create_waterbutler_log(self, auth, action, metadata):
        """Add a ``dropbox_<action>`` log entry to the owning node.

        :param auth: passed through to ``add_log``
        :param str action: suffix of the log action name
        :param dict metadata: must contain a ``'path'`` key
        """
        url = self.owner.web_url_for('addon_view_or_download_file',
                                     path=metadata['path'].strip('/'),
                                     provider='dropbox')
        self.owner.add_log(
            'dropbox_{0}'.format(action),
            auth=auth,
            params={
                'project': self.owner.parent_id,
                'node': self.owner._id,
                'path': metadata['path'],
                'folder': self.folder,
                'urls': {
                    'view': url,
                    'download': url + '?action=download'
                },
            },
        )

    def __repr__(self):
        return u'<DropboxNodeSettings(node_id={self.owner._primary_key!r})>'.format(
            self=self)

    ##### Callback overrides #####

    def before_register_message(self, node, user):
        """Return warning text to display if user auth will be copied to a
        registration.

        Returns None when the addon has no authorized user settings.
        """
        if self.user_settings and self.user_settings.has_auth:
            return (
                u'The contents of Dropbox add-ons cannot be registered at this time; '
                u'the Dropbox folder linked to this {category} will not be included '
                u'as part of this registration.'
            ).format(category=node.project_or_component)

    # backwards compatibility
    before_register = before_register_message

    def before_remove_contributor_message(self, node, removed):
        """Return warning text to display if removed contributor is the user
        who authorized the Dropbox addon

        Returns None when ``removed`` is not the authorizing user.
        """
        if self.user_settings and self.user_settings.owner == removed:
            return (
                u'The Dropbox add-on for this {category} is authenticated by {name}. '
                'Removing this user will also remove write access to Dropbox '
                'unless another contributor re-authenticates the add-on.'
            ).format(category=node.project_or_component,
                     name=removed.fullname)

    # backwards compatibility
    before_remove_contributor = before_remove_contributor_message

    # Note: Registering Dropbox content is disabled for now; leaving this code
    # here in case we enable registrations later on.
    # @jmcarp
    # def after_register(self, node, registration, user, save=True):
    #     """After registering a node, copy the user settings and save the
    #     chosen folder.
    #
    #     :return: A tuple of the form (cloned_settings, message)
    #     """
    #     clone, message = super(DropboxNodeSettings, self).after_register(
    #         node, registration, user, save=False
    #     )
    #     # Copy user_settings and add registration data
    #     if self.has_auth and self.folder is not None:
    #         clone.user_settings = self.user_settings
    #         clone.registration_data['folder'] = self.folder
    #     if save:
    #         clone.save()
    #     return clone, message

    def after_fork(self, node, fork, user, save=True):
        """After forking, copy user settings if the user is the one who authorized
        the addon.

        :return: A tuple of the form (cloned_settings, message)
        """
        # The parent is asked not to save so the clone is written at most
        # once, after the authorization has been copied.
        clone, _ = super(DropboxNodeSettings, self).after_fork(node=node,
                                                               fork=fork,
                                                               user=user,
                                                               save=False)

        if self.user_settings and self.user_settings.owner == user:
            clone.user_settings = self.user_settings
            message = ('Dropbox authorization copied to forked {cat}.').format(
                cat=fork.project_or_component)
        else:
            message = (
                u'Dropbox authorization not copied to forked {cat}. You may '
                'authorize this fork on the <u><a href="{url}">Settings</a></u> '
                'page.').format(url=fork.web_url_for('node_setting'),
                                cat=fork.project_or_component)
        if save:
            clone.save()
        return clone, message

    def after_remove_contributor(self, node, removed, auth=None):
        """If the removed contributor was the user who authorized the Dropbox
        addon, remove the auth credentials from this node.
        Return the message text that will be displayed to the user.

        Returns None when ``removed`` is not the authorizing user.
        """
        if self.user_settings and self.user_settings.owner == removed:
            self.user_settings = None
            self.save()

            message = (
                u'Because the Dropbox add-on for {category} "{title}" was authenticated '
                u'by {user}, authentication information has been deleted.'
            ).format(category=node.category_display,
                     title=node.title,
                     user=removed.fullname)

            # Only offer the re-authentication link when the viewer is not
            # the contributor who was just removed.
            if not auth or auth.user != removed:
                url = node.web_url_for('node_setting')
                message += (
                    u' You can re-authenticate on the <u><a href="{url}">Settings</a></u> page.'
                ).format(url=url)
            return message

    def after_delete(self, node, user):
        """Deauthorize (with logging) and save when the node is deleted."""
        self.deauthorize(Auth(user=user), add_log=True)
        self.save()
Ejemplo n.º 29
0
class AddonS3NodeSettings(StorageAddonBase, AddonNodeSettingsBase):
    """Node-level settings for the Amazon S3 add-on.

    Links a node to an S3 bucket through the authorizing user's
    ``addons3usersettings`` record and implements the node lifecycle
    callbacks (register, fork, contributor removal, delete).
    """

    registration_data = fields.DictionaryField()
    bucket = fields.StringField()
    user_settings = fields.ForeignField('addons3usersettings',
                                        backref='authorized')

    @property
    def folder_name(self):
        """Name shown for the linked storage location (the bucket)."""
        return self.bucket

    def find_or_create_file_guid(self, path):
        """Return the ``S3GuidFile`` for ``path`` on the owning node.

        Leading slashes are stripped from ``path`` before the lookup.
        """
        path = path.lstrip('/')
        return S3GuidFile.get_or_create(node=self.owner, path=path)

    @property
    def display_name(self):
        """Label of the form ``<addon full name>: <bucket>``."""
        return u'{0}: {1}'.format(self.config.full_name, self.bucket)

    @property
    def complete(self):
        """True when the addon is authorized and ``bucket`` is not None."""
        return self.has_auth and self.bucket is not None

    def authorize(self, user_settings, save=False):
        """Attach ``user_settings`` to this node and log the authorization.

        :param user_settings: S3 user settings of the authorizing user
        :param bool save: save these node settings afterwards
        """
        self.user_settings = user_settings
        self.owner.add_log(
            action='s3_node_authorized',
            params={
                'project': self.owner.parent_id,
                'node': self.owner._id,
            },
            auth=Auth(user_settings.owner),
        )
        if save:
            self.save()

    def deauthorize(self, auth=None, log=True, save=False):
        """Clear registration data, bucket and user settings.

        :param auth: passed through to the node log entry
        :param bool log: write an ``s3_node_deauthorized`` log entry
        :param bool save: save these node settings afterwards
        """
        self.registration_data = {}
        self.bucket = None
        self.user_settings = None

        if log:
            self.owner.add_log(
                action='s3_node_deauthorized',
                params={
                    'project': self.owner.parent_id,
                    'node': self.owner._id,
                },
                auth=auth,
            )
        if save:
            self.save()

    def delete(self, save=True):
        """Deauthorize without logging, then defer to the parent ``delete``."""
        self.deauthorize(log=False, save=False)
        super(AddonS3NodeSettings, self).delete(save=save)

    def serialize_waterbutler_credentials(self):
        """Return the credentials dict handed to WaterButler.

        :raises exceptions.AddonError: if the addon is not authorized
        """
        if not self.has_auth:
            raise exceptions.AddonError(
                'Cannot serialize credentials for S3 addon')
        return {
            'access_key': self.user_settings.access_key,
            'secret_key': self.user_settings.secret_key,
        }

    def serialize_waterbutler_settings(self):
        """Return the settings dict handed to WaterButler.

        :raises exceptions.AddonError: if no bucket is configured
        """
        if not self.bucket:
            raise exceptions.AddonError(
                'Cannot serialize settings for S3 addon')
        return {'bucket': self.bucket}

    def create_waterbutler_log(self, auth, action, metadata):
        """Add an ``s3_<action>`` log entry to the owning node.

        :param auth: passed through to ``add_log``
        :param str action: suffix of the log action name
        :param dict metadata: must contain ``'path'`` (used for the URLs)
            and ``'materialized'`` (recorded as the logged path)
        """
        url = self.owner.web_url_for('addon_view_or_download_file',
                                     path=metadata['path'],
                                     provider='s3')

        self.owner.add_log(
            's3_{0}'.format(action),
            auth=auth,
            params={
                'project': self.owner.parent_id,
                'node': self.owner._id,
                'path': metadata['materialized'],
                'bucket': self.bucket,
                'urls': {
                    'view': url,
                    'download': url + '?action=download'
                }
            },
        )

    def to_json(self, user):
        """Serialize these settings for ``user``.

        Extends the parent serialization with bucket and authorization
        details; owner name and URL are filled in only when the node is
        authorized.
        """
        ret = super(AddonS3NodeSettings, self).to_json(user)

        # S3 settings of the requesting user (not of the node's authorizer).
        user_settings = user.get_addon('s3')

        ret.update({
            'bucket': self.bucket or '',
            'has_bucket': self.bucket is not None,
            'user_is_owner': (self.user_settings
                              and self.user_settings.owner == user),
            'user_has_auth': bool(user_settings) and user_settings.has_auth,
            'node_has_auth': self.has_auth,
            'owner': None,
            'bucket_list': None,
            'is_registration': self.owner.is_registration,
            'valid_credentials': user_settings and user_settings.is_valid,
        })

        if self.has_auth:
            ret['owner'] = self.user_settings.owner.fullname
            ret['owner_url'] = self.user_settings.owner.url
            ret['node_has_auth'] = True

        return ret

    @property
    def is_registration(self):
        """True when ``registration_data`` is non-empty."""
        return bool(self.registration_data)

    @property
    def has_auth(self):
        """True when user settings are attached and report ``has_auth``."""
        return bool(self.user_settings and self.user_settings.has_auth)

    # TODO: Update callbacks

    def before_register(self, node, user):
        """

        :param Node node:
        :param User user:
        :return str: Alert message

        """
        if self.user_settings and self.user_settings.has_auth:
            return (
                u'The contents of S3 add-ons cannot be registered at this time; '
                u'the S3 bucket linked to this {category} will not be included '
                u'as part of this registration.'
            ).format(category=node.project_or_component)

    def after_fork(self, node, fork, user, save=True):
        """

        :param Node node: Original node
        :param Node fork: Forked node
        :param User user: User creating fork
        :param bool save: Save settings after callback
        :return tuple: Tuple of cloned settings and alert message

        """
        # The parent is asked not to save so the clone is written at most
        # once, after the authorization has been copied.
        clone, _ = super(AddonS3NodeSettings, self).after_fork(node,
                                                               fork,
                                                               user,
                                                               save=False)

        # Copy authentication if authenticated by forking user
        if self.user_settings and self.user_settings.owner == user:
            clone.user_settings = self.user_settings
            clone.bucket = self.bucket
            message = (
                'Amazon Simple Storage authorization copied to forked {cat}.'
            ).format(cat=fork.project_or_component, )
        else:
            # The href value is quoted, matching the link markup produced
            # by after_remove_contributor.
            message = (
                'Amazon Simple Storage authorization not copied to forked {cat}. You may '
                'authorize this fork on the <a href="{url}">Settings</a> '
                'page.').format(cat=fork.project_or_component,
                                url=fork.url + 'settings/')

        if save:
            clone.save()

        return clone, message

    def before_fork(self, node, user):
        """

        :param Node node:
        :param User user:
        :return str: Alert message

        """

        if self.user_settings and self.user_settings.owner == user:
            return (
                'Because you have authenticated the S3 add-on for this '
                '{cat}, forking it will also transfer your authorization to '
                'the forked {cat}.').format(cat=node.project_or_component, )
        return (
            'Because this S3 add-on has been authenticated by a different '
            'user, forking it will not transfer authentication to the forked '
            '{cat}.').format(cat=node.project_or_component, )

    def before_remove_contributor(self, node, removed):
        """

        :param Node node:
        :param User removed:
        :return str: Alert message

        """
        if self.user_settings and self.user_settings.owner == removed:
            return (
                'The Amazon Simple Storage add-on for this {category} is authenticated '
                'by {user}. Removing this user will also remove access '
                'to {bucket} unless another contributor re-authenticates.'
            ).format(category=node.project_or_component,
                     user=removed.fullname,
                     bucket=self.bucket)

    def after_remove_contributor(self, node, removed, auth=None):
        """

        :param Node node:
        :param User removed:
        :return str: Alert message

        """
        if self.user_settings and self.user_settings.owner == removed:
            self.user_settings = None
            self.bucket = None
            self.save()

            message = (
                u'Because the Amazon Simple Storage add-on for {category} "{title}" was '
                u'authenticated by {user}, authentication information has been deleted.'
            ).format(category=node.category_display,
                     title=node.title,
                     user=removed.fullname)

            # Only offer the re-authentication link when the viewer is not
            # the contributor who was just removed.
            if not auth or auth.user != removed:
                url = node.web_url_for('node_setting')
                message += (
                    u' You can re-authenticate on the <a href="{url}">Settings</a> page.'
                ).format(url=url)
            return message

    def after_delete(self, node, user):
        """Deauthorize (with logging) and save when the node is deleted."""
        self.deauthorize(Auth(user=user), log=True, save=True)
Ejemplo n.º 30
0
class DraftRegistrationApproval(Sanction):
    """Sanction governing approval of a draft registration.

    Approval registers the draft and applies the registration choice
    stored in ``meta``; rejection clears ``meta`` and emails the draft's
    initiator.
    """

    mode = Sanction.ANY

    # Since draft registrations that require approval are not immediately registered,
    # meta stores registration_choice and embargo_end_date (when applicable)
    meta = fields.DictionaryField(default=dict)

    def _send_rejection_email(self, user, draft):
        """Email ``user`` that ``draft`` was rejected.

        Only drafts using the prereg schema are supported.

        :raises NotImplementedError: for any other registration schema
        """
        schema = draft.registration_schema
        prereg_schema = prereg_utils.get_prereg_schema()

        if schema._id == prereg_schema._id:
            mails.send_mail(user.username,
                            mails.PREREG_CHALLENGE_REJECTED,
                            user=user,
                            draft_url=draft.absolute_url)
        else:
            raise NotImplementedError(
                'TODO: add a generic email template for registration approvals'
            )

    def approve(self, user):
        """Mark the sanction APPROVED and run the completion callback.

        :raises PermissionsError: if ``settings.PREREG_ADMIN_TAG`` is not
            among ``user.system_tags``
        """
        if settings.PREREG_ADMIN_TAG not in user.system_tags:
            raise PermissionsError(
                "This user does not have permission to approve this draft.")
        self.state = Sanction.APPROVED
        self._on_complete(user)

    def reject(self, user):
        """Mark the sanction REJECTED and run the rejection callback.

        :raises PermissionsError: if ``settings.PREREG_ADMIN_TAG`` is not
            among ``user.system_tags``
        """
        if settings.PREREG_ADMIN_TAG not in user.system_tags:
            # The message names the action actually attempted (it
            # previously said "approve", copied from ``approve``).
            raise PermissionsError(
                "This user does not have permission to reject this draft.")
        self.state = Sanction.REJECTED
        self._on_reject(user)

    def _on_complete(self, user):
        """Register the approved draft and apply its registration choice.

        ``meta['registration_choice']`` must be ``'immediate'`` or
        ``'embargo'``; for an embargo, ``meta['embargo_end_date']`` is
        parsed as the embargo end date.

        :param user: the approving user (not used here)
        :raises KeyError: if ``meta`` has no ``'registration_choice'``
        :raises ValueError: if the choice is neither supported value
        """
        from website.project.model import DraftRegistration

        # Validate the stored options before registering, so that invalid
        # metadata cannot leave behind a registration with no sanction.
        registration_choice = self.meta['registration_choice']
        if registration_choice not in ('immediate', 'embargo'):
            raise ValueError(
                "'registration_choice' must be either 'embargo' or 'immediate'"
            )
        embargo_end_date = None
        if registration_choice == 'embargo':
            embargo_end_date = parse_date(self.meta.get('embargo_end_date'),
                                          ignoretz=True)

        draft = DraftRegistration.find_one(Q('approval', 'eq', self))
        auth = Auth(draft.initiator)
        registration = draft.register(auth=auth, save=True)

        if registration_choice == 'immediate':
            registration.require_approval(
                draft.initiator, notify_initiator_on_complete=True)
        else:
            registration.embargo_registration(
                draft.initiator, embargo_end_date,
                notify_initiator_on_complete=True)

    def _on_reject(self, user, *args, **kwargs):
        """Clear the stored registration options and notify the initiator.

        :param user: the rejecting user (not used here; the email goes to
            the draft's initiator)
        """
        from website.project.model import DraftRegistration

        # clear out previous registration options
        self.meta = {}
        self.save()

        draft = DraftRegistration.find_one(Q('approval', 'eq', self))
        self._send_rejection_email(draft.initiator, draft)