Exemple #1
0
class Ron(StoredObject):
    """Stored object with a datetime primary key and three plain fields
    (one string, one integer, one datetime)."""

    # The primary key is a datetime value rather than a string.
    _id = fields.DateTimeField(primary=True)

    # Plain fields with no defaults, validators or other options.
    ron_str = fields.StringField()
    ron_int = fields.IntegerField()
    ron_now = fields.DateTimeField()
Exemple #2
0
class Sheila(StoredObject):
    """Stored object declaring many field options side by side: defaults,
    validators, ``required`` flags, list fields, automatic timestamps and
    foreign references to ``Ron``."""

    _id = fields.StringField(primary=True)
    # NOTE(review): the meaning of 'optimistic' is defined by the ODM and is
    # not visible in this file.
    _meta = {'optimistic': True}

    # Simple fields
    sheila_str = fields.StringField(default='sheila',
                                    validate=True,
                                    required=True)
    sheila_int = fields.IntegerField(default=7, validate=MaxValueValidator(9))
    sheila_now = fields.DateTimeField()
    sheila_url = fields.StringField(validate=URLValidator())
    # Required list of strings, each validated against the regex r'foo$'.
    sheila_foostop = fields.StringField(required=True,
                                        validate=RegexValidator(r'foo$'),
                                        list=True)

    # Timestamps populated through the auto_now_add / auto_now options.
    created = fields.DateTimeField(auto_now_add=True)
    modified = fields.DateTimeField(auto_now=True)

    # List fields
    # ``validate`` and ``list_validate`` are given separate validators here;
    # presumably one applies per item and the other to the list as a whole --
    # TODO confirm against the ODM.
    sheila_strs = fields.StringField(list=True,
                                     validate=MinLengthValidator(5),
                                     list_validate=MinLengthValidator(3))
    sheila_nows = fields.DateTimeField(list=True)  #, default=[])
    sheila_urls = fields.StringField(
        list=True,
        validate=[URLValidator(), MinLengthValidator(20)],
        list_validate=MinLengthValidator(2))
    sheila_ints = fields.IntegerField(list=True,
                                      validate=MinValueValidator(3),
                                      list_validate=MinLengthValidator(2))

    # Foreign fields
    # Single and list references to ``Ron``, each with its own backref name.
    sheila_ron = fields.ForeignField('Ron', backref='ron')
    sheila_rons = fields.ForeignField('Ron', backref='rons', list=True)
Exemple #3
0
class Conference(StoredObject):
    """A conference that accepts submissions, keyed by its ``endpoint``."""

    #: Determines the email address for submission and the OSF url
    # Example: If endpoint is spsp2014, then the submission email addresses
    # are derived from "spsp2014" and the OSF url will be
    # osf.io/view/spsp2014
    endpoint = fields.StringField(primary=True, required=True, unique=True)
    #: Full name, e.g. "SPSP 2014"
    name = fields.StringField(required=True)
    info_url = fields.StringField(required=False, default=None)
    logo_url = fields.StringField(required=False, default=None)
    location = fields.StringField(required=False, default=None)
    start_date = fields.DateTimeField(default=None)
    end_date = fields.DateTimeField(default=None)
    active = fields.BooleanField(required=True)
    admins = fields.ForeignField('user', list=True, required=False, default=None)
    #: Whether to make submitted projects public
    public_projects = fields.BooleanField(required=False, default=True)
    poster = fields.BooleanField(default=True)
    talk = fields.BooleanField(default=True)
    # field_names are used to customize the text on the conference page, the categories
    # of submissions, and the email address to send material to.
    # The default hands out a fresh copy so that editing one conference's
    # field names can never mutate the shared DEFAULT_FIELD_NAMES mapping.
    field_names = fields.DictionaryField(default=lambda: dict(DEFAULT_FIELD_NAMES))

    # Cached number of submissions
    num_submissions = fields.IntegerField(default=0)

    @classmethod
    def get_by_endpoint(cls, endpoint, active=True):
        """Return the conference whose endpoint matches ``endpoint``.

        The endpoint comparison uses the ``iexact`` operator.

        :param endpoint: endpoint string to look up.
        :param active: when truthy (the default), only conferences with
            ``active == True`` are considered.
        :raises ConferenceError: if the lookup raises ``ModularOdmException``.
        """
        query = Q('endpoint', 'iexact', endpoint)
        if active:
            query &= Q('active', 'eq', True)
        try:
            # Query through ``cls`` so subclasses look up their own records.
            return cls.find_one(query)
        except ModularOdmException:
            raise ConferenceError('Endpoint {0} not found'.format(endpoint))
Exemple #4
0
class User(StoredObject):
    """Stored object with a required name, two timestamp fields, a
    non-editable field and a unique field."""

    _id = fields.StringField(primary=True)
    name = fields.StringField(required=True)
    # Both timestamp options are given the ``set_datetime`` callable
    # (defined elsewhere) instead of a plain boolean.
    date_created = fields.DateTimeField(auto_now_add=set_datetime)
    date_updated = fields.DateTimeField(auto_now=set_datetime)
    # Declared with editable=False.
    read_only = fields.StringField(editable=False)
    # Declared with unique=True.
    unique = fields.StringField(unique=True)

    # NOTE(review): the meaning of 'optimistic' is defined by the ODM and is
    # not visible in this file.
    _meta = {'optimistic': True}
Exemple #5
0
class Session(StoredObject):
    """Stored session record holding an arbitrary ``data`` dictionary."""

    # Primary key defaults to the string form of a new ObjectId.
    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))
    date_created = fields.DateTimeField(auto_now_add=True)
    date_modified = fields.DateTimeField(auto_now=True)
    data = fields.DictionaryField()

    @property
    def is_authenticated(self):
        """True when ``data`` contains the key ``'auth_user_id'``."""
        return 'auth_user_id' in self.data
class Tag(StoredObject):
    """Stored object with validated timestamps, a string value and a list of
    keyword strings."""

    _id = fields.StringField(primary=True)
    date_created = fields.DateTimeField(validate=True, auto_now_add=True)
    date_modified = fields.DateTimeField(validate=True, auto_now=True)
    # Both string fields carry MinLengthValidator(5).
    value = fields.StringField(default='default',
                               validate=MinLengthValidator(5))
    keywords = fields.StringField(default=['keywd1', 'keywd2'],
                                  validate=MinLengthValidator(5),
                                  list=True)
    # NOTE(review): the meaning of 'optimistic' is defined by the ODM and is
    # not visible in this file.
    _meta = {'optimistic': True}
Exemple #7
0
class OsfStorageFileVersion(StoredObject):
    """One version of an OsfStorageFileNode.

    Records where the file content lives on the backend together with the
    metadata (size, content type, dates) reported for it.
    """

    _id = fields.StringField(primary=True,
                             default=lambda: str(bson.ObjectId()))
    creator = fields.ForeignField('user', required=True)

    # Date version record was created. This is the date displayed to the user.
    date_created = fields.DateTimeField(auto_now_add=True)

    # Dictionary specifying all information needed to locate file on backend
    # {
    #     'service': 'cloudfiles',  # required
    #     'container': 'osf',       # required
    #     'object': '20c53b',       # required
    #     'worker_url': '127.0.0.1',
    #     'worker_host': 'upload-service-1',
    # }
    location = fields.DictionaryField(validate=utils.validate_location)

    # Dictionary containing raw metadata from upload service response
    # {
    #     'size': 1024,                            # required
    #     'content_type': 'text/plain',            # required
    #     'date_modified': '2014-11-07T20:24:15',  # required
    #     'md5': 'd077f2',
    # }
    # NOTE(review): ``update_metadata`` below reads the keys 'contentType'
    # and 'modified', not 'content_type' / 'date_modified' as listed above --
    # confirm which spelling the upload service actually sends.
    metadata = fields.DictionaryField()

    size = fields.IntegerField()
    content_type = fields.StringField()
    # Date file modified on third-party backend. Not displayed to user, since
    # this date may be earlier than the date of upload if the file already
    # exists on the backend
    date_modified = fields.DateTimeField()

    @property
    def location_hash(self):
        """The ``'object'`` entry of ``location``."""
        return self.location['object']

    def is_duplicate(self, other):
        """Return True when ``other`` has the same ``location_hash``."""
        return other.location_hash == self.location_hash

    def update_metadata(self, metadata):
        """Merge ``metadata`` into the stored metadata and refresh the
        derived ``content_type``, ``size`` and ``date_modified`` fields.

        :param metadata: mapping merged into ``self.metadata``.
        :raises errors.MissingFieldError: when the merged metadata lacks
            ``'size'`` or ``'modified'``.
        """
        merged = self.metadata
        merged.update(metadata)
        # The content type is optional; it falls back to None.
        self.content_type = merged.get('contentType', None)
        try:
            self.size = merged['size']
            self.date_modified = parse_date(merged['modified'], ignoretz=True)
        except KeyError as missing:
            raise errors.MissingFieldError(str(missing))
        self.save()
Exemple #8
0
class TrashedFileNode(StoredObject):
    """The graveyard for all deleted FileNodes"""
    _id = fields.StringField(primary=True)

    last_touched = fields.DateTimeField()
    history = fields.DictionaryField(list=True)
    versions = fields.ForeignField('FileVersion', list=True)

    node = fields.ForeignField('node', required=True)
    parent = fields.AbstractForeignField(default=None)

    is_file = fields.BooleanField(default=True)
    provider = fields.StringField(required=True)

    name = fields.StringField(required=True)
    path = fields.StringField(required=True)
    materialized_path = fields.StringField(required=True)

    checkout = fields.AbstractForeignField('User')
    deleted_by = fields.AbstractForeignField('User')
    deleted_on = fields.DateTimeField(auto_now_add=True)
    tags = fields.ForeignField('Tag', list=True)

    @property
    def deep_url(self):
        """URL of the ``addon_deleted_file`` view for this trashed file.

        Allows deleted files to resolve to a view that will provide a nice
        error message and http.GONE
        """
        owner = self.node
        return owner.web_url_for('addon_deleted_file', trashed_id=self._id)

    def restore(self, recursive=True, parent=None):
        """Recreate a StoredFileNode from the data in this object.

        Will re-point all guids and finally remove itself.

        :param recursive: when truthy, also restore every trashed node whose
            parent is this object, attaching it to the restored node.
        :param parent: optional node to restore under; overrides the stored
            parent.
        :raises KeyExistsException:
        :raises ValueError: if the restored node ends up without a parent.
        """
        payload = self.to_storage()
        # These two fields are popped before the payload is passed on.
        for trash_only in ('deleted_on', 'deleted_by'):
            payload.pop(trash_only)

        if parent:
            payload['parent'] = parent._id
        elif payload['parent']:
            # parent is an AbstractForeignField, so it gets stored as tuple
            payload['parent'] = payload['parent'][0]

        node_class = FileNode.resolve_class(self.provider, int(self.is_file))
        restored = node_class(**payload)
        if not restored.parent:
            raise ValueError('No parent to restore to')
        restored.save()

        if recursive:
            children = TrashedFileNode.find(Q('parent', 'eq', self))
            for trashed_child in children:
                trashed_child.restore(recursive=recursive, parent=restored)

        TrashedFileNode.remove_one(self)
        return restored
Exemple #9
0
class QueuedMail(StoredObject):
    """An email queued for a user, to be sent at ``send_at``."""

    _id = fields.StringField(primary=True, default=lambda: str(bson.ObjectId()))
    user = fields.ForeignField('User', index=True, required=True)
    to_addr = fields.StringField()
    send_at = fields.DateTimeField(index=True, required=True)

    # Key into queue_mail_types; the entry found there supplies the template,
    # subject, optional categories and the presend callable.
    email_type = fields.StringField(index=True, required=True)

    # dictionary with variables used to populate mako template and store information used in presends
    # Example:
    # self.data = {
    #    'nid' : 'ShIpTo',
    #    'fullname': 'Florence Welch',
    # }
    data = fields.DictionaryField()
    sent_at = fields.DateTimeField(index=True)

    def send_mail(self):
        """Send this queued email if it is still appropriate to do so.

        Runs the presend callable for this email type, builds the mail
        object, and sends it only when presend passed, the user is active and
        the user is subscribed to the OSF help mailing list. Otherwise the
        queued record is removed.

        :return: True if the email was sent, False if it was discarded.
        """
        spec = queue_mail_types[self.email_type]
        passed_presend = spec['presend'](self)
        message = Mail(
            spec['template'],
            subject=spec['subject'],
            categories=spec.get('categories', None)
        )
        self.data['osf_url'] = settings.DOMAIN

        deliverable = (
            passed_presend and
            self.user.is_active and
            self.user.osf_mailing_lists.get(settings.OSF_HELP_LIST)
        )
        if not deliverable:
            # Not sendable: drop the queued record instead of keeping it.
            self.__class__.remove_one(self)
            return False

        # Fall back to the user's username when no explicit address is set.
        recipient = self.to_addr or self.user.username
        send_mail(recipient, message, mimetype='html', **(self.data or {}))
        self.sent_at = datetime.utcnow()
        self.save()
        return True

    def find_sent_of_same_type_and_user(self):
        """Find already-sent emails of this email's type for this user.

        Emails that are still queued (``sent_at`` is None) are excluded.

        :return: the query result from ``find``.
        """
        same_type = Q('email_type', 'eq', self.email_type)
        same_user = Q('user', 'eq', self.user)
        already_sent = Q('sent_at', 'ne', None)
        return self.__class__.find(same_type & same_user & already_sent)
Exemple #10
0
class Session(StoredObject):
    """Stored session record whose ``data`` always has a ``'history'`` entry
    after construction."""

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))
    date_created = fields.DateTimeField(auto_now_add=True)
    date_modified = fields.DateTimeField(auto_now=True)
    data = fields.DictionaryField()

    def __init__(self, *args, **kwargs):
        """Initialise the record, then seed ``data['history']`` with an empty
        list when the key is absent."""
        super(Session, self).__init__(*args, **kwargs)
        if 'history' in self.data:
            # An existing history is left untouched.
            return
        self.data['history'] = []
Exemple #11
0
class NotificationDigest(StoredObject):
    """Stored notification entry: user id, timestamp, event, message and a
    list of node-lineage strings."""

    # Primary key defaults to the string form of a new ObjectId.
    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))
    # Stored as a plain string, not as a foreign reference.
    user_id = fields.StringField()
    timestamp = fields.DateTimeField()
    event = fields.StringField()
    message = fields.StringField()
    node_lineage = fields.StringField(list=True)
Exemple #12
0
class Tag(StoredObject):
    """Stored object keyed by ``value`` that declares string, datetime,
    boolean, integer and float fields with assorted options."""

    # ``value`` is the primary key and is declared with index=False.
    value = fields.StringField(primary=True, index=False)
    # Despite its name this is a string field (default 'c').
    count = fields.StringField(default='c', validate=True, index=True)
    misc = fields.StringField(default='')
    misc2 = fields.StringField(default='')
    created = fields.DateTimeField(validate=True)
    modified = fields.DateTimeField(validate=True, auto_now=True)
    # List of strings validated with both a minimum (5) and maximum (10)
    # length validator.
    keywords = fields.StringField(
        default=['keywd1', 'keywd2'],
        validate=[MinLengthValidator(5),
                  MaxLengthValidator(10)],
        list=True)
    mybool = fields.BooleanField(default=False)
    myint = fields.IntegerField()
    myfloat = fields.FloatField(required=True, default=4.5)
    myurl = fields.StringField(validate=URLValidator())
Exemple #13
0
class CitationStyle(StoredObject):
    """Persistent representation of a CSL style.

    Styles are parsed from .csl files so that their metadata fields can be
    indexed.
    """

    # The name of the citation file, sans extension
    _id = fields.StringField(primary=True)

    # The full title of the style
    title = fields.StringField(required=True)

    # Datetime the file was last parsed
    date_parsed = fields.DateTimeField(default=datetime.datetime.utcnow,
                                       required=True)

    short_title = fields.StringField(required=False)
    summary = fields.StringField(required=False)

    def to_json(self):
        """Return a dict with the style's ``id``, ``title``, ``short_title``
        and ``summary``."""
        serialized = {'id': self._id}
        # The remaining keys match the attribute names one-to-one.
        for attr in ('title', 'short_title', 'summary'):
            serialized[attr] = getattr(self, attr)
        return serialized
 class Foo(StoredObject):
     """Stored object with an integer primary key and one field each of
     integer, string, datetime, float and integer-list type."""

     # The primary key is an integer.
     _id = fields.IntegerField(primary=True)
     integer_field = fields.IntegerField()
     string_field = fields.StringField()
     datetime_field = fields.DateTimeField()
     float_field = fields.FloatField()
     # A list of integers.
     list_field = fields.IntegerField(list=True)
Exemple #15
0
class ExternalAccount(StoredObject):
    """An account on an external service.

    By design this object knows nothing about the objects associated with
    it. It should stay as thin as possible and contain only the fields that
    must be stored in the database.

    The ``provider`` field is a de facto foreign key to an
    ``ExternalProvider`` object, as providers are not stored in the database.
    """
    _id = fields.StringField(default=lambda: str(ObjectId()), primary=True)

    # The OAuth credentials. One or both of these fields should be populated.
    # For OAuth1, this is usually the "oauth_token"
    # For OAuth2, this is usually the "access_token"
    oauth_key = EncryptedStringField()

    # For OAuth1, this is usually the "oauth_token_secret"
    # For OAuth2, this is not used
    oauth_secret = EncryptedStringField()

    # Used for OAuth2 only
    refresh_token = EncryptedStringField()
    date_last_refreshed = fields.DateTimeField()
    expires_at = fields.DateTimeField()
    # Each new record starts with its own empty list of scopes.
    scopes = fields.StringField(list=True, default=list)

    # The `name` of the service
    # This lets us query for only accounts on a particular provider
    provider = fields.StringField(required=True)
    # The proper 'name' of the service
    # Needed for account serialization
    provider_name = fields.StringField(required=True)

    # The unique, persistent ID on the remote service.
    provider_id = fields.StringField()

    # The user's name on the external service
    display_name = EncryptedStringField()
    # A link to the user's profile on the external service
    profile_url = EncryptedStringField()

    def __repr__(self):
        """Debug representation of the form
        ``<ExternalAccount: provider/provider_id>``."""
        provider, remote_id = self.provider, self.provider_id
        return '<ExternalAccount: {}/{}>'.format(provider, remote_id)
Exemple #16
0
class OsfStorageFileVersion(StoredObject):
    """One version of a stored file: where it lives on the backend and the
    metadata reported for it."""

    _id = oid_primary_key
    creator = fields.ForeignField('user', required=True)

    date_created = fields.DateTimeField(auto_now_add=True)

    # Dictionary specifying all information needed to locate file on backend
    # {
    #     'service': 'cloudfiles',  # required
    #     'container': 'osf',       # required
    #     'object': '20c53b',       # required
    #     'worker_url': '127.0.0.1',
    #     'worker_host': 'upload-service-1',
    # }
    location = fields.DictionaryField(validate=validate_location)

    # Dictionary containing raw metadata from upload service response
    # {
    #     'size': 1024,                            # required
    #     'content_type': 'text/plain',            # required
    #     'date_modified': '2014-11-07T20:24:15',  # required
    #     'md5': 'd077f2',
    # }
    metadata = fields.DictionaryField()

    size = fields.IntegerField()
    content_type = fields.StringField()
    date_modified = fields.DateTimeField()

    @property
    def location_hash(self):
        """The ``'object'`` entry of ``location``."""
        return self.location['object']

    def is_duplicate(self, other):
        """Return True when ``other`` has the same ``location_hash``."""
        return self.location_hash == other.location_hash

    def update_metadata(self, metadata):
        """Merge ``metadata`` into the stored metadata and set every
        attribute named in ``metadata_fields`` from its parsed value.

        :param metadata: mapping that must contain every key of
            ``metadata_fields``.
        :raises errors.MissingFieldError: naming the first required key that
            ``metadata`` lacks.
        """
        self.metadata.update(metadata)
        # ``items()`` works on both Python 2 and Python 3 mappings.
        for key, parser in metadata_fields.items():
            try:
                value = metadata[key]
            except KeyError:
                # Report which field is missing rather than a bare error.
                raise errors.MissingFieldError(key)
            setattr(self, key, parser(value))
        self.save()
Exemple #17
0
class NotificationDigest(StoredObject):
    """Stored notification entry: user id, timestamp, send type, event,
    message and a list of node-lineage strings."""

    # Primary key defaults to the string form of a new ObjectId.
    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))
    # Stored as a plain indexed string, not as a foreign reference.
    user_id = fields.StringField(index=True)
    timestamp = fields.DateTimeField()
    # Indexed and checked by ``validate_subscription_type`` (defined elsewhere).
    send_type = fields.StringField(index=True, validate=validate_subscription_type)
    event = fields.StringField()
    message = fields.StringField()
    node_lineage = fields.StringField(list=True)
Exemple #18
0
class WikiPageVersion(StoredObject):
    """Stored wiki page version whose text is kept as a list of strings."""

    # NOTE(review): the meaning of 'optimistic' is defined by the ODM and is
    # not visible in this file.
    _meta = {'optimistic': True}
    _id = fields.StringField(primary=True, index=True)
    modified_on = fields.DateTimeField()
    # List of strings; the default is a list holding one empty string.
    text = fields.StringField(default=[''], list=True)

    @property
    def content(self):
        """The entries of ``text`` joined with newline characters."""
        return '\n'.join(self.text)
Exemple #19
0
class NodeWikiPage(GuidStoredObject):
    """A named, versioned wiki page attached to a node."""

    redirect_mode = 'redirect'

    _id = fields.StringField(primary=True)

    page_name = fields.StringField(validate=validate_page_name)
    version = fields.IntegerField()
    date = fields.DateTimeField(auto_now_add=datetime.datetime.utcnow)
    is_current = fields.BooleanField()
    content = fields.StringField(default='')

    user = fields.ForeignField('user')
    node = fields.ForeignField('node')

    @property
    def deep_url(self):
        """The node's ``deep_url`` followed by ``wiki/<page_name>/``."""
        base = self.node.deep_url
        return '{}wiki/{}/'.format(base, self.page_name)

    @property
    def url(self):
        """The node's ``url`` followed by ``wiki/<page_name>/``."""
        base = self.node.url
        return '{}wiki/{}/'.format(base, self.page_name)

    def html(self, node):
        """The cleaned HTML of the page.

        The rendered content is passed through ``linkify`` with the
        ``nofollow`` callback. If ``linkify`` raises ``TypeError`` a warning
        is logged and the rendered content is returned without linkification.
        """
        rendered = render_content(self.content, node=node)
        try:
            linked = linkify(rendered, [nofollow])
        except TypeError:
            logger.warning('Returning unlinkified content.')
            return rendered
        return linked

    def raw_text(self, node):
        """The raw text of the page, suitable for use in a text search."""
        page_html = self.html(node)
        return sanitize(page_html, tags=[], strip=True)

    def save(self, *args, **kwargs):
        """Save the page, then update the search entry of its node (when a
        node is set). Returns whatever the parent ``save`` returned."""
        result = super(NodeWikiPage, self).save(*args, **kwargs)
        owner = self.node
        if owner:
            owner.update_search()
        return result

    def rename(self, new_name, save=True):
        """Set ``page_name`` to ``new_name``; persist unless ``save`` is
        falsy."""
        self.page_name = new_name
        if not save:
            return
        self.save()

    def to_json(self):
        """Return an empty dict."""
        return dict()
Exemple #20
0
class TrashedFileNode(StoredObject):
    """The graveyard for all deleted FileNodes"""
    _id = fields.StringField(primary=True)

    last_touched = fields.DateTimeField()
    history = fields.DictionaryField(list=True)
    versions = fields.ForeignField('FileVersion', list=True)

    node = fields.ForeignField('node', required=True)
    parent = fields.AbstractForeignField(default=None)

    is_file = fields.BooleanField(default=True)
    provider = fields.StringField(required=True)

    name = fields.StringField(required=True)
    path = fields.StringField(required=True)
    materialized_path = fields.StringField(required=True)

    checkout = fields.AbstractForeignField('User')
    deleted_by = fields.AbstractForeignField('User')
    deleted_on = fields.DateTimeField(auto_now_add=True)

    @property
    def deep_url(self):
        """Allows deleted files to resolve to a view
        that will provide a nice error message and http.GONE
        """
        return self.node.web_url_for('addon_deleted_file', trashed_id=self._id)

    def restore(self):
        """Recreate a StoredFileNode from the data in this object
        Will re-point all guids and finally remove itself

        :returns: the restored, saved file node.
        :raises KeyExistsException:
        """
        data = self.to_storage()
        # These two fields are popped before the data is passed on.
        data.pop('deleted_on')
        data.pop('deleted_by')
        if data['parent']:
            # parent is an AbstractForeignField, so it gets stored as a
            # tuple; only its first element (the parent's id) is passed on.
            data['parent'] = data['parent'][0]
        restored = FileNode.resolve_class(self.provider,
                                          int(self.is_file))(**data)
        restored.save()
        TrashedFileNode.remove_one(self)
        return restored
Exemple #21
0
class GoogleDriveOAuthSettings(StoredObject):
    """OAuth credentials for one Google Drive user.

    This model addresses the case where two OSF users link to the same
    Google Drive user and their access tokens would otherwise conflict.
    """

    # google drive user id, for example, "4974056"
    user_id = fields.StringField(primary=True, required=True)
    # google drive user name this is the user's login
    username = fields.StringField()
    access_token = fields.StringField()
    refresh_token = fields.StringField()
    expires_at = fields.DateTimeField()

    def fetch_access_token(self):
        """Return the access token, refreshing it first when needed."""
        self.refresh_access_token()
        return self.access_token

    def refresh_access_token(self, force=False):
        """Refresh and save the tokens when they are close to expiring.

        :param force: refresh even if ``_needs_refresh`` reports False.
        """
        if self._needs_refresh() or force:
            client = GoogleAuthClient()
            token = client.refresh(self.access_token, self.refresh_token)

            self.access_token = token['access_token']
            self.refresh_token = token['refresh_token']
            self.expires_at = datetime.utcfromtimestamp(token['expires_at'])
            self.save()

    def revoke_access_token(self):
        """Revoke the token and remove this record when at most one OSF user
        is linked to it; with more linked users this method does nothing."""
        # if there is only one osf user linked to this google drive user oauth, revoke the token,
        # otherwise, disconnect the osf user from the googledriveoauthsettings
        if len(self.googledriveusersettings__accessed) <= 1:
            client = GoogleAuthClient()
            try:
                client.revoke(self.access_token)
            except Exception:
                # no need to fail, revoke is opportunistic. Only ordinary
                # errors are ignored; KeyboardInterrupt and SystemExit still
                # propagate.
                pass

            # remove the object as its the last instance.
            GoogleDriveOAuthSettings.remove_one(self)

    def _needs_refresh(self):
        """Return True when the token expires in fewer than
        ``drive_settings.REFRESH_TIME`` seconds; False when no expiry is
        recorded."""
        if self.expires_at is None:
            return False
        return (self.expires_at - datetime.utcnow()
                ).total_seconds() < drive_settings.REFRESH_TIME
Exemple #22
0
class User(GuidStoredObject, AddonModelMixin):

    # Node fields that trigger an update to the search engine on save
    SEARCH_UPDATE_FIELDS = {
        'fullname',
        'given_name',
        'middle_names',
        'family_name',
        'suffix',
        'merged_by',
        'date_disabled',
        'date_confirmed',
        'jobs',
        'schools',
        'social',
    }

    # TODO: Add SEARCH_UPDATE_NODE_FIELDS, for fields that should trigger a
    #   search update for all nodes to which the user is a contributor.

    SOCIAL_FIELDS = {
        'orcid': u'http://orcid.com/{}',
        'github': u'http://github.com/{}',
        'scholar': u'http://scholar.google.com/citation?user={}',
        'twitter': u'http://twitter.com/{}',
        'personal': u'{}',
        'linkedIn': u'https://www.linkedin.com/profile/view?id={}',
        'impactStory': u'https://impactstory.org/{}',
        'researcherId': u'http://researcherid.com/rid/{}',
    }

    # This is a GuidStoredObject, so this will be a GUID.
    _id = fields.StringField(primary=True)

    # The primary email address for the account.
    # This value is unique, but multiple "None" records exist for:
    #   * unregistered contributors where an email address was not provided.
    # TODO: Update mailchimp subscription on username change in user.save()
    username = fields.StringField(required=False, unique=True, index=True)

    # Hashed. Use `User.set_password` and `User.check_password`
    password = fields.StringField()

    fullname = fields.StringField(required=True, validate=string_required)

    # user has taken action to register the account
    is_registered = fields.BooleanField(index=True)

    # user has claimed the account
    # TODO: This should be retired - it always reflects is_registered.
    #   While a few entries exist where this is not the case, they appear to be
    #   the result of a bug, as they were all created over a small time span.
    is_claimed = fields.BooleanField(default=False, index=True)

    # a list of strings - for internal use
    system_tags = fields.StringField(list=True)

    # security emails that have been sent
    # TODO: This should be removed and/or merged with system_tags
    security_messages = fields.DictionaryField()
    # Format: {
    #   <message label>: <datetime>
    #   ...
    # }

    # user was invited (as opposed to registered unprompted)
    is_invited = fields.BooleanField(default=False, index=True)

    # Per-project unclaimed user data:
    # TODO: add validation
    unclaimed_records = fields.DictionaryField(required=False)
    # Format: {
    #   <project_id>: {
    #       'name': <name that referrer provided>,
    #       'referrer_id': <user ID of referrer>,
    #       'token': <token used for verification urls>,
    #       'email': <email the referrer provided or None>,
    #       'claimer_email': <email the claimer entered or None>,
    #       'last_sent': <timestamp of last email sent to referrer or None>
    #   }
    #   ...
    # }

    # Time of last sent notification email to newly added contributors
    # Format : {
    #   <project_id>: {
    #       'last_sent': time.time()
    #   }
    #   ...
    # }
    contributor_added_email_records = fields.DictionaryField(default=dict)

    # The user into which this account was merged
    merged_by = fields.ForeignField('user',
                                    default=None,
                                    backref='merged',
                                    index=True)

    # verification key used for resetting password
    verification_key = fields.StringField()

    # confirmed emails
    #   emails should be stripped of whitespace and lower-cased before appending
    # TODO: Add validator to ensure an email address only exists once across
    # all User's email lists
    emails = fields.StringField(list=True)

    # email verification tokens
    #   see also ``unconfirmed_emails``
    email_verifications = fields.DictionaryField(default=dict)
    # Format: {
    #   <token> : {'email': <email address>,
    #              'expiration': <datetime>}
    # }

    # email lists to which the user has chosen a subscription setting
    mailing_lists = fields.DictionaryField()
    # Format: {
    #   'list1': True,
    #   'list2': False,
    #    ...
    # }

    # the date this user was registered
    # TODO: consider removal - this can be derived from date_registered
    date_registered = fields.DateTimeField(auto_now_add=dt.datetime.utcnow,
                                           index=True)

    # watched nodes are stored via a list of WatchConfigs
    watched = fields.ForeignField("WatchConfig", list=True, backref="watched")

    # list of users recently added to nodes as a contributor
    recently_added = fields.ForeignField("user",
                                         list=True,
                                         backref="recently_added")

    # Attached external accounts (OAuth)
    external_accounts = fields.ForeignField("externalaccount",
                                            list=True,
                                            backref="connected")

    # CSL names
    given_name = fields.StringField()
    middle_names = fields.StringField()
    family_name = fields.StringField()
    suffix = fields.StringField()

    # Employment history
    jobs = fields.DictionaryField(list=True, validate=validate_history_item)
    # Format: {
    #     'title': <position or job title>,
    #     'institution': <institution or organization>,
    #     'department': <department>,
    #     'location': <location>,
    #     'startMonth': <start month>,
    #     'startYear': <start year>,
    #     'endMonth': <end month>,
    #     'endYear': <end year>,
    #     'ongoing': <boolean>
    # }

    # Educational history
    schools = fields.DictionaryField(list=True, validate=validate_history_item)
    # Format: {
    #     'degree': <degree>,
    #     'institution': <institution or organization>,
    #     'department': <department>,
    #     'location': <location>,
    #     'startMonth': <start month>,
    #     'startYear': <start year>,
    #     'endMonth': <end month>,
    #     'endYear': <end year>,
    #     'ongoing': <boolean>
    # }

    # Social links
    social = fields.DictionaryField(validate=validate_social)
    # Format: {
    #     'personal': <personal site>,
    #     'twitter': <twitter id>,
    # }

    # hashed password used to authenticate to Piwik
    piwik_token = fields.StringField()

    # date the user last logged in via the web interface
    date_last_login = fields.DateTimeField()

    # date the user first successfully confirmed an email address
    date_confirmed = fields.DateTimeField(index=True)

    # When the user was disabled.
    date_disabled = fields.DateTimeField(index=True)

    # when comments for a node were last viewed
    comments_viewed_timestamp = fields.DictionaryField()
    # Format: {
    #   'node_id': 'timestamp'
    # }

    # timezone for user's locale (e.g. 'America/New_York')
    timezone = fields.StringField(default='Etc/UTC')

    # user language and locale data (e.g. 'en_US')
    locale = fields.StringField(default='en_US')

    _meta = {'optimistic': True}

    def __repr__(self):
        """Return a debug representation showing the repr of ``username``
        and of ``_id``."""
        return '<User({0!r}) with id {1!r}>'.format(self.username, self._id)

    def __str__(self):
        """Return ``fullname`` encoded as ASCII; characters that cannot be
        encoded are substituted (``'replace'`` error handler)."""
        return self.fullname.encode('ascii', 'replace')

    __unicode__ = __str__

    # For compatibility with Django auth
    @property
    def pk(self):
        """Alias for ``_id``."""
        return self._id

    @property
    def email(self):
        """Alias for ``username``."""
        return self.username

    def is_authenticated(self):  # Needed for django compat
        """Always return True."""
        return True

    def is_anonymous(self):
        """Always return False."""
        return False

    @property
    def absolute_api_v2_url(self):
        """Result of ``absolute_reverse`` for the ``users:user-detail`` route
        with this user's ``pk`` as ``user_id``."""
        # Imported here rather than at module level to avoid a circular
        # dependency.
        from api.base.utils import absolute_reverse
        route_kwargs = {'user_id': self.pk}
        return absolute_reverse('users:user-detail', kwargs=route_kwargs)

    # used by django and DRF
    def get_absolute_url(self):
        """Return ``absolute_api_v2_url``."""
        return self.absolute_api_v2_url

    @classmethod
    def create_unregistered(cls, fullname, email=None):
        """Build (without saving) a new unregistered, invited user.

        :param fullname: full name of the user.
        :param email: optional email, stored as the username.
        :returns: the new instance with its guessed name fields updated.
        """
        attributes = dict(
            username=email,
            fullname=fullname,
            is_invited=True,
            is_registered=False,
        )
        unregistered = cls(**attributes)
        unregistered.update_guessed_names()
        return unregistered

    @classmethod
    def create(cls, username, password, fullname):
        """Build (without saving) a user with the given username and full
        name, update its guessed name fields and set its password."""
        new_user = cls(username=username, fullname=fullname)
        new_user.update_guessed_names()
        new_user.set_password(password)
        return new_user

    @classmethod
    def create_unconfirmed(cls, username, password, fullname, do_confirm=True):
        """Create a new user who has begun registration but still needs to
        verify their primary email address (username).

        NOTE(review): ``do_confirm`` is accepted but not used in this body.
        """
        pending = cls.create(username, password, fullname)
        # The username doubles as the first email awaiting confirmation.
        pending.add_unconfirmed_email(username)
        pending.is_registered = False
        return pending

    @classmethod
    def create_confirmed(cls, username, password, fullname):
        """Build a user that is already registered, claimed and confirmed.

        The confirmation date is copied from ``date_registered``.
        """
        confirmed_user = cls.create(username, password, fullname)
        confirmed_user.is_registered = confirmed_user.is_claimed = True
        confirmed_user.date_confirmed = confirmed_user.date_registered
        return confirmed_user

    @classmethod
    def from_cookie(cls, cookie, secret=None):
        """Attempt to load a user from their signed cookie.

        :param cookie: The signed cookie value; a falsy value yields ``None``.
        :param secret: Signing key; defaults to ``settings.SECRET_KEY``.
        :returns: ``None`` if the cookie is missing, the signature is bad, or
            no session is found; otherwise whatever ``cls.load`` returns for
            the session's ``auth_user_id``.
        """
        if not cookie:
            return None

        signing_key = secret or settings.SECRET_KEY

        try:
            session_id = itsdangerous.Signer(signing_key).unsign(cookie)
        except itsdangerous.BadSignature:
            return None

        stored_session = Session.load(session_id)
        if stored_session is None:
            return None

        user_id = stored_session.data.get('auth_user_id')
        return cls.load(user_id)

    def get_or_create_cookie(self, secret=None):
        """Return a signed cookie for this user's session.

        The most recently modified session whose ``data.auth_user_id`` is this
        user's id is reused; when none exists a new session is created and
        saved.

        :param str secret: The key to sign the cookie with; defaults to
            ``settings.SECRET_KEY``.
        :returns: The signed cookie
        """
        signing_key = secret or settings.SECRET_KEY
        owned_by_user = Q('data.auth_user_id', 'eq', self._id)
        latest = Session.find(owned_by_user).sort('-date_modified').limit(1)

        if latest.count() > 0:
            user_session = latest[0]
        else:
            session_data = {
                'auth_user_id': self._id,
                'auth_user_username': self.username,
                'auth_user_fullname': self.fullname,
            }
            user_session = Session(data=session_data)
            user_session.save()

        return itsdangerous.Signer(signing_key).sign(user_session._id)

    def update_guessed_names(self):
        """Update the CSL name fields inferred from the full name.

        Copies the ``given``, ``middle``, ``family`` and ``suffix`` entries
        returned by ``utils.impute_names`` onto the matching attributes.
        """
        parsed = utils.impute_names(self.fullname)
        attr_to_key = (
            ('given_name', 'given'),
            ('middle_names', 'middle'),
            ('family_name', 'family'),
            ('suffix', 'suffix'),
        )
        for attr, key in attr_to_key:
            setattr(self, attr, parsed[key])

    def register(self, username, password=None):
        """Register the user under ``username``.

        Sets the username (and password, when one is given), records the
        username in ``emails`` if it is not there yet, marks the account as
        registered, claimed and confirmed now, refreshes search data and
        emits ``signals.user_confirmed``.

        :returns: ``self``
        """
        self.username = username
        if password:
            self.set_password(password)
        if username not in self.emails:
            self.emails.append(username)

        self.is_registered = self.is_claimed = True
        self.date_confirmed = dt.datetime.utcnow()

        self.update_search()
        self.update_search_nodes()

        # Let listeners know that a user has confirmed
        signals.user_confirmed.send(self)

        return self

    def add_unclaimed_record(self, node, referrer, given_name, email=None):
        """Store an unclaimed record for ``node`` in ``unclaimed_records``.

        :param Node node: Node this unclaimed user was added to.
        :param User referrer: User who referred this user.
        :param str given_name: The full name that the referrer gave for this user.
        :param str email: The given email address (stored lowercased and
            stripped, or ``None`` when not provided).
        :raises: PermissionsError if ``referrer`` cannot edit ``node``.
        :returns: The added record
        """
        if not node.can_edit(user=referrer):
            raise PermissionsError(
                'Referrer does not have permission to add a contributor '
                'to project {0}'.format(node._primary_key))

        project_id = node._primary_key
        referrer_id = referrer._primary_key
        clean_email = email.lower().strip() if email else None

        new_record = {
            'name': given_name,
            'referrer_id': referrer_id,
            'token': generate_confirm_token(),
            'email': clean_email
        }
        self.unclaimed_records[project_id] = new_record
        return new_record

    def display_full_name(self, node=None):
        """Return the full name as it would display in a contributor list
        for a given node.

        When ``node`` has a truthy unclaimed record for this user, that
        record's ``name`` is returned; otherwise ``fullname`` is used.

        NOTE: Unclaimed users may have a different name for different nodes.
        """
        if not node:
            return self.fullname
        record = self.unclaimed_records.get(node._primary_key, None)
        if record:
            return record['name']
        return self.fullname

    @property
    def is_active(self):
        """Whether the user is active.

        An active user is registered, has a password, is neither merged nor
        disabled, and is confirmed.

        :return: bool
        """
        has_credentials = self.is_registered and self.password is not None
        return (has_credentials
                and not self.is_merged
                and not self.is_disabled
                and self.is_confirmed)

    def get_unclaimed_record(self, project_id):
        """Look up the unclaimed record stored for ``project_id``.

        :raises: ValueError if there is no record for the given project.
        """
        try:
            record = self.unclaimed_records[project_id]
        except KeyError:
            # Surface a missing record as a ValueError
            message = 'No unclaimed record for user {self._id} on node {project_id}'
            raise ValueError(message.format(self=self, project_id=project_id))
        return record

    def get_claim_url(self, project_id, external=False):
        """Return the URL that an unclaimed user should use to claim their
        account for the given project.

        :param project_id: The project ID for the unclaimed record
        :param bool external: Prefix the URL with ``settings.DOMAIN`` instead
            of ``'/'``.
        :raises: ValueError if a record doesn't exist for the given project ID
        :rtype: str
        :returns: The claim URL, including the record's token
        """
        uid = self._primary_key
        base_url = settings.DOMAIN if external else '/'
        claim_token = self.get_unclaimed_record(project_id)['token']
        url_template = '{base_url}user/{uid}/{project_id}/claim/?token={token}'
        return url_template.format(
            base_url=base_url,
            uid=uid,
            project_id=project_id,
            token=claim_token,
        )

    def set_password(self, raw_password):
        """Store the hash of ``raw_password`` as this user's password."""
        hashed = generate_password_hash(raw_password)
        self.password = hashed

    def check_password(self, raw_password):
        """Return a boolean of whether ``raw_password`` was correct.

        Always ``False`` when no password is stored or none was supplied.
        """
        if not (self.password and raw_password):
            return False
        return check_password_hash(self.password, raw_password)

    @property
    def csl_given_name(self):
        """Given name followed by the initial of each middle name.

        For example, given name ``John`` with middle names ``Quincy Adams``
        yields ``'John Q A'``.
        """
        parts = [self.given_name]
        if self.middle_names:
            # re.split yields empty strings when the value starts or ends with
            # whitespace; skip them so that indexing [0] cannot fail.
            parts.extend(
                name[0]
                for name in re.split(r'\s+', self.middle_names)
                if name)
        return ' '.join(parts)

    @property
    def csl_name(self):
        """Name as a dict with ``family`` and ``given`` keys."""
        name = {}
        name['family'] = self.family_name
        name['given'] = self.csl_given_name
        return name

    # TODO: This should not be on the User object.
    def change_password(self, raw_old_password, raw_new_password,
                        raw_confirm_password):
        """Change the password for this user to the hash of ``raw_new_password``.

        All three values are stripped of surrounding whitespace (``None`` is
        treated as an empty string) before validation.

        :raises: ChangePasswordError carrying the list of every validation
            problem found.
        """
        old_pw = (raw_old_password or '').strip()
        new_pw = (raw_new_password or '').strip()
        confirm_pw = (raw_confirm_password or '').strip()

        problems = []

        # The old password must verify, and the new one must differ from it
        if not self.check_password(old_pw):
            problems.append('Old password is invalid')
        elif old_pw == new_pw:
            problems.append('Password cannot be the same')

        # Blank and length checks
        if not (old_pw and new_pw and confirm_pw):
            problems.append('Passwords cannot be blank')
        elif len(new_pw) < 6:
            problems.append('Password should be at least six characters')
        elif len(new_pw) > 256:
            problems.append('Password should not be longer than 256 characters')

        if new_pw != confirm_pw:
            problems.append('Password does not match the confirmation')

        if problems:
            raise ChangePasswordError(problems)
        self.set_password(new_pw)

    def _set_email_token_expiration(self, token, expiration=None):
        """Set the expiration date for given email token.

        :param str token: The email token to set the expiration for.
        :param datetime expiration: Datetime at which to expire the token. If ``None``, the
            token will expire after ``settings.EMAIL_TOKEN_EXPIRATION`` hours. This is only
            used for testing purposes.
        """
        expiration = expiration or (dt.datetime.utcnow() + dt.timedelta(
            hours=settings.EMAIL_TOKEN_EXPIRATION))
        self.email_verifications[token]['expiration'] = expiration
        return expiration

    def add_unconfirmed_email(self, email, expiration=None):
        """Add an email verification token for a given email.

        The address is lowercased and stripped before use. If it is already
        awaiting confirmation, its old token is replaced by a new one.

        :param str email: Address to verify.
        :param datetime expiration: Optional explicit token expiration.
        :raises: ValueError if the address is already in ``emails``.
        :returns: The new verification token.
        """
        # TODO: This is technically not compliant with RFC 822, which requires
        #       that case be preserved in the "local-part" of an address. From
        #       a practical standpoint, the vast majority of email servers do
        #       not preserve case.
        #       ref: https://tools.ietf.org/html/rfc822#section-6
        email = email.lower().strip()

        if email in self.emails:
            raise ValueError("Email already confirmed to this user.")

        utils.validate_email(email)

        # Refresh the token when the address is already pending
        already_pending = email in self.unconfirmed_emails
        if already_pending:
            self.remove_unconfirmed_email(email)

        new_token = generate_confirm_token()

        # email_verifications may be None; start from an empty mapping
        if not self.email_verifications:
            self.email_verifications = {}

        self.email_verifications[new_token] = {'email': email}
        self._set_email_token_expiration(new_token, expiration=expiration)
        return new_token

    def remove_unconfirmed_email(self, email):
        """Remove the verification token for an unconfirmed email address.

        Only the first matching token is removed.

        :param str email: The unconfirmed address to drop.
        :returns: ``True`` if a token was removed, ``False`` otherwise
            (including when ``email_verifications`` is ``None``).
        """
        # Handle when email_verifications is None, as unconfirmed_emails does
        verifications = self.email_verifications or {}
        # Iterate over a snapshot so the deletion cannot disturb iteration
        for token, value in list(verifications.items()):
            if value.get('email') == email:
                del verifications[token]
                return True

        return False

    def remove_email(self, email):
        """Remove a confirmed email and emit ``signals.user_email_removed``.

        Does nothing when the address is not in ``emails``.

        :raises: PermissionsError if ``email`` is the primary email (username).
        """
        if email == self.username:
            raise PermissionsError("Can't remove primary email")
        if email not in self.emails:
            return
        self.emails.remove(email)
        signals.user_email_removed.send(self, email=email)

    @signals.user_email_removed.connect
    def _send_email_removal_confirmations(self, email):
        """Send the ``REMOVED_EMAIL`` notice to both addresses involved.

        One message goes to the primary address (username) and one to the
        address that was removed.
        """
        primary = self.username
        to_primary = {
            'to_addr': primary,
            'mail': mails.REMOVED_EMAIL,
            'user': self,
            'removed_email': email,
            'security_addr': 'alternate email address ({})'.format(email),
        }
        mails.send_mail(**to_primary)

        to_removed = {
            'to_addr': email,
            'mail': mails.REMOVED_EMAIL,
            'user': self,
            'removed_email': email,
            'security_addr': 'primary email address ({})'.format(self.username),
        }
        mails.send_mail(**to_removed)

    def get_confirmation_token(self, email, force=False):
        """Return the confirmation token for a given email.

        Addresses are compared case-insensitively.

        :param str email: Email to get the token for.
        :param bool force: If an expired token exists for the given email, generate a new
            token, save the user and return that token.

        :raises: ExpiredTokenError if trying to access a token that is expired and force=False.
        :raises: KeyError if there no token for the email.
        """
        # TODO: Refactor "force" flag into User.get_or_add_confirmation_token
        for token, info in self.email_verifications.items():
            if info['email'].lower() != email.lower():
                continue

            # Old records will not have an expiration key. If it's missing,
            # assume the token is expired
            expiration = info.get('expiration')
            expired = not expiration or expiration < dt.datetime.utcnow()
            if not expired:
                return token

            if not force:
                raise ExpiredTokenError(
                    'Token for email "{0}" is expired'.format(email))

            replacement = self.add_unconfirmed_email(email)
            self.save()
            return replacement
        raise KeyError('No confirmation token for email "{0}"'.format(email))

    def get_confirmation_url(self, email, external=True, force=False):
        """Return the confirmation url for a given email.

        :param bool external: Prefix the URL with ``settings.DOMAIN`` instead
            of ``'/'``.
        :param bool force: Passed through to ``get_confirmation_token``.
        :raises: ExpiredTokenError if trying to access a token that is expired.
        :raises: KeyError if there is no token for the email.
        """
        prefix = settings.DOMAIN if external else '/'
        confirmation_token = self.get_confirmation_token(email, force=force)
        url_template = "{0}confirm/{1}/{2}/"
        return url_template.format(prefix, self._primary_key,
                                   confirmation_token)

    def _get_unconfirmed_email_for_token(self, token):
        """Return whether or not a confirmation token is valid for this user.
        :rtype: bool
        """
        if token not in self.email_verifications:
            raise exceptions.InvalidTokenError()

        verification = self.email_verifications[token]
        # Not all tokens are guaranteed to have expiration dates
        if ('expiration' in verification
                and verification['expiration'] < dt.datetime.utcnow()):
            raise exceptions.ExpiredTokenError()

        return verification['email']

    def verify_claim_token(self, token, project_id):
        """Return whether or not a claim token is valid for this user for
        a given node which they were added as a unregistered contributor for.

        ``False`` is returned when there is no unclaimed record for
        ``project_id``.
        """
        try:
            unclaimed = self.get_unclaimed_record(project_id)
        except ValueError:
            # No unclaimed record for the given project id
            return False
        else:
            return unclaimed['token'] == token

    def confirm_email(self, token, merge=False):
        """Confirm the email address associated with the token.

        :param str token: A key of ``email_verifications``.
        :param bool merge: Whether to merge another account that already has
            this email in its ``emails`` into this one.
        :raises: Whatever ``_get_unconfirmed_email_for_token`` raises for an
            unknown or expired token.
        :raises: exceptions.MergeConfirmedRequiredError if another account has
            this email confirmed and ``merge`` is false.
        :returns: ``True``
        """
        email = self._get_unconfirmed_email_for_token(token)

        # Look for an account that already has this email confirmed
        # (case-insensitive match on ``emails``)
        try:
            user_to_merge = User.find_one(Q('emails', 'iexact', email))
        except NoResultsFound:
            user_to_merge = None

        # Merge that account if the caller agreed to it; otherwise abort
        if user_to_merge and merge:
            self.merge_user(user_to_merge)
        elif user_to_merge:
            raise exceptions.MergeConfirmedRequiredError(
                'Merge requires confirmation',
                user=self,
                user_to_merge=user_to_merge,
            )

        # If another user has this email as its username, get it
        try:
            unregistered_user = User.find_one(
                Q('username', 'eq', email) & Q('_id', 'ne', self._id))
        except NoResultsFound:
            unregistered_user = None

        if unregistered_user:
            self.merge_user(unregistered_user)
            self.save()
            # NOTE(review): merge_user already sets username to None and saves
            # the merged user; this assignment is not followed by a save here.
            unregistered_user.username = None

        if email not in self.emails:
            self.emails.append(email)

        # Complete registration if primary email
        if email.lower() == self.username.lower():
            self.register(self.username)
            self.date_confirmed = dt.datetime.utcnow()
        # Revoke token
        del self.email_verifications[token]

        # TODO: We can't assume that all unclaimed records are now claimed.
        # Clear unclaimed records, so user's name shows up correctly on
        # all projects
        self.unclaimed_records = {}
        self.save()

        self.update_search_nodes()

        return True

    @property
    def unconfirmed_emails(self):
        """List of the email addresses awaiting verification."""
        # email_verifications may be None; treat that as no pending emails
        pending = self.email_verifications or {}
        addresses = []
        for verification in pending.values():
            addresses.append(verification['email'])
        return addresses

    def update_search_nodes(self):
        """Call ``update_search`` on every node in ``node__contributed``.

        Needed to add self to contributor lists in search upon registration
        or claiming.
        """
        for contributed_node in self.node__contributed:
            contributed_node.update_search()

    def update_search_nodes_contributors(self):
        """Bulk update the contributor name on all nodes on which the user is
        a visible contributor.

        :return: None
        """
        from website.search import search
        visible_nodes = self.visible_contributor_to
        search.update_contributors(visible_nodes)

    @property
    def is_confirmed(self):
        """Whether a confirmation date has been recorded for this user."""
        return True if self.date_confirmed else False

    @property
    def social_links(self):
        """Map each social key to its formatted link.

        Only keys with a truthy value in ``social`` and a truthy template in
        ``SOCIAL_FIELDS`` are included.
        """
        links = {}
        for key, val in self.social.items():
            if not val:
                continue
            template = self.SOCIAL_FIELDS.get(key)
            if template:
                links[key] = template.format(val)
        return links

    @property
    def biblio_name(self):
        """Bibliographic form of the name, e.g. ``"Smith, J. Q."``.

        Given and middle names are reduced to upper-cased initials. When the
        family name equals the combined given and middle names, only the
        family name is returned.
        """
        forenames = self.given_name + ' ' + self.middle_names
        family = self.family_name
        if family == forenames:
            return family

        initials = []
        for piece in forenames.split(' '):
            # Skip empty pieces and pieces not starting with a word character
            if piece and re.search(r'\w', piece[0], re.I):
                initials.append(piece[0].upper() + '.')
        return u'{0}, {1}'.format(family, ' '.join(initials))

    @property
    def given_name_initial(self):
        """
        The user's preferred initialization of their given name.

        Some users with common names may choose to distinguish themselves from
        their colleagues in this way. For instance, there could be two
        well-known researchers in a single field named "Robert Walker".
        "Walker, R" could then refer to either of them. "Walker, R.H." could
        provide easy disambiguation.

        As implemented here, this is simply the first character of
        ``given_name``.

        NOTE: The internal representation for this should never end with a
              period. "R" and "R.H" would be correct in the prior case, but
              "R.H." would not.
        """
        first_character = self.given_name[0]
        return first_character

    @property
    def url(self):
        """Relative URL of the user: ``/<primary key>/``."""
        user_id = self._primary_key
        return '/{}/'.format(user_id)

    @property
    def api_url(self):
        """Relative v1 API profile URL for this user."""
        user_id = self._primary_key
        return '/api/v1/profile/{0}/'.format(user_id)

    @property
    def absolute_url(self):
        """The user's ``url`` joined onto ``settings.DOMAIN``."""
        relative = self.url
        return urlparse.urljoin(settings.DOMAIN, relative)

    @property
    def display_absolute_url(self):
        """``absolute_url`` without the ``http:``/``https:`` prefix and
        without leading or trailing slashes; ``None`` if there is no URL.
        """
        absolute = self.absolute_url
        if absolute is None:
            return None
        without_scheme = re.sub(r'https?:', '', absolute)
        return without_scheme.strip('/')

    @property
    def deep_url(self):
        """Relative profile URL: ``/profile/<primary key>/``."""
        user_id = self._primary_key
        return '/profile/{}/'.format(user_id)

    @property
    def gravatar_url(self):
        """Gravatar URL for this user, over SSL, at the
        ``settings.GRAVATAR_SIZE_ADD_CONTRIBUTOR`` size.
        """
        avatar_size = settings.GRAVATAR_SIZE_ADD_CONTRIBUTOR
        return filters.gravatar(self, use_ssl=True, size=avatar_size)

    def get_activity_points(self, db=None):
        """Return this user's total activity count.

        :param db: Database to query; defaults to ``framework.mongo.database``.
        """
        database = db or framework.mongo.database
        return analytics.get_total_activity_count(self._primary_key,
                                                  db=database)

    @property
    def is_disabled(self):
        """Whether or not this account has been disabled.

        Abstracts ``User.date_disabled``: the account counts as disabled as
        soon as a disabled date is set.

        :return: bool
        """
        disabled_on = self.date_disabled
        return disabled_on is not None

    @is_disabled.setter
    def is_disabled(self, val):
        """Set whether or not this account has been disabled.

        A truthy ``val`` stamps ``date_disabled`` with the current UTC time;
        a falsy one clears it.
        """
        self.date_disabled = dt.datetime.utcnow() if val else None

    @property
    def is_merged(self):
        """Whether or not this account has been merged into another account."""
        merged_into = self.merged_by
        return merged_into is not None

    @property
    def profile_url(self):
        """Relative profile URL built from ``_id``: ``/<id>/``."""
        user_id = self._id
        return '/{}/'.format(user_id)

    @property
    def contributor_to(self):
        """Generator over contributed nodes that are neither deleted nor a
        dashboard.
        """
        return (
            contributed
            for contributed in self.node__contributed
            if not contributed.is_deleted and not contributed.is_dashboard
        )

    @property
    def visible_contributor_to(self):
        """Generator over the ``contributor_to`` nodes on which this user is
        listed among the visible contributor ids.
        """
        return (
            candidate
            for candidate in self.contributor_to
            if self._id in candidate.visible_contributor_ids
        )

    def get_summary(self, formatter='long'):
        """Return a summary dict of the user.

        :param str formatter: Key into ``name_formatters`` selecting how the
            display name is rendered.
        """
        summary = {}
        summary['user_fullname'] = self.fullname
        summary['user_profile_url'] = self.profile_url
        format_name = name_formatters[formatter]
        summary['user_display_name'] = format_name(self)
        summary['user_is_claimed'] = self.is_claimed
        return summary

    def save(self, *args, **kwargs):
        """Normalize the username, save, and refresh search and Piwik data.

        The username is lowercased and stripped (or set to ``None`` when
        falsy). Search is updated when the parent ``save`` result intersects
        ``SEARCH_UPDATE_FIELDS`` and the user is confirmed. A Piwik update is
        triggered when ``settings.PIWIK_HOST`` is set and the user has no
        ``piwik_token``.

        :returns: The value returned by the parent ``save``.
        """
        # TODO: Update mailchimp subscription on username change
        # Avoid circular import
        from framework.analytics import tasks as piwik_tasks

        if self.username:
            self.username = self.username.lower().strip()
        else:
            self.username = None

        saved_fields = super(User, self).save(*args, **kwargs)

        search_fields_changed = self.SEARCH_UPDATE_FIELDS.intersection(
            saved_fields)
        if search_fields_changed and self.is_confirmed:
            self.update_search()
            self.update_search_nodes_contributors()

        if settings.PIWIK_HOST and not self.piwik_token:
            piwik_tasks.update_user(self._id)

        return saved_fields

    def update_search(self):
        """Push this user to the search backend.

        A ``SearchUnavailableError`` is logged rather than propagated.
        """
        from website import search
        search_backend = search.search
        try:
            search_backend.update_user(self)
        except search.exceptions.SearchUnavailableError as err:
            logger.exception(err)
            log_exception()

    @classmethod
    def find_by_email(cls, email):
        """Return a list holding the user whose ``emails`` contain ``email``.

        :param str email: Address to look up (exact ``eq`` match).
        :returns: ``[user]`` when ``find_one`` succeeds, otherwise ``[]``.
        """
        try:
            user = cls.find_one(Q('emails', 'eq', email))
        except Exception:
            # Best-effort lookup: any query failure (e.g. no match) yields an
            # empty list. KeyboardInterrupt and SystemExit are no longer
            # swallowed, unlike with a bare ``except:``.
            return []
        return [user]

    def serialize(self, anonymous=False):
        """Return a dict describing the user.

        Every value except ``registered`` is passed through
        ``utils.privacy_info_handle`` together with the ``anonymous`` flag.
        """
        mask = utils.privacy_info_handle
        serialized = {}
        serialized['id'] = mask(self._primary_key, anonymous)
        serialized['fullname'] = mask(self.fullname, anonymous, name=True)
        serialized['registered'] = self.is_registered
        serialized['url'] = mask(self.url, anonymous)
        serialized['api_url'] = mask(self.api_url, anonymous)
        return serialized

    ###### OSF-Specific methods ######

    def watch(self, watch_config):
        """Watch a node by adding its WatchConfig to this user's ``watched``
        list. Raises ``ValueError`` if the node is already watched.

        The WatchConfig is saved; the user is not saved here.

        :param watch_config: The WatchConfig to add.
        """
        already_watched = [config.node for config in self.watched]
        if watch_config.node in already_watched:
            raise ValueError('Node is already being watched.')
        watch_config.save()
        self.watched.append(watch_config)

    def unwatch(self, watch_config):
        """Unwatch a node by removing the matching WatchConfig via its class's
        ``remove_one``. Raises ``ValueError`` if the node is not already being
        watched.

        Configs are matched by comparing node ids. The user is not saved here.

        :param watch_config: The WatchConfig to remove.
        """
        for existing in self.watched:
            if watch_config.node._id != existing.node._id:
                continue
            existing.__class__.remove_one(existing)
            return None
        raise ValueError('Node not being watched.')

    def is_watching(self, node):
        """Return whether or not a user is watching a Node."""
        watched_ids = {config.node._id for config in self.watched}
        return node._id in watched_ids

    def get_recent_log_ids(self, since=None):
        '''Return a generator of recent logs' ids, gathered from the nodes of
        every WatchConfig in ``watched``.

        :param since: A datetime specifying the oldest time to retrieve logs
            from. If ``None``, defaults to 60 days before today. Must be a
            tz-aware datetime because PyMongo's generation times are tz-aware.

        :rtype: generator of log ids (strings)
        '''
        log_ids = []
        # Default since to 60 days before today if since is None
        # timezone aware utcnow
        utcnow = dt.datetime.utcnow().replace(tzinfo=pytz.utc)
        since_date = since or (utcnow - dt.timedelta(days=60))
        for config in self.watched:
            # Extract the timestamps for each log from the log_id (fast!)
            # The first 4 bytes of Mongo's ObjectId encodes time
            # This prevents having to load each Log Object and access their
            # date fields
            # Ids already collected from earlier watched nodes are skipped
            node_log_ids = [
                log_id for log_id in config.node.logs._to_primary_keys()
                if bson.ObjectId(log_id).generation_time > since_date
                and log_id not in log_ids
            ]
            # Log ids in reverse chronological order
            # NOTE(review): ordering relies on ``_merge_into_reversed``, which
            # is defined elsewhere in this file - confirm its contract there.
            log_ids = _merge_into_reversed(log_ids, node_log_ids)
        return (l_id for l_id in log_ids)

    def get_daily_digest_log_ids(self):
        """Return a generator of log ids generated in the past day
        (starting at UTC 00:00).
        """
        today = dt.datetime.utcnow()
        # Midnight (UTC) of the current day, as a tz-aware datetime
        start_of_day = dt.datetime(
            today.year, today.month, today.day, 0, 0, 0, tzinfo=pytz.utc)
        return self.get_recent_log_ids(since=start_of_day)

    @property
    def can_be_merged(self):
        """The ability of the `merge_user` method to fully merge the user.

        True only when every addon returned by ``get_addons`` reports that it
        can be merged.
        """
        for addon in self.get_addons():
            if not addon.can_be_merged:
                return False
        return True

    def merge_user(self, user):
        """Merge another user account into this account.

        Attributes, emails, pending email verifications, watch configs,
        external accounts and addon settings are carried over from ``user``.
        This account replaces ``user`` as a contributor on every non-dashboard
        node ``user`` contributed to, and as creator of the nodes ``user``
        created. If both accounts are contributors of the same node, this
        account gets the greater of the two stored permission values and is
        made visible when the two accounts differ in visibility.

        Finally the merged account's sessions are removed, its username,
        password and verification key are cleared, ``merged_by`` is set to
        this account, and the merged account is saved. This method does not
        call ``self.save()``.

        :param user: A User object to be merged.
        :raises: exceptions.MergeConflictError if ``user.can_be_merged`` is
            false.
        """
        # Fail if the other user has conflicts.
        if not user.can_be_merged:
            raise exceptions.MergeConflictError("Users cannot be merged")
        # Move over the other user's attributes
        # TODO: confirm
        for system_tag in user.system_tags:
            if system_tag not in self.system_tags:
                self.system_tags.append(system_tag)

        self.is_claimed = self.is_claimed or user.is_claimed
        self.is_invited = self.is_invited or user.is_invited

        # copy over profile only if this user has no profile info
        if user.jobs and not self.jobs:
            self.jobs = user.jobs

        if user.schools and not self.schools:
            self.schools = user.schools

        if user.social and not self.social:
            self.social = user.social

        # Combine unclaimed records; this account's entries win on conflict
        unclaimed = user.unclaimed_records.copy()
        unclaimed.update(self.unclaimed_records)
        self.unclaimed_records = unclaimed
        # - unclaimed records should be connected to only one user
        user.unclaimed_records = {}

        # Combine security messages; this account's entries win on conflict
        security_messages = user.security_messages.copy()
        security_messages.update(self.security_messages)
        self.security_messages = security_messages

        for key, value in user.mailing_lists.iteritems():
            # subscribe to each list if either user was subscribed
            subscription = value or self.mailing_lists.get(key)
            signals.user_merged.send(self,
                                     list_name=key,
                                     subscription=subscription)

            # clear subscriptions for merged user
            signals.user_merged.send(user, list_name=key, subscription=False)

        # Keep the later comment-view timestamp for each node
        for node_id, timestamp in user.comments_viewed_timestamp.iteritems():
            if not self.comments_viewed_timestamp.get(node_id):
                self.comments_viewed_timestamp[node_id] = timestamp
            elif timestamp > self.comments_viewed_timestamp[node_id]:
                self.comments_viewed_timestamp[node_id] = timestamp

        # NOTE(review): emails are appended without de-duplication - confirm
        # that the two accounts can never share a confirmed address.
        self.emails.extend(user.emails)
        user.emails = []

        # Carry over pending verifications, except the one for the merged
        # user's own username and any token this account already has
        for k, v in user.email_verifications.iteritems():
            email_to_confirm = v['email']
            if k not in self.email_verifications and email_to_confirm != user.username:
                self.email_verifications[k] = v
        user.email_verifications = {}

        # FOREIGN FIELDS
        for watched in user.watched:
            if watched not in self.watched:
                self.watched.append(watched)
        user.watched = []

        for account in user.external_accounts:
            if account not in self.external_accounts:
                self.external_accounts.append(account)
        user.external_accounts = []

        # - addons
        # Note: This must occur before the merged user is removed as a
        #       contributor on the nodes, as an event hook is otherwise fired
        #       which removes the credentials.
        for addon in user.get_addons():
            user_settings = self.get_or_add_addon(addon.config.short_name)
            user_settings.merge(addon)
            user_settings.save()

        # - projects where the user was a contributor
        for node in user.node__contributed:
            # Skip dashboard node
            if node.is_dashboard:
                continue
            # if both accounts are contributor of the same project
            if node.is_contributor(self) and node.is_contributor(user):
                # NOTE(review): the stored permission values are compared
                # with ``>``; confirm that this ordering really corresponds
                # to "highest permission".
                if node.permissions[user._id] > node.permissions[self._id]:
                    permissions = node.permissions[user._id]
                else:
                    permissions = node.permissions[self._id]
                node.set_permissions(user=self, permissions=permissions)

                # Make this account visible when exactly one of the two
                # accounts is visible on the node
                visible1 = self._id in node.visible_contributor_ids
                visible2 = user._id in node.visible_contributor_ids
                if visible1 != visible2:
                    node.set_visible(user=self,
                                     visible=True,
                                     log=True,
                                     auth=Auth(user=self))

            else:
                # Only the merged user contributes: add this account with the
                # merged user's permissions and visibility
                node.add_contributor(
                    contributor=self,
                    permissions=node.get_permissions(user),
                    visible=node.get_visible(user),
                    log=False,
                )

            try:
                node.remove_contributor(
                    contributor=user,
                    auth=Auth(user=self),
                    log=False,
                )
            except ValueError:
                logger.error('Contributor {0} not in list on node {1}'.format(
                    user._id, node._id))
            node.save()

        # - projects where the user was the creator
        for node in user.node__created:
            node.creator = self
            node.save()

        # finalize the merge

        remove_sessions_for_user(user)

        # - username is set to None so the resultant user can set it primary
        #   in the future.
        user.username = None
        user.password = None
        user.verification_key = None
        user.merged_by = self

        user.save()

    def get_projects_in_common(self, other_user, primary_keys=True):
        """Return the set of "shared projects" (projects that both users are
        contributors for), or the set of their primary keys.

        :param other_user: The user to compare against.
        :param bool primary_keys: Return primary keys instead of the projects
            themselves.
        """
        if not primary_keys:
            own_projects = set(self.node__contributed)
            return own_projects.intersection(other_user.node__contributed)

        own_keys = set(self.node__contributed._to_primary_keys())
        other_keys = other_user.node__contributed._to_primary_keys()
        return own_keys.intersection(other_keys)

    def n_projects_in_common(self, other_user):
        """Return the number of "shared projects" (projects that both users
        are contributors for).
        """
        shared_keys = self.get_projects_in_common(other_user,
                                                  primary_keys=True)
        return len(shared_keys)
Exemple #23
0
class StoredFileNode(StoredObject):
    """Persistence-layer record behind FileNode objects.

    Instances are normally reached through a FileNode wrapper (see
    ``wrapped``) rather than being created or used directly.
    """

    # Two non-unique compound indexes declared for this model.
    __indices__ = [
        {
            'unique': False,
            'key_or_list': [
                ('path', pymongo.ASCENDING),
                ('node', pymongo.ASCENDING),
                ('is_file', pymongo.ASCENDING),
                ('provider', pymongo.ASCENDING),
            ],
        },
        {
            'unique': False,
            'key_or_list': [
                ('node', pymongo.ASCENDING),
                ('is_file', pymongo.ASCENDING),
                ('provider', pymongo.ASCENDING),
            ],
        },
    ]

    # Primary key; defaults to a freshly generated ObjectId rendered as text.
    _id = fields.StringField(
        primary=True,
        default=lambda: str(bson.ObjectId()),
    )

    # When the touch method was last called on this FileNode
    last_touched = fields.DateTimeField()
    # List of dictionaries sorted by their 'modified' key: the raw output of
    # the metadata request, deduplicated by etag. Entries are added whether
    # or not they can be pinned to a version.
    history = fields.DictionaryField(list=True)
    # Concrete versions of this FileNode; each must have an identifier
    versions = fields.ForeignField('FileVersion', list=True)

    node = fields.ForeignField('Node', required=True)
    parent = fields.ForeignField('StoredFileNode', default=None)

    is_file = fields.BooleanField(default=True)
    provider = fields.StringField(required=True)

    name = fields.StringField(required=True)
    path = fields.StringField(required=True)
    materialized_path = fields.StringField(required=True)

    # The User that has this file "checked out".
    # Should only be used for OsfStorage.
    checkout = fields.AbstractForeignField('User')

    # Tags for a file; currently only used for osfStorage
    tags = fields.ForeignField('Tag', list=True)

    @property
    def pk(self):
        """Alias of ``_id``, kept for Django compatibility."""
        return self._id

    # TODO Find a better way
    @property
    def node_id(self):
        """Primary key of the owning node, kept for Django compatibility."""
        return self.node._id

    @property
    def deep_url(self):
        """``deep_url`` of the wrapping FileNode."""
        return self.wrapped().deep_url

    def wrapped(self):
        """Return this record wrapped in the FileNode subclass resolved from
        its provider and file/folder flag.
        """
        wrapper_cls = FileNode.resolve_class(self.provider, int(self.is_file))
        return wrapper_cls(self)

    def get_guid(self, create=False):
        """Look up a Guid whose referent is this object.

        :param bool create: Generate a new Guid when none is found.
        :rtype: Guid
        :return: The first matching Guid; otherwise a newly generated one when
            ``create`` is true, else ``None``.
        """
        try:
            # Several GUIDs may point at one object; the first one wins.
            return Guid.find(Q('referent', 'eq', self))[0]
        except IndexError:
            pass
        if create:
            return Guid.generate(self)
        return None
Exemple #24
0
class FileVersion(StoredObject):
    """One version of an OsfStorageFileNode.

    Records where the file content is located together with its hashes and
    timestamps.
    """

    # Primary key; defaults to a freshly generated ObjectId rendered as text.
    _id = fields.StringField(
        primary=True,
        default=lambda: str(bson.ObjectId()),
    )

    creator = fields.ForeignField('user')

    identifier = fields.StringField(required=True)

    # Date the version record was created; this is the date shown to the user.
    date_created = fields.DateTimeField(auto_now_add=True)

    # Everything needed to locate the file on the backend, e.g.
    # {
    #     'service': 'cloudfiles',  # required
    #     'container': 'osf',       # required
    #     'object': '20c53b',       # required
    #     'worker_url': '127.0.0.1',
    #     'worker_host': 'upload-service-1',
    # }
    location = fields.DictionaryField(
        default=None,
        validate=utils.validate_location,
    )

    # Raw metadata from the upload service response, e.g.
    # {
    #     'size': 1024,                            # required
    #     'content_type': 'text/plain',            # required
    #     'date_modified': '2014-11-07T20:24:15',  # required
    #     'md5': 'd077f2',
    # }
    metadata = fields.DictionaryField()

    size = fields.IntegerField()
    content_type = fields.StringField()
    # Date the file was modified on the third-party backend. Not shown to the
    # user because it may predate the upload when the file already existed on
    # the backend.
    date_modified = fields.DateTimeField()

    @property
    def location_hash(self):
        """The ``'object'`` entry of ``location``."""
        return self.location['object']

    @property
    def archive(self):
        """The ``'archive'`` entry of ``metadata``, or ``None`` when absent."""
        return self.metadata.get('archive')

    def is_duplicate(self, other):
        """Return whether ``other`` has the same ``location_hash`` as this version."""
        return other.location_hash == self.location_hash

    def update_metadata(self, metadata, save=True):
        """Merge ``metadata`` into the stored metadata and refresh derived fields.

        :param dict metadata: Values merged into ``self.metadata``.
        :param bool save: Persist the version after updating.
        """
        self.metadata.update(metadata)
        merged = self.metadata
        # metadata has no fixed structure: keys missing from this callback keep
        # the current attribute value and may arrive in a later one.
        self.size = merged.get('size', self.size)
        self.content_type = merged.get('contentType', self.content_type)
        modified = merged.get('modified')
        if modified is not None:
            # TODO handle the timezone here; the user that updates the file
            # may see an incorrect version
            self.date_modified = parse_date(modified, ignoretz=True)

        if save:
            self.save()

    def _find_matching_archive(self, save=True):
        """Reuse the archive of another version that has the same sha256.

        When such a version exists its vault name and archive id are copied
        onto this version, so no additional backup has to be created.

        :param bool save: Persist this version after copying the values.
        :return: True when this version has (or already had) archive
            information, False otherwise.
        """
        if 'sha256' not in self.metadata:
            return False  # nothing to search for

        already_archived = 'vault' in self.metadata and 'archive' in self.metadata
        if already_archived:
            # Shouldn't ever happen, but we already have an archive
            return True

        query = (
            Q('_id', 'ne', self._id)
            & Q('metadata.vault', 'ne', None)
            & Q('metadata.archive', 'ne', None)
            & Q('metadata.sha256', 'eq', self.metadata['sha256'])
        )
        candidates = self.__class__.find(query).limit(1)
        if candidates.count() < 1:
            return False

        donor = candidates[0]
        try:
            self.metadata['vault'] = donor.metadata['vault']
            self.metadata['archive'] = donor.metadata['archive']
        except KeyError:
            return False

        if save:
            self.save()
        return True
Exemple #25
0
class User(GuidStoredObject, AddonModelMixin):

    redirect_mode = 'proxy'

    # User fields that trigger an update to the search engine on save
    SEARCH_UPDATE_FIELDS = {
        'fullname',
        'given_name',
        'middle_names',
        'family_name',
        'suffix',
        'merged_by',
        'date_disabled',
        'jobs',
        'schools',
        'social',
    }

    SOCIAL_FIELDS = {
        'orcid': 'http://orcid.com/{}',
        'github': 'http://github.com/{}',
        'scholar': 'http://scholar.google.com/citation?user={}',
        'twitter': 'http://twitter.com/{}',
        'personal': '{}',
        'linkedIn': 'https://www.linkedin.com/profile/view?id={}',
        'impactStory': 'https://impactstory.org/{}',
        'researcherId': 'http://researcherid.com/rid/{}',
    }

    _id = fields.StringField(primary=True)

    # NOTE: In the OSF, username is an email
    # May be None for unregistered contributors
    username = fields.StringField(required=False, unique=True, index=True)
    password = fields.StringField()
    fullname = fields.StringField(required=True, validate=string_required)
    is_registered = fields.BooleanField()

    # TODO: Migrate unclaimed users to the new style, then remove this attribute
    # Note: No new users should be created where is_claimed is False.
    #   As of 9 Sep 2014, there were 331 legacy unclaimed users in the system.
    #   When those users are migrated to the new style, this attribute should be
    #   removed.
    is_claimed = fields.BooleanField()

    # Tags for internal use
    system_tags = fields.StringField(list=True)

    # Per-project unclaimed user data:
    # Format: {
    #   <project_id>: {
    #       'name': <name that referrer provided>,
    #       'referrer_id': <user ID of referrer>,
    #       'token': <token used for verification urls>,
    #       'email': <email the referrer provided or None>,
    #       'last_sent': <timestamp of last email sent to referrer or None>
    #   }
    #   ...
    # }
    # TODO: add validation
    unclaimed_records = fields.DictionaryField(required=False)
    # The user who merged this account
    merged_by = fields.ForeignField('user', default=None, backref="merged")
    #: Verification key used for resetting password
    verification_key = fields.StringField()
    emails = fields.StringField(list=True)
    # Email verification tokens
    # Format: {
    #   <token> : {'email': <email address>,
    #              'expiration': <datetime>}
    # }
    email_verifications = fields.DictionaryField()

    # Format: {
    #   'list1': True,
    #   'list2': False,
    #    ...
    # }
    mailing_lists = fields.DictionaryField()

    aka = fields.StringField(list=True)
    date_registered = fields.DateTimeField(auto_now_add=dt.datetime.utcnow)
    # Watched nodes are stored via a list of WatchConfigs
    watched = fields.ForeignField("WatchConfig", list=True, backref="watched")

    # Recently added contributors stored via a list of users
    recently_added = fields.ForeignField("user",
                                         list=True,
                                         backref="recently_added")

    # CSL names
    given_name = fields.StringField()
    middle_names = fields.StringField()
    family_name = fields.StringField()
    suffix = fields.StringField()

    # Employment history
    # Format: {
    #     'title': <position or job title>,
    #     'institution': <institution or organization>,
    #     'department': <department>,
    #     'location': <location>,
    #     'startMonth': <start month>,
    #     'startYear': <start year>,
    #     'endMonth': <end month>,
    #     'endYear': <end year>,
    #     'ongoing': <boolean>
    # }
    jobs = fields.DictionaryField(list=True, validate=validate_history_item)

    # Educational history
    # Format: {
    #     'degree': <degree earned>,
    #     'institution': <institution or organization>,
    #     'department': <department>,
    #     'location': <location>,
    #     'startMonth': <start month>,
    #     'startYear': <start year>,
    #     'endMonth': <end month>,
    #     'endYear': <end year>,
    #     'ongoing': <boolean>
    # }
    schools = fields.DictionaryField(list=True, validate=validate_history_item)

    # Social links
    # Format: {
    #     'personal': <personal site>,
    #     'twitter': <twitter id>,
    # }
    social = fields.DictionaryField(validate=validate_social)

    api_keys = fields.ForeignField('apikey', list=True, backref='keyed')

    piwik_token = fields.StringField()

    date_last_login = fields.DateTimeField()

    date_confirmed = fields.DateTimeField()

    # When the user was disabled.
    date_disabled = fields.DateTimeField()

    # Format: {
    #   'node_id': 'timestamp'
    # }
    comments_viewed_timestamp = fields.DictionaryField()

    _meta = {'optimistic': True}

    def __repr__(self):
        return '<User({0!r}) with id {1!r}>'.format(self.username, self._id)

    @classmethod
    def create_unregistered(cls, fullname, email=None):
        """Build a new, unsaved, unregistered user.

        :param str fullname: Full name of the user.
        :param str email: Optional email; used as the username and added to
            ``emails`` when given.
        :return: The new user with ``is_registered`` set to False.

        NOTE(review): the previous docstring said DuplicateEmailError is raised
        for an email already in the database; nothing in this method raises it
        directly -- confirm where that check happens.
        """
        new_user = cls(username=email, fullname=fullname)
        new_user.update_guessed_names()
        new_user.is_registered = False
        if email:
            new_user.emails.append(email)
        return new_user

    @classmethod
    def create(cls, username, password, fullname):
        """Build a new, unsaved user with guessed name parts and a hashed password.

        :param str username: Username (an email address in the OSF).
        :param str password: Raw password, stored via ``set_password``.
        :param str fullname: Full name of the user.
        """
        account = cls(username=username, fullname=fullname)
        account.update_guessed_names()
        account.set_password(password)
        return account

    @classmethod
    def create_unconfirmed(cls, username, password, fullname, do_confirm=True):
        """Build a user who has started registration but still has to verify
        the primary email address (the username).

        ``do_confirm`` is accepted but not used by this method.
        """
        pending = cls.create(username, password, fullname)
        pending.add_email_verification(username)
        pending.is_registered = False
        return pending

    @classmethod
    def create_confirmed(cls, username, password, fullname):
        """Build a user that is already registered, claimed and confirmed.

        ``date_confirmed`` is set to the new user's ``date_registered``.
        """
        confirmed = cls.create(username, password, fullname)
        confirmed.is_claimed = True
        confirmed.is_registered = True
        confirmed.date_confirmed = confirmed.date_registered
        return confirmed

    def update_guessed_names(self):
        """Refresh the CSL name fields from the values imputed from ``fullname``."""
        imputed = utils.impute_names(self.fullname)
        field_to_key = (
            ('given_name', 'given'),
            ('middle_names', 'middle'),
            ('family_name', 'family'),
            ('suffix', 'suffix'),
        )
        for field_name, key in field_to_key:
            setattr(self, field_name, imputed[key])

    def register(self, username, password=None):
        """Mark this user as registered, claimed and confirmed.

        :param str username: Username to assign; also added to ``emails`` if
            it is not already there.
        :param str password: Optional raw password to set.
        :return: ``self``

        Search is updated for the user and the nodes they contribute to, and
        the ``user_confirmed`` signal is sent.
        """
        self.username = username
        if password:
            self.set_password(password)
        already_known = username in self.emails
        if not already_known:
            self.emails.append(username)
        self.is_registered = True
        self.is_claimed = True
        self.date_confirmed = dt.datetime.utcnow()

        self.update_search()
        self.update_search_nodes()

        # Let listeners know that a user has confirmed
        signals.user_confirmed.send(self)

        return self

    def add_unclaimed_record(self, node, referrer, given_name, email=None):
        """Store an unclaimed record for ``node`` in ``unclaimed_records``.

        :param Node node: Node this unclaimed user was added to.
        :param User referrer: User who referred this user.
        :param str given_name: The full name that the referrer gave for this user.
        :param str email: The given email address; stored lower-cased and
            stripped, or as ``None`` when not provided.
        :raises: PermissionsError if ``referrer`` cannot edit ``node``.
        :returns: The added record
        """
        if not node.can_edit(user=referrer):
            raise PermissionsError(
                'Referrer does not have permission to add a contributor '
                'to project {0}'.format(node._primary_key))
        project_id = node._primary_key
        referrer_id = referrer._primary_key
        clean_email = email.lower().strip() if email else None
        record = dict(
            name=given_name,
            referrer_id=referrer_id,
            token=generate_confirm_token(),
            email=clean_email,
        )
        self.unclaimed_records[project_id] = record
        return record

    def display_full_name(self, node=None):
        """Return the name to show for this user in a node's contributor list.

        When ``node`` is given and a non-empty unclaimed record exists for it,
        that record's name is used; otherwise ``fullname`` is returned.

        NOTE: Unclaimed users may have a different name for different nodes.
        """
        if not node:
            return self.fullname
        record = self.unclaimed_records.get(node._primary_key, None)
        if record:
            return record['name']
        return self.fullname

    @property
    def is_active(self):
        """Whether the account is usable.

        Truthy only when the user is registered, has a password, is neither
        merged nor disabled, and is confirmed.
        """
        return (
            self.is_registered
            and self.password is not None
            and not (self.is_merged or self.is_disabled)
            and self.is_confirmed()
        )

    def get_unclaimed_record(self, project_id):
        """Return the unclaimed record stored for ``project_id``.

        :raises: ValueError if there is no record for the given project.
        """
        try:
            record = self.unclaimed_records[project_id]
        except KeyError:
            # Callers expect ValueError rather than KeyError here
            message = 'No unclaimed record for user {self._id} on node {project_id}'
            raise ValueError(message.format(self=self, project_id=project_id))
        return record

    def get_claim_url(self, project_id, external=False):
        """Return the URL an unclaimed user should visit to claim the account.

        :param project_id: The project ID for the unclaimed record
        :param bool external: Prefix the URL with ``settings.DOMAIN`` instead
            of ``'/'``.
        :raises: ValueError if a record doesn't exist for the given project ID
        :rtype: str
        :returns: The claim URL, including the record's token
        """
        uid = self._primary_key
        if external:
            base_url = settings.DOMAIN
        else:
            base_url = '/'
        token = self.get_unclaimed_record(project_id)['token']
        template = '{base_url}user/{uid}/{project_id}/claim/?token={token}'
        return template.format(
            base_url=base_url, uid=uid, project_id=project_id, token=token)

    def set_password(self, raw_password):
        """Store the hash of ``raw_password`` as this user's password."""
        hashed = generate_password_hash(raw_password)
        self.password = hashed

    def check_password(self, raw_password):
        """Return whether ``raw_password`` matches the stored password hash.

        Always False when no password is stored or ``raw_password`` is empty.
        """
        if self.password and raw_password:
            return check_password_hash(self.password, raw_password)
        return False

    def change_password(self, raw_old_password, raw_new_password,
                        raw_confirm_password):
        """Replace the password with the hash of ``raw_new_password``.

        All three inputs are stripped of surrounding whitespace (``None`` is
        treated as empty) before validation.

        :raises: ChangePasswordError carrying the list of problems found when
            any validation fails.
        """
        old_pw, new_pw, confirm_pw = [
            (value or '').strip()
            for value in (raw_old_password, raw_new_password,
                          raw_confirm_password)
        ]

        problems = []
        if not self.check_password(old_pw):
            problems.append('Old password is invalid')
        elif old_pw == new_pw:
            problems.append('Password cannot be the same')

        if not (old_pw and new_pw and confirm_pw):
            problems.append('Passwords cannot be blank')
        elif len(new_pw) < 6:
            problems.append('Password should be at least six characters')

        if new_pw != confirm_pw:
            problems.append('Password does not match the confirmation')

        if problems:
            raise ChangePasswordError(problems)
        self.set_password(new_pw)

    def _set_email_token_expiration(self, token, expiration=None):
        """Set the expiration date for given email token.

        :param str token: The email token to set the expiration for.
        :param datetime expiration: Datetime at which to expire the token. If ``None``, the
            token will expire after ``settings.EMAIL_TOKEN_EXPIRATION`` hours. This is only
            used for testing purposes.
        """
        expiration = expiration or (dt.datetime.utcnow() + dt.timedelta(
            hours=settings.EMAIL_TOKEN_EXPIRATION))
        self.email_verifications[token]['expiration'] = expiration
        return expiration

    def add_email_verification(self, email, expiration=None):
        """Create a verification token for ``email`` and return it.

        The email is stored lower-cased; the token's expiration is set via
        ``_set_email_token_expiration``.
        """
        new_token = generate_confirm_token()
        verification = {'email': email.lower()}
        self.email_verifications[new_token] = verification
        self._set_email_token_expiration(new_token, expiration=expiration)
        return new_token

    def get_confirmation_token(self, email, force=False):
        """Return the confirmation token for a given email.

        :param str email: Email to get the token for (compared case-insensitively).
        :param bool force: If the token found for the email is expired,
            generate a new token, save the user and return the new token.

        :raises: ExpiredTokenError if the token is expired and force=False.
        :raises: KeyError if there is no token for the email.
        """
        for token, info in self.email_verifications.items():
            if info['email'].lower() != email.lower():
                continue
            if info['expiration'] < dt.datetime.utcnow():
                if force:
                    replacement = self.add_email_verification(email)
                    self.save()
                    return replacement
                raise ExpiredTokenError(
                    'Token for email "{0}" is expired'.format(email))
            return token
        raise KeyError('No confirmation token for email "{0}"'.format(email))

    def get_confirmation_url(self, email, external=True, force=False):
        """Return the confirmation url for a given email.

        :param bool external: Prefix the URL with ``settings.DOMAIN`` instead
            of ``'/'``.
        :param bool force: Forwarded to ``get_confirmation_token``.
        :raises: ExpiredTokenError if trying to access a token that is expired.
        :raises: KeyError if there is no token for the email.
        """
        if external:
            base = settings.DOMAIN
        else:
            base = '/'
        token = self.get_confirmation_token(email, force=force)
        template = "{0}confirm/{1}/{2}/"
        return template.format(base, self._primary_key, token)

    def verify_confirmation_token(self, token):
        """Return whether or not a confirmation token is valid for this user.

        A token is valid when it is present in ``email_verifications`` and its
        ``'expiration'`` is later than the current UTC time.

        :param str token: Email verification token to check.
        :rtype: bool
        """
        # Test membership on the dict itself rather than on ``.keys()`` and
        # index once instead of a second ``.get`` lookup.
        if token not in self.email_verifications:
            return False
        expires_at = self.email_verifications[token]['expiration']
        return expires_at > dt.datetime.utcnow()

    def verify_claim_token(self, token, project_id):
        """Return whether ``token`` matches the unclaimed record this user has
        for ``project_id``; False when no such record exists.
        """
        try:
            expected = self.get_unclaimed_record(project_id)['token']
        except ValueError:
            # No unclaimed record for the given project id
            return False
        return expected == token

    def confirm_email(self, token):
        """Confirm the email address associated with ``token``.

        On success the email is added to ``emails`` (unless already present),
        registration is completed when the email is the primary one (the
        username), the token is revoked, all unclaimed records are cleared,
        the user is saved and search is updated.

        :param str token: Email verification token.
        :return: True if the token was valid and the email was confirmed,
            False otherwise.
        """
        if not self.verify_confirmation_token(token):
            return False

        email = self.email_verifications[token]['email']
        # Avoid storing the same address twice when it is confirmed again.
        if email not in self.emails:
            self.emails.append(email)
        # Complete registration if primary email. ``username`` may be None
        # (e.g. unregistered contributors), so guard before lower-casing it.
        if self.username and email.lower() == self.username.lower():
            self.register(self.username)
            self.date_confirmed = dt.datetime.utcnow()
        # Revoke token
        del self.email_verifications[token]
        # Clear unclaimed records, so user's name shows up correctly on
        # all projects
        self.unclaimed_records = {}
        self.save()
        # Note: We must manually update search here because the fullname
        # field has not changed
        self.update_search()
        self.update_search_nodes()
        return True

    def update_search_nodes(self):
        """Call ``update_search`` on every node this user contributes to.

        Needed so the user shows up in contributor lists in search after
        registering or claiming an account.
        """
        for contributed_node in self.node__contributed:
            contributed_node.update_search()

    def is_confirmed(self):
        """Return True when ``date_confirmed`` is set (truthy)."""
        return True if self.date_confirmed else False

    @property
    def social_links(self):
        """Map each filled-in social key to its URL.

        Entries of ``social`` with an empty value, or whose key has no
        (non-empty) template in ``SOCIAL_FIELDS``, are left out.
        """
        links = {}
        for key, val in self.social.items():
            if not val:
                continue
            if not self.SOCIAL_FIELDS.get(key):
                continue
            links[key] = self.SOCIAL_FIELDS[key].format(val)
        return links

    @property
    def biblio_name(self):
        """Return the family name followed by the initials of the given and
        middle names, e.g. ``"Walker, R. H."``.

        Only name parts whose first character is a word character contribute
        an initial. The bare family name is returned when it equals the
        combined given and middle names.
        """
        given_names = self.given_name + ' ' + self.middle_names
        surname = self.family_name
        if surname == given_names:
            return surname
        initials = []
        for part in given_names.split(' '):
            if part and re.search(r'\w', part[0], re.I):
                initials.append(part[0].upper() + '.')
        return u'{0}, {1}'.format(surname, ' '.join(initials))

    @property
    def given_name_initial(self):
        """
        The user's preferred initialization of their given name.

        Users with common names may want to be told apart from colleagues:
        with two well-known researchers called "Robert Walker" in one field,
        "Walker, R" is ambiguous while "Walker, R.H." is not.

        NOTE: The internal representation for this should never end with a
              period. "R" and "R.H" would be correct in the prior case, but
              "R.H." would not.

        Currently this is simply the first character of ``given_name``.
        """
        given = self.given_name
        return given[0]

    @property
    def url(self):
        """Site-relative URL of this user: the primary key between slashes."""
        key = self._primary_key
        return '/{}/'.format(key)

    @property
    def api_url(self):
        """Site-relative v1 API profile URL for this user."""
        key = self._primary_key
        return '/api/v1/profile/{0}/'.format(key)

    @property
    def absolute_url(self):
        """``url`` joined onto ``settings.DOMAIN``."""
        domain = settings.DOMAIN
        return urlparse.urljoin(domain, self.url)

    @property
    def display_absolute_url(self):
        """``absolute_url`` without the ``http:``/``https:`` scheme and without
        leading or trailing slashes; ``None`` when ``absolute_url`` is ``None``.
        """
        full_url = self.absolute_url
        if full_url is None:
            return None
        without_scheme = re.sub(r'https?:', '', full_url)
        return without_scheme.strip('/')

    @property
    def deep_url(self):
        return '/profile/{}/'.format(self._primary_key)

    @property
    def gravatar_url(self):
        """Gravatar URL for this user, requested over SSL at the size
        configured in ``settings.GRAVATAR_SIZE_ADD_CONTRIBUTOR``.
        """
        options = {
            'use_ssl': True,
            'size': settings.GRAVATAR_SIZE_ADD_CONTRIBUTOR,
        }
        return filters.gravatar(self, **options)

    def get_activity_points(self, db=None):
        """Return this user's total activity count.

        :param db: Database handle to query; falls back to
            ``framework.mongo.database`` when falsy.
        """
        if not db:
            db = framework.mongo.database
        return analytics.get_total_activity_count(self._primary_key, db=db)

    @property
    def is_disabled(self):
        """Whether or not this account has been disabled.

        Abstracts ``User.date_disabled``.

        :return: bool
        """
        return self.date_disabled is not None

    @is_disabled.setter
    def is_disabled(self, val):
        """Set whether or not this account has been disabled."""
        if val:
            self.date_disabled = dt.datetime.utcnow()
        else:
            self.date_disabled = None

    @property
    def is_merged(self):
        '''Whether or not this account has been merged into another account,
        i.e. whether ``merged_by`` is set.
        '''
        merged_into = self.merged_by
        return merged_into is not None

    @property
    def profile_url(self):
        """Site-relative profile URL built from ``_id``."""
        uid = self._id
        return '/{}/'.format(uid)

    def get_summary(self, formatter='long'):
        """Return a small dict describing this user.

        :param str formatter: Key into ``name_formatters`` selecting how the
            display name is rendered.
        """
        summary = {'user_fullname': self.fullname}
        summary['user_profile_url'] = self.profile_url
        summary['user_display_name'] = name_formatters[formatter](self)
        summary['user_is_claimed'] = self.is_claimed
        return summary

    def save(self, *args, **kwargs):
        """Normalize the username, save, then refresh search and Piwik.

        The username is lower-cased and stripped (or set to ``None`` when
        empty). Search is updated when the parent ``save`` result intersects
        ``SEARCH_UPDATE_FIELDS`` and the user is confirmed. A Piwik user is
        created when Piwik is configured and no token is stored yet; failures
        there are logged, not raised.

        :return: Whatever the parent class ``save`` returns.
        """
        if self.username:
            self.username = self.username.lower().strip()
        else:
            self.username = None

        saved_fields = super(User, self).save(*args, **kwargs)

        search_relevant = self.SEARCH_UPDATE_FIELDS.intersection(saved_fields)
        if search_relevant and self.is_confirmed():
            self.update_search()

        if settings.PIWIK_HOST and not self.piwik_token:
            try:
                piwik.create_user(self)
            except (piwik.PiwikException, ValueError):
                logger.error("Piwik user creation failed: " + self._id)

        return saved_fields

    def update_search(self):
        """Push this user to the search backend.

        An unavailable search service is logged and otherwise ignored.
        """
        from website import search
        try:
            search.search.update_user(self)
        except search.exceptions.SearchUnavailableError as error:
            logger.exception(error)
            log_exception()

    @classmethod
    def find_by_email(cls, email):
        """Look up the user whose ``emails`` list contains ``email``.

        :param str email: Email address to search for.
        :return: A one-element list holding the matching user, or an empty
            list when the lookup fails (for example when nothing matches).
        """
        try:
            return [cls.find_one(Q('emails', 'eq', email))]
        except Exception:
            # Best-effort lookup: any query failure means "no user". Catching
            # Exception rather than using a bare ``except`` lets SystemExit
            # and KeyboardInterrupt propagate.
            return []

    def serialize(self, anonymous=False):
        """Return a dict view of this user.

        Every value except ``registered`` is passed through
        ``utils.privacy_info_handle`` together with ``anonymous``.
        """
        mask = utils.privacy_info_handle
        return {
            'id': mask(self._primary_key, anonymous),
            'fullname': mask(self.fullname, anonymous, name=True),
            'registered': self.is_registered,
            'url': mask(self.url, anonymous),
            'api_url': mask(self.api_url, anonymous),
        }

    ###### OSF-Specific methods ######

    def watch(self, watch_config):
        """Watch a node by saving its WatchConfig and appending it to this
        user's ``watched`` list. The user itself is not saved here.

        :param watch_config: The WatchConfig to add.
        :raises: ValueError if the node is already watched.
        """
        nodes_seen = [config.node for config in self.watched]
        if watch_config.node not in nodes_seen:
            watch_config.save()
            self.watched.append(watch_config)
            return None
        raise ValueError('Node is already being watched.')

    def unwatch(self, watch_config):
        """Unwatch a node by removing the stored WatchConfig that points at the
        same node as ``watch_config``. The user itself is not saved here.

        :param watch_config: The WatchConfig identifying the node to unwatch.
        :raises: ValueError if the node is not being watched.
        """
        target_id = watch_config.node._id
        for existing in self.watched:
            if existing.node._id != target_id:
                continue
            existing.__class__.remove_one(existing)
            return None
        raise ValueError('Node not being watched.')

    def is_watching(self, node):
        '''Return whether or not this user is watching ``node``.'''
        watched_ids = {config.node._id for config in self.watched}
        return node._id in watched_ids

    def get_recent_log_ids(self, since=None):
        '''Return a generator over the ids of recent logs on watched nodes.

        :param since: A datetime specifying the oldest time to retrieve logs
        from. If ``None``, defaults to 60 days before now (UTC). Must be a
        tz-aware datetime because PyMongo's generation times are tz-aware.

        :rtype: generator of log ids (strings)
        '''
        # timezone-aware "now", used for the default cutoff
        utcnow = dt.datetime.utcnow().replace(tzinfo=pytz.utc)
        cutoff = since or (utcnow - dt.timedelta(days=60))

        log_ids = []
        for config in self.watched:
            # The creation time is read from the ObjectId itself (its first
            # 4 bytes encode it), which avoids loading every Log object just
            # to look at its date.
            fresh_ids = []
            for log_id in config.node.logs._to_primary_keys():
                is_recent = bson.ObjectId(log_id).generation_time > cutoff
                if is_recent and log_id not in log_ids:
                    fresh_ids.append(log_id)
            # Log ids in reverse chronological order
            log_ids = _merge_into_reversed(log_ids, fresh_ids)
        return (l_id for l_id in log_ids)

    def get_daily_digest_log_ids(self):
        '''Return a generator of log ids generated since the most recent
        UTC midnight (00:00 of the current UTC day).
        '''
        now = dt.datetime.utcnow()
        midnight = dt.datetime(
            now.year, now.month, now.day,
            hour=0, minute=0, second=0,
            tzinfo=pytz.utc,
        )
        return self.get_recent_log_ids(since=midnight)

    def merge_user(self, user, save=False):
        """Merge a registered user into this account.

        This account inherits ``user``'s emails, replaces ``user`` as a
        contributor (keeping permissions and visibility) on every node
        ``user`` contributed to, and becomes the creator of every node
        ``user`` created. ``user`` is then marked as merged by this account
        and saved.

        :param user: A User object to be merged.
        :param bool save: Also save this account afterwards.
        """
        # Inherit emails
        self.emails.extend(user.emails)

        # Take over contributorship
        for contributed in user.node__contributed:
            contributed.add_contributor(
                contributor=self,
                permissions=contributed.get_permissions(user),
                visible=contributed.get_visible(user),
                log=False,
            )
            try:
                contributed.remove_contributor(
                    contributor=user,
                    auth=Auth(user=self),
                    log=False,
                )
            except ValueError:
                logger.error('Contributor {0} not in list on node {1}'.format(
                    user._id, contributed._id))
            contributed.save()

        # Take over authorship
        for created in user.node__created:
            created.creator = self
            created.save()

        user.merged_by = self
        user.save()
        if save:
            self.save()
        return None

    def get_projects_in_common(self, other_user, primary_keys=True):
        """Return the projects that both this user and ``other_user`` contribute to.

        :param other_user: User whose ``node__contributed`` is compared with ours.
        :param bool primary_keys: When True (default) the comparison is made on,
            and the result contains, primary keys; otherwise the members of
            ``node__contributed`` themselves are compared and returned.
        :rtype: set
        """
        mine = self.node__contributed
        theirs = other_user.node__contributed
        if primary_keys:
            mine = mine._to_primary_keys()
            theirs = theirs._to_primary_keys()
        return set(mine).intersection(theirs)

    def n_projects_in_common(self, other_user):
        """Return how many projects both this user and ``other_user`` contribute to."""
        shared_keys = self.get_projects_in_common(other_user, primary_keys=True)
        return len(shared_keys)
class SpamMixin(StoredObject):
    """Abstract mixin giving a stored object a spam status, spam data and abuse reports.

    Subclasses must implement ``check_spam``. Note that ``report_abuse``
    compares the reporter with ``self.user``, an attribute this mixin does not
    declare itself.
    """

    _meta = {'abstract': True}

    # # Node fields that trigger an update to search on save
    # SPAM_UPDATE_FIELDS = {
    #     'spam_status',
    # }
    spam_status = fields.IntegerField(default=SpamStatus.UNKNOWN, index=True)
    # Second value returned by the spam client's ``check_comment`` (see ``do_check_spam``)
    spam_pro_tip = fields.StringField(default=None)
    # What was submitted to the spam client, as stored by ``do_check_spam``:
    # - author: author name
    # - author_email: email of the author
    # - content: data flagged
    # - headers: request headers
    #   - Remote-Addr: ip address from request
    #   - User-Agent: user agent from request
    #   - Referer: referrer header from request (the header name itself is misspelled)
    spam_data = fields.DictionaryField(default=dict)
    # Date of the most recent report stored by ``report_abuse``
    date_last_reported = fields.DateTimeField(default=None, index=True)

    # Reports is a dict of reports keyed on the reporting user's _id
    # Each report is a dictionary including:
    #  - date: date reported
    #  - retracted: if a report has been retracted
    #  - category: What type of spam does the reporter believe this is
    #  - text: Comment on the comment
    reports = fields.DictionaryField(default=dict, validate=_validate_reports)

    def flag_spam(self):
        """Move an UNKNOWN object to FLAGGED; every other status is left alone."""
        if self.spam_status != SpamStatus.UNKNOWN:
            return
        self.spam_status = SpamStatus.FLAGGED

    def remove_flag(self, save=False):
        """Reset a FLAGGED object to UNKNOWN once no un-retracted report remains.

        Does nothing (and does not save) when the object is not FLAGGED or when
        at least one report is still standing. A report without a
        ``retracted`` key counts as retracted.

        :param save: Save changes
        """
        if self.spam_status != SpamStatus.FLAGGED:
            return
        has_standing_report = any(
            not entry.get('retracted', True)
            for entry in self.reports.values()
        )
        if has_standing_report:
            return
        self.spam_status = SpamStatus.UNKNOWN
        if save:
            self.save()

    @property
    def is_spam(self):
        """True when the status is exactly SPAM."""
        return self.spam_status == SpamStatus.SPAM

    @property
    def is_spammy(self):
        """True when the status is FLAGGED or SPAM."""
        return self.spam_status in (SpamStatus.FLAGGED, SpamStatus.SPAM)

    def report_abuse(self, user, save=False, **kwargs):
        """Report object is spam or other abuse of OSF

        :param user: User submitting report
        :param save: Save changes
        :param kwargs: Extra report entries (e.g. category, text); ``text``
            defaults to ``None`` when it is not supplied
        :raises ValueError: if user is reporting self
        """
        if user == self.user:
            raise ValueError('User cannot report self.')
        self.flag_spam()
        # kwargs are layered on top of the defaults, so they may override them
        entry = dict({'date': datetime.utcnow(), 'retracted': False}, **kwargs)
        entry.setdefault('text', None)
        self.reports[user._id] = entry
        self.date_last_reported = entry['date']
        if save:
            self.save()

    def retract_report(self, user, save=False):
        """Retract last report by user

        Only marks the last report as retracted because there could be
        history in how the object is edited that requires a user
        to flag or retract even if object is marked as HAM.
        :param user: User retracting
        :param save: Save changes
        :raises ValueError: if the user has no report on this object
        """
        if user._id not in self.reports:
            raise ValueError('User has not reported this content')
        entry = self.reports[user._id]
        if not entry['retracted']:
            entry['retracted'] = True
            self.remove_flag()
        if save:
            self.save()

    def confirm_ham(self, save=False):
        """Set the status to HAM, telling the spam client first when appropriate.

        ``submit_ham`` is only sent when spam checking is enabled, spam data
        has been recorded and the object is currently FLAGGED or SPAM (not all
        mixins will implement the check-spam prerequisite).

        :param save: Save changes
        """
        should_submit = (
            settings.SPAM_CHECK_ENABLED
            and self.spam_data
            and self.spam_status in (SpamStatus.FLAGGED, SpamStatus.SPAM)
        )
        if should_submit:
            client = _get_client()
            recorded = self.spam_data
            headers = recorded['headers']
            client.submit_ham(
                user_ip=headers['Remote-Addr'],
                user_agent=headers.get('User-Agent'),
                referrer=headers.get('Referer'),
                comment_content=recorded['content'],
                comment_author=recorded['author'],
                comment_author_email=recorded['author_email'],
            )
            logger.info('confirm_ham update sent')
        self.spam_status = SpamStatus.HAM
        if save:
            self.save()

    def confirm_spam(self, save=False):
        """Set the status to SPAM, telling the spam client first when appropriate.

        ``submit_spam`` is only sent when spam checking is enabled, spam data
        has been recorded and the object is currently UNKNOWN or HAM (not all
        mixins will implement the check-spam prerequisite).

        :param save: Save changes
        """
        should_submit = (
            settings.SPAM_CHECK_ENABLED
            and self.spam_data
            and self.spam_status in (SpamStatus.UNKNOWN, SpamStatus.HAM)
        )
        if should_submit:
            client = _get_client()
            recorded = self.spam_data
            headers = recorded['headers']
            client.submit_spam(
                user_ip=headers['Remote-Addr'],
                user_agent=headers.get('User-Agent'),
                referrer=headers.get('Referer'),
                comment_content=recorded['content'],
                comment_author=recorded['author'],
                comment_author_email=recorded['author_email'],
            )
            logger.info('confirm_spam update sent')
        self.spam_status = SpamStatus.SPAM
        if save:
            self.save()

    @abc.abstractmethod
    def check_spam(self, saved_fields, request_headers, save=False):
        """Must return is_spam"""

    def do_check_spam(self, author, author_email, content, request_headers):
        """Ask the spam client about ``content`` and record what was submitted.

        Objects already marked HAM short-circuit to ``False`` and objects
        already FLAGGED/SPAM to ``True`` without contacting the client.

        :param author: author name sent to the client
        :param author_email: author email sent to the client
        :param content: the content to be checked
        :param request_headers: mapping that must contain ``Remote-Addr``;
            ``User-Agent`` and ``Referer`` are optional
        :return: the first value of the client's ``check_comment`` result
            (or the short-circuit value described above)
        """
        if self.spam_status == SpamStatus.HAM:
            return False
        if self.is_spammy:
            return True

        client = _get_client()
        headers = {
            'Remote-Addr': request_headers['Remote-Addr'],
            'User-Agent': request_headers.get('User-Agent'),
            'Referer': request_headers.get('Referer'),
        }
        flagged_by_client, pro_tip = client.check_comment(
            user_ip=headers['Remote-Addr'],
            user_agent=headers['User-Agent'],
            referrer=headers['Referer'],
            comment_content=content,
            comment_author=author,
            comment_author_email=author_email)
        self.spam_pro_tip = pro_tip
        # Keep the submitted payload so confirm_ham/confirm_spam can resend it
        self.spam_data['headers'] = headers
        self.spam_data['content'] = content
        self.spam_data['author'] = author
        self.spam_data['author_email'] = author_email
        if flagged_by_client:
            self.flag_spam()
        return flagged_by_client
Exemple #27
0
class ArchiveJob(StoredObject):
    """Record of an archive job: source/destination nodes, the initiator and
    one ``ArchiveTarget`` per addon being archived.
    """

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # whether or not the ArchiveJob is complete (success or fail)
    done = fields.BooleanField(default=False)
    # whether or not emails have been sent for this ArchiveJob
    sent = fields.BooleanField(default=False)
    status = fields.StringField(default=ARCHIVER_INITIATED)
    datetime_initiated = fields.DateTimeField(default=datetime.datetime.utcnow)

    dst_node = fields.ForeignField('node', backref='active')
    src_node = fields.ForeignField('node')
    initiator = fields.ForeignField('user')

    target_addons = fields.ForeignField('archivetarget', list=True)

    # This field is used for stashing embargo URLs while still in the app context
    # Format: {
    #     'view': <str> url,
    #     'approve': <str> url,
    #     'disapprove': <str> url,
    # }
    meta = fields.DictionaryField()

    def __repr__(self):
        return (
            '<{ClassName}(_id={self._id}, done={self.done}, '
            ' status={self.status}, src_node={self.src_node}, dst_node={self.dst_node})>'
        ).format(ClassName=self.__class__.__name__, self=self)

    @property
    def children(self):
        """The ``archive_job`` of every primary node directly under ``dst_node``."""
        return [
            node.archive_job for node in self.dst_node.nodes if node.primary
        ]

    @property
    def parent(self):
        """The ``archive_job`` of ``dst_node``'s parent node, or ``None`` without a parent."""
        parent_node = self.dst_node.parent_node
        return parent_node.archive_job if parent_node else None

    @property
    def success(self):
        """True when ``status`` equals ``ARCHIVER_SUCCESS``."""
        return self.status == ARCHIVER_SUCCESS

    @property
    def pending(self):
        """True while any target is in neither the success nor the failure status."""
        return any(
            target.status not in (ARCHIVER_SUCCESS, ARCHIVER_FAILURE)
            for target in self.target_addons
        )

    def info(self):
        """Return the ``(src_node, dst_node, initiator)`` tuple."""
        return self.src_node, self.dst_node, self.initiator

    def target_info(self):
        """Return one dict (name, status, stat_result, errors) per target."""
        return [{
            'name': target.name,
            'status': target.status,
            'stat_result': target.stat_result,
            'errors': target.errors
        } for target in self.target_addons]

    def archive_tree_finished(self):
        """Tell whether this job and every job below it have finished.

        The job is finished when it has no pending targets and all of its
        children report a finished tree. A job without children only needs to
        have no pending targets.

        The previous implementation returned the *count* of finished children,
        which is truthy as soon as a single child is done even though others
        are still pending; this version requires all of them.

        :return: ``True`` or ``False``
        """
        if self.pending:
            return False
        # all() of an empty sequence is True, covering the no-children case
        return all(child.archive_tree_finished() for child in self.children)

    def _fail_above(self):
        """Marks the ArchiveJob attached to the parent Node as failed.

        NOTE(review): only the immediate parent is updated here; jobs further
        up the tree are not touched by this method.
        """
        parent = self.parent
        if parent:
            parent.status = ARCHIVER_FAILURE
            parent.save()

    def _post_update_target(self):
        """Checks for success or failure if the ArchiveJob on self.dst_node
        is finished

        A job already marked as failed is left alone. Once no target is
        pending, the job is marked done and saved: failed (also failing the
        parent job) if any target has one of ``ARCHIVER_FAILURE_STATUSES``,
        successful otherwise.
        """
        if self.status == ARCHIVER_FAILURE:
            return
        if self.pending:
            return
        self.done = True
        any_failed = any(
            target.status in ARCHIVER_FAILURE_STATUSES
            for target in self.target_addons
        )
        if any_failed:
            self.status = ARCHIVER_FAILURE
            self._fail_above()
        else:
            self.status = ARCHIVER_SUCCESS
        self.save()

    def get_target(self, addon_short_name):
        """Return the first target whose name is ``addon_short_name``, or ``None``."""
        return next(
            (addon for addon in self.target_addons
             if addon.name == addon_short_name),
            None,
        )

    def _set_target(self, addon_short_name):
        """Create, save and attach a target for ``addon_short_name`` unless one exists."""
        if self.get_target(addon_short_name):
            return
        target = ArchiveTarget(name=addon_short_name)
        target.save()
        self.target_addons.append(target)

    def set_targets(self):
        """Create targets for the archivable addons of ``src_node`` and save the job.

        An addon is considered when its ``settings.ADDONS_ARCHIVABLE`` entry is
        not ``'none'``, it is present and complete on ``src_node``, it is a
        ``StorageAddonBase`` and its optional ``archive_errors()`` hook reports
        nothing. The ``dataverse`` addon yields two targets (``-draft`` and
        ``-published``).
        """
        target_names = []
        for name in settings.ADDONS_ARCHIVABLE:
            if settings.ADDONS_ARCHIVABLE[name] == 'none':
                continue
            addon = self.src_node.get_addon(name)
            if not addon or not addon.complete or not isinstance(
                    addon, StorageAddonBase):
                continue
            archive_errors = getattr(addon, 'archive_errors', None)
            if archive_errors and archive_errors():
                continue
            short_name = addon.config.short_name
            if short_name == 'dataverse':
                target_names.append(short_name + '-draft')
                target_names.append(short_name + '-published')
            else:
                target_names.append(short_name)
        for target_name in target_names:
            self._set_target(target_name)
        self.save()

    def update_target(self,
                      addon_short_name,
                      status,
                      stat_result=None,
                      errors=None):
        """Record the outcome for one target and re-evaluate the job's state.

        :param addon_short_name: name of the target to update
        :param status: new status for the target
        :param stat_result: stored on the target; defaults to ``{}``
        :param errors: stored on the target; defaults to ``[]``

        NOTE(review): if no target with that name exists, ``get_target``
        returns ``None`` and the attribute assignment below raises
        ``AttributeError``.
        """
        stat_result = stat_result or {}
        errors = errors or []

        target = self.get_target(addon_short_name)
        target.status = status
        target.errors = errors
        target.stat_result = stat_result
        target.save()
        self._post_update_target()
Exemple #28
0
class PreprintService(GuidStoredObject):
    """A preprint: links a node to a preprint provider and tracks its
    publication state and subjects.
    """

    _id = fields.StringField(primary=True)
    date_created = fields.DateTimeField(auto_now_add=True)
    date_modified = fields.DateTimeField(auto_now=True)
    provider = fields.ForeignField('PreprintProvider', index=True)
    node = fields.ForeignField('Node', index=True)
    is_published = fields.BooleanField(default=False, index=True)
    date_published = fields.DateTimeField()

    # This is a list of tuples of Subject id's. MODM doesn't do schema
    # validation for DictionaryFields, but would unsuccessfully attempt
    # to validate the schema for a list of lists of ForeignFields.
    #
    # Format: [[root_subject._id, ..., child_subject._id], ...]
    subjects = fields.DictionaryField(list=True)

    @property
    def primary_file(self):
        """The node's ``preprint_file``, or ``None`` when no node is set."""
        if not self.node:
            return None
        return self.node.preprint_file

    @property
    def article_doi(self):
        """The node's ``preprint_article_doi``, or ``None`` when no node is set."""
        if not self.node:
            return None
        return self.node.preprint_article_doi

    @property
    def is_preprint_orphan(self):
        """The node's ``is_preprint_orphan`` value, or ``None`` when no node is set."""
        if not self.node:
            return None
        return self.node.is_preprint_orphan

    @property
    def deep_url(self):
        # Required for GUID routing
        return '/preprints/{}/'.format(self._primary_key)

    @property
    def url(self):
        return '/{}/'.format(self._id)

    @property
    def absolute_url(self):
        return urlparse.urljoin(settings.DOMAIN, self.url)

    @property
    def absolute_api_v2_url(self):
        path = '/preprints/{}/'.format(self._id)
        return api_v2_url(path)

    def get_subjects(self):
        """Expand the stored subject ids into ``{'id', 'text'}`` dicts.

        Ids that ``Subject.load`` cannot resolve are dropped, and hierarchies
        that end up empty are omitted.

        :return: list of hierarchies, each a list of ``{'id': ..., 'text': ...}``
        """
        ret = []
        for subj_list in self.subjects:
            subj_hierarchy = []
            for subj_id in subj_list:
                subj = Subject.load(subj_id)
                if subj:
                    subj_hierarchy.append({'id': subj_id, 'text': subj.text})
            if subj_hierarchy:
                ret.append(subj_hierarchy)
        return ret

    def set_subjects(self, preprint_subjects, auth, save=False):
        """Replace the preprint's subjects with ``preprint_subjects``.

        Every non-empty hierarchy is checked with
        ``validate_subject_hierarchy`` *before* the stored subjects are
        touched, so a validation failure no longer leaves ``self.subjects``
        cleared or half-filled.

        :param preprint_subjects: iterable of subject-id hierarchies
        :param auth: must carry a user with ADMIN permission on the node
        :param save: Save changes
        :raises PermissionsError: if ``auth.user`` is not an admin of the node
        """
        if not self.node.has_permission(auth.user, ADMIN):
            raise PermissionsError('Only admins can change a preprint\'s subjects.')

        validated = []
        for subj_list in preprint_subjects:
            subj_hierarchy = list(subj_list)
            if subj_hierarchy:
                validate_subject_hierarchy(subj_hierarchy)
                validated.append(subj_hierarchy)

        self.subjects = []
        for subj_hierarchy in validated:
            self.subjects.append(subj_hierarchy)

        if save:
            self.save()

    def set_primary_file(self, preprint_file, auth, save=False):
        """Set the node's preprint file, logging when an existing one is replaced.

        :param preprint_file: a ``StoredFileNode`` or an object exposing one
            through ``stored_object``
        :param auth: must carry a user with ADMIN permission on the node
        :param save: Save the preprint and the node
        :raises PermissionsError: if ``auth.user`` is not an admin of the node
        :raises ValueError: if the file belongs to another node or is not
            stored with the ``osfstorage`` provider
        """
        if not self.node.has_permission(auth.user, ADMIN):
            raise PermissionsError('Only admins can change a preprint\'s primary file.')

        if not isinstance(preprint_file, StoredFileNode):
            preprint_file = preprint_file.stored_object

        if preprint_file.node != self.node or preprint_file.provider != 'osfstorage':
            raise ValueError('This file is not a valid primary file for this preprint.')

        # there is no preprint file yet! This is the first time!
        if not self.node.preprint_file:
            self.node.preprint_file = preprint_file
        elif preprint_file != self.node.preprint_file:
            # if there was one, check if it's a new file
            self.node.preprint_file = preprint_file
            self.node.add_log(
                action=NodeLog.PREPRINT_FILE_UPDATED,
                params={},
                auth=auth,
                save=False,
            )

        if save:
            self.save()
            self.node.save()

    def set_published(self, published, auth, save=False):
        """Publish the preprint (un-publishing is not allowed).

        All publication requirements are checked before ``is_published`` is
        changed, so a rejected publish attempt leaves the flag untouched.

        :param published: desired publication state
        :param auth: must carry a user with ADMIN permission on the node
        :param save: Save the node and the preprint
        :raises PermissionsError: if ``auth.user`` is not an admin of the node
        :raises ValueError: when trying to unpublish, or when the preprint
            file, provider or subjects are missing
        """
        if not self.node.has_permission(auth.user, ADMIN):
            raise PermissionsError('Only admins can publish a preprint.')

        if self.is_published and not published:
            raise ValueError('Cannot unpublish preprint.')

        if published:
            if not (self.node.preprint_file and self.node.preprint_file.node == self.node):
                raise ValueError('Preprint node is not a valid preprint; cannot publish.')
            if not self.provider:
                raise ValueError('Preprint provider not specified; cannot publish.')
            if not self.subjects:
                raise ValueError('Preprint must have at least one subject to be published.')

        # Only mutate state once every precondition above has passed
        self.is_published = published

        if published:
            self.date_published = datetime.datetime.utcnow()
            self.node._has_abandoned_preprint = False

            self.node.add_log(action=NodeLog.PREPRINT_INITIATED, params={}, auth=auth, save=False)

            if not self.node.is_public:
                self.node.set_privacy(
                    self.node.PUBLIC,
                    auth=None,
                    log=True
                )

        if save:
            self.node.save()
            self.save()

    def save(self, *args, **kwargs):
        """Save the preprint and enqueue ``on_preprint_updated`` when fields changed.

        :return: whatever the parent ``save`` returns (previously this value
            was silently dropped)
        """
        saved_fields = super(PreprintService, self).save(*args, **kwargs)
        if saved_fields:
            enqueue_task(on_preprint_updated.s(self._id))
        return saved_fields
Exemple #29
0
class NodeWikiPage(GuidStoredObject):
    """A wiki page record (name, version, content) attached to a node and a user."""

    _id = fields.StringField(primary=True)

    page_name = fields.StringField(validate=validate_page_name)
    version = fields.IntegerField()
    date = fields.DateTimeField(auto_now_add=datetime.datetime.utcnow)
    is_current = fields.BooleanField()
    content = fields.StringField(default='')

    user = fields.ForeignField('user')
    node = fields.ForeignField('node')

    @property
    def deep_url(self):
        """Wiki URL built on top of the node's ``deep_url``."""
        base = self.node.deep_url
        return '{}wiki/{}/'.format(base, self.page_name)

    @property
    def url(self):
        """Wiki URL built on top of the node's ``url``."""
        base = self.node.url
        return '{}wiki/{}/'.format(base, self.page_name)

    @property
    def rendered_before_update(self):
        """True when the page's date is earlier than ``WIKI_CHANGE_DATE``."""
        return self.date < WIKI_CHANGE_DATE

    def html(self, node):
        """The cleaned HTML of the page

        Falls back to the rendered content without links when ``linkify``
        raises ``TypeError``.
        """
        cleaned = render_content(self.content, node=node)
        try:
            linked = linkify(cleaned, [nofollow])
        except TypeError:
            logger.warning('Returning unlinkified content.')
            return cleaned
        return linked

    def raw_text(self, node):
        """The raw text of the page, suitable for using in a text search"""
        markup = self.html(node)
        return sanitize(markup, tags=[], strip=True)

    def get_draft(self, node):
        """
        Return most recently edited version of wiki, whether that is the
        last saved version or the most recent sharejs draft.
        """
        share_database = wiki_utils.share_db()
        doc_uuid = wiki_utils.get_sharejs_uuid(node, self.page_name)

        draft = share_database['docs'].find_one({'_id': doc_uuid})
        if not draft:
            return self.content

        draft_version = draft['_v']
        modified = draft['_m']['mtime']
        modified /= 1000  # Convert to appropriate units
        draft_date = datetime.datetime.utcfromtimestamp(modified)

        # The draft only wins if it was edited (version > 1) after the last save
        if draft_version > 1 and draft_date > self.date:
            return draft['_data']
        return self.content

    def save(self, *args, **kwargs):
        """Save the page, then refresh the node's search entry when a node is set."""
        result = super(NodeWikiPage, self).save(*args, **kwargs)
        if self.node:
            self.node.update_search()
        return result

    def rename(self, new_name, save=True):
        """Change ``page_name`` and, by default, save the page."""
        self.page_name = new_name
        if save:
            self.save()

    def to_json(self):
        """Return an empty dict."""
        return {}
Exemple #30
0
class Sanction(StoredObject):
    """Sanction class is a generic way to track approval states"""
    # Tell modularodm not to attach backends
    _meta = {
        'abstract': True,
    }

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # Neither approved nor cancelled
    UNAPPROVED = 'unapproved'
    # Has approval
    APPROVED = 'approved'
    # Rejected by at least one person
    REJECTED = 'rejected'
    # Embargo has been completed
    COMPLETED = 'completed'

    state = fields.StringField(default=UNAPPROVED,
                               validate=validators.choice_in((
                                   UNAPPROVED,
                                   APPROVED,
                                   REJECTED,
                                   COMPLETED,
                               )))

    DISPLAY_NAME = 'Sanction'
    # SHORT_NAME must correspond with the associated foreign field to query against,
    # e.g. Node.find_one(Q(sanction.SHORT_NAME, 'eq', sanction))
    SHORT_NAME = 'sanction'

    APPROVAL_NOT_AUTHORIZED_MESSAGE = 'This user is not authorized to approve this {DISPLAY_NAME}'
    APPROVAL_INVALID_TOKEN_MESSAGE = 'Invalid approval token provided for this {DISPLAY_NAME}.'
    REJECTION_NOT_AUTHORIZED_MESSAEGE = 'This user is not authorized to reject this {DISPLAY_NAME}'
    REJECTION_INVALID_TOKEN_MESSAGE = 'Invalid rejection token provided for this {DISPLAY_NAME}.'

    # Controls whether or not the Sanction needs unanimous approval or just a single approval
    ANY = 'any'
    UNANIMOUS = 'unanimous'
    mode = UNANIMOUS

    initiation_date = fields.DateTimeField(
        auto_now_add=datetime.datetime.utcnow)
    # Expiration date-- Sanctions in the UNAPPROVED state that are older than their end_date
    # are automatically made ACTIVE by a daily cron job
    # Use end_date=None for a non-expiring Sanction
    end_date = fields.DateTimeField(default=None)

    # Sanction subclasses must have an initiated_by field
    # initiated_by = fields.ForeignField('user', backref='initiated')

    # Expanded: Dictionary field mapping admin IDs to their approval status and relevant tokens:
    # {
    #   'b3k97': {
    #     'has_approved': False,
    #     'approval_token': 'Pew7wj1Puf7DENUPFPnXSwa1rf3xPN',
    #     'rejection_token': 'TwozClTFOic2PYxHDStby94bCQMwJy'}
    # }
    approval_state = fields.DictionaryField()

    def __repr__(self):
        return '<Sanction(end_date={self.end_date!r}) with _id {self._id!r}>'.format(
            self=self)

    @property
    def is_pending_approval(self):
        return self.state == Sanction.UNAPPROVED

    @property
    def is_approved(self):
        return self.state == Sanction.APPROVED

    @property
    def is_rejected(self):
        return self.state == Sanction.REJECTED

    def approve(self, user):
        """Approval hook; this base implementation always raises.

        :param user: the approving user (unused here)
        :raises NotImplementedError: always
        """
        message = "Sanction subclasses must implement an approve method."
        raise NotImplementedError(message)

    def reject(self, user):
        """Rejection hook; this base implementation always raises.

        The error message previously named the ``approve`` method (copy-paste
        slip); it now names ``reject``.

        :param user: the rejecting user (unused here)
        :raises NotImplementedError: always
        """
        raise NotImplementedError(
            "Sanction subclasses must implement a reject method.")

    def _on_reject(self, user):
        """Callback for rejection of a Sanction

        :param User user:
        """
        raise NotImplementedError(
            'Sanction subclasses must implement an #_on_reject method')

    def _on_complete(self, user):
        """Callback for when a Sanction has approval and enters the ACTIVE state

        :param User user:
        """
        raise NotImplementedError(
            'Sanction subclasses must implement an #_on_complete method')

    def forcibly_reject(self):
        self.state = Sanction.REJECTED