def migrate(dry=True):
    """Assign a new ObjectId-based ``_id`` to every Pointer with a short ``_id``.

    Pointers whose ``_id`` is at most five characters long are selected. For
    each one the old ``_id`` is kept on ``_legacy_id`` and a fresh
    ``str(ObjectId())`` becomes the new ``_id``. If saving raises
    ``ValueError`` the pointer is passed to ``remove_invalid_backref`` and
    saved again.

    :param bool dry: When true (the default) nothing is saved; pointers are
        only loaded, modified in memory and logged.
    """
    migrated = 0
    pointers_with_invalid_backrefs = []
    # Only the _id is projected; the full document is loaded through the model.
    pointers = database.pointer.find(
        {'$where': 'this._id.length <= 5'},
        {'_id': True},
    )
    total = pointers.count()
    for i, doc in enumerate(pointers):
        pointer = Pointer.load(doc['_id'])
        with TokuTransaction():
            old_id = pointer._id
            logger.info('({}/{}) Preparing to migrate Pointer {}'.format(
                i + 1, total, old_id))
            pointer._legacy_id = old_id
            pointer._id = str(ObjectId())
            try:
                if not dry:
                    pointer.save()
            except ValueError:
                # ``Logger.warn`` is a deprecated alias; use ``warning``.
                logger.warning(
                    'Removing backref for orphaned pointer: {}'.format(old_id))
                if not dry:
                    remove_invalid_backref(pointer)
                    pointers_with_invalid_backrefs.append(old_id)
                    pointer.save()
            logger.info('Successfully migrated Pointer {} _id to {}'.format(
                old_id, pointer._id))
            migrated += 1
    logger.info('Successfully migrated {} pointers'.format(migrated))
    logger.info('Removed invalid backrefs on {} pointers: {}'.format(
        len(pointers_with_invalid_backrefs), pointers_with_invalid_backrefs))
class NodeLicenseRecord(StoredObject):
    """Pairs a ``nodelicense`` reference with a year and copyright holders.

    ``name``, ``text`` and ``id`` are read through to the referenced license
    and are ``None`` when no license is set.
    """

    _id = fields.StringField(default=lambda: str(ObjectId()), primary=True)
    node_license = fields.ForeignField('nodelicense', required=True)
    # Deliberately left as a StringField to support year ranges (e.g. 2012-2015)
    year = fields.StringField()
    copyright_holders = fields.StringField(list=True)

    @property
    def name(self):
        """Name of the referenced license, or ``None`` without a license."""
        if not self.node_license:
            return None
        return self.node_license.name

    @property
    def text(self):
        """Text of the referenced license, or ``None`` without a license."""
        if not self.node_license:
            return None
        return self.node_license.text

    @property
    def id(self):
        """``id`` of the referenced license, or ``None`` without a license."""
        if not self.node_license:
            return None
        return self.node_license.id

    def to_json(self):
        """Return ``serialize_node_license_record`` applied to this record."""
        return serialize_node_license_record(self)

    def copy(self):
        """Create, save and return a new record with the same field values."""
        duplicate = NodeLicenseRecord(
            node_license=self.node_license,
            year=self.year,
            copyright_holders=self.copyright_holders,
        )
        duplicate.save()
        return duplicate
class ArchiveTarget(StoredObject):
    """Stores the results of archiving a single addon
    """

    _id = fields.StringField(default=lambda: str(ObjectId()), primary=True)

    # addon_short_name of target addon
    name = fields.StringField()

    status = fields.StringField(default=ARCHIVER_INITIATED)

    # <dict> representation of a website.archiver.AggregateStatResult
    # Format: {
    #     'target_id': <str>,
    #     'target_name': <str>,
    #     'targets': <list>(StatResult | AggregateStatResult),
    #     'num_files': <int>,
    #     'disk_usage': <float>,
    # }
    stat_result = fields.DictionaryField()

    errors = fields.StringField(list=True)

    def __repr__(self):
        """Return ``<ClassName(_id=..., name=..., status=...)>``."""
        parts = (self.__class__.__name__, self._id, self.name, self.status)
        return '<{0}(_id={1}, name={2}, status={3})>'.format(*parts)
class NotificationDigest(StoredObject):
    """Stored record holding a user id, timestamp, event, message and node lineage."""

    # Primary key: string form of a freshly generated ObjectId.
    _id = fields.StringField(default=lambda: str(ObjectId()), primary=True)

    user_id = fields.StringField()
    timestamp = fields.DateTimeField()
    event = fields.StringField()
    message = fields.StringField()
    # List of strings.
    node_lineage = fields.StringField(list=True)
class PreprintProvider(StoredObject):
    """Stored description of a preprint provider (name, images, external URL)."""

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))
    name = fields.StringField(required=True)
    logo_name = fields.StringField()
    description = fields.StringField()
    banner_name = fields.StringField()
    external_url = fields.StringField()

    def get_absolute_url(self):
        """Return the API v2 URL followed by ``preprint_providers/<_id>``."""
        return '{}preprint_providers/{}'.format(self.absolute_api_v2_url, self._id)

    @property
    def absolute_api_v2_url(self):
        """Return ``api_v2_url`` applied to ``/preprint_providers/<_id>/``."""
        path = '/preprint_providers/{}/'.format(self._id)
        return api_v2_url(path)

    @property
    def logo_path(self):
        """Static path of the logo image, or ``None`` when ``logo_name`` is unset."""
        if self.logo_name:
            return '/static/img/preprint_providers/{}'.format(self.logo_name)
        else:
            return None

    @property
    def banner_path(self):
        """Static path of the banner image, or ``None`` when ``banner_name`` is unset."""
        # Uses banner_name; this previously tested and returned logo_name,
        # a copy-paste of logo_path.
        if self.banner_name:
            return '/static/img/preprint_providers/{}'.format(self.banner_name)
        else:
            return None
class NotificationDigest(StoredObject):
    """Stored record holding a user id, send type, timestamp, event, message and node lineage."""

    # Primary key: string form of a freshly generated ObjectId.
    _id = fields.StringField(default=lambda: str(ObjectId()), primary=True)

    user_id = fields.StringField(index=True)
    timestamp = fields.DateTimeField()
    # Indexed; values are checked by validate_subscription_type.
    send_type = fields.StringField(
        validate=validate_subscription_type,
        index=True,
    )
    event = fields.StringField()
    message = fields.StringField()
    # List of strings.
    node_lineage = fields.StringField(list=True)
class NodeLicense(StoredObject):
    """Stored license definition: unique ``id`` and ``name``, text and properties."""

    # Primary key: string form of a freshly generated ObjectId.
    _id = fields.StringField(default=lambda: str(ObjectId()), primary=True)

    # Separate from the primary key ``_id``; required, unique and not editable.
    id = fields.StringField(
        editable=False,
        required=True,
        unique=True,
    )
    name = fields.StringField(unique=True, required=True)
    text = fields.StringField(required=True)
    # List of strings.
    properties = fields.StringField(list=True)
class ApiOAuth2Scope(StoredObject):
    """
    Store information about recognized OAuth2 scopes. Only scopes registered under this database model can
        be requested by third parties.
    """

    # Primary key: string form of a freshly generated ObjectId.
    _id = fields.StringField(default=lambda: str(ObjectId()), primary=True)

    name = fields.StringField(
        required=True,
        unique=True,
        index=True,
    )
    description = fields.StringField(required=True)

    # TODO: Add mechanism to deactivate a scope?
    is_active = fields.BooleanField(index=True, default=True)
class ExternalAccount(StoredObject):
    """An account on an external service.

    Note that this object is not and should not be aware of what other objects
    are associated with it. This is by design, and this object should be kept as
    thin as possible, containing only those fields that must be stored in the
    database.

    The ``provider`` field is a de facto foreign key to an ``ExternalProvider``
    object, as providers are not stored in the database.
    """

    # Unique compound index over (provider, provider_id), both ascending.
    __indices__ = [
        {
            'key_or_list': [
                ('provider', pymongo.ASCENDING),
                ('provider_id', pymongo.ASCENDING),
            ],
            'unique': True,
        }
    ]

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # The OAuth credentials. One or both of these fields should be populated.
    # For OAuth1, this is usually the "oauth_token"
    # For OAuth2, this is usually the "access_token"
    oauth_key = fields.StringField()

    # For OAuth1, this is usually the "oauth_token_secret"
    # For OAuth2, this is not used
    oauth_secret = fields.StringField()

    # Used for OAuth2 only
    refresh_token = fields.StringField()
    expires_at = fields.DateTimeField()
    scopes = fields.StringField(default=lambda: list(), list=True)

    # The `name` of the service
    # This lets us query for only accounts on a particular provider
    provider = fields.StringField(required=True)

    # The proper 'name' of the service
    # Needed for account serialization
    provider_name = fields.StringField(required=True)

    # The unique, persistent ID on the remote service.
    provider_id = fields.StringField()

    # The user's name on the external service
    display_name = fields.StringField()

    # A link to the user's profile on the external service
    profile_url = fields.StringField()

    def __repr__(self):
        """Return ``<ExternalAccount: provider/provider_id>``."""
        identity = (self.provider, self.provider_id)
        return '<ExternalAccount: {}/{}>'.format(*identity)
class Subject(StoredObject):
    """Stored subject with text and lists of parent and child subjects."""

    _id = fields.StringField(default=lambda: str(ObjectId()), primary=True)
    text = fields.StringField(required=True)
    parents = fields.ForeignField('subject', list=True)
    children = fields.ForeignField('subject', list=True)

    @property
    def absolute_api_v2_url(self):
        """Return ``api_v2_url`` applied to ``taxonomies/<_id>/``."""
        path = 'taxonomies/{}/'.format(self._id)
        return api_v2_url(path)

    @property
    def child_count(self):
        """Number of entries in ``children``."""
        kids = self.children
        return len(kids)

    def get_absolute_url(self):
        """Same value as ``absolute_api_v2_url``."""
        url = self.absolute_api_v2_url
        return url
class NodeLicense(StoredObject):
    """Stored license definition: ``id``, ``name``, text and properties."""

    # Primary key: string form of a freshly generated ObjectId.
    _id = fields.StringField(default=lambda: str(ObjectId()), primary=True)

    # Skip modular-odm's uniqueness implementation, depending on MongoDB's
    # instead (the decorator will install the proper index), so that we can
    # kludge a non-racey upsert in ensure_licenses.
    id = fields.StringField(
        editable=False,
        required=True,
        unique=False,
    )
    # Ditto.
    name = fields.StringField(unique=False, required=True)

    text = fields.StringField(required=True)
    # List of strings.
    properties = fields.StringField(list=True)
class ArchiveJob(StoredObject):
    """Tracks archiving from ``src_node`` into ``dst_node``.

    Holds one ``ArchiveTarget`` per addon being archived (``target_addons``)
    and derives the job's overall ``status`` and ``done`` flag from them.
    """

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # whether or not the ArchiveJob is complete (success or fail)
    done = fields.BooleanField(default=False)
    # whether or not emails have been sent for this ArchiveJob
    sent = fields.BooleanField(default=False)
    status = fields.StringField(default=ARCHIVER_INITIATED)
    datetime_initiated = fields.DateTimeField(default=datetime.datetime.utcnow)

    dst_node = fields.ForeignField('node', backref='active')
    src_node = fields.ForeignField('node')
    initiator = fields.ForeignField('user')

    target_addons = fields.ForeignField('archivetarget', list=True)

    # This field is used for stashing embargo URLs while still in the app context
    # Format: {
    #     'view': <str> url,
    #     'approve': <str> url,
    #     'disapprove': <str> url,
    # }
    meta = fields.DictionaryField()

    def __repr__(self):
        return (
            '<{ClassName}(_id={self._id}, done={self.done}, '
            ' status={self.status}, src_node={self.src_node}, dst_node={self.dst_node})>'
        ).format(ClassName=self.__class__.__name__, self=self)

    @property
    def children(self):
        """ArchiveJobs of the primary nodes listed in ``dst_node.nodes``."""
        return [
            node.archive_job for node in self.dst_node.nodes if node.primary
        ]

    @property
    def parent(self):
        """ArchiveJob of ``dst_node.parent_node``, or ``None`` without a parent."""
        parent_node = self.dst_node.parent_node
        return parent_node.archive_job if parent_node else None

    @property
    def success(self):
        """True when ``status`` equals ``ARCHIVER_SUCCESS``."""
        return self.status == ARCHIVER_SUCCESS

    @property
    def pending(self):
        """True while any target is neither succeeded nor failed."""
        return any([
            target for target in self.target_addons
            if target.status not in (ARCHIVER_SUCCESS, ARCHIVER_FAILURE)
        ])

    def info(self):
        """Return the tuple ``(src_node, dst_node, initiator)``."""
        return self.src_node, self.dst_node, self.initiator

    def target_info(self):
        """Return one dict (name, status, stat_result, errors) per target."""
        return [{
            'name': target.name,
            'status': target.status,
            'stat_result': target.stat_result,
            'errors': target.errors
        } for target in self.target_addons]

    def archive_tree_finished(self):
        """Return True when this job and every descendant job have no pending targets.

        A job without children is finished as soon as it is not pending.
        """
        if self.pending:
            return False
        # Every child subtree must be finished. The previous implementation
        # returned the *count* of finished children, which was truthy as soon
        # as a single child was done.
        return all(child.archive_tree_finished() for child in self.children)

    def _fail_above(self):
        """Marks the ArchiveJob of the parent node (if any) as failed and saves it.

        Only the direct parent is updated here.
        """
        parent = self.parent
        if parent:
            parent.status = ARCHIVER_FAILURE
            parent.save()

    def _post_update_target(self):
        """Checks for success or failure if the ArchiveJob on self.dst_node
        is finished
        """
        # A job already marked failed is left untouched.
        if self.status == ARCHIVER_FAILURE:
            return
        if not self.pending:
            self.done = True
            if any([
                target.status for target in self.target_addons
                if target.status in ARCHIVER_FAILURE_STATUSES
            ]):
                self.status = ARCHIVER_FAILURE
                self._fail_above()
            else:
                self.status = ARCHIVER_SUCCESS
            self.save()

    def get_target(self, addon_short_name):
        """Return the first target named ``addon_short_name``, or ``None``."""
        try:
            return [
                addon for addon in self.target_addons
                if addon.name == addon_short_name
            ][0]
        except IndexError:
            return None

    def _set_target(self, addon_short_name):
        """Create, save and append a target for the name unless one exists."""
        if self.get_target(addon_short_name):
            return
        target = ArchiveTarget(name=addon_short_name)
        target.save()
        self.target_addons.append(target)

    def set_targets(self):
        """Create targets for the archivable addons of ``src_node`` and save.

        Addons are taken from ``settings.ADDONS_ARCHIVABLE`` (entries mapped
        to ``'none'`` are skipped). An addon is used only if it exists, is
        complete, is a ``StorageAddonBase`` and reports no archive errors.
        The ``dataverse`` addon yields two targets: ``-draft`` and
        ``-published``.
        """
        addons = []
        for addon in [
            self.src_node.get_addon(name)
            for name in settings.ADDONS_ARCHIVABLE
            if settings.ADDONS_ARCHIVABLE[name] != 'none'
        ]:
            if not addon or not addon.complete or not isinstance(
                    addon, StorageAddonBase):
                continue
            archive_errors = getattr(addon, 'archive_errors', None)
            if not archive_errors or (archive_errors and not archive_errors()):
                if addon.config.short_name == 'dataverse':
                    addons.append(addon.config.short_name + '-draft')
                    addons.append(addon.config.short_name + '-published')
                else:
                    addons.append(addon.config.short_name)
        for addon in addons:
            self._set_target(addon)
        self.save()

    def update_target(self, addon_short_name, status, stat_result=None, errors=None):
        """Record a new status for the named target, then re-evaluate this job.

        :param str addon_short_name: name of the target to update
        :param status: new status stored on the target
        :param stat_result: stored on the target; defaults to ``{}``
        :param errors: stored on the target; defaults to ``[]``
        """
        stat_result = stat_result or {}
        errors = errors or []

        target = self.get_target(addon_short_name)
        target.status = status
        target.errors = errors
        target.stat_result = stat_result
        target.save()
        self._post_update_target()
class Sanction(StoredObject):
    """Sanction class is a generic way to track approval states"""

    # Tell modularodm not to attach backends
    _meta = {
        'abstract': True,
    }

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))

    # Neither approved nor cancelled
    UNAPPROVED = 'unapproved'
    # Has approval
    APPROVED = 'approved'
    # Rejected by at least one person
    REJECTED = 'rejected'
    # Embargo has been completed
    COMPLETED = 'completed'

    state = fields.StringField(
        default=UNAPPROVED,
        validate=validators.choice_in((
            UNAPPROVED,
            APPROVED,
            REJECTED,
            COMPLETED,
        ))
    )

    DISPLAY_NAME = 'Sanction'
    # SHORT_NAME must correspond with the associated foreign field to query against,
    # e.g. Node.find_one(Q(sanction.SHORT_NAME, 'eq', sanction))
    SHORT_NAME = 'sanction'

    APPROVAL_NOT_AUTHORIZED_MESSAGE = 'This user is not authorized to approve this {DISPLAY_NAME}'
    APPROVAL_INVALID_TOKEN_MESSAGE = 'Invalid approval token provided for this {DISPLAY_NAME}.'
    REJECTION_NOT_AUTHORIZED_MESSAEGE = 'This user is not authorized to reject this {DISPLAY_NAME}'
    # Correctly spelled alias; the misspelled name above is kept so existing
    # references keep working.
    REJECTION_NOT_AUTHORIZED_MESSAGE = REJECTION_NOT_AUTHORIZED_MESSAEGE
    REJECTION_INVALID_TOKEN_MESSAGE = 'Invalid rejection token provided for this {DISPLAY_NAME}.'

    # Controls whether or not the Sanction needs unanimous approval or just a single approval
    ANY = 'any'
    UNANIMOUS = 'unanimous'
    mode = UNANIMOUS

    initiation_date = fields.DateTimeField(
        auto_now_add=datetime.datetime.utcnow)
    # Expiration date-- Sanctions in the UNAPPROVED state that are older than their end_date
    # are automatically made ACTIVE by a daily cron job
    # Use end_date=None for a non-expiring Sanction
    end_date = fields.DateTimeField(default=None)

    # Sanction subclasses must have an initiated_by field
    # initiated_by = fields.ForeignField('user', backref='initiated')

    # Expanded: Dictionary field mapping admin IDs their approval status and relevant tokens:
    # {
    #   'b3k97': {
    #     'has_approved': False,
    #     'approval_token': 'Pew7wj1Puf7DENUPFPnXSwa1rf3xPN',
    #     'rejection_token': 'TwozClTFOic2PYxHDStby94bCQMwJy'}
    # }
    approval_state = fields.DictionaryField()

    def __repr__(self):
        return '<Sanction(end_date={self.end_date!r}) with _id {self._id!r}>'.format(
            self=self)

    @property
    def is_pending_approval(self):
        """True when ``state`` is ``UNAPPROVED``."""
        return self.state == Sanction.UNAPPROVED

    @property
    def is_approved(self):
        """True when ``state`` is ``APPROVED``."""
        return self.state == Sanction.APPROVED

    @property
    def is_rejected(self):
        """True when ``state`` is ``REJECTED``."""
        return self.state == Sanction.REJECTED

    def approve(self, user):
        """Abstract; always raises ``NotImplementedError`` here."""
        raise NotImplementedError(
            "Sanction subclasses must implement an approve method.")

    def reject(self, user):
        """Abstract; always raises ``NotImplementedError`` here."""
        # The message previously named the approve method (copy-paste error).
        raise NotImplementedError(
            "Sanction subclasses must implement a reject method.")

    def _on_reject(self, user):
        """Callback for rejection of a Sanction

        :param User user:
        """
        raise NotImplementedError(
            'Sanction subclasses must implement an #_on_reject method')

    def _on_complete(self, user):
        """Callback for when a Sanction has approval and enters the ACTIVE state

        :param User user:
        """
        raise NotImplementedError(
            'Sanction subclasses must implement an #_on_complete method')

    def forcibly_reject(self):
        """Set ``state`` to ``REJECTED``; the object is not saved here."""
        self.state = Sanction.REJECTED
class ApiOAuth2PersonalToken(StoredObject):
    """Information for user-created personal access tokens

    This collection is also used by CAS to create the master list of available tokens.
    Any changes made to field names in this model must be echoed in the CAS implementation.
    """

    _id = fields.StringField(default=lambda: str(ObjectId()), primary=True)

    # Name of the field being `token_id` is a CAS requirement.
    # This is the actual value of the token that's used to authenticate
    token_id = fields.StringField(
        unique=True,
        default=functools.partial(random_string, length=70),
    )

    owner = fields.ForeignField('User', required=True, index=True)

    name = fields.StringField(index=True, required=True)

    # This field is a space delimited list of scopes, e.g. "osf.full_read osf.full_write"
    scopes = fields.StringField(required=True)

    is_active = fields.BooleanField(index=True, default=True)

    def deactivate(self, save=False):
        """
        Deactivate an ApiOAuth2PersonalToken

        Does not delete the database record, but hides this instance from API

        :param bool save: when true, save this object after clearing ``is_active``
        :return: always ``True``
        """
        cas_client = cas.get_client()
        # Will raise a CasHttpError if deletion fails for any reason other than the token
        # not yet being created. This will also stop setting of active=False.
        try:
            cas_client.revoke_tokens({'token': self.token_id})
        except cas.CasHTTPError as e:
            # A 400 means the token hasn't been used yet, so not created in cas
            if e.code != 400:
                raise e

        self.is_active = False

        if save:
            self.save()
        return True

    @property
    def url(self):
        """Settings-page path for this token, built from ``_id``."""
        return '/settings/tokens/{}/'.format(self._id)

    @property
    def absolute_url(self):
        """``url`` joined onto ``settings.DOMAIN``."""
        relative = self.url
        return urlparse.urljoin(settings.DOMAIN, relative)

    # Properties used by Django and DRF "Links: self" field
    @property
    def absolute_api_v2_url(self):
        """Return ``api_v2_url`` applied to ``/tokens/<_id>/``."""
        return api_v2_url('/tokens/{}/'.format(self._id))

    # used by django and DRF
    def get_absolute_url(self):
        """Same value as ``absolute_api_v2_url``."""
        return self.absolute_api_v2_url
class ApiOAuth2Application(StoredObject):
    """Registration and key for user-created OAuth API applications

    This collection is also used by CAS to create the master list of available applications.
    Any changes made to field names in this model must be echoed in the CAS implementation.
    """

    _id = fields.StringField(default=lambda: str(ObjectId()), primary=True)

    # Client ID and secret. Use separate ID field so ID format doesn't have to be restricted to database internals.
    # The uuid4 hex default is not *guaranteed* unique, but very unlikely to collide.
    client_id = fields.StringField(
        default=lambda: uuid.uuid4().hex,
        index=True,
        unique=True,
    )
    client_secret = fields.StringField(default=generate_client_secret)

    # Set to False if application is deactivated
    is_active = fields.BooleanField(index=True, default=True)

    owner = fields.ForeignField('User', required=True, index=True)

    # User-specified application descriptors
    name = fields.StringField(
        required=True,
        index=True,
        validate=[string_required, MaxLengthValidator(200)],
    )
    description = fields.StringField(
        validate=MaxLengthValidator(1000),
        required=False,
    )

    date_created = fields.DateTimeField(editable=False, auto_now_add=True)

    home_url = fields.StringField(validate=URLValidator(), required=True)
    callback_url = fields.StringField(validate=URLValidator(), required=True)

    def deactivate(self, save=False):
        """
        Deactivate an ApiOAuth2Application

        Does not delete the database record, but revokes all tokens and sets a flag that hides this instance from API

        :param bool save: when true, save this object after clearing ``is_active``
        :return: always ``True``
        """
        cas_client = cas.get_client()
        # Will raise a CasHttpError if deletion fails, which will also stop setting of active=False.
        cas_client.revoke_application_tokens(self.client_id, self.client_secret)

        self.is_active = False

        if save:
            self.save()
        return True

    def reset_secret(self, save=False):
        """
        Reset the secret of an ApiOAuth2Application
        Revokes all tokens

        :param bool save: when true, save this object after the secret is replaced
        :return: always ``True``
        """
        cas_client = cas.get_client()
        # Tokens are revoked using the current secret before it is replaced.
        cas_client.revoke_application_tokens(self.client_id, self.client_secret)
        self.client_secret = generate_client_secret()

        if save:
            self.save()
        return True

    @property
    def url(self):
        """Settings-page path for this application, built from ``client_id``."""
        return '/settings/applications/{}/'.format(self.client_id)

    @property
    def absolute_url(self):
        """``url`` joined onto ``settings.DOMAIN``."""
        relative = self.url
        return urlparse.urljoin(settings.DOMAIN, relative)

    # Properties used by Django and DRF "Links: self" field
    @property
    def absolute_api_v2_url(self):
        """Return ``api_v2_url`` applied to ``/applications/<client_id>/``."""
        return api_v2_url('/applications/{}/'.format(self.client_id))

    # used by django and DRF
    def get_absolute_url(self):
        """Same value as ``absolute_api_v2_url``."""
        return self.absolute_api_v2_url
def get_object_id():
    """Return the string form of a newly created ``ObjectId``."""
    new_id = ObjectId()
    return str(new_id)
class PreprintProvider(StoredObject):
    """Stored description of a preprint provider.

    Besides descriptive fields, holds the subject rules
    (``subjects_acceptable``) and licenses (``licenses_acceptable``) that the
    provider accepts.
    """

    _id = fields.StringField(primary=True, default=lambda: str(ObjectId()))
    name = fields.StringField(required=True)
    logo_name = fields.StringField()
    header_text = fields.StringField()
    description = fields.StringField()
    domain = fields.StringField()
    banner_name = fields.StringField()
    external_url = fields.StringField()
    email_contact = fields.StringField()
    email_support = fields.StringField()
    example = fields.StringField()
    access_token = EncryptedStringField()
    advisory_board = fields.StringField()
    social_twitter = fields.StringField()
    social_facebook = fields.StringField()
    social_instagram = fields.StringField()

    # NOTE(review): each rule is indexed as rule[0] (a sequence of subject ids)
    # and rule[1] (a truthy/falsy flag) below -- confirm the intended schema.
    subjects_acceptable = fields.DictionaryField(list=True, default=lambda: [])
    licenses_acceptable = fields.ForeignField('NodeLicense', list=True, default=lambda: [])

    @property
    def top_level_subjects(self):
        """Subjects at the head of each rule, or all parentless subjects when no rules exist."""
        if len(self.subjects_acceptable) == 0:
            return Subject.find(Q('parents', 'eq', []))
        # First id of every rule's id sequence, de-duplicated.
        tops = set([sub[0][0] for sub in self.subjects_acceptable])
        return [Subject.load(sub) for sub in tops]

    @property
    def all_subjects(self):
        """Subjects matched by ``subjects_acceptable``; all subjects when no rules exist.

        For every rule, each id in ``rule[0]`` is matched directly. When
        ``rule[1]`` is truthy, subjects whose parent is the last id in
        ``rule[0]`` are matched too; if ``rule[0]`` has a single id, subjects
        one level further down are matched as well.
        """
        q = []
        for rule in self.subjects_acceptable:
            if rule[1]:
                q.append(Q('parents', 'eq', Subject.load(rule[0][-1])))
                if len(rule[0]) == 1:
                    potential_parents = Subject.find(
                        Q('parents', 'eq', Subject.load(rule[0][-1])))
                    for parent in potential_parents:
                        q.append(Q('parents', 'eq', parent))
            for sub in rule[0]:
                q.append(Q('_id', 'eq', sub))
        # OR all collected queries together; a lone query is used as-is.
        return Subject.find(reduce(lambda x, y: x | y, q)) if len(q) > 1 else (
            Subject.find(q[0]) if len(q) else Subject.find())

    def get_absolute_url(self):
        """Return the API v2 URL followed by ``preprint_providers/<_id>``."""
        return '{}preprint_providers/{}'.format(self.absolute_api_v2_url, self._id)

    @property
    def absolute_api_v2_url(self):
        """Return ``api_v2_url`` applied to ``/preprint_providers/<_id>/``."""
        path = '/preprint_providers/{}/'.format(self._id)
        return api_v2_url(path)

    @property
    def logo_path(self):
        """Static path of the logo image, or ``None`` when ``logo_name`` is unset."""
        if self.logo_name:
            return '/static/img/preprint_providers/{}'.format(self.logo_name)
        else:
            return None

    @property
    def banner_path(self):
        """Static path of the banner image, or ``None`` when ``banner_name`` is unset."""
        # Uses banner_name; this previously tested and returned logo_name,
        # a copy-paste of logo_path.
        if self.banner_name:
            return '/static/img/preprint_providers/{}'.format(self.banner_name)
        else:
            return None