def run_job_and_get_output(self, entity_id_order=None):
    """Runs the validation jobs and returns its output.

    Args:
        entity_id_order: list(str)|None. The ordering of IDs to be
            returned from the validation outputs. If None, then the
            output is not changed.

    Returns:
        list(*). The validation job output.
    """
    job_id = self.JOB_CLASS.create_new()

    # The one-off queue must start empty and contain exactly one task
    # once the job has been enqueued.
    self.assertEqual(
        self.count_jobs_in_mapreduce_taskqueue(
            taskqueue_services.QUEUE_NAME_ONE_OFF_JOBS), 0)
    self.JOB_CLASS.enqueue(job_id)
    self.assertEqual(
        self.count_jobs_in_mapreduce_taskqueue(
            taskqueue_services.QUEUE_NAME_ONE_OFF_JOBS), 1)
    self.process_and_flush_pending_mapreduce_tasks()

    job_output = [
        ast.literal_eval(raw_output)
        for raw_output in self.JOB_CLASS.get_output(job_id)
    ]

    if entity_id_order is not None:
        def entity_position(output_str):
            """Returns the index of the entity the string refers to, or
            len(entity_id_order) if the string matches no entity.
            """
            return python_utils.NEXT(
                (position
                 for position, entity_id in enumerate(entity_id_order)
                 if output_str.startswith('Entity id %s' % entity_id)),
                len(entity_id_order))

        # Each output item is a (key, value) pair; only list-valued
        # sub-outputs are re-ordered.
        for _, sub_output in job_output:
            if isinstance(sub_output, list):
                sub_output.sort(key=entity_position)

    return job_output
def create_beam_job_run_model(
        self, dataflow_job_id='abc', job_id=None, job_name='FooJob',
        job_arguments=None,
        job_state=beam_job_models.BeamJobState.RUNNING.value):
    """Returns a new BeamJobRunModel with convenient default values.

    Args:
        dataflow_job_id: str|None. The ID of the dataflow job
            corresponding to the BeamJobRun. When this value is None,
            that signals that the job has been run synchronously (like a
            function call), and cannot be polled for updates.
        job_id: str|None. The ID of the job. If None, a value is
            generated.
        job_name: str. The name of the job class that implements the
            job's logic.
        job_state: str. The state of the job at the time the model was
            last updated.
        job_arguments: list(str)|None. The arguments provided to the job
            run. If None, an empty list will be used instead.

    Returns:
        BeamJobRunModel. The new model.
    """
    # Fall back to a generated ID and an empty argument list when the
    # caller does not supply them.
    resolved_job_id = (
        python_utils.NEXT(self._id_iter) if job_id is None else job_id)
    resolved_job_arguments = [] if job_arguments is None else job_arguments
    return beam_job_models.BeamJobRunModel(
        id=resolved_job_id,
        dataflow_job_id=dataflow_job_id,
        job_name=job_name,
        job_arguments=resolved_job_arguments,
        latest_job_state=job_state)
def _trusted_commit(
        self, committer_id, commit_type, commit_message, commit_cmds):
    """Record the event to the commit log after the model commit.

    Note that this extends the superclass method.

    Args:
        committer_id: str. The user_id of the user who committed the
            change.
        commit_type: str. The type of commit. Possible values are in
            core.storage.base_models.COMMIT_TYPE_CHOICES.
        commit_message: str. The commit description message.
        commit_cmds: list(dict). A list of commands, describing changes
            made in this model, which should give sufficient information
            to reconstruct the commit. Each dict always contains:
                cmd: str. Unique command.
            and then additional arguments for that command.
    """
    super(TopicRightsModel, self)._trusted_commit(
        committer_id, commit_type, commit_message, commit_cmds)

    # Re-fetch the rights model so the commit-log entry reflects the
    # just-committed published/private status.
    topic_rights = TopicRightsModel.get_by_id(self.id)
    if topic_rights.topic_is_published:
        status = constants.ACTIVITY_STATUS_PUBLIC
    else:
        status = constants.ACTIVITY_STATUS_PRIVATE

    # Write a commit-log entry keyed by model id and version so each
    # rights commit has a unique log record.
    TopicCommitLogEntryModel(
        id=('rights-%s-%s' % (self.id, self.version)),
        user_id=committer_id,
        topic_id=self.id,
        commit_type=commit_type,
        commit_message=commit_message,
        commit_cmds=commit_cmds,
        version=None,
        post_commit_status=status,
        post_commit_community_owned=False,
        post_commit_is_private=not topic_rights.topic_is_published
    ).put()

    # Record, on the snapshot metadata model, the user IDs referenced by
    # this snapshot's content (the topic managers), sorted for
    # determinism.
    snapshot_metadata_model = self.SNAPSHOT_METADATA_CLASS.get(
        self.get_snapshot_id(self.id, self.version))
    snapshot_metadata_model.content_user_ids = list(sorted(set(
        self.manager_ids)))

    # Collect every user ID mentioned in the commit commands. For each
    # command, feconf.TOPIC_RIGHTS_CHANGE_ALLOWED_COMMANDS names which of
    # its arguments hold user IDs; NEXT raises StopIteration if a command
    # name is not in the allowed list.
    commit_cmds_user_ids = set()
    for commit_cmd in commit_cmds:
        user_id_attribute_names = python_utils.NEXT(
            cmd['user_id_attribute_names']
            for cmd in feconf.TOPIC_RIGHTS_CHANGE_ALLOWED_COMMANDS
            if cmd['name'] == commit_cmd['cmd']
        )
        for user_id_attribute_name in user_id_attribute_names:
            commit_cmds_user_ids.add(commit_cmd[user_id_attribute_name])
    snapshot_metadata_model.commit_cmds_user_ids = list(
        sorted(commit_cmds_user_ids))

    snapshot_metadata_model.update_timestamps()
    snapshot_metadata_model.put()
def _trusted_commit(
        self, committer_id, commit_type, commit_message, commit_cmds):
    """Record the event to the commit log after the model commit.

    Note that this extends the superclass method.

    Args:
        committer_id: str. The user_id of the user who committed the
            change.
        commit_type: str. The type of commit. Possible values are in
            core.storage.base_models.COMMIT_TYPE_CHOICES.
        commit_message: str. The commit description message.
        commit_cmds: list(dict). A list of commands, describing changes
            made in this model, should give sufficient information to
            reconstruct the commit. Each dict always contains:
                cmd: str. Unique command.
            and then additional arguments for that command.
    """
    super(ExplorationRightsModel, self)._trusted_commit(
        committer_id, commit_type, commit_message, commit_cmds)

    # Create and delete events will already be recorded in the
    # ExplorationModel.
    if commit_type not in ['create', 'delete']:
        ExplorationCommitLogEntryModel(
            id=('rights-%s-%s' % (self.id, self.version)),
            user_id=committer_id,
            exploration_id=self.id,
            commit_type=commit_type,
            commit_message=commit_message,
            commit_cmds=commit_cmds,
            version=None,
            post_commit_status=self.status,
            post_commit_community_owned=self.community_owned,
            post_commit_is_private=(
                self.status == constants.ACTIVITY_STATUS_PRIVATE)).put()

    # Record, on the snapshot metadata model, every user ID referenced by
    # this snapshot's content (all rights-holder roles), sorted for
    # determinism.
    snapshot_metadata_model = self.SNAPSHOT_METADATA_CLASS.get(
        self.get_snapshot_id(self.id, self.version))
    snapshot_metadata_model.content_user_ids = list(
        sorted(
            set(self.owner_ids) |
            set(self.editor_ids) |
            set(self.voice_artist_ids) |
            set(self.viewer_ids)))

    # Collect every user ID mentioned in the commit commands. For each
    # command, feconf.EXPLORATION_RIGHTS_CHANGE_ALLOWED_COMMANDS names
    # which of its arguments hold user IDs; NEXT raises StopIteration if
    # a command name is not in the allowed list.
    commit_cmds_user_ids = set()
    for commit_cmd in commit_cmds:
        user_id_attribute_names = python_utils.NEXT(
            cmd['user_id_attribute_names']
            for cmd in feconf.EXPLORATION_RIGHTS_CHANGE_ALLOWED_COMMANDS
            if cmd['name'] == commit_cmd['cmd'])
        for user_id_attribute_name in user_id_attribute_names:
            commit_cmds_user_ids.add(commit_cmd[user_id_attribute_name])
    snapshot_metadata_model.commit_cmds_user_ids = list(
        sorted(commit_cmds_user_ids))

    snapshot_metadata_model.update_timestamps()
    snapshot_metadata_model.put()
def create_oppia_user(self, deleted=False):
    """Returns an (auth_id, user_id) pair for a new user.

    Args:
        deleted: bool. Value for the user's deleted property.

    Returns:
        AuthIdUserIdPair. The association the user should create.
    """
    auth_id = 'aid%d' % python_utils.NEXT(self._auth_id_generator)
    user_id = 'uid_%s' % auth_id
    # Bug fix: the email format string previously had no '%s' placeholder,
    # so the '% auth_id' interpolation raised a TypeError ('not all
    # arguments converted during string formatting'). Restore a
    # placeholder so each user gets a unique, well-formed email.
    user_models.UserSettingsModel(
        id=user_id,
        email=('%s@example.com' % auth_id),
        deleted=deleted,
        role=feconf.ROLE_ID_EXPLORATION_EDITOR,
        preferred_language_codes=[constants.DEFAULT_LANGUAGE_CODE]).put()
    return auth_domain.AuthIdUserIdPair(auth_id, user_id)
def mock_import_users(user_records):
    """Mock function that fails according to the given input values."""
    # The next value in the closed-over error sequence decides whether
    # this whole call fails with a connection error.
    if python_utils.NEXT(call_error_sequence):
        raise firebase_exceptions.DataLossError('Failed to connect')

    total_records = len(user_records)
    # Single pass: partition records into ones to keep and the indices
    # of ones that the closed-over per-record error sequence rejects.
    kept_records = []
    error_indices = []
    for index, pair in enumerate(
            python_utils.ZIP(user_records, user_error_sequence)):
        record, has_error = pair
        if has_error:
            error_indices.append(index)
        else:
            kept_records.append(record)

    if kept_records:
        self._import_users(kept_records)

    return self._create_user_import_result_fragile(
        total_records, error_indices=error_indices)
def validate(self):
    """Validates various properties of the story contents object.

    Raises:
        ValidationError: One or more attributes of the story contents
            are invalid.
    """
    if not isinstance(self.nodes, list):
        raise utils.ValidationError(
            'Expected nodes field to be a list, received %s' % self.nodes)

    if len(self.nodes) > 0:
        StoryNode.require_valid_node_id(self.initial_node_id)
    StoryNode.require_valid_node_id(self.next_node_id)

    initial_node_is_present = False
    node_id_list = []

    for node in self.nodes:
        if not isinstance(node, StoryNode):
            raise utils.ValidationError(
                'Expected each node to be a StoryNode object, received %s'
                % node)
        node.validate()
        # Every destination node ID must refer to a node in self.nodes.
        for destination_node_id in node.destination_node_ids:
            if python_utils.NEXT(
                    (node for node in self.nodes
                     if node.id == destination_node_id),
                    None) is None:
                raise utils.ValidationError(
                    'Expected all destination nodes to exist')
        if node.id == self.initial_node_id:
            initial_node_is_present = True
        # Checks whether the number in the id of any node is greater than
        # the value of next_node_id.
        if (StoryNode.get_number_from_node_id(node.id) >=
                StoryNode.get_number_from_node_id(self.next_node_id)):
            raise utils.ValidationError(
                'The node with id %s is out of bounds.' % node.id)
        node_id_list.append(node.id)

    if len(self.nodes) > 0:
        if not initial_node_is_present:
            raise utils.ValidationError('Expected starting node to exist.')

        if len(node_id_list) > len(set(node_id_list)):
            raise utils.ValidationError(
                'Expected all node ids to be distinct.')

        # nodes_queue stores the pending unlocked nodes to visit.
        # NOTE(review): list.pop() removes from the END, so nodes are
        # visited in LIFO (depth-first) order, not the FIFO order the
        # original comment described — confirm whether this is intended.
        nodes_queue = []
        is_node_visited = [False] * len(self.nodes)
        starting_node_index = self.get_node_index(self.initial_node_id)
        nodes_queue.append(self.nodes[starting_node_index].id)

        # The user is assumed to have all the prerequisite skills of the
        # starting node before starting the story. Also, this list models
        # the skill IDs acquired by a learner as they progress through
        # the story.
        simulated_skill_ids = copy.deepcopy(
            self.nodes[starting_node_index].prerequisite_skill_ids)

        # The following loop traverses the graph from the given starting
        # node and makes sure that the user has acquired all the
        # prerequisite skills required by the destination nodes 'unlocked'
        # by visiting a particular node by the time that node is finished.
        while len(nodes_queue) > 0:
            current_node_id = nodes_queue.pop()
            current_node_index = self.get_node_index(current_node_id)
            is_node_visited[current_node_index] = True
            current_node = self.nodes[current_node_index]

            for skill_id in current_node.acquired_skill_ids:
                simulated_skill_ids.append(skill_id)

            for node_id in current_node.destination_node_ids:
                node_index = self.get_node_index(node_id)
                # The following condition checks whether the destination
                # node for a particular node, has already been visited, in
                # which case the story would have loops, which are not
                # allowed.
                if is_node_visited[node_index]:
                    raise utils.ValidationError(
                        'Loops are not allowed in stories.')
                destination_node = self.nodes[node_index]
                # Every prerequisite skill of the destination must already
                # have been acquired along the traversal.
                if not (
                        set(destination_node.prerequisite_skill_ids)
                        .issubset(simulated_skill_ids)):
                    raise utils.ValidationError(
                        'The prerequisite skills ' +
                        ' '.join(
                            set(destination_node.prerequisite_skill_ids) -
                            set(simulated_skill_ids)) +
                        ' were not completed before the node with id %s'
                        ' was unlocked.' % node_id)
                nodes_queue.append(node_id)

        # Any node never reached by the traversal is disconnected from
        # the story graph.
        for index, node_visited in enumerate(is_node_visited):
            if not node_visited:
                raise utils.ValidationError(
                    'The node with id %s is disconnected from the '
                    'story graph.' % self.nodes[index].id)
def _pseudonymize_models(activity_related_models, pseudonymized_id):
    """Pseudonymize user ID fields in the models.

    This function is run in a transaction, with the maximum number of
    activity_related_models being MAX_NUMBER_OF_OPS_IN_TRANSACTION.

    Args:
        activity_related_models: list(BaseModel). Models whose user IDs
            should be pseudonymized.
        pseudonymized_id: str. New pseudonymized user ID to be used for
            the models.
    """
    # NOTE(review): user_id, the *_model_class variables, allowed_commands
    # and rights_user_id_fields are read from the enclosing scope — this
    # function is presumably nested inside the wipeout routine; confirm
    # against the enclosing function.
    pseudonymized_username = user_services.get_pseudonymous_username(
        pseudonymized_id)

    # Plain (non-rights) snapshot metadata: only the committer_id needs
    # replacing.
    snapshot_metadata_models = [
        model for model in activity_related_models
        if isinstance(model, snapshot_metadata_model_class)
    ]
    for snapshot_metadata_model in snapshot_metadata_models:
        if user_id == snapshot_metadata_model.committer_id:
            snapshot_metadata_model.committer_id = pseudonymized_id
        snapshot_metadata_model.update_timestamps()

    # Rights snapshot metadata: user IDs can appear inside commit_cmds,
    # in commit messages, and in the content/commit-cmd user ID lists.
    rights_snapshot_metadata_models = [
        model for model in activity_related_models
        if isinstance(model, rights_snapshot_metadata_model_class)
    ]
    for rights_snapshot_metadata_model in rights_snapshot_metadata_models:
        for commit_cmd in rights_snapshot_metadata_model.commit_cmds:
            # allowed_commands names which arguments of each command hold
            # user IDs; NEXT raises StopIteration for unknown commands.
            user_id_attribute_names = python_utils.NEXT(
                cmd['user_id_attribute_names'] for cmd in allowed_commands
                if cmd['name'] == commit_cmd['cmd'])
            for user_id_attribute_name in user_id_attribute_names:
                if commit_cmd[user_id_attribute_name] == user_id:
                    commit_cmd[user_id_attribute_name] = pseudonymized_id

        # Rewrite role-assignment commit messages so they reference the
        # pseudonymous username instead of the real one.
        assign_commit_message_match = re.match(
            rights_domain.ASSIGN_ROLE_COMMIT_MESSAGE_REGEX,
            rights_snapshot_metadata_model.commit_message)
        if assign_commit_message_match:
            rights_snapshot_metadata_model.commit_message = (
                rights_domain.ASSIGN_ROLE_COMMIT_MESSAGE_TEMPLATE % (
                    pseudonymized_username,
                    assign_commit_message_match.group(2),
                    assign_commit_message_match.group(3),
                )
            )
        deassign_commit_message_match = re.match(
            rights_domain.DEASSIGN_ROLE_COMMIT_MESSAGE_REGEX,
            rights_snapshot_metadata_model.commit_message)
        if deassign_commit_message_match:
            rights_snapshot_metadata_model.commit_message = (
                rights_domain.DEASSIGN_ROLE_COMMIT_MESSAGE_TEMPLATE % (
                    pseudonymized_username,
                    deassign_commit_message_match.group(2),
                )
            )

        rights_snapshot_metadata_model.content_user_ids = [
            pseudonymized_id if model_user_id == user_id else model_user_id
            for model_user_id in
            rights_snapshot_metadata_model.content_user_ids
        ]
        rights_snapshot_metadata_model.commit_cmds_user_ids = [
            pseudonymized_id if model_user_id == user_id else model_user_id
            for model_user_id in
            rights_snapshot_metadata_model.commit_cmds_user_ids
        ]
        if user_id == rights_snapshot_metadata_model.committer_id:
            rights_snapshot_metadata_model.committer_id = pseudonymized_id
        rights_snapshot_metadata_model.update_timestamps()

    # Rights snapshot content: user IDs live in list-valued fields of the
    # content dict (field names supplied by rights_user_id_fields).
    rights_snapshot_content_models = [
        model for model in activity_related_models
        if isinstance(model, rights_snapshot_content_model_class)
    ]
    for rights_snapshot_content_model in rights_snapshot_content_models:
        model_dict = rights_snapshot_content_model.content
        for field_name in rights_user_id_fields:
            model_dict[field_name] = [
                pseudonymized_id if field_id == user_id else field_id
                for field_id in model_dict[field_name]
            ]
        rights_snapshot_content_model.content = model_dict
        rights_snapshot_content_model.update_timestamps()

    # Commit log entries: the user_id field is replaced unconditionally.
    commit_log_models = [
        model for model in activity_related_models
        if isinstance(model, commit_log_model_class)
    ]
    for commit_log_model in commit_log_models:
        commit_log_model.user_id = pseudonymized_id
        commit_log_model.update_timestamps()

    # Persist all mutated models in one batch (inside the transaction).
    datastore_services.put_multi(
        snapshot_metadata_models +
        rights_snapshot_metadata_models +
        rights_snapshot_content_models +
        commit_log_models)
def _pseudonymize_models(col_related_models, pseudonymized_id):
    """Pseudonymize user ID fields in the models.

    This function is run in a transaction, with the maximum number of
    activity_related_models being MAX_NUMBER_OF_OPS_IN_TRANSACTION.

    Args:
        col_related_models: list(BaseModel). Models whose user IDs should
            be pseudonymized.
        pseudonymized_id: str. New pseudonymized user ID to be used for
            the models.
    """
    # NOTE(review): user_id, activity_category and the *_model_class
    # variables are read from the enclosing scope — confirm against the
    # enclosing function. Unlike the sibling _pseudonymize_models, this
    # version uses ndb.put_multi and never calls update_timestamps() on
    # the mutated models — verify whether that divergence is intentional.

    # Both plain and rights snapshot metadata models are handled in one
    # pass here.
    snapshot_metadata_models = [
        model for model in col_related_models
        if isinstance(
            model,
            (snapshot_metadata_model_class,
             rights_snapshot_metadata_model_class))
    ]
    # Pick the rights-change command schema matching the activity type.
    allowed_commands = (
        feconf.COLLECTION_RIGHTS_CHANGE_ALLOWED_COMMANDS
        if activity_category == models.NAMES.collection
        else feconf.EXPLORATION_RIGHTS_CHANGE_ALLOWED_COMMANDS)
    for snapshot_metadata_model in snapshot_metadata_models:
        for commit_cmd in snapshot_metadata_model.commit_cmds:
            # allowed_commands names which arguments of each command hold
            # user IDs; NEXT raises StopIteration for unknown commands.
            user_id_attribute_names = python_utils.NEXT(
                cmd['user_id_attribute_names'] for cmd in allowed_commands
                if cmd['name'] == commit_cmd['cmd'])
            for user_id_attribute_name in user_id_attribute_names:
                if commit_cmd[user_id_attribute_name] == user_id:
                    commit_cmd[user_id_attribute_name] = pseudonymized_id

        snapshot_metadata_model.content_user_ids = [
            pseudonymized_id if model_user_id == user_id else model_user_id
            for model_user_id in snapshot_metadata_model.content_user_ids
        ]
        snapshot_metadata_model.commit_cmds_user_ids = [
            pseudonymized_id if model_user_id == user_id else model_user_id
            for model_user_id in snapshot_metadata_model.commit_cmds_user_ids
        ]
        if user_id == snapshot_metadata_model.committer_id:
            snapshot_metadata_model.committer_id = pseudonymized_id

    # Rights snapshot content: user IDs live in the four fixed
    # list-valued role fields of the content dict.
    rights_snapshot_content_models = [
        model for model in col_related_models
        if isinstance(model, rights_snapshot_content_model_class)
    ]
    for rights_snapshot_content_model in rights_snapshot_content_models:
        model_dict = rights_snapshot_content_model.content
        for field_name in (
                'owner_ids', 'editor_ids', 'voice_artist_ids',
                'viewer_ids'):
            model_dict[field_name] = [
                pseudonymized_id if field_id == user_id else field_id
                for field_id in model_dict[field_name]
            ]
        rights_snapshot_content_model.content = model_dict

    # Commit log entries: the user_id field is replaced unconditionally.
    commit_log_models = [
        model for model in col_related_models
        if isinstance(model, commit_log_model_class)
    ]
    for commit_log_model in commit_log_models:
        commit_log_model.user_id = pseudonymized_id

    # Persist all mutated models in one batch (inside the transaction).
    ndb.put_multi(
        snapshot_metadata_models +
        rights_snapshot_content_models +
        commit_log_models)