def test_contribution_msec(self):
    # Test that the contribution time correctly shows up as None.
    self.signup(self.EMAIL, self.USERNAME)
    self.login(self.EMAIL)
    user_id = self.get_user_id_from_email(self.EMAIL)
    response_dict = self.get_json(
        '/profilehandler/data/%s' % self.USERNAME)
    self.assertIsNone(response_dict['first_contribution_msec'])

    # Update the first_contribution_msec to the current time in
    # milliseconds.
    first_time_in_msecs = utils.get_current_time_in_millisecs()
    user_services.update_first_contribution_msec_if_not_set(
        user_id, first_time_in_msecs)

    # Test that the contribution date correctly changes to
    # first_time_in_msecs.
    response_dict = self.get_json(
        '/profilehandler/data/%s' % self.USERNAME)
    self.assertEqual(
        response_dict['first_contribution_msec'],
        first_time_in_msecs)

    # Test that the contribution date is not changed after the first
    # time it is set.
    second_time_in_msecs = utils.get_current_time_in_millisecs()
    user_services.update_first_contribution_msec_if_not_set(
        user_id, second_time_in_msecs)
    response_dict = self.get_json(
        '/profilehandler/data/%s' % self.USERNAME)
    self.assertEqual(
        response_dict['first_contribution_msec'],
        first_time_in_msecs)
def _run_job(cls, job_id, additional_job_params):
    """Starts the job."""
    logging.info(
        'Job %s started at %s' %
        (job_id, utils.get_current_time_in_millisecs()))
    cls.register_start(job_id)

    try:
        result = cls._run(additional_job_params)
    except Exception as e:
        logging.error(traceback.format_exc())
        logging.error(
            'Job %s failed at %s' %
            (job_id, utils.get_current_time_in_millisecs()))
        cls.register_failure(
            job_id, '%s\n%s' % (unicode(e), traceback.format_exc()))
        raise taskqueue_services.PermanentTaskFailure(
            'Task failed: %s\n%s' % (unicode(e), traceback.format_exc()))

    # Note that the job may have been canceled after it started and
    # before it reached this stage. This will result in an exception
    # when the validity of the status code transition is checked.
    cls.register_completion(job_id, result)
    logging.info(
        'Job %s completed at %s' %
        (job_id, utils.get_current_time_in_millisecs()))
def generate_new_thread_id(cls, entity_type: str, entity_id: str) -> str:
    """Generates a new thread ID which is unique.

    Args:
        entity_type: str. The type of the entity.
        entity_id: str. The ID of the entity.

    Returns:
        str. A thread ID that is different from the IDs of all
        the existing threads within the given entity.

    Raises:
        Exception. There were too many collisions with existing thread IDs
            when attempting to generate a new thread ID.
    """
    for _ in python_utils.RANGE(_MAX_RETRIES):
        thread_id = (
            '%s.%s.%s%s' % (
                entity_type, entity_id,
                utils.base64_from_int(
                    int(utils.get_current_time_in_millisecs())),
                utils.base64_from_int(utils.get_random_int(_RAND_RANGE))))
        if not cls.get_by_id(thread_id):
            return thread_id
    raise Exception(
        'New thread id generator is producing too many collisions.')
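# A minimal usage sketch (hypothetical model name and output, not from the
# source): the timestamp half keeps thread IDs roughly time-ordered, the
# random half separates threads created in the same millisecond, and the
# retry loop handles the rare case where both coincide. The int() cast
# matters because utils.get_current_time_in_millisecs() returns a float.
#
#     thread_id = GeneralFeedbackThreadModel.generate_new_thread_id(
#         'exploration', 'exp123')
#     # e.g. 'exploration.exp123.PbzsNw8BUL'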
def enqueue(
        cls, job_id, queue_name,
        additional_job_params=None, shard_count=None):
    """Marks a job as queued and adds it to a queue for processing.

    Args:
        job_id: str. The ID of the job to enqueue.
        queue_name: str. The queue name the job should be run in. See
            core.platform.taskqueue.gae_taskqueue_services for supported
            values.
        additional_job_params: dict(str : *) or None. Additional parameters
            for the job.
        shard_count: int. Number of shards used for the job.
    """
    # Ensure that preconditions are met.
    model = job_models.JobModel.get(job_id, strict=True)
    cls._require_valid_transition(
        job_id, model.status_code, STATUS_CODE_QUEUED)
    cls._require_correct_job_type(model.job_type)

    # Enqueue the job.
    cls._real_enqueue(
        job_id, queue_name, additional_job_params, shard_count)

    model.status_code = STATUS_CODE_QUEUED
    model.time_queued_msec = utils.get_current_time_in_millisecs()
    model.additional_job_params = additional_job_params
    model.update_timestamps()
    model.put()
def _change_activity_status(
        committer_id, activity_id, activity_type, new_status,
        commit_message):
    """Changes the status of the given activity.

    Args:
        committer_id: str. ID of the user who is performing the update
            action.
        activity_id: str. ID of the activity.
        activity_type: str. The type of activity. Possible values:
            constants.ACTIVITY_TYPE_EXPLORATION,
            constants.ACTIVITY_TYPE_COLLECTION.
        new_status: str. The new status of the activity.
        commit_message: str. The human-written commit message for this
            change.
    """
    activity_rights = _get_activity_rights(activity_type, activity_id)
    old_status = activity_rights.status
    activity_rights.status = new_status
    if activity_type == constants.ACTIVITY_TYPE_EXPLORATION:
        cmd_type = CMD_CHANGE_EXPLORATION_STATUS
    elif activity_type == constants.ACTIVITY_TYPE_COLLECTION:
        cmd_type = CMD_CHANGE_COLLECTION_STATUS
    commit_cmds = [{
        'cmd': cmd_type,
        'old_status': old_status,
        'new_status': new_status
    }]

    if new_status != ACTIVITY_STATUS_PRIVATE:
        activity_rights.viewer_ids = []
        if activity_rights.first_published_msec is None:
            activity_rights.first_published_msec = (
                utils.get_current_time_in_millisecs())

    _save_activity_rights(
        committer_id, activity_rights, activity_type, commit_message,
        commit_cmds)
    _update_activity_summary(activity_type, activity_rights)
def _change_activity_status(
        committer_id, activity_id, activity_type, new_status,
        commit_message):
    """Change the status of an activity. Commits changes.

    Args:
    - committer_id: str. The id of the user who is performing the update
        action.
    - activity_id: str. The id of the exploration or collection.
    - activity_type: str. One of feconf.ACTIVITY_TYPE_EXPLORATION or
        feconf.ACTIVITY_TYPE_COLLECTION.
    - new_status: str. The new status of the activity.
    - commit_message: str. The human-written commit message for this
        change.
    """
    activity_rights = _get_activity_rights(activity_type, activity_id)
    old_status = activity_rights.status
    activity_rights.status = new_status
    if activity_type == feconf.ACTIVITY_TYPE_EXPLORATION:
        cmd_type = CMD_CHANGE_EXPLORATION_STATUS
    elif activity_type == feconf.ACTIVITY_TYPE_COLLECTION:
        cmd_type = CMD_CHANGE_COLLECTION_STATUS
    commit_cmds = [{
        'cmd': cmd_type,
        'old_status': old_status,
        'new_status': new_status
    }]

    if new_status != ACTIVITY_STATUS_PRIVATE:
        activity_rights.viewer_ids = []
        if activity_rights.first_published_msec is None:
            activity_rights.first_published_msec = (
                utils.get_current_time_in_millisecs())

    _save_activity_rights(
        committer_id, activity_rights, activity_type, commit_message,
        commit_cmds)
    _update_activity_summary(activity_type, activity_rights)
def update_collection(
        committer_id, collection_id, change_list, commit_message):
    """Updates a collection. Commits changes.

    Args:
        committer_id: str. The id of the user who is performing the update
            action.
        collection_id: str. The collection id.
        change_list: list(dict). Each entry represents a CollectionChange
            object. These changes are applied in sequence to produce the
            resulting collection.
        commit_message: str or None. A description of changes made to the
            collection. For published collections, this must be present;
            for unpublished collections, it may be equal to None.
    """
    is_public = rights_manager.is_collection_public(collection_id)

    if is_public and not commit_message:
        raise ValueError(
            'Collection is public so expected a commit message but '
            'received none.')

    collection = apply_change_list(collection_id, change_list)
    _save_collection(committer_id, collection, commit_message, change_list)
    update_collection_summary(collection.id, committer_id)

    if (not rights_manager.is_collection_private(collection.id) and
            committer_id != feconf.MIGRATION_BOT_USER_ID):
        user_services.update_first_contribution_msec_if_not_set(
            committer_id, utils.get_current_time_in_millisecs())
def _real_enqueue(cls, job_id):
    entity_class_types = cls.entity_classes_to_map_over()
    entity_class_names = [
        '%s.%s' % (
            entity_class_type.__module__, entity_class_type.__name__)
        for entity_class_type in entity_class_types]

    kwargs = {
        'job_name': job_id,
        'mapper_spec': '%s.%s.map' % (cls.__module__, cls.__name__),
        'reducer_spec': '%s.%s.reduce' % (cls.__module__, cls.__name__),
        'input_reader_spec': (
            'core.jobs.MultipleDatastoreEntitiesInputReader'),
        'output_writer_spec': (
            'mapreduce.output_writers.BlobstoreRecordsOutputWriter'),
        'mapper_params': {
            MAPPER_PARAM_KEY_ENTITY_KINDS: entity_class_names,
            # Note that all parameters passed to the mapper need to be
            # strings. Also note that the value for this key is determined
            # just before enqueue time, so it will be roughly equal to the
            # actual enqueue time.
            MAPPER_PARAM_KEY_QUEUED_TIME_MSECS: str(
                utils.get_current_time_in_millisecs()),
        }
    }
    mr_pipeline = MapReduceJobPipeline(
        job_id, '%s.%s' % (cls.__module__, cls.__name__), kwargs)
    mr_pipeline.start(base_path='/mapreduce/worker/pipeline')
def register_completion(
        cls, job_id, output_list, max_output_len_chars=None):
    """Marks a job as completed.

    Args:
        job_id: str. The ID of the job to complete.
        output_list: list(object). The output produced by the job.
        max_output_len_chars: int or None. Max length of output_list.
            If None, the default maximum output length is used.
    """
    _default_max_len_chars = 900000
    _max_output_len_chars = (
        _default_max_len_chars if max_output_len_chars is None
        else max_output_len_chars)

    # Ensure that preconditions are met.
    model = job_models.JobModel.get(job_id, strict=True)
    cls._require_valid_transition(
        job_id, model.status_code, STATUS_CODE_COMPLETED)
    cls._require_correct_job_type(model.job_type)

    model.status_code = STATUS_CODE_COMPLETED
    model.time_finished_msec = utils.get_current_time_in_millisecs()
    model.output = cls._compress_output_list(
        output_list, _max_output_len_chars)
    model.update_timestamps()
    model.put()

    cls._post_completed_hook(job_id)
def _validate_time_fields(cls, item):
    """Validate the time fields in entity.

    Args:
        item: datastore_services.Model. JobModel to validate.
    """
    if item.time_started_msec and (
            item.time_queued_msec > item.time_started_msec):
        cls._add_error(
            'time queued check',
            'Entity id %s: time queued %s is greater '
            'than time started %s' % (
                item.id, item.time_queued_msec, item.time_started_msec))

    if item.time_finished_msec and (
            item.time_started_msec > item.time_finished_msec):
        cls._add_error(
            'time started check',
            'Entity id %s: time started %s is greater '
            'than time finished %s' % (
                item.id, item.time_started_msec, item.time_finished_msec))

    current_time_msec = utils.get_current_time_in_millisecs()
    if item.time_finished_msec > current_time_msec:
        cls._add_error(
            'time finished check',
            'Entity id %s: time finished %s is greater '
            'than the current time' % (
                item.id, item.time_finished_msec))
def run(self, job_id, job_class_str, output):
    """Extracts the results of a MR job and registers its completion.

    Args:
        job_id: str. The ID of the job to run.
        job_class_str: str. Should uniquely identify each type of job.
        output: str. The output produced by the job.
    """
    job_class = mapreduce_util.for_name(job_class_str)

    try:
        iterator = input_readers.GoogleCloudStorageInputReader(
            output, 0)
        results_list = []
        for item_reader in iterator:
            for item in item_reader:
                results_list.append(json.loads(item))
        job_class.register_completion(job_id, results_list)
    except Exception as e:
        logging.exception(
            'Job %s failed at %s' % (
                job_id, utils.get_current_time_in_millisecs()))
        job_class.register_failure(
            job_id,
            '%s\n%s' % (python_utils.UNICODE(e), traceback.format_exc()))
def cancel(cls, job_id, user_id):
    """Marks a job as canceled.

    Args:
        job_id: str. The ID of the job to cancel.
        user_id: str. The id of the user who cancelled the job.
    """
    # Ensure that preconditions are met.
    model = job_models.JobModel.get(job_id, strict=True)
    cls._require_valid_transition(
        job_id, model.status_code, STATUS_CODE_CANCELED)
    cls._require_correct_job_type(model.job_type)

    cancel_message = 'Canceled by %s' % (user_id or 'system')

    # Cancel the job.
    cls._pre_cancel_hook(job_id, cancel_message)

    model.status_code = STATUS_CODE_CANCELED
    model.time_finished_msec = utils.get_current_time_in_millisecs()
    model.error = cancel_message
    model.update_timestamps()
    model.put()

    cls._post_cancel_hook(job_id, cancel_message)
def _get_search_rank(collection_id):
    """Returns an integer determining the document's rank in search.

    Featured collections get a ranking bump, and so do collections that
    have been more recently updated.
    """
    rights = rights_manager.get_collection_rights(collection_id)
    rank = _DEFAULT_RANK + (
        _STATUS_PUBLICIZED_BONUS
        if rights.status == rights_manager.ACTIVITY_STATUS_PUBLICIZED
        else 0)

    # Iterate backwards through the collection history metadata until we
    # find the most recent snapshot that was committed by a human.
    last_human_update_ms = 0
    snapshots_metadata = get_collection_snapshots_metadata(collection_id)
    for snapshot_metadata in reversed(snapshots_metadata):
        if snapshot_metadata['committer_id'] != feconf.MIGRATION_BOT_USER_ID:
            last_human_update_ms = snapshot_metadata['created_on_ms']
            break

    _time_now_ms = utils.get_current_time_in_millisecs()
    time_delta_days = int(
        (_time_now_ms - last_human_update_ms) / _MS_IN_ONE_DAY)
    if time_delta_days == 0:
        rank += 80
    elif time_delta_days == 1:
        rank += 50
    elif 2 <= time_delta_days <= 7:
        rank += 35

    # Ranks must be non-negative.
    return max(rank, 0)
def get(self):
    """Handles GET requests."""
    recent_job_data = jobs.get_data_for_recent_jobs()
    unfinished_job_data = jobs.get_data_for_unfinished_jobs()
    for job in unfinished_job_data:
        job['can_be_canceled'] = job['is_cancelable'] and any([
            klass.__name__ == job['job_type']
            for klass in (
                jobs_registry.ONE_OFF_JOB_MANAGERS +
                jobs_registry.AUDIT_JOB_MANAGERS)])

    queued_or_running_job_types = set([
        job['job_type'] for job in unfinished_job_data])
    one_off_job_status_summaries = [{
        'job_type': klass.__name__,
        'is_queued_or_running': (
            klass.__name__ in queued_or_running_job_types)
    } for klass in jobs_registry.ONE_OFF_JOB_MANAGERS]
    audit_job_status_summaries = [{
        'job_type': klass.__name__,
        'is_queued_or_running': (
            klass.__name__ in queued_or_running_job_types)
    } for klass in jobs_registry.AUDIT_JOB_MANAGERS]

    self.render_json({
        'human_readable_current_time': (
            utils.get_human_readable_time_string(
                utils.get_current_time_in_millisecs())),
        'one_off_job_status_summaries': one_off_job_status_summaries,
        'audit_job_status_summaries': audit_job_status_summaries,
        'recent_job_data': recent_job_data,
        'unfinished_job_data': unfinished_job_data,
    })
def update_collection(
        committer_id, collection_id, change_list, commit_message):
    """Updates a collection. Commits changes.

    Args:
    - committer_id: str. The id of the user who is performing the update
        action.
    - collection_id: str. The collection id.
    - change_list: list of dicts, each representing a CollectionChange
        object. These changes are applied in sequence to produce the
        resulting collection.
    - commit_message: str or None. A description of changes made to the
        collection. For published collections, this must be present; for
        unpublished collections, it may be equal to None.
    """
    is_public = rights_manager.is_collection_public(collection_id)

    if is_public and not commit_message:
        raise ValueError(
            'Collection is public so expected a commit message but '
            'received none.')

    collection = apply_change_list(collection_id, change_list)
    _save_collection(committer_id, collection, commit_message, change_list)
    update_collection_summary(collection.id, committer_id)

    if not rights_manager.is_collection_private(collection.id):
        user_services.update_first_contribution_msec_if_not_set(
            committer_id, utils.get_current_time_in_millisecs())
def _validate_time_fields(cls, item):
    """Validate the time fields in entity.

    Args:
        item: datastore_services.Model. ContinuousComputationModel to
            validate.
    """
    if item.last_started_msec > item.last_finished_msec and (
            item.last_started_msec > item.last_stopped_msec):
        cls._add_error(
            'last started check',
            'Entity id %s: last started %s is greater '
            'than both last finished %s and last stopped %s' % (
                item.id, item.last_started_msec,
                item.last_finished_msec, item.last_stopped_msec))

    current_time_msec = utils.get_current_time_in_millisecs()
    if item.last_finished_msec > current_time_msec:
        cls._add_error(
            'last finished check',
            'Entity id %s: last finished %s is greater '
            'than the current time' % (
                item.id, item.last_finished_msec))

    if item.last_stopped_msec > current_time_msec:
        cls._add_error(
            'last stopped check',
            'Entity id %s: last stopped %s is greater '
            'than the current time' % (
                item.id, item.last_stopped_msec))
def get(self):
    """Handles GET requests."""
    recent_job_data = jobs.get_data_for_recent_jobs()
    unfinished_job_data = jobs.get_data_for_unfinished_jobs()
    for job in unfinished_job_data:
        job['can_be_canceled'] = job['is_cancelable'] and any([
            klass.__name__ == job['job_type']
            for klass in (
                jobs_registry.ONE_OFF_JOB_MANAGERS +
                jobs_registry.AUDIT_JOB_MANAGERS)])

    queued_or_running_job_types = set(
        [job['job_type'] for job in unfinished_job_data])
    one_off_job_status_summaries = [{
        'job_type': klass.__name__,
        'is_queued_or_running': (
            klass.__name__ in queued_or_running_job_types)
    } for klass in jobs_registry.ONE_OFF_JOB_MANAGERS]
    audit_job_status_summaries = [{
        'job_type': klass.__name__,
        'is_queued_or_running': (
            klass.__name__ in queued_or_running_job_types)
    } for klass in jobs_registry.AUDIT_JOB_MANAGERS]

    continuous_computations_data = jobs.get_continuous_computations_info(
        jobs_registry.ALL_CONTINUOUS_COMPUTATION_MANAGERS)
    for computation in continuous_computations_data:
        if computation['last_started_msec']:
            computation['human_readable_last_started'] = (
                utils.get_human_readable_time_string(
                    computation['last_started_msec']))
        if computation['last_stopped_msec']:
            computation['human_readable_last_stopped'] = (
                utils.get_human_readable_time_string(
                    computation['last_stopped_msec']))
        if computation['last_finished_msec']:
            computation['human_readable_last_finished'] = (
                utils.get_human_readable_time_string(
                    computation['last_finished_msec']))

    self.render_json({
        'continuous_computations_data': continuous_computations_data,
        'human_readable_current_time': (
            utils.get_human_readable_time_string(
                utils.get_current_time_in_millisecs())),
        'one_off_job_status_summaries': one_off_job_status_summaries,
        'audit_job_status_summaries': audit_job_status_summaries,
        'recent_job_data': recent_job_data,
        'unfinished_job_data': unfinished_job_data,
    })
def cleanup_old_jobs_pipelines():
    """Clean the pipelines of old jobs."""
    num_cleaned = 0

    # Only consider jobs that started at most 1 week before the
    # MAX_MAPREDUCE_METADATA_RETENTION_MSECS cutoff.
    max_age_msec = (
        MAX_MAPREDUCE_METADATA_RETENTION_MSECS + 7 * 24 * 60 * 60 * 1000)
    # The latest start time that a job scheduled for cleanup may have.
    max_start_time_msec = (
        utils.get_current_time_in_millisecs() -
        MAX_MAPREDUCE_METADATA_RETENTION_MSECS)

    # Get all pipeline ids from jobs that started between max_age_msec
    # and max_age_msec + 1 week, before now.
    pipeline_id_to_job_instance = {}

    job_instances = job_models.JobModel.get_recent_jobs(1000, max_age_msec)
    for job_instance in job_instances:
        if (job_instance.time_started_msec < max_start_time_msec and
                not job_instance.has_been_cleaned_up):
            if 'root_pipeline_id' in job_instance.metadata:
                pipeline_id = job_instance.metadata['root_pipeline_id']
                pipeline_id_to_job_instance[pipeline_id] = job_instance

    # Clean up pipelines.
    for pline in pipeline.get_root_list()['pipelines']:
        pipeline_id = pline['pipelineId']
        job_definitely_terminated = (
            pline['status'] == 'done' or
            pline['status'] == 'aborted' or
            pline['currentAttempt'] > pline['maxAttempts'])
        have_start_time = 'startTimeMs' in pline
        job_started_too_long_ago = (
            have_start_time and
            pline['startTimeMs'] < max_start_time_msec)

        if (job_started_too_long_ago or
                (not have_start_time and job_definitely_terminated)):
            # At this point, the map/reduce pipeline is either in a
            # terminal state, or has taken so long that there's no
            # realistic possibility that there might be a race condition
            # between this and the job actually completing.
            if pipeline_id in pipeline_id_to_job_instance:
                job_instance = pipeline_id_to_job_instance[pipeline_id]
                job_instance.has_been_cleaned_up = True
                job_instance.update_timestamps()
                job_instance.put()

            # This enqueues a deferred cleanup item.
            p = pipeline.Pipeline.from_id(pipeline_id)
            if p:
                p.cleanup()
                num_cleaned += 1

    logging.warning('%s MR jobs cleaned up.' % num_cleaned)
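# A worked example of the cleanup window above (the retention value is
# assumed purely for illustration): with a 3-day retention period, a job
# becomes cleanable once it started more than 3 days ago, and the
# candidate query looks back at most 3 days + 1 week.
#
#     MAX_MAPREDUCE_METADATA_RETENTION_MSECS = 3 * 24 * 60 * 60 * 1000
#     max_age_msec = retention_msec + 7 * 24 * 60 * 60 * 1000  # 10 days
#     max_start_time_msec = now_msec - retention_msec  # newest cleanable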
def get(self):
    """Handles GET requests."""
    demo_exploration_ids = feconf.DEMO_EXPLORATIONS.keys()

    recent_job_data = jobs.get_data_for_recent_jobs()
    unfinished_job_data = jobs.get_data_for_unfinished_jobs()
    for job in unfinished_job_data:
        job['can_be_canceled'] = job['is_cancelable'] and any([
            klass.__name__ == job['job_type']
            for klass in jobs_registry.ONE_OFF_JOB_MANAGERS])

    queued_or_running_job_types = set([
        job['job_type'] for job in unfinished_job_data])
    one_off_job_specs = [{
        'job_type': klass.__name__,
        'is_queued_or_running': (
            klass.__name__ in queued_or_running_job_types)
    } for klass in jobs_registry.ONE_OFF_JOB_MANAGERS]

    continuous_computations_data = jobs.get_continuous_computations_info(
        jobs_registry.ALL_CONTINUOUS_COMPUTATION_MANAGERS)
    for computation in continuous_computations_data:
        if computation['last_started_msec']:
            computation['human_readable_last_started'] = (
                utils.get_human_readable_time_string(
                    computation['last_started_msec']))
        if computation['last_stopped_msec']:
            computation['human_readable_last_stopped'] = (
                utils.get_human_readable_time_string(
                    computation['last_stopped_msec']))
        if computation['last_finished_msec']:
            computation['human_readable_last_finished'] = (
                utils.get_human_readable_time_string(
                    computation['last_finished_msec']))

    self.values.update({
        'continuous_computations_data': continuous_computations_data,
        'demo_collections': sorted(feconf.DEMO_COLLECTIONS.iteritems()),
        'demo_explorations': sorted(feconf.DEMO_EXPLORATIONS.iteritems()),
        'demo_exploration_ids': demo_exploration_ids,
        'human_readable_current_time': (
            utils.get_human_readable_time_string(
                utils.get_current_time_in_millisecs())),
        'one_off_job_specs': one_off_job_specs,
        'recent_job_data': recent_job_data,
        'rte_components_html': jinja2.utils.Markup(
            rte_component_registry.Registry.get_html_for_all_components()),
        'unfinished_job_data': unfinished_job_data,
        'value_generators_js': jinja2.utils.Markup(
            editor.get_value_generators_js()),
    })

    self.render_template('pages/admin/admin.html')
def get_new_id(cls, entity_name):
    """Overwrites superclass method.

    Args:
        entity_name: str. The name of the entity to create a new job
            id for.

    Returns:
        str. A job id.
    """
    job_type = entity_name
    current_time_str = str(int(utils.get_current_time_in_millisecs()))
    random_int = random.randint(0, 1000)
    return '%s-%s-%s' % (job_type, current_time_str, random_int)
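# Usage sketch (the job type and timestamp are illustrative): the
# resulting IDs concatenate the job type, the current time in
# milliseconds, and a random suffix, so random.randint(0, 1000), which is
# inclusive on both ends, only has to disambiguate jobs created within
# the same millisecond.
#
#     job_id = JobModel.get_new_id('SomeOneOffJob')
#     # e.g. 'SomeOneOffJob-1526744580123-417'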
def generate_new_thread_id(cls, exploration_id):
    """Generates a new thread id, unique within the exploration.

    Exploration ID + the generated thread ID is globally unique.
    """
    for _ in range(_MAX_RETRIES):
        thread_id = (
            utils.base64_from_int(utils.get_current_time_in_millisecs()) +
            utils.base64_from_int(utils.get_random_int(_RAND_RANGE)))
        if not cls.get_by_exp_and_thread_id(exploration_id, thread_id):
            return thread_id
    raise Exception(
        'New thread id generator is producing too many collisions.')
def test_invalid_last_stopped_msec(self):
    current_time_msec = utils.get_current_time_in_millisecs()
    self.model_instance.last_stopped_msec = current_time_msec * 10.0
    self.model_instance.update_timestamps()
    self.model_instance.put()
    expected_output = [(
        u'[u\'failed validation check for last stopped check '
        'of ContinuousComputationModel\', [u\'Entity id %s: '
        'last stopped %s is greater than the current time\']]'
    ) % (self.model_instance.id, self.model_instance.last_stopped_msec)]
    self.run_job_and_check_output(
        expected_output, sort=False, literal_eval=False)
def _stop_computation_transactional():
    """Transactional implementation for marking a continuous
    computation as stopping/idle.
    """
    cc_model = job_models.ContinuousComputationModel.get(cls.__name__)
    # If there is no job currently running, go to IDLE immediately.
    new_status_code = (
        job_models.CONTINUOUS_COMPUTATION_STATUS_CODE_STOPPING
        if do_unfinished_jobs_exist
        else job_models.CONTINUOUS_COMPUTATION_STATUS_CODE_IDLE)
    cc_model.status_code = new_status_code
    cc_model.last_stopped_msec = utils.get_current_time_in_millisecs()
    cc_model.put()
def register_failure(cls, job_id, error):
    """Marks a job as failed."""
    # Ensure that preconditions are met.
    model = job_models.JobModel.get(job_id, strict=True)
    cls._require_valid_transition(
        job_id, model.status_code, STATUS_CODE_FAILED)
    cls._require_correct_job_type(model.job_type)

    model.status_code = STATUS_CODE_FAILED
    model.time_finished_msec = utils.get_current_time_in_millisecs()
    model.error = error
    model.put()

    cls._post_failure_hook(job_id)
def register_completion(cls, job_id, output):
    """Marks a job as completed."""
    # Ensure that preconditions are met.
    model = job_models.JobModel.get(job_id, strict=True)
    cls._require_valid_transition(
        job_id, model.status_code, STATUS_CODE_COMPLETED)
    cls._require_correct_job_type(model.job_type)

    model.status_code = STATUS_CODE_COMPLETED
    model.time_finished_msec = utils.get_current_time_in_millisecs()
    model.output = output
    model.put()

    cls._post_completed_hook(job_id)
def register_start(cls, job_id, metadata=None):
    """Marks a job as started."""
    model = job_models.JobModel.get(job_id, strict=True)
    cls._require_valid_transition(
        job_id, model.status_code, STATUS_CODE_STARTED)
    cls._require_correct_job_type(model.job_type)

    cls._pre_start_hook(job_id)

    model.metadata = metadata
    model.status_code = STATUS_CODE_STARTED
    model.time_started_msec = utils.get_current_time_in_millisecs()
    model.put()

    cls._post_start_hook(job_id)
def _start_computation_transactional():
    """Transactional implementation for marking a continuous
    computation as started.
    """
    cc_model = job_models.ContinuousComputationModel.get(
        cls.__name__, strict=False)
    if cc_model is None:
        cc_model = job_models.ContinuousComputationModel(
            id=cls.__name__)

    if (cc_model.status_code !=
            job_models.CONTINUOUS_COMPUTATION_STATUS_CODE_IDLE):
        raise Exception(
            'Attempted to start computation %s, which is already '
            'running.' % cls.__name__)

    cc_model.status_code = (
        job_models.CONTINUOUS_COMPUTATION_STATUS_CODE_RUNNING)
    cc_model.last_started_msec = utils.get_current_time_in_millisecs()
    cc_model.put()
def publish_collection_and_update_user_profiles(committer_id, col_id):
    """Publishes the collection with publish_collection() function in
    rights_manager.py, as well as updates first_contribution_msec.

    It is the responsibility of the caller to check that the collection
    is valid prior to publication.
    """
    rights_manager.publish_collection(committer_id, col_id)
    contribution_time_msec = utils.get_current_time_in_millisecs()
    collection_summary = get_collection_summary_by_id(col_id)
    contributor_ids = collection_summary.contributor_ids
    for contributor in contributor_ids:
        user_services.update_first_contribution_msec_if_not_set(
            contributor, contribution_time_msec)
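# Design note on the snippet above: the timestamp is captured once,
# before the loop, so every contributor whose first_contribution_msec is
# still unset receives the same publication instant rather than slightly
# different per-iteration times. A usage sketch with hypothetical IDs:
#
#     publish_collection_and_update_user_profiles('committer_1', 'col_1')
#     # All prior contributors to 'col_1' whose first_contribution_msec
#     # was unset now share the single timestamp taken at publication.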
def test_invalid_time_finished_msec(self):
    current_time_msec = utils.get_current_time_in_millisecs()
    self.model_instance.time_finished_msec = current_time_msec * 10.0
    self.model_instance.update_timestamps()
    self.model_instance.put()
    expected_output = [
        (u'[u\'failed validation check for time finished '
         'check of JobModel\', [u\'Entity id %s: time '
         'finished %s is greater than the current time\']]') % (
             self.model_instance.id,
             self.model_instance.time_finished_msec),
        u'[u\'fully-validated JobModel\', 1]']
    self.run_job_and_check_output(
        expected_output, sort=True, literal_eval=False)
def setUp(self):
    super(JobModelValidatorTests, self).setUp()

    current_time_str = python_utils.UNICODE(
        int(utils.get_current_time_in_millisecs()))
    random_int = random.randint(0, 1000)
    self.model_instance = job_models.JobModel(
        id='test-%s-%s' % (current_time_str, random_int),
        status_code=job_models.STATUS_CODE_NEW, job_type='test',
        time_queued_msec=1, time_started_msec=10, time_finished_msec=20)
    self.model_instance.update_timestamps()
    self.model_instance.put()

    self.job_class = (
        prod_validation_jobs_one_off.JobModelAuditOneOffJob)
def run(self, job_id, job_class_str, output):
    job_class = mapreduce_util.for_name(job_class_str)

    try:
        iterator = input_readers.GoogleCloudStorageInputReader(output, 0)
        results_list = []
        for item_reader in iterator:
            for item in item_reader:
                results_list.append(json.loads(item))
        job_class.register_completion(job_id, results_list)
    except Exception as e:
        logging.error(traceback.format_exc())
        logging.error(
            'Job %s failed at %s' %
            (job_id, utils.get_current_time_in_millisecs()))
        job_class.register_failure(
            job_id, '%s\n%s' % (unicode(e), traceback.format_exc()))
def enqueue(cls, job_id, additional_job_params=None):
    """Marks a job as queued and adds it to a queue for processing."""
    # Ensure that preconditions are met.
    model = job_models.JobModel.get(job_id, strict=True)
    cls._require_valid_transition(
        job_id, model.status_code, STATUS_CODE_QUEUED)
    cls._require_correct_job_type(model.job_type)

    # Enqueue the job.
    cls._pre_enqueue_hook(job_id)
    cls._real_enqueue(job_id, additional_job_params)

    model.status_code = STATUS_CODE_QUEUED
    model.time_queued_msec = utils.get_current_time_in_millisecs()
    model.put()

    cls._post_enqueue_hook(job_id)
def get_recent_jobs(cls, limit, recency_msec):
    """Gets at most `limit` jobs queued within the last `recency_msec`
    milliseconds.

    Args:
        limit: int. A limit on the number of jobs to return.
        recency_msec: int. The number of milliseconds earlier than the
            current time.

    Returns:
        list(JobModel). A list of at most `limit` jobs queued after the
        recency cutoff, most recently queued first.
    """
    earliest_time_msec = (
        utils.get_current_time_in_millisecs() - recency_msec)
    return cls.query().filter(
        cls.time_queued_msec > earliest_time_msec
    ).order(-cls.time_queued_msec).fetch(limit)
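# Usage sketch (values are illustrative): fetch up to 50 jobs queued
# within the last hour. The cutoff is computed on the Python side, so the
# datastore query needs only a single inequality filter plus a descending
# sort on time_queued_msec.
#
#     one_hour_msec = 60 * 60 * 1000
#     recent_jobs = job_models.JobModel.get_recent_jobs(50, one_hour_msec)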
def cancel(cls, job_id, user_id):
    # Ensure that preconditions are met.
    model = job_models.JobModel.get(job_id, strict=True)
    cls._require_valid_transition(
        job_id, model.status_code, STATUS_CODE_CANCELED)
    cls._require_correct_job_type(model.job_type)

    cancel_message = 'Canceled by %s' % (user_id or 'system')

    # Cancel the job.
    cls._pre_cancel_hook(job_id, cancel_message)

    model.status_code = STATUS_CODE_CANCELED
    model.time_finished_msec = utils.get_current_time_in_millisecs()
    model.error = cancel_message
    model.put()

    cls._post_cancel_hook(job_id, cancel_message)
def _real_enqueue(cls, job_id, additional_job_params):
    entity_class_types = cls.entity_classes_to_map_over()
    entity_class_names = [
        '%s.%s' % (
            entity_class_type.__module__, entity_class_type.__name__)
        for entity_class_type in entity_class_types]

    kwargs = {
        'job_name': job_id,
        'mapper_spec': '%s.%s.map' % (cls.__module__, cls.__name__),
        'reducer_spec': '%s.%s.reduce' % (cls.__module__, cls.__name__),
        'input_reader_spec': (
            'core.jobs.MultipleDatastoreEntitiesInputReader'),
        'output_writer_spec': (
            'core.jobs.GoogleCloudStorageConsistentJsonOutputWriter'),
        'mapper_params': {
            MAPPER_PARAM_KEY_ENTITY_KINDS: entity_class_names,
            # Note that all parameters passed to the mapper need to be
            # strings. Also note that the value for this key is determined
            # just before enqueue time, so it will be roughly equal to the
            # actual enqueue time.
            MAPPER_PARAM_KEY_QUEUED_TIME_MSECS: str(
                utils.get_current_time_in_millisecs()),
        },
        'reducer_params': {
            'output_writer': {
                'bucket_name': app_identity.get_default_gcs_bucket_name(),
                'content_type': 'text/plain',
                'naming_format': 'mrdata/$name/$id/output-$num',
            }
        }
    }

    if additional_job_params is not None:
        for param_name in additional_job_params:
            if param_name in kwargs['mapper_params']:
                raise Exception(
                    'Additional job param %s shadows an existing mapper '
                    'param' % param_name)
            kwargs['mapper_params'][param_name] = copy.deepcopy(
                additional_job_params[param_name])

    mr_pipeline = MapReduceJobPipeline(
        job_id, '%s.%s' % (cls.__module__, cls.__name__), kwargs)
    mr_pipeline.start(base_path='/mapreduce/worker/pipeline')
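# Illustrative sketch of the shadowing guard above (the job class and
# param names are hypothetical): caller-supplied params are deep-copied
# into mapper_params, but reserved keys such as
# MAPPER_PARAM_KEY_QUEUED_TIME_MSECS cannot be overridden.
#
#     SomeOneOffJob.enqueue(
#         job_id, additional_job_params={'threshold': '10'})  # accepted
#     SomeOneOffJob.enqueue(
#         job_id, additional_job_params={
#             MAPPER_PARAM_KEY_QUEUED_TIME_MSECS: '0'})  # raises Exception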
def _get_search_rank(collection_id):
    """Returns an integer determining the document's rank in search.

    Featured collections get a ranking bump, and so do collections that
    have been more recently updated.
    """
    # TODO(bhenning): Improve this calculation. Some possible suggestions
    # for a better ranking include using an average of the search ranks of
    # each exploration referenced in the collection and/or demoting
    # collections for any validation errors from explorations referenced
    # in the collection.
    _STATUS_PUBLICIZED_BONUS = 30
    # This is done to prevent the rank hitting 0 too easily. Note that
    # negative ranks are disallowed in the Search API.
    _DEFAULT_RANK = 20

    collection = get_collection_by_id(collection_id)
    rights = rights_manager.get_collection_rights(collection_id)
    summary = get_collection_summary_by_id(collection_id)
    rank = _DEFAULT_RANK + (
        _STATUS_PUBLICIZED_BONUS
        if rights.status == rights_manager.ACTIVITY_STATUS_PUBLICIZED
        else 0)

    # Iterate backwards through the collection history metadata until we
    # find the most recent snapshot that was committed by a human.
    last_human_update_ms = 0
    snapshots_metadata = get_collection_snapshots_metadata(collection_id)
    for snapshot_metadata in reversed(snapshots_metadata):
        if snapshot_metadata['committer_id'] != feconf.MIGRATION_BOT_USER_ID:
            last_human_update_ms = snapshot_metadata['created_on_ms']
            break

    _TIME_NOW_MS = utils.get_current_time_in_millisecs()
    _MS_IN_ONE_DAY = 24 * 60 * 60 * 1000
    time_delta_days = int(
        (_TIME_NOW_MS - last_human_update_ms) / _MS_IN_ONE_DAY)
    if time_delta_days == 0:
        rank += 80
    elif time_delta_days == 1:
        rank += 50
    elif 2 <= time_delta_days <= 7:
        rank += 35

    # Ranks must be non-negative.
    return max(rank, 0)
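# A worked example of the ranking above (statuses and timings chosen
# purely for illustration):
#
#     # Publicized, last human edit today:
#     #     20 (_DEFAULT_RANK) + 30 (_STATUS_PUBLICIZED_BONUS) + 80 = 130
#     # Public but not publicized, last human edit 5 days ago:
#     #     20 + 0 + 35 = 55
#     # No human edit in over a week: 20 + 0 + 0 = 20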
def run(self, job_id, job_class_str, output):
    job_class = mapreduce_util.for_name(job_class_str)

    try:
        iterator = input_readers.RecordsReader(output, 0)
        results_list = []
        for item in iterator:
            # Map/reduce puts reducer output into blobstore files as a
            # string obtained via "str(result)". Use AST as a safe
            # alternative to eval() to get the Python object back.
            results_list.append(ast.literal_eval(item))
        job_class.register_completion(job_id, results_list)
    except Exception as e:
        logging.error(traceback.format_exc())
        logging.error(
            'Job %s failed at %s' %
            (job_id, utils.get_current_time_in_millisecs()))
        job_class.register_failure(
            job_id, '%s\n%s' % (unicode(e), traceback.format_exc()))
def register_completion(cls, job_id, output_list):
    """Marks a job as completed."""
    _MAX_OUTPUT_LENGTH_CHARS = 900000

    # Ensure that preconditions are met.
    model = job_models.JobModel.get(job_id, strict=True)
    cls._require_valid_transition(
        job_id, model.status_code, STATUS_CODE_COMPLETED)
    cls._require_correct_job_type(model.job_type)

    model.status_code = STATUS_CODE_COMPLETED
    model.time_finished_msec = utils.get_current_time_in_millisecs()

    # TODO(bhenning): Add tests for this.
    output_str_list = [
        '%s' % output_value for output_value in output_list]

    # De-duplicate the lines of output since it's not very useful to
    # repeat them.
    counter = collections.Counter(list(output_str_list))
    output_str_frequency_list = [
        (output_str, counter[output_str]) for output_str in counter]
    output_str_list = [
        line if freq == 1 else '%s (%d times)' % (line, freq)
        for (line, freq) in output_str_frequency_list]

    cutoff_index = 0
    total_output_size = 0
    for idx, output_str in enumerate(output_str_list):
        cutoff_index += 1
        total_output_size += len(output_str)
        if total_output_size >= _MAX_OUTPUT_LENGTH_CHARS:
            max_element_length = (
                total_output_size - _MAX_OUTPUT_LENGTH_CHARS)
            output_str_list[idx] = output_str[:max_element_length]
            output_str_list[idx] += ' <TRUNCATED>'
            break

    model.output = output_str_list[:cutoff_index]
    model.put()

    cls._post_completed_hook(job_id)
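# Illustrative example of the de-duplication and truncation above
# (hypothetical job output): repeated lines collapse to a single entry
# with a count, and output beyond the length cap is cut off with a
# '<TRUNCATED>' marker.
#
#     output_list = ['SUCCESS', 'SUCCESS', 'SUCCESS', 'SKIPPED exp_1']
#     # stored as ['SUCCESS (3 times)', 'SKIPPED exp_1'] (the order of
#     # the two entries is not guaranteed, since Counter iteration order
#     # is arbitrary).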
def generate_new_thread_id(cls, exploration_id):
    """Generates a new thread ID which is unique within the exploration.

    Args:
        exploration_id: str. The ID of the exploration.

    Returns:
        str. A thread ID that is different from the IDs of all
        the existing threads within the given exploration.

    Raises:
        Exception: There were too many collisions with existing thread IDs
            when attempting to generate a new thread ID.
    """
    for _ in range(_MAX_RETRIES):
        thread_id = (
            utils.base64_from_int(utils.get_current_time_in_millisecs()) +
            utils.base64_from_int(utils.get_random_int(_RAND_RANGE)))
        if not cls.get_by_exp_and_thread_id(exploration_id, thread_id):
            return thread_id
    raise Exception(
        'New thread id generator is producing too many collisions.')
def get_recent_jobs(cls, limit, recency_msec):
    earliest_time_msec = (
        utils.get_current_time_in_millisecs() - recency_msec)
    return cls.query().filter(
        cls.time_queued_msec > earliest_time_msec
    ).order(-cls.time_queued_msec).fetch(limit)
def get(self):
    """Handles GET requests."""
    self.values['counters'] = [{
        'name': counter.name,
        'description': counter.description,
        'value': counter.value
    } for counter in counters.Registry.get_all_counters()]

    if counters.HTML_RESPONSE_COUNT.value:
        average_time = (
            counters.HTML_RESPONSE_TIME_SECS.value /
            counters.HTML_RESPONSE_COUNT.value)
        self.values['counters'].append({
            'name': 'average-html-response-time-secs',
            'description': 'Average HTML response time in seconds',
            'value': average_time
        })

    if counters.JSON_RESPONSE_COUNT.value:
        average_time = (
            counters.JSON_RESPONSE_TIME_SECS.value /
            counters.JSON_RESPONSE_COUNT.value)
        self.values['counters'].append({
            'name': 'average-json-response-time-secs',
            'description': 'Average JSON response time in seconds',
            'value': average_time
        })

    demo_exploration_ids = feconf.DEMO_EXPLORATIONS.keys()

    recent_job_data = jobs.get_data_for_recent_jobs()
    unfinished_job_data = jobs.get_data_for_unfinished_jobs()
    for job in unfinished_job_data:
        job['can_be_canceled'] = job['is_cancelable'] and any([
            klass.__name__ == job['job_type']
            for klass in jobs_registry.ONE_OFF_JOB_MANAGERS])

    queued_or_running_job_types = set([
        job['job_type'] for job in unfinished_job_data])
    one_off_job_specs = [{
        'job_type': klass.__name__,
        'is_queued_or_running': (
            klass.__name__ in queued_or_running_job_types)
    } for klass in jobs_registry.ONE_OFF_JOB_MANAGERS]

    continuous_computations_data = jobs.get_continuous_computations_info(
        jobs_registry.ALL_CONTINUOUS_COMPUTATION_MANAGERS)
    for computation in continuous_computations_data:
        if computation['last_started_msec']:
            computation['human_readable_last_started'] = (
                utils.get_human_readable_time_string(
                    computation['last_started_msec']))
        if computation['last_stopped_msec']:
            computation['human_readable_last_stopped'] = (
                utils.get_human_readable_time_string(
                    computation['last_stopped_msec']))
        if computation['last_finished_msec']:
            computation['human_readable_last_finished'] = (
                utils.get_human_readable_time_string(
                    computation['last_finished_msec']))

    self.values.update({
        'continuous_computations_data': continuous_computations_data,
        'demo_collections': sorted(feconf.DEMO_COLLECTIONS.iteritems()),
        'demo_explorations': sorted(feconf.DEMO_EXPLORATIONS.iteritems()),
        'demo_exploration_ids': demo_exploration_ids,
        'human_readable_current_time': (
            utils.get_human_readable_time_string(
                utils.get_current_time_in_millisecs())),
        'one_off_job_specs': one_off_job_specs,
        'recent_job_data': recent_job_data,
        'rte_components_html': jinja2.utils.Markup(
            rte_component_registry.Registry.get_html_for_all_components()),
        'unfinished_job_data': unfinished_job_data,
        'value_generators_js': jinja2.utils.Markup(
            editor.VALUE_GENERATORS_JS.value),
    })

    self.render_template('admin/admin.html')
def get(self):
    """Clean up intermediate data items for completed M/R jobs that
    started more than MAX_MAPREDUCE_METADATA_RETENTION_MSECS
    milliseconds ago.

    Map/reduce runs leave around a large number of rows in several
    tables. This data is useful to have around for a while:
    - it helps diagnose any problems with jobs that may be occurring
    - it shows where resource usage is occurring
    However, after a few days, this information is less relevant, and
    should be cleaned up.
    """
    recency_msec = MAX_MAPREDUCE_METADATA_RETENTION_MSECS

    num_cleaned = 0

    min_age_msec = recency_msec
    # Only consider jobs that started at most 1 week before recency_msec.
    max_age_msec = recency_msec + 7 * 24 * 60 * 60 * 1000
    # The latest start time that a job scheduled for cleanup may have.
    max_start_time_msec = (
        utils.get_current_time_in_millisecs() - min_age_msec)

    # Get all pipeline ids from jobs that started between max_age_msec
    # and max_age_msec + 1 week, before now.
    pipeline_id_to_job_instance = {}

    job_instances = job_models.JobModel.get_recent_jobs(1000, max_age_msec)
    for job_instance in job_instances:
        if (job_instance.time_started_msec < max_start_time_msec and
                not job_instance.has_been_cleaned_up):
            if 'root_pipeline_id' in job_instance.metadata:
                pipeline_id = job_instance.metadata['root_pipeline_id']
                pipeline_id_to_job_instance[pipeline_id] = job_instance

    # Clean up pipelines.
    for pline in pipeline.get_root_list()['pipelines']:
        pipeline_id = pline['pipelineId']
        job_definitely_terminated = (
            pline['status'] == 'done' or
            pline['status'] == 'aborted' or
            pline['currentAttempt'] > pline['maxAttempts'])
        have_start_time = 'startTimeMs' in pline
        job_started_too_long_ago = (
            have_start_time and
            pline['startTimeMs'] < max_start_time_msec)

        if (job_started_too_long_ago or
                (not have_start_time and job_definitely_terminated)):
            # At this point, the map/reduce pipeline is either in a
            # terminal state, or has taken so long that there's no
            # realistic possibility that there might be a race condition
            # between this and the job actually completing.
            if pipeline_id in pipeline_id_to_job_instance:
                job_instance = pipeline_id_to_job_instance[pipeline_id]
                job_instance.has_been_cleaned_up = True
                job_instance.put()

            # This enqueues a deferred cleanup item.
            p = pipeline.Pipeline.from_id(pipeline_id)
            if p:
                p.cleanup()
                num_cleaned += 1

    logging.warning('%s MR jobs cleaned up.' % num_cleaned)

    if job_models.JobModel.do_unfinished_jobs_exist(
            jobs.JobCleanupManager.__name__):
        logging.warning('A previous cleanup job is still running.')
    else:
        jobs.JobCleanupManager.enqueue(
            jobs.JobCleanupManager.create_new(), additional_job_params={
                jobs.MAPPER_PARAM_MAX_START_TIME_MSEC: max_start_time_msec
            })
        logging.warning('Deletion jobs for auxiliary entities kicked off.')
def _update_last_finished_time_transactional():
    """Transactionally updates the last finished time of the
    continuous computation.
    """
    cc_model = job_models.ContinuousComputationModel.get(cls.__name__)
    cc_model.last_finished_msec = utils.get_current_time_in_millisecs()
    cc_model.put()
def get_new_id(cls, entity_name):
    """Overwrites superclass method."""
    job_type = entity_name
    current_time_str = str(int(utils.get_current_time_in_millisecs()))
    random_int = random.randint(0, 1000)
    return '%s-%s-%s' % (job_type, current_time_str, random_int)