def get_classifier_training_job_from_model(classifier_training_job_model):
    """Converts a classifier training job model into its domain object.

    Args:
        classifier_training_job_model: ClassifierTrainingJobModel. Classifier
            training job instance in datastore.

    Returns:
        ClassifierTrainingJob. Domain object built from the fields of the
        given classifier training job model.
    """
    # Short alias; the constructor arguments below are positional, so their
    # order must stay in sync with ClassifierTrainingJob's signature.
    model = classifier_training_job_model
    return classifier_domain.ClassifierTrainingJob(
        model.id,
        model.algorithm_id,
        model.interaction_id,
        model.exp_id,
        model.exp_version,
        model.next_scheduled_check_time,
        model.state_name,
        model.status,
        model.training_data,
        model.algorithm_version)
def handle_trainable_states(exploration, state_names):
    """Creates ClassifierTrainingJobModel instances, and the matching
    StateTrainingJobsMappingModel instances, for every state name given.

    If this function is called with version number 1, jobs are being created
    for all trainable states in the exploration. Otherwise, new jobs are being
    created for the states where retraining is required.

    Args:
        exploration: Exploration. The Exploration domain object.
        state_names: list(str). List of state names.
    """
    exploration_id = exploration.id
    exploration_version = exploration.version

    pending_job_dicts = []
    for state_name in state_names:
        state = exploration.states[state_name]
        training_data = state.get_training_data()
        interaction_id = state.interaction.id
        classifier_info = (
            feconf.INTERACTION_CLASSIFIER_MAPPING[interaction_id])
        algorithm_id = classifier_info['algorithm_id']
        scheduled_check_time = datetime.datetime.utcnow()
        algorithm_version = classifier_info['algorithm_version']

        # Validate the job through a throwaway domain object. The real job
        # id is only known once the model has been created, so a placeholder
        # id is used here.
        classifier_domain.ClassifierTrainingJob(
            'job_id_dummy', algorithm_id, interaction_id, exploration_id,
            exploration_version, scheduled_check_time, state_name,
            feconf.TRAINING_JOB_STATUS_NEW, training_data,
            algorithm_version).validate()

        pending_job_dicts.append({
            'algorithm_id': algorithm_id,
            'interaction_id': interaction_id,
            'exp_id': exploration_id,
            'exp_version': exploration_version,
            'next_scheduled_check_time': scheduled_check_time,
            'state_name': state_name,
            'training_data': training_data,
            'status': feconf.TRAINING_JOB_STATUS_NEW,
            'algorithm_version': algorithm_version
        })

    # Create all the classifier training jobs in one batch.
    created_job_ids = (
        classifier_models.ClassifierTrainingJobModel.create_multi(
            pending_job_dicts))

    # Create a mapping for each job. Unlike ClassifierTrainingJob (whose
    # job_id is unknown before creation), every attribute needed to build a
    # StateTrainingJobsMapping domain object is available at this point, so
    # domain objects are collected and handed to create_multi directly.
    mappings_to_create = []
    for position, job_id in enumerate(created_job_ids):
        # The i-th returned job id is paired with the i-th submitted dict.
        job_dict = pending_job_dicts[position]
        mapping = classifier_domain.StateTrainingJobsMapping(
            job_dict['exp_id'],
            job_dict['exp_version'],
            job_dict['state_name'],
            {job_dict['algorithm_id']: job_id})
        mapping.validate()
        mappings_to_create.append(mapping)

    classifier_models.StateTrainingJobsMappingModel.create_multi(
        mappings_to_create)
def migrate_state_training_jobs(state_training_jobs_mapping):
    """Migrates exploration training jobs to the latest algorithm_id and
    algorithm_version.

    This function lazily migrates older classifier training jobs and trains
    new classifiers. Specifically, if the training job mapping of an
    <exploration, version, state> triplet is missing a job_id for some
    algorithm_id, or if the job_id exists but was trained on a now obsolete
    algorithm, the jobs are re-submitted.

    The existing mapping is partitioned into three groups of algorithm IDs:
        1. algorithm_ids_to_upgrade: Those which exist in the mapping and
            are still supported; their training jobs are re-submitted with
            the current algorithm version.
        2. algorithm_ids_to_add: Those which do not exist in the mapping
            yet; a new training job is submitted for each.
        3. algorithm_ids_to_remove: Those which are no longer supported;
            their training jobs are deleted and they are dropped from the
            mapping.

    The migration is called lazy because it happens only when there is a
    query to retrieve a trained model for a given <exploration, version,
    state> and algorithm_id.

    Args:
        state_training_jobs_mapping: StateTrainingJobsMapping. Domain object
            containing the exploration to training job id mapping. It is
            used to figure out which jobs need to be re-submitted, added or
            removed, and its algorithm_ids_to_job_ids dict is updated in
            place.
    """
    exp_id = state_training_jobs_mapping.exp_id
    exp_version = state_training_jobs_mapping.exp_version
    state_name = state_training_jobs_mapping.state_name

    exploration = exp_fetchers.get_exploration_by_id(
        exp_id, version=exp_version)
    state = exploration.states[state_name]
    interaction_id = state.interaction.id

    classifier_info = feconf.INTERACTION_CLASSIFIER_MAPPING[interaction_id]
    algorithm_id = classifier_info['algorithm_id']
    algorithm_version = classifier_info['algorithm_version']
    algorithm_id_to_algorithm_version = {algorithm_id: algorithm_version}

    # Only a single algorithm id per interaction type is supported for now,
    # hence the one-element list. Once multiple algorithm ids are supported
    # (see issue #10217), the possible ids can be retrieved from
    # feconf.INTERACTION_CLASSIFIER_MAPPING instead.
    possible_algorithm_ids = [algorithm_id]

    # Partition the ids before anything is mutated below.
    supported_ids = set(possible_algorithm_ids)
    mapped_ids = set(state_training_jobs_mapping.algorithm_ids_to_job_ids)
    algorithm_ids_to_add = supported_ids - mapped_ids
    algorithm_ids_to_remove = mapped_ids - supported_ids
    algorithm_ids_to_upgrade = supported_ids & mapped_ids

    if algorithm_ids_to_add:
        new_job_dicts = []
        for new_algorithm_id in algorithm_ids_to_add:
            scheduled_check_time = datetime.datetime.utcnow()
            training_data = state.get_training_data()

            # Validate through a throwaway domain object; the real job id
            # is only assigned when the model is created.
            classifier_domain.ClassifierTrainingJob(
                'job_id_dummy', new_algorithm_id, interaction_id, exp_id,
                exp_version, scheduled_check_time, state_name,
                feconf.TRAINING_JOB_STATUS_NEW, training_data,
                algorithm_version).validate()

            new_job_dicts.append({
                'algorithm_id': new_algorithm_id,
                'interaction_id': interaction_id,
                'exp_id': exp_id,
                'exp_version': exp_version,
                'next_scheduled_check_time': scheduled_check_time,
                'state_name': state_name,
                'training_data': training_data,
                'status': feconf.TRAINING_JOB_STATUS_NEW,
                'algorithm_version': algorithm_version
            })

        new_job_ids = (
            classifier_models.ClassifierTrainingJobModel.create_multi(
                new_job_dicts))

        # The set is iterated again in the same order as above, so each id
        # lines up with the job created for it.
        for added_algorithm_id, new_job_id in python_utils.ZIP(
                algorithm_ids_to_add, new_job_ids):
            state_training_jobs_mapping.algorithm_ids_to_job_ids[
                added_algorithm_id] = new_job_id

    # Re-submit existing jobs: reset them to NEW with the current version.
    for upgraded_algorithm_id in algorithm_ids_to_upgrade:
        job_model = classifier_models.ClassifierTrainingJobModel.get_by_id(
            state_training_jobs_mapping.algorithm_ids_to_job_ids[
                upgraded_algorithm_id])
        job_model.algorithm_version = (
            algorithm_id_to_algorithm_version[upgraded_algorithm_id])
        job_model.next_scheduled_check_time = datetime.datetime.utcnow()
        job_model.status = feconf.TRAINING_JOB_STATUS_NEW
        job_model.update_timestamps()
        job_model.put()

    # Drop jobs for algorithms that are no longer supported.
    for removed_algorithm_id in algorithm_ids_to_remove:
        delete_classifier_training_job(
            state_training_jobs_mapping.algorithm_ids_to_job_ids[
                removed_algorithm_id])
        state_training_jobs_mapping.algorithm_ids_to_job_ids.pop(
            removed_algorithm_id)

    # Persist the updated mapping. The model is fetched before the domain
    # object is validated, matching the established order of operations.
    mapping_model = classifier_models.StateTrainingJobsMappingModel.get_model(
        exp_id, exp_version, state_name)
    state_training_jobs_mapping.validate()
    mapping_model.algorithm_ids_to_job_ids = (
        state_training_jobs_mapping.algorithm_ids_to_job_ids)
    mapping_model.update_timestamps()
    mapping_model.put()