Esempio n. 1
0
def get_classifier_training_job_from_model(classifier_training_job_model):
    """Builds the ClassifierTrainingJob domain object that corresponds to a
    classifier training job model.

    Args:
        classifier_training_job_model: ClassifierTrainingJobModel. Classifier
            training job instance in datastore.

    Returns:
        ClassifierTrainingJob. Domain object for the classifier training job,
        populated with the field values read from the model.
    """
    # Short alias for readability. The constructor arguments are passed
    # positionally, so their order below is significant.
    model = classifier_training_job_model
    return classifier_domain.ClassifierTrainingJob(
        model.id, model.algorithm_id, model.interaction_id, model.exp_id,
        model.exp_version, model.next_scheduled_check_time, model.state_name,
        model.status, model.training_data, model.algorithm_version)
Esempio n. 2
0
def handle_trainable_states(exploration, state_names):
    """Submits a new classifier training job for each of the given states.

    For every state name, a ClassifierTrainingJobModel with status
    feconf.TRAINING_JOB_STATUS_NEW is created, followed by a
    StateTrainingJobsMappingModel which links the
    <exploration, version, state> triplet to the ID of the new job.

    Args:
        exploration: Exploration. The Exploration domain object.
        state_names: list(str). List of state names.
    """
    exp_id = exploration.id
    exp_version = exploration.version
    new_job_dicts = []

    for state_name in state_names:
        state = exploration.states[state_name]
        training_data = state.get_training_data()
        interaction_id = state.interaction.id
        classifier_info = feconf.INTERACTION_CLASSIFIER_MAPPING[interaction_id]

        job_dict = {
            'algorithm_id': classifier_info['algorithm_id'],
            'interaction_id': interaction_id,
            'exp_id': exp_id,
            'exp_version': exp_version,
            'next_scheduled_check_time': datetime.datetime.utcnow(),
            'state_name': state_name,
            'training_data': training_data,
            'status': feconf.TRAINING_JOB_STATUS_NEW,
            'algorithm_version': classifier_info['algorithm_version']
        }

        # The real job ID is only known once the model has been created, so
        # a throwaway domain object with a placeholder ID is used to validate
        # the remaining fields up front.
        classifier_domain.ClassifierTrainingJob(
            'job_id_dummy', job_dict['algorithm_id'],
            job_dict['interaction_id'], job_dict['exp_id'],
            job_dict['exp_version'], job_dict['next_scheduled_check_time'],
            job_dict['state_name'], job_dict['status'],
            job_dict['training_data'], job_dict['algorithm_version']
        ).validate()

        new_job_dicts.append(job_dict)

    # Create all the classifier training jobs in a single batch.
    job_ids = classifier_models.ClassifierTrainingJobModel.create_multi(
        new_job_dicts)

    # Unlike ClassifierTrainingJob, every attribute needed for a
    # StateTrainingJobsMapping domain object is known at this point, so the
    # domain objects themselves are built, validated and handed over to
    # create_multi. The i-th job ID is paired with the i-th job dict.
    mappings = []
    for index, job_id in enumerate(job_ids):
        job_dict = new_job_dicts[index]
        mapping = classifier_domain.StateTrainingJobsMapping(
            job_dict['exp_id'], job_dict['exp_version'],
            job_dict['state_name'], {job_dict['algorithm_id']: job_id})
        mapping.validate()
        mappings.append(mapping)

    classifier_models.StateTrainingJobsMappingModel.create_multi(mappings)
Esempio n. 3
0
def migrate_state_training_jobs(state_training_jobs_mapping):
    """Migrate exploration training jobs to latest version of algorithm_id
    and algorithm_version.

    This function lazily migrates an older classifier training job and
    trains new classifiers. Specifically, if the training job exploration
    mapping of an <exploration, version, state> triplet is missing a job_id
    for some algorithm_id, or if the job_id exists but it has been trained on
    a now obsolete algorithm, we re-submit the jobs.

    The function goes through the existing training job exploration mapping
    and identifies three different types of algorithm IDs.
        1. algorithm_ids_to_upgrade: Those which exist in the mapping and are
            still supported. Their training jobs are re-submitted with the
            latest algorithm version.
        2. algorithm_ids_to_add: Those which don't exist in the mapping and
            need to be added by submitting a new training job.
        3. algorithm_ids_to_remove: Those which need to be removed since
            these algorithms are no longer supported.

    Once all three types of algorithm IDs are filtered, the function performs
    specific tasks tailored to each of them. We call this a lazy migration
    because it happens only when there is a query to retrieve a trained model
    for given <exploration, version, state> and algorithm_id.

    The passed-in domain object is mutated in place (its
    algorithm_ids_to_job_ids dict gains and loses entries) and the result is
    written back to the corresponding StateTrainingJobsMappingModel.

    Args:
        state_training_jobs_mapping: StateTrainingJobsMapping. Domain
            object containing exploration to training job id mapping. This
            mapping is used to figure out jobs that need to be re-submitted,
            added or removed.
    """
    exp_id = state_training_jobs_mapping.exp_id
    exp_version = state_training_jobs_mapping.exp_version
    state_name = state_training_jobs_mapping.state_name

    exploration = exp_fetchers.get_exploration_by_id(
        exp_id, version=exp_version)
    state = exploration.states[state_name]
    interaction_id = state.interaction.id

    classifier_info = feconf.INTERACTION_CLASSIFIER_MAPPING[interaction_id]
    algorithm_id_to_algorithm_version = {
        classifier_info['algorithm_id']: classifier_info['algorithm_version']
    }

    # The collection below contains only one element because as of now we
    # only support a single algorithm id per interaction type. However once
    # the support for multiple algorithm ids (see issue #10217) is added, the
    # possible algorithm ids can be retrieved from
    # feconf.INTERACTION_CLASSIFIER_MAPPING.
    possible_algorithm_ids = set(algorithm_id_to_algorithm_version)

    # Snapshot the currently mapped algorithm ids before the mapping is
    # mutated below, so that the three groups are computed from one state.
    existing_algorithm_ids = set(
        state_training_jobs_mapping.algorithm_ids_to_job_ids.keys())

    algorithm_ids_to_add = possible_algorithm_ids - existing_algorithm_ids
    algorithm_ids_to_remove = existing_algorithm_ids - possible_algorithm_ids
    algorithm_ids_to_upgrade = possible_algorithm_ids & existing_algorithm_ids

    if algorithm_ids_to_add:
        # Fix the order once, so that the i-th job dict (and therefore the
        # i-th job id paired with it below) belongs to the i-th algorithm id.
        ordered_algorithm_ids_to_add = list(algorithm_ids_to_add)
        # The training data depends only on the state, not on the algorithm.
        training_data = state.get_training_data()

        job_dicts = []
        for algorithm_id_to_add in ordered_algorithm_ids_to_add:
            # Look up the version per algorithm id, exactly as the upgrade
            # branch does, instead of reusing a single outer value.
            algorithm_version_to_add = (
                algorithm_id_to_algorithm_version[algorithm_id_to_add])
            next_scheduled_check_time = datetime.datetime.utcnow()

            # The real job id is not known until the model is created, so a
            # placeholder id is used purely to validate the other fields.
            classifier_domain.ClassifierTrainingJob(
                'job_id_dummy', algorithm_id_to_add, interaction_id, exp_id,
                exp_version, next_scheduled_check_time, state_name,
                feconf.TRAINING_JOB_STATUS_NEW, training_data,
                algorithm_version_to_add).validate()

            job_dicts.append({
                'algorithm_id': algorithm_id_to_add,
                'interaction_id': interaction_id,
                'exp_id': exp_id,
                'exp_version': exp_version,
                'next_scheduled_check_time': next_scheduled_check_time,
                'state_name': state_name,
                'training_data': training_data,
                'status': feconf.TRAINING_JOB_STATUS_NEW,
                'algorithm_version': algorithm_version_to_add
            })

        job_ids = classifier_models.ClassifierTrainingJobModel.create_multi(
            job_dicts)

        for algorithm_id_to_add, job_id in python_utils.ZIP(
                ordered_algorithm_ids_to_add, job_ids):
            state_training_jobs_mapping.algorithm_ids_to_job_ids[
                algorithm_id_to_add] = job_id

    for algorithm_id_to_upgrade in algorithm_ids_to_upgrade:
        # NOTE(review): presumably get_by_id returns None when the job model
        # no longer exists, in which case the attribute assignments below
        # would raise AttributeError -- confirm whether that can happen.
        classifier_training_job = (
            classifier_models.ClassifierTrainingJobModel.get_by_id(
                state_training_jobs_mapping.algorithm_ids_to_job_ids[
                    algorithm_id_to_upgrade]))
        # Re-submit the existing job: bump the algorithm version and reset
        # the status to NEW.
        classifier_training_job.algorithm_version = (
            algorithm_id_to_algorithm_version[algorithm_id_to_upgrade])
        classifier_training_job.next_scheduled_check_time = (
            datetime.datetime.utcnow())
        classifier_training_job.status = feconf.TRAINING_JOB_STATUS_NEW
        classifier_training_job.update_timestamps()
        classifier_training_job.put()

    for algorithm_id_to_remove in algorithm_ids_to_remove:
        delete_classifier_training_job(
            state_training_jobs_mapping.algorithm_ids_to_job_ids[
                algorithm_id_to_remove])
        state_training_jobs_mapping.algorithm_ids_to_job_ids.pop(
            algorithm_id_to_remove)

    # Persist the updated algorithm id -> job id mapping.
    state_training_jobs_mapping_model = (
        classifier_models.StateTrainingJobsMappingModel.get_model(
            exp_id, exp_version, state_name))
    state_training_jobs_mapping.validate()
    state_training_jobs_mapping_model.algorithm_ids_to_job_ids = (
        state_training_jobs_mapping.algorithm_ids_to_job_ids)
    state_training_jobs_mapping_model.update_timestamps()
    state_training_jobs_mapping_model.put()