Ejemplo n.º 1
0
    def test_store_classifier_data(self):
        """Checks that store_classifier_data replaces a job's stored data.

        A pending training job is created with an empty dict passed as its
        classifier data. The data read back through the job is checked both
        before and after store_classifier_data is called with a new proto.
        """
        algorithm_id = (
            feconf.INTERACTION_CLASSIFIER_MAPPING['TextInput']['algorithm_id'])
        job_id = self._create_classifier_training_job(
            algorithm_id, 'TextInput', u'1', 1, datetime.datetime.utcnow(), [],
            'Home', feconf.TRAINING_JOB_STATUS_PENDING, {}, 1)

        # Before storing anything new, the data fetched for the job should
        # decode to the empty dict supplied at creation time.
        initial_job = classifier_services.get_classifier_training_job_by_id(
            job_id)
        initial_data = self._get_classifier_data_from_classifier_training_job(
            initial_job)
        self.assertEqual(json.loads(initial_data.model_json), {})

        new_proto = text_classifier_pb2.TextClassifierFrozenModel()
        new_proto.model_json = json.dumps({'classifier_data': 'data'})
        classifier_services.store_classifier_data(job_id, new_proto)

        # Fetch the job again so the assertion reflects the stored state and
        # not the objects retrieved earlier.
        updated_job = classifier_services.get_classifier_training_job_by_id(
            job_id)
        updated_data = self._get_classifier_data_from_classifier_training_job(
            updated_job)
        self.assertDictEqual(
            json.loads(updated_data.model_json), {'classifier_data': 'data'})
Ejemplo n.º 2
0
 def test_save_and_get_classifier_data(self) -> None:
     """Checks that saved classifier data can be read back unchanged.

     The proto prepared on the test instance is saved for ('exp_id',
     'job_id'); the file is then fetched directly from the exploration's
     file system, decompressed, parsed, and compared on model_json.
     """
     fs_services.save_classifier_data(
         'exp_id', 'job_id', self.classifier_data_proto)

     file_system = fs_services.GcsFileSystem(
         feconf.ENTITY_TYPE_EXPLORATION, 'exp_id')
     raw_data = utils.decompress_from_zlib(
         file_system.get('job_id-classifier-data.pb.xz'))

     retrieved_proto = text_classifier_pb2.TextClassifierFrozenModel()
     retrieved_proto.ParseFromString(raw_data)
     self.assertEqual(
         retrieved_proto.model_json, self.classifier_data_proto.model_json)
Ejemplo n.º 3
0
 def setUp(self) -> None:
     """Prepares a file system handle and a sample classifier data proto."""
     super(FileSystemClassifierDataTests, self).setUp()
     self.fs = fs_services.GcsFileSystem(
         feconf.ENTITY_TYPE_EXPLORATION, 'exp_id')

     # Sample payload mixing a scalar, a list of floats and a nested dict.
     model_params = {
         'param1': 40,
         'param2': [34.2, 54.13, 95.23],
         'submodel': {'param1': 12},
     }
     frozen_model = text_classifier_pb2.TextClassifierFrozenModel()
     frozen_model.model_json = json.dumps(model_params)
     self.classifier_data_proto = frozen_model
 def _create_classifier_training_job(
         self, algorithm_id, interaction_id, exp_id, exp_version,
         next_scheduled_check_time, training_data, state_name, status,
         classifier_data, algorithm_version):
     """Creates a classifier training job model and saves its classifier
     data to a file.

     All arguments except classifier_data are forwarded positionally to
     ClassifierTrainingJobModel.create. classifier_data is serialized with
     json.dumps into the model_json field of a TextClassifierFrozenModel,
     which is then saved under the given exp_id and the new job id.

     Returns:
         The job id returned by ClassifierTrainingJobModel.create.
     """
     new_job_id = classifier_models.ClassifierTrainingJobModel.create(
         algorithm_id, interaction_id, exp_id, exp_version,
         next_scheduled_check_time, training_data, state_name, status,
         algorithm_version)

     frozen_model = text_classifier_pb2.TextClassifierFrozenModel()
     frozen_model.model_json = json.dumps(classifier_data)
     fs_services.save_classifier_data(exp_id, new_job_id, frozen_model)
     return new_job_id
Ejemplo n.º 5
0
    def setUp(self):
        """Creates two explorations and one NEW training job for each.

        The resulting job models are exposed as model_instance_0 and
        model_instance_1, and job_class is set to the audit one-off job.
        """
        super(ClassifierTrainingJobModelValidatorTests, self).setUp()

        self.signup(self.OWNER_EMAIL, self.OWNER_USERNAME)

        self.owner_id = self.get_user_id_from_email(self.OWNER_EMAIL)

        explorations = []
        for i in python_utils.RANGE(2):
            explorations.append(
                exp_domain.Exploration.create_default_exploration(
                    '%s' % i,
                    title='title %d' % i,
                    category='category%d' % i))

        for exploration in explorations:
            exploration.add_states(['StateTest%s' % exploration.id])
            exp_services.save_new_exploration(self.owner_id, exploration)

        next_scheduled_check_time = datetime.datetime.utcnow()
        classifier_data_proto = text_classifier_pb2.TextClassifierFrozenModel()
        classifier_data_proto.model_json = json.dumps(
            {'classifier_data': 'data'})

        # One job per exploration: create the model, save its classifier
        # data, then read the stored model back, in that order.
        model_instances = []
        for exp_id, state_name in (('0', 'StateTest0'), ('1', 'StateTest1')):
            job_id = classifier_models.ClassifierTrainingJobModel.create(
                'TextClassifier', 'TextInput', exp_id, 1,
                next_scheduled_check_time,
                [{'answer_group_index': 1, 'answers': ['a1', 'a2']}],
                state_name, feconf.TRAINING_JOB_STATUS_NEW, 1)
            # NOTE(review): the first argument is the string
            # 'TextClassifier' rather than the exploration id given to
            # create() above -- confirm this is intended.
            fs_services.save_classifier_data(
                'TextClassifier', job_id, classifier_data_proto)
            model_instances.append(
                classifier_models.ClassifierTrainingJobModel.get_by_id(job_id))
        self.model_instance_0, self.model_instance_1 = model_instances

        self.job_class = (
            prod_validation_jobs_one_off
            .ClassifierTrainingJobModelAuditOneOffJob)
Ejemplo n.º 6
0
    def setUp(self):
        """Creates a NEW training job and a signed next-job request payload.

        Also records the response expected for that job in
        expected_response.
        """
        super(NextJobHandlerTest, self).setUp()

        self.exp_id = 'exp_id1'
        self.title = 'Testing Classifier storing'
        self.category = 'Test'
        interaction_id = 'TextInput'
        classifier_config = (
            feconf.INTERACTION_CLASSIFIER_MAPPING[interaction_id])
        self.algorithm_id = classifier_config['algorithm_id']
        self.algorithm_version = classifier_config['algorithm_version']
        self.training_data = [
            {u'answer_group_index': 1, u'answers': [u'a1', u'a2']},
            {u'answer_group_index': 2, u'answers': [u'a2', u'a3']},
        ]
        self.job_id = classifier_models.ClassifierTrainingJobModel.create(
            self.algorithm_id, interaction_id, self.exp_id, 1,
            datetime.datetime.utcnow(), self.training_data, 'Home',
            feconf.TRAINING_JOB_STATUS_NEW, 1)

        # Save a proto with an empty model_json for the new job.
        empty_model = text_classifier_pb2.TextClassifierFrozenModel()
        empty_model.model_json = ''
        self.classifier_data = empty_model
        fs_services.save_classifier_data(
            self.exp_id, self.job_id, self.classifier_data)

        self.expected_response = {
            u'job_id': self.job_id,
            u'training_data': self.training_data,
            u'algorithm_id': self.algorithm_id,
            u'algorithm_version': self.algorithm_version
        }

        # The signature is generated from the shared secret, the message
        # and the VM id, so it is added once the other two fields are set.
        vm_id = feconf.DEFAULT_VM_ID
        message = json.dumps({})
        self.payload = {'vm_id': vm_id, 'message': message}
        self.payload['signature'] = classifier_services.generate_signature(
            python_utils.convert_to_bytes(feconf.DEFAULT_VM_SHARED_SECRET),
            python_utils.convert_to_bytes(message),
            vm_id)
Ejemplo n.º 7
0
    def setUp(self):
        """Loads a test exploration and builds signed classifier payloads.

        The exploration is saved from string_classifier_test.yaml with
        ENABLE_ML_CLASSIFIERS swapped to True. The training job found for
        its 'Home' state is marked pending. Two signed payloads are then
        prepared: a TrainingJobResponsePayload proto carrying the classifier
        data, and a dict for a fetch-next-job request.
        """
        super(TrainedClassifierHandlerTests, self).setUp()

        self.exp_id = 'exp_id1'
        self.title = 'Testing Classifier storing'
        self.category = 'Test'
        yaml_path = os.path.join(
            feconf.TESTS_DATA_DIR, 'string_classifier_test.yaml')
        with python_utils.open_file(yaml_path, 'r') as yaml_file:
            self.yaml_content = yaml_file.read()
        self.signup(
            self.CURRICULUM_ADMIN_EMAIL, self.CURRICULUM_ADMIN_USERNAME)
        self.signup('*****@*****.**', 'mod')

        assets_list = []
        with self.swap(feconf, 'ENABLE_ML_CLASSIFIERS', True):
            exp_services.save_new_exploration_from_yaml_and_assets(
                feconf.SYSTEM_COMMITTER_ID, self.yaml_content, self.exp_id,
                assets_list)
        self.exploration = exp_fetchers.get_exploration_by_id(self.exp_id)

        # The algorithm id and version both come from the mapping entry for
        # the 'Home' state's interaction.
        home_interaction_id = self.exploration.states['Home'].interaction.id
        classifier_config = (
            feconf.INTERACTION_CLASSIFIER_MAPPING[home_interaction_id])
        self.algorithm_id = classifier_config['algorithm_id']
        self.algorithm_version = classifier_config['algorithm_version']

        self.classifier_data = {
            '_alpha': 0.1,
            '_beta': 0.001,
            '_prediction_threshold': 0.5,
            '_training_iterations': 25,
            '_prediction_iterations': 5,
            '_num_labels': 10,
            '_num_docs': 12,
            '_num_words': 20,
            '_label_to_id': {'text': 1},
            '_word_to_id': {'hello': 2},
            '_w_dp': [],
            '_b_dl': [],
            '_l_dp': [],
            '_c_dl': [],
            '_c_lw': [],
            '_c_l': [],
        }
        training_job = classifier_services.get_classifier_training_job(
            self.exp_id, self.exploration.version, 'Home', self.algorithm_id)
        self.assertIsNotNone(training_job)
        self.job_id = training_job.job_id

        # TODO(pranavsid98): Replace the three commands below with
        # mark_training_job_pending after Giritheja's PR gets merged.
        job_model = classifier_models.ClassifierTrainingJobModel.get(
            self.job_id, strict=False)
        job_model.status = feconf.TRAINING_JOB_STATUS_PENDING
        job_model.update_timestamps()
        job_model.put()

        # Build the job result: the job id plus the classifier data
        # serialized as JSON inside a frozen text classifier model.
        self.job_result = (
            training_job_response_payload_pb2
            .TrainingJobResponsePayload.JobResult())
        self.job_result.job_id = self.job_id

        frozen_model = text_classifier_pb2.TextClassifierFrozenModel()
        frozen_model.model_json = json.dumps(self.classifier_data)
        self.job_result.text_classifier.CopyFrom(frozen_model)

        self.payload_proto = (
            training_job_response_payload_pb2.TrainingJobResponsePayload())
        self.payload_proto.job_result.CopyFrom(self.job_result)
        self.payload_proto.vm_id = feconf.DEFAULT_VM_ID
        self.secret = feconf.DEFAULT_VM_SHARED_SECRET
        # The signature is computed over the serialized job result held by
        # the payload proto.
        serialized_job_result = (
            self.payload_proto.job_result.SerializeToString())
        self.payload_proto.signature = classifier_services.generate_signature(
            python_utils.convert_to_bytes(self.secret),
            python_utils.convert_to_bytes(serialized_job_result),
            self.payload_proto.vm_id)

        # Separate payload for requesting the next job: an empty JSON
        # message signed with the same secret and VM id.
        next_job_payload = {
            'vm_id': feconf.DEFAULT_VM_ID,
            'message': json.dumps({})
        }
        self.payload_for_fetching_next_job_request = next_job_payload
        next_job_payload['signature'] = classifier_services.generate_signature(
            python_utils.convert_to_bytes(self.secret),
            python_utils.convert_to_bytes(next_job_payload['message']),
            next_job_payload['vm_id'])