def test_intializing_embeds_from_config(self):
    """Check the embedding weights produced for two init strategies.

    For ``EmbedInitStrategy.RANDOM`` and then ``EmbedInitStrategy.ZERO`` a
    ``JointModelDataHandler`` is built, its metadata is initialized from
    ``TRAIN_FILE``/``EVAL_FILE``/``TEST_FILE``, and row 11 of the text
    field's ``pretrained_embeds_weight`` is inspected.
    """
    embed_dim = 5
    # NOTE(review): row 11 is presumably a vocabulary entry that has no
    # pretrained vector, so it falls back to the init strategy -- confirm.
    probe_row = 11
    zeros = [0] * embed_dim

    def embedding_row_for(init_strategy):
        """Build a handler for ``init_strategy`` and return the probed row."""
        config = FeatureConfig(
            word_feat=WordFeatConfig(
                embedding_init_strategy=init_strategy,
                embed_dim=embed_dim,
                pretrained_embeddings_path=tests_module.TEST_BASE_DIR,
            )
        )
        handler_config = JointModelDataHandler.Config()
        label_configs = [DocLabelConfig(), WordLabelConfig()]
        featurizer = SimpleFeaturizer.from_config(SimpleFeaturizer.Config(), config)
        handler = JointModelDataHandler.from_config(
            handler_config, config, label_configs, featurizer=featurizer
        )
        handler.init_metadata_from_path(TRAIN_FILE, EVAL_FILE, TEST_FILE)
        text_field_meta = handler.metadata.features[DatasetFieldName.TEXT_FIELD]
        return text_field_meta.pretrained_embeds_weight[probe_row].numpy()

    # Random initialization: every component must be strictly non-zero.
    random_row = embedding_row_for(EmbedInitStrategy.RANDOM)
    np.testing.assert_array_less(zeros, np.absolute(random_row))

    # Zero initialization: every component must be exactly zero.
    zero_row = embedding_row_for(EmbedInitStrategy.ZERO)
    np.testing.assert_array_equal(zeros, zero_row)
class Config(Task.Config):
    """Task configuration extending ``Task.Config``.

    Every field defaults to the default-constructed config of its
    annotated type.
    """

    # Model configuration (SeqNNModel).
    model: SeqNNModel.Config = SeqNNModel.Config()
    # Trainer configuration.
    trainer: Trainer.Config = Trainer.Config()
    # A single document-level label configuration.
    labels: DocLabelConfig = DocLabelConfig()
    # Data handler configuration (SeqModelDataHandler).
    data_handler: SeqModelDataHandler.Config = SeqModelDataHandler.Config()
    # Metric reporter configuration (ClassificationMetricReporter).
    metric_reporter: ClassificationMetricReporter.Config = ClassificationMetricReporter.Config()
def example_config(cls):
    """Return an example ``cls.Config`` with an ensemble of one doc model.

    The config carries a document label config and a word label config, and
    a ``BaggingDocEnsemble_Deprecated`` model config whose ``models`` list
    holds a single default ``DocModel_Deprecated.Config``.
    """
    label_configs = [DocLabelConfig(), WordLabelConfig()]
    member_models = [DocModel_Deprecated.Config()]
    ensemble_config = BaggingDocEnsemble_Deprecated.Config(models=member_models)
    return cls.Config(labels=label_configs, model=ensemble_config)
def setUp(self):
    """Create ``self.data_handler`` from default configs.

    The handler and its featurizer each receive their own freshly
    constructed ``FeatureConfig``.
    """
    handler_config = JointModelDataHandler.Config()
    handler_features = FeatureConfig()
    label_configs = [DocLabelConfig(), WordLabelConfig()]
    featurizer = SimpleFeaturizer.from_config(
        SimpleFeaturizer.Config(), FeatureConfig()
    )
    self.data_handler = JointModelDataHandler.from_config(
        handler_config,
        handler_features,
        label_configs,
        featurizer=featurizer,
    )
class Config(Task_Deprecated.Config):
    """Task configuration extending ``Task_Deprecated.Config``.

    Every field except ``exporter`` defaults to the default-constructed
    config of its annotated type; ``exporter`` defaults to ``None``.
    """

    # Model configuration (SeqNNModel_Deprecated).
    model: SeqNNModel_Deprecated.Config = SeqNNModel_Deprecated.Config()
    # Trainer configuration.
    trainer: Trainer.Config = Trainer.Config()
    # A single document-level label configuration.
    labels: DocLabelConfig = DocLabelConfig()
    # Data handler configuration (SeqModelDataHandler).
    data_handler: SeqModelDataHandler.Config = SeqModelDataHandler.Config()
    # Metric reporter configuration (ClassificationMetricReporter).
    metric_reporter: ClassificationMetricReporter.Config = ClassificationMetricReporter.Config()
    # Optional exporter configuration; no exporter config is set by default.
    exporter: Optional[DenseFeatureExporter.Config] = None
def setUp(self):
    """Build ``self.dh`` and read the tiny training file into ``self.data``.

    The data handler is created from default configs. ``self.data`` holds
    whatever ``read_from_file`` returns for the handler's ``raw_columns``.
    """
    train_path = tests_module.test_file("contextual_intent_slot_train_tiny.tsv")

    handler_config = ContextualIntentSlotModelDataHandler.Config()
    handler_inputs = ModelInputConfig()
    label_configs = [DocLabelConfig(), WordLabelConfig()]
    featurizer = SimpleFeaturizer(SimpleFeaturizer.Config(), ModelInputConfig())

    self.dh = ContextualIntentSlotModelDataHandler.from_config(
        handler_config,
        handler_inputs,
        label_configs,
        featurizer=featurizer,
    )
    self.data = self.dh.read_from_file(train_path, self.dh.raw_columns)
def test_read_file_with_dense_features(self):
    """Check that the dense-feature column is read from the dense TSV file.

    A data handler is configured to read ``ModelInput.DENSE`` in addition
    to its default columns. The first row read from
    ``contextual_intent_slot_train_tiny_dense.tsv`` must carry the raw
    string ``"[0,1,2,3,4]"`` in that column.
    """
    data_handler_config = ContextualIntentSlotModelDataHandler.Config()
    # Rebind a new list instead of appending in place: if the default
    # ``columns_to_read`` list is shared at class level (not visible here),
    # an in-place append would leak the extra column into every other
    # config created afterwards, including those built by other tests.
    data_handler_config.columns_to_read = [
        *data_handler_config.columns_to_read,
        ModelInput.DENSE,
    ]

    dense_file_name = tests_module.test_file(
        "contextual_intent_slot_train_tiny_dense.tsv"
    )
    data_handler = ContextualIntentSlotModelDataHandler.from_config(
        data_handler_config,
        ModelInputConfig(),
        [DocLabelConfig(), WordLabelConfig()],
        featurizer=SimpleFeaturizer(SimpleFeaturizer.Config(), ModelInputConfig()),
    )

    # Materialize the rows so the first one can be indexed.
    dense_data = list(
        data_handler.read_from_file(dense_file_name, data_handler.raw_columns)
    )
    self.assertEqual(dense_data[0][ModelInput.DENSE], "[0,1,2,3,4]")
def setUp(self):
    """Prepare in-memory train/eval/test rows and a ``SeqModelDataHandler``.

    Each row maps ``DFColumn.DOC_LABEL`` to a label string and
    ``DFColumn.UTTERANCE`` to a JSON-encoded list of utterance strings.
    """

    def make_row(doc_label, utterances_json):
        """Return one data row keyed by the label and utterance columns."""
        return {
            DFColumn.DOC_LABEL: doc_label,
            DFColumn.UTTERANCE: utterances_json,
        }

    self.train_data = [
        make_row("cu:discuss_where", '["where do you wanna meet?", "MPK"]'),
    ]
    self.eval_data = [
        make_row("cu:discuss_where", '["how about SF?", "sounds good"]'),
        make_row("cu:other", '["lol"]'),
    ]
    self.test_data = [
        make_row("cu:discuss_where", '["MPK sounds good to me"]'),
        make_row("cu:other", '["great", "awesome"]'),
    ]

    handler_config = SeqModelDataHandler.Config()
    handler_features = FeatureConfig()
    # A single label config is passed here, not a list.
    label_config = DocLabelConfig()
    featurizer = SimpleFeaturizer.from_config(
        SimpleFeaturizer.Config(), FeatureConfig()
    )
    self.dh = SeqModelDataHandler.from_config(
        handler_config,
        handler_features,
        label_config,
        featurizer=featurizer,
    )
def example_config(cls):
    """Return an example ``cls.Config`` with doc and word label configs.

    All other fields of ``cls.Config`` are left at their defaults.
    """
    label_configs = [DocLabelConfig(), WordLabelConfig()]
    return cls.Config(labels=label_configs)