class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for the three text inputs declared here.

    Each field is a ``TokenTensorizer.Config`` whose ``column`` is set to the
    field's own name: ``pos_response``, ``neg_response`` and ``query``.
    """

    pos_response: TokenTensorizer.Config = TokenTensorizer.Config(
        column="pos_response"
    )
    neg_response: TokenTensorizer.Config = TokenTensorizer.Config(
        column="neg_response"
    )
    query: TokenTensorizer.Config = TokenTensorizer.Config(column="query")
def get_tensorizers(add_dict_feat=False, add_contextual_feat=False):
    """Build the source/target token tensorizers and initialize them.

    The tensorizers are initialized from the ``train`` split of a
    ``TSVDataSource`` that reads ``TEST_FILE_NAME`` with the columns
    ``source_sequence``, ``dict_feat`` and ``target_sequence``.

    Args:
        add_dict_feat: when truthy, a ``GazetteerTensorizer`` is also created,
            stored under ``"dict_feat"`` and initialized on its own.
        add_contextual_feat: accepted but not read anywhere in this function.

    Returns:
        A dict with the keys ``"src_seq_tokens"`` and ``"trg_seq_tokens"``,
        plus ``"dict_feat"`` when ``add_dict_feat`` is truthy.
    """
    column_types = {
        "source_sequence": str,
        "dict_feat": Gazetteer,
        "target_sequence": str,
    }
    source_config = TSVDataSource.Config(
        train_filename=TEST_FILE_NAME,
        field_names=["source_sequence", "dict_feat", "target_sequence"],
    )
    data_source = TSVDataSource.from_config(source_config, column_types)

    # Both token tensorizers share the same settings and differ only in the
    # column they read, so they are built from a (key, column) table.
    token_columns = (
        ("src_seq_tokens", "source_sequence"),
        ("trg_seq_tokens", "target_sequence"),
    )
    tensorizers = {
        key: TokenTensorizer.from_config(
            TokenTensorizer.Config(
                column=column, add_eos_token=True, add_bos_token=True
            )
        )
        for key, column in token_columns
    }
    initialize_tensorizers(tensorizers, data_source.train)

    if not add_dict_feat:
        return tensorizers

    # The gazetteer tensorizer is initialized in a separate pass so the token
    # tensorizers above are not initialized a second time.
    gazetteer = GazetteerTensorizer.from_config(
        GazetteerTensorizer.Config(
            text_column="source_sequence", dict_column="dict_feat"
        )
    )
    tensorizers["dict_feat"] = gazetteer
    initialize_tensorizers({"dict_feat": gazetteer}, data_source.train)
    return tensorizers
class ModelInput(BasePairwiseModel.Config.ModelInput):
    """Tensorizer configs for a pair of texts, their label and the raw text.

    ``tokens1`` and ``tokens2`` read the ``text1`` and ``text2`` columns;
    ``raw_text`` joins those same two columns as strings.
    """

    tokens1: TokenTensorizer.Config = TokenTensorizer.Config(column="text1")
    tokens2: TokenTensorizer.Config = TokenTensorizer.Config(column="text2")
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
    # for metric reporter
    raw_text: JoinStringTensorizer.Config = JoinStringTensorizer.Config(
        columns=["text1", "text2"]
    )
def test_batch_predict_caffe2_model(self):
    """Compare batch Caffe2 predictions with the task's own predictions.

    A document classification task is created from the tiny TSV fixtures,
    exported to a temporary Caffe2 file and saved to a temporary snapshot.
    ``batch_predict_caffe2_model`` is then run three times (default cache
    size, ``cache_size=2`` and ``cache_size=-1``); each run must return four
    results whose scores almost-equal the scores from ``task.predict``.
    """
    with tempfile.NamedTemporaryFile() as snapshot_file, \
            tempfile.NamedTemporaryFile() as caffe2_model_file:
        snapshot_path = snapshot_file.name
        caffe2_path = caffe2_model_file.name

        train_data = tests_module.test_file("train_data_tiny.tsv")
        eval_data = tests_module.test_file("test_data_tiny.tsv")

        model_inputs = DocModel.Config.ModelInput(
            tokens=TokenTensorizer.Config(),
            dense=FloatListTensorizer.Config(
                column="dense", dim=1, error_check=True
            ),
            labels=LabelTensorizer.Config(),
        )
        source_config = TSVDataSource.Config(
            train_filename=train_data,
            eval_filename=eval_data,
            test_filename=eval_data,
            field_names=["label", "slots", "text", "dense"],
        )
        task_config = DocumentClassificationTask.Config(
            model=DocModel.Config(inputs=model_inputs),
            data=Data.Config(source=source_config),
        )
        config = PyTextConfig(
            task=task_config,
            version=21,
            save_snapshot_path=snapshot_path,
            export_caffe2_path=caffe2_path,
        )

        task = create_task(config.task)
        task.export(task.model, caffe2_path)
        save(config, task.model, meta=None, tensorizers=task.data.tensorizers)
        pt_results = task.predict(task.data.data_source.test)

        def assert_caffe2_results_correct(caffe2_results):
            # Pairs each task prediction with the Caffe2 result at the same
            # position and compares the first score of every output.
            for pt_res, res in zip(pt_results, caffe2_results):
                expected_scores = pt_res["score"].tolist()[0]
                caffe2_scores = [score[0] for score in res.values()]
                np.testing.assert_array_almost_equal(
                    expected_scores, caffe2_scores
                )

        # Same checks for the default cache size and two explicit ones.
        for extra_kwargs in ({}, {"cache_size": 2}, {"cache_size": -1}):
            results = batch_predict_caffe2_model(
                snapshot_path, caffe2_path, **extra_kwargs
            )
            self.assertEqual(4, len(results))
            assert_caffe2_results_correct(results)
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for tokens, word/doc labels and optional weights.

    Both label tensorizers are configured with ``allow_unknown=True``. The two
    weight tensorizers default to ``None``.
    """

    tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    word_labels: SlotLabelTensorizer.Config = SlotLabelTensorizer.Config(
        allow_unknown=True
    )
    doc_labels: LabelTensorizer.Config = LabelTensorizer.Config(
        allow_unknown=True
    )
    # Weights are opt-in: no tensorizer config is created unless one is given.
    doc_weight: Optional[FloatTensorizer.Config] = None
    word_weight: Optional[FloatTensorizer.Config] = None
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for tokens, word/doc labels and their weights.

    Both label tensorizers are configured with ``allow_unknown=True``. The
    weight tensorizers read the ``doc_weight`` and ``word_weight`` columns.
    """

    tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    word_labels: SlotLabelTensorizer.Config = SlotLabelTensorizer.Config(
        allow_unknown=True
    )
    doc_labels: LabelTensorizer.Config = LabelTensorizer.Config(
        allow_unknown=True
    )
    doc_weight: FloatTensorizer.Config = FloatTensorizer.Config(
        column="doc_weight"
    )
    word_weight: FloatTensorizer.Config = FloatTensorizer.Config(
        column="word_weight"
    )
def _get_tensorizers(self):
    """Return initialized source/target token tensorizers.

    The tensorizers are initialized from the ``train`` split of a
    ``TSVDataSource`` that reads the ``compositional_seq2seq_unit.tsv`` test
    file with the columns ``source_sequence`` and ``target_sequence``.

    Returns:
        A dict with the keys ``"src_seq_tokens"`` and ``"trg_seq_tokens"``.
    """

    def make_token_tensorizer(column):
        # Source and target use the same settings; only the column differs.
        return TokenTensorizer.from_config(
            TokenTensorizer.Config(
                column=column, add_eos_token=True, add_bos_token=True
            )
        )

    column_types = {"source_sequence": str, "target_sequence": str}
    source_config = TSVDataSource.Config(
        train_filename=tests_module.test_file("compositional_seq2seq_unit.tsv"),
        field_names=["source_sequence", "target_sequence"],
    )
    data_source = TSVDataSource.from_config(source_config, column_types)

    tensorizers = {
        "src_seq_tokens": make_token_tensorizer("source_sequence"),
        "trg_seq_tokens": make_token_tensorizer("target_sequence"),
    }
    initialize_tensorizers(tensorizers, data_source.train)
    return tensorizers
class ModelInput(Model.Config.ModelInput):
    """Single ``tokens`` input with both BOS and EOS token flags enabled."""

    tokens: TokenTensorizer.Config = TokenTensorizer.Config(
        add_bos_token=True,
        add_eos_token=True,
    )
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for ``tokens`` and ``labels``, both with defaults."""

    tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
class RegressionModelInput(DocModel.Config.ModelInput):
    """Doc-model inputs whose ``labels`` use ``NumericLabelTensorizer``.

    ``tokens`` keeps the default ``TokenTensorizer.Config``.
    """

    tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    labels: NumericLabelTensorizer.Config = NumericLabelTensorizer.Config()
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for tokens, labels and the raw ``text`` column.

    ``labels`` is configured with ``allow_unknown=True``.
    """

    tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    labels: LabelTensorizer.Config = LabelTensorizer.Config(
        allow_unknown=True
    )
    # for metric reporter
    raw_text: RawString.Config = RawString.Config(column="text")
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for tokens, labels and an optional dense input."""

    tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    # No dense tensorizer config is created unless one is supplied.
    dense: Optional[FloatListTensorizer.Config] = None
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
class ModelInput(BaseModel.Config.ModelInput):
    """Tensorizer configs for tokens and actions.

    ``tokens`` reads the ``tokenized_text`` column; ``actions`` uses a default
    ``AnnotationNumberizer.Config``.
    """

    tokens: TokenTensorizer.Config = TokenTensorizer.Config(
        column="tokenized_text"
    )
    actions: AnnotationNumberizer.Config = AnnotationNumberizer.Config()
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for source/target tokens and an optional gazetteer.

    The two token tensorizers use default configs; ``dict_feat`` defaults to
    ``None``.
    """

    src_seq_tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    trg_seq_tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    dict_feat: Optional[GazetteerTensorizer.Config] = None
class ModelInput(BasePairwiseModel.Config.ModelInput):
    """Tensorizer configs for a pair of texts and their label.

    ``tokens1`` reads the ``text1`` column and ``tokens2`` reads ``text2``.
    """

    tokens1: TokenTensorizer.Config = TokenTensorizer.Config(column="text1")
    tokens2: TokenTensorizer.Config = TokenTensorizer.Config(column="text2")
    labels: LabelTensorizer.Config = LabelTensorizer.Config()
class ModelInput(Model.Config.ModelInput):
    """Single ``tokens`` input, annotated ``Optional``.

    The default is a ``TokenTensorizer.Config`` with both the BOS and EOS
    token flags enabled.
    """

    tokens: Optional[TokenTensorizer.Config] = TokenTensorizer.Config(
        add_bos_token=True,
        add_eos_token=True,
    )
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for source/target tokens plus two optional inputs.

    ``dict_feat`` and ``contextual_token_embedding`` both default to ``None``.
    """

    src_seq_tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    trg_seq_tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    dict_feat: Optional[GazetteerTensorizer.Config] = None
    contextual_token_embedding: Optional[ByteTokenTensorizer.Config] = None
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for tokens and the raw ``text`` column.

    ``tokens`` has both the BOS and EOS token flags enabled.
    """

    tokens: TokenTensorizer.Config = TokenTensorizer.Config(
        add_bos_token=True,
        add_eos_token=True,
    )
    # for metric reporter
    raw_text: RawString.Config = RawString.Config(column="text")
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for tokens, word labels and the raw ``text`` column.

    ``labels`` uses a default ``WordLabelTensorizer.Config``.
    """

    tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    labels: WordLabelTensorizer.Config = WordLabelTensorizer.Config()
    # for metric reporter
    raw_text: RawString.Config = RawString.Config(column="text")
class ModelInput(Model.Config.ModelInput):
    """Tensorizer configs for ``tokens`` and ``slots``.

    Both fields are ``TokenTensorizer.Config``; ``slots`` reads the ``slots``
    column, while ``tokens`` uses the default config.
    """

    tokens: TokenTensorizer.Config = TokenTensorizer.Config()
    slots: TokenTensorizer.Config = TokenTensorizer.Config(column="slots")