def test_doc_export_to_caffe2_with_logits(
    self,
    num_doc_classes,
    test_num_words,
    test_num_dict_feat,
    num_predictions,
    test_num_chars,
):
    for config in DOC_CONFIGS_WITH_EXPORT_LOGITS:
        config = self._get_config(DocClassificationTask.Config, config)
        metadata = self._get_metadata(num_doc_classes, 0)
        py_model = create_model(config.model, config.features, metadata)
        exporter = create_exporter(
            config.exporter, config.features, config.labels, metadata
        )
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".predictor"
        ) as pred_file:
            print(pred_file.name)
            output_names = exporter.export_to_caffe2(py_model, pred_file.name)
            workspace.ResetWorkspace()
            pred_net = pe.prepare_prediction_net(pred_file.name, CAFFE2_DB_TYPE)
            for _i in range(num_predictions):
                pred_net = pe.prepare_prediction_net(pred_file.name, CAFFE2_DB_TYPE)
                test_inputs = self._get_rand_input(
                    config.features,
                    BATCH_SIZE,
                    W_VOCAB_SIZE,
                    DICT_VOCAB_SIZE,
                    CHAR_VOCAB_SIZE,
                    test_num_words,
                    test_num_dict_feat,
                    test_num_chars,
                )
                self._feed_c2_input(
                    workspace,
                    test_inputs,
                    exporter.input_names,
                    metadata.feature_itos_map,
                )
                workspace.RunNetOnce(pred_net)
                c2_out = [
                    list(workspace.FetchBlob(o_name)) for o_name in output_names
                ]
                py_model.eval()
                py_outs = py_model(*test_inputs)
                np.testing.assert_array_almost_equal(
                    py_outs.view(-1).detach().numpy(),
                    np.array(c2_out[-1]).flatten(),
                )
                # Do log_softmax since we do that before exporting predictor nets
                py_outs = F.log_softmax(py_outs, 1)
                np.testing.assert_array_almost_equal(
                    py_outs.view(-1).detach().numpy(),
                    np.array(c2_out[:-1]).flatten(),
                )
def from_config(cls, task_config, metadata=None, model_state=None):
    """
    Create the task from config, and optionally load metadata/model_state.
    This function will create components including :class:`~DataHandler`,
    :class:`~Trainer`, :class:`~MetricReporter`, :class:`~Exporter`,
    and wire them up.

    Args:
        task_config (Task.Config): the config of the current task
        metadata: saved global context of this task, e.g. vocabulary; will be
            generated by :class:`~DataHandler` if it's None
        model_state: saved model parameters, will be loaded into the model when given
    """
    if hasattr(task_config.labels, "target_prob"):
        assert task_config.labels.target_prob == isinstance(
            task_config.model.output_layer.loss,
            (
                KLDivergenceBCELoss.Config,
                KLDivergenceCELoss.Config,
                SoftHardBCELoss.Config,
            ),
        ), "target_prob must be set to True for KD losses"
    featurizer = create_featurizer(task_config.featurizer, task_config.features)
    # load data
    data_handler = create_data_handler(
        task_config.data_handler,
        task_config.features,
        task_config.labels,
        featurizer=featurizer,
    )
    print("\nLoading data...")
    if metadata:
        data_handler.load_metadata(metadata)
    else:
        data_handler.init_metadata()
    metadata = data_handler.metadata
    model = create_model(task_config.model, task_config.features, metadata)
    if model_state:
        model.load_state_dict(model_state)
    if cuda.CUDA_ENABLED:
        model = model.cuda()
    metric_reporter = create_metric_reporter(task_config.metric_reporter, metadata)
    exporter = (
        create_exporter(
            task_config.exporter,
            task_config.features,
            task_config.labels,
            data_handler.metadata,
            task_config.model,
        )
        if task_config.exporter
        else None
    )
    return cls(
        trainer=create_trainer(task_config.trainer, model),
        data_handler=data_handler,
        model=model,
        metric_reporter=metric_reporter,
        exporter=exporter,
    )
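# Illustrative usage of the factory above (a hedged sketch, not library code from
# this file; the concrete task class and variable names are assumptions):
#
#     task_config = DocClassificationTask.Config()
#     # Fresh task: the DataHandler builds metadata (e.g. vocabulary) and the
#     # model starts from random parameters.
#     task = DocClassificationTask.from_config(task_config)
#
#     # Resuming: reuse previously saved metadata and load saved parameters.
#     task = DocClassificationTask.from_config(
#         task_config, metadata=saved_metadata, model_state=saved_state_dict
#     )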
def from_config(cls, task_config, metadata=None, model_state=None):
    """
    Create the task from config, and optionally load metadata/model_state.
    This function will create components including :class:`~DataHandler`,
    :class:`~Trainer`, :class:`~Optimizer`, :class:`~Scheduler`,
    :class:`~MetricReporter`, :class:`~Exporter`, and wire them up.

    Args:
        task_config (Task.Config): the config of the current task
        metadata: saved global context of this task, e.g. vocabulary; will be
            generated by :class:`~DataHandler` if it's None
        model_state: saved model parameters, will be loaded into the model when given
    """
    print("Task parameters:\n")
    pprint(config_to_json(type(task_config), task_config))
    featurizer = create_featurizer(task_config.featurizer, task_config.features)
    # load data
    data_handler = create_data_handler(
        task_config.data_handler,
        task_config.features,
        task_config.labels,
        featurizer=featurizer,
    )
    print("\nLoading data...")
    if metadata:
        data_handler.load_metadata(metadata)
    else:
        data_handler.init_metadata()
    metadata = data_handler.metadata
    model = create_model(task_config.model, task_config.features, metadata)
    if model_state:
        model.load_state_dict(model_state)
    if cuda_utils.CUDA_ENABLED:
        model = model.cuda()
    metric_reporter = create_metric_reporter(task_config.metric_reporter, metadata)
    optimizer = create_optimizer(task_config.optimizer, model)
    exporter = (
        create_exporter(
            task_config.exporter,
            task_config.features,
            task_config.labels,
            data_handler.metadata,
            task_config.model,
        )
        if task_config.exporter
        else None
    )
    return cls(
        trainer=create_trainer(task_config.trainer),
        data_handler=data_handler,
        model=model,
        metric_reporter=metric_reporter,
        optimizer=optimizer,
        lr_scheduler=Scheduler(
            optimizer, task_config.scheduler, metric_reporter.lower_is_better
        ),
        exporter=exporter,
    )
def from_config(
    cls, config: Config, tensorizers: Dict[str, Tensorizer], *args, **kwargs
):
    """Factory method to construct an instance of Ensemble or one of its
    derived classes from the module's config object and tensorizers.

    It creates sub-models in the ensemble using the sub-model's configuration.

    Args:
        config (Config): Configuration object specifying all the
            parameters of Ensemble.
        tensorizers (Dict[str, Tensorizer]): Tensorizers specifying all the
            parameters of the input features to the model.

    Returns:
        type: An instance of Ensemble.
    """
    sub_models = []
    for sub_model_config in config.models:
        sub_model_config.init_from_saved_state = config.init_from_saved_state
        sub_models.append(
            create_model(sub_model_config, tensorizers, *args, **kwargs)
        )
    return cls(config, sub_models, *args, **kwargs)
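# Sketch of how this factory might be driven (hedged; `Ensemble` stands for
# whichever derived ensemble class is used, and the sub-model config classes
# below are assumptions for illustration):
#
#     config = Ensemble.Config(
#         models=[DocModel.Config(), DocModel.Config()],  # one config per sub-model
#         init_from_saved_state=False,
#     )
#     ensemble = Ensemble.from_config(config, tensorizers)
#     # Each entry in config.models is forwarded to create_model() together with
#     # the shared tensorizers dict, and the resulting sub-models are wrapped by cls.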
def test_freeze_all_embedding(self):
    model = create_model(
        DocModel_Deprecated.Config(),
        FeatureConfig(freeze=True),
        metadata=mock_metadata(),
    )
    for param in model.embedding.parameters():
        self.assertFalse(param.requires_grad)
def test_wordblstm_export_to_caffe2(
    self,
    export_num_words,
    export_num_dict_feat,
    num_word_classes,
    test_num_words,
    test_num_dict_feat,
    num_predictions,
    test_num_chars,
):
    for WORD_CONFIG in WORD_CONFIGS:
        config = self._get_config(WordTaggingTask_Deprecated.Config, WORD_CONFIG)
        metadata = self._get_metadata(0, num_word_classes)
        py_model = create_model(config.model, config.features, metadata)
        exporter = create_exporter(
            config.exporter, config.features, config.labels, metadata
        )
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".{}".format(".predictor")
        ) as pred_file:
            output_names = exporter.export_to_caffe2(py_model, pred_file.name)
            workspace.ResetWorkspace()
            pred_net = pe.prepare_prediction_net(pred_file.name, CAFFE2_DB_TYPE)
            for _i in range(num_predictions):
                test_inputs = self._get_rand_input(
                    config.features,
                    BATCH_SIZE,
                    W_VOCAB_SIZE,
                    DICT_VOCAB_SIZE,
                    CHAR_VOCAB_SIZE,
                    test_num_words,
                    test_num_dict_feat,
                    test_num_chars,
                )
                self._feed_c2_input(
                    workspace,
                    test_inputs,
                    exporter.input_names,
                    metadata.feature_itos_map,
                )
                workspace.RunNetOnce(pred_net)
                c2_out = [
                    list(workspace.FetchBlob(o_name)) for o_name in output_names
                ]
                py_model.eval()
                py_outs = py_model(*test_inputs)
                context = {SEQ_LENS: test_inputs[-1]}
                target = None
                pred, score = py_model.get_pred(py_outs, target, context)
                np.testing.assert_array_almost_equal(
                    torch.transpose(score, 1, 2)
                    .contiguous()
                    .view(-1)
                    .detach()
                    .numpy(),
                    np.array(c2_out).flatten(),
                )
def from_config(cls, task_config, metadata=None, model_state=None):
    print("Task parameters:\n")
    pprint(config_to_json(type(task_config), task_config))
    data_handlers = OrderedDict()
    exporters = OrderedDict()
    for name, task in task_config.tasks.items():
        featurizer = create_featurizer(task.featurizer, task.features)
        data_handlers[name] = create_data_handler(
            task.data_handler, task.features, task.labels, featurizer=featurizer
        )
    data_handler = DisjointMultitaskDataHandler(
        task_config.data_handler, data_handlers
    )
    print("\nLoading data...")
    if metadata:
        data_handler.load_metadata(metadata)
    else:
        data_handler.init_metadata()
    metadata = data_handler.metadata
    exporters = {
        name: (
            create_exporter(
                task.exporter,
                task.features,
                task.labels,
                data_handler.data_handlers[name].metadata,
                task.model,
            )
            if task.exporter
            else None
        )
        for name, task in task_config.tasks.items()
    }
    metric_reporter = DisjointMultitaskMetricReporter(
        OrderedDict(
            (name, create_metric_reporter(task.metric_reporter, metadata[name]))
            for name, task in task_config.tasks.items()
        ),
        target_task_name=task_config.metric_reporter.target_task_name,
    )
    model = DisjointMultitaskModel(
        OrderedDict(
            (name, create_model(task.model, task.features, metadata[name]))
            for name, task in task_config.tasks.items()
        )
    )
    if model_state:
        model.load_state_dict(model_state)
    if cuda_utils.CUDA_ENABLED:
        model = model.cuda()
    optimizers = create_optimizer(model, task_config.optimizer)
    return cls(
        exporters=exporters,
        trainer=create_trainer(task_config.trainer),
        data_handler=data_handler,
        model=model,
        metric_reporter=metric_reporter,
        optimizers=optimizers,
        lr_scheduler=Scheduler(
            optimizers, task_config.scheduler, metric_reporter.lower_is_better
        ),
    )
def test_seq_nn_export_to_caffe2(
    self,
    export_num_words,
    export_num_dict_feat,
    num_doc_classes,
    num_word_classes,
    test_num_words,
    test_num_dict_feat,
    num_predictions,
    test_num_chars,
    test_num_seq,
):
    config = self._get_config(SeqNNTask_Deprecated.Config, SEQ_NN_CONFIG)
    metadata = self._get_seq_metadata(num_doc_classes, 0)
    py_model = create_model(config.model, config.features, metadata)
    exporter = create_exporter(
        config.exporter, config.features, config.labels, metadata
    )
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".{}".format(".predictor")
    ) as pred_file:
        print(pred_file.name)
        output_names = exporter.export_to_caffe2(py_model, pred_file.name)
        workspace.ResetWorkspace()
        pred_net = pe.prepare_prediction_net(pred_file.name, CAFFE2_DB_TYPE)
        for _i in range(num_predictions):
            test_inputs = self._get_seq_nn_rand_input(
                config.features,
                BATCH_SIZE,
                W_VOCAB_SIZE,
                DICT_VOCAB_SIZE,
                CHAR_VOCAB_SIZE,
                test_num_words,
                test_num_dict_feat,
                test_num_chars,
                test_num_seq,
            )
            self._feed_c2_input(
                workspace, test_inputs, exporter.input_names, metadata.feature_itos_map
            )
            workspace.RunNetOnce(pred_net)
            c2_out = [list(workspace.FetchBlob(o_name)) for o_name in output_names]
            py_model.eval()
            py_outs = py_model(*test_inputs)
            # Do log_softmax since we do that before exporting predictor nets
            py_outs = F.log_softmax(py_outs, 1)
            np.testing.assert_array_almost_equal(
                py_outs.view(-1).detach().numpy(), np.array(c2_out).flatten()
            )
def _create_dummy_model(self):
    return create_model(
        DocModel_Deprecated.Config(
            representation=BiLSTMDocAttention.Config(
                save_path=self.representation_path
            ),
            decoder=MLPDecoder.Config(save_path=self.decoder_path),
        ),
        FeatureConfig(
            word_feat=WordEmbedding.Config(
                embed_dim=300, save_path=self.word_embedding_path
            ),
            save_path=self.embedding_path,
        ),
        self._create_dummy_meta_data(),
    )
def from_config(cls, task_config, metadata=None, model_state=None):
    print("(mldc/task/gpt_task.py def from_config) Task parameters:\n")
    pprint(config_to_json(type(task_config), task_config))
    featurizer = create_featurizer(
        task_config.featurizer,
        task_config.features,
        text_embedder_config=task_config.text_embedder,
    )
    # featurizer uses the GPT2Embed text embedder
    # load data
    data_handler = create_data_handler(
        task_config.data_handler,
        task_config.features,
        task_config.labels,
        text_embedder_config=task_config.text_embedder,
        featurizer=featurizer,
    )
    print("\n(mldc/task/retrieval.py GptTask def from_config) Loading data...")
    if metadata:
        data_handler.load_metadata(metadata)
    else:
        data_handler.init_metadata()
    metadata = data_handler.metadata
    task_config.features.seq_word_feat.embed_dim = (
        data_handler.text_embedder.embed_dim
    )
    print("create model!")
    model = create_model(task_config.model, task_config.features, metadata)
    if model_state:
        model.load_state_dict(model_state)
    if cuda_utils.CUDA_ENABLED:
        model = model.cuda()
    metric_reporter = create_metric_reporter(
        task_config.metric_reporter,
        metadata,
        text_embedder=task_config.text_embedder,
    )
    return cls(
        trainer=create_trainer(task_config.trainer),
        data_handler=data_handler,
        model=model,
        metric_reporter=metric_reporter,
        model_needs_meta_training=task_config.model_needs_meta_training,
    )
def DISABLED_test_freeze_word_embedding(self):
    model = create_model(
        DocModel.Config(),
        FeatureConfig(
            word_feat=WordFeatConfig(freeze=True, mlp_layer_dims=[4]),
            dict_feat=DictFeatConfig(),
        ),
        metadata=mock_metadata(),
    )
    # word embedding
    for param in model.embedding[0].word_embedding.parameters():
        self.assertFalse(param.requires_grad)
    for param in model.embedding[0].mlp.parameters():
        self.assertTrue(param.requires_grad)
    # dict feat embedding
    for param in model.embedding[1].parameters():
        self.assertTrue(param.requires_grad)
def from_config(cls, config: Config, feat_config: FeatureConfig, *args, **kwargs):
    """Factory method to construct an instance of Ensemble or one of its
    derived classes from the module's config object and the field's
    metadata object.

    It creates sub-models in the ensemble using the sub-model's configuration.

    Args:
        config (Config): Configuration object specifying all the
            parameters of Ensemble.
        feat_config (FeatureConfig): Configuration object specifying all the
            parameters of the input features to the model.

    Returns:
        type: An instance of Ensemble.
    """
    sub_models = [
        create_model(sub_model_config, feat_config, *args, **kwargs)
        for sub_model_config in config.models
    ]
    return cls(config, sub_models, *args, **kwargs)
def from_config(
    cls,
    task_config: Config,
    metadata=None,
    model_state=None,
    tensorizers=None,
    rank=0,
    world_size=1,
):
    print("Task parameters:\n")
    pprint(config_to_json(type(task_config), task_config))
    data_handlers = OrderedDict()
    exporters = OrderedDict()
    for name, task in task_config.tasks.items():
        featurizer = create_featurizer(task.featurizer, task.features)
        data_handlers[name] = create_data_handler(
            task.data_handler, task.features, task.labels, featurizer=featurizer
        )
    data_handler = DisjointMultitaskDataHandler(
        task_config.data_handler,
        data_handlers,
        target_task_name=task_config.target_task_name,
    )
    print("\nLoading data...")
    if metadata:
        data_handler.load_metadata(metadata)
    else:
        data_handler.init_metadata()
    metadata = data_handler.metadata
    exporters = {
        name: (
            create_exporter(
                task.exporter,
                task.features,
                task.labels,
                data_handler.data_handlers[name].metadata,
                task.model,
            )
            if task.exporter
            else None
        )
        for name, task in task_config.tasks.items()
    }
    task_weights = {
        task_name: task_config.task_weights.get(task_name, 1)
        for task_name in task_config.tasks.keys()
    }
    metric_reporter = DisjointMultitaskMetricReporter(
        OrderedDict(
            (name, create_metric_reporter(task.metric_reporter, metadata[name]))
            for name, task in task_config.tasks.items()
        ),
        loss_weights=task_weights,
        target_task_name=task_config.target_task_name,
        use_subtask_select_metric=(
            task_config.metric_reporter.use_subtask_select_metric
        ),
    )
    model = DisjointMultitaskModel(
        OrderedDict(
            (name, create_model(task.model, task.features, metadata[name]))
            for name, task in task_config.tasks.items()
        ),
        loss_weights=task_weights,
    )
    if model_state:
        model.load_state_dict(model_state)
    if cuda.CUDA_ENABLED:
        model = model.cuda()
    return cls(
        target_task_name=task_config.target_task_name,
        exporters=exporters,
        trainer=create_trainer(task_config.trainer, model),
        data_handler=data_handler,
        model=model,
        metric_reporter=metric_reporter,
    )
def test_load_save(self):
    text_field_meta = FieldMeta()
    text_field_meta.vocab = VocabStub()
    text_field_meta.vocab_size = 4
    text_field_meta.unk_token_idx = 1
    text_field_meta.pad_token_idx = 0
    text_field_meta.pretrained_embeds_weight = None
    label_meta = FieldMeta()
    label_meta.vocab = VocabStub()
    label_meta.vocab_size = 3
    metadata = CommonMetadata()
    metadata.features = {DatasetFieldName.TEXT_FIELD: text_field_meta}
    metadata.target = label_meta

    saved_model = create_model(
        DocModel.Config(
            representation=BiLSTMDocAttention.Config(
                save_path=self.representation_path
            ),
            decoder=MLPDecoder.Config(save_path=self.decoder_path),
        ),
        FeatureConfig(save_path=self.embedding_path),
        metadata,
    )
    saved_model.save_modules()

    loaded_model = create_model(
        DocModel.Config(
            representation=BiLSTMDocAttention.Config(
                load_path=self.representation_path
            ),
            decoder=MLPDecoder.Config(load_path=self.decoder_path),
        ),
        FeatureConfig(load_path=self.embedding_path),
        metadata,
    )

    random_model = create_model(
        DocModel.Config(
            representation=BiLSTMDocAttention.Config(), decoder=MLPDecoder.Config()
        ),
        FeatureConfig(),
        metadata,
    )

    # Loaded and saved modules should be equal. Neither should be equal to
    # a randomly initialised model.
    for p1, p2, p3 in itertools.zip_longest(
        saved_model.embedding.parameters(),
        loaded_model.embedding.parameters(),
        random_model.embedding.parameters(),
    ):
        self.assertTrue(p1.equal(p2))
        self.assertFalse(p3.equal(p1))
        self.assertFalse(p3.equal(p2))

    for p1, p2, p3 in itertools.zip_longest(
        saved_model.representation.parameters(),
        loaded_model.representation.parameters(),
        random_model.representation.parameters(),
    ):
        self.assertTrue(p1.equal(p2))
        self.assertFalse(p3.equal(p1))
        self.assertFalse(p3.equal(p2))

    for p1, p2, p3 in itertools.zip_longest(
        saved_model.decoder.parameters(),
        loaded_model.decoder.parameters(),
        random_model.decoder.parameters(),
    ):
        self.assertTrue(p1.equal(p2))
        self.assertFalse(p3.equal(p1))
        self.assertFalse(p3.equal(p2))
def test_joint_export_to_caffe2(
    self,
    export_num_words,
    export_num_dict_feat,
    num_doc_classes,
    num_word_classes,
    test_num_words,
    test_num_dict_feat,
    num_predictions,
    test_num_chars,
):
    config = self._get_config(JointTextTask.Config, JOINT_CONFIG)
    metadata = self._get_metadata(num_doc_classes, num_word_classes)
    py_model = create_model(config.model, config.features, metadata)
    exporter = create_exporter(
        config.exporter, config.features, config.labels, metadata
    )
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".{}".format(".predictor")
    ) as pred_file:
        exporter.export_to_caffe2(py_model, pred_file.name)
        workspace.ResetWorkspace()
        pred_net = pe.prepare_prediction_net(pred_file.name, CAFFE2_DB_TYPE)
        for _i in range(num_predictions):
            test_inputs = self._get_rand_input(
                config.features,
                BATCH_SIZE,
                W_VOCAB_SIZE,
                DICT_VOCAB_SIZE,
                CHAR_VOCAB_SIZE,
                test_num_words,
                test_num_dict_feat,
                test_num_chars,
            )
            self._feed_c2_input(
                workspace, test_inputs, exporter.input_names, metadata.feature_itos_map
            )
            workspace.RunNetOnce(pred_net)
            doc_output_names = [
                "{}:{}".format("doc_scores", class_name)
                for class_name in metadata.label_names[0]
            ]
            word_output_names = [
                "{}:{}".format("word_scores", class_name)
                for class_name in metadata.label_names[1]
            ]
            py_model.eval()
            logits = py_model(*test_inputs)
            context = {SEQ_LENS: test_inputs[-1]}
            target = None
            (d_pred, w_pred), (d_score, w_score) = py_model.get_pred(
                logits, target, context
            )
            c2_doc_out = []
            for o_name in doc_output_names:
                c2_doc_out.extend(list(workspace.FetchBlob(o_name)))
            np.testing.assert_array_almost_equal(
                d_score.view(-1).detach().numpy(), np.array(c2_doc_out).flatten()
            )
            c2_word_out = []
            for o_name in word_output_names:
                c2_word_out.extend(list(workspace.FetchBlob(o_name)))
            np.testing.assert_array_almost_equal(
                torch.transpose(w_score, 1, 2).contiguous().view(-1).detach().numpy(),
                np.array(c2_word_out).flatten(),
            )