def add_task(self, task: EmmentalTask) -> None:
    r"""Add a single task into MTL network.

    Args:
      task(EmmentalTask): A task to add.

    Raises:
      ValueError: If ``task`` is not an EmmentalTask, or a task with the
        same name is already registered. Previously a duplicate name would
        silently overwrite the existing task's flow, loss, output function,
        scorer and weight; this guard (consistent with the newer
        ``add_task`` variants in this codebase) makes that an error.
    """
    if not isinstance(task, EmmentalTask):
        raise ValueError(f"Unrecognized task type {task}.")
    if task.name in self.task_names:
        raise ValueError(
            f"Found duplicate task {task.name}, different task should use "
            f"different task name."
        )
    # Combine module_pool from all tasks: when a module name already exists
    # in the model's pool, the existing instance is shared back into the
    # task so weights are tied across tasks.
    for key in task.module_pool.keys():
        if key in self.module_pool.keys():
            if Meta.config["model_config"]["dataparallel"]:
                task.module_pool[key] = nn.DataParallel(self.module_pool[key])
            else:
                task.module_pool[key] = self.module_pool[key]
        else:
            if Meta.config["model_config"]["dataparallel"]:
                self.module_pool[key] = nn.DataParallel(task.module_pool[key])
            else:
                self.module_pool[key] = task.module_pool[key]
    # Collect task name
    self.task_names.add(task.name)
    # Collect task flow
    self.task_flows[task.name] = task.task_flow
    # Collect loss function
    self.loss_funcs[task.name] = task.loss_func
    # Collect output function
    self.output_funcs[task.name] = task.output_func
    # Collect scorer
    self.scorers[task.name] = task.scorer
    # Collect weight
    self.weights[task.name] = task.weight
    # Move model to specified device
    self._move_to_device()
def get_task(task_names, emb_dim, char_dict_size, cnn_out_dim=300):
    """Build one EmmentalTask per name, all sharing a single CNN text encoder.

    Args:
        task_names: Iterable of task names; each gets its own linear
            prediction head on top of the shared encoder.
        emb_dim: Character embedding dimension fed to the CNN encoder.
        char_dict_size: Size of the character vocabulary.
        cnn_out_dim: Output feature size of the CNN encoder. Defaults to 300,
            the previously hard-coded value (resolves the old TODO by making
            it a backward-compatible parameter).

    Returns:
        List of EmmentalTask instances sharing the "cnn_text" module.
    """
    # One encoder instance shared by every task so its weights are tied.
    cnn_module = CNN_Text(char_dict_size, emb_dim)
    tasks = []
    for task_name in task_names:
        task = EmmentalTask(
            name=task_name,
            module_pool=nn.ModuleDict({
                "cnn_text": cnn_module,
                # Binary prediction head per task.
                f"{task_name}_pred_head": nn.Linear(cnn_out_dim, 2)
            }),
            task_flow=[
                {
                    "name": "cnn_text",
                    "module": "cnn_text",
                    "inputs": [("_input_", "emb")],
                },
                {
                    "name": f"{task_name}_pred_head",
                    "module": f"{task_name}_pred_head",
                    "inputs": [("cnn_text", 0)],
                },
            ],
            loss_func=partial(ce_loss, task_name),
            output_func=partial(output, task_name),
            scorer=Scorer(metrics=["accuracy", "f1", "precision", "recall"]),
        )
        tasks.append(task)
    return tasks
def create_task(task_name, n_class=2, model="resnet18", pretrained=True):
    """Build an image-classification EmmentalTask around a CNN backbone.

    Args:
        task_name: Task name; also prefixes its prediction-head module.
        n_class: Number of output classes (default 2).
        model: Backbone identifier handed to ``get_cnn`` (default "resnet18").
        pretrained: Whether the backbone loads pretrained weights.

    Returns:
        EmmentalTask wiring image -> feature extractor -> identity head.
    """
    backbone = get_cnn(model, pretrained, num_classes=n_class)
    logger.info(f"Built model: {backbone}")

    head_name = f"{task_name}_pred_head"
    pool = nn.ModuleDict({"feature": backbone, head_name: IdentityModule()})
    flow = [
        {
            "name": "feature",
            "module": "feature",
            "inputs": [("_input_", "image")],
        },
        {
            "name": head_name,
            "module": head_name,
            "inputs": [("feature", 0)],
        },
    ]
    return EmmentalTask(
        name=task_name,
        module_pool=pool,
        task_flow=flow,
        loss_func=partial(sce_loss, head_name),
        output_func=partial(output_classification, head_name),
        scorer=Scorer(metrics=["precision", "recall", "f1"]),
    )
def build_model(bert_model_name, last_hidden_dropout_prob=0.0):
    """Build the single-task BERT classification task for TASK_NAME.

    Args:
        bert_model_name: HuggingFace-style BERT checkpoint name; "base"
            checkpoints have 768-dim hidden states, others 1024.
        last_hidden_dropout_prob: Dropout applied on the last CLS feature.

    Returns:
        EmmentalTask: token ids -> BERT -> last-CLS feature -> linear head.
    """
    encoder = BertModule(bert_model_name)
    encoder_dim = 768 if "base" in bert_model_name else 1024

    label_map = SuperGLUE_LABEL_MAPPING[TASK_NAME]
    cardinality = len(label_map.keys()) if label_map is not None else 1
    metrics = (
        SuperGLUE_TASK_METRIC_MAPPING[TASK_NAME]
        if TASK_NAME in SuperGLUE_TASK_METRIC_MAPPING
        else []
    )

    bert_op = f"{TASK_NAME}_bert_module"
    feat = f"{TASK_NAME}_feature"
    head = f"{TASK_NAME}_pred_head"

    return EmmentalTask(
        name=TASK_NAME,
        module_pool=nn.ModuleDict(
            {
                "bert_module": encoder,
                feat: BertLastCLSModule(dropout_prob=last_hidden_dropout_prob),
                head: nn.Linear(encoder_dim, cardinality),
            }
        ),
        task_flow=[
            {
                "name": bert_op,
                "module": "bert_module",
                "inputs": [
                    ("_input_", "token_ids"),
                    ("_input_", "token_segments"),
                    ("_input_", "token_masks"),
                ],
            },
            {"name": feat, "module": feat, "inputs": [(bert_op, 0)]},
            {"name": head, "module": head, "inputs": [(feat, 0)]},
        ],
        loss_func=partial(utils.ce_loss, head),
        output_func=partial(utils.output, head),
        scorer=Scorer(metrics=metrics, customize_metric_funcs={}),
    )
def create_task(args):
    """Build a multi-label classification task on a pretrained transformer.

    Args:
        args: Namespace providing ``model`` (checkpoint name), ``task_name``
            and ``label_fields`` (one output unit per field).

    Returns:
        EmmentalTask: feat_input_ids -> feature extractor -> linear head,
        scored by a custom multi-label scorer.
    """
    encoder = AutoModel.from_pretrained(args.model)

    pool = nn.ModuleDict({
        "feature_extractor": FeatureExtractor(encoder),
        "pred_head": nn.Linear(encoder.config.hidden_size,
                               len(args.label_fields)),
    })
    flow = [
        {
            "name": "feature_extractor",
            "module": "feature_extractor",
            "inputs": [("_input_", "feat_input_ids")],
        },
        {
            "name": "pred_head",
            "module": "pred_head",
            "inputs": [("feature_extractor", 0)],
        },
    ]
    scorer = Scorer(customize_metric_funcs={
        "multi_label_scorer": partial(multi_label_scorer, args.label_fields)
    })
    return EmmentalTask(
        name=args.task_name,
        module_pool=pool,
        task_flow=flow,
        loss_func=partial(ce_loss, "pred_head"),
        output_func=partial(output, "pred_head"),
        scorer=scorer,
    )
def _init_model(self, encoder_class, encoder_args, decoder_class, decoder_args,
                task_to_label_dict):
    """Build an EmmentalModel with one shared encoder and per-task decoders.

    Args:
        encoder_class: Name of an encoder class looked up on ``modules``.
        encoder_args: Keyword arguments for the encoder constructor.
        decoder_class: Name of a decoder class looked up on ``modules``;
            each task gets its own 2-class decoder instance.
        decoder_args: Keyword arguments for each decoder constructor.
        task_to_label_dict: Mapping whose keys are the task names to create.

    Returns:
        EmmentalModel containing one task per entry in ``task_to_label_dict``.
    """
    # Single encoder instance shared by all tasks (weight tying).
    encoder_module = getattr(modules, encoder_class)(**encoder_args)
    tasks = [
        EmmentalTask(
            name=task_name,
            module_pool=nn.ModuleDict({
                # Fixed: dropped the no-op f-string prefix on this literal key.
                'encoder_module': encoder_module,
                f'decoder_module_{task_name}':
                    getattr(modules, decoder_class)(2, **decoder_args),
            }),
            task_flow=[
                {
                    'name': 'encoder_module',
                    'module': 'encoder_module',
                    'inputs': [('_input_', 'exam')]
                },
                {
                    'name': f'decoder_module_{task_name}',
                    'module': f'decoder_module_{task_name}',
                    'inputs': [('encoder_module', 0)],
                },
            ],
            loss_func=partial(ce_loss, task_name),
            output_func=partial(output, task_name),
            scorer=Scorer(metrics=['accuracy']),
        )
        for task_name in task_to_label_dict.keys()
    ]
    model = EmmentalModel(name='cow-tus-model', tasks=tasks)
    return model
def _init_model(self, encoder_class, encoder_args, decoder_class, decoder_args,
                input_shape, task_to_label_dict):
    """Build an EmmentalModel with a pretrained shared encoder per-task decoders.

    Args:
        encoder_class: Name of an encoder class looked up on ``modules``.
        encoder_args: Encoder constructor kwargs (unused here; the encoder is
            built with ``pretrained=True`` only — kept for interface parity).
        decoder_class: Name of a decoder class looked up on ``modules``.
        decoder_args: Keyword arguments for each decoder constructor.
        input_shape: Unused; kept for interface compatibility with callers.
        task_to_label_dict: Mapping whose keys are the task names to create.

    Returns:
        EmmentalModel containing one task per entry in ``task_to_label_dict``.
    """
    # Fixed: original called `getattr(modules)(encoder_class, ...)`, which is
    # an invalid one-argument getattr (TypeError). The class must be looked
    # up on `modules` first, then instantiated.
    encoder_module = getattr(modules, encoder_class)(pretrained=True)
    encoder_output_dim = encoder_module.get_output_dim()
    tasks = [
        EmmentalTask(
            name=task_name,
            module_pool=nn.ModuleDict({
                'encoder_module': encoder_module,
                # Fixed: original passed undefined name `emb_dim`; the
                # computed encoder output dim is the intended decoder input.
                f'decoder_module_{task_name}':
                    getattr(modules, decoder_class)(encoder_output_dim, 2,
                                                    **decoder_args),
            }),
            task_flow=[
                {
                    'name': 'encoder_module',
                    'module': 'encoder_module',
                    'inputs': [('_input_', 'exam')]
                },
                {
                    'name': f'decoder_module_{task_name}',
                    'module': f'decoder_module_{task_name}',
                    'inputs': [('encoder_module', 0)],
                },
            ],
            loss_func=partial(ce_loss, task_name),
            output_func=partial(output, task_name),
            scorer=Scorer(metrics=[
                'accuracy', 'roc_auc', 'precision', 'recall', 'f1'
            ]),
        )
        for task_name in task_to_label_dict.keys()
    ]
    model = EmmentalModel(name='cow-tus-model', tasks=tasks)
    return model
def get_superglue_task(task_names, bert_model_name):
    """Build SuperGLUE EmmentalTasks keyed by task name.

    Args:
        task_names: Iterable of SuperGLUE task names.
        bert_model_name: BERT checkpoint name; "base" variants are 768-dim,
            others 1024.

    Returns:
        Dict mapping task name -> EmmentalTask.

    NOTE(review): only the MultiRC branch is visible in this chunk; for any
    other task_name, `tasks[task_name] = task` would reference an unbound
    `task` (NameError). Presumably the other per-task branches were elided —
    confirm against the full source.
    """
    tasks = dict()
    # Single BERT encoder shared across all tasks.
    bert_module = BertModule(bert_model_name)
    bert_output_dim = 768 if "base" in bert_model_name else 1024
    for task_name in task_names:
        # Number of output classes; 1 when the task has no label mapping.
        task_cardinality = (len(SuperGLUE_LABEL_MAPPING[task_name].keys())
                            if SuperGLUE_LABEL_MAPPING[task_name] is not None
                            else 1)
        metrics = (SuperGLUE_TASK_METRIC_MAPPING[task_name]
                   if task_name in SuperGLUE_TASK_METRIC_MAPPING else [])
        # MultiRC additionally reports exact-match and EM/F1 custom metrics.
        customize_metric_funcs = ({
            "em": em,
            "em_f1": em_f1
        } if task_name == "MultiRC" else {})
        loss_fn = partial(ce_loss, f"{task_name}_pred_head")
        output_fn = partial(output, f"{task_name}_pred_head")
        if task_name == "MultiRC":
            task = EmmentalTask(
                name=task_name,
                module_pool=nn.ModuleDict({
                    "bert_module": bert_module,
                    "bert_last_CLS": BertLastCLSModule(),
                    f"{task_name}_pred_head": nn.Linear(bert_output_dim,
                                                        task_cardinality),
                }),
                task_flow=[
                    {
                        "name": f"{task_name}_bert_module",
                        "module": "bert_module",
                        "inputs": [("_input_", "token_ids")],
                    },
                    {
                        "name": f"{task_name}_bert_last_CLS",
                        "module": "bert_last_CLS",
                        "inputs": [(f"{task_name}_bert_module", 0)],
                    },
                    {
                        "name": f"{task_name}_pred_head",
                        "module": f"{task_name}_pred_head",
                        "inputs": [(f"{task_name}_bert_last_CLS", 0)],
                    },
                ],
                loss_func=loss_fn,
                output_func=output_fn,
                scorer=Scorer(metrics=metrics,
                              customize_metric_funcs=customize_metric_funcs),
            )
        tasks[task_name] = task
    return tasks
def build_model(bert_model_name, last_hidden_dropout_prob=None):
    """Build the span-classification task over a BERT encoder for TASK_NAME.

    Args:
        bert_model_name: BERT checkpoint name ("base" -> 768-dim, else 1024).
        last_hidden_dropout_prob: Not implemented for this task; passing a
            truthy value raises NotImplementedError.

    Returns:
        EmmentalTask: tokens -> BERT -> span classifier over two token
        indices and the token mask.
    """
    if last_hidden_dropout_prob:
        raise NotImplementedError(
            f"TODO: last_hidden_dropout_prob for {TASK_NAME}")

    encoder = BertModule(bert_model_name)
    encoder_dim = 768 if "base" in bert_model_name else 1024

    metrics = (SuperGLUE_TASK_METRIC_MAPPING[TASK_NAME]
               if TASK_NAME in SuperGLUE_TASK_METRIC_MAPPING else [])

    bert_op = f"{TASK_NAME}_bert_module"
    head = f"{TASK_NAME}_pred_head"

    return EmmentalTask(
        name=TASK_NAME,
        module_pool=nn.ModuleDict({
            "bert_module": encoder,
            head: SpanClassifierModule(d_inp=encoder_dim,
                                       proj_dim=encoder_dim // 2),
        }),
        task_flow=[
            {
                "name": bert_op,
                "module": "bert_module",
                "inputs": [
                    ("_input_", "token_ids"),
                    ("_input_", "token_segments"),
                    ("_input_", "token_masks"),
                ],
            },
            {
                "name": head,
                "module": head,
                "inputs": [
                    (bert_op, 0),
                    ("_input_", "token1_idx"),
                    ("_input_", "token2_idx"),
                    ("_input_", "token_masks"),
                ],
            },
        ],
        loss_func=partial(utils.ce_loss, head),
        output_func=partial(utils.output, head),
        scorer=Scorer(metrics=metrics, customize_metric_funcs={}),
    )
def add_task(self, task: EmmentalTask) -> None:
    r"""Register a single task with the MTL network.

    Args:
      task(EmmentalTask): A task to add.

    Raises:
      ValueError: If ``task`` is not an EmmentalTask or its name is already
        registered.
    """
    if not isinstance(task, EmmentalTask):
        raise ValueError(f"Unrecognized task type {task}.")
    if task.name in self.task_names:
        raise ValueError(
            f"Found duplicate task {task.name}, different task should use "
            f"different task name.")

    # Merge the task's modules into the shared pool. When a name already
    # exists in the pool, the existing instance is shared back into the
    # task (weight tying); otherwise the task's module joins the pool.
    # Either way it may be wrapped in DataParallel per the model config.
    for module_name in task.module_pool.keys():
        already_shared = module_name in self.module_pool.keys()
        module = (self.module_pool[module_name]
                  if already_shared else task.module_pool[module_name])
        if Meta.config["model_config"]["dataparallel"]:
            module = nn.DataParallel(module)
        if already_shared:
            task.module_pool[module_name] = module
        else:
            self.module_pool[module_name] = module

    # Record the task's bookkeeping entries.
    self.task_names.add(task.name)
    self.task_flows[task.name] = task.task_flow
    self.loss_funcs[task.name] = task.loss_func
    self.output_funcs[task.name] = task.output_func
    self.scorers[task.name] = task.scorer
    self.weights[task.name] = task.weight

    # Move model to specified device
    self._move_to_device()
def get_gule_task(task_names, bert_model_name):
    """Build GLUE EmmentalTasks keyed by task name over a shared BERT encoder.

    Args:
        task_names: Iterable of GLUE task names.
        bert_model_name: BERT checkpoint name ("base" -> 768-dim, else 1024).

    Returns:
        Dict mapping task name -> EmmentalTask. STS-B (regression) uses MSE
        loss; every other task uses cross-entropy.
    """
    tasks = {}
    # One encoder instance shared by all tasks.
    bert_module = BertModule(bert_model_name)
    bert_output_dim = 768 if "base" in bert_model_name else 1024

    for task_name in task_names:
        label_map = LABEL_MAPPING[task_name]
        cardinality = len(label_map.keys()) if label_map is not None else 1
        base_loss = mse_loss if task_name == "STS-B" else ce_loss
        head = f"{task_name}_pred_head"

        tasks[task_name] = EmmentalTask(
            name=task_name,
            module_pool=nn.ModuleDict({
                "bert_module": bert_module,
                head: nn.Linear(bert_output_dim, cardinality),
            }),
            task_flow=[
                {
                    "name": "input",
                    "module": "bert_module",
                    "inputs": [
                        ("_input_", "token_ids"),
                        ("_input_", "token_segments"),
                        ("_input_", "token_masks"),
                    ],
                },
                {
                    "name": head,
                    "module": head,
                    # Feeds the encoder's second output into the head.
                    "inputs": [("input", 1)],
                },
            ],
            loss_func=partial(base_loss, task_name),
            output_func=partial(output, task_name),
            scorer=Scorer(metrics=METRIC_MAPPING[task_name]),
        )
    return tasks
def create_task(args):
    """Build an image-classification task on a wide-resnet or MLP backbone.

    Args:
        args: Namespace with ``task``, ``model`` and model-specific hyper-
            parameters (``wide_resnet_*`` or ``mlp_hidden_dim``).

    Returns:
        EmmentalTask: image -> headless backbone -> linear prediction head.

    Raises:
        ValueError: If ``args.model`` is not "wide_resnet" or "mlp".
    """
    task_name = args.task
    num_classes = TASK_NUM_CLASS[args.task]

    if args.model in ["wide_resnet"]:
        backbone = ALL_MODELS[args.model](
            args.wide_resnet_depth,
            args.wide_resnet_width,
            args.wide_resnet_dropout,
            num_classes,
            has_fc=False,
        )
        # Probe the backbone with a dummy batch to discover its feature size.
        hidden_dim = backbone(
            torch.randn(TASK_INPUT_SIZE[args.task])).size()[-1]
    elif args.model == "mlp":
        hidden_dim = args.mlp_hidden_dim
        flat_dim = np.prod(TASK_INPUT_SIZE[args.task])
        backbone = ALL_MODELS[args.model](flat_dim, hidden_dim, num_classes,
                                          has_fc=False)
    else:
        raise ValueError(f"Invalid model {args.model}")

    logger.info(f"Built model: {backbone}")

    head = f"{task_name}_pred_head"
    return EmmentalTask(
        name=args.task,
        module_pool=nn.ModuleDict({
            "feature": backbone,
            head: nn.Linear(hidden_dim, num_classes),
        }),
        task_flow=[
            {
                "name": "feature",
                "module": "feature",
                "inputs": [("_input_", "image")]
            },
            {
                "name": head,
                "module": head,
                "inputs": [("feature", 0)],
            },
        ],
        loss_func=partial(sce_loss, head),
        output_func=partial(output_classification, head),
        scorer=Scorer(metrics=TASK_METRIC[task_name]),
    )
def add_task(self, task: EmmentalTask) -> None:
    """Add a single task into MTL network.

    Registers the task's modules (sharing any module whose name already
    exists in the pool, so weights are tied across tasks) and all of its
    per-task callbacks/settings, then moves the model to its device.

    Args:
      task: A task to add.

    Raises:
      ValueError: If ``task`` is not an EmmentalTask, or a task with the
        same name was already added.
    """
    if not isinstance(task, EmmentalTask):
        raise ValueError(f"Unrecognized task type {task}.")
    if task.name in self.task_names:
        raise ValueError(
            f"Found duplicate task {task.name}, different task should use "
            f"different task name."
        )
    # Combine module_pool from all tasks: an existing module with the same
    # name is shared back into the task (weight tying); otherwise the
    # task's module joins the model-wide pool.
    for key in task.module_pool.keys():
        if key in self.module_pool.keys():
            task.module_pool[key] = self.module_pool[key]
        else:
            self.module_pool[key] = task.module_pool[key]
    # Collect task name
    self.task_names.add(task.name)
    # Collect task flow
    self.task_flows[task.name] = task.task_flow
    # Collect loss function
    self.loss_funcs[task.name] = task.loss_func
    # Collect output function
    self.output_funcs[task.name] = task.output_func
    # Collect action outputs
    self.action_outputs[task.name] = task.action_outputs
    # Collect module device (per-module placement overrides)
    self.module_device.update(task.module_device)
    # Collect scorer
    self.scorers[task.name] = task.scorer
    # Collect weight
    self.task_weights[task.name] = task.weight
    # Collect require prob for eval
    self.require_prob_for_evals[task.name] = task.require_prob_for_eval
    # Collect require pred for eval
    self.require_pred_for_evals[task.name] = task.require_pred_for_eval
    # Move model to specified device
    self._move_to_device()
def create_task(args):
    """Build a text-classification task on a BERT backbone.

    Args:
        args: Namespace with ``task`` (task name) and ``model`` (BERT
            checkpoint name; "base" -> 768-dim hidden states, else 1024).

    Returns:
        EmmentalTask: tokens -> BERT -> linear prediction head.
    """
    task_name = args.task
    num_classes = TASK_NUM_CLASS[args.task]

    encoder = BertModule(args.model)
    encoder_dim = 768 if "base" in args.model else 1024

    logger.info(f"Built model: {encoder}")

    head = f"{task_name}_pred_head"
    return EmmentalTask(
        name=args.task,
        module_pool=nn.ModuleDict({
            "feature": encoder,
            head: nn.Linear(encoder_dim, num_classes),
        }),
        task_flow=[
            {
                "name": "feature",
                "module": "feature",
                "inputs": [
                    ("_input_", "token_ids"),
                    ("_input_", "token_segments"),
                    ("_input_", "token_masks"),
                ],
            },
            {
                "name": head,
                "module": head,
                # Feeds the encoder's second output into the head.
                "inputs": [("feature", 1)],
            },
        ],
        loss_func=partial(sce_loss, head),
        output_func=partial(output_classification, head),
        scorer=Scorer(metrics=TASK_METRIC[task_name]),
    )
def create_task(task_names, cnn_encoder="densenet121"):
    """Build binary image-classification tasks sharing one torchvision CNN.

    Args:
        task_names: Iterable of task names; each gets its own linear head.
        cnn_encoder: torchvision backbone name (default "densenet121").

    Returns:
        List of EmmentalTask, all sharing the "feature" CNN module.
    """
    input_shape = (3, 224, 224)
    encoder = TorchVisionEncoder(cnn_encoder, pretrained=True)
    feature_dim = encoder.get_frm_output_size(input_shape)

    tasks = []
    for name in task_names:
        head = f"{name}_pred_head"
        tasks.append(
            EmmentalTask(
                name=name,
                module_pool=nn.ModuleDict({
                    "feature": encoder,
                    head: nn.Linear(feature_dim, 2),
                }),
                task_flow=[
                    {
                        "name": "feature",
                        "module": "feature",
                        "inputs": [("_input_", "image")],
                    },
                    {
                        "name": head,
                        "module": head,
                        "inputs": [("feature", 0)],
                    },
                ],
                loss_func=partial(ce_loss, head),
                output_func=partial(output, head),
                scorer=Scorer(metrics=["roc_auc"]),
            )
        )
    return tasks
def build_model(bert_model_name, last_hidden_dropout_prob=0.0):
    """Build the multiple-choice task for TASK_NAME over a BERT encoder.

    Args:
        bert_model_name: BERT checkpoint name ("base" -> 768-dim, else 1024).
        last_hidden_dropout_prob: Dropout applied on the last CLS feature.

    Returns:
        EmmentalTask: choice expansion -> BERT -> last-CLS feature -> 1-unit
        scoring head, with loss/output aggregating over NUM_CHOICES.
    """
    encoder = BertModule(bert_model_name)
    encoder_dim = 768 if "base" in bert_model_name else 1024

    metrics = (SuperGLUE_TASK_METRIC_MAPPING[TASK_NAME]
               if TASK_NAME in SuperGLUE_TASK_METRIC_MAPPING else [])

    mc = f"{TASK_NAME}_multiple_choice_module"
    bert_op = f"{TASK_NAME}_bert_module"
    feat = f"{TASK_NAME}_feature"
    head = f"{TASK_NAME}_pred_head"

    return EmmentalTask(
        name=TASK_NAME,
        module_pool=nn.ModuleDict({
            mc: MultipleChoiceModule(NUM_CHOICES),
            "bert_module": encoder,
            feat: BertLastCLSModule(dropout_prob=last_hidden_dropout_prob),
            head: nn.Linear(encoder_dim, 1),
        }),
        task_flow=[
            {"name": mc, "module": mc, "inputs": []},
            {
                "name": bert_op,
                "module": "bert_module",
                # Three outputs of the choice module feed the encoder.
                "inputs": [(mc, 0), (mc, 1), (mc, 2)],
            },
            {"name": feat, "module": feat, "inputs": [(bert_op, 0)]},
            {"name": head, "module": head, "inputs": [(feat, 0)]},
        ],
        loss_func=partial(utils.ce_loss_multiple_choice, head, NUM_CHOICES),
        output_func=partial(utils.output_multiple_choice, head, NUM_CHOICES),
        scorer=Scorer(metrics=metrics, customize_metric_funcs={}),
    )
# Shared CNN encoder; every task below reuses this one instance so its
# weights are trained jointly across tasks.
cnn_module = TorchVisionEncoder(CNN_ENCODER, pretrained=True)
# Feature size the encoder produces for `input_shape` inputs; sizes each
# per-task classification layer. (input_shape is defined elsewhere in the
# surrounding script.)
classification_layer_dim = cnn_module.get_frm_output_size(input_shape)

# One EmmentalTask per entry in task_list: shared "cnn" module followed by a
# task-specific binary classification head.
tasks = [
    EmmentalTask(
        name=task_name,
        module_pool=nn.ModuleDict(
            {
                "cnn": cnn_module,
                f"classification_module_{task_name}": ClassificationModule(
                    classification_layer_dim, 2
                ),
            }
        ),
        task_flow=[
            {"name": "cnn", "module": "cnn", "inputs": [("_input_", "image")]},
            {
                "name": f"classification_module_{task_name}",
                "module": f"classification_module_{task_name}",
                "inputs": [("cnn", 0)],
            },
        ],
        loss_func=partial(ce_loss, task_name),
        output_func=partial(output, task_name),
        scorer=Scorer(metrics=["accuracy", "roc_auc"]),
    )
    for task_name in task_list
]

# Defining model and trainer
mtl_model = EmmentalModel(name="Chexnet", tasks=tasks)
def create_task(args, entity_symbols=None, slice_datasets=None):
    """Creates a type prediction task.

    Args:
        args: args
        entity_symbols: entity symbols; loaded from the caches configured in
            ``args.data_config`` when not supplied (default None)
        slice_datasets: slice datasets used in scorer (default None)

    Returns:
        EmmentalTask for type prediction
    """
    if entity_symbols is None:
        entity_symbols = EntitySymbols.load_from_cache(
            load_dir=os.path.join(
                args.data_config.entity_dir, args.data_config.entity_map_dir
            ),
            alias_cand_map_file=args.data_config.alias_cand_map,
            alias_idx_file=args.data_config.alias_idx_map,
        )

    # Sentence encoder; shared with other tasks via the BERT_MODEL_NAME key.
    sentence_encoder = BertEncoder(
        args.data_config.word_embedding, output_size=args.model_config.hidden_size
    )

    # Type prediction head; num_types + 1 accounts for the pad type.
    type_pred_module = TypePred(
        args.model_config.hidden_size,
        args.data_config.type_prediction.dim,
        args.data_config.type_prediction.num_types + 1,
        embedding_utils.get_max_candidates(entity_symbols, args.data_config),
    )

    # Slice-aware scorer over the type predictions.
    slice_scorer = BootlegSlicedScorer(
        args.data_config.train_in_candidates, slice_datasets
    )

    return EmmentalTask(
        name=TYPE_PRED_TASK,
        # BERT model will be shared across tasks as long as the name matches.
        module_pool=nn.ModuleDict(
            {BERT_MODEL_NAME: sentence_encoder,
             "type_prediction": type_pred_module}
        ),
        task_flow=[
            {
                "name": BERT_MODEL_NAME,
                "module": BERT_MODEL_NAME,
                # entity_cand_eid is passed to BERT in case of embeddings
                # that require word information.
                "inputs": [
                    ("_input_", "entity_cand_eid"),
                    ("_input_", "token_ids"),
                ],
            },
            {
                "name": "type_prediction",
                "module": "type_prediction",
                # output: embedding_dict, batch_type_pred
                "inputs": [
                    (BERT_MODEL_NAME, 0),  # sentence embedding
                    ("_input_", "start_span_idx"),
                ],
            },
        ],
        loss_func=partial(type_loss, "type_prediction"),
        output_func=partial(type_output, "type_prediction"),
        require_prob_for_eval=False,
        require_pred_for_eval=True,
        scorer=Scorer(
            customize_metric_funcs={
                f"{TYPE_PRED_TASK}_scorer": slice_scorer.type_pred_score
            }
        ),
    )
def create_task(args, entity_symbols=None, slice_datasets=None):
    """Returns an EmmentalTask for named entity disambiguation (NED).

    Wires: BERT encoder -> per-embedding tasks -> embedding payload ->
    attention network -> prediction layer, with a slice-aware scorer.

    Args:
        args: args
        entity_symbols: entity symbols (default None; loaded from the caches
            configured in ``args.data_config`` when not supplied)
        slice_datasets: slice datasets used in scorer (default None)

    Returns:
        EmmentalTask for NED

    Raises:
        ValueError: If ``args.model_config.attn_class`` is not one of
            "BootlegM2E", "Bootleg" or "BERTNED".
    """
    if entity_symbols is None:
        entity_symbols = EntitySymbols.load_from_cache(
            load_dir=os.path.join(args.data_config.entity_dir,
                                  args.data_config.entity_map_dir),
            alias_cand_map_file=args.data_config.alias_cand_map,
            alias_idx_file=args.data_config.alias_idx_map,
        )

    # Create sentence encoder
    bert_model = BertEncoder(args.data_config.word_embedding,
                             output_size=args.model_config.hidden_size)

    # Gets the tasks that query for the individual embeddings
    # (e.g., word, entity, type, kg)
    # The device dict will store which embedding modules we want on the cpu
    (
        embedding_task_flows,  # task flows for standard embeddings (e.g., kg, type, entity)
        embedding_module_pool,  # module for standard embeddings
        embedding_module_device_dict,  # module device dict for standard embeddings
        # some embeddings output indices for BERT so we handle these embeddings in our BERT layer
        # (see comments in get_through_bert_embedding_tasks)
        extra_bert_embedding_layers,
        embedding_payload_inputs,  # the layers that are fed into the payload
        embedding_total_sizes,  # total size of all embeddings
    ) = get_embedding_tasks(args, entity_symbols)

    # Add the extra embedding layers to BERT module
    for emb_obj in extra_bert_embedding_layers:
        bert_model.add_embedding(emb_obj)

    # Create the embedding payload, attention network, and prediction layer
    # modules; the attention class chosen in the config decides the trio.
    if args.model_config.attn_class == "BootlegM2E":
        embedding_payload = EmbeddingPayload(args, entity_symbols,
                                             embedding_total_sizes)
        attn_network = BootlegM2E(args, entity_symbols)
        pred_layer = PredictionLayer(args)
    elif args.model_config.attn_class == "Bootleg":
        embedding_payload = EmbeddingPayload(args, entity_symbols,
                                             embedding_total_sizes)
        attn_network = Bootleg(args, entity_symbols)
        pred_layer = PredictionLayer(args)
    elif args.model_config.attn_class == "BERTNED":
        # Baseline model
        embedding_payload = EmbeddingPayloadBase(args, entity_symbols,
                                                 embedding_total_sizes)
        attn_network = BERTNED(args, entity_symbols)
        pred_layer = NoopPredictionLayer(args)
    else:
        raise ValueError(f"{args.model_config.attn_class} is not supported.")

    # Slice-aware scorer over the disambiguation predictions.
    sliced_scorer = BootlegSlicedScorer(args.data_config.train_in_candidates,
                                        slice_datasets)

    # Create module pool and combine with embedding module pool
    module_pool = nn.ModuleDict({
        BERT_MODEL_NAME: bert_model,
        "embedding_payload": embedding_payload,
        "attn_network": attn_network,
        PRED_LAYER: pred_layer,
    })
    module_pool.update(embedding_module_pool)

    # Create task flow
    task_flow = [
        {
            "name": BERT_MODEL_NAME,
            "module": BERT_MODEL_NAME,
            "inputs": [
                ("_input_", "entity_cand_eid"),
                ("_input_", "token_ids"),
            ],  # We pass the entity_cand_eids to BERT in case of embeddings that require word information
        },
        *embedding_task_flows,  # Add task flows to create embedding inputs
        {
            "name": "embedding_payload",
            "module": "embedding_payload",  # outputs: embedding_tensor
            "inputs": [
                ("_input_", "start_span_idx"),
                ("_input_", "end_span_idx"),
                *embedding_payload_inputs,  # all embeddings
            ],
        },
        {
            "name": "attn_network",
            "module": "attn_network",  # output: predictions from layers, output entity embeddings
            "inputs": [
                (BERT_MODEL_NAME, 0),  # sentence embedding
                (BERT_MODEL_NAME, 1),  # sentence embedding mask
                ("embedding_payload", 0),
                ("_input_", "entity_cand_eid_mask"),
                ("_input_", "start_span_idx"),
                ("_input_", "end_span_idx"),
                (
                    "_input_",
                    "batch_on_the_fly_kg_adj",
                ),  # special kg adjacency embedding prepped in dataloader
            ],
        },
        {
            "name": PRED_LAYER,
            "module": PRED_LAYER,
            "inputs": [
                (
                    "attn_network",
                    "intermed_scores",
                ),  # output predictions from intermediate layers from the model
                (
                    "attn_network",
                    "ent_embs",
                ),  # output entity embeddings (from all KG modules)
                (
                    "attn_network",
                    "final_scores",
                ),  # score (empty except for baseline model)
            ],
        },
    ]

    return EmmentalTask(
        name=NED_TASK,
        module_pool=module_pool,
        task_flow=task_flow,
        loss_func=disambig_loss,
        output_func=disambig_output,
        require_prob_for_eval=False,
        require_pred_for_eval=True,
        # action_outputs are used to stitch together sentence fragments
        action_outputs=[
            ("_input_", "sent_idx"),
            ("_input_", "subsent_idx"),
            ("_input_", "alias_orig_list_pos"),
            ("_input_", "for_dump_gold_cand_K_idx_train"),
            (PRED_LAYER, "ent_embs"),  # entity embeddings
        ],
        scorer=Scorer(customize_metric_funcs={
            f"{NED_TASK}_scorer": sliced_scorer.bootleg_score
        }),
        module_device=embedding_module_device_dict,
    )
def create_task(
    task_names: Union[str, List[str]],
    n_arities: Union[int, List[int]],
    n_features: int,
    n_classes: Union[int, List[int]],
    emb_layer: Optional[EmbeddingModule],
    model: str = "LSTM",
    mode: str = "MTL",
) -> List[EmmentalTask]:
    """Create task from relation(s).

    :param task_names: Relation name(s), If str, only one relation; If
        List[str], multiple relations.
    :param n_arities: The arity of each relation.
    :param n_features: The multimodal feature set size.
    :param n_classes: Number of classes for each task. (Only support
        classification task now).
    :param emb_layer: The embedding layer for LSTM. No need for
        LogisticRegression model.
    :param model: Model name (available models: "LSTM",
        "LogisticRegression"), defaults to "LSTM".
    :param mode: Learning mode (available modes: "STL", "MTL"),
        defaults to "MTL".
    """
    if model not in ["LSTM", "LogisticRegression"]:
        raise ValueError(
            f"Unrecognized model {model}. Only support {['LSTM', 'LogisticRegression']}"
        )

    if mode not in ["STL", "MTL"]:
        raise ValueError(
            f"Unrecognized mode {mode}. Only support {['STL', 'MTL']}")

    config = get_config()["learning"][model]
    logger.info(f"{model} model config: {config}")

    # Normalize scalar arguments so a single relation can be passed as str/int.
    if not isinstance(task_names, list):
        task_names = [task_names]
    if not isinstance(n_arities, list):
        n_arities = [n_arities]
    if not isinstance(n_classes, list):
        n_classes = [n_classes]

    tasks = []

    for task_name, n_arity, n_class in zip(task_names, n_arities, n_classes):
        # In MTL mode all tasks share one feature module; in STL each task
        # gets its own.
        if mode == "MTL":
            feature_module_name = "shared_feature"
        else:
            feature_module_name = f"{task_name}_feature"

        if model == "LSTM":
            # One embedding layer + sparse feature projection, plus one LSTM
            # per mention slot (arity) and a head concatenating all of them.
            module_pool = nn.ModuleDict({
                "emb": emb_layer,
                feature_module_name: SparseLinear(n_features + 1,
                                                  config["hidden_dim"],
                                                  bias=config["bias"]),
            })
            for i in range(n_arity):
                module_pool.update({
                    f"{task_name}_lstm{i}": RNN(
                        num_classes=0,
                        emb_size=emb_layer.dim,
                        lstm_hidden=config["hidden_dim"],
                        attention=config["attention"],
                        dropout=config["dropout"],
                        bidirectional=config["bidirectional"],
                    )
                })
            # Head input dim: each bidirectional LSTM contributes 2x hidden,
            # plus one hidden-dim block for the sparse feature projection.
            module_pool.update({
                f"{task_name}_pred_head": ConcatLinear(
                    [f"{task_name}_lstm{i}" for i in range(n_arity)] +
                    [feature_module_name],
                    config["hidden_dim"] * (2 * n_arity + 1)
                    if config["bidirectional"]
                    else config["hidden_dim"] * (n_arity + 1),
                    n_class,
                )
            })

            # Flow: embed each mention, run its LSTM (with mask), project the
            # sparse features, then the concat head (inputs=None: the head
            # gathers its own inputs by module name).
            task_flow = []
            task_flow += [{
                "name": f"{task_name}_emb{i}",
                "module": "emb",
                "inputs": [("_input_", f"m{i}")],
            } for i in range(n_arity)]
            task_flow += [{
                "name": f"{task_name}_lstm{i}",
                "module": f"{task_name}_lstm{i}",
                "inputs": [(f"{task_name}_emb{i}", 0),
                           ("_input_", f"m{i}_mask")],
            } for i in range(n_arity)]
            task_flow += [{
                "name": feature_module_name,
                "module": feature_module_name,
                "inputs": [
                    ("_input_", "feature_index"),
                    ("_input_", "feature_weight"),
                ],
            }]
            task_flow += [{
                "name": f"{task_name}_pred_head",
                "module": f"{task_name}_pred_head",
                "inputs": None,
            }]
        elif model == "LogisticRegression":
            # Sparse feature projection straight into the concat head.
            module_pool = nn.ModuleDict({
                feature_module_name: SparseLinear(n_features + 1,
                                                  config["hidden_dim"],
                                                  bias=config["bias"]),
                f"{task_name}_pred_head": ConcatLinear([feature_module_name],
                                                       config["hidden_dim"],
                                                       n_class),
            })

            task_flow = [
                {
                    "name": feature_module_name,
                    "module": feature_module_name,
                    "inputs": [
                        ("_input_", "feature_index"),
                        ("_input_", "feature_weight"),
                    ],
                },
                {
                    "name": f"{task_name}_pred_head",
                    "module": f"{task_name}_pred_head",
                    "inputs": None,
                },
            ]
        else:
            raise ValueError(f"Unrecognized model {model}.")

        tasks.append(
            EmmentalTask(
                name=task_name,
                module_pool=module_pool,
                task_flow=task_flow,
                loss_func=partial(loss, f"{task_name}_pred_head"),
                output_func=partial(output, f"{task_name}_pred_head"),
                scorer=Scorer(
                    metrics=["accuracy", "precision", "recall", "f1"]),
            ))

    return tasks
def test_e2e(caplog):
    """Run an end-to-end test.

    Builds two synthetic binary tasks on 2-d points, trains a two-task MTL
    model for 10 epochs, and checks both tasks beat chance on the test split.
    """
    caplog.set_level(logging.INFO)

    dirpath = "temp_test_e2e"

    Meta.reset()
    emmental.init(dirpath)

    # Generate synthetic data: two nearly-linearly-separable binary tasks.
    N = 50

    X = np.random.random((N, 2)) * 2 - 1
    Y1 = (X[:, 0] > X[:, 1] + 0.25).astype(int) + 1
    Y2 = (-X[:, 0] > X[:, 1] + 0.25).astype(int) + 1

    # Create dataset and dataloader
    splits = [0.8, 0.1, 0.1]

    X_train, X_dev, X_test = [], [], []
    Y1_train, Y1_dev, Y1_test = [], [], []
    Y2_train, Y2_dev, Y2_test = [], [], []

    for i in range(N):
        if i <= N * splits[0]:
            X_train.append(torch.Tensor(X[i]))
            Y1_train.append(Y1[i])
            Y2_train.append(Y2[i])
        elif i < N * (splits[0] + splits[1]):
            X_dev.append(torch.Tensor(X[i]))
            Y1_dev.append(Y1[i])
            Y2_dev.append(Y2[i])
        else:
            X_test.append(torch.Tensor(X[i]))
            Y1_test.append(Y1[i])
            Y2_test.append(Y2[i])

    Y1_train = torch.from_numpy(np.array(Y1_train))
    Y1_dev = torch.from_numpy(np.array(Y1_dev))
    Y1_test = torch.from_numpy(np.array(Y1_test))

    # BUG FIX: previously Y2_train was built from Y1_train, so task2 was
    # trained on task1's labels.
    Y2_train = torch.from_numpy(np.array(Y2_train))
    Y2_dev = torch.from_numpy(np.array(Y2_dev))
    Y2_test = torch.from_numpy(np.array(Y2_test))

    train_dataset1 = EmmentalDataset(
        name="synthetic", X_dict={"data": X_train}, Y_dict={"label1": Y1_train}
    )
    train_dataset2 = EmmentalDataset(
        name="synthetic", X_dict={"data": X_train}, Y_dict={"label2": Y2_train}
    )
    dev_dataset1 = EmmentalDataset(
        name="synthetic", X_dict={"data": X_dev}, Y_dict={"label1": Y1_dev}
    )
    dev_dataset2 = EmmentalDataset(
        name="synthetic", X_dict={"data": X_dev}, Y_dict={"label2": Y2_dev}
    )
    # BUG FIX: test_dataset1 previously labeled "label1" with Y2_test,
    # making the task1 test score meaningless.
    test_dataset1 = EmmentalDataset(
        name="synthetic", X_dict={"data": X_test}, Y_dict={"label1": Y1_test}
    )
    test_dataset2 = EmmentalDataset(
        name="synthetic", X_dict={"data": X_test}, Y_dict={"label2": Y2_test}
    )

    task_to_label_dict = {"task1": "label1"}

    train_dataloader1 = EmmentalDataLoader(
        task_to_label_dict=task_to_label_dict,
        dataset=train_dataset1,
        split="train",
        batch_size=10,
    )
    dev_dataloader1 = EmmentalDataLoader(
        task_to_label_dict=task_to_label_dict,
        dataset=dev_dataset1,
        split="valid",
        batch_size=10,
    )
    test_dataloader1 = EmmentalDataLoader(
        task_to_label_dict=task_to_label_dict,
        dataset=test_dataset1,
        split="test",
        batch_size=10,
    )

    task_to_label_dict = {"task2": "label2"}

    train_dataloader2 = EmmentalDataLoader(
        task_to_label_dict=task_to_label_dict,
        dataset=train_dataset2,
        split="train",
        batch_size=10,
    )
    dev_dataloader2 = EmmentalDataLoader(
        task_to_label_dict=task_to_label_dict,
        dataset=dev_dataset2,
        split="valid",
        batch_size=10,
    )
    test_dataloader2 = EmmentalDataLoader(
        task_to_label_dict=task_to_label_dict,
        dataset=test_dataset2,
        split="test",
        batch_size=10,
    )

    # Create task

    def ce_loss(task_name, immediate_output_dict, Y, active):
        # Labels are 1-based; shift to 0-based for cross_entropy.
        module_name = f"{task_name}_pred_head"
        return F.cross_entropy(
            immediate_output_dict[module_name][0][active],
            (Y.view(-1) - 1)[active],
        )

    def output(task_name, immediate_output_dict):
        module_name = f"{task_name}_pred_head"
        return F.softmax(immediate_output_dict[module_name][0], dim=1)

    task_name = "task1"

    task1 = EmmentalTask(
        name=task_name,
        module_pool=nn.ModuleDict(
            {"input_module": nn.Linear(2, 8), f"{task_name}_pred_head": nn.Linear(8, 2)}
        ),
        task_flow=[
            {
                "name": "input",
                "module": "input_module",
                "inputs": [("_input_", "data")],
            },
            {
                "name": f"{task_name}_pred_head",
                "module": f"{task_name}_pred_head",
                "inputs": [("input", 0)],
            },
        ],
        loss_func=partial(ce_loss, task_name),
        output_func=partial(output, task_name),
        scorer=Scorer(metrics=["accuracy", "roc_auc"]),
    )

    task_name = "task2"

    task2 = EmmentalTask(
        name=task_name,
        module_pool=nn.ModuleDict(
            {"input_module": nn.Linear(2, 8), f"{task_name}_pred_head": nn.Linear(8, 2)}
        ),
        task_flow=[
            {
                "name": "input",
                "module": "input_module",
                "inputs": [("_input_", "data")],
            },
            {
                "name": f"{task_name}_pred_head",
                "module": f"{task_name}_pred_head",
                "inputs": [("input", 0)],
            },
        ],
        loss_func=partial(ce_loss, task_name),
        output_func=partial(output, task_name),
        scorer=Scorer(metrics=["accuracy", "roc_auc"]),
    )

    # Build model
    mtl_model = EmmentalModel(name="all", tasks=[task1, task2])

    # Create learner
    emmental_learner = EmmentalLearner()

    # Update learning config
    Meta.update_config(
        config={"learner_config": {"n_epochs": 10, "optimizer_config": {"lr": 0.01}}}
    )

    # Learning
    emmental_learner.learn(
        mtl_model,
        [train_dataloader1, train_dataloader2, dev_dataloader1, dev_dataloader2],
    )

    test1_score = mtl_model.score(test_dataloader1)
    test2_score = mtl_model.score(test_dataloader2)

    assert test1_score["task1/synthetic/test/accuracy"] >= 0.5
    assert test1_score["task1/synthetic/test/roc_auc"] >= 0.6
    assert test2_score["task2/synthetic/test/accuracy"] >= 0.5
    assert test2_score["task2/synthetic/test/roc_auc"] >= 0.6

    shutil.rmtree(dirpath)
def test_e2e(caplog):
    """Run an end-to-end test: two synthetic binary tasks trained jointly,
    with checkpointing/logging configured, then scored on held-out splits."""
    caplog.set_level(logging.INFO)

    dirpath = "temp_test_e2e"
    use_exact_log_path = False
    Meta.reset()
    emmental.init(dirpath, use_exact_log_path=use_exact_log_path)

    # Full learner/logging config: 3 epochs, per-epoch evaluation, tensorboard
    # writer, and checkpointing on min train loss (intermediate checkpoints
    # cleaned up at the end).
    config = {
        "meta_config": {"seed": 0},
        "learner_config": {
            "n_epochs": 3,
            "optimizer_config": {"lr": 0.01, "grad_clip": 100},
        },
        "logging_config": {
            "counter_unit": "epoch",
            "evaluation_freq": 1,
            "writer_config": {"writer": "tensorboard", "verbose": True},
            "checkpointing": True,
            "checkpointer_config": {
                "checkpoint_path": None,
                "checkpoint_freq": 1,
                "checkpoint_metric": {"model/all/train/loss": "min"},
                "checkpoint_task_metrics": None,
                "checkpoint_runway": 1,
                "checkpoint_all": False,
                "clear_intermediate_checkpoints": True,
                "clear_all_checkpoints": True,
            },
        },
    }
    emmental.Meta.update_config(config)

    # Generate synthetic data: 2-D points in [-1, 1]^2 with two linearly
    # separable labelings (slightly different offsets so the tasks differ).
    N = 500

    X = np.random.random((N, 2)) * 2 - 1
    Y1 = (X[:, 0] > X[:, 1] + 0.25).astype(int)
    Y2 = (X[:, 0] > X[:, 1] + 0.2).astype(int)

    X = [torch.Tensor(X[i]) for i in range(N)]

    # Create dataset and dataloader; 80/10/10 train/dev/test split.
    X_train, X_dev, X_test = (
        X[: int(0.8 * N)],
        X[int(0.8 * N): int(0.9 * N)],
        X[int(0.9 * N):],
    )
    Y1_train, Y1_dev, Y1_test = (
        torch.tensor(Y1[: int(0.8 * N)]),
        torch.tensor(Y1[int(0.8 * N): int(0.9 * N)]),
        torch.tensor(Y1[int(0.9 * N):]),
    )
    Y2_train, Y2_dev, Y2_test = (
        torch.tensor(Y2[: int(0.8 * N)]),
        torch.tensor(Y2[int(0.8 * N): int(0.9 * N)]),
        torch.tensor(Y2[int(0.9 * N):]),
    )

    # One dataset per (split, task); both tasks share the same features.
    train_dataset1 = EmmentalDataset(
        name="synthetic", X_dict={"data": X_train}, Y_dict={"label1": Y1_train}
    )
    train_dataset2 = EmmentalDataset(
        name="synthetic", X_dict={"data": X_train}, Y_dict={"label2": Y2_train}
    )
    dev_dataset1 = EmmentalDataset(
        name="synthetic", X_dict={"data": X_dev}, Y_dict={"label1": Y1_dev}
    )
    dev_dataset2 = EmmentalDataset(
        name="synthetic", X_dict={"data": X_dev}, Y_dict={"label2": Y2_dev}
    )
    test_dataset1 = EmmentalDataset(
        name="synthetic", X_dict={"data": X_test}, Y_dict={"label1": Y1_test}
    )
    test_dataset2 = EmmentalDataset(
        name="synthetic", X_dict={"data": X_test}, Y_dict={"label2": Y2_test}
    )

    # Dataloaders map task name -> label key in the dataset's Y_dict.
    task_to_label_dict = {"task1": "label1"}

    train_dataloader1 = EmmentalDataLoader(
        task_to_label_dict=task_to_label_dict,
        dataset=train_dataset1,
        split="train",
        batch_size=10,
    )
    dev_dataloader1 = EmmentalDataLoader(
        task_to_label_dict=task_to_label_dict,
        dataset=dev_dataset1,
        split="valid",
        batch_size=10,
    )
    test_dataloader1 = EmmentalDataLoader(
        task_to_label_dict=task_to_label_dict,
        dataset=test_dataset1,
        split="test",
        batch_size=10,
    )

    task_to_label_dict = {"task2": "label2"}

    train_dataloader2 = EmmentalDataLoader(
        task_to_label_dict=task_to_label_dict,
        dataset=train_dataset2,
        split="train",
        batch_size=10,
    )
    dev_dataloader2 = EmmentalDataLoader(
        task_to_label_dict=task_to_label_dict,
        dataset=dev_dataset2,
        split="valid",
        batch_size=10,
    )
    test_dataloader2 = EmmentalDataLoader(
        task_to_label_dict=task_to_label_dict,
        dataset=test_dataset2,
        split="test",
        batch_size=10,
    )

    # Create task
    # Loss/output closures: pick the task's prediction head output out of the
    # intermediate output dict ("ouput" typo is in the local parameter name
    # only and is harmless). `active` masks the examples labeled for this task.
    def ce_loss(task_name, immediate_ouput_dict, Y, active):
        module_name = f"{task_name}_pred_head"
        return F.cross_entropy(
            immediate_ouput_dict[module_name][0][active], (Y.view(-1))[active]
        )

    def output(task_name, immediate_ouput_dict):
        module_name = f"{task_name}_pred_head"
        return F.softmax(immediate_ouput_dict[module_name][0], dim=1)

    task_metrics = {"task1": ["accuracy"], "task2": ["accuracy", "roc_auc"]}

    # Two structurally identical tasks: shared-shape Linear(2, 8) trunk into a
    # per-task Linear(8, 2) prediction head.
    tasks = [
        EmmentalTask(
            name=task_name,
            module_pool=nn.ModuleDict(
                {
                    "input_module": nn.Linear(2, 8),
                    f"{task_name}_pred_head": nn.Linear(8, 2),
                }
            ),
            task_flow=[
                {
                    "name": "input",
                    "module": "input_module",
                    "inputs": [("_input_", "data")],
                },
                {
                    "name": f"{task_name}_pred_head",
                    "module": f"{task_name}_pred_head",
                    "inputs": [("input", 0)],
                },
            ],
            loss_func=partial(ce_loss, task_name),
            output_func=partial(output, task_name),
            scorer=Scorer(metrics=task_metrics[task_name]),
        )
        for task_name in ["task1", "task2"]
    ]

    # Build model
    mtl_model = EmmentalModel(name="all", tasks=tasks)

    # Create learner
    emmental_learner = EmmentalLearner()

    # Learning
    emmental_learner.learn(
        mtl_model,
        [train_dataloader1, train_dataloader2, dev_dataloader1, dev_dataloader2],
    )

    test1_score = mtl_model.score(test_dataloader1)
    test2_score = mtl_model.score(test_dataloader2)

    # Synthetic data is linearly separable, so modest thresholds suffice.
    assert test1_score["task1/synthetic/test/accuracy"] >= 0.7
    # With a single task scored, the model macro average equals that task's metric.
    assert (
        test1_score["model/all/test/macro_average"]
        == test1_score["task1/synthetic/test/accuracy"]
    )
    assert test2_score["task2/synthetic/test/accuracy"] >= 0.7
    assert test2_score["task2/synthetic/test/roc_auc"] >= 0.7

    shutil.rmtree(dirpath)
def add_slice_tasks(task_name, base_task, slice_func_dict, hidden_dim: int = 1024):
    """Expand a base task into slice indicator/predictor tasks plus a master task.

    For each slicing function this creates:
      - an indicator ("ind") task with a fresh binary head predicting
        slice membership, and
      - a predictor ("pred") task with a per-slice transform feeding a
        head Linear shared across all slices,
    then appends a master task that combines them via SliceMasterModule.

    Args:
        task_name: Base task name; `{task_name}_feature` and
            `{task_name}_pred_head` must exist in the base task.
        base_task: The EmmentalTask to expand. NOTE: its module_pool and
            task_flow are mutated in place (the original pred head is deleted
            and slice heads are added).
        slice_func_dict: Mapping of slice name -> slicing function; only the
            keys are used here.
        hidden_dim: Width of the per-slice transform feature, defaults to 1024.

    Returns:
        List of EmmentalTask: all ind tasks, all pred tasks, then the master task.
    """
    tasks = []

    # base task info
    base_module_pool = base_task.module_pool
    base_task_flow = base_task.task_flow
    base_scorer = base_task.scorer

    # sanity check the model
    # NOTE(review): the message says "module_pool" but the check is against
    # task_flow action names; message also reads "should in" — kept as-is.
    assert f"{task_name}_feature" in [
        x["name"] for x in base_task_flow
    ], f"{task_name}_feature should in the task module_pool"
    assert isinstance(
        base_module_pool[f"{task_name}_pred_head"], nn.Linear
    ), f"{task_name}_pred_head should be a nn.Linear layer"

    # extract last layer info
    # NOTE(review): the dataparallel branch unwraps `.module`, but the assert
    # above requires a plain nn.Linear — these two can't both hold; presumably
    # only the non-dataparallel path is exercised. TODO confirm.
    last_linear_layer_size = ((
        base_module_pool[f"{task_name}_pred_head"].module.in_features,
        base_module_pool[f"{task_name}_pred_head"].module.out_features,
    ) if Meta.config["model_config"]["dataparallel"] else (
        base_module_pool[f"{task_name}_pred_head"].in_features,
        base_module_pool[f"{task_name}_pred_head"].out_features,
    ))

    # remove the origin head (module and its task-flow action)
    del base_module_pool[f"{task_name}_pred_head"]
    for idx, i in enumerate(base_task_flow):
        if i["name"] == f"{task_name}_pred_head":
            action_idx = idx
            break
    del base_task_flow[action_idx]

    # ind heads
    # NOTE(review): `type` shadows the builtin; kept byte-identical here.
    type = "ind"
    for slice_name in slice_func_dict.keys():
        # Alias, not a copy: every slice task shares one module pool, so each
        # iteration's new head accumulates into it. Emmental merges module
        # pools by key when tasks are added, so sharing looks intentional.
        slice_ind_module_pool = base_module_pool
        slice_ind_module_pool[
            f"{task_name}_slice_{type}_{slice_name}_head"] = nn.Linear(
                last_linear_layer_size[0], 2)

        # Binary membership head fed by the shared feature extractor.
        slice_ind_task_flow = base_task_flow + [{
            "name": f"{task_name}_slice_{type}_{slice_name}_head",
            "module": f"{task_name}_slice_{type}_{slice_name}_head",
            "inputs": [(f"{task_name}_feature", 0)],
        }]

        task = EmmentalTask(
            name=f"{task_name}_slice_{type}_{slice_name}",
            module_pool=slice_ind_module_pool,
            task_flow=slice_ind_task_flow,
            loss_func=partial(ce_loss,
                              f"{task_name}_slice_{type}_{slice_name}_head"),
            output_func=partial(output,
                                f"{task_name}_slice_{type}_{slice_name}_head"),
            scorer=Scorer(metrics=["f1", "accuracy"]),
        )
        tasks.append(task)

    # pred heads
    type = "pred"
    # One Linear head shared by every slice predictor task.
    shared_linear_module = nn.Linear(hidden_dim, last_linear_layer_size[1])
    for slice_name in slice_func_dict.keys():
        slice_pred_module_pool = base_module_pool
        # Per-slice transform: feature size -> hidden_dim.
        slice_pred_module_pool[
            f"{task_name}_slice_feat_{slice_name}"] = nn.Linear(
                last_linear_layer_size[0], hidden_dim)
        slice_pred_module_pool[
            f"{task_name}_slice_{type}_linear_head"] = shared_linear_module

        # Flow: feature -> per-slice transform -> shared linear head
        # (head action is named per-slice but runs the shared module).
        slice_pred_task_flow = base_task_flow + [
            {
                "name": f"{task_name}_slice_feat_{slice_name}",
                "module": f"{task_name}_slice_feat_{slice_name}",
                "inputs": [(f"{task_name}_feature", 0)],
            },
            {
                "name": f"{task_name}_slice_{type}_{slice_name}_head",
                "module": f"{task_name}_slice_{type}_linear_head",
                "inputs": [(f"{task_name}_slice_feat_{slice_name}", 0)],
            },
        ]

        task = EmmentalTask(
            name=f"{task_name}_slice_{type}_{slice_name}",
            module_pool=slice_pred_module_pool,
            task_flow=slice_pred_task_flow,
            loss_func=partial(ce_loss,
                              f"{task_name}_slice_{type}_{slice_name}_head"),
            output_func=partial(output,
                                f"{task_name}_slice_{type}_{slice_name}_head"),
            scorer=base_scorer,
        )
        tasks.append(task)

    # master
    # SliceMasterModule takes no explicit flow inputs — presumably it reads
    # the other slice tasks' intermediate outputs internally. TODO confirm.
    master_task = EmmentalTask(
        name=f"{task_name}",
        module_pool=nn.ModuleDict({
            f"{task_name}_pred_feat": master_module.SliceMasterModule(),
            f"{task_name}_pred_head": nn.Linear(hidden_dim,
                                                last_linear_layer_size[1]),
        }),
        task_flow=[
            {
                "name": f"{task_name}_pred_feat",
                "module": f"{task_name}_pred_feat",
                "inputs": [],
            },
            {
                "name": f"{task_name}_pred_head",
                "module": f"{task_name}_pred_head",
                "inputs": [(f"{task_name}_pred_feat", 0)],
            },
        ],
        loss_func=partial(ce_loss, f"{task_name}_pred_head"),
        output_func=partial(output, f"{task_name}_pred_head"),
        scorer=base_scorer,
    )
    tasks.append(master_task)

    return tasks
def build_slice_tasks(
    task: EmmentalTask,
    slice_func_dict: Dict[str, Callable],
    slice_scorer: Optional[Scorer] = None,
    slice_distribution: Dict[str, Tensor] = {},
    dropout: float = 0.0,
    slice_ind_head_module: Optional[nn.Module] = None,
    sep_slice_ind_feature: bool = False,
) -> List[EmmentalTask]:
    """Build slice tasks based on slicing functions.

    We assume the original task flow contains feature extractor and predictor
    head.

    - The predictor head action should be the last action

    - The feature extractor action should be input of the predictor head action

    For each slicing function this will create two corresponding tasks

    - A slice indicator task to learn whether the data sample is in the slice
      or not.

    - A slice predictor task that is only learned on the data samples in that
      slice

    All slice tasks are based on feature extractor module and a slice attention
    module will combine all slice task heads to make the final predictions.

    Args:
        task: Task to do slicing learning.
        slice_func_dict: Slicing functions.
        slice_scorer: Slice scorer, defaults to None.
        slice_distribution: Slice data class distribution, defaults to {}.
        dropout: Dropout, defaults to 0.0.
        slice_ind_head_module: Slice indicator head module, defaults to None.
        sep_slice_ind_feature: Whether to use sep slice ind feature,
            defaults to False.

    Returns:
        List of tasks.
    """
    # Collect task predictor module info
    base_task_predictor_action = task.task_flow[-1]
    base_task_predictor_module = task.module_pool[base_task_predictor_action.module]
    # Unwrap DataParallel so in_features/out_features are reachable.
    if isinstance(base_task_predictor_module, nn.DataParallel):
        base_task_predictor_module = base_task_predictor_module.module

    task_feature_size = base_task_predictor_module.in_features
    task_cardinality = base_task_predictor_module.out_features

    # Remove the predictor head module and action
    # NOTE(review): this mutates the caller's task.module_pool in place.
    base_task_module_pool = task.module_pool
    del base_task_module_pool[base_task_predictor_action.module]  # type: ignore

    base_task_task_flow = task.task_flow[:-1]

    tasks = []

    # slice_module_pool / slice_actions accumulate every slice-specific module
    # and action; the master task at the end is built from these.
    slice_module_pool = nn.ModuleDict()
    for module_name, module in task.module_pool.items():
        slice_module_pool[module_name] = module
    slice_actions = [action for action in base_task_task_flow]

    # Default indicator head: binary Linear over the task feature.
    if slice_ind_head_module is None:
        slice_ind_head_module = nn.Linear(task_feature_size, 2)

    assert isinstance(slice_ind_head_module, nn.Module)

    # Fall back to f1 scoring for indicator tasks when no valid scorer given.
    if slice_scorer is None or not isinstance(slice_scorer, Scorer):
        slice_scorer = Scorer(metrics=["f1"])

    # Create slice indicator tasks.
    # (Note: indicator only has two classes, e.g, in the slice or out)
    for slice_name in slice_func_dict.keys():
        # Create task name
        ind_task_name = f"{task.name}_slice:ind_{slice_name}"

        # Create ind module
        ind_head_module_name = f"{ind_task_name}_head"
        # Each slice gets its own copy of the indicator head template.
        ind_head_module = copy.deepcopy(slice_ind_head_module)

        ind_head_dropout_module_name = f"{task.name}_slice:dropout_{slice_name}"
        ind_head_dropout_module = nn.Dropout(p=dropout)

        # Create module_pool
        ind_module_pool = nn.ModuleDict(
            {
                module_name: module
                for module_name, module in base_task_module_pool.items()
            }
        )
        ind_module_pool[ind_head_dropout_module_name] = ind_head_dropout_module
        ind_module_pool[ind_head_module_name] = ind_head_module

        # The indicator consumes the same feature the original predictor did;
        # with sep_slice_ind_feature it takes that action's output index 1
        # instead of 0 (a separate feature slot).
        assert len(base_task_predictor_action.inputs) == 1
        ind_head_dropout_module_input_name = base_task_predictor_action.inputs[0][0]
        ind_head_dropout_module_input_idx = 1 if sep_slice_ind_feature else 0

        # Create task_flow
        ind_task_flow = [action for action in base_task_task_flow]
        ind_task_flow.extend(
            [
                Action(
                    name=ind_head_dropout_module_name,
                    module=ind_head_dropout_module_name,
                    inputs=[
                        (
                            ind_head_dropout_module_input_name,
                            ind_head_dropout_module_input_idx,
                        )
                    ],
                ),
                Action(
                    name=ind_head_module_name,
                    module=ind_head_module_name,
                    inputs=[(ind_head_dropout_module_name, 0)],
                ),
            ]
        )

        # Add slice specific module to slice_module_pool (for the master task)
        slice_module_pool[ind_head_module_name] = ind_head_module
        slice_actions.extend(
            [
                Action(
                    name=ind_head_dropout_module_name,
                    module=ind_head_dropout_module_name,
                    inputs=[
                        (
                            ind_head_dropout_module_input_name,
                            ind_head_dropout_module_input_idx,
                        )
                    ],
                ),
                Action(
                    name=ind_head_module_name,
                    module=ind_head_module_name,
                    inputs=[(ind_head_dropout_module_name, 0)],
                ),
            ]
        )

        # Loss function: class-weighted CE if a distribution is provided.
        if ind_task_name in slice_distribution:
            loss = partial(
                utils.ce_loss,
                ind_head_module_name,
                weight=move_to_device(
                    slice_distribution[ind_task_name],
                    Meta.config["model_config"]["device"],
                ),
            )
        else:
            loss = partial(utils.ce_loss, ind_head_module_name)

        tasks.append(
            EmmentalTask(
                name=ind_task_name,
                module_pool=ind_module_pool,
                task_flow=ind_task_flow,
                loss_func=loss,
                output_func=partial(utils.output, ind_head_module_name),
                scorer=slice_scorer,
            )
        )

    # Create slice predictor tasks

    # Create share predictor for all slice predictor
    shared_pred_head_module_name = f"{task.name}_slice:shared_pred"
    shared_pred_head_module = nn.Linear(task_feature_size, task_cardinality)

    # Add slice specific module to slice_module_pool
    slice_module_pool[shared_pred_head_module_name] = shared_pred_head_module

    for slice_name in slice_func_dict.keys():
        # Create task name
        pred_task_name = f"{task.name}_slice:pred_{slice_name}"

        # Create pred module
        pred_head_module_name = f"{pred_task_name}_head"
        # Per-slice transform feeding the shared head.
        pred_transform_module_name = f"{task.name}_slice:transform_{slice_name}"
        pred_transform_module = nn.Linear(task_feature_size, task_feature_size)

        # Create module_pool
        pred_module_pool = nn.ModuleDict(
            {
                module_name: module
                for module_name, module in base_task_module_pool.items()
            }
        )
        pred_module_pool[pred_transform_module_name] = pred_transform_module
        pred_module_pool[shared_pred_head_module_name] = shared_pred_head_module

        # Create task_flow
        # Note the head action is named per-slice but executes the shared module.
        pred_task_flow = [action for action in base_task_task_flow]
        pred_task_flow.extend(
            [
                Action(
                    name=pred_transform_module_name,
                    module=pred_transform_module_name,
                    inputs=base_task_predictor_action.inputs,
                ),
                Action(
                    name=pred_head_module_name,
                    module=shared_pred_head_module_name,
                    inputs=[(pred_transform_module_name, 0)],
                ),
            ]
        )

        # Add slice specific module to slice_module_pool
        slice_module_pool[pred_transform_module_name] = pred_transform_module
        slice_actions.extend(
            [
                Action(
                    name=pred_transform_module_name,
                    module=pred_transform_module_name,
                    inputs=base_task_predictor_action.inputs,
                ),
                Action(
                    name=pred_head_module_name,
                    module=shared_pred_head_module_name,
                    inputs=[(pred_transform_module_name, 0)],
                ),
            ]
        )

        # Loss function: class-weighted CE if a distribution is provided.
        if pred_task_name in slice_distribution:
            loss = partial(
                utils.ce_loss,
                pred_head_module_name,
                weight=move_to_device(
                    slice_distribution[pred_task_name],
                    Meta.config["model_config"]["device"],
                ),
            )
        else:
            loss = partial(utils.ce_loss, pred_head_module_name)

        tasks.append(
            EmmentalTask(
                name=pred_task_name,
                module_pool=pred_module_pool,
                task_flow=pred_task_flow,
                loss_func=loss,
                output_func=partial(utils.output, pred_head_module_name),
                scorer=task.scorer,
            )
        )

    # Create master task

    # Create task name
    master_task_name = task.name

    # Create attention module
    # The keys tell the attention module how to locate indicator outputs,
    # predictor outputs, and transformed features among the slice actions.
    master_attention_module_name = f"{master_task_name}_attention"
    master_attention_module = SliceAttentionModule(
        slice_ind_key="_slice:ind_",
        slice_pred_key="_slice:pred_",
        slice_pred_feat_key="_slice:transform_",
    )

    # Create module pool
    master_head_module_name = f"{master_task_name}_head"
    # Reuses the original task's predictor head (removed from the pool above).
    master_head_module = base_task_predictor_module

    master_module_pool = slice_module_pool
    master_module_pool[master_attention_module_name] = master_attention_module
    master_module_pool[master_head_module_name] = master_head_module

    # Create task_flow: run every slice action, attend over them, then predict.
    master_task_flow = slice_actions + [
        Action(
            name=master_attention_module_name,
            module=master_attention_module_name,
            inputs=[],  # type: ignore
        ),
        Action(
            name=master_head_module_name,
            module=master_head_module_name,
            inputs=[(master_attention_module_name, 0)],
        ),
    ]

    tasks.append(
        EmmentalTask(
            name=master_task_name,
            module_pool=master_module_pool,
            task_flow=master_task_flow,
            loss_func=partial(utils.ce_loss, master_head_module_name),
            output_func=partial(utils.output, master_head_module_name),
            scorer=task.scorer,
        )
    )

    return tasks
def create_task(task_name, args, nclasses, emb_layer):
    """Assemble a text-classification EmmentalTask for the requested encoder.

    Pipeline: embedding -> input adapter -> feature encoder -> dropout ->
    per-task linear prediction head, scored on accuracy.

    Args:
        task_name: Name of the task; also used to name its prediction head.
        args: Parsed arguments; reads `model` ("cnn" | "lstm" | "mlp"),
            `n_filters`, `dim`, `depth`, and `dropout`.
        nclasses: Number of output classes for the prediction head.
        emb_layer: Embedding module exposing its output width as `n_d`.

    Returns:
        The configured EmmentalTask.

    Raises:
        ValueError: If `args.model` is not one of the supported encoders.
    """
    pred_head_name = f"{task_name}_pred_head"

    # Select (input adapter, encoder, encoder output width) by architecture.
    if args.model == "cnn":
        adapter = IdentityModule()
        encoder = CNN(emb_layer.n_d, widths=[3, 4, 5], filters=args.n_filters)
        feature_dim = args.n_filters * 3  # one filter bank per width
    elif args.model == "lstm":
        adapter = IdentityModule()
        encoder = LSTM(emb_layer.n_d, args.dim, args.depth, dropout=args.dropout)
        feature_dim = args.dim
    elif args.model == "mlp":
        # MLP has no sequence model, so average token embeddings first.
        adapter = Average()
        encoder = nn.Sequential(nn.Linear(emb_layer.n_d, args.dim), nn.ReLU())
        feature_dim = args.dim
    else:
        raise ValueError(f"Unrecognized model {args.model}.")

    modules = nn.ModuleDict({
        "emb": emb_layer,
        "input": adapter,
        "feature": encoder,
        "dropout": nn.Dropout(args.dropout),
        pred_head_name: nn.Linear(feature_dim, nclasses),
    })

    flow = [
        {
            "name": "emb",
            "module": "emb",
            "inputs": [("_input_", "feature")],
        },
        {
            "name": "input",
            "module": "input",
            "inputs": [("emb", 0)],
        },
        {
            "name": "feature",
            "module": "feature",
            "inputs": [("input", 0)],
        },
        {
            "name": "dropout",
            "module": "dropout",
            "inputs": [("feature", 0)],
        },
        {
            "name": pred_head_name,
            "module": pred_head_name,
            "inputs": [("dropout", 0)],
        },
    ]

    return EmmentalTask(
        name=task_name,
        module_pool=modules,
        task_flow=flow,
        loss_func=partial(ce_loss, pred_head_name),
        output_func=partial(output, pred_head_name),
        scorer=Scorer(metrics=["accuracy"]),
    )
def get_gule_task(task_names, xlnet_model_name):
    """Create one XLNet-based EmmentalTask per GLUE task name.

    A single XLNet encoder instance is shared across all tasks (hard
    parameter sharing); each task adds its own last-CLS feature module and
    linear prediction head.

    Args:
        task_names: Iterable of GLUE task names (keys of LABEL_MAPPING /
            METRIC_MAPPING, e.g. "STS-B").
        xlnet_model_name: Pretrained XLNet model name; names containing
            "base" produce 768-dim features, otherwise 1024.

    Returns:
        Dict mapping each task name to its EmmentalTask.
    """
    tasks = {}

    # Shared encoder for all tasks.
    xlnet_module = XLNetModule(xlnet_model_name)
    xlnet_output_dim = 768 if "base" in xlnet_model_name else 1024
    last_hidden_dropout_prob = 0.0

    for task_name in task_names:
        # Regression tasks have no label mapping and cardinality 1.
        task_cardinality = (
            len(LABEL_MAPPING[task_name])
            if LABEL_MAPPING[task_name] is not None
            else 1
        )

        metrics = METRIC_MAPPING[task_name]

        # STS-B is a regression task (MSE); every other GLUE task is
        # classification (cross-entropy).
        loss_fn = partial(
            mse_loss if task_name == "STS-B" else ce_loss, task_name
        )

        task = EmmentalTask(
            name=task_name,
            module_pool=nn.ModuleDict({
                "xlnet_module": xlnet_module,
                f"{task_name}_feature":
                    XLNetLastCLSModule(dropout_prob=last_hidden_dropout_prob),
                f"{task_name}_pred_head":
                    nn.Linear(xlnet_output_dim, task_cardinality),
            }),
            task_flow=[
                {
                    "name": f"{task_name}_xlnet_module",
                    "module": "xlnet_module",
                    "inputs": [
                        ("_input_", "token_ids"),
                        ("_input_", "token_segments"),
                        ("_input_", "token_masks"),
                    ],
                },
                {
                    "name": f"{task_name}_feature",
                    "module": f"{task_name}_feature",
                    "inputs": [(f"{task_name}_xlnet_module", 0)],
                },
                {
                    "name": f"{task_name}_pred_head",
                    "module": f"{task_name}_pred_head",
                    "inputs": [(f"{task_name}_feature", 0)],
                },
            ],
            loss_func=loss_fn,
            output_func=partial(output, task_name),
            scorer=Scorer(metrics=metrics),
        )

        tasks[task_name] = task

    return tasks
def test_model(caplog):
    """Unit test of EmmentalModel task management: construction, update_task,
    add_task/add_tasks, remove_task, and save/load round-trip."""
    caplog.set_level(logging.INFO)

    dirpath = "temp_test_model"

    Meta.reset()
    emmental.init(dirpath)

    # Minimal loss/output closures keyed by module name; `active` masks the
    # examples labeled for the task.
    def ce_loss(module_name, immediate_output_dict, Y, active):
        return F.cross_entropy(
            immediate_output_dict[module_name][0][active], (Y.view(-1))[active]
        )

    def output(module_name, immediate_output_dict):
        return F.softmax(immediate_output_dict[module_name][0], dim=1)

    # Original task_1: m1 Linear(10, 10) -> m2 Linear(10, 2).
    task1 = EmmentalTask(
        name="task_1",
        module_pool=nn.ModuleDict({
            "m1": nn.Linear(10, 10, bias=False),
            "m2": nn.Linear(10, 2, bias=False)
        }),
        task_flow=[
            {
                "name": "m1",
                "module": "m1",
                "inputs": [("_input_", "data")]
            },
            {
                "name": "m2",
                "module": "m2",
                "inputs": [("m1", 0)]
            },
        ],
        loss_func=partial(ce_loss, "m2"),
        output_func=partial(output, "m2"),
        scorer=Scorer(metrics=["accuracy"]),
    )

    # Same task name but different module shapes (10->5, 5->2); used to verify
    # update_task swaps the modules in place.
    new_task1 = EmmentalTask(
        name="task_1",
        module_pool=nn.ModuleDict({
            "m1": nn.Linear(10, 5, bias=False),
            "m2": nn.Linear(5, 2, bias=False)
        }),
        task_flow=[
            {
                "name": "m1",
                "module": "m1",
                "inputs": [("_input_", "data")]
            },
            {
                "name": "m2",
                "module": "m2",
                "inputs": [("m1", 0)]
            },
        ],
        loss_func=partial(ce_loss, "m2"),
        output_func=partial(output, "m2"),
        scorer=Scorer(metrics=["accuracy"]),
    )

    # A second, distinct task for add/remove coverage.
    task2 = EmmentalTask(
        name="task_2",
        module_pool=nn.ModuleDict({
            "m1": nn.Linear(10, 5, bias=False),
            "m2": nn.Linear(5, 2, bias=False)
        }),
        task_flow=[
            {
                "name": "m1",
                "module": "m1",
                "inputs": [("_input_", "data")]
            },
            {
                "name": "m2",
                "module": "m2",
                "inputs": [("m1", 0)]
            },
        ],
        loss_func=partial(ce_loss, "m2"),
        output_func=partial(output, "m2"),
        scorer=Scorer(metrics=["accuracy"]),
    )

    # Disable DataParallel so module_pool entries are plain nn.Linear.
    config = {"model_config": {"dataparallel": False}}
    emmental.Meta.update_config(config)

    model = EmmentalModel(name="test", tasks=task1)

    assert repr(model) == "EmmentalModel(name=test)"
    assert model.name == "test"
    assert model.task_names == set(["task_1"])
    assert model.module_pool["m1"].weight.data.size() == (10, 10)
    assert model.module_pool["m2"].weight.data.size() == (2, 10)

    # update_task with an existing name replaces the modules.
    model.update_task(new_task1)

    assert model.module_pool["m1"].weight.data.size() == (5, 10)
    assert model.module_pool["m2"].weight.data.size() == (2, 5)

    # update_task with an unknown name is a no-op on task_names.
    model.update_task(task2)

    assert model.task_names == set(["task_1"])

    model.add_task(task2)

    assert model.task_names == set(["task_1", "task_2"])

    model.remove_task("task_1")
    assert model.task_names == set(["task_2"])

    # Removing an already-removed task is a no-op rather than an error.
    model.remove_task("task_1")
    assert model.task_names == set(["task_2"])

    # Save/load round-trip should not raise.
    model.save(f"{dirpath}/saved_model.pth")

    model.load(f"{dirpath}/saved_model.pth")

    # Test add_tasks
    model = EmmentalModel(name="test")

    model.add_tasks([task1, task2])
    assert model.task_names == set(["task_1", "task_2"])

    shutil.rmtree(dirpath)
def create_task(args):
    """Create the TACRED relation-extraction EmmentalTask.

    Builds BERT -> entity-aware encoder -> linear head over the relation
    label set. When any KG embedding file is supplied, the encoder stacks
    `args.kg_encoder_layer` hidden layers and outputs the KG-encoder hidden
    size; otherwise it passes through BERT-sized features.

    Args:
        args: Parsed arguments; reads `bert_model`, `ent_emb_file`,
            `static_ent_emb_file`, `type_emb_file`, `rel_emb_file`,
            `kg_encoder_layer`, `tanh`, and `norm`.

    Returns:
        The configured EmmentalTask.
    """
    task_name = "TACRED"

    bert_model = BertModel.from_pretrained(args.bert_model, cache_dir="./cache/")
    # BERT hidden size by model family.
    bert_output_dim = 768 if "base" in args.bert_model else 1024

    # Work on a copy: the original assigned `config = ENT_BERT_ENCODER_CONFIG`
    # and then wrote keys through the alias, mutating the module-level config
    # dict and leaking state into any later call. (Shallow copy is enough —
    # only top-level keys are written here.)
    config = dict(ENT_BERT_ENCODER_CONFIG)
    if (
        args.ent_emb_file is not None
        or args.static_ent_emb_file is not None
        or args.type_emb_file is not None
        or args.rel_emb_file is not None
    ):
        # KG embeddings present: use the configured KG-encoder depth and
        # its hidden size as the feature width.
        config["num_hidden_layers"] = args.kg_encoder_layer
        output_size = config["hidden_size"]
    else:
        # No KG embeddings: encoder emits BERT-sized features.
        output_size = bert_output_dim
        config["hidden_size"] = output_size
    config = BertConfig.from_dict(config)
    logger.info(config)

    encoder = EntBertEncoder(
        config,
        bert_output_dim,
        output_size,
        args.ent_emb_file,
        args.static_ent_emb_file,
        args.type_emb_file,
        args.rel_emb_file,
        tanh=args.tanh,
        norm=args.norm,
    )

    task = EmmentalTask(
        name=task_name,
        module_pool=nn.ModuleDict(
            {
                "bert": bert_model,
                "encoder": encoder,
                f"{task_name}_pred_head": nn.Linear(
                    output_size, len(LABEL_TO_ID.keys())
                ),
            }
        ),
        task_flow=[
            {
                "name": "bert",
                "module": "bert",
                "inputs": [
                    ("_input_", "token_ids"),
                    ("_input_", "token_segments"),
                    ("_input_", "token_masks"),
                ],
            },
            {
                "name": "encoder",
                "module": "encoder",
                "inputs": [
                    ("bert", 0),
                    ("_input_", "token_ent_ids"),
                    ("_input_", "token_static_ent_ids"),
                    ("_input_", "token_type_ent_ids"),
                    ("_input_", "token_rel_ent_ids"),
                    ("_input_", "token_masks"),
                ],
            },
            {
                "name": f"{task_name}_pred_head",
                "module": f"{task_name}_pred_head",
                # Head consumes the encoder's second output slot (index 1).
                "inputs": [("encoder", 1)],
            },
        ],
        loss_func=partial(ce_loss, f"{task_name}_pred_head"),
        output_func=partial(output, f"{task_name}_pred_head"),
        scorer=Scorer(customize_metric_funcs={"tacred_scorer": tacred_scorer}),
    )
    return task