def create_model(resnet_cnn):
    # freeze the resnet weights
    for param in resnet_cnn.parameters():
        param.requires_grad = False

    # define input features
    in_features = resnet_cnn.fc.in_features
    feature_extractor = nn.Sequential(*list(resnet_cnn.children())[:-1])

    # initialize FC layer: maps 3 sets of image features plus 2 word embeddings to class logits
    WEMB_SIZE = 100
    fc = nn.Linear(in_features * 3 + 2 * WEMB_SIZE, 3)
    init_fc(fc)

    # define layers
    module_pool = nn.ModuleDict(
        {
            "feat_extractor": feature_extractor,
            "prediction_head": fc,
            "feat_concat": FlatConcat(),
            "word_emb": WordEmb(),
        }
    )

    # define task flow through modules
    op_sequence = get_op_sequence()
    pred_cls_task = Task(
        name="visual_relation_task",
        module_pool=module_pool,
        op_sequence=op_sequence,
        scorer=Scorer(metrics=["f1_micro"]),
    )
    return MultitaskClassifier([pred_cls_task])
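# A minimal usage sketch, assuming the helpers referenced above (init_fc, FlatConcat,
# WordEmb, get_op_sequence) are in scope: any torchvision ResNet should work, since
# create_model relies only on `.fc.in_features` and the standard child-module layout.
import torchvision

resnet = torchvision.models.resnet18(pretrained=True)
model = create_model(resnet)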
def create_task(task_name: str, module_suffixes: List[str]) -> Task:
    module1_name = f"linear1{module_suffixes[0]}"
    module2_name = f"linear2{module_suffixes[1]}"

    module_pool = nn.ModuleDict(
        {
            module1_name: nn.Sequential(nn.Linear(2, 20), nn.ReLU()),
            module2_name: nn.Linear(20, 2),
        }
    )

    op1 = Operation(module_name=module1_name, inputs=[("_input_", "coordinates")])
    op2 = Operation(module_name=module2_name, inputs=[op1.name])
    op_sequence = [op1, op2]

    task = Task(
        name=task_name,
        module_pool=module_pool,
        op_sequence=op_sequence,
        scorer=Scorer(metrics=["accuracy"]),
    )

    return task
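# A minimal end-to-end sketch for the task above, using snorkel's dict-based data
# utilities; the "coordinates" key must match the ("_input_", "coordinates") spec,
# and the Y_dict key must match the task name. The suffixes and data are placeholders.
import torch
from snorkel.classification import (
    DictDataLoader,
    DictDataset,
    MultitaskClassifier,
    Trainer,
)

task = create_task("toy_task", ["_a", "_b"])
model = MultitaskClassifier([task])

X = torch.rand(8, 2)  # 2 input features, matching nn.Linear(2, 20)
Y = torch.randint(0, 2, (8,))
dataset = DictDataset(
    name="toy_data", split="train", X_dict={"coordinates": X}, Y_dict={"toy_task": Y}
)
Trainer(n_epochs=1).fit(model, [DictDataLoader(dataset, batch_size=4)])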
def create_task(task_name, module_suffixes=("", "")):
    module1_name = f"linear1{module_suffixes[0]}"
    module2_name = f"linear2{module_suffixes[1]}"

    # initialize both layers to the identity map (identity weights, zero biases)
    linear1 = nn.Linear(2, 2)
    linear1.weight.data.copy_(torch.eye(2))
    linear1.bias.data.copy_(torch.zeros((2,)))

    linear2 = nn.Linear(2, 2)
    linear2.weight.data.copy_(torch.eye(2))
    linear2.bias.data.copy_(torch.zeros((2,)))

    module_pool = nn.ModuleDict(
        {module1_name: nn.Sequential(linear1, nn.ReLU()), module2_name: linear2}
    )

    op0 = Operation(module_name=module1_name, inputs=[("_input_", "data")], name="op0")
    op1 = Operation(module_name=module2_name, inputs=[op0.name], name="op1")
    op_sequence = [op0, op1]

    task = Task(name=task_name, module_pool=module_pool, op_sequence=op_sequence)
    return task
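# A quick sanity check of the identity initialization above (a sketch, not part of
# the original tests): with identity weights and zero biases, non-negative inputs
# pass through both modules unchanged.
import torch

task = create_task("identity_task")
x = torch.tensor([[1.0, 2.0]])
h = task.module_pool["linear1"](x)  # ReLU(I @ x + 0) == x for x >= 0
y = task.module_pool["linear2"](h)  # I @ h + 0 == h
assert torch.equal(y, x)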
def test_task_creation(self):
    module_pool = nn.ModuleDict(
        {
            "linear1": nn.Sequential(nn.Linear(2, 10), nn.ReLU()),
            "linear2": nn.Linear(10, 1),
        }
    )

    op_sequence = [
        Operation(name="the_first_layer", module_name="linear1", inputs=["_input_"]),
        Operation(
            name="the_second_layer",
            module_name="linear2",
            inputs=["the_first_layer"],
        ),
    ]

    task = Task(name=TASK_NAME, module_pool=module_pool, op_sequence=op_sequence)

    # Task has no functionality on its own;
    # here we only confirm that the object was initialized
    self.assertEqual(task.name, TASK_NAME)
def test_no_input_spec(self):
    # Confirm the model doesn't break when a module does not specify its inputs
    dataset = create_dataloader("task", shuffle=False).dataset
    task = Task(
        name="task",
        module_pool=nn.ModuleDict({"identity": nn.Identity()}),
        op_sequence=[Operation("identity", [])],
    )
    model = MultitaskClassifier(tasks=[task], dataparallel=False)
    outputs = model.forward(dataset.X_dict, ["task"])
    self.assertIn("_input_", outputs)
def create_dummy_task(task_name):
    # Create a dummy task: a linear encoder followed by a prediction head
    module_pool = nn.ModuleDict(
        {"linear1": nn.Linear(2, 10), "linear2": nn.Linear(10, 2)}
    )

    op_sequence = [
        Operation(name="encoder", module_name="linear1", inputs=["_input_"]),
        Operation(name="prediction_head", module_name="linear2", inputs=["encoder"]),
    ]

    task = Task(name=task_name, module_pool=module_pool, op_sequence=op_sequence)
    return task
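# A small sketch of the detail the suffix-based helpers above work around:
# MultitaskClassifier merges tasks' module pools by module name, so two tasks built
# with create_dummy_task share "linear1" and "linear2" (hence the module_suffixes
# parameter in the create_task variants, for when sharing is unwanted).
model = MultitaskClassifier(
    [create_dummy_task("task_a"), create_dummy_task("task_b")]
)  # both tasks now use the same underlying weights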
def test_score_shuffled(self):
    # Test that scoring is robust to dataset shuffling
    set_seed(123)

    class SimpleVoter(nn.Module):
        def forward(self, x):
            """Vote for class 0 if x is even and class 1 otherwise"""
            mask = x % 2 == 0
            out = torch.zeros(x.shape[0], 2)
            out[mask, 0] = 1  # class 0
            out[~mask, 1] = 1  # class 1
            return out

    # Create model
    task_name = "VotingTask"
    module_name = "simple_voter"
    module_pool = nn.ModuleDict({module_name: SimpleVoter()})
    op0 = Operation(
        module_name=module_name, inputs=[("_input_", "data")], name="op0"
    )
    op_sequence = [op0]
    task = Task(name=task_name, module_pool=module_pool, op_sequence=op_sequence)
    model = MultitaskClassifier([task])

    # Create dataset
    y_list = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
    x_list = [i for i in range(len(y_list))]
    Y = torch.LongTensor(y_list * 100)
    X = torch.FloatTensor(x_list * 100)
    dataset = DictDataset(
        name="dataset", split="train", X_dict={"data": X}, Y_dict={task_name: Y}
    )

    # Create dataloaders; accuracy should be identical with and without shuffling
    dataloader = DictDataLoader(dataset, batch_size=2, shuffle=False)
    scores = model.score([dataloader])
    self.assertEqual(scores["VotingTask/dataset/train/accuracy"], 0.6)

    dataloader_shuffled = DictDataLoader(dataset, batch_size=2, shuffle=True)
    scores_shuffled = model.score([dataloader_shuffled])
    self.assertEqual(scores_shuffled["VotingTask/dataset/train/accuracy"], 0.6)
def __init__(
    self,
    base_architecture: nn.Module,
    head_dim: int,
    slice_names: List[str],
    input_data_key: str = DEFAULT_INPUT_DATA_KEY,
    task_name: str = DEFAULT_TASK_NAME,
    scorer: Scorer = Scorer(metrics=["accuracy", "f1"]),
    **multitask_kwargs: Any,
) -> None:
    # Initialize module_pool with 1) base_architecture and 2) prediction_head,
    # assuming `head_dim` can be used to map base_architecture to prediction_head
    module_pool = nn.ModuleDict(
        {
            "base_architecture": base_architecture,
            "prediction_head": nn.Linear(head_dim, 2),
        }
    )

    # Create op_sequence from base_architecture -> prediction_head
    op_sequence = [
        Operation(
            name="input_op",
            module_name="base_architecture",
            inputs=[("_input_", input_data_key)],
        ),
        Operation(
            name="head_op", module_name="prediction_head", inputs=["input_op"]
        ),
    ]

    # Initialize base_task using the specified base_architecture
    self.base_task = Task(
        name=task_name,
        module_pool=module_pool,
        op_sequence=op_sequence,
        scorer=scorer,
    )

    # Convert base_task to the associated slice_tasks
    slice_tasks = convert_to_slice_tasks(self.base_task, slice_names)

    # Initialize a MultitaskClassifier with all slice_tasks
    model_name = f"{task_name}_slicing_classifier"
    super().__init__(tasks=slice_tasks, name=model_name, **multitask_kwargs)
    self.slice_names = slice_names
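# Hypothetical usage of the constructor above (assuming it belongs to snorkel's
# SlicingClassifier): a small MLP as the base architecture, with head_dim equal to
# its output width; the slice names here are placeholders and would normally match
# registered slicing functions.
import torch.nn as nn

base = nn.Sequential(nn.Linear(2, 8), nn.ReLU())
slicing_clf = SlicingClassifier(
    base_architecture=base,
    head_dim=8,  # must match the base architecture's output dimension
    slice_names=["short_inputs", "long_inputs"],
)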
def create_task(task_name, module_suffixes=("", "")):
    module1_name = f"linear1{module_suffixes[0]}"
    module2_name = f"linear2{module_suffixes[1]}"

    module_pool = nn.ModuleDict(
        {
            module1_name: nn.Sequential(nn.Linear(2, 10), nn.ReLU()),
            module2_name: nn.Linear(10, 2),
        }
    )

    op1 = Operation(module_name=module1_name, inputs=[("_input_", "data")])
    op2 = Operation(module_name=module2_name, inputs=[op1.name])
    op_sequence = [op1, op2]

    task = Task(name=task_name, module_pool=module_pool, op_sequence=op_sequence)
    return task
inputs=[("_input_", task_data_name)]) # "Pass the output of op1 (the BERT module) as input to the head_module" op2 = Operation(name=task_head_name, module_name=task_head_name, inputs=["bert_module"]) op_sequence = [op1, op2] # Create the Task object, which includes the same name as that in dataloaders, all modules used, # and the sequence in which they are used. # Loss and scoring functions are added based on task type task_object = Task( name=task_formal_name, module_pool=module_pool, op_sequence=op_sequence, loss_func=task_type_function_mapping[task_type]["loss_function"], output_func=partial(F.softmax, dim=1), scorer=task_type_function_mapping[task_type]["scorer"], ) # Add task to list of tasks tasks.append(task_object) # Input list of tasks to MultitaskClassifier object to create model with architecture set for each task model = MultitaskClassifier(tasks) # Set out trainer settings - I.e. how the model will train trainer_config = { "progress_bar": True, "n_epochs": 2, "lr": 0.02,
# %% [markdown]
# Putting this all together, we define the circle task:

# %%
from functools import partial

import torch.nn.functional as F

from snorkel.analysis import Scorer
from snorkel.classification import Task

circle_task = Task(
    name="circle_task",
    module_pool=module_pool,
    op_sequence=op_sequence,
    loss_func=F.cross_entropy,
    output_func=partial(F.softmax, dim=1),
    scorer=Scorer(metrics=["accuracy"]),
)

# %% [markdown]
# Note that `Task` objects are not tied to a particular dataset; multiple datasets can be passed through the same modules for pre-training or co-training.

# %% [markdown]
# ### Again, but faster

# %% [markdown]
# We'll now define the square task, but more succinctly: for example, we use the fact that the default name for an `Operation` is its `module_name` (since most tasks only use their modules once per forward pass).
#
# We'll also define the square task to share the first module in its task flow (`base_mlp`) with the circle task, to demonstrate how to share modules. (Note that this is purely for illustrative purposes; for this toy task, this may well not be the optimal arrangement of modules.)
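# %% [markdown]
# As a preview, here is a sketch of that sharing pattern (the hidden dimension of `base_mlp` and the `"square_data"` field name are assumptions for illustration; the actual definitions follow below). Sharing works by reusing the same module object under the same name in both tasks' module pools:

# %%
import torch.nn as nn

from snorkel.classification import Operation

square_module_pool = nn.ModuleDict(
    {"base_mlp": module_pool["base_mlp"], "square_head": nn.Linear(4, 2)}
)
square_op_sequence = [
    Operation(module_name="base_mlp", inputs=[("_input_", "square_data")]),
    Operation(module_name="square_head", inputs=["base_mlp"]),
]
square_task = Task(
    name="square_task",
    module_pool=square_module_pool,
    op_sequence=square_op_sequence,
    scorer=Scorer(metrics=["accuracy"]),
)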