Example #1
    def __init__(self,
                 task_graph=None,
                 input_module=None,
                 seed=None,
                 **kwargs):
        defaults = recursive_merge_dicts(em_default_config,
                                         mt_em_default_config,
                                         misses='insert')
        self.config = recursive_merge_dicts(defaults, kwargs)

        # If no task_graph is specified, default to a single binary task
        if task_graph is None:
            task_graph = TaskHierarchy(edges=[], cardinalities=[2])
        self.task_graph = task_graph
        self.K_t = self.task_graph.K_t  # Cardinalities by task
        self.T = self.task_graph.T  # Total number of tasks

        MTClassifier.__init__(self, cardinalities=self.K_t, seed=seed)

        if input_module is None:
            input_module = IdentityModule(self.config['layer_output_dims'][0])

        self._build(input_module)

        # Show network
        if self.config['verbose']:
            print("\nNetwork architecture:")
            self._print()
            print()
Example #2
    def __init__(
        self,
        layer_out_dims,
        input_modules=None,
        middle_modules=None,
        head_modules=None,
        K=[],
        task_graph=None,
        **kwargs,
    ):
        kwargs["layer_out_dims"] = layer_out_dims
        config = recursive_merge_dicts(em_default_config,
                                       mt_em_default_config,
                                       misses="insert")
        config = recursive_merge_dicts(config, kwargs)
        MTClassifier.__init__(self, K, config)

        if task_graph is None:
            if K is None:
                raise ValueError("You must supply either a list of "
                                 "cardinalities (K) or a TaskGraph.")
            task_graph = TaskGraph(K)
        self.task_graph = task_graph
        self.K = self.task_graph.K  # Cardinalities by task
        self.t = self.task_graph.t  # Total number of tasks
        assert len(self.K) == self.t

        self._build(input_modules, middle_modules, head_modules)

        # Show network
        if self.config["verbose"]:
            print("\nNetwork architecture:")
            self._print()
            print()
Example #3
 def test_recursive_merge_dicts(self):
     x = {"foo": {"Foo": {"FOO": 1}}, "bar": 2, "baz": 3}
     y = {"FOO": 4, "bar": 5}
     z = {"foo": 6}
     w = recursive_merge_dicts(x, y, verbose=False)
     self.assertEqual(w["bar"], 5)
     self.assertEqual(w["foo"]["Foo"]["FOO"], 4)
     with self.assertRaises(ValueError):
         recursive_merge_dicts(x, z, verbose=False)
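All of these examples funnel keyword arguments through recursive_merge_dicts, and this test pins down its contract: values from the second dict overwrite matching keys anywhere inside the (possibly nested) first dict, and trying to overwrite a sub-dict with a scalar raises ValueError. The sketch below is a minimal reimplementation consistent with this test and with the misses="insert"/"ignore" call sites in the other examples; it is an illustration, not the library's actual implementation.

from copy import deepcopy

def recursive_merge_dicts(x, y, misses="report", verbose=True):
    """Merge y into a copy of x, searching nested sub-dicts for matching keys."""

    def place(d, k, v):
        # Try to assign k=v somewhere inside d; return True on success
        if k in d:
            if isinstance(d[k], dict) and not isinstance(v, dict):
                raise ValueError(f"Cannot overwrite dict key '{k}' with a non-dict value.")
            d[k] = v
            return True
        # Otherwise, search each sub-dict recursively (short-circuits on success)
        return any(isinstance(sub, dict) and place(sub, k, v) for sub in d.values())

    z = deepcopy(x)
    for k, v in y.items():
        if isinstance(v, dict) and isinstance(z.get(k), dict):
            z[k] = recursive_merge_dicts(z[k], v, misses, verbose)
        elif not place(z, k, v):
            if misses == "insert":
                z[k] = v  # unmatched keys are added at the top level
            elif misses != "ignore" and verbose:
                print(f"Could not find a match for key: {k}")
    return z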
Example #4
    def __init__(self, **kwargs):
        self.config = recursive_merge_dicts(trainer_defaults, kwargs, misses="insert")

        # Set random seeds
        if self.config["seed"] is None:
            self.config["seed"] = np.random.randint(1e6)
        set_seed(self.config["seed"])
Example #5
    def __init__(
        self,
        layer_out_dims,
        input_module=None,
        middle_modules=None,
        head_module=None,
        **kwargs,
    ):

        if len(layer_out_dims) < 2 and not kwargs.get("skip_head", False):
            raise ValueError(
                "Arg layer_out_dims must have at least two "
                "elements corresponding to the output dim of the input module "
                "and the cardinality of the task. If the input module is the "
                "IdentityModule, then the output dim of the input module will "
                "be equal to the dimensionality of your input data points")

        # Add layer_out_dims to kwargs so it will be merged into the config dict
        kwargs["layer_out_dims"] = layer_out_dims
        config = recursive_merge_dicts(em_default_config,
                                       kwargs,
                                       misses="insert")
        super().__init__(k=layer_out_dims[-1], config=config)

        self._build(input_module, middle_modules, head_module)

        # Show network
        if self.config["verbose"]:
            print("\nNetwork architecture:")
            self._print()
            print()
Example #6
 def __init__(self, input_dim, output_dim=2, **kwargs):
     layer_out_dims = [input_dim, output_dim]
     overrides = {"input_batchnorm": False, "input_dropout": 0.0}
     kwargs = recursive_merge_dicts(
         kwargs, overrides, misses="insert", verbose=False
     )
     super().__init__(layer_out_dims, **kwargs)
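Note the merge order here: overrides is merged into kwargs, so the override values win and input-layer batchnorm/dropout stay disabled no matter what the caller passes. A hypothetical check (assuming the merged config is exposed as model.config, as in the other examples):

# Hypothetical: the overrides take precedence over caller kwargs
model = LogisticRegression(input_dim=100, output_dim=2, input_batchnorm=True)
assert model.config["input_batchnorm"] is False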
Example #7
    def train_model(self,
                    train_data,
                    valid_data=None,
                    log_writer=None,
                    **kwargs):
        self.config = recursive_merge_dicts(self.config, kwargs)

        # If train_data is provided as a tuple (X, Y), we can make sure Y is in
        # the correct format
        # NOTE: Better handling for if train_data is Dataset or DataLoader...?
        if isinstance(train_data, (tuple, list)):
            X, Y = train_data
            Y = self._preprocess_Y(self._to_torch(Y, dtype=torch.FloatTensor),
                                   self.k)
            train_data = (X, Y)

        # Convert input data to data loaders
        train_loader = self._create_data_loader(train_data, shuffle=True)

        # Create loss function
        loss_fn = self._get_loss_fn()

        # Execute training procedure
        self._train_model(train_loader,
                          loss_fn,
                          valid_data=valid_data,
                          log_writer=log_writer)
Example #8
def generate_configs_and_commands(args, launch_args, search_space, n=None):
    # Create directory with all configurations saved
    configspace_path = "%s/configspace" % args.outputpath
    if not os.path.exists(configspace_path):
        os.makedirs(configspace_path)

    # Save searchspace
    with open("%s/search_space" % configspace_path, "w") as f:
        f.write(json.dumps(search_space))

    tuner = RandomSearchTuner(None, seed=time.time())
    configs = tuner.config_generator(search_space, n, tuner.rng, True)

    command_dicts = []
    for i, random_config in enumerate(configs):

        # Recursive merge dicts launch_args with sampled parameters
        config_to_use = recursive_merge_dicts(launch_args,
                                              random_config,
                                              misses="insert")

        # Add commit hash to config
        config_to_use["commit_hash"] = args.commit_hash
        config_to_use["ami"] = args.ami

        # Write to directory
        config_path = "%s/config_%d.json" % (configspace_path, i)
        with open(config_path, "w") as f:
            json.dump(config_to_use, f)

        # Create command dict
        command_dicts.append(
            create_command_dict(args, config_path, config_to_use))

    return command_dicts
Example #9
 def test_recursive_merge_dicts(self):
     x = {
         'foo': {'Foo': {'FOO': 1}},
         'bar': 2,
         'baz': 3,
     }
     y = {
         'FOO': 4,
         'bar': 5,
     }
     z = {
         'foo': 6
     }
     w = recursive_merge_dicts(x, y, verbose=False)
     self.assertEqual(w['bar'], 5)
     self.assertEqual(w['foo']['Foo']['FOO'], 4)
     with self.assertRaises(ValueError):
         recursive_merge_dicts(x, z, verbose=False)
Example #10
 def __init__(self, input_dim, output_dim=2, padding_idx=0, **kwargs):
     layer_out_dims = [input_dim, output_dim]
     sparse_linear = SparseLinearModule(
         vocab_size=input_dim, embed_size=output_dim, padding_idx=padding_idx
     )
     overrides = {"input_batchnorm": False, "input_dropout": 0.0}
     kwargs = recursive_merge_dicts(
         kwargs, overrides, misses="insert", verbose=False
     )
     super().__init__(layer_out_dims, head_module=sparse_linear, **kwargs)
Example #11
    def __init__(self, cardinality=2, input_module=None, **kwargs):
        self.config = recursive_merge_dicts(em_default_config, kwargs)
        super().__init__(cardinality, seed=self.config['seed'])

        if input_module is None:
            input_module = IdentityModule(self.config['layer_output_dims'][0])

        self._build(input_module)

        # Show network
        if self.config['verbose']:
            print("\nNetwork architecture:")
            self._print()
            print()
Example #12
    def train(self, L, **kwargs):
        """Train the model (i.e. estimate mu) in one of two ways, depending on
        whether source dependencies are provided or not:
        
        (1) No dependencies (conditionally independent sources): Estimate mu
        subject to constraints:
            (1a) O_{B(i,j)} - (mu P mu.T)_{B(i,j)} = 0, for i != j, where B(i,j)
                is the block of entries corresponding to sources i,j
            (1b) np.sum( mu P, 1 ) = diag(O)
        
        (2) Source dependencies:
            - First, estimate Z subject to the inverse form
            constraint:
                (2a) O_\Omega + (ZZ.T)_\Omega = 0, \Omega is the deps mask
            - Then, compute Q = mu P mu.T
            - Finally, estimate mu subject to mu P mu.T = Q and (1b)
        """
        self.config = recursive_merge_dicts(self.config,
                                            kwargs,
                                            misses='ignore')

        if self.inv_form:
            # Compute O, O^{-1}, and initialize params
            if self.config['verbose']:
                print("Computing O^{-1}...")
            self._generate_O_inv(L)
            self._init_params()

            # Estimate Z, compute Q = \mu P \mu^T
            if self.config['verbose']:
                print("Estimating Z...")
            self._train(self.loss_inv_Z)
            self.Q = torch.from_numpy(self.get_Q()).float()

            # Estimate \mu
            if self.config['verbose']:
                print("Estimating \mu...")
            self._train(self.loss_inv_mu)
        else:
            # Compute O and initialize params
            if self.config['verbose']:
                print("Computing O...")
            self._generate_O(L)
            self._init_params()

            # Estimate \mu
            if self.config['verbose']:
                print("Estimating \mu...")
            self._train(self.loss_mu)
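The constraints in this docstring can be sanity-checked numerically. In the toy simulation below (all values invented), m conditionally independent sources vote on n items; O is the empirical overlaps matrix of the augmented indicator matrix, mu holds the true conditionals P(lambda_i = v | Y = y), and P is the diagonal class-balance matrix. The off-diagonal blocks of O then match mu P mu.T (constraint 1a), and diag(O) matches the row sums of mu P (constraint 1b):

import numpy as np

rng = np.random.default_rng(0)
n, m, k = 200_000, 3, 2                     # items, sources, classes (toy values)
p = np.array([0.6, 0.4])                    # class balance
Y = rng.choice(k, size=n, p=p)              # ground-truth labels in {0, 1}
acc = np.array([0.8, 0.7, 0.9])             # per-source accuracies

# Conditionally independent sources: correct w.p. acc[i], otherwise flipped
correct = rng.random((n, m)) < acc[None, :]
L = np.where(correct, Y[:, None], 1 - Y[:, None])

# Augmented indicator matrix with one column per (source i, value v) pair
L_aug = np.zeros((n, m * k))
for i in range(m):
    for v in range(k):
        L_aug[:, i * k + v] = L[:, i] == v
O = (L_aug.T @ L_aug) / n                   # empirical overlaps matrix

# True conditional probabilities mu[(i, v), y] = P(lambda_i = v | Y = y)
mu = np.zeros((m * k, k))
for i in range(m):
    for v in range(k):
        for y in range(k):
            mu[i * k + v, y] = acc[i] if v == y else 1 - acc[i]
P = np.diag(p)

# (1a): the off-diagonal block B(0, 1) of O matches the same block of mu P mu.T
print(np.allclose(O[0:k, k:2 * k], (mu @ P @ mu.T)[0:k, k:2 * k], atol=1e-2))
# (1b): diag(O) matches the row sums of mu P
print(np.allclose(np.diag(O), (mu @ P).sum(axis=1), atol=1e-2))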
Example #13
    def __init__(self, tasks, **kwargs):
        self.config = recursive_merge_dicts(model_defaults,
                                            kwargs,
                                            misses="insert")

        # Set random seed before initializing module weights
        if self.config["seed"] is None:
            self.config["seed"] = np.random.randint(1e6)
        set_seed(self.config["seed"])

        super().__init__()

        # Build network
        self._build(tasks)
        self.task_map = {task.name: task for task in tasks}

        # Load weights
        if self.config["model_weights"]:
            self.load_weights(self.config["model_weights"])

        # Half precision
        if self.config["fp16"]:
            print("metal_model.py: Using fp16")
            self.half()

        # Move model to device now, then move data to device in forward() or calculate_loss()
        if self.config["device"] >= 0:
            if torch.cuda.is_available():
                if self.config["verbose"]:
                    print("Using GPU...")
                self.to(torch.device(f"cuda:{self.config['device']}"))
            else:
                if self.config["verbose"]:
                    print("No cuda device available. Using cpu instead.")

        # Show network
        if self.config["verbose"]:
            print("\nNetwork architecture:")
            print(self)
            print()
            num_params = sum(p.numel() for p in self.parameters()
                             if p.requires_grad)
            print(f"Total number of parameters: {num_params}")
Example #14
    def train(self, X_train, Y_train, X_dev=None, Y_dev=None, **kwargs):
        self.config = recursive_merge_dicts(self.config, kwargs)
        train_config = self.config["train_config"]

        Y_train = self._to_torch(Y_train, dtype=torch.FloatTensor)
        Y_dev = self._to_torch(Y_dev)

        # Make data loaders
        loader_config = train_config["data_loader_config"]
        train_loader = self._make_data_loader(X_train, Y_train, loader_config)

        # Initialize the model
        self.reset()

        # Create loss function
        loss_fn = self._get_loss_fn()

        # Execute training procedure
        self._train(train_loader, loss_fn, X_dev=X_dev, Y_dev=Y_dev)
Example #15
    def __init__(self, K=None, task_graph=None, **kwargs):
        """
        Args:
            K: A t-length list of task cardinalities (overridden by task_graph
                if task_graph is not None)
            task_graph: TaskGraph: A TaskGraph which defines a feasible set of
                task label vectors; overrides K if provided
        """
        config = recursive_merge_dicts(lm_default_config, kwargs)
        MTClassifier.__init__(self, K, config)

        if task_graph is None:
            task_graph = TaskGraph(K)
        self.task_graph = task_graph

        # Note: While K is a list of the cardinalities of the tasks, k is the
        # cardinality of the feasible set. These are always the same for a
        # single-task model, but rarely the same for a multi-task model.
        self.k = self.task_graph.k
Example #16
    def __init__(self, m, k=2, task_graph=None, p=None, deps=[], **kwargs):
        """
        Args:
            m: int: Number of sources
            k: int: Number of true classes
            task_graph: TaskGraph: A TaskGraph which defines a feasible set of
                task label vectors; note this overrides k
            p: np.array: Class balance
            deps: list: A list of source dependencies as tuples of indices 
            kwargs:
                - seed: int: Random state seed
        """
        self.config = recursive_merge_dicts(lm_model_defaults, kwargs)
        super().__init__()
        self.k = k
        self.m = m

        # TaskGraph; note overrides k if present
        self.task_graph = task_graph
        if self.task_graph is not None:
            self.k = len(self.task_graph)
        self.multi_task = (self.task_graph is not None)

        # Class balance- assume uniform if not provided
        if p is None:
            self.p = (1 / self.k) * np.ones(self.k)
        else:
            self.p = p
        self.P = torch.diag(torch.from_numpy(self.p)).float()

        # Dependencies
        self.deps = deps
        self.c_tree = get_clique_tree(range(self.m), self.deps)

        # Whether to take the simple conditionally independent approach, or the
        # "inverse form" approach for handling dependencies
        # This flag allows us to eg test the latter even with no deps present
        self.inv_form = (len(self.deps) > 0)
Example #17
    def __init__(self, input_dim, **kwargs):
        overrides = {
            'batchnorm': False,
            'dropout': 0.0,
            'layer_output_dims': [input_dim],
        }
        kwargs = recursive_merge_dicts(kwargs,
                                       overrides,
                                       misses='insert',
                                       verbose=False)
        super().__init__(cardinality=2, **kwargs)


# class SoftmaxRegression(EndModel):
#     """A softmax regression classifier for a multi-class single-task problem"""
#     def __init__(self, input_dim, output_dim, **kwargs):
#         raise NotImplementedError
#         overrides = {
#             'batchnorm': False,
#             'layer_output_dims': [input_dim],
#         }
#         kwargs = recursive_merge_dicts(kwargs, overrides, verbose=False)
#         label_map = [range(output_dim)]
#         super().__init__(label_map, **kwargs)
Example #18
 def update_config(self, update_dict):
     """Updates self.config with the values in a given update dictionary"""
     self.config = recursive_merge_dicts(self.config, update_dict)
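Because the merge searches nested sub-dicts, a flat update dict can reach keys buried inside the config. A hypothetical call (key names assumed, not taken from the library):

# Hypothetical: "lr" may live inside a nested train_config sub-dict,
# but a flat update dict still reaches it
model.update_config({"verbose": False, "lr": 0.005})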
Example #19
def create_glue_tasks_payloads(task_names, skip_payloads=False, **kwargs):
    assert len(task_names) > 0

    config = recursive_merge_dicts(task_defaults, kwargs)

    if config["seed"] is None:
        config["seed"] = np.random.randint(1e6)
        print(f"Using random seed: {config['seed']}")
    set_seed(config["seed"])

    # share bert encoder for all tasks

    if config["encoder_type"] == "bert":
        bert_kwargs = config["bert_kwargs"]
        bert_model = BertRaw(config["bert_model"], **bert_kwargs)
        if "base" in config["bert_model"]:
            neck_dim = 768
        elif "large" in config["bert_model"]:
            neck_dim = 1024
        input_module = bert_model
        pooler = bert_model.pooler if bert_kwargs["pooler"] else None
        cls_middle_module = BertExtractCls(pooler=pooler,
                                           dropout=config["dropout"])
    else:
        raise NotImplementedError

    # Create dict of task-specific dl_kwargs overrides,
    # e.g. {"STSB": {"batch_size": 2}}
    task_dl_kwargs = {}
    if config["task_dl_kwargs"]:
        task_configs_str = [
            tuple(spec.split("."))
            for spec in config["task_dl_kwargs"].split(",")
        ]
        for (task_name, kwarg_key, kwarg_val) in task_configs_str:
            if kwarg_key == "batch_size":
                kwarg_val = int(kwarg_val)
            task_dl_kwargs[task_name] = {kwarg_key: kwarg_val}

    tasks = []
    payloads = []
    for task_name in task_names:
        # If a flag is specified for attention, use it, otherwise use identity module
        if config["attention"]:
            print("Using soft attention head")
            attention_module = SoftAttentionModule(neck_dim)
        else:
            attention_module = IdentityModule()

        # Pull out names of auxiliary tasks to be dealt with in a second step
        # TODO: fix this logic for cases where auxiliary task for task_name has
        # its own payload
        has_payload = task_name not in config["auxiliary_task_dict"]

        # Note whether this task has auxiliary tasks that apply to it and require spacy
        run_spacy = False
        for aux_task, target_payloads in config["auxiliary_task_dict"].items():
            run_spacy = run_spacy or (task_name in target_payloads
                                      and aux_task in SPACY_TASKS
                                      and aux_task in task_names)

        # Override general dl kwargs with task-specific kwargs
        dl_kwargs = copy.deepcopy(config["dl_kwargs"])
        if task_name in task_dl_kwargs:
            dl_kwargs.update(task_dl_kwargs[task_name])

        # Each primary task has data_loaders to load
        if has_payload and not skip_payloads:
            if config["preprocessed"]:
                datasets = load_glue_datasets(
                    dataset_name=task_name,
                    splits=config["splits"],
                    bert_vocab=config["bert_model"],
                    max_len=config["max_len"],
                    max_datapoints=config["max_datapoints"],
                    run_spacy=run_spacy,
                    verbose=True,
                )
            else:
                datasets = create_glue_datasets(
                    dataset_name=task_name,
                    splits=config["splits"],
                    bert_vocab=config["bert_model"],
                    max_len=config["max_len"],
                    max_datapoints=config["max_datapoints"],
                    generate_uids=kwargs.get("generate_uids", False),
                    run_spacy=run_spacy,
                    verbose=True,
                )
            # Wrap datasets with DataLoader objects
            data_loaders = create_glue_dataloaders(
                datasets,
                dl_kwargs=dl_kwargs,
                split_prop=config["split_prop"],
                splits=config["splits"],
                seed=config["seed"],
            )

        if task_name == "COLA":
            scorer = Scorer(
                standard_metrics=["accuracy"],
                custom_metric_funcs={matthews_corr: ["matthews_corr"]},
            )
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=BinaryHead(neck_dim),
                scorer=scorer,
            )

        elif task_name == "SST2":
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=BinaryHead(neck_dim),
            )

        elif task_name == "MNLI":
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=MulticlassHead(neck_dim, 3),
                scorer=Scorer(standard_metrics=["accuracy"]),
            )

        elif task_name == "SNLI":
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=MulticlassHead(neck_dim, 3),
                scorer=Scorer(standard_metrics=["accuracy"]),
            )

        elif task_name == "RTE":
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=BinaryHead(neck_dim),
                scorer=Scorer(standard_metrics=["accuracy"]),
            )

        elif task_name == "WNLI":
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=BinaryHead(neck_dim),
                scorer=Scorer(standard_metrics=["accuracy"]),
            )

        elif task_name == "QQP":
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=BinaryHead(neck_dim),
                scorer=Scorer(
                    custom_metric_funcs={acc_f1: ["accuracy", "f1", "acc_f1"]}),
            )

        elif task_name == "MRPC":
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=BinaryHead(neck_dim),
                scorer=Scorer(
                    custom_metric_funcs={acc_f1: ["accuracy", "f1", "acc_f1"]}),
            )

        elif task_name == "STSB":
            scorer = Scorer(
                standard_metrics=[],
                custom_metric_funcs={
                    pearson_spearman: [
                        "pearson_corr",
                        "spearman_corr",
                        "pearson_spearman",
                    ]
                },
            )

            task = RegressionTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=RegressionHead(neck_dim),
                scorer=scorer,
            )

        elif task_name == "QNLI":
            task = ClassificationTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=BinaryHead(neck_dim),
                scorer=Scorer(standard_metrics=["accuracy"]),
            )

        # AUXILIARY TASKS

        elif task_name == "THIRD":
            # A toy task that predicts which third of the sentence each token is in
            OUT_DIM = 3
            task = TokenClassificationTask(
                name="THIRD",
                input_module=input_module,
                attention_module=attention_module,
                head_module=BertTokenClassificationHead(neck_dim, OUT_DIM),
                loss_multiplier=config["auxiliary_loss_multiplier"],
            )

        elif task_name == "BLEU":
            task = RegressionTask(
                name=task_name,
                input_module=input_module,
                middle_module=cls_middle_module,
                attention_module=attention_module,
                head_module=RegressionHead(neck_dim),
                output_hat_func=torch.sigmoid,
                loss_hat_func=(lambda out, Y_gold: F.mse_loss(
                    torch.sigmoid(out), Y_gold)),
                scorer=Scorer(custom_metric_funcs={mse: ["mse"]}),
                loss_multiplier=config["auxiliary_loss_multiplier"],
            )

        elif task_name == "SPACY_NER":
            OUT_DIM = len(SPACY_TAGS["SPACY_NER"])
            task = TokenClassificationTask(
                name=task_name,
                input_module=input_module,
                attention_module=attention_module,
                head_module=BertTokenClassificationHead(neck_dim, OUT_DIM),
                loss_multiplier=config["auxiliary_loss_multiplier"],
            )

        elif task_name == "SPACY_POS":
            OUT_DIM = len(SPACY_TAGS["SPACY_POS"])
            task = TokenClassificationTask(
                name=task_name,
                input_module=input_module,
                attention_module=attention_module,
                head_module=BertTokenClassificationHead(neck_dim, OUT_DIM),
                loss_multiplier=config["auxiliary_loss_multiplier"],
            )

        else:
            msg = (f"Task name {task_name} was not recognized as a primary or "
                   f"auxiliary task.")
            raise Exception(msg)

        tasks.append(task)

        # Gather slice names
        slice_names = (config["slice_dict"].get(task_name, [])
                       if config["slice_dict"] else [])

        # Add a task for each slice
        for slice_name in slice_names:
            slice_task_name = f"{task_name}_slice:{slice_name}"
            slice_task = create_slice_task(task, slice_task_name)
            tasks.append(slice_task)

        if has_payload and not skip_payloads:
            # Create payloads (and add slices/auxiliary tasks as applicable)
            for split, data_loader in data_loaders.items():
                payload_name = f"{task_name}_{split}"
                labels_to_tasks = {f"{task_name}_gold": task_name}
                payload = Payload(payload_name, data_loader, labels_to_tasks,
                                  split)

                # Add auxiliary label sets if applicable
                auxiliary_task_dict = config["auxiliary_task_dict"]
                for aux_task_name, target_payloads in auxiliary_task_dict.items():
                    if aux_task_name in task_names and task_name in target_payloads:
                        aux_task_func = auxiliary_task_functions[aux_task_name]
                        payload = aux_task_func(payload)

                # Add a labelset slice to each split
                dataset = payload.data_loader.dataset
                for slice_name in slice_names:
                    slice_task_name = f"{task_name}_slice:{slice_name}"
                    slice_labels = create_slice_labels(
                        dataset,
                        base_task_name=task_name,
                        slice_name=slice_name)
                    labelset_slice_name = f"{task_name}_slice:{slice_name}"
                    payload.add_label_set(slice_task_name, labelset_slice_name,
                                          slice_labels)

                payloads.append(payload)

    return tasks, payloads
Example #20
 def __init__(self, k=2, **kwargs):
     config = recursive_merge_dicts(lm_default_config, kwargs)
     super().__init__(k, config)
Example #21
    def _test_model_config(
        self,
        idx,
        config,
        dev_data,
        init_args=[],
        train_args=[],
        init_kwargs={},
        train_kwargs={},
        module_args={},
        module_kwargs={},
        verbose=False,
        **score_kwargs,
    ):

        # Integrating generated config into init kwargs and train kwargs
        init_kwargs = recursive_merge_dicts(init_kwargs,
                                            config,
                                            misses="insert")
        train_kwargs = recursive_merge_dicts(train_kwargs,
                                             config,
                                             misses="insert")

        # Also make sure train kwargs includes validation metric
        train_kwargs["validation_metric"] = self.validation_metric

        # Initialize modules if provided
        for module_name, module_class in self.module_classes.items():

            # Also integrate generated config into module kwargs so that module
            # hyperparameters can be searched over as well
            module_kwargs[module_name] = recursive_merge_dicts(
                module_kwargs[module_name], config, misses="insert")

            # Initialize module
            init_kwargs[module_name] = module_class(
                *module_args[module_name], **module_kwargs[module_name])

        # Init model
        model = self.model_class(*init_args, **init_kwargs)

        # Search params
        # Select any params in search space that have list or dict
        search_params = {}
        for k, v in config.items():
            if k in self.search_space.keys():
                if isinstance(self.search_space[k], (list, dict)):
                    search_params[k] = v

        if verbose:
            print("=" * 60)
            print(f"[{idx}] Testing {search_params}")
            print("=" * 60)

        # Initialize a new LogWriter and train the model, returning the score
        log_writer = None
        if self.log_writer_class is not None:
            log_writer = self.log_writer_class(
                log_dir=self.log_subdir,
                run_dir=".",
                run_name=f"model_search_{idx}",
            )
        model.train_model(
            *train_args,
            **train_kwargs,
            dev_data=dev_data,
            verbose=verbose,
            log_writer=log_writer,
        )

        score = model.score(
            dev_data,
            metric=self.validation_metric,
            verbose=False,  # Score is already printed in train_model above
            **score_kwargs,
        )

        # If score better than best_score, save
        if score > self.best_score:
            self.best_score = score
            self.best_index = idx
            self.best_config = config
            self._save_best_model(model)

        # Save high-level run stats (in addition to per-model log)
        time_elapsed = time() - self.start_time
        self.run_stats.append({
            "idx": idx,
            "time_elapsed": time_elapsed,
            "search_params": search_params,
            "score": score,
        })
        return score, model
Example #22
    def train(self, X_train, Y_train, X_dev=None, Y_dev=None, **kwargs):
        self.config = recursive_merge_dicts(self.config, kwargs)
        train_config = self.config['train_config']

        Y_train = self._to_torch(Y_train)
        Y_dev = self._to_torch(Y_dev)

        if train_config['use_cuda']:
            raise NotImplementedError
            # TODO: fix this
            # X = X.cuda(self.gpu_id)
            # Y = Y.cuda(self.gpu_id)
            # TODO: put model on gpu

        # Make data loaders
        loader_config = train_config['data_loader_config']
        train_loader = self._make_data_loader(X_train, Y_train, loader_config)
        evaluate_dev = (X_dev is not None and Y_dev is not None)

        # Set the optimizer
        optimizer_config = train_config['optimizer_config']
        optimizer = self._set_optimizer(optimizer_config)

        # Set the lr scheduler
        scheduler_config = train_config['scheduler_config']
        lr_scheduler = self._set_scheduler(scheduler_config, optimizer)

        # Initialize the model
        self.reset()

        # Train the model
        for epoch in range(train_config['n_epochs']):
            epoch_loss = 0.0
            for i, data in enumerate(train_loader):
                X, Y = data

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward pass to calculate outputs
                output = self.forward(X)
                loss = self._get_loss(output, Y)

                # Backward pass to calculate gradients
                loss.backward()

                # Clip gradients
                # if grad_clip:
                #     torch.nn.utils.clip_grad_norm(
                #        self.net.parameters(), grad_clip)

                # Perform optimizer step
                optimizer.step()

                # Keep running sum of losses
                epoch_loss += loss.detach() * X.shape[0]

            # Calculate average loss per training example
            # Saving division until this stage protects against the potential
            # mistake of averaging batch losses when the last batch is an orphan
            train_loss = epoch_loss / len(train_loader.dataset)

            if evaluate_dev:
                val_metric = train_config['validation_metric']
                dev_score = self.score(X_dev, Y_dev, metric=val_metric, 
                    verbose=False)
            
            # Apply learning rate scheduler
            if (lr_scheduler is not None 
                and epoch + 1 >= scheduler_config['lr_freeze']):
                if scheduler_config['scheduler'] == 'reduce_on_plateau':
                    if evaluate_dev:
                        lr_scheduler.step(dev_score)
                else:
                    lr_scheduler.step()

            # Report progress
            if (self.config['verbose'] and 
                (epoch % train_config['print_every'] == 0 
                or epoch == train_config['n_epochs'] - 1)):
                msg = f'[E:{epoch+1}]\tTrain Loss: {train_loss:.3f}'
                if evaluate_dev:
                    msg += f'\tDev score: {dev_score:.3f}'
                print(msg)

        if self.config['verbose']:
            print('Finished Training')
            
            if self.config['show_plots']:
                if self.k == 2:
                    Y_p_train = self.predict_proba(X_train)
                    plot_probabilities_histogram(Y_p_train[:, 0], 
                        title="Training Set Predictions")

            if X_dev is not None and Y_dev is not None:
                Y_ph_dev = self.predict(X_dev)

                print("Confusion Matrix (Dev)")
                mat = confusion_matrix(Y_ph_dev, Y_dev, pretty_print=True)                
Example #23
    def train_model(self, model, payloads, results_path=None, **kwargs):
        # NOTE: misses="insert" so we can log extra metadata (e.g. num_parameters)
        # and eventually write to disk.
        self.config = recursive_merge_dicts(self.config,
                                            kwargs,
                                            misses="insert")

        self.task_names = [task_name for task_name in model.task_map]
        self.payload_names = [payload.name for payload in payloads]
        train_payloads = [p for p in payloads if p.split == "train"]
        if not train_payloads:
            msg = "At least one payload must have property payload.split=='train'"
            raise Exception(msg)

        # Calculate epoch statistics
        # NOTE: We calculate approximate count size using batch_size * num_batches
        self.batches_per_epoch = sum(
            [len(p.data_loader) for p in train_payloads])
        self.examples_per_epoch = sum([
            len(p.data_loader) * p.data_loader.batch_size
            for p in train_payloads
        ])
        if self.config["verbose"]:
            print(f"Beginning train loop.")
            print(
                f"Expecting approximately {self.examples_per_epoch} examples total "
                f"and {self.batches_per_epoch} batches per epoch from "
                f"{len(train_payloads)} payload(s) in the train split.")

        # Check inputs
        self._check_metrics()

        # Set training components
        self._set_writer()
        self._set_logger()
        self._set_checkpointer(model)
        self._set_optimizer(model)
        self._set_lr_scheduler(model)  # TODO: Support more detailed training schedules
        self._set_task_scheduler(model, payloads)

        # Record config
        if self.writer:
            self.writer.write_config(self.config)

        # Train the model
        # TODO: Allow other ways to train besides 1 epoch of all datasets
        model.train()
        # Dict metrics_hist contains the most recently recorded value of all metrics
        self.metrics_hist = {}
        self._reset_losses()
        for epoch in range(self.config["n_epochs"]):
            progress_bar = self.config["progress_bar"] and self.config["verbose"]
            t = tqdm(
                enumerate(self.task_scheduler.get_batches(payloads, "train")),
                total=self.batches_per_epoch,
                disable=(not progress_bar),
            )
            for batch_num, (batch, payload_name, labels_to_tasks) in t:
                # NOTE: actual batch_size may not equal config's target batch_size,
                # for example due to orphan batches. We base batch size off of Y instead
                # of X because we know Y will contain tensors, whereas X can be of any
                # format the input_module accepts, including tuples of tensors, etc.
                _, Ys = batch
                batch_size = len(next(iter(Ys.values())))
                batch_id = epoch * self.batches_per_epoch + batch_num

                # Zero the parameter gradients
                self.optimizer.zero_grad()

                # Forward pass to calculate the average loss per example by task
                # Counts stores the number of examples in each batch with labels by task
                loss_dict, count_dict = model.calculate_loss(
                    *batch, payload_name, labels_to_tasks)

                # NOTE: If there were no "active" examples, loss_dict is empty
                # Skip additional loss-based computation at this point
                if not loss_dict:
                    continue

                loss = sum(loss_dict.values())
                if torch.isnan(loss):
                    msg = "Loss is NaN. Consider reducing learning rate."
                    raise Exception(msg)

                # Backward pass to calculate gradients
                # Loss is an average loss per example
                if model.config["fp16"]:
                    self.optimizer.backward(loss)
                else:
                    loss.backward()

                # Clip gradient norm (not individual gradient magnitudes)
                # max_grad_value = max([p.grad.abs().max().item() for p in model.parameters()])
                if self.config["grad_clip"]:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   self.config["grad_clip"])

                # Perform optimizer step
                self.optimizer.step()

                # Update loss
                for loss_name in loss_dict:
                    if count_dict[loss_name]:
                        self.running_losses[loss_name] += (
                            loss_dict[loss_name].item() * count_dict[loss_name])
                        self.running_examples[loss_name] += count_dict[loss_name]

                # Calculate metrics, log, and checkpoint as necessary
                metrics_dict = self._execute_logging(model, payloads,
                                                     batch_size)
                # Confirm metrics being produced are in proper format
                if epoch == 0 and batch_num == 0:
                    self._validate_metrics_dict(metrics_dict)

                # Apply learning rate scheduler
                self._update_lr_scheduler(model, batch_id)

                # tqdm output
                if len(model.task_map) == 1:
                    t.set_postfix(loss=metrics_dict["model/train/all/loss"])
                else:
                    losses = {}
                    for key, val in metrics_dict.items():
                        if "loss" in key:
                            losses[key] = val
                    t.set_postfix(losses)

            if results_path:
                if not os.path.exists(results_path):
                    os.makedirs(results_path)

                train_metrics_dict = self.calculate_metrics(model,
                                                            payloads,
                                                            split="train")
                valid_metrics_dict = self.calculate_metrics(model,
                                                            payloads,
                                                            split="valid")

                output_eval_file = os.path.join(results_path,
                                                "training_metrics.txt")

                if epoch > 0:
                    append_write = 'a'  # append if already started run
                else:
                    append_write = 'w'  # create/overwrite file at the start of training

                with open(output_eval_file, append_write) as writer:
                    writer.write("Epoch {0}:\n".format(epoch))
                    for key in sorted(train_metrics_dict.keys()):
                        writer.write("Training: %s = %s\n" %
                                     (key, str(train_metrics_dict[key])))
                    for key in sorted(valid_metrics_dict.keys()):
                        writer.write("Validation: %s = %s\n" %
                                     (key, str(valid_metrics_dict[key])))

        model.eval()
        # Restore best model if applicable
        if self.checkpointer and self.checkpointer.checkpoint_best:
            # First do a final checkpoint at the end of training
            metrics_dict = self._execute_logging(model,
                                                 payloads,
                                                 batch_size,
                                                 force_log=True)

            self.checkpointer.load_best_model(model=model)
            # Copy best model to log directory
            if self.writer:
                path_to_best = os.path.join(self.checkpointer.checkpoint_dir,
                                            "best_model.pth")
                path_to_logs = self.writer.log_subdir
                if os.path.isfile(path_to_best):
                    copy2(path_to_best, path_to_logs)

        # Print final performance values
        if self.config["verbose"]:
            print("Finished training")
        # Calculate metrics for all splits if test_split=None
        test_split = self.config["metrics_config"]["test_split"]
        metrics_dict = self.calculate_metrics(model,
                                              payloads,
                                              split=test_split)
        if self.config["verbose"]:
            pprint(metrics_dict)

        # Clean up checkpoints
        if self.checkpointer and self.config["checkpoint_cleanup"]:
            print("Cleaning checkpoints")
            self.checkpointer.clean_up()

        # Write log if applicable
        if self.writer:
            # convert from numpy to python float
            metrics_dict = recursive_transform(
                metrics_dict, lambda x: type(x).__module__ == np.__name__,
                float)

            self.writer.write_metrics(metrics_dict)
            self.writer.write_log()
            self.writer.close()

            # pickle and save the full model
            full_model_path = os.path.join(self.writer.log_subdir,
                                           "model_state_dict.pkl")
            torch.save(model.state_dict(), full_model_path)
            print(f"Full model saved at {full_model_path}")

        return metrics_dict
Example #24
    # NOTE: this snippet is truncated; it opens mid-way through a
    # parser.add_argument(...) call whose flag name is not shown
        type=int,
        default=np.random.randint(1e6),
        help="A single seed to use for trainer, model, and task configs",
    )
    parser.add_argument("--model_type",
                        type=str,
                        default="metal",
                        help="Baseline model type")
    parser = add_flags_from_config(parser, trainer_defaults)
    parser = add_flags_from_config(parser, model_defaults)
    parser = add_flags_from_config(parser, task_defaults)
    args = parser.parse_args()

    # Extract flags into their respective config files
    trainer_config = recursive_merge_dicts(trainer_defaults,
                                           vars(args),
                                           misses="ignore")
    model_config = recursive_merge_dicts(model_defaults,
                                         vars(args),
                                         misses="ignore")
    task_config = recursive_merge_dicts(task_defaults,
                                        vars(args),
                                        misses="ignore")

    task_names = args.tasks.split(",")
    assert len(task_names) == 1
    task_name = task_names[0]

    # Create tasks and payloads
    task_config["slice_dict"] = None
Example #25
    def train_model(
        self,
        L_train,
        Y_dev=None,
        deps=[],
        class_balance=None,
        log_writer=None,
        **kwargs,
    ):
        """Train the model (i.e. estimate mu) in one of two ways, depending on
        whether source dependencies are provided or not:

        Args:
            L_train: An [n,m] scipy.sparse matrix with values in {0,1,...,k}
                corresponding to labels from supervision sources on the
                training set
            Y_dev: Target labels for the dev set, for estimating class_balance
            deps: (list of tuples) known dependencies between supervision
                sources. If not provided, sources are assumed to be independent.
                TODO: add automatic dependency-learning code
            class_balance: (np.array) each class's percentage of the population

        (1) No dependencies (conditionally independent sources): Estimate mu
        subject to constraints:
            (1a) O_{B(i,j)} - (mu P mu.T)_{B(i,j)} = 0, for i != j, where B(i,j)
                is the block of entries corresponding to sources i,j
            (1b) np.sum( mu P, 1 ) = diag(O)

        (2) Source dependencies:
            - First, estimate Z subject to the inverse form
            constraint:
                (2a) O_\Omega + (ZZ.T)_\Omega = 0, \Omega is the deps mask
            - Then, compute Q = mu P mu.T
            - Finally, estimate mu subject to mu P mu.T = Q and (1b)
        """
        self.config = recursive_merge_dicts(self.config,
                                            kwargs,
                                            misses="ignore")
        train_config = self.config["train_config"]

        # TODO: Implement logging for label model?
        if log_writer is not None:
            raise NotImplementedError("Logging for LabelModel.")

        # Note that the LabelModel class implements its own (centered) L2 reg.
        l2 = train_config.get("l2", 0)

        self._set_class_balance(class_balance, Y_dev)
        self._set_constants(L_train)
        self._set_dependencies(deps)
        self._check_L(L_train)

        # Whether to take the simple conditionally independent approach, or the
        # "inverse form" approach for handling dependencies
        # This flag allows us to eg test the latter even with no deps present
        self.inv_form = len(self.deps) > 0

        # Creating this faux dataset is necessary for now because the LabelModel
        # loss functions do not accept inputs, but Classifier._train_model()
        # expects training data to feed to the loss functions.
        dataset = MetalDataset([0], [0])
        train_loader = DataLoader(dataset)
        if self.inv_form:
            # Compute O, O^{-1}, and initialize params
            if self.config["verbose"]:
                print("Computing O^{-1}...")
            self._generate_O_inv(L_train)
            self._init_params()

            # Estimate Z, compute Q = \mu P \mu^T
            if self.config["verbose"]:
                print("Estimating Z...")
            self._train_model(train_loader, self.loss_inv_Z)
            self.Q = torch.from_numpy(self.get_Q()).float()

            # Estimate \mu
            if self.config["verbose"]:
                print("Estimating \mu...")
            self._train_model(train_loader, partial(self.loss_inv_mu, l2=l2))
        else:
            # Compute O and initialize params
            if self.config["verbose"]:
                print("Computing O...")
            self._generate_O(L_train)
            self._init_params()

            # Estimate \mu
            if self.config["verbose"]:
                print("Estimating \mu...")
            self._train_model(train_loader, partial(self.loss_mu, l2=l2))
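For orientation, a hypothetical end-to-end invocation, assuming the LabelModel constructor from Example #20 and the predict_proba method that the Classifier base class conventionally provides:

import numpy as np

# L_train: an [n, m] scipy.sparse matrix of source votes in {0, 1, ..., k}
label_model = LabelModel(k=2)
label_model.train_model(L_train, class_balance=np.array([0.6, 0.4]))
Y_probs = label_model.predict_proba(L_train)  # [n, k] soft training labels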
Example #26
    def search(
        self,
        search_space,
        dev_data,
        init_args=[],
        train_args=[],
        init_kwargs={},
        train_kwargs={},
        max_search=None,
        shuffle=True,
        verbose=True,
        **score_kwargs,
    ):
        """
        Args:
            search_space: see config_generator() documentation
            dev_data: a tuple of Tensors (X,Y), a Dataset, or a DataLoader of
                X (data) and Y (labels) for the dev split
            init_args: (list) positional args for initializing the model
            train_args: (list) positional args for training the model
            init_kwargs: (dict) keyword args for initializing the model
            train_kwargs: (dict) keyword args for training the model
            max_search: see config_generator() documentation
            shuffle: see config_generator() documentation

        Returns:
            best_model: the highest performing trained model

        Note: Initialization is performed by ModelTuner instead of passing a
        pre-initialized model so that tuning may be performed over all model
        parameters, including the network architecture (which is defined before
        the train loop).
        """
        self._clear_state()

        # Generate configs
        configs = self.config_generator(search_space, max_search, shuffle)

        # Commence search
        for i, config in enumerate(configs):
            # Unless seeds are given explicitly, give each config a unique one
            if config.get("seed", None) is None:
                config["seed"] = self.seed + i

            # Integrating generated config into init kwargs and train kwargs
            init_kwargs = recursive_merge_dicts(init_kwargs, config)
            train_kwargs = recursive_merge_dicts(train_kwargs, config)

            score, model = self._test_model_config(
                i,
                config,
                dev_data,
                init_args=init_args,
                train_args=train_args,
                init_kwargs=init_kwargs,
                train_kwargs=train_kwargs,
                verbose=verbose,
                **score_kwargs,
            )

        print("=" * 60)
        print(f"[SUMMARY]")
        print(f"Best model: [{self.best_index}]")
        print(f"Best config: {self.best_config}")
        print(f"Best score: {self.best_score}")
        print("=" * 60)

        # Return best model
        return self._load_best_model(clean_up=True)
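For reference, a hypothetical invocation of search(); the search-space format (lists as discrete choices, dicts with a "range" as sampled intervals) and the RandomSearchTuner constructor arguments are assumptions here, since config_generator() is not shown in this listing:

# Hypothetical random search over l2 (discrete) and lr (log-sampled range)
tuner = RandomSearchTuner(EndModel, validation_metric="accuracy")
best_model = tuner.search(
    {"l2": [0.0, 1e-4, 1e-3], "lr": {"range": [1e-4, 1e-1], "scale": "log"}},
    dev_data,
    init_kwargs={"layer_out_dims": [100, 10, 2]},
    train_kwargs={"n_epochs": 5},
    max_search=10,
)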