Example #1
    def __init__(self, config):
        super().__init__()
        self.config = config

        graph_transformations = []
        for transform in config.graph_transform_args:
            param_dict = (dict(transform["params"])
                          if transform["params"] is not None else {})
            graph_transformations.append(
                configmapper.get_object("transforms",
                                        transform["type"])(**param_dict))
        self.graph_transform = (transforms.Compose(graph_transformations)
                                if graph_transformations != [] else None)

        image_transformations = []
        for transform in config.image_transform_args:
            param_dict = (dict(transform["params"])
                          if transform["params"] is not None else {})
            image_transformations.append(
                configmapper.get_object("transforms",
                                        transform["type"])(**param_dict))
        self.image_transform = (transforms.Compose(image_transformations)
                                if image_transformations != [] else None)

        if config.filepath.indices_csv is not None:
            data_path = config.filepath.indices_csv
        else:
            data_path = config.filepath.data

        self.dir_path = config.filepath.data
        self.data = pd.read_csv(data_path)
        self.image_paths = np.array(self.data["path"])
        self.labels = np.array(self.data["img_id"])
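Every snippet in this listing resolves classes and functions through a project-specific `configmapper` registry that the snippets never define; only `configmapper.get_object(category, name)` appears in the calls. The sketch below is a minimal, assumed implementation of such a registry, shown only to make the pattern concrete (the `map` decorator and the internal dict are guesses, not the project's actual API):

class ConfigMapper:
    """Minimal (category, name) -> object registry, assumed for illustration."""

    def __init__(self):
        self._registry = {}

    def map(self, category, name):
        """Decorator that registers the decorated class/function under (category, name)."""
        def wrapper(obj):
            self._registry.setdefault(category, {})[name] = obj
            return obj
        return wrapper

    def get_object(self, category, name):
        """Return the object registered under (category, name), e.g. ("transforms", "ToTensor")."""
        return self._registry[category][name]

configmapper = ConfigMapper()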
Example #2
    def __init__(self, config):
        self.config = config

        transformations = []
        if hasattr(config, "transform_args"):
            for transform in config.transform_args:
                param_dict = (dict(transform["params"])
                              if transform["params"] is not None else {})
                transformations.append(
                    configmapper.get_object("transforms",
                                            transform["type"])(**param_dict))

        self.transform = (transforms.Compose(transformations)
                          if transformations != [] else None)

        pre_transformations = []
        if hasattr(config, "pre_transform_args"):
            for pre_transform in config.pre_transform_args:
                param_dict = (dict(pre_transform["params"])
                              if pre_transform["params"] is not None else {})
                pre_transformations.append(
                    configmapper.get_object(
                        "transforms", pre_transform["type"])(**param_dict))

        self.pre_transform = (transforms.Compose(pre_transformations)
                              if pre_transformations != [] else None)
        self.dataset = datasets.MNISTSuperpixels(
            root=config.load_dataset_args.path,
            train=self.config.split == "train",
            transform=self.transform,
            pre_transform=self.pre_transform,
        )
Example #3
    def __init__(self, config):
        super().__init__()
        self.config = config

        graph_transformations = []
        for transform in config.graph_transform_args:
            param_dict = (dict(transform["params"])
                          if transform["params"] is not None else {})
            graph_transformations.append(
                configmapper.get_object("transforms",
                                        transform["type"])(**param_dict))
        self.graph_transform = (transforms.Compose(graph_transformations)
                                if graph_transformations != [] else None)

        image_transformations = []
        for transform in config.image_transform_args:
            param_dict = (dict(transform["params"])
                          if transform["params"] is not None else {})
            image_transformations.append(
                configmapper.get_object("transforms",
                                        transform["type"])(**param_dict))
        self.image_transform = (transforms.Compose(image_transformations)
                                if image_transformations != [] else None)

        self.data_paths_df = pd.read_csv(config.data_paths_csv)
        self.data_paths_df["path"] = self.data_paths_df["path"].apply(
            lambda x: os.path.join(
                "/".join(config.data_paths_csv.split("/")[:-1]), x))
    def preprocess(self, model_config, data_config):
        train_dataset = configmapper.get_object(
            "datasets", data_config.main.name)(data_config.train,
                                               self.tokenizer)
        val_dataset = configmapper.get_object(
            "datasets", data_config.main.name)(data_config.val, self.tokenizer)
        model = configmapper.get_object("models", model_config.name)(
            self.embeddings, **model_config.params.as_dict())

        return model, train_dataset, val_dataset
Example #5
    def __init__(self, config):
        super(Projection, self).__init__()
        self.cnn = configmapper.get_object("models", config.cnn_config.name)(
            config.cnn_config)
        self.gcn = configmapper.get_object("models", config.gnn_config.name)(
            config.gnn_config)

        self.linear_layer = Linear(
            config.cnn_config.num_classes + config.gnn_config.num_classes,
            config.num_classes,
        )
        self.loss_fn = CrossEntropyLoss()
Example #6
    def __init__(self, config):
        super().__init__()
        self.config = config

        graph_transformations = []
        for transform in config.graph_transform_args:
            param_dict = (
                dict(transform["params"]) if transform["params"] is not None else {}
            )
            graph_transformations.append(
                configmapper.get_object("transforms", transform["type"])(**param_dict)
            )
        self.graph_transform = (
            transforms.Compose(graph_transformations)
            if graph_transformations != []
            else None
        )

        image_transformations = []
        for transform in config.image_transform_args:
            param_dict = (
                dict(transform["params"]) if transform["params"] is not None else {}
            )
            image_transformations.append(
                configmapper.get_object("transforms", transform["type"])(**param_dict)
            )
        self.image_transform = (
            transforms.Compose(image_transformations)
            if image_transformations != []
            else None
        )

        with open(config.filepath.image, "rb") as f:
            # IDX image-file header: 4-byte magic number, 4-byte image count, then 4-byte row and column counts (16 bytes total)
            _ = f.read(4)
            size = struct.unpack(">I", f.read(4))[0]
            _ = f.read(8)
            self.images = np.frombuffer(f.read(), dtype=np.uint8).reshape(size, 28, 28)
        # Labels
        with open(config.filepath.labels, "rb") as f:
            # IDX label-file header: 4-byte magic number and 4-byte label count (8 bytes total)
            _ = f.read(8)
            self.labels = np.frombuffer(f.read(), dtype=np.uint8)

        if config.filepath.indices_csv is not None:
            filtered_indices = list(pd.read_csv(config.filepath.indices_csv)["index"])
            self.images = np.take(self.images, filtered_indices, axis=0)
            self.labels = np.take(self.labels, filtered_indices, axis=0)
Example #7
    def __init__(self, config):
        super().__init__()
        self.config = config

        transformations = []
        for transform in config.transform_args:
            param_dict = (dict(transform["params"])
                          if transform["params"] is not None else {})
            transformations.append(
                configmapper.get_object("transforms",
                                        transform["type"])(**param_dict))
        self.transform = (transforms.Compose(transformations)
                          if transformations != [] else None)

        with open(config.filepath.data, "rb") as f:
            self.data = pickle.load(f, encoding="bytes")
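        # Loaded with encoding="bytes", so the pickled dictionary's keys are byte strings (hence b"data" and the .encode("UTF-8") lookup below).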
        self.images = self.data[b"data"]
        self.labels = self.data[config.label.encode("UTF-8")]

        if config.filepath.indices_csv is not None:
            filtered_indices = list(
                pd.read_csv(config.filepath.indices_csv)["index"])
            self.images = np.take(self.images, filtered_indices, axis=0)
            self.labels = np.take(self.labels, filtered_indices, axis=0)

        # Each row is a flat 3*32*32 vector; reshape to (N, 3, 32, 32) and transpose to channels-last (N, 32, 32, 3).
        self.images = np.transpose(np.reshape(self.images, (-1, 3, 32, 32)),
                                   (0, 2, 3, 1))
Example #8
    def __init__(self, config):
        super().__init__()
        self.config = config

        transformations = []
        for transform in config.transform_args:
            param_dict = (dict(transform["params"])
                          if transform["params"] is not None else {})
            transformations.append(
                configmapper.get_object("transforms",
                                        transform["type"])(**param_dict))
        self.transform = (transforms.Compose(transformations)
                          if transformations != [] else None)

        self.data = fetch_lfw_people(data_home=config.filepath.data,
                                     color=True,
                                     min_faces_per_person=20)

        if config.filepath.indices_csv is not None:
            filtered_indices = list(
                pd.read_csv(config.filepath.indices_csv)["indices"])
            self.images = np.take(self.data.images, filtered_indices, axis=0)
            self.labels = np.take(self.data.target, filtered_indices, axis=0)
        else:
            self.images = self.data.images
            self.labels = self.data.target

        self.images = self.images.astype(np.uint8)
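Example #9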
    def __init__(self, config):
        """
        Args:
            config (src.utils.module.Config): configuration for preprocessor
        """
        super(GlovePreprocessor, self).__init__()
        self.config = config
        self.tokenizer = configmapper.get_object(
            "tokenizers", self.config.main.preprocessor.tokenizer.name
        )(**self.config.main.preprocessor.tokenizer.init_params.as_dict())
        self.tokenizer_params = (self.config.main.preprocessor.tokenizer.
                                 init_vector_params.as_dict())

        self.tokenizer.initialize_vectors(**self.tokenizer_params)
        self.embeddings = configmapper.get_object(
            "embeddings", self.config.main.preprocessor.embedding.name)(
                self.tokenizer.text_field.vocab.vectors,
                self.tokenizer.text_field.vocab.stoi[
                    self.tokenizer.text_field.pad_token],
            )
Example #10
def convert_params_to_dict(params):
    dic = {}
    for k, v in params.as_dict().items():
        try:
            obj = configmapper.get_object("params", v)
            dic[k] = obj
        except Exception:
            print(f"Undefined {v} for the given key: {k} in mapper, storing original value")
            dic[k] = v
    return dic
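Example #11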
    def __init__(self, config):
        """
        Args:
            config (src.utils.module.Config): configuration for preprocessor
        """
        super(TransformersConcretenessPreprocessor, self).__init__()
        self.config = config
        self.tokenizer = configmapper.get_object(
            "tokenizers",
            self.config.main.preprocessor.tokenizer.name).from_pretrained(
                **self.config.main.preprocessor.tokenizer.init_params.as_dict())
Example #12
def map_dict_to_obj(dic):
    result_dic = {}
    if dic is not None:
        for k, v in dic.items():
            if isinstance(v, dict):
                result_dic[k] = map_dict_to_obj(v)
            else:
                try:
                    obj = configmapper.get_object("params", v)
                    result_dic[k] = obj
                except Exception:
                    result_dic[k] = v
    return result_dic
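Example #13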
    def __init__(self, config):
        self._config = config
        self.metrics = {
            configmapper.get_object("metrics", metric["type"]):
            metric["params"]
            for metric in self._config.main_config.metrics
        }
        self.train_config = self._config.train
        self.val_config = self._config.val
        self.log_label = self.train_config.log.log_label
        if self.train_config.log_and_val_interval is not None:
            self.val_log_together = True
        print("Logging with label: ", self.log_label)
Example #14
    def __init__(self, config):
        self.config = config
        self.image_column_name = config.image_column_name
        self.label_column_name = config.label_column_name
        self.channels_first_input = config.channels_first_input

        transformations = []
        for transform in config.transform_args:
            param_dict = (dict(transform["params"])
                          if transform["params"] is not None else {})
            transformations.append(
                configmapper.get_object("transforms",
                                        transform["type"])(**param_dict))
        self.transform = (transforms.Compose(transformations)
                          if transformations != [] else None)

        self.raw_dataset = load_dataset(**config.load_dataset_args)
        if config.remove_columns is not None:
            self.raw_dataset = self.raw_dataset.remove_columns(
                config.remove_columns)
        self.raw_dataset.set_format(
            "torch", columns=self.raw_dataset["train"].column_names)

        # Cast the mapped columns: images as fixed-shape float32 arrays, labels as a ClassLabel feature.
        features = datasets.Features({
            self.image_column_name:
            datasets.Array3D(
                shape=tuple(self.config.features.image_output_shape),
                dtype="float32",
            ),
            self.label_column_name:
            datasets.features.ClassLabel(
                names=list(self.config.features.label_names)),
        })

        self.train_dataset = self.raw_dataset.map(
            self.prepare_features,
            features=features,
            batched=True,
            batch_size=64,
        )

        if self.image_column_name != "image":
            self.train_dataset = self.train_dataset.rename_column(
                self.image_column_name, "image")
        if self.label_column_name != "label":
            self.train_dataset = self.train_dataset.rename_column(
                self.label_column_name, "label")

        self.train_dataset.set_format("torch", columns=["image", "label"])
Example #15
    def __init__(self, config):
        self._config = config
        self.metrics = {
            configmapper.get_object("metrics", metric["type"]):
            metric["params"]
            for metric in self._config.main_config.metrics
        }
        self.train_config = self._config.train
        self.val_config = self._config.val
        self.log_label = self.train_config.log.log_label
        self.device = torch.device(self._config.main_config.device.name)
        if self.train_config.log_and_val_interval is not None:
            self.train_config.val_interval = self.train_config.log_and_val_interval
            self.train_config.log.log_interval = self.train_config.log_and_val_interval
        print("Logging with label: ", self.log_label)
Example #16
    def __init__(self, config):
        self._config = config
        self.metrics = {
            configmapper.get_object("metrics", metric["type"]):
            metric["params"]
            for metric in self._config.main_config.metrics
        }

        self.train_config = self._config.train
        self.val_config = self._config.val
        self.log_label = self.train_config.log.log_label
        if self.train_config.log_and_val_interval is not None:
            self.val_log_together = True
        ckpts_dir = os.path.split(self.train_config.save_on.best_path)[0]
        if not os.path.exists(ckpts_dir):
            os.makedirs(ckpts_dir)
        print("Logging with label: ", self.log_label)
Example #17
    def __init__(self, config):
        self.config = config

        transformations = []
        for transform in config.transform_args:
            param_dict = (dict(transform["params"])
                          if transform["params"] is not None else {})
            transformations.append(
                configmapper.get_object("transforms",
                                        transform["type"])(**param_dict))
        self.transform = (transforms.Compose(transformations)
                          if transformations != [] else None)

        self.dataset = datasets.MNIST(
            config.load_dataset_args.path,
            download=True,
            train=self.config.split == "train",
            transform=self.transform,
        )
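For reference, the `transform_args` iterated in these dataset constructors is a list of dictionaries with a `type` key and an optional `params` key. A hypothetical fragment consistent with the loop above (the transform names and parameter values are illustrative only, not taken from the project's configs):

transform_args = [
    {"type": "ToTensor", "params": None},
    {"type": "Normalize", "params": {"mean": (0.1307,), "std": (0.3081,)}},
]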
Example #18
    def val(
        self,
        model,
        dataset,
        global_step,
        train_logger=None,
        train_log_values=None,
        log=True,
    ):
        append_text = self.val_config.append_text
        criterion_params = self.train_config.criterion.params
        if criterion_params:
            criterion = configmapper.get_object(
                "losses",
                self.train_config.criterion.type)(**criterion_params.as_dict(),
                                                  device=self.device)
        else:
            criterion = configmapper.get_object(
                "losses", self.train_config.criterion.type)()
        if train_logger is not None:
            val_logger = train_logger
        else:
            val_logger = Logger(**self.val_config.log.logger_params.as_dict())

        if train_log_values is not None:
            val_log_values = train_log_values
        else:
            val_log_values = self.val_config.log.vals.as_dict()

        if self._config.dataloader_type == "geometric":
            val_loader = GeometricDataLoader(
                dataset, **self.val_config.loader_params.as_dict())
        else:
            val_loader = DataLoader(dataset=dataset,
                                    **self.val_config.loader_params.as_dict())

        all_outputs = torch.Tensor().to(self.device)
        if self.train_config.label_type == "float":
            all_labels = torch.FloatTensor().to(self.device)
        else:
            all_labels = torch.LongTensor().to(self.device)

        with torch.no_grad():
            model.eval()
            val_loss = 0
            for j, batch in enumerate(val_loader):
                for key in batch:
                    batch[key] = batch[key].to(self.device)

                inputs = {}
                for key in self._config.input_key:
                    inputs[key] = batch[key]
                labels = batch["label"]

                # NOW THIS MUST BE HANDLED IN THE DATASET CLASS
                # if self.train_config.label_type == "float":
                # # Specific to Float Type
                #     labels = labels.float()

                outputs = model(**inputs)

                loss = criterion(outputs, labels)
                val_loss += loss.item()

                all_labels = torch.cat((all_labels, labels), 0)
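                # Blend the two output heads; self.alpha is assumed to be set elsewhere on this trainer.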
                outputs = outputs[0] * self.alpha + outputs[1] * (1 -
                                                                  self.alpha)
                if self.train_config.label_type == "float":
                    all_outputs = torch.cat((all_outputs, outputs), 0)
                else:
                    all_outputs = torch.cat(
                        (all_outputs, torch.argmax(outputs, axis=1)), 0)

            val_loss = val_loss / len(val_loader)

            val_loss_name = self.train_config.criterion.type

            # print(all_outputs, all_labels)
            metric_list = [
                metric(all_labels.cpu(),
                       all_outputs.detach().cpu(), **self.metrics[metric])
                for metric in self.metrics
            ]
            metric_name_list = [
                metric["type"] for metric in self._config.main_config.metrics
            ]
            return_dic = dict(
                zip(
                    [
                        val_loss_name,
                    ] + metric_name_list,
                    [
                        val_loss,
                    ] + metric_list,
                ))
            if log:
                val_scores = self.log(
                    val_loss,
                    val_loss_name,
                    metric_list,
                    metric_name_list,
                    val_logger,
                    val_log_values,
                    global_step,
                    append_text,
                )
                return val_scores
            return return_dic
Example #19
    def train(self, model, train_dataset, val_dataset=None, logger=None):

        device = torch.device(self._config.main_config.device.name)
        model.to(device)
        optim_params = self.train_config.optimizer.params
        if optim_params:
            optimizer = configmapper.get_object(
                "optimizers",
                self.train_config.optimizer.type)(model.parameters(),
                                                  **optim_params.as_dict())
        else:
            optimizer = configmapper.get_object(
                "optimizers",
                self.train_config.optimizer.type)(model.parameters())

        if self.train_config.scheduler is not None:
            scheduler_params = self.train_config.scheduler.params
            if scheduler_params:
                scheduler = configmapper.get_object(
                    "schedulers", self.train_config.scheduler.type)(
                        optimizer, **scheduler_params.as_dict())
            else:
                scheduler = configmapper.get_object(
                    "schedulers", self.train_config.scheduler.type)(optimizer)

        criterion_params = self.train_config.criterion.params
        if criterion_params:
            criterion = configmapper.get_object(
                "losses",
                self.train_config.criterion.type)(**criterion_params.as_dict())
        else:
            criterion = configmapper.get_object(
                "losses", self.train_config.criterion.type)()

        train_loader = DataLoader(
            train_dataset,
            **self.train_config.loader_params.as_dict(),
            collate_fn=train_dataset.custom_collate_fn,
        )
        # train_logger = Logger(**self.train_config.log.logger_params.as_dict())

        max_epochs = self.train_config.max_epochs
        batch_size = self.train_config.loader_params.batch_size

        if self.val_log_together:
            val_interval = self.train_config.log_and_val_interval
            log_interval = val_interval
        else:
            val_interval = self.train_config.val_interval
            log_interval = self.train_config.log.log_interval

        if logger is None:
            train_logger = Logger(
                **self.train_config.log.logger_params.as_dict())
        else:
            train_logger = logger
        train_log_values = self.train_config.log.values.as_dict()

        best_score = (-math.inf if self.train_config.save_on.desired == "max"
                      else math.inf)
        save_on_score = self.train_config.save_on.score
        best_step = -1
        best_model = None

        print("\nTraining\n")
        # print(max_steps)

        global_step = 0
        for epoch in range(1, max_epochs + 1):
            print("Epoch: {}/{}, Global Step: {}".format(
                epoch, max_epochs, global_step))
            train_loss = 0
            val_loss = 0

            all_labels = torch.FloatTensor().to(device)
            all_outputs = torch.Tensor().to(device)

            pbar = tqdm(total=math.ceil(len(train_dataset) / batch_size))
            pbar.set_description("Epoch " + str(epoch))

            val_counter = 0

            for step, batch in enumerate(train_loader):
                optimizer.zero_grad()
                batch = [torch.tensor(value, device=device) for value in batch]
                # print(batch[0].shape,batch)
                *inputs, labels = batch
                # print(inputs[0],inputs[1])
                # labels = labels.float()
                outputs = model(inputs)
                # print(outputs,labels)
                loss = criterion(outputs, labels)
                loss.backward()

                all_labels = torch.cat((all_labels, labels), 0)
                all_outputs = torch.cat(
                    (all_outputs, torch.argmax(outputs, axis=1)), 0)

                train_loss += loss.item()
                optimizer.step()

                if self.train_config.scheduler is not None:
                    scheduler.step(epoch + step / len(train_loader))

                # print(train_loss)
                # print(step+1)

                pbar.set_postfix_str(f"Train Loss: {train_loss /(step+1)}")
                pbar.update(1)

                global_step += 1

                # Need to check if we want global_step or local_step

                if val_dataset is not None and (
                        global_step) % val_interval == 0:
                    print("\nEvaluating\n")
                    val_scores = self.val(
                        model,
                        val_dataset,
                        criterion,
                        device,
                        global_step,
                        train_logger,
                        train_log_values,
                    )
                    model.train()
                    save_flag = 0
                    if self.train_config.save_on is not None:
                        train_loss_name = self.train_config.criterion.type
                        training_loss = train_loss / global_step

                        metric_list = [
                            metric(
                                all_outputs.detach().cpu(),
                                all_labels.cpu(),
                                **self.metrics[metric],
                            ) for metric in self.metrics
                        ]
                        metric_name_list = [
                            metric["type"]
                            for metric in self._config.main_config.metrics
                        ]

                        train_scores = dict(
                            zip(
                                [
                                    train_loss_name,
                                ] + metric_name_list,
                                [
                                    training_loss,
                                ] + metric_list,
                            ))

                        if self.train_config.save_on.desired == "min":
                            if val_scores[save_on_score] < best_score:
                                save_flag = 1
                                best_score = val_scores[save_on_score]
                                best_step = global_step
                        else:
                            if val_scores[save_on_score] > best_score:
                                save_flag = 1
                                best_score = val_scores[save_on_score]
                                best_step = global_step
                        if save_flag:
                            torch.save(
                                {
                                    "model_state_dict": model.state_dict(),
                                    "best_step": best_step,
                                    "best_score": best_score,
                                    "save_on_score": save_on_score,
                                },
                                self.train_config.save_on.best_path.format(
                                    self.log_label),
                            )

                            hparam_list = []
                            hparam_name_list = []
                            if self.train_config.log.values.hparams is not None:
                                for hparam in self.train_config.log.values.hparams:
                                    hparam_list.append(
                                        get_item_in_config(
                                            self._config, hparam["path"]))
                                    hparam_name_list.append(hparam["name"])

                                val_keys, val_values = zip(*val_scores.items())
                                train_keys, train_values = zip(
                                    *train_scores.items())
                                val_keys = list(val_keys)
                                train_keys = list(train_keys)
                                val_values = list(val_values)
                                train_values = list(train_values)
                                for i, key in enumerate(val_keys):
                                    val_keys[i] = (
                                        f"hparams/{self.log_label}/best_val_val_"
                                        + val_keys[i])
                                for i, key in enumerate(train_keys):
                                    train_keys[i] = (
                                        f"hparams/{self.log_label}/best_val_train_"
                                        + train_keys[i])
                                train_logger.save_hyperparams(
                                    hparam_list,
                                    hparam_name_list,
                                    train_values + val_values,
                                    train_keys + val_keys,
                                )

                if (global_step - 1) % log_interval == 0:
                    print("\nLogging\n")

                    train_loss_name = self.train_config.criterion.type
                    outputs = torch.argmax(outputs, axis=1)
                    metric_list = [
                        metric(outputs.detach().cpu(), labels.cpu(),
                               **self.metrics[metric])
                        for metric in self.metrics
                    ]
                    metric_name_list = [
                        metric["type"]
                        for metric in self._config.main_config.metrics
                    ]
                    train_scores = self.log(
                        train_loss / global_step,
                        train_loss_name,
                        metric_list,
                        metric_name_list,
                        train_logger,
                        train_log_values,
                        global_step,
                        append_text=self.train_config.append_text,
                    )

            pbar.close()
            if not os.path.exists(self.train_config.checkpoint.checkpoint_dir):
                os.makedirs(self.train_config.checkpoint.checkpoint_dir)

            torch.save(
                model.state_dict(),
                f"{self.train_config.checkpoint.checkpoint_dir}_{str(self.train_config.log.log_label)}"
                + "_" + str(epoch) + ".pth",
            )
Example #20
grid_search = args.grid_search
# log_dir = "/content/drive/MyDrive/SuperPixels/logs/"
log_dir = "./logs/"
# Seed
seed(train_config.main_config.seed)

# Data
if "main" in data_config.as_dict().keys():  # Regular Data
    if args.validation:
        train_data_config = data_config.train_val.train
        val_data_config = data_config.train_val.val
    else:
        train_data_config = data_config.train
        val_data_config = data_config.val

    train_data = configmapper.get_object(
        "datasets", train_data_config.name)(train_data_config)
    val_data = configmapper.get_object("datasets",
                                       val_data_config.name)(val_data_config)

else:  # HF Type Data
    dataset = configmapper.get_object("datasets",
                                      data_config.name)(data_config)
    train_data = dataset.train_dataset["train"]
    val_data = dataset.train_dataset["test"]

# Logger

logger = Logger(log_path=os.path.join(
    log_dir,
    args.config_dir.strip("/").split("/")[-1] +
    ("" if args.validation else "_orig"),
Example #21
    default=False,
)
### Update Tips : Can provide more options to the user.
### Can also provide multiple verbosity levels.

args = parser.parse_args()
# print(vars(args))
model_config = Config(path=args.model)
train_config = Config(path=args.train)
data_config = Config(path=args.data)
grid_search = args.grid_search

# verbose = args.verbose

# Preprocessor, Dataset, Model
preprocessor = configmapper.get_object(
    "preprocessors", data_config.main.preprocessor.name)(data_config)

if grid_search:
    train_configs = generate_grid_search_configs(train_config,
                                                 train_config.grid_search)
    print(f"Total Configurations Generated: {len(train_configs)}")

    logger = Logger(**train_config.grid_search.hyperparams.train.log.
                    logger_params.as_dict())

    for train_config in train_configs:
        print(train_config)

        ## Seed
        seed(train_config.main_config.seed)
Example #22
    help="The configuration for model training/evaluation",
)
parser.add_argument(
    "--data",
    type=str,
    action="store",
    help="The configuration for data",
)

args = parser.parse_args()
# print(vars(args))
train_config = OmegaConf.load(args.train)
data_config = OmegaConf.load(args.data)

print(data_config.train_files)
dataset = configmapper.get_object("datasets", data_config.name)(data_config)
untokenized_train_dataset = dataset.dataset
tokenized_train_dataset = dataset.tokenized_inputs
tokenized_test_dataset = dataset.test_tokenized_inputs

model_class = configmapper.get_object("models", train_config.model_name)

if "toxic-bert" in train_config.pretrained_args.pretrained_model_name_or_path:
    toxicbert_model = AutoModelForSequenceClassification.from_pretrained(
        train_config.pretrained_args.pretrained_model_name_or_path)
    train_config.pretrained_args.pretrained_model_name_or_path = "bert-base-uncased"
    model = model_class.from_pretrained(**train_config.pretrained_args)
    model.bert = deepcopy(toxicbert_model.bert)
    gc.collect()

elif "toxic-roberta" in train_config.pretrained_args.pretrained_model_name_or_path:
Example #23
    def __init__(self, config):
        super().__init__()
        self.cnn = configmapper.get_object("models", config.cnn_config.name)(
            config.cnn_config)
        self.gcn = configmapper.get_object("models", config.gnn_config.name)(
            config.gnn_config)
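Example #24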
    def train(self, model, train_dataset, val_dataset=None, logger=None):
        device = torch.device(self._config.main_config.device.name)
        model.to(device)
        optim_params = self.train_config.optimizer.params
        if optim_params:
            optimizer = configmapper.get_object(
                "optimizers",
                self.train_config.optimizer.type)(model.parameters(),
                                                  **optim_params.as_dict())
        else:
            optimizer = configmapper.get_object(
                "optimizers",
                self.train_config.optimizer.type)(model.parameters())

        if self.train_config.scheduler is not None:
            scheduler_params = self.train_config.scheduler.params
            if scheduler_params:
                scheduler = configmapper.get_object(
                    "schedulers", self.train_config.scheduler.type)(
                        optimizer, **scheduler_params.as_dict())
            else:
                scheduler = configmapper.get_object(
                    "schedulers", self.train_config.scheduler.type)(optimizer)

        criterion_params = self.train_config.criterion.params
        if criterion_params:
            criterion = configmapper.get_object(
                "losses",
                self.train_config.criterion.type)(**criterion_params.as_dict())
        else:
            criterion = configmapper.get_object(
                "losses", self.train_config.criterion.type)()
        if "custom_collate_fn" in dir(train_dataset):
            train_loader = DataLoader(
                dataset=train_dataset,
                collate_fn=train_dataset.custom_collate_fn,
                **self.train_config.loader_params.as_dict(),
            )
        else:
            train_loader = DataLoader(
                dataset=train_dataset,
                **self.train_config.loader_params.as_dict())
        # train_logger = Logger(**self.train_config.log.logger_params.as_dict())

        max_epochs = self.train_config.max_epochs
        batch_size = self.train_config.loader_params.batch_size

        if self.val_log_together:
            val_interval = self.train_config.log_and_val_interval
            log_interval = val_interval
        else:
            val_interval = self.train_config.val_interval
            log_interval = self.train_config.log.log_interval

        if logger is None:
            train_logger = Logger(
                **self.train_config.log.logger_params.as_dict())
        else:
            train_logger = logger

        train_log_values = self.train_config.log.values.as_dict()

        best_score = (-math.inf if self.train_config.save_on.desired == "max"
                      else math.inf)
        save_on_score = self.train_config.save_on.score
        best_step = -1
        best_model = None

        best_hparam_list = None
        best_hparam_name_list = None
        best_metrics_list = None
        best_metrics_name_list = None

        # print("\nTraining\n")
        # print(max_steps)

        global_step = 0
        for epoch in range(1, max_epochs + 1):
            print("Epoch: {}/{}, Global Step: {}".format(
                epoch, max_epochs, global_step))
            train_loss = 0
            val_loss = 0

            if (self.train_config.label_type == 'float'):
                all_labels = torch.FloatTensor().to(device)
            else:
                all_labels = torch.LongTensor().to(device)

            all_outputs = torch.Tensor().to(device)

            train_scores = None
            val_scores = None

            pbar = tqdm(total=math.ceil(len(train_dataset) / batch_size))
            pbar.set_description("Epoch " + str(epoch))

            val_counter = 0

            for step, batch in enumerate(train_loader):
                model.train()
                optimizer.zero_grad()
                inputs, labels = batch

                if self.train_config.label_type == 'float':
                    # Specific to float type
                    labels = labels.float()

                for key in inputs:
                    inputs[key] = inputs[key].to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                loss = criterion(torch.squeeze(outputs), labels)
                loss.backward()

                all_labels = torch.cat((all_labels, labels), 0)

                if (self.train_config.label_type == 'float'):
                    all_outputs = torch.cat((all_outputs, outputs), 0)
                else:
                    all_outputs = torch.cat(
                        (all_outputs, torch.argmax(outputs, axis=1)), 0)

                train_loss += loss.item()
                optimizer.step()

                if self.train_config.scheduler is not None:
                    if isinstance(scheduler, ReduceLROnPlateau):
                        scheduler.step(train_loss / (step + 1))
                    else:
                        scheduler.step()

                # print(train_loss)
                # print(step+1)

                pbar.set_postfix_str(f"Train Loss: {train_loss /(step+1)}")
                pbar.update(1)

                global_step += 1

                # Need to check if we want global_step or local_step

                if val_dataset is not None and (global_step -
                                                1) % val_interval == 0:
                    # print("\nEvaluating\n")
                    val_scores = self.val(
                        model,
                        val_dataset,
                        criterion,
                        device,
                        global_step,
                        train_logger,
                        train_log_values,
                    )

                    #save_flag = 0
                    if self.train_config.save_on is not None:

                        ## BEST SCORES UPDATING

                        train_scores = self.get_scores(
                            train_loss,
                            global_step,
                            self.train_config.criterion.type,
                            all_outputs,
                            all_labels,
                        )

                        best_score, best_step, save_flag = self.check_best(
                            val_scores, save_on_score, best_score, global_step)

                        store_dict = {
                            "model_state_dict": model.state_dict(),
                            "best_step": best_step,
                            "best_score": best_score,
                            "save_on_score": save_on_score,
                        }

                        path = self.train_config.save_on.best_path.format(
                            self.log_label)

                        self.save(store_dict, path, save_flag)

                        if save_flag and train_log_values[
                                "hparams"] is not None:
                            (
                                best_hparam_list,
                                best_hparam_name_list,
                                best_metrics_list,
                                best_metrics_name_list,
                            ) = self.update_hparams(train_scores,
                                                    val_scores,
                                                    desc="best_val")
                # pbar.close()
                if (global_step - 1) % log_interval == 0:
                    # print("\nLogging\n")
                    train_loss_name = self.train_config.criterion.type
                    metric_list = [
                        metric(all_labels.cpu(),
                               all_outputs.detach().cpu(),
                               **self.metrics[metric])
                        for metric in self.metrics
                    ]
                    metric_name_list = [
                        metric['type']
                        for metric in self._config.main_config.metrics
                    ]

                    train_scores = self.log(
                        train_loss / (step + 1),
                        train_loss_name,
                        metric_list,
                        metric_name_list,
                        train_logger,
                        train_log_values,
                        global_step,
                        append_text=self.train_config.append_text,
                    )
            pbar.close()
            if not os.path.exists(self.train_config.checkpoint.checkpoint_dir):
                os.makedirs(self.train_config.checkpoint.checkpoint_dir)

            if self.train_config.save_after_epoch:
                store_dict = {
                    "model_state_dict": model.state_dict(),
                }

                path = f"{self.train_config.checkpoint.checkpoint_dir}_{str(self.train_config.log.log_label)}_{str(epoch)}.pth"

                self.save(store_dict, path, save_flag=1)

        if epoch == max_epochs:
            # print("\nEvaluating\n")
            val_scores = self.val(
                model,
                val_dataset,
                criterion,
                device,
                global_step,
                train_logger,
                train_log_values,
            )

            # print("\nLogging\n")
            train_loss_name = self.train_config.criterion.type
            metric_list = [
                metric(all_labels.cpu(),
                       all_outputs.detach().cpu(), **self.metrics[metric])
                for metric in self.metrics
            ]
            metric_name_list = [
                metric['type'] for metric in self._config.main_config.metrics
            ]

            train_scores = self.log(
                train_loss / len(train_loader),
                train_loss_name,
                metric_list,
                metric_name_list,
                train_logger,
                train_log_values,
                global_step,
                append_text=self.train_config.append_text,
            )

            if self.train_config.save_on is not None:

                ## BEST SCORES UPDATING

                train_scores = self.get_scores(
                    train_loss,
                    len(train_loader),
                    self.train_config.criterion.type,
                    all_outputs,
                    all_labels,
                )

                best_score, best_step, save_flag = self.check_best(
                    val_scores, save_on_score, best_score, global_step)

                store_dict = {
                    "model_state_dict": model.state_dict(),
                    "best_step": best_step,
                    "best_score": best_score,
                    "save_on_score": save_on_score,
                }

                path = self.train_config.save_on.best_path.format(
                    self.log_label)

                self.save(store_dict, path, save_flag)

                if save_flag and train_log_values["hparams"] is not None:
                    (
                        best_hparam_list,
                        best_hparam_name_list,
                        best_metrics_list,
                        best_metrics_name_list,
                    ) = self.update_hparams(train_scores,
                                            val_scores,
                                            desc="best_val")

                ## FINAL SCORES UPDATING + STORING
                train_scores = self.get_scores(
                    train_loss,
                    len(train_loader),
                    self.train_config.criterion.type,
                    all_outputs,
                    all_labels,
                )

                store_dict = {
                    "model_state_dict": model.state_dict(),
                    "final_step": global_step,
                    "final_score": train_scores[save_on_score],
                    "save_on_score": save_on_score,
                }

                path = self.train_config.save_on.final_path.format(
                    self.log_label)

                self.save(store_dict, path, save_flag=1)
                if train_log_values["hparams"] is not None:
                    (
                        final_hparam_list,
                        final_hparam_name_list,
                        final_metrics_list,
                        final_metrics_name_list,
                    ) = self.update_hparams(train_scores,
                                            val_scores,
                                            desc="final")
                    train_logger.save_hyperparams(
                        best_hparam_list,
                        best_hparam_name_list,
                        [
                            int(self.log_label),
                        ] + best_metrics_list + final_metrics_list,
                        [
                            "hparams/log_label",
                        ] + best_metrics_name_list + final_metrics_name_list,
                    )
Example #25
    action="store",
    help="The configuration for data",
)

args = parser.parse_args()
ig_config = Config(path=args.config)

model_config = Config(path=args.model)
data_config = Config(path=args.data)

# verbose = args.verbose

# Preprocessor, Dataset, Model

preprocessor = configmapper.get_object(
    "preprocessors", data_config.main.preprocessor.name
)(data_config)


model, train_data, val_data = preprocessor.preprocess(model_config, data_config)

tokenizer = AutoTokenizer.from_pretrained(
    model_config.params.pretrained_model_name_or_path
)
# model = configmapper.get_object("models", model_config.name).from_pretrained(
#     'bert-large-uncased'
# )
model.load_state_dict(torch.load(ig_config.checkpoint_path))

# Initialize BertIntegratedGradients
big = MyIntegratedGradients(ig_config, model, val_data, tokenizer)
Example #26
    def train(self, model, train_dataset, val_dataset=None, logger=None):
        model.to(self.device)
        optim_params = self.train_config.optimizer.params
        if optim_params:
            optimizer = configmapper.get_object(
                "optimizers",
                self.train_config.optimizer.type)(model.parameters(),
                                                  **optim_params.as_dict())
        else:
            optimizer = configmapper.get_object(
                "optimizers",
                self.train_config.optimizer.type)(model.parameters())

        if self.train_config.scheduler is not None:
            scheduler_params = self.train_config.scheduler.params
            if scheduler_params:
                scheduler = configmapper.get_object(
                    "schedulers", self.train_config.scheduler.type)(
                        optimizer, **scheduler_params.as_dict())
            else:
                scheduler = configmapper.get_object(
                    "schedulers", self.train_config.scheduler.type)(optimizer)

        criterion_params = self.train_config.criterion.params
        if criterion_params:
            criterion = configmapper.get_object(
                "losses",
                self.train_config.criterion.type)(**criterion_params.as_dict(),
                                                  device=self.device)
        else:
            criterion = configmapper.get_object(
                "losses", self.train_config.criterion.type)()
        if self._config.dataloader_type == "geometric":
            train_loader = GeometricDataLoader(
                train_dataset, **self.train_config.loader_params.as_dict())
        else:
            train_loader = DataLoader(
                dataset=train_dataset,
                **self.train_config.loader_params.as_dict())

        max_epochs = self.train_config.max_epochs
        batch_size = self.train_config.loader_params.batch_size
        interval_type = self.train_config.interval_type
        val_interval = self.train_config.val_interval
        log_interval = self.train_config.log.log_interval

        if logger is None:
            train_logger = Logger(
                **self.train_config.log.logger_params.as_dict())
        else:
            train_logger = logger

        train_log_values = self.train_config.log.vals.as_dict()

        best_score = (-math.inf if self.train_config.save_on.desired == "max"
                      else math.inf)
        save_on_score = self.train_config.save_on.score
        best_step = -1

        best_hparam_list = None
        best_hparam_name_list = None
        best_metrics_list = None
        best_metrics_name_list = None

        # print("\nTraining\n")
        # print(max_steps)

        global_step = 0
        for epoch in range(1, max_epochs + 1):
            print("Epoch: {}/{}, Global Step: {}".format(
                epoch, max_epochs, global_step))
            train_loss = 0
            if self.train_config.label_type == "float":
                all_labels = torch.FloatTensor().to(self.device)
            else:
                all_labels = torch.LongTensor().to(self.device)

            all_outputs = torch.Tensor().to(self.device)

            train_scores = None
            val_scores = None

            pbar = tqdm(total=math.ceil(len(train_dataset) / batch_size))
            pbar.set_description("Epoch " + str(epoch))

            for step, batch in enumerate(train_loader):
                model.train()
                optimizer.zero_grad()
                for key in batch:
                    batch[key] = batch[key].to(self.device)

                inputs = {}
                for key in self._config.input_key:
                    inputs[key] = batch[key]
                labels = batch["label"]

                # NOW THIS MUST BE HANDLED IN THE DATASET CLASS
                # if self.train_config.label_type == "float":
                # # Specific to Float Type
                #     labels = labels.float()

                outputs = model(**inputs)

                # Can remove this at a later stage?
                # I think `losses.backward()` should work.
                loss = criterion(outputs, labels)
                loss.backward()

                all_labels = torch.cat((all_labels, labels), 0)
                outputs = outputs[0] * self.alpha + outputs[1] * (1 -
                                                                  self.alpha)
                if self.train_config.label_type == "float":
                    all_outputs = torch.cat((all_outputs, outputs), 0)
                else:
                    all_outputs = torch.cat(
                        (all_outputs, torch.argmax(outputs, axis=1)), 0)

                train_loss += loss.item()
                optimizer.step()

                if self.train_config.scheduler is not None:
                    if isinstance(scheduler, ReduceLROnPlateau):
                        scheduler.step(train_loss / (step + 1))
                    else:
                        scheduler.step()

                # print(train_loss)
                # print(step+1)

                pbar.set_postfix_str(f"Train Loss: {train_loss /(step+1)}")
                pbar.update(1)

                global_step += 1

                # Need to check if we want global_step or local_step
                if interval_type == "step":
                    if (val_dataset is not None
                            and (global_step - 1) % val_interval == 0):
                        # print("\nEvaluating\n")
                        val_scores = self.val(
                            model,
                            val_dataset,
                            global_step,
                            train_logger,
                            train_log_values,
                        )

                        # save_flag = 0
                        if self.train_config.save_on is not None:

                            # BEST SCORES UPDATING

                            train_scores = self.get_scores(
                                train_loss,
                                global_step,
                                self.train_config.criterion.type,
                                all_outputs,
                                all_labels,
                            )

                            best_score, best_step, save_flag = self.check_best(
                                val_scores, save_on_score, best_score,
                                global_step)

                            store_dict = {
                                "model_state_dict": model.state_dict(),
                                "best_step": best_step,
                                "best_score": best_score,
                                "save_on_score": save_on_score,
                            }

                            path = os.path.join(
                                train_logger.log_path,
                                self.train_config.save_on.best_path.format(
                                    self.log_label),
                            )

                            self.save(store_dict, path, save_flag)

                            if save_flag and train_log_values[
                                    "hparams"] is not None:
                                (
                                    best_hparam_list,
                                    best_hparam_name_list,
                                    best_metrics_list,
                                    best_metrics_name_list,
                                ) = self.update_hparams(train_scores,
                                                        val_scores,
                                                        desc="best_val")
                    # pbar.close()
                    if (global_step - 1) % log_interval == 0:
                        # print("\nLogging\n")
                        train_loss_name = self.train_config.criterion.type
                        metric_list = [
                            metric(
                                all_labels.cpu(),
                                all_outputs.detach().cpu(),
                                **self.metrics[metric],
                            ) for metric in self.metrics
                        ]
                        metric_name_list = [
                            metric["type"]
                            for metric in self._config.main_config.metrics
                        ]

                        train_scores = self.log(
                            train_loss / (step + 1),
                            train_loss_name,
                            metric_list,
                            metric_name_list,
                            train_logger,
                            train_log_values,
                            global_step,
                            append_text=self.train_config.append_text,
                        )
            pbar.close()
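            # Make sure the checkpoint directory exists before writing any
            # per-epoch snapshots.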
            if not os.path.exists(
                    os.path.join(train_logger.log_path,
                                 self.train_config.checkpoint.checkpoint_dir)):
                os.makedirs(
                    os.path.join(
                        train_logger.log_path,
                        self.train_config.checkpoint.checkpoint_dir,
                    ))

            if self.train_config.save_after_epoch:
                store_dict = {
                    "model_state_dict": model.state_dict(),
                }

                path = os.path.join(
                    train_logger.log_path,
                    self.train_config.checkpoint.checkpoint_dir,
                    f"epoch_{self.train_config.log.log_label}_{epoch}.pth",
                )

                self.save(store_dict, path, save_flag=1)
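            # Epoch-level interval: validate / checkpoint every `val_interval`
            # epochs and log every `log_interval` epochs.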
            if interval_type == "epoch":
                if val_dataset is not None and epoch % val_interval == 0:
                    # print("\nEvaluating\n")
                    val_scores = self.val(
                        model,
                        val_dataset,
                        epoch,
                        train_logger,
                        train_log_values,
                    )

                    # save_flag = 0
                    if self.train_config.save_on is not None:

                        # BEST SCORES UPDATING

                        train_scores = self.get_scores(
                            train_loss,
                            epoch,
                            self.train_config.criterion.type,
                            all_outputs,
                            all_labels,
                        )

                        best_score, best_epoch, save_flag = self.check_best(
                            val_scores, save_on_score, best_score, epoch)

                        store_dict = {
                            "model_state_dict": model.state_dict(),
                            "best_epoch": best_epoch,
                            "best_score": best_score,
                            "save_on_score": save_on_score,
                        }

                        path = os.path.join(
                            train_logger.log_path,
                            self.train_config.save_on.best_path.format(
                                self.log_label),
                        )

                        self.save(store_dict, path, save_flag)

                        if save_flag and train_log_values[
                                "hparams"] is not None:
                            (
                                best_hparam_list,
                                best_hparam_name_list,
                                best_metrics_list,
                                best_metrics_name_list,
                            ) = self.update_hparams(train_scores,
                                                    val_scores,
                                                    desc="best_val")

                # pbar.close()
                if epoch % log_interval == 0:
                    # print("\nLogging\n")
                    train_loss_name = self.train_config.criterion.type
                    metric_list = [
                        metric(
                            all_labels.cpu(),
                            all_outputs.detach().cpu(),
                            **self.metrics[metric],
                        ) for metric in self.metrics
                    ]
                    metric_name_list = [
                        metric["type"]
                        for metric in self._config.main_config.metrics
                    ]

                    train_scores = self.log(
                        train_loss / len(train_loader),
                        train_loss_name,
                        metric_list,
                        metric_name_list,
                        train_logger,
                        train_log_values,
                        epoch,
                        append_text=self.train_config.append_text,
                    )
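            # After the last epoch: for step-level intervals run one final
            # validation + logging pass, then update the best checkpoint and
            # store the final checkpoint and hyperparameters (`val_scores`
            # here is whatever the most recent validation pass produced).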
            if epoch == max_epochs:
                # print("\nEvaluating\n")
                if interval_type == "step":
                    val_scores = self.val(
                        model,
                        val_dataset,
                        global_step,
                        train_logger,
                        train_log_values,
                    )

                    # print("\nLogging\n")
                    train_loss_name = self.train_config.criterion.type
                    metric_list = [
                        metric(
                            all_labels.cpu(),
                            all_outputs.detach().cpu(),
                            **self.metrics[metric],
                        ) for metric in self.metrics
                    ]
                    metric_name_list = [
                        metric["type"]
                        for metric in self._config.main_config.metrics
                    ]

                    train_scores = self.log(
                        train_loss / len(train_loader),
                        train_loss_name,
                        metric_list,
                        metric_name_list,
                        train_logger,
                        train_log_values,
                        global_step,
                        append_text=self.train_config.append_text,
                    )

                if self.train_config.save_on is not None:

                    # BEST SCORES UPDATING

                    train_scores = self.get_scores(
                        train_loss,
                        len(train_loader),
                        self.train_config.criterion.type,
                        all_outputs,
                        all_labels,
                    )

                    best_score, best_step, save_flag = self.check_best(
                        val_scores, save_on_score, best_score, global_step)

                    store_dict = {
                        "model_state_dict": model.state_dict(),
                        "best_step": best_step,
                        "best_score": best_score,
                        "save_on_score": save_on_score,
                    }

                    path = os.path.join(
                        train_logger.log_path,
                        self.train_config.save_on.best_path.format(
                            self.log_label),
                    )

                    self.save(store_dict, path, save_flag)

                    if save_flag and train_log_values["hparams"] is not None:
                        (
                            best_hparam_list,
                            best_hparam_name_list,
                            best_metrics_list,
                            best_metrics_name_list,
                        ) = self.update_hparams(train_scores,
                                                val_scores,
                                                desc="best_val")

                    # FINAL SCORES UPDATING + STORING
                    train_scores = self.get_scores(
                        train_loss,
                        len(train_loader),
                        self.train_config.criterion.type,
                        all_outputs,
                        all_labels,
                    )

                    store_dict = {
                        "model_state_dict": model.state_dict(),
                        "final_step": global_step,
                        "final_score": train_scores[save_on_score],
                        "save_on_score": save_on_score,
                    }

                    path = os.path.join(
                        train_logger.log_path,
                        self.train_config.save_on.final_path.format(
                            self.log_label),
                    )

                    self.save(store_dict, path, save_flag=1)
                    if train_log_values["hparams"] is not None:
                        (
                            final_hparam_list,
                            final_hparam_name_list,
                            final_metrics_list,
                            final_metrics_name_list,
                        ) = self.update_hparams(train_scores,
                                                val_scores,
                                                desc="final")
                        train_logger.save_hyperparams(
                            best_hparam_list,
                            best_hparam_name_list,
                            [
                                int(self.log_label),
                            ] + best_metrics_list + final_metrics_list,
                            [
                                "hparams/log_label",
                            ] + best_metrics_name_list +
                            final_metrics_name_list,
                        )
Ejemplo n.º 27
0
#     help="The configuration for data",
# )
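# NOTE: this snippet is truncated; it assumes the usual setup above, e.g.
#   import argparse, os
#   from omegaconf import OmegaConf
#   from transformers import TrainingArguments
# plus the project's `seed` and `configmapper` helpers and an argparse
# parser that defines `--config_dir`.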

args = parser.parse_args()
model_config = OmegaConf.load(os.path.join(args.config_dir, "model.yaml"))
train_config = OmegaConf.load(os.path.join(args.config_dir, "train.yaml"))
data_config = OmegaConf.load(os.path.join(args.config_dir, "dataset.yaml"))

# Seed
seed(train_config.args.seed)  # just in case

# Data
if "main" in dict(data_config).keys():  # Regular Data
    train_data_config = data_config.train
    val_data_config = data_config.val
    train_data = configmapper.get_object(
        "datasets", train_data_config.name)(train_data_config)
    val_data = configmapper.get_object("datasets",
                                       val_data_config.name)(val_data_config)

else:  # HF Type Data
    dataset = configmapper.get_object("datasets",
                                      data_config.name)(data_config)
    train_data = dataset.train_dataset["train"]
    val_data = dataset.train_dataset["test"]

# Model
model = configmapper.get_object("models", model_config.name)(model_config)

args = TrainingArguments(
    **OmegaConf.to_container(train_config.args, resolve=True))
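# NOTE: this rebinds `args`, shadowing the argparse namespace parsed above;
# a separate name such as `training_args` would avoid the shadowing.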
# Checking for Checkpoints