import logging
import os
import re

from xgboost import Booster
from xgboost.core import XGBoostError

# CHECKPOINT_FILENAME is a module-level constant holding the checkpoint file prefix.


def load_checkpoint(checkpoint_dir, max_try=5):
    """
    :param checkpoint_dir: e.g., /opt/ml/checkpoints
    :param max_try: number of times to try loading a checkpoint before giving up.
    :return xgb_model: file path of the stored xgb model. None if no checkpoint.
    :return iteration: number of iterations completed before the last checkpoint.
    """
    if not checkpoint_dir or not os.path.exists(checkpoint_dir):
        return None, 0

    regex = r"^{0}\.[0-9]+$".format(CHECKPOINT_FILENAME)
    checkpoints = [f for f in os.listdir(checkpoint_dir) if re.match(regex, f)]
    if not checkpoints:
        return None, 0
    # Sort by the numeric iteration suffix so that the newest checkpoint is last;
    # a plain lexicographic sort would order ".10" before ".9".
    checkpoints.sort(key=lambda name: int(name.rsplit(".", 1)[1]))

    xgb_model, iteration = None, 0

    for _ in range(max_try):
        try:
            latest_checkpoint = checkpoints.pop()
            xgb_model = os.path.join(checkpoint_dir, latest_checkpoint)
            booster = Booster()
            booster.load_model(xgb_model)

            filename, extension = latest_checkpoint.rsplit(".", 1)
            iteration = int(extension) + 1
            break
        except XGBoostError:
            logging.debug("Wrong checkpoint model format %s", latest_checkpoint)

    return xgb_model, iteration
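# Usage sketch (assumption, not part of the original module): how the return
# values of load_checkpoint() are typically fed back into xgb.train() to resume
# training. resume_training() and its parameters are hypothetical names.
import xgboost as xgb


def resume_training(params, dtrain, num_boost_round, checkpoint_dir):
    xgb_model, start_iteration = load_checkpoint(checkpoint_dir)
    # xgb_model is either None (train from scratch) or a path to the latest
    # readable checkpoint; only the remaining boosting rounds are trained.
    return xgb.train(
        params,
        dtrain,
        num_boost_round=num_boost_round - start_iteration,
        xgb_model=xgb_model,
    )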
import os
import pickle

import sklearn.datasets
import xgboost as xgb
from sklearn.model_selection import train_test_split
from ray.tune import PlacementGroupFactory, Trainable


class BreastCancerTrainable(Trainable):
    def setup(self, config):
        self.config = config
        self.nthread = config.pop("nthread", 1)
        self.model: xgb.Booster = None
        # Load dataset
        data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
        # Split into train and test set
        train_x, test_x, train_y, test_y = train_test_split(
            data, labels, test_size=0.25
        )
        # Build input matrices for XGBoost
        self.train_set = xgb.DMatrix(train_x, label=train_y)
        self.test_set = xgb.DMatrix(test_x, label=test_y)

    def step(self):
        # you can also obtain current trial resources:
        current_resources = self.trial_resources
        if isinstance(current_resources, PlacementGroupFactory):
            self.nthread = current_resources.head_cpus
        else:
            self.nthread = current_resources.cpu

        results = {}
        config = self.config.copy()
        config["nthread"] = int(self.nthread)
        self.model = xgb.train(
            config,
            self.train_set,
            evals=[(self.test_set, "eval")],
            verbose_eval=False,
            xgb_model=self.model,
            evals_result=results,
            num_boost_round=1,
        )
        print(config, results)
        return {
            "eval-logloss": results["eval"]["logloss"][-1],
            "nthread": self.nthread,
        }

    def save_checkpoint(self, checkpoint_dir):
        path = os.path.join(checkpoint_dir, "checkpoint")
        with open(path, "wb") as outputFile:
            pickle.dump(
                (self.config, self.nthread, self.model.save_raw()), outputFile
            )
        return path

    def load_checkpoint(self, checkpoint_path):
        with open(checkpoint_path, "rb") as inputFile:
            self.config, self.nthread, raw_model = pickle.load(inputFile)
        self.model = xgb.Booster()
        self.model.load_model(bytearray(raw_model))
        # Rebuild the datasets, since DMatrix objects are not checkpointed
        data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
        # Split into train and test set
        train_x, test_x, train_y, test_y = train_test_split(
            data, labels, test_size=0.25
        )
        # Build input matrices for XGBoost
        self.train_set = xgb.DMatrix(train_x, label=train_y)
        self.test_set = xgb.DMatrix(test_x, label=test_y)
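# Usage sketch (assumption, not part of the original example): one minimal way to
# launch BreastCancerTrainable with Ray Tune's tune.run API. The search space,
# sample count, and stopping criterion below are illustrative placeholders.
from ray import tune

if __name__ == "__main__":
    analysis = tune.run(
        BreastCancerTrainable,
        config={
            "objective": "binary:logistic",
            "eval_metric": "logloss",  # step() reads results["eval"]["logloss"]
            "eta": tune.loguniform(1e-3, 1e-1),
            "max_depth": tune.randint(2, 8),
        },
        metric="eval-logloss",
        mode="min",
        num_samples=4,
        stop={"training_iteration": 10},
        resources_per_trial={"cpu": 1},
        checkpoint_freq=5,
    )
    print("Best config:", analysis.best_config)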
import os
import uuid

from xgboost.core import Booster


def deserialize_booster(ser_model_string):
    """
    Deserialize an xgboost.core.Booster from the input ser_model_string.
    """
    booster = Booster()
    # TODO: change to use string io
    # _get_or_create_tmp_dir() is a helper defined elsewhere in this module.
    tmp_file_name = os.path.join(_get_or_create_tmp_dir(), f"{uuid.uuid4()}.json")
    with open(tmp_file_name, "w", encoding="utf-8") as f:
        f.write(ser_model_string)
    booster.load_model(tmp_file_name)
    return booster
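# Companion sketch (assumption, not confirmed as the original implementation):
# a serializer illustrating the round trip that deserialize_booster() expects.
# The booster is saved as JSON to a temporary file and the file contents are
# returned as a string.
def serialize_booster(booster):
    tmp_file_name = os.path.join(_get_or_create_tmp_dir(), f"{uuid.uuid4()}.json")
    booster.save_model(tmp_file_name)
    with open(tmp_file_name, encoding="utf-8") as f:
        return f.read()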