def main(args):
    p_list = ProcessorFactory.make_process()
    for i, p in enumerate(p_list):
        get_main_logger(get_version()).info(f"<< {i} fold start >>")
        p.data_preprocess()
        p.load_condition()
        best_score = p.training()
        get_main_logger(get_version()).info(__fold_log(best_score, get_version(), i))
        get_main_logger(get_version()).info(f"<< {i} fold finish >>")
def load_model(self):
    task_name = self.config["summary"]["task"]
    # Resolve the weight directory depending on the runtime environment.
    if is_kagglekernel():
        model_dir = Path(__file__).absolute().parents[3] / "model{}_aptos2019".format(get_version())
    else:
        model_dir = Path(__file__).absolute().parents[1] / "model"
    w_path = model_dir / "{}_{}.pth".format(get_version(), self.fold)
    # Import "model.<name>" dynamically and build the model via its "get_<name>" factory.
    model_config = self.config["train"]["model"]
    model_module = import_module("model." + model_config["name"])
    model = getattr(model_module, "get_" + model_config["name"])(task=task_name, weight=torch.load(w_path))
    self.model = model.to(self.device)
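# load_model above relies on a naming convention: a module "model/<name>.py" must
# expose a factory "get_<name>(task=..., weight=...)". A minimal sketch of such a
# module follows; the name "resnet", the torchvision backbone, and the way `task`
# and `weight` are consumed are assumptions for illustration, not taken from the source.

# model/resnet.py (hypothetical)
import torchvision


def get_resnet(task, weight=None):
    # Assumed: 5 output classes for APTOS-style diagnosis classification, 1 otherwise.
    num_classes = 5 if task == "classification" else 1
    model = torchvision.models.resnet50(num_classes=num_classes)
    if weight is not None:
        # `weight` would be the state dict produced by torch.load(w_path) in load_model.
        model.load_state_dict(weight)
    return model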
def wrapper(*args, **kargs):
    version = get_version()
    method_name = dargs[0]
    start = time.time()
    get_main_logger(version).info(f"====>> start {method_name}")
    result = func(*args, **kargs)
    # Log the elapsed time as hh:mm:ss.
    elapsed_time = int(time.time() - start)
    minutes, sec = divmod(elapsed_time, 60)
    hour, minutes = divmod(minutes, 60)
    get_main_logger(version).info(
        f"<<==== finish {method_name}: [elapsed time] >> {hour:0>2}:{minutes:0>2}:{sec:0>2}"
    )
    return result
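# `wrapper` above closes over `dargs` and `func`, so it presumably sits inside a
# two-level decorator factory that receives the method name as its first argument.
# A minimal sketch of that assumed enclosing structure (the factory name `stop_watch`
# and the plain print are illustrative; the real wrapper logs via get_main_logger):
import functools
import time


def stop_watch(*dargs):
    def _decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kargs):
            method_name = dargs[0]
            start = time.time()
            result = func(*args, **kargs)
            print(f"finish {method_name}: {int(time.time() - start)}s")
            return result
        return wrapper
    return _decorator


# Usage would then look like:
#
#     @stop_watch("training")
#     def training(self):
#         ...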
def __init__(self, fold):
    """
    Abstract process class.

    Notes
    -----
    [version, fold] cannot be accessed from child classes
    because of name mangling, so the log utilities are
    implemented in this class.
    """
    self.__version = str(get_version())
    self.__fold = str(fold)
    log_list = [
        "epoch", "train_loss", "valid_loss", "train_qwk", "valid_qwk"
    ]
    self.__log_df = pd.DataFrame(index=None, columns=log_list)
    create_train_logger(self.__version + "_" + self.__fold)
    get_train_logger(self.__version + "_" + self.__fold).debug(
        "\t".join(log_list))
def __load_config(self):
    """
    Load the YAML config file matching the current version.

    Returns
    -------
    config : dict
        Process condition loaded from the config file.
    """
    version = get_version()
    config_dir = Path(__file__).parents[1] / "config"
    config_file_list = list(config_dir.glob(f"{version}*.yml"))
    # Exactly one config file is expected per version.
    if len(config_file_list) > 1:
        print(f"Duplicate Config File Error. >> version : {version}")
        raise AssertionError
    with open(config_file_list[0], "r") as f:
        config_dict = yaml.safe_load(f)
    return config_dict
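# __load_config expects a single "<version>*.yml" file under config/. Based on the
# keys read elsewhere in this section, the file would need at least the following
# shape; the values shown are illustrative assumptions, not taken from the source:
#
#     summary:
#       task: classification        # read in load_model
#     train:
#       model:
#         name: resnet              # resolved to model/<name>.py by load_model
#     dataloader:
#       batch_size: 32              # read in inference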
def __fold_log(result, version, fold):
    text = (
        "\n\t== [{}] {} fold best ==\n"
        "\tepoch\t\t: {}\n"
        "\ttrain_loss\t: {}\n"
        "\tvalid_loss\t: {}\n"
        "\ttrain_qwk\t: {}\n"
        "\tvalid_qwk\t: {}"
    ).format(
        str(version), fold,
        result["epoch"], result["train_loss"], result["valid_loss"],
        result["train_qwk"], result["valid_qwk"]
    )
    return text


if __name__ == "__main__":
    gc.enable()
    version = get_version()
    create_main_logger(version)
    try:
        main(get_option())
    except NotImplementedError:
        get_main_logger(version).info("Not Implemented Exception Occurred.")
def inference(self):
    batch_size = self.config["dataloader"]["batch_size"]
    self.model = self.model.to(self.device)
    self.model.eval()
    # Predictions are written back by batch index, so the test loader must keep
    # the original sample order (no shuffling).
    for i, data in enumerate(self.test_loader):
        inputs = data["image"].to(self.device, dtype=torch.float)
        outputs = self.model(inputs)
        if torch.cuda.is_available():
            outputs = outputs.cpu()
        self.predict[i * batch_size:(i + 1) * batch_size] = np.argmax(outputs.detach().numpy(), axis=1)
    # Write the submission file to the expected location for the environment.
    if is_kagglekernel():
        submit_path = str(Path(__file__).absolute().parents[4] / "working" / "submission.csv")
        print(submit_path)
    else:
        submit_path = str(Path(__file__).absolute().parents[1] / "data" / "submit" / "{}.csv".format(get_version()))
    submission_df = pd.read_csv(self.__input_path() / "sample_submission.csv")
    submission_df["diagnosis"] = self.predict.astype(int)
    submission_df.to_csv(submit_path, index=False)