def train(self):
    self.optimizer = tf.keras.optimizers.Adam(beta_1=0.9, beta_2=0.98, epsilon=1e-9)
    if self.use_label_smoothing:
        self.loss_function = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    else:
        self.loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    self.loss_metric = tf.keras.metrics.Mean(name="train_loss")
    self.acc_metric = tf.keras.metrics.SparseCategoricalAccuracy(name="train_acc")

    ckpt = tf.train.Checkpoint(model=self.transformer, opt=self.optimizer)
    if self.ckpt_path is not None:
        fname, self.initial_epoch = load_checkpoint(Path(self.ckpt_path).resolve(), self.ckpt_epoch)
        print("\nCheckpoint File : {}\n".format(fname))
        ckpt.mapped = {"model": self.transformer, "opt": self.optimizer}
        ckpt.restore(fname)

    progbar = tf.keras.utils.Progbar(target=self.num_train)
    self.count = 0
    for epoch in range(self.initial_epoch, self.initial_epoch + self.epochs):
        K.set_value(self.optimizer.lr, self._get_lr(epoch))
        progbar.update(0)
        self.loss_metric.reset_states()
        self.acc_metric.reset_states()
        start_time = korea_time(None)
        for train_src, train_tar in self.train_dataset:
            num_data = K.int_shape(train_src)[0]
            logits = self.forward(train_src, train_tar)
            progbar.add(num_data)
        end_time = korea_time(None)

        epoch_loss = self.loss_metric.result()
        epoch_acc = self.acc_metric.result()
        ckpt_prefix = self.ckpt_folder / "Epoch-{}_Loss-{:.5f}_Acc-{:.5f}".format(epoch, epoch_loss, epoch_acc)
        ckpt.save(file_prefix=ckpt_prefix)
        print("Epoch = [{:5d}] Loss = [{:8.6f}] Acc = [{:8.6f}] LR = [{:.10f}]\n".format(
            epoch, epoch_loss, epoch_acc, K.get_value(self.optimizer.lr)))

        # Save model results
        msg = "Epoch = [{:5d}] - End Time [ {} ]\n".format(epoch, end_time.strftime("%Y/%m/%d %H:%M:%S"))
        msg += "Elapsed Time = {}\n".format(end_time - start_time)
        msg += "Learning Rate = [{:.10f}]\n".format(K.get_value(self.optimizer.lr))
        msg += "Loss : [{:8.6f}] - Acc : [{:8.6f}]\n".format(epoch_loss, epoch_acc)
        msg += " - " * 15 + "\n\n"
        with self.training_result_file.open("a+", encoding="utf-8") as fp:
            fp.write(msg)

        if self.test_result_file is not None:
            self.translate(epoch)
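# Note: the body of self._get_lr(epoch) is not shown in this file. A minimal
# sketch of an inverse-square-root warmup schedule in the spirit of the
# Transformer paper is given below, applied per epoch as a simplifying
# assumption; the d_model and warmup_epochs names are assumptions, not part of
# the original code.
def _get_lr_sketch(epoch, d_model=512, warmup_epochs=10):
    """Warmup then inverse-square-root decay: lr grows linearly for the first
    warmup_epochs, then decays proportionally to epoch**-0.5."""
    step = max(epoch, 1)
    return (d_model ** -0.5) * min(step ** -0.5, step * (warmup_epochs ** -1.5))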
def Arguments(self, **kwargs):
    msg = "Time : [ {} ]\n".format(korea_time())
    msg += "Log File Path = {}\n".format(self.log_file)
    for k, v in list(kwargs.items()):
        msg += "{} = {}\n".format(k, v)
    msg += "\n" + "-- " * 10 + "\n\n"
    self.write(msg)
def StartCrawling(self, words):
    msg = "Time : [ {} ]\n".format(korea_time())
    msg += "Starting crawling.\n"
    msg += "<Words to collect>\n"
    for idx in range(len(words)):
        msg += "[{:4d}] : {}\n".format(idx + 1, words[idx])
    msg += "\n" + " -- " * 10 + "\n\n"
    self.write(msg)
def NoInputsError(self, file_no):
    msg = "Time : [ {} ]\n".format(korea_time())
    msg += "File Name : {} Line : {}\n".format(self.file_name, file_no)
    msg += "Neither kwargs[\"words\"] nor kwargs[\"file\"] was provided.\n"
    msg += "Exiting the program...\n"
    msg += "\n" + " -- " * 10 + "\n\n"
    self.write(msg)
    self._quit()
def FileNotExistsError(self, file_no, file_path):
    msg = "Time : [ {} ]\n".format(korea_time())
    msg += "File Name : {} Line : {}\n".format(self.file_name, file_no)
    msg += "The file [ {} ] does not exist.\n".format(file_path)
    msg += "Exiting the program...\n"
    msg += "\n" + " -- " * 10 + "\n\n"
    self.write(msg)
    self._quit()
def TooManyPagesError(self, file_no, pages):
    msg = "Time : [ {} ]\n".format(korea_time())
    msg += "File Name : {} Line : {}\n".format(self.file_name, file_no)
    msg += "kwargs[\"pages\"] accepts at most two numbers.\n"
    msg += "Given pages : {}\n".format(pages)
    msg += "Exiting the program...\n"
    msg += "\n" + " -- " * 10 + "\n\n"
    self.write(msg)
    self._quit()
def InvalidSetError(self, file_no, word, inputs):
    msg = "Time : [ {} ]\n".format(korea_time())
    msg += "File Name : {} Line : {}\n".format(self.file_name, file_no)
    msg += "The word [ {} ] in the input file has an invalid value.\n".format(word)
    msg += "Input value : {}\n".format(inputs)
    msg += "Exiting the program...\n"
    msg += "\n" + " -- " * 10 + "\n\n"
    self.write(msg)
    self._quit()
def InvalidPagesError(self, file_no, start_page, end_page):
    msg = "Time : [ {} ]\n".format(korea_time())
    msg += "File Name : {} Line : {}\n".format(self.file_name, file_no)
    msg += "Invalid page values\n"
    msg += "start page : [{:3d}] end page : [{:3d}]\n".format(start_page, end_page)
    msg += "Exiting the program...\n"
    msg += "\n" + " -- " * 10 + "\n\n"
    self.write(msg)
    self._quit()
def create_folder(self, folder_location):
    # Save results under the current directory when no folder is given
    self.folder = Path.cwd() if folder_location is None else Path(folder_location).resolve()
    if not self.folder.exists():
        self.logger.FileNotExistsError(sys._getframe().f_lineno, self.folder)

    result_folder = self.folder / "Results"
    self.words_folder = result_folder / "words"
    self.logs_folder = result_folder / "logs"
    for folder in [result_folder, self.words_folder, self.logs_folder]:
        if not folder.exists():
            folder.mkdir()

    current_time = korea_time("%Y%m%d_%H%M%S")
    log_file = self.logs_folder / "logs - {}.txt".format(current_time)
    self.logger = Logger("Model.py", log_file)
    return current_time
def KeyboardInterruptError(self):
    msg = "Time : [ {} ]\n".format(korea_time())
    msg += "KeyboardInterrupt\n"
    msg += "\n" + " -- " * 10 + "\n\n"
    self.write(msg)
def ReasonToBreak(self, reason):
    msg = "Time : [ {} ]\n".format(korea_time())
    msg += reason
    msg += "\n" + " -- " * 10 + "\n\n"
    self.write(msg)
def StartWordCrawling(self, word):
    msg = "Time : [ {} ]\n".format(korea_time())
    msg += "\nCollecting example sentences for the word [ {} ].\n".format(word)
    msg += "\n" + "-- " * 10 + "\n\n"
    self.write(msg)
def NoExamples(self, word, url):
    msg = "Time : [ {} ]\n".format(korea_time())
    msg += "\nNo example sentences exist for the word [ {} ].\n".format(word)
    msg += "URL : {}\n".format(url)
    msg += "\n" + "-- " * 10 + "\n\n"
    self.write(msg)
def train(self, **kwargs):
    interval = kwargs["interval"]
    model_ckpt_path, model_images_path, model_logs_path, model_result_file = make_folders_for_model(kwargs['folder'])
    self.generator = self.make_generator()
    self.critic = self.make_critic()
    train_dataset = self.get_dataset()
    num_batches = ceil(self.num_train / self.batch_size)

    c_epoch_loss = []
    g_epoch_loss = []
    training_progbar = tf.keras.utils.Progbar(target=self.num_train)
    save_initial_model_info({'generator': self.generator, 'critic': self.critic},
                            model_logs_path, model_ckpt_path, **kwargs)
    count = 0

    self.g_opt = tf.keras.optimizers.Adam(learning_rate=self.g_lr, beta_1=0, beta_2=0.9)
    self.c_opt = tf.keras.optimizers.Adam(learning_rate=self.c_lr, beta_1=0, beta_2=0.9)
    ckpt = tf.train.Checkpoint(g_opt=self.g_opt, c_opt=self.c_opt,
                               g_model=self.generator, c_model=self.critic)
    if kwargs["ckpt_path"] is not None:
        fname, self.initial_epoch = load_checkpoint(**kwargs)
        print("\nCheckpoint File : {}\n".format(fname))
        ckpt.mapped = {"g_opt": self.g_opt, "c_opt": self.c_opt,
                       "g_model": self.generator, "c_model": self.critic}
        ckpt.restore(fname)
        self.g_lr = self.g_opt.get_config()["learning_rate"]
        self.c_lr = self.c_opt.get_config()["learning_rate"]

    for epoch in range(self.initial_epoch, self.initial_epoch + 50000):
        count += 1
        start_time = korea_time()
        num_batch = 0
        mult = self.n_critic * self.batch_size  # e.g. 64 * 5 = 320
        num_dataset = 0  # e.g. 60000
        real_images_list = []
        for real_images in train_dataset:
            # Collect self.n_critic batches of real images before each generator update
            real_images_list.append(real_images)
            num_images = K.int_shape(real_images)[0]
            num_batch += num_images
            num_dataset += num_images
            if (num_batch == mult) or (num_dataset == self.num_train):
                critic_loss_list = [(self.train_D(real_images)).numpy() for real_images in real_images_list]
                g_loss = (self.train_G()).numpy()
                c_epoch_loss.extend(critic_loss_list)
                g_epoch_loss.append(g_loss)
                training_progbar.add(num_batch)
                if num_dataset == self.num_train:
                    break
                num_batch = 0
                real_images_list = []
        end_time = korea_time()
        training_progbar.update(0)  # Reset the progress bar

        c_mean_loss = np.mean(c_epoch_loss, axis=0)
        g_mean_loss = np.mean(g_epoch_loss, axis=0)
        ckpt_prefix = os.path.join(model_ckpt_path,
                                   "Epoch-{}_G-Loss-{:.6f}_C-Loss-{:.6f}".format(epoch, g_mean_loss, c_mean_loss))
        ckpt.save(file_prefix=ckpt_prefix)
        print("Epoch = [{:5d}]\tGenerator Loss = [{:8.6f}]\tCritic Loss = [{:8.6f}]\n".format(
            epoch, g_mean_loss, c_mean_loss))

        # Save model results
        str_ = "Epoch = [{:5d}] - End Time [ {} ]\n".format(epoch, str(end_time.strftime("%Y / %m / %d %H:%M:%S")))
        str_ += "Elapsed Time = {}\n".format(end_time - start_time)
        str_ += "Generator Learning Rate = [{:.6f}] - Critic Learning Rate = [{:.6f}]\n".format(self.g_lr, self.c_lr)
        str_ += "Generator Loss : [{:8.6f}] - Critic Loss : [{:8.6f}] - Sum : [{:8.6f}]\n".format(
            g_mean_loss, c_mean_loss, g_mean_loss + c_mean_loss)
        str_ += " - " * 15 + "\n\n"
        with open(model_result_file, "a+", encoding='utf-8') as fp:
            fp.write(str_)

        if count == interval:
            fname = os.path.join(model_images_path, "{}.png".format(epoch))
            self.plot_images(fname)
            count = 0

        c_epoch_loss = []
        g_epoch_loss = []
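# Note: self.train_D and self.train_G are not shown in this file. The optimizer
# settings (beta_1=0, beta_2=0.9) and the n_critic inner loop match the usual
# WGAN-GP setup, so a minimal sketch of a gradient-penalty critic update is
# given below. The generator/critic call signatures, z_dim, and gp_weight are
# assumptions, not the original implementation.
import tensorflow as tf

@tf.function
def wgan_gp_critic_step(generator, critic, c_opt, real_images, z_dim=128, gp_weight=10.0):
    batch_size = tf.shape(real_images)[0]
    z = tf.random.normal((batch_size, z_dim))
    with tf.GradientTape() as tape:
        fake_images = generator(z, training=True)
        real_scores = critic(real_images, training=True)
        fake_scores = critic(fake_images, training=True)
        # Wasserstein critic loss: push real scores up, fake scores down
        w_loss = tf.reduce_mean(fake_scores) - tf.reduce_mean(real_scores)
        # Gradient penalty on random interpolations between real and fake samples
        alpha = tf.random.uniform((batch_size, 1, 1, 1), 0.0, 1.0)
        interpolated = alpha * real_images + (1.0 - alpha) * fake_images
        with tf.GradientTape() as gp_tape:
            gp_tape.watch(interpolated)
            interp_scores = critic(interpolated, training=True)
        grads = gp_tape.gradient(interp_scores, interpolated)
        grad_norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]) + 1e-12)
        gp = tf.reduce_mean(tf.square(grad_norm - 1.0))
        c_loss = w_loss + gp_weight * gp
    c_grads = tape.gradient(c_loss, critic.trainable_variables)
    c_opt.apply_gradients(zip(c_grads, critic.trainable_variables))
    return c_loss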
def CurrentStatus(self, word, level, cur_page):
    msg = "Time : [ {} ]\n".format(korea_time())
    msg += "Word : [ {} ] - Level : [ {} ] - Current Page : [{:4d}]\n".format(word, level, cur_page)
    msg += "\n" + " -- " * 10 + "\n\n"
    self.write(msg)
def train(self, **kwargs):
    interval = kwargs["interval"]
    model_ckpt_path, model_images_path, model_logs_path, model_result_file = make_folders_for_model(kwargs['folder'])
    self.generator = self.make_generator()
    self.discriminator = self.make_discriminator()
    train_dataset = self.get_dataset()
    num_batches = ceil(self.num_train / self.batch_size)

    d_epoch_loss = []
    d_epoch_aux_loss = []
    g_epoch_loss = []
    training_progbar = tf.keras.utils.Progbar(target=self.num_train)
    save_initial_model_info({'generator': self.generator, 'discriminator': self.discriminator},
                            model_logs_path, model_ckpt_path, **kwargs)
    count = 0

    self.g_opt = tf.keras.optimizers.Adam(learning_rate=self.g_lr, beta_1=0.5)
    self.d_opt = tf.keras.optimizers.Adam(learning_rate=self.d_lr, beta_1=0.5)
    self.BC_function = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    self.SCC_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    ckpt = tf.train.Checkpoint(g_opt=self.g_opt, d_opt=self.d_opt,
                               g_model=self.generator, d_model=self.discriminator)
    if kwargs["ckpt_path"] is not None:
        fname, self.initial_epoch = load_checkpoint(**kwargs)
        print("\nCheckpoint File : {}\n".format(fname))
        ckpt.mapped = {"g_opt": self.g_opt, "d_opt": self.d_opt,
                       "g_model": self.generator, "d_model": self.discriminator}
        ckpt.restore(fname)
        self.g_lr = self.g_opt.get_config()["learning_rate"]
        self.d_lr = self.d_opt.get_config()["learning_rate"]

    for epoch in range(self.initial_epoch, self.initial_epoch + 50000):
        count += 1
        start_time = korea_time()
        for real_images, real_labels in train_dataset:
            num_images = K.int_shape(real_labels)[0]
            g_loss = (self.train_G(num_images)).numpy()
            d_BC_loss, d_SCC_loss = self.train_D(real_images, real_labels)
            d_BC_loss = d_BC_loss.numpy()
            d_SCC_loss = d_SCC_loss.numpy()
            d_epoch_loss.append(d_BC_loss)
            d_epoch_aux_loss.append(d_SCC_loss)
            g_epoch_loss.append(g_loss)
            training_progbar.add(num_images)
        end_time = korea_time()
        training_progbar.update(0)  # Reset the progress bar

        d_mean_loss = np.mean(d_epoch_loss, axis=0)
        d_mean_aux_loss = np.mean(d_epoch_aux_loss, axis=0)
        g_mean_loss = np.mean(g_epoch_loss, axis=0)
        ckpt_prefix = os.path.join(model_ckpt_path,
                                   "Epoch-{}_G-Loss-{:.6f}_D-Loss-{:.6f}".format(
                                       epoch, g_mean_loss, d_mean_loss + d_mean_aux_loss))
        ckpt.save(file_prefix=ckpt_prefix)
        str_ = ("Epoch = [{:5d}]\tG Loss = [{:8.6f}]\t".format(epoch, g_mean_loss) +
                "D Loss = [{:8.6f}]\tD AUX Loss = [{:8.6f}]\n".format(d_mean_loss, d_mean_aux_loss))
        print(str_)

        # Save model results
        str_ = "Epoch = [{:5d}] - End Time [ {} ]\n".format(epoch, str(end_time.strftime("%Y / %m / %d %H:%M:%S")))
        str_ += "Elapsed Time = {}\n".format(end_time - start_time)
        str_ += "G Learning Rate = [{:.6f}] - D Learning Rate = [{:.6f}]\n".format(self.g_lr, self.d_lr)
        str_ += "G Loss : [{:8.6f}] - D Loss : [{:8.6f}] - D AUX Loss : [{:8.6f}] - Sum : [{:8.6f}]\n".format(
            g_mean_loss, d_mean_loss, d_mean_aux_loss, g_mean_loss + d_mean_loss + d_mean_aux_loss)
        str_ += " - " * 15 + "\n\n"
        with open(model_result_file, "a+", encoding='utf-8') as fp:
            fp.write(str_)

        if count == interval:
            fname = os.path.join(model_images_path, "{}.png".format(epoch))
            self.plot_images(fname)
            count = 0

        d_epoch_loss = []
        d_epoch_aux_loss = []
        g_epoch_loss = []
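# Note: self.train_D is not shown in this file. Given the two loss objects
# created above (BinaryCrossentropy for real/fake, SparseCategoricalCrossentropy
# for the class head), a minimal AC-GAN-style discriminator step could look like
# the sketch below. The assumption that the discriminator returns a
# (real/fake logit, class logits) pair, and the z_dim/num_classes names, are not
# from the original code.
import tensorflow as tf

@tf.function
def acgan_discriminator_step(generator, discriminator, d_opt, bc_fn, scc_fn,
                             real_images, real_labels, num_classes=10, z_dim=100):
    batch_size = tf.shape(real_images)[0]
    z = tf.random.normal((batch_size, z_dim))
    fake_labels = tf.random.uniform((batch_size,), 0, num_classes, dtype=tf.int32)
    fake_images = generator([z, fake_labels], training=True)
    with tf.GradientTape() as tape:
        real_logit, real_class_logits = discriminator(real_images, training=True)
        fake_logit, fake_class_logits = discriminator(fake_images, training=True)
        # Adversarial (real vs. fake) loss
        bc_loss = (bc_fn(tf.ones_like(real_logit), real_logit) +
                   bc_fn(tf.zeros_like(fake_logit), fake_logit))
        # Auxiliary classification loss on both real and generated samples
        scc_loss = (scc_fn(real_labels, real_class_logits) +
                    scc_fn(fake_labels, fake_class_logits))
        d_loss = bc_loss + scc_loss
    grads = tape.gradient(d_loss, discriminator.trainable_variables)
    d_opt.apply_gradients(zip(grads, discriminator.trainable_variables))
    return bc_loss, scc_loss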
def AlreadyExists(self, eng, kor):
    msg = "Time : [ {} ]\n".format(korea_time())
    msg += "{} -> {}\n".format(eng, kor)
    msg += "This example sentence already exists in the database.\n"
    msg += "\n" + " -- " * 10 + "\n\n"
    self.write(msg)
def train(self):
    self.g_opt = tf.keras.optimizers.Adam(learning_rate=self.lr, beta_1=0.5)
    self.d_opt = tf.keras.optimizers.Adam(learning_rate=self.lr, beta_1=0.5)
    self.g_loss_metric = tf.keras.metrics.Mean(name="g_loss")
    self.d_loss_metric = tf.keras.metrics.Mean(name="d_loss")

    ckpt = tf.train.Checkpoint(generator=self.gen,
                               discriminator=self.disc,
                               generator_optimizer=self.g_opt,
                               discriminator_optimizer=self.d_opt)
    if self.ckpt_path is not None:
        fname, self.initial_epoch = load_checkpoint(Path(self.ckpt_path).resolve(), self.ckpt_epoch)
        print("\nCheckpoint File : {}\n".format(fname))
        ckpt.mapped = {"generator": self.gen,
                       "discriminator": self.disc,
                       "generator_optimizer": self.g_opt,
                       "discriminator_optimizer": self.d_opt}
        ckpt.restore(fname)
        self.lr = self.g_opt.get_config()["learning_rate"]

    progbar = tf.keras.utils.Progbar(target=self.num_train)
    for epoch in range(self.initial_epoch, self.initial_epoch + self.epochs):
        self.g_loss_metric.reset_states()
        self.d_loss_metric.reset_states()
        start_time = korea_time(None)
        for images in self.dataset:
            num_images = K.int_shape(images)[0]
            self.train_D(images)
            self.train_G(num_images)
            progbar.add(num_images)
        end_time = korea_time(None)
        progbar.update(0)  # Reset the progress bar

        g_loss = self.g_loss_metric.result()
        d_loss = self.d_loss_metric.result()
        ckpt_prefix = self.ckpt_folder / "Epoch-{}_gLoss-{:.6f}_dLoss-{:.6f}".format(epoch, g_loss, d_loss)
        ckpt.save(file_prefix=ckpt_prefix)
        print("Epoch = [{:5d}] G_loss = [{:8.6f}] D_loss = [{:8.6f}]\n".format(epoch, g_loss, d_loss))

        # Save model results
        with self.training_result_file.open("a+", encoding='utf-8') as fp:
            str_ = "Epoch = [{:5d}] - End Time [ {} ]\n".format(epoch, str(end_time.strftime("%Y / %m / %d %H:%M:%S")))
            str_ += "Elapsed Time = {}\n".format(end_time - start_time)
            str_ += "Learning Rate = [{:.6f}]\n".format(self.lr)
            str_ += "g_loss = [{:8.6f}] d_loss = [{:8.6f}]\n".format(g_loss, d_loss)
            str_ += " - " * 15 + "\n\n"
            fp.write(str_)

        fname = self.image_folder / "{}.png".format(epoch)
        self.plot_images(fname)
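# Note: self.plot_images is called in the trainers above but not shown in this
# file. A minimal sketch, assuming a matplotlib grid of samples from a fixed
# noise batch and a tanh-scaled generator output; the z_dim, rows, and cols
# parameters are assumptions, not the original implementation.
import matplotlib.pyplot as plt
import tensorflow as tf

def plot_images_sketch(generator, fname, z_dim=100, rows=4, cols=4):
    z = tf.random.normal((rows * cols, z_dim))
    images = generator(z, training=False).numpy()
    images = (images + 1.0) / 2.0  # assumes generator output in [-1, 1]
    fig, axes = plt.subplots(rows, cols, figsize=(cols, rows))
    for ax, img in zip(axes.flat, images):
        ax.imshow(img.squeeze(), cmap="gray")
        ax.axis("off")
    fig.savefig(str(fname))
    plt.close(fig)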
def start_crawling(self):
    driver = self.get_webdriver()
    driver.implicitly_wait(3)
    self.logger.StartCrawling(list(self.words_dic.keys()))
    try:
        for word in list(self.words_dic.keys()):
            self.logger.StartWordCrawling(word)
            levels = self.words_dic[word]["levels"]
            start_pages = self.words_dic[word]["start_page"]
            end_pages = self.words_dic[word]["end_page"]
            user = self.words_dic[word]["user"]
            trsl = self.words_dic[word]["trsl"]
            words_dic = dict()
            for level, start_page, end_page, use_user, use_trsl in zip(levels, start_pages, end_pages, user, trsl):
                previous_page = start_page - 1
                for page in range(start_page, end_page + 1):
                    # Navigate to the target page
                    driver.get(self.query.format(page, word, self.levels_dic[level]))
                    try:
                        # Wait until the example area appears
                        WebDriverWait(driver, self.patience_time).until(
                            EC.presence_of_element_located((By.ID, "searchPage_example")))
                    except TimeoutException:
                        self.logger.NoExamples(word, self.query.format(page, word, self.levels_dic[level]))
                        break

                    cur_page = self.get_current_page(driver)
                    if cur_page == 0 or cur_page < start_page:
                        # No examples exist up to this page
                        self.logger.ReasonToBreak("No examples exist up to this page.\n")
                        break
                    elif (previous_page != start_page - 1) and (previous_page == cur_page):
                        # No more pages to move to
                        self.logger.ReasonToBreak("There are no more pages to move to.\n")
                        break
                    self.logger.CurrentStatus(word, level, cur_page)
                    previous_page = cur_page

                    for idx in range(len(driver.find_elements_by_css_selector(self.examples_area))):
                        user_status = self.CheckExistence(self.get_one_example(driver, idx), ".user_profile")
                        trsl_status = self.CheckExistence(self.get_one_example(driver, idx), ".translate_btns")
                        if trsl_status:  # Papago machine translation
                            if use_trsl:
                                self.get_one_example(driver, idx).find_element_by_css_selector(".btn_papago").click()
                                eng = self.get_eng_area(driver, idx)
                                try:
                                    WebDriverWait(driver, self.patience_time).until(
                                        lambda wd: self.get_papago_result(driver, idx) != "")
                                    kor = self.get_papago_result(driver, idx)
                                    self.saver.save((word, eng, kor, "Machine translation", page, level, korea_time()))
                                except TimeoutException:
                                    # No translation came back before the timeout
                                    kor = "None"
                                    self.saver.save((word, eng, kor, "Machine translation failed (no response)", page, level, korea_time()))
                        elif user_status:  # User-contributed translation
                            if use_user:
                                eng = self.get_eng_area(driver, idx)
                                kor = self.get_kor_area(driver, idx)
                                self.saver.save((word, eng, kor, "User contribution", page, level, korea_time()))
                        else:  # Official example, or no Korean translation available
                            eng = self.get_eng_area(driver, idx)
                            try:
                                kor = self.get_kor_area(driver, idx)
                                self.saver.save((word, eng, kor, "Official example", page, level, korea_time()))
                            except IndexError:  # No Korean example
                                kor = "None"
                                self.saver.save((word, eng, kor, "No Korean example", page, level, korea_time()))
                        # Rest before moving to the next example
                        Sleep(self.sleep_time)
    except KeyboardInterrupt:
        self.logger.KeyboardInterruptError()
    self.saver.quit_db()
    driver.close()
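# Note: CheckExistence is called above but not shown in this file. A minimal
# sketch, assuming it simply reports whether a CSS selector matches inside a
# given element (Selenium 3-style API, matching the calls above):
def CheckExistence(self, element, css_selector):
    # True when at least one child matches the selector, False otherwise
    return len(element.find_elements_by_css_selector(css_selector)) > 0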