예제 #1
0
파일: Train.py 프로젝트: leesc912/NMT
    def train(self) :
        """Run the training loop, writing a checkpoint and a result entry after every epoch.

        Creates the Adam optimizer, the loss function and the running metrics, restores a
        checkpoint when ``self.ckpt_path`` is set, then iterates over
        ``self.train_dataset`` for ``self.epochs`` epochs starting at
        ``self.initial_epoch``. After each epoch the metric results are used to name the
        saved checkpoint, printed, and appended to ``self.training_result_file``;
        ``self.translate(epoch)`` is called when ``self.test_result_file`` is set.
        """
        self.optimizer = tf.keras.optimizers.Adam(beta_1 = 0.9, beta_2 = 0.98, epsilon = 1e-9)
        # Dense (one-hot style) targets are expected with label smoothing, sparse ids otherwise.
        # NOTE(review): no label_smoothing argument is passed here — presumably the
        # smoothing itself is applied elsewhere; confirm.
        if self.use_label_smoothing :
            self.loss_function = tf.keras.losses.CategoricalCrossentropy(from_logits = True)
        else :
            self.loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        self.loss_metric = tf.keras.metrics.Mean(name = "train_loss")
        self.acc_metric = tf.keras.metrics.SparseCategoricalAccuracy(name = "train_acc")

        ckpt = tf.train.Checkpoint(model = self.transformer, opt = self.optimizer)

        # NOTE(review): self.initial_epoch is only assigned in this method when a
        # checkpoint is loaded; otherwise it must already exist on the instance.
        if self.ckpt_path is not None :
            fname, self.initial_epoch = load_checkpoint(Path(self.ckpt_path).resolve(), self.ckpt_epoch)
            print("\nCheckpoint File : {}\n".format(fname))
            ckpt.mapped = {"model" : self.transformer, "opt" : self.optimizer}
            ckpt.restore(fname)

        progbar = tf.keras.utils.Progbar(target = self.num_train)

        self.count = 0
        for epoch in range(self.initial_epoch, self.initial_epoch + self.epochs) :
            # Set this epoch's learning rate and start with a clean progress bar and metrics.
            K.set_value(self.optimizer.lr, self._get_lr(epoch))
            progbar.update(0)
            self.loss_metric.reset_states()
            self.acc_metric.reset_states()

            start_time = korea_time(None)
            for train_src, train_tar in self.train_dataset :
                # First dimension of the source batch is used as the number of samples seen.
                num_data = K.int_shape(train_src)[0]
                # NOTE(review): `logits` is not used afterwards; presumably self.forward
                # performs the optimization step and updates the metrics — confirm.
                logits = self.forward(train_src, train_tar)

                progbar.add(num_data)

            end_time = korea_time(None)

            epoch_loss = self.loss_metric.result()
            epoch_acc = self.acc_metric.result()

            # NOTE(review): "{:5f}" is a width-5 spec with default precision, unlike the
            # "{:.5f}" used for the loss — confirm this is intended.
            ckpt_prefix = self.ckpt_folder / "Epoch-{}_Loss-{:.5f}_Acc-{:5f}".format(
                epoch, epoch_loss, epoch_acc)
            ckpt.save(file_prefix = ckpt_prefix)

            print("Epoch = [{:5d}]    Loss = [{:8.6f}]    Acc = [{:8.6f}]   LR = [{:.10f}]\n".format(
                epoch, epoch_loss, epoch_acc, K.get_value(self.optimizer.lr)))

            # Save the model results
            msg = "Epoch = [{:5d}] - End Time [ {} ]\n".format(epoch, end_time.strftime("%Y/%m/%d %H:%M:%S"))
            msg += "Elapsed Time = {}\n".format(end_time - start_time)
            msg += "Learning Rate = [{:.10f}]\n".format(K.get_value(self.optimizer.lr))
            msg += "Loss : [{:8.6f}] - Acc : [{:8.6f}]\n".format(epoch_loss, epoch_acc)
            msg += " - " * 15 + "\n\n"
            
            with self.training_result_file.open("a+", encoding = "utf-8") as fp :
                fp.write(msg)

            # Translation is only run when a test result file is configured.
            if self.test_result_file is not None :
                self.translate(epoch)
예제 #2
0
 def Arguments(self, **kwargs):
     """Log a timestamped dump of the given keyword arguments.

     The entry holds the value returned by ``korea_time()``, the value of
     ``self.log_file``, one ``key = value`` line per keyword argument and a
     closing separator. It is passed to ``self.write`` as a single string.
     """
     lines = [
         "Time : [ {} ]\n".format(korea_time()),
         "Log File Path = {}\n".format(self.log_file),
     ]
     lines.extend("{} = {}\n".format(name, value) for name, value in kwargs.items())
     lines.append("\n" + "-- " * 10 + "\n\n")
     self.write("".join(lines))
예제 #3
0
 def StartCrawling(self, words):
     """Log that crawling is starting, together with the numbered list of words.

     Args:
         words: Iterable of the words to collect. Each one is written on its
             own line, numbered from 1.

     The finished entry is passed to ``self.write`` as a single string.
     """
     msg = "Time : [ {} ]\n".format(korea_time())
     msg += "Crawling을 시작합니다.\n"
     msg += "<수집할 단어>\n"
     # enumerate avoids indexing, so any iterable works, not only sequences.
     for number, word in enumerate(words, start=1):
         msg += "[{:4d}] : {}\n".format(number, word)
     msg += "\n" + " -- " * 10 + "\n\n"
     self.write(msg)
예제 #4
0
    def NoInputsError(self, file_no):
        """Log that neither ``kwargs["words"]`` nor ``kwargs["file"]`` was supplied, then quit.

        Args:
            file_no: Line number reported next to ``self.file_name`` in the entry.

        The entry is passed to ``self.write`` and ``self._quit()`` is called afterwards.
        """
        report = "".join([
            "Time : [ {} ]\n".format(korea_time()),
            "File Name : {}   Line : {}\n".format(self.file_name, file_no),
            "kwargs[\"words\"], kwargs[\"file\"] 모두 입력되지 않았습니다.\n",
            "프로그램을 종료합니다...\n",
            "\n" + " -- " * 10 + "\n\n",
        ])
        self.write(report)

        self._quit()
예제 #5
0
    def FileNotExistsError(self, file_no, file_path):
        """Log that ``file_path`` does not exist, then quit.

        Args:
            file_no: Line number reported next to ``self.file_name`` in the entry.
            file_path: The missing path, inserted into the message as-is.

        The entry is passed to ``self.write`` and ``self._quit()`` is called afterwards.
        """
        stamp = "Time : [ {} ]\n".format(korea_time())
        location = "File Name : {}   Line : {}\n".format(self.file_name, file_no)
        detail = "파일 [ {} ]가 존재하지 않습니다.\n".format(file_path)
        closing = "프로그램을 종료합니다...\n" + "\n" + " -- " * 10 + "\n\n"
        self.write(stamp + location + detail + closing)

        self._quit()
예제 #6
0
    def TooManyPagesError(self, file_no, pages):
        """Log that ``kwargs["pages"]`` accepts at most two numbers, then quit.

        Args:
            file_no: Line number reported next to ``self.file_name`` in the entry.
            pages: The offending ``pages`` value, echoed in the message.

        The entry is passed to ``self.write`` and ``self._quit()`` is called afterwards.
        """
        pieces = (
            "Time : [ {} ]\n".format(korea_time()),
            "File Name : {}   Line : {}\n".format(self.file_name, file_no),
            "kwargs[\"pages\"]에는 최대 2개의 숫자까지 전달 가능합니다.\n",
            "입력된 pages : {}\n".format(pages),
            "프로그램을 종료합니다...\n",
            "\n" + " -- " * 10 + "\n\n",
        )
        self.write("".join(pieces))

        self._quit()
예제 #7
0
    def InvalidSetError(self, file_no, word, inputs):
        """Log that a word in the input file carries an invalid value, then quit.

        Args:
            file_no: Line number reported next to ``self.file_name`` in the entry.
            word: The word whose input was rejected.
            inputs: The rejected input value, echoed in the message.

        The entry is passed to ``self.write`` and ``self._quit()`` is called afterwards.
        """
        entry = []
        entry.append("Time : [ {} ]\n".format(korea_time()))
        entry.append("File Name : {}   Line : {}\n".format(self.file_name, file_no))
        entry.append("Input file의 단어 [ {} ]에 잘못된 입력값이 있습니다.\n".format(word))
        entry.append("입력값 : {}\n".format(inputs))
        entry.append("프로그램을 종료합니다...\n")
        entry.append("\n" + " -- " * 10 + "\n\n")
        self.write("".join(entry))

        self._quit()
예제 #8
0
    def InvalidPagesError(self, file_no, start_page, end_page):
        """Log an invalid start/end page pair, then quit.

        Args:
            file_no: Line number reported next to ``self.file_name`` in the entry.
            start_page: Start page, formatted with the ``{:3d}`` spec.
            end_page: End page, formatted with the ``{:3d}`` spec.

        The entry is passed to ``self.write`` and ``self._quit()`` is called afterwards.
        """
        stamp = "Time : [ {} ]\n".format(korea_time())
        location = "File Name : {}   Line : {}\n".format(self.file_name, file_no)
        page_info = "start page : [{:3d}]   end page : [{:3d}]\n".format(start_page, end_page)
        separator = "\n" + " -- " * 10 + "\n\n"
        self.write("".join((
            stamp,
            location,
            "잘못된 page 값\n",
            page_info,
            "프로그램을 종료합니다...\n",
            separator,
        )))

        self._quit()
예제 #9
0
파일: Model.py 프로젝트: leesc912/Crawler
    def create_folder(self, folder_location):
        """Prepare the result folders and attach a fresh log file to ``self.logger``.

        Args:
            folder_location: Base folder for the results. ``None`` selects the
                current working directory; any other value is resolved to an
                absolute path.

        Returns:
            The value of ``korea_time("%Y%m%d_%H%M%S")`` that was used in the
            log file name.

        Side effects: sets ``self.folder``, ``self.words_folder``,
        ``self.logs_folder`` and ``self.logger``, and creates ``Results``,
        ``Results/words`` and ``Results/logs`` under the base folder.
        """
        # Store the results in the current location unless told otherwise.
        self.folder = Path.cwd() if folder_location is None else Path(folder_location).resolve()
        if not self.folder.exists():
            # NOTE(review): self.logger is only (re)assigned further down, so this
            # call relies on a logger having been set before create_folder runs.
            # It presumably terminates the program — confirm against the Logger class.
            self.logger.FileNotExistsError(sys._getframe().f_lineno, self.folder)

        result_folder = self.folder / "Results"
        self.words_folder = result_folder / "words"
        self.logs_folder = result_folder / "logs"
        # The parent comes first, so every mkdir creates exactly one level.
        # exist_ok avoids the race between an exists() check and the mkdir() call.
        for folder in (result_folder, self.words_folder, self.logs_folder):
            folder.mkdir(exist_ok=True)

        current_time = korea_time("%Y%m%d_%H%M%S")
        log_file = self.logs_folder / "logs - {}.txt".format(current_time)
        self.logger = Logger("Model.py", log_file)

        return current_time
예제 #10
0
 def KeyboardInterruptError(self):
     """Log a timestamped ``KeyboardInterrupt`` entry through ``self.write``."""
     stamp = "Time : [ {} ]\n".format(korea_time())
     divider = "\n" + " -- " * 10 + "\n\n"
     self.write(stamp + "KeyboardInterrupt\n" + divider)
예제 #11
0
 def ReasonToBreak(self, reason):
     """Log a timestamped entry whose body is the given ``reason`` text.

     Args:
         reason: Text concatenated into the entry right after the timestamp
             line; no newline is added between it and the separator block.
     """
     stamp = "Time : [ {} ]\n".format(korea_time())
     divider = "\n" + " -- " * 10 + "\n\n"
     self.write(stamp + reason + divider)
예제 #12
0
 def StartWordCrawling(self, word):
     """Log that example sentences are about to be collected for ``word``.

     The entry is passed to ``self.write`` as a single string.
     """
     sections = (
         "Time : [ {} ]\n".format(korea_time()),
         "\n[ {} ] 단어에 대한 예문을 수집합니다.\n".format(word),
         "\n" + "-- " * 10 + "\n\n",
     )
     self.write("".join(sections))
예제 #13
0
 def NoExamples(self, word, url):
     """Log that no example sentences exist for ``word`` at ``url``.

     Args:
         word: The word that was being looked up.
         url: The URL that was queried, echoed in the entry.
     """
     sections = [
         "Time : [ {} ]\n".format(korea_time()),
         "\n[ {} ] 단어에 대한 예문이 존재하지 않습니다.\n".format(word),
         "URL : {}\n".format(url),
         "\n" + "-- " * 10 + "\n\n",
     ]
     self.write("".join(sections))
예제 #14
0
파일: WGAN.py 프로젝트: leesc912/GAN
    def train(self, **kwargs):
        """Train the generator and critic, checkpointing and logging after every epoch.

        Keyword arguments read directly here:
            interval: Number of epochs between calls to ``self.plot_images``.
            folder: Passed to ``make_folders_for_model`` to obtain the checkpoint,
                image and log locations and the result file.
            ckpt_path: When not ``None``, a checkpoint is located through
                ``load_checkpoint(**kwargs)`` and restored.

        The full ``kwargs`` are also forwarded to ``save_initial_model_info`` and
        ``load_checkpoint``. The epoch loop runs for a hard-coded 50000 epochs
        starting at ``self.initial_epoch``.
        """
        interval = kwargs["interval"]
        model_ckpt_path, model_images_path, model_logs_path, model_result_file = make_folders_for_model(
            kwargs['folder'])

        self.generator = self.make_generator()
        self.critic = self.make_critic()

        train_dataset = self.get_dataset()
        # NOTE(review): num_batches is not used anywhere else in this method.
        num_batches = ceil(self.num_train / self.batch_size)

        # Per-epoch loss accumulators; both are emptied at the end of every epoch.
        c_epoch_loss = []
        g_epoch_loss = []

        training_progbar = tf.keras.utils.Progbar(target=self.num_train)

        save_initial_model_info(
            {
                'generator': self.generator,
                'critic': self.critic
            }, model_logs_path, model_ckpt_path, **kwargs)

        # Counts epochs since the last image plot.
        count = 0

        self.g_opt = tf.keras.optimizers.Adam(lr=self.g_lr,
                                              beta_1=0,
                                              beta_2=0.9)
        self.c_opt = tf.keras.optimizers.Adam(lr=self.c_lr,
                                              beta_1=0,
                                              beta_2=0.9)

        ckpt = tf.train.Checkpoint(g_opt=self.g_opt,
                                   c_opt=self.c_opt,
                                   g_model=self.generator,
                                   c_model=self.critic)

        # NOTE(review): self.initial_epoch is only assigned in this method when a
        # checkpoint is loaded; otherwise it must already exist on the instance.
        if kwargs["ckpt_path"] is not None:
            fname, self.initial_epoch = load_checkpoint(**kwargs)
            print("\nCheckpoint File : {}\n".format(fname))
            ckpt.mapped = {
                "g_opt": self.g_opt,
                "c_opt": self.c_opt,
                "g_model": self.generator,
                "c_model": self.critic
            }
            ckpt.restore(fname)

            # Re-read the learning rates from the optimizer configs after the restore call.
            self.g_lr = self.g_opt.get_config()["learning_rate"]
            self.c_lr = self.c_opt.get_config()["learning_rate"]

        for epoch in range(self.initial_epoch, self.initial_epoch + 50000):
            count += 1

            start_time = korea_time()
            # Images gathered since the last generator step.
            num_batch = 0  # 64 * 5 = 320
            # Number of images to gather before the critic/generator updates run.
            mult = self.n_critic * self.batch_size
            # Images consumed so far in this epoch.
            num_dataset = 0  # 60000
            real_images_list = []

            for real_images in train_dataset:
                # Gather self.n_critic batches worth of images
                real_images_list.append(real_images)
                num_images = K.int_shape(real_images)[0]
                num_batch += num_images
                num_dataset += num_images

                # Update once enough images are gathered, or when the epoch's data is used up.
                if (num_batch == mult) or (num_dataset == self.num_train):
                    # One critic step per gathered batch, followed by a single generator step.
                    critic_loss_list = [(self.train_D(real_images)).numpy()
                                        for real_images in real_images_list]
                    g_loss = (self.train_G()).numpy()

                    c_epoch_loss.extend(critic_loss_list)
                    g_epoch_loss.append(g_loss)

                    training_progbar.add(num_batch)

                    if num_dataset == self.num_train:
                        break

                    num_batch = 0
                    real_images_list = []

            end_time = korea_time()
            training_progbar.update(0)  # Reset the progress bar

            c_mean_loss = np.mean(c_epoch_loss, axis=0)
            g_mean_loss = np.mean(g_epoch_loss, axis=0)

            ckpt_prefix = os.path.join(
                model_ckpt_path, "Epoch-{}_G-Loss-{:.6f}_C-Loss-{:.6f}".format(
                    epoch, g_mean_loss, c_mean_loss))
            ckpt.save(file_prefix=ckpt_prefix)

            print(
                "Epoch = [{:5d}]\tGenerator Loss = [{:8.6f}]\tCritic Loss = [{:8.6f}]\n"
                .format(epoch, g_mean_loss, c_mean_loss))

            # Save the model results
            str_ = "Epoch = [{:5d}] - End Time [ {} ]\n".format(
                epoch, str(end_time.strftime("%Y / %m / %d   %H:%M:%S")))
            str_ += "Elapsed Time = {}\n".format(end_time - start_time)
            str_ += "Generator Learning Rate = [{:.6f}] - Critic Learning Rate = [{:.6f}]\n".format(
                self.g_lr, self.c_lr)
            str_ += "Generator Loss : [{:8.6f}] - Critic Loss : [{:8.6f}] - Sum : [{:8.6f}]\n".format(
                g_mean_loss, c_mean_loss, g_mean_loss + c_mean_loss)
            str_ += " - " * 15 + "\n\n"

            with open(model_result_file, "a+", encoding='utf-8') as fp:
                fp.write(str_)

            # Plot sample images every `interval` epochs.
            if count == interval:
                fname = os.path.join(model_images_path, "{}.png".format(epoch))
                self.plot_images(fname)

                count = 0

            # Start the next epoch with empty loss accumulators.
            c_epoch_loss = []
            g_epoch_loss = []
예제 #15
0
 def CurrentStatus(self, word, level, cur_page):
     """Log the word, level and page currently being processed.

     Args:
         word: The word being crawled.
         level: The level being crawled for that word.
         cur_page: Current page number, formatted with the ``{:4d}`` spec.
     """
     stamp = "Time : [ {} ]\n".format(korea_time())
     status = "Word : [ {} ] - Level : [ {} ] - Current Page : [{:4d}]\n".format(
         word, level, cur_page)
     divider = "\n" + " -- " * 10 + "\n\n"
     self.write(stamp + status + divider)
예제 #16
0
    def train(self, **kwargs):
        """Train the generator and discriminator, checkpointing and logging after every epoch.

        Keyword arguments read directly here:
            interval: Number of epochs between calls to ``self.plot_images``.
            folder: Passed to ``make_folders_for_model`` to obtain the checkpoint,
                image and log locations and the result file.
            ckpt_path: When not ``None``, a checkpoint is located through
                ``load_checkpoint(**kwargs)`` and restored.

        The full ``kwargs`` are also forwarded to ``save_initial_model_info`` and
        ``load_checkpoint``. The epoch loop runs for a hard-coded 50000 epochs
        starting at ``self.initial_epoch``.
        """
        interval = kwargs["interval"]
        model_ckpt_path, model_images_path, model_logs_path, model_result_file = make_folders_for_model(
            kwargs['folder'])

        self.generator = self.make_generator()
        self.discriminator = self.make_discriminator()

        train_dataset = self.get_dataset()
        # NOTE(review): num_batches is not used anywhere else in this method.
        num_batches = ceil(self.num_train / self.batch_size)

        # Per-epoch loss accumulators; all three are emptied at the end of every epoch.
        d_epoch_loss = []
        d_epoch_aux_loss = []
        g_epoch_loss = []

        training_progbar = tf.keras.utils.Progbar(target=self.num_train)

        save_initial_model_info(
            {
                'generator': self.generator,
                'discriminator': self.discriminator
            }, model_logs_path, model_ckpt_path, **kwargs)

        # Counts epochs since the last image plot.
        count = 0

        self.g_opt = tf.keras.optimizers.Adam(lr=self.g_lr, beta_1=0.5)
        self.d_opt = tf.keras.optimizers.Adam(lr=self.d_lr, beta_1=0.5)

        # Loss objects stored on the instance; presumably used by train_G / train_D — confirm.
        self.BC_function = tf.keras.losses.BinaryCrossentropy(from_logits=True)
        self.SCC_function = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True)

        ckpt = tf.train.Checkpoint(g_opt=self.g_opt,
                                   d_opt=self.d_opt,
                                   g_model=self.generator,
                                   d_model=self.discriminator)

        # NOTE(review): self.initial_epoch is only assigned in this method when a
        # checkpoint is loaded; otherwise it must already exist on the instance.
        if kwargs["ckpt_path"] is not None:
            fname, self.initial_epoch = load_checkpoint(**kwargs)
            print("\nCheckpoint File : {}\n".format(fname))
            ckpt.mapped = {
                "g_opt": self.g_opt,
                "d_opt": self.d_opt,
                "g_model": self.generator,
                "d_model": self.discriminator
            }
            ckpt.restore(fname)

            # Re-read the learning rates from the optimizer configs after the restore call.
            self.g_lr = self.g_opt.get_config()["learning_rate"]
            self.d_lr = self.d_opt.get_config()["learning_rate"]

        for epoch in range(self.initial_epoch, self.initial_epoch + 50000):
            count += 1

            start_time = korea_time()

            for real_images, real_labels in train_dataset:
                # First dimension of the label batch is used as the number of samples.
                num_images = K.int_shape(real_labels)[0]
                # The generator step runs before the discriminator step for every batch.
                g_loss = (self.train_G(num_images)).numpy()
                d_BC_loss, d_SCC_loss = self.train_D(real_images, real_labels)
                d_BC_loss = d_BC_loss.numpy()
                d_SCC_loss = d_SCC_loss.numpy()

                d_epoch_loss.append(d_BC_loss)
                d_epoch_aux_loss.append(d_SCC_loss)
                g_epoch_loss.append(g_loss)

                training_progbar.add(num_images)

            end_time = korea_time()
            training_progbar.update(0)  # Reset the progress bar

            d_mean_loss = np.mean(d_epoch_loss, axis=0)
            d_mean_aux_loss = np.mean(d_epoch_aux_loss, axis=0)
            g_mean_loss = np.mean(g_epoch_loss, axis=0)

            # The checkpoint name carries the sum of both discriminator losses.
            ckpt_prefix = os.path.join(
                model_ckpt_path, "Epoch-{}_G-Loss-{:.6f}_D-Loss-{:.6f}".format(
                    epoch, g_mean_loss, d_mean_loss + d_mean_aux_loss))
            ckpt.save(file_prefix=ckpt_prefix)

            str_ = ("Epoch = [{:5d}]\tG Loss = [{:8.6f}]\t".format(
                epoch, g_mean_loss) +
                    "D Loss = [{:8.6f}]\tD AUX Loss = [{:8.6f}]\n".format(
                        d_mean_loss, d_mean_aux_loss))
            print(str_)

            # Save the model results
            str_ = "Epoch = [{:5d}] - End Time [ {} ]\n".format(
                epoch, str(end_time.strftime("%Y / %m / %d   %H:%M:%S")))
            str_ += "Elapsed Time = {}\n".format(end_time - start_time)
            str_ += "G Learning Rate = [{:.6f}] - D Learning Rate = [{:.6f}]\n".format(
                self.g_lr, self.d_lr)
            str_ += "G Loss : [{:8.6f}] - D Loss : [{:8.6f}] - D AUX Loss : [{:8.6f}] - Sum : [{:8.6f}]\n".format(
                g_mean_loss, d_mean_loss, d_mean_aux_loss,
                g_mean_loss + d_mean_loss + d_mean_aux_loss)
            str_ += " - " * 15 + "\n\n"

            with open(model_result_file, "a+", encoding='utf-8') as fp:
                fp.write(str_)

            # Plot sample images every `interval` epochs.
            if count == interval:
                fname = os.path.join(model_images_path, "{}.png".format(epoch))
                self.plot_images(fname)

                count = 0

            # Start the next epoch with empty loss accumulators.
            d_epoch_loss = []
            d_epoch_aux_loss = []
            g_epoch_loss = []
예제 #17
0
 def AlreadyExists(self, eng, kor):
     """Log that the ``eng -> kor`` example pair is already in the database.

     Args:
         eng: Left-hand side of the logged ``{} -> {}`` pair.
         kor: Right-hand side of the logged ``{} -> {}`` pair.
     """
     entry = [
         "Time : [ {} ]\n".format(korea_time()),
         "{} -> {}\n".format(eng, kor),
         "이미 database에 존재하는 예문입니다.\n",
         "\n" + " -- " * 10 + "\n\n",
     ]
     self.write("".join(entry))
예제 #18
0
    def train(self):
        """Train the generator and discriminator, checkpointing, logging and plotting every epoch.

        Creates both Adam optimizers and the running loss metrics, restores a
        checkpoint when ``self.ckpt_path`` is set, then iterates over
        ``self.dataset`` for ``self.epochs`` epochs starting at
        ``self.initial_epoch``. After each epoch a checkpoint is saved, the
        losses are printed and appended to ``self.training_result_file``, and
        ``self.plot_images`` writes ``<epoch>.png`` into ``self.image_folder``.
        """
        self.g_opt = tf.keras.optimizers.Adam(lr=self.lr, beta_1=0.5)
        self.d_opt = tf.keras.optimizers.Adam(lr=self.lr, beta_1=0.5)
        self.g_loss_metric = tf.keras.metrics.Mean(name="g_loss")
        self.d_loss_metric = tf.keras.metrics.Mean(name="d_loss")

        # NOTE(review): the keyword "genenerator_optimizer" is spelled differently from
        # the "generator_optimizer" key used in ckpt.mapped below. Renaming it would
        # change the keys stored in checkpoints, so it is left as-is — confirm intent.
        ckpt = tf.train.Checkpoint(generator=self.gen,
                                   discriminator=self.disc,
                                   genenerator_optimizer=self.g_opt,
                                   discriminator_optimizer=self.d_opt)

        # NOTE(review): self.initial_epoch is only assigned in this method when a
        # checkpoint is loaded; otherwise it must already exist on the instance.
        if self.ckpt_path is not None:
            fname, self.initial_epoch = load_checkpoint(
                Path(self.ckpt_path).resolve(), self.ckpt_epoch)
            print("\nCheckpoint File : {}\n".format(fname))
            ckpt.mapped = {
                "generator": self.gen,
                "discriminator": self.disc,
                "generator_optimizer": self.g_opt,
                "discriminator_optimizer": self.d_opt
            }
            ckpt.restore(fname)

            # Re-read the learning rate from the generator optimizer config after the restore call.
            self.lr = self.g_opt.get_config()["learning_rate"]

        progbar = tf.keras.utils.Progbar(target=self.num_train)
        for epoch in range(self.initial_epoch,
                           self.initial_epoch + self.epochs):
            self.g_loss_metric.reset_states()
            self.d_loss_metric.reset_states()

            start_time = korea_time(None)
            for images in self.dataset:
                # First dimension of the batch is used as the number of images.
                num_images = K.int_shape(images)[0]
                # The return values are ignored; presumably train_D / train_G update the
                # loss metrics themselves — confirm.
                self.train_D(images)
                self.train_G(num_images)
                progbar.add(num_images)

            end_time = korea_time(None)
            progbar.update(0)  # Reset the progress bar

            g_loss = self.g_loss_metric.result()
            d_loss = self.d_loss_metric.result()

            ckpt_prefix = self.ckpt_folder / "Epoch-{}_gLoss-{:.6f}_dLoss-{:.6f}".format(
                epoch, g_loss, d_loss)
            ckpt.save(file_prefix=ckpt_prefix)

            print("Epoch = [{:5d}]  G_loss = [{:8.6f}]  D_loss = [{:8.6f}]\n".
                  format(epoch, g_loss, d_loss))

            # Save the model results
            with self.training_result_file.open("a+", encoding='utf-8') as fp:
                str_ = "Epoch = [{:5d}] - End Time [ {} ]\n".format(
                    epoch, str(end_time.strftime("%Y / %m / %d   %H:%M:%S")))
                str_ += "Elapsed Time = {}\n".format(end_time - start_time)
                str_ += "Learning Rate = [{:.6f}]\n".format(self.lr)
                str_ += "g_loss = [{:8.6f}]   d_loss = [{:8.6f}]\n".format(
                    g_loss, d_loss)
                str_ += " - " * 15 + "\n\n"
                fp.write(str_)

            # Sample images are plotted after every epoch.
            fname = self.image_folder / "{}.png".format(epoch)
            self.plot_images(fname)
예제 #19
0
파일: Model.py 프로젝트: leesc912/Crawler
    def start_crawling(self):
        """Crawl example sentences for every word in ``self.words_dic`` and save them.

        For each word, the per-level settings (``levels``, ``start_page``,
        ``end_page``, ``user``, ``trsl``) are walked in lockstep. Every page in
        the configured range is opened with the web driver, and each example
        found on the page is handed to ``_collect_example``. Paging for a level
        stops early when the example area never appears, when the site reports
        a page before ``start_page`` (or page 0), or when the reported page no
        longer advances.

        A ``KeyboardInterrupt`` is logged and ends the crawl. The database and
        the browser window are released in all cases, including unexpected
        errors, which still propagate to the caller.
        """
        driver = self.get_webdriver()
        driver.implicitly_wait(3)

        words = list(self.words_dic.keys())
        self.logger.StartCrawling(words)
        try:
            for word in words:
                self.logger.StartWordCrawling(word)

                options = self.words_dic[word]
                per_level = zip(options["levels"], options["start_page"], options["end_page"],
                                options["user"], options["trsl"])
                for level, start_page, end_page, use_user, use_trsl in per_level:
                    previous_page = start_page - 1
                    for page in range(start_page, end_page + 1):
                        # Move to the requested page.
                        url = self.query.format(page, word, self.levels_dic[level])
                        driver.get(url)

                        try:  # Wait until the example area shows up.
                            WebDriverWait(driver, self.patience_time).until(
                                EC.presence_of_element_located((By.ID, "searchPage_example")))
                        except TimeoutException:
                            self.logger.NoExamples(word, url)
                            break

                        cur_page = self.get_current_page(driver)
                        if cur_page == 0 or cur_page < start_page:
                            # No examples exist up to the requested page.
                            self.logger.ReasonToBreak("해당 page까지 예문이 존재하지 않습니다.\n")
                            break
                        if (previous_page != start_page - 1) and (previous_page == cur_page):
                            # The site did not advance: there is no further page.
                            self.logger.ReasonToBreak("더 이상 이동할 page가 없습니다.\n")
                            break

                        self.logger.CurrentStatus(word, level, cur_page)
                        previous_page = cur_page

                        num_examples = len(driver.find_elements_by_css_selector(self.examples_area))
                        for idx in range(num_examples):
                            self._collect_example(driver, idx, word, page, level, use_user, use_trsl)

                            # Pause before moving on to the next example.
                            Sleep(self.sleep_time)

        except KeyboardInterrupt:
            self.logger.KeyboardInterruptError()
        finally:
            # Release the database and the browser even when crawling aborts with an error.
            try:
                self.saver.quit_db()
            finally:
                driver.close()

    def _collect_example(self, driver, idx, word, page, level, use_user, use_trsl):
        """Save the example at position ``idx`` of the current page, if its kind is enabled.

        Args:
            driver: The web driver showing the result page.
            idx: Position of the example on the page.
            word, page, level: Stored alongside the example.
            use_user: Whether user-contributed translations are saved.
            use_trsl: Whether Papago machine translations are requested and saved.
        """
        # The example element is looked up again for every access, as before.
        user_status = self.CheckExistence(self.get_one_example(driver, idx), ".user_profile")
        trsl_status = self.CheckExistence(self.get_one_example(driver, idx), ".translate_btns")

        if trsl_status:  # Papago machine translation
            if not use_trsl:
                return
            self.get_one_example(driver, idx).find_element_by_css_selector(".btn_papago").click()
            try:
                WebDriverWait(driver, self.patience_time).until(
                    lambda wd: self.get_papago_result(driver, idx) != ""
                )
            except TimeoutException:
                # No translation arrived. Read the English text of THIS example and
                # store the same "None" placeholder used for missing Korean text,
                # instead of reusing eng/kor left over from a previous example
                # (or unbound names on the very first one).
                eng = self.get_eng_area(driver, idx)
                self.saver.save((word, eng, "None", "자동 번역 실패(응답 없음)", page, level, korea_time()))
            else:
                eng = self.get_eng_area(driver, idx)
                kor = self.get_papago_result(driver, idx)
                self.saver.save((word, eng, kor, "자동 번역", page, level, korea_time()))

        elif user_status:  # User-contributed translation
            if use_user:
                eng = self.get_eng_area(driver, idx)
                kor = self.get_kor_area(driver, idx)
                self.saver.save((word, eng, kor, "이용자 참여", page, level, korea_time()))

        else:  # Official example, or no Korean sentence at all
            eng = self.get_eng_area(driver, idx)
            try:
                kor = self.get_kor_area(driver, idx)
                self.saver.save((word, eng, kor, "공식 예문", page, level, korea_time()))
            except IndexError:  # No Korean sentence
                kor = "None"
                self.saver.save((word, eng, kor, "한글 예문 없음", page, level, korea_time()))