Exemple #1
0
class Task(object):
    """An async callable bundled with the keyword arguments it will be run with.

    Attributes are set in ``__init__``; ``loop`` starts as ``None`` and has to
    be assigned by the owner before ``run`` does anything.
    """

    def __init__(self, id: str, func: Callable, param: Any = None) -> None:
        """Create a task.

        Args:
            id: Identifier of the task.
            func: Callable that is invoked as ``func(**param)`` and whose
                result is scheduled with ``loop.create_task``.
            param: Keyword arguments for ``func``. When omitted (or ``None``)
                a new empty dict is created for this instance.
        """
        super().__init__()
        self.id = id
        self.func = func
        # One dict per instance: ``run`` writes "taskPool"/"pool" into it, so a
        # shared ``{}`` default would leak those entries between tasks.
        self.param = {} if param is None else param
        self.logger = Logger("Task")
        self.loop: Optional[AbstractEventLoop] = None

    async def run(self, taskPool: TaskPool, pool: Pool) -> None:
        """Run ``func`` on ``self.loop`` and wait for it to finish.

        ``taskPool`` and ``pool`` are injected into ``self.param`` under the
        keys "taskPool" and "pool" before the call. If no loop has been
        assigned the method only logs and returns.
        """
        self.logger.info("run", "task run")
        if self.loop:
            self.param["taskPool"] = taskPool
            self.param["pool"] = pool
            await self.loop.create_task(self.func(**self.param))
Exemple #2
0
class Pool(object):
    """A worker slot that holds one ``Task`` and runs it as an asyncio future."""

    # Future created by ``run`` for the assigned task. It defaults to ``None``
    # so that ``cancel`` is safe to call before ``run`` has scheduled anything.
    poolTask: Optional["asyncio.Future"] = None

    def __init__(self) -> None:
        super().__init__()
        self.isRun = False
        self.logger = Logger("Pool")
        self.task: Optional[Task] = None
        self.taskId = ""
        self.poolTask = None

    def setTask(self, task: Task) -> None:
        """Assign ``task`` to this slot and remember its id."""
        self.task = task
        self.taskId = task.id

    def run(self, taskPool: TaskPool) -> None:
        """Mark the slot as running and schedule the assigned task, if any.

        The task's ``run`` coroutine receives ``taskPool`` and this pool; the
        resulting future is kept in ``self.poolTask`` so it can be cancelled.
        """
        self.isRun = True
        self.logger.info("run", "task pool run")
        if self.task is not None:
            self.poolTask = asyncio.ensure_future(self.task.run(
                taskPool, self))

    def cancel(self) -> None:
        """Mark the slot as stopped and cancel the scheduled future, if any."""
        self.isRun = False
        self.logger.info("cancel", "task pool cancel")
        # ``poolTask`` is None when nothing was ever scheduled on this slot.
        if self.poolTask is not None and not self.poolTask.cancelled():
            self.poolTask.cancel()
Exemple #3
0
 def marcapJob(marcapDtos: List[StockRunCrawling]) -> None:
     """Prepare every crawl request in ``marcapDtos`` and pass the list to StockService.

     For each dto a ``taskUniqueId`` is built from its task id, market, date
     range and a random UUID. The id is built *before* the dates are replaced,
     so for ``isNow`` requests it still contains the originally configured
     dates; only afterwards are both dates set to ``getNowDateStr()``.
     """
     stockService: StockService = Locator.getInstance().get(StockService)
     jobLogger = Logger("TaskService_marcapJob")
     for runDto in marcapDtos:
         jobLogger.info("#### schedule job start ####")
         jobLogger.info("".join(("command", runDto.startDateStr, "~", runDto.endDateStr)))
         idParts = (
             runDto.taskId,
             runDto.market,
             runDto.startDateStr,
             runDto.endDateStr,
             str(uuid.uuid4()),
         )
         runDto.taskUniqueId = "".join(idParts)
         if runDto.isNow:
             runDto.startDateStr = getNowDateStr()
             runDto.endDateStr = getNowDateStr()
         jobLogger.info("".join(("real:", runDto.startDateStr, "~", runDto.endDateStr)))
     stockService.crawlingMarcapStockData(marcapDtos)
Exemple #4
0
class MarcapCrawler(object):
    """Selenium-driven crawler that downloads per-day market CSV files from KRX.

    Progress and results are published as events on ``self.ee``; the class
    itself does not store the parsed data anywhere.
    """

    def __init__(self) -> None:
        super().__init__()
        self.ee = EventEmitter()
        self.logger = Logger("MarcapCrawler")

    def createUUID(self) -> str:
        """Return a new random UUID4 as a string."""
        return str(uuid.uuid4())

    async def connectWebDriver(self, addr: str, uuid: str) -> WebDriver:
        """Create a remote Chrome driver whose downloads go to a per-``uuid`` folder.

        Args:
            addr: Command-executor address of the remote WebDriver.
            uuid: Name of the download sub-directory under /home/seluser/Downloads.

        Returns:
            The connected driver with 60 s page-load and script timeouts.
        """
        chrome_options = webdriver.ChromeOptions()
        prefs = {
            'profile.default_content_setting_values.automatic_downloads': 1,
            'download.default_directory': f"/home/seluser/Downloads/{uuid}"
        }
        chrome_options.add_experimental_option("prefs", prefs)
        driver = webdriver.Remote(
            command_executor=addr,
            options=chrome_options,
        )
        driver.set_page_load_timeout(60)
        driver.set_script_timeout(60)
        self.logger.info("connectWebDriver", "create driver")
        return driver

    def connectLocalDriver(self, addr: str, uuid: str) -> WebDriver:
        """Create a local Chrome driver (hard-coded developer paths).

        ``addr`` is accepted for signature parity with ``connectWebDriver`` but
        is not used.
        """
        chrome_options = webdriver.ChromeOptions()
        prefs = {
            'profile.default_content_setting_values.automatic_downloads': 1,
            'download.default_directory': f"/Users/iseongjae/Documents/PersonalProjects/fin-web/fin-crawling-server/server/downloads/{uuid}"
        }
        chrome_options.add_experimental_option("prefs", prefs)
        driver = webdriver.Chrome(executable_path="/Users/iseongjae/Downloads/chromedriver", chrome_options=chrome_options)
        return driver

    async def crawling(self, dto: StockRunCrawling) -> None:
        """Download and parse one CSV per day in ``dto``'s date range.

        Starts a download observer, connects the web driver, opens the KRX
        page and then calls ``downloadData`` for every date from
        ``dto.startDateStr`` to ``dto.endDateStr`` (inclusive, ``%Y%m%d``).
        The observer and the driver are always shut down, also on failure;
        exceptions propagate to the caller unchanged.
        """
        driver = None
        downloadObserver = None
        try:
            crawlingId = self.createUUID()
            self.logger.info("crawling", crawlingId)
            self.ee.emit(EVENT_MARCAP_CRAWLING_ON_CONNECTING_WEBDRIVER, dto)

            downloadObserver = DownloadObserver()
            path = await asyncRetryNonBlock(5, 1, downloadObserver.makePath, crawlingId)
            downloadObserver.startObserver(path, self.ee)
            self.logger.info("crawling", "create observer and start")
            print("startObserver")

            driver = await asyncRetryNonBlock(5, 1, self.connectWebDriver, dto.driverAddr, crawlingId)
            print("connectWebDriver")
            driver.get("http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020101")
            try:
                # Best effort: dismiss an alert if one shows up within 3 s.
                alert = WebDriverWait(driver, timeout=3).until(EC.alert_is_present())
                alert.accept()
            except Exception as e:
                print("예외발생:"+str(e))
            print("start:"+dto.startDateStr)

            self.ee.emit(EVENT_MARCAP_CRAWLING_ON_START_CRAWLING, dto)
            WebDriverWait(driver, timeout=20, poll_frequency=1).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#mktId_0_1")))
            date = datetime.strptime(dto.startDateStr, "%Y%m%d")
            endDate = datetime.strptime(dto.endDateStr, "%Y%m%d")

            while date <= endDate:
                dateStr = date.strftime("%Y%m%d")
                downloadTask = StockCrawlingDownloadTask(**{
                    "dateStr": dateStr,
                    "market": dto.market,
                    "uuid": crawlingId,
                    "taskId": dto.taskId,
                    "taskUniqueId": dto.taskUniqueId
                })
                self.logger.info("crawling", f"create downloadTask taskId: {dto.taskId} market: {dto.market} date: {dateStr} taskUniqueId: {dto.taskUniqueId}")
                print(downloadTask.json())
                downloadObserver.event_handler.setDownloadTask(downloadTask)
                self.ee.emit(EVENT_MARCAP_CRAWLING_ON_DOWNLOAD_START, downloadTask)
                await asyncRetryNonBlock(5, 1, self.downloadData, downloadTask, downloadObserver, driver)
                date = date + timedelta(days=1)
        finally:
            if downloadObserver:
                downloadObserver.stopObserver()
            if driver:
                driver.quit()

    async def downloadData(self, downloadTask: StockCrawlingDownloadTask, downloadObserver: DownloadObserver, driver: WebDriver) -> None:
        """Trigger the CSV download for one date and wait for the file event.

        Selects the market, sets the date, runs the search, clicks the CSV
        download and then waits up to 30 s for the file-system event emitted
        under ``FILE_SYSTEM_HANDLER(downloadTask.uuid)``. On arrival the file
        is handed to ``makeMarcapData``.

        Raises:
            asyncio.TimeoutError: no file event arrived within 30 s.
        """
        self.logger.info("downloadData")
        if driver is None:
            return
        before = driver.execute_script("return $('.CI-MDI-UNIT-TIME').text()")
        marketSelectors = {
            "kospi": "#mktId_0_1",
            "kosdaq": "#mktId_0_2",
            "konex": "#mktId_0_3",
        }
        selector = marketSelectors.get(downloadTask.market)
        if selector is not None:
            driver.execute_script(f'$("{selector}").click()')
        driver.execute_script(f'$("#trdDd")[0].value = "{downloadTask.dateStr}"')
        driver.execute_script('$(".btn_component_search").click()')
        # Poll until the text of .CI-MDI-UNIT-TIME changes after the search.
        # NOTE(review): this loop has no upper bound of its own; if the text
        # never changes it spins forever. Consider adding a deadline.
        after = before
        while before == after:
            after = driver.execute_script('return $(".CI-MDI-UNIT-TIME").text()')
            await sleepNonBlock(0.5)
        print("before:"+before)
        print("after:"+after)
        await sleepNonBlock(3)
        WebDriverWait(driver, timeout=10, poll_frequency=2).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "*[class='CI-MDI-UNIT-DOWNLOAD']")))
        driver.execute_script("$('[class=\"CI-MDI-UNIT-DOWNLOAD\"]').click()")
        WebDriverWait(driver, timeout=10, poll_frequency=2).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "*[data-type='csv']")))
        driver.execute_script("$(\"[data-type='csv']\").click()")
        print("wait:"+downloadTask.dateStr)

        loop = asyncio.get_running_loop()
        # No ``loop=`` argument: it was removed in Python 3.10 and the queue
        # binds to the running loop on its own.
        queue: asyncio.Queue = asyncio.Queue(maxsize=1)

        async def fileResultOfData(event: FileCreatedEvent, downloadTask: StockCrawlingDownloadTask) -> None:
            result = {}
            result["event"] = event
            result["downloadTask"] = downloadTask
            await queue.put(result)

        # NOTE(review): if the emitter fires this listener from the observer's
        # thread, ``loop.create_task`` is not thread-safe — confirm, and use
        # ``asyncio.run_coroutine_threadsafe`` if so.
        @self.ee.once(FILE_SYSTEM_HANDLER(downloadTask.uuid))
        def downloadComplete(event: FileCreatedEvent, downloadTask: StockCrawlingDownloadTask) -> None:
            loop.create_task(fileResultOfData(event, downloadTask))

        result = await asyncio.wait_for(queue.get(), timeout=30)
        # Only acknowledge an item that was actually received; calling
        # task_done() after a timeout raises ValueError and hides the timeout.
        queue.task_done()
        self.ee.emit(EVENT_MARCAP_CRAWLING_ON_DOWNLOAD_COMPLETE, downloadTask)
        await asyncio.create_task(self.makeMarcapData(result["event"], result["downloadTask"]))

    def convertFileToDto(self, path: str, dto: StockMarketCapitalResult) -> None:
        """Parse the UTF-8 CSV at ``path`` and append one record per row to ``dto.data``.

        The first line is treated as a header and skipped. Quotes are removed
        and the line is split on commas, so a quoted field that itself contains
        a comma would shift the columns.
        """
        with open(path, "r", encoding="utf-8") as f:
            lines = f.readlines()

        # For "kospi" the value columns start at index 2; for every other
        # market the column at index 2 is skipped and they start at index 3.
        firstValueColumn = 2 if dto.market == "kospi" else 3
        valueFields = ("close", "diff", "percent", "open", "high", "low",
                       "volume", "price", "marcap", "number")
        for line in lines[1:]:
            data = line.replace('"', '').split(",")
            record = {
                "date": dto.date,
                "market": dto.market,
                "code": data[0].strip(),
                "name": data[1].strip(),
            }
            for column, fieldName in enumerate(valueFields, start=firstValueColumn):
                record[fieldName] = data[column].strip()
            dto.data.append(StockMarketCapital(**record))

    async def isExistFile(self, path: str, ext: str = ".csv") -> bool:
        """Return whether ``path`` ends with ``ext``, retrying a few times.

        NOTE(review): only the path *string* is tested and it never changes
        between retries, so a non-matching path just sleeps four times (about
        4 s in total) before ``False`` is returned. The file system is not
        consulted — confirm whether an existence check was intended.
        """
        isExist = path.endswith(ext)
        restTimes = 3
        while not isExist and restTimes >= 0:
            await sleepNonBlock(1)
            isExist = path.endswith(ext)
            restTimes -= 1
        return isExist

    async def parseReceivedFile(self, event: FileCreatedEvent, downloadTask: StockCrawlingDownloadTask) -> None:
        """Convert a downloaded file to UTF-8, rename it and emit the parsed result.

        Returns silently when the event path does not end in ``.csv`` or when
        the destination file ``<market>-<date>.csv`` already exists.
        """
        retdto = StockMarketCapitalResult()
        date = downloadTask.dateStr
        market = downloadTask.market
        retdto.date = date
        retdto.market = market
        isExist = await self.isExistFile(event.src_path)
        if not isExist:
            return
        print("created: " + date)
        await sleepNonBlock(0.5)
        dest_path = f'{os.path.dirname(event.src_path)}/{market+"-"+date}.csv'
        if os.path.isfile(dest_path):
            return
        # Re-encode in place first, then move to the final name and parse.
        self.changeCharSet(event.src_path)
        os.rename(event.src_path, dest_path)
        self.convertFileToDto(dest_path, retdto)
        retdto.result = "success"
        self.ee.emit(EVENT_MARCAP_CRAWLING_ON_PARSING_COMPLETE, True, retdto, downloadTask)
        self.ee.emit(EVENT_MARCAP_CRAWLING_ON_RESULT_OF_STOCK_DATA, downloadTask, retdto)
        self.logger.info("parseFile", f"success, {downloadTask.taskUniqueId}")

    async def makeMarcapData(self, event: FileCreatedEvent, downloadTask: StockCrawlingDownloadTask) -> None:
        """Parse the received file with retries; emit a failure event on error.

        Exceptions from parsing are not re-raised: they are reported through
        ``EVENT_MARCAP_CRAWLING_ON_PARSING_COMPLETE`` with ``False`` and logged.
        """
        try:
            await asyncRetry(3, 1, self.parseReceivedFile, event, downloadTask)
        except Exception:
            retdto = StockMarketCapitalResult()
            retdto.result = "fail"
            retdto.errorMsg = traceback.format_exc()
            self.ee.emit(EVENT_MARCAP_CRAWLING_ON_PARSING_COMPLETE, False, retdto, downloadTask)
            self.logger.error("parseFile", f"fail, {downloadTask.taskUniqueId} error: {traceback.format_exc()}")
        finally:
            self.logger.info("parseFile...")

    def changeCharSet(self, path: str) -> None:
        """Rewrite the file at ``path`` in place, converting EUC-KR text to UTF-8."""
        with open(path, "r", encoding="euc-kr") as f:
            lines = f.readlines()
        with open(path, 'w', encoding="utf-8") as f:
            f.writelines(lines)
Exemple #5
0
class StockService:
    """Coordinates market-cap crawling: schedules crawler tasks and tracks their state."""

    def __init__(self, stockRepository: StockRepository,
                 tasksRepository: TasksRepository,
                 crawlerRepository: CrawlerRepository) -> None:
        self.stockRepository = stockRepository
        self.tasksRepository = tasksRepository
        self.crawlerRepository = crawlerRepository
        self.logger = Logger("StockService")

    async def getStockData(self, market: str, startDate: str,
                           endDate: str) -> List[StockMarketCapital]:
        """Return the stored stock data for ``market`` between the two dates."""
        return await self.stockRepository.getStockData(market, startDate,
                                                       endDate)

    async def _marcapTaskWorker(self, runDto: StockRunCrawling,
                                pool: Pool,
                                taskPool: TaskPool) -> None:
        """Run one MarcapCrawler for ``runDto`` inside the given pool slot.

        Cancellation is logged, any other exception is logged and recorded via
        ``tasksRepository.errorTask``. The pool slot is released in every case
        so a failed or cancelled crawl does not keep it occupied.
        """
        try:
            self.logger.info("runCrawling&marcapTaskWorker", "start")
            marcapCrawler = MarcapCrawler()
            taskUniqueId = runDto.taskUniqueId
            self.crawlerRepository.addCrawler(taskUniqueId, marcapCrawler)
            self.createListners(marcapCrawler.ee)
            self.logger.info("runCrawling&marcapTaskWorker",
                             f"taskWorker:{taskUniqueId}")
            await marcapCrawler.crawling(runDto)
            self.crawlerRepository.removeCrawler(taskUniqueId)
        except asyncio.CancelledError:
            self.logger.info("runCrawling&marcapTaskWorker", "cancel")
        except Exception:
            self.logger.error("runCrawling&marcapTaskWorker",
                              f"error: {traceback.format_exc()}")
            self.tasksRepository.errorTask(runDto, traceback.format_exc())
        finally:
            taskPool.removeTaskPool(pool)

    def _createMarcapProcessTask(self, dto: StockRunCrawling) -> ProcessTask:
        """Build the ProcessTask bookkeeping entry with one sub-task per day in the range."""
        startDate = datetime.strptime(dto.startDateStr, "%Y%m%d")
        endDate = datetime.strptime(dto.endDateStr, "%Y%m%d")
        taskDates = [
            (startDate + timedelta(days=x)).strftime("%Y%m%d")
            for x in range((endDate - startDate).days + 1)
        ]
        task = ProcessTask(
            **{
                "market": dto.market,
                "startDateStr": dto.startDateStr,
                "endDateStr": dto.endDateStr,
                "taskUniqueId": dto.taskUniqueId,
                "taskId": dto.taskId,
                "count": len(taskDates),
                "tasks": deque(taskDates),
                "restCount": len(taskDates),
                "tasksRet": deque(([0] * len(taskDates))),
            })
        task.state = "find worker"
        return task

    def crawlingMarcapStockData(self, dtoList: List[StockRunCrawling]) -> None:
        """Register and start a crawl task for every "marcap" request in ``dtoList``.

        Requests with another ``taskId`` are ignored. Nothing is scheduled
        while the task repository has no ``taskRunner``. A request whose task
        already exists is skipped without affecting the remaining requests.
        """
        self.logger.info("crawlingMarcapStockData", str(len(dtoList)))
        for dto in dtoList:
            if dto.taskId != "marcap":
                continue
            if not self.tasksRepository.taskRunner:
                continue
            if self.tasksRepository.isExistTask(dto.taskId, dto.taskUniqueId):
                # Skip only this duplicate; later requests must still be run.
                continue
            task = self._createMarcapProcessTask(dto)
            workerTask = Task(dto.taskUniqueId, self._marcapTaskWorker,
                              {"runDto": dto})
            self.tasksRepository.addTask(task)
            self.tasksRepository.runTask(workerTask)
            self.logger.info("runMarcapTask", f"runTask {task.json()}")

    def createListners(self, ee: EventEmitter) -> None:
        """Subscribe this service's handlers to a crawler's event emitter."""
        ee.on(EVENT_MARCAP_CRAWLING_ON_RESULT_OF_STOCK_DATA,
              self.onResultOfStockData)

        ee.on(EVENT_MARCAP_CRAWLING_ON_CONNECTING_WEBDRIVER,
              self.onConnectingWebDriver)
        ee.on(EVENT_MARCAP_CRAWLING_ON_START_CRAWLING, self.onStartCrawling)
        ee.on(EVENT_MARCAP_CRAWLING_ON_DOWNLOAD_START, self.onDownloadStart)
        ee.on(EVENT_MARCAP_CRAWLING_ON_DOWNLOAD_COMPLETE,
              self.onDownloadComplete)
        ee.on(EVENT_MARCAP_CRAWLING_ON_PARSING_COMPLETE,
              self.onParsingComplete)
        ee.on(EVENT_MARCAP_CRAWLING_ON_ERROR, self.onError)
        ee.on(EVENT_MARCAP_CRAWLING_ON_CANCEL, self.onCancelled)

    # Stores the crawled stock data in the database.
    def onResultOfStockData(self, dto: StockCrawlingDownloadTask,
                            retDto: StockMarketCapitalResult) -> None:
        task = self.tasksRepository.getTask(dto.taskId, dto.taskUniqueId)
        task.state = "insert to database"
        self.tasksRepository.updateTask(task)

        async def completeMarcapTask() -> None:
            await self.stockRepository.insertMarcap(retDto)
            self.tasksRepository.completeStockCrawlingTask(True, retDto, dto)

        # Fire-and-forget: the insert runs in the background and its outcome
        # is not awaited here.
        asyncio.create_task(completeMarcapTask())

    # Event: the crawler is connecting to the web driver.
    def onConnectingWebDriver(self, dto: StockRunCrawling) -> None:
        task = self.tasksRepository.getTask(dto.taskId, dto.taskUniqueId)
        task.state = "connecting webdriver"
        self.tasksRepository.updateTask(task)
        self.logger.info("onConnectingWebDriver", task.taskUniqueId)

    # Event: crawling has started.
    def onStartCrawling(self, dto: StockRunCrawling) -> None:
        task = self.tasksRepository.getTask(dto.taskId, dto.taskUniqueId)
        task.state = "start crawling"
        self.tasksRepository.updateTask(task)
        self.logger.info("onStartCrawling", task.taskUniqueId)

    # Event: the download of one day's data has started.
    def onDownloadStart(self, dto: StockCrawlingDownloadTask) -> None:
        task = self.tasksRepository.getTask(dto.taskId, dto.taskUniqueId)
        task.state = "download start"
        self.tasksRepository.updateTask(task)
        self.logger.info("onDownloadStart", task.taskUniqueId)

    # Event: the download of one day's data has completed.
    def onDownloadComplete(self, dto: StockCrawlingDownloadTask) -> None:
        task = self.tasksRepository.getTask(dto.taskId, dto.taskUniqueId)
        task.state = "download complete"
        self.tasksRepository.updateTask(task)
        self.logger.info("onDownloadComplete", task.taskUniqueId)

    # Event: conversion of the downloaded data has finished.
    def onParsingComplete(self, isSuccess: bool,
                          retdto: StockMarketCapitalResult,
                          dto: StockCrawlingDownloadTask) -> None:
        self.logger.info("onParsingComplete")
        self.logger.info(f"taskId:{dto.taskId} taskUniqueId{dto.taskUniqueId}")
        tar = self.tasksRepository.tasksdto.tasks[dto.taskId]["list"]
        self.logger.info(f"taskDTO: {tar}")
        # Success is completed later, after the DB insert in
        # onResultOfStockData; only failures are completed here.
        if not isSuccess:
            self.tasksRepository.completeStockCrawlingTask(
                isSuccess, retdto, dto)

    # Event: crawling was cancelled. Currently this only logs; the task state
    # is not updated here.
    def onCancelled(self, dto: StockRunCrawling) -> None:
        self.logger.info("onCancelled")

    # Event: crawling failed with an error.
    def onError(self, dto: StockRunCrawling, errorMsg: str) -> None:
        task = self.tasksRepository.getTask(dto.taskId, dto.taskUniqueId)
        self.tasksRepository.fail(task, task.restCount)
        task.state = "error"
        task.errMsg = errorMsg
        self.tasksRepository.updateTask(task)
        self.logger.error("onError", task.taskUniqueId)
Exemple #6
0
class FactorService:
    """Schedules factor crawling / file-conversion tasks and reacts to crawler events."""

    def __init__(self, manager: ConnectionManager,
                 factorRepository: FactorRepository,
                 tasksRepository: TasksRepository,
                 crawlerRepository: CrawlerRepository,
                 taskService: 'TaskService') -> None:
        self.manager = manager
        self.factorRepository = factorRepository
        self.tasksRepository = tasksRepository
        self.crawlerRepository = crawlerRepository
        self.taskService = taskService
        self.logger = Logger("FactorService")

    async def getFactor(self, code: str, year: str, month: str,
                        source: str) -> List[FactorData]:
        """Look up factor data through the factor repository."""
        factors = await self.factorRepository.getFactor(code, year, month, source)
        return factors

    def crawlingFactorDartData(self, dto: DartApiCrawling) -> None:
        """Register a per-year DART crawling task and start its worker."""

        async def crawlingFactorDartDataTask(pool: Pool,
                                             taskPool: TaskPool) -> None:
            # Worker body: run the crawler, always hand the pool slot back.
            try:
                dartCrawler = DartApiCrawler()
                self.crawlerRepository.addCrawler(dto.taskUniqueId, dartCrawler)
                self.createFactorDartListener(dartCrawler.ee)
                await dartCrawler.crawling(dto)
                self.crawlerRepository.removeCrawler(dto.taskUniqueId)
            except asyncio.CancelledError:
                self.logger.info("crawlingFactorDartDataTask", "cancel")
            except Exception:
                self.logger.error("crawlingFactorDartDataTask",
                                  f"error: {traceback.format_exc()}")
                self.tasksRepository.errorTask(dto, traceback.format_exc())
            finally:
                taskPool.removeTaskPool(pool)

        # One sub-task per year in [startYear, endYear].
        yearCount = dto.endYear - dto.startYear + 1
        processTask = ProcessTask(
            market="",
            startDateStr=dto.startYear,
            endDateStr=dto.endYear,
            taskUniqueId=dto.taskUniqueId,
            taskId=dto.taskId,
            count=yearCount,
            tasks=list(range(dto.startYear, dto.endYear + 1)),
            restCount=yearCount,
            tasksRet=[0] * yearCount,
            state="find worker",
        )
        self.tasksRepository.addTask(processTask)
        self.tasksRepository.runTask(
            Task(dto.taskUniqueId, crawlingFactorDartDataTask))

    # Stores the factors found in the file into the DB.
    def convertFactorFileToDb(self, dto: RunFactorFileConvert) -> None:
        """Register a single-step "convert" task and start its worker."""
        self.logger.info("convertFactorFileToDb")

        async def convertFactorFileToDbTask(pool: Pool,
                                            taskPool: TaskPool) -> None:
            # Worker body: read the file, build DAOs in batches of 100, insert.
            try:
                current = self.tasksRepository.getTask(dto.taskId,
                                                       dto.taskUniqueId)
                rows = await asyncio.create_task(
                    self.factorRepository.getFactorsInFile())
                current.state = "make Factor Object"
                self.tasksRepository.updateTask(current)
                factorDaos = await batchFunction(100, rows,
                                                 self.makeFactorDaoList)
                current.state = "start insert db"
                self.tasksRepository.updateTask(current)
                self.logger.info("convertFactorFileToDbTask",
                                 f"insertCount: {str(len(factorDaos))}")
                await self.factorRepository.insertFactor(factorDaos)
                current.state = "complete"
                self.tasksRepository.completeFactorConvertFileToDbTask(current)
            except asyncio.CancelledError:
                self.logger.info("convertFactorFileToDbTask", "cancel")
            except Exception:
                self.logger.error("convertFactorFileToDbTask",
                                  f"error: {traceback.format_exc()}")
                self.tasksRepository.errorTask(dto, traceback.format_exc())
            finally:
                taskPool.removeTaskPool(pool)

        processTask = ProcessTask(
            market="",
            startDateStr="20070101",
            endDateStr="20191231",
            taskUniqueId=dto.taskUniqueId,
            taskId=dto.taskId,
            count=1,
            tasks=["convert"],
            restCount=1,
            tasksRet=[0],
            state="start get file",
        )
        self.tasksRepository.addTask(processTask)
        self.tasksRepository.runTask(
            Task(dto.taskUniqueId, convertFactorFileToDbTask))

    async def makeFactorDaoList(self, data: List[Dict]) -> List[FactorDao]:
        """Turn raw factor rows into ``FactorDao`` objects.

        A row whose unit ("단위") equals "천원" has its value multiplied by
        1000; every other row keeps its value as is.
        """
        daoList = []
        for row in data:
            fields = {
                "code": row["종목코드"],  # stock code
                "name": row["종목명"],  # stock name
                "dataYear": row["년"],  # fiscal year
                "dataMonth": row["결산월"],  # closing month
                "dataName": row["데이터명"],  # data name
            }
            # NOTE(review): presumably the value is numeric; for a str the
            # multiplication would repeat the text instead — confirm.
            if row["단위"] == "천원":
                fields["dataValue"] = row["데이터값"] * 1000
            else:
                fields["dataValue"] = row["데이터값"]
            daoList.append(FactorDao(**fields))
        return daoList

    def createFactorDartListener(self, ee: EventEmitter) -> None:
        """Subscribe this service's handlers to a DART crawler's event emitter."""
        ee.on(EVENT_DART_API_CRAWLING_ON_DOWNLOADING_CODES,
              self.onDownloadingCodes)
        ee.on(EVENT_DART_API_CRAWLING_ON_CRAWLING_FACTOR_DATA,
              self.onCrawlingFactorData)
        ee.on(EVENT_DART_API_CRAWLING_ON_COMPLETE_YEAR, self.onCompleteYear)
        ee.on(EVENT_DART_API_CRAWLING_ON_RESULT_OF_FACTOR,
              self.onResultOfFactor)
        ee.on(EVENT_DART_API_CRAWLING_ON_CANCEL, self.onCancelled)

    def onDownloadingCodes(self, dto: DartApiCrawling) -> None:
        """Mark the task as downloading the company codes."""
        self.logger.info("onDownloadingCodes", dto.taskUniqueId)
        current = self.tasksRepository.getTask(dto.taskId, dto.taskUniqueId)
        current.state = "download Codes"
        self.tasksRepository.updateTask(current)

    def onCrawlingFactorData(self, dto: DartApiCrawling) -> None:
        """Mark the task as crawling factor data."""
        self.logger.info("onCrawlingFactorData", dto.taskUniqueId)
        current = self.tasksRepository.getTask(dto.taskId, dto.taskUniqueId)
        current.state = "crawling factor data"
        self.tasksRepository.updateTask(current)

    def onCompleteYear(self, dto: DartApiCrawling, year: int) -> None:
        """Report one finished year to the task repository."""
        self.logger.info("onCompleteYear", dto.taskUniqueId)
        current = self.tasksRepository.getTask(dto.taskId, dto.taskUniqueId)
        self.tasksRepository.completeFactorDart(current, year)

    def onResultOfFactor(self, dto: DartApiCrawling, year: int,
                         obj: List) -> None:
        """Convert crawled records to ``FactorDao`` and insert them in the background."""
        self.logger.info("onResultOfFactor", dto.taskUniqueId)
        listOfFactorDao = [
            FactorDao(
                **{
                    "code": record["crawling_code"],
                    "name": record["crawling_name"],
                    "dataYear": record["bsns_year"],
                    "dataMonth": getMonthFromReprtCode(record["reprt_code"]),
                    "dataName": record["account_nm"],
                    "dataValue": record["thstrm_amount"],
                    "dataId": record["account_id"]
                })
            for record in obj
        ]
        # Fire-and-forget insert; the result is not awaited here.
        asyncio.create_task(
            self.factorRepository.insertFactorDart(listOfFactorDao))

    def onCancelled(self, dto: DartApiCrawling) -> None:
        """Log that the crawl was cancelled."""
        self.logger.info("onCancelled")
Exemple #7
0
class DartApiCrawler(object):
    """Crawls yearly financial-statement data from the OpenDART HTTP API.

    Progress and results are published as events on ``self.ee``. Setting
    ``isCancelled`` to ``True`` makes ``crawling`` stop after the request that
    is currently in flight.
    """

    def __init__(self) -> None:
        super().__init__()
        self.ee = EventEmitter()
        self.isLock = False
        self.isCancelled = False
        self.logger = Logger("DartApiCrawler")

    def createUUID(self) -> str:
        """Return a new random UUID4 as a string."""
        return str(uuid.uuid4())

    async def downloadCodes(self, isCodeNew: bool, apiKey: str) -> Dict:
        """Return the listed-company code table, downloading it when needed.

        The table is read from ``CORPCODE.xml``. It is (re)downloaded and
        unzipped when ``isCodeNew`` is true or the file does not exist yet.
        Under pytest the files live in ``factors/codes``, otherwise in
        ``app/static/factors/codes``.

        Returns:
            ``{stock_code: {"corp_code": ..., "corp_name": ...}}`` for every
            entry with a 6-character stock code and a ``corp_code`` element.
            "corp_name" is only present when the XML entry has that element.
        """
        if "pytest" in sys.modules:
            loadpath = Path('factors/codes')
            datapath = Path("factors/codes/CORPCODE.xml")
        else:
            loadpath = Path('app/static/factors/codes')
            datapath = Path("app/static/factors/codes/CORPCODE.xml")

        if isCodeNew or not os.path.exists(datapath.resolve()):
            headers = {
                'User-Agent':
                "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.2 (KHTML, like Gecko) Chrome/22.0.1216.0 Safari/537.2'",
                'accept-language': 'ko'
            }
            params = {"crtfc_key": apiKey}
            url = "https://opendart.fss.or.kr/api/corpCode.xml"
            async with aiohttp.ClientSession() as session:
                async with session.get(url, params=params,
                                       headers=headers) as response:
                    data = await response.read()
                    # Close the archive once it has been extracted.
                    with ZipFile(io.BytesIO(data)) as archive:
                        archive.extractall(loadpath.resolve())
        tree = ET.parse(datapath.resolve())
        codes: Dict[str, Any] = {}
        for li in tree.findall("list"):
            el = li.find("stock_code")
            if el is None:
                continue
            stockCode = el.text
            # Only entries with a 6-character stock code are kept.
            if not (isinstance(stockCode, str) and len(stockCode) == 6):
                continue
            codeEl = li.find("corp_code")
            if codeEl is None:
                continue
            nameEl = li.find("corp_name")
            codes[stockCode] = {"corp_code": codeEl.text}
            if nameEl is not None:
                codes[stockCode]["corp_name"] = nameEl.text
        return codes

    async def crawling(self, dto: DartApiCrawling) -> None:
        """Crawl every company for each year in ``dto``'s range and emit the results.

        A start year below 2015 is raised to 2015 (and written back to
        ``dto``). For each company with data a RESULT_OF_FACTOR event is
        emitted, and after each year a COMPLETE_YEAR event. When
        ``self.isCancelled`` is set, the CANCEL event is emitted once and the
        crawl stops. Exceptions propagate to the caller.
        """
        if dto.startYear < 2015:
            dto.startYear = 2015
        self.ee.emit(EVENT_DART_API_CRAWLING_ON_DOWNLOADING_CODES, dto)
        codes = await asyncRetryNonBlock(5,
                                         1,
                                         self.downloadCodes,
                                         isCodeNew=dto.isCodeNew,
                                         apiKey=dto.apiKey)
        self.ee.emit(EVENT_DART_API_CRAWLING_ON_CRAWLING_FACTOR_DATA, dto)
        for year in range(dto.startYear, dto.endYear + 1):
            self.ee.emit(EVENT_DART_API_CRAWLING_ON_CRAWLING_FACTOR_DATA,
                         dto)
            self.logger.info("crawling", str(len(codes)))
            for code in codes:
                newDf = await asyncRetryNonBlock(5, 1, self.getYearDf,
                                                 dto.apiKey, code, codes,
                                                 year)
                if self.isCancelled:
                    # Stop here: continuing would keep requesting data and
                    # re-emit the cancel event for every remaining company.
                    self.ee.emit(EVENT_DART_API_CRAWLING_ON_CANCEL, dto)
                    return
                if newDf is not None:
                    self.logger.info("crawling", code)
                    self.ee.emit(
                        EVENT_DART_API_CRAWLING_ON_RESULT_OF_FACTOR, dto,
                        year, newDf.to_dict("records"))
            self.ee.emit(EVENT_DART_API_CRAWLING_ON_COMPLETE_YEAR, dto,
                         year)
            self.logger.info("crawling", str(year))

    async def getYearDf(self, apiKey: str, code: str, codes: Dict,
                        year: int) -> pd.DataFrame:
        """Fetch one company's statement for ``year`` as a DataFrame.

        Args:
            apiKey: OpenDART API key.
            code: Stock code; must be a key of ``codes``.
            codes: Code table as returned by ``downloadCodes``.
            year: Business year to request.

        Returns:
            The normalized "list" payload with ``crawling_year``,
            ``crawling_code`` and ``crawling_name`` columns added, or ``None``
            when the response has no "list" key.

        Raises:
            Exception: any request/parsing error, after it has been logged.
        """
        self.logger.info("getYearDf", f"crawling: {code}")
        df = None
        try:
            url = 'https://opendart.fss.or.kr/api/fnlttSinglAcntAll.json'
            headers = {
                'User-Agent':
                "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.2 (KHTML, like Gecko) Chrome/22.0.1216.0 Safari/537.2'",
                'accept-language': 'ko',
            }
            params = {
                'crtfc_key': apiKey,
                'corp_code': codes[code]["corp_code"],
                'bsns_year': year,  # business year
                'reprt_code': "11011",  # "11011": annual business report
                'fs_div': "CFS",  # "CFS": consolidated statements, "OFS": separate statements
            }
            connector = aiohttp.TCPConnector(limit=50, force_close=True)
            async with aiohttp.ClientSession(connector=connector) as session:
                timeout = aiohttp.ClientTimeout(total=15)
                async with session.get(url,
                                       params=params,
                                       timeout=timeout,
                                       headers=headers) as response:
                    data = await response.json()
                    if 'list' not in data:
                        return None
                    df = pd.json_normalize(data, 'list')
        except Exception:
            self.logger.error("getYearDf", traceback.format_exc())
            raise
        self.logger.info("df", str(df))
        if df is None:
            return None
        # "corp_name" is optional in the code table, so do not index it directly.
        name = codes[code].get("corp_name")
        df["crawling_year"] = year
        df["crawling_code"] = code
        df["crawling_name"] = name
        self.logger.info("getYearDf",
                         f"{str(year)} {str(code)} {str(name)}")
        return df
Exemple #8
0
class TaskService:
    def __init__(
            self,
            manager: ConnectionManager,
            tasksRepository: TasksRepository,
            taskScheduler: TaskScheduler,
            factorService: FactorService,
            stockService: StockService,
            crawlerRepository: CrawlerRepository
            ) -> None:
        self.tasksRepository = tasksRepository
        self.crawlerRepository = crawlerRepository
        self.manager = manager
        self.taskScheduler = taskScheduler
        self.factorService = factorService
        self.stockService = stockService
        self.logger = Logger("TaskService")
        self.ee = self.tasksRepository.taskEventEmitter
        self.setupEvents()
    
    def setupEvents(self) -> None:
        self.ee.on(EVENT_TASK_REPO_UPDATE_TASKS, self.fetchTasks)
        self.ee.on(EVENT_TASK_REPO_TASK_COMPLETE, self.updateTaskState)
        self.ee.on(EVENT_TASK_REPO_UPDATE_POOL_INFO, self.updateTaskPoolInfo)
    
    def getTaskSchedule(self, webSocket: WebSocket, isBroadCast: bool = False) -> None:
        jobs = self.taskScheduler.getJobs()
        stockTaskScheduleList = StockTaskScheduleList(**{"list": []})
        for i in range(len(jobs)):
            fields = jobs[i].trigger.fields
            id = jobs[i].id
            self.logger.info(f"jobargs: {str(jobs[i].args[0])}")
            stockTaskScheduleList.list.append(StockTaskScheduleInfo(**{
                "id": id,
                "year": str(fields[0]),
                "month": str(fields[1]),
                "day": str(fields[2]),
                "dayOfWeek": str(fields[4]),
                "hour": str(fields[5]),
                "minute": str(fields[6]),
                "second": str(fields[7]),
                "taskList": list(jobs[i].args[0])
            }))
        if isBroadCast:
            self.manager.sendBroadCast(RES_SOCKET_TASK_FETCH_TASK_SCHEDULE, stockTaskScheduleList.dict())
        else:
            self.manager.send(RES_SOCKET_TASK_FETCH_TASK_SCHEDULE, stockTaskScheduleList.dict(), webSocket)
    
    @staticmethod
    def marcapJob(marcapDtos: List[StockRunCrawling]) -> None:
        service: StockService = Locator.getInstance().get(StockService)
        logger = Logger("TaskService_marcapJob")
        for dto in marcapDtos:
            logger.info("#### schedule job start ####")
            logger.info("command" + dto.startDateStr + "~" + dto.endDateStr)
            dto.taskUniqueId = dto.taskId + dto.market+dto.startDateStr + dto.endDateStr + str(uuid.uuid4())
            if dto.isNow:
                dto.startDateStr = getNowDateStr()
                dto.endDateStr = getNowDateStr()
            logger.info("real:" + dto.startDateStr + "~" + dto.endDateStr)
        service.crawlingMarcapStockData(marcapDtos)
    
    def addTaskSchedule(self, scheduleDto: StockTaskSchedule, runCrawlingDto: List[StockRunCrawling], webSocket: WebSocket) -> None:
        marcapDtos = []
        for dto in runCrawlingDto:
            if dto.taskId == "marcap":
                marcapDtos.append(dto)
        
        self.taskScheduler.addJob(
            self.marcapJob, 
            scheduleDto.year, 
            scheduleDto.month, 
            scheduleDto.dayOfWeek,
            scheduleDto.day, 
            scheduleDto.hour, 
            scheduleDto.minute, 
            scheduleDto.second, 
            "marcap",
            args=[marcapDtos])
        self.getTaskSchedule(webSocket, True)
        
    def removeTaskSchedule(self, id: str, webSocket: WebSocket) -> None:
        self.taskScheduler.removeJob(id)
        self.getTaskSchedule(webSocket, True)
    
    def fetchTasks(self, data: ProcessTasks = None, websocket: WebSocket = None) -> None:
        if data is None:
            data = self.tasksRepository.tasksdto
        self.logger.info("fetchTasks", data.json())
        if websocket is None:
            self.manager.sendBroadCast(RES_SOCKET_TASK_FETCH_TASKS, data.dict())
        else:
            self.manager.send(RES_SOCKET_TASK_FETCH_TASKS, data.dict(), websocket)
        
    def getTaskState(self, taskId: str, webSocket: WebSocket) -> None:
        data: YearData = self.tasksRepository.getAllTaskState(taskId)
        self.manager.send(RES_SOCKET_TASK_FETCH_TASK_STATE, data.dict(), webSocket)

    def updateTaskState(self, taskId: str, stockUpdateState: StockUpdateState = None) -> None:
        if stockUpdateState is not None:
            self.manager.sendBroadCast(RES_SOCKET_TASK_UPDATE_TASK_STATE, stockUpdateState.dict())
        self.fetchTasks()

    def getTaskPoolInfo(self, webSocket: WebSocket) -> None:
        taskPoolInfo: TaskPoolInfo = self.tasksRepository.getPoolInfo()
        self.manager.send(RES_SOCKET_TASK_FETCH_TASK_POOL_INFO, taskPoolInfo.dict(), webSocket)
    
    def updateTaskPoolInfo(self, poolInfo: TaskPoolInfo) -> None:
        # logger.info(f"updateTaskPoolInfo:{poolInfo.json()}")
        self.manager.sendBroadCast(RES_SOCKET_TASK_FETCH_TASK_POOL_INFO, poolInfo.dict())
    
    def addTask(self, taskName: str, dto: Any) -> None:
        if isinstance(dto, dict):
            if taskName == "crawlingMarcapStockData":
                data = []
                for market in dto["market"]:
                    taskUniqueId = dto["taskId"]+market+dto["startDate"]+dto["endDate"]+str(uuid.uuid4())
                    dtoOne = StockRunCrawling(**{
                        "driverAddr": "http://fin-carwling-webdriver:4444",
                        "market": market,
                        "startDateStr": dto["startDate"],
                        "endDateStr": dto["endDate"],
                        "taskId": dto["taskId"],
                        "taskUniqueId": taskUniqueId
                    })
                    data.append(dtoOne)
            elif taskName == "convertFactorFileToDb":
                data = RunFactorFileConvert(**{
                    "taskId": dto["taskId"],
                    "taskUniqueId": dto["taskId"] + str(uuid.uuid4())
                })
            elif taskName == "crawlingFactorDartData":
                data = DartApiCrawling(**{
                    "apiKey": dto["apiKey"],
                    "isCodeNew": dto["isCodeNew"],
                    "startYear": dto["startYear"],
                    "endYear": dto["endYear"],
                    "taskId": dto["taskId"],
                    "taskUniqueId": dto["taskId"] + dto["startYear"] + dto["endYear"] + str(uuid.uuid4())
                })
        else:
            data = dto
        if taskName == "convertFactorFileToDb":
            self.factorService.convertFactorFileToDb(data)
        elif taskName == "crawlingMarcapStockData":
            self.stockService.crawlingMarcapStockData(data)
        elif taskName == "crawlingFactorDartData":
            self.factorService.crawlingFactorDartData(data)
    
    def cancelTask(self, taskId: str, taskUniqueId: str) -> None:
        if taskUniqueId in self.crawlerRepository.getCrawlers():
            self.crawlerRepository.getCrawler(taskUniqueId).isCancelled = True
        self.tasksRepository.taskRunner.cancel(taskUniqueId)
        task = self.tasksRepository.getTask(taskId, taskUniqueId)
        if task is not None:
            if task.state == "cancel":
                self.tasksRepository.deleteTask(task)
                self.tasksRepository.updateAllTask()
            elif task.state == "error":
                self.tasksRepository.deleteTask(task)
                self.tasksRepository.updateAllTask()
            else:
                task.state = "cancel"
                self.tasksRepository.updateTask(task)
        else:
            self.tasksRepository.updateAllTask()
    
    def fetchCompletedTask(self, dto: ListLimitData, webSocket: WebSocket) -> None:
        dao = ListLimitDao(**{
            "limit": dto.limit,
            "offset": dto.offset,
            "taskId": dto.taskId
        })
        tasks = self.tasksRepository.getCompletedTask(dao)
        self.manager.send(RES_SOCKET_TASK_FETCH_COMPLETED_TASK, tasks.dict(), webSocket)
class FactorMongoDataSource(MongoDataSource):
    def __init__(self) -> None:
        """Initialise the base data source and this class's logger."""
        super().__init__()
        loggerName = "FactorMongoDataSource"
        self.logger = Logger(loggerName)

    async def getFactor(self,
                        year: str = "*",
                        month: str = "*",
                        code: str = "*") -> list:
        """Return the stored factor rows matching year, month and code.

        Args:
            year: Year to match, or ``"*"``.  A concrete year is converted to
                the string form of a float (``"2020"`` -> ``"2020.0"``)
                before it is merged into the query.
            month: Month to match, or ``"*"``.
            code: Stock code to match, or ``"*"``.

        Returns:
            A list of ``FactorData`` objects; an empty list when the query or
            the conversion fails (the error is logged, not raised).
        """
        try:
            # float("*") raises ValueError, which made the default call always
            # fall into the except branch and return []. Pass the wildcard
            # through unchanged, exactly as month and code already are.
            # NOTE(review): presumably mergeFindObj treats "*" as "no filter";
            # confirm in MongoDataSource.
            yearKey = year if year == "*" else str(float(year))
            findObj: Dict[str, Any] = {}
            self.mergeFindObj(findObj, "dataYear", yearKey)
            self.mergeFindObj(findObj, "dataMonth", month)
            self.mergeFindObj(findObj, "code", code)
            self.logger.info("getFactor", str(findObj))
            cursor = self.factor.find(findObj)
            fields = (
                "code", "dataMonth", "dataName", "dataYear", "dataId",
                "dataValue", "name"
            )
            return [
                FactorData(**{field: doc[field] for field in fields})
                for doc in cursor
            ]
        except Exception:
            self.logger.error("getFactor", traceback.format_exc())
            return []

    async def insertFactor(self, li: List[FactorDao]) -> None:
        """Upsert every factor in ``li``, one at a time.

        Each item is converted with ``dict()`` and stamped with an
        ``updatedAt`` value from ``getNow()`` before it is written.  Any
        exception is logged and swallowed, so the remaining items are skipped
        but the caller never sees the error.
        """
        try:
            ready = self.isSetupMarcap()
            if not ready:
                self.setupMarcap()
            for factor in li:
                record = {**factor.dict(), "updatedAt": getNow()}
                await asyncio.create_task(self.insertFactorOne(record))
        except Exception:
            self.logger.error("insertFactor", traceback.format_exc())

    async def insertFactorOne(self, data: Dict) -> None:
        """Upsert a single factor document.

        The document is matched on ``code``, ``dataYear``, ``dataMonth`` and
        ``dataName``.  All fields of ``data`` are written with ``$set``;
        ``createdAt`` is only written when the document is newly inserted.
        """
        identityKeys = ("code", "dataYear", "dataMonth", "dataName")
        selector = {key: data[key] for key in identityKeys}
        changes = {
            "$set": data,
            "$setOnInsert": {
                "createdAt": getNow()
            }
        }
        self.factor.update_one(selector, changes, upsert=True)

    def getCompletedTask(self, dto: ListLimitData) -> ListLimitResponse:
        """Return one page of tasks whose state is ``success`` or ``fail``.

        Tasks are ordered by ``createdAt`` descending and paged with the
        ``offset`` and ``limit`` taken from ``dto.dict()``.

        Args:
            dto: Paging request providing ``offset`` and ``limit``.

        Returns:
            A ``ListLimitResponse`` with the total number of matching tasks
            and the requested page.  When the query fails the error is logged
            and an empty page (``count`` 0, no data) is returned, so callers
            always receive the declared type instead of a bare list.
        """
        # Same filter for the page query and for the total count.
        finishedFilter = {"$or": [{"state": "success"}, {"state": "fail"}]}
        try:
            data = dto.dict()
            cursor = self.task.find(finishedFilter)\
                .sort("createdAt", DESCENDING)\
                .skip(data["offset"])\
                .limit(data["limit"])

            # NOTE(review): Cursor.count() is deprecated in recent PyMongo
            # releases in favour of Collection.count_documents(); confirm the
            # pinned driver version before upgrading.
            count = self.task.find(finishedFilter).count()

            return ListLimitResponse(
                **{
                    "count": count,
                    "offset": data["offset"],
                    "limit": data["limit"],
                    "data": self.exceptId(list(cursor))
                })
        except Exception:
            # Previously logged under the tag "getFactor" (copy-paste slip).
            self.logger.error("getCompletedTask", traceback.format_exc())
        return ListLimitResponse(
            **{
                "count": 0,
                "offset": getattr(dto, "offset", 0),
                "limit": getattr(dto, "limit", 0),
                "data": []
            })
Exemple #10
0
class TaskRunner(object):
    def __init__(self) -> None:
        super().__init__()
        self.logger = Logger("TaskRunner")
        self.queue: asyncio.Queue = asyncio.Queue()
        self.loop = asyncio.get_running_loop()
        self.pool = TaskPool(notifyCallback=self.notifyRmOnPool)
        self.notifyCallback = None
        # self.loop: asyncio.AbstractEventLoop = asyncio.new_event_loop()

    def getPoolInfo(self) -> TaskPoolInfo:
        return TaskPoolInfo(
            **{
                "poolSize": self.pool.poolSize,
                "poolCount": self.pool.poolCount(),
                "runCount": self.pool.runCount(),
                "queueCount": self.queue.qsize()
            })

    def updatePoolInfo(self) -> None:
        self.logger.info(
            "updatePoolInfo",
            f"runCount:{self.pool.runCount()}, queueCount:{self.queue.qsize()}"
        )
        if self.notifyCallback:
            self.notifyCallback(
                TaskPoolInfo(
                    **{
                        "poolSize": self.pool.poolSize,
                        "poolCount": self.pool.poolCount(),
                        "runCount": self.pool.runCount(),
                        "queueCount": self.queue.qsize()
                    }))

    def notifyPutOnQueue(self) -> None:
        self.loop.create_task(self.notifyToPool())

    def notifyRmOnPool(self) -> None:
        if self.queue.qsize() > 0:
            self.loop.create_task(self.notifyToPool())
        else:
            self.updatePoolInfo()

    def cancel(self, id: str) -> None:
        pool: Optional[Pool] = self.pool.findPool(id)
        if pool is not None:
            self.logger.info("cancel", id)
            pool.cancel()
            self.pool.removeTaskPool(pool)
        else:
            self.logger.info("cancel", "pool is not exist")

    def isExist(self, id: str) -> bool:
        return self.pool.findPool(id) is not None

    async def notifyToPool(self) -> None:
        try:
            if self.queue.qsize() > 0 and (self.pool.poolSize -
                                           self.pool.poolCount()) > 0:
                pool = self.pool.addTaskPool(Pool(), False)
                # timeout이 있으면 nonblocking으로 움직임
                task: Task = await asyncio.wait_for(self.queue.get(),
                                                    timeout=1)
                if task:
                    pool.setTask(task)
                    pool.run(self.pool)
                else:
                    self.pool.removeTaskPool(pool, False)
            # if self.pool.poolSize > self.queue.qsize() and self.pool.poolCount() >= self.queue.qsize():
            #     print("exit")
            # elif self.pool.poolSize > self.pool.poolCount() and self.queue.qsize() > 0:
            #     pool = self.pool.addTaskPool(Pool(), False)
            #     print(f"before qsize:{self.queue.qsize()}")
            #     task: Task = await asyncio.wait_for(self.queue.get(), timeout=1)
            #     print(f"after qsize:{self.queue.qsize()}")
            #     if task:
            #         pool.setTask(task)
            #         pool.run(self.pool)
            #     else:
            #         self.pool.removeTaskPool(pool, False)
        except asyncio.TimeoutError as e:
            self.logger.info("notifyToPool", f"timeout:{str(e)}")
            self.pool.removeTaskPool(pool, False)
        finally:
            self.updatePoolInfo()

    def put(self, task: Task) -> None:
        task.loop = self.loop
        self.loop.create_task(self._put(task))

    async def _put(self, task: Task) -> None:
        self.logger.info("_put", "task put")
        await self.queue.put(task)
        self.notifyPutOnQueue()
Exemple #11
0
class TasksRepository(object):
    def __init__(self, mongod: TaskMongoDataSource) -> None:
        super().__init__()
        self.mongod = mongod
        self.logger = Logger("TasksRepository")
        self.taskEventEmitter = EventEmitter()
        self.tasksdto = ProcessTasks()
        self.taskRunner: Optional[TaskRunner] = None
        self.createTaskRunner()

    # Create the task runner.
    def createTaskRunner(self) -> None:
        if self.taskRunner is None:
            self.taskRunner = TaskRunner()
            self.taskRunner.notifyCallback = self.onUpdatePoolInfo
            self.logger.info("createTaskRunner", "created taskrunner")

    # Emit an event when the task pool info is updated.
    def onUpdatePoolInfo(self, poolInfo: TaskPoolInfo) -> None:
        self.taskEventEmitter.emit(EVENT_TASK_REPO_UPDATE_POOL_INFO, poolInfo)
        self.logger.info("updatePoolInfo", f"{poolInfo.json()}")

    # Fetch the task pool info.
    def getPoolInfo(self) -> None:
        if self.taskRunner:
            poolInfo = self.taskRunner.getPoolInfo()
            self.taskEventEmitter.emit(EVENT_TASK_REPO_UPDATE_POOL_INFO,
                                       poolInfo)

    # Register a task with the task pool.
    def runTask(self, task: Task) -> None:
        # print("runTask")
        if self.taskRunner:
            self.taskRunner.put(task)

    # Store the info of a newly added task.
    def addTask(self, task: ProcessTask) -> None:
        if task.taskId not in self.tasksdto.tasks:
            self.tasksdto.tasks[task.taskId] = dict()
            self.tasksdto.tasks[task.taskId]["list"] = dict()
            self.tasksdto.tasks[task.taskId]["ids"] = []
            self.tasksdto.taskIds.append(task.taskId)

        self.tasksdto.tasks[task.taskId]["list"][task.taskUniqueId] = task
        self.tasksdto.tasks[task.taskId]["ids"].append(task.taskUniqueId)
        self.taskEventEmitter.emit(EVENT_TASK_REPO_UPDATE_TASKS, self.tasksdto)
        self.logger.info("addTask", f"{task.taskUniqueId}")

    # Store the updated task info.
    def updateTask(self, task: ProcessTask) -> None:
        self.tasksdto.tasks[task.taskId]["list"][task.taskUniqueId] = task
        self.logger.info("updateTask", f"{task.taskUniqueId}")
        self.mongod.upsertTask(task.dict())
        self.taskEventEmitter.emit(EVENT_TASK_REPO_UPDATE_TASKS, self.tasksdto)

    def updateAllTask(self) -> None:
        self.taskEventEmitter.emit(EVENT_TASK_REPO_UPDATE_TASKS, self.tasksdto)

    # Return the stored task info.
    def getTask(self, taskId: str, taskUniqueId: str) -> ProcessTask:
        if self.isExistTask(taskId, taskUniqueId):
            return self.tasksdto.tasks[taskId]["list"][taskUniqueId]
        return None

    # Check whether a stored task exists.
    def isExistTask(self, taskId: str, taskUniqueId: str) -> bool:
        return taskId in self.tasksdto.tasks and taskUniqueId in self.tasksdto.tasks[
            taskId]["list"]

    # Delete the stored task info.
    def deleteTask(self, task: ProcessTask) -> None:
        if task.taskId in self.tasksdto.tasks:
            if task.taskUniqueId in self.tasksdto.tasks[task.taskId]["list"]:
                del self.tasksdto.tasks[task.taskId]["list"][task.taskUniqueId]
                self.tasksdto.tasks[task.taskId]["ids"].remove(
                    task.taskUniqueId)
                self.logger.info("deleteTask", f"{task.taskUniqueId}")

    def errorTask(self, dto: TaskModel, errMsg: str) -> None:
        task = self.getTask(dto.taskId, dto.taskUniqueId)
        task.state = "error"
        task.errMsg = errMsg
        self.updateTask(task)

    def completeFactorConvertFileToDbTask(self, task: ProcessTask) -> None:
        self.success(task, 1)
        self.updateTask(task)
        self.deleteTask(task)
        self.taskEventEmitter.emit(EVENT_TASK_REPO_TASK_COMPLETE, "factorFile",
                                   None)

    def completeFactorDart(self, task: ProcessTask, year: int) -> None:
        self.success(task, 1)
        self.updateTask(task)
        if task.restCount <= 0:
            self.deleteTask(task)
        task.state = "complete"
        self.updateTask(task)
        self.logger.info("completeFactorDart", "complete")
        self.taskEventEmitter.emit(
            EVENT_TASK_REPO_TASK_COMPLETE, "factorDart",
            StockUpdateState(
                **{
                    "taskId": task.taskId,
                    "market": task.market,
                    "date": year,
                    "ret": 1
                }))

    # Process the info of a completed task.
    def completeStockCrawlingTask(self, isSuccess: bool,
                                  retdto: StockMarketCapitalResult,
                                  dto: StockCrawlingDownloadTask) -> None:
        self.logger.info("##############completeStockCrawlingTask",
                         str(isSuccess))
        task = self.getTask(dto.taskId, dto.taskUniqueId)
        if isSuccess:
            self.success(task, 1)
        else:
            self.fail(task, 1)
        if task.restCount <= 0:
            self.deleteTask(task)
        if retdto:
            task.errMsg = retdto.errorMsg
        task.state = "success"
        self.updateTask(task)
        self.logger.info("completeStockCrawlingTask", "complete")
        self.taskEventEmitter.emit(
            EVENT_TASK_REPO_TASK_COMPLETE, "marcap",
            StockUpdateState(
                **{
                    "taskId": dto.taskId,
                    "market": dto.market,
                    "date": dto.dateStr,
                    "ret": 1 if isSuccess else 2
                }))

    # Process the info of a succeeded task.
    def success(self, task: ProcessTask, count: int) -> None:
        task.successCount = task.successCount + count
        task.restCount = task.restCount - count
        i = 0
        for _ in range(count):
            task.tasksRet[task.index + i] = SUCCESS
            i = i + 1
        task.index = task.index + count
        task.percent = (task.successCount + task.failCount) / task.count * 100
        if task.restCount <= 0:
            task.state = "success"
        else:
            task.state = "waiting next task"
        self.logger.info("success", f"{task.taskUniqueId}")

    # Process the info of a failed task.
    def fail(self, task: ProcessTask, count: int) -> None:
        task.failCount = task.failCount + count
        task.restCount = task.restCount - count
        i = 0
        for _ in range(count):
            left = task.tasks[task.index + i]
            task.failTasks.append(left)
            task.tasksRet[task.index + i] = FAIL
            i = i + 1
        task.index = task.index + count
        task.percent = (task.successCount + task.failCount) / task.count * 100
        if task.restCount <= 0:
            task.state = "fail"
        else:
            task.state = "waiting next task"
        self.logger.info("fail", f"{task.taskUniqueId}")

    # Return the info of completed tasks.
    def getCompletedTask(self, dto: ListLimitDao) -> ListLimitDataDao:
        taskData = self.mongod.getCompletedTask(dto)
        print(taskData)
        tasks: Dict = dict()
        taskIds = []
        for task in taskData.data:
            if task["taskId"] not in tasks:
                tasks[task["taskId"]] = dict()
                tasks[task["taskId"]]["list"] = dict()
                tasks[task["taskId"]]["ids"] = []
                taskIds.append(task["taskId"])
            tasks[task["taskId"]]["list"][task["taskUniqueId"]] = task
            tasks[task["taskId"]]["ids"].append(task["taskUniqueId"])

        stockCrawlingCompletedTasksDTO = StockCrawlingCompletedTasks(
            **{
                "history": tasks,
                "historyIds": taskIds
            })
        taskData.data = stockCrawlingCompletedTasksDTO
        self.logger.info("getCompletedTask", f"count: {len(taskIds)}")
        return taskData

    # Return the state of all tasks.
    def getAllTaskState(self, taskId: str) -> StockTaskState:
        markets = ["kospi", "kosdaq"]
        resultDict: YearData = YearData(**{"yearData": dict()})
        resultDict.yearData[taskId] = dict()
        for market in markets:
            data = self.mongod.getAllTaskState(taskId, market)
            compDict: Dict = {}
            count: Dict = {}
            for one in data:
                for idx, taskDate in enumerate(one["tasks"]):
                    if taskDate in compDict.keys():
                        if compDict[taskDate]["ret"] == 1 or one["tasksRet"][
                                idx] == 1:
                            compDict[taskDate] = {"date": taskDate, "ret": 1}
                    else:
                        year = taskDate[0:4]
                        if year in count.keys():
                            count[year] = count[year] + 1
                        else:
                            count[year] = 1
                        compDict[taskDate] = {
                            "date": taskDate,
                            "ret": one["tasksRet"][idx]
                        }
            collect: List = list(compDict.values())
            collect = sorted(collect, key=lambda x: x["date"])
            resultDict.yearData[taskId][market] = StockTaskState(
                **{
                    "taskStates": compDict,
                    "taskKeys": compDict.keys(),
                    "stocks": collect,
                    "years": count,
                    "market": market,
                    "taskId": taskId
                })
        return resultDict