def update_crawler_log(self, run_by: str, name_type: str) -> None:
    """The crawler log is written by updating the existing crawler log."""
    region_date_list: typing.List[CrawlerRegionDate] = []
    total_statistics = attr.asdict(self.total_statistics)
    # "토지이용계획정보" = land-use-plan info; "토지특성정보" = land-feature info.
    # The "...없음" ("none") variants zero out both zip counts and fall back
    # to the corresponding base name_type for the log folder.
    if name_type == "토지이용계획정보":
        total_statistics["land_feature_zip_count"] = 0
    elif name_type == "토지특성정보":
        total_statistics["land_use_zip_count"] = 0
    elif name_type == "토지이용계획정보없음":
        total_statistics["land_feature_zip_count"] = 0
        total_statistics["land_use_zip_count"] = 0
        name_type = "토지이용계획정보"
    elif name_type == "토지특성정보없음":
        total_statistics["land_feature_zip_count"] = 0
        total_statistics["land_use_zip_count"] = 0
        name_type = "토지특성정보"

    # Collect per-region crawl dates, skipping the "0001-01-01" sentinel
    # that marks regions without data.
    if name_type == "토지이용계획정보":
        for region, date in self.region_land_use_dict.items():
            if date != "0001-01-01":
                region_date_list.append(
                    CrawlerRegionDate(region=region, date=date)
                )
    elif name_type == "토지특성정보":
        for region, date in self.region_land_feature_dict.items():
            if date != "0001-01-01":
                region_date_list.append(
                    CrawlerRegionDate(region=region, date=date)
                )

    data = {
        "time_stamp": self.crawling_start_time,
        "run_by": run_by,
        "finish_time_stamp": str(timestamp(tznow())),
        "total_statistics": total_statistics,
        "region_date": [vars(x) for x in region_date_list],
    }
    folder_name = (
        f"{self.config['ENVIRONMENT']}/"
        f"{self.crawling_date.year}/"
        f"{self.crawling_date.month:02}/"
        f"{self.crawling_date.day:02}/"
        f"{str(self.crawling_start_time)}/"
        f"{name_type}/"
        f"crawler-log"
    )
    file_name = f"{self.crawling_start_time}.json"
    self.s3_client.upload_s3(
        folder_name, file_name, data, "application/json", encoding="utf-8"
    )
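A minimal sketch of the S3 key this method produces; the environment name, date, and epoch timestamp below are illustrative assumptions, not values from the source.

# Illustrative values standing in for self.config / self.crawling_date /
# self.crawling_start_time; only the key layout itself comes from the code above.
environment = "prod"
year, month, day = 2021, 3, 5
start_time = "1614902400"
name_type = "토지이용계획정보"

folder_name = (
    f"{environment}/{year}/{month:02}/{day:02}/"
    f"{start_time}/{name_type}/crawler-log"
)
file_name = f"{start_time}.json"
assert f"{folder_name}/{file_name}" == (
    "prod/2021/03/05/1614902400/토지이용계획정보/crawler-log/1614902400.json"
)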
def __init__(
    self,
    config: typing.Dict[str, typing.Any],
) -> None:
    super().__init__()
    self.config = config
    self.slack_client = SlackClient(
        config.get("SLACK_CHANNEL"), config.get("SLACK_API_TOKEN")
    )
    self.info_care_client = InfocareClient(config)
    self.s3_client = S3Client(config)
    self.total_statistics = CrawlerStatistics()
    self.failure_statistics = CrawlerStatistics()
    self.crawling_date: datetime.datetime = tznow(
        pytz.timezone("Asia/Seoul")
    )
    self.crawling_start_time: str = str(timestamp(self.crawling_date))
def __init__(
    self,
    config: typing.Dict[str, typing.Any],
) -> None:
    super().__init__()
    self.config = config
    self.slack_client = SlackClient(
        config.get("SLACK_CHANNEL"), config.get("SLACK_API_TOKEN")
    )
    self.taein_client = TaeinClient(
        client_delay=self.config["CLIENT_DELAY"],
        proxy=random.choice(self.config["PROXY_HOST_LIST"]),
    )
    self.s3_client = S3Client(config)
    self.total_statistics = CrawlerStatistics()
    self.failure_statistics = CrawlerStatistics()
    self.crawling_date: datetime.datetime = tznow(
        pytz.timezone("Asia/Seoul")
    )
    # Derive the start timestamp from crawling_date itself so the two
    # cannot diverge across a second boundary from a second clock read.
    self.crawling_start_time: str = str(timestamp(self.crawling_date))
def __init__(
    self,
    config: typing.Dict[str, typing.Any],
) -> None:
    super().__init__()
    self.config = config
    self.slack_client = SlackClient(
        config.get("SLACK_CHANNEL"), config.get("SLACK_API_TOKEN")
    )
    self.nsdi_client = NsdiClient(config)
    self.s3_client = S3Client(config)
    # Per-region latest-crawl dates, keyed by region code; consumed by
    # update_crawler_log above.
    self.region_land_use_dict: typing.Dict[str, str] = dict()
    self.region_land_feature_dict: typing.Dict[str, str] = dict()
    self.total_statistics = CrawlerStatistics()
    self.failure_statistics = CrawlerStatistics()
    self.crawling_date: datetime.datetime = tznow(
        pytz.timezone("Asia/Seoul")
    )
    # Derive the start timestamp from crawling_date itself so the two
    # cannot diverge across a second boundary from a second clock read.
    self.crawling_start_time: str = str(timestamp(self.crawling_date))
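A minimal sketch of how the region/date dicts feed update_crawler_log; the crawler instance, region codes, and dates are illustrative assumptions.

# Hypothetical crawler instance and region codes; "0001-01-01" is the
# sentinel date that update_crawler_log filters out of the log.
crawler.region_land_use_dict = {
    "11110": "2021-03-04",  # crawled: emitted as a CrawlerRegionDate entry
    "11140": "0001-01-01",  # no data: skipped when building region_date
}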
def update_crawler_log(self, run_by: str) -> None:
    total_statistics = attr.asdict(self.total_statistics)
    data = {
        "time_stamp": self.crawling_start_time,
        "run_by": run_by,
        "finish_time_stamp": str(timestamp(tznow())),
        "total_statistics": total_statistics,
    }
    folder_name = (
        f"{self.config['ENVIRONMENT']}/"
        f"{self.crawling_date.year}/"
        f"{self.crawling_date.month:02}/"
        f"{self.crawling_date.day:02}/"
        f"{str(self.crawling_start_time)}/"
        f"crawler-log"
    )
    file_name = f"{self.crawling_start_time}.json"
    self.s3_client.upload_json(
        folder_name=folder_name, file_name=file_name, data=data
    )
def upload_crawler_log_to_s3(self, run_by: str) -> None:
    total_statistics = attr.asdict(self.total_statistics)
    area_step = self.config["BUILDING_AREA_STEP"]
    area_start = self.config["BUILDING_AREA_START"]
    area_end = self.config["BUILDING_AREA_END"]
    # Buckets of width area_step from area_start to area_end; the final
    # bucket is open-ended, capped at 1000.
    area_range = [
        {"start_area": area_end, "end_area": 1000}
        if x == area_end
        else {"start_area": x, "end_area": x + area_step}
        for x in range(area_start, area_end + area_step, area_step)
    ]
    data = {
        "time_stamp": self.crawling_start_time,
        "run_by": run_by,
        "finish_time_stamp": str(timestamp(tznow())),
        "total_statistics": total_statistics,
        "area_range": area_range,
    }
    folder_name = (
        f"{self.config['ENVIRONMENT']}/"
        f"{self.crawling_date.year}/"
        f"{self.crawling_date.month:02}/"
        f"{self.crawling_date.day:02}/"
        f"{str(self.crawling_start_time)}/"
        f"crawler-log"
    )
    file_name = f"{self.crawling_start_time}.json"
    self.s3_client.upload_json(
        folder_name=folder_name, file_name=file_name, data=data
    )
    logger.info(
        "Upload crawler log to s3",
        folder_name=folder_name,
        file_name=file_name,
    )
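A minimal sketch of the area_range buckets, assuming illustrative config values (BUILDING_AREA_START=0, BUILDING_AREA_END=300, BUILDING_AREA_STEP=100); the comprehension is the one from the method above.

# Illustrative config values, not taken from the source.
area_start, area_end, area_step = 0, 300, 100
area_range = [
    {"start_area": area_end, "end_area": 1000}
    if x == area_end
    else {"start_area": x, "end_area": x + area_step}
    for x in range(area_start, area_end + area_step, area_step)
]
assert area_range == [
    {"start_area": 0, "end_area": 100},
    {"start_area": 100, "end_area": 200},
    {"start_area": 200, "end_area": 300},
    {"start_area": 300, "end_area": 1000},  # last bucket is capped at 1000
]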