def naver_news_crawling_example():
    from korea_news_crawler.articlecrawler import ArticleCrawler

    crawler = ArticleCrawler()
    crawler.set_category('정치', 'IT과학', 'economy')
    # crawler.set_category('politics', 'IT_science', 'economy')
    crawler.set_date_range(2017, 1, 2018, 4)
    crawler.start()
def main(): """메인 함수.""" args = parse_arguments() logger = logging.getLogger('crawling') logger.info(f"Categories: {' '.join(args.categories)}") start_date = datetime(args.start_year, args.start_month, 1) # 10.01 end_date = datetime(args.end_year, args.end_month, 1) + relativedelta( months=1) # 12.01 end_date = end_date - relativedelta(days=1) inter_start_date = start_date # 10.01 inter_end_date = start_date + relativedelta( months=args.month_interval) # 11.01 inter_end_date = inter_end_date - relativedelta(days=1) while True: start_time = time.time() crawler = ArticleCrawler(write_root=os.path.join( args.result_dir, inter_start_date.strftime("%Y_%m")), logger=logger) crawler.set_category(*args.categories) crawler.set_date_range(start_year=inter_start_date.year, start_month=inter_start_date.month, end_year=inter_end_date.year, end_month=inter_end_date.month) crawler.start(join=args.join) if args.join: elapsed = time.time() - start_time logger.info( f"{crawler.write_root} finished. {elapsed/60:.2f} minutes ({elapsed:.2f} seconds)." ) inter_start_date += relativedelta( months=args.month_interval) # 11.01 -> 12.01 inter_end_date += relativedelta( months=args.month_interval) # 12.01 -> 01.01 if inter_end_date > end_date: break
from korea_news_crawler.articlecrawler import ArticleCrawler if __name__ == "__main__": Crawler = ArticleCrawler() Crawler.set_category("IT과학", "경제") # 정치, 경제, 생활문화, IT과학, 사회 카테고리 사용 가능 Crawler.set_date_range(2017, 2018, 3) # 2017년 1월부터 2018년 3월까지 크롤링 시작 Crawler.start()
# https://github.com/lumyjuwon/KoreaNewsCrawler
# pip install KoreaNewsCrawler
from korea_news_crawler.articlecrawler import ArticleCrawler
from multiprocessing import freeze_support

if __name__ == '__main__':
    freeze_support()
    c = ArticleCrawler()
    c.set_category('economy')
    c.set_date_range(2021, 1, 2021, 2)
    c.start()
from korea_news_crawler.articlecrawler import ArticleCrawler if __name__ == "__main__": Crawler = ArticleCrawler() Crawler.start(isMultiProc=True)
from korea_news_crawler.articlecrawler import ArticleCrawler


def main():
    Crawler = ArticleCrawler()
    Crawler.set_category('정치', '경제', '사회')
    Crawler.set_date_range(2021, 1, 2021, 1)
    Crawler.start()