# Example 1
def naver_news_crawling_example():
    """Crawl Naver news articles for a few categories over Jan 2017 - Apr 2018."""
    from korea_news_crawler.articlecrawler import ArticleCrawler

    news_crawler = ArticleCrawler()
    # Categories may be given as Korean names or English aliases
    # (e.g. 'politics', 'IT_science', 'economy').
    news_crawler.set_category('정치', 'IT과학', 'economy')
    news_crawler.set_date_range(2017, 1, 2018, 4)
    news_crawler.start()
# Example 2
def main():
    """Crawl the configured categories, one month-interval at a time.

    The overall [start, end] range from the CLI args is split into
    consecutive sub-intervals of ``args.month_interval`` months, and a
    fresh ArticleCrawler is run for each one, writing into a per-interval
    ``<result_dir>/<YYYY_MM>`` directory.
    """
    args = parse_arguments()

    logger = logging.getLogger('crawling')
    logger.info(f"Categories: {' '.join(args.categories)}")

    # Overall range: first day of the start month through the last day of
    # the end month (example in-line comments: 10.01 .. 12.31).
    start_date = datetime(args.start_year, args.start_month, 1)  # 10.01
    end_date = datetime(args.end_year, args.end_month, 1) + relativedelta(
        months=1)  # 12.01
    end_date = end_date - relativedelta(days=1)

    # First sub-interval: [start, start + month_interval) minus one day.
    inter_start_date = start_date  # 10.01
    inter_end_date = start_date + relativedelta(
        months=args.month_interval)  # 11.01
    inter_end_date = inter_end_date - relativedelta(days=1)

    while True:
        start_time = time.time()

        # One crawler per sub-interval; output directory is named after the
        # interval's start month (YYYY_MM).
        crawler = ArticleCrawler(write_root=os.path.join(
            args.result_dir, inter_start_date.strftime("%Y_%m")),
                                 logger=logger)
        crawler.set_category(*args.categories)
        crawler.set_date_range(start_year=inter_start_date.year,
                               start_month=inter_start_date.month,
                               end_year=inter_end_date.year,
                               end_month=inter_end_date.month)
        crawler.start(join=args.join)
        if args.join:
            # Elapsed time is only meaningful when we waited (join=True)
            # for the crawl to finish.
            elapsed = time.time() - start_time
            logger.info(
                f"{crawler.write_root} finished. {elapsed/60:.2f} minutes ({elapsed:.2f} seconds)."
            )

        # Advance both bounds by the interval length.
        # NOTE(review): inter_end_date is shifted by whole months instead of
        # being recomputed from inter_start_date, so its "-1 day" offset
        # drifts (e.g. 11.30 + 1 month -> 12.30, not 12.31). Presumably
        # harmless because set_date_range only consumes year/month — confirm.
        inter_start_date += relativedelta(
            months=args.month_interval)  # 11.01 -> 12.01
        inter_end_date += relativedelta(
            months=args.month_interval)  # 12.01 -> 01.01

        # Stop once the next sub-interval would extend past the overall end.
        if inter_end_date > end_date:
            break
# Example 3
from korea_news_crawler.articlecrawler import ArticleCrawler

if __name__ == "__main__":
    Crawler = ArticleCrawler()
    # Available categories: 정치 (politics), 경제 (economy),
    # 생활문화 (life/culture), IT과학 (IT/science), 사회 (society).
    Crawler.set_category("IT과학", "경제")
    # BUG FIX: set_date_range takes (start_year, start_month, end_year,
    # end_month) — see the other examples in this file. The original call
    # passed only three arguments for the intended "Jan 2017 .. Mar 2018".
    Crawler.set_date_range(2017, 1, 2018, 3)
    Crawler.start()
# Example 4
# Source: https://github.com/lumyjuwon/KoreaNewsCrawler
# Install with: pip install KoreaNewsCrawler
from korea_news_crawler.articlecrawler import ArticleCrawler
from multiprocessing import freeze_support

if __name__ == '__main__':
    # Required for multiprocessing on Windows / frozen executables.
    freeze_support()
    article_crawler = ArticleCrawler()
    article_crawler.set_category('economy')
    article_crawler.set_date_range(2021, 1, 2021, 2)
    article_crawler.start()
# Example 5
from korea_news_crawler.articlecrawler import ArticleCrawler

if __name__ == "__main__":
    Crawler = ArticleCrawler()
    # NOTE(review): starts crawling in multiprocess mode without calling
    # set_category()/set_date_range() first — confirm ArticleCrawler has
    # usable defaults. The fully-configured main() defined below this
    # guard is never invoked.
    Crawler.start(isMultiProc=True)
def main():
    """Crawl politics, economy and society articles for January 2021."""
    news_crawler = ArticleCrawler()
    news_crawler.set_category('정치', '경제', '사회')
    news_crawler.set_date_range(2021, 1, 2021, 1)
    news_crawler.start()