""" .. _download_gdelt_climate_en: """ import os import requests import zipfile import logging from dotenv import find_dotenv, load_dotenv from arg_mine import PROJECT_DIR from arg_mine.utils import LOG_FMT, get_logger _logger = get_logger(__name__, logging.DEBUG) # URL taken from https://blog.gdeltproject.org/a-new-contextual-dataset-for-exploring-climate-change-narratives-6-3m-english-news-urls-with-contextual-snippets-2015-2020/ # noqa: E501 BASE_URL_FMT = "http://data.gdeltproject.org/blog/2020-climate-change-narrative/WebNewsEnglishSnippets.{year}.csv.zip" def download_file_from_url(url, target_file_path): """ Download a file from the given url Parameters ---------- url : str target URL to download file from target_file_path : str path to download the file to Returns
""" Main entry point for document sentence argument classification from a list of URLs TODO: add unit testing for the CLI options """ import os import logging import click from arg_mine import DATA_DIR from arg_mine.data.loaders import get_gdelt_df from arg_mine.api import classify from arg_mine import utils _logger = utils.get_logger(__name__, logging.DEBUG) # formatted string template for the output filenames _WRITE_FILENAME_FMT = "gdelt_{year}_{data}_docs{start:0{ndigit}d}-{end:0{ndigit}d}.csv" @click.command() @click.option( "--ndocs", default=100, type=int, help="The number of documents we want to extract from the list", ) @click.option( "--start-row", default=0, type=int,