Example #1
0
"""
.. _download_gdelt_climate_en:
"""
import os
import requests
import zipfile
import logging

from dotenv import find_dotenv, load_dotenv

from arg_mine import PROJECT_DIR
from arg_mine.utils import LOG_FMT, get_logger

# Module-level logger obtained from the project helper, passing logging.DEBUG
# (presumably the log level -- confirm against arg_mine.utils.get_logger).
_logger = get_logger(__name__, logging.DEBUG)

# URL taken from https://blog.gdeltproject.org/a-new-contextual-dataset-for-exploring-climate-change-narratives-6-3m-english-news-urls-with-contextual-snippets-2015-2020/  # noqa: E501
# Template for the download URL of one zipped CSV file; `{year}` is the only
# placeholder and has to be filled in (e.g. with ``str.format(year=...)``)
# before the URL can be requested.
BASE_URL_FMT = "http://data.gdeltproject.org/blog/2020-climate-change-narrative/WebNewsEnglishSnippets.{year}.csv.zip"


def download_file_from_url(url, target_file_path):
    """
    Download a file from the given url

    Parameters
    ----------
    url : str
        target URL to download file from
    target_file_path : str
        path to download the file to

    Returns
Example #2
0
"""
Main entry point for sentence-level argument classification of documents from a list of URLs.

TODO: add unit tests for the CLI options
"""
import os
import logging

import click

from arg_mine import DATA_DIR
from arg_mine.data.loaders import get_gdelt_df
from arg_mine.api import classify
from arg_mine import utils

# Module-level logger obtained from the project helper, passing logging.DEBUG
# (presumably the log level -- confirm against arg_mine.utils.get_logger).
_logger = utils.get_logger(__name__, logging.DEBUG)

# Template for the output filenames, to be filled with ``str.format``.
# Placeholders: {year}, {data}, {start}, {end} and {ndigit}.
# {start} and {end} use the nested format spec "0{ndigit}d", i.e. they are
# rendered as integers zero-padded to at least {ndigit} digits. For example,
# year=2020, data="x", start=0, end=100, ndigit=4 gives
# "gdelt_2020_x_docs0000-0100.csv".
_WRITE_FILENAME_FMT = "gdelt_{year}_{data}_docs{start:0{ndigit}d}-{end:0{ndigit}d}.csv"


@click.command()
@click.option(
    "--ndocs",
    default=100,
    type=int,
    help="The number of documents we want to extract from the list",
)
@click.option(
    "--start-row",
    default=0,
    type=int,