Exemple #1
0
def get_stock_recent_data(stock,
                          country,
                          as_json=False,
                          order='ascending',
                          debug=False):
    """
    Retrieve the recent historical data of a stock from Investing.com.

    The stock is looked up by symbol, restricted to the given country, in the packaged ``stocks.csv`` listing. Its
    recent daily data is then requested from the Spanish edition of Investing.com (``es.investing.com``) and parsed
    from the returned HTML table. Both the country and the symbol are matched case-insensitively and ignoring
    accents (via ``unidecode``).

    Args:
        stock (:obj:`str`): symbol of the stock to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the stock is.
        as_json (:obj:`bool`, optional):
            output format; a :obj:`pandas.DataFrame` is returned if False, a :obj:`json` string if True.
        order (:obj:`str`, optional):
            order of the retrieved data by date, one of `ascending`, `asc`, `descending` or `desc`.
        debug (:obj:`bool`, optional):
            whether log messages of the `investpy` logger are shown (True) or hidden (False).

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The open, high, low, close and volume values of the stock on market days, together with the currency
            in which those values are presented.

            With the default parameters the result is a :obj:`pandas.DataFrame` indexed by date::

                date || open | high | low | close | volume | currency
                -----||-----------------------------------------------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            whereas with `as_json=True` the result is a JSON string shaped like::

                {
                    name: name,
                    recent: [
                        dd/mm/yyyy: {
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid.
        FileNotFoundError: raised if the packaged stocks file does not exist.
        IOError: raised if the stocks listing could not be loaded.
        RuntimeError: raised if the country or the stock is not listed, or if the response contains no data table.
        ConnectionError: raised if Investing.com answers with a status code other than 200.
        IndexError: raised if Investing.com reports that no results were found for the stock.

    Examples:
        >>> investpy.get_stock_recent_data(stock='bbva', country='spain')
                         Open   High    Low  Close    Volume Currency
            Date
            2019-08-13  4.263  4.395  4.230  4.353  27250000      EUR
            2019-08-14  4.322  4.325  4.215  4.244  36890000      EUR
            2019-08-15  4.281  4.298  4.187  4.234  21340000      EUR
            2019-08-16  4.234  4.375  4.208  4.365  46080000      EUR
            2019-08-19  4.396  4.425  4.269  4.269  18950000      EUR

    """

    # --- argument validation (runs before any file or network access) ---
    if not stock:
        raise ValueError(
            "ERR#0013: stock parameter is mandatory and must be a valid stock name."
        )

    if not isinstance(stock, str):
        raise ValueError("ERR#0027: stock argument needs to be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not isinstance(debug, bool):
        raise ValueError(
            "ERR#0033: debug argument can just be a boolean value, either True or False."
        )

    # --- load the packaged stock listing ---
    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'stocks', 'stocks.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0056: stocks file not found or errored.")

    stocks = pd.read_csv(
        pkg_resources.resource_filename(resource_package, resource_path))

    if stocks is None:
        raise IOError(
            "ERR#0001: stocks object not found or unable to retrieve.")

    # --- resolve country and stock, ignoring case and accents ---
    country_key = unidecode.unidecode(country.lower())
    if country_key not in get_stock_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    stocks = stocks[stocks['country'] == country_key]

    stock = stock.strip().lower()
    stock_key = unidecode.unidecode(stock)

    symbol_keys = [
        unidecode.unidecode(value.lower())
        for value in stocks['symbol'].tolist()
    ]
    if stock_key not in symbol_keys:
        raise RuntimeError("ERR#0018: stock " + stock +
                           " not found, check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced stock on Investing.com')

    # Pick the row through the very same normalised keys used for validation.
    # Comparing the raw lower-cased symbols instead would yield an all-False
    # mask for accented input, and idxmax() on such a mask silently returns
    # the first row (i.e. a different stock).
    row_label = stocks.index[symbol_keys.index(stock_key)]
    symbol = stocks.loc[row_label, 'symbol']
    id_ = stocks.loc[row_label, 'id']
    name = stocks.loc[row_label, 'name']
    stock_currency = stocks.loc[row_label, 'currency']

    logger.info(str(stock) + ' found on Investing.com')

    # --- request the data ---
    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": "Datos históricos " + symbol,
        "interval_sec": "Daily",
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": user_agent.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    logger.info('Request sent to Investing.com!')

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                              ", try again later.")

    logger.info('Request to Investing.com data succeeded with code ' +
                str(req.status_code) + '!')

    # --- parse the HTML table ---
    table_rows = fromstring(
        req.text).xpath(".//table[@id='curr_table']/tbody/tr")

    if not table_rows:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    logger.info('Data parsing process starting...')

    def to_float(text):
        # Numbers are parsed as '.' for thousands and ',' for decimals.
        return float(text.replace('.', '').replace(',', '.'))

    # Checked in this order, first match wins.
    # NOTE(review): a volume cell without any of these suffixes is stored as 0.
    volume_suffixes = (('K', 1000), ('M', 1000000), ('B', 1000000000))

    result = []

    for table_row in table_rows:
        info = [cell.text_content() for cell in table_row.xpath(".//td")]

        if info[0] == 'No se encontraron resultados':
            raise IndexError(
                "ERR#0007: stock information unavailable or not found.")

        # Column order in the table is: date, close, open, high, low, volume.
        stock_date = datetime.datetime.strptime(info[0].replace('.', '-'),
                                                '%d-%m-%Y')
        stock_close = to_float(info[1])
        stock_open = to_float(info[2])
        stock_high = to_float(info[3])
        stock_low = to_float(info[4])

        stock_volume = 0
        for suffix, factor in volume_suffixes:
            if suffix in info[5]:
                stock_volume = int(
                    to_float(info[5].replace(suffix, '')) * factor)
                break

        result.append(
            Data(stock_date, stock_open, stock_high, stock_low, stock_close,
                 stock_volume, stock_currency))

    # The request asks for newest-first ("sort_ord": "DESC"), so ascending
    # output needs the rows reversed.
    if order in ['ascending', 'asc']:
        result.reverse()

    logger.info('Data parsing process finished...')

    if as_json:
        json_ = {
            'name': name,
            'recent': [value.stock_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.stock_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
Exemple #2
0
def get_stock_historical_data(stock,
                              country,
                              from_date,
                              to_date,
                              as_json=False,
                              order='ascending',
                              debug=False):
    """
    Retrieve the historical data of a stock from Investing.com for a date range.

    The stock is looked up by symbol, restricted to the given country, in the packaged ``stocks.csv`` listing.
    Both the country and the symbol are matched case-insensitively and ignoring accents (via ``unidecode``).
    The requested range is split into consecutive, non-overlapping chunks of at most 20 years. Each chunk is
    requested from the Spanish edition of Investing.com (``es.investing.com``) and the parsed rows of all chunks
    are merged into a single result, ordered by date as requested.

    Args:
        stock (:obj:`str`): symbol of the stock to retrieve historical data from.
        country (:obj:`str`): name of the country from where the stock is.
        from_date (:obj:`str`): date formatted as `dd/mm/yyyy`, since when data is going to be retrieved.
        to_date (:obj:`str`): date formatted as `dd/mm/yyyy`, until when data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            output format; a :obj:`pandas.DataFrame` is returned if False, a :obj:`json` string if True.
        order (:obj:`str`, optional):
            order of the retrieved data by date, one of `ascending`, `asc`, `descending` or `desc`.
        debug (:obj:`bool`, optional):
            whether log messages of the `investpy` logger are shown (True) or hidden (False).

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The open, high, low, close and volume values of the stock on market days within the range, together
            with the currency in which those values are presented.

            With the default parameters the result is a :obj:`pandas.DataFrame` indexed by date::

                date || open | high | low | close | volume | currency
                -----||-----------------------------------------------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            whereas with `as_json=True` the result is a JSON string shaped like::

                {
                    name: name,
                    historical: [
                        dd/mm/yyyy: {
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid; this includes dates that are
            not `dd/mm/yyyy` strings and a `from_date` that is not earlier than `to_date`.
        FileNotFoundError: raised if the packaged stocks file does not exist.
        IOError: raised if the stocks listing could not be loaded.
        RuntimeError: raised if the country or the stock is not listed, or if a response contains no data table.
        ConnectionError: raised if Investing.com answers with a status code other than 200.
        IndexError: raised if no data at all was found for the stock in the requested range.

    Examples:
        >>> investpy.get_stock_historical_data(stock='bbva', country='spain', from_date='01/01/2010', to_date='01/01/2019')
                         Open   High    Low  Close  Volume Currency
            Date
            2010-01-04  12.73  12.96  12.73  12.96       0      EUR
            2010-01-05  13.00  13.11  12.97  13.09       0      EUR
            2010-01-06  13.03  13.17  13.02  13.12       0      EUR
            2010-01-07  13.02  13.11  12.93  13.05       0      EUR
            2010-01-08  13.12  13.22  13.04  13.18       0      EUR

    """

    # --- argument validation (runs before any file or network access) ---
    if not stock:
        raise ValueError(
            "ERR#0013: stock parameter is mandatory and must be a valid stock name."
        )

    if not isinstance(stock, str):
        raise ValueError("ERR#0027: stock argument needs to be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not isinstance(debug, bool):
        raise ValueError(
            "ERR#0033: debug argument can just be a boolean value, either True or False."
        )

    # strptime raises TypeError (not ValueError) for non-str input such as
    # None, so both are translated into the documented ValueError.
    try:
        start_date = datetime.datetime.strptime(from_date, '%d/%m/%Y')
    except (TypeError, ValueError) as err:
        raise ValueError(
            "ERR#0011: incorrect from_date date format, it should be 'dd/mm/yyyy'."
        ) from err

    try:
        end_date = datetime.datetime.strptime(to_date, '%d/%m/%Y')
    except (TypeError, ValueError) as err:
        raise ValueError(
            "ERR#0012: incorrect to_date format, it should be 'dd/mm/yyyy'."
        ) from err

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'."
        )

    # --- split the range into chunks of at most 20 years ---
    intervals = []

    while end_date.year - start_date.year > 20:
        try:
            chunk_end = start_date.replace(year=start_date.year + 20)
        except ValueError:
            # 29 February has no counterpart when the target year is not a
            # leap year; fall back to 28 February.
            chunk_end = start_date.replace(year=start_date.year + 20, day=28)

        intervals.append({
            'start': start_date.strftime('%d/%m/%Y'),
            'end': chunk_end.strftime('%d/%m/%Y'),
        })

        # Start the next chunk on the following day so that the boundary
        # date is not requested (and returned) twice.
        start_date = chunk_end + datetime.timedelta(days=1)

    intervals.append({
        'start': start_date.strftime('%d/%m/%Y'),
        'end': end_date.strftime('%d/%m/%Y'),
    })

    # --- load the packaged stock listing ---
    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'stocks', 'stocks.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0056: stocks file not found or errored.")

    stocks = pd.read_csv(
        pkg_resources.resource_filename(resource_package, resource_path))

    if stocks is None:
        raise IOError(
            "ERR#0001: stocks object not found or unable to retrieve.")

    # --- resolve country and stock, ignoring case and accents ---
    country_key = unidecode.unidecode(country.lower())
    if country_key not in get_stock_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    stocks = stocks[stocks['country'] == country_key]

    stock = stock.strip().lower()
    stock_key = unidecode.unidecode(stock)

    symbol_keys = [
        unidecode.unidecode(value.lower())
        for value in stocks['symbol'].tolist()
    ]
    if stock_key not in symbol_keys:
        raise RuntimeError("ERR#0018: stock " + stock +
                           " not found, check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced stock on Investing.com')

    # Pick the row through the very same normalised keys used for validation.
    # Comparing the raw lower-cased symbols instead would yield an all-False
    # mask for accented input, and idxmax() on such a mask silently returns
    # the first row (i.e. a different stock).
    row_label = stocks.index[symbol_keys.index(stock_key)]
    symbol = stocks.loc[row_label, 'symbol']
    id_ = stocks.loc[row_label, 'id']
    name = stocks.loc[row_label, 'name']
    stock_currency = stocks.loc[row_label, 'currency']

    logger.info(str(stock) + ' found on Investing.com')

    logger.info('Data parsing process starting...')

    header = "Datos históricos " + symbol
    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    def to_float(text):
        # Numbers are parsed as '.' for thousands and ',' for decimals.
        return float(text.replace('.', '').replace(',', '.'))

    # Checked in this order, first match wins.
    # NOTE(review): a volume cell without any of these suffixes is stored as 0.
    volume_suffixes = (('K', 1000), ('M', 1000000), ('B', 1000000000))

    # One list of parsed rows per interval that returned data, kept in
    # chronological interval order while the requests are made.
    chunks = []

    for interval in intervals:
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": interval['start'],
            "end_date": interval['end'],
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": user_agent.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                                  ", try again later.")

        if not req.text:
            continue

        table_rows = fromstring(
            req.text).xpath(".//table[@id='curr_table']/tbody/tr")

        if not table_rows:
            raise RuntimeError(
                "ERR#0004: data retrieval error while scraping.")

        result = []

        for table_row in table_rows:
            info = [cell.text_content() for cell in table_row.xpath(".//td")]

            # An interval without data is skipped; whether anything at all
            # was found is decided once every interval has been requested.
            if info[0] == 'No se encontraron resultados':
                continue

            # Column order in the table is: date, close, open, high, low,
            # volume.
            stock_date = datetime.datetime.strptime(
                info[0].replace('.', '-'), '%d-%m-%Y')
            stock_close = to_float(info[1])
            stock_open = to_float(info[2])
            stock_high = to_float(info[3])
            stock_low = to_float(info[4])

            stock_volume = 0
            for suffix, factor in volume_suffixes:
                if suffix in info[5]:
                    stock_volume = int(
                        to_float(info[5].replace(suffix, '')) * factor)
                    break

            result.append(
                Data(stock_date, stock_open, stock_high, stock_low,
                     stock_close, stock_volume, stock_currency))

        if not result:
            continue

        # Each response is requested newest-first ("sort_ord": "DESC").
        if order in ['ascending', 'asc']:
            result.reverse()

        chunks.append(result)

    # Intervals were requested oldest-first; for descending output the
    # newest interval has to come first as well.
    if order in ['descending', 'desc']:
        chunks.reverse()

    records = [value for chunk in chunks for value in chunk]

    if not records:
        raise IndexError(
            "ERR#0007: stock information unavailable or not found.")

    logger.info('Data parsing process finished...')

    if as_json:
        json_ = {
            'name': name,
            'historical': [value.stock_as_json() for value in records]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.stock_to_dict() for value in records])
    df.set_index('Date', inplace=True)

    return df
Exemple #3
0
def get_currency_cross_historical_data(currency_cross,
                                       from_date,
                                       to_date,
                                       as_json=False,
                                       order='ascending',
                                       debug=False):
    """
    This function retrieves recent historical data from the introduced `currency_cross` from Investing
    via Web Scraping. The resulting data can it either be stored in a :obj:`pandas.DataFrame` or in a
    :obj:`json` file, with `ascending` or `descending` order.

    Args:
        currency_cross (:obj:`str`): name of the currency cross to retrieve recent historical data from.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, `True` or `False`, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns a either a :obj:`pandas.DataFrame` or a :obj:`json` file containing the retrieved
            recent data from the specified currency_cross via argument. The dataset contains the open, high, low, close and
            volume values for the selected currency_cross on market days.

            The return data is case we use default arguments will look like::

                date || open | high | low | close | volume | currency
                -----||------------------------------------|---------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    recent: [
                        dd/mm/yyyy: {
                            'open': x,
                            'high': x,
                            'low': x,
                            'close': x,
                            'volume': x,
                            'currency' : x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: argument error.
        IOError: stocks object/file not found or unable to retrieve.
        RuntimeError: introduced currency_cross does not match any of the indexed ones.
        ConnectionError: if GET requests does not return 200 status code.
        IndexError: if currency_cross information was unavailable or not found.

    Examples:
        >>> investpy.get_currency_cross_historical_data(currency_cross='EUR/USD', from_date='01/01/2018', to_date='01/01/2019')
                          Open    High     Low   Close  Volume Currency
            Date
            2018-01-01  1.2003  1.2014  1.1995  1.2010       0      USD
            2018-01-02  1.2013  1.2084  1.2003  1.2059       0      USD
            2018-01-03  1.2058  1.2070  1.2001  1.2014       0      USD
            2018-01-04  1.2015  1.2090  1.2004  1.2068       0      USD
            2018-01-05  1.2068  1.2085  1.2021  1.2030       0      USD

    """

    if not currency_cross:
        raise ValueError(
            "ERR#0052: currency_cross param is mandatory and should be a str.")

    if not isinstance(currency_cross, str):
        raise ValueError(
            "ERR#0052: currency_cross param is mandatory and should be a str.")

    try:
        datetime.datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    try:
        datetime.datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    start_date = datetime.datetime.strptime(from_date, '%d/%m/%Y')
    end_date = datetime.datetime.strptime(to_date, '%d/%m/%Y')

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'."
        )

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not isinstance(debug, bool):
        raise ValueError(
            "ERR#0033: debug argument can just be a boolean value, either True or False."
        )

    date_interval = {
        'intervals': [],
    }

    flag = True

    while flag is True:
        diff = end_date.year - start_date.year

        if diff > 20:
            obj = {
                'start':
                start_date.strftime('%d/%m/%Y'),
                'end':
                start_date.replace(year=start_date.year +
                                   20).strftime('%d/%m/%Y'),
            }

            date_interval['intervals'].append(obj)

            start_date = start_date.replace(year=start_date.year + 20)
        else:
            obj = {
                'start': start_date.strftime('%d/%m/%Y'),
                'end': end_date.strftime('%d/%m/%Y'),
            }

            date_interval['intervals'].append(obj)

            flag = False

    interval_limit = len(date_interval['intervals'])
    interval_counter = 0

    data_flag = False

    resource_package = 'investpy'
    resource_path = '/'.join(
        ('resources', 'currency_crosses', 'currency_crosses.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        currency_crosses = pd.read_csv(
            pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError(
            "ERR#0060: currency_crosses file not found or errored.")

    if currency_crosses is None:
        raise IOError(
            "ERR#0050: currency_crosses not found or unable to retrieve.")

    currency_cross = currency_cross.strip()
    currency_cross = currency_cross.lower()

    if unidecode.unidecode(currency_cross) not in [
            unidecode.unidecode(value.lower())
            for value in currency_crosses['name'].tolist()
    ]:
        raise RuntimeError("ERR#0054: the introduced currency_cross " +
                           str(currency_cross) + " does not exists.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')

    if debug is False:
        logger.disabled = True
    else:
        logger.disabled = False

    logger.info('Searching introduced currency_cross on Investing.com')

    id_ = currency_crosses.loc[(
        currency_crosses['name'].str.lower() == currency_cross).idxmax(), 'id']
    name = currency_crosses.loc[(
        currency_crosses['name'].str.lower() == currency_cross).idxmax(),
                                'name']
    currency = currency_crosses.loc[(
        currency_crosses['name'].str.lower() == currency_cross).idxmax(),
                                    'second']

    logger.info(str(currency_cross) + ' found on Investing.com')

    final = list()

    header = "Datos históricos " + name

    for index in range(len(date_interval['intervals'])):
        interval_counter += 1

        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": date_interval['intervals'][index]['start'],
            "end_date": date_interval['intervals'][index]['end'],
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": user_agent.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        url = "https://es.investing.com/instruments/HistoricalDataAjax"

        logger.info('Request sent to Investing.com!')

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                                  ", try again later.")

        logger.info('Request to Investing.com data succeeded with code ' +
                    str(req.status_code) + '!')

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")
        result = list()

        if path_:
            logger.info('Data parsing process starting...')

            for elements_ in path_:
                info = []
                for nested_ in elements_.xpath(".//td"):
                    info.append(nested_.text_content())

                if info[0] == 'No se encontraron resultados':
                    if interval_counter < interval_limit:
                        data_flag = False
                    else:
                        raise IndexError(
                            "ERR#0055: currency_cross information unavailable or not found."
                        )
                else:
                    data_flag = True

                if data_flag is True:
                    currency_cross_date = datetime.datetime.strptime(
                        info[0].replace('.', '-'), '%d-%m-%Y')

                    currency_cross_close = float(info[1].replace('.',
                                                                 '').replace(
                                                                     ',', '.'))
                    currency_cross_open = float(info[2].replace('.',
                                                                '').replace(
                                                                    ',', '.'))
                    currency_cross_high = float(info[3].replace('.',
                                                                '').replace(
                                                                    ',', '.'))
                    currency_cross_low = float(info[4].replace('.',
                                                               '').replace(
                                                                   ',', '.'))

                    currency_cross_volume = 0

                    if info[5].__contains__('K'):
                        currency_cross_volume = int(
                            float(info[5].replace('K', '').replace(
                                '.', '').replace(',', '.')) * 1000)
                    elif info[5].__contains__('M'):
                        currency_cross_volume = int(
                            float(info[5].replace('M', '').replace(
                                '.', '').replace(',', '.')) * 1000000)
                    elif info[5].__contains__('B'):
                        currency_cross_volume = int(
                            float(info[5].replace('B', '').replace(
                                '.', '').replace(',', '.')) * 1000000000)

                    result.insert(
                        len(result),
                        Data(currency_cross_date, currency_cross_open,
                             currency_cross_high, currency_cross_low,
                             currency_cross_close, currency_cross_volume,
                             currency))

            if data_flag is True:
                if order in ['ascending', 'asc']:
                    result = result[::-1]
                elif order in ['descending', 'desc']:
                    result = result

                if as_json is True:
                    json_ = {
                        'name':
                        name,
                        'historical':
                        [value.currency_cross_as_json() for value in result]
                    }

                    final.append(json_)
                elif as_json is False:
                    df = pd.DataFrame.from_records(
                        [value.currency_cross_to_dict() for value in result])
                    df.set_index('Date', inplace=True)

                    final.append(df)
        else:
            raise RuntimeError(
                "ERR#0004: data retrieval error while scraping.")

    logger.info('Data parsing process finished...')

    if as_json is True:
        return json.dumps(final[0], sort_keys=False)
    elif as_json is False:
        return pd.concat(final)
Exemple #4
0
def get_etf_historical_data(etf,
                            country,
                            from_date,
                            to_date,
                            as_json=False,
                            order='ascending',
                            debug=False):
    """
    Retrieve the historical data of the introduced `etf` from Investing.com via Web Scraping for the
    introduced date range. The resulting data is returned either as a :obj:`pandas.DataFrame` or as a
    :obj:`json` string, in `ascending` or `descending` date order.

    Date ranges whose year difference is greater than 20 are split into consecutive, non-overlapping
    chunks of 20 years; one request is sent per chunk and the partial results are merged before returning.

    Args:
        etf (:obj:`str`): name of the etf to retrieve historical data from.
        country (:obj:`str`): name of the country from where the etf is.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, `True` or `False`, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` string containing the
            retrieved historical data from the specified etf. The dataset contains the open, high, low and
            close values for the selected etf on market days.

            The returned data, in case the default arguments are used, will look like::

                date || open | high | low | close | currency
                -----||--------------------------------------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: argument error.
        FileNotFoundError: etfs file not found in the package resources.
        IOError: etfs object/file not found or unable to retrieve.
        RuntimeError: introduced country/etf does not match any of the indexed ones, or scraping failed.
        ConnectionError: if the POST request does not return a 200 status code.
        IndexError: if etf information was unavailable or not found.

    Examples:
        >>> investpy.get_etf_historical_data(etf='bbva accion dj eurostoxx 50', country='spain', from_date='01/01/2010', to_date='01/01/2019', as_json=False, order='ascending', debug=False)
                         Open   High    Low  Close Currency
            Date
            2011-12-07  23.70  23.70  23.70  23.62      EUR
            2011-12-08  23.53  23.60  23.15  23.04      EUR
            2011-12-09  23.36  23.60  23.36  23.62      EUR
            2011-12-12  23.15  23.26  23.00  22.88      EUR
            2011-12-13  22.88  22.88  22.88  22.80      EUR

    """

    if not etf:
        raise ValueError(
            "ERR#0031: etf parameter is mandatory and must be a valid etf name."
        )

    if not isinstance(etf, str):
        raise ValueError("ERR#0030: etf argument needs to be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not isinstance(debug, bool):
        raise ValueError(
            "ERR#0033: debug argument can just be a boolean value, either True or False."
        )

    # Parse each date once; the parsed value is both the validation and the result.
    try:
        start_date = datetime.datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'."
        )

    # Split the requested range into chunks of 20 years, one request per chunk.
    intervals = []

    while end_date.year - start_date.year > 20:
        try:
            chunk_end = start_date.replace(year=start_date.year + 20)
        except ValueError:
            # 29 February has no counterpart when the target year is not a leap year.
            chunk_end = start_date.replace(year=start_date.year + 20, day=28)

        intervals.append({
            'start': start_date.strftime('%d/%m/%Y'),
            'end': chunk_end.strftime('%d/%m/%Y'),
        })

        # Start the next chunk one day later so the boundary day is not
        # requested (and therefore returned) twice.
        start_date = chunk_end + datetime.timedelta(days=1)

    intervals.append({
        'start': start_date.strftime('%d/%m/%Y'),
        'end': end_date.strftime('%d/%m/%Y'),
    })

    interval_limit = len(intervals)

    data_flag = False

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'etfs', 'etfs.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        etfs = pd.read_csv(
            pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0058: etfs file not found or errored.")

    if etfs is None:
        raise IOError("ERR#0009: etfs object not found or unable to retrieve.")

    normalized_country = unidecode.unidecode(country.lower())

    if normalized_country not in get_etf_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    etfs = etfs[etfs['country'] == normalized_country]

    etf = etf.strip()
    etf = etf.lower()

    # Accent-insensitive comparison, aligned with the filtered frame so the
    # very same mask can be used to locate the matching row afterwards.
    normalized_names = pd.Series(
        [unidecode.unidecode(value.lower()) for value in etfs['name'].tolist()],
        index=etfs.index,
        dtype=object)
    matches = normalized_names == unidecode.unidecode(etf)

    if not matches.any():
        raise RuntimeError("ERR#0019: etf " + str(etf) + " not found in " +
                           str(country.lower()) + ", check if it is correct.")

    # Prefer an exact (accent-sensitive) match when there is one; otherwise
    # fall back to the accent-insensitive match validated above. Looking up an
    # all-False mask with idxmax() would silently select the first row.
    exact_matches = etfs['name'].str.lower() == etf
    if exact_matches.any():
        matches = exact_matches

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')

    logger.disabled = debug is False

    logger.info('Searching introduced etf on Investing.com')

    row_label = matches.idxmax()

    symbol = etfs.loc[row_label, 'symbol']
    id_ = etfs.loc[row_label, 'id']
    name = etfs.loc[row_label, 'name']
    etf_currency = etfs.loc[row_label, 'currency']

    logger.info(str(etf) + ' found on Investing.com')

    # One entry per interval that returned data, kept in chronological order.
    final = list()

    header = "Datos históricos " + symbol
    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    for interval_counter, interval in enumerate(intervals, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": interval['start'],
            "end_date": interval['end'],
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": user_agent.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        logger.info('Request sent to Investing.com!')

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                                  ", try again later.")

        logger.info('Request to Investing.com data succeeded with code ' +
                    str(req.status_code) + '!')

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError(
                "ERR#0004: data retrieval error while scraping.")

        logger.info('Data parsing process starting...')

        result = list()

        for elements_ in path_:
            info = [
                nested_.text_content() for nested_ in elements_.xpath(".//td")
            ]

            if info[0] == 'No se encontraron resultados':
                # An empty chunk is tolerated unless it is the last one.
                if interval_counter < interval_limit:
                    data_flag = False
                    continue

                raise IndexError(
                    "ERR#0010: etf information unavailable or not found.")

            data_flag = True

            etf_date = datetime.datetime.strptime(info[0].replace('.', '-'),
                                                  '%d-%m-%Y')

            # Spanish number format: '.' groups thousands and ',' is the
            # decimal mark. Columns arrive as close, open, high, low.
            etf_close, etf_open, etf_high, etf_low = [
                float(info[position].replace('.', '').replace(',', '.'))
                for position in (1, 2, 3, 4)
            ]

            result.append(
                Data(etf_date, etf_open, etf_high, etf_low, etf_close, None,
                     etf_currency))

        if data_flag is not True:
            continue

        # Rows are requested newest-first ("sort_ord": "DESC").
        if order in ['ascending', 'asc']:
            result.reverse()

        if as_json is True:
            final.append([value.etf_as_json() for value in result])
        else:
            df = pd.DataFrame.from_records(
                [value.etf_to_dict() for value in result])
            df.set_index('Date', inplace=True)

            final.append(df)

    logger.info('Data parsing process finished...')

    if not final:
        # No interval produced any row (e.g. every response body was empty).
        raise IndexError("ERR#0010: etf information unavailable or not found.")

    # Chunks were collected oldest-first; flip them so that the merged output
    # is globally descending and not only descending inside each chunk.
    if order in ['descending', 'desc']:
        final.reverse()

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [entry for chunk in final for entry in chunk]
        }

        return json.dumps(json_, sort_keys=False)

    return pd.concat(final)
Exemple #5
0
def get_index_historical_data(index,
                              country,
                              from_date,
                              to_date,
                              as_json=False,
                              order='ascending',
                              debug=False):
    """
    Retrieve the historical data of the introduced `index` (from the specified country; note that both
    index and country should match, since if the introduced index is not listed in the indices of that
    country the function will raise an error). The retrieved historical data are the OHLC values plus the
    Volume and the Currency in which those values are specified, for the introduced date range if valid.
    The resulting data is returned either as a :obj:`pandas.DataFrame` or as a :obj:`json` string.

    Date ranges whose year difference is greater than 20 are split into consecutive, non-overlapping
    chunks of 20 years; one request is sent per chunk and the partial results are merged before returning.

    Args:
        index (:obj:`str`): name of the index to retrieve historical data from.
        country (:obj:`str`): name of the country from where the index is.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, `True` or `False`, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` string containing the
            retrieved historical data from the specified index. The dataset contains the open, high, low,
            close and volume values for the selected index on market days; additionally, the currency in
            which those values are specified is returned.

            The returned data, in case the default arguments are used, will look like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------------------------------------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if there was an argument error.
        FileNotFoundError: raised if the indices file was not found in the package resources.
        IOError: raised if indices object/file was not found or unable to retrieve.
        RuntimeError: raised if the introduced country/index does not match any of the indexed ones,
            or if scraping failed.
        ConnectionError: raised if the POST request does not return a 200 status code.
        IndexError: raised if index information was unavailable or not found.

    Examples:
        >>> investpy.get_index_historical_data(index='ibex 35', country='spain', from_date='01/01/2018', to_date='01/01/2019')
                           Open     High      Low    Close    Volume Currency
            Date
            2018-01-02  15128.2  15136.7  14996.6  15096.8  10340000      EUR
            2018-01-03  15145.0  15186.9  15091.9  15106.9  12800000      EUR
            2018-01-04  15105.5  15368.7  15103.7  15368.7  17070000      EUR
            2018-01-05  15353.9  15407.5  15348.6  15398.9  11180000      EUR
            2018-01-08  15437.1  15448.7  15344.0  15373.3  12890000      EUR

    """

    if not index:
        raise ValueError(
            "ERR#0047: index param is mandatory and should be a str.")

    if not isinstance(index, str):
        raise ValueError(
            "ERR#0047: index param is mandatory and should be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    # Parse each date once; the parsed value is both the validation and the result.
    try:
        start_date = datetime.datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'."
        )

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not isinstance(debug, bool):
        raise ValueError(
            "ERR#0033: debug argument can just be a boolean value, either True or False."
        )

    # Split the requested range into chunks of 20 years, one request per chunk.
    intervals = []

    while end_date.year - start_date.year > 20:
        try:
            chunk_end = start_date.replace(year=start_date.year + 20)
        except ValueError:
            # 29 February has no counterpart when the target year is not a leap year.
            chunk_end = start_date.replace(year=start_date.year + 20, day=28)

        intervals.append({
            'start': start_date.strftime('%d/%m/%Y'),
            'end': chunk_end.strftime('%d/%m/%Y'),
        })

        # Start the next chunk one day later so the boundary day is not
        # requested (and therefore returned) twice.
        start_date = chunk_end + datetime.timedelta(days=1)

    intervals.append({
        'start': start_date.strftime('%d/%m/%Y'),
        'end': end_date.strftime('%d/%m/%Y'),
    })

    interval_limit = len(intervals)

    data_flag = False

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'indices', 'indices.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        indices = pd.read_csv(
            pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0059: indices file not found or errored.")

    if indices is None:
        raise IOError("ERR#0037: indices not found or unable to retrieve.")

    normalized_country = unidecode.unidecode(country.lower())

    if normalized_country not in get_index_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    indices = indices[indices['country'] == normalized_country]

    index = index.strip()
    index = index.lower()

    # Accent-insensitive comparison, aligned with the filtered frame so the
    # very same mask can be used to locate the matching row afterwards.
    normalized_names = pd.Series(
        [
            unidecode.unidecode(value.lower())
            for value in indices['name'].tolist()
        ],
        index=indices.index,
        dtype=object)
    matches = normalized_names == unidecode.unidecode(index)

    if not matches.any():
        raise RuntimeError("ERR#0045: index " + index +
                           " not found, check if it is correct.")

    # Prefer an exact (accent-sensitive) match when there is one; otherwise
    # fall back to the accent-insensitive match validated above. Looking up an
    # all-False mask with idxmax() would silently select the first row.
    exact_matches = indices['name'].str.lower() == index
    if exact_matches.any():
        matches = exact_matches

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')

    logger.disabled = debug is False

    logger.info('Searching introduced index on Investing.com')

    row_label = matches.idxmax()

    full_name = indices.loc[row_label, 'full_name']
    id_ = indices.loc[row_label, 'id']
    name = indices.loc[row_label, 'name']
    index_currency = indices.loc[row_label, 'currency']

    logger.info(str(index) + ' found on Investing.com')

    # One entry per interval that returned data, kept in chronological order.
    final = list()

    header = "Datos históricos " + full_name
    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    # Suffixes used by the site to abbreviate the volume column.
    volume_multipliers = (('K', 1000), ('M', 1000000), ('B', 1000000000))

    # The loop variables deliberately avoid the name `index`, which is the
    # function parameter holding the index name.
    for interval_counter, interval in enumerate(intervals, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": interval['start'],
            "end_date": interval['end'],
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": user_agent.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        logger.info('Request sent to Investing.com!')

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                                  ", try again later.")

        logger.info('Request to Investing.com data succeeded with code ' +
                    str(req.status_code) + '!')

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError(
                "ERR#0004: data retrieval error while scraping.")

        logger.info('Data parsing process starting...')

        result = list()

        for elements_ in path_:
            info = [
                nested_.text_content() for nested_ in elements_.xpath(".//td")
            ]

            if info[0] == 'No se encontraron resultados':
                # An empty chunk is tolerated unless it is the last one.
                if interval_counter < interval_limit:
                    data_flag = False
                    continue

                raise IndexError(
                    "ERR#0046: index information unavailable or not found.")

            data_flag = True

            index_date = datetime.datetime.strptime(
                info[0].replace('.', '-'), '%d-%m-%Y')

            # Spanish number format: '.' groups thousands and ',' is the
            # decimal mark. Columns arrive as close, open, high, low.
            index_close, index_open, index_high, index_low = [
                float(info[position].replace('.', '').replace(',', '.'))
                for position in (1, 2, 3, 4)
            ]

            # Volume stays 0 when the cell carries none of the known suffixes.
            index_volume = 0

            for suffix, multiplier in volume_multipliers:
                if suffix in info[5]:
                    index_volume = int(
                        float(info[5].replace(suffix, '').replace(
                            '.', '').replace(',', '.')) * multiplier)
                    break

            result.append(
                Data(index_date, index_open, index_high, index_low,
                     index_close, index_volume, index_currency))

        if data_flag is not True:
            continue

        # Rows are requested newest-first ("sort_ord": "DESC").
        if order in ['ascending', 'asc']:
            result.reverse()

        if as_json is True:
            final.append([value.index_as_json() for value in result])
        else:
            df = pd.DataFrame.from_records(
                [value.index_to_dict() for value in result])
            df.set_index('Date', inplace=True)

            final.append(df)

    logger.info('Data parsing process finished...')

    if not final:
        # No interval produced any row (e.g. every response body was empty).
        raise IndexError(
            "ERR#0046: index information unavailable or not found.")

    # Chunks were collected oldest-first; flip them so that the merged output
    # is globally descending and not only descending inside each chunk.
    if order in ['descending', 'desc']:
        final.reverse()

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [entry for chunk in final for entry in chunk]
        }

        return json.dumps(json_, sort_keys=False)

    return pd.concat(final)
Exemple #6
0
def get_index_recent_data(index,
                          country,
                          as_json=False,
                          order='ascending',
                          debug=False):
    """
    Retrieve the recent historical data of the introduced `index` from Investing via Web Scraping.

    The resulting data is returned either as a :obj:`pandas.DataFrame` or as a :obj:`json` string, sorted by
    date in `ascending` or `descending` order.

    Args:
        index (:obj:`str`): name of the index to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the index is.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, `True` or `False`, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` string containing the retrieved
            recent data from the specified index. The dataset contains the open, high, low, close and volume
            values for the selected index on market days, additionally the currency value is returned.

            The returned data, in case the default arguments are used, will look like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but if `as_json=True` is defined, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if there was an argument error.
        FileNotFoundError: raised if the indices resource file does not exist in the package.
        IOError: raised if indices object/file was not found or unable to retrieve.
        RuntimeError: raised if the introduced country or index does not match any of the indexed ones, or if
            the response contains no data rows.
        ConnectionError: raised if the POST request does not return a 200 status code.
        IndexError: raised if index information was unavailable or not found.

    Examples:
        >>> investpy.get_index_recent_data(index='ibex 35', country='spain')
                           Open     High      Low    Close   Volume Currency
            Date
            2019-08-26  12604.7  12646.3  12510.4  12621.3  4770000      EUR
            2019-08-27  12618.3  12723.3  12593.6  12683.8  8230000      EUR
            2019-08-28  12657.2  12697.2  12585.1  12642.5  7300000      EUR
            2019-08-29  12637.2  12806.6  12633.8  12806.6  5650000      EUR
            2019-08-30  12767.6  12905.9  12756.9  12821.6  6040000      EUR

    """

    if not index or not isinstance(index, str):
        raise ValueError(
            "ERR#0047: index param is mandatory and should be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not isinstance(debug, bool):
        raise ValueError(
            "ERR#0033: debug argument can just be a boolean value, either True or False."
        )

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'indices', 'indices.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        indices = pd.read_csv(
            pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0059: indices file not found or errored.")

    if indices is None:
        raise IOError("ERR#0037: indices not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())

    if country_key not in get_index_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    indices = indices[indices['country'] == country_key]

    index = index.strip().lower()
    index_key = unidecode.unidecode(index)

    # Accent-insensitive view of the available names, in row order.
    known_names = [
        unidecode.unidecode(value.lower())
        for value in indices['name'].tolist()
    ]

    if index_key not in known_names:
        raise RuntimeError("ERR#0045: index " + index +
                           " not found, check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced index on Investing.com')

    # Prefer an exact lower-case match; otherwise fall back to the same
    # accent-insensitive comparison used by the validation above. Calling
    # idxmax() on an all-False mask would silently select the first row and
    # therefore retrieve the data of an unrelated index.
    exact_matches = indices['name'].str.lower() == index
    if exact_matches.any():
        row_label = exact_matches.idxmax()
    else:
        row_label = indices.index[known_names.index(index_key)]

    full_name = indices.loc[row_label, 'full_name']
    id_ = indices.loc[row_label, 'id']
    name = indices.loc[row_label, 'name']
    index_currency = indices.loc[row_label, 'currency']

    logger.info(str(index) + ' found on Investing.com')

    header = "Datos históricos " + full_name

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": "Daily",
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": user_agent.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    logger.info('Request sent to Investing.com!')

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                              ", try again later.")

    logger.info('Request to Investing.com data succeeded with code ' +
                str(req.status_code) + '!')

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    logger.info('Data parsing process starting...')

    result = list()

    for elements_ in path_:
        info = [nested_.text_content() for nested_ in elements_.xpath(".//td")]

        if info[0] == 'No se encontraron resultados':
            raise IndexError(
                "ERR#0046: index information unavailable or not found.")

        index_date = datetime.datetime.strptime(info[0].replace('.', '-'),
                                                '%d-%m-%Y')

        # Numbers are parsed as Spanish-formatted: '.' is removed as the
        # thousands separator and ',' is treated as the decimal mark.
        index_close = float(info[1].replace('.', '').replace(',', '.'))
        index_open = float(info[2].replace('.', '').replace(',', '.'))
        index_high = float(info[3].replace('.', '').replace(',', '.'))
        index_low = float(info[4].replace('.', '').replace(',', '.'))

        index_volume = _parse_index_volume(info[5])

        result.append(
            Data(index_date, index_open, index_high, index_low, index_close,
                 index_volume, index_currency))

    # The request asks for "sort_ord": "DESC", so the parsed rows only need
    # to be reversed when ascending order is requested.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    logger.info('Data parsing process finished...')

    if as_json:
        json_ = {
            'name': name,
            'recent': [value.index_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.index_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df


def _parse_index_volume(raw_volume):
    """Convert a volume cell such as '4,77M', '850K' or '1.234' into an int (0 when it is not numeric)."""
    for suffix, factor in (('K', 1000), ('M', 1000000), ('B', 1000000000)):
        if suffix in raw_volume:
            number = float(
                raw_volume.replace(suffix, '').replace('.', '').replace(
                    ',', '.'))
            # round() instead of plain truncation: float products such as
            # 1.001 * 1000 evaluate to 1000.9999999999999.
            return int(round(number * factor))

    # No magnitude suffix: the cell is either a plain number or a
    # placeholder, in which case the volume is reported as 0.
    try:
        return int(round(float(raw_volume.replace('.', '').replace(',', '.'))))
    except (ValueError, OverflowError):
        return 0
Exemple #7
0
def get_fund_recent_data(fund, country, as_json=False, order='ascending', debug=False):
    """
    Retrieve the recent historical data of the introduced `fund` from Investing via Web Scraping.

    The resulting data is returned either as a :obj:`pandas.DataFrame` or as a :obj:`json` string, sorted by
    date in `ascending` or `descending` order.

    Args:
        fund (:obj:`str`): name of the fund to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the introduced fund is.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, `True` or `False`, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` string containing the retrieved
            recent data from the specified fund. The dataset contains the open, high, low and close
            values for the selected fund on market days.

            The returned data, in case the default arguments are used, will look like::

                date || open | high | low | close | currency
                -----||--------------------------------------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx

            but if `as_json=True` is defined, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: argument error.
        FileNotFoundError: funds resource file does not exist in the package.
        IOError: funds object/file not found or unable to retrieve.
        RuntimeError: introduced country or fund does not match any of the indexed ones, or the response
            contains no data rows.
        ConnectionError: if the POST request does not return a 200 status code.
        IndexError: if fund information was unavailable or not found.

    Examples:
        >>> investpy.get_fund_recent_data(fund='bbva multiactivo conservador pp', country='spain', as_json=False, order='ascending', debug=False)
                         Open   High    Low  Close Currency
            Date
            2019-08-13  1.110  1.110  1.110  1.110      EUR
            2019-08-16  1.109  1.109  1.109  1.109      EUR
            2019-08-19  1.114  1.114  1.114  1.114      EUR
            2019-08-20  1.112  1.112  1.112  1.112      EUR
            2019-08-21  1.115  1.115  1.115  1.115      EUR

    """

    if not fund:
        raise ValueError("ERR#0029: fund parameter is mandatory and must be a valid fund name.")

    if not isinstance(fund, str):
        raise ValueError("ERR#0028: fund argument needs to be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError("ERR#0033: debug argument can just be a boolean value, either True or False.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'funds', 'funds.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        funds = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0057: funds file not found or errored.")

    if funds is None:
        raise IOError("ERR#0005: funds object not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())

    if country_key not in get_fund_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    funds = funds[funds['country'] == country_key]

    fund = fund.strip().lower()
    fund_key = unidecode.unidecode(fund)

    # Accent-insensitive view of the available names, in row order.
    known_names = [unidecode.unidecode(value.lower()) for value in funds['name'].tolist()]

    if fund_key not in known_names:
        raise RuntimeError("ERR#0019: fund " + fund + " not found, check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced fund on Investing.com')

    # Prefer an exact lower-case match; otherwise fall back to the same
    # accent-insensitive comparison used by the validation above. Calling
    # idxmax() on an all-False mask would silently select the first row and
    # therefore retrieve the data of an unrelated fund.
    exact_matches = funds['name'].str.lower() == fund
    if exact_matches.any():
        row_label = exact_matches.idxmax()
    else:
        row_label = funds.index[known_names.index(fund_key)]

    symbol = funds.loc[row_label, 'symbol']
    id_ = funds.loc[row_label, 'id']
    name = funds.loc[row_label, 'name']
    fund_currency = funds.loc[row_label, 'currency']

    logger.info(str(fund) + ' found on Investing.com')

    header = "Datos históricos " + symbol

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": "Daily",
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": user_agent.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    logger.info('Request sent to Investing.com!')

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    logger.info('Request to Investing.com data succeeded with code ' + str(req.status_code) + '!')

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    logger.info('Data parsing process starting...')

    result = list()

    for elements_ in path_:
        info = [nested_.text_content() for nested_ in elements_.xpath(".//td")]

        if info[0] == 'No se encontraron resultados':
            raise IndexError("ERR#0008: fund information unavailable or not found.")

        fund_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')

        # Numbers are parsed as Spanish-formatted: '.' is removed as the
        # thousands separator and ',' is treated as the decimal mark.
        fund_close = float(info[1].replace('.', '').replace(',', '.'))
        fund_open = float(info[2].replace('.', '').replace(',', '.'))
        fund_high = float(info[3].replace('.', '').replace(',', '.'))
        fund_low = float(info[4].replace('.', '').replace(',', '.'))

        # No volume is parsed for funds, so None is passed in that position.
        result.append(Data(fund_date, fund_open, fund_high, fund_low, fund_close, None, fund_currency))

    # The request asks for "sort_ord": "DESC", so the parsed rows only need
    # to be reversed when ascending order is requested.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    logger.info('Data parsing process finished...')

    if as_json:
        json_ = {'name': name,
                 'recent':
                     [value.fund_as_json() for value in result]
                 }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.fund_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df