def _data_retrieval(self, product, head, params):
    """Request and parse the historical data table of a product from Investing.com.

    The function posts ``params`` to the ``HistoricalDataAjax`` endpoint and turns every row
    of the ``curr_table`` HTML table found in the response into a :obj:`Data` record.

    Args:
        product (:obj:`str`): product type; the volume column is only parsed when it is one of
            `equities`, `indice` or `fxfuture`.
        head (:obj:`dict`): HTTP headers sent along with the request.
        params (:obj:`dict`): form fields sent in the POST body.

    Returns:
        :obj:`pandas.DataFrame`:
            Parsed rows indexed by `Date`, in the reverse of the order in which the rows appear in
            the response. If the response contains no table rows at all, nothing is built and
            ``None`` is returned.

    Raises:
        ConnectionError: raised if the request did not return a 200 status code.
        IndexError: raised if the table reports `No results found`.

    """

    # Only the `datetime` class is used by this block; `timezone` is imported locally.
    from datetime import timezone

    has_volume = product in ['equities', 'indice', 'fxfuture']

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    rows = fromstring(req.text).xpath(".//table[@id='curr_table']/tbody/tr")

    if not rows:
        return None

    result = []

    for row in rows:
        info = []

        for cell in row.xpath(".//td"):
            val = cell.get('data-real-value')
            if val is None and cell.text_content() == 'No results found':
                raise IndexError("ERR#0033: information unavailable or not found.")
            info.append(val)

        # Convert the epoch value in UTC: without `tz` the conversion uses the local timezone of
        # the machine, so the resulting calendar day would depend on where the code runs.
        day = datetime.fromtimestamp(int(info[0]), tz=timezone.utc).date()
        date_ = datetime(day.year, day.month, day.day)

        close_ = float(info[1].replace(',', ''))
        open_ = float(info[2].replace(',', ''))
        high_ = float(info[3].replace(',', ''))
        low_ = float(info[4].replace(',', ''))

        volume_ = int(info[5]) if has_volume else None

        result.append(Data(date_, open_, high_, low_, close_, volume_, None, None))

    result.reverse()

    df = pd.DataFrame.from_records([value.unknown_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
def get_certificate_historical_data(certificate, country, from_date, to_date, as_json=False, order='ascending',
                                    interval='Daily'):
    """
    This function retrieves historical data from the introduced certificate from Investing.com. So on, the
    historical data of the introduced certificate from the specified country in the specified date range will be
    retrieved and returned as a :obj:`pandas.DataFrame` if the parameters are valid and the request to
    Investing.com succeeds. Note that additionally some optional parameters can be specified: as_json and order,
    which let the user decide if the data is going to be returned as a :obj:`json` or not, and if the historical
    data is going to be ordered ascending or descending (where the index is the date), respectively.

    Date ranges spanning more than 19 calendar years are requested in several consecutive,
    non-overlapping chunks whose results are merged into a single output.

    Args:
        certificate (:obj:`str`): name of the certificate to retrieve historical data from.
        country (:obj:`str`): name of the country from where the certificate is.
        from_date (:obj:`str`): date formatted as `dd/mm/yyyy`, since when data is going to be retrieved.
        to_date (:obj:`str`): date formatted as `dd/mm/yyyy`, until when data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False and a
            :obj:`json` if True.
        order (:obj:`str`, optional): to define the order of the retrieved data which can either be ascending or
            descending.
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be
            `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function can return either a :obj:`pandas.DataFrame` or a :obj:`json` object, containing the
            retrieved historical data of the specified certificate from the specified country. So on, the
            resulting dataframe contains the OHLC values for the selected certificate on market days.

            The returned data in case we use default arguments will look like::

                Date || Open | High | Low | Close
                -----||------|------|-----|-------
                xxxx || xxxx | xxxx | xxx | xxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        {
                            date: 'dd/mm/yyyy',
                            open: x,
                            high: x,
                            low: x,
                            close: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        IOError: raised if certificates object/file was not found or unable to retrieve.
        RuntimeError: raised if the introduced certificate/country was not found or did not match any of the
            existing ones, or if the response could not be scraped.
        ConnectionError: raised if connection to Investing.com could not be established.
        IndexError: raised if no historical data at all was found in Investing.com for the requested range.

    Examples:
        >>> investpy.get_certificate_historical_data(certificate='COMMERZBANK Call ALIBABA GROUP', country='france', from_date='01/01/2010', to_date='01/01/2019')
                         Open   High    Low  Close
            Date
            2018-03-14  39.77  39.77  39.77  39.77
            2018-03-15  48.18  48.18  48.18  46.48
            2018-03-16  46.48  46.48  46.48  46.48
            2018-03-19  40.73  40.73  40.73  40.73
            2018-03-20  44.61  44.61  44.61  44.61

    """

    # Only the `datetime` class is used elsewhere in this block; the extra names are imported locally.
    from datetime import timedelta, timezone

    if not certificate or not isinstance(certificate, str):
        raise ValueError("ERR#0100: certificate param is mandatory and should be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    # strptime raises TypeError (not ValueError) for non-str input, so both are mapped to the
    # documented ValueError.
    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except (TypeError, ValueError):
        raise ValueError("ERR#0011: incorrect from_date date format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except (TypeError, ValueError):
        raise ValueError("ERR#0012: incorrect to_date format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError("ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    # Split the requested range into chunks of at most 19 years each.
    intervals = []
    chunk_start = start_date

    while end_date.year - chunk_start.year > 19:
        try:
            chunk_end = chunk_start.replace(year=chunk_start.year + 19)
        except ValueError:
            # 29 February does not exist in the target year; fall back to 28 February.
            chunk_end = chunk_start.replace(year=chunk_start.year + 19, day=28)

        intervals.append({
            'start': chunk_start.strftime('%m/%d/%Y'),
            'end': chunk_end.strftime('%m/%d/%Y'),
        })

        # Start the next chunk one day later so the boundary day is not requested twice.
        chunk_start = chunk_end + timedelta(days=1)

    intervals.append({
        'start': chunk_start.strftime('%m/%d/%Y'),
        'end': end_date.strftime('%m/%d/%Y'),
    })

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'certificates', 'certificates.csv'))

    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0096: certificates file not found or errored.")

    certificates = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    if certificates is None:
        raise IOError("ERR#0097: certificates not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())

    if country_key not in get_certificate_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    certificates = certificates[certificates['country'] == country_key]

    certificate = certificate.strip().lower()

    # The same accent-insensitive key is used for the membership test AND for locating the row,
    # so the row that is read is always the one that matched.
    known_names = [unidecode.unidecode(value.lower()) for value in certificates['name'].tolist()]
    wanted_name = unidecode.unidecode(certificate)

    if wanted_name not in known_names:
        raise RuntimeError("ERR#0101: certificate " + certificate + " not found, check if it is correct.")

    position = known_names.index(wanted_name)

    symbol = certificates['symbol'].iloc[position]
    id_ = certificates['id'].iloc[position]
    name = certificates['name'].iloc[position]

    header = symbol + ' Historical Data'
    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    # One list of Data records per chunk that returned data, in chronological chunk order.
    final = []

    for chunk in intervals:
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": chunk['start'],
            "end_date": chunk['end'],
            "interval_sec": interval,
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        if not req.text:
            continue

        rows = fromstring(req.text).xpath(".//table[@id='curr_table']/tbody/tr")

        if not rows:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        result = []

        for row in rows:
            cells = row.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # This chunk holds no data; other chunks may still do.
                result = []
                break

            info = [cell.get('data-real-value') for cell in cells]

            # Convert the epoch value in UTC so the calendar day does not depend on the local
            # timezone of the machine.
            day = datetime.fromtimestamp(int(info[0]), tz=timezone.utc).date()
            certificate_date = datetime(day.year, day.month, day.day)

            certificate_close = float(info[1].replace(',', ''))
            certificate_open = float(info[2].replace(',', ''))
            certificate_high = float(info[3].replace(',', ''))
            certificate_low = float(info[4].replace(',', ''))

            result.append(Data(certificate_date, certificate_open, certificate_high, certificate_low,
                               certificate_close, None, None))

        if not result:
            continue

        # Rows are requested newest-first ("sort_ord": "DESC").
        if order in ['ascending', 'asc']:
            result.reverse()

        final.append(result)

    if not final:
        raise IndexError("ERR#0102: certificate information unavailable or not found.")

    # Chunks were collected oldest-first; flip them so a descending result is globally ordered.
    if order in ['descending', 'desc']:
        final.reverse()

    records = [value for chunk_records in final for value in chunk_records]

    if as_json:
        json_ = {
            'name': name,
            'historical': [value.certificate_as_json() for value in records]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.certificate_to_dict() for value in records])
    df.set_index('Date', inplace=True)

    return df
def get_certificate_recent_data(certificate, country, as_json=False, order='ascending', interval='Daily'):
    """
    This function retrieves recent historical data from the introduced certificate from Investing.com. So on,
    the recent data of the introduced certificate from the specified country will be retrieved and returned as a
    :obj:`pandas.DataFrame` if the parameters are valid and the request to Investing.com succeeds. Note that
    additionally some optional parameters can be specified: as_json and order, which let the user decide if the
    data is going to be returned as a :obj:`json` or not, and if the historical data is going to be ordered
    ascending or descending (where the index is the date), respectively.

    Args:
        certificate (:obj:`str`): name of the certificate to retrieve recent data from.
        country (:obj:`str`): name of the country from where the certificate is.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False and a
            :obj:`json` if True.
        order (:obj:`str`, optional): to define the order of the retrieved data which can either be ascending or
            descending.
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be
            `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` file containing the retrieved
            recent data from the specified certificate via argument. The dataset contains the OHLC values of
            the certificate.

            The returned data in case we use default arguments will look like::

                Date || Open | High | Low | Close
                -----||------|------|-----|-------
                xxxx || xxxx | xxxx | xxx | xxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if there was an argument error.
        IOError: raised if certificates object/file was not found or unable to retrieve.
        RuntimeError: raised if the introduced certificate does not match any of the indexed ones, or if the
            response could not be scraped.
        ConnectionError: raised if the request does not return 200 status code.
        IndexError: raised if certificate information was unavailable or not found.

    Examples:
        >>> investpy.get_certificate_recent_data(certificate='COMMERZBANK Call ALIBABA GROUP', country='france')
                        Open  High   Low  Close
            Date
            2019-11-27  5.47  5.47  5.47   5.47
            2019-12-05  5.52  5.52  5.52   5.52
            2019-12-10  5.37  5.37  5.37   5.37
            2019-12-12  6.27  6.27  6.27   6.27
            2019-12-16  6.80  6.80  6.80   6.80
            2019-12-20  7.50  7.50  7.50   7.50

    """

    # Only the `datetime` class is used elsewhere in this block; `timezone` is imported locally.
    from datetime import timezone

    if not certificate or not isinstance(certificate, str):
        raise ValueError("ERR#0100: certificate param is mandatory and should be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'certificates', 'certificates.csv'))

    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0096: certificates file not found or errored.")

    certificates = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    if certificates is None:
        raise IOError("ERR#0097: certificates not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())

    if country_key not in get_certificate_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    certificates = certificates[certificates['country'] == country_key]

    certificate = certificate.strip().lower()

    # The same accent-insensitive key is used for the membership test AND for locating the row,
    # so the row that is read is always the one that matched.
    known_names = [unidecode.unidecode(value.lower()) for value in certificates['name'].tolist()]
    wanted_name = unidecode.unidecode(certificate)

    if wanted_name not in known_names:
        raise RuntimeError("ERR#0101: certificate " + certificate + " not found, check if it is correct.")

    position = known_names.index(wanted_name)

    symbol = certificates['symbol'].iloc[position]
    id_ = certificates['id'].iloc[position]
    name = certificates['name'].iloc[position]

    header = symbol + ' Historical Data'

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    rows = fromstring(req.text).xpath(".//table[@id='curr_table']/tbody/tr")

    if not rows:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = []

    for row in rows:
        cells = row.xpath(".//td")

        if cells[0].text_content() == 'No results found':
            raise IndexError("ERR#0102: certificate information unavailable or not found.")

        info = [cell.get('data-real-value') for cell in cells]

        # Convert the epoch value in UTC so the calendar day does not depend on the local
        # timezone of the machine.
        day = datetime.fromtimestamp(int(info[0]), tz=timezone.utc).date()
        certificate_date = datetime(day.year, day.month, day.day)

        certificate_close = float(info[1].replace(',', ''))
        certificate_open = float(info[2].replace(',', ''))
        certificate_high = float(info[3].replace(',', ''))
        certificate_low = float(info[4].replace(',', ''))

        result.append(Data(certificate_date, certificate_open, certificate_high, certificate_low,
                           certificate_close, None, None))

    # Rows are requested newest-first ("sort_ord": "DESC").
    if order in ['ascending', 'asc']:
        result.reverse()

    if as_json:
        json_ = {
            'name': name,
            'recent': [value.certificate_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.certificate_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
def get_currency_cross_historical_data(currency_cross, from_date, to_date, as_json=False, order='ascending',
                                       interval='Daily'):
    """
    This function retrieves historical data from the introduced `currency_cross` from Investing
    via Web Scraping, for the specified date range. The resulting data can either be stored in a
    :obj:`pandas.DataFrame` or in a :obj:`json` file, with `ascending` or `descending` order.

    Date ranges spanning more than 20 calendar years are requested in several consecutive,
    non-overlapping chunks whose results are merged into a single output.

    Args:
        currency_cross (:obj:`str`): name of the currency cross to retrieve historical data from.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be
            `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` file containing the retrieved
            historical data from the specified currency_cross via argument. The dataset contains the open, high,
            low and close values for the selected currency_cross on market days, and the currency in which
            those values are presented.

            The return data in case we use default arguments will look like::

                Date || Open | High | Low | Close | Currency
                -----||------|------|-----|-------|---------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        dd/mm/yyyy: {
                            'open': x,
                            'high': x,
                            'low': x,
                            'close': x,
                            'currency' : x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if any of the introduced arguments is not valid.
        IOError: raised if currency_crosses object/file not found or unable to retrieve.
        RuntimeError: raised if the introduced currency_cross does not match any of the indexed ones, or if
            the response could not be scraped.
        ConnectionError: raised if the request does not return 200 status code.
        IndexError: raised if no currency_cross information at all was found for the requested range.

    Examples:
        >>> investpy.get_currency_cross_historical_data(currency_cross='EUR/USD', from_date='01/01/2018', to_date='01/01/2019')
                          Open    High     Low   Close Currency
            Date
            2018-01-01  1.2003  1.2014  1.1995  1.2010      USD
            2018-01-02  1.2013  1.2084  1.2003  1.2059      USD
            2018-01-03  1.2058  1.2070  1.2001  1.2014      USD
            2018-01-04  1.2015  1.2090  1.2004  1.2068      USD
            2018-01-05  1.2068  1.2085  1.2021  1.2030      USD

    """

    # Only the `datetime` class is used elsewhere in this block; the extra names are imported locally.
    from datetime import timedelta, timezone

    if not currency_cross or not isinstance(currency_cross, str):
        raise ValueError("ERR#0052: currency_cross param is mandatory and should be a str.")

    # strptime raises TypeError (not ValueError) for non-str input, so both are mapped to the
    # documented ValueError.
    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except (TypeError, ValueError):
        raise ValueError("ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except (TypeError, ValueError):
        raise ValueError("ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError("ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    # Split the requested range into chunks of at most 20 years each.
    intervals = []
    chunk_start = start_date

    while end_date.year - chunk_start.year > 20:
        try:
            chunk_end = chunk_start.replace(year=chunk_start.year + 20)
        except ValueError:
            # 29 February does not exist in the target year; fall back to 28 February.
            chunk_end = chunk_start.replace(year=chunk_start.year + 20, day=28)

        intervals.append({
            'start': chunk_start.strftime('%m/%d/%Y'),
            'end': chunk_end.strftime('%m/%d/%Y'),
        })

        # Start the next chunk one day later so the boundary day is not requested twice.
        chunk_start = chunk_end + timedelta(days=1)

    intervals.append({
        'start': chunk_start.strftime('%m/%d/%Y'),
        'end': end_date.strftime('%m/%d/%Y'),
    })

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'currency_crosses', 'currency_crosses.csv'))

    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0060: currency_crosses file not found or errored.")

    currency_crosses = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    if currency_crosses is None:
        raise IOError("ERR#0050: currency_crosses not found or unable to retrieve.")

    currency_cross = currency_cross.strip().lower()

    # The same accent-insensitive key is used for the membership test AND for locating the row,
    # so the row that is read is always the one that matched.
    known_names = [unidecode.unidecode(value.lower()) for value in currency_crosses['name'].tolist()]
    wanted_name = unidecode.unidecode(currency_cross)

    if wanted_name not in known_names:
        raise RuntimeError("ERR#0054: the introduced currency_cross " + str(currency_cross) + " does not exists.")

    position = known_names.index(wanted_name)

    id_ = currency_crosses['id'].iloc[position]
    name = currency_crosses['name'].iloc[position]
    currency = currency_crosses['second'].iloc[position]

    header = name + ' Historical Data'
    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    # One list of Data records per chunk that returned data, in chronological chunk order.
    final = []

    for chunk in intervals:
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": chunk['start'],
            "end_date": chunk['end'],
            "interval_sec": interval,
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        if not req.text:
            continue

        rows = fromstring(req.text).xpath(".//table[@id='curr_table']/tbody/tr")

        if not rows:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        result = []

        for row in rows:
            cells = row.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # This chunk holds no data; other chunks may still do.
                result = []
                break

            info = [cell.get('data-real-value') for cell in cells]

            # Convert the epoch value in UTC so the calendar day does not depend on the local
            # timezone of the machine.
            day = datetime.fromtimestamp(int(info[0]), tz=timezone.utc).date()
            currency_cross_date = datetime(day.year, day.month, day.day)

            currency_cross_close = float(info[1].replace(',', ''))
            currency_cross_open = float(info[2].replace(',', ''))
            currency_cross_high = float(info[3].replace(',', ''))
            currency_cross_low = float(info[4].replace(',', ''))

            result.append(Data(currency_cross_date, currency_cross_open, currency_cross_high,
                               currency_cross_low, currency_cross_close, None, currency))

        if not result:
            continue

        # Rows are requested newest-first ("sort_ord": "DESC").
        if order in ['ascending', 'asc']:
            result.reverse()

        final.append(result)

    if not final:
        raise IndexError("ERR#0055: currency_cross information unavailable or not found.")

    # Chunks were collected oldest-first; flip them so a descending result is globally ordered.
    if order in ['descending', 'desc']:
        final.reverse()

    records = [value for chunk_records in final for value in chunk_records]

    if as_json:
        json_ = {
            'name': name,
            'historical': [value.currency_cross_as_json() for value in records]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.currency_cross_to_dict() for value in records])
    df.set_index('Date', inplace=True)

    return df
def get_currency_cross_recent_data(currency_cross, as_json=False, order='ascending', interval='Daily'):
    """
    This function retrieves recent historical data from the introduced `currency_cross` as indexed in
    Investing.com via Web Scraping. The resulting data can either be stored in a :obj:`pandas.DataFrame` or in
    a :obj:`json` file, with `ascending` or `descending` order.

    Args:
        currency_cross (:obj:`str`): name of the currency_cross to retrieve recent historical data from.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be
            `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` file containing the retrieved
            recent data from the specified currency_cross via argument. The dataset contains the open, high, low,
            close and currency values for the selected currency_cross on market days.

            The return data in case we use default arguments will look like::

                Date || Open | High | Low | Close | Currency
                -----||------|------|-----|-------|---------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    recent: [
                        dd/mm/yyyy: {
                            'open': x,
                            'high': x,
                            'low': x,
                            'close': x,
                            'currency' : x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if any of the introduced arguments was not valid or errored.
        IOError: raised if currency_crosses object/file not found or unable to retrieve.
        RuntimeError: raised if the introduced currency_cross does not match any of the indexed ones, or if
            the response could not be scraped.
        ConnectionError: raised if the request did not return 200 status code.
        IndexError: raised if currency_cross information was unavailable or not found.

    Examples:
        >>> investpy.get_currency_cross_recent_data(currency_cross='EUR/USD')
                          Open    High     Low   Close Currency
            Date
            2019-08-27  1.1101  1.1116  1.1084  1.1091      USD
            2019-08-28  1.1090  1.1099  1.1072  1.1078      USD
            2019-08-29  1.1078  1.1093  1.1042  1.1057      USD
            2019-08-30  1.1058  1.1062  1.0963  1.0991      USD
            2019-09-02  1.0990  1.1000  1.0958  1.0968      USD

    """

    # Only the `datetime` class is used elsewhere in this block; `timezone` is imported locally.
    from datetime import timezone

    if not currency_cross or not isinstance(currency_cross, str):
        raise ValueError("ERR#0052: currency_cross param is mandatory and should be a str.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'currency_crosses', 'currency_crosses.csv'))

    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0060: currency_crosses file not found or errored.")

    currency_crosses = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    if currency_crosses is None:
        raise IOError("ERR#0050: currency_crosses not found or unable to retrieve.")

    currency_cross = currency_cross.strip().lower()

    # The same accent-insensitive key is used for the membership test AND for locating the row,
    # so the row that is read is always the one that matched.
    known_names = [unidecode.unidecode(value.lower()) for value in currency_crosses['name'].tolist()]
    wanted_name = unidecode.unidecode(currency_cross)

    if wanted_name not in known_names:
        raise RuntimeError("ERR#0054: the introduced currency_cross " + str(currency_cross) + " does not exists.")

    position = known_names.index(wanted_name)

    id_ = currency_crosses['id'].iloc[position]
    name = currency_crosses['name'].iloc[position]
    currency = currency_crosses['second'].iloc[position]

    header = name + ' Historical Data'

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    rows = fromstring(req.text).xpath(".//table[@id='curr_table']/tbody/tr")

    if not rows:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = []

    for row in rows:
        cells = row.xpath(".//td")

        if cells[0].text_content() == 'No results found':
            raise IndexError("ERR#0055: currency_cross information unavailable or not found.")

        info = [cell.get('data-real-value') for cell in cells]

        # Convert the epoch value in UTC so the calendar day does not depend on the local
        # timezone of the machine.
        day = datetime.fromtimestamp(int(info[0]), tz=timezone.utc).date()
        currency_cross_date = datetime(day.year, day.month, day.day)

        currency_cross_close = float(info[1].replace(',', ''))
        currency_cross_open = float(info[2].replace(',', ''))
        currency_cross_high = float(info[3].replace(',', ''))
        currency_cross_low = float(info[4].replace(',', ''))

        result.append(Data(currency_cross_date, currency_cross_open, currency_cross_high,
                           currency_cross_low, currency_cross_close, None, currency))

    # Rows are requested newest-first ("sort_ord": "DESC").
    if order in ['ascending', 'asc']:
        result.reverse()

    if as_json:
        json_ = {
            'name': name,
            'recent': [value.currency_cross_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.currency_cross_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
def get_stock_recent_data(stock, country, as_json=False, order='ascending', interval='Daily'):
    """
    This function retrieves recent historical data from the introduced stock from Investing.com.
    The stock symbol is looked up (case and accent insensitively) among the stocks listed for the
    given country in the packaged `stocks.csv` file, and the matching Investing.com identifier is
    used to request the recent data table, which is parsed row by row.

    Args:
        stock (:obj:`str`): symbol of the stock to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the stock is.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False
            and a :obj:`json` if True.
        order (:obj:`str`, optional):
            to define the order of the retrieved data which can either be ascending (`asc`) or
            descending (`desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it
            can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            A :obj:`pandas.DataFrame` indexed by `Date` when `as_json` is False, otherwise a JSON
            string with the keys `name` and `recent`.

            The resulting recent data, in case that the default parameters were applied, will
            look like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but in case that as_json parameter was defined as True, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: 'dd/mm/yyyy',
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        FileNotFoundError: raised if the stocks file was not found among the package resources.
        RuntimeError:
            raised if the introduced stock/country was not found or did not match any of the
            existing ones, or if no data rows could be scraped from the response.
        ConnectionError: raised if Investing.com did not answer with a 200 status code.
        IndexError: raised if stock recent data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_stock_recent_data(stock='bbva', country='spain')
                         Open   High    Low  Close    Volume Currency
            Date
            2019-08-13  4.263  4.395  4.230  4.353  27250000      EUR
            2019-08-14  4.322  4.325  4.215  4.244  36890000      EUR
            2019-08-15  4.281  4.298  4.187  4.234  21340000      EUR
            2019-08-16  4.234  4.375  4.208  4.365  46080000      EUR
            2019-08-19  4.396  4.425  4.269  4.269  18950000      EUR

    """

    if not stock:
        raise ValueError("ERR#0013: stock parameter is mandatory and must be a valid stock name.")

    if not isinstance(stock, str):
        raise ValueError("ERR#0027: stock argument needs to be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    # Empty, non-str and unknown intervals are all reported with the same error.
    if not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'stocks', 'stocks.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0056: stocks file not found or errored.")
    stocks = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    if unidecode.unidecode(country.lower()) not in get_stock_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    stocks = stocks[stocks['country'] == unidecode.unidecode(country.lower())]

    stock = stock.strip()
    stock = stock.lower()

    # The same normalised values are used for both the existence check and the row
    # lookup, so an accent-only difference can never select an unrelated row.
    normalized_symbols = [unidecode.unidecode(value.lower()) for value in stocks['symbol'].tolist()]
    target = unidecode.unidecode(stock)

    if target not in normalized_symbols:
        raise RuntimeError("ERR#0018: stock " + stock + " not found, check if it is correct.")

    # Label of the first matching row.
    row_label = stocks.index[normalized_symbols.index(target)]

    symbol = stocks.loc[row_label, 'symbol']
    id_ = stocks.loc[row_label, 'id']
    name = stocks.loc[row_label, 'name']
    stock_currency = stocks.loc[row_label, 'currency']

    header = symbol + ' Historical Data'

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = list()

    for elements_ in path_:
        cells = elements_.xpath(".//td")

        if cells[0].text_content() == 'No results found':
            raise IndexError("ERR#0007: stock information unavailable or not found.")

        info = [cell.get('data-real-value') for cell in cells]

        # NOTE(review): fromtimestamp() converts using the local timezone of the
        # machine; confirm this matches the timezone of the scraped timestamps.
        stock_date = datetime.fromtimestamp(int(info[0])).date()

        stock_close = float(info[1].replace(',', ''))
        stock_open = float(info[2].replace(',', ''))
        stock_high = float(info[3].replace(',', ''))
        stock_low = float(info[4].replace(',', ''))

        # The volume may be a plain number or carry a K/M/B magnitude suffix. Values
        # that cannot be parsed at all fall back to 0.
        raw_volume = (info[5] or '').replace(',', '')
        multiplier = 1
        for suffix, factor in (('K', 1e3), ('M', 1e6), ('B', 1e9)):
            if suffix in raw_volume:
                raw_volume = raw_volume.replace(suffix, '')
                multiplier = factor
                break

        try:
            stock_volume = int(float(raw_volume) * multiplier)
        except ValueError:
            stock_volume = 0

        result.append(Data(stock_date, stock_open, stock_high, stock_low,
                           stock_close, stock_volume, stock_currency))

    # Rows are requested newest first ("sort_ord": "DESC").
    if order in ['ascending', 'asc']:
        result = result[::-1]

    if as_json:
        json_ = {
            'name': name,
            'recent': [value.stock_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.stock_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
def get_bond_recent_data(bond, country, as_json=False, order='ascending', debug=False):
    """
    This function retrieves recent historical data from the introduced bond from Investing.com.
    The bond name is looked up (case and accent insensitively) among the bonds listed for the
    given country in the packaged `bonds.csv` file, and the matching Investing.com identifier is
    used to request the daily recent data table, which is parsed row by row.

    Args:
        bond (:obj:`str`): name of the bond to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the bond is.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False
            and a :obj:`json` if True.
        order (:obj:`str`, optional):
            to define the order of the retrieved data which can either be ascending (`asc`) or
            descending (`desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, either True or False,
            respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            A :obj:`pandas.DataFrame` indexed by `Date` when `as_json` is False, otherwise a JSON
            string with the keys `name` and `recent`.

            The resulting recent data, in case that the default parameters were applied, will
            look like::

                Date || Open | High | Low | Close
                -----||------|------|-----|-------
                xxxx || xxxx | xxxx | xxx | xxxxx

            but in case that as_json parameter was defined as True, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: 'dd/mm/yyyy',
                            open: x,
                            high: x,
                            low: x,
                            close: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        FileNotFoundError: raised if the bonds file was not found among the package resources.
        RuntimeError:
            raised if the introduced bond/country was not found or did not match any of the
            existing ones, or if no data rows could be scraped from the response.
        ConnectionError: raised if Investing.com did not answer with a 200 status code.
        IndexError: raised if bond recent data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_bond_recent_data(bond='Argentina 3Y', country='argentina')
                          Open    High     Low   Close
            Date
            2019-09-23  52.214  52.214  52.214  52.214
            2019-09-24  52.323  52.323  52.323  52.323
            2019-09-25  52.432  52.432  52.432  52.432
            2019-09-26  52.765  52.765  52.765  52.765
            2019-09-27  52.876  52.876  52.876  52.876

    """

    if not bond:
        raise ValueError("ERR#0066: bond parameter is mandatory and must be a valid bond name.")

    if not isinstance(bond, str):
        raise ValueError("ERR#0067: bond argument needs to be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError("ERR#0033: debug argument can just be a boolean value, either True or False.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'bonds', 'bonds.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0064: bonds file not found or errored.")
    bonds = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    if unidecode.unidecode(country.lower()) not in get_bond_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    bonds = bonds[bonds['country'] == unidecode.unidecode(country.lower())]

    bond = bond.strip()
    bond = bond.lower()

    # The same normalised values are used for both the existence check and the row
    # lookup, so an accent-only difference can never select an unrelated row.
    normalized_names = [unidecode.unidecode(value.lower()) for value in bonds['name'].tolist()]
    target = unidecode.unidecode(bond)

    if target not in normalized_names:
        raise RuntimeError("ERR#0068: bond " + bond + " not found, check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')

    # debug was validated as a bool above, so the logger is disabled unless debug is True.
    logger.disabled = not debug

    logger.info('Searching introduced bond on Investing.com')

    # Label of the first matching row.
    row_label = bonds.index[normalized_names.index(target)]

    id_ = bonds.loc[row_label, 'id']
    name = bonds.loc[row_label, 'name']
    full_name = bonds.loc[row_label, 'full_name']

    logger.info(str(bond) + ' found on Investing.com')

    header = full_name + " Bond Yield Historical Data"

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": "Daily",
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": user_agent.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    logger.info('Request sent to Investing.com!')

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    logger.info('Request to Investing.com data succeeded with code ' + str(req.status_code) + '!')

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = list()

    logger.info('Data parsing process starting...')

    for elements_ in path_:
        cells = elements_.xpath(".//td")

        # A placeholder row carries no data-real-value attributes; report it as the
        # documented IndexError instead of failing later on int(None).
        if cells[0].text_content() == 'No results found':
            raise IndexError("ERR#0069: bond information unavailable or not found.")

        info = [cell.get('data-real-value') for cell in cells]

        # NOTE(review): fromtimestamp() converts using the local timezone of the
        # machine; confirm this matches the timezone of the scraped timestamps.
        bond_date = datetime.fromtimestamp(int(info[0])).date()

        # Thousands separators are stripped, as done for the other products in this file.
        bond_close = float(info[1].replace(',', ''))
        bond_open = float(info[2].replace(',', ''))
        bond_high = float(info[3].replace(',', ''))
        bond_low = float(info[4].replace(',', ''))

        result.append(Data(bond_date, bond_open, bond_high, bond_low, bond_close, None, None))

    # Rows are requested newest first ("sort_ord": "DESC").
    if order in ['ascending', 'asc']:
        result = result[::-1]

    logger.info('Data parsing process finished...')

    if as_json:
        json_ = {
            'name': name,
            'recent': [value.bond_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.bond_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
def get_crypto_recent_data(crypto, as_json=False, order='ascending', interval='Daily'):
    """
    This function retrieves recent historical data from the introduced crypto from Investing.com.
    The crypto name is looked up (case and accent insensitively) in the packaged `cryptos.csv`
    file, and the matching Investing.com identifier is used to request the recent data table,
    which is parsed row by row.

    Args:
        crypto (:obj:`str`): name of the crypto currency to retrieve data from.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False
            and a :obj:`json` if True.
        order (:obj:`str`, optional):
            to define the order of the retrieved data which can either be ascending (`asc`) or
            descending (`desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it
            can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            A :obj:`pandas.DataFrame` indexed by `Date` when `as_json` is False, otherwise a JSON
            string with the keys `name` and `recent`.

            The resulting recent data, in case that the default parameters were applied, will
            look like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but in case that as_json parameter was defined as True, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: 'dd/mm/yyyy',
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError:
            raised whenever any of the introduced arguments is not valid or errored, or if the
            selected crypto currency is flagged as unavailable in the cryptos file.
        FileNotFoundError: raised if the cryptos file was not found among the package resources.
        RuntimeError:
            raised if the introduced crypto name was not found or did not match any of the
            existing ones, or if no data rows could be scraped from the response.
        ConnectionError: raised if Investing.com did not answer with a 200 status code.
        IndexError: raised if crypto recent data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_crypto_recent_data(crypto='bitcoin')
                          Open     High     Low   Close   Volume Currency
            Date
            2019-10-25  7422.8   8697.7  7404.9  8658.3  1177632      USD
            2019-10-26  8658.4  10540.0  8061.8  9230.6  1784005      USD
            2019-10-27  9230.6   9773.2  9081.0  9529.6  1155038      USD
            2019-10-28  9530.1   9866.9  9202.5  9207.2  1039295      USD
            2019-10-29  9206.5   9531.3  9125.3  9411.3   918477      USD

    """

    if not crypto:
        raise ValueError("ERR#0083: crypto parameter is mandatory and must be a valid crypto name.")

    if not isinstance(crypto, str):
        raise ValueError("ERR#0084: crypto argument needs to be a str.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    # Empty, non-str and unknown intervals are all reported with the same error.
    if not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'crypto', 'cryptos.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0081: cryptos file not found or errored.")
    cryptos = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    crypto = crypto.strip()
    crypto = crypto.lower()

    # The same normalised values are used for both the existence check and the row
    # lookup, so an accent-only difference can never select an unrelated row.
    normalized_names = [unidecode.unidecode(value.lower()) for value in cryptos['name'].tolist()]
    target = unidecode.unidecode(crypto)

    if target not in normalized_names:
        raise RuntimeError("ERR#0085: crypto currency: " + crypto + ", not found, check if it is correct.")

    # Label of the first matching row.
    row_label = cryptos.index[normalized_names.index(target)]

    status = cryptos.loc[row_label, 'status']
    if status == 'unavailable':
        raise ValueError("ERR#0086: the selected crypto currency is not available for retrieval in Investing.com.")

    crypto_name = cryptos.loc[row_label, 'name']
    crypto_id = cryptos.loc[row_label, 'id']
    crypto_currency = cryptos.loc[row_label, 'currency']

    header = crypto_name + ' Historical Data'

    params = {
        "curr_id": crypto_id,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = list()

    for elements_ in path_:
        cells = elements_.xpath(".//td")

        if cells[0].text_content() == 'No results found':
            raise IndexError("ERR#0087: crypto information unavailable or not found.")

        info = [cell.get('data-real-value') for cell in cells]

        # Midnight datetime of the row's day.
        # NOTE(review): fromtimestamp() converts using the local timezone of the
        # machine; confirm this matches the timezone of the scraped timestamps.
        stamp = datetime.fromtimestamp(int(info[0]))
        crypto_date = datetime(stamp.year, stamp.month, stamp.day)

        crypto_close = float(info[1].replace(',', ''))
        crypto_open = float(info[2].replace(',', ''))
        crypto_high = float(info[3].replace(',', ''))
        crypto_low = float(info[4].replace(',', ''))

        # Thousands separators are stripped, as done for the prices above.
        crypto_volume = int(info[5].replace(',', ''))

        result.append(Data(crypto_date, crypto_open, crypto_high, crypto_low,
                           crypto_close, crypto_volume, crypto_currency))

    # Rows are requested newest first ("sort_ord": "DESC").
    if order in ['ascending', 'asc']:
        result = result[::-1]

    if as_json:
        json_ = {
            'name': crypto_name,
            'recent': [value.crypto_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.crypto_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
def get_fund_recent_data(fund, country, as_json=False, order='ascending', debug=False):
    """
    This function retrieves recent historical data from the introduced `fund` from Investing
    via Web Scraping. The fund name is looked up (case and accent insensitively) among the funds
    listed for the given country in the packaged `funds.csv` file. The data is requested from
    the Spanish site (`es.investing.com`), so dates and numbers are parsed from their Spanish
    text representation (`dd.mm.yyyy`, `.` as thousands separator and `,` as decimal separator).

    Args:
        fund (:obj:`str`): name of the fund to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the introduced fund is.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame`
            or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or
            `descending`, `desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, `True` or `False`,
            respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            A :obj:`pandas.DataFrame` indexed by `Date` when `as_json` is False, otherwise a JSON
            string with the keys `name` and `recent`.

            The returned data in case we use default arguments will look like::

                Date || Open | High | Low | Close | Currency
                -----||------|------|-----|-------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        FileNotFoundError: raised if the funds file was not found among the package resources.
        RuntimeError:
            raised if the introduced fund/country was not found or did not match any of the
            existing ones, or if no data rows could be scraped from the response.
        ConnectionError: raised if Investing.com did not answer with a 200 status code.
        IndexError: raised if fund information was unavailable or not found.

    Examples:
        >>> investpy.get_fund_recent_data(fund='bbva multiactivo conservador pp', country='spain', as_json=False, order='ascending', debug=False)
                         Open   High    Low  Close Currency
            Date
            2019-08-13  1.110  1.110  1.110  1.110      EUR
            2019-08-16  1.109  1.109  1.109  1.109      EUR
            2019-08-19  1.114  1.114  1.114  1.114      EUR
            2019-08-20  1.112  1.112  1.112  1.112      EUR
            2019-08-21  1.115  1.115  1.115  1.115      EUR

    """

    if not fund:
        raise ValueError("ERR#0029: fund parameter is mandatory and must be a valid fund name.")

    if not isinstance(fund, str):
        raise ValueError("ERR#0028: fund argument needs to be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError("ERR#0033: debug argument can just be a boolean value, either True or False.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'funds', 'funds.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0057: funds file not found or errored.")
    funds = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    if unidecode.unidecode(country.lower()) not in get_fund_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    funds = funds[funds['country'] == unidecode.unidecode(country.lower())]

    fund = fund.strip()
    fund = fund.lower()

    # The same normalised values are used for both the existence check and the row
    # lookup, so an accent-only difference can never select an unrelated row.
    normalized_names = [unidecode.unidecode(value.lower()) for value in funds['name'].tolist()]
    target = unidecode.unidecode(fund)

    if target not in normalized_names:
        raise RuntimeError("ERR#0019: fund " + fund + " not found, check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')

    # debug was validated as a bool above, so the logger is disabled unless debug is True.
    logger.disabled = not debug

    logger.info('Searching introduced fund on Investing.com')

    # Label of the first matching row.
    row_label = funds.index[normalized_names.index(target)]

    symbol = funds.loc[row_label, 'symbol']
    id_ = funds.loc[row_label, 'id']
    name = funds.loc[row_label, 'name']
    fund_currency = funds.loc[row_label, 'currency']

    logger.info(str(fund) + ' found on Investing.com')

    header = "Datos históricos " + symbol

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": "Daily",
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": user_agent.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    logger.info('Request sent to Investing.com!')

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    logger.info('Request to Investing.com data succeeded with code ' + str(req.status_code) + '!')

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = list()

    logger.info('Data parsing process starting...')

    for elements_ in path_:
        info = [cell.text_content() for cell in elements_.xpath(".//td")]

        if info[0] == 'No se encontraron resultados':
            raise IndexError("ERR#0008: fund information unavailable or not found.")

        # `datetime` is used as the class here, as everywhere else in this file.
        fund_date = datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')

        # Spanish number format: drop the thousands dots, then turn the decimal comma into a dot.
        fund_close = float(info[1].replace('.', '').replace(',', '.'))
        fund_open = float(info[2].replace('.', '').replace(',', '.'))
        fund_high = float(info[3].replace('.', '').replace(',', '.'))
        fund_low = float(info[4].replace('.', '').replace(',', '.'))

        result.append(Data(fund_date, fund_open, fund_high, fund_low, fund_close, None, fund_currency))

    # Rows are requested newest first ("sort_ord": "DESC").
    if order in ['ascending', 'asc']:
        result = result[::-1]

    logger.info('Data parsing process finished...')

    if as_json:
        json_ = {
            'name': name,
            'recent': [value.fund_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.fund_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
def get_etf_recent_data(etf, country, as_json=False, order='ascending', interval='Daily'):
    """
    This function retrieves recent historical data from the introduced `etf` from Investing
    via Web Scraping. The etf name is looked up (case and accent insensitively) among the etfs
    listed for the given country in the packaged `etfs.csv` file, and the matching Investing.com
    identifier is used to request the recent data table, which is parsed row by row. When several
    etfs of that country share the introduced name, a warning is issued and the first one is used.

    Args:
        etf (:obj:`str`): name of the etf to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the etf is.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame`
            or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or
            `descending`, `desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it
            can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            A :obj:`pandas.DataFrame` indexed by `Date` when `as_json` is False, otherwise a JSON
            string with the keys `name` and `recent`.

            The returned data in case we use default arguments will look like::

                Date || Open | High | Low | Close | Currency | Exchange
                -----||------|------|-----|-------|----------|---------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            currency: x,
                            exchange: x
                        },
                        ...
                    ]
                }

            NOTE(review): this function only passes the currency to `Data`; confirm whether the
            `exchange` field shown above is still produced by `Data.etf_to_dict`/`etf_as_json`.

    Raises:
        ValueError: raised whenever any of the arguments is not valid or errored.
        FileNotFoundError: raised if the etfs file was not found among the package resources.
        RuntimeError:
            raised if the introduced etf/country does not match any of the indexed ones, or if
            no data rows could be scraped from the response.
        ConnectionError: raised if Investing.com did not answer with a 200 status code.
        IndexError: raised if etf information was unavailable or not found.

    Examples:
        >>> investpy.get_etf_recent_data(etf='bbva accion dj eurostoxx 50', country='spain')
                          Open    High     Low   Close Currency Exchange
            Date
            2019-08-13  33.115  33.780  32.985  33.585      EUR   Madrid
            2019-08-14  33.335  33.335  32.880  32.905      EUR   Madrid
            2019-08-15  32.790  32.925  32.455  32.845      EUR   Madrid
            2019-08-16  33.115  33.200  33.115  33.305      EUR   Madrid
            2019-08-19  33.605  33.735  33.490  33.685      EUR   Madrid

    """

    if not etf:
        raise ValueError("ERR#0031: etf parameter is mandatory and must be a valid etf name.")

    if not isinstance(etf, str):
        raise ValueError("ERR#0030: etf argument needs to be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    # Empty, non-str and unknown intervals are all reported with the same error.
    if not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'etfs', 'etfs.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0058: etfs file not found or errored.")
    etfs = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    if unidecode.unidecode(country.lower()) not in get_etf_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    etfs = etfs[etfs['country'] == unidecode.unidecode(country.lower())]

    etf = etf.strip()
    etf = etf.lower()

    # The same normalised values are used for the existence check, the duplicate
    # warning and the row lookup, so an accent-only difference can never select an
    # unrelated row.
    normalized_names = [unidecode.unidecode(value.lower()) for value in etfs['name'].tolist()]
    target = unidecode.unidecode(etf)

    if target not in normalized_names:
        raise RuntimeError("ERR#0019: etf " + etf + " not found, check if it is correct.")

    if normalized_names.count(target) > 1:
        warnings.warn('Note that the displayed information can differ depending on the stock exchange.', Warning)

    # Label of the first matching row.
    row_label = etfs.index[normalized_names.index(target)]

    symbol = etfs.loc[row_label, 'symbol']
    id_ = etfs.loc[row_label, 'id']
    name = etfs.loc[row_label, 'name']
    etf_currency = etfs.loc[row_label, 'currency']

    header = symbol + ' Historical Data'

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = list()

    for elements_ in path_:
        cells = elements_.xpath(".//td")

        if cells[0].text_content() == 'No results found':
            raise IndexError("ERR#0010: etf information unavailable or not found.")

        info = [cell.get('data-real-value') for cell in cells]

        # NOTE(review): fromtimestamp() converts using the local timezone of the
        # machine; confirm this matches the timezone of the scraped timestamps.
        etf_date = datetime.fromtimestamp(int(info[0])).date()

        etf_close = float(info[1].replace(',', ''))
        etf_open = float(info[2].replace(',', ''))
        etf_high = float(info[3].replace(',', ''))
        etf_low = float(info[4].replace(',', ''))

        result.append(Data(etf_date, etf_open, etf_high, etf_low, etf_close, None, etf_currency))

    # Rows are requested newest first ("sort_ord": "DESC").
    if order in ['ascending', 'asc']:
        result = result[::-1]

    if as_json:
        json_ = {
            'name': name,
            'recent': [value.etf_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.etf_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
def get_commodity_historical_data(commodity, from_date, to_date, country=None, as_json=False, order='ascending', interval='Daily'):
    """
    This function retrieves historical data of the introduced commodity from Investing.com for the
    specified date range. Investing.com limits the amount of data returned per request, so the range is
    split into consecutive, non-overlapping chunks of at most 19 years; one request is sent per chunk and
    the partial results are merged into a single output.

    Args:
        commodity (:obj:`str`): name of the commodity to retrieve historical data from.
        from_date (:obj:`str`): date formatted as `dd/mm/yyyy`, since when data is going to be retrieved.
        to_date (:obj:`str`): date formatted as `dd/mm/yyyy`, until when data is going to be retrieved.
        country (:obj:`str`, optional):
            name of the country to retrieve the commodity data from (if there is more than one country
            that provides data from the same commodity).
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False and a
            :obj:`json` if True.
        order (:obj:`str`, optional):
            to define the order of the retrieved data which can either be ascending (`ascending`, `asc`)
            or descending (`descending`, `desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also
            be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` (indexed by `Date`) or a :obj:`json`
            string containing the retrieved historical data of the specified commodity, covering every
            requested chunk of the date range.

            The returned data in case we use default arguments will look like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but in case that as_json parameter was defined as True, then the output will be::

                {
                    name: name,
                    historical: [
                        {
                            date: 'dd/mm/yyyy',
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        IOError: raised if commodities object/file was not found or unable to retrieve.
        RuntimeError: raised if the introduced commodity/country was not found or did not match any of the
            existing ones, or if the response could not be scraped.
        ConnectionError: raised if connection to Investing.com could not be established.
        IndexError: raised if commodity historical data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_commodity_historical_data(commodity='gold', from_date='01/01/2018', to_date='01/01/2019')
                          Open    High     Low   Close  Volume Currency
            Date
            2018-01-01  1305.8  1309.7  1304.6  1308.7       0      USD
            2018-01-02  1370.5  1370.5  1370.5  1370.5      97      USD
            2018-01-03  1372.0  1372.0  1369.0  1374.2      22      USD
            2018-01-04  1363.4  1375.6  1362.7  1377.4      13      USD
            2018-01-05  1377.8  1377.8  1377.8  1378.4      10      USD

    """

    # Function-scope import so this block does not depend on the file's import section.
    from datetime import timedelta

    if not commodity or not isinstance(commodity, str):
        raise ValueError("ERR#0078: commodity parameter is mandatory and must be a valid commodity name.")

    if country is not None and not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not interval or not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError("ERR#0011: incorrect from_date date format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError("ERR#0012: incorrect to_date format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError("ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    # Split the requested range into chunks of at most 19 years each.
    intervals = []
    while True:
        if end_date.year - start_date.year > 19:
            try:
                chunk_end = start_date.replace(year=start_date.year + 19)
            except ValueError:
                # 29 February never exists 19 years later; fall back to 28 February.
                chunk_end = start_date.replace(year=start_date.year + 19, day=28)

            intervals.append({
                'start': start_date.strftime('%m/%d/%Y'),
                'end': chunk_end.strftime('%m/%d/%Y'),
            })

            # timedelta rolls over month/year ends, unlike replace(day=day + 1).
            start_date = chunk_end + timedelta(days=1)
        else:
            intervals.append({
                'start': start_date.strftime('%m/%d/%Y'),
                'end': end_date.strftime('%m/%d/%Y'),
            })
            break

    interval_limit = len(intervals)

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'commodities', 'commodities.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        commodities = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0075: commodities file not found or errored.")

    if commodities is None:
        raise IOError("ERR#0076: commodities not found or unable to retrieve.")

    commodity = commodity.strip()
    commodity = commodity.lower()

    if unidecode.unidecode(commodity) not in [unidecode.unidecode(value.lower()) for value in commodities['name'].tolist()]:
        raise RuntimeError("ERR#0079: commodity " + commodity + " not found, check if it is correct.")

    def _name_mask(frame):
        # Same accent-insensitive comparison as the membership check above.
        return frame['name'].str.lower().map(unidecode.unidecode) == unidecode.unidecode(commodity)

    if country is None:
        found_commodities = commodities[_name_mask(commodities)]

        if len(found_commodities) > 1:
            msg = "Note that the displayed commodity data can differ depending on the country. " \
                "If you want to retrieve " + commodity + " data from either " + \
                " or ".join(found_commodities['country'].tolist()) + ", specify the country parameter."
            warnings.warn(msg, Warning)

        del found_commodities
    else:
        if unidecode.unidecode(country.lower()) not in commodities['country'].unique().tolist():
            raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

        commodities = commodities[commodities['country'] == unidecode.unidecode(country.lower())]

    matches = _name_mask(commodities)

    # idxmax() on an all-False mask returns the first row, so a commodity that is
    # not listed for the requested country must be rejected explicitly.
    if not matches.any():
        raise RuntimeError("ERR#0079: commodity " + commodity + " not found, check if it is correct.")

    row_label = matches.idxmax()

    full_name = commodities.loc[row_label, 'full_name']
    id_ = commodities.loc[row_label, 'id']
    name = commodities.loc[row_label, 'name']
    currency = commodities.loc[row_label, 'currency']

    header = full_name + ' Historical Data'

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    # One list of Data objects per chunk that returned rows, in chronological chunk order.
    final = list()

    for interval_counter, span in enumerate(intervals, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": span['start'],
            "end_date": span['end'],
            "interval_sec": interval,
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        result = list()

        for elements_ in path_:
            cells = elements_.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # An empty chunk is tolerated unless it is the last one.
                if interval_counter < interval_limit:
                    continue
                raise IndexError("ERR#0080: commodity information unavailable or not found.")

            info = [cell.get('data-real-value') for cell in cells]

            # The first cell is a Unix timestamp; keep only its (local) calendar date.
            commodity_date = datetime.strptime(str(datetime.fromtimestamp(int(info[0])).date()), '%Y-%m-%d')

            commodity_close = float(info[1].replace(',', ''))
            commodity_open = float(info[2].replace(',', ''))
            commodity_high = float(info[3].replace(',', ''))
            commodity_low = float(info[4].replace(',', ''))

            commodity_volume = int(info[5])

            result.append(Data(commodity_date, commodity_open, commodity_high, commodity_low,
                               commodity_close, commodity_volume, currency, None))

        if not result:
            continue

        # Rows arrive newest-first (sort_ord=DESC).
        if order in ['ascending', 'asc']:
            result = result[::-1]

        final.append(result)

    if not final:
        raise IndexError("ERR#0080: commodity information unavailable or not found.")

    # Chunks were requested oldest-first; flip them so descending output is globally ordered.
    if order in ['descending', 'desc']:
        final.reverse()

    values = [value for chunk in final for value in chunk]

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [value.commodity_as_json() for value in values]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.commodity_to_dict() for value in values])
    df.set_index('Date', inplace=True)
    return df
def get_commodity_recent_data(commodity, country=None, as_json=False, order='ascending', interval='Daily'):
    """
    This function retrieves recent historical data of the introduced commodity from Investing.com, which
    will be returned as a :obj:`pandas.DataFrame` (or a :obj:`json` string) if the parameters are valid and
    the request to Investing.com succeeds.

    Args:
        commodity (:obj:`str`): name of the commodity to retrieve recent data from.
        country (:obj:`str`, optional):
            name of the country to retrieve the commodity data from (if there is more than one country
            that provides data from the same commodity).
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False and a
            :obj:`json` if True.
        order (:obj:`str`, optional):
            to define the order of the retrieved data which can either be ascending (`ascending`, `asc`)
            or descending (`descending`, `desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also
            be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function can return either a :obj:`pandas.DataFrame` (indexed by `Date`) or a :obj:`json`
            string, containing the retrieved recent data of the specified commodity.

            The returned data in case we use default arguments will look like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but in case that as_json parameter was defined as True, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: 'dd/mm/yyyy',
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        IOError: raised if commodities object/file was not found or unable to retrieve.
        RuntimeError: raised if the introduced commodity/country was not found or did not match any of the
            existing ones, or if the response could not be scraped.
        ConnectionError: raised if connection to Investing.com could not be established.
        IndexError: raised if commodity recent data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_commodity_recent_data(commodity='gold')
                          Open    High     Low   Close  Volume Currency
            Date
            2019-10-25  1506.4  1520.9  1503.1  1505.3  368743      USD
            2019-10-28  1507.4  1510.8  1492.3  1495.8  318126      USD
            2019-10-29  1494.3  1497.1  1485.6  1490.7  291980      USD
            2019-10-30  1490.5  1499.3  1483.1  1496.7  353638      USD
            2019-10-31  1498.8  1516.7  1496.0  1514.8  390013      USD

    """

    if not commodity or not isinstance(commodity, str):
        raise ValueError("ERR#0078: commodity parameter is mandatory and must be a valid commodity name.")

    if country is not None and not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not interval or not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'commodities', 'commodities.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        commodities = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0075: commodities file not found or errored.")

    if commodities is None:
        raise IOError("ERR#0076: commodities not found or unable to retrieve.")

    commodity = commodity.strip()
    commodity = commodity.lower()

    if unidecode.unidecode(commodity) not in [unidecode.unidecode(value.lower()) for value in commodities['name'].tolist()]:
        raise RuntimeError("ERR#0079: commodity " + commodity + " not found, check if it is correct.")

    def _name_mask(frame):
        # Same accent-insensitive comparison as the membership check above.
        return frame['name'].str.lower().map(unidecode.unidecode) == unidecode.unidecode(commodity)

    if country is None:
        found_commodities = commodities[_name_mask(commodities)]

        if len(found_commodities) > 1:
            msg = "Note that the displayed commodity data can differ depending on the country. " \
                "If you want to retrieve " + commodity + " data from either " + \
                " or ".join(found_commodities['country'].tolist()) + ", specify the country parameter."
            warnings.warn(msg, Warning)

        del found_commodities
    else:
        if unidecode.unidecode(country.lower()) not in commodities['country'].unique().tolist():
            raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

        commodities = commodities[commodities['country'] == unidecode.unidecode(country.lower())]

    matches = _name_mask(commodities)

    # idxmax() on an all-False mask returns the first row, so a commodity that is
    # not listed for the requested country must be rejected explicitly.
    if not matches.any():
        raise RuntimeError("ERR#0079: commodity " + commodity + " not found, check if it is correct.")

    row_label = matches.idxmax()

    full_name = commodities.loc[row_label, 'full_name']
    id_ = commodities.loc[row_label, 'id']
    name = commodities.loc[row_label, 'name']
    currency = commodities.loc[row_label, 'currency']

    header = full_name + ' Historical Data'

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = list()

    for elements_ in path_:
        cells = elements_.xpath(".//td")

        if cells[0].text_content() == 'No results found':
            raise IndexError("ERR#0080: commodity information unavailable or not found.")

        info = [cell.get('data-real-value') for cell in cells]

        # The first cell is a Unix timestamp; keep only its (local) calendar date.
        commodity_date = datetime.strptime(str(datetime.fromtimestamp(int(info[0])).date()), '%Y-%m-%d')

        commodity_close = float(info[1].replace(',', ''))
        commodity_open = float(info[2].replace(',', ''))
        commodity_high = float(info[3].replace(',', ''))
        commodity_low = float(info[4].replace(',', ''))

        commodity_volume = int(info[5])

        result.append(Data(commodity_date, commodity_open, commodity_high, commodity_low,
                           commodity_close, commodity_volume, currency))

    # Rows arrive newest-first (sort_ord=DESC).
    if order in ['ascending', 'asc']:
        result = result[::-1]

    if as_json is True:
        json_ = {
            'name': name,
            'recent': [value.commodity_as_json() for value in result]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.commodity_to_dict() for value in result])
    df.set_index('Date', inplace=True)
    return df
def get_etf_historical_data(etf, country, from_date, to_date, as_json=False, order='ascending', debug=False):
    """
    This function retrieves historical data of the introduced `etf` from Investing.com via Web Scraping on
    the introduced date range. The range is split into consecutive, non-overlapping chunks of at most 20
    years; one request is sent per chunk and the partial results are merged into a single
    :obj:`pandas.DataFrame` or :obj:`json` string, in `ascending` or `descending` order.

    Args:
        etf (:obj:`str`): name of the etf to retrieve historical data from.
        country (:obj:`str`): name of the country from where the etf is.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, `True` or `False`, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` (indexed by `Date`) or a :obj:`json`
            string containing the retrieved historical data of the specified etf, covering every requested
            chunk of the date range. Each record carries the open, high, low and close values for the
            selected etf on market days, plus the currency in which they are expressed.

            If `as_json=True`, the output will be::

                {
                    name: name,
                    historical: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            ...
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the arguments is not valid or errored.
        IOError: raised if etfs object/file not found or unable to retrieve.
        RuntimeError: raised if the introduced etf/country does not match any of the indexed ones, or if
            the response could not be scraped.
        ConnectionError: raised if the POST request does not return a 200 status code.
        IndexError: raised if etf information was unavailable or not found.

    Examples:
        >>> investpy.get_etf_historical_data(etf='bbva accion dj eurostoxx 50', country='spain', from_date='01/01/2010', to_date='01/01/2019')
                         Open   High    Low  Close Currency
            Date
            2011-12-07  23.70  23.70  23.70  23.62      EUR
            2011-12-08  23.53  23.60  23.15  23.04      EUR
            2011-12-09  23.36  23.60  23.36  23.62      EUR
            2011-12-12  23.15  23.26  23.00  22.88      EUR
            2011-12-13  22.88  22.88  22.88  22.80      EUR

    """

    if not etf:
        raise ValueError("ERR#0031: etf parameter is mandatory and must be a valid etf name.")

    if not isinstance(etf, str):
        raise ValueError("ERR#0030: etf argument needs to be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError("ERR#0033: debug argument can just be a boolean value, either True or False.")

    try:
        start_date = datetime.datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError("ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError("ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError("ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    # Split the requested range into chunks of at most 20 years each.
    intervals = []
    while True:
        if end_date.year - start_date.year > 20:
            try:
                chunk_end = start_date.replace(year=start_date.year + 20)
            except ValueError:
                # 29 February with a non-leap target year (e.g. 1900, 2100): use 28 February.
                chunk_end = start_date.replace(year=start_date.year + 20, day=28)

            intervals.append({
                'start': start_date.strftime('%d/%m/%Y'),
                'end': chunk_end.strftime('%d/%m/%Y'),
            })

            # Start the next chunk one day later so the boundary day is not fetched twice.
            start_date = chunk_end + datetime.timedelta(days=1)
        else:
            intervals.append({
                'start': start_date.strftime('%d/%m/%Y'),
                'end': end_date.strftime('%d/%m/%Y'),
            })
            break

    interval_limit = len(intervals)

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'etfs', 'etfs.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        etfs = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0058: etfs file not found or errored.")

    if etfs is None:
        raise IOError("ERR#0009: etfs object not found or unable to retrieve.")

    if unidecode.unidecode(country.lower()) not in get_etf_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    etfs = etfs[etfs['country'] == unidecode.unidecode(country.lower())]

    etf = etf.strip()
    etf = etf.lower()

    if unidecode.unidecode(etf) not in [unidecode.unidecode(value.lower()) for value in etfs['name'].tolist()]:
        raise RuntimeError("ERR#0019: etf " + str(etf) + " not found in " + str(country.lower()) + ", check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')

    if debug is False:
        logger.disabled = True
    else:
        logger.disabled = False

    logger.info('Searching introduced etf on Investing.com')

    # Use the same accent-insensitive comparison as the membership check above;
    # an exact comparison could yield an all-False mask, making idxmax() pick the first row.
    matches = etfs['name'].str.lower().map(unidecode.unidecode) == unidecode.unidecode(etf)

    if len(etfs[matches]) > 1:
        warnings.warn('Note that the displayed information can differ depending on the stock exchange.', Warning)

    row_label = matches.idxmax()

    symbol = etfs.loc[row_label, 'symbol']
    id_ = etfs.loc[row_label, 'id']
    name = etfs.loc[row_label, 'name']
    etf_currency = etfs.loc[row_label, 'currency']

    logger.info(str(etf) + ' found on Investing.com')

    header = "Datos históricos " + symbol

    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    # One list of Data objects per chunk that returned rows, in chronological chunk order.
    final = list()

    for interval_counter, span in enumerate(intervals, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": span['start'],
            "end_date": span['end'],
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": user_agent.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        logger.info('Request sent to Investing.com!')

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        logger.info('Request to Investing.com data succeeded with code ' + str(req.status_code) + '!')

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        logger.info('Data parsing process starting...')

        result = list()

        for elements_ in path_:
            info = [nested_.text_content() for nested_ in elements_.xpath(".//td")]

            if info[0] == 'No se encontraron resultados':
                # An empty chunk is tolerated unless it is the last one.
                if interval_counter < interval_limit:
                    continue
                raise IndexError("ERR#0010: etf information unavailable or not found.")

            # The Spanish site renders dates as dd.mm.yyyy and numbers as 1.234,56.
            etf_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')
            etf_close = float(info[1].replace('.', '').replace(',', '.'))
            etf_open = float(info[2].replace('.', '').replace(',', '.'))
            etf_high = float(info[3].replace('.', '').replace(',', '.'))
            etf_low = float(info[4].replace('.', '').replace(',', '.'))

            result.append(Data(etf_date, etf_open, etf_high, etf_low, etf_close, None, etf_currency))

        if not result:
            continue

        # Rows arrive newest-first (sort_ord=DESC).
        if order in ['ascending', 'asc']:
            result = result[::-1]

        final.append(result)

    logger.info('Data parsing process finished...')

    if not final:
        raise IndexError("ERR#0010: etf information unavailable or not found.")

    # Chunks were requested oldest-first; flip them so descending output is globally ordered.
    if order in ['descending', 'desc']:
        final.reverse()

    values = [value for chunk in final for value in chunk]

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [value.etf_as_json() for value in values]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.etf_to_dict() for value in values])
    df.set_index('Date', inplace=True)
    return df
def get_currency_cross_recent_data(currency_cross, as_json=False, order='ascending', debug=False):
    """
    Retrieve the recent historical data of a currency cross as indexed in Investing.com.

    The data is scraped from the Spanish version of the site and returned either as a
    :obj:`pandas.DataFrame` indexed by `Date` or as a :obj:`json` string, in `ascending` or
    `descending` date order.

    Args:
        currency_cross (:obj:`str`): name of the currency cross to retrieve recent historical data from.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, `True` or `False`, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            Either a :obj:`pandas.DataFrame` or a :obj:`json` string with the open, high, low, close,
            volume and currency values of the currency cross on market days.

            With default arguments the returned data looks like::

                date || open | high | low | close | volume | currency
                -----||------------------------------------|---------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            and with `as_json=True` the output has the shape::

                {
                    name: name,
                    recent: [
                        {
                            ...
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if any of the introduced arguments was not valid or errored.
        IOError: raised if currency_crosses object/file not found or unable to retrieve.
        RuntimeError: raised if the introduced currency_cross does not match any of the indexed ones, or
            if no data table could be found in the response.
        ConnectionError: raised if the request did not return a 200 status code.
        IndexError: raised if currency_cross information was unavailable or not found.

    Examples:
        >>> investpy.get_currency_cross_recent_data(currency_cross='EUR/USD')
                          Open    High     Low   Close  Volume Currency
            Date
            2019-08-27  1.1101  1.1116  1.1084  1.1091       0      USD
            2019-08-28  1.1090  1.1099  1.1072  1.1078       0      USD
            2019-08-29  1.1078  1.1093  1.1042  1.1057       0      USD
            2019-08-30  1.1058  1.1062  1.0963  1.0991       0      USD
            2019-09-02  1.0990  1.1000  1.0958  1.0968       0      USD

    """

    def _spanish_number(text):
        # Numbers come as '1.234,56': drop thousands dots, turn the decimal comma into a dot.
        return float(text.replace('.', '').replace(',', '.'))

    # ---- argument validation -------------------------------------------------
    if not currency_cross or not isinstance(currency_cross, str):
        raise ValueError("ERR#0052: currency_cross param is mandatory and should be a str.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError("ERR#0033: debug argument can just be a boolean value, either True or False.")

    # ---- load the packaged listing of currency crosses -----------------------
    package_name = 'investpy'
    csv_location = 'resources/currency_crosses/currency_crosses.csv'

    if not pkg_resources.resource_exists(package_name, csv_location):
        raise FileNotFoundError("ERR#0060: currency_crosses file not found or errored.")

    currency_crosses = pd.read_csv(pkg_resources.resource_filename(package_name, csv_location))

    if currency_crosses is None:
        raise IOError("ERR#0050: currency_crosses not found or unable to retrieve.")

    currency_cross = currency_cross.strip().lower()

    known_names = [unidecode.unidecode(entry.lower()) for entry in currency_crosses['name'].tolist()]
    if unidecode.unidecode(currency_cross) not in known_names:
        raise RuntimeError("ERR#0054: the introduced currency_cross " + str(currency_cross) + " does not exists.")

    # ---- logging setup -------------------------------------------------------
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced currency_cross on Investing.com')

    # Label of the first listing row whose lower-cased name equals the requested cross.
    row_label = (currency_crosses['name'].str.lower() == currency_cross).idxmax()

    id_ = currency_crosses.loc[row_label, 'id']
    name = currency_crosses.loc[row_label, 'name']
    currency = currency_crosses.loc[row_label, 'second']

    logger.info(str(currency_cross) + ' found on Investing.com')

    # ---- request -------------------------------------------------------------
    payload = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": "Datos históricos " + name,
        "interval_sec": "Daily",
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    request_headers = {
        "User-Agent": user_agent.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    logger.info('Request sent to Investing.com!')

    req = requests.post("https://es.investing.com/instruments/HistoricalDataAjax",
                        headers=request_headers,
                        data=payload)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    logger.info('Request to Investing.com data succeeded with code ' + str(req.status_code) + '!')

    # ---- parsing -------------------------------------------------------------
    table_rows = fromstring(req.text).xpath(".//table[@id='curr_table']/tbody/tr")

    if not table_rows:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    logger.info('Data parsing process starting...')

    result = []
    for table_row in table_rows:
        info = [cell.text_content() for cell in table_row.xpath(".//td")]

        if info[0] == 'No se encontraron resultados':
            raise IndexError("ERR#0055: currency_cross information unavailable or not found.")

        # Dates are rendered as dd.mm.yyyy on the Spanish site.
        row_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')

        row_close = _spanish_number(info[1])
        row_open = _spanish_number(info[2])
        row_high = _spanish_number(info[3])
        row_low = _spanish_number(info[4])

        # Volume stays at 0 unless it carries a K/M/B magnitude suffix.
        row_volume = 0
        for suffix, factor in (('K', 1e3), ('M', 1e6), ('B', 1e9)):
            if suffix in info[5]:
                row_volume = int(_spanish_number(info[5].replace(suffix, '')) * factor)
                break

        result.append(Data(row_date, row_open, row_high, row_low, row_close, row_volume, currency))

    # The site answers newest-first; flip the rows for ascending output.
    if order in ['ascending', 'asc']:
        result.reverse()

    logger.info('Data parsing process finished...')

    if as_json is True:
        return json.dumps({
            'name': name,
            'recent': [entry.currency_cross_as_json() for entry in result]
        }, sort_keys=False)

    df = pd.DataFrame.from_records([entry.currency_cross_to_dict() for entry in result])
    df.set_index('Date', inplace=True)
    return df
def get_bond_recent_data(bond, as_json=False, order='ascending', interval='Daily'):
    """
    Retrieve the recent historical data of a bond from Investing.com.

    The bond name is validated against the packaged ``bonds.csv`` listing, the
    matching identifier is posted to the Investing.com historical data endpoint
    and the returned HTML table is parsed into OHLC records.

    Args:
        bond (:obj:`str`): name of the bond to retrieve recent historical data from.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False and a
            :obj:`json` if True.
        order (:obj:`str`, optional):
            order of the retrieved data, either `ascending`/`asc` or `descending`/`desc`.
        interval (:obj:`str`, optional):
            historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The recent data of the specified bond, indexed by date when returned as a
            :obj:`pandas.DataFrame`. With the default parameters it looks like::

                Date || Open | High | Low | Close
                -----||------|------|-----|-------
                xxxx || xxxx | xxxx | xxx | xxxxx

            and when `as_json=True` the output is::

                {
                    name: name,
                    recent: [
                        {
                            date: 'dd/mm/yyyy',
                            open: x,
                            high: x,
                            low: x,
                            close: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        FileNotFoundError: raised if the bonds file was not found.
        IOError: raised if the bonds object could not be retrieved.
        RuntimeError: raised if the introduced bond was not found, or if the response contained no data table rows.
        ConnectionError: raised if the request to Investing.com did not return a 200 status code.
        IndexError: raised if bond historical data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_bond_recent_data(bond='Argentina 3Y')
                          Open    High     Low   Close
            Date
            2019-09-23  52.214  52.214  52.214  52.214
            2019-09-24  52.323  52.323  52.323  52.323
            2019-09-25  52.432  52.432  52.432  52.432
            2019-09-26  52.765  52.765  52.765  52.765
            2019-09-27  52.876  52.876  52.876  52.876

    """

    if not bond:
        raise ValueError(
            "ERR#0066: bond parameter is mandatory and must be a valid bond name."
        )

    if not isinstance(bond, str):
        raise ValueError("ERR#0067: bond argument needs to be a str.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if (not interval or not isinstance(interval, str)
            or interval not in ['Daily', 'Weekly', 'Monthly']):
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'."
        )

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'bonds', 'bonds.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        bonds = pd.read_csv(
            pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0064: bonds file not found or errored.")

    if bonds is None:
        raise IOError(
            "ERR#0065: bonds object not found or unable to retrieve.")

    bond = bond.strip().lower()

    # Prefer an exact (case-insensitive) match; fall back to an accent-insensitive
    # one. The row is resolved from the SAME mask that validated the name, because
    # idxmax() on an all-False mask would silently return the first row.
    matches = bonds['name'].str.lower() == bond
    if not matches.any():
        plain_bond = unidecode.unidecode(bond)
        matches = pd.Series(
            [unidecode.unidecode(value.lower()) == plain_bond
             for value in bonds['name'].tolist()],
            index=bonds.index, dtype=bool)

    if not matches.any():
        raise RuntimeError("ERR#0068: bond " + bond +
                           " not found, check if it is correct.")

    row_label = matches.idxmax()
    id_ = bonds.loc[row_label, 'id']
    name = bonds.loc[row_label, 'name']
    full_name = bonds.loc[row_label, 'full_name']

    header = full_name + " Bond Yield Historical Data"

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                              ", try again later.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = []
    for elements_ in path_:
        cells = elements_.xpath(".//td")
        if cells[0].text_content() == 'No results found':
            raise IndexError(
                "ERR#0069: bond information unavailable or not found.")

        info = [cell.get('data-real-value') for cell in cells]

        # NOTE(review): fromtimestamp() converts to the machine's local timezone;
        # confirm the served timestamps cannot shift the date on hosts far from UTC.
        bond_date = datetime.fromtimestamp(int(info[0])).date()
        bond_close = float(info[1].replace(',', ''))
        bond_open = float(info[2].replace(',', ''))
        bond_high = float(info[3].replace(',', ''))
        bond_low = float(info[4].replace(',', ''))

        result.append(
            Data(bond_date, bond_open, bond_high, bond_low, bond_close, None,
                 None))

    # Rows are requested newest-first ("sort_ord": "DESC"), so only ascending
    # output needs reordering.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    if as_json is True:
        json_ = {
            'name': name,
            'recent': [value.bond_as_json() for value in result]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.bond_to_dict() for value in result])
    df.set_index('Date', inplace=True)
    return df
def get_index_recent_data(index, country, as_json=False, order='ascending', interval='Daily'):
    """
    Retrieve the recent historical data of an index from Investing.com via Web Scraping.

    The index is looked up (within the given country) in the packaged
    ``indices.csv`` listing, its identifier is posted to the Investing.com
    historical data endpoint and the returned HTML table is parsed into OHLCV
    records.

    Args:
        index (:obj:`str`): name of the index to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the index is.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be
            `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The recent data of the specified index: open, high, low, close and volume values plus the
            currency in which they are expressed. With the default arguments it looks like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            and when `as_json=True` the output is::

                {
                    name: name,
                    recent: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if there was an argument error.
        FileNotFoundError: raised if the indices file was not found.
        IOError: raised if the indices object could not be retrieved.
        RuntimeError:
            raised if the country or the index does not match any of the listed ones, or if the response
            contained no data table rows.
        ConnectionError: raised if the request to Investing.com does not return a 200 status code.
        IndexError: raised if index information was unavailable or not found.

    Examples:
        >>> investpy.get_index_recent_data(index='ibex 35', country='spain')
                           Open     High      Low    Close   Volume Currency
            Date
            2019-08-26  12604.7  12646.3  12510.4  12621.3  4770000      EUR
            2019-08-27  12618.3  12723.3  12593.6  12683.8  8230000      EUR
            2019-08-28  12657.2  12697.2  12585.1  12642.5  7300000      EUR
            2019-08-29  12637.2  12806.6  12633.8  12806.6  5650000      EUR
            2019-08-30  12767.6  12905.9  12756.9  12821.6  6040000      EUR

    """

    if not index or not isinstance(index, str):
        raise ValueError(
            "ERR#0047: index param is mandatory and should be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if (not interval or not isinstance(interval, str)
            or interval not in ['Daily', 'Weekly', 'Monthly']):
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'."
        )

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'indices', 'indices.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        indices = pd.read_csv(
            pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0059: indices file not found or errored.")

    if indices is None:
        raise IOError("ERR#0037: indices not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())
    if country_key not in get_index_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    indices = indices[indices['country'] == country_key]

    index = index.strip().lower()

    # Prefer an exact (case-insensitive) match; fall back to an accent-insensitive
    # one. The row is resolved from the SAME mask that validated the name, because
    # idxmax() on an all-False mask would silently return the first row.
    matches = indices['name'].str.lower() == index
    if not matches.any():
        plain_index = unidecode.unidecode(index)
        matches = pd.Series(
            [unidecode.unidecode(value.lower()) == plain_index
             for value in indices['name'].tolist()],
            index=indices.index, dtype=bool)

    if not matches.any():
        raise RuntimeError("ERR#0045: index " + index +
                           " not found, check if it is correct.")

    row_label = matches.idxmax()
    full_name = indices.loc[row_label, 'full_name']
    id_ = indices.loc[row_label, 'id']
    name = indices.loc[row_label, 'name']
    index_currency = indices.loc[row_label, 'currency']

    header = full_name + ' Historical Data'

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                              ", try again later.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = []
    for elements_ in path_:
        cells = elements_.xpath(".//td")
        if cells[0].text_content() == 'No results found':
            raise IndexError(
                "ERR#0046: index information unavailable or not found.")

        info = [cell.get('data-real-value') for cell in cells]

        # NOTE(review): fromtimestamp() converts to the machine's local timezone;
        # confirm the served timestamps cannot shift the date on hosts far from UTC.
        stamp = datetime.fromtimestamp(int(info[0]))
        # Keep only the calendar day (midnight datetime), as before.
        index_date = datetime(stamp.year, stamp.month, stamp.day)

        index_close = float(info[1].replace(',', ''))
        index_open = float(info[2].replace(',', ''))
        index_high = float(info[3].replace(',', ''))
        index_low = float(info[4].replace(',', ''))
        index_volume = int(info[5])

        result.append(
            Data(index_date, index_open, index_high, index_low, index_close,
                 index_volume, index_currency))

    # Rows are requested newest-first ("sort_ord": "DESC"), so only ascending
    # output needs reordering.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    if as_json is True:
        json_ = {
            'name': name,
            'recent': [value.index_as_json() for value in result]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.index_to_dict() for value in result])
    df.set_index('Date', inplace=True)
    return df
def get_etf_historical_data(etf, country, from_date, to_date, stock_exchange=None, as_json=False, order='ascending', interval='Daily'):
    """
    Retrieve historical data of an ETF from Investing.com via Web Scraping for a date range.

    The date range is split into consecutive, non-overlapping chunks spanning at most
    19 years each; one request is sent per chunk and the parsed rows of every chunk are
    combined into a single result ordered as requested.

    Args:
        etf (:obj:`str`): name of the etf to retrieve historical data from.
        country (:obj:`str`): name of the country from where the etf is.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        stock_exchange (:obj:`str`, optional):
            stock exchange in which the etf is listed; when left as None the default stock exchange of the
            etf (or the first one listed for the given country) is used.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be
            `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The historical data of the specified etf: open, high, low and close values plus the currency
            and the stock exchange. With the default arguments it looks like::

                Date || Open | High | Low | Close | Currency | Exchange
                -----||------|------|-----|-------|----------|---------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx | xxxxxxxx

            and when `as_json=True` the output is::

                {
                    name: name,
                    historical: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            currency: x,
                            exchange: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the arguments is not valid or errored.
        FileNotFoundError: raised if the etfs file was not found.
        IOError: raised if the etfs object could not be retrieved.
        RuntimeError:
            raised if the country or the etf does not match any of the listed ones, or if a response
            contained no data table rows.
        ConnectionError: raised if the request to Investing.com does not return a 200 status code.
        IndexError: raised if etf information was unavailable or not found.

    Examples:
        >>> investpy.get_etf_historical_data(etf='bbva accion dj eurostoxx 50', country='spain', from_date='01/01/2010', to_date='01/01/2019')
                         Open   High    Low  Close Currency Exchange
            Date
            2011-12-07  23.70  23.70  23.70  23.62      EUR   Madrid
            2011-12-08  23.53  23.60  23.15  23.04      EUR   Madrid
            2011-12-09  23.36  23.60  23.36  23.62      EUR   Madrid
            2011-12-12  23.15  23.26  23.00  22.88      EUR   Madrid
            2011-12-13  22.88  22.88  22.88  22.80      EUR   Madrid

    """

    # Imported locally so this function does not depend on the module's import list.
    from datetime import timedelta

    if not etf:
        raise ValueError(
            "ERR#0031: etf parameter is mandatory and must be a valid etf name."
        )

    if not isinstance(etf, str):
        raise ValueError("ERR#0030: etf argument needs to be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if stock_exchange is not None and not isinstance(stock_exchange, str):
        raise ValueError(
            "ERR#0125: specified stock_exchange value is not valid, it should be a str."
        )

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if (not interval or not isinstance(interval, str)
            or interval not in ['Daily', 'Weekly', 'Monthly']):
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'."
        )

    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'."
        )

    # Split the range into chunks of at most 19 years. The next chunk starts the
    # day after the previous one ends; timedelta arithmetic rolls over month and
    # year boundaries, which `replace(day=day + 1)` cannot do.
    date_ranges = []
    while end_date.year - start_date.year > 19:
        try:
            chunk_end = start_date.replace(year=start_date.year + 19)
        except ValueError:
            # 29 February has no counterpart in a non-leap target year.
            chunk_end = start_date.replace(year=start_date.year + 19, day=28)

        date_ranges.append({
            'start': start_date.strftime('%m/%d/%Y'),
            'end': chunk_end.strftime('%m/%d/%Y'),
        })
        start_date = chunk_end + timedelta(days=1)

    date_ranges.append({
        'start': start_date.strftime('%m/%d/%Y'),
        'end': end_date.strftime('%m/%d/%Y'),
    })

    interval_limit = len(date_ranges)

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'etfs', 'etfs.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        etfs = pd.read_csv(
            pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0058: etfs file not found or errored.")

    if etfs is None:
        raise IOError("ERR#0009: etfs object not found or unable to retrieve.")

    country = unidecode.unidecode(country.strip().lower())

    if country not in get_etf_countries():
        raise RuntimeError("ERR#0034: country " + country +
                           " not found, check if it is correct.")

    etf = unidecode.unidecode(etf.strip().lower())

    # Default listing of the ETF across ALL countries (resolved before filtering).
    def_exchange = etfs.loc[((etfs['name'].str.lower() == etf) &
                             (etfs['def_stock_exchange'] == True)).idxmax()]

    etfs = etfs[etfs['country'].str.lower() == country]

    if etf not in etfs['name'].str.lower().tolist():
        raise RuntimeError("ERR#0019: etf " + etf +
                           " not found, check if it is correct.")

    # From here on every remaining row belongs to the requested ETF and country.
    etfs = etfs[etfs['name'].str.lower() == etf]
    exchanges = etfs['stock_exchange'].str.lower()

    if def_exchange['country'] != country:
        warnings.warn(
            'Selected country does not contain the default stock exchange of the introduced ETF. ' +
            'Default country is: "' + def_exchange['country'] +
            '" and default stock_exchange: "' +
            def_exchange['stock_exchange'] + '".', Warning)

        if stock_exchange:
            if stock_exchange.lower() not in exchanges.tolist():
                raise ValueError(
                    "ERR#0126: introduced stock_exchange value does not exists, leave this parameter to None to use default stock_exchange."
                )

            etf_exchange = etfs.loc[
                (exchanges == stock_exchange.lower()).idxmax(),
                'stock_exchange']
        else:
            if len(etfs) > 1:
                warnings.warn(
                    'Note that the displayed information can differ depending on the stock exchange. Available stock_exchange' +
                    ' values for "' + country + '" are: "' +
                    '", "'.join(etfs['stock_exchange']) + '".', Warning)

            # No exchange requested: use the first listing for this country.
            etf_exchange = etfs.loc[etfs.index[0], 'stock_exchange']
    else:
        if stock_exchange:
            if stock_exchange.lower() not in exchanges.tolist():
                raise ValueError(
                    "ERR#0126: introduced stock_exchange value does not exists, leave this parameter to None to use default stock_exchange."
                )

            if def_exchange['stock_exchange'].lower() != stock_exchange.lower():
                warnings.warn(
                    'Selected stock_exchange is not the default one of the introduced ETF. ' +
                    'Default country is: "' + def_exchange['country'] +
                    '" and default stock_exchange: "' +
                    def_exchange['stock_exchange'].lower() + '".', Warning)

            etf_exchange = etfs.loc[
                (exchanges == stock_exchange.lower()).idxmax(),
                'stock_exchange']
        else:
            etf_exchange = def_exchange['stock_exchange']

    row_label = (exchanges == etf_exchange.lower()).idxmax()
    symbol = etfs.loc[row_label, 'symbol']
    id_ = etfs.loc[row_label, 'id']
    name = etfs.loc[row_label, 'name']
    etf_currency = etfs.loc[row_label, 'currency']

    header = symbol + ' Historical Data'
    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    # One list of parsed rows per date chunk that returned data (oldest chunk first).
    chunks = []

    for interval_counter, date_range in enumerate(date_ranges, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": date_range['start'],
            "end_date": date_range['end'],
            "interval_sec": interval,
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                                  ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError(
                "ERR#0004: data retrieval error while scraping.")

        result = []
        data_flag = False

        for elements_ in path_:
            cells = elements_.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # An empty chunk is only an error when it is the last one.
                if interval_counter < interval_limit:
                    data_flag = False
                    continue
                raise IndexError(
                    "ERR#0010: etf information unavailable or not found.")

            data_flag = True
            info = [cell.get('data-real-value') for cell in cells]

            # NOTE(review): fromtimestamp() converts to the machine's local timezone;
            # confirm the served timestamps cannot shift the date on hosts far from UTC.
            stamp = datetime.fromtimestamp(int(info[0]))
            etf_date = datetime(stamp.year, stamp.month, stamp.day)

            etf_close = float(info[1].replace(',', ''))
            etf_open = float(info[2].replace(',', ''))
            etf_high = float(info[3].replace(',', ''))
            etf_low = float(info[4].replace(',', ''))

            result.append(
                Data(etf_date, etf_open, etf_high, etf_low, etf_close, None,
                     etf_currency, etf_exchange))

        if data_flag is True:
            # Rows arrive newest-first ("sort_ord": "DESC").
            if order in ['ascending', 'asc']:
                result = result[::-1]
            chunks.append(result)

    if not chunks:
        raise IndexError("ERR#0010: etf information unavailable or not found.")

    # Chunks were collected oldest-first; for descending output the newest chunk
    # must come first so the combined result is globally ordered.
    if order in ['descending', 'desc']:
        chunks.reverse()

    records = [value for chunk in chunks for value in chunk]

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [value.etf_as_json() for value in records]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.etf_to_dict() for value in records])
    df.set_index('Date', inplace=True)
    return df
def get_index_historical_data(index, country, from_date, to_date, as_json=False, order='ascending', interval='Daily'):
    """
    Retrieve historical data of an index from Investing.com via Web Scraping for a date range.

    The index must be listed among the indices of the specified country, otherwise an
    error is raised. The date range is split into consecutive, non-overlapping chunks
    spanning at most 20 years each; one request is sent per chunk and the parsed rows of
    every chunk are combined into a single result ordered as requested.

    Args:
        index (:obj:`str`): name of the index to retrieve historical data from.
        country (:obj:`str`): name of the country from where the index is.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be
            `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The historical data of the specified index: open, high, low, close and volume values plus the
            currency in which they are expressed. With the default arguments it looks like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            and when `as_json=True` the output is::

                {
                    name: name,
                    historical: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if there was an argument error.
        FileNotFoundError: raised if the indices file was not found.
        IOError: raised if the indices object could not be retrieved.
        RuntimeError:
            raised if the country or the index does not match any of the listed ones, or if a response
            contained no data table rows.
        ConnectionError: raised if the request to Investing.com does not return a 200 status code.
        IndexError: raised if index information was unavailable or not found.

    Examples:
        >>> investpy.get_index_historical_data(index='ibex 35', country='spain', from_date='01/01/2018', to_date='01/01/2019')
                           Open     High      Low    Close    Volume Currency
            Date
            2018-01-02  15128.2  15136.7  14996.6  15096.8  10340000      EUR
            2018-01-03  15145.0  15186.9  15091.9  15106.9  12800000      EUR
            2018-01-04  15105.5  15368.7  15103.7  15368.7  17070000      EUR
            2018-01-05  15353.9  15407.5  15348.6  15398.9  11180000      EUR
            2018-01-08  15437.1  15448.7  15344.0  15373.3  12890000      EUR

    """

    # Imported locally so this function does not depend on the module's import list.
    from datetime import timedelta

    if not index or not isinstance(index, str):
        raise ValueError(
            "ERR#0047: index param is mandatory and should be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'."
        )

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if (not interval or not isinstance(interval, str)
            or interval not in ['Daily', 'Weekly', 'Monthly']):
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'."
        )

    # Split the range into chunks of at most 20 years. The next chunk starts the
    # day AFTER the previous one ends, so the boundary date is not fetched twice.
    date_ranges = []
    while end_date.year - start_date.year > 20:
        try:
            chunk_end = start_date.replace(year=start_date.year + 20)
        except ValueError:
            # 29 February has no counterpart in a non-leap target year.
            chunk_end = start_date.replace(year=start_date.year + 20, day=28)

        date_ranges.append({
            'start': start_date.strftime('%m/%d/%Y'),
            'end': chunk_end.strftime('%m/%d/%Y'),
        })
        start_date = chunk_end + timedelta(days=1)

    date_ranges.append({
        'start': start_date.strftime('%m/%d/%Y'),
        'end': end_date.strftime('%m/%d/%Y'),
    })

    interval_limit = len(date_ranges)

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'indices', 'indices.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        indices = pd.read_csv(
            pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0059: indices file not found or errored.")

    if indices is None:
        raise IOError("ERR#0037: indices not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())
    if country_key not in get_index_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    indices = indices[indices['country'] == country_key]

    index = index.strip().lower()

    # Prefer an exact (case-insensitive) match; fall back to an accent-insensitive
    # one. The row is resolved from the SAME mask that validated the name, because
    # idxmax() on an all-False mask would silently return the first row.
    matches = indices['name'].str.lower() == index
    if not matches.any():
        plain_index = unidecode.unidecode(index)
        matches = pd.Series(
            [unidecode.unidecode(value.lower()) == plain_index
             for value in indices['name'].tolist()],
            index=indices.index, dtype=bool)

    if not matches.any():
        raise RuntimeError("ERR#0045: index " + index +
                           " not found, check if it is correct.")

    row_label = matches.idxmax()
    full_name = indices.loc[row_label, 'full_name']
    id_ = indices.loc[row_label, 'id']
    name = indices.loc[row_label, 'name']
    index_currency = indices.loc[row_label, 'currency']

    header = full_name + ' Historical Data'
    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    # One list of parsed rows per date chunk that returned data (oldest chunk first).
    chunks = []

    for interval_counter, date_range in enumerate(date_ranges, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": date_range['start'],
            "end_date": date_range['end'],
            "interval_sec": interval,
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                                  ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError(
                "ERR#0004: data retrieval error while scraping.")

        result = []
        data_flag = False

        for elements_ in path_:
            cells = elements_.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # An empty chunk is only an error when it is the last one.
                if interval_counter < interval_limit:
                    data_flag = False
                    continue
                raise IndexError(
                    "ERR#0046: index information unavailable or not found.")

            data_flag = True
            info = [cell.get('data-real-value') for cell in cells]

            # NOTE(review): fromtimestamp() converts to the machine's local timezone;
            # confirm the served timestamps cannot shift the date on hosts far from UTC.
            stamp = datetime.fromtimestamp(int(info[0]))
            index_date = datetime(stamp.year, stamp.month, stamp.day)

            index_close = float(info[1].replace(',', ''))
            index_open = float(info[2].replace(',', ''))
            index_high = float(info[3].replace(',', ''))
            index_low = float(info[4].replace(',', ''))
            index_volume = int(info[5])

            result.append(
                Data(index_date, index_open, index_high, index_low,
                     index_close, index_volume, index_currency))

        if data_flag is True:
            # Rows arrive newest-first ("sort_ord": "DESC").
            if order in ['ascending', 'asc']:
                result = result[::-1]
            chunks.append(result)

    if not chunks:
        raise IndexError(
            "ERR#0046: index information unavailable or not found.")

    # Chunks were collected oldest-first; for descending output the newest chunk
    # must come first so the combined result is globally ordered.
    if order in ['descending', 'desc']:
        chunks.reverse()

    records = [value for chunk in chunks for value in chunk]

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [value.index_as_json() for value in records]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.index_to_dict() for value in records])
    df.set_index('Date', inplace=True)
    return df
def get_crypto_historical_data(crypto, from_date, to_date, as_json=False, order='ascending', interval='Daily'):
    """
    Retrieve the historical data of a crypto currency from Investing.com.

    The requested date range is split into chunks of at most 19 years, one POST request is sent
    to Investing.com per chunk, and the rows scraped from every chunk are merged into a single
    result ordered by date, returned either as a :obj:`pandas.DataFrame` or as a :obj:`json` string.

    Args:
        crypto (:obj:`str`): name of the crypto currency to retrieve data from (case-insensitive).
        from_date (:obj:`str`): date formatted as `dd/mm/yyyy`, since when data is going to be retrieved.
        to_date (:obj:`str`): date formatted as `dd/mm/yyyy`, until when data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            output format, a :obj:`pandas.DataFrame` if False and a :obj:`json` string if True.
        order (:obj:`str`, optional):
            order of the retrieved data, either `ascending` (`asc`) or `descending` (`desc`).
        interval (:obj:`str`, optional):
            historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The historical data of the specified crypto currency for the whole requested range. The
            dataframe is indexed by `Date`; with default arguments it will look like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but if `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        {
                            date: 'dd/mm/yyyy',
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid, or when the
            selected crypto currency is flagged as unavailable.
        IOError: raised if the cryptos file was not found or could not be retrieved.
        RuntimeError: raised if the introduced crypto currency name did not match any of the
            existing ones, or if the response did not contain the expected table.
        ConnectionError: raised if Investing.com did not answer with a 200 status code.
        IndexError: raised if no historical data was found for the crypto currency.

    Examples:
        >>> investpy.get_crypto_historical_data(crypto='bitcoin', from_date='01/01/2018', to_date='01/01/2019')
                           Open     High      Low    Close  Volume Currency
            Date
            2018-01-01  13850.5  13921.5  12877.7  13444.9   78425      USD
            2018-01-02  13444.9  15306.1  12934.2  14754.1  137732      USD
            2018-01-03  14754.1  15435.0  14579.7  15156.6  106543      USD
            2018-01-04  15156.5  15408.7  14244.7  15180.1  110969      USD
            2018-01-05  15180.1  17126.9  14832.4  16954.8  141960      USD

    """

    if not crypto:
        raise ValueError("ERR#0083: crypto parameter is mandatory and must be a valid crypto name.")

    if not isinstance(crypto, str):
        raise ValueError("ERR#0084: crypto argument needs to be a str.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError as err:
        raise ValueError(
            "ERR#0011: incorrect from_date date format, it should be 'dd/mm/yyyy'.") from err

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError as err:
        raise ValueError("ERR#0012: incorrect to_date format, it should be 'dd/mm/yyyy'.") from err

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    # Split the range into chunks of at most `max_years` years, one request per chunk.
    max_years = 19
    intervals = []
    while end_date.year - start_date.year > max_years:
        try:
            chunk_end = start_date.replace(year=start_date.year + max_years)
        except ValueError:
            # 29 February has no counterpart in a non-leap target year; clamp to the 28th
            # instead of letting an unrelated ValueError escape to the caller.
            chunk_end = start_date.replace(year=start_date.year + max_years, day=28)
        intervals.append({
            'start': start_date.strftime('%m/%d/%Y'),
            'end': chunk_end.strftime('%m/%d/%Y'),
        })
        start_date = chunk_end
    intervals.append({
        'start': start_date.strftime('%m/%d/%Y'),
        'end': end_date.strftime('%m/%d/%Y'),
    })
    interval_limit = len(intervals)

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'crypto', 'cryptos.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0081: cryptos file not found or errored.")
    cryptos = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    if cryptos is None:
        raise IOError("ERR#0082: cryptos not found or unable to retrieve.")

    crypto = crypto.strip().lower()

    # Prefer an exact (lower-cased) match; fall back to an accent-insensitive one so that the
    # row that passed validation is also the row that gets read.
    lowered_names = cryptos['name'].str.lower()
    matches = lowered_names == crypto
    if not matches.any():
        matches = (lowered_names.map(unidecode.unidecode, na_action='ignore')
                   == unidecode.unidecode(crypto))
    if not matches.any():
        raise RuntimeError("ERR#0085: crypto currency: " + crypto + ", not found, check if it is correct.")
    match_idx = matches.idxmax()

    status = cryptos.loc[match_idx, 'status']
    if status == 'unavailable':
        raise ValueError(
            "ERR#0086: the selected crypto currency is not available for retrieval in Investing.com.")

    crypto_name = cryptos.loc[match_idx, 'name']
    crypto_id = cryptos.loc[match_idx, 'id']
    crypto_currency = cryptos.loc[match_idx, 'currency']

    header = crypto_name + ' Historical Data'
    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    # Rows of the whole range, kept in ascending date order while the chunks are processed.
    rows = []

    for interval_counter, chunk in enumerate(intervals, start=1):
        params = {
            "curr_id": crypto_id,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": chunk['start'],
            "end_date": chunk['end'],
            "interval_sec": interval,
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        chunk_rows = []
        for elements_ in path_:
            cells = elements_.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # An empty chunk is only fatal when it is the last one of the range.
                if interval_counter < interval_limit:
                    continue
                raise IndexError("ERR#0087: crypto information unavailable or not found.")

            info = [cell.get('data-real-value') for cell in cells]

            crypto_date = datetime.strptime(
                str(datetime.fromtimestamp(int(info[0])).date()), '%Y-%m-%d')
            crypto_close = float(info[1].replace(',', ''))
            crypto_open = float(info[2].replace(',', ''))
            crypto_high = float(info[3].replace(',', ''))
            crypto_low = float(info[4].replace(',', ''))
            crypto_volume = int(info[5])

            chunk_rows.append(
                Data(crypto_date, crypto_open, crypto_high, crypto_low, crypto_close,
                     crypto_volume, crypto_currency))

        # Rows are requested newest-first ("sort_ord": "DESC"); flip each chunk so that `rows`
        # stays ascending across chunk boundaries.
        rows.extend(reversed(chunk_rows))

    if not rows:
        raise IndexError("ERR#0087: crypto information unavailable or not found.")

    if order in ['descending', 'desc']:
        rows.reverse()

    if as_json:
        json_ = {
            'name': crypto_name,
            'historical': [value.crypto_as_json() for value in rows]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.crypto_to_dict() for value in rows])
    df.set_index('Date', inplace=True)
    return df
def get_fund_historical_data(fund, country, from_date, to_date, as_json=False, order='ascending', interval='Daily'):
    """
    Retrieve the historical data of a fund from Investing.com on the introduced date range.

    The requested date range is split into chunks of at most 20 years, one POST request is sent
    to Investing.com per chunk, and the rows scraped from every chunk are merged into a single
    result ordered by date, returned either as a :obj:`pandas.DataFrame` or as a :obj:`json` string.

    Args:
        fund (:obj:`str`): name of the fund to retrieve historical data from (case-insensitive).
        country (:obj:`str`): name of the country from where the introduced fund is.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            output format, a :obj:`pandas.DataFrame` if False and a :obj:`json` string if True.
        order (:obj:`str`, optional):
            order of the retrieved data, either `ascending` (`asc`) or `descending` (`desc`).
        interval (:obj:`str`, optional):
            historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The historical data of the specified fund for the whole requested range. The dataframe
            is indexed by `Date`; with default arguments it will look like::

                date || open | high | low | close | currency
                -----||--------------------------------------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx

            but if `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid.
        IOError: raised if the funds file was not found or could not be retrieved.
        RuntimeError: raised if the introduced country or fund did not match any of the indexed
            ones, or if the response did not contain the expected table.
        ConnectionError: raised if the POST request did not return a 200 status code.
        IndexError: raised if fund information was unavailable or not found.

    Examples:
        >>> investpy.get_fund_historical_data(fund='bbva multiactivo conservador pp', country='spain', from_date='01/01/2010', to_date='01/01/2019')
                         Open   High    Low  Close Currency
            Date
            2018-02-15  1.105  1.105  1.105  1.105      EUR
            2018-02-16  1.113  1.113  1.113  1.113      EUR
            2018-02-17  1.113  1.113  1.113  1.113      EUR
            2018-02-18  1.113  1.113  1.113  1.113      EUR
            2018-02-19  1.111  1.111  1.111  1.111      EUR

    """

    if not fund:
        raise ValueError("ERR#0029: fund parameter is mandatory and must be a valid fund name.")

    if not isinstance(fund, str):
        raise ValueError("ERR#0028: fund argument needs to be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError as err:
        raise ValueError(
            "ERR#0011: incorrect start date format, it should be 'dd/mm/yyyy'.") from err

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError as err:
        raise ValueError("ERR#0012: incorrect to_date format, it should be 'dd/mm/yyyy'.") from err

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    # Split the range into chunks of at most `max_years` years, one request per chunk.
    max_years = 20
    intervals = []
    while end_date.year - start_date.year > max_years:
        try:
            chunk_end = start_date.replace(year=start_date.year + max_years)
        except ValueError:
            # 29 February has no counterpart in a non-leap target year; clamp to the 28th
            # instead of letting an unrelated ValueError escape to the caller.
            chunk_end = start_date.replace(year=start_date.year + max_years, day=28)
        intervals.append({
            'start': start_date.strftime('%m/%d/%Y'),
            'end': chunk_end.strftime('%m/%d/%Y'),
        })
        start_date = chunk_end
    intervals.append({
        'start': start_date.strftime('%m/%d/%Y'),
        'end': end_date.strftime('%m/%d/%Y'),
    })
    interval_limit = len(intervals)

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'funds', 'funds.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0057: funds file not found or errored.")
    funds = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    if funds is None:
        raise IOError("ERR#0005: funds object not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())
    if country_key not in get_fund_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    funds = funds[funds['country'] == country_key]

    fund = fund.strip().lower()

    # Prefer an exact (lower-cased) match; fall back to an accent-insensitive one so that the
    # row that passed validation is also the row that gets read.
    lowered_names = funds['name'].str.lower()
    matches = lowered_names == fund
    if not matches.any():
        matches = (lowered_names.map(unidecode.unidecode, na_action='ignore')
                   == unidecode.unidecode(fund))
    if not matches.any():
        raise RuntimeError("ERR#0019: fund " + fund + " not found, check if it is correct.")
    match_idx = matches.idxmax()

    symbol = funds.loc[match_idx, 'symbol']
    id_ = funds.loc[match_idx, 'id']
    name = funds.loc[match_idx, 'name']
    fund_currency = funds.loc[match_idx, 'currency']

    header = symbol + ' Historical Data'
    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    # Rows of the whole range, kept in ascending date order while the chunks are processed.
    rows = []

    # The 1-based chunk counter must advance on every request: it is what tells a tolerable
    # empty chunk in the middle of the range apart from a fatal empty last chunk.
    for interval_counter, chunk in enumerate(intervals, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": chunk['start'],
            "end_date": chunk['end'],
            "interval_sec": interval,
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        chunk_rows = []
        for elements_ in path_:
            cells = elements_.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                if interval_counter < interval_limit:
                    continue
                raise IndexError("ERR#0008: fund information unavailable or not found.")

            info = [cell.get('data-real-value') for cell in cells]

            fund_date = datetime.fromtimestamp(int(info[0])).date()
            fund_close = float(info[1].replace(',', ''))
            fund_open = float(info[2].replace(',', ''))
            fund_high = float(info[3].replace(',', ''))
            fund_low = float(info[4].replace(',', ''))

            chunk_rows.append(
                Data(fund_date, fund_open, fund_high, fund_low, fund_close, None, fund_currency))

        # Rows are requested newest-first ("sort_ord": "DESC"); flip each chunk so that `rows`
        # stays ascending across chunk boundaries.
        rows.extend(reversed(chunk_rows))

    if not rows:
        raise IndexError("ERR#0008: fund information unavailable or not found.")

    if order in ['descending', 'desc']:
        rows.reverse()

    if as_json:
        json_ = {
            'name': name,
            'historical': [value.fund_as_json() for value in rows]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.fund_to_dict() for value in rows])
    df.set_index('Date', inplace=True)
    return df
def get_bond_historical_data(bond, country, from_date, to_date, as_json=False, order='ascending', debug=False):
    """
    Retrieve the historical data of a bond from Investing.com on the introduced date range.

    The requested date range is split into chunks of at most 20 years, one POST request (daily
    interval) is sent to Investing.com per chunk, and the rows scraped from every chunk are merged
    into a single result ordered by date, returned either as a :obj:`pandas.DataFrame` or as a
    :obj:`json` string.

    Args:
        bond (:obj:`str`): name of the bond to retrieve historical data from (case-insensitive).
        country (:obj:`str`): name of the country from where the bond is.
        from_date (:obj:`str`): date formatted as `dd/mm/yyyy`, since when data is going to be retrieved.
        to_date (:obj:`str`): date formatted as `dd/mm/yyyy`, until when data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            output format, a :obj:`pandas.DataFrame` if False and a :obj:`json` string if True.
        order (:obj:`str`, optional):
            order of the retrieved data, either `ascending` (`asc`) or `descending` (`desc`).
        debug (:obj:`bool`, optional):
            whether the `investpy` logger emits progress messages (True) or is disabled (False).

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The historical data of the specified bond for the whole requested range. The dataframe
            is indexed by `Date`; with default arguments it will look like::

                date || open | high | low | close
                -----||---------------------------
                xxxx || xxxx | xxxx | xxx | xxxxx

            but if `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        dd/mm/yyyy: {
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        IOError: raised if the bonds file was not found or could not be retrieved.
        RuntimeError: raised if the introduced bond/country did not match any of the existing
            ones, or if the response did not contain the expected table.
        ConnectionError: raised if Investing.com did not answer with a 200 status code.
        IndexError: raised if bond historical data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_bond_historical_data(bond='Argentina 3Y', country='argentina', from_date='01/01/2010', to_date='01/01/2019')
                        Open  High   Low  Close
            Date
            2011-01-03  4.15  4.15  4.15   5.15
            2011-01-04  4.07  4.07  4.07   5.45
            2011-01-05  4.27  4.27  4.27   5.71
            2011-01-10  4.74  4.74  4.74   6.27
            2011-01-11  4.30  4.30  4.30   6.56

    """

    if not bond:
        raise ValueError("ERR#0066: bond parameter is mandatory and must be a valid bond name.")

    if not isinstance(bond, str):
        raise ValueError("ERR#0067: bond argument needs to be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError(
            "ERR#0033: debug argument can just be a boolean value, either True or False.")

    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError as err:
        raise ValueError(
            "ERR#0011: incorrect from_date date format, it should be 'dd/mm/yyyy'.") from err

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError as err:
        raise ValueError("ERR#0012: incorrect to_date format, it should be 'dd/mm/yyyy'.") from err

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    # Split the range into chunks of at most `max_years` years, one request per chunk.
    # Chunk bounds are sent as mm/dd/yyyy, matching the other historical-data retrievers of this
    # module that post to the same www.investing.com endpoint.
    max_years = 20
    request_date_format = '%m/%d/%Y'
    intervals = []
    while end_date.year - start_date.year > max_years:
        try:
            chunk_end = start_date.replace(year=start_date.year + max_years)
        except ValueError:
            # 29 February has no counterpart in a non-leap target year; clamp to the 28th
            # instead of letting an unrelated ValueError escape to the caller.
            chunk_end = start_date.replace(year=start_date.year + max_years, day=28)
        intervals.append({
            'start': start_date.strftime(request_date_format),
            'end': chunk_end.strftime(request_date_format),
        })
        start_date = chunk_end
    intervals.append({
        'start': start_date.strftime(request_date_format),
        'end': end_date.strftime(request_date_format),
    })
    interval_limit = len(intervals)

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'bonds', 'bonds.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0064: bonds file not found or errored.")
    bonds = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    if bonds is None:
        raise IOError("ERR#0065: bonds object not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())
    if country_key not in get_bond_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    bonds = bonds[bonds['country'] == country_key]

    bond = bond.strip().lower()

    # Prefer an exact (lower-cased) match; fall back to an accent-insensitive one so that the
    # row that passed validation is also the row that gets read.
    lowered_names = bonds['name'].str.lower()
    matches = lowered_names == bond
    if not matches.any():
        matches = (lowered_names.map(unidecode.unidecode, na_action='ignore')
                   == unidecode.unidecode(bond))
    if not matches.any():
        raise RuntimeError("ERR#0068: bond " + bond + " not found, check if it is correct.")
    match_idx = matches.idxmax()

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced bond on Investing.com')

    id_ = bonds.loc[match_idx, 'id']
    name = bonds.loc[match_idx, 'name']
    full_name = bonds.loc[match_idx, 'full_name']

    logger.info(str(bond) + ' found on Investing.com')

    logger.info('Data parsing process starting...')

    header = full_name + " Bond Yield Historical Data"
    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    # Rows of the whole range, kept in ascending date order while the chunks are processed.
    rows = []

    for interval_counter, chunk in enumerate(intervals, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": chunk['start'],
            "end_date": chunk['end'],
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": user_agent.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        chunk_rows = []
        for elements_ in path_:
            cells = elements_.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # An empty chunk is only fatal when it is the last one of the range.
                if interval_counter < interval_limit:
                    continue
                raise IndexError("ERR#0069: bond information unavailable or not found.")

            info = [cell.get('data-real-value') for cell in cells]

            bond_date = datetime.fromtimestamp(int(info[0])).date()
            # Thousands separators are stripped like in the sibling retrievers.
            bond_close = float(info[1].replace(',', ''))
            bond_open = float(info[2].replace(',', ''))
            bond_high = float(info[3].replace(',', ''))
            bond_low = float(info[4].replace(',', ''))

            chunk_rows.append(
                Data(bond_date, bond_open, bond_high, bond_low, bond_close, None, None))

        # Rows are requested newest-first ("sort_ord": "DESC"); flip each chunk so that `rows`
        # stays ascending across chunk boundaries.
        rows.extend(reversed(chunk_rows))

    if not rows:
        raise IndexError("ERR#0069: bond information unavailable or not found.")

    if order in ['descending', 'desc']:
        rows.reverse()

    logger.info('Data parsing process finished...')

    if as_json:
        json_ = {
            'name': name,
            'historical': [value.bond_as_json() for value in rows]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.bond_to_dict() for value in rows])
    df.set_index('Date', inplace=True)
    return df
def get_stock_historical_data(stock, country, from_date, to_date, as_json=False, order='ascending', debug=False):
    """
    Retrieve the historical data of a stock from Investing.com on the introduced date range.

    The requested date range is split into chunks of at most 20 years, one POST request (daily
    interval) is sent to the Spanish Investing.com site per chunk, and the rows scraped from every
    chunk are merged into a single result ordered by date, returned either as a
    :obj:`pandas.DataFrame` or as a :obj:`json` string.

    Args:
        stock (:obj:`str`): symbol of the stock to retrieve historical data from (case-insensitive).
        country (:obj:`str`): name of the country from where the stock is.
        from_date (:obj:`str`): date formatted as `dd/mm/yyyy`, since when data is going to be retrieved.
        to_date (:obj:`str`): date formatted as `dd/mm/yyyy`, until when data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            output format, a :obj:`pandas.DataFrame` if False and a :obj:`json` string if True.
        order (:obj:`str`, optional):
            order of the retrieved data, either `ascending` (`asc`) or `descending` (`desc`).
        debug (:obj:`bool`, optional):
            whether the `investpy` logger emits progress messages (True) or is disabled (False).

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The historical data of the specified stock for the whole requested range. The dataframe
            is indexed by `Date`; with default arguments it will look like::

                date || open | high | low | close | volume | currency
                -----||-----------------------------------------------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but if `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        dd/mm/yyyy: {
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        IOError: raised if the stocks file was not found or could not be retrieved.
        RuntimeError: raised if the introduced stock/country did not match any of the existing
            ones, or if the response did not contain the expected table.
        ConnectionError: raised if Investing.com did not answer with a 200 status code.
        IndexError: raised if stock historical data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_stock_historical_data(stock='bbva', country='spain', from_date='01/01/2010', to_date='01/01/2019')
                         Open   High    Low  Close  Volume Currency
            Date
            2010-01-04  12.73  12.96  12.73  12.96       0      EUR
            2010-01-05  13.00  13.11  12.97  13.09       0      EUR
            2010-01-06  13.03  13.17  13.02  13.12       0      EUR
            2010-01-07  13.02  13.11  12.93  13.05       0      EUR
            2010-01-08  13.12  13.22  13.04  13.18       0      EUR

    """

    # Bound locally so that `datetime` is the class here, the way the sibling retrievers of this
    # module use the name, regardless of how the module-level name is bound.
    from datetime import datetime

    if not stock:
        raise ValueError("ERR#0013: stock parameter is mandatory and must be a valid stock name.")

    if not isinstance(stock, str):
        raise ValueError("ERR#0027: stock argument needs to be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError(
            "ERR#0033: debug argument can just be a boolean value, either True or False.")

    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError as err:
        raise ValueError(
            "ERR#0011: incorrect from_date date format, it should be 'dd/mm/yyyy'.") from err

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError as err:
        raise ValueError("ERR#0012: incorrect to_date format, it should be 'dd/mm/yyyy'.") from err

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    # Split the range into chunks of at most `max_years` years, one request per chunk.
    # Chunk bounds are sent as dd/mm/yyyy to the Spanish site used below.
    max_years = 20
    intervals = []
    while end_date.year - start_date.year > max_years:
        try:
            chunk_end = start_date.replace(year=start_date.year + max_years)
        except ValueError:
            # 29 February has no counterpart in a non-leap target year; clamp to the 28th
            # instead of letting an unrelated ValueError escape to the caller.
            chunk_end = start_date.replace(year=start_date.year + max_years, day=28)
        intervals.append({
            'start': start_date.strftime('%d/%m/%Y'),
            'end': chunk_end.strftime('%d/%m/%Y'),
        })
        start_date = chunk_end
    intervals.append({
        'start': start_date.strftime('%d/%m/%Y'),
        'end': end_date.strftime('%d/%m/%Y'),
    })
    interval_limit = len(intervals)

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'stocks', 'stocks.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0056: stocks file not found or errored.")
    stocks = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    if stocks is None:
        raise IOError("ERR#0001: stocks object not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())
    if country_key not in get_stock_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    stocks = stocks[stocks['country'] == country_key]

    stock = stock.strip().lower()

    # Prefer an exact (lower-cased) match; fall back to an accent-insensitive one so that the
    # row that passed validation is also the row that gets read.
    lowered_symbols = stocks['symbol'].str.lower()
    matches = lowered_symbols == stock
    if not matches.any():
        matches = (lowered_symbols.map(unidecode.unidecode, na_action='ignore')
                   == unidecode.unidecode(stock))
    if not matches.any():
        raise RuntimeError("ERR#0018: stock " + stock + " not found, check if it is correct.")
    match_idx = matches.idxmax()

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced stock on Investing.com')

    symbol = stocks.loc[match_idx, 'symbol']
    id_ = stocks.loc[match_idx, 'id']
    name = stocks.loc[match_idx, 'name']
    stock_currency = stocks.loc[match_idx, 'currency']

    logger.info(str(stock) + ' found on Investing.com')

    logger.info('Data parsing process starting...')

    header = "Datos históricos " + symbol
    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    # Volume cells may carry a magnitude suffix; checked in this order.
    volume_factors = (('K', 1e3), ('M', 1e6), ('B', 1e9))

    # Rows of the whole range, kept in ascending date order while the chunks are processed.
    rows = []

    for interval_counter, chunk in enumerate(intervals, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": chunk['start'],
            "end_date": chunk['end'],
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": user_agent.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        chunk_rows = []
        for elements_ in path_:
            info = [nested_.text_content() for nested_ in elements_.xpath(".//td")]

            if info[0] == 'No se encontraron resultados':
                # An empty chunk is only fatal when it is the last one of the range.
                if interval_counter < interval_limit:
                    continue
                raise IndexError("ERR#0007: stock information unavailable or not found.")

            stock_date = datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')

            # Numbers use '.' as thousands separator and ',' as decimal mark.
            stock_close = float(info[1].replace('.', '').replace(',', '.'))
            stock_open = float(info[2].replace('.', '').replace(',', '.'))
            stock_high = float(info[3].replace('.', '').replace(',', '.'))
            stock_low = float(info[4].replace('.', '').replace(',', '.'))

            volume_text = info[5]
            for suffix, factor in volume_factors:
                if suffix in volume_text:
                    stock_volume = int(
                        float(volume_text.replace(suffix, '').replace('.', '').replace(',', '.'))
                        * factor)
                    break
            else:
                # No magnitude suffix: keep a plain number as it is, and report 0 only when the
                # cell holds no number at all.
                try:
                    stock_volume = int(float(volume_text.replace('.', '').replace(',', '.')))
                except ValueError:
                    stock_volume = 0

            chunk_rows.append(
                Data(stock_date, stock_open, stock_high, stock_low, stock_close,
                     stock_volume, stock_currency))

        # Rows are requested newest-first ("sort_ord": "DESC"); flip each chunk so that `rows`
        # stays ascending across chunk boundaries.
        rows.extend(reversed(chunk_rows))

    if not rows:
        raise IndexError("ERR#0007: stock information unavailable or not found.")

    if order in ['descending', 'desc']:
        rows.reverse()

    logger.info('Data parsing process finished...')

    if as_json:
        json_ = {
            'name': name,
            'historical': [value.stock_as_json() for value in rows]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.stock_to_dict() for value in rows])
    df.set_index('Date', inplace=True)
    return df