Exemple #1
0
def test_user_agent_errors():
    """
    This function exercises the error paths of the user_agent functions.

    It calls `clear_file()` and then `delete_file()` (presumably emptying and
    then removing the user agent file -- confirm against their definitions),
    and after each of them it calls `get_random()`. Any error raised by
    `get_random()` is deliberately ignored, since the test only needs those
    code paths to be executed.
    """

    for reset_file in (clear_file, delete_file):
        reset_file()
        try:
            get_random()
        except Exception:
            # An error is an accepted outcome here. Only `Exception` is
            # swallowed so KeyboardInterrupt/SystemExit still propagate.
            pass
Exemple #2
0
def retrieve_currency_cross_continents():
    """
    This function scrapes https://www.investing.com/currencies/ and collects every entry of the
    `worldCurSimpleList` listing together with the tag found in its link (the link target without the
    `/currencies/` prefix). The listing is written to `currency_cross_continents.csv` inside the package resources
    and is also returned.

    Returns:
        :obj:`pandas.DataFrame` - currency_cross_continents:
            The resulting :obj:`pandas.DataFrame` has one row per listed entry, with the columns `country` (the
            link text) and `tag`.

    Raises:
        ConnectionError: raised if the GET request does not return a 200 status code.
        RuntimeError: raised if no entries were found in the retrieved page.
    """

    response = requests.get(
        'https://www.investing.com/currencies/',
        headers={
            "User-Agent": ua.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        },
    )

    if response.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(response.status_code) + ", try again later.")

    entries = fromstring(response.text).xpath(".//div[@class='worldCurSimpleList']/ul/li")

    countries = []

    for entry in entries:
        # Both fields come from the first link of the list item
        anchor = entry.xpath(".//a")[0]

        countries.append({
            'country': anchor.text_content().strip(),
            'tag': anchor.get("href").replace('/currencies/', ''),
        })

    if not countries:
        raise RuntimeError('ERR#0035: no countries could be retrieved!')

    csv_path = pkg_resources.resource_filename(
        __name__,
        '/'.join(('resources', 'currency_crosses', 'currency_cross_continents.csv')),
    )

    frame = pd.DataFrame(countries)
    frame.to_csv(csv_path, index=False)

    return frame
Exemple #3
0
def retrieve_index_info(tag):
    """
    This function requests the Investing.com page of an index and extracts its currency from it.

    Args:
        tag (:obj:`str`): is the identifying tag of the index, appended to https://www.investing.com/indices/.

    Returns:
        :obj:`dict` - index_data:
            The resulting :obj:`dict` holds a single `currency` key. Its value is the text of the last non-empty
            matching element of the page, or `None` when no such element was found::

                {
                    'currency': currency
                }

    Raises:
        ConnectionError: raised if the GET request does not return a 200 status code.
    """

    response = requests.get(
        "https://www.investing.com/indices/" + tag,
        headers={
            "User-Agent": ua.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        },
    )

    if response.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(response.status_code) + ", try again later.")

    index_data = {'currency': None}

    nodes = fromstring(response.text).xpath(".//div[contains(@class, 'bottom')]/span[@class='bold']")

    for node in nodes:
        text = node.text_content()

        # Empty elements are ignored; a later non-empty one overrides an earlier one
        if text:
            index_data['currency'] = text

    return index_data
Exemple #4
0
def get_isin_code(info):
    """
    This function requests the es.investing.com page of an equity and looks for its ISIN code in the summary rows
    of the page.

    Args:
        info (str): tag of the equity, appended to https://es.investing.com/equities/.

    Returns:
        str: the ISIN code with trailing whitespace removed, or None if no summary row is labelled 'ISIN'.

    Raises:
        ConnectionError: if the GET request does not return a 200 status code.
        IndexError: if a summary row has no label element, or the ISIN row has no value element.
    """

    response = requests.get(
        "https://es.investing.com/equities/" + info,
        headers={
            "User-Agent": ua.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        },
        timeout=5,
    )

    if response.status_code != 200:
        raise ConnectionError("ERR#015: error " + str(response.status_code) +
                              ", try again later.")

    rows = fromstring(response.text).xpath(
        ".//div[contains(@class, 'overViewBox')]"
        "/div[@id='quotes_summary_current_data']"
        "/div[@class='right']"
        "/div")

    for row in rows:
        try:
            label = row.xpath("span[not(@class)]")[0].text_content()

            if 'ISIN' not in label:
                continue

            code = row.xpath("span[@class='elp']")[0].text_content().rstrip()
        except IndexError:
            # Either lookup failing means the page does not have the expected layout
            raise IndexError("ERR#017: isin code unavailable or not found.")

        # Short pause before handing the code back
        time.sleep(.5)

        return code

    return None
Exemple #5
0
def retrieve_etf_info(tag):
    """
    This function retrieves additional information from the specified etf as indexed in Investing.com, in order to add
    more information to `etfs.csv` which can later be useful. Currently just the currency value is retrieved, since it
    is needed so to determine in which currency the historical data values are.

    Args:
       tag (:obj:`str`): is the tag of the etf, appended to https://es.investing.com/etfs/.

    Returns:
       :obj:`dict` - info:
           The resulting :obj:`dict` holds a single `currency` key. Its value is the text of the last non-empty
           matching element of the page, or `None` when no such element was found.

    Raises:
       ConnectionError: raised if GET requests does not return 200 status code.
    """

    url = "https://es.investing.com/etfs/" + tag

    head = {
        "User-Agent": ua.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    req = requests.get(url, headers=head)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                              ", try again later.")

    result = {
        'currency': None,
    }

    root_ = fromstring(req.text)

    path_ = root_.xpath(".//div[contains(@class, 'bottom')]"
                        "/span[@class='bold']")

    for element_ in path_:
        currency = element_.text_content()

        # Empty elements are ignored; nothing is printed, the value is only returned
        if currency:
            result['currency'] = currency

    return result
def scrape():
    """
    This function scrapes the first results of Google Search Engine for a
    fixed query ('ongs save oceans').

    Only the results listed under the "Resultados web" heading are read, and
    results whose link contains "wiki" or "youtube" are discarded. A result
    that lacks a title, a link or a description snippet is skipped instead of
    aborting the whole scrape.

    Returns:
        list of dict: one dict per kept result, with the keys `name`, `url`
        and `description` (all stripped of surrounding whitespace). The list
        is empty when nothing matched.
    """

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    query = 'ongs save oceans'
    query = query.replace(' ', '%20')

    url = 'https://www.google.com/search?q=' + query

    req = requests.get(url, headers=head)

    root = fromstring(req.text)
    sections = root.xpath(f".//div[@data-async-context='query:{query}']/div")

    organizations = []
    for section in sections:
        for heading in section.xpath(".//h2"):
            if heading.text_content() != "Resultados web":
                continue

            for entry in section.xpath(
                    ".//div[@class='srg']/div[@class='g']"):
                titles = entry.xpath(".//h3")
                links = entry.xpath(".//a")

                # A result without title or link cannot be reported
                if not titles or not links:
                    continue

                website = links[0].get("href")

                if website is None:
                    continue

                if "wiki" in website or "youtube" in website:
                    continue

                snippets = entry.xpath(".//span[@class='st']")

                if not snippets:
                    continue

                organizations.append({
                    "name": titles[0].text_content().strip(),
                    "url": website.strip(),
                    "description": snippets[0].text_content().strip(),
                })

    return organizations
Exemple #7
0
def get_fund_data(fund_tag):
    """
    This function retrieves additional information from a fund as listed on
    Investing.com, reading the issuer, the ISIN code and the asset class from
    the summary rows of the fund page.

    Args:
        fund_tag (str): is the identifying tag of the specified fund, appended
            to https://www.investing.com/funds/.

    Returns:
        dict: contains the retrieved data if found, if not, the corresponding
        fields are filled with None values.

        The Return dictionary if the data was retrieved will look like::

            {
                'issuer': issuer_value,
                'isin': isin_value,
                'asset class': asset_value
            }

    Raises:
        ConnectionError: if GET requests does not return 200 status code.
        IndexError: if a summary row has no label element (ERR#017), or if the
            row of a field has no value element (ERR#023 for the issuer,
            ERR#024 for the ISIN code, ERR#025 for the asset class).

    """

    url = "https://www.investing.com/funds/" + fund_tag

    head = {
        "User-Agent": ua.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    req = requests.get(url, headers=head, timeout=5)

    if req.status_code != 200:
        raise ConnectionError("ERR#015: error " + str(req.status_code) +
                              ", try again later.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//div[contains(@class, 'overViewBox')]"
                        "/div[@id='quotes_summary_current_data']"
                        "/div[@class='right']"
                        "/div")

    result = {
        'issuer': None,
        'isin': None,
        'asset class': None,
    }

    # (label text to look for, result key, error raised when the value is missing).
    # The order matters: the first label found in a row decides its field.
    fields = (
        ('Issuer', 'issuer',
         "ERR#023: fund issuer unavailable or not found."),
        ('ISIN', 'isin',
         "ERR#024: fund isin code unavailable or not found."),
        ('Asset Class', 'asset class',
         "ERR#025: fund asset class unavailable or not found."),
    )

    for p in path_:
        labels = p.xpath("span[not(@class)]")

        if not labels:
            raise IndexError("ERR#017: isin code unavailable or not found.")

        label = labels[0].text_content()

        for marker, key, error in fields:
            if marker not in label:
                continue

            values = p.xpath("span[@class='elp']")

            # Raised directly, so the field-specific message reaches the
            # caller instead of being replaced by the generic ERR#017 one
            if not values:
                raise IndexError(error)

            result[key] = values[0].get('title').rstrip()
            break

    return result
Exemple #8
0
def get_fund_names():
    """
    This function retrieves the funds listed at
    https://es.investing.com/funds/spain-funds?&issuer_filter=0, completes every
    one of them with the data returned by `get_fund_data` and stores the
    listing as `funds.csv` inside the package resources.

    Returns:
        list of dict: one dict per listed fund, with the keys `name`, `symbol`,
        `tag`, `id`, `issuer`, `isin` and `asset class`. The list is empty if
        the listing table has no rows.

    Raises:
        ConnectionError: if the GET request does not return a 200 status code.
    """

    response = requests.get(
        "https://es.investing.com/funds/spain-funds?&issuer_filter=0",
        headers={
            "User-Agent": ua.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        },
    )

    if response.status_code != 200:
        raise ConnectionError("ERR#015: error " + str(response.status_code) +
                              ", try again later.")

    rows = fromstring(response.text).xpath(".//table[@id='etfs']/tbody/tr")

    results = []

    for row in rows:
        fund_id = row.get('id').replace('pair_', '')
        symbol = row.xpath(".//td[contains(@class, 'symbol')]")[0].get('title')

        # Name and tag are both read from the first link of the row
        link = row.xpath(".//a")[0]
        name = link.get('title').rstrip()
        tag = link.get('href').replace('/funds/', '')

        details = get_fund_data(tag)

        results.append({
            "name": name,
            "symbol": symbol,
            "tag": tag,
            "id": fund_id,
            "issuer": details['issuer'],
            "isin": details['isin'],
            "asset class": details['asset class'],
        })

    csv_path = pkg_resources.resource_filename(
        __name__, '/'.join(('resources', 'es', 'funds.csv')))

    pd.DataFrame(results).to_csv(csv_path, index=False)

    return results
Exemple #9
0
def retrieve_currency_crosses(test_mode=False):
    """
    This function retrieves all the available `currency_crosses` indexed on Investing.com, so to retrieve data from
    them which will be used later for inner functions for data retrieval. For every row of
    `currency_cross_continents.csv` the page https://www.investing.com/currencies/ plus the row tag is requested,
    and every currency cross listed there is completed with the output of `retrieve_currency_cross_info`. Each
    currency cross name is stored just once. The meta-information is both returned as a :obj:`pandas.DataFrame`
    and, unless `test_mode` is set, stored on a CSV file on the package resources folder.

    Args:
        test_mode (:obj:`bool`):
            variable to avoid time waste on travis-ci since it just needs to test the basics in order to improve code
            coverage. When `True` just the first row of the first table of the first country is processed and no
            CSV file is written.

    Returns:
        :obj:`pandas.DataFrame` - currency_crosses:
            The resulting :obj:`pandas.DataFrame` contains all the currency crosses meta-information if found, if not,
            an empty :obj:`pandas.DataFrame` will be returned.

            In the case that the retrieval process of currencies was successfully completed, the resulting
            :obj:`pandas.DataFrame` will look like::

                name | full_name | tag | id | base | second | base_name | second_name
                -----|-----------|-----|----|------|--------|-----------|-------------
                xxxx | xxxxxxxxx | xxx | xx | xxxx | xxxxxx | xxxxxxxxx | xxxxxxxxxxx

    Raises:
        ValueError: raised if `test_mode` is not a :obj:`bool`.
        FileNotFoundError: raised if `currency_cross_continents.csv` file does not exist.
        ConnectionError: raised if GET requests did not return 200 status code.
    """

    if not isinstance(test_mode, bool):
        raise ValueError('ERR#0041: test_mode can just be either True or False')

    resource_package = __name__
    resource_path = '/'.join(('resources', 'currency_crosses', 'currency_cross_continents.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        countries = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0048: currency_cross_continents.csv file not found")

    results = list()

    # Names already stored in `results`, kept as a set so the duplicate check does not rescan the whole list
    seen_names = set()

    for _, row in countries.iterrows():
        head = {
            "User-Agent": ua.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        url = "https://www.investing.com/currencies/" + row['tag']

        req = requests.get(url, headers=head)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        root_ = fromstring(req.text)

        for table_ in root_.xpath(".//table[@id='curr_table']"):
            for row_ in table_.xpath(".//tbody/tr"):
                for anchor_ in row_.xpath('.//a'):
                    tag_ = anchor_.get('href')

                    if '/currencies/' not in str(tag_):
                        continue

                    tag_ = tag_.replace('//www.investing.com/currencies/', '')

                    name = anchor_.text.strip()

                    if name in seen_names:
                        continue

                    parts = name.split('/')
                    base = parts[0]
                    second = parts[1]

                    info = retrieve_currency_cross_info(tag_)

                    # `None` is returned for a 404 page; the cross is left out and stays unseen
                    if info is None:
                        continue

                    # A second name containing "..." is rebuilt from the full name
                    if "..." in info['second_name']:
                        info['second_name'] = info['full_name'].replace(name, '').\
                            replace(info['base_name'], '').replace(' -  ', '')

                    results.append({
                        'name': name,
                        'full_name': info['full_name'],
                        'tag': tag_,
                        'id': info['id'],
                        'base': base,
                        'base_name': info['base_name'],
                        'second': second,
                        'second_name': info['second_name'],
                    })
                    seen_names.add(name)

                if test_mode is True:
                    break
            if test_mode is True:
                break
        if test_mode is True:
            break

    resource_path = '/'.join(('resources', 'currency_crosses', 'currency_crosses.csv'))
    file = pkg_resources.resource_filename(resource_package, resource_path)

    df = pd.DataFrame(results)

    if test_mode is False:
        df.to_csv(file, index=False)

    return df
Exemple #10
0
def retrieve_currency_cross_info(tag):
    """
    This function requests the Investing.com page of a currency cross and reads from it the pair id, the names of
    the base and second currencies and the full name of the currency cross. If the request answers with a 404 code
    nothing is retrieved and `None` is returned, so the caller can leave that currency cross out.

    Args:
        tag (:obj:`str`):
            is the tag of the currency cross, appended to https://www.investing.com/currencies/.

    Returns:
        :obj:`dict` - info:
            The resulting :obj:`dict` holds the keys `id`, `base_name`, `second_name` and `full_name`; a value that
            was not found in the page stays as `None`. `None` is returned instead of the :obj:`dict` when the
            request answers with a 404 code.

    Raises:
        ConnectionError: raised if the GET request returns a status code other than 200 or 404.
    """

    response = requests.get(
        "https://www.investing.com/currencies/" + tag,
        headers={
            "User-Agent": ua.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        },
    )

    if response.status_code == 404:
        return None
    if response.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(response.status_code) + ", try again later.")

    info = {
        'id': None,
        'base_name': None,
        'second_name': None,
        'full_name': None
    }

    root = fromstring(response.text)

    # If several wrappers match, the id of the last one is kept
    for wrapper in root.xpath(".//div[contains(@class, 'instrumentHead')]/div/div[contains(@class, 'headBtnWrapper')]"):
        info['id'] = wrapper.get('data-pair-id')

    summary_rows = root.xpath(".//div[contains(@class, 'overViewBox')]"
                              "/div[@id='quotes_summary_current_data']"
                              "/div[@class='right']"
                              "/div")

    for summary_row in summary_rows:
        label = summary_row.xpath("span[not(@class)]")[0].text_content()

        if 'Base' in label:
            info['base_name'] = summary_row.xpath("span[@class='elp']")[0].text_content().rstrip()
        elif 'Second' in label:
            info['second_name'] = summary_row.xpath("span[@class='elp']")[0].text_content().rstrip()

    for heading in root.xpath(".//section[@id='leftColumn']/div[@class='instrumentHead']/h1"):
        info['full_name'] = heading.text_content().rstrip()

    return info
Exemple #11
0
    def _city_ranking(self, city):
        """
        Build a ranking of the GitHub users whose location matches `city`.

        The GitHub user search is queried page by page; for every user found
        the user details and all the repository pages are requested. For each
        repository the `stats/contributors` endpoint is read in order to add
        up the weekly commit counts of 2019 and to count the repository
        language.

        Args:
            city (str): location used in the `location:` search qualifier.

        Returns:
            pandas.DataFrame: one row per user with the columns `username`,
            `username_url`, `avatar_url`, `location`, `public_contributions`,
            `top_language` and `used_languages`, sorted by
            `public_contributions` in descending order and indexed by `ranks`
            (starting at 1).
        """

        def build_header():
            # Every request is sent with the token and a newly picked User-Agent
            return {
                'Authorization': 'token ' + self.oauth_token,
                'User-Agent': get_random(),
            }

        def get_until_ok(target_url):
            # Request `target_url`, waiting one second between attempts,
            # until a 200 status code is answered
            header = build_header()
            response = requests.get(url=target_url, headers=header)

            while response.status_code != 200:
                time.sleep(1)
                response = requests.get(url=target_url, headers=header)

            return response

        search_url = 'https://api.github.com/search/users?q=location:' + city

        req = requests.get(url=search_url, headers=build_header())

        # 30 is the page size this code relies on (no per_page is sent)
        pages = math.ceil(req.json()['total_count'] / 30)

        total_users = list()

        for page in range(1, pages + 1):
            req = requests.get(url=search_url + '&page=' + str(page),
                               headers=build_header())

            # NOTE(review): this request is not retried; a response without
            # 'items' (e.g. an error payload) raises KeyError here.
            users = req.json()['items']

            for user in users:
                username = user['login']
                username_url = user['html_url']
                avatar_url = user['avatar_url']

                # The retry targets the user URL itself, so the details are
                # never read from a response of a different endpoint
                user_details = get_until_ok(user['url']).json()

                location = user_details['location']

                public_contributions = 0
                languages = dict()

                repo_page = 1

                while True:
                    repos = get_until_ok(
                        user['repos_url'] + '?page=' + str(repo_page)).json()

                    for repo in repos:
                        stats_url = ('https://api.github.com/repos/' +
                                     repo['full_name'] + '/stats/contributors')

                        while True:
                            req = requests.get(url=stats_url,
                                               headers=build_header())

                            # 204 and 403 are taken as "no statistics available"
                            if req.status_code in [204, 403]:
                                break

                            if req.status_code != 200:
                                # GitHub answers 202 while the statistics are
                                # being computed; wait instead of busy-looping.
                                # NOTE(review): any other persistent status
                                # keeps this loop running forever.
                                time.sleep(1)
                                continue

                            stats = req.json()

                            if stats:
                                # NOTE(review): only the first contributor
                                # entry is read, whoever that contributor is.
                                for week in stats[0]['weeks']:
                                    date_value = datetime.datetime.fromtimestamp(
                                        week['w'])

                                    if date_value.year == 2019:
                                        public_contributions += int(week['c'])

                                language = repo['language']

                                if language is not None:
                                    languages[language] = languages.get(
                                        language, 0) + 1

                            break

                    # A page that is not full is the last one
                    if len(repos) != 30:
                        break

                    repo_page += 1

                if languages:
                    top_language = max(languages.items(),
                                       key=operator.itemgetter(1))[0]

                    used_languages = ', '.join(languages.keys())
                else:
                    top_language = ''

                    used_languages = ''

                total_users.append({
                    'username': username,
                    'username_url': username_url,
                    'avatar_url': avatar_url,
                    'location': location,
                    'public_contributions': public_contributions,
                    'top_language': top_language,
                    'used_languages': used_languages,
                })

        df = pd.DataFrame(total_users)

        df.sort_values(by='public_contributions',
                       ascending=False,
                       inplace=True)

        df['ranks'] = list(range(1, len(df) + 1))

        df.set_index('ranks', inplace=True)

        return df
Exemple #12
0
def retrieve_indices(test_mode=False):
    """
    This function retrieves the `indices` listed on Investing.com for every country returned by
    `index_countries_as_list`, requesting https://www.investing.com/indices/ plus the country name (blank spaces
    replaced by hyphens) plus `-indices`. Every index found is completed with the currency returned by
    `retrieve_index_info`. The meta-information is both returned as a :obj:`pandas.DataFrame` and, unless
    `test_mode` is set, stored on a CSV file on the package resources folder.

    Args:
        test_mode (:obj:`bool`):
            variable to avoid time waste on travis-ci since it just needs to test the basics in order to improve code
            coverage. When `True` just the first row of the first country is processed and no CSV file is written.

    Returns:
        :obj:`pandas.DataFrame` - indices:
            The resulting :obj:`pandas.DataFrame` contains all the indices meta-information if found, if not, an
            empty :obj:`pandas.DataFrame` will be returned.

            In the case that the retrieval process of indices was successfully completed, the resulting
            :obj:`pandas.DataFrame` will look like::

                country | name | full_name | tag | id | currency
                --------|------|-----------|-----|----|----------
                xxxxxxx | xxxx | xxxxxxxxx | xxx | xx | xxxxxxxx

    Raises:
        ValueError: raised if `test_mode` is not a :obj:`bool`.
        ConnectionError: raised if GET requests did not return 200 status code.
    """

    if not isinstance(test_mode, bool):
        raise ValueError('ERR#0041: test_mode can just be either True or False')

    results = []

    for country in index_countries_as_list():
        headers = {
            "User-Agent": ua.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        response = requests.get(
            "https://www.investing.com/indices/" + country.replace(' ', '-') + "-indices",
            headers=headers,
        )

        if response.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(response.status_code) + ", try again later.")

        rows = fromstring(response.text).xpath(".//table[@id='cr1']/tbody/tr")

        for table_row in rows:
            index_id = table_row.get('id').replace('pair_', '')

            for anchor in table_row.xpath('.//a'):
                href = anchor.get('href')

                # Only the links that point to an index are of interest
                if '/indices/' not in str(href):
                    continue

                tag = href.replace('/indices/', '')
                full_name = anchor.get('title').replace(' (CFD)', '').strip()
                name = anchor.text.strip()

                info = retrieve_index_info(tag)

                results.append({
                    'country': country,
                    'name': name,
                    'full_name': full_name,
                    'tag': tag,
                    'id': index_id,
                    'currency': info['currency'],
                })

            # In test mode a single table row is enough
            if test_mode:
                break

        if test_mode:
            break

    csv_path = pkg_resources.resource_filename(
        __name__,
        '/'.join(('resources', 'indices', 'indices.csv')),
    )

    frame = pd.DataFrame(results)

    if not test_mode:
        frame.to_csv(csv_path, index=False)

    return frame
Exemple #13
0
def retrieve_funds(test_mode=False):
    """
    This function retrieves all the available `funds` listed in Investing.com https://es.investing.com/funds, together
    with the meta-information attached to each of them. The country names are read from the `fund_countries.csv`
    resource file, the fund listing of every country is scraped, and the detail page of every listed fund is queried
    through `retrieve_fund_info`. The result is both returned as a :obj:`pandas.DataFrame` and, unless `test_mode` is
    enabled, stored as `funds.csv` in the package resources folder.

    Args:
        test_mode (:obj:`bool`):
            variable to avoid time waste on travis-ci since it just needs to test the basics in order to improve code
            coverage. When True, only the first fund of every country listing is processed and no CSV file is written.

    Returns:
        :obj:`pandas.DataFrame` - funds:
            The resulting :obj:`pandas.DataFrame` contains one row per retrieved fund with the fields::

                country | name | symbol | tag | id | issuer | isin | asset_class | currency

            If no fund was found, an empty :obj:`pandas.DataFrame` is returned.

    Raises:
        ValueError: if `test_mode` is not a :obj:`bool`.
        FileNotFoundError: if the `fund_countries.csv` resource file does not exist.
        ConnectionError: if the GET request of a country listing does not return 200 status code.
        IndexError: if a listing row does not contain the expected symbol cell or link.
    """

    import warnings

    if not isinstance(test_mode, bool):
        raise ValueError(
            'ERR#0041: test_mode can just be either True or False')

    resource_package = __name__
    resource_path = '/'.join(('resources', 'funds', 'fund_countries.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        countries = pd.read_csv(
            pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0042: fund_countries.csv file not found")

    # Upper bound of detail-page requests per fund: a fund whose page keeps
    # failing must not block the whole retrieval process forever.
    max_info_attempts = 5
    info_fields = ('issuer', 'isin', 'asset_class', 'currency')

    results = list()

    for country in countries['country'].tolist():
        head = {
            "User-Agent": ua.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        url = 'https://es.investing.com/funds/' + country.replace(
            ' ', '-') + '-funds?&issuer_filter=0'

        req = requests.get(url, headers=head)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                                  ", try again later.")

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='etfs']/tbody/tr")

        # The listing uses short names for these two countries, the stored
        # data uses the long ones.
        if country == 'uk':
            country_name = 'united kingdom'
        elif country == 'usa':
            country_name = 'united states'
        else:
            country_name = country

        # NOTE(review): unlike retrieve_etfs, only this inner loop stops in
        # test mode, so every country listing is still requested - confirm
        # this is intended.
        for elements_ in path_:
            id_ = elements_.get('id').replace('pair_', '')
            symbol = elements_.xpath(
                ".//td[contains(@class, 'symbol')]")[0].get('title')

            link_ = elements_.xpath(".//a")[0]
            name = link_.get('title').strip()
            tag = link_.get('href').replace('/funds/', '')

            info = None

            for _ in range(max_info_attempts):
                try:
                    info = retrieve_fund_info(tag)
                except Exception:
                    # Best-effort retry; Exception (not a bare except) keeps
                    # KeyboardInterrupt/SystemExit able to stop the process.
                    continue

                if info is not None:
                    break

            if info is None:
                warnings.warn(
                    "fund information of '" + str(tag) + "' could not be "
                    "retrieved after " + str(max_info_attempts) +
                    " attempts, empty values will be stored.")
                info = dict.fromkeys(info_fields)

            issuer = info['issuer']
            asset_class = info['asset_class']

            obj = {
                "country": country_name,
                "name": name,
                "symbol": symbol,
                "tag": tag,
                "id": id_,
                "issuer": issuer.strip() if issuer is not None else issuer,
                "isin": info['isin'],
                "asset_class":
                asset_class.lower() if asset_class is not None else asset_class,
                "currency": info['currency'],
            }

            results.append(obj)

            if test_mode is True:
                break

    resource_package = __name__
    resource_path = '/'.join(('resources', 'funds', 'funds.csv'))
    file = pkg_resources.resource_filename(resource_package, resource_path)

    df = pd.DataFrame(results)

    if test_mode is False:
        df.to_csv(file, index=False)

    return df
Exemple #14
0
def retrieve_fund_countries(test_mode=False):
    """
    This function retrieves all the country names indexed in Investing.com with available funds to retrieve data
    from, via Web Scraping https://www.investing.com/funds/ where the available countries are listed in the country
    selector. The `world-funds` entry of the selector is skipped.

    Args:
        test_mode (:obj:`bool`):
            variable to avoid time waste on travis-ci since it just needs to test the basics in order to improve code
            coverage. When True, the resulting data is not written to the `fund_countries.csv` resource file.

    Returns:
        :obj:`pandas.DataFrame` - fund_countries:
            The resulting :obj:`pandas.DataFrame` contains all the available countries (`country`) with their
            corresponding ID (`id`), which will be used later by investpy.

    Raises:
        ValueError: raised if `test_mode` is not a :obj:`bool`.
        ConnectionError: raised if the GET request does not return 200 status code.
        RuntimeError: raised if no countries were retrieved from Investing.com fund listing.
    """

    if not isinstance(test_mode, bool):
        raise ValueError(
            'ERR#0041: test_mode can just be either True or False')

    headers = {
        "User-Agent": ua.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = 'https://www.investing.com/funds/'

    req = requests.get(url, headers=headers)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                              ", try again later.")

    root = fromstring(req.text)
    path = root.xpath("//div[@id='country_select']/select/option")

    countries = list()

    for element in path:
        value = element.get('value')

        # The world listing is an aggregate, not a country.
        if value == '/funds/world-funds':
            continue

        countries.append({
            'country':
            value.replace('/funds/', '').replace('-funds', '').replace(
                '-', ' ').strip(),
            'id':
            int(element.get('country_id')),
        })

    # Fail loudly instead of overwriting the resource file with an empty
    # listing when the page layout changed or nothing could be parsed.
    if not countries:
        raise RuntimeError('ERR#0035: no countries could be retrieved!')

    resource_package = __name__
    resource_path = '/'.join(('resources', 'funds', 'fund_countries.csv'))
    file = pkg_resources.resource_filename(resource_package, resource_path)

    df = pd.DataFrame(countries)

    if test_mode is False:
        df.to_csv(file, index=False)

    return df
Exemple #15
0
def retrieve_fund_info(tag):
    """
    This function scrapes the Investing.com page of a single fund and extracts its issuer, its ISIN code, its asset
    class and its currency.

    Args:
        tag (:obj:`str`): is the identifying tag of the specified fund, appended to the funds base URL.

    Returns:
        :obj:`dict` - fund_data:
            The resulting :obj:`dict` contains the retrieved data if found, if not, the corresponding
            fields are left as `None`. It always has the following keys::

                {
                    'issuer': issuer_value,
                    'isin': isin_value,
                    'asset_class': asset_value,
                    'currency': currency_value
                }

    Raises:
        ConnectionError: if GET requests does not return 200 status code.
        IndexError: if a summary row does not contain the expected label or value element.
    """

    fund_url = "https://www.investing.com/funds/" + tag

    request_headers = {
        "User-Agent": ua.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    response = requests.get(fund_url, headers=request_headers)

    if response.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(response.status_code) +
                              ", try again later.")

    fund_data = dict.fromkeys(('issuer', 'isin', 'asset_class', 'currency'))

    document = fromstring(response.text)

    summary_rows = document.xpath(".//div[contains(@class, 'overViewBox')]"
                                  "/div[@id='quotes_summary_current_data']"
                                  "/div[@class='right']"
                                  "/div")

    # Label text looked for in every summary row, paired with the result key
    # it fills in; checked in this order, the first match wins for the row.
    labelled_fields = (
        ('Issuer', 'issuer'),
        ('ISIN', 'isin'),
        ('Asset Class', 'asset_class'),
    )

    for row in summary_rows:
        label = row.xpath("span[not(@class)]")[0].text_content()

        for marker, key in labelled_fields:
            if marker in label:
                value_node = row.xpath("span[@class='elp']")[0]
                fund_data[key] = value_node.get('title').rstrip()
                break

    bold_nodes = document.xpath(".//div[contains(@class, 'bottom')]"
                                "/span[@class='bold']")

    # When several bold spans carry text, the last one is kept.
    for node in bold_nodes:
        text = node.text_content()
        if text:
            fund_data['currency'] = text

    return fund_data
Exemple #16
0
def retrieve_equity_countries(test_mode=False):
    """
    This function retrieves all the country names indexed in Investing.com with available equities to retrieve data
    from, via Web Scraping https://www.investing.com/equities/ where the available countries are listed, and from their
    names the specific equity website of every country is retrieved in order to get the ID which will later be used
    when retrieving all the information from the available equities in every country. Estonia, Latvia and Lithuania
    are listed but their pages are not requested.

    Args:
        test_mode (:obj:`bool`):
            variable to avoid time waste on travis-ci since it just needs to test the basics in order to improve code
            coverage. When True, only the first listed country is processed and no CSV file is written.

    Returns:
        :obj:`pandas.DataFrame` - equity_countries:
            The resulting :obj:`pandas.DataFrame` contains all the available countries (`country`) with their
            corresponding ID (`id`), which will be used later by investpy.

    Raises:
        ValueError: raised if any of the introduced arguments is not valid.
        ConnectionError: raised if any GET request to Investing.com does not return 200 status code.
        RuntimeError: raised if no countries were retrieved from Investing.com equity listing.
        IndexError: raised if the page of a country does not contain the `smlID` input holding its ID.
    """

    if not isinstance(test_mode, bool):
        raise ValueError(
            'ERR#0041: test_mode can just be either True or False')

    headers = {
        "User-Agent": ua.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = 'https://www.investing.com/equities/'

    req = requests.get(url, headers=headers)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                              ", try again later.")

    root = fromstring(req.text)
    path = root.xpath("//*[@id='countryDropdownContainer']/div")

    # Containers of the dropdown which do not hold country links.
    skipped_containers = ('regionsSelectorContainer', 'cdregion0')

    countries = list()

    for element in path:
        if element.get('id') in skipped_containers:
            continue

        for value in element.xpath(".//ul/li/a"):
            countries.append(
                value.get('href').replace('/equities/',
                                          '').replace('-', ' ').strip())

    if not countries:
        raise RuntimeError('ERR#0035: no countries could be retrieved!')

    results = list()

    for country in countries:
        if country not in ['estonia', 'latvia', 'lithuania']:
            # Country names are stored with spaces, but the site addresses
            # them with hyphens (as found in the original href).
            country_url = url + country.replace(' ', '-')

            req = requests.get(country_url, headers=headers)

            if req.status_code != 200:
                raise ConnectionError("ERR#0015: error " +
                                      str(req.status_code) +
                                      ", try again later.")

            root = fromstring(req.text)
            path = root.xpath(".//*[@id='leftColumn']/input[@id='smlID']")

            country_id = path[0].get('value')

            results.append({'country': country, 'id': country_id})

        if test_mode:
            break

    resource_package = __name__
    resource_path = '/'.join(('resources', 'equities', 'equity_countries.csv'))
    file = pkg_resources.resource_filename(resource_package, resource_path)

    df = pd.DataFrame(results)

    if test_mode is False:
        df.to_csv(file, index=False)

    return df
Exemple #17
0
def retrieve_equity_info(tag):
    """
    This function scrapes the es.investing.com page of a single equity and extracts its ISIN code, the currency its
    values are expressed in and its symbol, so to include that additional information in the equities listing.

    Args:
        tag (:obj:`str`): is the tag of the equity to retrieve the information from as indexed by Investing.com.

    Returns:
        :obj:`dict` - info:
            The resulting :obj:`dict` has the keys `isin`, `currency` and `symbol`; every value which could not be
            found in the page is left as `None`.

    Raises:
        ConnectionError: raised if GET requests does not return 200 status code.
        IndexError: raised if a summary row does not contain the expected label or value element.
    """

    equity_url = "https://es.investing.com/equities/" + tag

    request_headers = {
        "User-Agent": ua.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    response = requests.get(equity_url, headers=request_headers)

    if response.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(response.status_code) +
                              ", try again later.")

    info = dict.fromkeys(('isin', 'currency', 'symbol'))

    document = fromstring(response.text)

    summary_rows = document.xpath(".//div[contains(@class, 'overViewBox')]"
                                  "/div[@id='quotes_summary_current_data']"
                                  "/div[@class='right']"
                                  "/div")

    for row in summary_rows:
        label = row.xpath("span[not(@class)]")[0].text_content()

        if 'ISIN' in label:
            value_node = row.xpath("span[@class='elp']")[0]
            info['isin'] = value_node.text_content().rstrip()

    bold_nodes = document.xpath(".//div[contains(@class, 'bottom')]"
                                "/span[@class='bold']")

    # When several bold spans carry text, the last one is kept.
    for node in bold_nodes:
        currency_text = node.text_content()
        if currency_text:
            info['currency'] = currency_text

    header_nodes = document.xpath(".//div[@class='instrumentHeader']" "/h2")

    # The header reads "Resumen <symbol>" on the Spanish site.
    for node in header_nodes:
        header_text = node.text_content()
        if header_text:
            info['symbol'] = header_text.replace('Resumen ', '').strip()

    return info
Exemple #18
0
def retrieve_etfs(test_mode=False):
    """
    This function retrieves the etfs listed on Investing.com for every country found in the `etf_countries.csv`
    resource file, scraping the listing of each country from https://es.investing.com/etfs/<country>-etfs and asking
    `retrieve_etf_info` for the currency of every etf. The gathered meta-information is both returned as a
    :obj:`pandas.DataFrame` and, unless `test_mode` is enabled, stored as `etfs.csv` in the package resources folder.
    Note that maybe some of the information contained in the resulting :obj:`pandas.DataFrame` is useless as it is
    just used for inner function purposes.

    Args:
        test_mode (:obj:`bool`):
            variable to avoid time waste on travis-ci since it just needs to test the basics in order to improve code
            coverage. When True, the retrieval stops after the first etf of the first country and no CSV is written.

    Returns:
        :obj:`pandas.DataFrame` - etfs:
            The resulting :obj:`pandas.DataFrame` contains one row per etf with the fields::

                country | country_code | name | symbol | tag | id | currency

            If no etf was found, an empty :obj:`pandas.DataFrame` is returned.

    Raises:
        ValueError: if any of the introduced arguments is not valid.
        FileNotFoundError: raised when `etf_countries.csv` file is missing.
        ConnectionError: if GET requests does not return 200 status code.
    """

    if not isinstance(test_mode, bool):
        raise ValueError('ERR#0041: test_mode can just be either True or False')

    head = {
        "User-Agent": ua.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    countries_package = __name__
    countries_path = '/'.join(('resources', 'etfs', 'etf_countries.csv'))

    if not pkg_resources.resource_exists(countries_package, countries_path):
        raise FileNotFoundError("ERR#0024: etf_countries file not found")

    markets = pd.read_csv(pkg_resources.resource_filename(countries_package, countries_path))

    final = list()

    for _, market in markets.iterrows():
        country = market['country']
        country_code = market['code']

        listing_url = "https://es.investing.com/etfs/" + market['country'].replace(" ", "-") + "-etfs"

        response = requests.get(listing_url, headers=head)

        if response.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(response.status_code) + ", try again later.")

        listing_rows = fromstring(response.text).xpath(".//table[@id='etfs']"
                                                       "/tbody"
                                                       "/tr")

        for listing_row in listing_rows:
            etf_id = listing_row.get('id').replace('pair_', '')
            symbol = listing_row.xpath(".//td[contains(@class, 'symbol')]")[0].get('title')

            link = listing_row.xpath(".//a")[0]
            tag = link.get('href').replace('/etfs/', '')

            info = retrieve_etf_info(tag)

            final.append({
                "country": country,
                "country_code": country_code,
                "name": link.text.strip(),
                "symbol": symbol,
                "tag": tag,
                "id": etf_id,
                "currency": info['currency'],
            })

            # A single etf is enough when just testing.
            if test_mode:
                break

        # A single country is enough when just testing.
        if test_mode:
            break

    output_path = '/'.join(('resources', 'etfs', 'etfs.csv'))
    file = pkg_resources.resource_filename(__name__, output_path)

    df = pd.DataFrame(final)

    if not test_mode:
        df.to_csv(file, index=False)

    return df
Exemple #19
0
def retrieve_index_countries(test_mode=False):
    """
    This function retrieves all the countries indexed in Investing.com with available indices to retrieve data
    from, via Web Scraping https://www.investing.com/indices/ where the available countries are listed in the
    country selector. The `world-indices` entry of the selector is skipped.

    Args:
        test_mode (:obj:`bool`):
            variable to avoid time waste on travis-ci since it just needs to test the basics in order to improve code
            coverage. When True, the resulting data is not written to the `index_countries.csv` resource file.

    Returns:
        :obj:`pandas.DataFrame` - index_countries:
            The resulting :obj:`pandas.DataFrame` contains, for every available country, the name taken from the
            option value (`country`) and the lowercased, ASCII-transliterated option text (`country_name`).

    Raises:
        ValueError: raised if any of the introduced arguments is not valid.
        ConnectionError: raised if the GET request does not return 200 status code.
        RuntimeError: raised if no countries were retrieved from Investing.com index listing.
    """

    if not isinstance(test_mode, bool):
        raise ValueError('ERR#0041: test_mode can just be either True or False')

    headers = {
        "User-Agent": ua.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    response = requests.get('https://www.investing.com/indices/', headers=headers)

    if response.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(response.status_code) + ", try again later.")

    options = fromstring(response.text).xpath("//select[@name='country']/option")

    countries = [
        {
            'country': option.get('value').replace('/indices/', '').replace('-indices', '').replace('-', ' ').strip(),
            'country_name': unidecode.unidecode(option.text_content().strip().lower()),
        }
        for option in options
        # The world listing is an aggregate, not a country.
        if option.get('value') != '/indices/world-indices'
    ]

    if not countries:
        raise RuntimeError('ERR#0035: no countries could be retrieved!')

    output_path = '/'.join(('resources', 'indices', 'index_countries.csv'))
    file = pkg_resources.resource_filename(__name__, output_path)

    df = pd.DataFrame(countries)

    if not test_mode:
        df.to_csv(file, index=False)

    return df
Exemple #20
0
def get_equity_names():
    """
    This function retrieves the equities listed by the `StocksFilter` endpoint of es.investing.com (the listing
    shown at https://es.investing.com/equities/spain), looks up the ISIN code of each of them through
    `get_isin_code` and stores the result as `equities.csv` in the package resources folder.

    Returns
    -------
        :returns a list of dictionaries, one per equity, with the keys
        `name`, `full_name`, `tag`, `isin` and `id`. The `isin` value is
        None when `get_isin_code` raised ConnectionError or IndexError.

    Raises
    ------
        ConnectionError: if the GET request does not return 200 status code.
    """

    params = {
        "noconstruct": "1",
        "smlID": "10119",
        "sid": "",
        "tabletype": "price",
        "index_id": "all"
    }

    head = {
        "User-Agent": ua.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    response = requests.get("https://es.investing.com/equities/StocksFilter",
                            params=params,
                            headers=head)

    if response.status_code != 200:
        raise ConnectionError("ERR#015: error " + str(response.status_code) +
                              ", try again later.")

    table_rows = fromstring(response.text).xpath(
        ".//table[@id='cross_rate_markets_stocks_1']"
        "/tbody"
        "/tr")

    results = list()

    for table_row in table_rows:
        equity_id = table_row.get('id').replace('pair_', '')

        # Every link found in the row produces its own entry.
        for link in table_row.xpath('.//a'):
            tag = link.get('href').replace('/equities/', '')
            full_name = link.get('title').replace(' (CFD)', '')

            try:
                isin = get_isin_code(tag)
            except (ConnectionError, IndexError):
                # The listing is still useful without the ISIN code.
                isin = None

            results.append({
                "name": link.text,
                "full_name": full_name.rstrip(),
                "tag": tag,
                "isin": isin,
                "id": equity_id
            })

    output_path = '/'.join(('resources', 'es', 'equities.csv'))
    file = pkg_resources.resource_filename(__name__, output_path)

    pd.DataFrame(results).to_csv(file, index=False)

    return results
Exemple #21
0
def get_etf(country):
    """
    This function retrieves the etfs listed on es.investing.com for the specified country, as found at
    https://es.investing.com/etfs/<country>-etfs. The country has to be listed in the `etf_markets.csv` resource
    file; the etfs of the first matching row are scraped, stored as `etfs.csv` in the package resources folder
    and returned.

    Parameters
    ----------
        :param country: name of the country to retrieve the etfs from, matched in lowercase.

    Returns
    -------
        :returns a list of dictionaries, one per etf, with the keys `name`,
        `symbol` (set to "undefined" when the listing has none), `tag` and `id`.

    Raises
    ------
        IOError: if `country` is not a string, or if it is not listed in `etf_markets.csv`.
        FileNotFoundError: if the `etf_markets.csv` resource file is missing.
        ConnectionError: if the GET request does not return 200 status code.
    """

    if not isinstance(country, str):
        raise IOError("ERR#028: specified country value not valid.")

    head = {
        "User-Agent": ua.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    markets_path = '/'.join(('resources', 'etfs', 'etf_markets.csv'))

    if not pkg_resources.resource_exists(__name__, markets_path):
        raise FileNotFoundError("ERR#027: available_etfs file not found")

    etfs = pd.read_csv(pkg_resources.resource_filename(__name__, markets_path))

    for _, market in etfs.iterrows():
        if market['country'] != country.lower():
            continue

        # Read but not used afterwards; kept so the `code` column is still
        # accessed for the matching row.
        country_code = market['code']

        listing_url = "https://es.investing.com/etfs/" + market['country'].replace(
            " ", "-") + "-etfs"

        response = requests.get(listing_url, headers=head, timeout=15)

        if response.status_code != 200:
            raise ConnectionError("ERR#015: error " +
                                  str(response.status_code) +
                                  ", try again later.")

        listing_rows = fromstring(response.text).xpath(
            ".//table[@id='etfs']" "/tbody" "/tr")

        results = list()

        for listing_row in listing_rows:
            etf_id = listing_row.get('id').replace('pair_', '')
            symbol = listing_row.xpath(
                ".//td[contains(@class, 'symbol')]")[0].get('title')

            link = listing_row.xpath(".//a")[0]
            tag = link.get('href').replace('/etfs/', '')

            results.append({
                "name": link.text,
                "symbol": symbol if symbol else "undefined",
                "tag": tag,
                "id": etf_id
            })

        output_path = '/'.join(('resources', 'etfs', 'etfs.csv'))
        file = pkg_resources.resource_filename(__name__, output_path)

        pd.DataFrame(results).to_csv(file, index=False)

        return results

    raise IOError(
        "ERR#029: specified country etfs not found or unable to retrieve.")