def get_trailing_returns(self, fund_symbol):
        """
        Scrape the trailing returns of a fund.

        Requests the URL built for Section.TRAILING, takes the first <table> of
        the returned HTML and reads the row whose <th> text equals fund_symbol.
        The <td> texts of that row are paired, in order, with the period labels
        listed below (zip stops at the shorter of the two sequences).

        Args:
            fund_symbol: symbol to look up; compared verbatim with the row header text.

        Returns:
            dict mapping period label ("1-Month" ... "15-Year") to the cell text.
            Empty when no row header equals fund_symbol; when several rows match,
            the last one wins.

        Raises:
            FundException.SymbolDoesNotExistError: status code is not 200 or the body is empty.
            FundException.UIChangedError: the page contains no <table>.
        """
        timespans = [
            "1-Month", "3-Month", "6-Month", "YTD", "1-Year", "3-Year",
            "5-Year", "10-Year", "15-Year"
        ]
        url = Util.build_url(Section.TRAILING, fund_symbol)
        raw = requests.get(url)
        if raw.status_code != 200 or raw.text == "":
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for trailing returns: Symbol does not exist: {fund_symbol}"
            )

        soup = BeautifulSoup(raw.text, 'html.parser')
        table = soup.find("table")
        if table is None:
            raise FundException.UIChangedError(
                f"Error while retrieving data for trailing returns: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
            )

        response = {}
        for row in table.findAll("tr"):
            row_header = row.find("th")
            if row_header is not None and row_header.text == fund_symbol:
                # The cells of the fund's own row become the values of the dict
                trailing_returns = [col.text for col in row.findAll("td")]
                response = dict(zip(timespans, trailing_returns))

        return response
    def get_morningstar_overall_rating(self, fund_symbol):
        """
        Gets the overall Morningstar rating
        Process:
            1. Look up the performanceId of the fund via self.extract_performance_id
               (per the original notes it lives in head > meta name = performanceId)
            2. Hit the URL built for Section.OVERALL_RATING with that id and read the
               "starRating" field of the JSON body

        Ex:
            FSDAX's performanceId is 0P00002PPP

        Args:
            fund_symbol: symbol whose rating is requested.

        Returns:
            dict with the single key "starRating", holding the value from the JSON body.

        Raises:
            FundException.SymbolDoesNotExistError: status code is not 200 or the body is empty.
            FundException.UIChangedError: the JSON body has no "starRating" key.
            Whatever raw.json() raises for a body that is not valid JSON propagates unchanged.
        """
        performance_id = self.extract_performance_id(fund_symbol)

        url = Util.build_url(Section.OVERALL_RATING, fund_symbol, 0,
                             performance_id)
        raw = requests.get(url)
        if raw.status_code != 200 or raw.text == "":
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for Morningstar overall rating: Symbol does not exist: {fund_symbol}"
            )

        data = raw.json()
        if "starRating" not in data:
            raise FundException.UIChangedError(
                f"Error while retrieving data for Morningstar overall rating: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
            )

        return {"starRating": data["starRating"]}
    def scrape_historical_column_data(self, fund_symbol, url, page):
        """
        Locate the column elements of the "Annual Total Return (%) History" table.

        Args:
            fund_symbol: symbol being scraped; only used in error messages.
            url: not used by this method; kept so existing callers keep working.
            page: response object whose .content is parsed into an lxml tree.

        Returns:
            list of the child elements of the node that follows the heading's parent,
            minus the first two (per the original notes: a placeholder column and
            the current-year column).

        Raises:
            FundException.SymbolDoesNotExistError: the heading is missing and the page
                contains a "Symbols similar to " span.
            FundException.UIChangedError: the heading is missing without that span, or
                the heading has no following sibling to read columns from.
        """
        ui_changed_message = f"Error while retrieving data for historical returns: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"

        #Build lxml tree from webpage
        tree = html.fromstring(page.content)

        #Find the span that says Annual Total Return (%) History
        heading_spans = tree.xpath(
            './/span[text()="Annual Total Return (%) History"]')

        if not heading_spans:
            redirected_to_error_page = tree.xpath(
                './/span[contains(text(),"Symbols similar to ")]')
            if redirected_to_error_page:
                raise FundException.SymbolDoesNotExistError(
                    f"Error while retrieving data for historical returns: Symbol does not exist: {fund_symbol}"
                )
            raise FundException.UIChangedError(ui_changed_message)

        #The table we want is the sibling that follows the heading's parent
        h3 = heading_spans[0].getparent()
        table = h3.getnext() if h3 is not None else None
        if table is None:
            #Heading found but nothing follows it: the layout is not the one we scrape
            raise FundException.UIChangedError(ui_changed_message)

        #Children are in document order, so dropping the first 2 removes the unwanted columns
        return list(table)[2:]
 def get_fund_historical_returns(self, fund_symbol):
     """
     Download the page for Section.HISTORICAL and hand it to
     self.retrieve_historical_returns, returning whatever that helper returns.

     Raises FundException.SymbolDoesNotExistError when the status code is not
     200 or the body is empty.
     """
     url = Util.build_url(Section.HISTORICAL, fund_symbol)
     page = requests.get(url)
     if page.status_code != 200 or page.text == "":
         raise FundException.SymbolDoesNotExistError(
             f"Error while retrieving data for historical returns: Symbol does not exist: {fund_symbol}"
         )
     return self.retrieve_historical_returns(fund_symbol, url, page)
    def get_10000_growth(self, fund_symbol):
        """
        Scrape the "growth of $10000" series for a fund.

        The URL built for Section.GROWTH is expected to return JSON whose "html"
        entry is an HTML fragment. Inside it, the <div class="mod-ui-chart--dynamic">
        carries a data-mod-config attribute holding a dict literal; the first
        value of its "data" mapping is the list of points.

        Args:
            fund_symbol: symbol to look up.

        Returns:
            dict mapping date (the point's "date" with its last 9 characters,
            e.g. "T00:00:00", removed) to the point's "value". Empty when the
            "data" mapping is empty.

        Raises:
            FundException.SymbolDoesNotExistError: status code is not 200, the body
                is empty, or the body is not valid JSON.
            FundException.UIChangedError: the chart div or its data-mod-config
                attribute is missing or empty.
        """
        url = Util.build_url(Section.GROWTH, fund_symbol)
        raw = requests.get(url)
        if raw.status_code != 200 or raw.text == "":
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for $10000 growth: Symbol does not exist: {fund_symbol}"
            )

        try:
            raw_json = raw.json()
        except ValueError as err:
            # JSON decoding failures from requests are ValueError subclasses
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for $10000 growth: Symbol does not exist: {fund_symbol}"
            ) from err

        #Interpret HTML using BeautifulSoup, then extract out data from <div data-mod-config = ...., class = mod-ui-chart--dynamic>
        soup = BeautifulSoup(raw_json["html"], 'html.parser')
        chart_div = soup.find("div", {"class": "mod-ui-chart--dynamic"})
        chart_config = ""
        if chart_div is not None:
            chart_config = chart_div.get("data-mod-config", "")
        if not chart_config:
            # Missing div, missing attribute and empty attribute all mean the page layout changed
            raise FundException.UIChangedError(
                f"Error while retrieving data for $10000 growth: UI changed for symbol name: {fund_symbol}; thus, we cannot scrape"
            )

        #Convert dictionary in string form to an actual dictionary
        growth_json = ast.literal_eval(chart_config)
        internal_data = growth_json["data"]
        if len(internal_data) < 1:
            return {}

        #Access first element in the dict, which is the list of values
        growths = next(iter(internal_data.values()))

        #Key = date (YYYY-MM-DD, removing the 9-character "T00:00:00" from the end), value = expected dollar value at that date
        return {point["date"][:-9]: point["value"] for point in growths}
Example #6
0
 def pullData(self, url, fund_symbol):
     """
     GET the given url, read the "htmlStr" entry of its JSON body and return
     it after passing it through self.filterSpecialChars.

     Raises FundException.SymbolDoesNotExistError when the status code is not
     200 or the body is empty.
     """
     reply = requests.get(url)
     if reply.status_code != 200 or reply.text == "":
         raise FundException.SymbolDoesNotExistError(
             f"Error while retrieving data for holdings data: Symbol does not exist: {fund_symbol}"
         )
     payload = reply.json()
     return self.filterSpecialChars(payload["htmlStr"])
Example #7
0
    def get_holdings_stats(self, fund_symbol):
        """
        Entry point for the holdings data of a fund (top 25 positions).

        The symbol is upper-cased, checked with Util.validate_format, and the URL
        built for Section.HOLDINGS_PAGE_TOP_25 is passed to self.extractHoldings;
        that helper's result is returned as-is.

        Author's notes on what the extraction is meant to cover (the extraction
        itself lives in extractHoldings, not here):
            Equity view tab (table id = equity_holding_tab, <tbody> id holding_epage0)
                -Name
                -% portfolio weight
                -Shares owned
                -Shares changed
                -YTD return (could be positive, negative, float, or blank (-) )
                -P/E (could be positive, negative, float, or blank (-) )
            Equity prices tab (table id = equityPrice_holding_tab)
                -Price
                -G/L % (gain/loss percent for the day)
        Comparisons between 2+ mutual funds will compare Name and % portfolio weight only

        Each of the four FundException types below is caught and raised again as
        a new instance of the same type wrapping the caught one.
        """
        symbol = fund_symbol.upper()

        try:
            Util.validate_format(symbol)
            holdings_url = Util.build_url(Section.HOLDINGS_PAGE_TOP_25, symbol)
            return self.extractHoldings(holdings_url, symbol)

        except FundException.ImproperSymbolFormatError as err:
            raise FundException.ImproperSymbolFormatError(err)
        except FundException.SymbolDoesNotExistError as err:
            raise FundException.SymbolDoesNotExistError(err)
        except FundException.UIChangedError as err:
            raise FundException.UIChangedError(err)
        except FundException.SourceEndpointChangedError as err:
            raise FundException.SourceEndpointChangedError(err)
    def get_capture_ratios(self, fund_symbol, timespan=None):
        """
        Gets upside and downside capture ratios for 3 year, 5 year, 10 year, 15 year

        Requests the URL built for Section.CAPTURE_RATIOS, takes the first <table>
        and reads the row whose <th> text equals fund_symbol. The first cell of
        that row (1-Year per the original notes) is dropped so the result lines up
        with the other risk stats.

        Args:
            fund_symbol: symbol to look up; compared verbatim with the row header text.
            timespan: ignored; all periods come from a single request. Optional so
                that callers passing only the symbol keep working.

        Returns:
            dict mapping "3-Year"/"5-Year"/"10-Year"/"15-Year" to
            {"upside_ratio": str, "downside_ratio": str}. Empty when no row header
            equals fund_symbol; when several rows match, the last one wins.

        Raises:
            FundException.SymbolDoesNotExistError: status code is not 200 or the body is empty.
            FundException.UIChangedError: the page contains no <table>.
        """
        timespans = ["3-Year", "5-Year", "10-Year", "15-Year"]

        url = Util.build_url(Section.CAPTURE_RATIOS, fund_symbol)
        raw = requests.get(url)
        if raw.status_code != 200 or raw.text == "":
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for risk capture ratios: Symbol does not exist: {fund_symbol}"
            )

        soup = BeautifulSoup(raw.text, 'html.parser')
        table = soup.find("table")
        if table is None:
            raise FundException.UIChangedError(
                f"Error while retrieving data for risk capture ratios: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
            )

        response = {}
        for row in table.findAll("tr"):
            row_header = row.find("th")
            if row_header is None or row_header.text != fund_symbol:
                continue

            stats = []
            for col in row.findAll("td"):
                #Values are stuck together. Ex: Convert "145.9576.71" --> "145.95", "76.71"
                #The split point is two digits past the first "."
                #NOTE(review): a cell without "." makes find() return -1, so the split lands at index 2
                cell_text = col.text
                split_at = cell_text.find(".") + 3
                stats.append({
                    "upside_ratio": cell_text[:split_at],
                    "downside_ratio": cell_text[split_at:]
                })

            #Skip 1-Year for consistency, since other stats only have 3year, 5year, 10year, 15year.
            #Slicing (rather than del stats[0]) keeps a row without cells from raising IndexError.
            response = dict(zip(timespans, stats[1:]))

        return response
    def get_asset_allocation_data(self, fund_symbol):
        """
        Gets the asset allocation data necessary for the pie chart
        Mimics Morningstar's asset allocation pie chart on the quotes page
        Note: On morningstar, there are 2 possible layouts:
            1. Pie chart:
                -7 rows in response (1 blank, 6 with 2 columns each: field name and value)
                -ex: PRHSX
            2. Table:
                -8 rows in response (2 irrelevant, 6 with 4 columns each: field name, net, short, long)
                -We'll only use field name and net, to match consistency with pie chart scenario
                -Contains the phrase "Note: Contains derivatives or short positions"
                -ex: FSDAX

        Args:
            fund_symbol: symbol to look up.

        Returns:
            dict mapping each recognised field name found in the first <table> to
            the text of the first non-empty cell that follows it in the same row.
            Rows that carry a field name but no value are skipped.

        Raises:
            FundException.SymbolDoesNotExistError: status code is not 200 or the body is empty.
            FundException.UIChangedError: the page contains no <table>.
        """
        url = Util.build_url(Section.ASSET_ALLOCATION, fund_symbol)
        raw = requests.get(url)
        if raw.status_code != 200 or raw.text == "":
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for asset allocation: Symbol does not exist: {fund_symbol}"
            )

        soup = BeautifulSoup(raw.text, 'html.parser')
        table = soup.find("table")
        if table is None:
            raise FundException.UIChangedError(
                f"Error while retrieving data for asset allocation: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
            )

        # Singular and plural spellings are both listed, presumably because the two layouts label rows differently
        fields = [
            "Cash", "US Stock", "US Stocks", "Non US Stock",
            "Non US Stocks", "Bond", "Bonds", "Other"
        ]
        response = {}
        for row in table.findAll("tr"):
            row_data = [
                col.text for col in row.findAll("td") if col.text != ""
            ]
            # Need both the label and a value; a label-only row used to raise IndexError
            if len(row_data) >= 2 and row_data[0] in fields:
                response[row_data[0]] = row_data[1]

        return response
    def get_mpt_stats(self, fund_symbol, timespan=None):
        """
        Retrieves the MPT stats (category index, R-squared, beta, alpha, Treynor
        ratio, currency) for every period in 3, 5, 10 and 15 years.

        One request is made per period, to the URL built for Section.RISK_MPT with
        the period's year count. In each page the first row of the first <table>
        whose <th> text equals fund_symbol supplies the values.

        Args:
            fund_symbol: symbol to look up; compared verbatim with the row header text.
            timespan: ignored; every period is always fetched. Kept (and optional)
                so existing callers keep working.

        Returns:
            dict mapping period label ("3-Year" ...) to a dict of field name ->
            stripped cell text. A period is absent when its page has no matching row.

        Raises:
            FundException.SymbolDoesNotExistError: a status code is not 200 or a body is empty.
            FundException.UIChangedError: a page contains no <table>.
        """
        timespans = ["3-Year", "5-Year", "10-Year", "15-Year"]
        fields = [
            "Category Index", "R-Squared", "Beta", "Alpha", "Treynor Ratio",
            "Currency"
        ]
        response = {}

        # The loop variable gets its own name so it no longer overwrites the parameter
        for period in timespans:
            year = period.split("-")[0]
            url = Util.build_url(Section.RISK_MPT, fund_symbol, year)
            raw = requests.get(url)
            if raw.status_code != 200 or raw.text == "":
                raise FundException.SymbolDoesNotExistError(
                    f"Error while retrieving data for risk mpt: Symbol does not exist: {fund_symbol}"
                )

            soup = BeautifulSoup(raw.text, 'html.parser')
            table = soup.find("table")
            if table is None:
                raise FundException.UIChangedError(
                    f"Error while retrieving data for risk mpt: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
                )

            for row in table.findAll("tr"):
                row_header = row.find("th")
                if row_header is not None and row_header.text == fund_symbol:
                    stats = [col.text.strip() for col in row.findAll("td")]
                    response[period] = dict(zip(fields, stats))
                    # Only the first matching row counts
                    break

        return response
Example #11
0
    def get_mpt_and_volatility_data(self, fund_symbol, timespan):
        """
        For a given timespan, gets ALL the MPT + Volatility data

        Two requests are made, one for Section.RISK_MPT and one for
        Section.RISK_VOLATILITY, each with the year count taken from the part of
        timespan before the first "-". In each page, the first row of the first
        <table> whose <th> text equals fund_symbol is passed to
        self.extract_column_data together with the section.

        Args:
            fund_symbol: symbol to look up; compared verbatim with the row header text.
            timespan: period label such as "3-Year".

        Returns:
            dict merging the results of extract_column_data for both sections; on a
            key clash the volatility value wins because it is merged last. A section
            contributes nothing when its page has no matching row.

        Raises:
            FundException.SymbolDoesNotExistError: a status code is not 200 or a body is empty.
            FundException.UIChangedError: a page contains no <table>.
        """
        timespan_dict = {}
        year = timespan.split("-")[0]

        for section in (Section.RISK_MPT, Section.RISK_VOLATILITY):
            url = Util.build_url(section, fund_symbol, year)
            raw = requests.get(url)
            if raw.status_code != 200 or raw.text == "":
                raise FundException.SymbolDoesNotExistError(
                    f"Error while retrieving data for risk mpt: Symbol does not exist: {fund_symbol}"
                )

            soup = BeautifulSoup(raw.text, 'html.parser')
            table = soup.find("table")
            if table is None:
                raise FundException.UIChangedError(
                    f"Error while retrieving data for risk mpt: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
                )

            for row in table.findAll("tr"):
                row_header = row.find("th")
                if row_header is not None and row_header.text == fund_symbol:
                    #Accumulate key-value pairs of this section into timespan_dict
                    timespan_dict.update(self.extract_column_data(row, section))
                    # Only the first matching row counts
                    break

        return timespan_dict
Example #12
0
    def get_risk_stats(self, fund_symbol):
        """
        Grabs risk stats. Grabs 8 things, for 4 time periods (3 year, 5 year, 10 year, 15 year):
            1. Alpha
            2. Beta
            3. R-squared
            4. Standard deviation
            5. Sharpe ratio
            6. Sortino ratio
            7. Treynor ratio
            8. Capture ratios

        Args:
            fund_symbol: symbol to look up; upper-cased and validated with
                Util.validate_format before any request is made.

        Returns:
            dict mapping period label ("3-Year" ...) to a dict that merges the
            capture ratios for that period with the MPT and volatility stats.
            A period without capture ratios still gets its MPT/volatility stats.

        Raises:
            FundException.ImproperSymbolFormatError, SymbolDoesNotExistError,
            UIChangedError, SourceEndpointChangedError: caught and raised again as a
            new instance of the same type wrapping the caught one.
        """
        fund_symbol = fund_symbol.upper()
        try:
            Util.validate_format(fund_symbol)

            #Add data from capture ratios first. We can get all data in capture ratios in 1 GET request, but need multiple GET requests for mpt and volatility
            response = self.get_capture_ratios(fund_symbol)
            for timespan in ("3-Year", "5-Year", "10-Year", "15-Year"):
                #Extract and aggregate data for MPT stats and Volatility stats
                mpt_and_volatility = self.get_mpt_and_volatility_data(
                    fund_symbol, timespan)

                #Add these values into the current timespan dict along with the capture ratios.
                #.get() covers periods the capture-ratio result does not contain, which used to raise KeyError.
                response[timespan] = {
                    **response.get(timespan, {}),
                    **mpt_and_volatility
                }

        except FundException.ImproperSymbolFormatError as e:
            raise FundException.ImproperSymbolFormatError(e)
        except FundException.SymbolDoesNotExistError as e:
            raise FundException.SymbolDoesNotExistError(e)
        except FundException.UIChangedError as e:
            raise FundException.UIChangedError(e)
        except FundException.SourceEndpointChangedError as e:
            raise FundException.SourceEndpointChangedError(e)

        return response
    def get_volatility_stats(self, fund_symbol, timespan=None):
        """
        Retrieves standard deviation, return, sharpe ratio, sortino ratio for
        every period in 3, 5, 10 and 15 years.

        One request is made per period, to the URL built for
        Section.RISK_VOLATILITY with the period's year count. In each page, rows of
        the first <table> whose <th> text equals fund_symbol supply the values;
        the last cell of the row is discarded.

        Args:
            fund_symbol: symbol to look up; compared verbatim with the row header text.
            timespan: ignored; every period is always fetched. Kept (and optional)
                so existing callers keep working.

        Returns:
            dict mapping period label ("3-Year" ...) to a dict of field name ->
            stripped cell text. A period is absent when its page has no matching
            row; when several rows match, the last one wins.

        Raises:
            FundException.SymbolDoesNotExistError: a status code is not 200 or a body is empty.
            FundException.UIChangedError: a page contains no <table>.
        """
        timespans = ["3-Year", "5-Year", "10-Year", "15-Year"]
        fields = [
            "Standard Deviation", "Return", "Sharpe Ratio", "Sortino Ratio"
        ]
        response = {}

        # The loop variable gets its own name so it no longer overwrites the parameter
        for period in timespans:
            year = period.split("-")[0]
            url = Util.build_url(Section.RISK_VOLATILITY, fund_symbol, year)
            raw = requests.get(url)
            if raw.status_code != 200 or raw.text == "":
                raise FundException.SymbolDoesNotExistError(
                    f"Error while retrieving data for risk volatility statistics: Symbol does not exist: {fund_symbol}"
                )

            soup = BeautifulSoup(raw.text, 'html.parser')
            table = soup.find("table")
            if table is None:
                raise FundException.UIChangedError(
                    f"Error while retrieving data for risk volatility statistics: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
                )

            for row in table.findAll("tr"):
                row_header = row.find("th")
                if row_header is not None and row_header.text == fund_symbol:
                    stats = [col.text.strip() for col in row.findAll("td")]
                    #Remove the unnecessary trailing value; slicing tolerates a row without cells
                    response[period] = dict(zip(fields, stats[:-1]))

        return response
Example #14
0
    def get_section_data(self, section, fund_symbol):
        """
        Download the page for one section and return what
        self.extract_column_data makes of it.

        For Section.OVERALL_RATING the URL additionally needs the fund's
        performance id, obtained from self.extract_performance_id; every other
        section is built from the section and symbol alone.

        Raises FundException.SymbolDoesNotExistError when the status code is not
        200 or the body is empty.
        """
        if section != Section.OVERALL_RATING:
            target = Util.build_url(section, fund_symbol)
        else:
            perf_id = self.extract_performance_id(fund_symbol)
            target = Util.build_url(section, fund_symbol, 0, perf_id)

        reply = requests.get(target)
        if reply.status_code != 200 or reply.text == "":
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for General stats, section {section}: Symbol does not exist: {fund_symbol}"
            )

        print("200 and not empty")
        parsed = BeautifulSoup(reply.text, 'html.parser')
        return self.extract_column_data(section, parsed, reply)
    def get_performance_stats(self, fund_symbol):
        """
        Collect the three performance data sets of a fund into one dict.

        The symbol is upper-cased and validated with Util.validate_format, then
        the result holds, in this order:
            "trailing_returns"   -> self.get_trailing_returns
            "historical_returns" -> self.get_fund_historical_returns
            "10000_growth_data"  -> self.get_10000_growth

        Each of the four FundException types below is caught and raised again as
        a new instance of the same type wrapping the caught one.
        """
        symbol = fund_symbol.upper()
        try:
            Util.validate_format(symbol)
            return {
                "trailing_returns": self.get_trailing_returns(symbol),
                "historical_returns": self.get_fund_historical_returns(symbol),
                "10000_growth_data": self.get_10000_growth(symbol),
            }

        except FundException.ImproperSymbolFormatError as err:
            raise FundException.ImproperSymbolFormatError(err)
        except FundException.SymbolDoesNotExistError as err:
            raise FundException.SymbolDoesNotExistError(err)
        except FundException.UIChangedError as err:
            raise FundException.UIChangedError(err)
        except FundException.SourceEndpointChangedError as err:
            raise FundException.SourceEndpointChangedError(err)
Example #16
0
    def get_general_stats(self, fund_symbol):
        """
        Collect the general stats of a mutual fund, one entry per section.

        Per the original notes the sections cover (source = Morningstar, quotes page):
            1. Price (NAV)
            2. Min. initial investment
            3. Expense ratio
            4. Asset allocation pie chart data (Morningstar's pie chart: Cash, US stock, Non-US stock, bonds, etc)
            5. Morningstar overall rating
            6. Morningstar risk vs category
            7. Morningstar return vs category
            8. Morningstar category
            9. Turnover ratio

        The symbol is upper-cased and validated with Util.validate_format. The
        result maps str(section) to self.get_section_data(section, symbol) for
        GENERAL_STATS, ASSET_ALLOCATION, RISK_RETURN_VS_CATEGORY and
        OVERALL_RATING, in that order.

        Each of the four FundException types below is caught and raised again as
        a new instance of the same type wrapping the caught one.
        """
        symbol = fund_symbol.upper()

        try:
            Util.validate_format(symbol)
            wanted = (
                Section.GENERAL_STATS,
                Section.ASSET_ALLOCATION,
                Section.RISK_RETURN_VS_CATEGORY,
                Section.OVERALL_RATING,
            )
            return {
                str(section): self.get_section_data(section, symbol)
                for section in wanted
            }

        except FundException.ImproperSymbolFormatError as err:
            raise FundException.ImproperSymbolFormatError(err)
        except FundException.SymbolDoesNotExistError as err:
            raise FundException.SymbolDoesNotExistError(err)
        except FundException.UIChangedError as err:
            raise FundException.UIChangedError(err)
        except FundException.SourceEndpointChangedError as err:
            raise FundException.SourceEndpointChangedError(err)
    def get_risk_return_vs_category(self, fund_symbol):
        """
        Gets the:
            1. overall risk compared to its category, as judged by Morningstar
            2. overall return compared to its category, as judged by Morningstar
        Found on quotes page

        Args:
            fund_symbol: symbol to look up.

        Returns:
            dict with up to two keys, "Risk vs.Category" and "Return vs.Category".
            For each row of the first <table> whose first non-blank cell contains
            one of those labels, the value is the next non-blank cell of that row.
            Rows that carry a label but no value are skipped.

        Raises:
            FundException.SymbolDoesNotExistError: status code is not 200 or the body is empty.
            FundException.UIChangedError: the page contains no <table>.
        """
        url = Util.build_url(Section.RISK_RETURN_VS_CATEGORY, fund_symbol)
        raw = requests.get(url)
        if raw.status_code != 200 or raw.text == "":
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for risk/return vs. category: Symbol does not exist: {fund_symbol}"
            )

        soup = BeautifulSoup(raw.text, 'html.parser')
        table = soup.find("table")
        if table is None:
            raise FundException.UIChangedError(
                f"Error while retrieving data for risk/return vs. category: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
            )

        fields = ["Risk vs.Category", "Return vs.Category"]
        response = {}
        for row in table.findAll("tr"):
            cell_texts = [col.text.strip() for col in row.findAll("td")]
            row_data = [text for text in cell_texts if text != ""]
            # Need both the label and a value; a label-only row used to raise IndexError
            if len(row_data) < 2:
                continue
            for field in fields:
                # Substring match: the label cell may hold more text than the label itself
                if field in row_data[0]:
                    response[field] = row_data[1]

        return response
    def get_general_details(self, fund_symbol):
        """
        Gets the following:
            1. Price/ NAV
            2. Minimum investment
            3. Expense ratio (in percentage, ex: .77%)
            4. Turnover ratio (in percentage, ex: .77%)
            5. Morningstar Category

        Each value is the stripped text of the first <span> whose "vkey"
        attribute equals the corresponding key.

        Args:
            fund_symbol: symbol to look up.

        Returns:
            dict with the keys "NAV", "MinInvestment", "ExpenseRatio", "Turnover"
            and "MorningstarCategory".

        Raises:
            FundException.SymbolDoesNotExistError: status code is not 200 or the body is empty.
            FundException.UIChangedError: no <span> exists for one of the keys.
        """
        url = Util.build_url(Section.GENERAL_STATS, fund_symbol)
        raw = requests.get(url)
        if raw.status_code != 200 or raw.text == "":
            raise FundException.SymbolDoesNotExistError(
                f"Error while retrieving data for general details: Symbol does not exist: {fund_symbol}"
            )

        soup = BeautifulSoup(raw.text, 'html.parser')
        keys = [
            "NAV", "MinInvestment", "ExpenseRatio", "Turnover",
            "MorningstarCategory"
        ]
        response = {}
        for key in keys:
            span = soup.find("span", attrs={"vkey": key})
            if span is None:
                # Every key is mandatory: one missing span means the page layout changed
                raise FundException.UIChangedError(
                    f"Error while retrieving data for general details: UI for source website of this symbol has changed, so we can't scrape the data: {fund_symbol}"
                )
            response[key] = span.text.strip()

        return response