Beispiel #1
0
    def parse_earnings_date(self,data,):
        """Store an earnings date string for every ticker in ``data``.

        Tickers are processed in sorted order.  For each one the Yahoo
        earnings-calendar page is fetched with
        ``screener.finance().read_url`` and scanned for a line containing
        ``'Earnings Calendar for'``.  The text of the *following* line, up to
        its first ``'<'``, is read as a date: the last four characters are the
        year, the first three the month abbreviation, and characters
        ``[-8:-6]`` the day.  The result is written to
        ``data[ticker]['Date']`` as ``'<year><Mon><dd>'``.

        ``'N/A'`` is stored when the page is empty *or* when the marker line
        is not present (or is the very last line).  Previously the latter
        case either raised ``NameError`` or silently reused the date parsed
        for the preceding ticker.

        Args:
            data: dict keyed by ticker; each value must be a dict that can
                receive a ``'Date'`` entry.

        Returns:
            The same ``data`` object, updated in place.

        Raises:
            ValueError: if the date line has no ``'<'`` or its year/day
                slices are not integers.
        """

        print('parsing earnings dates')

        tickers = sorted(data)
        n_tickers = len(tickers)

        for count, ticker in enumerate(tickers, 1):

            ## progress report for every tenth ticker (1st, 11th, 21st, ...)
            if (count - 1) % 10 == 0:
                print('%s/%s %s' %(count, n_tickers, ticker))

            url = 'http://biz.yahoo.com/research/earncal/%s/%s.html' %(ticker[0], ticker.lower())
            lines = screener.finance().read_url(url, ticker)

            ## reset per ticker so a page without the marker cannot inherit
            ## the previous ticker's date
            date = 'N/A'

            ## pair each line with its successor; the date sits on the line
            ## after the marker (if several markers occur, the last one wins)
            for line, following in zip(lines, lines[1:]):
                if 'Earnings Calendar for' not in line:
                    continue
                text = following[:following.index('<')]
                year = int(text[-4:])
                month = text[:3]
                day = int(text[-8:-6])
                date = '%s%s%s' %(year, month, str(day).zfill(2))

            data[ticker]['Date'] = date

        return data
Beispiel #2
0
    def parse_ownership(self, d):
        """Print the holder table cells found on each ticker's holders page.

        For every key of ``d`` the Yahoo "Major Holders" page is read via
        ``screener.finance().read_url``.  Each returned line is searched for
        a table whose header row contains "Holder" and "Shares"; every match
        is parsed with BeautifulSoup and its ``td`` elements of class
        ``yfnc_tabledata1`` are printed (first as a list, then one by one).

        Args:
            d: dict keyed by ticker symbol.

        Returns:
            ``d`` itself; nothing is written to it here.
        """

        ## holders table: header row with Holder/Shares, then data rows
        holders_table = re.compile(
            r'<table.*?><tr><th.*?>Holder</th><th.*?>Shares</th>.*?(<tr>.*?</tr>)</table>')

        for ticker in d:
            url = 'https://finance.yahoo.com/q/mh?s={}+Major+Holders'.format(ticker)
            for line in screener.finance().read_url(url, '0'):
                for match in holders_table.finditer(line):
                    soup = BeautifulSoup(match.group(0))
                    cells = soup.find_all("td", class_="yfnc_tabledata1")
                    print(cells)
                    for cell in cells:
                        print(cell)

        return d
Beispiel #3
0
    def parse_financial_highlights(self,ticker):
        """Return the payout ratio from the MSN financial-highlights page.

        The page is fetched with ``screener.finance().read_url``.  When a
        line contains ``'<td>Payout Ratio</td>'`` the value is taken from the
        first ``<td>...</td>`` cell of the following line.

        Args:
            ticker: symbol inserted into the page URL.

        Returns:
            The ratio as a fraction (the cell's percentage divided by 100),
            or ``'N/A'`` when the cell reads ``'NA'`` or when the page holds
            no usable "Payout Ratio" row.  The missing-row case previously
            raised ``UnboundLocalError``.

        Raises:
            ValueError: if the value line lacks ``<td>``/``</td>`` or the
                cell text is not a number.
        """

        url = 'http://moneycentral.msn.com/investor/invsub/results/hilite.asp?Symbol=%s' %(ticker)
        lines = screener.finance().read_url(url, ticker)

        ## default matches the 'N/A' sentinel used for an 'NA' cell
        payout_ratio = 'N/A'

        ## pair every line with its successor so the value line is always
        ## available (a label on the last line is simply ignored)
        for line, following in zip(lines, lines[1:]):
            if '<td>Payout Ratio</td>' not in line:
                continue
            start = following.index('<td>') + len('<td>')
            end = following.index('</td>')
            cell = following[start:end]
            if cell == 'NA':
                payout_ratio = 'N/A'
            else:
                payout_ratio = float(cell.replace('%', '')) / 100.

        return payout_ratio
Beispiel #4
0
    def parse_historical_prices(self, url):
        """Sum the per-row amounts of a two-column CSV by calendar year.

        The CSV at ``url`` is fetched with ``screener.finance().read_url``.
        The first line is skipped as a header; every other line must hold
        exactly two comma-separated fields: a date whose first four
        characters are the year, and a numeric amount (a dividend, per the
        variable naming).

        Args:
            url: address of the CSV, printed before fetching.

        Returns:
            dict mapping year (int) to the summed amount (float).

        Raises:
            ValueError: if a line does not split into exactly two fields or
                a field is not numeric.
        """

        print(url)

        ## example: http://real-chart.finance.yahoo.com/table.csv?s=AVV.L&g=v&ignore=.csv

        rows = screener.finance().read_url(url, '0')

        ## key = year, value = sum of dividends
        totals = {}
        for row in rows[1:]:
            date, amount = row.split(',')
            year = int(date[:4])
            amount = float(amount)
            if year in totals:
                totals[year] += amount
            else:
                totals[year] = amount

        return totals
Beispiel #5
0
    def parse_insidertrading(self,data, tickers):
        """Fetch each ticker's MSN insider-trading page and echo the activity line.

        For every ticker a progress tuple is printed and the page is read
        with ``screener.finance().read_url``.  Any line containing
        ``'Recent Insider Trading Activity'`` is printed, after which an
        offset into that line is advanced over ``'<tr'`` (three times) and
        ``'<td'`` (seven times).  The offset is not used afterwards and
        ``data`` is never modified.

        Args:
            data: passed through untouched.
            tickers: sequence of ticker symbols.

        Returns:
            ``data`` unchanged.

        Raises:
            ValueError: if a matching line lacks a ``'<tr'`` or ``'<td'``
                at or after the current offset.
        """

        print('parsing insider trading')

        total = len(tickers)
        for position, ticker in enumerate(tickers, 1):

            print(('\n%s/%s' %(position, total), ticker))

            url = 'http://moneycentral.msn.com/investor/invsub/insider/trans.asp?Symbol=%s' %(ticker)
            for line in screener.finance().read_url(url, ticker):
                if 'Recent Insider Trading Activity' not in line:
                    continue
                print(line)
                ## NOTE(review): each search starts at the tag just found, so
                ## repeated searches for the same tag leave the offset where
                ## it is; the offset is also never read afterwards.
                offset = 0
                for tag, repeats in (('<tr', 3), ('<td', 7)):
                    for _ in range(repeats):
                        offset += line[offset:].index(tag)

        return data
    def parse_statement(
        self,
        url,
        dic,
        statement,
    ):
        """Parse a financial-statement HTML table into lists of row values.

        The page at ``url`` is read with ``screener.finance().read_url`` and
        scanned for the first ``dataTable financials`` table.  Inside it the
        method collects the reporting unit and currency, the period end
        dates, the period lengths (skipped when ``statement == 'balance'``)
        and one list of scaled numbers per table row.

        Args:
            url: page address; printed before fetching.
            dic: not referenced anywhere in this method.
            statement: statement type; the values compared against here are
                ``'balance'`` and ``'income'``.

        Returns:
            Tuple ``(dic_out, statementNA, currency)``.  ``dic_out`` holds
            ``'period'`` (list of ``[int, unit]``), ``'date'`` (list of
            strings) and one key per table row mapping to a list of floats.
            ``statementNA`` is True when the page reports no data or an
            income statement has an unrecognised period length.

        NOTE(review): ``stop5``, ``stop`` and ``stop_loop`` are bare names
        that are not defined in this method; unless they exist at module
        level, reaching them raises ``NameError``.  They appear to serve as
        deliberate "halt here" markers -- confirm before relying on the
        branches that contain them.

        NOTE(review): ``currency`` is assigned only in the units branch or in
        the ``lines == []`` branch, and ``factor`` only in the units branch;
        if a page has the table but no units line, using them fails with
        ``UnboundLocalError``.
        """

        print(url)

        ## period labels that are one week off are folded into the
        ## canonical quarter/half/three-quarter/full-year labels
        d_periods = {
            '12 Weeks': '13 Weeks',
            '14 Weeks': '13 Weeks',  #'16 Weeks':'13 Weeks',
            '25 Weeks': '26 Weeks',
            '27 Weeks':
            '26 Weeks',  #'24 Weeks':'26 Weeks','28 Weeks':'26 Weeks','29 Weeks':'26 Weeks',
            '38 Weeks': '39 Weeks',
            '40 Weeks':
            '39 Weeks',  #'35 Weeks':'39 Weeks','36 Weeks':'39 Weeks',
            '51 Weeks': '52 Weeks',
            '53 Weeks': '52 Weeks',  #'48 Weeks':'52 Weeks',
            ##                                        '11 Months':'12 Months',
        }

        ## row keys are added to this dict as table rows are encountered
        dic_out = {
            'period': [],
            'date': [],
        }

        lines = screener.finance().read_url(url, '0')

        statementNA = False
        bool_no_financials = False

        for i1 in range(len(lines)):

            if 'No Financials Data Available</div>' in lines[i1]:
                print('No Financials Data Available</div>')
                statementNA = True
                bool_no_financials = True
                ## NOTE(review): undefined name; the break below is only
                ## reached if ``stop5`` is defined elsewhere
                stop5
                break

            if '<table class="dataTable financials" cellspacing="1" cellpadding="0" width="100%">' in lines[
                    i1]:

                ## walk the remainder of the page from the table start
                for i2 in range(i1 + 1, len(lines)):

                    if '<span class="units">' in lines[i2]:

                        ## the line after the units span carries the scale
                        ## word as its second whitespace-separated token and
                        ## the currency between ' of' and the first '<'
                        l = lines[i2 + 1].split()
                        s = l[1]
                        d_factors = {'Millions': 1000000., 'Thousands': 1000.}
                        factor = d_factors[s.strip()]

                        index1 = str(lines[i2 + 1]).index(' of') + 3
                        index2 = str(lines[i2 + 1]).index('<')
                        currency = str(lines[i2 + 1])[index1:index2].strip()

                    elif '<span class="period">' in lines[i2]:
                        ## the period end date is the text before the first
                        ## '<' on the line *preceding* the period span
                        index1 = 0
                        index2 = str(lines[i2 - 1]).index('<')
                        s = str(lines[i2 - 1])[index1:index2].strip()
                        ## if the last two characters (presumably the day of
                        ## the month -- TODO confirm date format) are <= 6,
                        ## the field at s[5:-3] (presumably the month) is
                        ## decremented by one; there is no year rollover, so
                        ## a value of 1 becomes 00
                        if int(s[-2:]) <= 6:  ## e.g. FXJ.AX, BBY, BKS
                            s = '%5s%02i%3s' % (
                                s[:5],
                                int(s[5:-3]) - 1,
                                s[-3:],
                            )
                        ## only the part before the last three characters is kept
                        dic_out['date'] += [s[:-3]]

                        if statement != 'balance':
                            ## the period length is the text before the first
                            ## '</span>' found on the following lines
                            for i3 in range(i2 + 1, len(lines)):
                                if '</span>' in lines[i3]:
                                    index1 = 0
                                    index2 = lines[i3].index('</span>')
                                    s = lines[i3][index1:index2].strip(
                                    ).replace('&#160;', ' ')
                                    if s in list(d_periods.keys()):
                                        s = d_periods[s]
                                    if s not in [
                                            '3 Months',
                                            '6 Months',
                                            '9 Months',
                                            '12 Months',
                                            '13 Weeks',
                                            '26 Weeks',
                                            '39 Weeks',
                                            '52 Weeks',
                                            ##                                        '27 Weeks','53 Weeks','25 Weeks',
                                    ]:
                                        ## unrecognised period length: an
                                        ## income statement is flagged as not
                                        ## available and no period is stored
                                        if statement == 'income':
                                            statementNA = True
                                            break
                                        else:
                                            print(s)
                                            print((lines[i3]))
                                            print((lines[i3]))
                                            print((lines[i3][index1:index2]))
                                            ## NOTE(review): undefined name,
                                            ## see docstring
                                            stop
                                    ## stored as [count, unit], e.g. [13, 'Weeks']
                                    l = s.split()
                                    dic_out['period'] += [[
                                        int(l[0]),
                                        l[1],
                                    ]]
                                    break

                    elif '<tr ' in lines[i2]:

                        ## first <td> of a row is the row label (new key),
                        ## every later <td> is a numeric cell
                        col1 = True
                        for i3 in range(i2 + 1, len(lines)):
                            if '<td ' in lines[i3]:
                                index1 = lines[i3].index('>') + 1
                                index2 = lines[i3].rindex('<')
                                s = lines[i3][index1:index2].replace('  ', ' ')
                                if col1 == True:
                                    key = s
                                    dic_out[key] = []
                                    col1 = False
                                else:
                                    ## commas and parentheses are stripped;
                                    ## the sign comes from the word 'minus'
                                    ## appearing on the line, not from the
                                    ## parentheses
                                    s = s.replace(',', '').replace('(',
                                                                   '').replace(
                                                                       ')', '')
                                    if s == '--':
                                        s = 0
                                    value = factor * float(s)
                                    if 'minus' in lines[i3]:
                                        value *= -1
                                    dic_out[key] += [value]
                            ## a row ends at its closing tag or at a header cell
                            if lines[i3].strip() == '</tr>':
                                break
                            if '<th>' in lines[i3].strip():
                                break

                break  ## break loop over lines

        ## empty download: the loop above never ran, so statementNA is still
        ## False here and the ``stop_loop`` branch is the one taken
        if lines == []:
            if statementNA == True:
                currency = 'N/A'
            else:
                stop_loop
            if bool_no_financials == False:
                stop

        return dic_out, statementNA, currency
    def find_candidates_TA(
        self,
        l_tickers, l_time, months, l_statementNA, d_portfolio,
        d_ADR,
        ):
        """Download daily price history and run technical-analysis screens.

        Daily CSV data is fetched from Yahoo (via
        ``screener.finance().read_url``) first for the S&P 500 index
        (``^GSPC``) and then for every ticker in ``l_tickers``.  For each
        ticker the method calls ``self.data_conversion``, ``self.MA``,
        ``self.support_and_resistance``, ``self.gaps``, ``self.RSI``,
        ``self.MFI`` and ``self.MACD``, records 52-week / 10-year price
        changes in ``TAdata`` and collects tickers into several screening
        lists.  The lists are printed as Yahoo quote URLs and
        ``TAcandidates.txt`` is written in the working directory.

        Args:
            l_tickers: iterable of ticker symbols.
            l_time: sequence read as ``[year, month, day]`` of the end date;
                the start date is eleven years earlier.
            months: not referenced in this method.
            l_statementNA: referenced only in commented-out code.
            d_portfolio: referenced only in commented-out code.
            d_ADR: dict; its values are compared with tickers and the
                matching key is used as the US symbol.

        Returns:
            Tuple ``(TAcandidates, TAdata)``.  In the active code nothing is
            ever appended to ``TAcandidates`` (the appending logic is
            commented out), so it is returned empty.

        NOTE(review): ``stop`` is a bare name not defined in this method;
        reaching it raises ``NameError`` unless it is defined at module
        level.
        """

        print('finding TA candidates')

        ## end date from the caller; start date is 11 years before it
        year2 = l_time[0] ; month2 = l_time[1] ; day2 = l_time[2]
        year1 = year2-11 ; month1 = month2  ; day1 = day2

        TAcandidates = []
        TAdata = {}

        l_supports = []
        l_breakouts = []

        l_MA50_increasing = []
        l_MA50_bounce = []
        l_52w_low = []
        l_down10percent_morethansp500 = []

        ##
        ## S&P 500
        ##
        ticker = ticker_yahoo = '^GSPC' ## S&P500
        TAdata[ticker] = {}
        period = 'daily'
        TAdata[ticker][period] = {}
        ## read url
        ## months are passed as month-1 (presumably the Yahoo API expects
        ## zero-based months -- TODO confirm)
        url = 'http://ichart.finance.yahoo.com/table.csv?s=%s&d=%s&e=%s&f=%s&g=d&a=%s&b=%s&c=%s&ignore=.csv' %(
            ticker_yahoo, month2-1, day2, year2, month1-1, day1, year1,
            ) ## g=d signifies daily_weekly_monthly graph
        linesd = screener.finance().read_url(url, ticker)
        ## first line is dropped (presumably the CSV header)
        data = TAdata[ticker][period]['raw'] = linesd[1:]
        ## parse lines
        TAdata[ticker][period]['price'] = {
            'date':[], 'open':[], 'high':[], 'low':[], 'close':[], 'volume':[],'adjclose':[],
            }
        TAdata = self.data_conversion(ticker,period,data,TAdata,)
        ## price today
        price_today = TAdata[ticker][period]['price']['adjclose'][-1]
        date_today = TAdata[ticker][period]['price']['date'][-1]
        ## price 52w
        ## same date string with the leading four-character year reduced by one
        date_52w = '%4s%s' %(int(date_today[:4])-1,date_today[4:])
        price_52w = None
        ## walk backwards from the second-newest entry to the first entry
        ## dated on or before date_52w
        for i in range(2,len(TAdata[ticker][period]['price']['date'])):
            if TAdata[ticker][period]['price']['date'][-i] <= date_52w:
                price_52w = TAdata[ticker][period]['price']['adjclose'][-i]
                break
        ## change 52w
        ## NOTE(review): if no entry was found above, price_52w is still None
        ## and this line raises TypeError
        sp500_52w_change = (price_today-price_52w)/price_52w

        for ticker in l_tickers:

            ## keep the incoming symbol; results are keyed by it, while the
            ## converted symbol is only used in the download URL
            ticker_FA = ticker

##            if ticker[-2:] == '.I':
##                continue
##            if ticker[-3:] in [
##                '.HE','.VX','.IS','.BR','.MM',
##                '.MX','.SA',
##                '.HK','.BO',
##                ]:
##                continue

            if ticker[-3:] in [
##                '.IC', ## Iceland not on Yahoo
##                '.SI', ## Singapore not on Yahoo
                '.BO', ## India not on Yahoo
##                '.ME', ## Russia not on Yahoo
                ]:
                continue
##            if ticker == 'SUN.BO':
##                continue
##            if ticker == 'WIPR.BO':
##                continue
            if ticker == 'INGC.BO':
                continue
            if ticker == 'HUVR.BO':
                continue

            ## share-class suffixes: 'XYZ.A' -> 'XYZ-A'
            ## (the final ``not in ['.O']`` test can never fail once the
            ## suffix is '.A' or '.B')
            if '.' in ticker and ticker[-2:] in ['.A','.B',] and ticker[-2:] not in ['.O']:
                index = ticker.index('.')
                ticker = ticker[:index]+'-'+ticker[index+1:]
            ticker = ticker.replace('.a','-a')
            ticker = ticker.replace('.b','-b')
            ## NOTE(review): these two replace *every* lowercase 'b' / 'a' in
            ## the symbol, not only a trailing class letter -- confirm intent
            ticker = ticker.replace('b','-B') ## HUBb, NOVO-B.CO
            ticker = ticker.replace('a','-A') ## BFa
            if ':' in ticker:
                index = ticker.index(':')
##                if ticker[:index] == 'JP': ## Japan not on Yahoo
##                    continue
                if ticker[:index] == 'CA' and '.' in ticker:
                    ## NOTE(review): the result of replace() is discarded, and
                    ## ``stop`` is an undefined name (see docstring)
                    ticker.replace('.','-')
                    stop
##                if ticker[:index] == 'SE' and '-' in ticker:
##                    ticker = ticker.replace('-','')
                ticker = ticker_conversion.unknown2yahoo(ticker)
            elif '.' in ticker:
                ticker = ticker_conversion.unknown2yahoo(ticker)
            ticker = ticker.replace('..','.') ## RB..L

            ticker_yahoo = ticker

            ## from here on ``ticker`` is the original symbol again
            ticker = ticker_FA

##            if ticker in d_yahoo2reuters:

            ##
            ## parse historical data
            ##

            TAdata[ticker] = {}

            ## daily
            url = 'http://ichart.finance.yahoo.com/table.csv?s=%s&d=%s&e=%s&f=%s&g=d&a=%s&b=%s&c=%s&ignore=.csv' %(
                ticker_yahoo, month2-1, day2, year2, month1-1, day1, year1,
                ) ## g=d signifies daily_weekly_monthly graph

            linesd = screener.finance().read_url(url, ticker)
            ## ``fp`` is built but not used later in this method
            fp = 'urls/%s' %(url.replace(':','').replace('/','').replace('.','').replace('?',''))

            ## no data
            ## (TAdata[ticker] stays an empty dict for such tickers)
            if linesd == ['']: continue

##            ## weekly
##            url = 'http://ichart.finance.yahoo.com/table.csv?s=%s&a=%s&b=%s&c=%s&d=%s&e=%s&f=%s&g=w&ignore=.csv' %(
##                ticker_yahoo, month1-1, day1, year1, month2-1, day2, year2,
##                ) ## g=w signifies daily_weekly_monthly graph
##            for x in range(10):
##                try:
##                    urllines = urllib2.urlopen(url)
##                    linesw = urllines.readlines()
##                    break
##                except:
##                    print x, url
##                    continue
##            if x == 9:
##                continue
##
##            ## monthly
##            url = 'http://ichart.finance.yahoo.com/table.csv?s=%s&a=%s&b=%s&c=%s&d=%s&e=%s&f=%s&g=w&ignore=.csv' %(
##                ticker_yahoo, month1-1, day1, year1, month2-1, day2, year2,
##                ) ## g=w signifies daily_weekly_monthly graph
##            for x in range(10):
##                try:
##                    urllines = urllib2.urlopen(url)
##                    linesm = urllines.readlines()
##                    break
##                except:
##                    print x, url
##                    continue
##            if x == 9:
##                continue

            TAdata[ticker]['daily'] = {
                'raw':linesd[1:],
                }

            ## find TA candidates
            ## ``periods`` is not used later; ``TAcandidate`` is set below
            ## but never read in the active code
            periods = list(TAdata[ticker].keys())
            TAcandidate = True
            for period in [
                'daily',
##                'weekly','monthly',
                ]:

                TAdata[ticker][period]['price'] = {
                    'date':[], 'open':[], 'high':[], 'low':[], 'close':[], 'volume':[],'adjclose':[],
                    }

                data = TAdata[ticker][period]['raw']
                ## ``n`` is not used later in this method
                n = len(data)

                TAdata = self.data_conversion(ticker,period,data,TAdata,)

                ## calculate MA
                if period == 'daily':
                    TAdata, MA50, MA200, l_MA50_increasing, l_MA50_bounce = self.MA(
                        ticker,period,TAdata,l_MA50_increasing,l_MA50_bounce,
                        )

                    TAdata[ticker][period]['MA50'] = MA50
                    TAdata[ticker][period]['MA200'] = MA200

##                    print ticker, 'ma50', MA50, 'ma200', MA200
                    price_today = TAdata[ticker][period]['price']['adjclose'][-1]
                    date_today = TAdata[ticker][period]['price']['date'][-1]

                    ## same backward search as for the index above
                    date_52w = '%4s%s' %(int(date_today[:4])-1,date_today[4:])
                    price_52w = None
                    for i in range(2,len(TAdata[ticker][period]['price']['date'])):
                        if TAdata[ticker][period]['price']['date'][-i] <= date_52w:
                            price_52w = TAdata[ticker][period]['price']['adjclose'][-i]
                            break

                    ## less than a year of history: skip the remaining
                    ## indicators for this period (this ``continue`` applies
                    ## to the loop over periods)
                    if price_52w == None:
                        continue

                    date_10y = '%4s%s' %(int(date_today[:4])-10,date_today[4:])
                    price_10y = None
                    for i in range(2,len(TAdata[ticker][period]['price']['date'])):
                        if TAdata[ticker][period]['price']['date'][-i] <= date_10y:
                            price_10y = TAdata[ticker][period]['price']['adjclose'][-i]
                            break
                        
                    ## all adjusted closes dated on or after date_52w,
                    ## excluding the newest entry (the range starts at 2)
                    l_prices_52w = []
                    for i in range(2,len(TAdata[ticker][period]['price']['date'])):
                        if TAdata[ticker][period]['price']['date'][-i] >= date_52w:
                            l_prices_52w += [TAdata[ticker][period]['price']['adjclose'][-i]]
                            continue

                    ## changes are stored as whole percentages
                    if price_52w:
                        change_52w = (
                            price_today
                            -
                            price_52w
                            ) / price_52w
                        TAdata[ticker][period]['change_52w'] = round(100*change_52w,0)
                    else:
                        change_52w = None

                    if price_10y:
                        change_10y = (price_today-price_10y)/price_10y
                        TAdata[ticker][period]['change_10y'] = round(100*change_10y,0)
                    else:
                        change_10y = None

                    price_52w_min = min(l_prices_52w)
                    price_52w_max = max(l_prices_52w)

                    above_52w = (price_today-price_52w_min)/price_52w_min
                    below_52w_max = (price_today-price_52w_max)/price_52w_max

                    TAdata[ticker][period]['above_52w'] = round(100*above_52w,0)
                    TAdata[ticker][period]['below_52w_max'] = round(100*below_52w_max,0)

                    ## within 5% of the 52-week low
                    if price_today < 1.05*price_52w_min:
                        l_52w_low += [ticker]

                    ## dropped more than 10% relative to market
                    if (price_today-price_52w)/price_52w < sp500_52w_change-0.1:
                        l_down10percent_morethansp500 += [ticker]

                ## find support and resistance
                ## conflicts if support or resistance while paying out dividend...
                if period == 'daily':
                    l_supports, l_breakouts = self.support_and_resistance(
                        ticker,TAdata,
                        l_supports,l_breakouts,
                        )

                ## find gap support/resistance
                ## (``l_gaps`` is not used later in this method)
                if period == 'daily':
                    l_gaps = self.gaps(ticker,data,)

                ## calculate RSI
                if period == 'daily':
                    TAdata = self.RSI(ticker,period,TAdata)

                ## calculate MFI
                if period == 'daily':
                    TAdata = self.MFI(ticker,period,TAdata)

                ## calculate MACD
                TAdata = self.MACD(ticker,period,TAdata,)

                ## evaluate MACD (bullish)
                ## bullish here means: the latest 'DIV' value is above the
                ## previous one and the previous one is negative
                if period != 'monthly' and not (TAdata[ticker][period]['MACD']['DIV'][-1] > TAdata[ticker][period]['MACD']['DIV'][-2] and TAdata[ticker][period]['MACD']['DIV'][-2] < 0):
                    TAcandidate = False
                elif period == 'monthly' and not TAdata[ticker][period]['MACD']['DIV'][-2] < 0:
                    TAcandidate = False

                ## end of loop over periods


##            ## evaluate MACD (bullish)
##            if (
####                TAdata[ticker]['daily']['MACD']['DIV'][-1] > TAdata[ticker]['daily']['MACD']['DIV'][-2]
####                and
####                TAdata[ticker]['daily']['MACD']['DIV'][-2] < 0
####                and
##                TAdata[ticker]['weekly']['MACD']['DIV'][-1] > TAdata[ticker]['weekly']['MACD']['DIV'][-2]
##                and
##                TAdata[ticker]['weekly']['MACD']['DIV'][-2] < 0
##                and
##                TAdata[ticker]['monthly']['MACD']['DIV'][-2] < 0
##                ):
##                    TAcandidates.append(ticker)
##                    print 'TAcandidate!!!'
##
##            ## evaluate MACD (bearish)
##            if ticker in d_portfolio.keys() and ticker not in l_statementNA:
##                if (
##                    TAdata[ticker]['daily']['MACD']['DIV'][-1] > 0 and
##                    TAdata[ticker]['weekly']['MACD']['DIV'][-1] > 0 and
##                    TAdata[ticker]['monthly']['MACD']['DIV'][-1] > 0
##                    ):
##                    print 'SELL %s !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!' %(ticker)

        ## print one Yahoo multi-quote URL per screen
        d_TA = {
            'MACD':TAcandidates,
            'bouncing at support level':l_supports,
            'breaking resistance level':l_breakouts,
            'MA50 increasing':l_MA50_increasing,
            'MA50 bounce':l_MA50_bounce,
            }
        for s_TA in list(d_TA.keys()):
            l_TA = d_TA[s_TA]
            yahoo = 'http://finance.yahoo.com/q/cq?d=v1&s='
            for ticker in l_TA:
                yahoo += '%s+' %(ticker)
            print('\n')
            print(s_TA)
            ## the slice drops the trailing '+' (or the '=' when the list is empty)
            print((yahoo[:-1]))

        print('\n')
        print(('l_52w_low', l_52w_low))
        print(('l_down10percent_morethansp500', l_down10percent_morethansp500))
        print('\n')

        ## comma-separated symbol list, substituting the d_ADR key for any
        ## ticker that appears among the d_ADR values
        ## NOTE(review): ``l_TA`` is whatever the loop above left behind,
        ## i.e. the last entry of d_TA ('MA50 bounce') -- confirm intent
        s_CAPS = ''
        for ticker in l_TA:
            if ticker in list(d_ADR.values()):
                for ADR,v in list(d_ADR.items()):
                    if v == ticker:
                        break
                ticker_US = ADR
            else:
                ticker_US = ticker
            s_CAPS += '%s,' %(ticker_US)
        print((s_CAPS[:-1]))

        ## two lines: repr of the input tickers, then repr of TAcandidates
        fd = open('TAcandidates.txt', 'w')
        fd.write('%s\n%s' %(l_tickers, TAcandidates))
        fd.close()

##        matrix = self.covar_matrix(d_pca)
##        eigenvalues,eigenvectors = self.diagonalization(matrix)
##        print 'matrix', matrix
##        print 'eval', eigenvalues
##        print 'evec', eigenvectors[0]
##        print 'tickers', l_tickers
##        for i in range(len(l_tickers)):
##            print '%s \t %s' %(l_tickers[i],eigenvectors[0][i],)

        return TAcandidates, TAdata
Beispiel #8
0
    def parse_currencies(self):
        """Fetch USD exchange rates and index them by currency name and code.

        For every (name, ISO 4217 code) pair in the table below a Yahoo
        quote CSV for ``USD<code>=X`` is read with
        ``screener.finance().read_url``; the second comma-separated field of
        its first line is taken as the rate.  Each rate is printed and
        stored under both the name and the code.  Afterwards US-dollar
        entries (rate 1), a few alias codes/names and a pluralised copy
        (``name + 's'``) of every name are added.

        Returns:
            Tuple ``(d_currency_msn, d_currency_reuters)``: rates keyed by
            currency name (with plural and alias spellings) and rates keyed
            by currency code (including ``USD``, ``TRL``, ``ZAX`` and
            ``GBX`` = 100 x ``GBP``).

        Raises:
            ValueError: if a downloaded rate is zero or not numeric.  The
                zero-rate case previously halted through an undefined name
                (``NameError``); the diagnostic prints are kept.
            IndexError: if a download returns no lines or the first line has
                fewer than two fields.
        """

        ## (MSN currency name, ISO 4217 code)
        l_currency_msn = [
            ## most traded currencies
            ('Euro', 'EUR'),
            ('Japanese Yen', 'JPY'),
            ('British Pounds', 'GBP'),
            ('Swiss Francs', 'CHF'),
            ('Australian Dollars', 'AUD'),
            ## Asia
            ('Chinese Renminbi', 'CNY'), ## China
            ('Taiwanese Dollars', 'TWD'),
            ('Hong Kong Dollars', 'HKD'),
            ('Philippine Pesos', 'PHP'),
            ('Singapore Dollars', 'SGD'),
            ('Indonesian Rupiah', 'IDR'), ## Indonesia
            ('Indian Rupee', 'INR'),
            ('Thai Bahts', 'THB'),
            ('South Korean Won', 'KRW'),
            ('Malaysian Ringgit', 'MYR'), ## Malaysia
            ## Europe
            ('Swedish Krona', 'SEK'),
            ('Norwegian Krone', 'NOK'),
            ('Danish Krone', 'DKK'),
            ('Icelandic Kronas', 'ISK'),
            ('Hungarian Forint', 'HUF'),
            ('Czech Korunas', 'CZK'),
            ## Estonian Kroon (EEK) omitted: Euro 2011-
            ('Lithuanian Lita', 'LTL'),  # Euro 2015-
            ('Russian Rouble', 'RUB'), ## msn
            ('Turkish Lira', 'TRY'),
            ('Polish Zlotys', 'PLN'),
            ## North America
            ('Canadian Dollars', 'CAD'),
            ('Mexican Pesos', 'MXN'),
            ## Oceania
            ('N.Z. Dollars', 'NZD'),
            ## South America
            ('Colombian Peso', 'COP'),
            ('Argentine Peso', 'ARS'),
            ('Chilean Peso', 'CLP'),
            ('Brazilian Real', 'BRL'),
            ('Peruvian Nuevo Sol', 'PEN'),
            ## Middle East
            ('Israeli Shekel', 'ILS'), ## Israel
            ('Kuwait Dinars', 'KWD'),
            ('Saudi Arabian Riyals', 'SAR'),
            ('Qatari Rials', 'QAR'),
            ## Africa
            ('South African Rand', 'ZAR'), ## South Africa (ZAR...)
            ('Nigerian Naira', 'NGN'),
            ]

        d_currency_msn = {}
        d_currency_reuters = {}

        for name, symbol in l_currency_msn:

            url = 'http://download.finance.yahoo.com/d/quotes.csv?s=USD%s=X&f=sl1d1t1c1ohgv&e=.csv' %(symbol,)

            lines = screener.finance().read_url(url, '0')
            line = lines[0]

            ## second CSV field of the quote line is the rate
            rate = float(str(line).split(',')[1])
            if rate == 0:
                ## a zero rate would poison every later conversion, so stop
                ## with the context needed to investigate
                print(symbol)
                print(rate)
                print(url)
                print(line)
                raise ValueError(
                    'zero exchange rate for %s (%s)' %(symbol, url))
            d_currency_reuters[symbol] = rate
            d_currency_msn[name] = rate
            print('%s%s %3s %7.2f' %(name, (24-len(name))*'-', symbol, rate))

        ## the base currency itself
        d_currency_msn['U.S. Dollars'] = 1.
        d_currency_reuters['USD'] = 1.
        d_currency_msn['US Dollars'] = 1.

        ## alias codes; GBX is set to 100 times the GBP rate
        d_currency_reuters['TRL'] = d_currency_reuters['TRY']
        d_currency_reuters['ZAX'] = d_currency_reuters['ZAR']
        d_currency_reuters['GBX'] = d_currency_reuters['GBP']*100.

        ## add a pluralised spelling of every name collected so far
        ## (iterate over a snapshot because the dict grows in the loop)
        for currency in list(d_currency_msn):
            d_currency_msn['%ss' %(currency)] = d_currency_msn[currency]

        ## alternative spellings
        d_currency_msn['Chinese Renminbi (Yuan)s'] = d_currency_msn['Chinese Renminbi']
        d_currency_msn['Taiwan Dollars'] = d_currency_msn['Taiwanese Dollars']
        d_currency_msn['Turkish New Liras'] = d_currency_msn['Turkish Lira']
        d_currency_msn['Philippines Pesos'] = d_currency_msn['Philippine Pesos']
        d_currency_msn['New Zealand Dollars'] = d_currency_msn['N.Z. Dollars']
        d_currency_msn['Won'] = d_currency_msn['South Korean Won']

        return d_currency_msn, d_currency_reuters
Beispiel #9
0
    def parseKeyRatios(self, url, ticker):
        """Parse ten years of key figures from a Morningstar key-ratios response.

        The response is read with ``screener.finance().read_url`` and its
        lines are joined into one string.  For each row label listed below
        the matching table row is located by regex, its cells are converted
        to floats (scaled so that "Bil" rows are expressed in the same unit
        as "Mil" rows), the last column (TTM) is dropped and the remaining
        ten values are reversed.  A missing cell (``&mdash;``) is kept as
        ``'-'``; when a row has exactly one such gap it is filled from its
        neighbours.

        Fixes relative to the previous version:
          * the year headers are now searched in the whole response
            (``s_html``) rather than in the last parsed cell;
          * a single gap at index 0 is filled from index 1 instead of being
            averaged with the value at the opposite end of the list;
          * a row without exactly eleven cells raises ``ValueError`` instead
            of halting through an undefined name;
          * length checks that could never trigger after the eleven-cell
            check were removed.

        Args:
            url: address of the key-ratios response; printed before fetching.
            ticker: symbol, passed to ``read_url`` and used in diagnostics.

        Returns:
            dict with one ten-element list per row label plus ``'DATE'``,
            ``'SHARES OUTSTANDING'`` and ``'SALES'`` (same list as
            ``'Revenue'``).  An empty dict is returned when the response has
            no data or the Revenue row still contains a gap.

        Raises:
            ValueError: if a row does not yield exactly eleven cells.
            AttributeError: if an expected row is absent from the response
                (``re.search`` returns ``None``).
        """

        print(url)

        dic_10year = {}

        d_factors = {'Mil': 1., 'Bil': 1000.}

        lines = screener.finance().read_url(url, ticker)
        s_html = '\n'.join(lines)

        ## ticker wrong or simply no data on morningstar.com
        if s_html == '' or s_html == '?({"componentData":null})':
            return dic_10year

        for k in (
                'Revenue',
                'Operating Income',
                'Net Income',
                'Earnings Per Share',
                'Dividends',
                'Shares',
                'Book Value Per Share',
                'Operating Cash Flow',
                'Cap Spending',
                'Free Cash Flow',
                'Free Cash Flow Per Share',
                'Working Capital',
        ):
            ## \w\w\w is the currency code
            p = r'>{}&nbsp;<span>\w\w\w.*?<\\/tr\>'.format(k)
            s_tr = re.search(p, s_html).group()

            ## rows without a Mil/Bil marker are taken at face value
            m_unit = re.search(r'[MB]il', s_tr)
            factor = d_factors[m_unit.group()] if m_unit else 1

            values = []
            for match in re.finditer(r'>([-\d,.]+|&mdash;)<', s_tr):
                cell = match.group(1)
                if cell == '&mdash;':
                    values.append('-')
                else:
                    values.append(factor * float(cell.replace(',', '')))

            ## ten annual columns plus TTM are expected
            if len(values) != 11:
                print(values)
                print(s_tr)
                raise ValueError(
                    'expected 11 cells in row %r for %s, got %s'
                    %(k, ticker, len(values)))

            ## Do not include TTM.
            values = list(reversed(values[:-1]))

            ## fill a single gap from its neighbours
            if values.count('-') == 1:
                i = values.index('-')
                if i == 9:
                    ## last entry (earliest year): same as the year after
                    values[9] = values[8]
                elif i == 0:
                    ## first entry has only one neighbour; averaging with
                    ## values[-1] would wrap around to the other end
                    values[0] = values[1]
                else:
                    ## otherwise average of neighbouring years
                    values[i] = (values[i - 1] + values[i + 1]) / 2

            ## revenue is required in full; give up on this ticker otherwise
            if k == 'Revenue' and '-' in values:
                print(ticker, k, values)
                return {}

            print(k, len(values), values)
            dic_10year[k] = values

        ## shares outstanding, parsed separately from integer-like cells
        ## only; after reversing, the first entry (TTM) is dropped
        s_tr = re.search(r'>Shares&nbsp;<span>[MB]il.*?<\\/tr\>',
                         s_html).group()
        factor = d_factors[re.search(r'[MB]il', s_tr).group()]
        l_shares = [factor * float(x.replace(',', ''))
                    for x in reversed(re.findall(r'>([\d,]+)<', s_tr))][1:]

        ## fiscal year labels (YYYY-MM) from the column headers of the
        ## whole response
        p = r'<th scope=\\"col\\" align=\\"right\\" id=\\"Y\d+\\">(\d\d\d\d-\d\d)<\\/th>'
        l_dates = list(reversed(re.findall(p, s_html)))

        dic_10year['DATE'] = l_dates
        dic_10year['SHARES OUTSTANDING'] = l_shares
        dic_10year['SALES'] = dic_10year['Revenue']

        return dic_10year
Beispiel #10
0
    def parse_company_report(self,ticker,rate,):
        '''Scrape the MSN Money company report page for one ticker.

        The page is fetched with ``screener.finance().read_url`` and scanned
        line by line for a fixed set of labels; the value belonging to a
        label is cut out of the same line or of one of the following lines.

        Args:
            ticker: symbol appended to the company report URL.
            rate: divisor applied to the parsed market capitalization.
                NOTE(review): presumably a currency exchange rate - confirm
                against the caller.

        Returns:
            dict with the keys 'ma50', 'ma200' and 'relative strength'
            (each defaulting to 'N/A') plus, whenever the matching label is
            found on the page, 'price', 'name', 'beta', 'growth_sales_5y',
            'growth_income_5y', 'mc', 'div_yield' and 'debt_equity_ratio'.

        Raises:
            ValueError: if an expected tag is missing next to a label, or a
                value cannot be converted to a number.
            NameError: if the market capitalization is reported as 'NA'
                (deliberate halt inherited from the original code).
        '''

        def td_text(row):
            ## text between the first literal '<td>' and the first '</td>'
            start = row.index('<td>')+len('<td>')
            return row[start:row.index('</td>')]

        def cell_text(row):
            ## text between the first '>' and the first '</td>';
            ## unlike td_text this tolerates attributes on the opening tag
            end = row.index('</td>')
            return row[row[:end].index('>')+1:end]

        url = 'http://moneycentral.msn.com/companyreport?Symbol='+ticker
        lines = screener.finance().read_url(url, ticker)

        d = {
            'ma50':'N/A',
            'ma200':'N/A',
            'relative strength':'N/A',
            }

        for i, line in enumerate(lines):

            if 'Exchange : ' in line:
                index = line.index('Exchange : ')
                index2 = index+line[index:].index('</b>')
                index1 = line[:index2].rindex('>')+1
                ## NOTE(review): parsed but never used or returned
                exchange = line[index1:index2]

            if 'Last Price' in line and '<meta ' not in line:
                d['price'] = float(td_text(lines[i+1]).replace(',',''))

            if '50 Day Moving Average' in line:
                s = td_text(lines[i+1])
                d['ma50'] = 'N/A' if s == 'NA' else float(s.replace(',',''))

            ## not exclusive with the branch above: a label containing
            ## '250 Day Moving Average' would match both, as in the original
            if '200 Day Moving Average' in line:
                s = td_text(lines[i+1])
                d['ma200'] = 'N/A' if s == 'NA' else float(s.replace(',',''))

            if ': Company Report</' in line:
                index2 = line.index(': Company Report</')
                index1 = line[:index2].rindex('>')+1
                d['name'] = line[index1:index2]

            if 'Volatility (beta)' in line:
                row = lines[i+1]
                index2 = row.index('</')
                index1 = row[:index2].rindex('>')+1
                s = row[index1:index2]
                d['beta'] = 'N/A' if s == 'NA' else float(s.replace(',',''))

            if '<td>Sales</td>' in line:
                s = cell_text(lines[i+2])
                if s == 'NA':
                    print('sales 5y N/A (maybe because negative)')
                    growth_sales_5y = 'N/A'
                ## negative growth (or a value wrapped in a span) counts as 0
                elif '-' in s or '<span ' in s:
                    growth_sales_5y = 0
                else:
                    ## drop the trailing character (the unit suffix)
                    growth_sales_5y = float(s[:-1])
                d['growth_sales_5y'] = growth_sales_5y

            if '<td>Income</td>' in line:
                s = cell_text(lines[i+2])
                if s == 'NA':
                    print('income 5y N/A (maybe because negative)')
                    growth_income_5y = 'N/A'
                elif '-' in s:
                    growth_income_5y = 0
                else:
                    growth_income_5y = float(s[:-1])
                d['growth_income_5y'] = growth_income_5y

            if 'Market Capitalization' in line and '<meta ' not in line:
                index = line.index('Market Capitalization')
                index1 = index+line[index:].index('<td>')+len('<td>')
                index2 = index1+line[index1:].index('</td>')
                if line[index1:index1+2] == 'NA':
                    ## NOTE(review): the bare name raises NameError on purpose
                    ## to halt on a missing market cap; kept unchanged so
                    ## callers see the same failure as before
                    stop_mc
                else:
                    factor = line[index2-3:index2]
                    if factor not in ['Bil','Mil']:
                        ## no Bil/Mil suffix: value is scaled by .001
                        factor = .001
                        mc = factor*float(line[index1:index2])/rate
                    else:
                        if factor == 'Bil':
                            factor = 1000000000.
                        elif factor == 'Mil':
                            factor = 1000000.
                        ## -4 drops the three letter suffix and its separator
                        mc = factor*float(line[index1:index2-4])/rate
                d['mc'] = mc

            if '<td>Dividend Yield</td>' in line:
                s = cell_text(lines[i+1])
                if s == 'NA':
                    div_yield = 'N/A'
                else:
                    ## percent text -> fraction
                    div_yield = float(s[:-1].replace(',',''))/100
                d['div_yield'] = div_yield

            if '<td>Debt/Equity Ratio</td>' in line:
                index = line.index('Debt/Equity Ratio')
                index1 = index+line[index:].index('<td>')+len('<td>')
                index2 = index1+line[index1:].index('</td>')
                s = line[index1:index2]
                d['debt_equity_ratio'] = 'N/A' if s == 'NA' else float(s)

            if '<td>Last 12 Months</td>' in line:
                s = lines[i+2]
                ## peel the outermost tag pair until only the cell text is
                ## left; every pass shortens s, so the loop terminates
                ## (the original never updated s and looped forever)
                while '<' in s:
                    index1 = s.index('>')+1
                    index2 = s.rindex('<')
                    s = s[index1:index2]
                s = s.replace('%','').strip()
                ## 'NA' keeps the 'N/A' default set above
                if s != 'NA':
                    d['relative strength'] = int(s)

        return d
Beispiel #11
0
    def parse_overview(self,ticker,):
        '''Scrape the businessweek.com snapshot page for one ticker.

        Returns the tuple
        (name, currencyCode, price, sector, industry, mc, statementNA).
        Fields that are not found on the page keep the empty string.
        statementNA is True when the price is shown as '--', when the
        market cap is shown as '-', or when price or market cap is missing.

        A page without a company name halts via an undefined name
        (NameError), exactly as before.
        '''

        multipliers = {
            'K':1000,
            'M':1000000,
            'B':1000000000,
            'T':1000000000000,
            }

        name = mc = currencyCode = price = sector = industry = ''
        statementNA = False

        url = 'http://investing.businessweek.com/research/stocks/snapshot/snapshot.asp?ticker=%s' %(ticker)
        lines = screener.finance().read_url(url, ticker)

        name_tag = '<span itemprop="name">'
        quote_tag = '<span class="quoteData">'
        content_tag = 'content="'
        last_markers = (
            '<div class="dataPoint"><span class="quoteHeading">LAST</span> <span class="quoteData">',
            '<div class="dataPoint"><span class="quoteHeading">Last</span> <span class="quoteData">',
            )
        ## characters removed from the quote text before it is parsed:
        ## letters of either case, '&', ';', '$' and the thousands separator
        letters = 'abcdefghijklmnopqrstuvwxyz&;$'
        unwanted = letters+letters.upper()+','

        for pos, row in enumerate(lines):

            ## name
            if name_tag in row:
                begin = row.index(name_tag)+len(name_tag)
                end = row.index('<', begin)
                print(111,row)
                text = row[begin:end].strip()
                print(222,text)
                ## cut a trailing parenthesised suffix
                if '(' in text:
                    text = text[:text.rindex('(')]
                name = text.upper()

            ## price, currency
            if any(marker in row for marker in last_markers):
                begin = row.index(quote_tag)+len(quote_tag)
                end = row.index('<', begin)
                text = row[begin:end].strip()
                text = ''.join(ch for ch in text if ch not in unwanted)
                if text[0] == '.':
                    text = text[1:] ## e.g. SFr.
                if text == '--':
                    statementNA = True
                    break
                price = float(text)
                if '<span class="xSmGreyTxt">' in row:
                    ## currency code is the text right before the first
                    ## closing span that follows the quote
                    end = row.index('</span>', begin)
                    begin = row[:end].rindex('>')+1
                    currencyCode = row[begin:end].strip().upper()
                else:
                    print('@@@', row)
                    currencyCode = None

            ## sector, industry: two consecutive content="..." attributes
            if '<meta name="sector"' in row:
                anchor = row.index('<meta name="sector"')

                begin = row.index(content_tag, anchor)+len(content_tag)
                end = row.index('"', begin)
                sector = row[begin:end]

                begin = row.index(content_tag, end)+len(content_tag)
                end = row.index('"', begin)
                industry = row[begin:end]

            ## market cap: value sits on the line after the label and ends
            ## in a one letter magnitude suffix
            if '>MARKET CAP<' in row or '>Market Cap<' in row:
                following = lines[pos+1]
                begin = following.index('>')+1
                end = following.rindex('<')
                factor = following[end-1]
                text = following[begin:end-1]

                if text == '-':
                    mc = None
                    statementNA = True
                else:
                    mc = float(text)*multipliers[factor]

        if name == '':
            print(len(lines))
            ## deliberate halt (NameError) when no name was found
            retry_name

        if lines == [] and price != '' and mc != '':
            ## deliberate halt (NameError)
            stop_loop

        if mc == '' or price == '':
            statementNA = True

        return (
            name, currencyCode, price,
            sector, industry,
            mc,
            statementNA,
            )
Beispiel #12
0
    def parseIncomeStatement(self, url):
        '''Scrape a three period income statement table from *url*.

        The page is fetched with ``screener.finance().read_url`` and the
        inner table of the first ``yfnc_tabledata1`` table is split into
        rows and cells with regular expressions.

        Args:
            url: address of the income statement page.

        Returns:
            tuple (d, statementNA, currency)
            d: dict mapping the row label to a list with the three period
                values. 'Period Ending' holds the years as int, a '-' cell
                is kept as the string '-', and every other cell is a float
                multiplied by 1000 (parenthesised values are negative).
            statementNA: True when the page is empty or holds no statement
                table; d and currency are None in that case.
            currency: word following '>Currency in ' on the page, or None
                when the page does not state it.

        Raises:
            AssertionError: if a numeric cell is met on a page that does
                not contain 'All numbers in thousands'.
        '''

        print(url)

        lines = screener.finance().read_url(url, '0')
        s_html = '\n'.join(lines)

        ## empty page: report "not available" in the same 3-tuple shape as
        ## the other return paths (the original returned a bare {} here)
        if s_html == '':
            return None, True, None

        ## DOTALL = make '.' match newlines as well
        p_table = re.compile(
            r'<TABLE class="yfnc_tabledata1".*?><TR><TD>(<TABLE.*?></TABLE>)</TD></TR></TABLE>',
            re.DOTALL)
        m = p_table.search(s_html)
        if not m:
            return None, True, None
        s_table = m.group(1)

        p_row = re.compile(r'<[tT][rR]>(.*?)</[tT][rR]>', re.DOTALL)
        p_cell = re.compile(r'<[tT][dD].*?>(.*?)</[tT][dD]>', re.DOTALL)

        ## the unit notice is a property of the page, look it up only once
        in_thousands = 'All numbers in thousands' in s_html

        d = {}
        for match in p_row.finditer(s_table):
            s_row = match.group(0)
            if 'style="display:block' in s_row:
                continue
            s_tr = s_row.replace('<strong>','').replace('</strong>','')
            cells = p_cell.findall(s_tr)
            ## need a label plus three period columns
            if len(cells) <= 3:
                continue
            ## rows with a leading spacer cell: drop the spacer
            if len(cells) >= 5 and 'spacer' in s_row:
                cells = cells[1:]
            ## strip any remaining markup from the label
            k = re.sub('<.*?>', '', cells[0].strip())
            l = []
            for cell in cells[1:4]:
                s = re.sub('<.*?>', '', cell)
                s = s.replace(',','').replace('&nbsp;','').strip()
                if k == 'Period Ending':
                    ## last four characters are the year
                    l.append(int(s[-4:]))
                elif s == '-':
                    l.append(s)
                else:
                    ## 1000 if all numbers in thousands
                    assert in_thousands
                    factor = 1000
                    ## accounting notation: (123) means -123
                    if s.startswith('(') and s.endswith(')'):
                        l.append(-factor*float(s[1:-1]))
                    else:
                        l.append(factor*float(s))
            d[k] = l

        ## raw string: '\w' in a plain literal is an invalid escape sequence
        m = re.search(r'>Currency in (\w*)', s_html)
        currency = m.group(1) if m else None

        return d, False, currency
Beispiel #13
0
    def parse_financial_10_year_summary(self,url):

        '''Scrape the 10 year income statement / balance sheet summaries.

        return values in millions

        The page is fetched with ``screener.finance().read_url`` and walked
        line by line with a small tag state machine: the first row that
        assigns a known header to a column fixes that column's key, later
        rows append their cell value to the list of that key.

        Args:
            url: address of the 10 year summary page.

        Returns:
            dict mapping '&nbsp;', 'SALES', 'EBIT', 'CURRENT ASSETS',
            'CURRENT LIABILITIES', 'SHARES OUTSTANDING' and 'DATE' to
            lists, all re-ordered by descending 'DATE'.
            NOTE(review): when no 'DATE' values were collected the
            re-ordering leaves every non-empty list empty (same as the
            original) - confirm this is intended.

        Raises:
            NameError: deliberate halts inherited from the original code
                (unparsable share count, two digit year above 90).
        '''

        dic_10year_summary = {
            '&nbsp;':[],
            'SALES':[],
            'EBIT':[],
            'CURRENT ASSETS':[],
            'CURRENT LIABILITIES':[],
            'SHARES OUTSTANDING':[],
            }
        dic_10year_summary['DATE'] = []

        ## http://investing.money.msn.com/investments/financial-statements?symbol=aap not sorted by year...

        d_factors = {'Mil':1.,'Bil':1000.}

        lines = screener.finance().read_url(url, '0')

        for i1 in range(len(lines)):

            bool_balance = 'BALANCE SHEET: 10 YEAR SUMMARY' in lines[i1]
            if not ('INCOME STATEMENT: 10-YEAR SUMMARY' in lines[i1] or bool_balance):
                continue

            d_cols = {} ## column number -> key of dic_10year_summary
            col = 0
            bool_tr = False
            bool_td = False
            for i2 in range(i1+1,len(lines)):

                ## end of this summary table
                if '</table>' in lines[i2]:
                    break
                if '</tr>' in lines[i2]:
                    col = 0
                    bool_tr = False
                    continue
                elif '<tr' in lines[i2]:
                    bool_tr = True
                    continue
                elif '<td' in lines[i2] or '<th' in lines[i2]:
                    bool_td = True
                    continue
                elif '</td>' in lines[i2] or '</th>' in lines[i2]:
                    col += 1
                    bool_td = False
                    continue

                ## only text inside a cell of a row is of interest
                if (bool_tr == False or bool_td == False):
                    continue

                ## the cell text is the line following a '<span' line
                if '<span' not in lines[i2-1]:
                    continue

                ##
                ## header
                ##
                if col not in d_cols:
                    k = lines[i2].strip().replace('<br />',' ').replace('<br/>',' ')
                    if k in dic_10year_summary:
                        d_cols[col] = k
                    continue

                ##
                ## data
                ##
                s = lines[i2].strip()
                if d_cols[col] == 'SHARES OUTSTANDING':
                    if 'Mil' in s or 'Bil' in s:
                        f = float(s[:-len(' Xil')])
                        f *= d_factors[s[-3:]]
                    elif s == '0.00':
                        f = 0.
                    elif s == 'NA':
                        f = 0.
                    ## thousands
                    elif ',' in s and '.' in s:
                        f = float(s.replace(',',''))
                    else:
                        print(url)
                        print(d_cols[col])
                        print(s)
                        ## deliberate halt (NameError) on an unknown format
                        stop
                elif d_cols[col] == 'DATE':
                    ## don't add dates again and assume same sequence in both (income/balance) tables
                    if bool_balance:
                        continue
                    ## last two / first two characters swapped around '/'
                    f = '%s/%s' %(s[-2:],s[:2],)
                else:
                    ## year
                    if d_cols[col] == '&nbsp;':
                        f = int(s[-2:])
                        if f > 90:
                            ## deliberate halt (NameError) kept from the
                            ## original; the addition below only becomes
                            ## live if the halt is ever removed
                            stop
                            f += 1900
                        else:
                            f += 2000
                    elif s == 'NA':
                        f = 0.
                    ## float
                    else:
                        if 'Mil' in s or 'Bil' in s:
                            f = float(s.replace(',','')[:-len(' Xil')])
                            f *= d_factors[s[-3:]] ## make sure not a ratio with a number from financial stmt later on...
                        else:
                            f = float(s.replace(',',''))
                dic_10year_summary[d_cols[col]] += [f]

            ## break when second table is reached
            if bool_balance:
                break

        ##
        ## MSN sorts dates and data by month/year; sort by year/date instead
        ##
        ## permutation that puts the dates in descending order; built from
        ## positions instead of list.index so that equal dates keep their
        ## own rows
        l_dates = dic_10year_summary['DATE']
        l_indexes = sorted(
            range(len(l_dates)), key=l_dates.__getitem__, reverse=True)
        for k in list(dic_10year_summary.keys()):
            if len(dic_10year_summary[k]) == 0:
                continue
            dic_10year_summary[k] = [
                dic_10year_summary[k][index] for index in l_indexes]

        return dic_10year_summary
    def parse_overview(self,ticker,url):
        '''Scrape name, quote, sector, industry, beta and market cap.

        The page at *url* is fetched with ``screener.finance().read_url``.

        Returns the tuple
        (name, currencyCode, price, sector, industry, statementNA, mc, beta).
        currencyCode defaults to 'USD'; the other fields keep the empty
        string when they are not found. beta is returned as text.
        statementNA is True when the quote is shown as '--' or when name,
        price or market cap is missing.

        A market cap label without 'Mil.' halts via an undefined name
        (NameError), exactly as before.
        '''

        scale = {'Mil.':1000000,}

        ## currency symbols / entities removed from the market cap text;
        ## the order is significant (longer variants before their prefixes)
        currency_marks = (
            '&#8361;', ## KRW
            '&#8364;', ## EUR
            '&#72;&#75;&#36;', ## HKD
            'Â¥', ## CNY
            'Â', ## CNY
            '¥', ## CNY
            '&#165;', ## JPY
            '&#67;&#72;&#70;', ## CHF
            '&#163;', ## GBP
            'Rs', ## INR
            '&#107;&#114;.', ## DKK
            '&#107;&#114;', ## NOK
            'TL', ## Turkish Lira
            '&#82;', ## Brazil
            '&#78;&#84;&#36;', ## TWD
            '&#1088;&#1091;&#1073;', ## Russia
            '&#76;&#116;', ## Lithuania
            '&#36;', ## USD (dollar symbol)
            '&#77;', ## MYR (Malaysian ringgit - MR)
            '&#3647;', ## THB
            '&#3647;', ## IDR Indonesian Rupiah
            '&#8360;', ## PKR Pakistani ...
            '&#8362;',
            )

        name = mc = price = sector = industry = beta = ''
        currencyCode = 'USD'
        statementNA = False

        print(url)
        lines = screener.finance().read_url(url, ticker)

        for pos, row in enumerate(lines):

            ## name: <h1> on the line after the section title
            if '<div id="sectionTitle">' in row:
                following = lines[pos+1]
                begin = following.index('<h1>')+4
                end = following.index('</h1>')
                name = following[begin:end].strip()

            ## price, currency: line after the first big-font span found
            ## at or below the quote detail block
            if '<div class="sectionQuoteDetail">' in row:
                for pos2 in range(pos,len(lines)):
                    if '<span style="font-size: 23px;">' not in lines[pos2]:
                        continue
                    quote = lines[pos2+1]
                    end = quote.index('</span>')
                    text = quote[:end].replace(',','')
                    if text in ('--', '\t\t\t\t--'):
                        statementNA = True
                    else:
                        price = float(text)
                        begin = quote.index('<span>', end)+6
                        end = quote.index('</span>', begin)
                        currencyCode = quote[begin:end].upper() ## upper if GBp
                    break

            ## sector, industry: both on the fifth line below the header
            if '<div id="sectionHeaderTopics"><div id="headerTopics">' in row:
                topics = lines[pos+5]
                begin = topics.index('/sectors')+8+1
                end = topics.index('"', begin)
                sector = topics[begin:end]
                begin = topics.index('/sectors/industries/')+len('/sectors/industries/')
                begin = topics.index('>', begin)+1
                end = topics.index('<', begin)
                industry = topics[begin:end]

            ## beta
            if '<td>Beta:</td>' in row:
                following = lines[pos+1]
                begin = following.index('<strong>')+8
                end = following.index('</strong>')
                beta = following[begin:end]

            ## market cap
            if '<td>Market Cap' in row:
                factor = 'Mil.'
                if 'Mil.' not in row:
                    print(row)
                    ## deliberate halt (NameError) on an unexpected unit
                    stop
                following = lines[pos+1]
                begin = following.index('<strong>')+8
                end = following.index('</strong>')
                text = following[begin:end]

                for mark in currency_marks:
                    text = text.replace(mark,'')
                text = text.replace(',','')

                if text == '--':
                    print('mc', text)
                else:
                    mc = float(text)*scale[factor]

        if name == '':
            statementNA = True

        if mc == '' or price == '' or mc == '--':
            statementNA = True

        return (
            name, currencyCode, price, sector, industry, statementNA,
            mc, beta,
            )
    def parse_CEOcompensation(self, ticker):
        '''Sum the officer compensation listed on the Reuters officers page.

        The "Basic Compensation" and "Options Compensation" tables of the
        company officers page are read; from the second table row on, one
        figure per row is added to the total ('--' cells are skipped).
        The profile links found in the basic table are then visited in
        order until one of them states the currency after
        'Fiscal Year Total, '.

        Args:
            ticker: symbol inserted into the Reuters company officers URL.

        Returns:
            tuple (compensation, currency)
            compensation: sum of the parsed figures (0 when none found).
            currency: text following 'Fiscal Year Total, ' on the first
                executive page that provides it, or '' when no page does
                (the original raised UnboundLocalError in that case).

        Raises:
            ValueError: if an expected tag is missing in a table row or a
                figure cannot be converted to float.
        '''

        url = 'http://www.reuters.com/finance/stocks/companyOfficers?symbol=%s&viewId=comp' %(ticker)

        lines = screener.finance().read_url(url, ticker)

        header_basic = '<div class="moduleHeader"><h3>Basic Compensation</h3></div>'
        header_options = '<div class="moduleHeader"><h3>Options Compensation</h3></div>'

        compensation = 0
        i2 = 0
        l_urls = []
        for i1 in range(len(lines)):
            ## skip the lines already consumed by the table scan below
            if i1 < i2:
                continue
            if not (header_basic in lines[i1] or header_options in lines[i1]):
                continue

            options = header_options in lines[i1]
            ## offset from the <tr line to the line holding the figure:
            ## 2 in the basic table, 3 in the options table
            i_add = 3 if options else 2

            row = 0
            for i2 in range(i1+1,len(lines)):
                if '<tr' in lines[i2]:
                    row += 1
                    ## first row is skipped (presumably the header row)
                    if row >= 2:
                        cell = str(lines[i2+i_add])
                        index2 = cell.index('</td>')
                        index1 = cell[:index2].index('>')+1
                        s = cell[index1:index2].replace(',','')
                        if s != '--':
                            compensation += float(s)
                        ## if basic compensation table: the line before
                        ## the figure links to the executive's page
                        if i_add == 2:
                            link = lines[i2+i_add-1]
                            index1 = link.index('<a href="')+len('<a href="')
                            index2 = index1+link[index1:].index('"')
                            url_executive = 'http://www.reuters.com%s' %(link[index1:index2])
                            l_urls += [url_executive]
                if '</table>' in lines[i2]:
                    break
            ## the options table is the last one of interest
            if options == True:
                break

        ##
        ## currency: first executive page that states it
        ##
        currency = ''
        for url_executive in l_urls:
            lines = screener.finance().read_url(url_executive, '0')

            for line in lines:
                if 'Fiscal Year Total, ' in line:
                    index1 = line.index('Fiscal Year Total, ')+len('Fiscal Year Total, ')
                    index2 = index1+line[index1:].index('<')
                    currency = line[index1:index2]
                    break

            if currency != '':
                break

        return compensation, currency
    def parse_statement(self,url,dic,statement,):
        """Scrape one financial-statement table from the page at `url`.

        The page is fetched through `screener.finance().read_url(url, '0')`
        and scanned line by line for the `dataTable financials` table.

        Parameters:
            url: address of the statement page; it is printed before fetching.
            dic: not used anywhere in this method.
            statement: statement type; only compared against 'balance'
                (period lengths are not parsed) and 'income' (an unexpected
                period length marks the statement as unavailable instead of
                halting).

        Returns:
            (dic_out, statementNA, currency) where dic_out maps 'date',
            'period' and every row label to a list with one entry per column.

        NOTE(review): `stop5`, `stop` and `stop_loop` are not defined in this
        method; unless they exist at module level, reaching them raises
        NameError - presumably they are deliberate "halt here" markers.
        `factor` and `currency` are only bound once a '<span class="units">'
        line has been seen, so rows parsed before it, or a page without it,
        end in an unbound-variable error.
        """

        print(url)

        ## period lengths that are one week off are mapped to the regular length
        d_periods = {
            '12 Weeks':'13 Weeks','14 Weeks':'13 Weeks',#'16 Weeks':'13 Weeks',
            '25 Weeks':'26 Weeks','27 Weeks':'26 Weeks',#'24 Weeks':'26 Weeks','28 Weeks':'26 Weeks','29 Weeks':'26 Weeks',
            '38 Weeks':'39 Weeks','40 Weeks':'39 Weeks',#'35 Weeks':'39 Weeks','36 Weeks':'39 Weeks',
            '51 Weeks':'52 Weeks','53 Weeks':'52 Weeks',#'48 Weeks':'52 Weeks',
##                                        '11 Months':'12 Months',
            }

        ## row labels found in the table are added as further keys
        dic_out = {
            'period':[],
            'date':[],
            }

        lines = screener.finance().read_url(url, '0')

        statementNA = False
        bool_no_financials = False

        for i1 in range(len(lines)):

            if 'No Financials Data Available</div>' in lines[i1]:
                print('No Financials Data Available</div>')
                statementNA = True
                bool_no_financials = True
                ## undefined name: execution stops here, the break below is not reached
                stop5
                break

            if '<table class="dataTable financials" cellspacing="1" cellpadding="0" width="100%">' in lines[i1]:

                ## walk through the remainder of the page after the table start
                for i2 in range(i1+1,len(lines)):

                    if '<span class="units">' in lines[i2]:

                        ## the second whitespace-separated token of the next line
                        ## names the unit ('Millions' or 'Thousands'); any other
                        ## token raises KeyError
                        l = lines[i2+1].split()
                        s = l[1]
                        d_factors = {'Millions':1000000.,'Thousands':1000.}
                        factor = d_factors[s.strip()]

                        ## the currency is the text between ' of' and the first '<'
                        ## presumably the line reads like 'In Millions of USD ...<' - TODO confirm
                        index1 = str(lines[i2+1]).index(' of')+3
                        index2 = str(lines[i2+1]).index('<')
                        currency = str(lines[i2+1])[index1:index2].strip()

                    elif '<span class="period">' in lines[i2]:
                        ## the column date sits on the previous line, before the first '<'
                        index1 = 0
                        index2 = str(lines[i2-1]).index('<')
                        s = str(lines[i2-1])[index1:index2].strip()
                        ## assumes s looks like YYYY-MM-DD - TODO confirm
                        ## if the last two characters are <= 6 the middle field is
                        ## decremented by one; there is no wrap-around, so 01 becomes 00
                        if int(s[-2:]) <= 6: ## e.g. FXJ.AX, BBY, BKS
                            s = '%5s%02i%3s' %(s[:5],int(s[5:-3])-1,s[-3:],)
                        ## store the date without its last three characters
                        dic_out['date'] += [s[:-3]]

                        if statement != 'balance':
                            ## the period length is the text before the next '</span>'
                            for i3 in range(i2+1,len(lines)):
                                if '</span>' in lines[i3]:
                                    index1 = 0
                                    index2 = lines[i3].index('</span>')
                                    s = lines[i3][index1:index2].strip().replace('&#160;',' ')
                                    if s in list(d_periods.keys()):
                                        s = d_periods[s]
                                    if s not in [
                                        '3 Months','6 Months','9 Months','12 Months',
                                        '13 Weeks','26 Weeks','39 Weeks','52 Weeks',
##                                        '27 Weeks','53 Weeks','25 Weeks',
                                        ]:
                                        if statement == 'income':
                                            ## unexpected period on an income statement:
                                            ## flag it; no period is appended for this column
                                            statementNA = True
                                            break
                                        else:
                                            ## print the offending line, then halt on the
                                            ## undefined name below
                                            print(s)
                                            print((lines[i3]))
                                            print((lines[i3]))
                                            print((lines[i3][index1:index2]))
                                            stop
                                    ## e.g. '3 Months' is stored as [3, 'Months']
                                    l = s.split()
                                    dic_out['period'] += [[int(l[0]),l[1],]]
                                    break


                    elif '<tr ' in lines[i2]:

                        ## first cell of a row is its label, the remaining cells are values
                        col1 = True
                        for i3 in range(i2+1,len(lines)):
                            if '<td ' in lines[i3]:
                                ## cell text lies between the first '>' and the last '<'
                                index1 = lines[i3].index('>')+1
                                index2 = lines[i3].rindex('<')
                                s = lines[i3][index1:index2].replace('  ',' ')
                                if col1 == True:
                                    key = s
                                    dic_out[key] = []
                                    col1 = False
                                else:
                                    ## strip thousands separators and parentheses;
                                    ## '--' counts as zero
                                    s = s.replace(',','').replace('(','').replace(')','')
                                    if s == '--':
                                        s = 0
                                    ## factor is only bound after a units line was seen
                                    value = factor*float(s)
                                    ## a line containing 'minus' marks a negative value
                                    if 'minus' in lines[i3]:
                                        value *= -1
                                    dic_out[key] += [value]
                            ## the row ends at '</tr>' or at a line containing '<th>'
                            if lines[i3].strip() == '</tr>':
                                break
                            if '<th>' in lines[i3].strip():
                                break

                break ## break loop over lines

        ## for an empty page the loop above never ran, so statementNA is still
        ## False here and the undefined name stop_loop is reached
        if lines == []:
            if statementNA == True:
                currency = 'N/A'
            else:
                stop_loop
            if bool_no_financials == False:
                stop

        ## currency is unbound if no units line was found
        return dic_out, statementNA, currency
Beispiel #17
0
    def find_candidates_TA(
        self,
        l_tickers,
        l_time,
        months,
        l_statementNA,
        d_portfolio,
        d_ADR,
    ):
        """Download daily quotes and compute technical-analysis data per ticker.

        The S&P 500 index ('^GSPC') is fetched first to obtain its 52-week
        change. Then, for every ticker, eleven years of daily quotes ending at
        `l_time` are fetched and handed to the sibling methods
        `data_conversion`, `MA`, `support_and_resistance`, `gaps`, `RSI`,
        `MFI` and `MACD`. The resulting ticker lists are printed and
        'TAcandidates.txt' is written in the working directory.

        Parameters:
            l_tickers: ticker symbols; symbols ending in '.BO' are skipped.
            l_time: indexable holding year, month and day at positions 0-2.
            months: not used by this method.
            l_statementNA: not used by this method.
            d_portfolio: not used by this method.
            d_ADR: dict; when a printed ticker equals one of its values, the
                matching key is printed instead.

        Returns:
            (TAcandidates, TAdata). TAcandidates is always empty because the
            code that appended to it is disabled. TAdata maps '^GSPC' and
            each processed ticker to its per-period data.

        Raises:
            NotImplementedError: for a 'CA:' ticker that still contains a '.';
                the conversion to a Yahoo symbol was never implemented (the
                original code halted at this point via an undefined name).
            TypeError: if the S&P 500 history holds no quote dated one year
                or more before the latest quote.
        """

        print('finding TA candidates')

        year2 = l_time[0]
        month2 = l_time[1]
        day2 = l_time[2]
        ## the requested history starts eleven years before l_time
        year1 = year2 - 11
        month1 = month2
        day1 = day2

        def daily_url(symbol):
            ## g=d signifies daily_weekly_monthly graph; months are sent as month-1
            return 'http://ichart.finance.yahoo.com/table.csv?s=%s&d=%s&e=%s&f=%s&g=d&a=%s&b=%s&c=%s&ignore=.csv' % (
                symbol,
                month2 - 1,
                day2,
                year2,
                month1 - 1,
                day1,
                year1,
            )

        def new_price_table():
            ## empty columns to be filled by self.data_conversion
            return {
                'date': [],
                'open': [],
                'high': [],
                'low': [],
                'close': [],
                'volume': [],
                'adjclose': [],
            }

        def years_before(date, years):
            ## same date string with the leading four-digit year reduced
            return '%4s%s' % (int(date[:4]) - years, date[4:])

        def price_on_or_before(dates, prices, cutoff):
            ## walk backwards from the second-to-last entry (the first entry
            ## is never inspected) and return the price of the first date
            ## that is <= cutoff; None if there is no such date
            for i in range(2, len(dates)):
                if dates[-i] <= cutoff:
                    return prices[-i]
            return None

        TAcandidates = []
        TAdata = {}

        l_supports = []
        l_breakouts = []

        l_MA50_increasing = []
        l_MA50_bounce = []
        l_52w_low = []
        l_down10percent_morethansp500 = []

        ##
        ## S&P 500
        ##
        sp500 = '^GSPC'
        period = 'daily'
        TAdata[sp500] = {period: {}}
        linesd = screener.finance().read_url(daily_url(sp500), sp500)
        ## the first line is dropped (presumably the CSV header)
        data = TAdata[sp500][period]['raw'] = linesd[1:]
        TAdata[sp500][period]['price'] = new_price_table()
        TAdata = self.data_conversion(
            sp500,
            period,
            data,
            TAdata,
        )
        d_price = TAdata[sp500][period]['price']
        price_today = d_price['adjclose'][-1]
        date_today = d_price['date'][-1]
        price_52w = price_on_or_before(
            d_price['date'],
            d_price['adjclose'],
            years_before(date_today, 1),
        )
        ## price_52w is None if no quote is old enough; the division then
        ## raises TypeError
        sp500_52w_change = (price_today - price_52w) / price_52w

        for ticker in l_tickers:

            if ticker[-3:] in [
                    '.BO',  ## India not on Yahoo
            ]:
                continue

            ##
            ## convert ticker to Yahoo notation
            ##
            ticker_yahoo = ticker
            ## share classes: replace the first '.' by '-'
            if ticker_yahoo[-2:] in ('.A', '.B'):
                index = ticker_yahoo.index('.')
                ticker_yahoo = ticker_yahoo[:index] + '-' + ticker_yahoo[index + 1:]
            ticker_yahoo = ticker_yahoo.replace('.a', '-a')
            ticker_yahoo = ticker_yahoo.replace('.b', '-b')
            ticker_yahoo = ticker_yahoo.replace('b', '-B')  ## HUBb, NOVO-B.CO
            ticker_yahoo = ticker_yahoo.replace('a', '-A')  ## BFa
            if ':' in ticker_yahoo:
                index = ticker_yahoo.index(':')
                if ticker_yahoo[:index] == 'CA' and '.' in ticker_yahoo:
                    raise NotImplementedError(
                        'no Yahoo conversion for ticker %s' % (ticker_yahoo))
                ticker_yahoo = ticker_conversion.unknown2yahoo(ticker_yahoo)
            elif '.' in ticker_yahoo:
                ticker_yahoo = ticker_conversion.unknown2yahoo(ticker_yahoo)
            ticker_yahoo = ticker_yahoo.replace('..', '.')  ## RB..L

            ##
            ## parse historical data
            ##

            ## the entry is created before the download, so a ticker without
            ## data keeps an empty dict in TAdata
            TAdata[ticker] = {}

            linesd = screener.finance().read_url(daily_url(ticker_yahoo), ticker)

            ## no data
            if linesd == ['']:
                continue

            TAdata[ticker]['daily'] = {
                'raw': linesd[1:],
            }

            ## find TA candidates
            ## NOTE(review): TAcandidate is evaluated below but never used to
            ## fill TAcandidates; the evaluation is kept because it still
            ## indexes the MACD data
            TAcandidate = True
            for period in [
                    'daily',
                    ##                'weekly','monthly',
            ]:

                TAdata[ticker][period]['price'] = new_price_table()

                data = TAdata[ticker][period]['raw']

                TAdata = self.data_conversion(
                    ticker,
                    period,
                    data,
                    TAdata,
                )

                ## calculate MA
                if period == 'daily':
                    TAdata, MA50, MA200, l_MA50_increasing, l_MA50_bounce = self.MA(
                        ticker,
                        period,
                        TAdata,
                        l_MA50_increasing,
                        l_MA50_bounce,
                    )

                    TAdata[ticker][period]['MA50'] = MA50
                    TAdata[ticker][period]['MA200'] = MA200

                    l_dates = TAdata[ticker][period]['price']['date']
                    l_adjclose = TAdata[ticker][period]['price']['adjclose']
                    price_today = l_adjclose[-1]
                    date_today = l_dates[-1]

                    date_52w = years_before(date_today, 1)
                    price_52w = price_on_or_before(l_dates, l_adjclose, date_52w)

                    ## less than a year of history: skip the rest of this period
                    if price_52w is None:
                        continue

                    price_10y = price_on_or_before(
                        l_dates,
                        l_adjclose,
                        years_before(date_today, 10),
                    )

                    ## prices of the last 52 weeks, excluding the latest quote
                    l_prices_52w = [
                        l_adjclose[-i] for i in range(2, len(l_dates))
                        if l_dates[-i] >= date_52w
                    ]

                    if price_52w:
                        change_52w = (price_today - price_52w) / price_52w
                        TAdata[ticker][period]['change_52w'] = round(
                            100 * change_52w, 0)

                    if price_10y:
                        change_10y = (price_today - price_10y) / price_10y
                        TAdata[ticker][period]['change_10y'] = round(
                            100 * change_10y, 0)

                    price_52w_min = min(l_prices_52w)
                    price_52w_max = max(l_prices_52w)

                    above_52w = (price_today - price_52w_min) / price_52w_min
                    below_52w_max = (price_today -
                                     price_52w_max) / price_52w_max

                    TAdata[ticker][period]['above_52w'] = round(
                        100 * above_52w, 0)
                    TAdata[ticker][period]['below_52w_max'] = round(
                        100 * below_52w_max, 0)

                    ## within 5% of the 52 week low
                    if price_today < 1.05 * price_52w_min:
                        l_52w_low += [ticker]

                    ## dropped more than 10% relative to market
                    if (price_today -
                            price_52w) / price_52w < sp500_52w_change - 0.1:
                        l_down10percent_morethansp500 += [ticker]

                if period == 'daily':
                    ## find support and resistance
                    ## conflicts if support or resistance while paying out dividend...
                    l_supports, l_breakouts = self.support_and_resistance(
                        ticker,
                        TAdata,
                        l_supports,
                        l_breakouts,
                    )

                    ## find gap support/resistance (the result is not used)
                    self.gaps(
                        ticker,
                        data,
                    )

                    ## calculate RSI
                    TAdata = self.RSI(ticker, period, TAdata)

                    ## calculate MFI
                    TAdata = self.MFI(ticker, period, TAdata)

                ## calculate MACD
                TAdata = self.MACD(
                    ticker,
                    period,
                    TAdata,
                )

                ## evaluate MACD (bullish)
                if period != 'monthly' and not (
                        TAdata[ticker][period]['MACD']['DIV'][-1] >
                        TAdata[ticker][period]['MACD']['DIV'][-2]
                        and TAdata[ticker][period]['MACD']['DIV'][-2] < 0):
                    TAcandidate = False
                elif period == 'monthly' and not TAdata[ticker][period][
                        'MACD']['DIV'][-2] < 0:
                    TAcandidate = False

                ## end of loop over periods

        d_TA = {
            'MACD': TAcandidates,
            'bouncing at support level': l_supports,
            'breaking resistance level': l_breakouts,
            'MA50 increasing': l_MA50_increasing,
            'MA50 bounce': l_MA50_bounce,
        }
        for s_TA, l_TA in d_TA.items():
            print('\n')
            print(s_TA)
            ## join instead of stripping a trailing '+', which cut off the
            ## '=' of the URL when the list was empty
            print('http://finance.yahoo.com/q/cq?d=v1&s=' +
                  '+'.join(str(ticker) for ticker in l_TA))

        print('\n')
        print(('l_52w_low', l_52w_low))
        print(('l_down10percent_morethansp500', l_down10percent_morethansp500))
        print('\n')

        ## NOTE(review): the original looped over whatever list the loop above
        ## left behind, i.e. the last entry of d_TA ('MA50 bounce' with
        ## insertion-ordered dicts); TAcandidates may have been intended
        l_CAPS = []
        for ticker in l_MA50_bounce:
            ## print the key of d_ADR if the ticker is one of its values
            ticker_US = next(
                (ADR for ADR, v in d_ADR.items() if v == ticker), ticker)
            l_CAPS.append(str(ticker_US))
        print(','.join(l_CAPS))

        ## the with block closes the file even if the write fails
        with open('TAcandidates.txt', 'w') as fd:
            fd.write('%s\n%s' % (l_tickers, TAcandidates))

        return TAcandidates, TAdata
Beispiel #18
0
    def parse_statement(self,url,):
        """Parse the financial statement table found on the page at `url`.

        The page is read through `screener.finance().read_url(url, '0')`.
        The whole statement is expected on the single line that contains the
        'financialsSelectContainer' div; earlier lines are only checked for
        messages that say no statement is available.

        Returns:
            (dic_out, statementNA, statement_pending, currency).
            dic_out maps 'date' to the list of column years, 'period' to a
            list, and every row label to its list of values multiplied by the
            unit factor. currency stays None if no statement line was found.
        """

        unit_factors = {'Millions':1000000.,'Thousands':1000.}
        year_tokens_to_skip = (
            'br /><span',
            'br />Trend', ## 4-Year Trend
            'br />Relea', ## Press Release
            'br />--<br',
            )

        dic_out = {'period':[], 'date':[]}
        currency = None
        statementNA = False
        statement_pending = False

        lines = screener.finance().read_url(url, '0')

        for line in lines:

            ## messages that end the scan
            if '>FINANCIALS SECTOR<' in line:
                statementNA = True
                break

            if 'There are no Income Statements available at this time for' in line:
                print('There are no Income Statements available at this time')
                statement_pending = True
                break

            if ' public company results.' in line:
                print('Multiple or zero results')
                print(line)
                statementNA = True
                break

            ## Empty Table (nothing between end of table headers and end of table)
            if '4-Year<br />Trend</td></tr></table>' in line:
                print('Blank Statement')
                statementNA = True
                break

            if '<div class="financialsSelectContainer">' not in line:
                continue

            ## header cell: 'Currency in<br />' + unit + ' ' + word + ' ' + currency + '</td>'
            marker = 'Currency in<br />'
            head_start = line.index(marker)+len(marker)
            head_end = line.index('</td>', head_start)
            first_space = line.index(' ', head_start, head_end)
            factor = unit_factors[line[head_start:first_space]]
            second_space = line.index(' ', first_space+1, head_end)
            currency = line[second_space+1:head_end]

            ## the years are listed between 'As of:' and the end of the header row
            header_row_end = line.index('</tr>', head_start)
            s_year = line[line.index('As of:', head_start, header_row_end):header_row_end]

            pos = 0
            while True:
                hit = s_year.find('<br />', pos)
                if hit == -1:
                    break
                pos = hit+1
                token = s_year[pos:pos+10]
                ## company has been around for less than 4 years
                if token == 'br />--</t':
                    print('Less than 4 years of data')
                    statement_pending = True
                    break
                if token not in year_tokens_to_skip:
                    dic_out['date'].append(int(s_year[pos+5:pos+9]))

            ## one table row per '<tr>'; the label is the text of its first cell
            pos = 0
            while True:
                hit = line.find('<tr>', pos)
                if hit == -1:
                    break
                pos = hit+1
                label_end = line.index('</td>', pos)
                label_start = line.rindex('>', pos, label_end)+1
                key = line[label_start:label_end]
                ## skip the header row, whose label starts with the unit name
                if ' ' in key and key.partition(' ')[0] in unit_factors:
                    pos += 1
                    continue
                dic_out[key] = []
                s_row = line[label_end:line.index('</tr>', label_end)]

                cell_pos = 0
                while True:
                    cell_hit = s_row.find('<td', cell_pos)
                    if cell_hit == -1:
                        break
                    cell_pos = cell_hit+1
                    value_start = s_row.index('>', cell_pos+len('<td')-1)+1
                    value_end = s_row.index('</td>', value_start)
                    s_value = s_row[value_start:value_end]
                    ## an image cell ends the row
                    if s_value.startswith('<img src="'):
                        break
                    s_value = s_value.replace(',','')
                    if s_value == '&nbsp;':
                        continue
                    ## '--' counts as zero
                    number = 0 if s_value == '--' else s_value
                    dic_out[key].append(factor*float(number))

        ## no rows were added, or no statement line was found at all
        if len(dic_out) == 2 or currency is None:
            statementNA = True

        return dic_out, statementNA, statement_pending, currency
Beispiel #19
0
        }
    d = instance_MSN.parse_financial_10_year_summary(url,dic_10year,)
    print(d)
    stop

    d = instance_MSN.key_ratios_10_year_summary('MSFT',{},)
    print(d)
    stop

    d = {'ABT':{}}
    d = instance_MSN.parse_ownership(d,)
    print(d)
    stop_end

    import screener
    instance_finance = screener.finance()
    (
        tickers, months, time,
        d_indexes, d_msn2yahoo, d_msn2currency, d_ADR, d_yahoo2reuters,
        ) = instance_finance.init()
    d = {}
    for ticker in tickers:
        d[ticker] = {}
    d = instance_MSN.parse_ownership(d,)

    d2 = {}
    for ticker in list(d.keys()):
        holders = d[ticker]['holders']
        for holder in holders:
            if not holder in list(d2.keys()):
                d2[holder] = 0
    def parse_CEOcompensation(self, ticker):

        url = 'http://www.reuters.com/finance/stocks/companyOfficers?symbol=%s&viewId=comp' % (
            ticker)

        lines = screener.finance().read_url(url, ticker)

        compensation = 0
        i2 = 0
        l_urls = []
        for i1 in range(len(lines)):
            if i1 < i2:
                continue
            if ('<div class="moduleHeader"><h3>Basic Compensation</h3></div>'
                    in lines[i1] or
                    '<div class="moduleHeader"><h3>Options Compensation</h3></div>'
                    in lines[i1]):

                basic = False
                if '<div class="moduleHeader"><h3>Basic Compensation</h3></div>' in lines[
                        i1]:
                    basic = True
                    i_add = 2

                options = False
                if '<div class="moduleHeader"><h3>Options Compensation</h3></div>' in lines[
                        i1]:
                    options = True
                    i_add = 3

                row = 0
                for i2 in range(i1 + 1, len(lines)):
                    if '<tr' in lines[i2]:
                        row += 1
                        if row >= 2:
                            index2 = str(lines[i2 + i_add]).index('</td>')
                            index1 = str(
                                lines[i2 + i_add])[:index2].index('>') + 1
                            s = str(lines[i2 + i_add])[index1:index2].replace(
                                ',', '')
                            if s != '--':
                                compensation += float(s)
                            ## if basic compensation table
                            if i_add == 2:
                                index1 = lines[i2 + i_add - 1].index(
                                    '<a href="') + len('<a href="')
                                index2 = index1 + lines[i2 + i_add -
                                                        1][index1:].index('"')
                                s = lines[i2 + i_add - 1][index1:index2]
                                url_executive = 'http://www.reuters.com%s' % (
                                    s)
                                l_urls += [url_executive]
                    if '</table>' in lines[i2]:
                        break
                if options == True:
                    break

        ##
        ##
        ##
        for i_url_executive in range(len(l_urls)):
            url_executive = l_urls[i_url_executive]
            bool_break = False
            lines = screener.finance().read_url(url_executive, '0')

            for i in range(len(lines)):

                if 'Fiscal Year Total, ' in lines[i]:

                    ## currency
                    index1 = lines[i].index('Fiscal Year Total, ') + len(
                        'Fiscal Year Total, ')
                    index2 = index1 + lines[i][index1:].index('<')
                    currency = lines[i][index1:index2]
                    break

            if currency != '':
                break


##            if (
##                'Chief Executive Officer' in lines[i]
##                or
##                '>President, Director<' in lines[i]
##                or
##                'Chief Exec Officer' in lines[i]
##                or
##                '>Chairman of the Board, President<' in lines[i] ## e.g. NATI
##                or
##                '>President, Representative Director<' in lines[i] ## e.g. 6902.T
##                or
##                '>Representative Executive President, Director<' in lines[i] ## e.g. 4902.T
##                or
##                '>Chairman of the Board, Representative Director<' in lines[i] ## e.g. 7205.T
##                or
##                '>Group Managing Director, Executive Director<' in lines[i] ## 0013.HK
##                or
##                '>Chairman of the Board, Managing Director<' in lines[i] ## 0012.HK
##                or
##                '>Chairman of the Board, Chairman of a Subsidiary, Representative Director<' in lines[i] ## e.g. 8035.T
##                or
##                '>General Manager<' in lines[i] ## e.g. TKC
##                or
##                '>Managing Director (CEO), Chairman of the Executive Committee, Director<' in lines[i] ## e.g. TOTF.PA
##                or
##                '>Managing Director, Executive Director<' in lines[i] ## 0006.HK
##                or
##                '>Deputy Chairman of the Board, Managing Director<' in lines[i] ## 0001.HK
##                or
##                '>Chairman of the Executive Committee, Director<' in lines[i] ## SOLB.BR
##                ):
##                index1 = lines[i-3].index('<a href="')+len('<a href="')
##                index2 = index1+lines[i-3][index1:].index('"')
##                url = 'http://www.reuters.com'+lines[i-3][index1:index2]
##                break
##
##        if i == len(lines)-1:
##
##            print url
##            if ticker_reuters not in ['NL:MT','YZC','FR:FP','0003.HK',]:
##                stop
##            compensation = 0
##
##        else:
##
##            for i in range(10):
##                try:
##                    urllines = urllib2.urlopen(url)
##                    lines = urllines.readlines()
##                    break
##                except:
##                    continue
##            if i == 9:
##                print url
##                stop
##
##
##            for i in range(len(lines)):
##
##                if 'Fiscal Year Total, ' in lines[i]:
##
##                    ## currency
##                    index1 = lines[i].index('Fiscal Year Total, ')+len('Fiscal Year Total, ')
##                    index2 = index1+lines[i][index1:].index('<')
##                    currency = lines[i][index1:index2]
##                    if currency == '':
##                        rate = 0.
##                    else:
##                        rate = d_currency[currency]
##
##                    ## compensation
##                    index1 = lines[i+6].index('>')+1
##                    index2 = lines[i+6].rindex('<')
##                    s = lines[i+6][index1:index2].replace(',','')
##                    if s == '--':
##                        compensation = 0.
##                    else:
##                        compensation = float(s)/rate
##
##                    break
##
##                if i == len(lines)-1:
##                    compensation = 0.
##                    print url

        return compensation, currency
Beispiel #21
0
    def key_ratios_10_year_summary(self,ticker,dic_10year_summary):
        """Fill *dic_10year_summary* from the ten-year summary page of *ticker*.

        The page at investing.money.msn.com is fetched through
        ``screener.finance().read_url`` and scanned line by line with a small
        state machine that tracks whether the scan is inside a table, a row
        and a cell.  Header cells (lines ending in ``</span>``) create a new,
        empty list in the dictionary; numeric cells are appended to the list
        of the header recorded for the same column.  The first column and
        cells reading ``NA`` are skipped.

        Args:
            ticker: symbol inserted into the request URL and handed to
                ``read_url``.
            dic_10year_summary: dictionary that is updated in place.

        Returns:
            The same ``dic_10year_summary`` object.

        Raises:
            ValueError: a data cell is neither ``NA`` nor parseable by
                ``float``.
            KeyError: a data cell appears in a column for which no header
                was recorded.
        """

        url = 'http://investing.money.msn.com/investments/key-ratios?symbol=%s&page=TenYearSummary' %(ticker)

        lines = screener.finance().read_url(url, ticker)
        n_lines = len(lines)

        headers = {}      ## column number -> header text
        tables_seen = 0   ## 0: before the table, 1: first table, 2: second table
        in_row = False
        in_cell = False
        col = 0

        for i1, line in enumerate(lines):

            ## page explicitly reports that there is nothing to parse
            if (
                ' is not available.</p>' in line
                or
                '<span>SEARCH RESULTS</span>' in line
                ):
                break

            ## The table start is recognised 8 lines ahead of its first
            ## header.  The look-ahead is guarded: within the last 8 lines
            ## there is no such line, and an unguarded index raised
            ## IndexError on every page that never reached a stop condition
            ## (e.g. an empty response).
            ahead = i1 + 8
            if (
                (ahead < n_lines and ' AVG P/E<' in lines[ahead])
                or
                ('<table' in line and tables_seen == 1)
                ):
                tables_seen += 1
                continue

            ## end of the second table: everything of interest has been read
            if '</table' in line and tables_seen == 2:
                break

            if tables_seen == 0:
                continue

            if '<tr' in line:
                in_row = True
                continue

            if '</tr' in line:
                in_row = False
                col = 0
                continue

            if not in_row:
                continue

            if '<td' in line or '<th' in line:
                in_cell = True
                col += 1
                continue

            if '</td' in line or '</th' in line:
                in_cell = False
                continue

            if not in_cell:
                continue

            if '<span' in line:
                continue

            text = line.strip()
            if text == '</span>':
                continue

            ## header cell: the text is followed by a closing </span> (7 chars)
            if '</span>' in line:
                header = text[:-7].replace('<br/>',' ')
                headers[col] = header
                dic_10year_summary[header] = []
                continue

            ## data cell; the first column is not collected
            if col == 1:
                continue
            value = text.replace(',','') ## thousand separator
            if value == 'NA':
                continue
            dic_10year_summary[headers[col]].append(float(value))

        return dic_10year_summary
    def parse_overview(self, ticker, url):
        """Scrape name, quote, classification, beta and market cap from *url*.

        The page is fetched with ``screener.finance().read_url(url, ticker)``
        and every line is checked against a handful of HTML markers; the
        value belonging to a marker is sliced out of a following line.

        Returns:
            An 8-tuple ``(name, currencyCode, price, sector, industry,
            statementNA, mc, beta)``.  ``price`` and ``mc`` are floats when
            found and ``''`` otherwise; ``currencyCode`` defaults to
            ``'USD'`` and is upper-cased when read from the page;
            ``statementNA`` is True when the price cell reads ``--`` or when
            name, price or market cap could not be read.

        Raises:
            ValueError: an expected tag is missing on a value line, or a
                price / market-cap string cannot be converted by ``float``.
            IndexError: a marker sits too close to the end of the page.
        """

        def enclosed(text, opener, closer):
            ## slice of *text* between the first *opener* and the first *closer*
            begin = text.index(opener) + len(opener)
            return text[begin:text.index(closer)]

        ## Currency markers stripped from the market-cap cell.  They are
        ## applied strictly in this order (longer entity sequences before the
        ## shorter ones they contain).
        currency_markers = (
            '&#8361;',  ## KRW
            '&#8364;',  ## EUR
            '&#72;&#75;&#36;',  ## HKD
            'Â¥',  ## CNY
            'Â',  ## CNY
            '¥',  ## CNY
            '&#165;',  ## JPY
            '&#67;&#72;&#70;',  ## CHF
            '&#163;',  ## GBP
            'Rs',  ## INR
            '&#107;&#114;.',  ## DKK
            '&#107;&#114;',  ## NOK
            'TL',  ## Turkish Lira
            '&#82;',  ## Brazil
            '&#78;&#84;&#36;',  ## TWD
            '&#1088;&#1091;&#1073;',  ## Russia
            '&#76;&#116;',  ## Lithuania
            '&#36;',  ## USD (dollar symbol)
            '&#77;',  ## MYR (Malaysian ringgit - MR)
            '&#3647;',  ## THB
            '&#3647;',  ## IDR Indonesian Rupiah
            '&#8360;',  ## PKR
            '&#8362;',
        )

        unit_multipliers = {
            'Mil.': 1000000,
        }

        name = mc = price = sector = industry = beta = ''
        currencyCode = 'USD'
        statementNA = False

        print(url)
        lines = screener.finance().read_url(url, ticker)
        n_lines = len(lines)

        for pos, line in enumerate(lines):

            ## company name sits in the <h1> of the line after the title div
            if '<div id="sectionTitle">' in line:
                name = enclosed(lines[pos + 1], '<h1>', '</h1>').strip()

            ## price, currency
            if '<div class="sectionQuoteDetail">' in line:
                for probe in range(pos, n_lines):
                    if '<span style="font-size: 23px;">' not in lines[probe]:
                        continue
                    quote = lines[probe + 1]
                    amount_end = quote.index('</span>')
                    amount = quote[:amount_end].replace(',', '')
                    if amount in ('--', '\t\t\t\t--'):
                        statementNA = True
                    else:
                        price = float(amount)
                        ## the currency code is the next <span> on the line
                        code_start = quote.index('<span>', amount_end) + 6
                        code_end = quote.index('</span>', code_start)
                        currencyCode = quote[code_start:code_end].upper()  ## upper if GBp
                    ## only the first quote span is used
                    break

            ## sector, industry
            if '<div id="sectionHeaderTopics"><div id="headerTopics">' in line:
                topics = lines[pos + 5]
                sector_start = topics.index('/sectors') + 8 + 1
                sector = topics[sector_start:topics.index('"', sector_start)]
                anchor = topics.index('/sectors/industries/') + len(
                    '/sectors/industries/')
                industry_start = topics.index('>', anchor) + 1
                industry = topics[
                    industry_start:topics.index('<', industry_start)]

            ## beta
            if '<td>Beta:</td>' in line:
                beta = enclosed(lines[pos + 1], '<strong>', '</strong>')

            ## market cap
            if '<td>Market Cap' in line:
                factor = 'Mil.'
                if 'Mil.' not in line:
                    print(line)
                    ## NOTE(review): `stop` is not defined in the visible
                    ## code; evaluating it presumably raises NameError and
                    ## halts on an unexpected unit - confirm.
                    stop
                cap = enclosed(lines[pos + 1], '<strong>', '</strong>')
                for marker in currency_markers:
                    cap = cap.replace(marker, '')
                cap = cap.replace(',', '')
                if cap == '--':
                    print('mc', cap)
                else:
                    mc = float(cap) * unit_multipliers[factor]

        ## without a name, a price and a market cap the overview is unusable
        if name == '' or mc == '' or price == '' or mc == '--':
            statementNA = True

        return (
            name,
            currencyCode,
            price,
            sector,
            industry,
            statementNA,
            mc,
            beta,
        )
Beispiel #23
0
    def parse_statement_quarterly(self,url,dic,statement, d_currency):
        """Parse one quarterly statement page into *dic*.

        All existing values of *dic* are first reset to None.  The page is
        then read with ``screener.finance().read_url`` and scanned line by
        line for the business type, the reporting currency, the value
        multiple, the filing periods, the period lengths (cash-flow
        statement only), the statement source and finally one row per key
        of *dic*.

        Args:
            url: address of the statement page.
            dic: dictionary whose keys name the rows to extract; updated in
                place (the bookkeeping keys 'rate', 'factor', 'periods',
                'column1', 'source', 'cf_columns_y' and 'cf_columns_qoq' are
                added as the corresponding lines are met).
            statement: statement kind; only the value 'cashflow' is tested.
            d_currency: mapping from the currency text on the page to a rate.

        Returns:
            ``(dic, statementNA, s_business)``.  When the page reports that
            no statement is available the result is ``(dic, True,
            'Industry')``; for business types 'Bank' and 'Insurance' the
            method returns as soon as the type has been read.

        NOTE(review): ``s_business``, ``periods``, ``rate``, ``factor``,
        ``column1`` and ``stmt_source`` are only bound once their line has
        been seen, so a page with an unexpected layout ends in
        UnboundLocalError.  ``stop`` and ``notexpected`` are not defined in
        the visible code and presumably act as deliberate NameError halts -
        confirm.
        """

        def td_texts(text, cursor, count):
            ## Lazily yield the text of the next *count* cells closed by
            ## </td>, scanning forward from *cursor*.
            for _ in range(count):
                cell_end = text.index('</td>', cursor)
                cell_start = text.rindex('>', 0, cell_end) + 1
                cursor = cell_end + 1
                yield text[cell_start:cell_end]

        unavailable_notices = (
            'The financial statement for this symbol, is currently not available.',
            'Statement information for this ticker symbol is not available at this time.',
        )
        multiples = {'Millions':1000000.,'Thousands':1000.}
        share_count_keys = ('Total Common Shares Outstanding','Total Preferred Shares Outstanding')

        statementNA = False

        lines = screener.finance().read_url(url, '0')

        ## reset dictionary
        for existing in list(dic):
            dic[existing] = None

        ## bail out early when the page carries a "not available" notice
        for line in lines:
            if any(notice in line for notice in unavailable_notices):
                return dic, True, 'Industry'

        for line in lines:

            if '<span id="lblErrorMessage"><br><br>Statement information for this ticker symbol is not available at this time.<br><br><br></span>' in line:
                statementNA = True
                break

            ## business type
            if '                        <p><b>Business Type:</b> <span id="lblBusinessType">' in line:
                close_at = line.index('</span></p>')
                s_business = line[line.rindex('>', 0, close_at)+1:close_at]
                if s_business in ('Bank','Insurance'):
                    return dic, statementNA, s_business

            ## currency
            if 'Financial data in' in line:
                open_at = line.index('>')+1
                rate = d_currency[line[open_at:line.index('<', open_at)]]
                dic['rate'] = rate

            ## multiple
            if 'Values in ' in line and ' (Except for per share items)' in line:
                unit_from = line.index('Values in')+len('Values in')
                unit_to = line.index('(Except for per share items)')
                factor = multiples[line[unit_from:unit_to].strip()]
                dic['factor'] = factor

            ## filing dates
            if 'class="ftable"' in line:
                first_cell = line.index('</td>', line.index('<tr class="r1">'))+1
                periods = [
                    None if cell == '' ## e.g. GB:SSE
                    else [int(cell[:4]),int(cell[-1:])]
                    for cell in td_texts(line, first_cell, 5)
                    ]

            ## period lengths
            if statement == 'cashflow' and '>Period Length<' in line:
                period_lengths = []
                first_cell = line.index('</td>', line.index('>Period Length<'))+1
                for cell in td_texts(line, first_cell, 5):
                    if cell != cell.strip():
                        print(cell)
                        notexpected
                    period_lengths.append(cell)

                cf_columns_y,cf_columns_qoq = self.columns_of_interim_statement(periods,period_lengths)
                dic['cf_columns_y'] = cf_columns_y
                dic['cf_columns_qoq'] = cf_columns_qoq

            ## statement sources
            if '>Stmt Source<' in line:
                column1,stmt_source = self.parse_stmt_source(line)
                dic['column1'] = column1
                dic['periods'] = periods
                dic['source'] = stmt_source

                if stmt_source == 'PRESS':
                    stop
                    statementNA = True
                    break ## break line loop

            ## statement rows: one per key of the dictionary
            for key in dic:
                tag = '>%s<' %key
                if tag not in line:
                    continue
                ## key already in another row?
                if dic[key] not in ('N/A',None):
                    print(key, dic[key])
                    print(stmt_source)
                    stop
                ## share counts are not converted with the currency rate
                scale = 1. if key in share_count_keys else rate
                dic[key] = self.parse_statement_multiple(tag, line, scale, factor)
                if dic[key][column1] in ('N/A',None):
                    print(key)
                    stop
                    break

        return dic, statementNA, s_business
Beispiel #24
0
    def parse_statements(self, url):
        """Parse indicators, income, balance and cash-flow tables from *url*.

        The page is read with ``screener.finance().read_url``.  The line
        containing 'All amounts in' supplies the unit (looked up in a fixed
        table of multipliers) and the currency text.  Table rows are then
        collected from the 'INDICATORS' marker up to 'RATIOS CALCULATIONS';
        section headings switch the dictionary that receives the rows.
        Numeric rows are multiplied by the unit; rows with a cell that
        ``float`` rejects are stored as raw strings.

        Returns:
            ``(d_income, d_balance, d_cash, statement_error,
            currency_string, d_indicators)``.  The three statement
            dictionaries have their value lists reversed before returning;
            ``d_indicators`` is returned in page order.  When the page is
            empty or carries no amounts line the result is
            ``(None, None, None, True, None, None)``.

        Raises:
            KeyError: the unit on the page is not in the multiplier table,
                or the income statement has no 'total net income' row.
            IndexError: the amounts line has fewer than two font cells.
        """

        print(url)

        multipliers = {'Millions':1000000.,'Bil':1000000000.}

        lines = screener.finance().read_url(url, '0')

        ## ticker wrong or simply no data on URL
        if not lines:
            return None, None, None, True, None, None

        ## stop on the amounts line; without one, the last line is examined
        for header in lines:
            if 'All amounts in' in header:
                break
        unit = re.search(r'All amounts in (.*?) of', header)
        ## No financial data available.
        if not unit:
            return None, None, None, True, None, None
        ## Millions or Billions or Thousands?
        factor = multipliers[unit.group(1)]
        font_cells = re.findall(
            r"<font face='arial' size='2'>(.*?)</font>", header)
        currency_string = font_cells[1].strip()

        row_pattern = re.compile(r'<TR.*?(<td.*?</td>)</tr>', re.DOTALL)
        cell_pattern = re.compile(r'<td.*?>(.*?)</td>', re.DOTALL)

        d_income = {}
        d_balance = {}
        d_cash = {}
        d_indicators = {}
        ## headings are tested in this order; the last one found on a line wins
        sections = (
            (('INCOME STATEMENT',), d_income),
            (('CASH-FLOW STATEMENT', 'CASH FLOW STATEMENT'), d_cash),
            (('BALANCE SHEET',), d_balance),
        )

        started = False
        target = d_indicators
        for line in lines:
            if 'INDICATORS' in line:
                started = True
            if not started:
                continue
            for headings, table in sections:
                if any(heading in line for heading in headings):
                    target = table
            if 'RATIOS CALCULATIONS' in line:
                break
            row = row_pattern.match(line)
            if not row:
                continue
            cells = cell_pattern.findall(row.group(0))
            try:
                target[cells[0]] = [
                    factor*float(cell.replace(',','')) for cell in cells[1:]]
            except ValueError:
                ## non-numeric row: keep the raw cell strings
                target[cells[0]] = cells[1:]

        print(d_income.keys())
        ## less than 5 years of data
        statement_error = d_income['total net income'][-1] == ''

        ## flip the column order of the three statements
        for table in (d_income, d_balance, d_cash):
            for label in table:
                table[label] = table[label][::-1]

        return d_income, d_balance, d_cash, statement_error, currency_string, d_indicators
Beispiel #25
0
    def parse_ownership(self,data):
        """Attach the filtered list of 5% holders to every ticker in *data*.

        For each ticker (in sorted order) the five-percent-ownership page
        for the symbol returned by ``ticker_conversion.yahoo2msn`` is read
        through ``screener.finance().read_url``.  The first line containing
        'Holder Name' is treated as the holder table; each ``<tr>`` row
        yields the holder name (first cell) and a percentage (fourth cell).
        Holders on the exclusion list, or whose name starts with one of the
        excluded prefixes, are dropped; all others are recorded as
        ``'<holder> (<percentage, 1 decimal>)'``.

        Side effects:
            Prints progress to stdout.  Every recorded holder whose name has
            no parenthesis and mentions neither 'Berkshire' nor 'Walton' is
            appended to ``investors_notexpected.txt`` in the working
            directory.

        Args:
            data: dictionary keyed by ticker; ``data[ticker]['holders']`` is
                set to '' when the page returned nothing, otherwise to the
                ', '-joined holder descriptions.

        Returns:
            The same *data* object.

        Raises:
            ValueError: a table row has fewer than four ``</td>`` cells or a
                percentage cell is not parseable by ``float``.
        """

        ## holders skipped by exact name
        excluded_holders = (
            'GAMCO Investors, Inc.',
            'Royce & Associates, LLC',
            'Franklin Advisory Services, LLC',
            'State Street Global Advisors (US)',
            'Neuberger Berman, LLC',
            'Dimensional Fund Advisors, LP',
            'Capital World Investors',
            'Capital Research Global Investors',
            'Renaissance Technologies Corp.',
            'State Farm Insurance Companies',
            'Wellington Management Company, LLP',
            'Lord, Abbett & Co. LLC',
            'Keeley Asset Management Corp.',
            'Wells Capital Management Inc.',
            'First Eagle Global Fund',
            'First Eagle Investment Management LLC',
        )
        ## holders skipped when their name starts with one of these
        excluded_prefixes = (
            'Vanguard ',
            'Fidelity ',
            'American Funds ',
            'Columbia ',
            'BlackRock ',
            'T. Rowe Price ',
            'Ruane, Cunniff & Goldfarb, Inc. ',
        )
        ## parenthesised fragments that are known not to mark a person's name
        known_parentheticals = (
            '(Grove Creek)',
            '(US)',
            '(UK)',
            '(Americas)',
            '401(k)',
            '(New York)',
            '(Switzerland)',
            '(Singapore)',
            '(International)',
            '(ESOP)',
        )

        print('parsing ownership')

        tickers = sorted(data)

        for n, ticker in enumerate(tickers):

            ticker_msn = ticker_conversion.yahoo2msn(ticker)
            if n % 10 == 0:
                print('\n%s/%s %s' %(n+1, len(tickers), ticker))

            url = 'http://investing.money.msn.com/investments/five-percent-ownership?symbol={}'.format(ticker_msn)

            lines = screener.finance().read_url(url, ticker)

            if lines == [''] or lines == []:
                print('no data', url)
                data[ticker]['holders'] = ''
                continue

            holders = data[ticker]['holders'] = []

            ## Only the first line holding the table header is parsed.  Just
            ## 'Holder Name' is tested: an earlier `'Ownership' and ...`
            ## conjunct was a constant-true no-op, and requiring the word
            ## now could stop matching pages that parse today.
            table = next((line for line in lines if 'Holder Name' in line), None)

            if table is not None and 'No data available' not in table:
                index = table.index('Holder Name')
                while True:
                    row_at = table.find('<tr>', index)
                    if row_at == -1:
                        break
                    index = row_at

                    ## text of the next four cells of the row
                    cells = []
                    for _ in range(4):
                        cell_end = table.index('</td>', index)
                        cell_start = table.rindex('>', 0, cell_end)+1
                        cells.append(table[cell_start:cell_end])
                        index = cell_end+1
                    holder = cells[0]
                    percentage = float(cells[3])

                    if holder in excluded_holders:
                        continue

                    if holder.startswith(excluded_prefixes):
                        ## check that a private person is not being excluded
                        if (
                            ('(' in holder or ')' in holder)
                            and not any(tag in holder for tag in known_parentheticals)
                            ):
                            print(holder)
                            ## NOTE(review): `stop` is not defined in the
                            ## visible code; evaluating it presumably raises
                            ## NameError as a deliberate halt - confirm.
                            stop
                        continue

                    holders.append('%s (%.1f)' %(holder,percentage,))
                    if (
                        '(' not in holder and ')' not in holder
                        and 'Berkshire' not in holder
                        and 'Walton' not in holder
                        ):
                        ## `with` closes the log even when building the
                        ## record fails (e.g. an empty holder name)
                        with open('investors_notexpected.txt','a') as fd:
                            fd.write('%s\t%s\t%s\n' %(holder.split()[-1], ticker, holder))

            data[ticker]['holders'] = ', '.join(holders)

        return data
Beispiel #26
0
    def parse_stmt(self, url, ticker):
        """Scrape one financial-statement table from ``url`` into a dict.

        The page is fetched with ``screener.finance().read_url(url, ticker)``.
        The line that follows the first line containing ``mod-main-content``
        is expected to hold both the ``In <unit> of <currency>`` disclaimer
        and the ``<table class="mod-ui-table">`` markup.

        Returns:
            On success the 3-tuple ``(d, statement_error, currency_string)``.
            ``d`` maps the first cell of every table row to the list of the
            remaining cells; a row whose first cell contains
            ``Fiscal data as of`` is stored under the key ``'date'``.
            Numeric cells become floats multiplied by the unit factor,
            ``(1,234)`` is read as a negative number, ``--`` is stored as 0
            and any other text is kept unchanged.  ``statement_error`` is
            always False on this path.

            When there is nothing to parse (empty page, marker line or the
            line after it missing, no disclaimer, no table) the 6-tuple
            ``(None, None, None, True, None, None)`` is returned.
            NOTE(review): this arity differs from the success path; it is
            kept as-is because the callers are not visible from here.

        Raises:
            KeyError: if the unit named in the disclaimer is not a key of
                ``d_factors``.
        """

        print(url)

        d_factors = {'millions': 1000000., 'Bil': 1000000000.}
        no_data = (None, None, None, True, None, None)

        d = {}

        statement_error = False

        lines = screener.finance().read_url(url, ticker)

        ## ticker wrong or simply no data on URL
        if not lines:
            return no_data

        ## e.g. <div class="currencyDisclaimer contains"><span class="fleft">In millions of EUR<
        marker = next(
            (n for n, text in enumerate(lines) if 'mod-main-content' in text),
            None)
        ## marker missing, or nothing after it: used to raise IndexError
        if marker is None or marker + 1 >= len(lines):
            return no_data
        line = lines[marker + 1]

        units = re.findall(r'>In (.*?) of (.*?)<', line)
        ## No financial data available.
        if not units:
            return no_data
        ## Millions or Billions or Thousands?
        unit, currency_string = units[0]
        factor = d_factors[unit]

        table = re.search(r'<table class="mod-ui-table">(.*?)</table>', line)
        ## table markup missing: used to raise AttributeError on .group()
        if table is None:
            return no_data

        def to_number(text):
            ## '--' is stored as integer 0
            if text == '--':
                return 0
            ## parentheses denote a negative amount
            negative = text.startswith('(') and text.endswith(')')
            digits = text[1:-1] if negative else text
            try:
                value = float(digits.replace(',', ''))
            except ValueError:
                ## not a number (e.g. a date or a label); keep the raw text
                return text
            return factor * -value if negative else factor * value

        for row in re.findall(r'<tr(.*?)>(.*?)</tr>', table.group(1)):
            ## row[0] holds the <tr> attributes, row[1] the row content
            cells = re.findall(r'<t[dh].*?>(.*?)</t[dh]>', row[1])
            if not cells:
                continue
            print(ticker, cells)
            if 'Fiscal data as of' in cells[0]:
                cells[0] = 'date'
            d[cells[0]] = [to_number(cell) for cell in cells[1:]]

        return d, statement_error, currency_string
Beispiel #27
0
    def parse_stmt(self, url, ticker):
        """Scrape one financial-statement table from ``url`` into a dict.

        The page is fetched with ``screener.finance().read_url(url, ticker)``.
        The line that follows the first line containing ``mod-main-content``
        is expected to hold both the ``In <unit> of <currency>`` disclaimer
        and the ``<table class="mod-ui-table">`` markup.

        Returns:
            On success the 3-tuple ``(d, statement_error, currency_string)``.
            ``d`` maps the first cell of every table row to the list of the
            remaining cells; a row whose first cell contains
            ``Fiscal data as of`` is stored under the key ``'date'``.
            Numeric cells become floats multiplied by the unit factor,
            ``(1,234)`` is read as a negative number, ``--`` is stored as 0
            and any other text is kept unchanged.  ``statement_error`` is
            always False on this path.

            When there is nothing to parse (empty page, marker line or the
            line after it missing, no disclaimer, no table) the 6-tuple
            ``(None, None, None, True, None, None)`` is returned.
            NOTE(review): this arity differs from the success path; it is
            kept as-is because the callers are not visible from here.

        Raises:
            KeyError: if the unit named in the disclaimer is not a key of
                ``d_factors``.
        """

        print(url)

        d_factors = {'millions': 1000000., 'Bil': 1000000000.}
        no_data = (None, None, None, True, None, None)

        d = {}

        statement_error = False

        lines = screener.finance().read_url(url, ticker)

        ## ticker wrong or simply no data on URL
        if not lines:
            return no_data

        ## e.g. <div class="currencyDisclaimer contains"><span class="fleft">In millions of EUR<
        marker = next(
            (n for n, text in enumerate(lines) if 'mod-main-content' in text),
            None)
        ## marker missing, or nothing after it: used to raise IndexError
        if marker is None or marker + 1 >= len(lines):
            return no_data
        line = lines[marker + 1]

        units = re.findall(r'>In (.*?) of (.*?)<', line)
        ## No financial data available.
        if not units:
            return no_data
        ## Millions or Billions or Thousands?
        unit, currency_string = units[0]
        factor = d_factors[unit]

        table = re.search(r'<table class="mod-ui-table">(.*?)</table>', line)
        ## table markup missing: used to raise AttributeError on .group()
        if table is None:
            return no_data

        def to_number(text):
            ## '--' is stored as integer 0
            if text == '--':
                return 0
            ## parentheses denote a negative amount
            negative = text.startswith('(') and text.endswith(')')
            digits = text[1:-1] if negative else text
            try:
                value = float(digits.replace(',', ''))
            except ValueError:
                ## not a number (e.g. a date or a label); keep the raw text
                return text
            return factor * -value if negative else factor * value

        for row in re.findall(r'<tr(.*?)>(.*?)</tr>', table.group(1)):
            ## row[0] holds the <tr> attributes, row[1] the row content
            cells = re.findall(r'<t[dh].*?>(.*?)</t[dh]>', row[1])
            if not cells:
                continue
            print(ticker, cells)
            if 'Fiscal data as of' in cells[0]:
                cells[0] = 'date'
            d[cells[0]] = [to_number(cell) for cell in cells[1:]]

        return d, statement_error, currency_string
Beispiel #28
0
    def parse_statements(self, url):
        """Scrape indicators, income, balance-sheet and cash-flow rows.

        The page is fetched with ``screener.finance().read_url(url, '0')``.
        The first line containing ``All amounts in`` supplies the unit
        (looked up in ``d_factors``) and, as the second
        ``<font face='arial' size='2'>`` cell on that line, the currency.

        Rows are collected from the first line containing ``INDICATORS``
        until a line containing ``RATIOS CALCULATIONS``.  Section headings
        (``INCOME STATEMENT``, ``CASH-FLOW STATEMENT`` / ``CASH FLOW
        STATEMENT``, ``BALANCE SHEET``) switch the dict that subsequent rows
        are stored in; rows before the first heading go to the indicators.
        Each row is keyed by its first ``<td>``; the remaining cells are
        converted to ``factor * float`` when all of them are numeric and
        otherwise kept as raw strings.

        Returns:
            ``(d_income, d_balance, d_cash, statement_error,
            currency_string, d_indicators)``.  The value lists of the first
            three dicts are reversed before returning; ``d_indicators`` is
            left in page order.  ``statement_error`` is True when the
            ``'total net income'`` row is missing, empty, or ends in an
            empty cell.

            ``(None, None, None, True, None, None)`` is returned when the
            page is empty, has no usable ``All amounts in <unit> of`` line,
            or that line has fewer than two matching ``<font>`` cells.

        Raises:
            KeyError: if the unit is not a key of ``d_factors``.
        """

        print(url)

        d_factors = {'Millions': 1000000., 'Bil': 1000000000.}
        no_data = (None, None, None, True, None, None)

        lines = screener.finance().read_url(url, '0')

        ## ticker wrong or simply no data on URL
        if not lines:
            return no_data

        header = next((text for text in lines if 'All amounts in' in text),
                      None)
        ## No financial data available.
        if header is None:
            return no_data
        match = re.search(r'All amounts in (.*?) of', header)
        if not match:
            return no_data
        ## Millions or Billions or Thousands?
        factor = d_factors[match.group(1)]
        fonts = re.findall(r'''<font face='arial' size='2'>(.*?)</font>''',
                           header)
        ## the currency is the second cell: used to raise IndexError
        if len(fonts) < 2:
            return no_data
        currency_string = fonts[1].strip()

        row_pattern = re.compile(r'<TR.*?(<td.*?</td>)</tr>', re.DOTALL)
        cell_pattern = re.compile(r'<td.*?>(.*?)</td>', re.DOTALL)
        d_income = {}
        d_balance = {}
        d_cash = {}
        d_indicators = {}
        ## dict receiving the rows of the section currently being read
        section = d_indicators
        started = False
        for line in lines:
            if 'INDICATORS' in line:
                started = True
            if not started:
                continue
            ## deliberately independent checks: the last matching heading wins
            if 'INCOME STATEMENT' in line:
                section = d_income
            if 'CASH-FLOW STATEMENT' in line or 'CASH FLOW STATEMENT' in line:
                section = d_cash
            if 'BALANCE SHEET' in line:
                section = d_balance
            if 'RATIOS CALCULATIONS' in line:
                break
            row = row_pattern.match(line)
            if not row:
                continue
            cells = cell_pattern.findall(row.group(0))
            if not cells:
                continue
            try:
                section[cells[0]] = [
                    factor * float(cell.replace(',', '')) for cell in cells[1:]
                ]
            except ValueError:
                ## at least one non-numeric cell: keep the whole row as text
                section[cells[0]] = cells[1:]

        print(d_income.keys())
        ## original note: "less than 5 years of data" when the last cell is
        ## empty; a missing or empty row is flagged the same way instead of
        ## raising KeyError / IndexError
        net_income = d_income.get('total net income')
        statement_error = not net_income or net_income[-1] == ''

        ## reverse the column order of every statement row (the original
        ## comment describes this as going from old-to-new to new-to-old)
        for statement in (d_income, d_balance, d_cash):
            for key in statement:
                statement[key] = list(reversed(statement[key]))

        return d_income, d_balance, d_cash, statement_error, currency_string, d_indicators