Beispiel #1
0
def getItrInfos(edp):
    # Generator: yields one info dict per quarterly ITR filing of company
    # *edp*. Python 2 code (iteritems(), sorted(..., cmp=...)).
    logging.info('Getting ITR Infos for %s', edp)
   
    # Getting newer version of documents: keying by date lets later entries
    # (per mycmp ordering) overwrite earlier versions of the same filing;
    # version 0.0 documents are dropped entirely.
    itrs = dict([(i['date'], i) for i in sorted(getITRs(edp), cmp=mycmp) if i['version'] > 0.0])
    for dt, itr in itrs.iteritems():
        if dt.year < 2015:
            # Filings before 2015 are skipped -- presumably out of scope
            # downstream; TODO confirm the cutoff year.
            logging.info('Ignoring ITR of %s', dt.year)
            continue

        tm = '{:%Y%m}'.format(dt)  # period key, e.g. '201503'
        logging.info('Getting ITR of %s for %s', edp, tm)

        # Get NSD and NSR: follow the document URL, capture the redirect
        # location (RE_LOC) and pull both ids from its query string.
        url_fdf = RE_LOC.search(openUrl(itr['url'], True)).group(1)
        params = urlparse.parse_qs(urlparse.urlparse(url_fdf).query)
        nsd, nsr = params['NumeroSequencialDocumento'][0], params['NumeroSequencialRegistroCvm'][0]

        infos = dict()
        infos['id'] = edp
        infos['period'] = tm
        infos['ptype'] = 'ITR'
        # Get Infos From DFPs Consolidadas - Balanco Patrimonial Ativo
        getDfpConBPA(nsd, nsr, infos)
        # Get Infos From DFPs Consolidadas - Balanco Patrimonial Passivo
        getDfpConBPP(nsd, nsr, infos)
        # Get Infos From DFPs Consolidadas - Demonstracao do Resultado
        # (ITR-specific general params).
        getDftDemRes(nsd, nsr, infos, PARMS_GERAL_ITR)
        # Get Infos From Dados da Empresa - Composicao do Capital
        getDaeCc(nsd, nsr, infos)
        
        yield infos
Beispiel #2
0
def getDetails(enterprise):
    # Build a details dict for one company; *enterprise* is a (code, name)
    # pair where the code is the CVM id used in the detail-page URL.
    logging.info('Getting Details of Company %s', enterprise[0])

    edp,name = enterprise[0],enterprise[1]
    d = dict()
    d['id'] = edp
    d['name'] = name
    d['register_date'] = datetime.now().isoformat()
    page = openUrl(buildUrl(URL_EDETAIL, 'CodCVM=' + edp))

    # Company Data: flatten the first three tables of panel1a into d.
    att = getTables(page, 'panel1a')
    for table in att[:3]:
        for row in getRows(table): rowToDict(row,d)

    #print tostring(page)
    # Trading codes are the anchor texts with class LinkCodNeg in panel1a.
    att = HtmlElement(page).get_element_by_id('panel1a')
    d['trading_codes'] = [ l.text for l in HtmlElement(att).find_class('LinkCodNeg') ]
    # Assumes 'industry_classification' is 'sector/subsector/segment';
    # raises IndexError if the page ever yields fewer than three parts.
    att = d['industry_classification'].split('/')
    d['sector'] = att[0].strip()
    d['subsector'] = att[1].strip()
    d['segment'] = att[2].strip()
    d['detailwebsite'] = buildUrl(URL_RESUMO, 'codigoCvm=' + edp, 'idioma=pt-BR')
    # Normalize the scraped website URL, defaulting the scheme to http.
    if 'website' in d: d['website'] = urlparse.urlsplit(d['website'], 'http').geturl()

    return d
Beispiel #3
0
def getDfpInfos(edp):
    # Generator: yields one info dict per annual DFP filing of company
    # *edp*. Python 2 code (iteritems(), sorted(..., cmp=...)).
    logging.info('Getting DFP Infos for %s', edp)
    # Getting newer version of documents: keying by date keeps only the
    # last (per mycmp ordering) version; version 0.0 documents are dropped.
    dfps = dict([(i['date'], i) for i in sorted(getDFPs(edp), cmp=mycmp) if i['version'] > 0.0])
    for year, dfp in dfps.iteritems():
        logging.info('Getting DFP of %s for %s', edp, year.year)

        # Get NSD and NSR: follow the document URL, capture the redirect
        # location (RE_LOC) and pull both ids from its query string.
        url_fdf = RE_LOC.search(openUrl(dfp['url'], True)).group(1)
        params = urlparse.parse_qs(urlparse.urlparse(url_fdf).query)
        nsd, nsr = params['NumeroSequencialDocumento'][0], params['NumeroSequencialRegistroCvm'][0]

        infos = dict()
        infos['id'] = edp
        infos['period'] = str(year.year)
        infos['ptype'] = 'DFP'
        # Get Infos From DFPs Consolidadas - Balanco Patrimonial Ativo
        getDfpConBPA(nsd, nsr, infos)
        # Get Infos From DFPs Consolidadas - Balanco Patrimonial Passivo
        getDfpConBPP(nsd, nsr, infos)
        # Get Infos From DFPs Consolidadas - Demonstracao do Resultado
        getDftDemRes(nsd, nsr, infos)
        # Get Infos From Dados da Empresa - Composicao do Capital
        getDaeCc(nsd, nsr, infos)
        
        yield infos
Beispiel #4
0
    def updateEntries(cls, feed):
        # Fetch *feed*'s XML, parse it and upsert one EntryModel per entry.
        # First arg is cls, so presumably a @classmethod -- the decorator
        # is outside this chunk; confirm against the class definition.
        xml = utils.openUrl(feed.url)
        if not xml:
            # Download failed: leave the feed untouched.
            return

        dom = utils.parseXmlString(xml)
        parser = parsers.FeedParserFactory.create(dom)

        pagingKey = 0
        for entryDict in parser.entries():
            key = entryDict['key']

            # Look up an existing entry under this feed; create it if new.
            entry = EntryModel.get_by_key_name(key, parent=feed)
            if not entry:
                entry = EntryModel(parent=feed, key_name=key)
                entry.feed = feed

                feed.total += 1

            # fromDict is truthy when the entry changed; only then is it
            # marked unread, re-keyed for paging and persisted.
            if entry.fromDict(entryDict):
                entry.read = False
                entry.setPagingKey(pagingKey)
                entry.put()

                pagingKey += 1

                feed.unread += 1

        # Persist the feed's updated counters only if something changed.
        if pagingKey > 0:
            feed.put()
Beispiel #5
0
def downloadBdiFile(dt):
    # Fetch the daily BDI zip archive for date *dt* and yield an open
    # file-like object for every member it contains.
    url = '{0}bdi{1}.zip'.format(URL_BDI, dt.strftime('%m%d'))
    logging.info('Downloading file %s', url)

    # Wrap the downloaded bytes in an in-memory buffer for zipfile.
    archive = zipfile.ZipFile(StringIO(openUrl(url, True)))
    for member in archive.namelist():
        yield archive.open(member)
def _msnParseMnytbl(table):
    """Parse one MSN 'mnytbl' table into {first_cell: tuple_of_values}.

    The first column is interpreted as a '%m/%y' date (epoch seconds)
    when possible, otherwise kept as a plain string; remaining columns
    go through utils.extractData.
    """
    import utils

    parsed = {}
    for row in table.find_all("tr"):
        cols = row.find_all("td")
        if len(cols) == 0:
            continue
        values = ()  # avoid shadowing the builtin `tuple`
        for icol, col in enumerate(cols):
            entry = col.find_all(text=True)[1].strip()
            if icol == 0:
                try:
                    secs = utils.makeEpochTime(str(entry), "%m/%y")
                    values = values + (secs,)
                except ValueError:
                    values = values + (str(entry),)
            else:
                values = utils.extractData(entry, values)
        parsed[values[0]] = values[1:]
    return parsed


def msnMoneyTenYearSummary(symbol, local=False):
    """Scrape MSN Money's ten-year financial summary for *symbol*.

    Returns (income, balance): dicts mapping the first-column value
    (epoch seconds when parseable, else the raw string) to a tuple of
    the remaining column values. Returns None for an empty symbol.
    """
    import bs4
    import utils

    if not len(symbol):
        return None
    url = "http://investing.money.msn.com/investments/financial-statements?symbol=" + symbol
    url, page = utils.openUrl(url, local)
    print(url)
    soup = bs4.BeautifulSoup(page, "lxml")

    # Extract tables
    tables = soup.find_all("table", {"class": "mnytbl"})

    # Income-statement and balance-sheet tables share one layout, so a
    # single helper parses both (previously duplicated inline).
    income = _msnParseMnytbl(tables[0])
    balance = _msnParseMnytbl(tables[1])

    return income, balance
Beispiel #7
0
def getDaeCc(nsd, nsr, infos):
    # Scrape 'Dados da Empresa - Composicao do Capital' (share counts)
    # into *infos*, keyed by the page's field ids.
    logging.info('Getting Dados da Empresa - Composicao do Capital - %s %s', nsd, nsr)
    # NOTE(review): PARMS_BPP is passed twice to buildUrl here -- possibly
    # a copy/paste slip; confirm against the other URL builders.
    page = HtmlElement(openUrl(buildUrl(URL_DCC, PARMS_GERAL, PARMS_BPP, PARMS_BPP,
        'NumeroSequencialDocumento=' + nsd, 'NumeroSequencialRegistroCvm=' + nsr)))
  
    # Multiplicador: values are in thousands when the header shows '(Mil)'.
    em = page.xpath('.//div[@id="UltimaTabela"]/table/tr/td/b/text()')
    m = 1000 if len(em) > 0 and em[0].find('(Mil)') != -1 else 1

    for i in ['QtdAordCapiItgz', 'QtdAprfCapiItgz', 'QtdTotAcaoCapiItgz', 'QtdAordTeso', 'QtdAprfTeso', 'QtdTotAcaoTeso']:    
        # Missing field -> 0; otherwise parse the integer and scale it.
        qnt = page.get_element_by_id('ctl00_cphPopUp_{0}_1'.format(i))
        infos[i] = 0 if qnt is None else toInt(qnt.text) * m
Beispiel #8
0
def getDftDemRes(url, infos):
    # Scrape 'DFs Consolidadas - Demonstracao do Resultado' from *url*:
    # fills infos['RL'] (net revenue) and infos['LL'] (net profit).
    # NOTE(review): a (nsd, nsr, ...) variant of getDftDemRes also appears
    # in this file -- these look like snippets from different revisions.
    logging.info('Getting DFs Consolidadas - Demonstracao do Resultado')
    t = dict()
    page = openUrl(url) 
    table = HtmlElement(page).get_element_by_id(TABLE_BPP)
    for row in getRows(table): rowToDict(row, t)

    # Multiplicador: scale factor declared on the page (e.g. thousands).
    m = getMultiplicador(page)

    # Receita Liquida (net revenue): account code 3.01, 0 when absent.
    infos['RL'] = toInt(t['3.01'][1])*m if '3.01' in t else 0
    # Lucro Liquido: first row whose label matches 'Lucro...odo'
    # (e.g. 'Lucro/Prejuizo do Periodo'); 0 when none matches.
    infos['LL'] = next((toInt(v[1])*m for k,v in t.iteritems() if re.match('^Lucro.+odo$', v[0])), 0)
Beispiel #9
0
def getDfpConBPA(url, infos):
    # Scrape 'DFs Consolidadas - Balanco Patrimonial Ativo' from *url*:
    # fills infos['CAIXA'] with cash plus short-term investments.
    logging.info('Getting DFs Consolidadas - Balanco Patrimonial Ativo')
    t = dict()
    page = HtmlElement(openUrl(url)) 
    table = page.get_element_by_id(TABLE_BPP)
    for row in getRows(table): rowToDict(row, t)

    # Multiplicador: scale factor declared on the page (e.g. thousands).
    m = getMultiplicador(page)

    # Caixa: account 1.01.01 (cash) + 1.01.02 (financial investments),
    # each 0 when absent from the table.
    cxa = toInt(t['1.01.01'][1])*m if '1.01.01' in t else 0
    apf = toInt(t['1.01.02'][1])*m if '1.01.02' in t else 0
    infos['CAIXA'] = cxa + apf
Beispiel #10
0
def getDfpConBPA(nsd, nsr, infos):
    # Scrape 'DFs Consolidadas - Balanco Patrimonial Ativo' identified by
    # document (nsd) / registration (nsr) numbers; fills infos['CAIXA'].
    # NOTE(review): a (url, infos) variant also appears in this file.
    logging.info('Getting DFs Consolidadas - Balanco Patrimonial Ativo - %s %s', nsd, nsr)
    t = dict()
    page = HtmlElement(openUrl(buildUrl(URL_FDF, PARMS_GERAL, PARMS_BPP, 
        'NumeroSequencialDocumento=' + nsd, 'NumeroSequencialRegistroCvm=' + nsr, 
        'Informacao=2', 'Demonstracao=2')))
    table = page.get_element_by_id(TABLE_BPP)
    for row in getRows(table): rowToDict(row, t)

    # Multiplicador: scale factor declared on the page (e.g. thousands).
    m = getMultiplicador(page)

    # Caixa: account 1.01.01 (cash) + 1.01.02 (financial investments),
    # each 0 when absent from the table.
    cxa = toInt(t['1.01.01'][1])*m if '1.01.01' in t else 0
    apf = toInt(t['1.01.02'][1])*m if '1.01.02' in t else 0
    infos['CAIXA'] = cxa + apf
Beispiel #11
0
def getDftDemRes(nsd, nsr, infos, params_geral=PARMS_GERAL):
    # Scrape 'DFs Consolidadas - Demonstracao do Resultado' identified by
    # document/registration numbers; params_geral lets ITR callers pass
    # PARMS_GERAL_ITR. Fills infos['RL'] and infos['LL'].
    logging.info('Getting DFs Consolidadas - Demonstracao do Resultado - %s %s', nsd, nsr)
    t = dict()
    page = openUrl(buildUrl(URL_FDF, params_geral, PARMS_BPP, 
        'NumeroSequencialDocumento=' + nsd, 'NumeroSequencialRegistroCvm=' + nsr, 
        'Informacao=2', 'Demonstracao=4'))
    table = HtmlElement(page).get_element_by_id(TABLE_BPP)
    for row in getRows(table): rowToDict(row, t)

    # Multiplicador: scale factor declared on the page (e.g. thousands).
    m = getMultiplicador(page)

    # Receita Liquida (net revenue): account 3.01, 0 when absent.
    infos['RL'] = toInt(t['3.01'][1])*m if '3.01' in t else 0
    # Lucro Liquido (net profit): account 3.11, 0 when absent.
    infos['LL'] = toInt(t['3.11'][1])*m if '3.11' in t else 0
Beispiel #12
0
def msnMoneyQuote(symbol, local=False):
    """Scrape MSN Money's stock-price page for *symbol*.

    Returns (details, highlights): each is a one-entry dict keyed by the
    page's date stamp (epoch seconds) whose value is the tuple of the
    table's second-column values. Returns None for an empty symbol.
    """
    import bs4
    import utils

    if not len(symbol):
        return None
    url = 'http://investing.money.msn.com/investments/stock-price?symbol=' + symbol
    url, page = utils.openUrl(url, local)
    print(url)
    soup = bs4.BeautifulSoup(page, "lxml")

    # Extract date stamp from below "details" table
    footers = soup.find_all("span", {"class": "foot"})
    string = footers[0].find_all(text=True)[0].strip().split(' ')[2]
    date = utils.makeEpochTime(string, '%m/%d/%Y')

    # Extract tables
    tables = soup.find_all("table", {"class": "mnytbl"})

    # Parse "details" table. (Fix: dropped a row counter that was
    # incremented but never read; renamed `tuple` to avoid shadowing
    # the builtin.)
    details = {}
    values = ()
    for row in tables[0].find_all("tr"):
        cells = row.find_all("td")
        if len(cells) == 0:
            continue
        data = cells[1].find_all(text=True)[1].strip()
        values = utils.extractData(data, values)
    details[date] = values

    # Parse "financial highlights" table: data rows 2 and 3 keep their
    # value in a different text node, hence the per-row index switch.
    highlights = {}
    values = ()
    cntr = 0
    for row in tables[1].find_all("tr"):
        cells = row.find_all("td")
        if len(cells) == 0:
            continue
        index = 2 if (cntr == 2 or cntr == 3) else 1
        data = cells[1].find_all(text=True)[index].strip()
        values = utils.extractData(data, values)
        cntr = cntr + 1
    highlights[date] = values

    return details, highlights
Beispiel #13
0
def getDfpConBPP(nsd, nsr, infos):
    # Scrape 'DFs Consolidadas - Balanco Patrimonial Passivo' identified
    # by document/registration numbers: fills infos['PL'] (equity) and
    # infos['DB'] (gross debt).
    logging.info('Getting DFs Consolidadas - Balanco Patrimonial Passivo - %s %s', nsd, nsr)
    t = dict()
    page = openUrl(buildUrl(URL_FDF, PARMS_GERAL, PARMS_BPP, 
        'NumeroSequencialDocumento=' + nsd, 'NumeroSequencialRegistroCvm=' + nsr, 
        'Informacao=2', 'Demonstracao=3'))
    table = HtmlElement(page).get_element_by_id(TABLE_BPP)
    for row in getRows(table): rowToDict(row, t)

    # Multiplicador: scale factor declared on the page (e.g. thousands).
    m = getMultiplicador(page)

    # Patrimonio Liquido (equity): account 2.03, 0 when absent.
    infos['PL'] = toInt(t['2.03'][1])*m if '2.03' in t else 0
    # Divida Bruta: short-term (2.01.04) plus long-term (2.02.01) debt.
    CP = toInt(t['2.01.04'][1])*m if '2.01.04' in t else 0
    LP = toInt(t['2.02.01'][1])*m if '2.02.01' in t else 0
    infos['DB'] = CP + LP
Beispiel #14
0
def getDfpConBPP(url, infos):
    # Scrape 'DFs Consolidadas - Balanco Patrimonial Passivo' from *url*:
    # fills infos['PL'] (equity), infos['DB'] (gross debt) and
    # infos['DIV'] (dividends payable).
    # NOTE(review): a (nsd, nsr, ...) variant also appears in this file;
    # this one additionally extracts dividends.
    logging.info('Getting DFs Consolidadas - Balanco Patrimonial Passivo')
    t = dict()
    page = openUrl(url) 
    table = HtmlElement(page).get_element_by_id(TABLE_BPP)
    for row in getRows(table): rowToDict(row, t)

    # Multiplicador: scale factor declared on the page (e.g. thousands).
    m = getMultiplicador(page)

    # Patrimonio Liquido (equity): account 2.03, 0 when absent.
    infos['PL'] = toInt(t['2.03'][1])*m if '2.03' in t else 0
    # Divida Bruta: short-term (2.01.04) plus long-term (2.02.01) debt.
    CP = toInt(t['2.01.04'][1])*m if '2.01.04' in t else 0
    LP = toInt(t['2.02.01'][1])*m if '2.02.01' in t else 0
    infos['DB'] = CP + LP
    # Dividendos: account 2.01.05.02.01, 0 when absent.
    Div = toInt(t['2.01.05.02.01'][1])*m if '2.01.05.02.01' in t else 0
    infos['DIV'] = Div
Beispiel #15
0
def msnMoneyBalanceSheet(symbol, local=False):
    """Scrape MSN Money's annual balance-sheet page for *symbol*.

    Returns (titles, data): titles is the list of row labels; data maps
    each column's first value (an epoch-seconds date for header rows) to
    the tuple of that column's remaining values. Returns None for an
    empty symbol.
    """
    import bs4
    import utils

    if not len(symbol):
        return None
    url = 'http://investing.money.msn.com/investments/stock-balance-sheet/?symbol=' + symbol + '&stmtView=Ann'
    url, page = utils.openUrl(url, local)
    print(url)
    soup = bs4.BeautifulSoup(page, "lxml")

    rows = soup.find_all("tr")
    ncols = len(rows[-1].find_all("td")) - 1
    titles = []
    tuples = [() for _ in range(ncols)]
    for irow, row in enumerate(rows):
        for icol, col in enumerate(row.find_all("td")):
            entries = col.find_all(text=True)
            # Which text node carries the value depends on how many nodes
            # the cell contains; other shapes are skipped.
            index = None
            if len(entries) == 1:
                index = 0
            elif len(entries) == 3:
                index = 1
            elif len(entries) == 7:
                index = 4
            else:
                continue
            entry = entries[index].strip().encode("utf-8")
            # Rows 7 and 30 are layout separators on this page; skip them.
            if irow == 7 or irow == 30:
                continue
            if len(entry):
                if icol == 0:
                    titles.append(str(entry))
                else:
                    # Header rows hold dates in differing formats; for any
                    # other row (KeyError) the cell is plain data.
                    dates = {1: '%Y', 2: '%m/%d/%Y', 4: '%m/%d/%Y'}
                    try:
                        date = dates[irow]
                        secs = utils.makeEpochTime(str(entry), date)
                        tuples[icol - 1] = tuples[icol - 1] + (secs,)
                    except KeyError:
                        tuples[icol - 1] = utils.extractData(entry, tuples[icol - 1])

    # Fix: the result dict was named `dict`, shadowing the builtin.
    data = {}
    for col in range(len(tuples)):
        data[tuples[col][0]] = tuples[col][1:]

    return titles, data
Beispiel #16
0
def getLinks(url):
    # Resolve *url* to the framed-document page and return a dict with its
    # identifying query parameters plus direct links to the three
    # consolidated statements (BPA, BPP, DR) when found in the body.
    resp = dict()
    trp = openUrl(url, True)
    # The response embeds a redirect; RE_LOC captures the target URL.
    url_fdf = RE_LOC.search(trp).group(1)

    params = urlparse.parse_qs(urlparse.urlparse(url_fdf).query)
    resp['ctd'] = params['CodTipoDocumento'][0]
    resp['nsd'] = params['NumeroSequencialDocumento'][0]
    resp['nsr'] = params['NumeroSequencialRegistroCvm'][0]
    resp['cti'] = params['CodigoTipoInstituicao'][0]
    # Common query-string suffix appended to every statement link below.
    fparams = '&CodTipoDocumento={0}&NumeroSequencialDocumento={1}&'\
        'NumeroSequencialRegistroCvm={2}&CodigoTipoInstituicao={3}'.\
        format(resp['ctd'], resp['nsd'], resp['nsr'], resp['cti'])
 
    # The body carries JSON-ish "Text"/"Value" pairs naming each statement;
    # match the Portuguese labels (accented middles elided by the regexes).
    for i in re.findall('"Text":"([^"]+)","Value":"([^"]+)"', trp):
        if re.match('^Balan.+Ativo$', i[0]): resp['dfp_bpa'] = escapeUrl(URL_RAD + i[1] + fparams)
        if re.match('^Balan.+Passivo$', i[0]): resp['dfp_bpp'] = escapeUrl(URL_RAD + i[1] + fparams)
        if re.match('^Demonstra.+Resultado$', i[0]): resp['dfp_dr'] = escapeUrl(URL_RAD + i[1] + fparams)
    
    return resp
Beispiel #17
0
def msnMoneyCompanyProfile(symbol, local=False):
    """Scrape the company-profile blurb from MSN Money for *symbol*.

    Returns (length, profile): two single-entry dicts keyed by 0 holding
    the profile text's length and the text itself, or None for an empty
    symbol.
    """
    import bs4
    import utils

    if not len(symbol):
        return None
    url = 'http://investing.money.msn.com/investments/company-report?symbol=' + symbol
    url, page = utils.openUrl(url, local)
    print(url)
    soup = bs4.BeautifulSoup(page, "lxml")

    # The profile text is the first span of the second 'mnytbl' table.
    tables = soup.find_all("table", {"class": "mnytbl"})
    text = tables[1].find_all("span")[0].find_all(text=True)[0].strip().encode("utf-8")

    # (Fix: removed an unused placeholder date and its datetime import.)
    length = {0: len(text)}
    profile = {0: text}

    return length, profile
Beispiel #18
0
def getFdFiles(edp, dftype):
    # Generator: download (or reuse a cached copy of) the zip file for
    # every *dftype* filing of company *edp*, yielding each local path.
    logging.info('Getting {0} Infos for {1}'.format(dftype, edp))
    # Getting newer version of documents: keying by date keeps only the
    # last (per mycmp ordering) version; version 0.0 entries are dropped.
    fps = dict([(i['date'], i) for i in sorted(getFPs(edp, dftype), cmp=mycmp) if i['version'] > 0.0])
    for year, fp in fps.iteritems():
        logging.info('Getting {0} of {1} for {2}'.format(dftype, edp, year.year))

        params = urlparse.parse_qs(urlparse.urlparse(fp['url']).query)
        query = 'NumeroSequencialDocumento={0}&CodigoInstituicao={1}'.format(params['NumeroSequencialDocumento'][0], params['CodigoTipoInstituicao'][0])
        # Cache layout: <base>/<edp>/<year>/<nsd>_<cti>-<version>.zip
        filepath = path.join(getPath(dftype), str(edp), str(year.year))
        filename = '{0}/{1}_{2}-{3}.zip'.format(filepath, params['NumeroSequencialDocumento'][0], params['CodigoTipoInstituicao'][0], fp['version'])
        if path.isfile(filename):
            # Already downloaded at this exact version: reuse it.
            logging.info('File {0} exists, skiping...'.format(filename))
            yield filename
            continue

        if not path.isdir(filepath): 
            makedirs(filepath)

        logging.info('Downloading file {0}'.format(filename))
        with open(filename, 'wb+') as f:
            f.write(openUrl(buildUrl(URL_DD, query), True))
        yield filename
Beispiel #19
0
def getFPs(edp, dftype):
    # List the filings of the given type for company *edp*: fetch the
    # listing page and parse every anchor inside the documents div.
    listing = openUrl(buildUrl(URL_DF, 'codigoCVM=' + edp, 'idioma=pt-br', 'tipo=' + dftype.lower()))
    selector = './/div[@id="' + DIV_DFPS + '"]/div/div/div/div/p/a'
    return [parseFd(anchor) for anchor in listing.findall(selector)]
def msnMoneyHistoricalPrices(symbol, local=False):
    """Scrape MSN Money's historical-price table for *symbol*.

    Returns (titles, prices, dividends):
      titles    -- column header strings,
      prices    -- {date_secs: tuple of price columns} from 6-column rows,
      dividends -- {ex_div_date_secs: (dividend, share_price)} from
                   3-column rows.
    Returns None for an empty symbol.
    """
    import bs4
    import utils

    if not len(symbol):
        return None
    url = (
        "http://investing.money.msn.com/investments/equity-historical-price/?PT=7&D4=1&DD=1&D5=0&DCS=2&MA0=0&MA1=0&CF=0&nocookie=1&SZ=0&symbol="
        + symbol
    )

    url, page = utils.openUrl(url, local)
    print(url)
    soup = bs4.BeautifulSoup(page, "lxml")

    rows = soup.find_all("tr")

    titles = []
    prices = {}
    dividends = {}
    for irow, row in enumerate(rows):
        cols = row.find_all("td")

        # Extract titles from table header cells.
        headers = row.find_all("th")
        for header in headers:
            entries = header.find_all(text=True)
            entry = entries[1].strip()
            if not len(entry):
                continue
            titles.append(str(entry))

        # Extract ex-dividend dates, dividends paid, and share price.
        if len(cols) == 3:
            date = 0
            div = 0.0
            price = 0.0
            try:
                entries = cols[0].find_all(text=True)
                entry = entries[1].strip()
                if len(entry):
                    date = utils.makeEpochTime(str(entry), "%m/%d/%Y")
            except ValueError:
                date = 0
            try:
                entries = cols[1].find_all(text=True)
                entry = entries[1].strip().split(" ")[0]
                if len(entry):
                    div = float(entry)
            except ValueError:
                div = 0.0
            try:
                # The share price lives in the NEXT row. Fix: guard with
                # irow + 1 -- the old `irow < len(rows)` was always true
                # inside enumerate, so the last row could raise an
                # uncaught IndexError on rows[irow + 1].
                if irow + 1 < len(rows):
                    entries = rows[irow + 1].find_all("td")[4].find_all(text=True)
                    entry = entries[1].strip()
                    price = float(entry)
                else:
                    price = 0.0
            except ValueError:
                price = 0.0
            if date != 0:
                dividends[date] = (div, price)

        # Loop through rows and extract share prices (6-column data rows).
        else:
            values = ()  # avoid shadowing the builtin `tuple`
            if len(cols) != 6:
                continue
            for icol, col in enumerate(cols):
                entries = col.find_all(text=True)
                entry = entries[1].strip()
                if not len(entry):
                    continue
                try:
                    secs = utils.makeEpochTime(str(entry), "%m/%d/%Y")
                    values = values + (secs,)
                except ValueError:
                    values = utils.extractData(entry, values)
            prices[values[0]] = values[1:]

    return titles, prices, dividends
Beispiel #21
0
def getITRs(edp):
    # Return the parsed list of quarterly ITR filings for company *edp*.
    listing = openUrl(buildUrl(URL_DF, 'codigoCVM=' + edp, 'idioma=pt-br', 'tipo=itr'))
    selector = './/div[@id="' + DIV_DFPS + '"]/div/div/div/div/p/a'
    return [parseFd(link) for link in listing.findall(selector)]