Beispiel #1
0
def parse_6(cur, ticker_id, exch_id, data):

    # Check if source data has correct information
    try:
        js = json.loads(data)['componentData']
        if js is None:
            return 6
        soup = bs(js, 'html.parser')
    except KeyboardInterrupt:
        print('\nGoodbye!')
        exit()
    except:
        print('\n\nTicker_id = {}, Exch_id = {}'.format(ticker_id, exch_id))
        print('Data = {} {}\n'.format(data, len(data)))
        raise

    # Parse table
    table = {}
    trows = soup.find_all('tr')
    for trow in trows:
        row_tags = trow.find_all(['th', 'td'])
        for i, row_tag in enumerate(row_tags):
            if 'id' in row_tag.attrs:
                text = row_tag.text
                if i != 0:
                    text_id = fetch.sql_insert_one_get_id(
                        cur, 'TimeRefs', 'dates', text)
                else:
                    text_id = fetch.sql_insert_one_get_id(
                        cur, 'ColHeaders', 'header', text)
                table[row_tag.attrs['id']] = text_id
            elif 'headers' in row_tag.attrs:
                headers = row_tag.attrs['headers']
                headers.reverse()
                val = re.sub(',', '', row_tag.text)
                if val == '—':
                    val = None
                else:
                    val = float(val)
                table['_'.join(headers)] = val

    if table == {}:
        return 6

    # Insert data into tables
    tname = 'MSfinancials'
    # Update
    dict0 = {'ticker_id': ticker_id, 'exchange_id': exch_id}
    sql = fetch.sql_update_record(tname, table, dict0)
    ct = fetch.db_execute(cur, sql)
    # Insert
    if cur.rowcount == 0:
        table['ticker_id'] = ticker_id
        table['exchange_id'] = exch_id
        sql = fetch.sql_insert(tname, tuple(table.keys()),
                               tuple(table.values()))
        fetch.db_execute(cur, sql)

    return 200
Beispiel #2
0
def parse_2(cur, ticker_id, exch_id, data):

    soup = bs(data, 'html.parser')
    tags = soup.find_all('span')

    try:
        sector = tags[2].text.strip()
        industry = tags[4].text.strip()
        stype = tags[6].text.strip()
        fyend = tags[10].text.strip()
        style = tags[12].text.strip()
    except KeyboardInterrupt:
        print('\nGoodbye!')
        exit()
    except:
        print('\n\nTicker_id = {}, Exch_id = {}'.format(ticker_id, exch_id))
        print('Data = {} {}\n'.format(data, len(data)))
        raise

    # Insert sector into Sectors
    sector_id = fetch.sql_insert_one_get_id(cur, 'Sectors', 'Sector', sector)

    # Insert industry into Industries
    sql = fetch.sql_insert('Industries', '(industry, sector_id)',
                           (industry, sector_id))
    fetch.db_execute(cur, sql)
    sql = fetch.sql_record_id('Industries', '(industry)', industry)
    industry_id = fetch.db_execute(cur, sql).fetchone()[0]

    # Insert stock_type into StockTypes
    stype_id = fetch.sql_insert_one_get_id(cur, 'StockTypes', 'stock_type',
                                           stype)

    # Insert fyend into FYEnds
    fyend_id = fetch.sql_insert_one_get_id(cur, 'TimeRefs', 'dates', fyend)

    # Insert style into StockStyles
    style_id = fetch.sql_insert_one_get_id(cur, 'StockStyles', 'style', style)

    # Update Tickers table with parsed data
    sql = fetch.sql_update_record(
        'Master', {
            'industry_id': industry_id,
            'stock_type_id': stype_id,
            'fyend_id': fyend_id,
            'style_id': style_id
        }, {
            'ticker_id': ticker_id,
            'exchange_id': exch_id
        })
    fetch.db_execute(cur, sql)

    return 200
Beispiel #3
0
def parse_9(cur, ticker_id, exch_id, data):

    data = re.sub('([A-Z])', r' \1', data)
    data = re.sub(' +', ' ', data)
    data = re.sub('\\n|\\t', '', data)
    soup = bs(data, 'html.parser')
    table = get_html_table(soup)

    if len(table) > 1:
        for row in table:
            date = ''
            info = {}
            if row[0] != '':
                info['date'] = DT.datetime.strptime(
                    row[0], '%m/%d/%Y').strftime('%Y-%m-%d')
                try:
                    info['quantity'] = float(re.sub(',', '', row[3]))
                    info['value'] = float(re.sub(',', '', row[6]))
                except ValueError:
                    info['quantity'] = 0
                    info['value'] = 0
                except:
                    raise

                name = row[1].strip()
                info['name_id'] = fetch.sql_insert_one_get_id(
                    cur, 'Insiders', 'name', name)

                type = row[5].strip()
                if ' ' in type:
                    type = type.split()[0]
                info['transaction_id'] = fetch.sql_insert_one_get_id(
                    cur, 'TransactionType', 'type', type)

                # Insert data into tables
                info['ticker_id'] = ticker_id
                info['exchange_id'] = exch_id
                sql = fetch.sql_insert('InsiderTransactions',
                                       tuple(info.keys()),
                                       tuple(info.values()))
                fetch.db_execute(cur, sql)

    return 200
Beispiel #4
0
def parse_8(cur, api, ticker_id, exch_id, data):

    # Check if source data has correct information
    msg = 'There is no available information in our database to display.'
    if msg in data:
        return 8

    # Parse source data with JSON and BeautifulSoup
    try:
        js = json.loads(data)
        html = js['result']
        soup = bs(html, 'html.parser')
        tags = soup.find_all('div')
    except KeyboardInterrupt:
        print('\nGoodbye!')
        exit()
    except:
        print('\n\n', data)
        raise

    info = {}
    info0 = {}
    type = 'MSreport'

    if api in [10, 11]:
        type += '_is'
    elif api in [12, 13]:
        type += '_cf'
    elif api in [14, 15]:
        type += '_bs'
    if api in [10, 12, 14]:
        type += '_yr'
    elif api in [11, 13, 15]:
        type += '_qt'
    #fname = 'test/{}.json'.format(type)
    '''with open(fname) as file:
        info0 = json.load(file)'''

    # Parse data into info dictionary
    for tag in tags:
        attrs = tag.attrs

        if 'id' in attrs:
            tag_id = tag['id']
            value = tag.text

            # Parse currency and FY End month number
            if tag_id == 'unitsAndFiscalYear':
                info['fye_month'] = int(tag['fyenumber'])
                curr_id = fetch.sql_insert_one_get_id(cur, 'Currencies',
                                                      'currency_code',
                                                      tag['currency'])
                info['currency_id'] = curr_id

            # Parse Yrly or Qtrly values
            elif tag_id[:2] == 'Y_':
                parent = tag.parent['id']
                key = '{}_{}'.format(parent, tag_id)

                if 'rawvalue' in attrs:
                    if tag['rawvalue'] in ['—', 'nbsp']:
                        continue
                    info[key] = float(re.sub(',', '', tag['rawvalue']))
                    #info0[key] = 'REAL,'
                else:
                    if 'title' in attrs:
                        value = tag['title']
                    value_id = fetch.sql_insert_one_get_id(
                        cur, 'TimeRefs', 'dates', value)
                    info[key] = value_id
                    #info0[key] = 'INTEGER,'

            # Parse labels
            elif tag_id[:3] == 'lab' and 'padding' not in tag_id:
                value_id = fetch.sql_insert_one_get_id(cur, 'ColHeaders',
                                                       'header', value)
                info[tag_id] = value_id
                #info0[tag_id] = 'INTEGER,'

    # Check if parsing was successful
    if info == {} and info0 == {}:
        return 8

    # Insert data into tables
    # Update
    dict0 = {'ticker_id': ticker_id, 'exchange_id': exch_id}
    sql = fetch.sql_update_record(type, info, dict0)
    fetch.db_execute(cur, sql)
    # Insert
    if cur.rowcount == 0:
        info['ticker_id'] = ticker_id
        info['exchange_id'] = exch_id
        sql = fetch.sql_insert(type, tuple(info.keys()), tuple(info.values()))
        fetch.db_execute(cur, sql)

    return 200
Beispiel #5
0
def parse_5(cur, ticker_id, exch_id, data):

    # Check if source data has correct information
    try:
        js = json.loads(data)['componentData']
        if js is None:
            return 5
        soup = bs(js, 'html.parser')
    except KeyboardInterrupt:
        print('\nGoodbye!')
        exit()
    except:
        print('\n\nTicker_id = {}, Exch_id = {}'.format(ticker_id, exch_id))
        print('Data = {} {}\n'.format(data, len(data)))
        raise

    # Parse table
    tables = {}
    trows = soup.find_all('tr')
    tname = ''
    for trow in trows:
        div_id = trow.parent.parent.parent.attrs['id']
        tname0 = re.sub('tab-', 'MSratio_', div_id)
        if tname != tname0:
            tname = tname0
            tables[tname] = {}

        row_tags = trow.find_all(['th', 'td'])
        for i, row_tag in enumerate(row_tags):
            if 'id' in row_tag.attrs:
                text = row_tag.text
                id = re.sub('-', '_', row_tag.attrs['id'])
                if i != 0:
                    text_id = fetch.sql_insert_one_get_id(
                        cur, 'TimeRefs', 'dates', text)
                else:
                    text_id = fetch.sql_insert_one_get_id(
                        cur, 'ColHeaders', 'header', text)
                tables[tname][id] = text_id
            elif 'headers' in row_tag.attrs:
                headers = row_tag.attrs['headers']
                header = '_'.join([headers[2], headers[0]])
                header = re.sub('-', '_', header)
                val = re.sub(',', '', row_tag.text)
                if val == '—':
                    val = None
                else:
                    try:
                        val = float(val)
                    except:
                        val = None
                tables[tname][header] = val

    # Check if parsing was successful
    if tables == {}:
        return 5

    # Insert data into tables
    for table in tables:
        # Update
        info = tables[table]
        dict0 = {'ticker_id': ticker_id, 'exchange_id': exch_id}
        sql = fetch.sql_update_record(table, info, dict0)
        fetch.db_execute(cur, sql)
        # Insert
        if cur.rowcount == 0:
            tables[table]['ticker_id'] = ticker_id
            tables[table]['exchange_id'] = exch_id
            info = tables[table]
            sql = fetch.sql_insert(table, tuple(info.keys()),
                                   tuple(info.values()))
            fetch.db_execute(cur, sql)

    return 200
Beispiel #6
0
def parse_4(cur, ticker_id, exch_id, data):

    info = {}

    def clean_val(h, v):
        if v != '—':
            info[h] = v

    soup = bs(data, 'html.parser')
    table = get_html_table(soup)
    script = soup.find('script').text
    script = re.sub('[ \n\t]|\\n|\\t', '', script)
    script = re.findall('\[\[.+?\]\]', script)[0]
    columns = json.loads(script)

    # Parse Yr Columns
    for year, column in enumerate(columns):
        if column[0] % 2 == 0:
            yr = column[1]
            yr_id = fetch.sql_insert_one_get_id(cur, 'TimeRefs', 'dates', yr)
            header = 'Y{}'.format(int((year - 1) / 2))
            info[header] = yr_id

    # Parse 'Price/Earnings'
    for yr, val in enumerate(table[1][1:]):
        header = 'PE_Y{}'.format(yr)
        clean_val(header, val)

    # Parse 'Price/Book'
    for yr, val in enumerate(table[4][1:]):
        header = 'PB_Y{}'.format(yr)
        clean_val(header, val)

    # Parse 'Price/Sales'
    for yr, val in enumerate(table[7][1:]):
        header = 'PS_Y{}'.format(yr)
        clean_val(header, val)

    # Parse 'Price/Cash Flow'
    for yr, val in enumerate(table[10][1:]):
        header = 'PC_Y{}'.format(yr)
        clean_val(header, val)

    # Check if parsing was successful
    if info == {}:
        return 4

    # Insert data into MSvaluation table
    table = 'MSvaluation'
    # Update
    dict0 = {'ticker_id': ticker_id, 'exchange_id': exch_id}
    sql1 = fetch.sql_update_record(table, info, dict0)
    # Insert
    info['ticker_id'] = ticker_id
    info['exchange_id'] = exch_id
    sql2 = fetch.sql_insert(table, tuple(info.keys()), tuple(info.values()))
    fetch.db_execute(cur, sql1)
    if cur.rowcount == 0:
        fetch.db_execute(cur, sql2)

    return 200
Beispiel #7
0
def parse_3(cur, ticker_id, exch_id, data):

    soup = bs(data, 'html.parser')
    tags = soup.find_all('span') + soup.find_all('div')

    # Parse data into info dictionary
    info = {}
    noise = ['', '-', '—', '— mil', '— bil']
    for count, tag in enumerate(tags):

        attrs = tag.attrs
        text = re.sub('[\n\t]', '', tag.text.strip())
        text = re.sub('\s\s*', ' ', text)

        try:
            if attrs.get('vkey') == 'Currency':
                if text in noise:
                    info['currency_id'] = 'null'
                else:
                    val = fetch.sql_insert_one_get_id(cur, 'Currencies',
                                                      'currency_code', text)
                    info['currency_id'] = val

            elif attrs.get('vkey') == 'LastDate':
                if text == '':
                    info['lastdate'] = 'null'
                else:
                    info['lastdate'] = pd.to_datetime(text).strftime(
                        '%Y-%m-%d')
            elif attrs.get('vkey') == 'DayRange':
                text = re.sub('^-0.00', '0.00', text)
                vals = text.split('-')
                if '-' not in text or text in noise or '' in vals:
                    info['day_lo'] = 'null'
                    info['day_hi'] = 'null'
                else:
                    info['day_lo'] = float(re.sub(',', '', vals[0]))
                    info['day_hi'] = float(re.sub(',', '', vals[1]))
            elif attrs.get('vkey') == '_52Week':
                text = re.sub('^-0.00', '0.00', text)
                vals = text.split('-')
                if '-' not in text or text in noise or '' in vals:
                    info['_52wk_lo'] = 'null'
                    info['_52wk_hi'] = 'null'
                else:
                    info['_52wk_lo'] = float(re.sub(',', '', vals[0]))
                    info['_52wk_hi'] = float(re.sub(',', '', vals[1]))
            elif attrs.get('vkey') == 'Volume':
                if text in noise:
                    info['lastvol'] = 'null'
                else:
                    text = re.sub(',', '', text)
                    unit = 1
                    if ' mil' in text:
                        unit = 10E6
                        text = text.replace(' mil', '')
                    elif ' bil' in text:
                        unit = 10E9
                        text = text.replace(' bil', '')
                    elif ' tri' in text:
                        unit = 10E12
                        text = text.replace(' tri', '')
                    info['lastvol'] = float(text) * unit
            elif attrs.get('vkey') == 'AverageVolume':
                if text in noise:
                    info['avevol'] = 'null'
                else:
                    text = re.sub(',', '', text)
                    unit = 1
                    if ' mil' in text:
                        unit = 10E6
                        text = text.replace(' mil', '')
                    elif ' bil' in text:
                        unit = 10E9
                        text = text.replace(' bil', '')
                    elif ' tri' in text:
                        unit = 10E12
                        text = text.replace(' tri', '')
                    info['avevol'] = float(text) * unit
            elif attrs.get('gkey') == 'Forward':
                fpe = text
            elif attrs.get('vkey') == 'OpenPrice':
                if text in noise:
                    info['openprice'] = 'null'
                else:
                    info['openprice'] = float(re.sub(',', '', text))
            elif attrs.get('vkey') == 'LastPrice':
                if text in noise:
                    info['lastprice'] = 'null'
                else:
                    info['lastprice'] = float(re.sub(',', '', text))
            elif attrs.get('vkey') == 'ProjectedYield':
                if text in noise:
                    info['yield'] = 'null'
                else:
                    info['yield'] = float(re.sub('[%,]', '', text))
            elif attrs.get('vkey') == 'PE':
                if text in noise:
                    info['fpe'] = 'null'
                else:
                    info['fpe'] = float(re.sub(',', '', text))
            elif attrs.get('vkey') == 'PB':
                if text in noise:
                    info['pb'] = 'null'
                else:
                    info['pb'] = float(re.sub(',', '', text))
            elif attrs.get('vkey') == 'PS':
                if text in noise:
                    info['ps'] = 'null'
                else:
                    info['ps'] = float(re.sub(',', '', text))
            elif attrs.get('vkey') == 'PC':
                if text in noise:
                    info['pc'] = 'null'
                else:
                    info['pc'] = float(re.sub(',', '', text))
        except:
            print('\n\n{' + text + '}\n')
            raise

    # Check if parsing was successful
    if info == {}:
        return 3

    if 'fpe' in locals() and fpe != 'Forward' and 'fpe' in info:
        del info['fpe']

    # Remove 'empty' string values
    for k, v in info.items():
        if v == '' or v == ' ':
            info[k] = 'null'

    # Insert data into MSheader table
    table = 'MSheader'
    # Update
    dict0 = {'ticker_id': ticker_id, 'exchange_id': exch_id}
    sql = fetch.sql_update_record(table, info, dict0)
    fetch.db_execute(cur, sql)
    # Insert
    if cur.rowcount == 0:
        info['ticker_id'] = ticker_id
        info['exchange_id'] = exch_id
        sql = fetch.sql_insert(table, tuple(info.keys()), tuple(info.values()))
        fetch.db_execute(cur, sql)

    return 200
Beispiel #8
0
def parse_1(cur, data, api):

    results = []
    try:
        js = json.loads(data)
        if js['m'][0]['n'] != 0:
            results = js['m'][0]['r']
    except KeyError:
        fetch.print_('')
        print('\tError: KeyError at Parse_1\n')
        return 1
    except KeyboardInterrupt:
        print('\nGoodbye!')
        exit()
    except:
        print('Data = {} {}\n'.format(data, len(data)))
        raise

    if results == []:
        return 1

    for result in results:
        # Read data from current result
        exch = result['OS01X']
        symbol = result['OS001']
        exch_sym = result['LS01Z']
        country = result['XI018']
        type = result['OS010']
        comp = result['OS01W']
        curr = result['OS05M']

        if exch_sym == '' or symbol == '':
            continue

        # Fetch id's for data from db and update tables

        # Tickers
        ticker_id = int(
            fetch.sql_insert_one_get_id(cur, 'Tickers', 'ticker', symbol))
        # Currencies
        curr_id = int(
            fetch.sql_insert_one_get_id(cur, 'Currencies', 'currency_code',
                                        curr))
        # Companies
        comp_id = int(
            fetch.sql_insert_one_get_id(cur, 'Companies', 'company', comp))
        # SecurityTypes
        type_id = int(
            fetch.sql_insert_one_get_id(cur, 'SecurityTypes',
                                        'security_type_code', type))
        # Countries
        country_id = int(
            fetch.sql_insert_one_get_id(cur, 'Countries', 'a3_un', country))
        # Exchanges
        exch_id = int(
            fetch.sql_insert_one_get_id(cur, 'Exchanges', 'exchange_sym',
                                        exch_sym))
        dict1 = {
            'exchange': exch,
            'exchange_sym': exch_sym,
            'country_id': country_id
        }
        sql = fetch.sql_update_record('Exchanges', dict1, {'id': exch_id})
        fetch.db_execute(cur, sql)
        # Master Table
        columns = '(ticker_id, exchange_id)'
        sql = fetch.sql_insert('Master', columns, (ticker_id, exch_id))
        fetch.db_execute(cur, sql)
        dict1 = {
            'company_id': comp_id,
            'security_type_id': type_id,
            'update_date': DT.date.today().strftime('%Y-%m-%d')
        }
        dict2 = {'ticker_id': ticker_id, 'exchange_id': exch_id}
        sql = fetch.sql_update_record('Master', dict1, dict2)
        fetch.db_execute(cur, sql)

    return 200