def parse_6(cur, ticker_id, exch_id, data): # Check if source data has correct information try: js = json.loads(data)['componentData'] if js is None: return 6 soup = bs(js, 'html.parser') except KeyboardInterrupt: print('\nGoodbye!') exit() except: print('\n\nTicker_id = {}, Exch_id = {}'.format(ticker_id, exch_id)) print('Data = {} {}\n'.format(data, len(data))) raise # Parse table table = {} trows = soup.find_all('tr') for trow in trows: row_tags = trow.find_all(['th', 'td']) for i, row_tag in enumerate(row_tags): if 'id' in row_tag.attrs: text = row_tag.text if i != 0: text_id = fetch.sql_insert_one_get_id( cur, 'TimeRefs', 'dates', text) else: text_id = fetch.sql_insert_one_get_id( cur, 'ColHeaders', 'header', text) table[row_tag.attrs['id']] = text_id elif 'headers' in row_tag.attrs: headers = row_tag.attrs['headers'] headers.reverse() val = re.sub(',', '', row_tag.text) if val == '—': val = None else: val = float(val) table['_'.join(headers)] = val if table == {}: return 6 # Insert data into tables tname = 'MSfinancials' # Update dict0 = {'ticker_id': ticker_id, 'exchange_id': exch_id} sql = fetch.sql_update_record(tname, table, dict0) ct = fetch.db_execute(cur, sql) # Insert if cur.rowcount == 0: table['ticker_id'] = ticker_id table['exchange_id'] = exch_id sql = fetch.sql_insert(tname, tuple(table.keys()), tuple(table.values())) fetch.db_execute(cur, sql) return 200
def parse_2(cur, ticker_id, exch_id, data): soup = bs(data, 'html.parser') tags = soup.find_all('span') try: sector = tags[2].text.strip() industry = tags[4].text.strip() stype = tags[6].text.strip() fyend = tags[10].text.strip() style = tags[12].text.strip() except KeyboardInterrupt: print('\nGoodbye!') exit() except: print('\n\nTicker_id = {}, Exch_id = {}'.format(ticker_id, exch_id)) print('Data = {} {}\n'.format(data, len(data))) raise # Insert sector into Sectors sector_id = fetch.sql_insert_one_get_id(cur, 'Sectors', 'Sector', sector) # Insert industry into Industries sql = fetch.sql_insert('Industries', '(industry, sector_id)', (industry, sector_id)) fetch.db_execute(cur, sql) sql = fetch.sql_record_id('Industries', '(industry)', industry) industry_id = fetch.db_execute(cur, sql).fetchone()[0] # Insert stock_type into StockTypes stype_id = fetch.sql_insert_one_get_id(cur, 'StockTypes', 'stock_type', stype) # Insert fyend into FYEnds fyend_id = fetch.sql_insert_one_get_id(cur, 'TimeRefs', 'dates', fyend) # Insert style into StockStyles style_id = fetch.sql_insert_one_get_id(cur, 'StockStyles', 'style', style) # Update Tickers table with parsed data sql = fetch.sql_update_record( 'Master', { 'industry_id': industry_id, 'stock_type_id': stype_id, 'fyend_id': fyend_id, 'style_id': style_id }, { 'ticker_id': ticker_id, 'exchange_id': exch_id }) fetch.db_execute(cur, sql) return 200
def parse_9(cur, ticker_id, exch_id, data): data = re.sub('([A-Z])', r' \1', data) data = re.sub(' +', ' ', data) data = re.sub('\\n|\\t', '', data) soup = bs(data, 'html.parser') table = get_html_table(soup) if len(table) > 1: for row in table: date = '' info = {} if row[0] != '': info['date'] = DT.datetime.strptime( row[0], '%m/%d/%Y').strftime('%Y-%m-%d') try: info['quantity'] = float(re.sub(',', '', row[3])) info['value'] = float(re.sub(',', '', row[6])) except ValueError: info['quantity'] = 0 info['value'] = 0 except: raise name = row[1].strip() info['name_id'] = fetch.sql_insert_one_get_id( cur, 'Insiders', 'name', name) type = row[5].strip() if ' ' in type: type = type.split()[0] info['transaction_id'] = fetch.sql_insert_one_get_id( cur, 'TransactionType', 'type', type) # Insert data into tables info['ticker_id'] = ticker_id info['exchange_id'] = exch_id sql = fetch.sql_insert('InsiderTransactions', tuple(info.keys()), tuple(info.values())) fetch.db_execute(cur, sql) return 200
def parse_8(cur, api, ticker_id, exch_id, data): # Check if source data has correct information msg = 'There is no available information in our database to display.' if msg in data: return 8 # Parse source data with JSON and BeautifulSoup try: js = json.loads(data) html = js['result'] soup = bs(html, 'html.parser') tags = soup.find_all('div') except KeyboardInterrupt: print('\nGoodbye!') exit() except: print('\n\n', data) raise info = {} info0 = {} type = 'MSreport' if api in [10, 11]: type += '_is' elif api in [12, 13]: type += '_cf' elif api in [14, 15]: type += '_bs' if api in [10, 12, 14]: type += '_yr' elif api in [11, 13, 15]: type += '_qt' #fname = 'test/{}.json'.format(type) '''with open(fname) as file: info0 = json.load(file)''' # Parse data into info dictionary for tag in tags: attrs = tag.attrs if 'id' in attrs: tag_id = tag['id'] value = tag.text # Parse currency and FY End month number if tag_id == 'unitsAndFiscalYear': info['fye_month'] = int(tag['fyenumber']) curr_id = fetch.sql_insert_one_get_id(cur, 'Currencies', 'currency_code', tag['currency']) info['currency_id'] = curr_id # Parse Yrly or Qtrly values elif tag_id[:2] == 'Y_': parent = tag.parent['id'] key = '{}_{}'.format(parent, tag_id) if 'rawvalue' in attrs: if tag['rawvalue'] in ['—', 'nbsp']: continue info[key] = float(re.sub(',', '', tag['rawvalue'])) #info0[key] = 'REAL,' else: if 'title' in attrs: value = tag['title'] value_id = fetch.sql_insert_one_get_id( cur, 'TimeRefs', 'dates', value) info[key] = value_id #info0[key] = 'INTEGER,' # Parse labels elif tag_id[:3] == 'lab' and 'padding' not in tag_id: value_id = fetch.sql_insert_one_get_id(cur, 'ColHeaders', 'header', value) info[tag_id] = value_id #info0[tag_id] = 'INTEGER,' # Check if parsing was successful if info == {} and info0 == {}: return 8 # Insert data into tables # Update dict0 = {'ticker_id': ticker_id, 'exchange_id': exch_id} sql = fetch.sql_update_record(type, info, dict0) fetch.db_execute(cur, sql) # Insert if cur.rowcount == 0: info['ticker_id'] = ticker_id info['exchange_id'] = exch_id sql = fetch.sql_insert(type, tuple(info.keys()), tuple(info.values())) fetch.db_execute(cur, sql) return 200
def parse_5(cur, ticker_id, exch_id, data): # Check if source data has correct information try: js = json.loads(data)['componentData'] if js is None: return 5 soup = bs(js, 'html.parser') except KeyboardInterrupt: print('\nGoodbye!') exit() except: print('\n\nTicker_id = {}, Exch_id = {}'.format(ticker_id, exch_id)) print('Data = {} {}\n'.format(data, len(data))) raise # Parse table tables = {} trows = soup.find_all('tr') tname = '' for trow in trows: div_id = trow.parent.parent.parent.attrs['id'] tname0 = re.sub('tab-', 'MSratio_', div_id) if tname != tname0: tname = tname0 tables[tname] = {} row_tags = trow.find_all(['th', 'td']) for i, row_tag in enumerate(row_tags): if 'id' in row_tag.attrs: text = row_tag.text id = re.sub('-', '_', row_tag.attrs['id']) if i != 0: text_id = fetch.sql_insert_one_get_id( cur, 'TimeRefs', 'dates', text) else: text_id = fetch.sql_insert_one_get_id( cur, 'ColHeaders', 'header', text) tables[tname][id] = text_id elif 'headers' in row_tag.attrs: headers = row_tag.attrs['headers'] header = '_'.join([headers[2], headers[0]]) header = re.sub('-', '_', header) val = re.sub(',', '', row_tag.text) if val == '—': val = None else: try: val = float(val) except: val = None tables[tname][header] = val # Check if parsing was successful if tables == {}: return 5 # Insert data into tables for table in tables: # Update info = tables[table] dict0 = {'ticker_id': ticker_id, 'exchange_id': exch_id} sql = fetch.sql_update_record(table, info, dict0) fetch.db_execute(cur, sql) # Insert if cur.rowcount == 0: tables[table]['ticker_id'] = ticker_id tables[table]['exchange_id'] = exch_id info = tables[table] sql = fetch.sql_insert(table, tuple(info.keys()), tuple(info.values())) fetch.db_execute(cur, sql) return 200
def parse_4(cur, ticker_id, exch_id, data): info = {} def clean_val(h, v): if v != '—': info[h] = v soup = bs(data, 'html.parser') table = get_html_table(soup) script = soup.find('script').text script = re.sub('[ \n\t]|\\n|\\t', '', script) script = re.findall('\[\[.+?\]\]', script)[0] columns = json.loads(script) # Parse Yr Columns for year, column in enumerate(columns): if column[0] % 2 == 0: yr = column[1] yr_id = fetch.sql_insert_one_get_id(cur, 'TimeRefs', 'dates', yr) header = 'Y{}'.format(int((year - 1) / 2)) info[header] = yr_id # Parse 'Price/Earnings' for yr, val in enumerate(table[1][1:]): header = 'PE_Y{}'.format(yr) clean_val(header, val) # Parse 'Price/Book' for yr, val in enumerate(table[4][1:]): header = 'PB_Y{}'.format(yr) clean_val(header, val) # Parse 'Price/Sales' for yr, val in enumerate(table[7][1:]): header = 'PS_Y{}'.format(yr) clean_val(header, val) # Parse 'Price/Cash Flow' for yr, val in enumerate(table[10][1:]): header = 'PC_Y{}'.format(yr) clean_val(header, val) # Check if parsing was successful if info == {}: return 4 # Insert data into MSvaluation table table = 'MSvaluation' # Update dict0 = {'ticker_id': ticker_id, 'exchange_id': exch_id} sql1 = fetch.sql_update_record(table, info, dict0) # Insert info['ticker_id'] = ticker_id info['exchange_id'] = exch_id sql2 = fetch.sql_insert(table, tuple(info.keys()), tuple(info.values())) fetch.db_execute(cur, sql1) if cur.rowcount == 0: fetch.db_execute(cur, sql2) return 200
def parse_3(cur, ticker_id, exch_id, data): soup = bs(data, 'html.parser') tags = soup.find_all('span') + soup.find_all('div') # Parse data into info dictionary info = {} noise = ['', '-', '—', '— mil', '— bil'] for count, tag in enumerate(tags): attrs = tag.attrs text = re.sub('[\n\t]', '', tag.text.strip()) text = re.sub('\s\s*', ' ', text) try: if attrs.get('vkey') == 'Currency': if text in noise: info['currency_id'] = 'null' else: val = fetch.sql_insert_one_get_id(cur, 'Currencies', 'currency_code', text) info['currency_id'] = val elif attrs.get('vkey') == 'LastDate': if text == '': info['lastdate'] = 'null' else: info['lastdate'] = pd.to_datetime(text).strftime( '%Y-%m-%d') elif attrs.get('vkey') == 'DayRange': text = re.sub('^-0.00', '0.00', text) vals = text.split('-') if '-' not in text or text in noise or '' in vals: info['day_lo'] = 'null' info['day_hi'] = 'null' else: info['day_lo'] = float(re.sub(',', '', vals[0])) info['day_hi'] = float(re.sub(',', '', vals[1])) elif attrs.get('vkey') == '_52Week': text = re.sub('^-0.00', '0.00', text) vals = text.split('-') if '-' not in text or text in noise or '' in vals: info['_52wk_lo'] = 'null' info['_52wk_hi'] = 'null' else: info['_52wk_lo'] = float(re.sub(',', '', vals[0])) info['_52wk_hi'] = float(re.sub(',', '', vals[1])) elif attrs.get('vkey') == 'Volume': if text in noise: info['lastvol'] = 'null' else: text = re.sub(',', '', text) unit = 1 if ' mil' in text: unit = 10E6 text = text.replace(' mil', '') elif ' bil' in text: unit = 10E9 text = text.replace(' bil', '') elif ' tri' in text: unit = 10E12 text = text.replace(' tri', '') info['lastvol'] = float(text) * unit elif attrs.get('vkey') == 'AverageVolume': if text in noise: info['avevol'] = 'null' else: text = re.sub(',', '', text) unit = 1 if ' mil' in text: unit = 10E6 text = text.replace(' mil', '') elif ' bil' in text: unit = 10E9 text = text.replace(' bil', '') elif ' tri' in text: unit = 10E12 text = text.replace(' tri', '') info['avevol'] = float(text) * unit elif attrs.get('gkey') == 'Forward': fpe = text elif attrs.get('vkey') == 'OpenPrice': if text in noise: info['openprice'] = 'null' else: info['openprice'] = float(re.sub(',', '', text)) elif attrs.get('vkey') == 'LastPrice': if text in noise: info['lastprice'] = 'null' else: info['lastprice'] = float(re.sub(',', '', text)) elif attrs.get('vkey') == 'ProjectedYield': if text in noise: info['yield'] = 'null' else: info['yield'] = float(re.sub('[%,]', '', text)) elif attrs.get('vkey') == 'PE': if text in noise: info['fpe'] = 'null' else: info['fpe'] = float(re.sub(',', '', text)) elif attrs.get('vkey') == 'PB': if text in noise: info['pb'] = 'null' else: info['pb'] = float(re.sub(',', '', text)) elif attrs.get('vkey') == 'PS': if text in noise: info['ps'] = 'null' else: info['ps'] = float(re.sub(',', '', text)) elif attrs.get('vkey') == 'PC': if text in noise: info['pc'] = 'null' else: info['pc'] = float(re.sub(',', '', text)) except: print('\n\n{' + text + '}\n') raise # Check if parsing was successful if info == {}: return 3 if 'fpe' in locals() and fpe != 'Forward' and 'fpe' in info: del info['fpe'] # Remove 'empty' string values for k, v in info.items(): if v == '' or v == ' ': info[k] = 'null' # Insert data into MSheader table table = 'MSheader' # Update dict0 = {'ticker_id': ticker_id, 'exchange_id': exch_id} sql = fetch.sql_update_record(table, info, dict0) fetch.db_execute(cur, sql) # Insert if cur.rowcount == 0: info['ticker_id'] = ticker_id info['exchange_id'] = exch_id sql = fetch.sql_insert(table, tuple(info.keys()), tuple(info.values())) fetch.db_execute(cur, sql) return 200
def parse_1(cur, data, api): results = [] try: js = json.loads(data) if js['m'][0]['n'] != 0: results = js['m'][0]['r'] except KeyError: fetch.print_('') print('\tError: KeyError at Parse_1\n') return 1 except KeyboardInterrupt: print('\nGoodbye!') exit() except: print('Data = {} {}\n'.format(data, len(data))) raise if results == []: return 1 for result in results: # Read data from current result exch = result['OS01X'] symbol = result['OS001'] exch_sym = result['LS01Z'] country = result['XI018'] type = result['OS010'] comp = result['OS01W'] curr = result['OS05M'] if exch_sym == '' or symbol == '': continue # Fetch id's for data from db and update tables # Tickers ticker_id = int( fetch.sql_insert_one_get_id(cur, 'Tickers', 'ticker', symbol)) # Currencies curr_id = int( fetch.sql_insert_one_get_id(cur, 'Currencies', 'currency_code', curr)) # Companies comp_id = int( fetch.sql_insert_one_get_id(cur, 'Companies', 'company', comp)) # SecurityTypes type_id = int( fetch.sql_insert_one_get_id(cur, 'SecurityTypes', 'security_type_code', type)) # Countries country_id = int( fetch.sql_insert_one_get_id(cur, 'Countries', 'a3_un', country)) # Exchanges exch_id = int( fetch.sql_insert_one_get_id(cur, 'Exchanges', 'exchange_sym', exch_sym)) dict1 = { 'exchange': exch, 'exchange_sym': exch_sym, 'country_id': country_id } sql = fetch.sql_update_record('Exchanges', dict1, {'id': exch_id}) fetch.db_execute(cur, sql) # Master Table columns = '(ticker_id, exchange_id)' sql = fetch.sql_insert('Master', columns, (ticker_id, exch_id)) fetch.db_execute(cur, sql) dict1 = { 'company_id': comp_id, 'security_type_id': type_id, 'update_date': DT.date.today().strftime('%Y-%m-%d') } dict2 = {'ticker_id': ticker_id, 'exchange_id': exch_id} sql = fetch.sql_update_record('Master', dict1, dict2) fetch.db_execute(cur, sql) return 200