def parse_2(cur, ticker_id, exch_id, data): try: soup = bs(data, 'html.parser') tags = soup.find_all('span') sector = tags[2].text.strip() industry = tags[4].text.strip() ctype = tags[6].text.strip() fyend = tags[10].text.strip() style = tags[12].text.strip() except Exception as e: #print('\n# ERROR API 2:', e, end=' ') #print('at {}:{}\n'.format(exch_id, ticker_id)) return 0 # Insert sector into Sectors sector_id = up.sql_insert_one_get_id(cur, 'Sectors', 'Sector', sector) # Insert industry into Industries sql = up.sql_insert('Industries', '(industry, sector_id)', (industry, sector_id)) up.execute_db(cur, sql) sql = up.sql_record_id('Industries', '(industry)', industry) industry_id = up.execute_db(cur, sql).fetchone()[0] # Insert companytype into CompanyTypes ctype_id = up.sql_insert_one_get_id(cur, 'CompanyTypes', 'companytype', ctype) # Insert fyend into FYEnds fyend_id = up.sql_insert_one_get_id(cur, 'TimeRefs', 'dates', fyend) # Insert style into StockStyles style_id = up.sql_insert_one_get_id(cur, 'StockStyles', 'style', style) # Update Tickers table with parsed data sql = update_record( 'Master', { 'industry_id': industry_id, 'companytype_id': ctype_id, 'fyend_id': fyend_id, 'style_id': style_id }, { 'ticker_id': ticker_id, 'exchange_id': exch_id }) up.execute_db(cur, sql) return 200
def parse_8(cur, api, ticker_id, exch_id, data): try: js = json.loads(data) html = js['result'] soup = bs(html, 'html.parser') tags = soup.find_all('div') except Exception as e: #print('\n# ERROR API {}:'.format(api), e, end=' ') #print('at {}:{}\n'.format(exch_id, ticker_id)) return 0 info = {} #info0 = {} type = 'MSreport' if api in [10, 11]: type += '_is' elif api in [12, 13]: type += '_cf' elif api in [14, 15]: type += '_bs' if api in [10, 12, 14]: type += '_yr' elif api in [11, 13, 15]: type += '_qt' #fname = 'test/{}.json'.format(type) '''with open(fname) as file: info0 = json.load(file)''' # Parse data into info dictionary for tag in tags: attrs = tag.attrs if 'id' in attrs: tag_id = tag['id'] value = tag.text # Parse Yrly or Qtrly values if tag_id[:2] == 'Y_': parent = tag.parent['id'] key = '{}_{}'.format(parent, tag_id) if 'rawvalue' in attrs: if tag['rawvalue'] in ['—', 'nbsp']: continue info[key] = float(re.sub(',', '', tag['rawvalue'])) #info0[key] = 'REAL,' else: if 'title' in attrs: value = tag['title'] value_id = up.sql_insert_one_get_id( cur, 'TimeRefs', 'dates', value) info[key] = value_id #info0[key] = 'INTEGER,' # Parse labels elif tag_id[:3] == 'lab' and 'padding' not in tag_id: value_id = up.sql_insert_one_get_id(cur, 'RowHeaders', 'header', value) info[tag_id] = value_id #info0[tag_id] = 'INTEGER,' # Check if parsing was successful if info == {} and info0 == {}: return 0 # Insert data into tables info['ticker_id'] = ticker_id info['exchange_id'] = exch_id sql = up.sql_insert(type, tuple(info.keys()), tuple(info.values())) cur.execute(sql) del info['ticker_id'] del info['exchange_id'] dict2 = {'ticker_id': ticker_id, 'exchange_id': exch_id} sql = update_record(type, info, dict2) cur.execute(sql) '''with open(fname, 'w') as file: file.write(json.dumps(info0, indent=2))''' return 200
def parse_6(cur, ticker_id, exch_id, data): try: html = json.loads(data)['componentData'] soup = bs(html, 'html.parser') trs = soup.find_all('tr') except Exception as e: #print('\n# ERROR API 6:', e, end=' ') #print('at {}:{}\n'.format(exch_id, ticker_id)) return 0 # Parse table info = {} #info0 = {} #with open('test/MSfinancials.json') as file: # info0 = json.load(file) for tr in trs: tags = tr.find_all(['th', 'td']) for ct, tag in enumerate(tags): # Parse column and row headers if 'id' in tag.attrs: if ct != 0: text_id = up.sql_insert_one_get_id(cur, 'TimeRefs', 'dates', tag.text) else: text = re.findall('\>(.+?)\<', str(tag))[0] text = re.sub('\%|\*', '', text).strip() text = re.sub('\s', '_', text) text_id = up.sql_insert_one_get_id(cur, 'RowHeaders', 'header', text) key = re.sub('-', '_', tag['id']) info[key] = int(text_id) #info0[tag['id']] = 'INTEGER,' # Parse values if 'headers' in tag.attrs: headers = tag['headers'] text = '_'.join([headers[1], headers[0]]) text = re.sub('[-,]', '_', text) try: info[text] = float(tag.text) except: pass #info0[text] = 'REAL,' if info == {}: return 0 # Insert data into tables table = 'MSfinancials' info['ticker_id'] = ticker_id #'INTEGER,' info['exchange_id'] = exch_id #'INTEGER,' sql = up.sql_insert(table, tuple(info.keys()), tuple(info.values())) cur.execute(sql) del info['ticker_id'] del info['exchange_id'] dict2 = {'ticker_id': ticker_id, 'exchange_id': exch_id} sql = update_record(table, info, dict2) cur.execute(sql) '''for k, v in info.items(): print(k, v)''' '''with open('test/MSfinancials.json', 'w') as file: file.write(json.dumps(info0, indent=2))''' '''with open('test/api12.html', 'w') as file: file.write(soup.prettify())''' return 200
def parse_5(cur, ticker_id, exch_id, data): try: html = json.loads(data)['componentData'] soup = bs(html, 'html.parser') except Exception as e: #print('\n# ERROR API 5:', e, end=' ') #print('at {}:{}\n'.format(exch_id, ticker_id)) return 0 # Parse data batabase tables (5 tables) tabs = soup.find_all('div') tables = {} for tab in tabs: if 'id' in tab.attrs: info = {} #info0 = {} table = re.sub('tab-', 'MSratio_', tab['id']) '''# Parse data into info dictionary with open('test/{}.json'.format(table)) as file: info0 = json.load(file)''' trs = tab.find_all('tr') for tr in trs: tags = tr.find_all(['th', 'td']) for ct, tag in enumerate(tags): # Parse column and row headers if 'id' in tag.attrs: if ct == 0: text_id = text_id = up.sql_insert_one_get_id( cur, 'RowHeaders', 'header', tag.text) else: text_id = up.sql_insert_one_get_id( cur, 'TimeRefs', 'dates', tag.text) key = re.sub('-', '_', tag['id']) info[key] = int(text_id) #info0[tag['id']] = 'INTEGER,' # Parse values if 'headers' in tag.attrs: col = tag['headers'] col = '{}_{}'.format(col[2], col[0]) col = re.sub('-', '_', col) try: info[col] = float(tag.text) except: pass #info0[col] = 'REAL,' if info != {}: # Insert data into tables info['ticker_id'] = ticker_id #'INTEGER,' info['exchange_id'] = exch_id #'INTEGER,' tables[table] = info sql = up.sql_insert(table, tuple(info.keys()), tuple(info.values())) cur.execute(sql) del info['ticker_id'] del info['exchange_id'] dict2 = {'ticker_id': ticker_id, 'exchange_id': exch_id} sql = update_record(table, info, dict2) cur.execute(sql) '''with open('test/{}.json'.format(table), 'w') as file: file.write(json.dumps(info0, indent=2))''' # Check if parsing was successful if tables == {}: return 0 '''with open('test/api11.html', 'w') as file: file.write(soup.prettify())''' '''for k0, v0 in tables.items(): print(k0) for k1, v1 in v0.items(): print(k1, '\t', v1)''' return 200
def parse_4(cur, ticker_id, exch_id, data): info = {} def clean_val(h, v): if v != '—': info[h] = v try: soup = bs(data, 'html.parser') table = gethtmltable(soup) script = soup.find('script').text except Exception as e: #print('\n# ERROR API 4:', e, end=' ') #print('at {}:{}\n'.format(exch_id, ticker_id)) return 0 script = re.sub('[ \n\t]|\\n|\\t', '', script) script = re.findall('\[\[.+?\]\]', script)[0] columns = json.loads(script) # Parse Yr Columns for year, column in enumerate(columns): if column[0] % 2 == 0: yr = column[1] yr_id = up.sql_insert_one_get_id(cur, 'TimeRefs', 'dates', yr) header = 'Y{}'.format(int((year - 1) / 2)) info[header] = yr_id # Parse 'Price/Earnings' for yr, val in enumerate(table[1][1:]): header = 'PE_Y{}'.format(yr) clean_val(header, val) # Parse 'Price/Book' for yr, val in enumerate(table[4][1:]): header = 'PB_Y{}'.format(yr) clean_val(header, val) # Parse 'Price/Sales' for yr, val in enumerate(table[7][1:]): header = 'PS_Y{}'.format(yr) clean_val(header, val) # Parse 'Price/Cash Flow' for yr, val in enumerate(table[10][1:]): header = 'PC_Y{}'.format(yr) clean_val(header, val) # Check if parsing was successful if info == {}: return 0 # Insert data into MSvaluation table info['ticker_id'] = ticker_id info['exchange_id'] = exch_id sql = up.sql_insert('MSvaluation', tuple(info.keys()), tuple(info.values())) cur.execute(sql) del info['ticker_id'] del info['exchange_id'] dict2 = {'ticker_id': ticker_id, 'exchange_id': exch_id} sql = update_record('MSvaluation', info, dict2) cur.execute(sql) # Check if parsing was successful if info == {}: return 0 return 200
def parse_3(cur, ticker_id, exch_id, data): try: soup = bs(data, 'html.parser') tags = soup.find_all('span') + soup.find_all('div') except Exception as e: #print('\n# ERROR API 3:', e, end=' ') #print('at {}:{}\n'.format(exch_id, ticker_id)) return 0 # Parse data into info dictionary info = {} for count, tag in enumerate(tags): attrs = tag.attrs text = re.sub('[\n\t]', '', tag.text.strip()) text = re.sub('\s\s*', ' ', text) if text == '—' or text == '— mil' or text == '— bil': continue if attrs.get('vkey') == 'Currency': val = up.sql_insert_one_get_id(cur, 'Currencies', 'code', text) info['currency_id'] = val elif attrs.get('vkey') == 'OpenPrice': info['openprice'] = re.sub(',', '', text) elif attrs.get('vkey') == 'LastPrice': info['lastprice'] = re.sub(',', '', text) elif attrs.get('vkey') == 'DayRange': info['day_hi'] = re.sub(',', '', text.split('-')[0]) info['day_lo'] = re.sub(',', '', text.split('-')[1]) elif attrs.get('vkey') == '_52Week': info['_52wk_hi'] = re.sub(',', '', text.split('-')[0]) info['_52wk_lo'] = re.sub(',', '', text.split('-')[1]) elif attrs.get('vkey') == 'ProjectedYield': info['yield'] = re.sub('[%,]', '', text) elif attrs.get('vkey') == 'Volume': if ',' in text: text = float(re.sub(',', '', text)) elif ' ' in text: s = text.find(' ') unit = 1 if text[s + 1:] == 'mil': unit = 10E6 elif text[s + 1:] == 'bil': unit = 10E9 elif text[s + 1:] == 'tri': unit = 10E12 info['aprvol'] = float(text[:s]) * unit elif attrs.get('vkey') == 'AverageVolume': if ',' in text: text = float(re.sub(',', '', text)) elif ' ' in text: s = text.find(' ') unit = 1 if text[s + 1:] == 'mil': unit = 10E6 elif text[s + 1:] == 'bil': unit = 10E9 elif text[s + 1:] == 'tri': unit = 10E12 info['avevol'] = float(text[:s]) * unit elif attrs.get('gkey') == 'Forward': fpe = text elif attrs.get('vkey') == 'PE': info['fpe'] = re.sub(',', '', text) elif attrs.get('vkey') == 'PB': info['pb'] = re.sub(',', '', text) elif attrs.get('vkey') == 'PS': info['ps'] = re.sub(',', '', text) elif attrs.get('vkey') == 'PC': info['pc'] = re.sub(',', '', text) # Check if parsing was successful if info == {}: return 0 if 'fpe' in locals() and fpe != 'Forward' and 'fpe' in info: del info['fpe'] # Insert data into MSheader table info['ticker_id'] = ticker_id info['exchange_id'] = exch_id sql = up.sql_insert('MSheader', tuple(info.keys()), tuple(info.values())) cur.execute(sql) del info['ticker_id'] del info['exchange_id'] dict2 = {'ticker_id': ticker_id, 'exchange_id': exch_id} sql = update_record('MSheader', info, dict2) cur.execute(sql) '''with open('test/api3.html', 'w') as file: file.write(soup.prettify())''' return 200
def parse_1(cur, ticker_id, exch_id, data): results = [] try: js = json.loads(data) if js['m'][0]['n'] != 0: results = js['m'][0]['r'] except Exception as e: #print('\n# ERROR API 1:', e, end=' ') #print('at {}:{}\n'.format(exch_id, ticker_id)) return 0 if results == []: return 0 for result in results: # Read data from current result exch = result['OS01X'] symbol = result['OS001'] exch_sym = result['LS01Z'] country = result['XI018'] type = result['OS010'] comp = result['OS01W'] curr = result['OS05M'] if exch_sym == '' or symbol == '': continue # Fetch id's for data from db and update tables # Tickers ticker_id = up.sql_insert_one_get_id(cur, 'Tickers', 'ticker', symbol) # Currencies curr_id = up.sql_insert_one_get_id(cur, 'Currencies', 'code', curr) # Companies comp_id = up.sql_insert_one_get_id(cur, 'Companies', 'company', comp) # Types type_id = up.sql_insert_one_get_id(cur, 'Types', 'type_code', type) # Countries country_id = up.sql_insert_one_get_id(cur, 'Countries', 'a3_un', country) # Updated date update = date.today().strftime('%Y-%m-%d') date_id = up.sql_insert_one_get_id(cur, 'TimeRefs', 'dates', update) # Exchanges exch_id = up.sql_insert_one_get_id(cur, 'Exchanges', 'exchange_sym', exch_sym) dict1 = { 'exchange': exch, 'exchange_sym': exch_sym, 'country_id': country_id } sql = update_record('Exchanges', dict1, {'id': exch_id}) up.execute_db(cur, sql) # Master Table columns = '(ticker_id, exchange_id)' sql = up.sql_insert('Master', columns, (ticker_id, exch_id)) up.execute_db(cur, sql) dict1 = { 'company_id': comp_id, 'type_id': type_id, 'update_date_id': date_id } dict2 = {'ticker_id': ticker_id, 'exchange_id': exch_id} sql = update_record('Master', dict1, dict2) up.execute_db(cur, sql) return 200