def get_product_page_interest_rates(url, savings_data):
    """Scrape the rate tables on a product page and store one product per row.

    Every ``<table>`` on the page is classified by the text that follows
    "interest rates: " in its ``summary`` attribute.  The classification
    sets flags on ``savings_data``; each data row of the table is then
    parsed into a copy of ``savings_data`` and handed to
    ``savings_util.handle_savings_insert``.

    Args:
        url: address of the product page; also used to name the logger and
            passed through to the insert call.
        savings_data: dict of product fields used as the template for every
            row.  It is modified in place by the classification step.

    Returns:
        None.  Results are passed to ``savings_util.handle_savings_insert``.

    Side effects:
        Calls ``exit()`` when a table has no usable summary, when the
        summary is not recognised, or when a regular-saver amount cell does
        not have the expected "<min> - <max> per month" shape.
    """
    logger = logging.getLogger('retrieve ' + url)
    bsobj = themortgagemeter_utils.get_page(False, '', url, logger)
    if re.match('.*isa.*', url):
        savings_data['isa'] = 'Y'

    for t in bsobj.find_all('table'):
        # Tag.get() returns None when the attribute is absent; treat that
        # the same as an empty summary instead of failing on None.encode().
        raw_summary = t.get('summary')
        if raw_summary is None:
            summary = ''
        else:
            summary = raw_summary.encode('utf-8').lower()
        if not summary:
            exit()

        found = re.match('.*interest rates: (.*)', summary)
        if found is None:
            # No "interest rates: " marker at all: report it like any other
            # unrecognised table rather than failing on None.group().
            themortgagemeter_utils.record_alert(
                'NEED TO HANDLE: ' + summary, logger,
                themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
            exit()
        summary_info = found.group(1)

        # NOTE(review): these flags are written to the shared savings_data,
        # so they carry over to later tables on the same page - confirm
        # that is intended.
        # The first three cases are exact matches: a parenthesised string
        # is not a tuple, so `in ("...")` would be a substring test that
        # also accepts '' or any fragment of the name.
        if summary_info == "cash e-isa#":
            savings_data['isa'] = 'Y'
        elif summary_info == "fixed rate saver - monthly interest":
            savings_data['variability'] = 'F'
            savings_data['interest_paid'] = 'M'
        elif summary_info == "fixed rate saver - annual interest":
            savings_data['variability'] = 'F'
            savings_data['interest_paid'] = 'Y'
        elif "regular saver" in summary_info:
            savings_data['regular_saver'] = 'Y'
            savings_data['interest_paid'] = 'Y'
        elif "online bonus" in summary_info:
            savings_data['bonus'] = 'Y'
            savings_data['branch'] = 'N'
            savings_data['bonus_frequency_period'] = '1'
            savings_data['bonus_frequency_type'] = 'M'
            # skip bonus for HSBC - it's complicated - probably needs its
            # own function TODO
            continue
        elif "flexible saver" in summary_info:
            savings_data['variability'] = 'V'
        else:
            themortgagemeter_utils.record_alert(
                'NEED TO HANDLE: ' + summary_info, logger,
                themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
            exit()

        tr_count = 0
        for tr in t.find_all('tr'):
            # Each row is a new savings product, so work on a copy.
            this_savings_data = savings_data.copy()
            is_regular_saver = this_savings_data['regular_saver'] == 'Y'
            # Regular-saver rows start one column early, so the amount cell
            # is the second <td> rather than the first.
            td_count = -1 if is_regular_saver else 0

            # The first row of every table is never parsed (presumably a
            # header row).
            if tr_count >= 1:
                for td in tr.find_all('td'):
                    td_style = td.get('style')
                    if td_style is not None:
                        td_style = td_style.lower().encode('utf-8').replace(' ', '')
                    if td_style == 'vertical-align:middle':
                        # Such cells are not counted as a column.
                        continue
                    logger.info(td)
                    v = td.text.encode('utf-8').lower().strip()
                    if td_count == 0:
                        if is_regular_saver:
                            logger.info('regular_saver: ' + v)
                            parts = v.split()
                            if len(parts) < 5 or parts[4] != "month":
                                themortgagemeter_utils.record_alert(
                                    'ERROR: reg saver not parsed: ' + v,
                                    logger,
                                    themortgagemeter_db.db_connection,
                                    themortgagemeter_db.cursor)
                                exit()
                            # [2:] drops the first two bytes of each amount
                            # (presumably a UTF-8 encoded pound sign).
                            this_savings_data['regular_saver_min_amt'] = parts[0][2:]
                            this_savings_data['regular_saver_max_amt'] = parts[2][2:]
                            this_savings_data['regular_saver_frequency_period'] = '1'
                            this_savings_data['regular_saver_frequency_type'] = 'M'
                        else:
                            res = savings_util.get_money_range(v, logger)
                            this_savings_data['min_amt'] = res[0]
                            this_savings_data['max_amt'] = res[1]
                    elif td_count == 2:
                        # gross %; column 1 (net %) is deliberately ignored
                        this_savings_data['gross_percent'] = v
                    elif td_count == 3:
                        this_savings_data['aer_percent'] = v
                    td_count += 1

                # Some trs have no tds; we ignore those.
                if td_count > 0:
                    # TODO: fixed savings?
                    logger.info(this_savings_data)
                    savings_util.handle_savings_insert(
                        institution_code,
                        this_savings_data['isa'],
                        this_savings_data['regular_saver'],
                        this_savings_data['regular_saver_frequency_period'],
                        this_savings_data['regular_saver_frequency_type'],
                        this_savings_data['regular_saver_min_amt'],
                        this_savings_data['regular_saver_max_amt'],
                        this_savings_data['bonus'],
                        this_savings_data['bonus_frequency_period'],
                        this_savings_data['bonus_frequency_type'],
                        this_savings_data['online'],
                        this_savings_data['branch'],
                        this_savings_data['variability'],
                        this_savings_data['savings_period'],
                        this_savings_data['min_amt'],
                        this_savings_data['max_amt'],
                        this_savings_data['gross_percent'],
                        this_savings_data['aer_percent'],
                        this_savings_data['child'],
                        this_savings_data['interest_paid'],
                        url,
                        logger)
            tr_count += 1
def _record_page_alert(message, logger):
    """Record an alert through the shared alert table connection."""
    themortgagemeter_utils.record_alert(
        message, logger,
        themortgagemeter_db.db_connection, themortgagemeter_db.cursor)


def _apr_text_lines(bsobj):
    """Return the newline-split text around the first 'apr'-class element."""
    apr_nodes = bsobj.find_all(attrs={'class': 'apr'})
    if not apr_nodes:
        # Nothing to parse; the caller reports the empty result.
        return []
    return apr_nodes[0].parent.parent.text.encode('utf-8').split('\n')


def _split_or_alternatives(raw_lines):
    """Split "<rate> or <rate>" lines in two and drop empty lines."""
    lines = []
    for line in raw_lines:
        if re.match('.* or [0-9].*', line):
            lines.extend(line.split(' or ', 1))
        else:
            lines.append(line)
    return [line for line in lines if line != '']


def _isa_summary_box_products(bsobj, savings_data, url, logger):
    """Build products from the tables found under each "Summary box" h2."""
    products = []
    is_fixed_isa = re.match(".*isa-saver-fixed.*", url)
    for heading in bsobj.find_all("h2", text="Summary box"):
        # Calling a Tag is shorthand for find_all(), so this visits every
        # descendant of the heading's parent.
        for node in heading.parent():
            tabs = node.find_all("table")
            if not tabs:
                continue
            if is_fixed_isa:
                if len(tabs) != 2:
                    _record_page_alert('ERROR: too many tabs in isa', logger)
                    exit()
                # Only the second table is parsed: one row per term.
                for tab in tabs[1:]:
                    tbodys = tab.find_all("tbody")
                    if not tbodys:
                        continue
                    for tr in tbodys[0].find_all("tr"):
                        tds = tr.find_all("td")
                        if len(tds) < 2:
                            # Not a term/rate row (e.g. no <td> cells).
                            continue
                        product = savings_data.copy()
                        product['savings_period'] = themortgagemeter_utils.get_months(
                            tds[0].text.strip().encode('utf-8'), logger)
                        product['aer_percent'] = themortgagemeter_utils.get_percentage(
                            tds[1].text.strip().encode('utf-8'), logger)
                        product['gross_percent'] = product['aer_percent']
                        products.append(product)
            else:
                if len(tabs) > 1:
                    _record_page_alert('ERROR: too many tabs in isa', logger)
                    exit()
                for tab in tabs:
                    for tr in tab.find_all("tr"):
                        ths = tr.find_all("th")
                        tds = tr.find_all("td")
                        if ths and tds:
                            # label in <th>, value in the first <td>
                            label = ths[0].text.lower()
                            value = tds[0].text.lower()
                        elif not ths and len(tds) > 1:
                            # label and value in the first two <td>s
                            label = tds[0].text.lower()
                            value = tds[1].text.lower()
                        else:
                            _record_page_alert(
                                'ERROR: unhandled case: ' + url, logger)
                            exit()
                        if re.match('interest rates.*', label):
                            pc = themortgagemeter_utils.get_percentage(
                                value, logger)
                            product = savings_data.copy()
                            product['gross_percent'] = pc
                            product['aer_percent'] = pc
                            products.append(product)
    return products


def _term_table_products(bsobj, savings_data, url, logger):
    """Build products from the "Current rates" tables of fixed/tracker pages."""
    if re.match('.*fixed-online-saver.*', url) or re.match('.*fixed-saver.*', url):
        code = "FOS"
        headings = bsobj.find_all("h3", text="Current Rates")
        if not headings:
            headings = bsobj.find_all("h3", text="Current rates")
    else:
        # Only reached for tracker-bond urls (see the caller's condition).
        code = "TB"
        headings = []
        for candidate in bsobj.find_all("h4"):
            if candidate.text == "Current rates and apply":
                headings.append(candidate)
                break
    if not headings:
        _record_page_alert('No items from expected h3/4 match!', logger)

    products = []
    for heading in headings:
        # Calling a Tag is shorthand for find_all(), so this visits every
        # descendant of the heading's parent.
        for node in heading.parent():
            tbodys = node.find_all("tbody")
            if not tbodys:
                continue
            if code == "TB":
                # For tracker bonds only use tables that have a row whose
                # first cell is "Term"; this discards the first table.
                has_term_header = False
                for tbody in tbodys:
                    for tr in tbody.find_all("tr"):
                        tds = tr.find_all("td")
                        if tds and tds[0].text == "Term":
                            has_term_header = True
                if not has_term_header:
                    continue
            for tbody in tbodys:
                table_savings_period = "unset"
                for tr_count, tr in enumerate(tbody.find_all("tr")):
                    if code == "TB" and tr_count == 0:
                        # skip the first row
                        continue
                    product = savings_data.copy()
                    # For TB only the first data row carries the term cell;
                    # later rows start at the balance column and reuse the
                    # term remembered from that first row.
                    if code == "TB" and tr_count > 1:
                        first_column = 1
                        if table_savings_period == "unset":
                            _record_page_alert(
                                'ERROR: table_savings_period should not be unset',
                                logger)
                            exit()
                        product['savings_period'] = table_savings_period
                    else:
                        first_column = 0
                    # Columns: 0 term, 1 balance, 2 gross, 3 AER; anything
                    # after AER (e.g. net) is ignored.
                    for td_count, td in enumerate(tr.find_all("td"), first_column):
                        text = td.text.lower().strip().encode('utf-8')
                        if td_count == 0:
                            table_savings_period = themortgagemeter_utils.get_months(
                                text, logger)
                            product['savings_period'] = table_savings_period
                        elif td_count == 1:
                            res = savings_util.get_money_range(text, logger)
                            product['min_amt'] = res[0]
                            product['max_amt'] = res[1]
                        elif td_count == 2:
                            product['gross_percent'] = themortgagemeter_utils.get_percentage(
                                text, logger)
                        elif td_count == 3:
                            product['aer_percent'] = themortgagemeter_utils.get_percentage(
                                text, logger)
                            break
                    products.append(product)
    return products


def process_more_info_page(savings_data, url, logger):
    """Parse a savings product's "more info" page into a list of product dicts.

    The page layout is chosen from ``savings_data['isa']`` and from the url:
    ISA summary boxes, fixed-saver / tracker-bond rate tables, or the free
    text around the page's ``apr``-class element (online, regular and
    everyday savers).

    Args:
        savings_data: dict of product fields used as the template; each
            returned product is a copy with page-specific fields filled in.
            The online/regular/everyday branches also set fields on
            ``savings_data`` itself.
        url: address of the page; it is fetched and also selects the parser.
        logger: logger used for progress messages and alerts.

    Returns:
        A list of product dicts.  The list is empty only for
        ``/branch-accounts/`` urls, which are deliberately not parsed.

    Side effects:
        Calls ``exit()`` for an unrecognised url, for an unexpected page
        structure, or when a parsed page yields no products.
    """
    bsobj = themortgagemeter_utils.get_page(
        False, 'static_html/halifax/savings-accounts.html', url, logger)
    logger.info(url)
    savings_array = []

    if savings_data['isa'] == 'Y':
        savings_array = _isa_summary_box_products(
            bsobj, savings_data, url, logger)
    elif (re.match('.*fixed-online-saver.*', url)
          or re.match('.*tracker-bond.*', url)
          or re.match('.*fixed-saver.*', url)):
        savings_array = _term_table_products(bsobj, savings_data, url, logger)
    elif re.match('.*/online-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _split_or_alternatives(_apr_text_lines(bsobj)):
            product = savings_data.copy()
            product['gross_percent'] = themortgagemeter_utils.get_percentage(
                line, logger)
            product['aer_percent'] = product['gross_percent']
            res = savings_util.get_money_range(line, logger)
            product['min_amt'] = res[0]
            product['max_amt'] = res[1]
            savings_array.append(product)
    elif re.match('.*/regular-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        savings_data['regular_saver_frequency_period'] = '1'
        savings_data['regular_saver_frequency_type'] = 'M'
        savings_data['regular_saver'] = 'Y'
        # Always fixed
        savings_data['variability'] = 'F'
        for line in _split_or_alternatives(_apr_text_lines(bsobj)):
            product = savings_data.copy()
            product['gross_percent'] = themortgagemeter_utils.get_percentage(
                line, logger)
            if product['gross_percent'] == '':
                # No percentage on this line, so it is not a rate line.
                continue
            product['aer_percent'] = product['gross_percent']
            # Hard-code to 25-250 for now, this seems standard
            product['regular_saver_min_amt'] = '25'
            product['regular_saver_max_amt'] = '250'
            savings_array.append(product)
    elif re.match('.*/everyday-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _apr_text_lines(bsobj):
            # Only the lines mentioning "gross" carry a rate.
            if not re.match('.*gross.*', line):
                continue
            product = savings_data.copy()
            product['gross_percent'] = themortgagemeter_utils.get_percentage(
                line, logger)
            product['aer_percent'] = product['gross_percent']
            # TODO: bonus_frequency_period set to 1, or get from data?
            savings_array.append(product)
    elif re.match('.*/branch-accounts/.*', url):
        # Branch accounts are not parsed; an empty result is expected here.
        return savings_array
    else:
        logger.info('unhandled:' + url)
        exit()

    if not savings_array:
        _record_page_alert('ERROR: returning nothing from a page', logger)
        exit()
    logger.info('returning savings_array:' + str(savings_array))
    return savings_array
def _record_page_alert(message, logger):
    """Record an alert through the shared alert table connection."""
    themortgagemeter_utils.record_alert(
        message, logger,
        themortgagemeter_db.db_connection, themortgagemeter_db.cursor)


def _apr_text_lines(bsobj):
    """Return the newline-split text around the first 'apr'-class element."""
    apr_nodes = bsobj.find_all(attrs={'class': 'apr'})
    if not apr_nodes:
        # Nothing to parse; the caller reports the empty result.
        return []
    return apr_nodes[0].parent.parent.text.encode('utf-8').split('\n')


def _split_or_alternatives(raw_lines):
    """Split "<rate> or <rate>" lines in two and drop empty lines."""
    lines = []
    for line in raw_lines:
        if re.match('.* or [0-9].*', line):
            lines.extend(line.split(' or ', 1))
        else:
            lines.append(line)
    return [line for line in lines if line != '']


def _isa_summary_box_products(bsobj, savings_data, url, logger):
    """Build products from the tables found under each "Summary box" h2."""
    products = []
    is_fixed_isa = re.match(".*isa-saver-fixed.*", url)
    for heading in bsobj.find_all("h2", text="Summary box"):
        # Calling a Tag is shorthand for find_all(), so this visits every
        # descendant of the heading's parent.
        for node in heading.parent():
            tabs = node.find_all("table")
            if not tabs:
                continue
            if is_fixed_isa:
                if len(tabs) != 2:
                    _record_page_alert('ERROR: too many tabs in isa', logger)
                    exit()
                # Only the second table is parsed: one row per term.
                for tab in tabs[1:]:
                    tbodys = tab.find_all("tbody")
                    if not tbodys:
                        continue
                    for tr in tbodys[0].find_all("tr"):
                        tds = tr.find_all("td")
                        if len(tds) < 2:
                            # Not a term/rate row (e.g. no <td> cells).
                            continue
                        product = savings_data.copy()
                        product['savings_period'] = themortgagemeter_utils.get_months(
                            tds[0].text.strip().encode('utf-8'), logger)
                        product['aer_percent'] = themortgagemeter_utils.get_percentage(
                            tds[1].text.strip().encode('utf-8'), logger)
                        product['gross_percent'] = product['aer_percent']
                        products.append(product)
            else:
                if len(tabs) > 1:
                    _record_page_alert('ERROR: too many tabs in isa', logger)
                    exit()
                for tab in tabs:
                    for tr in tab.find_all("tr"):
                        ths = tr.find_all("th")
                        tds = tr.find_all("td")
                        if ths and tds:
                            # label in <th>, value in the first <td>
                            label = ths[0].text.lower()
                            value = tds[0].text.lower()
                        elif not ths and len(tds) > 1:
                            # label and value in the first two <td>s
                            label = tds[0].text.lower()
                            value = tds[1].text.lower()
                        else:
                            _record_page_alert(
                                'ERROR: unhandled case: ' + url, logger)
                            exit()
                        if re.match('interest rates.*', label):
                            pc = themortgagemeter_utils.get_percentage(
                                value, logger)
                            product = savings_data.copy()
                            product['gross_percent'] = pc
                            product['aer_percent'] = pc
                            products.append(product)
    return products


def _term_table_products(bsobj, savings_data, url, logger):
    """Build products from the "Current rates" tables of fixed/tracker pages."""
    if re.match('.*fixed-online-saver.*', url) or re.match('.*fixed-saver.*', url):
        code = "FOS"
        headings = bsobj.find_all("h3", text="Current Rates")
        if not headings:
            headings = bsobj.find_all("h3", text="Current rates")
    else:
        # Only reached for tracker-bond urls (see the caller's condition).
        code = "TB"
        headings = []
        for candidate in bsobj.find_all("h4"):
            if candidate.text == "Current rates and apply":
                headings.append(candidate)
                break
    if not headings:
        _record_page_alert('No items from expected h3/4 match!', logger)

    products = []
    for heading in headings:
        # Calling a Tag is shorthand for find_all(), so this visits every
        # descendant of the heading's parent.
        for node in heading.parent():
            tbodys = node.find_all("tbody")
            if not tbodys:
                continue
            if code == "TB":
                # For tracker bonds only use tables that have a row whose
                # first cell is "Term"; this discards the first table.
                has_term_header = False
                for tbody in tbodys:
                    for tr in tbody.find_all("tr"):
                        tds = tr.find_all("td")
                        if tds and tds[0].text == "Term":
                            has_term_header = True
                if not has_term_header:
                    continue
            for tbody in tbodys:
                table_savings_period = "unset"
                for tr_count, tr in enumerate(tbody.find_all("tr")):
                    if code == "TB" and tr_count == 0:
                        # skip the first row
                        continue
                    product = savings_data.copy()
                    # For TB only the first data row carries the term cell;
                    # later rows start at the balance column and reuse the
                    # term remembered from that first row.
                    if code == "TB" and tr_count > 1:
                        first_column = 1
                        if table_savings_period == "unset":
                            _record_page_alert(
                                'ERROR: table_savings_period should not be unset',
                                logger)
                            exit()
                        product['savings_period'] = table_savings_period
                    else:
                        first_column = 0
                    # Columns: 0 term, 1 balance, 2 gross, 3 AER; anything
                    # after AER (e.g. net) is ignored.
                    for td_count, td in enumerate(tr.find_all("td"), first_column):
                        text = td.text.lower().strip().encode('utf-8')
                        if td_count == 0:
                            table_savings_period = themortgagemeter_utils.get_months(
                                text, logger)
                            product['savings_period'] = table_savings_period
                        elif td_count == 1:
                            res = savings_util.get_money_range(text, logger)
                            product['min_amt'] = res[0]
                            product['max_amt'] = res[1]
                        elif td_count == 2:
                            product['gross_percent'] = themortgagemeter_utils.get_percentage(
                                text, logger)
                        elif td_count == 3:
                            product['aer_percent'] = themortgagemeter_utils.get_percentage(
                                text, logger)
                            break
                    products.append(product)
    return products


def process_more_info_page(savings_data, url, logger):
    """Parse a savings product's "more info" page into a list of product dicts.

    The page layout is chosen from ``savings_data['isa']`` and from the url:
    ISA summary boxes, fixed-saver / tracker-bond rate tables, or the free
    text around the page's ``apr``-class element (online, regular and
    everyday savers).

    Args:
        savings_data: dict of product fields used as the template; each
            returned product is a copy with page-specific fields filled in.
            The online/regular/everyday branches also set fields on
            ``savings_data`` itself.
        url: address of the page; it is fetched and also selects the parser.
        logger: logger used for progress messages and alerts.

    Returns:
        A list of product dicts.  The list is empty only for
        ``/branch-accounts/`` urls, which are deliberately not parsed.

    Side effects:
        Calls ``exit()`` for an unrecognised url, for an unexpected page
        structure, or when a parsed page yields no products.
    """
    bsobj = themortgagemeter_utils.get_page(
        False, 'static_html/halifax/savings-accounts.html', url, logger)
    logger.info(url)
    savings_array = []

    if savings_data['isa'] == 'Y':
        savings_array = _isa_summary_box_products(
            bsobj, savings_data, url, logger)
    elif (re.match('.*fixed-online-saver.*', url)
          or re.match('.*tracker-bond.*', url)
          or re.match('.*fixed-saver.*', url)):
        savings_array = _term_table_products(bsobj, savings_data, url, logger)
    elif re.match('.*/online-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _split_or_alternatives(_apr_text_lines(bsobj)):
            product = savings_data.copy()
            product['gross_percent'] = themortgagemeter_utils.get_percentage(
                line, logger)
            product['aer_percent'] = product['gross_percent']
            res = savings_util.get_money_range(line, logger)
            product['min_amt'] = res[0]
            product['max_amt'] = res[1]
            savings_array.append(product)
    elif re.match('.*/regular-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        savings_data['regular_saver_frequency_period'] = '1'
        savings_data['regular_saver_frequency_type'] = 'M'
        savings_data['regular_saver'] = 'Y'
        # Always fixed
        savings_data['variability'] = 'F'
        for line in _split_or_alternatives(_apr_text_lines(bsobj)):
            product = savings_data.copy()
            product['gross_percent'] = themortgagemeter_utils.get_percentage(
                line, logger)
            if product['gross_percent'] == '':
                # No percentage on this line, so it is not a rate line.
                continue
            product['aer_percent'] = product['gross_percent']
            # Hard-code to 25-250 for now, this seems standard
            product['regular_saver_min_amt'] = '25'
            product['regular_saver_max_amt'] = '250'
            savings_array.append(product)
    elif re.match('.*/everyday-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _apr_text_lines(bsobj):
            # Only the lines mentioning "gross" carry a rate.
            if not re.match('.*gross.*', line):
                continue
            product = savings_data.copy()
            product['gross_percent'] = themortgagemeter_utils.get_percentage(
                line, logger)
            product['aer_percent'] = product['gross_percent']
            # TODO: bonus_frequency_period set to 1, or get from data?
            savings_array.append(product)
    elif re.match('.*/branch-accounts/.*', url):
        # Branch accounts are not parsed; an empty result is expected here.
        return savings_array
    else:
        logger.info('unhandled:' + url)
        exit()

    if not savings_array:
        _record_page_alert('ERROR: returning nothing from a page', logger)
        exit()
    logger.info('returning savings_array:' + str(savings_array))
    return savings_array
def get_product_page_interest_rates(url, savings_data):
    """Scrape the rate tables on a product page and store one product per row.

    Every ``<table>`` on the page is classified by the text that follows
    "interest rates: " in its ``summary`` attribute.  The classification
    sets flags on ``savings_data``; each data row of the table is then
    parsed into a copy of ``savings_data`` and handed to
    ``savings_util.handle_savings_insert``.

    Args:
        url: address of the product page; also used to name the logger and
            passed through to the insert call.
        savings_data: dict of product fields used as the template for every
            row.  It is modified in place by the classification step.

    Returns:
        None.  Results are passed to ``savings_util.handle_savings_insert``.

    Side effects:
        Calls ``exit()`` when a table has no usable summary, when the
        summary is not recognised, or when a regular-saver amount cell does
        not have the expected "<min> - <max> per month" shape.
    """
    logger = logging.getLogger('retrieve ' + url)
    bsobj = themortgagemeter_utils.get_page(False, '', url, logger)
    if re.match('.*isa.*', url):
        savings_data['isa'] = 'Y'

    for t in bsobj.find_all('table'):
        # Tag.get() returns None when the attribute is absent; treat that
        # the same as an empty summary instead of failing on None.encode().
        raw_summary = t.get('summary')
        if raw_summary is None:
            summary = ''
        else:
            summary = raw_summary.encode('utf-8').lower()
        if not summary:
            exit()

        found = re.match('.*interest rates: (.*)', summary)
        if found is None:
            # No "interest rates: " marker at all: report it like any other
            # unrecognised table rather than failing on None.group().
            themortgagemeter_utils.record_alert(
                'NEED TO HANDLE: ' + summary, logger,
                themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
            exit()
        summary_info = found.group(1)

        # NOTE(review): these flags are written to the shared savings_data,
        # so they carry over to later tables on the same page - confirm
        # that is intended.
        # The first three cases are exact matches: a parenthesised string
        # is not a tuple, so `in ("...")` would be a substring test that
        # also accepts '' or any fragment of the name.
        if summary_info == "cash e-isa#":
            savings_data['isa'] = 'Y'
        elif summary_info == "fixed rate saver - monthly interest":
            savings_data['variability'] = 'F'
            savings_data['interest_paid'] = 'M'
        elif summary_info == "fixed rate saver - annual interest":
            savings_data['variability'] = 'F'
            savings_data['interest_paid'] = 'Y'
        elif "regular saver" in summary_info:
            savings_data['regular_saver'] = 'Y'
            savings_data['interest_paid'] = 'Y'
        elif "online bonus" in summary_info:
            savings_data['bonus'] = 'Y'
            savings_data['branch'] = 'N'
            savings_data['bonus_frequency_period'] = '1'
            savings_data['bonus_frequency_type'] = 'M'
            # skip bonus for HSBC - it's complicated - probably needs its
            # own function TODO
            continue
        elif "flexible saver" in summary_info:
            savings_data['variability'] = 'V'
        else:
            themortgagemeter_utils.record_alert(
                'NEED TO HANDLE: ' + summary_info, logger,
                themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
            exit()

        tr_count = 0
        for tr in t.find_all('tr'):
            # Each row is a new savings product, so work on a copy.
            this_savings_data = savings_data.copy()
            is_regular_saver = this_savings_data['regular_saver'] == 'Y'
            # Regular-saver rows start one column early, so the amount cell
            # is the second <td> rather than the first.
            td_count = -1 if is_regular_saver else 0

            # The first row of every table is never parsed (presumably a
            # header row).
            if tr_count >= 1:
                for td in tr.find_all('td'):
                    td_style = td.get('style')
                    if td_style is not None:
                        td_style = td_style.lower().encode('utf-8').replace(' ', '')
                    if td_style == 'vertical-align:middle':
                        # Such cells are not counted as a column.
                        continue
                    logger.info(td)
                    v = td.text.encode('utf-8').lower().strip()
                    if td_count == 0:
                        if is_regular_saver:
                            logger.info('regular_saver: ' + v)
                            parts = v.split()
                            if len(parts) < 5 or parts[4] != "month":
                                themortgagemeter_utils.record_alert(
                                    'ERROR: reg saver not parsed: ' + v,
                                    logger,
                                    themortgagemeter_db.db_connection,
                                    themortgagemeter_db.cursor)
                                exit()
                            # [2:] drops the first two bytes of each amount
                            # (presumably a UTF-8 encoded pound sign).
                            this_savings_data['regular_saver_min_amt'] = parts[0][2:]
                            this_savings_data['regular_saver_max_amt'] = parts[2][2:]
                            this_savings_data['regular_saver_frequency_period'] = '1'
                            this_savings_data['regular_saver_frequency_type'] = 'M'
                        else:
                            res = savings_util.get_money_range(v, logger)
                            this_savings_data['min_amt'] = res[0]
                            this_savings_data['max_amt'] = res[1]
                    elif td_count == 2:
                        # gross %; column 1 (net %) is deliberately ignored
                        this_savings_data['gross_percent'] = v
                    elif td_count == 3:
                        this_savings_data['aer_percent'] = v
                    td_count += 1

                # Some trs have no tds; we ignore those.
                if td_count > 0:
                    # TODO: fixed savings?
                    logger.info(this_savings_data)
                    savings_util.handle_savings_insert(
                        institution_code,
                        this_savings_data['isa'],
                        this_savings_data['regular_saver'],
                        this_savings_data['regular_saver_frequency_period'],
                        this_savings_data['regular_saver_frequency_type'],
                        this_savings_data['regular_saver_min_amt'],
                        this_savings_data['regular_saver_max_amt'],
                        this_savings_data['bonus'],
                        this_savings_data['bonus_frequency_period'],
                        this_savings_data['bonus_frequency_type'],
                        this_savings_data['online'],
                        this_savings_data['branch'],
                        this_savings_data['variability'],
                        this_savings_data['savings_period'],
                        this_savings_data['min_amt'],
                        this_savings_data['max_amt'],
                        this_savings_data['gross_percent'],
                        this_savings_data['aer_percent'],
                        this_savings_data['child'],
                        this_savings_data['interest_paid'],
                        url,
                        logger)
            tr_count += 1