Example #1
0
def _apply_summary_info(summary_info, savings_data):
    """Set product flags on savings_data from a rate table's summary text.

    summary_info is the lower-cased text that follows "interest rates: " in
    the table's summary attribute.  savings_data is updated in place.

    Returns 'parse' when the table's rows should be read, 'skip' when the
    table is deliberately ignored, or None when the summary is not recognised.
    """
    # These three are exact matches.  They used to be written as
    # `summary_info in ("literal")`, which is a substring test against a
    # plain string (parentheses alone do not make a tuple), so fragments such
    # as "" or "saver" were wrongly accepted.
    if summary_info == "cash e-isa#":
        savings_data['isa'] = 'Y'
    elif summary_info == "fixed rate saver - monthly interest":
        savings_data['variability'] = 'F'
        savings_data['interest_paid'] = 'M'
    elif summary_info == "fixed rate saver - annual interest":
        savings_data['variability'] = 'F'
        savings_data['interest_paid'] = 'Y'
    elif "regular saver" in summary_info:
        savings_data['regular_saver'] = 'Y'
        savings_data['interest_paid'] = 'Y'
    elif "online bonus" in summary_info:
        savings_data['bonus'] = 'Y'
        savings_data['branch'] = 'N'
        savings_data['bonus_frequency_period'] = '1'
        savings_data['bonus_frequency_type'] = 'M'
        # skip bonus for HSBC- it's complicated - probably needs its own function TODO
        return 'skip'
    elif "flexible saver" in summary_info:
        savings_data['variability'] = 'V'
    else:
        return None
    return 'parse'


def _store_savings_row(row, url, logger):
    """Pass one parsed product row to savings_util.handle_savings_insert.

    The positional order below is the order the insert helper is called with;
    institution_code is read from module scope.
    """
    savings_util.handle_savings_insert(
        institution_code, row['isa'], row['regular_saver'],
        row['regular_saver_frequency_period'],
        row['regular_saver_frequency_type'],
        row['regular_saver_min_amt'], row['regular_saver_max_amt'],
        row['bonus'], row['bonus_frequency_period'],
        row['bonus_frequency_type'], row['online'], row['branch'],
        row['variability'], row['savings_period'], row['min_amt'],
        row['max_amt'], row['gross_percent'], row['aer_percent'],
        row['child'], row['interest_paid'], url, logger)


def get_product_page_interest_rates(url, savings_data):
    """Read every rate table on a product page and store one product per row.

    url          -- page to fetch; also used to name the logger and passed to
                    the insert helper.
    savings_data -- dict of product fields.  It is updated in place with the
                    flags implied by the URL and by each table's summary, and
                    those updates carry over to later tables on the same page.

    Each data row is parsed into a copy of savings_data and handed to
    savings_util.handle_savings_insert.  Nothing is returned.  Tables with a
    missing/empty summary, an unrecognised summary or an unparseable
    regular-saver cell stop the process via exit().
    """
    logger = logging.getLogger('retrieve ' + url)
    bsobj = themortgagemeter_utils.get_page(False, '', url, logger)
    if re.match('.*isa.*', url):
        savings_data['isa'] = 'Y'
    for t in bsobj.find_all('table'):
        # Get all tables, then match on summary == "Interest rates:.*", and set up variables accordingly.
        raw_summary = t.get('summary')
        # A table with no summary attribute is treated like an empty summary
        # instead of failing on None.encode().
        summary = raw_summary.encode('utf-8').lower() if raw_summary else ''
        if not summary:
            logger.info('table without a summary on ' + url)
            exit()
        matched = re.match('.*interest rates: (.*)', summary)
        if matched is None:
            # Summary present but not in the expected form: report it the
            # same way as an unrecognised product.
            themortgagemeter_utils.record_alert(
                'NEED TO HANDLE: ' + summary, logger,
                themortgagemeter_db.db_connection,
                themortgagemeter_db.cursor)
            exit()
        summary_info = matched.group(1)
        action = _apply_summary_info(summary_info, savings_data)
        if action is None:
            themortgagemeter_utils.record_alert(
                'NEED TO HANDLE: ' + summary_info, logger,
                themortgagemeter_db.db_connection,
                themortgagemeter_db.cursor)
            exit()
        if action == 'skip':
            continue
        for tr_count, tr in enumerate(t.find_all('tr')):
            if tr_count == 0:
                # The first row of each table is never parsed.
                continue
            # This is a new savings product, so clone the data at this point and use that from here.
            this_savings_data = savings_data.copy()
            is_regular_saver = this_savings_data['regular_saver'] == 'Y'
            # For regular-saver tables the count starts at -1, so the first
            # cell of the row is ignored and the second is read as column 0.
            td_count = -1 if is_regular_saver else 0
            for td in tr.find_all('td'):
                td_style = td.get('style')
                if td_style is not None:
                    td_style = td_style.lower().encode('utf-8').replace(' ', '')
                    if td_style == 'vertical-align:middle':
                        # Such cells are skipped and do not count as a column.
                        continue
                logger.info(td)
                v = td.text.encode('utf-8').lower().strip()
                if td_count == 0:
                    if is_regular_saver:
                        logger.info('regular_saver: ' + v)
                        parts = v.split()
                        # Words 0 and 2 hold the min/max amounts and word 4
                        # the period; anything else is reported, not indexed
                        # blindly.
                        if len(parts) < 5 or parts[4] != "month":
                            themortgagemeter_utils.record_alert(
                                'ERROR: reg saver not parsed: ' + v, logger,
                                themortgagemeter_db.db_connection,
                                themortgagemeter_db.cursor)
                            exit()
                        # [2:] drops the first two characters of each amount
                        # (presumably a UTF-8 encoded currency sign - confirm).
                        this_savings_data['regular_saver_min_amt'] = parts[0][2:]
                        this_savings_data['regular_saver_max_amt'] = parts[2][2:]
                        this_savings_data['regular_saver_frequency_period'] = '1'
                        this_savings_data['regular_saver_frequency_type'] = 'M'
                    else:
                        # if it's got a + at the end, it's a min, if it's "up to" it's a max.
                        res = savings_util.get_money_range(v, logger)
                        this_savings_data['min_amt'] = res[0]
                        this_savings_data['max_amt'] = res[1]
                elif td_count == 2:
                    # gross % (column 1, net %, is deliberately ignored)
                    this_savings_data['gross_percent'] = v
                elif td_count == 3:
                    this_savings_data['aer_percent'] = v
                td_count += 1
            # Some trs have no tds; we ignore those.
            if td_count > 0:
                # Now store this product
                # TODO: fixed savings?
                logger.info(this_savings_data)
                _store_savings_row(this_savings_data, url, logger)
def _alert_and_exit(message, logger):
    """Record an alert through themortgagemeter_utils, then stop the process."""
    themortgagemeter_utils.record_alert(
        message, logger, themortgagemeter_db.db_connection,
        themortgagemeter_db.cursor)
    exit()


def _with_rate(savings_data, percentage):
    """Return a copy of savings_data with gross and AER both set to percentage."""
    product = savings_data.copy()
    product['gross_percent'] = percentage
    product['aer_percent'] = percentage
    return product


def _isa_products(bsobj, savings_data, url, logger):
    """Collect ISA products from the tables found under "Summary box" headings."""
    products = []
    fixed_isa = re.match(".*isa-saver-fixed.*", url)
    for heading in bsobj.find_all("h2", text="Summary box"):
        # Calling a tag is find_all(): every element under the heading's parent.
        for element in heading.parent():
            tabs = element.find_all("table")
            if not tabs:
                continue
            if fixed_isa:
                if len(tabs) != 2:
                    _alert_and_exit('ERROR: too many tabs in isa', logger)
                # Only the second of the two tables is read.
                for tab in tabs[1:]:
                    tbody = tab.find_all("tbody")[0]
                    for tr in tbody.find_all("tr"):
                        product = savings_data.copy()
                        tds = tr.find_all("td")
                        product['savings_period'] = themortgagemeter_utils.get_months(
                            tds[0].text.strip().encode('utf-8'), logger)
                        product['aer_percent'] = themortgagemeter_utils.get_percentage(
                            tds[1].text.strip().encode('utf-8'), logger)
                        product['gross_percent'] = product['aer_percent']
                        products.append(product)
            else:
                if len(tabs) > 1:
                    _alert_and_exit('ERROR: too many tabs in isa', logger)
                for tab in tabs:
                    for tr in tab.find_all("tr"):
                        ths = tr.find_all("th")
                        tds = tr.find_all("td")
                        if ths and tds:
                            label_cell, value_cell = ths[0], tds[0]
                        elif not ths and len(tds) > 1:
                            # Two cells are needed here; a single-cell row
                            # used to fail with IndexError on tds[1].
                            label_cell, value_cell = tds[0], tds[1]
                        else:
                            # Does not return.
                            _alert_and_exit('ERROR: unhandled case: ' + url,
                                            logger)
                        if re.match('interest rates.*', label_cell.text.lower()):
                            pc = themortgagemeter_utils.get_percentage(
                                value_cell.text.lower(), logger)
                            products.append(_with_rate(savings_data, pc))
    return products


def _has_term_row(tbodys):
    """Return True when some row in tbodys has "Term" as its first cell text."""
    for tbody in tbodys:
        for tr in tbody.find_all("tr"):
            tds = tr.find_all("td")
            # Rows without any <td> are skipped rather than indexed.
            if tds and tds[0].text == "Term":
                return True
    return False


def _fixed_term_products(bsobj, savings_data, url, logger):
    """Collect products from the rate tables of fixed-saver / tracker-bond pages.

    Only meant to be called for URLs containing fixed-online-saver,
    fixed-saver or tracker-bond; anything that is not a fixed saver is
    handled as a tracker bond.
    """
    products = []
    is_tracker = not (re.match('.*fixed-online-saver.*', url)
                      or re.match('.*fixed-saver.*', url))
    if is_tracker:
        headings = []
        for h4 in bsobj.find_all("h4"):
            if h4.text == "Current rates and apply":
                headings.append(h4)
                break
    else:
        headings = bsobj.find_all("h3", text="Current Rates")
        if not headings:
            headings = bsobj.find_all("h3", text="Current rates")
    if not headings:
        # Reported but not fatal here; the caller exits on an empty result.
        themortgagemeter_utils.record_alert(
            'No items from expected h3/4 match!', logger,
            themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
    for heading in headings:
        for element in heading.parent():
            tbodys = element.find_all("tbody")
            if len(tbodys) == 0:
                continue
            # For tracker bonds only elements containing a "Term" row are used.
            if is_tracker and not _has_term_row(tbodys):
                continue
            for tbody in tbodys:
                table_savings_period = "unset"
                for tr_count, tr in enumerate(tbody.find_all("tr")):
                    if is_tracker and tr_count == 0:
                        # skip the first row
                        continue
                    # clone the savings_data ready to write to
                    product = savings_data.copy()
                    td_count = 0
                    if is_tracker and tr_count > 1:
                        # First td is time only on first row for TB, so later
                        # rows start at the balance column and reuse the term.
                        td_count = 1
                        if table_savings_period == "unset":
                            _alert_and_exit(
                                'ERROR: table_savings_period should not be unset',
                                logger)
                        product['savings_period'] = table_savings_period
                    for td in tr.find_all("td"):
                        # 0 - term
                        # 1 - balance
                        # 2 - Gross
                        # 3 - AER
                        # 4 - NET (ignore)
                        # Ignore remainder of cols
                        text = td.text.lower().strip().encode('utf-8')
                        if td_count == 0:
                            # store this in a variable for use on next row if necessary
                            table_savings_period = themortgagemeter_utils.get_months(
                                text, logger)
                            product['savings_period'] = table_savings_period
                        elif td_count == 1:
                            res = savings_util.get_money_range(text, logger)
                            product['min_amt'] = res[0]
                            product['max_amt'] = res[1]
                        elif td_count == 2:
                            product['gross_percent'] = themortgagemeter_utils.get_percentage(
                                text, logger)
                        elif td_count == 3:
                            product['aer_percent'] = themortgagemeter_utils.get_percentage(
                                text, logger)
                            # and then break out
                            break
                        td_count += 1
                    products.append(product)
    return products


def _apr_lines(bsobj):
    """Return the text lines around the first class="apr" element.

    The text is taken from that element's grandparent and split on newlines.
    An empty list is returned when the page has no such element, so the
    caller's "returning nothing" alert fires instead of an IndexError.
    """
    apr_elements = bsobj.find_all(attrs={'class': 'apr'})
    if not apr_elements:
        return []
    return apr_elements[0].parent.parent.text.encode('utf-8').split('\n')


def _split_alternatives(raw_lines):
    """Split lines of the form "... or <digit>..." in two and drop empty lines."""
    lines = []
    for line in raw_lines:
        if re.match('.* or [0-9].*', line):
            lines.extend(line.split(' or ', 1))
        else:
            lines.append(line)
    return [line for line in lines if line != '']


def process_more_info_page(savings_data, url, logger):
    """Parse one savings "more info" page into a list of product dicts.

    savings_data -- dict of product fields used as the template for every
                    product.  The online-saver, regular-saver and
                    everyday-saver branches also set fields on it in place.
    url          -- page address; its form selects the parsing branch.
    logger       -- logger passed on to the project helpers.

    Returns a list of copies of savings_data, one per product found, with
    rate/amount/period fields filled in.  branch-accounts URLs return an
    empty list.  An unhandled URL, an unexpected table layout or an empty
    result for any other page type stops the process via exit().
    """
    bsobj = themortgagemeter_utils.get_page(
        False, 'static_html/halifax/savings-accounts.html', url, logger)
    savings_array = []
    # Parenthesised so the block also parses on Python 3; output is unchanged.
    print(url)
    if savings_data['isa'] == 'Y':
        savings_array = _isa_products(bsobj, savings_data, url, logger)
    elif (re.match('.*fixed-online-saver.*', url)
          or re.match('.*tracker-bond.*', url)
          or re.match('.*fixed-saver.*', url)):
        savings_array = _fixed_term_products(bsobj, savings_data, url, logger)
    elif re.match('.*/online-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _split_alternatives(_apr_lines(bsobj)):
            pc = themortgagemeter_utils.get_percentage(line, logger)
            product = _with_rate(savings_data, pc)
            res = savings_util.get_money_range(line, logger)
            product['min_amt'] = res[0]
            product['max_amt'] = res[1]
            savings_array.append(product)
    elif re.match('.*/regular-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        savings_data['regular_saver_frequency_period'] = '1'
        savings_data['regular_saver_frequency_type'] = 'M'
        savings_data['regular_saver'] = 'Y'
        # Always fixed
        savings_data['variability'] = 'F'
        for line in _split_alternatives(_apr_lines(bsobj)):
            pc = themortgagemeter_utils.get_percentage(line, logger)
            if pc == '':
                # No percentage on this line: not a product.
                continue
            product = _with_rate(savings_data, pc)
            # Hard-code to 25-250 for now, this seems standard
            product['regular_saver_min_amt'] = '25'
            product['regular_saver_max_amt'] = '250'
            savings_array.append(product)
    elif re.match('.*/everyday-saver/', url):
        # This one's quite simple (I think)
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _apr_lines(bsobj):
            # Only lines mentioning "gross" carry a rate.
            if re.match('.*gross.*', line):
                pc = themortgagemeter_utils.get_percentage(line, logger)
                # TODO: bonus_frequency_period set to 1, or get from data?
                savings_array.append(_with_rate(savings_data, pc))
    elif re.match('.*/branch-accounts/.*', url):
        return savings_array
    else:
        logger.info('unhandled:' + url)
        exit()
    if savings_array == []:
        _alert_and_exit('ERROR: returning nothing from a page', logger)
    # Return the savings_array
    logger.info('returning savings_array:' + str(savings_array))
    return savings_array
Example #3
0
def _alert_and_exit(message, logger):
    """Record an alert through themortgagemeter_utils, then stop the process."""
    themortgagemeter_utils.record_alert(
        message, logger, themortgagemeter_db.db_connection,
        themortgagemeter_db.cursor)
    exit()


def _with_rate(savings_data, percentage):
    """Return a copy of savings_data with gross and AER both set to percentage."""
    product = savings_data.copy()
    product['gross_percent'] = percentage
    product['aer_percent'] = percentage
    return product


def _isa_products(bsobj, savings_data, url, logger):
    """Collect ISA products from the tables found under "Summary box" headings."""
    products = []
    fixed_isa = re.match(".*isa-saver-fixed.*", url)
    for heading in bsobj.find_all("h2", text="Summary box"):
        # Calling a tag is find_all(): every element under the heading's parent.
        for element in heading.parent():
            tabs = element.find_all("table")
            if not tabs:
                continue
            if fixed_isa:
                if len(tabs) != 2:
                    _alert_and_exit('ERROR: too many tabs in isa', logger)
                # Only the second of the two tables is read.
                for tab in tabs[1:]:
                    tbody = tab.find_all("tbody")[0]
                    for tr in tbody.find_all("tr"):
                        product = savings_data.copy()
                        tds = tr.find_all("td")
                        product['savings_period'] = themortgagemeter_utils.get_months(
                            tds[0].text.strip().encode('utf-8'), logger)
                        product['aer_percent'] = themortgagemeter_utils.get_percentage(
                            tds[1].text.strip().encode('utf-8'), logger)
                        product['gross_percent'] = product['aer_percent']
                        products.append(product)
            else:
                if len(tabs) > 1:
                    _alert_and_exit('ERROR: too many tabs in isa', logger)
                for tab in tabs:
                    for tr in tab.find_all("tr"):
                        ths = tr.find_all("th")
                        tds = tr.find_all("td")
                        if ths and tds:
                            label_cell, value_cell = ths[0], tds[0]
                        elif not ths and len(tds) > 1:
                            # Two cells are needed here; a single-cell row
                            # used to fail with IndexError on tds[1].
                            label_cell, value_cell = tds[0], tds[1]
                        else:
                            # Does not return.
                            _alert_and_exit('ERROR: unhandled case: ' + url,
                                            logger)
                        if re.match('interest rates.*', label_cell.text.lower()):
                            pc = themortgagemeter_utils.get_percentage(
                                value_cell.text.lower(), logger)
                            products.append(_with_rate(savings_data, pc))
    return products


def _has_term_row(tbodys):
    """Return True when some row in tbodys has "Term" as its first cell text."""
    for tbody in tbodys:
        for tr in tbody.find_all("tr"):
            tds = tr.find_all("td")
            # Rows without any <td> are skipped rather than indexed.
            if tds and tds[0].text == "Term":
                return True
    return False


def _fixed_term_products(bsobj, savings_data, url, logger):
    """Collect products from the rate tables of fixed-saver / tracker-bond pages.

    Only meant to be called for URLs containing fixed-online-saver,
    fixed-saver or tracker-bond; anything that is not a fixed saver is
    handled as a tracker bond.
    """
    products = []
    is_tracker = not (re.match('.*fixed-online-saver.*', url)
                      or re.match('.*fixed-saver.*', url))
    if is_tracker:
        headings = []
        for h4 in bsobj.find_all("h4"):
            if h4.text == "Current rates and apply":
                headings.append(h4)
                break
    else:
        headings = bsobj.find_all("h3", text="Current Rates")
        if not headings:
            headings = bsobj.find_all("h3", text="Current rates")
    if not headings:
        # Reported but not fatal here; the caller exits on an empty result.
        themortgagemeter_utils.record_alert(
            'No items from expected h3/4 match!', logger,
            themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
    for heading in headings:
        for element in heading.parent():
            tbodys = element.find_all("tbody")
            if len(tbodys) == 0:
                continue
            # For tracker bonds only elements containing a "Term" row are used.
            if is_tracker and not _has_term_row(tbodys):
                continue
            for tbody in tbodys:
                table_savings_period = "unset"
                for tr_count, tr in enumerate(tbody.find_all("tr")):
                    if is_tracker and tr_count == 0:
                        # skip the first row
                        continue
                    # clone the savings_data ready to write to
                    product = savings_data.copy()
                    td_count = 0
                    if is_tracker and tr_count > 1:
                        # First td is time only on first row for TB, so later
                        # rows start at the balance column and reuse the term.
                        td_count = 1
                        if table_savings_period == "unset":
                            _alert_and_exit(
                                'ERROR: table_savings_period should not be unset',
                                logger)
                        product['savings_period'] = table_savings_period
                    for td in tr.find_all("td"):
                        # 0 - term
                        # 1 - balance
                        # 2 - Gross
                        # 3 - AER
                        # 4 - NET (ignore)
                        # Ignore remainder of cols
                        text = td.text.lower().strip().encode('utf-8')
                        if td_count == 0:
                            # store this in a variable for use on next row if necessary
                            table_savings_period = themortgagemeter_utils.get_months(
                                text, logger)
                            product['savings_period'] = table_savings_period
                        elif td_count == 1:
                            res = savings_util.get_money_range(text, logger)
                            product['min_amt'] = res[0]
                            product['max_amt'] = res[1]
                        elif td_count == 2:
                            product['gross_percent'] = themortgagemeter_utils.get_percentage(
                                text, logger)
                        elif td_count == 3:
                            product['aer_percent'] = themortgagemeter_utils.get_percentage(
                                text, logger)
                            # and then break out
                            break
                        td_count += 1
                    products.append(product)
    return products


def _apr_lines(bsobj):
    """Return the text lines around the first class="apr" element.

    The text is taken from that element's grandparent and split on newlines.
    An empty list is returned when the page has no such element, so the
    caller's "returning nothing" alert fires instead of an IndexError.
    """
    apr_elements = bsobj.find_all(attrs={'class': 'apr'})
    if not apr_elements:
        return []
    return apr_elements[0].parent.parent.text.encode('utf-8').split('\n')


def _split_alternatives(raw_lines):
    """Split lines of the form "... or <digit>..." in two and drop empty lines."""
    lines = []
    for line in raw_lines:
        if re.match('.* or [0-9].*', line):
            lines.extend(line.split(' or ', 1))
        else:
            lines.append(line)
    return [line for line in lines if line != '']


def process_more_info_page(savings_data, url, logger):
    """Parse one savings "more info" page into a list of product dicts.

    savings_data -- dict of product fields used as the template for every
                    product.  The online-saver, regular-saver and
                    everyday-saver branches also set fields on it in place.
    url          -- page address; its form selects the parsing branch.
    logger       -- logger passed on to the project helpers.

    Returns a list of copies of savings_data, one per product found, with
    rate/amount/period fields filled in.  branch-accounts URLs return an
    empty list.  An unhandled URL, an unexpected table layout or an empty
    result for any other page type stops the process via exit().
    """
    bsobj = themortgagemeter_utils.get_page(
        False, 'static_html/halifax/savings-accounts.html', url, logger)
    savings_array = []
    # Parenthesised so the block also parses on Python 3; output is unchanged.
    print(url)
    if savings_data['isa'] == 'Y':
        savings_array = _isa_products(bsobj, savings_data, url, logger)
    elif (re.match('.*fixed-online-saver.*', url)
          or re.match('.*tracker-bond.*', url)
          or re.match('.*fixed-saver.*', url)):
        savings_array = _fixed_term_products(bsobj, savings_data, url, logger)
    elif re.match('.*/online-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _split_alternatives(_apr_lines(bsobj)):
            pc = themortgagemeter_utils.get_percentage(line, logger)
            product = _with_rate(savings_data, pc)
            res = savings_util.get_money_range(line, logger)
            product['min_amt'] = res[0]
            product['max_amt'] = res[1]
            savings_array.append(product)
    elif re.match('.*/regular-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        savings_data['regular_saver_frequency_period'] = '1'
        savings_data['regular_saver_frequency_type'] = 'M'
        savings_data['regular_saver'] = 'Y'
        # Always fixed
        savings_data['variability'] = 'F'
        for line in _split_alternatives(_apr_lines(bsobj)):
            pc = themortgagemeter_utils.get_percentage(line, logger)
            if pc == '':
                # No percentage on this line: not a product.
                continue
            product = _with_rate(savings_data, pc)
            # Hard-code to 25-250 for now, this seems standard
            product['regular_saver_min_amt'] = '25'
            product['regular_saver_max_amt'] = '250'
            savings_array.append(product)
    elif re.match('.*/everyday-saver/', url):
        # This one's quite simple (I think)
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _apr_lines(bsobj):
            # Only lines mentioning "gross" carry a rate.
            if re.match('.*gross.*', line):
                pc = themortgagemeter_utils.get_percentage(line, logger)
                # TODO: bonus_frequency_period set to 1, or get from data?
                savings_array.append(_with_rate(savings_data, pc))
    elif re.match('.*/branch-accounts/.*', url):
        return savings_array
    else:
        logger.info('unhandled:' + url)
        exit()
    if savings_array == []:
        _alert_and_exit('ERROR: returning nothing from a page', logger)
    # Return the savings_array
    logger.info('returning savings_array:' + str(savings_array))
    return savings_array
def get_product_page_interest_rates(url, savings_data):
    """Scrape every interest-rate table on a product page and store its rows.

    The page at ``url`` is fetched, and each ``<table>`` is classified by the
    text following "interest rates: " in its ``summary`` attribute. That
    classification sets flags on ``savings_data``; each data row of the table
    is then parsed into a copy of ``savings_data`` and passed to
    ``savings_util.handle_savings_insert``.

    Args:
        url: Address of the product page; also used in the logger name and
            passed through to the insert call.
        savings_data: Dict of savings-product fields. It is modified in
            place, so flags set for one table remain set for later tables
            and for the caller.

    Returns:
        None. Results are handed to ``savings_util.handle_savings_insert``.

    Raises:
        SystemExit: via ``exit()`` when a table has no usable summary, the
            summary is not one of the handled kinds, or a regular-saver
            cell cannot be parsed.

    Note:
        ``institution_code`` is not defined in this function; it is read
        from the enclosing (module) scope.
    """
    logger = logging.getLogger('retrieve ' + url)
    bsobj = themortgagemeter_utils.get_page(False, '', url, logger)
    if re.match('.*isa.*', url):
        savings_data['isa'] = 'Y'
    for t in bsobj.find_all('table'):
        # A table without a summary attribute yields None from get(); treat
        # that the same as an empty summary instead of crashing on .encode.
        summary = (t.get('summary') or '').encode('utf-8').lower()
        if not summary:
            logger.info('table without a summary:' + url)
            exit()
        # Classify the table by the text after "interest rates: ".
        summary_match = re.match('.*interest rates: (.*)', summary)
        if summary_match is None:
            # No "interest rates: " marker at all: report it like any other
            # unhandled summary rather than failing on .group().
            themortgagemeter_utils.record_alert(
                'NEED TO HANDLE: ' + summary, logger,
                themortgagemeter_db.db_connection,
                themortgagemeter_db.cursor)
            exit()
        summary_info = summary_match.group(1)
        # The first three checks are exact matches. Writing them as
        # `in ("...")` would be a substring test against the literal (the
        # parentheses do not make a tuple) and would accept '' or fragments.
        if summary_info == "cash e-isa#":
            savings_data['isa'] = 'Y'
        elif summary_info == "fixed rate saver - monthly interest":
            savings_data['variability'] = 'F'
            savings_data['interest_paid'] = 'M'
        elif summary_info == "fixed rate saver - annual interest":
            savings_data['variability'] = 'F'
            savings_data['interest_paid'] = 'Y'
        elif "regular saver" in summary_info:
            savings_data['regular_saver'] = 'Y'
            savings_data['interest_paid'] = 'Y'
        elif "online bonus" in summary_info:
            savings_data['bonus'] = 'Y'
            savings_data['branch'] = 'N'
            savings_data['bonus_frequency_period'] = '1'
            savings_data['bonus_frequency_type'] = 'M'
            # skip bonus for HSBC- it's complicated - probably needs its own function TODO
            continue
        elif "flexible saver" in summary_info:
            savings_data['variability'] = 'V'
        else:
            themortgagemeter_utils.record_alert(
                'NEED TO HANDLE: ' + summary_info, logger,
                themortgagemeter_db.db_connection,
                themortgagemeter_db.cursor)
            exit()
        for tr_count, tr in enumerate(t.find_all('tr')):
            # The first row of each table is never parsed
            # (presumably the column headings - confirm against the page).
            if tr_count < 1:
                continue
            # This is a new savings product, so work on a copy from here.
            this_savings_data = savings_data.copy()
            is_regular_saver = this_savings_data['regular_saver'] == 'Y'
            # Regular-saver rows start counting at -1, so their first cell
            # matches none of the column branches and the second cell is
            # treated as column 0.
            td_count = -1 if is_regular_saver else 0
            for td in tr.find_all('td'):
                td_style = td.get('style')
                if td_style is not None:
                    # Cells styled exactly "vertical-align:middle" (spaces
                    # ignored) are skipped and do not advance the column.
                    if td_style.lower().replace(' ', '') == 'vertical-align:middle':
                        continue
                logger.info(td)
                v = td.text.encode('utf-8').lower().strip()
                if td_count == 0:
                    if is_regular_saver:
                        logger.info('regular_saver: ' + v)
                        parts = v.split()
                        # Tokens 0 and 2 are the min and max amounts and
                        # token 4 must be "month". A shorter or different
                        # cell is reported instead of raising IndexError.
                        if len(parts) < 5 or parts[4] != "month":
                            themortgagemeter_utils.record_alert(
                                'ERROR: reg saver not parsed: ' + v,
                                logger,
                                themortgagemeter_db.db_connection,
                                themortgagemeter_db.cursor)
                            exit()
                        # [2:] drops the first two characters of each amount
                        # (presumably the UTF-8 encoded pound sign - confirm).
                        this_savings_data['regular_saver_min_amt'] = parts[0][2:]
                        this_savings_data['regular_saver_max_amt'] = parts[2][2:]
                        this_savings_data['regular_saver_frequency_period'] = '1'
                        this_savings_data['regular_saver_frequency_type'] = 'M'
                    else:
                        # The helper turns the balance text into (min, max).
                        res = savings_util.get_money_range(v, logger)
                        this_savings_data['min_amt'] = res[0]
                        this_savings_data['max_amt'] = res[1]
                elif td_count == 1:
                    # we don't bother with net_percent
                    pass
                elif td_count == 2:
                    # gross %
                    this_savings_data['gross_percent'] = v
                elif td_count == 3:
                    this_savings_data['aer_percent'] = v
                td_count += 1
            # Some trs have no tds; we ignore those.
            if td_count > 0:
                # Now store this product
                # TODO: fixed savings?
                logger.info(this_savings_data)
                row = this_savings_data
                savings_util.handle_savings_insert(
                    institution_code,
                    row['isa'],
                    row['regular_saver'],
                    row['regular_saver_frequency_period'],
                    row['regular_saver_frequency_type'],
                    row['regular_saver_min_amt'],
                    row['regular_saver_max_amt'],
                    row['bonus'],
                    row['bonus_frequency_period'],
                    row['bonus_frequency_type'],
                    row['online'],
                    row['branch'],
                    row['variability'],
                    row['savings_period'],
                    row['min_amt'],
                    row['max_amt'],
                    row['gross_percent'],
                    row['aer_percent'],
                    row['child'],
                    row['interest_paid'],
                    url,
                    logger)