Example #1
0
def get_product_page(static,url):
    """Scrape the First Direct mortgage rates page and record one product per table row.

    The page is fetched with ``themortgagemeter_utils.get_page``.  Every
    ``<tr>`` of every ``<tbody>`` found inside an element with class
    ``section`` is read positionally:

    * cell 0 - text parsed for both the initial period and the mortgage type
    * cell 1 - LTV percentage
    * cell 2 - rate percentage
    * cell 3 - SVR percentage
    * cell 4 - APR percentage
    * cells 5 and 6 - money amounts, summed into the booking fee (optional)

    Each complete row is passed to ``mc_util.handle_mortgage_insert``.

    Args:
        static: Passed straight through to ``themortgagemeter_utils.get_page``.
        url: Address of the rates page; also stored with every inserted row.

    Returns:
        None.
    """
    logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page(static,'static_html/first_direct/mortgage-rates',url,logger)
    logger.debug(bsobj)
    # assume default of 25 years
    term = str(25 * 12)
    for section in bsobj.find_all(attrs={'class':'section'}):
        for tbody in section.find_all("tbody"):
            logger.debug(tbody)
            for tr in tbody.find_all("tr"):
                cells = [td.text.strip().encode('utf-8') for td in tr.find_all("td")]
                if len(cells) < 5:
                    # The period/type, LTV, rate, SVR and APR columns are all
                    # needed for an insert; a shorter row (e.g. a header made
                    # of <th> cells) carries no product.
                    logger.debug('skipping row with %d cells', len(cells))
                    continue
                logger.debug(cells[0])
                initial_period = themortgagemeter_utils.get_months(cells[0],logger)
                # mortgage_type F/D/T/O/V
                mortgage_type = mc_util.get_mortgage_type(cells[0],logger)
                ltv_percent = themortgagemeter_utils.get_percentage(cells[1],logger)
                rate_percent = themortgagemeter_utils.get_percentage(cells[2],logger)
                svr_percent = themortgagemeter_utils.get_percentage(cells[3],logger)
                apr_percent = themortgagemeter_utils.get_percentage(cells[4],logger)
                # The sixth and seventh columns, when present, are both fees.
                booking_fee_int = 0
                for fee_text in cells[5:7]:
                    booking_fee_int = booking_fee_int + int(themortgagemeter_utils.get_money(fee_text,logger))
                booking_fee = str(booking_fee_int)
                # Insert once per row: previously this call sat outside the
                # row loop, so only the last row of each tbody was recorded.
                # NOTE(review): institution_code and eligibility are not
                # assigned in this function; they must exist at module level.
                # Confirm that eligibility really does.
                mc_util.handle_mortgage_insert(institution_code,mortgage_type,rate_percent,svr_percent,apr_percent,ltv_percent,initial_period,booking_fee,term,url,eligibility,logger)
Example #2
0
def _more_info_alert_and_exit(message, logger):
    """Record ``message`` as an alert, then stop the process with ``exit()``."""
    themortgagemeter_utils.record_alert(
        message, logger, themortgagemeter_db.db_connection,
        themortgagemeter_db.cursor)
    exit()


def _more_info_rate_lines(bsobj, split_alternatives):
    """Return the non-empty text lines around the first element with class ``apr``.

    The text is taken from the grandparent of that element and split on
    newlines.  When ``split_alternatives`` is true, a line matching
    ``' or <digit>'`` is cut once at ``' or '`` so each half is parsed on its
    own.  Raises IndexError when the page has no ``apr`` element.
    """
    # get the apr class element, as that contains the text we need
    apr_text = bsobj.find_all(
        attrs={'class': 'apr'})[0].parent.parent.text.encode('utf-8')
    lines = []
    for line in apr_text.split('\n'):
        if split_alternatives and re.match('.* or [0-9].*', line):
            lines.extend(line.split(' or ', 1))
        else:
            lines.append(line)
    return [line for line in lines if line != '']


def _more_info_isa_rows(bsobj, savings_data, url, logger):
    """Build the savings records for an ISA page from its "Summary box" tables."""
    rows = []
    fixed_isa = re.match(".*isa-saver-fixed.*", url)
    for heading in bsobj.find_all("h2", text="Summary box"):
        # Calling a tag is shorthand for find_all(): this walks every
        # descendant tag of the heading's parent.
        for node in heading.parent():
            tabs = node.find_all("table")
            if not tabs:
                continue
            if fixed_isa:
                # Exactly two tables are expected; the alert text is kept
                # as-is although it fires for any other count.
                if len(tabs) != 2:
                    _more_info_alert_and_exit(
                        'ERROR: too many tabs in isa', logger)
                # The first table is discarded; the second holds the rates.
                for tab in tabs[1:]:
                    tbody = tab.find_all("tbody")[0]
                    for tr in tbody.find_all("tr"):
                        tds = tr.find_all("td")
                        row = savings_data.copy()
                        row['savings_period'] = themortgagemeter_utils.get_months(
                            tds[0].text.strip().encode('utf-8'), logger)
                        row['aer_percent'] = themortgagemeter_utils.get_percentage(
                            tds[1].text.strip().encode('utf-8'), logger)
                        row['gross_percent'] = row['aer_percent']
                        rows.append(row)
                continue
            if len(tabs) > 1:
                _more_info_alert_and_exit('ERROR: too many tabs in isa', logger)
            for tab in tabs:
                for tr in tab.find_all("tr"):
                    ths = tr.find_all("th")
                    tds = tr.find_all("td")
                    if ths and tds:
                        # <th>label</th><td>value</td>
                        label = ths[0].text.lower()
                        value = tds[0].text.lower()
                    elif not ths and len(tds) > 1:
                        # <td>label</td><td>value</td>; two cells are needed,
                        # so a lone <td> is reported below instead of raising
                        # IndexError on tds[1].
                        label = tds[0].text.lower()
                        value = tds[1].text.lower()
                    else:
                        _more_info_alert_and_exit(
                            'ERROR: unhandled case: ' + url, logger)
                    if re.match('interest rates.*', label):
                        pc = themortgagemeter_utils.get_percentage(value, logger)
                        row = savings_data.copy()
                        row['gross_percent'] = pc
                        row['aer_percent'] = pc
                        rows.append(row)
    return rows


def _more_info_bond_rows(bsobj, savings_data, url, logger):
    """Build the savings records for a fixed-saver or tracker-bond rates table.

    The caller only invokes this for URLs matching ``fixed-online-saver``,
    ``fixed-saver`` or ``tracker-bond``; anything that is not one of the two
    fixed forms is therefore handled as a tracker bond.
    """
    rows = []
    if re.match('.*fixed-online-saver.*', url) or re.match(
            '.*fixed-saver.*', url):
        is_tracker = False
        headings = bsobj.find_all("h3", text="Current Rates")
        if not headings:
            headings = bsobj.find_all("h3", text="Current rates")
    else:
        is_tracker = True
        headings = []
        for h4 in bsobj.find_all("h4"):
            if h4.text == "Current rates and apply":
                headings.append(h4)
                break
    if not headings:
        # Alert only; the empty-result check in the caller ends the run.
        themortgagemeter_utils.record_alert(
            'No items from expected h3/4 match!', logger,
            themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
    for heading in headings:
        # Calling a tag is shorthand for find_all(): every descendant tag of
        # the heading's parent is inspected.
        for node in heading.parent():
            tbodys = node.find_all("tbody")
            if not tbodys:
                continue
            if is_tracker:
                # For tracker bonds only use containers holding a row whose
                # first cell reads "Term"; rows without <td> cells are
                # ignored here rather than raising IndexError.
                all_cells = (tr.find_all("td")
                             for tbody in tbodys
                             for tr in tbody.find_all("tr"))
                if not any(tds and tds[0].text == "Term" for tds in all_cells):
                    continue
            for tbody in tbodys:
                table_savings_period = "unset"
                for tr_count, tr in enumerate(tbody.find_all("tr")):
                    if is_tracker and tr_count == 0:
                        # skip the first row
                        continue
                    # clone the savings_data ready to write to
                    row = savings_data.copy()
                    if is_tracker and tr_count > 1:
                        # The term cell is read only on the first data row of
                        # a tracker table; later rows reuse it and start at
                        # the balance column.
                        if table_savings_period == "unset":
                            _more_info_alert_and_exit(
                                'ERROR: table_savings_period should not be unset',
                                logger)
                        row['savings_period'] = table_savings_period
                        td_count = 1
                    else:
                        td_count = 0
                    for td in tr.find_all("td"):
                        # 0 - term
                        # 1 - balance
                        # 2 - Gross
                        # 3 - AER
                        # 4 - NET (ignore)
                        # Ignore remainder of cols
                        text = td.text.lower().strip().encode('utf-8')
                        if td_count == 0:
                            # remembered for later rows of the same table
                            table_savings_period = themortgagemeter_utils.get_months(
                                text, logger)
                            row['savings_period'] = table_savings_period
                        elif td_count == 1:
                            res = savings_util.get_money_range(text, logger)
                            row['min_amt'] = res[0]
                            row['max_amt'] = res[1]
                        elif td_count == 2:
                            row['gross_percent'] = themortgagemeter_utils.get_percentage(
                                text, logger)
                        elif td_count == 3:
                            row['aer_percent'] = themortgagemeter_utils.get_percentage(
                                text, logger)
                            # nothing after AER is needed
                            break
                        td_count = td_count + 1
                    rows.append(row)
    return rows


def process_more_info_page(savings_data, url, logger):
    """Scrape one Halifax savings "more info" page into a list of savings records.

    The page is fetched with ``themortgagemeter_utils.get_page`` and parsed by
    one of several strategies, chosen from ``savings_data['isa']`` and from
    the product name embedded in ``url``.

    Args:
        savings_data: Dict of fields already known for the product.  Every
            returned record is a copy of it with rate fields filled in.  For
            the online-saver, regular-saver and everyday-saver pages the dict
            itself is also modified ('interest_paid' and, for regular savers,
            the regular-saver fields and 'variability').
            NOTE(review): callers reusing this dict across pages will see
            those values persist - confirm that is intended.
        url: Address of the page to scrape.
        logger: Logger handed to every helper call.

    Returns:
        A list of dicts.  It is empty only for ``/branch-accounts/`` URLs.

    Raises:
        SystemExit: via ``exit()`` when the page layout is not one of the
            handled shapes, the URL is unhandled, or nothing was extracted.
    """
    bsobj = themortgagemeter_utils.get_page(
        False, 'static_html/halifax/savings-accounts.html', url, logger)
    savings_array = []
    print(url)
    if savings_data['isa'] == 'Y':
        savings_array = _more_info_isa_rows(bsobj, savings_data, url, logger)
    elif re.match('.*fixed-online-saver.*', url) or re.match(
            '.*tracker-bond.*', url) or re.match('.*fixed-saver.*', url):
        savings_array = _more_info_bond_rows(bsobj, savings_data, url, logger)
    elif re.match('.*/online-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _more_info_rate_lines(bsobj, True):
            row = savings_data.copy()
            row['gross_percent'] = themortgagemeter_utils.get_percentage(
                line, logger)
            row['aer_percent'] = row['gross_percent']
            res = savings_util.get_money_range(line, logger)
            row['min_amt'] = res[0]
            row['max_amt'] = res[1]
            savings_array.append(row)
    elif re.match('.*/regular-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        savings_data['regular_saver_frequency_period'] = '1'
        savings_data['regular_saver_frequency_type'] = 'M'
        savings_data['regular_saver'] = 'Y'
        # Always fixed
        savings_data['variability'] = 'F'
        for line in _more_info_rate_lines(bsobj, True):
            row = savings_data.copy()
            row['gross_percent'] = themortgagemeter_utils.get_percentage(
                line, logger)
            if row['gross_percent'] == '':
                # no percentage on this line - nothing to record
                continue
            row['aer_percent'] = row['gross_percent']
            # Hard-code to 25-250 for now, this seems standard
            row['regular_saver_min_amt'] = '25'
            row['regular_saver_max_amt'] = '250'
            savings_array.append(row)
    elif re.match('.*/everyday-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _more_info_rate_lines(bsobj, False):
            # only the lines mentioning "gross" carry a rate
            if not re.match('.*gross.*', line):
                continue
            row = savings_data.copy()
            row['gross_percent'] = themortgagemeter_utils.get_percentage(
                line, logger)
            row['aer_percent'] = row['gross_percent']
            # TODO: bonus_frequency_period set to 1, or get from data?
            savings_array.append(row)
    elif re.match('.*/branch-accounts/.*', url):
        # Returned as-is (empty), bypassing the emptiness alert below.
        return savings_array
    else:
        logger.info('unhandled:' + url)
        exit()
    if savings_array == []:
        _more_info_alert_and_exit(
            'ERROR: returning nothing from a page', logger)
    logger.info('returning savings_array:' + str(savings_array))
    return savings_array
def _more_info_alert_and_exit(message, logger):
    """Record ``message`` as an alert, then stop the process with ``exit()``."""
    themortgagemeter_utils.record_alert(
        message, logger, themortgagemeter_db.db_connection,
        themortgagemeter_db.cursor)
    exit()


def _more_info_rate_lines(bsobj, split_alternatives):
    """Return the non-empty text lines around the first element with class ``apr``.

    The text is taken from the grandparent of that element and split on
    newlines.  When ``split_alternatives`` is true, a line matching
    ``' or <digit>'`` is cut once at ``' or '`` so each half is parsed on its
    own.  Raises IndexError when the page has no ``apr`` element.
    """
    # get the apr class element, as that contains the text we need
    apr_text = bsobj.find_all(
        attrs={'class': 'apr'})[0].parent.parent.text.encode('utf-8')
    lines = []
    for line in apr_text.split('\n'):
        if split_alternatives and re.match('.* or [0-9].*', line):
            lines.extend(line.split(' or ', 1))
        else:
            lines.append(line)
    return [line for line in lines if line != '']


def _more_info_isa_rows(bsobj, savings_data, url, logger):
    """Build the savings records for an ISA page from its "Summary box" tables."""
    rows = []
    fixed_isa = re.match(".*isa-saver-fixed.*", url)
    for heading in bsobj.find_all("h2", text="Summary box"):
        # Calling a tag is shorthand for find_all(): this walks every
        # descendant tag of the heading's parent.
        for node in heading.parent():
            tabs = node.find_all("table")
            if not tabs:
                continue
            if fixed_isa:
                # Exactly two tables are expected; the alert text is kept
                # as-is although it fires for any other count.
                if len(tabs) != 2:
                    _more_info_alert_and_exit(
                        'ERROR: too many tabs in isa', logger)
                # The first table is discarded; the second holds the rates.
                for tab in tabs[1:]:
                    tbody = tab.find_all("tbody")[0]
                    for tr in tbody.find_all("tr"):
                        tds = tr.find_all("td")
                        row = savings_data.copy()
                        row['savings_period'] = themortgagemeter_utils.get_months(
                            tds[0].text.strip().encode('utf-8'), logger)
                        row['aer_percent'] = themortgagemeter_utils.get_percentage(
                            tds[1].text.strip().encode('utf-8'), logger)
                        row['gross_percent'] = row['aer_percent']
                        rows.append(row)
                continue
            if len(tabs) > 1:
                _more_info_alert_and_exit('ERROR: too many tabs in isa', logger)
            for tab in tabs:
                for tr in tab.find_all("tr"):
                    ths = tr.find_all("th")
                    tds = tr.find_all("td")
                    if ths and tds:
                        # <th>label</th><td>value</td>
                        label = ths[0].text.lower()
                        value = tds[0].text.lower()
                    elif not ths and len(tds) > 1:
                        # <td>label</td><td>value</td>; two cells are needed,
                        # so a lone <td> is reported below instead of raising
                        # IndexError on tds[1].
                        label = tds[0].text.lower()
                        value = tds[1].text.lower()
                    else:
                        _more_info_alert_and_exit(
                            'ERROR: unhandled case: ' + url, logger)
                    if re.match('interest rates.*', label):
                        pc = themortgagemeter_utils.get_percentage(value, logger)
                        row = savings_data.copy()
                        row['gross_percent'] = pc
                        row['aer_percent'] = pc
                        rows.append(row)
    return rows


def _more_info_bond_rows(bsobj, savings_data, url, logger):
    """Build the savings records for a fixed-saver or tracker-bond rates table.

    The caller only invokes this for URLs matching ``fixed-online-saver``,
    ``fixed-saver`` or ``tracker-bond``; anything that is not one of the two
    fixed forms is therefore handled as a tracker bond.
    """
    rows = []
    if re.match('.*fixed-online-saver.*', url) or re.match(
            '.*fixed-saver.*', url):
        is_tracker = False
        headings = bsobj.find_all("h3", text="Current Rates")
        if not headings:
            headings = bsobj.find_all("h3", text="Current rates")
    else:
        is_tracker = True
        headings = []
        for h4 in bsobj.find_all("h4"):
            if h4.text == "Current rates and apply":
                headings.append(h4)
                break
    if not headings:
        # Alert only; the empty-result check in the caller ends the run.
        themortgagemeter_utils.record_alert(
            'No items from expected h3/4 match!', logger,
            themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
    for heading in headings:
        # Calling a tag is shorthand for find_all(): every descendant tag of
        # the heading's parent is inspected.
        for node in heading.parent():
            tbodys = node.find_all("tbody")
            if not tbodys:
                continue
            if is_tracker:
                # For tracker bonds only use containers holding a row whose
                # first cell reads "Term"; rows without <td> cells are
                # ignored here rather than raising IndexError.
                all_cells = (tr.find_all("td")
                             for tbody in tbodys
                             for tr in tbody.find_all("tr"))
                if not any(tds and tds[0].text == "Term" for tds in all_cells):
                    continue
            for tbody in tbodys:
                table_savings_period = "unset"
                for tr_count, tr in enumerate(tbody.find_all("tr")):
                    if is_tracker and tr_count == 0:
                        # skip the first row
                        continue
                    # clone the savings_data ready to write to
                    row = savings_data.copy()
                    if is_tracker and tr_count > 1:
                        # The term cell is read only on the first data row of
                        # a tracker table; later rows reuse it and start at
                        # the balance column.
                        if table_savings_period == "unset":
                            _more_info_alert_and_exit(
                                'ERROR: table_savings_period should not be unset',
                                logger)
                        row['savings_period'] = table_savings_period
                        td_count = 1
                    else:
                        td_count = 0
                    for td in tr.find_all("td"):
                        # 0 - term
                        # 1 - balance
                        # 2 - Gross
                        # 3 - AER
                        # 4 - NET (ignore)
                        # Ignore remainder of cols
                        text = td.text.lower().strip().encode('utf-8')
                        if td_count == 0:
                            # remembered for later rows of the same table
                            table_savings_period = themortgagemeter_utils.get_months(
                                text, logger)
                            row['savings_period'] = table_savings_period
                        elif td_count == 1:
                            res = savings_util.get_money_range(text, logger)
                            row['min_amt'] = res[0]
                            row['max_amt'] = res[1]
                        elif td_count == 2:
                            row['gross_percent'] = themortgagemeter_utils.get_percentage(
                                text, logger)
                        elif td_count == 3:
                            row['aer_percent'] = themortgagemeter_utils.get_percentage(
                                text, logger)
                            # nothing after AER is needed
                            break
                        td_count = td_count + 1
                    rows.append(row)
    return rows


def process_more_info_page(savings_data, url, logger):
    """Scrape one Halifax savings "more info" page into a list of savings records.

    The page is fetched with ``themortgagemeter_utils.get_page`` and parsed by
    one of several strategies, chosen from ``savings_data['isa']`` and from
    the product name embedded in ``url``.

    Args:
        savings_data: Dict of fields already known for the product.  Every
            returned record is a copy of it with rate fields filled in.  For
            the online-saver, regular-saver and everyday-saver pages the dict
            itself is also modified ('interest_paid' and, for regular savers,
            the regular-saver fields and 'variability').
            NOTE(review): callers reusing this dict across pages will see
            those values persist - confirm that is intended.
        url: Address of the page to scrape.
        logger: Logger handed to every helper call.

    Returns:
        A list of dicts.  It is empty only for ``/branch-accounts/`` URLs.

    Raises:
        SystemExit: via ``exit()`` when the page layout is not one of the
            handled shapes, the URL is unhandled, or nothing was extracted.
    """
    bsobj = themortgagemeter_utils.get_page(
        False, 'static_html/halifax/savings-accounts.html', url, logger)
    savings_array = []
    print(url)
    if savings_data['isa'] == 'Y':
        savings_array = _more_info_isa_rows(bsobj, savings_data, url, logger)
    elif re.match('.*fixed-online-saver.*', url) or re.match(
            '.*tracker-bond.*', url) or re.match('.*fixed-saver.*', url):
        savings_array = _more_info_bond_rows(bsobj, savings_data, url, logger)
    elif re.match('.*/online-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _more_info_rate_lines(bsobj, True):
            row = savings_data.copy()
            row['gross_percent'] = themortgagemeter_utils.get_percentage(
                line, logger)
            row['aer_percent'] = row['gross_percent']
            res = savings_util.get_money_range(line, logger)
            row['min_amt'] = res[0]
            row['max_amt'] = res[1]
            savings_array.append(row)
    elif re.match('.*/regular-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        savings_data['regular_saver_frequency_period'] = '1'
        savings_data['regular_saver_frequency_type'] = 'M'
        savings_data['regular_saver'] = 'Y'
        # Always fixed
        savings_data['variability'] = 'F'
        for line in _more_info_rate_lines(bsobj, True):
            row = savings_data.copy()
            row['gross_percent'] = themortgagemeter_utils.get_percentage(
                line, logger)
            if row['gross_percent'] == '':
                # no percentage on this line - nothing to record
                continue
            row['aer_percent'] = row['gross_percent']
            # Hard-code to 25-250 for now, this seems standard
            row['regular_saver_min_amt'] = '25'
            row['regular_saver_max_amt'] = '250'
            savings_array.append(row)
    elif re.match('.*/everyday-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _more_info_rate_lines(bsobj, False):
            # only the lines mentioning "gross" carry a rate
            if not re.match('.*gross.*', line):
                continue
            row = savings_data.copy()
            row['gross_percent'] = themortgagemeter_utils.get_percentage(
                line, logger)
            row['aer_percent'] = row['gross_percent']
            # TODO: bonus_frequency_period set to 1, or get from data?
            savings_array.append(row)
    elif re.match('.*/branch-accounts/.*', url):
        # Returned as-is (empty), bypassing the emptiness alert below.
        return savings_array
    else:
        logger.info('unhandled:' + url)
        exit()
    if savings_array == []:
        _more_info_alert_and_exit(
            'ERROR: returning nothing from a page', logger)
    logger.info('returning savings_array:' + str(savings_array))
    return savings_array
Example #4
0
def _post_office_first_percentage(cell_text, logger):
    """Return the first non-empty percentage parsed from the lines of ``cell_text``.

    Each newline-separated line is passed to
    ``themortgagemeter_utils.get_percentage``; the result for the last line
    tried is returned, which is '' when no line yielded a percentage.
    """
    percent = ''
    for line in cell_text.split('\n'):
        percent = themortgagemeter_utils.get_percentage(line, logger)
        if percent != '':
            break
    return percent


def get_product_page(static, url, eligibilities):
    """Scrape the Post Office mortgage range page and record every product row.

    For each ``<h2>`` whose text contains a percentage, the following sibling
    with class ``displaytable`` is read row by row:

    * td0 = years of fixed or tracker
    * td1 = initial rate
    * then the next cell holding a percentage = svr
    * then the next cell holding a percentage = apr
    * the cell after that = fees

    Each row is inserted once per entry in ``eligibilities`` through
    ``mc_util.handle_mortgage_insert``.

    Args:
        static: Passed straight through to ``themortgagemeter_utils.get_page``.
        url: Address of the page; also stored with every inserted row.
        eligibilities: Iterable of eligibility codes to insert each row under.

    Returns:
        None.

    Raises:
        IndexError: if a row runs out of cells before an SVR, an APR and a
            fee cell have been found, or its first cell has fewer than three
            words.
    """
    logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/post_office/our-full-range.html', url, logger)
    term = str(25 * 12)
    for ltv_elem in bsobj.find_all('h2'):
        ltv_elem_str = ltv_elem.string
        if not ltv_elem_str:
            continue
        ltv_percent = themortgagemeter_utils.get_percentage(
            ltv_elem_str, logger)
        if ltv_percent == '':
            continue
        # For post office, first reported % is 100 - LTV
        ltv_percent = str(100 - int(ltv_percent))
        div = ltv_elem.find_next_siblings(attrs={'class': 'displaytable'},
                                          limit=1)
        if not div:
            continue
        logger.debug('here')
        logger.debug(div)
        for tr in div[0].find_all('tr'):
            logger.debug(tr)
            tds = tr.find_all('td')
            # A mortgage can only be extracted from a row with several cells.
            if len(tds) <= 1:
                continue
            first_cell = tds[0].text.encode('utf-8')
            period_lines = first_cell.split('\n')
            logger.debug(period_lines)
            # Sometimes we get empty fields - we remove them here.
            period_lines = [line for line in period_lines if line != '']
            initial_period = str(
                themortgagemeter_utils.get_months(period_lines[0], logger))
            # TODO: generic text cleansing function
            # The \xc2 and \xa0 bytes are replaced/dropped before splitting
            # into words; the third word names the product type.
            type_str = re.sub(
                '\xa0', '', re.sub('\xc2', ' ', first_cell)).split()[2]
            logger.debug('type_str: ' + type_str)
            if type_str == 'fixed':
                mortgage_type = 'F'
            elif type_str == 'tracker':
                mortgage_type = 'T'
            else:
                themortgagemeter_utils.record_alert(
                    'ERROR: PSTFFC neither fixed nor tracker: ' + type_str,
                    logger, themortgagemeter_db.db_connection,
                    themortgagemeter_db.cursor)
                # Skip the row: carrying on would insert it with the previous
                # row's mortgage_type, or fail on an unbound name for the
                # first row.
                continue
            i = 1
            rate_percent = _post_office_first_percentage(
                tds[i].text.encode('utf-8'), logger)
            # SVR and APR are the next two cells that contain a percentage;
            # cells without one are stepped over.
            svr_percent = ''
            while svr_percent == '':
                i += 1
                svr_percent = _post_office_first_percentage(
                    tds[i].text.encode('utf-8'), logger)
            apr_percent = ''
            while apr_percent == '':
                i += 1
                apr_percent = _post_office_first_percentage(
                    tds[i].text.encode('utf-8'), logger)
            i += 1
            # Drop the first two bytes (presumably a UTF-8 currency sign -
            # TODO confirm) and the thousands separators.
            booking_fee = tds[i].text.strip().encode(
                'utf-8')[2:].replace(',', '')
            # NOTE(review): institution_code is not assigned in this
            # function; it must exist at module level.
            for eligibility in eligibilities:
                mc_util.handle_mortgage_insert(
                    institution_code, mortgage_type, rate_percent,
                    svr_percent, apr_percent, ltv_percent, initial_period,
                    booking_fee, term, url, eligibility, logger)
Example #5
0
def _post_office_first_percentage(cell_text, logger):
    """Return the first non-empty percentage parsed from the lines of ``cell_text``.

    Each newline-separated line is passed to
    ``themortgagemeter_utils.get_percentage``; the result for the last line
    tried is returned, which is '' when no line yielded a percentage.
    """
    percent = ''
    for line in cell_text.split('\n'):
        percent = themortgagemeter_utils.get_percentage(line, logger)
        if percent != '':
            break
    return percent


def get_product_page(static, url, eligibilities):
    """Scrape the Post Office mortgage range page and record every product row.

    For each ``<h2>`` whose text contains a percentage, the following sibling
    with class ``displaytable`` is read row by row:

    * td0 = years of fixed or tracker
    * td1 = initial rate
    * then the next cell holding a percentage = svr
    * then the next cell holding a percentage = apr
    * the cell after that = fees

    Each row is inserted once per entry in ``eligibilities`` through
    ``mc_util.handle_mortgage_insert``.

    Args:
        static: Passed straight through to ``themortgagemeter_utils.get_page``.
        url: Address of the page; also stored with every inserted row.
        eligibilities: Iterable of eligibility codes to insert each row under.

    Returns:
        None.

    Raises:
        IndexError: if a row runs out of cells before an SVR, an APR and a
            fee cell have been found, or its first cell has fewer than three
            words.
    """
    logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/post_office/our-full-range.html', url, logger)
    term = str(25 * 12)
    for ltv_elem in bsobj.find_all('h2'):
        ltv_elem_str = ltv_elem.string
        if not ltv_elem_str:
            continue
        ltv_percent = themortgagemeter_utils.get_percentage(
            ltv_elem_str, logger)
        if ltv_percent == '':
            continue
        # For post office, first reported % is 100 - LTV
        ltv_percent = str(100 - int(ltv_percent))
        div = ltv_elem.find_next_siblings(attrs={'class': 'displaytable'},
                                          limit=1)
        if not div:
            continue
        logger.debug('here')
        logger.debug(div)
        for tr in div[0].find_all('tr'):
            logger.debug(tr)
            tds = tr.find_all('td')
            # A mortgage can only be extracted from a row with several cells.
            if len(tds) <= 1:
                continue
            first_cell = tds[0].text.encode('utf-8')
            period_lines = first_cell.split('\n')
            logger.debug(period_lines)
            # Sometimes we get empty fields - we remove them here.
            period_lines = [line for line in period_lines if line != '']
            initial_period = str(
                themortgagemeter_utils.get_months(period_lines[0], logger))
            # TODO: generic text cleansing function
            # The \xc2 and \xa0 bytes are replaced/dropped before splitting
            # into words; the third word names the product type.
            type_str = re.sub(
                '\xa0', '', re.sub('\xc2', ' ', first_cell)).split()[2]
            logger.debug('type_str: ' + type_str)
            if type_str == 'fixed':
                mortgage_type = 'F'
            elif type_str == 'tracker':
                mortgage_type = 'T'
            else:
                themortgagemeter_utils.record_alert(
                    'ERROR: PSTFFC neither fixed nor tracker: ' + type_str,
                    logger, themortgagemeter_db.db_connection,
                    themortgagemeter_db.cursor)
                # Skip the row: carrying on would insert it with the previous
                # row's mortgage_type, or fail on an unbound name for the
                # first row.
                continue
            i = 1
            rate_percent = _post_office_first_percentage(
                tds[i].text.encode('utf-8'), logger)
            # SVR and APR are the next two cells that contain a percentage;
            # cells without one are stepped over.
            svr_percent = ''
            while svr_percent == '':
                i += 1
                svr_percent = _post_office_first_percentage(
                    tds[i].text.encode('utf-8'), logger)
            apr_percent = ''
            while apr_percent == '':
                i += 1
                apr_percent = _post_office_first_percentage(
                    tds[i].text.encode('utf-8'), logger)
            i += 1
            # Drop the first two bytes (presumably a UTF-8 currency sign -
            # TODO confirm) and the thousands separators.
            booking_fee = tds[i].text.strip().encode(
                'utf-8')[2:].replace(',', '')
            # NOTE(review): institution_code is not assigned in this
            # function; it must exist at module level.
            for eligibility in eligibilities:
                mc_util.handle_mortgage_insert(
                    institution_code, mortgage_type, rate_percent,
                    svr_percent, apr_percent, ltv_percent, initial_period,
                    booking_fee, term, url, eligibility, logger)
def _skipton_fee_text(cell):
	"""Return the fee shown in a table cell as a plain digit string.

	The first two bytes of the UTF-8 encoded text are dropped (the encoded
	pound sign is two bytes long) and thousands separators are removed.
	A "No Fee" cell loses its first two bytes as well, leaving " Fee",
	which is reported as "0".
	"""
	fee_text = cell.string.encode('utf_8')[2:].replace(',','')
	if fee_text.strip() == "Fee":
		return "0"
	return fee_text


def _parse_skipton_product_table(table,rate_td_index,logger):
	"""Read rates and fees from the fixed row positions of a product table.

	table         -- BeautifulSoup element for the product's details table
	rate_td_index -- index of the <td> on row 3 that holds the product rate
	logger        -- logger handed to themortgagemeter_utils.get_percentage

	Returns (rate_percent, svr_percent, apr_percent, application_fee,
	booking_fee). Rates keep the default 0 and fees the default "0" when
	the table has too few rows to supply them.
	"""
	rate_percent    = 0
	svr_percent     = 0
	apr_percent     = 0
	# Fees default to strings so the caller can always int() them; an
	# integer default used to crash on .strip() when the table was short.
	application_fee = "0"
	booking_fee     = "0"
	for tr_count,tr in enumerate(table.find_all('tr'),1):
		if tr_count == 3:
			rate_percent = themortgagemeter_utils.get_percentage(tr.find_all('td')[rate_td_index].string,logger)
		elif tr_count == 4:
			svr_percent = themortgagemeter_utils.get_percentage(tr.find_all('td')[1].string,logger)
		elif tr_count == 5:
			apr_percent = themortgagemeter_utils.get_percentage(tr.find_all('td')[0].string,logger)
		elif tr_count == 7:
			application_fee = _skipton_fee_text(tr.find_all('td')[0])
		elif tr_count == 8:
			booking_fee = _skipton_fee_text(tr.find_all('td')[0])
	return rate_percent,svr_percent,apr_percent,application_fee,booking_fee


def get_product_pages(static,base_url,suffix,mortgage_type,href_re):
	"""Follow every product link on a Skipton listing page and store each product.

	static        -- passed straight through to themortgagemeter_utils.get_page
	base_url      -- prefix joined with suffix for the listing page, and with
	                 each anchor's href for the product page
	suffix        -- path of the listing page relative to base_url
	mortgage_type -- mortgage type code passed to mc_util.handle_mortgage_insert
	href_re       -- pattern used to select the product anchors on the listing

	Each link is matched against fr_re, tracker_re and discount_re in that
	order. Group 1 of the match is taken to be the initial period in years
	(assumption - the patterns are defined elsewhere; confirm) and is stored
	in months; group 2 is the LTV percentage. Links matching ftb_re raise an
	alert and are skipped.

	Raises Exception for a link that matches none of the known patterns.
	"""
	logger = logging.getLogger('retrieve')
	bsobj = themortgagemeter_utils.get_page(static,'static_html/skipton/fixed_rate_mortgages.html',base_url + suffix,logger)
	# assume default of 25 years
	term = str(25 * 12)
	# eligibilities - first time buyers have own page, so all others?
	eligibilities = ['NMH','NRM','ERM','EMH','EBM','EED']
	# (link pattern, index of the rate cell on row 3 of the product table)
	# NOTE(review): tracker pages read the rate from the first cell while
	# fixed and discount pages read the second - kept as found, confirm
	# against the live page layout.
	product_kinds = ((fr_re,1),(tracker_re,0),(discount_re,1))
	for anchor in bsobj.find_all(href=href_re):
		link = anchor.get('href')
		url = base_url + link
		link_match = None
		rate_td_index = None
		for link_re,td_index in product_kinds:
			link_match = re.search(link_re,link)
			if link_match:
				rate_td_index = td_index
				break
		if not link_match:
			if re.search(ftb_re,link):
				themortgagemeter_utils.record_alert('ERROR: SKIPTON first time buyer seen for the first time',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
				continue
			raise Exception("Unhandled link " + url,'')
		# Years to months. The tracker and discount branches used to multiply
		# by 10, which disagreed with the fixed-rate branch and with term.
		initial_period = str(int(link_match.group(1)) * 12)
		ltv_percent = str(int(link_match.group(2)))
		# Now go to link
		subpage_bsobj = themortgagemeter_utils.get_page(static,'N/A',url,logger)
		table = subpage_bsobj.find_all(attrs={'id' : 'centralContent'},limit=1)[0].find_all('table',limit=1)[0]
		rate_percent,svr_percent,apr_percent,application_fee,booking_fee = _parse_skipton_product_table(table,rate_td_index,logger)
		# The stored booking fee is the sum of the booking and application fees
		booking_fee = str(int(booking_fee) + int(application_fee))
		for eligibility in eligibilities:
			mc_util.handle_mortgage_insert(institution_code,mortgage_type,rate_percent,svr_percent,apr_percent,ltv_percent,initial_period,booking_fee,term,url,eligibility,logger)