Beispiel #1
0
def get_product_pages(static, url, logger):
    """Load the Tesco product XML and record every mortgage it lists.

    The XML is parsed from ``static_html/tesco/Products.xml`` when *static*
    is truthy, otherwise it is fetched from *url*.  Each ``Mortgage`` element
    found under ``<purchase>/<rate type>/LTV`` is passed to
    ``mc_util.handle_mortgage_insert`` once per eligibility code that applies
    to its purchase type.

    Args:
        static: Truthy to parse the local XML snapshot instead of fetching.
        url: Address of the live product XML.
        logger: Logger handed on to the helper functions.

    ``institution_code`` is read from module scope.
    """
    logger.debug("In get_product_pages: " + url)
    # The page used to be fetched a second time up front and the result thrown
    # away; that unused request (which also ran in static mode) is gone.
    if static:
        root = ET.parse('static_html/tesco/Products.xml').getroot()
    else:
        root = ET.fromstring(
            themortgagemeter_utils.get_page(False, '', url, logger, True))
    # 25 years expressed in months
    term = str(25 * 12)
    # XML section name -> eligibility codes recorded for its products
    purchase_eligibilities = (
        ('HousePurchase', ['NFTB', 'NMH']),
        ('Remortgage', ['NRM']),
    )
    # XML section name -> mortgage type code
    rate_type_codes = (
        ('FixedRate', 'F'),
        ('TrackerRate', 'T'),
    )
    for purchase, eligibilities in purchase_eligibilities:
        for rate_type, mortgage_type in rate_type_codes:
            rate_set = root.find(purchase).find(rate_type)
            for rate in rate_set.findall('LTV'):
                ltv_percent = rate.get('max')
                for mortgage in rate.findall('Mortgage'):
                    rate_percent = mortgage.find('initialRate').text
                    apr_percent = mortgage.find('APR').text
                    svr_percent = mortgage.find('variableRate').text
                    # only the first line of the name is used to find the period
                    name = mortgage.find('name').text.split('\n')[0]
                    initial_period = themortgagemeter_utils.get_months(
                        name, logger)
                    # booking fee and product fee are recorded as one total
                    booking_fee = str(
                        int(mortgage.find('bookingFee').text) +
                        int(mortgage.find('productFee').text))
                    for eligibility in eligibilities:
                        mc_util.handle_mortgage_insert(
                            institution_code, mortgage_type, rate_percent,
                            svr_percent, apr_percent, ltv_percent,
                            initial_period, booking_fee, term,
                            'http://www.tescobank.com/personal/finance/mortgages',
                            eligibility, logger)
def process_title(title, logger):
    """Split a product title into its initial period and mortgage type.

    Returns:
        A ``(initial_period, mortgage_type)`` tuple: the result of
        ``themortgagemeter_utils.get_months`` converted to ``str``, and
        whatever ``mc_util.get_mortgage_type`` returns for the same title.
    """
    # Open question carried over from the original notes: how titles such as
    # "Freedom to Fix", "Fixed Cashback" or "Flexi Tracker" should be typed.
    period_text = str(themortgagemeter_utils.get_months(title, logger))
    return (period_text, mc_util.get_mortgage_type(title, logger))
Beispiel #3
0
def halifax_ftb_page(static, url, mortgage_type, eligibility, logger):
    """Scrape a Halifax mortgage rates table and record one mortgage per row.

    Every ``<tr>`` of the page is flattened into a list of stripped, UTF-8
    encoded strings.  A row with at least 20 strings whose fourth string
    contains ``%`` is treated as a product row and handed to
    ``mc_util.handle_mortgage_insert`` exactly once.

    Args:
        static: Passed to ``themortgagemeter_utils.get_page`` together with
            the local snapshot path.
        url: Page address; also recorded with each mortgage.
        mortgage_type: Mortgage type code recorded for every row.
        eligibility: Eligibility code recorded for every row.
        logger: Replaced by the 'retrieve' logger on the first statement.

    ``institution_code`` and ``term`` are read from module scope.
    """
    # NOTE(review): the logger argument is overridden here, as before.
    logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/halifax/fixed.html', url, logger)
    for tr in bsobj.find_all('tr'):
        mortgage_details = [
            string.strip(d.encode('utf-8')) for d in tr.strings
        ]
        # The row is examined once, after it has been read completely.  The
        # check used to run inside the string loop, so it fired again for each
        # string from the 20th to the 24th and inserted the same row
        # repeatedly.  Only indexes up to 14 are read, so the extracted values
        # are the same as on the first of those passes.
        if len(mortgage_details) < 20:
            continue
        if mortgage_details[3].find('%') == -1:
            continue
        initial_period = mortgage_details[1]
        if initial_period[0] == 'x':
            # handle special case of "dummy row"
            continue
        # drop the trailing '%'
        rate_percent = mortgage_details[3][:-1]
        # strip('\xc2') / strip('\xa0') remove the bytes of a UTF-8 encoded
        # non-breaking space left around the figure
        svr_percent = mortgage_details[6].split()[0][:-1].strip(
            '\xc2').strip('\xa0')
        apr_percent = mortgage_details[10].split()[0][:-1]
        # skip the two leading bytes (presumably an encoded pound sign)
        booking_fee = mortgage_details[12][2:].replace(',', '')
        if re.search(r'years', initial_period) and not re.search(
                r'[0-9]+ years', initial_period):
            # "years" without "<digits> " in front of it: the first character
            # is taken as the number of years
            initial_period = str(int(initial_period[0]) * 12)
        elif re.search(r'months', initial_period) and not re.search(
                r'[0-9]+ month', initial_period):
            # same situation for months: keep the first two characters
            initial_period = initial_period[0:2]
        else:
            initial_period = str(
                themortgagemeter_utils.get_months(initial_period, logger))
        # LTV is stored as 100 minus the leading figure of string 14
        # (presumably a deposit percentage or range - TODO confirm)
        deposit_parts = mortgage_details[14].split('-')
        if len(deposit_parts) > 1:
            ltv_percent = str(100 - int(deposit_parts[0]))
        else:
            ltv_percent = str(100 - int(mortgage_details[14][0:2]))
        mc_util.handle_mortgage_insert(
            institution_code, mortgage_type, rate_percent, svr_percent,
            apr_percent, ltv_percent, initial_period, booking_fee, term, url,
            eligibility, logger)
def process_title(title, logger):
    """Break a product title down into initial period and mortgage type.

    Returns:
        ``(initial_period, mortgage_type)`` where the period is the
        ``str`` form of ``themortgagemeter_utils.get_months(title, logger)``
        and the type is the value of ``mc_util.get_mortgage_type``.
    """
    # Unresolved in the original notes: the type of "Freedom to Fix",
    # "Fixed Cashback" and "Flexi Tracker" titles.
    months_as_text = str(themortgagemeter_utils.get_months(title, logger))
    type_code = mc_util.get_mortgage_type(title, logger)
    return (months_as_text, type_code)
Beispiel #5
0
def halifax_remortgage_page(static, url, mortgage_type, eligibility, logger):
    """Scrape a Halifax remortgage rates table and record its product rows.

    Each ``<tr>`` is flattened into a list of stripped, UTF-8 encoded
    strings.  Two row layouts are recognised: 20 to 24 strings, and exactly
    25 strings, where the SVR, APR, fee and LTV strings sit two positions
    further right.  Other rows longer than three strings are only logged.

    Args:
        static: Passed to ``themortgagemeter_utils.get_page`` together with
            the local snapshot path.
        url: Page address; also recorded with each mortgage.
        mortgage_type: Mortgage type code recorded for every row.
        eligibility: Eligibility code recorded for every row.
        logger: Logger used for debug output and handed to the helpers.

    ``institution_code`` and ``term`` are read from module scope.
    """
    def period_in_months(raw_period):
        # Turn the term string into a month count held as text.
        if re.search(r'years', raw_period) and not re.search(
                r'[0-9]+ years', raw_period):
            # "years" without "<digits> " before it: first character = years
            return str(int(raw_period[0]) * 12)
        if re.search(r'months', raw_period) and not re.search(
                r'[0-9]+ month', raw_period):
            return raw_period[0:2]
        return str(themortgagemeter_utils.get_months(raw_period, logger))

    page = themortgagemeter_utils.get_page(
        static, 'static_html/halifax/remortgage-fixed-75ltv.asp', url, logger)
    for row in page.find_all('tr'):
        cells = [string.strip(text.encode('utf-8')) for text in row.strings]
        # Example data row noted by the original author:
        # ['\n', '2 years', '\n', '4.44%', '\n', 'Currently', ' \xc2\xa03.99%', '\n', '30/11/2014', '\n', '4.3% APR', '\n', '\xc2\xa3995', '\n', '75-80%', '\n', '30/11/2014', ...]
        logger.debug(cells)
        cell_count = len(cells)
        if 19 < cell_count < 25:
            shift = 0
        elif cell_count == 25:
            # wider layout: later columns are two positions to the right
            shift = 2
        else:
            if cell_count > 3:
                logger.debug('Should this be handled?: %s', cells)
            continue
        if '%' not in cells[3]:
            # no rate in the fourth string, so not a product row
            continue
        rate_percent = cells[3][:-1]
        svr_percent = cells[6 + shift].split()[0][:-1].strip(
            '\xc2').strip('\xa0')
        apr_percent = cells[10 + shift].split()[0][:-1]
        booking_fee = cells[12 + shift][2:].replace(',', '')
        initial_period = period_in_months(cells[1])
        # upper bound of a range such as '75-80%'
        ltv_percent = cells[14 + shift].split('-')[1].strip('%')
        mc_util.handle_mortgage_insert(
            institution_code, mortgage_type, rate_percent, svr_percent,
            apr_percent, ltv_percent, initial_period, booking_fee, term, url,
            eligibility, logger)
Beispiel #6
0
def get_mortgage_page_details(static, url, eligibility):
    """Read one product page and record the mortgage it describes.

    The page is loaded with ``themortgagemeter_utils.get_page_headless`` and
    the rate, fee, LTV, APR, SVR, product type and initial period are read
    from elements identified by id, then passed to
    ``mc_util.handle_mortgage_insert``.

    Args:
        static: Not used by this function.
        url: Product page address; also recorded with the mortgage.
        eligibility: Eligibility code recorded with the mortgage.

    ``institution_code`` is read from module scope.
    """
    logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page_headless(url, logger)
    # assume default of 25 years
    term = str(25 * 12)

    def last_cell(element_id):
        # First class="last" element inside the first element with this id.
        holder = bsobj.find_all(id=element_id, limit=1)[0]
        return holder.find_all(attrs={'class': 'last'}, limit=1)[0]

    # headline rate: text before the '%'
    rate_percent = last_cell('InterestRate').string.string.split('%')[0]
    # fee: drop the two leading bytes and any thousands separators
    fee_text = last_cell('BookingFee').string.encode('utf_8')[2:].replace(
        ',', '')
    booking_fee = str(0) if fee_text == '' else fee_text
    ltv_percent = last_cell('maxLTV').string.split('%')[0]
    # APR for comparison
    apr_percent = last_cell('OverallCost').strong.string.strip().split()[0]
    # SVR - flagged "to test" by the original author
    svr_percent = last_cell('RevertingVariableRate').string.strip().split(
        '%')[0]
    if svr_percent == '-':
        # Sometimes not displayed, presumably because not applicable, so the
        # headline rate is used instead.
        svr_percent = rate_percent
    # fixed/tracker/discount, defaulting to variable; first keyword found wins
    desc_lower = bsobj.find_all(attrs={'class': 'productResults'},
                                limit=1)[0].div.h3.string.lower()
    mortgage_type = 'V'
    for keyword, type_code in (('fixed', 'F'), ('discount', 'D'),
                               ('tracker', 'T')):
        if keyword in desc_lower:
            mortgage_type = type_code
            break
    # initial period in months: the first child that is not the literal
    # 'Term of Loan' is converted and ends the scan
    for child in last_cell('RatePeriod').children:
        if child == 'Term of Loan':
            initial_period = term
            continue
        initial_period = themortgagemeter_utils.get_months(child, logger)
        break
    mc_util.handle_mortgage_insert(institution_code, mortgage_type,
                                   rate_percent, svr_percent, apr_percent,
                                   ltv_percent, initial_period, booking_fee,
                                   term, url, eligibility, logger)
Beispiel #7
0
def get_product_page(static,url):
	"""Scrape the First Direct mortgage rates page and record mortgages.

	Walks every element with class 'section', every tbody inside it and
	every tr inside that, reading the row's td cells by position:
	0 = product text (initial period and mortgage type), 1 = LTV, 2 = rate,
	3 = SVR, 4 = APR, 5 and 6 = amounts summed into the booking fee.

	static and url are passed to themortgagemeter_utils.get_page; url is
	also recorded with the mortgage.  institution_code is read from module
	scope.  The page, each tbody and each product text are printed to
	stdout.
	"""
	logger = logging.getLogger('retrieve')
	bsobj = themortgagemeter_utils.get_page(static,'static_html/first_direct/mortgage-rates',url,logger)
	# debug output of the whole page
	print bsobj
	sections = bsobj.find_all(attrs={'class':'section'})
	for section in sections:
		#print section
		#print "============================="
		tbodys = section.find_all("tbody")
		for tbody in tbodys:
			print tbody
			trs = tbody.find_all("tr")
			for tr in trs:
				tds = tr.find_all("td")
				# fee total and column index restart for every row
				booking_fee_int = 0
				count = 0
				for td in tds:
					# assume default of 25 years
					term = str(25 * 12)
					td_text = td.text.strip().encode('utf-8')
					#print count
					#print td
					if count == 0:
						#initial_period
						initial_period = themortgagemeter_utils.get_months(td_text,logger)
						#mortgage_type F/D/T/O/V
						mortgage_type = mc_util.get_mortgage_type(td_text,logger)
						#eligibility
						# NOTE(review): eligibility is never assigned in this function
						print td_text
						pass
					elif count == 1:
						#ltv_percent
						ltv_percent = themortgagemeter_utils.get_percentage(td_text,logger)
					elif count == 2:
						#rate_percent
						rate_percent = themortgagemeter_utils.get_percentage(td_text,logger)
					elif count == 3:
						#svr_percent
						svr_percent = themortgagemeter_utils.get_percentage(td_text,logger)
					elif count == 4:
						#apr_percent
						apr_percent = themortgagemeter_utils.get_percentage(td_text,logger)
					elif count == 5:
						# columns 5 and 6 are both added to the fee total
						booking_fee_int = booking_fee_int + int(themortgagemeter_utils.get_money(td_text,logger))
					elif count == 6:
						booking_fee_int = booking_fee_int + int(themortgagemeter_utils.get_money(td_text,logger))

					count = count + 1
				booking_fee = str(booking_fee_int)
			# NOTE(review): this call sits at tbody level, after the row loop, so
			# it runs once per tbody with the values left by the last row - confirm
			# whether one insert per row was intended.  eligibility must come from
			# module scope, otherwise this raises NameError.
			mc_util.handle_mortgage_insert(institution_code,mortgage_type,rate_percent,svr_percent,apr_percent,ltv_percent,initial_period,booking_fee,term,url,eligibility,logger)
def halifax_ftb_page(static, url, mortgage_type, eligibility, logger):
    """Scrape a Halifax mortgage rates table and record one mortgage per row.

    Every ``<tr>`` of the page is flattened into a list of stripped, UTF-8
    encoded strings.  A row with at least 20 strings whose fourth string
    contains ``%`` is treated as a product row and handed to
    ``mc_util.handle_mortgage_insert`` exactly once.

    Args:
        static: Passed to ``themortgagemeter_utils.get_page`` together with
            the local snapshot path.
        url: Page address; also recorded with each mortgage.
        mortgage_type: Mortgage type code recorded for every row.
        eligibility: Eligibility code recorded for every row.
        logger: Replaced by the 'retrieve' logger on the first statement.

    ``institution_code`` and ``term`` are read from module scope.
    """
    # NOTE(review): the logger argument is overridden here, as before.
    logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page(static,
                                            'static_html/halifax/fixed.html',
                                            url, logger)
    for tr in bsobj.find_all('tr'):
        mortgage_details = [
            string.strip(d.encode('utf-8')) for d in tr.strings
        ]
        # Examine the row once it is complete.  Previously the check lived
        # inside the string loop and fired again for each string from the
        # 20th to the 24th, inserting the same mortgage several times.  Only
        # indexes up to 14 are read, so the values match the first such pass.
        if len(mortgage_details) < 20:
            continue
        if mortgage_details[3].find('%') == -1:
            continue
        initial_period = mortgage_details[1]
        if initial_period[0] == 'x':
            # handle special case of "dummy row"
            continue
        # drop the trailing '%'
        rate_percent = mortgage_details[3][:-1]
        # the two strip() calls remove the bytes of a UTF-8 encoded
        # non-breaking space left around the figure
        svr_percent = mortgage_details[6].split()[0][:-1].strip(
            '\xc2').strip('\xa0')
        apr_percent = mortgage_details[10].split()[0][:-1]
        # skip the two leading bytes (presumably an encoded pound sign)
        booking_fee = mortgage_details[12][2:].replace(',', '')
        if re.search(r'years', initial_period) and not re.search(
                r'[0-9]+ years', initial_period):
            # "years" without "<digits> " in front of it: the first character
            # is taken as the number of years
            initial_period = str(int(initial_period[0]) * 12)
        elif re.search(r'months', initial_period) and not re.search(
                r'[0-9]+ month', initial_period):
            # same situation for months: keep the first two characters
            initial_period = initial_period[0:2]
        else:
            initial_period = str(
                themortgagemeter_utils.get_months(initial_period, logger))
        # LTV is stored as 100 minus the leading figure of string 14
        # (presumably a deposit percentage or range - TODO confirm)
        deposit_parts = mortgage_details[14].split('-')
        if len(deposit_parts) > 1:
            ltv_percent = str(100 - int(deposit_parts[0]))
        else:
            ltv_percent = str(100 - int(mortgage_details[14][0:2]))
        mc_util.handle_mortgage_insert(institution_code, mortgage_type,
                                       rate_percent, svr_percent, apr_percent,
                                       ltv_percent, initial_period,
                                       booking_fee, term, url, eligibility,
                                       logger)
def get_product_pages(static, url, logger):
    """Load the Tesco product XML and record every mortgage it lists.

    The XML is parsed from ``static_html/tesco/Products.xml`` when *static*
    is truthy, otherwise it is fetched from *url*.  Each ``Mortgage`` element
    found under ``<purchase>/<rate type>/LTV`` is passed to
    ``mc_util.handle_mortgage_insert`` once per eligibility code that applies
    to its purchase type.

    Args:
        static: Truthy to parse the local XML snapshot instead of fetching.
        url: Address of the live product XML.
        logger: Logger handed on to the helper functions.

    ``institution_code`` is read from module scope.
    """
    logger.debug("In get_product_pages: " + url)
    # An earlier, unused fetch of the same page has been dropped: its result
    # was never read and it forced a network request even in static mode.
    if static:
        root = ET.parse('static_html/tesco/Products.xml').getroot()
    else:
        root = ET.fromstring(
            themortgagemeter_utils.get_page(False, '', url, logger, True))
    # 25 years expressed in months
    term = str(25 * 12)
    # XML section name -> eligibility codes recorded for its products
    purchase_eligibilities = (
        ('HousePurchase', ['NFTB', 'NMH']),
        ('Remortgage', ['NRM']),
    )
    # XML section name -> mortgage type code
    rate_type_codes = (
        ('FixedRate', 'F'),
        ('TrackerRate', 'T'),
    )
    for purchase, eligibilities in purchase_eligibilities:
        for rate_type, mortgage_type in rate_type_codes:
            rate_set = root.find(purchase).find(rate_type)
            for rate in rate_set.findall('LTV'):
                ltv_percent = rate.get('max')
                for mortgage in rate.findall('Mortgage'):
                    rate_percent = mortgage.find('initialRate').text
                    apr_percent = mortgage.find('APR').text
                    svr_percent = mortgage.find('variableRate').text
                    # only the first line of the name is used to find the period
                    name = mortgage.find('name').text.split('\n')[0]
                    initial_period = themortgagemeter_utils.get_months(
                        name, logger)
                    # booking fee and product fee are recorded as one total
                    booking_fee = str(
                        int(mortgage.find('bookingFee').text) +
                        int(mortgage.find('productFee').text))
                    for eligibility in eligibilities:
                        mc_util.handle_mortgage_insert(
                            institution_code, mortgage_type, rate_percent,
                            svr_percent, apr_percent, ltv_percent,
                            initial_period, booking_fee, term,
                            'http://www.tescobank.com/personal/finance/mortgages',
                            eligibility, logger)
Beispiel #10
0
def get_mortgage_page_details(static, url, eligibility):
    """Extract the mortgage shown on a product page and record it.

    The page comes from ``themortgagemeter_utils.get_page_headless``.  Rate,
    booking fee, LTV, APR, SVR and initial period are each read from the
    ``class="last"`` element under a known id; the product type is derived
    from the heading text.  The result goes to
    ``mc_util.handle_mortgage_insert``.

    Args:
        static: Not used by this function.
        url: Product page address; also recorded with the mortgage.
        eligibility: Eligibility code recorded with the mortgage.

    ``institution_code`` is read from module scope.
    """
    logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page_headless(url, logger)
    # assume default of 25 years
    term = str(25 * 12)

    def value_cell(element_id):
        # First class="last" element inside the first element with this id.
        container = bsobj.find_all(id=element_id, limit=1)[0]
        return container.find_all(attrs={'class': 'last'}, limit=1)[0]

    # rate: text before the '%'
    rate_percent = value_cell('InterestRate').string.string.split('%')[0]
    # fee: drop the two leading bytes and any thousands separators
    raw_fee = value_cell('BookingFee').string.encode('utf_8')[2:].replace(',', '')
    booking_fee = raw_fee if raw_fee != '' else str(0)
    # LTV
    ltv_percent = value_cell('maxLTV').string.split('%')[0]
    # APR for comparison %
    apr_percent = value_cell('OverallCost').strong.string.strip().split()[0]
    # SVR - flagged "to test" by the original author
    svr_percent = value_cell('RevertingVariableRate').string.strip().split('%')[0]
    if svr_percent == '-':
        # Sometimes not displayed, presumably because not applicable, so the
        # headline rate is used instead.
        svr_percent = rate_percent
    # fixed/discount/tracker in that order of precedence, else variable
    desc_lower = bsobj.find_all(attrs={'class': 'productResults'}, limit=1)[0].div.h3.string.lower()
    if 'fixed' in desc_lower:
        mortgage_type = 'F'
    elif 'discount' in desc_lower:
        mortgage_type = 'D'
    elif 'tracker' in desc_lower:
        mortgage_type = 'T'
    else:
        mortgage_type = 'V'
    # fix period (months): scanning stops at the first child that is not the
    # literal 'Term of Loan'
    for node in value_cell('RatePeriod').children:
        if node != 'Term of Loan':
            initial_period = themortgagemeter_utils.get_months(node, logger)
            break
        initial_period = term
    mc_util.handle_mortgage_insert(
        institution_code, mortgage_type, rate_percent, svr_percent,
        apr_percent, ltv_percent, initial_period, booking_fee, term, url,
        eligibility, logger)
def halifax_remortgage_page(static, url, mortgage_type, eligibility, logger):
    """Scrape a Halifax remortgage rates table and record its product rows.

    Each ``<tr>`` is flattened into a list of stripped, UTF-8 encoded
    strings.  Rows of 20 to 24 strings and rows of exactly 25 strings are
    parsed; in the 25-string layout the SVR, APR, fee and LTV strings sit two
    positions further right.  Other rows longer than three strings are only
    logged at debug level.

    Args:
        static: Passed to ``themortgagemeter_utils.get_page`` together with
            the local snapshot path.
        url: Page address; also recorded with each mortgage.
        mortgage_type: Mortgage type code recorded for every row.
        eligibility: Eligibility code recorded for every row.
        logger: Logger used for debug output and handed to the helpers.

    ``institution_code`` and ``term`` are read from module scope.
    """
    def normalise_period(raw_period):
        # Turn the term string into a month count held as text.
        has_years = re.search(r'years', raw_period)
        if has_years and not re.search(r'[0-9]+ years', raw_period):
            # "years" without "<digits> " before it: first character = years
            return str(int(raw_period[0]) * 12)
        has_months = re.search(r'months', raw_period)
        if has_months and not re.search(r'[0-9]+ month', raw_period):
            return raw_period[0:2]
        return str(themortgagemeter_utils.get_months(raw_period, logger))

    page = themortgagemeter_utils.get_page(
        static, 'static_html/halifax/remortgage-fixed-75ltv.asp', url, logger)
    for row in page.find_all('tr'):
        cells = [string.strip(text.encode('utf-8')) for text in row.strings]
        # Example data row noted by the original author:
        # ['\n', '2 years', '\n', '4.44%', '\n', 'Currently', ' \xc2\xa03.99%', '\n', '30/11/2014', '\n', '4.3% APR', '\n', '\xc2\xa3995', '\n', '75-80%', '\n', '30/11/2014', ...]
        logger.debug(cells)
        size = len(cells)
        if size == 25:
            # wider layout: later columns are two positions to the right
            offset = 2
        elif 19 < size < 25:
            offset = 0
        else:
            if size > 3:
                logger.debug('Should this be handled?: %s', cells)
            continue
        if '%' not in cells[3]:
            # no rate in the fourth string, so not a product row
            continue
        rate_percent = cells[3][:-1]
        svr_percent = cells[6 + offset].split()[0][:-1].strip('\xc2').strip(
            '\xa0')
        apr_percent = cells[10 + offset].split()[0][:-1]
        booking_fee = cells[12 + offset][2:].replace(',', '')
        initial_period = normalise_period(cells[1])
        # upper bound of a range such as '75-80%'
        ltv_percent = cells[14 + offset].split('-')[1].strip('%')
        mc_util.handle_mortgage_insert(institution_code, mortgage_type,
                                       rate_percent, svr_percent, apr_percent,
                                       ltv_percent, initial_period,
                                       booking_fee, term, url, eligibility,
                                       logger)
def get_product_pages(static, url, logger):
    """Load the Chelsea product XML and record every mortgage it lists.

    The SVR is taken from the ``var chelseaSVR = "...%"`` line of the site's
    mortgage-finder script.  Products come from the local XML snapshot when
    *static* is truthy, otherwise from *url*.  Each ``product`` element is
    passed to ``mc_util.handle_mortgage_insert`` once per eligibility
    returned by ``mc_util.validate_eligibility_dict``.

    Args:
        static: Truthy to parse the local XML snapshot instead of fetching.
        url: Address of the live product XML; also recorded per mortgage.
        logger: Logger handed on to the helper functions.

    ``institution_code`` is read from module scope.
    """
    logger.debug("In get_product_pages: " + url)
    # The SVR is global to the site, so it is read once up front.
    script_text = themortgagemeter_utils.get_page(
        False, '', 'http://www.thechelsea.co.uk/js/mortgage-finder.js',
        logger, True)
    for script_line in script_text.split('\n'):
        svr_match = re.match(r'^var chelseaSVR = "([^%]*)%".*$', script_line)
        if svr_match is not None:
            svr_percent = svr_match.group(1)
            break
    # Now get the mortgage data
    if static:
        root = ET.parse(
            'static_html/chelsea/mortage-product-data-0031.xml').getroot()
    else:
        root = ET.fromstring(
            themortgagemeter_utils.get_page(False, '', url, logger, True))
    term = str(25 * 12)
    # Raw mortgageType values recorded as fixed ('fixedtracker' is presumably
    # fixed first, then a tracker).
    fixed_kinds = ('fixed', 'fixedoffset', 'ftbfixed', 'ftbfixedoffset',
                   'fixedtracker')
    # Raw values recorded as tracker; 'rollover' and 'mixed' had no clear
    # meaning to the original author and were filed here as well.
    tracker_kinds = ('tracker', 'trackeroffset', 'offset', 'mixedoffset',
                     'rollover', 'mixed')
    # XML flag attribute -> key set to 'B' in the eligibility dictionary
    eligibility_flags = (
        ('existingBorrower', 'existing_customer'),
        ('newBorrower', 'moving_home'),
        ('firstTimeBuyer', 'ftb'),
        ('movingHome', 'moving_home'),
        ('remortgaging', 'remortgage'),
    )
    for product in root.findall('product'):
        apr_percent = product.get('apr').split('%')[0]
        rate_percent = product.get('interestRate').split('%')[0]
        ltv_percent = product.get('maxLTV').split('%')[0]
        mortgage_type_raw = product.get('mortgageType')
        name = product.get('name')
        booking_fee = product.get('completionFee')
        if booking_fee == '':
            booking_fee = '0'
        # Gathered data, now marshal it before submitting.
        if mortgage_type_raw in fixed_kinds:
            mortgage_type = 'F'
        elif mortgage_type_raw in tracker_kinds:
            mortgage_type = 'T'
        else:
            # default to variable
            mortgage_type = 'V'

        # Get a mortgage eligibility dictionary to submit.
        mortgage_eligibility_dict = mc_util.get_mortgage_eligibility_dict()
        for flag_attribute, eligibility_key in eligibility_flags:
            if product.get(flag_attribute) == 'Y':
                mortgage_eligibility_dict[eligibility_key] = 'B'
        eligibilities = mc_util.validate_eligibility_dict(
            mortgage_eligibility_dict, [])

        # use get_months to determine period
        initial_period = themortgagemeter_utils.get_months(name, logger)

        for eligibility in eligibilities:
            mc_util.handle_mortgage_insert(
                institution_code, mortgage_type, rate_percent, svr_percent,
                apr_percent, ltv_percent, initial_period, booking_fee, term,
                url, eligibility, logger)
Beispiel #13
0
def process_more_info_page(savings_data,url,logger):
	"""Scrape one Halifax savings product page into a list of rate records.

	The page at url is fetched with themortgagemeter_utils.get_page and
	parsed according to the product kind: ISA pages (savings_data['isa'] is
	'Y'), fixed-online-saver / fixed-saver / tracker-bond pages, and the
	online-saver, regular-saver and everyday-saver pages.  Every rate found
	becomes a copy of savings_data with the scraped fields filled in.

	Returns the list of those copies.  For branch-accounts URLs the empty
	list is returned straight away.

	Side effects: prints url to stdout; the online-saver, regular-saver and
	everyday-saver branches write keys into the caller's savings_data dict;
	exit() is called for an unhandled URL, for several unexpected page
	layouts and when no record was produced.
	"""
	bsobj = themortgagemeter_utils.get_page(False,'static_html/halifax/savings-accounts.html',url,logger)
	#print bsobj
	savings_array = []
	#print "Passed in:"
	#print savings_data
	print url
	if savings_data['isa'] == 'Y':
		# ISA pages: rates live in tables near the "Summary box" heading
		for i1 in bsobj.find_all("h2",text="Summary box"):
			# NOTE(review): calling a tag presumably behaves like find_all(), i.e.
			# this walks the elements under the heading's parent - confirm
			for i2 in i1.parent():
				if i2.find_all("table") != []:
					tabs = i2.find_all("table")
					if re.match(".*isa-saver-fixed.*",url):
						# exactly two tables expected; the alert text says "too many"
						# but fires for any other count
						if len(tabs) != 2:
							themortgagemeter_utils.record_alert('ERROR: too many tabs in isa',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
							exit()
						else:
							# only the second table is read
							tabs.pop(0)
						for tab in tabs:
							tbody = tab.find_all("tbody")[0]
							trs = tbody.find_all("tr")
							for tr in trs:
								# one record per row: first cell = period, second = AER
								savings_data_tmp = savings_data.copy()
								tds = tr.find_all("td")
								savings_data_tmp['savings_period'] = themortgagemeter_utils.get_months(tds[0].text.strip().encode('utf-8'),logger)
								savings_data_tmp['aer_percent'] = themortgagemeter_utils.get_percentage(tds[1].text.strip().encode('utf-8'),logger)
								# gross is recorded as equal to AER
								savings_data_tmp['gross_percent'] = savings_data_tmp['aer_percent']
								savings_array.append(savings_data_tmp)
					else:
						if len(tabs) > 1:
							#print tabs
							themortgagemeter_utils.record_alert('ERROR: too many tabs in isa',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
							exit()
						for tab in tabs:
							#print tab
							for tr in tab.find_all("tr"):
								ths = tr.find_all("th")
								tds = tr.find_all("td")
								if len(ths) > 0 and len(tds) > 0:
									# header cell + value cell layout
									th = tr.find_all("th")[0]
									td = tr.find_all("td")[0]
									th_text = th.text.lower()
									td_text = td.text.lower()
									if re.match('interest rates.*',th_text):
										#print "IR:" + td_text
										pc = themortgagemeter_utils.get_percentage(td_text,logger)
										savings_data_tmp = savings_data.copy()
										savings_data_tmp['gross_percent'] = pc
										savings_data_tmp['aer_percent'] = pc
										savings_array.append(savings_data_tmp)
								else:
									if len(ths) == 0 and len(tds) > 0:
										# label and value are both plain cells
										# NOTE(review): tds[1] assumes at least two cells although
										# the guard only checks for one
										td1 = tds[0]
										td2 = tds[1]
										td1_text = td1.text.lower()
										td2_text = td2.text.lower()
										if re.match('interest rates.*',td1_text):
											pc = themortgagemeter_utils.get_percentage(td2_text,logger)
											savings_data_tmp = savings_data.copy()
											savings_data_tmp['gross_percent'] = pc
											savings_data_tmp['aer_percent'] = pc
											savings_array.append(savings_data_tmp)
									else:
										themortgagemeter_utils.record_alert('ERROR: unhandled case: ' + url,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
										exit()
	elif re.match('.*fixed-online-saver.*',url) or re.match('.*tracker-bond.*',url) or re.match('.*fixed-saver.*',url):
		# Locate the heading that precedes the rates table; code records which
		# of the two layouts ("FOS" or "TB") applies.
		if re.match('.*fixed-online-saver.*',url) or re.match('.*fixed-saver.*',url):
			#print bsobj
			code = "FOS"
			i1s = bsobj.find_all("h3",text="Current Rates")
			if i1s== []:
				# same heading with different capitalisation
				i1s = bsobj.find_all("h3",text="Current rates")
		elif re.match('.*tracker-bond.*',url):
			#print bsobj
			code = "TB"
			i1s = []
			res = bsobj.find_all("h4")
			for i in res:
				#print i.text
				if i.text == "Current rates and apply":
					i1s.append(i)
					break
		if i1s == []:
			# only an alert here; the empty-result check at the end of the
			# function is what stops the run
			themortgagemeter_utils.record_alert('No items from expected h3/4 match!',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
		for i1 in i1s:
			for i2 in i1.parent():
				tbodys = i2.find_all("tbody")
				# if this is tracker bond, discard the first table
				if len(tbodys) == 0:
					continue
				if code == "TB":
					# only use elements whose tables contain a row starting with "Term"
					ok = False
					for tbody in tbodys:
						for tr in tbody.find_all("tr"):
							tds = tr.find_all("td")
							if tds[0].text == "Term":
								ok = True
					if not ok:
						continue
				for tbody in tbodys:
					tr_count = -1
					table_savings_period = "unset"
					for tr in tbody.find_all("tr"):
						tr_count = tr_count + 1
						if code == "TB" and tr_count == 0:
							# skip the first row
							continue
						# clone the savings_data ready to write to
						savings_data_tmp = savings_data.copy()
						# First td is time only on first row for TB, so later rows
						# start counting at the balance column
						if code == "TB" and tr_count > 1:
							td_count = 1
						else:
							td_count = 0
						if code == "TB" and tr_count > 1:
							# later TB rows reuse the period read from the first data row
							if table_savings_period == "unset":
								themortgagemeter_utils.record_alert('ERROR: table_savings_period should not be unset',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
								exit()
							savings_data_tmp['savings_period'] = table_savings_period
						for td in tr.find_all("td"):
							# 0 - term
							# 1 - balance
							# 2 - Gross
							# 3 - AER
							# 4 - NET (ignore)
							# Ignore remainder of cols
							text = td.text.lower().strip().encode('utf-8')
							if td_count == 0:
								# store this in a variable for use on next row if necessary
								table_savings_period = themortgagemeter_utils.get_months(text,logger)
								savings_data_tmp['savings_period'] = table_savings_period
							elif td_count == 1:
								res = savings_util.get_money_range(text,logger)
								savings_data_tmp['min_amt'] = res[0]
								savings_data_tmp['max_amt'] = res[1]
							elif td_count == 2:
								savings_data_tmp['gross_percent'] = themortgagemeter_utils.get_percentage(text,logger)
							elif td_count == 3:
								savings_data_tmp['aer_percent'] = themortgagemeter_utils.get_percentage(text,logger)
								# and then break out
								break
							td_count = td_count + 1
						savings_array.append(savings_data_tmp)
	elif re.match('.*/online-saver/',url):
		# TODO: need to set this for other types
		# NOTE: this writes into the caller's dict, not a copy
		savings_data['interest_paid'] = 'Y'
		#print bsobj
		# get the apr class element, as that contains the text we need
		apr = bsobj.find_all(attrs={'class':'apr'})[0].parent.parent.text.encode('utf-8')
		# split this line by \n
		apr = apr.split('\n')
		lines = []
		for l in apr:
			# a line of the form "<x> or <digit...>" holds two rates: split it once
			if re.match('.* or [0-9].*',l):
				for l2 in l.split(' or ',1):
					lines.append(l2)
			else:
				lines.append(l)
		# drop empty lines
		while '' in lines:
			lines.remove('')
		for l in lines:
			# copy
			savings_data_tmp = savings_data.copy()
			#print l
			# get percentage
			savings_data_tmp['gross_percent'] = themortgagemeter_utils.get_percentage(l,logger)
			savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
			# get_money range
			res = savings_util.get_money_range(l,logger)
			savings_data_tmp['min_amt'] = res[0]
			savings_data_tmp['max_amt'] = res[1]
			# append to savings_array
			savings_array.append(savings_data_tmp)
	elif re.match('.*/regular-saver/',url):
		# TODO: need to set this for other types
		# NOTE: these assignments write into the caller's dict, not a copy
		savings_data['interest_paid'] = 'Y'
		savings_data['regular_saver_frequency_period'] = '1'
		savings_data['regular_saver_frequency_type'] = 'M'
		savings_data['regular_saver'] = 'Y'
		# Always fixed
		savings_data['variability'] = 'F'
		#print bsobj
		# get the apr class element, as that contains the text we need
		apr = bsobj.find_all(attrs={'class':'apr'})[0].parent.parent.text.encode('utf-8')
		# split this line by \n
		apr = apr.split('\n')
		lines = []
		for l in apr:
			# same "<x> or <digit...>" splitting as the online-saver branch
			if re.match('.* or [0-9].*',l):
				for l2 in l.split(' or ',1):
					lines.append(l2)
			else:
				lines.append(l)
		while '' in lines:
			lines.remove('')
		for l in lines:
			# copy
			savings_data_tmp = savings_data.copy()
			# get percentage
			savings_data_tmp['gross_percent'] = themortgagemeter_utils.get_percentage(l,logger)
			if savings_data_tmp['gross_percent'] == '':
				# no percentage on this line: skip it
				continue
			savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
			# Hard-code to 25-250 for now, this seems standard
			savings_data_tmp['regular_saver_min_amt'] = '25'
			savings_data_tmp['regular_saver_max_amt'] = '250'
			# append to savings_array
			savings_array.append(savings_data_tmp)
	elif re.match('.*/everyday-saver/',url):
		# This one's quite simple (I think)
		# TODO: need to set this for other types
		# NOTE: this writes into the caller's dict, not a copy
		savings_data['interest_paid'] = 'Y'
		#print bsobj
		# get the apr class element, as that contains the text we need
		apr = bsobj.find_all(attrs={'class':'apr'})[0].parent.parent.text.encode('utf-8')
		#print apr
		# split this line by \n
		apr = apr.split('\n')
		lines = []
		for l in apr:
			# only lines mentioning "gross" carry a rate
			if re.match('.*gross.*',l):
				lines.append(l)
		while '' in lines:
			lines.remove('')
		for l in lines:
			# copy
			savings_data_tmp = savings_data.copy()
			#print l
			# get percentage
			savings_data_tmp['gross_percent'] = themortgagemeter_utils.get_percentage(l,logger)
			savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
			# TODO: bonus_frequency_period set to 1, or get from data?
			# append to savings_array
			savings_array.append(savings_data_tmp)
	elif re.match('.*/branch-accounts/.*',url):
		# nothing is scraped for branch accounts; return the empty list
		# without raising the empty-result alert below
		return savings_array
	else:
		logger.info('unhandled:' + url)
		exit()
	if savings_array == []:
		# a handled page that yielded no records is treated as fatal
		themortgagemeter_utils.record_alert('ERROR: returning nothing from a page',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
		exit()
	# Return the savings_array
	logger.info('returning savings_array:' + str(savings_array))
	return savings_array
def process_more_info_page(savings_data, url, logger):
    """Scrape one savings product's "more info" page into a list of rate records.

    The page is fetched with ``themortgagemeter_utils.get_page`` and parsed by
    one of several layout-specific branches, chosen from ``savings_data['isa']``
    and from patterns matched against ``url``.  Every record appended to the
    result is a shallow ``copy()`` of ``savings_data`` with rate fields
    (``gross_percent``, ``aer_percent`` and, depending on the branch,
    ``savings_period``, ``min_amt``/``max_amt`` or the ``regular_saver_*``
    amounts) filled in.

    Args:
        savings_data: dict of attributes already known for the product; the
            key ``'isa'`` is always read.  The online-saver, regular-saver and
            everyday-saver branches assign keys on this dict itself (not on a
            copy), so the caller's dict is modified.
        url: address of the page; also decides which parsing branch runs.
        logger: logger handed to the helper functions and used for info
            messages.

    Returns:
        A list of dicts, one per extracted rate.  The list is empty only for
        ``/branch-accounts/`` URLs, which return before the emptiness check.

    Side effects:
        Prints ``url`` to stdout.  Calls ``exit()`` when the page layout is
        not the expected one, when the URL matches no branch, or when nothing
        was extracted; all of these except the unmatched-URL case first
        record an alert through ``themortgagemeter_utils.record_alert``.
    """
    bsobj = themortgagemeter_utils.get_page(
        False, 'static_html/halifax/savings-accounts.html', url, logger)
    #print bsobj
    savings_array = []
    #print "Passed in:"
    #print savings_data
    print url
    # ---- ISA products: rates live in the table(s) of the "Summary box" ----
    if savings_data['isa'] == 'Y':
        for i1 in bsobj.find_all("h2", text="Summary box"):
            # NOTE(review): calling a tag looks like BeautifulSoup's shorthand
            # for find_all(), i.e. this visits every descendant of the
            # heading's parent - confirm.  If so, nested containers around the
            # same table would each match and could yield duplicate records.
            for i2 in i1.parent():
                if i2.find_all("table") != []:
                    tabs = i2.find_all("table")
                    if re.match(".*isa-saver-fixed.*", url):
                        # Fixed ISA: exactly two tables are required and only
                        # the second is read.  The alert text says "too many"
                        # but the check also fires when there are fewer.
                        if len(tabs) != 2:
                            themortgagemeter_utils.record_alert(
                                'ERROR: too many tabs in isa', logger,
                                themortgagemeter_db.db_connection,
                                themortgagemeter_db.cursor)
                            exit()
                        else:
                            tabs.pop(0)
                        for tab in tabs:
                            tbody = tab.find_all("tbody")[0]
                            trs = tbody.find_all("tr")
                            # One record per row: first cell gives the period,
                            # second cell the AER, which is reused as gross.
                            for tr in trs:
                                savings_data_tmp = savings_data.copy()
                                tds = tr.find_all("td")
                                savings_data_tmp[
                                    'savings_period'] = themortgagemeter_utils.get_months(
                                        tds[0].text.strip().encode('utf-8'),
                                        logger)
                                savings_data_tmp[
                                    'aer_percent'] = themortgagemeter_utils.get_percentage(
                                        tds[1].text.strip().encode('utf-8'),
                                        logger)
                                savings_data_tmp[
                                    'gross_percent'] = savings_data_tmp[
                                        'aer_percent']
                                savings_array.append(savings_data_tmp)
                    else:
                        # Any other ISA: at most one table is accepted.
                        if len(tabs) > 1:
                            #print tabs
                            themortgagemeter_utils.record_alert(
                                'ERROR: too many tabs in isa', logger,
                                themortgagemeter_db.db_connection,
                                themortgagemeter_db.cursor)
                            exit()
                        for tab in tabs:
                            #print tab
                            for tr in tab.find_all("tr"):
                                ths = tr.find_all("th")
                                tds = tr.find_all("td")
                                # Layout A: label in a <th>, value in the
                                # first <td>.  Only the row whose label starts
                                # with "interest rates" produces a record.
                                if len(ths) > 0 and len(tds) > 0:
                                    th = tr.find_all("th")[0]
                                    td = tr.find_all("td")[0]
                                    th_text = th.text.lower()
                                    td_text = td.text.lower()
                                    if re.match('interest rates.*', th_text):
                                        #print "IR:" + td_text
                                        pc = themortgagemeter_utils.get_percentage(
                                            td_text, logger)
                                        savings_data_tmp = savings_data.copy()
                                        savings_data_tmp['gross_percent'] = pc
                                        savings_data_tmp['aer_percent'] = pc
                                        savings_array.append(savings_data_tmp)
                                else:
                                    # Layout B: no <th>; label in the first
                                    # <td>, value in the second.
                                    # NOTE(review): tds[1] raises IndexError
                                    # for a row with a single <td>.
                                    if len(ths) == 0 and len(tds) > 0:
                                        td1 = tds[0]
                                        td2 = tds[1]
                                        td1_text = td1.text.lower()
                                        td2_text = td2.text.lower()
                                        if re.match('interest rates.*',
                                                    td1_text):
                                            pc = themortgagemeter_utils.get_percentage(
                                                td2_text, logger)
                                            savings_data_tmp = savings_data.copy(
                                            )
                                            savings_data_tmp[
                                                'gross_percent'] = pc
                                            savings_data_tmp[
                                                'aer_percent'] = pc
                                            savings_array.append(
                                                savings_data_tmp)
                                    else:
                                        # Row without any <td> (only <th>
                                        # cells, or no cells at all).
                                        themortgagemeter_utils.record_alert(
                                            'ERROR: unhandled case: ' + url,
                                            logger,
                                            themortgagemeter_db.db_connection,
                                            themortgagemeter_db.cursor)
                                        exit()
    # ---- Fixed savers and tracker bonds: rates in a "Current rates" table ----
    elif re.match('.*fixed-online-saver.*', url) or re.match(
            '.*tracker-bond.*', url) or re.match('.*fixed-saver.*', url):
        # Locate the heading(s) that introduce the rate table.  "code"
        # distinguishes the fixed saver layout ("FOS") from the tracker bond
        # layout ("TB") further down.
        if re.match('.*fixed-online-saver.*', url) or re.match(
                '.*fixed-saver.*', url):
            #print bsobj
            code = "FOS"
            i1s = bsobj.find_all("h3", text="Current Rates")
            if i1s == []:
                i1s = bsobj.find_all("h3", text="Current rates")
        elif re.match('.*tracker-bond.*', url):
            #print bsobj
            code = "TB"
            i1s = []
            res = bsobj.find_all("h4")
            # Only the first h4 with exactly this text is kept.
            for i in res:
                #print i.text
                if i.text == "Current rates and apply":
                    i1s.append(i)
                    break
        # No heading found: an alert is recorded but execution continues; the
        # loop below is then skipped and the empty-result check at the end of
        # the function records a second alert and exits.
        if i1s == []:
            themortgagemeter_utils.record_alert(
                'No items from expected h3/4 match!', logger,
                themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
        for i1 in i1s:
            for i2 in i1.parent():
                tbodys = i2.find_all("tbody")
                # if this is tracker bond, discard the first table
                if len(tbodys) == 0:
                    continue
                # Tracker bond: only use an element that contains a row whose
                # first cell is exactly "Term".
                # NOTE(review): tds[0] raises IndexError for a row that has
                # no <td> cells.
                if code == "TB":
                    ok = False
                    for tbody in tbodys:
                        for tr in tbody.find_all("tr"):
                            tds = tr.find_all("td")
                            if tds[0].text == "Term":
                                ok = True
                    if not ok:
                        continue
                for tbody in tbodys:
                    # tr_count is the zero-based row index within this tbody;
                    # table_savings_period remembers the term read from an
                    # earlier row so later tracker bond rows can reuse it.
                    tr_count = -1
                    table_savings_period = "unset"
                    for tr in tbody.find_all("tr"):
                        tr_count = tr_count + 1
                        if code == "TB" and tr_count == 0:
                            # skip the first row
                            continue
                        # clone the savings_data ready to write to
                        savings_data_tmp = savings_data.copy()
                        # First td is time only on first row for TB
                        # td_count is the logical column of the next cell;
                        # starting at 1 makes the first cell of such a row be
                        # read as the balance column.
                        if code == "TB" and tr_count > 1:
                            td_count = 1
                        else:
                            td_count = 0
                        if code == "TB" and tr_count > 1:
                            if table_savings_period == "unset":
                                themortgagemeter_utils.record_alert(
                                    'ERROR: table_savings_period should not be unset',
                                    logger, themortgagemeter_db.db_connection,
                                    themortgagemeter_db.cursor)
                                exit()
                            savings_data_tmp[
                                'savings_period'] = table_savings_period
                        for td in tr.find_all("td"):
                            # 0 - term
                            # 1 - balance
                            # 2 - Gross
                            # 3 - AER
                            # 4 - NET (ignore)
                            # Ignore remainder of cols
                            text = td.text.lower().strip().encode('utf-8')
                            if td_count == 0:
                                # store this in a variable for use on next row if necessary
                                table_savings_period = themortgagemeter_utils.get_months(
                                    text, logger)
                                savings_data_tmp[
                                    'savings_period'] = table_savings_period
                            elif td_count == 1:
                                res = savings_util.get_money_range(
                                    text, logger)
                                savings_data_tmp['min_amt'] = res[0]
                                savings_data_tmp['max_amt'] = res[1]
                            elif td_count == 2:
                                savings_data_tmp[
                                    'gross_percent'] = themortgagemeter_utils.get_percentage(
                                        text, logger)
                            elif td_count == 3:
                                savings_data_tmp[
                                    'aer_percent'] = themortgagemeter_utils.get_percentage(
                                        text, logger)
                                # and then break out
                                break
                            td_count = td_count + 1
                        # A record is appended for every processed row, even
                        # when the row supplied fewer than four cells.
                        savings_array.append(savings_data_tmp)
    # ---- Online saver: one record per rate quoted in the "apr" text ----
    elif re.match('.*/online-saver/', url):
        # TODO: need to set this for other types
        # Assigned on the dict passed in, so the caller sees this key too.
        savings_data['interest_paid'] = 'Y'
        #print bsobj
        # get the apr class element, as that contains the text we need
        apr = bsobj.find_all(
            attrs={'class': 'apr'})[0].parent.parent.text.encode('utf-8')
        # split this line by \n
        apr = apr.split('\n')
        lines = []
        # A line of the form "... or <digit>..." holds two alternatives and is
        # split into two entries at the first " or ".
        for l in apr:
            if re.match('.* or [0-9].*', l):
                for l2 in l.split(' or ', 1):
                    lines.append(l2)
            else:
                lines.append(l)
        while '' in lines:
            lines.remove('')
        for l in lines:
            # copy
            savings_data_tmp = savings_data.copy()
            #print l
            # get percentage
            # Unlike the regular-saver branch, a line without a percentage is
            # not skipped here; it still produces a record.
            savings_data_tmp[
                'gross_percent'] = themortgagemeter_utils.get_percentage(
                    l, logger)
            savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
            # get_money range
            res = savings_util.get_money_range(l, logger)
            savings_data_tmp['min_amt'] = res[0]
            savings_data_tmp['max_amt'] = res[1]
            # append to savings_array
            savings_array.append(savings_data_tmp)
    # ---- Regular saver: fixed-rate monthly saver with hard-coded limits ----
    elif re.match('.*/regular-saver/', url):
        # TODO: need to set this for other types
        # All of the following are assigned on the dict passed in.
        savings_data['interest_paid'] = 'Y'
        savings_data['regular_saver_frequency_period'] = '1'
        savings_data['regular_saver_frequency_type'] = 'M'
        savings_data['regular_saver'] = 'Y'
        # Always fixed
        savings_data['variability'] = 'F'
        #print bsobj
        # get the apr class element, as that contains the text we need
        apr = bsobj.find_all(
            attrs={'class': 'apr'})[0].parent.parent.text.encode('utf-8')
        # split this line by \n
        apr = apr.split('\n')
        lines = []
        # Same "<a> or <b>" splitting as in the online-saver branch.
        for l in apr:
            if re.match('.* or [0-9].*', l):
                for l2 in l.split(' or ', 1):
                    lines.append(l2)
            else:
                lines.append(l)
        while '' in lines:
            lines.remove('')
        for l in lines:
            # copy
            savings_data_tmp = savings_data.copy()
            # get percentage
            savings_data_tmp[
                'gross_percent'] = themortgagemeter_utils.get_percentage(
                    l, logger)
            if savings_data_tmp['gross_percent'] == '':
                # abandon ship!
                continue
            savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
            # Hard-code to 25-250 for now, this seems standard
            savings_data_tmp['regular_saver_min_amt'] = '25'
            savings_data_tmp['regular_saver_max_amt'] = '250'
            # append to savings_array
            savings_array.append(savings_data_tmp)
    # ---- Everyday saver: one record per "gross" line of the "apr" text ----
    elif re.match('.*/everyday-saver/', url):
        # This one's quite simple (I think)
        # TODO: need to set this for other types
        # Assigned on the dict passed in, so the caller sees this key too.
        savings_data['interest_paid'] = 'Y'
        #print bsobj
        # get the apr class element, as that contains the text we need
        apr = bsobj.find_all(
            attrs={'class': 'apr'})[0].parent.parent.text.encode('utf-8')
        #print apr
        # split this line by \n
        apr = apr.split('\n')
        lines = []
        # Keep only the lines that mention "gross".
        for l in apr:
            if re.match('.*gross.*', l):
                lines.append(l)
        # Every kept line contains "gross", so this removal loop never finds
        # an empty string here.
        while '' in lines:
            lines.remove('')
        for l in lines:
            # copy
            savings_data_tmp = savings_data.copy()
            #print l
            # get percentage
            savings_data_tmp[
                'gross_percent'] = themortgagemeter_utils.get_percentage(
                    l, logger)
            savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
            # TODO: bonus_frequency_period set to 1, or get from data?
            # append to savings_array
            savings_array.append(savings_data_tmp)
    # ---- Branch accounts are not parsed: return the empty list as-is ----
    elif re.match('.*/branch-accounts/.*', url):
        return savings_array
    else:
        # Unknown product URL: logged (no alert is recorded) and the process
        # exits.
        logger.info('unhandled:' + url)
        exit()
    # Any parsed branch that produced no records is treated as a scrape
    # failure.
    if savings_array == []:
        themortgagemeter_utils.record_alert(
            'ERROR: returning nothing from a page', logger,
            themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
        exit()
    # Return the savings_array
    logger.info('returning savings_array:' + str(savings_array))
    return savings_array
def get_product_page(static,url,eligibilities):
	"""Scrape the Post Office mortgage range page and record every product row.

	Each h2 heading whose text contains a percentage starts an LTV band; the
	heading quotes 100 - LTV, so the LTV is derived from it.  The rows of the
	first following sibling with class 'displaytable' are then read as:
	td0 = initial period and product type, td1 = initial rate, the next cell
	holding a percentage = SVR, the next one after that = APR, and the cell
	after the APR = booking fee.  One record per eligibility is passed to
	mc_util.handle_mortgage_insert.

	Rows that cannot be interpreted (product type that is neither 'fixed'
	nor 'tracker', or no SVR / APR / fee cell) are reported through
	themortgagemeter_utils.record_alert and skipped, instead of raising
	NameError/IndexError or being stored with the previous row's type.

	Args:
		static: forwarded to themortgagemeter_utils.get_page; presumably
			selects the saved copy of the page instead of a live fetch
			(TODO confirm against get_page).
		url: address of the product page; also stored with every record.
		eligibilities: iterable of eligibility codes; each row is inserted
			once per code.
	"""
	logger = logging.getLogger('retrieve')
	bsobj = themortgagemeter_utils.get_page(static,'static_html/post_office/our-full-range.html',url,logger)
	term = str(25 * 12)

	def first_percentage(cell):
		"""Return the first percentage found on any line of cell, or ''."""
		for line in cell.text.encode('utf-8').split('\n'):
			found = themortgagemeter_utils.get_percentage(line,logger)
			if found != '':
				return found
		return ''

	def next_percentage(cells,start):
		"""Scan cells[start:]; return (percentage, index) or ('', len(cells))."""
		for idx in range(start,len(cells)):
			found = first_percentage(cells[idx])
			if found != '':
				return found, idx
		return '', len(cells)

	for ltv_elem in bsobj.find_all('h2'):
		# For post office, first reported % is 100 - LTV
		ltv_elem_str = ltv_elem.string
		if not ltv_elem_str:
			continue
		ltv_percent = themortgagemeter_utils.get_percentage(ltv_elem_str,logger)
		if ltv_percent == '':
			continue
		ltv_percent = str(100 - int(ltv_percent))
		div = ltv_elem.fetchNextSiblings(attrs={'class' : 'displaytable'},limit=1)
		if not div:
			continue
		logger.debug(div)
		for tr in div[0].find_all('tr'):
			logger.debug(tr)
			tds = tr.find_all('td')
			# A mortgage row needs at least two td cells; header rows have none.
			if len(tds) <= 1:
				continue
			first_cell = tds[0].text.encode('utf-8')
			logger.debug(first_cell.split('\n'))
			# TODO: generic text cleansing function
			# Replaces \xc2 with a space and drops \xa0, so a UTF-8
			# non-breaking space splits like a normal one.  The product type
			# is the third word of the cell.
			words = re.sub('\xa0','',re.sub('\xc2',' ',first_cell)).split()
			type_str = words[2] if len(words) > 2 else ''
			logger.debug('type_str: ' + type_str)
			if type_str == 'fixed':
				mortgage_type = 'F'
			elif type_str == 'tracker':
				mortgage_type = 'T'
			else:
				# Skip the row: carrying on would use an unset or stale
				# mortgage_type.
				themortgagemeter_utils.record_alert('ERROR: PSTFFC neither fixed nor tracker: ' + type_str,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
				continue
			# Sometimes we get empty fields - we remove them here.  The list
			# cannot be empty because the cell held at least three words.
			period_lines = [line for line in first_cell.split('\n') if line != '']
			initial_period = str(themortgagemeter_utils.get_months(period_lines[0],logger))
			# The initial rate is only looked for in the second cell; SVR and
			# APR are the next two cells (in order) that hold a percentage.
			rate_percent = first_percentage(tds[1])
			svr_percent, svr_index = next_percentage(tds,2)
			apr_percent, apr_index = next_percentage(tds,svr_index + 1)
			fee_index = apr_index + 1
			if svr_percent == '' or apr_percent == '' or fee_index >= len(tds):
				themortgagemeter_utils.record_alert('ERROR: PSTFFC incomplete mortgage row: ' + url,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
				continue
			# Drop the first two bytes (presumably the UTF-8 pound sign) and
			# any thousands separators.
			booking_fee = tds[fee_index].text.strip().encode('utf-8')[2:].replace(',','')
			for eligibility in eligibilities:
				mc_util.handle_mortgage_insert(institution_code,mortgage_type,rate_percent,svr_percent,apr_percent,ltv_percent,initial_period,booking_fee,term,url,eligibility,logger)
Beispiel #16
0
def get_product_page(static, url, eligibilities):
    """Scrape the Post Office mortgage range page and record every product row.

    Each h2 heading whose text contains a percentage starts an LTV band; the
    heading quotes 100 - LTV, so the LTV is derived from it.  The rows of the
    first following sibling with class 'displaytable' are then read as:
    td0 = initial period and product type, td1 = initial rate, the next cell
    holding a percentage = SVR, the next one after that = APR, and the cell
    after the APR = booking fee.  One record per eligibility is passed to
    mc_util.handle_mortgage_insert.

    Rows that cannot be interpreted (product type that is neither 'fixed'
    nor 'tracker', or no SVR / APR / fee cell) are reported through
    themortgagemeter_utils.record_alert and skipped, instead of raising
    NameError/IndexError or being stored with the previous row's type.

    Args:
        static: forwarded to themortgagemeter_utils.get_page; presumably
            selects the saved copy of the page instead of a live fetch
            (TODO confirm against get_page).
        url: address of the product page; also stored with every record.
        eligibilities: iterable of eligibility codes; each row is inserted
            once per code.
    """
    logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/post_office/our-full-range.html', url, logger)
    term = str(25 * 12)

    def first_percentage(cell):
        """Return the first percentage found on any line of cell, or ''."""
        for line in cell.text.encode('utf-8').split('\n'):
            found = themortgagemeter_utils.get_percentage(line, logger)
            if found != '':
                return found
        return ''

    def next_percentage(cells, start):
        """Scan cells[start:]; return (percentage, index) or ('', len(cells))."""
        for idx in range(start, len(cells)):
            found = first_percentage(cells[idx])
            if found != '':
                return found, idx
        return '', len(cells)

    for ltv_elem in bsobj.find_all('h2'):
        # For post office, first reported % is 100 - LTV
        ltv_elem_str = ltv_elem.string
        if not ltv_elem_str:
            continue
        ltv_percent = themortgagemeter_utils.get_percentage(
            ltv_elem_str, logger)
        if ltv_percent == '':
            continue
        ltv_percent = str(100 - int(ltv_percent))
        div = ltv_elem.fetchNextSiblings(attrs={'class': 'displaytable'},
                                         limit=1)
        if not div:
            continue
        logger.debug(div)
        for tr in div[0].find_all('tr'):
            logger.debug(tr)
            tds = tr.find_all('td')
            # A mortgage row needs at least two td cells; header rows have none.
            if len(tds) <= 1:
                continue
            first_cell = tds[0].text.encode('utf-8')
            logger.debug(first_cell.split('\n'))
            # TODO: generic text cleansing function
            # Replaces \xc2 with a space and drops \xa0, so a UTF-8
            # non-breaking space splits like a normal one.  The product type
            # is the third word of the cell.
            words = re.sub('\xa0', '', re.sub('\xc2', ' ', first_cell)).split()
            type_str = words[2] if len(words) > 2 else ''
            logger.debug('type_str: ' + type_str)
            if type_str == 'fixed':
                mortgage_type = 'F'
            elif type_str == 'tracker':
                mortgage_type = 'T'
            else:
                # Skip the row: carrying on would use an unset or stale
                # mortgage_type.
                themortgagemeter_utils.record_alert(
                    'ERROR: PSTFFC neither fixed nor tracker: ' + type_str,
                    logger, themortgagemeter_db.db_connection,
                    themortgagemeter_db.cursor)
                continue
            # Sometimes we get empty fields - we remove them here.  The list
            # cannot be empty because the cell held at least three words.
            period_lines = [
                line for line in first_cell.split('\n') if line != ''
            ]
            initial_period = str(
                themortgagemeter_utils.get_months(period_lines[0], logger))
            # The initial rate is only looked for in the second cell; SVR and
            # APR are the next two cells (in order) that hold a percentage.
            rate_percent = first_percentage(tds[1])
            svr_percent, svr_index = next_percentage(tds, 2)
            apr_percent, apr_index = next_percentage(tds, svr_index + 1)
            fee_index = apr_index + 1
            if svr_percent == '' or apr_percent == '' or fee_index >= len(tds):
                themortgagemeter_utils.record_alert(
                    'ERROR: PSTFFC incomplete mortgage row: ' + url, logger,
                    themortgagemeter_db.db_connection,
                    themortgagemeter_db.cursor)
                continue
            # Drop the first two bytes (presumably the UTF-8 pound sign) and
            # any thousands separators.
            booking_fee = tds[fee_index].text.strip().encode(
                'utf-8')[2:].replace(',', '')
            for eligibility in eligibilities:
                mc_util.handle_mortgage_insert(
                    institution_code, mortgage_type, rate_percent,
                    svr_percent, apr_percent, ltv_percent,
                    initial_period, booking_fee, term, url,
                    eligibility, logger)