Beispiel #1
0
def get_product_pages(static, url, logger):
    """Parse the products XML document and record every mortgage in it.

    static -- when true, parse the saved copy at
              'static_html/tesco/Products.xml' instead of fetching url.
    url    -- address of the live products XML document.
    logger -- logger used for diagnostics and handed to the helpers.

    Every <Mortgage> element found under <purchase>/<rate type>/<LTV> is
    passed to mc_util.handle_mortgage_insert once per eligibility code
    associated with its purchase section.  Returns nothing.  A missing
    section or field raises AttributeError, as before.
    """
    logger.debug("In get_product_pages: " + url)
    # The page used to be fetched a second time up front into a variable
    # that was never read; that wasted a request and hit the network even
    # in static mode, so it is gone.
    if static:
        root = ET.parse('static_html/tesco/Products.xml').getroot()
    else:
        root = ET.fromstring(
            themortgagemeter_utils.get_page(False, '', url, logger, True))
    # Term passed with every product: 25 * 12, as a string.
    term = str(25 * 12)
    # Purchase section -> eligibility codes recorded for its products.
    purchase_eligibilities = (
        ('HousePurchase', ('NFTB', 'NMH')),
        ('Remortgage', ('NRM',)),
    )
    # Rate section -> mortgage type code.
    rate_type_codes = (
        ('FixedRate', 'F'),
        ('TrackerRate', 'T'),
    )
    for purchase, eligibilities in purchase_eligibilities:
        for rate_type, mortgage_type in rate_type_codes:
            rate_set = root.find(purchase).find(rate_type)
            for rate in rate_set.findall('LTV'):
                ltv_percent = rate.get('max')
                for mortgage in rate.findall('Mortgage'):
                    rate_percent = mortgage.find('initialRate').text
                    apr_percent = mortgage.find('APR').text
                    svr_percent = mortgage.find('variableRate').text
                    # Only the first line of the product name is used to
                    # work out the initial period.
                    name = mortgage.find('name').text.split('\n')[0]
                    initial_period = themortgagemeter_utils.get_months(
                        name, logger)
                    # Booking fee and product fee are reported as one sum.
                    booking_fee = str(
                        int(mortgage.find('bookingFee').text) +
                        int(mortgage.find('productFee').text))
                    for eligibility in eligibilities:
                        mc_util.handle_mortgage_insert(
                            institution_code, mortgage_type, rate_percent,
                            svr_percent, apr_percent, ltv_percent,
                            initial_period, booking_fee, term,
                            'http://www.tescobank.com/personal/finance/mortgages',
                            eligibility, logger)
def process_page(static, base_url, url_suffix, eligibility):
    """Scrape a product listing page and record each mortgage row found.

    static      -- passed to themortgagemeter_utils.get_page together with
                   a saved-page path.
    base_url    -- prefix joined to url_suffix and to each product href.
    url_suffix  -- path of the listing page below base_url.
    eligibility -- eligibility code stored with every product found.

    Every anchor with class 'continue moreinfo' on the listing page is
    followed.  On each product page the <tr> rows with class
    'list fixedpanel' or 'list trackerpanel' are read and passed to
    mc_util.handle_mortgage_insert.  institution_code, term and
    process_title are module-level names defined outside this block.
    Returns nothing.
    """
    logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/northernrock/First-Time-Buyer',
        base_url + url_suffix, logger)
    for anchor in bsobj.find_all(attrs={'class': 'continue moreinfo'}):
        url = base_url + anchor['href']
        logger.info(url)
        anchor_bsobj = themortgagemeter_utils.get_page(
            static, 'static_html/northernrock/5yr_everyday_fixed_5ct5', url,
            logger)
        title = anchor_bsobj.find_all('h1')[0].string
        for class_str in ('fixedpanel', 'trackerpanel'):
            trs = anchor_bsobj.find_all('tr', 'list ' + class_str)
            if not trs:
                continue
            # TODO get time period and type from title
            # TODO: I think this is wrong! fixedpanel is different from trackerpanel!
            (initial_period, mortgage_type) = process_title(title, logger)
            for tr in trs:
                spans = tr.find_all('span')
                # The first five spans are read in order as: rate, SVR,
                # APR, booking fee, LTV.  A row with fewer spans used to
                # reuse values left over from the previous row (or fail
                # with a NameError on the first row); it is now reported
                # and skipped, like a row with no spans at all.
                if len(spans) < 5:
                    logger.critical('No data from url: ' + url)
                    continue
                cells = [span.string for span in spans[:5]]
                if cells[0] is None:
                    # The rate span has no direct string; fall back to the
                    # text of its <em> child.
                    cells[0] = spans[0].em.string
                rate_percent = cells[0].split('%')[0]
                svr_percent = cells[1].split('%')[0]
                apr_percent = cells[2].split('%')[0]
                # Drop the leading character of the fee cell and any
                # thousands separators.
                booking_fee = cells[3][1:].replace(',', '')
                ltv_percent = cells[4].split('%')[0].replace(',', '')
                mc_util.handle_mortgage_insert(
                    institution_code, mortgage_type, rate_percent,
                    svr_percent, apr_percent, ltv_percent,
                    initial_period, booking_fee, term, url,
                    eligibility, logger)
def process_page(static, base_url, url_suffix, eligibility):
    """Scrape a product listing page and record each mortgage row found.

    static      -- passed to themortgagemeter_utils.get_page together with
                   a saved-page path.
    base_url    -- prefix joined to url_suffix and to each product href.
    url_suffix  -- path of the listing page below base_url.
    eligibility -- eligibility code stored with every product found.

    Every anchor with class 'continue moreinfo' on the listing page is
    followed.  On each product page the <tr> rows with class
    'list fixedpanel' or 'list trackerpanel' are read and passed to
    mc_util.handle_mortgage_insert.  institution_code, term and
    process_title are module-level names defined outside this block.
    Returns nothing.
    """
    logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/northernrock/First-Time-Buyer',
        base_url + url_suffix, logger)
    for anchor in bsobj.find_all(attrs={'class': 'continue moreinfo'}):
        url = base_url + anchor['href']
        logger.info(url)
        anchor_bsobj = themortgagemeter_utils.get_page(
            static, 'static_html/northernrock/5yr_everyday_fixed_5ct5', url,
            logger)
        title = anchor_bsobj.find_all('h1')[0].string
        for class_str in ('fixedpanel', 'trackerpanel'):
            trs = anchor_bsobj.find_all('tr', 'list ' + class_str)
            if not trs:
                continue
            # TODO get time period and type from title
            # TODO: I think this is wrong! fixedpanel is different from trackerpanel!
            (initial_period, mortgage_type) = process_title(title, logger)
            for tr in trs:
                spans = tr.find_all('span')
                # The first five spans are read in order as: rate, SVR,
                # APR, booking fee, LTV.  A row with fewer spans used to
                # reuse values left over from the previous row (or fail
                # with a NameError on the first row); it is now reported
                # and skipped, like a row with no spans at all.
                if len(spans) < 5:
                    logger.critical('No data from url: ' + url)
                    continue
                cells = [span.string for span in spans[:5]]
                if cells[0] is None:
                    # The rate span has no direct string; fall back to the
                    # text of its <em> child.
                    cells[0] = spans[0].em.string
                rate_percent = cells[0].split('%')[0]
                svr_percent = cells[1].split('%')[0]
                apr_percent = cells[2].split('%')[0]
                # Drop the leading character of the fee cell and any
                # thousands separators.
                booking_fee = cells[3][1:].replace(',', '')
                ltv_percent = cells[4].split('%')[0].replace(',', '')
                mc_util.handle_mortgage_insert(
                    institution_code, mortgage_type, rate_percent,
                    svr_percent, apr_percent, ltv_percent,
                    initial_period, booking_fee, term, url,
                    eligibility, logger)
def process_page(static, url, mortgage_type, eligibility):
    """Load a JSON mortgage listing and record every product in it.

    static        -- passed to themortgagemeter_utils.get_page together
                     with the saved-page path
                     'static_html/natwest/fix_ftb.html'.
    url           -- address of the JSON document; also stored with each
                     product.
    mortgage_type -- type code stored with every product.
    eligibility   -- eligibility code stored with every product.

    Each entry of json['Mortgages']['mortgage'] is passed to
    mc_util.handle_mortgage_insert.  institution_code and term are
    module-level names defined outside this block.  Returns nothing; a
    document whose 'Mortgages' value is null is logged and ignored.
    """
    logger = logging.getLogger("retrieve")
    resp = themortgagemeter_utils.get_page(
        static, "static_html/natwest/fix_ftb.html", url, logger,
        tostring=True)
    json_obj = json.loads(resp)
    # Identity test: JSON null decodes to the None singleton.
    if json_obj["Mortgages"] is None:
        logger.info("URL returned nothing: " + url)
        return
    for mortgage in json_obj["Mortgages"]["mortgage"]:
        rate_percent = mortgage["initialRate"]["value"]
        svr_percent = mortgage["followOnRate"]
        apr_percent = mortgage["overallCostAPR"]
        ltv_percent = mortgage["LTV"]
        # duration is multiplied by 12 -- presumably years to months;
        # confirm against the feed.
        initial_period = str(int(mortgage["initialRate"]["duration"]) * 12)
        booking_fee = mortgage["arrangementFee"]
        mc_util.handle_mortgage_insert(
            institution_code,
            mortgage_type,
            rate_percent,
            svr_percent,
            apr_percent,
            ltv_percent,
            initial_period,
            booking_fee,
            term,
            url,
            eligibility,
            logger,
        )
Beispiel #5
0
def get_mortgage_page_details(static, base_url, suffix_url, logger):
    """Inspect one product page and log the rows of its mortgage table.

    static     -- passed straight to themortgagemeter_utils.get_page.
    base_url   -- site prefix joined to suffix_url.
    suffix_url -- path of the page below base_url.
    logger     -- logger used for diagnostics.

    A page without an element whose id is 'mtgTableData' is handed back to
    get_product_pages to be scanned for further product links.  Otherwise
    the mortgage type is derived from the URL and each table row's cells
    are logged; the cells are not parsed any further yet.  Returns nothing.
    """
    url = base_url + suffix_url
    bsobj = themortgagemeter_utils.get_page(static, 'NA', url, logger)
    mtgtables = bsobj.find_all(id='mtgTableData')
    if not mtgtables:
        # We're in a product page with potentially further product pages
        # and no mortgage info, so pass back through get_product_pages.
        get_product_pages(static, base_url, suffix_url, logger)
        return
    logger.info("URL:" + url)
    # First matching pattern wins; the order matters.
    url_patterns = (
        ('.*fixed.*', 'F'),
        ('.*offset.*', 'O'),
        ('.*tracker.*', 'T'),
    )
    for pattern, type_code in url_patterns:
        if re.match(pattern, url):
            mortgage_type = type_code
            break
    else:
        # default to variable
        logger.critical("Couldn't identify url: " + url)
        mortgage_type = 'V'
    # assume default of 25 years
    term = str(25 * 12)
    # mortgage_type and term are computed but not used yet: the per-cell
    # parsing that would consume them has not been written.
    for row in mtgtables[0].find_all('tbody')[0].find_all('tr'):
        cells = row.find_all('td')
        logger.info(cells)
        # Row 1: tells you type of mortgage and fix period "until dd/mm/year"
Beispiel #6
0
def get_mortgage_page_details(static, base_url, suffix_url, logger):
    """Inspect one product page and log the rows of its mortgage table.

    static     -- passed straight to themortgagemeter_utils.get_page.
    base_url   -- site prefix joined to suffix_url.
    suffix_url -- path of the page below base_url.
    logger     -- logger used for diagnostics.

    A page without an element whose id is 'mtgTableData' is handed back to
    get_product_pages to be scanned for further product links.  Otherwise
    the mortgage type is derived from the URL and each table row's cells
    are logged; the cells are not parsed any further yet.  Returns nothing.
    """
    url = base_url + suffix_url
    bsobj = themortgagemeter_utils.get_page(static, 'NA', url, logger)
    mtgtables = bsobj.find_all(id='mtgTableData')
    if not mtgtables:
        # We're in a product page with potentially further product pages
        # and no mortgage info, so pass back through get_product_pages.
        get_product_pages(static, base_url, suffix_url, logger)
        return
    logger.info("URL:" + url)
    # First matching pattern wins; the order matters.
    url_patterns = (
        ('.*fixed.*', 'F'),
        ('.*offset.*', 'O'),
        ('.*tracker.*', 'T'),
    )
    for pattern, type_code in url_patterns:
        if re.match(pattern, url):
            mortgage_type = type_code
            break
    else:
        # default to variable
        logger.critical("Couldn't identify url: " + url)
        mortgage_type = 'V'
    # assume default of 25 years
    term = str(25 * 12)
    # mortgage_type and term are computed but not used yet: the per-cell
    # parsing that would consume them has not been written.
    for row in mtgtables[0].find_all('tbody')[0].find_all('tr'):
        cells = row.find_all('td')
        logger.info(cells)
        # Row 1: tells you type of mortgage and fix period "until dd/mm/year"
Beispiel #7
0
def process_page(static, url, mortgage_type, eligibility):
    """Load a JSON mortgage listing and record every product in it.

    static        -- passed to themortgagemeter_utils.get_page together
                     with the saved-page path
                     'static_html/natwest/fix_ftb.html'.
    url           -- address of the JSON document; also stored with each
                     product.
    mortgage_type -- type code stored with every product.
    eligibility   -- eligibility code stored with every product.

    Each entry of json['Mortgages']['mortgage'] is passed to
    mc_util.handle_mortgage_insert.  institution_code and term are
    module-level names defined outside this block.  Returns nothing; a
    document whose 'Mortgages' value is null is logged and ignored.
    """
    logger = logging.getLogger('retrieve')
    resp = themortgagemeter_utils.get_page(
        static, 'static_html/natwest/fix_ftb.html', url, logger,
        tostring=True)
    json_obj = json.loads(resp)
    # Identity test: JSON null decodes to the None singleton.
    if json_obj['Mortgages'] is None:
        logger.info('URL returned nothing: ' + url)
        return
    for mortgage in json_obj['Mortgages']['mortgage']:
        rate_percent = mortgage['initialRate']['value']
        svr_percent = mortgage['followOnRate']
        apr_percent = mortgage['overallCostAPR']
        ltv_percent = mortgage['LTV']
        # duration is multiplied by 12 -- presumably years to months;
        # confirm against the feed.
        initial_period = str(int(mortgage['initialRate']['duration']) * 12)
        booking_fee = mortgage['arrangementFee']
        mc_util.handle_mortgage_insert(institution_code, mortgage_type,
                                       rate_percent, svr_percent, apr_percent,
                                       ltv_percent, initial_period,
                                       booking_fee, term, url, eligibility,
                                       logger)
Beispiel #8
0
def halifax_ftb_page(static, url, mortgage_type, eligibility, logger):
    """Scrape a rates table page and record one mortgage per data row.

    static        -- passed to themortgagemeter_utils.get_page together
                     with the saved-page path
                     'static_html/halifax/fixed.html'.
    url           -- page address; also stored with each product.
    mortgage_type -- type code stored with every product.
    eligibility   -- eligibility code stored with every product.
    logger        -- logger for diagnostics; the 'retrieve' logger is used
                     when None is passed.

    institution_code and term are module-level names defined outside this
    block.  Returns nothing.
    """
    # The parameter used to be overwritten unconditionally; honour it and
    # keep the old logger only as a fallback.
    if logger is None:
        logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/halifax/fixed.html', url, logger)
    for tr in bsobj.find_all('tr'):
        mortgage_details = [d.encode('utf-8').strip() for d in tr.strings]
        # The row check used to run inside the string-collecting loop, so
        # a row fired once for each length from 20 to 24 and the same
        # mortgage was inserted up to five times.  Only indexes up to 14
        # are read, so handling the row once when it has at least 20
        # strings selects the same rows and values.
        if len(mortgage_details) <= 19:
            continue
        if mortgage_details[3].find('%') == -1:
            # Not a data row: the rate column holds no percentage.
            continue
        initial_period = mortgage_details[1]
        if initial_period[0] == 'x':
            # handle special case of "dummy row"
            continue
        rate_percent = mortgage_details[3][:-1]
        svr_percent = mortgage_details[6].split()[0][:-1].strip(
            '\xc2').strip('\xa0')
        apr_percent = mortgage_details[10].split()[0][:-1]
        booking_fee = mortgage_details[12][2:].replace(',', '')
        # handle special nonsense case: 'years'/'months' not preceded by
        # "<digits> " -- take the count from the leading character(s).
        if re.search(r'years', initial_period) and not re.search(
                r'[0-9]+ years', initial_period):
            initial_period = str(int(initial_period[0]) * 12)
        elif re.search(r'months', initial_period) and not re.search(
                r'[0-9]+ month', initial_period):
            initial_period = initial_period[0:2]
        else:
            initial_period = str(
                themortgagemeter_utils.get_months(initial_period, logger))
        # Column 14 is subtracted from 100 to give the LTV -- presumably
        # it holds a deposit percentage or range; confirm against the page.
        deposit_cell = mortgage_details[14]
        if len(deposit_cell.split('-')) > 1:
            ltv_percent = str(100 - int(deposit_cell.split('-')[0]))
        else:
            ltv_percent = str(100 - int(deposit_cell[0:2]))
        mc_util.handle_mortgage_insert(
            institution_code, mortgage_type, rate_percent, svr_percent,
            apr_percent, ltv_percent, initial_period, booking_fee, term,
            url, eligibility, logger)
def get_product_pages(url, mortgage_type, ltv_percent, eligibilities, logger):
    """Load a JSON mortgage list and record each product it contains.

    url           -- address of the JSON document (always fetched live).
    mortgage_type -- type code stored with every product.
    ltv_percent   -- LTV value stored with every product.
    eligibilities -- iterable of eligibility codes; each product is
                     recorded once per code.
    logger        -- logger for diagnostics.

    institution_code and term are module-level names defined outside this
    block.  Returns nothing.
    """
    resp = themortgagemeter_utils.get_page(False,
                                           '',
                                           url,
                                           logger,
                                           tostring=True)
    mortgage_list = json.loads(resp)['mortgageList']
    # An empty result is detected by comparing against the string 'none'.
    if mortgage_list == 'none':
        logger.info('URL returned nothing: ' + url)
        return
    for item in mortgage_list:
        # Fields seen on a product: mortgage_type, product_fee, ltv, offer,
        # initial_rate, homeowner_variable_rate, repayment, erc, buyer_type,
        # loan_size, overall_cost_for_comparison, payment, initial_term
        mortgage = item['mortgages']
        if mortgage['initial_term'] is None:
            # No initial term given: skip the product.
            continue
        rate_percent = mortgage['initial_rate']['rate']
        svr_percent = mortgage['homeowner_variable_rate']['rate']
        apr_percent = mortgage['overall_cost_for_comparison']['rate']
        # initial_term is multiplied by 12 -- presumably years to months;
        # confirm against the feed.
        initial_period = str(int(float(mortgage['initial_term']) * 12))
        booking_fee = mortgage['product_fee']['rate']
        for eligibility in eligibilities:
            mc_util.handle_mortgage_insert(institution_code, mortgage_type,
                                           rate_percent, svr_percent,
                                           apr_percent, ltv_percent,
                                           initial_period, booking_fee, term,
                                           'http://www.lloydsbank.com',
                                           eligibility, logger)
def chelsea_main(static, forcedelete, logger):
    """Locate the current product data file, scrape it and update the DB.

    static      -- passed on to get_product_pages.
    forcedelete -- passed on to mc_db.update_current.
    logger      -- logger for diagnostics.

    The reference script is always fetched live.  Raises IndexError if it
    contains no double-quoted value.  Returns nothing.
    """
    # http://www.thechelsea.co.uk/js/mortgage-data-ref.js names the XML
    # data file to parse, eg
    # http://www.thechelsea.co.uk/mortgages/mortage-product-data-0031.xml
    reference_js = themortgagemeter_utils.get_page(
        False, '', 'http://www.thechelsea.co.uk/js/mortgage-data-ref.js',
        logger, True)
    # The path is the first double-quoted value in the script.
    xml_path = reference_js.split('"')[1]
    get_product_pages(static, 'http://www.thechelsea.co.uk/' + xml_path,
                      logger)
    mc_db.update_current(institution_code, main.today, forcedelete, logger)
def get_product_pages(static, url, logger):
    """Parse the products XML document and record every mortgage in it.

    static -- when true, parse the saved copy at
              'static_html/tesco/Products.xml' instead of fetching url.
    url    -- address of the live products XML document.
    logger -- logger used for diagnostics and handed to the helpers.

    Every <Mortgage> element found under <purchase>/<rate type>/<LTV> is
    passed to mc_util.handle_mortgage_insert once per eligibility code
    associated with its purchase section.  Returns nothing.  A missing
    section or field raises AttributeError, as before.
    """
    logger.debug("In get_product_pages: " + url)
    # The page used to be fetched a second time up front into a variable
    # that was never read; that wasted a request and hit the network even
    # in static mode, so it is gone.
    if static:
        root = ET.parse('static_html/tesco/Products.xml').getroot()
    else:
        root = ET.fromstring(
            themortgagemeter_utils.get_page(False, '', url, logger, True))
    # Term passed with every product: 25 * 12, as a string.
    term = str(25 * 12)
    # Purchase section -> eligibility codes recorded for its products.
    purchase_eligibilities = (
        ('HousePurchase', ('NFTB', 'NMH')),
        ('Remortgage', ('NRM',)),
    )
    # Rate section -> mortgage type code.
    rate_type_codes = (
        ('FixedRate', 'F'),
        ('TrackerRate', 'T'),
    )
    for purchase, eligibilities in purchase_eligibilities:
        for rate_type, mortgage_type in rate_type_codes:
            rate_set = root.find(purchase).find(rate_type)
            for rate in rate_set.findall('LTV'):
                ltv_percent = rate.get('max')
                for mortgage in rate.findall('Mortgage'):
                    rate_percent = mortgage.find('initialRate').text
                    apr_percent = mortgage.find('APR').text
                    svr_percent = mortgage.find('variableRate').text
                    # Only the first line of the product name is used to
                    # work out the initial period.
                    name = mortgage.find('name').text.split('\n')[0]
                    initial_period = themortgagemeter_utils.get_months(
                        name, logger)
                    # Booking fee and product fee are reported as one sum.
                    booking_fee = str(
                        int(mortgage.find('bookingFee').text) +
                        int(mortgage.find('productFee').text))
                    for eligibility in eligibilities:
                        mc_util.handle_mortgage_insert(
                            institution_code, mortgage_type, rate_percent,
                            svr_percent, apr_percent, ltv_percent,
                            initial_period, booking_fee, term,
                            'http://www.tescobank.com/personal/finance/mortgages',
                            eligibility, logger)
Beispiel #12
0
def get_product_pages(static, base_url, suffix_url, logger):
    """Follow the product links on one page.

    static     -- passed straight to themortgagemeter_utils.get_page.
    base_url   -- site prefix joined to suffix_url.
    suffix_url -- path of the page to scan; appended to the module-level
                  pages_so_far list.
    logger     -- logger for diagnostics.

    Every anchor whose href matches '.*/products/.*' and is not already in
    pages_so_far is handed to get_mortgage_page_details.  Returns nothing.
    """
    page_url = base_url + suffix_url
    logger.info("In get_product_pages: " + page_url)
    bsobj = themortgagemeter_utils.get_page(static, 'NA', page_url, logger)
    pages_so_far.append(suffix_url)
    for link in bsobj.find_all('a'):
        href = link.get('href')
        if not href or not re.match('.*/products/.*', href):
            continue
        logger.info("HREF:" + href)
        # Pages already recorded in pages_so_far are not visited again.
        if href not in pages_so_far:
            get_mortgage_page_details(static, base_url, href, logger)
Beispiel #13
0
def process_page(url, ltv, eligibility, mortgage_type, logger):
    """Load a JSON rates document and record each rate it lists.

    url           -- address of the JSON document (always fetched live).
    ltv           -- not used by this function; the LTV stored comes from
                     each rate's 'MaxLoanToValue'.
    eligibility   -- eligibility code stored with every product.
    mortgage_type -- type code stored with every product.
    logger        -- logger for diagnostics.

    institution_code and term are module-level names defined outside this
    block.  Returns nothing.
    """
    # Shape of the document, as captured from the service:
    #  "Rates": [
    #    {
    #      "Apr": 3.7,
    #      "AssetUrl": "58305",
    #      "Availability": "All",
    #      "BaseRateDifferential": 0,
    #      "BookingFee": 99,
    #      "CashBackAmount": 0,
    #      "Changed": "Changed",
    #      "Eligibility": "First Time Buyer",
    #      "EligibilityFeatures": null,
    #      "ErcPercentage": 3,
    #      "FeesPayable": 499,
    #      "HasFreeLegals": false,
    #      "HasFreeValuations": false,
    #      "InitialRate": 1.94,
    #      "MaxLoanAmount": 1000000,
    #      "MaxLoanToValue": 60,
    #      "MinLoanAmount": 25000,
    #      "MinLoanToValue": 0,
    #      "MonthlyRepayment": 420.93,
    #      "MortgageType": 0,
    #      "OverpaymentAmountAllowed": 0,
    #      "ProductDescription": "2 Year Fixed Rate (First Time Buyer)",
    #      "ProductFee": 400,
    #      "RequiresExistingBorrower": false,
    #      "RequiresFirstTimeBuyer": true,
    #      "RequiresFlexAccount": false,
    #      "RequiresFurtherAdvance": false,
    #      "RequiresHomeMover": true,
    #      "RequiresOwnSolicitor": false,
    #      "RequiresRemortgage": false,
    #      "ReservationFeeScale": "",
    #      "RevertRate1": 3.99,
    #      "SpecialProductFee": 900,
    #      "Term": 24,
    #      "Withdrawn": false
    resp = themortgagemeter_utils.get_page(False, '', url, logger,
                                           tostring=True)
    rates = json.loads(resp)['Rates']
    # An empty result is detected by comparing against the string 'none'.
    if rates == 'none':
        logger.info('URL returned nothing: ' + url)
        return
    for rate in rates:
        rate_percent = str(rate['InitialRate'])
        svr_percent = str(rate['RevertRate1'])
        apr_percent = str(rate['Apr'])
        initial_period = str(rate['Term'])
        # The stored fee is the sum of three fee fields, truncated to int.
        total_fees = (rate['BookingFee'] + rate['SpecialProductFee'] +
                      rate['FeesPayable'])
        booking_fee = str(int(total_fees))
        ltv_percent = str(rate['MaxLoanToValue'])
        mc_util.handle_mortgage_insert(
            institution_code, mortgage_type, rate_percent, svr_percent,
            apr_percent, ltv_percent, initial_period, booking_fee, term,
            'http://www.nationwide.co.uk', eligibility, logger)
Beispiel #14
0
def get_product_pages(static, base_url, suffix_url, logger):
    """Follow the product links on one page.

    static     -- passed straight to themortgagemeter_utils.get_page.
    base_url   -- site prefix joined to suffix_url.
    suffix_url -- path of the page to scan; appended to the module-level
                  pages_so_far list.
    logger     -- logger for diagnostics.

    Every anchor whose href matches '.*/products/.*' and is not already in
    pages_so_far is handed to get_mortgage_page_details.  Returns nothing.
    """
    page_url = base_url + suffix_url
    logger.info("In get_product_pages: " + page_url)
    bsobj = themortgagemeter_utils.get_page(static, 'NA', page_url, logger)
    pages_so_far.append(suffix_url)
    for link in bsobj.find_all('a'):
        href = link.get('href')
        if not href or not re.match('.*/products/.*', href):
            continue
        logger.info("HREF:" + href)
        # Pages already recorded in pages_so_far are not visited again.
        if href not in pages_so_far:
            get_mortgage_page_details(static, base_url, href, logger)
Beispiel #15
0
def get_product_page(static,url):
	"""Scrape a mortgage-rates page and pass the parsed values to the insert helper.

	static -- passed to themortgagemeter_utils.get_page together with the
	          saved-page path 'static_html/first_direct/mortgage-rates'.
	url    -- page address; also passed to the insert call.

	The page, each <tbody> and the first cell of each row are printed to
	stdout with Python 2 print statements.  Returns nothing.

	NOTE(review): this looks unfinished -- see the notes on `eligibility`
	and on the position of the insert call below.
	"""
	logger = logging.getLogger('retrieve')
	bsobj = themortgagemeter_utils.get_page(static,'static_html/first_direct/mortgage-rates',url,logger)
	print bsobj
	# Tables are looked up inside every element with class 'section'.
	sections = bsobj.find_all(attrs={'class':'section'})
	for section in sections:
		#print section
		#print "============================="
		tbodys = section.find_all("tbody")
		for tbody in tbodys:
			print tbody
			trs = tbody.find_all("tr")
			for tr in trs:
				tds = tr.find_all("td")
				# Fee total and column index both restart for every row.
				booking_fee_int = 0
				count = 0
				for td in tds:
					# assume default of 25 years
					term = str(25 * 12)
					td_text = td.text.strip().encode('utf-8')
					#print count
					#print td
					# count is the zero-based column index; each column feeds one field.
					if count == 0:
						#initial_period
						initial_period = themortgagemeter_utils.get_months(td_text,logger)
						#mortgage_type F/D/T/O/V
						mortgage_type = mc_util.get_mortgage_type(td_text,logger)
						#eligibility
						# NOTE(review): nothing is assigned to `eligibility` here or
						# anywhere else in this function, yet it is passed to the insert
						# call below -- presumably a module-level name; confirm.
						print td_text
						pass
					elif count == 1:
						#ltv_percent
						ltv_percent = themortgagemeter_utils.get_percentage(td_text,logger)
					elif count == 2:
						#rate_percent
						rate_percent = themortgagemeter_utils.get_percentage(td_text,logger)
					elif count == 3:
						#svr_percent
						svr_percent = themortgagemeter_utils.get_percentage(td_text,logger)
					elif count == 4:
						#apr_percent
						apr_percent = themortgagemeter_utils.get_percentage(td_text,logger)
					elif count == 5:
						# Columns 5 and 6 are both money amounts; they are summed into one fee.
						booking_fee_int = booking_fee_int + int(themortgagemeter_utils.get_money(td_text,logger))
					elif count == 6:
						booking_fee_int = booking_fee_int + int(themortgagemeter_utils.get_money(td_text,logger))

					count = count + 1
				booking_fee = str(booking_fee_int)
			# NOTE(review): this call sits at <tbody> level, after the row loop, so it
			# runs once per <tbody> with the values left by the last row processed
			# rather than once per row -- confirm whether per-row insertion was intended.
			# institution_code is not defined in this function either (presumably module-level).
			mc_util.handle_mortgage_insert(institution_code,mortgage_type,rate_percent,svr_percent,apr_percent,ltv_percent,initial_period,booking_fee,term,url,eligibility,logger)
Beispiel #16
0
def _halifax_initial_period_months(raw_period, logger):
    """Turn the text of a 'Term' cell into a number-of-months string."""
    # handle special nonsense case: 'years'/'months' not preceded by
    # "<digits> " -- take the count from the leading character(s).
    if re.search(r'years', raw_period) and not re.search(
            r'[0-9]+ years', raw_period):
        return str(int(raw_period[0]) * 12)
    if re.search(r'months', raw_period) and not re.search(
            r'[0-9]+ month', raw_period):
        return raw_period[0:2]
    return str(themortgagemeter_utils.get_months(raw_period, logger))


def halifax_remortgage_page(static, url, mortgage_type, eligibility, logger):
    """Scrape a remortgage rates table and record one mortgage per data row.

    static        -- passed to themortgagemeter_utils.get_page together
                     with the saved-page path
                     'static_html/halifax/remortgage-fixed-75ltv.asp'.
    url           -- page address; also stored with each product.
    mortgage_type -- type code stored with every product.
    eligibility   -- eligibility code stored with every product.
    logger        -- logger for diagnostics.

    Rows of 20-24 strings and rows of exactly 25 strings are parsed; the
    25-string layout has its SVR, APR, fee and LTV columns two positions
    further right.  Other rows with more than three strings are logged at
    debug level.  institution_code and term are module-level names defined
    outside this block.  Returns nothing.
    """
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/halifax/remortgage-fixed-75ltv.asp', url,
        logger)
    for tr in bsobj.find_all('tr'):
        mortgage_details = [d.encode('utf-8').strip() for d in tr.strings]
        # Sample rows as captured from the page (header row, then data row):
        #['\n', 'Term', 'Initial rate', '\xc2\xa0', 'Halifax Homeowner Variable rate thereafter', '\xc2\xa0', 'For the remainder of the term from', '\xc2\xa0', 'The overall cost for comparison is', '\xc2\xa0', 'Product fee', '\xc2\xa0', 'LTV\xc2\xa0\xc2\xa0\xc2\xa0\xc2\xa0\xc2\xa0\xc2\xa0 ', 'Early Repayment Charges until', '\xc2\xa0', 'Loan amount', '\n', 'Extra benefits', '\xc2\xa0', '\n', '\xc2\xa0', '\n']
        #['\n', '2 years', '\n', '4.44%', '\n', 'Currently', ' \xc2\xa03.99%', '\n', '30/11/2014', '\n', '4.3% APR', '\n', '\xc2\xa3995', '\n', '75-80%', '\n', '30/11/2014', '\n', '\xc2\xa30-\xc2\xa31m', '\n', 'Halifax Remortgage Service*', '\n', '\n']
        logger.debug(mortgage_details)
        field_count = len(mortgage_details)
        # The two supported layouts used to be handled by two copies of the
        # same code differing only in column positions; one offset now
        # selects the layout.
        if 19 < field_count < 25:
            shift = 0
        elif field_count == 25:
            shift = 2
        else:
            if field_count > 3:
                logger.debug('Should this be handled?: %s', mortgage_details)
            continue
        if mortgage_details[3].find('%') == -1:
            # Not a data row: the rate column holds no percentage.
            continue
        rate_percent = mortgage_details[3][:-1]
        svr_percent = mortgage_details[6 + shift].split()[0][:-1].strip(
            '\xc2').strip('\xa0')
        apr_percent = mortgage_details[10 + shift].split()[0][:-1]
        booking_fee = mortgage_details[12 + shift][2:].replace(',', '')
        initial_period = _halifax_initial_period_months(
            mortgage_details[1], logger)
        # The LTV cell is a range such as '75-80%'; the upper bound is kept.
        ltv_percent = mortgage_details[14 + shift].split('-')[1].strip('%')
        mc_util.handle_mortgage_insert(
            institution_code, mortgage_type, rate_percent, svr_percent,
            apr_percent, ltv_percent, initial_period, booking_fee, term,
            url, eligibility, logger)
def halifax_ftb_page(static, url, mortgage_type, eligibility, logger):
    """Scrape a rates table page and record one mortgage per data row.

    static        -- passed to themortgagemeter_utils.get_page together
                     with the saved-page path
                     'static_html/halifax/fixed.html'.
    url           -- page address; also stored with each product.
    mortgage_type -- type code stored with every product.
    eligibility   -- eligibility code stored with every product.
    logger        -- logger for diagnostics; the 'retrieve' logger is used
                     when None is passed.

    institution_code and term are module-level names defined outside this
    block.  Returns nothing.
    """
    # The parameter used to be overwritten unconditionally; honour it and
    # keep the old logger only as a fallback.
    if logger is None:
        logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/halifax/fixed.html', url, logger)
    for tr in bsobj.find_all('tr'):
        mortgage_details = [d.encode('utf-8').strip() for d in tr.strings]
        # The row check used to run inside the string-collecting loop, so
        # a row fired once for each length from 20 to 24 and the same
        # mortgage was inserted up to five times.  Only indexes up to 14
        # are read, so handling the row once when it has at least 20
        # strings selects the same rows and values.
        if len(mortgage_details) <= 19:
            continue
        if mortgage_details[3].find('%') == -1:
            # Not a data row: the rate column holds no percentage.
            continue
        initial_period = mortgage_details[1]
        if initial_period[0] == 'x':
            # handle special case of "dummy row"
            continue
        rate_percent = mortgage_details[3][:-1]
        svr_percent = mortgage_details[6].split()[0][:-1].strip(
            '\xc2').strip('\xa0')
        apr_percent = mortgage_details[10].split()[0][:-1]
        booking_fee = mortgage_details[12][2:].replace(',', '')
        # handle special nonsense case: 'years'/'months' not preceded by
        # "<digits> " -- take the count from the leading character(s).
        if re.search(r'years', initial_period) and not re.search(
                r'[0-9]+ years', initial_period):
            initial_period = str(int(initial_period[0]) * 12)
        elif re.search(r'months', initial_period) and not re.search(
                r'[0-9]+ month', initial_period):
            initial_period = initial_period[0:2]
        else:
            initial_period = str(
                themortgagemeter_utils.get_months(initial_period, logger))
        # Column 14 is subtracted from 100 to give the LTV -- presumably
        # it holds a deposit percentage or range; confirm against the page.
        deposit_cell = mortgage_details[14]
        if len(deposit_cell.split('-')) > 1:
            ltv_percent = str(100 - int(deposit_cell.split('-')[0]))
        else:
            ltv_percent = str(100 - int(deposit_cell[0:2]))
        mc_util.handle_mortgage_insert(
            institution_code, mortgage_type, rate_percent, svr_percent,
            apr_percent, ltv_percent, initial_period, booking_fee, term,
            url, eligibility, logger)
Beispiel #18
0
def get_product_pages(static, base_url, ext):
    """Visit every savings-account product linked from a listing page.

    static   -- passed to themortgagemeter_utils.get_page together with
                the saved-page path 'static_html/hsbc/savings-accounts.html'.
    base_url -- site prefix joined to ext and to each product href.
    ext      -- path of the listing page below base_url.

    For each matching link a fresh savings data object is created and
    handed to get_product_page_interest_rates and get_product_page_details.
    Returns nothing.
    """
    logger = logging.getLogger('retrieve')
    listing = themortgagemeter_utils.get_page(
        static, 'static_html/hsbc/savings-accounts.html', base_url + ext,
        logger)
    # Product links are the anchors inside elements with class 'doormatCol'.
    for column in listing.find_all(attrs={'class': 'doormatCol'}):
        for link in column.find_all('a'):
            href = link.get('href')
            if not (href and re.match('.*savings-accounts/.*', href)):
                continue
            product_url = base_url + href
            savings_data = savings_util.get_savings_data_object()
            # Set online to and branch to default to Y for HSBC
            savings_data['online'] = 'Y'
            savings_data['branch'] = 'Y'
            get_product_page_interest_rates(product_url + '/interest-rates',
                                            savings_data)
            get_product_page_details(product_url + '/details', savings_data)
def get_product_pages(static, base_url, ext):
    """Visit every savings-account product linked from a listing page.

    static   -- passed to themortgagemeter_utils.get_page together with
                the saved-page path 'static_html/hsbc/savings-accounts.html'.
    base_url -- site prefix joined to ext and to each product href.
    ext      -- path of the listing page below base_url.

    For each matching link a fresh savings data object is created and
    handed to get_product_page_interest_rates and get_product_page_details.
    Returns nothing.
    """
    logger = logging.getLogger('retrieve')
    listing = themortgagemeter_utils.get_page(
        static, 'static_html/hsbc/savings-accounts.html', base_url + ext,
        logger)
    # Product links are the anchors inside elements with class 'doormatCol'.
    for column in listing.find_all(attrs={'class': 'doormatCol'}):
        for link in column.find_all('a'):
            href = link.get('href')
            if not (href and re.match('.*savings-accounts/.*', href)):
                continue
            product_url = base_url + href
            savings_data = savings_util.get_savings_data_object()
            # Set online to and branch to default to Y for HSBC
            savings_data['online'] = 'Y'
            savings_data['branch'] = 'Y'
            get_product_page_interest_rates(product_url + '/interest-rates',
                                            savings_data)
            get_product_page_details(product_url + '/details', savings_data)
Beispiel #20
0
def get_product_pages(url, mortgage_type, ltv_percent, eligibilities, logger):
    """Load a JSON mortgage list and record each product it contains.

    url           -- address of the JSON document (always fetched live).
    mortgage_type -- type code stored with every product.
    ltv_percent   -- LTV value stored with every product.
    eligibilities -- iterable of eligibility codes; each product is
                     recorded once per code.
    logger        -- logger for diagnostics.

    institution_code and term are module-level names defined outside this
    block.  Returns nothing.
    """
    resp = themortgagemeter_utils.get_page(False, '', url, logger,
                                           tostring=True)
    mortgage_list = json.loads(resp)['mortgageList']
    # An empty result is detected by comparing against the string 'none'.
    if mortgage_list == 'none':
        logger.info('URL returned nothing: ' + url)
        return
    for item in mortgage_list:
        # Fields seen on a product: mortgage_type, product_fee, ltv, offer,
        # initial_rate, homeowner_variable_rate, repayment, erc, buyer_type,
        # loan_size, overall_cost_for_comparison, payment, initial_term
        mortgage = item['mortgages']
        if mortgage['initial_term'] is None:
            # No initial term given: skip the product.
            continue
        rate_percent = mortgage['initial_rate']['rate']
        svr_percent = mortgage['homeowner_variable_rate']['rate']
        apr_percent = mortgage['overall_cost_for_comparison']['rate']
        # initial_term is multiplied by 12 -- presumably years to months;
        # confirm against the feed.
        initial_period = str(int(float(mortgage['initial_term']) * 12))
        booking_fee = mortgage['product_fee']['rate']
        for eligibility in eligibilities:
            mc_util.handle_mortgage_insert(
                institution_code, mortgage_type, rate_percent, svr_percent,
                apr_percent, ltv_percent, initial_period, booking_fee, term,
                'http://www.lloydsbank.com', eligibility, logger)
Beispiel #21
0
def get_product_page_interest_rates(url,savings_data):
	"""Scrape the interest-rate tables of one savings product page and insert a row per rate tier.

	url          -- page URL to fetch; also passed to the insert as the product URL
	savings_data -- savings-data dict (as produced by savings_util.get_savings_data_object
	                in the caller). It is MUTATED in place: product-level flags set while
	                reading one table stay set for the following tables.

	For every <table> on the page the 'summary' attribute decides which product-level
	flags to set. Each table row after the first is then read column by column into a
	copy of savings_data, and the copy is handed to savings_util.handle_savings_insert.

	Returns None. Calls exit() on an unrecognised summary, an unparsable regular-saver
	cell, or an empty summary. Uses institution_code, which is not defined in this function.
	"""
	logger = logging.getLogger('retrieve ' + url)
	bsobj = themortgagemeter_utils.get_page(False,'',url,logger)
	# Any URL containing 'isa' marks the product as an ISA.
	if re.match('.*isa.*',url):
		savings_data['isa'] = 'Y'
	for t in bsobj.find_all('table'):
		# Get all tables, then match on summary == "Interest rates:.*", and set up variables accordingly.
		# NOTE(review): if a table has no summary attribute, t.get presumably returns None and
		# .encode raises here, so the else/exit() at the bottom would only be reached for an
		# empty summary string -- confirm.
		summary = t.get('summary').encode('utf-8').lower()
		if summary:
			# Set up data for this page
			# NOTE(review): re.match returns None when the summary does not contain
			# 'interest rates: ', in which case .group(1) raises AttributeError.
			summary_info = re.match('.*interest rates: (.*)',summary).group(1)
			# NOTE(review): the parenthesised literals in the next three tests are plain
			# strings, not one-element tuples, so `in` is a SUBSTRING test
			# (summary_info contained in the literal), not an equality test.
			if summary_info in ("cash e-isa#"):
				savings_data['isa'] = 'Y'
			elif summary_info in ("fixed rate saver - monthly interest"):
				savings_data['variability'] = 'F'
				savings_data['interest_paid'] = 'M'
			elif summary_info in ("fixed rate saver - annual interest"):
				savings_data['variability'] = 'F'
				savings_data['interest_paid'] = 'Y'
			elif "regular saver" in summary_info:
				savings_data['regular_saver'] = 'Y'
				savings_data['interest_paid'] = 'Y'
			elif "online bonus" in summary_info:
				savings_data['bonus']  = 'Y'
				savings_data['branch'] = 'N'
				savings_data['bonus_frequency_period'] = '1'
				savings_data['bonus_frequency_type']   = 'M'
				# skip bonus for HSBC- it's complicated - probably needs its own function TODO
				# The flags above stay set on savings_data even though this table's rows are skipped.
				continue
			elif "flexible saver" in summary_info:
				savings_data['variability']     = 'V'
			else:
				# Unknown table type: record an alert and stop the whole process.
				themortgagemeter_utils.record_alert('NEED TO HANDLE: ' + summary_info,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
				exit()
			# tr_count is the index of the current row; row 0 is skipped below.
			tr_count = 0
			for tr in t.find_all('tr'):
				# This is a new savings product, so clone the data at this point and use that from here.
				this_savings_data = savings_data.copy()
				if this_savings_data['bonus'] == 'Y':
					# Placeholder: bonus rows get no special treatment yet.
					pass
				# td_count is the logical column index. Regular-saver tables start at -1,
				# so their first counted cell is ignored and the second is treated as column 0.
				if this_savings_data['regular_saver'] == 'Y':
					td_count = -1
				else:
					td_count = 0
				if tr_count >= 1:
					# If tax-free, this will be true
					for td in tr.find_all('td'):
						td_style = td.get('style')
						if td_style != None:
							# Normalise the style string (lower-case, spaces removed; Python 2
							# str.translate signature) before comparing.
							td_style = td_style.lower().encode('utf-8').translate(None, ' ')
							# Cells styled exactly 'vertical-align:middle' are skipped without being counted.
							if td_style == 'vertical-align:middle':
								continue
						logger.info(td)
						v = td.text.encode('utf-8').lower().strip()
						if td_count == 0:
							if this_savings_data['regular_saver'] == 'Y':
								logger.info('regular_saver: ' + v)
								# Words 0 and 2 of the cell are taken as min and max amounts, each with its
								# first two bytes dropped (presumably a UTF-8 pound sign -- confirm).
								this_savings_data['regular_saver_min_amt'] = v.split()[0][2:]
								this_savings_data['regular_saver_max_amt'] = v.split()[2][2:]
								# Word 4 must be "month"; anything else is an alert and a hard stop.
								if v.split()[4] == "month":
									this_savings_data['regular_saver_frequency_period'] = '1'
									this_savings_data['regular_saver_frequency_type'] = 'M'
								else:
									themortgagemeter_utils.record_alert('ERROR: reg saver not parsed: ' + v,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
									exit()
							else:
								# if it's got a + at the end, it's a min, if it's "up to" it's a max.
								# The parsing itself is delegated to savings_util.get_money_range;
								# element 0 of its result is used as min, element 1 as max.
								res = savings_util.get_money_range(v,logger)
								this_savings_data['min_amt'] = res[0]
								this_savings_data['max_amt'] = res[1]
						elif td_count == 1:
							# we don't bother with net_percent
							pass
						elif td_count == 2:
							# gross %
							this_savings_data['gross_percent'] = v
						elif td_count == 3:
							# AER %
							this_savings_data['aer_percent'] = v
						td_count += 1
					# Some trs have no tds; we ignore those.
					if td_count > 0:
						# Now store this product
						# TODO: fixed savings?
						logger.info(this_savings_data)
						isa = this_savings_data['isa']
						regular_saver = this_savings_data['regular_saver']
						regular_saver_frequency_period = this_savings_data['regular_saver_frequency_period']
						regular_saver_frequency_type = this_savings_data['regular_saver_frequency_type']
						regular_saver_min_amt = this_savings_data['regular_saver_min_amt']
						regular_saver_max_amt = this_savings_data['regular_saver_max_amt']
						bonus = this_savings_data['bonus']
						bonus_frequency_period = this_savings_data['bonus_frequency_period']
						bonus_frequency_type = this_savings_data['bonus_frequency_type']
						online = this_savings_data['online']
						branch = this_savings_data['branch']
						variability = this_savings_data['variability']
						min_amt = this_savings_data['min_amt']
						max_amt = this_savings_data['max_amt']
						gross_percent = this_savings_data['gross_percent']
						aer_percent = this_savings_data['aer_percent']
						interest_paid = this_savings_data['interest_paid']
						child = this_savings_data['child']
						savings_period = this_savings_data['savings_period']
						savings_util.handle_savings_insert(institution_code, isa, regular_saver, regular_saver_frequency_period, regular_saver_frequency_type, regular_saver_min_amt, regular_saver_max_amt, bonus, bonus_frequency_period, bonus_frequency_type, online, branch, variability, savings_period, min_amt, max_amt, gross_percent, aer_percent, child, interest_paid, url, logger)
				else:
					# Row 0: nothing is read from it, only counted.
					tr_count += 1
					continue
				tr_count += 1
		else:
			# Empty summary: stop the whole process (no alert is recorded on this path).
			exit()
Beispiel #22
0
def get_product_page_details(url,savings_data):
	"""Fetch the product details page at url; nothing is extracted from it yet.

	savings_data is accepted but not read or modified here.
	"""
	retrieve_logger = logging.getLogger('retrieve')
	details_page = themortgagemeter_utils.get_page(False,'',url,retrieve_logger)
def get_product_pages(static,url,logger):
	"""Read the Chelsea mortgage product XML and insert one row per product per eligibility.

	static -- when true, parse the local file static_html/chelsea/mortage-product-data-0031.xml
	          instead of fetching url (the SVR script is fetched from the network either way)
	url    -- URL of the product XML; also stored as the product URL on every insert
	logger -- logger handed to the page fetches and helper calls

	Returns None. Raises Exception if a product is about to be inserted but no SVR could
	be read from the site's mortgage-finder.js. Uses institution_code, which is not
	defined in this function.
	"""
	logger.debug("In get_product_pages: " + url)
	# Get the svr first (it's global): one value taken from a line of the site's JS
	# of the form: var chelseaSVR = "<number>%"
	svr_percent = None
	svr_re = re.compile(r'^var chelseaSVR = "([^%]*)%".*$')
	lines = themortgagemeter_utils.get_page(False,'','http://www.thechelsea.co.uk/js/mortgage-finder.js',logger,True).split('\n')
	for line in lines:
		svr_match = svr_re.match(line)
		if svr_match is not None:
			svr_percent = svr_match.group(1)
			break
	# Now get the mortgage data
	if static:
		tree = ET.parse('static_html/chelsea/mortage-product-data-0031.xml')
		root = tree.getroot()
	else:
		root = ET.fromstring(themortgagemeter_utils.get_page(False,'',url,logger,True))
	term = str(25 * 12)
	# Raw mortgageType attribute -> stored type code. Anything not listed is 'V' (variable).
	mortgage_type_codes = {
		'fixed': 'F',
		'fixedoffset': 'F',
		'ftbfixed': 'F',
		'ftbfixedoffset': 'F',
		# Presumably fixed, then a tracker??
		'fixedtracker': 'F',
		'tracker': 'T',
		'trackeroffset': 'T',
		'offset': 'T',
		'mixedoffset': 'T',
		# rollover? no example, but exists in the docs
		'rollover': 'T',
		# meaning of "mixed" is unclear; treated as tracker
		'mixed': 'T',
	}
	for product in root.findall('product'):
		# Percent attributes carry a trailing '%...'; keep only the number before it.
		apr_percent = product.get('apr').split('%')[0]
		rate_percent = product.get('interestRate').split('%')[0]
		ltv_percent = product.get('maxLTV').split('%')[0]
		mortgage_type_raw = product.get('mortgageType')
		name = product.get('name')
		booking_fee = product.get('completionFee')
		if booking_fee == '':
			booking_fee = '0'
		existing_borrower = product.get('existingBorrower')
		new_borrower = product.get('newBorrower')
		first_time_buyer = product.get('firstTimeBuyer')
		moving_home = product.get('movingHome')
		remortgaging = product.get('remortgaging')
		# Gathered data, now let's marshall before submitting.
		mortgage_type = mortgage_type_codes.get(mortgage_type_raw,'V')

		# Get a mortgage eligibility dictionary to submit.
		mortgage_eligibility_dict = mc_util.get_mortgage_eligibility_dict()
		if existing_borrower == 'Y':
			mortgage_eligibility_dict['existing_customer'] = 'B'
		# NOTE(review): newBorrower sets the same 'moving_home' key as movingHome below --
		# kept as in the original; confirm this mapping is intended.
		if new_borrower == 'Y':
			mortgage_eligibility_dict['moving_home'] = 'B'
		if first_time_buyer == 'Y':
			mortgage_eligibility_dict['ftb'] = 'B'
		if moving_home == 'Y':
			mortgage_eligibility_dict['moving_home'] = 'B'
		if remortgaging == 'Y':
			mortgage_eligibility_dict['remortgage']= 'B'
		eligibilities = mc_util.validate_eligibility_dict(mortgage_eligibility_dict,[])

		# use get_months to determine period
		initial_period = themortgagemeter_utils.get_months(name,logger)

		for eligibility in eligibilities:
			# Fail with a clear message (rather than an unbound-variable error) when the
			# SVR line was not found in the script; checked only once an insert is due.
			if svr_percent is None:
				raise Exception('chelseaSVR not found in http://www.thechelsea.co.uk/js/mortgage-finder.js')
			mc_util.handle_mortgage_insert(institution_code,mortgage_type,rate_percent,svr_percent,apr_percent,ltv_percent,initial_period,booking_fee,term,url,eligibility,logger)
def _skipton_read_product_table(static,url,logger,rate_td_index):
	"""Fetch one product page and read rates and fees from the first table in #centralContent.

	Values are taken by row position: row 3 initial rate (cell rate_td_index), row 4 SVR
	(cell 1), row 5 APR (cell 0), row 7 application fee (cell 0), row 8 booking fee (cell 0).
	Any value whose row is absent keeps the default 0.

	Returns (rate_percent, svr_percent, apr_percent, application_fee, booking_fee).
	"""
	subpage_bsobj = themortgagemeter_utils.get_page(static,'N/A',url,logger)
	table = subpage_bsobj.find_all(attrs={'id' : 'centralContent'},limit=1)[0].find_all('table',limit=1)[0]
	rate_percent    = 0
	svr_percent     = 0
	apr_percent     = 0
	application_fee = 0
	booking_fee     = 0
	tr_count = 0
	for tr in table.find_all('tr'):
		tr_count += 1
		if tr_count == 3:
			rate_percent = themortgagemeter_utils.get_percentage(tr.find_all('td')[rate_td_index].string,logger)
		elif tr_count == 4:
			svr_percent = themortgagemeter_utils.get_percentage(tr.find_all('td')[1].string,logger)
		elif tr_count == 5:
			apr_percent = themortgagemeter_utils.get_percentage(tr.find_all('td')[0].string,logger)
		elif tr_count == 7:
			# Drop the first two bytes of the encoded cell (presumably the UTF-8 pound sign) and any commas.
			application_fee = tr.find_all('td')[0].string.encode('utf_8')[2:].replace(',','')
		elif tr_count == 8:
			booking_fee = tr.find_all('td')[0].string.encode('utf_8')[2:].replace(',','')
	return (rate_percent,svr_percent,apr_percent,application_fee,booking_fee)

def get_product_pages(static,base_url,suffix,mortgage_type,href_re):
	"""Walk a Skipton mortgage listing page and insert a row per linked product per eligibility.

	static        -- passed to get_page to choose between static HTML and a live fetch
	base_url      -- site root; prefixed to the suffix and to every product link
	suffix        -- path of the listing page
	mortgage_type -- mortgage type code, passed straight through to the insert
	href_re       -- pattern used to select the product anchors on the listing page

	Each link is classified with fr_re, tracker_re, discount_re and ftb_re (not defined in
	this function), in that order. Group 1 of the matching pattern gives the initial period
	and group 2 the LTV. First-time-buyer links only record an alert and are skipped.

	Returns None. Raises Exception for a link that matches none of the patterns. Uses
	institution_code, which is not defined in this function.
	"""
	logger = logging.getLogger('retrieve')
	bsobj = themortgagemeter_utils.get_page(static,'static_html/skipton/fixed_rate_mortgages.html',base_url + suffix,logger)
	term = str(25 * 12)
	# eligibilities - first time buyers have own page, so all others?
	eligibilities   = ['NMH','NRM','ERM','EMH','EBM','EED']
	# (link pattern, initial-period multiplier, cell index of the initial rate), in match order.
	# NOTE(review): tracker and discount links use a multiplier of 10 where fixed uses 12;
	# kept exactly as in the original -- confirm whether 12 was intended.
	product_kinds = (
		(fr_re,12,1),
		(tracker_re,10,0),
		(discount_re,10,1),
	)
	for anchor in bsobj.find_all(href=href_re):
		# Get from the anchor the ltv and the term
		link = anchor.get('href')
		url = base_url + link
		link_match = None
		for link_re, period_factor, rate_td_index in product_kinds:
			link_match = re.search(link_re,link)
			if link_match is not None:
				break
		if link_match is None:
			if re.search(ftb_re,link):
				themortgagemeter_utils.record_alert('ERROR: SKIPTON first time buyer seen for the first time',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
				continue
			raise Exception("Unhandled link " + url,'')
		initial_period = str(int(link_match.group(1)) * period_factor)
		ltv_percent = str(int(link_match.group(2)))
		# Now go to link
		rate_percent,svr_percent,apr_percent,application_fee,booking_fee = _skipton_read_product_table(static,url,logger,rate_td_index)
		# The fees are still the int default 0 when the table has fewer than 8 rows;
		# normalise to text so the string handling below cannot fail on an int.
		booking_fee = str(booking_fee)
		application_fee = str(application_fee)
		# Sometimes it's "No Fee" on the page; with its first two characters dropped that reads "Fee".
		if booking_fee.strip() == "Fee":
			booking_fee = "0"
		if application_fee.strip() == "Fee":
			application_fee = "0"
		# The stored booking fee is the sum of the booking and application fees.
		booking_fee = str(int(booking_fee) + int(application_fee))
		for eligibility in eligibilities:
			mc_util.handle_mortgage_insert(institution_code,mortgage_type,rate_percent,svr_percent,apr_percent,ltv_percent,initial_period,booking_fee,term,url,eligibility,logger)
Beispiel #25
0
def process_page(url, logger):
    """Fetch the Nationwide mortgage script, parse it and insert a row per product per eligibility.

    url    -- URL of the script that defines the product list
    logger -- logger handed to the page fetch and to the insert helper

    The response is a JavaScript assignment rather than strict JSON, e.g.:

        var mortgages = [
        {
        data: "65543",
        name: "2 year fixed",
        offer: " ",
        customer: "Existing Customer",
        type: "Fixed Rate",
        lowltv: 0,
        highltv: 60,
        initialrate: 2.19,
        until: 2,
        rateafter: 4.95,
        apr: 4.6,
        fee: 499,
        minloan: 1,
        maxloan: 1000000,
        links: "/personal/mortgages/all-our-mortgages/fixed-rate-mortgages/mortgages-fixed-rate-2year-60ltv"},{
        [...]
        ]

    Returns None. Raises Exception when a product's 'customer' value is not one of
    "Existing Customer", "First Time Buyer" or "New Customer". Uses institution_code
    and term, which are not defined in this function.
    """
    resp = themortgagemeter_utils.get_page(False,
                                           '',
                                           url,
                                           logger,
                                           tostring=True)
    # Tidy up json: quote the bare property names so json.loads accepts the text.
    # http://stackoverflow.com/questions/4033633/handling-lazy-json-in-python-expecting-property-name
    resp = re.sub(r"{\s*'?(\w)", r'{"\1', resp)
    resp = re.sub(r",\s*'?(\w)", r',"\1', resp)
    resp = re.sub(r"(\w)'?\s*:", r'\1":', resp)
    resp = re.sub(r":\s*'(\w+)'\s*([,}])", r':"\1"\2', resp)
    # Drop the first 16 characters (the length of the 'var mortgages = ' prefix shown above).
    json_obj = json.loads(resp[16:])
    logger.debug('%s', json_obj)
    for mortgage in json_obj:
        customer = mortgage['customer']
        if customer == "Existing Customer":
            eligibilities = ("EMH", "EBM", "EDE", "EED")
        elif customer == "First Time Buyer":
            eligibilities = ("NFTB", )
        elif customer == "New Customer":
            eligibilities = ("NRM", "NMH")
        else:
            # Report the value that was actually unrecognised.
            raise Exception('Unrecognised eligibility: %r' % (customer, ),
                            customer)
        mortgage_type = mc_util.get_mortgage_type(mortgage['name'], logger)
        rate_percent = str(mortgage['initialrate'])
        svr_percent = str(mortgage['rateafter'])
        apr_percent = str(mortgage['apr'])
        # 'until' is multiplied by 12 (presumably years -> months) and truncated to an int.
        initial_period = str(int(mortgage['until'] * 12.0))
        booking_fee = str(mortgage['fee'])
        ltv_percent = str(mortgage['highltv'])
        for eligibility in eligibilities:
            logger.debug('%s', eligibility)
            mc_util.handle_mortgage_insert(institution_code, mortgage_type,
                                           rate_percent, svr_percent,
                                           apr_percent, ltv_percent,
                                           initial_period, booking_fee, term,
                                           'http://www.nationwide.co.uk',
                                           eligibility, logger)
Beispiel #26
0
def process_more_info_page(savings_data,url,logger):
	"""Fetch one Halifax product detail page and return the savings-data dicts scraped from it.

	savings_data -- savings-data dict pre-filled by the caller. Each result is a copy of
	                it with rates/amounts/periods filled in. NOTE: the online-saver,
	                regular-saver and everyday-saver branches also set product-level keys
	                on the dict that was passed in, before copying.
	url          -- detail page URL; also used to pick the parsing strategy
	logger       -- logger handed to the page fetch and helper calls

	The strategy is picked in this order: ISA pages (savings_data['isa'] == 'Y'), then URLs
	matching fixed-online-saver / tracker-bond / fixed-saver, /online-saver/,
	/regular-saver/, /everyday-saver/, /branch-accounts/.

	Returns a list of dicts. For /branch-accounts/ URLs the empty list is returned
	immediately. Calls exit() for an unhandled URL, for unexpected table layouts, and when
	any other branch produced no results.
	"""
	bsobj = themortgagemeter_utils.get_page(False,'static_html/halifax/savings-accounts.html',url,logger)
	#print bsobj
	savings_array = []
	#print "Passed in:"
	#print savings_data
	print url
	if savings_data['isa'] == 'Y':
		# ISA pages: the rates are in tables under the "Summary box" heading.
		for i1 in bsobj.find_all("h2",text="Summary box"):
			# Calling the parent tag is presumably the BeautifulSoup shorthand for
			# find_all(), i.e. this walks all descendants of the heading's parent -- confirm.
			for i2 in i1.parent():
				if i2.find_all("table") != []:
					tabs = i2.find_all("table")
					if re.match(".*isa-saver-fixed.*",url):
						# Fixed ISA: exactly two tables are expected; the first is discarded.
						if len(tabs) != 2:
							themortgagemeter_utils.record_alert('ERROR: too many tabs in isa',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
							exit()
						else:
							tabs.pop(0)
						for tab in tabs:
							tbody = tab.find_all("tbody")[0]
							trs = tbody.find_all("tr")
							# One product per row: cell 0 is the period, cell 1 the AER.
							for tr in trs:
								savings_data_tmp = savings_data.copy()
								tds = tr.find_all("td")
								savings_data_tmp['savings_period'] = themortgagemeter_utils.get_months(tds[0].text.strip().encode('utf-8'),logger)
								savings_data_tmp['aer_percent'] = themortgagemeter_utils.get_percentage(tds[1].text.strip().encode('utf-8'),logger)
								# No separate gross figure is read; the AER is reused.
								savings_data_tmp['gross_percent'] = savings_data_tmp['aer_percent']
								savings_array.append(savings_data_tmp)
					else:
						# Other ISAs: at most one table is expected.
						if len(tabs) > 1:
							#print tabs
							themortgagemeter_utils.record_alert('ERROR: too many tabs in isa',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
							exit()
						for tab in tabs:
							#print tab
							for tr in tab.find_all("tr"):
								ths = tr.find_all("th")
								tds = tr.find_all("td")
								if len(ths) > 0 and len(tds) > 0:
									# Layout 1: label in a <th>, value in the first <td>.
									th = tr.find_all("th")[0]
									td = tr.find_all("td")[0]
									th_text = th.text.lower()
									td_text = td.text.lower()
									if re.match('interest rates.*',th_text):
										#print "IR:" + td_text
										pc = themortgagemeter_utils.get_percentage(td_text,logger)
										savings_data_tmp = savings_data.copy()
										savings_data_tmp['gross_percent'] = pc
										savings_data_tmp['aer_percent'] = pc
										savings_array.append(savings_data_tmp)
								else:
									if len(ths) == 0 and len(tds) > 0:
										# Layout 2: label in the first <td>, value in the second.
										td1 = tds[0]
										td2 = tds[1]
										td1_text = td1.text.lower()
										td2_text = td2.text.lower()
										if re.match('interest rates.*',td1_text):
											pc = themortgagemeter_utils.get_percentage(td2_text,logger)
											savings_data_tmp = savings_data.copy()
											savings_data_tmp['gross_percent'] = pc
											savings_data_tmp['aer_percent'] = pc
											savings_array.append(savings_data_tmp)
									else:
										# Row with no <td> cells: treated as an unknown layout.
										themortgagemeter_utils.record_alert('ERROR: unhandled case: ' + url,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
										exit()
	elif re.match('.*fixed-online-saver.*',url) or re.match('.*fixed-saver.*',url) or re.match('.*tracker-bond.*',url):
		# Locate the heading that introduces the rates table(s); code records which
		# layout applies ("FOS" fixed savers, "TB" tracker bond).
		if re.match('.*fixed-online-saver.*',url) or re.match('.*fixed-saver.*',url):
			#print bsobj
			code = "FOS"
			i1s = bsobj.find_all("h3",text="Current Rates")
			# Fall back to the lower-case spelling of the heading.
			if i1s== []:
				i1s = bsobj.find_all("h3",text="Current rates")
		elif re.match('.*tracker-bond.*',url):
			#print bsobj
			code = "TB"
			i1s = []
			res = bsobj.find_all("h4")
			# Only the first h4 with exactly this text is used.
			for i in res:
				#print i.text
				if i.text == "Current rates and apply":
					i1s.append(i)
					break
		# A missing heading only records an alert; the empty-result check at the end
		# of the function is what stops the run.
		if i1s == []:
			themortgagemeter_utils.record_alert('No items from expected h3/4 match!',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
		for i1 in i1s:
			for i2 in i1.parent():
				tbodys = i2.find_all("tbody")
				# if this is tracker bond, discard the first table
				if len(tbodys) == 0:
					continue
				if code == "TB":
					# Only keep elements that contain a row whose first cell is exactly "Term".
					ok = False
					for tbody in tbodys:
						for tr in tbody.find_all("tr"):
							tds = tr.find_all("td")
							if tds[0].text == "Term":
								ok = True
					if not ok:
						continue
				for tbody in tbodys:
					# tr_count is the zero-based row index within this tbody.
					tr_count = -1
					# Remembers the period read from an earlier row, for TB rows that omit it.
					table_savings_period = "unset"
					for tr in tbody.find_all("tr"):
						tr_count = tr_count + 1
						if code == "TB" and tr_count == 0:
							# skip the first row
							continue
						# clone the savings_data ready to write to
						savings_data_tmp = savings_data.copy()
						# First td is time only on first row for TB
						# so later TB rows start counting at column 1 (balance).
						if code == "TB" and tr_count > 1:
							td_count = 1
						else:
							td_count = 0
						if code == "TB" and tr_count > 1:
							if table_savings_period == "unset":
								themortgagemeter_utils.record_alert('ERROR: table_savings_period should not be unset',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
								exit()
							savings_data_tmp['savings_period'] = table_savings_period
						for td in tr.find_all("td"):
							# 0 - term
							# 1 - balance 
							# 2 - Gross
							# 3 - AER
							# 4 - NET (ignore)
							# Ignore remainder of cols
							text = td.text.lower().strip().encode('utf-8')
							if td_count == 0:
								# store this in a variable for use on next row if necessary
								table_savings_period = themortgagemeter_utils.get_months(text,logger)
								savings_data_tmp['savings_period'] = table_savings_period
							elif td_count == 1:
								res = savings_util.get_money_range(text,logger)
								savings_data_tmp['min_amt'] = res[0]
								savings_data_tmp['max_amt'] = res[1]
							elif td_count == 2:
								savings_data_tmp['gross_percent'] = themortgagemeter_utils.get_percentage(text,logger)
							elif td_count == 3:
								savings_data_tmp['aer_percent'] = themortgagemeter_utils.get_percentage(text,logger)
								# and then break out
								break
							td_count = td_count + 1
						# Appended for every row that was not skipped, including rows with no cells.
						savings_array.append(savings_data_tmp)
	elif re.match('.*/online-saver/',url):
		# TODO: need to set this for other types
		# Note: set on the caller's dict, not on a copy.
		savings_data['interest_paid'] = 'Y'
		#print bsobj
		# get the apr class element, as that contains the text we need
		apr = bsobj.find_all(attrs={'class':'apr'})[0].parent.parent.text.encode('utf-8')
		# split this line by \n
		apr = apr.split('\n')
		lines = []
		# A line of the form "<a> or <digit...>" holds two tiers; split it once on ' or '.
		for l in apr:
			if re.match('.* or [0-9].*',l):
				for l2 in l.split(' or ',1):
					lines.append(l2)
			else:
				lines.append(l)
		# Drop empty lines.
		while '' in lines:
			lines.remove('')
		# One product per remaining line: rate and balance range both parsed from the same text.
		for l in lines:
			# copy 
			savings_data_tmp = savings_data.copy()
			#print l
			# get percentage
			savings_data_tmp['gross_percent'] = themortgagemeter_utils.get_percentage(l,logger)
			savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
			# get_money range
			res = savings_util.get_money_range(l,logger)
			savings_data_tmp['min_amt'] = res[0]
			savings_data_tmp['max_amt'] = res[1]
			# append to savings_array
			savings_array.append(savings_data_tmp)
	elif re.match('.*/regular-saver/',url):
		# TODO: need to set this for other types
		# Note: these keys are set on the caller's dict, not on a copy.
		savings_data['interest_paid'] = 'Y'
		savings_data['regular_saver_frequency_period'] = '1'
		savings_data['regular_saver_frequency_type'] = 'M'
		savings_data['regular_saver'] = 'Y'
		# Always fixed
		savings_data['variability'] = 'F'
		#print bsobj
		# get the apr class element, as that contains the text we need
		apr = bsobj.find_all(attrs={'class':'apr'})[0].parent.parent.text.encode('utf-8')
		# split this line by \n
		apr = apr.split('\n')
		lines = []
		# Same line splitting as the online-saver branch.
		for l in apr:
			if re.match('.* or [0-9].*',l):
				for l2 in l.split(' or ',1):
					lines.append(l2)
			else:
				lines.append(l)
		while '' in lines:
			lines.remove('')
		for l in lines:
			# copy 
			savings_data_tmp = savings_data.copy()
			# get percentage
			savings_data_tmp['gross_percent'] = themortgagemeter_utils.get_percentage(l,logger)
			# Lines for which get_percentage returns '' are skipped.
			if savings_data_tmp['gross_percent'] == '':
				# abandon ship!
				continue
			savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
			# Hard-code to 25-250 for now, this seems standard
			savings_data_tmp['regular_saver_min_amt'] = '25'
			savings_data_tmp['regular_saver_max_amt'] = '250'
			# append to savings_array
			savings_array.append(savings_data_tmp)
	elif re.match('.*/everyday-saver/',url):
		# This one's quite simple (I think)
		# TODO: need to set this for other types
		# Note: set on the caller's dict, not on a copy.
		savings_data['interest_paid'] = 'Y'
		#print bsobj
		# get the apr class element, as that contains the text we need
		apr = bsobj.find_all(attrs={'class':'apr'})[0].parent.parent.text.encode('utf-8')
		#print apr
		# split this line by \n
		apr = apr.split('\n')
		lines = []
		# Only lines mentioning "gross" carry a rate.
		for l in apr:
			if re.match('.*gross.*',l):
				lines.append(l)
		while '' in lines:
			lines.remove('')
		for l in lines:
			# copy 
			savings_data_tmp = savings_data.copy()
			#print l
			# get percentage
			savings_data_tmp['gross_percent'] = themortgagemeter_utils.get_percentage(l,logger)
			savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
			# TODO: bonus_frequency_period set to 1, or get from data?
			# append to savings_array
			savings_array.append(savings_data_tmp)
	elif re.match('.*/branch-accounts/.*',url):
		# Branch accounts are not scraped: return the (empty) list without the
		# empty-result check below.
		return savings_array
	else:
		logger.info('unhandled:' + url)
		exit()
	# Every handled page type is expected to yield at least one product.
	if savings_array == []:
		themortgagemeter_utils.record_alert('ERROR: returning nothing from a page',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
		exit()
	# Return the savings_array
	logger.info('returning savings_array:' + str(savings_array))
	return savings_array
def get_product_pages(static, base_url, ext_url, logger):
    """Walk the Halifax savings listing page and insert every product found via its detail page.

    static   -- passed to get_page to choose between the static HTML copy and a live fetch
    base_url -- site root; prefixed to ext_url and to every "more details" link
    ext_url  -- path of the listing page
    logger   -- logger handed to the page fetch and helper calls

    Every row of every table with class 'sortableTable' is read column by column:
        0 account title ('isa' / 'junior' set the isa / child flags)
        1 headline rate text (ignored for now)
        2 minimum investment
        3 variable / fixed
        4 withdrawals (ignored; expected from the detail page)
        5 channel icons (online / branch; phone is ignored)
        6 "more details" link, followed once per distinct URL
    Each dict returned by process_more_info_page for that link is inserted with
    savings_util.handle_savings_insert.

    Returns None. Calls exit() on an unknown variability text or a row with more than
    seven cells. Uses institution_code, which is not defined in this function.
    """
    url = base_url + ext_url
    # Detail URLs already processed; only membership is ever tested.
    urls_seen = set()
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/halifax/savings-accounts.html', url, logger)
    # let's see how much info we can extract from the page
    # Get all the sortable tables and divine as much info as possible from that.
    sortable_tables = bsobj.find_all(attrs={'class': 'sortableTable'})
    for table in sortable_tables:
        for tr in table.find_all('tr'):
            td_idx = 0
            savings_data = savings_util.get_savings_data_object()
            for td in tr.find_all('td'):
                td_text = td.text.encode('utf-8').strip().lower()
                if td_idx == 0:
                    # title of account - Junior == child
                    if re.match('^.*isa.*$', td_text):
                        savings_data['isa'] = 'Y'
                    if re.match('^.*junior.*$', td_text):
                        savings_data['child'] = 'Y'
                elif td_idx == 1:
                    # We don't bother with this at the moment - TODO - sort this out
                    pass
                elif td_idx == 2:
                    # minimum investment, max is always infinity
                    min_amt = themortgagemeter_utils.get_money(td_text, logger)
                    savings_data['min_amt'] = min_amt
                elif td_idx == 3:
                    # Variable/Fixed
                    if re.match('.*variable.*', td_text):
                        savings_data['variability'] = 'V'
                    elif re.match('.*fixed.*', td_text):
                        savings_data['variability'] = 'F'
                    else:
                        themortgagemeter_utils.record_alert(
                            'ERROR: unknown variability: ' + td_text, logger,
                            themortgagemeter_db.db_connection,
                            themortgagemeter_db.cursor)
                        exit()
                elif td_idx == 4:
                    # Let's assume we'll get this info from the sub-page.
                    # Withdrawals allowed: "None, by closure only", "Unlimited", "None, until child is 18"
                    pass
                elif td_idx == 5:
                    for img in td.find_all('img'):
                        title = img.get('title').lower().strip()
                        if title == 'online':
                            savings_data['online'] = 'Y'
                        elif title == 'branch':
                            savings_data['branch'] = 'Y'
                        # "phone" (and any other title) is ignored
                elif td_idx == 6:
                    # more details link
                    new_url = base_url + td.find_all('a')[0].get('href')
                    if new_url in urls_seen:
                        # As in the original, the column counter is not advanced on this path.
                        continue
                    savings_array = process_more_info_page(
                        savings_data, new_url, logger)
                    logger.debug('%s', new_url)
                    for this_savings_data in savings_array:
                        logger.debug('%s', this_savings_data)
                        # NOTE(review): the listing page url, not new_url, is stored as the
                        # product URL -- kept as in the original; confirm this is intended.
                        savings_util.handle_savings_insert(
                            institution_code,
                            this_savings_data['isa'],
                            this_savings_data['regular_saver'],
                            this_savings_data['regular_saver_frequency_period'],
                            this_savings_data['regular_saver_frequency_type'],
                            this_savings_data['regular_saver_min_amt'],
                            this_savings_data['regular_saver_max_amt'],
                            this_savings_data['bonus'],
                            this_savings_data['bonus_frequency_period'],
                            this_savings_data['bonus_frequency_type'],
                            this_savings_data['online'],
                            this_savings_data['branch'],
                            this_savings_data['variability'],
                            this_savings_data['savings_period'],
                            this_savings_data['min_amt'],
                            this_savings_data['max_amt'],
                            this_savings_data['gross_percent'],
                            this_savings_data['aer_percent'],
                            this_savings_data['child'],
                            this_savings_data['interest_paid'],
                            url, logger)
                    urls_seen.add(new_url)
                else:
                    # str(tr): tr is a parsed tag, not a string, so plain concatenation
                    # would fail before the alert could be recorded.
                    themortgagemeter_utils.record_alert(
                        'ERROR: too many tds in tr: ' + str(tr), logger,
                        themortgagemeter_db.db_connection,
                        themortgagemeter_db.cursor)
                    exit()
                td_idx = td_idx + 1
def _alert_and_exit(message, logger):
    """Record ``message`` as an alert, then stop the process with exit()."""
    themortgagemeter_utils.record_alert(
        message, logger, themortgagemeter_db.db_connection,
        themortgagemeter_db.cursor)
    exit()


def _isa_savings_rows(bsobj, savings_data, url, logger):
    """Build savings rows from the "Summary box" tables of an ISA page.

    Fixed ISA pages (URL contains ``isa-saver-fixed``) must carry exactly two
    tables; only the second is read, one row per term. Any other ISA page
    must carry at most one table, from which every "interest rates" row
    yields one entry.
    """
    rows = []
    fixed_isa = re.match(".*isa-saver-fixed.*", url)
    for heading in bsobj.find_all("h2", text="Summary box"):
        # Calling a tag is BeautifulSoup shorthand for find_all(): this walks
        # every tag underneath the heading's parent.
        for elem in heading.parent():
            tabs = elem.find_all("table")
            if tabs == []:
                continue
            if fixed_isa:
                if len(tabs) != 2:
                    _alert_and_exit('ERROR: too many tabs in isa', logger)
                # The first table is discarded; the second holds the rates.
                for tab in tabs[1:]:
                    tbody = tab.find_all("tbody")[0]
                    for tr in tbody.find_all("tr"):
                        tds = tr.find_all("td")
                        row = savings_data.copy()
                        row['savings_period'] = themortgagemeter_utils.get_months(
                            tds[0].text.strip().encode('utf-8'), logger)
                        row['aer_percent'] = themortgagemeter_utils.get_percentage(
                            tds[1].text.strip().encode('utf-8'), logger)
                        row['gross_percent'] = row['aer_percent']
                        rows.append(row)
            else:
                if len(tabs) > 1:
                    _alert_and_exit('ERROR: too many tabs in isa', logger)
                for tab in tabs:
                    for tr in tab.find_all("tr"):
                        ths = tr.find_all("th")
                        tds = tr.find_all("td")
                        if len(ths) > 0 and len(tds) > 0:
                            # <th>label</th><td>value</td>
                            label_cell, value_cell = ths[0], tds[0]
                        elif len(ths) == 0 and len(tds) > 1:
                            # <td>label</td><td>value</td>; requiring two
                            # cells avoids an IndexError on single-cell rows.
                            label_cell, value_cell = tds[0], tds[1]
                        else:
                            _alert_and_exit('ERROR: unhandled case: ' + url,
                                            logger)
                        if re.match('interest rates.*',
                                    label_cell.text.lower()):
                            pc = themortgagemeter_utils.get_percentage(
                                value_cell.text.lower(), logger)
                            row = savings_data.copy()
                            row['gross_percent'] = pc
                            row['aer_percent'] = pc
                            rows.append(row)
    return rows


def _rate_table_savings_rows(bsobj, savings_data, url, logger):
    """Build savings rows from the "current rates" tables of a bond page.

    Handles fixed-online-saver / fixed-saver pages (code "FOS") and
    tracker-bond pages (code "TB"). Columns are read in the order
    term, balance range, gross rate, AER; later columns are ignored.
    """
    if re.match('.*fixed-online-saver.*', url) or re.match(
            '.*fixed-saver.*', url):
        code = "FOS"
        headings = bsobj.find_all("h3", text="Current Rates")
        if headings == []:
            headings = bsobj.find_all("h3", text="Current rates")
    else:
        # The caller only dispatches here for the three URL patterns, so
        # anything that is not a fixed saver is a tracker bond.
        code = "TB"
        headings = []
        for h4 in bsobj.find_all("h4"):
            if h4.text == "Current rates and apply":
                headings.append(h4)
                break
    if headings == []:
        # Not fatal here: the caller's empty-result check ends the run.
        themortgagemeter_utils.record_alert(
            'No items from expected h3/4 match!', logger,
            themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
    rows = []
    for heading in headings:
        for elem in heading.parent():
            tbodys = elem.find_all("tbody")
            if len(tbodys) == 0:
                continue
            if code == "TB":
                # For tracker bonds only elements containing a "Term" header
                # cell are rate tables. Rows without any <td> are skipped
                # instead of raising IndexError.
                has_term_cell = False
                for tbody in tbodys:
                    for tr in tbody.find_all("tr"):
                        tds = tr.find_all("td")
                        if tds and tds[0].text == "Term":
                            has_term_cell = True
                if not has_term_cell:
                    continue
            for tbody in tbodys:
                table_savings_period = "unset"
                for tr_count, tr in enumerate(tbody.find_all("tr")):
                    if code == "TB" and tr_count == 0:
                        # skip the header row
                        continue
                    row = savings_data.copy()
                    if code == "TB" and tr_count > 1:
                        # On tracker-bond tables only the first data row
                        # carries the term cell; later rows start at the
                        # balance column and reuse the remembered term.
                        first_col = 1
                        if table_savings_period == "unset":
                            _alert_and_exit(
                                'ERROR: table_savings_period should not be unset',
                                logger)
                        row['savings_period'] = table_savings_period
                    else:
                        first_col = 0
                    # 0 - term, 1 - balance, 2 - gross, 3 - AER,
                    # 4 - net (ignored along with all later columns)
                    for td_count, td in enumerate(tr.find_all("td"),
                                                  first_col):
                        text = td.text.lower().strip().encode('utf-8')
                        if td_count == 0:
                            # remember the term for the following rows
                            table_savings_period = themortgagemeter_utils.get_months(
                                text, logger)
                            row['savings_period'] = table_savings_period
                        elif td_count == 1:
                            res = savings_util.get_money_range(text, logger)
                            row['min_amt'] = res[0]
                            row['max_amt'] = res[1]
                        elif td_count == 2:
                            row['gross_percent'] = themortgagemeter_utils.get_percentage(
                                text, logger)
                        elif td_count == 3:
                            row['aer_percent'] = themortgagemeter_utils.get_percentage(
                                text, logger)
                            break
                    rows.append(row)
    return rows


def _apr_text_lines(bsobj):
    """Return the text around the first class="apr" element, split by line."""
    apr_text = bsobj.find_all(
        attrs={'class': 'apr'})[0].parent.parent.text.encode('utf-8')
    return apr_text.split('\n')


def _split_rate_alternatives(raw_lines):
    """Split "<rate> or <number>..." lines in two and drop empty lines."""
    lines = []
    for line in raw_lines:
        if re.match('.* or [0-9].*', line):
            lines.extend(line.split(' or ', 1))
        else:
            lines.append(line)
    return [line for line in lines if line != '']


def process_more_info_page(savings_data, url, logger):
    """Scrape one Halifax "more info" savings page into savings records.

    The page at ``url`` is always fetched live (the static flag passed to
    get_page is False). The parsing strategy is chosen from
    ``savings_data['isa']`` and then from patterns in ``url``.

    Args:
        savings_data: dict of savings fields used as the template for every
            returned record. NOTE: the online-saver, regular-saver and
            everyday-saver branches also write fields into this dict itself.
        url: address of the product page.
        logger: logger passed through to the helper modules.

    Returns:
        A list of copies of ``savings_data``, one per rate found. For
        ``/branch-accounts/`` URLs the empty list is returned without
        further checks.

    Exits the process (via exit()) when the URL matches no known pattern,
    when a page layout is not recognised, or when no record was produced.
    """
    bsobj = themortgagemeter_utils.get_page(
        False, 'static_html/halifax/savings-accounts.html', url, logger)
    savings_array = []
    # Parenthesised single argument: prints the same under Python 2 and 3.
    print(url)
    if savings_data['isa'] == 'Y':
        savings_array = _isa_savings_rows(bsobj, savings_data, url, logger)
    elif re.match('.*fixed-online-saver.*', url) or re.match(
            '.*tracker-bond.*', url) or re.match('.*fixed-saver.*', url):
        savings_array = _rate_table_savings_rows(bsobj, savings_data, url,
                                                 logger)
    elif re.match('.*/online-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _split_rate_alternatives(_apr_text_lines(bsobj)):
            savings_data_tmp = savings_data.copy()
            savings_data_tmp[
                'gross_percent'] = themortgagemeter_utils.get_percentage(
                    line, logger)
            savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
            res = savings_util.get_money_range(line, logger)
            savings_data_tmp['min_amt'] = res[0]
            savings_data_tmp['max_amt'] = res[1]
            savings_array.append(savings_data_tmp)
    elif re.match('.*/regular-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        savings_data['regular_saver_frequency_period'] = '1'
        savings_data['regular_saver_frequency_type'] = 'M'
        savings_data['regular_saver'] = 'Y'
        # Always fixed
        savings_data['variability'] = 'F'
        for line in _split_rate_alternatives(_apr_text_lines(bsobj)):
            savings_data_tmp = savings_data.copy()
            savings_data_tmp[
                'gross_percent'] = themortgagemeter_utils.get_percentage(
                    line, logger)
            if savings_data_tmp['gross_percent'] == '':
                # no rate on this line - nothing to record
                continue
            savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
            # Hard-code to 25-250 for now, this seems standard
            savings_data_tmp['regular_saver_min_amt'] = '25'
            savings_data_tmp['regular_saver_max_amt'] = '250'
            savings_array.append(savings_data_tmp)
    elif re.match('.*/everyday-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        # Only the lines mentioning "gross" carry a rate on this page.
        for line in _apr_text_lines(bsobj):
            if not re.match('.*gross.*', line):
                continue
            savings_data_tmp = savings_data.copy()
            savings_data_tmp[
                'gross_percent'] = themortgagemeter_utils.get_percentage(
                    line, logger)
            savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
            # TODO: bonus_frequency_period set to 1, or get from data?
            savings_array.append(savings_data_tmp)
    elif re.match('.*/branch-accounts/.*', url):
        # Branch accounts are not scraped; hand back the empty list.
        return savings_array
    else:
        logger.info('unhandled:' + url)
        exit()
    if savings_array == []:
        _alert_and_exit('ERROR: returning nothing from a page', logger)
    # Return the savings_array
    logger.info('returning savings_array:' + str(savings_array))
    return savings_array
def get_product_page(static,url,eligibilities):
    """Scrape the Post Office mortgage range page and insert every product.

    Each <h2> that contains a percentage introduces an LTV band; the page
    reports the deposit, so LTV is 100 minus that figure. The next sibling
    with class "displaytable" holds one product per row:
        td0 = years of fixed or tracker
        td1 = initial rate
        td2.. = svr, then apr (first later cells that contain a percentage)
        next td = fees

    Args:
        static: passed to get_page together with the static HTML path.
        url: page address; also stored with every inserted mortgage.
        eligibilities: iterable of eligibility codes; one insert is made
            per code for every product row.

    Rows whose type is neither "fixed" nor "tracker" raise an alert and are
    skipped. Raises IndexError when a row runs out of cells before the svr,
    apr or fee has been found.
    """
    logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page(static,'static_html/post_office/our-full-range.html',url,logger)
    term = str(25 * 12)

    def first_percentage(cell):
        # First percentage found on any line of the cell text, else ''.
        for line in cell.text.encode('utf-8').split('\n'):
            found = themortgagemeter_utils.get_percentage(line,logger)
            if found != '':
                return found
        return ''

    for ltv_elem in bsobj.find_all('h2'):
        # For post office, first reported % is 100 - LTV
        ltv_elem_str = ltv_elem.string
        if not ltv_elem_str:
            continue
        ltv_percent = themortgagemeter_utils.get_percentage(ltv_elem_str,logger)
        if ltv_percent == '':
            continue
        ltv_percent = str(100 - int(ltv_percent))
        div = ltv_elem.fetchNextSiblings(attrs={'class' : 'displaytable'},limit=1)
        if not div:
            continue
        logger.debug('here')
        logger.debug(div)
        for tr in div[0].find_all('tr'):
            logger.debug(tr)
            tds = tr.find_all('td')
            logger.debug(tr)
            # Only rows with more than one cell describe a mortgage.
            if len(tds) <= 1:
                continue
            first_cell = tds[0].text.encode('utf-8')
            logger.debug(first_cell.split('\n'))
            # Sometimes we get empty fields - we remove them here.
            s = [part for part in first_cell.split('\n') if part != '']
            initial_period = str(themortgagemeter_utils.get_months(s[0],logger))
            # TODO: generic text cleansing function
            # Strips the UTF-8 non-breaking-space bytes, then takes the
            # third word of the cell as the product type.
            type_str = re.sub('\xa0','',re.sub('\xc2',' ',first_cell)).split()[2]
            logger.debug('type_str: ' + type_str)
            if type_str == 'fixed':
                mortgage_type = 'F'
            elif type_str == 'tracker':
                mortgage_type = 'T'
            else:
                themortgagemeter_utils.record_alert('ERROR: PSTFFC neither fixed nor tracker: ' + type_str,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
                # Skip the row: otherwise it would be inserted with the
                # previous row's mortgage_type (or fail with NameError).
                continue
            i = 1
            rate_percent = first_percentage(tds[i])
            svr_percent = ''
            while svr_percent == '':
                i += 1
                svr_percent = first_percentage(tds[i])
            apr_percent = ''
            while apr_percent == '':
                i += 1
                apr_percent = first_percentage(tds[i])
            i += 1
            # Drops the first two bytes (presumably the UTF-8 pound sign)
            # and the thousands separators.
            booking_fee = tds[i].text.strip().encode('utf-8')[2:].replace(',','')
            for eligibility in eligibilities:
                mc_util.handle_mortgage_insert(institution_code,mortgage_type,rate_percent,svr_percent,apr_percent,ltv_percent,initial_period,booking_fee,term,url,eligibility,logger)
def get_product_page_details(url, savings_data):
    """Fetch the product page at ``url`` using the 'retrieve' logger.

    The page is requested live (static flag False, no static file path).
    Neither the fetched page nor ``savings_data`` is used afterwards, and
    nothing is returned.
    """
    retrieve_log = logging.getLogger('retrieve')
    themortgagemeter_utils.get_page(False, '', url, retrieve_log)
def halifax_remortgage_page(static, url, mortgage_type, eligibility, logger):
    """Scrape a Halifax remortgage table and insert one mortgage per row.

    Every <tr> is flattened into the list of its stripped text fragments.
    Rows with 20 to 25 fragments whose fourth fragment contains '%' are
    product rows, for example:
        ['\\n', '2 years', '\\n', '4.44%', '\\n', 'Currently', ' 3.99%',
         '\\n', '30/11/2014', '\\n', '4.3% APR', '\\n', '<pound>995', '\\n',
         '75-80%', ...]
    Rows with exactly 25 fragments have every field after the initial rate
    two positions further right. Other rows longer than three fragments
    are only logged.
    """
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/halifax/remortgage-fixed-75ltv.asp', url, logger)
    for row in bsobj.find_all('tr'):
        mortgage_details = [
            string.strip(fragment.encode('utf-8')) for fragment in row.strings
        ]
        logger.debug(mortgage_details)
        field_count = len(mortgage_details)
        if 19 < field_count <= 25:
            if '%' not in mortgage_details[3]:
                # header or filler row - no initial rate in the rate column
                continue
            # 25-fragment rows carry two extra fragments before the SVR.
            shift = 2 if field_count == 25 else 0
            rate_percent = mortgage_details[3][:-1]
            svr_text = mortgage_details[6 + shift].split()[0][:-1]
            svr_percent = svr_text.strip('\xc2').strip('\xa0')
            apr_percent = mortgage_details[10 + shift].split()[0][:-1]
            booking_fee = mortgage_details[12 + shift][2:].replace(',', '')
            period_text = mortgage_details[1]
            # handle special nonsense case: the unit is not preceded by
            # "<digits><space>", so the generic parser is bypassed
            if re.search(r'years', period_text) and not re.search(
                    r'[0-9]+ years', period_text):
                initial_period = str(int(period_text[0]) * 12)
            elif re.search(r'months', period_text) and not re.search(
                    r'[0-9]+ month', period_text):
                initial_period = period_text[0:2]
            else:
                initial_period = str(
                    themortgagemeter_utils.get_months(period_text, logger))
            # "75-80%" -> upper bound of the band
            ltv_percent = mortgage_details[14 + shift].split('-')[1].strip('%')
            mc_util.handle_mortgage_insert(institution_code, mortgage_type,
                                           rate_percent, svr_percent,
                                           apr_percent, ltv_percent,
                                           initial_period, booking_fee,
                                           term, url, eligibility, logger)
        elif field_count > 3:
            logger.debug('Should this be handled?: %s', mortgage_details)
Beispiel #32
0
def get_product_pages(static,base_url,ext_url,logger):
    """Scrape the Halifax savings listing and insert every product found.

    Each row of every class="sortableTable" table is read cell by cell:
        0 - account title ("isa" / "junior" set the isa / child flags)
        1 - headline rate text (ignored for now)
        2 - minimum investment
        3 - Variable / Fixed
        4 - withdrawals (ignored; expected from the sub-page)
        5 - channel icons (online / branch; phone is ignored)
        6 - "find out more" link, followed via process_more_info_page
    Every record returned for the linked page is passed to
    savings_util.handle_savings_insert. Each linked page is processed once.

    Args:
        static: passed to get_page together with the static HTML path.
        base_url: site root, prefixed to ext_url and to every product link.
        ext_url: path of the listing page.
        logger: logger passed through to the helper modules.

    Exits the process (via exit()) on an unknown variability value or on a
    row with more than seven cells.
    """
    url = base_url + ext_url
    urls_seen = []
    bsobj = themortgagemeter_utils.get_page(static,'static_html/halifax/savings-accounts.html',url,logger)
    # Get all the sortable tables and divine as much info as possible from that.
    for table in bsobj.find_all(attrs={'class' : 'sortableTable'}):
        for tr in table.find_all('tr'):
            td_idx = 0
            savings_data = savings_util.get_savings_data_object()
            for td in tr.find_all('td'):
                td_text = td.text.encode('utf-8').strip().lower()
                if td_idx == 0:
                    # title of account - Junior == child
                    if re.match('^.*isa.*$',td_text):
                        savings_data['isa'] = 'Y'
                    if re.match('^.*junior.*$',td_text):
                        savings_data['child'] = 'Y'
                elif td_idx == 1:
                    # We don't bother with this at the moment - TODO - sort this out
                    pass
                elif td_idx == 2:
                    # minimum investment, max is always infinity
                    savings_data['min_amt'] = themortgagemeter_utils.get_money(td_text,logger)
                elif td_idx == 3:
                    # Variable/Fixed
                    if re.match('.*variable.*',td_text):
                        savings_data['variability'] = 'V'
                    elif re.match('.*fixed.*',td_text):
                        savings_data['variability'] = 'F'
                    else:
                        themortgagemeter_utils.record_alert('ERROR: unknown variability: ' + td_text,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
                        exit()
                elif td_idx == 4:
                    # Let's assume we'll get this info from the sub-page.
                    # Withdrawals allowed: "None, by closure only", "Unlimited", "None, until child is 18"
                    pass
                elif td_idx == 5:
                    for img in td.find_all('img'):
                        # An <img> without a title attribute is ignored
                        # rather than failing on None.lower().
                        title = (img.get('title') or '').lower().strip()
                        if title == 'online':
                            savings_data['online'] = 'Y'
                        elif title == 'branch':
                            savings_data['branch'] = 'Y'
                        # "phone" is deliberately ignored
                elif td_idx == 6:
                    # more details link
                    new_url = base_url + td.find_all('a')[0].get('href')
                    if new_url in urls_seen:
                        # already processed; note this also skips the
                        # td_idx increment below, as before
                        continue
                    savings_array = process_more_info_page(savings_data,new_url,logger)
                    # Parenthesised single argument: prints the same under
                    # Python 2 and 3.
                    print(new_url)
                    for this_savings_data in savings_array:
                        print(this_savings_data)
                        # NOTE(review): the listing-page url, not new_url,
                        # is stored with the record - confirm intended.
                        savings_util.handle_savings_insert(
                            institution_code,
                            this_savings_data['isa'],
                            this_savings_data['regular_saver'],
                            this_savings_data['regular_saver_frequency_period'],
                            this_savings_data['regular_saver_frequency_type'],
                            this_savings_data['regular_saver_min_amt'],
                            this_savings_data['regular_saver_max_amt'],
                            this_savings_data['bonus'],
                            this_savings_data['bonus_frequency_period'],
                            this_savings_data['bonus_frequency_type'],
                            this_savings_data['online'],
                            this_savings_data['branch'],
                            this_savings_data['variability'],
                            this_savings_data['savings_period'],
                            this_savings_data['min_amt'],
                            this_savings_data['max_amt'],
                            this_savings_data['gross_percent'],
                            this_savings_data['aer_percent'],
                            this_savings_data['child'],
                            this_savings_data['interest_paid'],
                            url, logger)
                    urls_seen.append(new_url)
                else:
                    # str(tr): concatenating the Tag itself raised TypeError
                    # before the alert could be recorded.
                    themortgagemeter_utils.record_alert('ERROR: too many tds in tr: ' + str(tr),logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
                    exit()
                td_idx = td_idx + 1
Beispiel #33
0
def get_product_page(static, url, eligibilities):
    """Scrape the Post Office mortgage range page and insert every product.

    For each <h2> containing a percentage the LTV is 100 minus that figure
    (the page reports the deposit). The following sibling with class
    "displaytable" is then read row by row:
        td0 = years of fixed or tracker
        td1 = initial rate
        td2 = svr
        td3 = apr
        td4 = fees
    The svr and apr are taken from the first later cells that contain a
    percentage, so empty spacer cells are tolerated.

    Args:
        static: passed to get_page together with the static HTML path.
        url: page address; also stored with every inserted mortgage.
        eligibilities: iterable of eligibility codes; one insert is made
            per code for every product row.

    Rows that are neither "fixed" nor "tracker" are reported through
    record_alert and skipped. Raises IndexError when a row has no cell left
    while the svr, apr or fee is still being looked for.
    """
    logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/post_office/our-full-range.html', url, logger)
    term = str(25 * 12)
    for ltv_elem in bsobj.find_all('h2'):
        # For post office, first reported % is 100 - LTV
        ltv_elem_str = ltv_elem.string
        if not ltv_elem_str:
            continue
        ltv_percent = themortgagemeter_utils.get_percentage(
            ltv_elem_str, logger)
        if ltv_percent == '':
            continue
        ltv_percent = str(100 - int(ltv_percent))
        div = ltv_elem.fetchNextSiblings(attrs={'class': 'displaytable'},
                                         limit=1)
        if not div:
            continue
        logger.debug('here')
        logger.debug(div)
        for tr in div[0].find_all('tr'):
            logger.debug(tr)
            tds = tr.find_all('td')
            logger.debug(tr)
            # A mortgage row needs more than one cell.
            if len(tds) <= 1:
                continue
            period_cell = tds[0].text.encode('utf-8')
            logger.debug(period_cell.split('\n'))
            # Sometimes we get empty fields - we remove them here.
            s = [field for field in period_cell.split('\n') if field != '']
            initial_period = str(
                themortgagemeter_utils.get_months(s[0], logger))
            # TODO: generic text cleansing function
            # Removes the UTF-8 non-breaking-space bytes; the third word of
            # the cell is the product type.
            cleaned = re.sub('\xa0', '', re.sub('\xc2', ' ', period_cell))
            type_str = cleaned.split()[2]
            logger.debug('type_str: ' + type_str)
            if type_str == 'fixed':
                mortgage_type = 'F'
            elif type_str == 'tracker':
                mortgage_type = 'T'
            else:
                themortgagemeter_utils.record_alert(
                    'ERROR: PSTFFC neither fixed nor tracker: ' + type_str,
                    logger, themortgagemeter_db.db_connection,
                    themortgagemeter_db.cursor)
                # Do not insert this row: mortgage_type would be left over
                # from an earlier row, or undefined on the first one.
                continue
            i = 1
            rate_percent = ''
            for t in tds[i].text.encode('utf-8').split('\n'):
                rate_percent = themortgagemeter_utils.get_percentage(
                    t, logger)
                if rate_percent != '':
                    break
            svr_percent = ''
            while svr_percent == '':
                i += 1
                for t in tds[i].text.encode('utf-8').split('\n'):
                    svr_percent = themortgagemeter_utils.get_percentage(
                        t, logger)
                    if svr_percent != '':
                        break
            apr_percent = ''
            while apr_percent == '':
                i += 1
                for t in tds[i].text.encode('utf-8').split('\n'):
                    apr_percent = themortgagemeter_utils.get_percentage(
                        t, logger)
                    if apr_percent != '':
                        break
            i += 1
            # Drops the first two bytes (presumably the UTF-8 pound sign)
            # and the thousands separators.
            booking_fee = tds[i].text.strip().encode('utf-8')[2:].replace(
                ',', '')
            for eligibility in eligibilities:
                mc_util.handle_mortgage_insert(
                    institution_code, mortgage_type, rate_percent,
                    svr_percent, apr_percent, ltv_percent, initial_period,
                    booking_fee, term, url, eligibility, logger)
def get_product_page_interest_rates(url, savings_data):
    """Scrape every interest-rate table on a product page and store its rows.

    The page at ``url`` is fetched with ``themortgagemeter_utils.get_page``.
    Each ``<table>`` is classified by the text following "interest rates: "
    in its ``summary`` attribute; the classification sets flags in
    ``savings_data``. Every row after the first is then parsed cell by cell
    into a copy of ``savings_data`` and passed to
    ``savings_util.handle_savings_insert``.

    Args:
        url: Address of the product page. If it contains "isa" the product
            is flagged as an ISA.
        savings_data: Dict of product attributes. It is modified in place
            and must already hold every key that is read below
            (``regular_saver``, ``bonus``, ``online``, ``child``, ...).

    Returns:
        None.

    Raises:
        SystemExit: via ``exit()`` when a table has no usable summary, when
            the summary is not one of the handled product kinds, or when a
            regular-saver amount cell cannot be parsed.
    """
    logger = logging.getLogger('retrieve ' + url)
    bsobj = themortgagemeter_utils.get_page(False, '', url, logger)
    if 'isa' in url:
        savings_data['isa'] = 'Y'

    # Keys in the exact positional order handle_savings_insert is called
    # with (between institution_code and the trailing url/logger).
    insert_keys = (
        'isa', 'regular_saver', 'regular_saver_frequency_period',
        'regular_saver_frequency_type', 'regular_saver_min_amt',
        'regular_saver_max_amt', 'bonus', 'bonus_frequency_period',
        'bonus_frequency_type', 'online', 'branch', 'variability',
        'savings_period', 'min_amt', 'max_amt', 'gross_percent',
        'aer_percent', 'child', 'interest_paid',
    )

    for t in bsobj.find_all('table'):
        # Tag.get() returns None for a missing attribute; treat that the
        # same as an empty summary instead of crashing on None.encode().
        raw_summary = t.get('summary')
        summary = raw_summary.encode('utf-8').lower() if raw_summary else ''
        if not summary:
            logger.error('table without a summary attribute on ' + url)
            exit()

        summary_match = re.match('.*interest rates: (.*)', summary)
        if summary_match is None:
            # A summary that is not an "interest rates: ..." one is reported
            # through the normal alert path rather than an AttributeError.
            themortgagemeter_utils.record_alert(
                'NEED TO HANDLE: ' + summary, logger,
                themortgagemeter_db.db_connection,
                themortgagemeter_db.cursor)
            exit()
        summary_info = summary_match.group(1)

        # NOTE(review): flags are written to the shared savings_data, so
        # they carry over to later tables on the page and to the caller --
        # confirm this is intended.
        # The first three kinds are exact names, hence equality; a
        # parenthesised string is not a tuple, so `in` would be a substring
        # test that also accepts '' and partial names.
        if summary_info == "cash e-isa#":
            savings_data['isa'] = 'Y'
        elif summary_info == "fixed rate saver - monthly interest":
            savings_data['variability'] = 'F'
            savings_data['interest_paid'] = 'M'
        elif summary_info == "fixed rate saver - annual interest":
            savings_data['variability'] = 'F'
            savings_data['interest_paid'] = 'Y'
        elif "regular saver" in summary_info:
            savings_data['regular_saver'] = 'Y'
            savings_data['interest_paid'] = 'Y'
        elif "online bonus" in summary_info:
            savings_data['bonus'] = 'Y'
            savings_data['branch'] = 'N'
            savings_data['bonus_frequency_period'] = '1'
            savings_data['bonus_frequency_type'] = 'M'
            # skip bonus for HSBC- it's complicated - probably needs its own function TODO
            continue
        elif "flexible saver" in summary_info:
            savings_data['variability'] = 'V'
        else:
            themortgagemeter_utils.record_alert(
                'NEED TO HANDLE: ' + summary_info, logger,
                themortgagemeter_db.db_connection,
                themortgagemeter_db.cursor)
            exit()

        # The first row of each table is never parsed (presumably the
        # header row -- TODO confirm).
        for tr in t.find_all('tr')[1:]:
            # This is a new savings product, so work on a private copy.
            this_savings_data = savings_data.copy()
            is_regular_saver = this_savings_data['regular_saver'] == 'Y'
            # Regular-saver rows start one column early, so their first
            # cell lands on index -1 and is ignored.
            td_count = -1 if is_regular_saver else 0

            for td in tr.find_all('td'):
                td_style = td.get('style')
                if td_style is not None:
                    td_style = td_style.lower().encode(
                        'utf-8').replace(' ', '')
                    if td_style == 'vertical-align:middle':
                        # Such cells do not count as a data column.
                        continue
                logger.info(td)
                v = td.text.encode('utf-8').lower().strip()
                if td_count == 0:
                    if is_regular_saver:
                        logger.info('regular_saver: ' + v)
                        words = v.split()
                        # Words 0 and 2 are the min and max amounts, word 4
                        # is the period; check the length before indexing
                        # so a short cell reaches the alert below.
                        if len(words) < 5 or words[4] != "month":
                            themortgagemeter_utils.record_alert(
                                'ERROR: reg saver not parsed: ' + v,
                                logger,
                                themortgagemeter_db.db_connection,
                                themortgagemeter_db.cursor)
                            exit()
                        # [2:] drops the first two characters of each
                        # amount (presumably the UTF-8 encoded pound sign
                        # -- TODO confirm).
                        this_savings_data[
                            'regular_saver_min_amt'] = words[0][2:]
                        this_savings_data[
                            'regular_saver_max_amt'] = words[2][2:]
                        this_savings_data[
                            'regular_saver_frequency_period'] = '1'
                        this_savings_data[
                            'regular_saver_frequency_type'] = 'M'
                    else:
                        # get_money_range result: [0] is stored as the
                        # minimum, [1] as the maximum.
                        res = savings_util.get_money_range(v, logger)
                        this_savings_data['min_amt'] = res[0]
                        this_savings_data['max_amt'] = res[1]
                elif td_count == 2:
                    # gross %  (column 1, net %, is deliberately ignored)
                    this_savings_data['gross_percent'] = v
                elif td_count == 3:
                    this_savings_data['aer_percent'] = v
                td_count += 1

            # Some trs have no tds; we ignore those.
            if td_count > 0:
                # Now store this product
                # TODO: fixed savings?
                logger.info(this_savings_data)
                field_values = [this_savings_data[key] for key in insert_keys]
                savings_util.handle_savings_insert(
                    institution_code, *(field_values + [url, logger]))