def ar_banks_iterator(
        url_bank_list='http://www.bcra.gov.ar/Sistema_financiero/sisfin020101.asp',
        url_bank_info='http://www.bcra.gob.ar/Sistema_financiero/'
                      'sisfin020101.asp?bco=%s',
        country='Argentina'):
    """
    Argentinian banks list iterator.

    Scrapes the BCRA (Argentine central bank) site: reads the bank list
    from ``url_bank_list`` (one ``<option>`` per bank), then fetches each
    bank's detail page and extracts every field matched by the module-level
    ``compiled_re`` patterns. Records are augmented with geolocation fields
    when ``unify_geo_data`` can resolve the bank's address.

    :param url_bank_list: URL of the page listing all banks.
    :param url_bank_info: URL template of one bank's detail page
        (``%s`` is replaced by the bank code).
    :param country: country name stored in every yielded record.
    :return: generator of dicts with bank/contact/geolocation data.

    >>> banks = ar_banks_iterator()
    >>> banks.next().keys() == ['latitud', 'ghom', 'fax', 'code', 'office', \
                                'street2', 'site', 'number', 'phone', \
                                'street', 'address', 'active', 'gins', 'id', \
                                'longitud', 'city', 'name', 'zip', 'country', \
                                'state', 'email', 'vat']
    True
    """
    # BeautifulSoup is an optional dependency; yield nothing when absent.
    if BeautifulSoup is None:
        return
    soup_list = BeautifulSoup(urlopen(url_bank_list))
    for bank in soup_list('option'):
        # Skip placeholder <option> entries that carry no value attribute.
        if 'value' not in dict(bank.attrs):
            continue
        # 'bank_id' rather than 'id': avoid shadowing the builtin.
        bank_id, name = bank['value'].strip(), bank.string.strip()
        soup_bank = BeautifulSoup(urlopen(url_bank_info % bank_id))
        data = {
            'id': bank_id,
            'bcra_code': bank_id,
            'name': name,
            'country': country,
            'active': '1',
            'number': '',
        }
        # Each <tr> of the second table holds one "label: value" cell;
        # every compiled_re pattern extracts one field from the raw HTML.
        for line in soup_bank(id='texto_columna_2')[0]('table')[1]('tr'):
            sline = line.td.renderContents()
            for key, pattern in compiled_re.items():
                found = pattern.search(sline)
                if found:
                    data[key] = unicode(found.group(1).strip(), encoding)
        searchaddress = (u"%(street)s, %(city)s, %(state)s, %(country)s"
                         % data)
        geodata = unify_geo_data(strip_accents(searchaddress))
        if geodata:
            data.update(geodata)
            # Run field post-processors on the keys they declare.
            # (Removed the dead no-op "data[key] = data[key]" that
            # followed this call in the previous revision.)
            for key in postprocessor_keys.keys():
                if key in data:
                    data[key] = postprocessor_keys[key](key, data)
        else:
            _logger.warning("No geoposition %s." % (searchaddress))
        yield data
def ar_banks_iterator(
        url_bank_list='http://www.bcra.gov.ar/sisfin/sf010100.asp',
        url_bank_info='http://www.bcra.gov.ar/sisfin/sf010100.asp?bco=%s',
        country='Argentina'):
    """
    Argentinian banks list iterator.

    Scrapes the BCRA (Argentine central bank) site: reads the bank list
    from ``url_bank_list`` (one ``<option>`` per bank), then fetches each
    bank's detail page and extracts every field matched by the module-level
    ``compiled_re`` patterns. Records are augmented with geolocation fields
    when ``unify_geo_data`` can resolve the bank's address.

    :param url_bank_list: URL of the page listing all banks.
    :param url_bank_info: URL template of one bank's detail page
        (``%s`` is replaced by the bank code).
    :param country: country name stored in every yielded record.
    :return: generator of dicts with bank/contact/geolocation data.

    >>> banks = ar_banks_iterator()
    >>> banks.next().keys() == ['latitud', 'ghom', 'fax', 'code', 'office', \
                                'street2', 'site', 'number', 'phone', \
                                'street', 'address', 'active', 'gins', 'id', \
                                'longitud', 'city', 'name', 'zip', 'country', \
                                'state', 'email', 'vat']
    True
    """
    page_list = urlopen(url_bank_list)
    soup_list = BeautifulSoup(page_list)
    for bank in soup_list('option'):
        # Placeholder <option> entries have no value attribute; skip them.
        if 'value' not in dict(bank.attrs):
            continue
        # 'bank_id' rather than 'id': avoid shadowing the builtin.
        bank_id, name = bank['value'].strip(), bank.string.strip()
        page_bank = urlopen(url_bank_info % bank_id)
        soup_bank = BeautifulSoup(page_bank)
        data = {
            'id': bank_id,
            'name': name,
            'country': country,
            'active': '1',
        }
        # Each <tr> holds one "label: value" cell; every compiled_re
        # pattern extracts one field from the raw cell HTML.
        for line in soup_bank('div')[5]('table')[0]('tr'):
            sline = line.td.renderContents()
            for key, pattern in compiled_re.items():
                found = pattern.search(sline)
                if found:
                    data[key] = unicode(found.group(1).strip(), encoding)
        searchaddress = (u"%(street)s, %(city)s, %(state)s, %(country)s"
                         % data)
        geodata = unify_geo_data(strip_accents(searchaddress))
        # BUG FIX: unify_geo_data may return None/empty when the address
        # cannot be resolved; dict.update(None) raises TypeError. Guard it
        # (consistent with the other ar_banks_iterator revision).
        if geodata:
            data.update(geodata)
        # Run field post-processors, then utf-8 encode the results.
        for key in postprocessor_keys.keys():
            if key in data:
                data[key] = postprocessor_keys[key](key, data)
                data[key] = data[key].encode('utf-8')
        yield data
def search_zip(street, number, city, state, country, unique=True):
    """
    Return the postal code (CPA) for an address. Only works with Argentina.

    Scrapes www3.correoargentino.com.ar: first resolves the locality code
    for ``city`` within ``state``, then queries the street/number page and
    extracts the 8-character CPA code.

    :param street: street name; leading "av" (avenida) titles are dropped.
    :param number: street number, as a string.
    :param city: locality name.
    :param state: province name ("capital federal" is special-cased).
    :param country: "argentina" or "ar" (case-insensitive).
    :param unique: when several localities match, return the code of the one
        most similar to ``city``; otherwise return the first one found.
    :raises RuntimeError: when the site returns no locality for the query.
    :raises NotImplementedError: for any country other than Argentina.

    >>> search_zip("rivadavia", "9800", "buenos aires", "capital federal", "argentina")
    u'C1407DZT'
    >>> search_zip("jose clemente paz", "1200", "jose clemente paz", "buenos aires", "argentina")
    u'B1665BBB'
    >>> search_zip("general paz", "5445", "general san martin", "buenos aires", "argentina")
    u'1650'
    """
    street = strip_accents(street.lower())
    city = strip_accents(city.lower())
    state = strip_accents(state.lower())
    country = strip_accents(country.lower())
    if country not in ['argentina', 'ar']:
        raise NotImplementedError
    # Drop street-type prefixes such as "av" (avenida).
    street = re.sub(r'\s*av\s+', '', street)
    # CPA codes are 8 word-characters rendered between tags.
    re_cpa = re.compile(r'>(\w{8})<')
    if state in ['capital federal']:
        # Capital Federal is a single locality with a fixed site code.
        codloca = ['5001']
        codpos = ['']
        loca = None
    else:
        # BUG FIX: this URL was a single double-quoted literal that
        # accidentally embedded the characters «' '» in the middle
        # ("...cpa/' \ 'cpa_loca..."); use proper implicit string
        # concatenation, matching the cpa_calle URL below.
        url = 'http://www3.correoargentino.com.ar/scriptsN/cpa/' \
              'cpa_loca.idc?codprov=%s&pnl=%s'
        inpage = urlopen(url % (codprov_dict[country][state], _st(city)))
        soup = BeautifulSoup(inpage)
        options = soup.findAll('option')
        if len(options) == 0:
            raise RuntimeError(
                'No locations for "%s"'
                % ','.join([street, number, city, state, country]))
        # Locality names (without the trailing "(code)" suffix)...
        loca = map(
            lambda opt: re.search(r'(.*)\s*\(\d+\)', opt.string.lower())
            .groups()[0].strip(),
            options)
        # ...their site-internal codes, and their generic postal codes.
        codloca = map(lambda opt: opt['value'], options)
        codpos = map(
            lambda opt: re.search(r'\((\d+)\)', opt.string).groups()[0],
            options)
    # Query the street/number page of every candidate locality; replace
    # the generic postal code with the exact CPA when one is found.
    for i in xrange(len(codloca)):
        url = 'http://www3.correoargentino.com.ar/scriptsN/cpa/' \
              'cpa_calle.idc?codloca=%s&pnc=%s&alt=%s'
        inpage = urlopen(url % (codloca[i], _st(street), number))
        soup = BeautifulSoup(inpage)
        output = soup.body.div.table.tr.td.renderContents()
        match = re_cpa.search(output)
        if match:
            codpos[i] = match.group(1)
    if len(codloca) > 1 and unique:
        # Pick the locality whose name best matches the requested city.
        i = mostequivalent(loca, city)
        return codpos[i]
    return unicode(codpos[0])
def search_zip(street, number, city, state, country, unique=True):
    """
    Return the postal code (CPA) for an address. Only works with Argentina.

    Scrapes www3.correoargentino.com.ar: resolves the locality code for
    ``city`` within ``state``, then queries the street/number page and
    extracts the 8-character CPA code when available.

    :param street: street name; leading "av" (avenida) titles are dropped.
    :param number: street number, as a string.
    :param city: locality name.
    :param state: province name ("capital federal" is special-cased).
    :param country: "argentina" or "ar" (case-insensitive).
    :param unique: when several localities match, return the code of the one
        most similar to ``city``; otherwise return the first one found.
    :raises RuntimeError: when the site returns no locality for the query.
    :raises NotImplementedError: for any country other than Argentina.

    >>> search_zip("rivadavia", "9800", "buenos aires", "capital federal", "argentina")
    u'C1407DZT'
    >>> search_zip("jose clemente paz", "1200", "jose clemente paz", "buenos aires", "argentina")
    u'B1665BBB'
    >>> search_zip("general paz", "5445", "general san martin", "buenos aires", "argentina")
    u'1650'
    """
    street = strip_accents(street.lower())
    city = strip_accents(city.lower())
    state = strip_accents(state.lower())
    country = strip_accents(country.lower())
    if country not in ['argentina', 'ar']:
        raise NotImplementedError
    # Strip street-type prefixes such as "av" (avenida).
    street = re.sub(r'\s*av\s+', '', street)
    # A CPA is 8 word-characters rendered between tags.
    re_cpa = re.compile(r'>(\w{8})<')
    if state in ['capital federal']:
        # Capital Federal is a single locality with a fixed site code.
        loca = None
        codloca = ['5001']
        codpos = ['']
    else:
        # BUG FIX: the previous revision built this URL as one
        # double-quoted literal containing stray quote characters
        # ("...cpa/' \ 'cpa_loca..."), producing an invalid URL; this is
        # the intended implicit concatenation (cf. the cpa_calle URL).
        url = ('http://www3.correoargentino.com.ar/scriptsN/cpa/'
               'cpa_loca.idc?codprov=%s&pnl=%s')
        soup = BeautifulSoup(
            urlopen(url % (codprov_dict[country][state], _st(city))))
        options = soup.findAll('option')
        if not options:
            raise RuntimeError('No locations for "%s"' % ','.join(
                [street, number, city, state, country]))
        # Locality names (sans "(code)" suffix), site codes, and the
        # generic postal code embedded in each option's label.
        loca = [re.search(r'(.*)\s*\(\d+\)',
                          opt.string.lower()).groups()[0].strip()
                for opt in options]
        codloca = [opt['value'] for opt in options]
        codpos = [re.search(r'\((\d+)\)', opt.string).groups()[0]
                  for opt in options]
    # For each candidate locality, look up the exact CPA for the
    # street/number; keep the generic code when none is returned.
    for i in xrange(len(codloca)):
        url = ('http://www3.correoargentino.com.ar/scriptsN/cpa/'
               'cpa_calle.idc?codloca=%s&pnc=%s&alt=%s')
        soup = BeautifulSoup(urlopen(url % (codloca[i], _st(street), number)))
        output = soup.body.div.table.tr.td.renderContents()
        match = re_cpa.search(output)
        if match:
            codpos[i] = match.group(1)
    if len(codloca) > 1 and unique:
        # Choose the locality whose name best matches the requested city.
        return codpos[mostequivalent(loca, city)]
    return unicode(codpos[0])