def get_user_agents(self):
    # Scrape a list of browser user-agent strings from useragentstring.com.
    page_soup = souper.get_soup(
        'http://www.useragentstring.com/pages/useragentstring.php?typ=Browser'
    )
    result = []
    for ua in page_soup.findAll('li'):
        result.append(ua.text)
    return result

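# The get_soup helper (called directly below and via the souper module above)
# is not shown in this file. A minimal sketch, assuming it simply fetches the
# URL with requests and parses the response with BeautifulSoup:
import requests
from bs4 import BeautifulSoup

def get_soup(url):
    # Fetch the page and parse its HTML; raises on network or HTTP errors.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')
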
def get_fighter_url_exts(index_url):
    # Collect the href of the first anchor in each row of the index table.
    soup = get_soup(index_url)
    table_rows = soup.tbody.findAll('tr')
    fighter_url_exts = []
    for tr in table_rows:
        fighter_url_exts.append(tr.a['href'])
    return fighter_url_exts

def get_fighter_matches_tr(url):
    # Return the match rows from a fighter page, or an empty list if the
    # page cannot be fetched or has no results table.
    try:
        soup = get_soup(url)
    except Exception:
        return []
    if soup.tbody is not None:
        return soup.tbody.findAll('tr')
    return []

def get_fighter_info(url):
    # Return the fighter's name from the page's <h1>, or None if the page
    # cannot be fetched.
    try:
        soup = get_soup(url)
    except Exception:
        return None
    name = ''
    try:
        name = soup.h1.string
    except AttributeError:
        print('AttributeError: page has no <h1> element')
    return name

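# A hedged usage sketch tying the three fighter helpers together; the index
# and base URLs are hypothetical placeholders, not taken from the original
# code.
if __name__ == '__main__':
    INDEX_URL = 'http://example.com/fighters'  # hypothetical
    BASE_URL = 'http://example.com'            # hypothetical
    for ext in get_fighter_url_exts(INDEX_URL):
        fighter_url = BASE_URL + ext
        print(get_fighter_info(fighter_url))
        print(len(get_fighter_matches_tr(fighter_url)), 'match rows')
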
def get_proxy_dict(self):
    # Map 'ip:port' -> whether the proxy supports HTTPS, scraped from the
    # free-proxy-list.net proxy table.
    page_soup = souper.get_soup('https://free-proxy-list.net/')
    tables = page_soup.findAll('table', {'id': 'proxylisttable'})
    proxy_dict = {}
    for table in tables:
        for row in table.tbody.findAll('tr'):
            cells = row.findAll('td')
            ip, port = cells[0].text, cells[1].text
            # The 'hx' class marks the Https column of the table.
            is_https = row.findAll('td', {'class': 'hx'})[0].text
            proxy_dict[ip + ':' + port] = (is_https == 'yes')
    return proxy_dict

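# A minimal sketch of how the proxy dict might be consumed; picking the
# first HTTPS-capable proxy is an assumption, not part of the original code.
import requests

def fetch_via_proxy(url, proxy_dict):
    # Route the request through the first proxy flagged as HTTPS-capable,
    # using requests' standard proxies parameter.
    for address, supports_https in proxy_dict.items():
        if supports_https:
            proxies = {'http': 'http://' + address,
                       'https': 'http://' + address}
            return requests.get(url, proxies=proxies, timeout=10)
    raise RuntimeError('no HTTPS-capable proxy available')
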
def read_url(url):
    print('Reading url:')
    print(url)
    print('-------------------')
    soup = souper.get_soup(url)
    beds, baths = get_beds_bathrooms(soup)
    rents = get_rents(soup)
    # Build unique [bed, bath, rent] entries.
    arr = []
    for indx in range(len(beds)):
        entry = [beds[indx], baths[indx], rents[indx]]
        if entry not in arr:
            # A 'pp' suffix marks per-person rent; convert it to a total
            # by multiplying by the number of beds.
            if 'pp' in str(entry[2]):
                entry[2] = float(entry[2].replace('pp', '')) * entry[0]
            arr.append(entry)
    print('Beds\tBaths\tRent')
    for i in arr:
        print('%s\t%s\t%s' % (i[0], i[1], i[2]))
    return arr

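# read_url relies on two helpers, get_beds_bathrooms and get_rents, that are
# not shown here. A hedged sketch of the return shapes it expects; the CSS
# class names are hypothetical placeholders, not the site's real markup.
def get_beds_bathrooms(soup):
    # Return parallel lists of bed and bath counts, one pair per listing.
    beds = [int(tag.text) for tag in soup.findAll('span', {'class': 'beds'})]
    baths = [int(tag.text) for tag in soup.findAll('span', {'class': 'baths'})]
    return beds, baths

def get_rents(soup):
    # Return one rent value per listing; values ending in 'pp' are
    # per-person rates that read_url converts to totals.
    return [tag.text for tag in soup.findAll('span', {'class': 'rent'})]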