def get_elements_from_file(url, element='table'):
    """Return every ``element`` tag found in the document at ``url``.

    Parameters
    ----------
    url
        Location of the document; it is converted with ``file_path_to_url``
        before being opened with ``urlopen``.
    element : str, default 'table'
        Tag name handed to ``BeautifulSoup.find_all``.

    Returns
    -------
    The result of ``find_all(element)`` on the html5lib-parsed document.
    """
    # Dependency guard runs first; presumably it skips the calling test when
    # the listed packages are unavailable -- confirm against the helper.
    _skip_if_none_of(('bs4', 'html5lib'))
    file_url = file_path_to_url(url)

    # Imported lazily, after the guard above, so the module itself can be
    # imported without bs4 installed.
    from bs4 import BeautifulSoup

    with urlopen(file_url) as response:
        parsed = BeautifulSoup(response, features='html5lib')
    return parsed.find_all(element)
def test_file_url(self):
    """Read the bank-list data through a file URL and check the result types.

    ``self.read_html`` is called with the URL, ``'First'`` as the second
    positional argument (the match text, assuming it forwards to
    ``pandas.read_html``) and ``attrs={'id': 'table'}``.  The result must be a
    list whose items are all ``DataFrame`` objects.
    """
    file_url = file_path_to_url(self.banklist_data)
    tables = self.read_html(file_url, 'First', attrs={'id': 'table'})

    tm.assertIsInstance(tables, list)
    for table in tables:
        tm.assertIsInstance(table, DataFrame)
def test_file_url(self):
    """Read the bank-list data through a file URL and check the result types.

    ``self.read_html`` is called with the URL, ``'First'`` as the second
    positional argument (the match text, assuming it forwards to
    ``pandas.read_html``) and ``attrs={'id': 'table'}``.  The result must be a
    list whose items are all ``DataFrame`` objects.
    """
    file_url = file_path_to_url(self.banklist_data)
    tables = self.read_html(file_url, 'First', attrs={'id': 'table'})

    assert isinstance(tables, list)
    assert all(isinstance(table, DataFrame) for table in tables)
def test_file_url(self):
    """Read the bank-list data through an absolute file URL and check types.

    The data path is made absolute before being converted to a URL, then
    ``self.read_html`` is called with ``match="First"`` and
    ``attrs={"id": "table"}``.  The result must be a list whose items are all
    ``DataFrame`` objects.
    """
    absolute_path = os.path.abspath(self.banklist_data)
    file_url = file_path_to_url(absolute_path)
    tables = self.read_html(file_url, match="First", attrs={"id": "table"})

    assert isinstance(tables, list)
    assert all(isinstance(table, DataFrame) for table in tables)
def test_regex_idempotency(self):
    """Check that ``read_html`` accepts a pattern that was compiled twice.

    The match argument is ``re.compile`` applied to an already-compiled
    ``'Florida'`` pattern.  The result must be a list whose items are all
    ``DataFrame`` objects.
    """
    absolute_path = os.path.abspath(self.banklist_data)
    file_url = file_path_to_url(absolute_path)

    # Compiling a compiled pattern is deliberate: it is the case this test
    # is named after, so do not collapse it into a single call.
    compiled_once = re.compile('Florida')
    compiled_twice = re.compile(compiled_once)

    tables = self.read_html(file_url, match=compiled_twice, attrs={'id': 'table'})

    assert isinstance(tables, list)
    assert all(isinstance(table, DataFrame) for table in tables)
def test_file_url(self):
    """Read the bank-list data through an absolute file URL and check types.

    The data path is made absolute before being converted to a URL, then
    ``self.read_html`` is called with ``'First'`` as the second positional
    argument (the match text, assuming it forwards to ``pandas.read_html``)
    and ``attrs={'id': 'table'}``.  The result must be a list whose items are
    all ``DataFrame`` objects.
    """
    absolute_path = os.path.abspath(self.banklist_data)
    file_url = file_path_to_url(absolute_path)
    tables = self.read_html(file_url, 'First', attrs={'id': 'table'})

    assert isinstance(tables, list)
    assert all(isinstance(table, DataFrame) for table in tables)
def get_elements_from_file(url, element="table"):
    """Return every ``element`` tag found in the document at ``url``.

    Parameters
    ----------
    url
        Location of the document; it is converted with ``file_path_to_url``
        before being opened with ``urlopen``.
    element : str, default "table"
        Tag name handed to ``BeautifulSoup.find_all``.

    Returns
    -------
    The result of ``find_all(element)`` on the html5lib-parsed document.
    """
    # Dependency guard runs first; presumably it skips the calling test when
    # the listed packages are unavailable -- confirm against the helper.
    required_packages = ("bs4", "html5lib")
    _skip_if_none_of(required_packages)
    source_url = file_path_to_url(url)

    # Imported lazily, after the guard above, so the module itself can be
    # imported without bs4 installed.
    from bs4 import BeautifulSoup

    with urlopen(source_url) as handle:
        document = BeautifulSoup(handle, features="html5lib")
    return document.find_all(element)
def test_regex_idempotency(self, banklist_data):
    """Check that ``read_html`` accepts a pattern that was compiled twice.

    ``banklist_data`` is the path to the input document; it is made absolute
    and converted to a file URL.  The match argument is ``re.compile`` applied
    to an already-compiled ``"Florida"`` pattern.  The result must be a list
    whose items are all ``DataFrame`` objects.
    """
    file_url = file_path_to_url(os.path.abspath(banklist_data))

    # Compiling a compiled pattern is deliberate: it is the case this test
    # is named after, so do not collapse it into a single call.
    compiled_once = re.compile("Florida")
    compiled_twice = re.compile(compiled_once)

    tables = self.read_html(
        file_url,
        match=compiled_twice,
        attrs={"id": "table"},
    )

    assert isinstance(tables, list)
    assert all(isinstance(table, DataFrame) for table in tables)
def test_regex_idempotency(self):
    """Check that ``read_html`` accepts a pattern that was compiled twice.

    The match argument is ``re.compile`` applied to an already-compiled
    ``"Florida"`` pattern.  The result must be a list whose items are all
    ``DataFrame`` objects.
    """
    file_url = file_path_to_url(self.banklist_data)

    # Compiling a compiled pattern is deliberate: it is the case this test
    # is named after, so do not collapse it into a single call.
    compiled_once = re.compile("Florida")
    compiled_twice = re.compile(compiled_once)

    tables = self.read_html(file_url, match=compiled_twice, attrs={"id": "table"})

    tm.assertIsInstance(tables, list)
    for table in tables:
        tm.assertIsInstance(table, DataFrame)
def test_file_url(self):
    """Read the bank-list data through a file URL and check the result types.

    ``self.read_html`` is called with the URL, ``"First"`` as the second
    positional argument (the match text, assuming it forwards to
    ``pandas.read_html``) and ``attrs={"id": "table"}``.  The result must be a
    list whose items are all ``DataFrame`` objects.
    """
    file_url = file_path_to_url(self.banklist_data)
    tables = self.read_html(file_url, "First", attrs={"id": "table"})

    tm.assertIsInstance(tables, list)
    for table in tables:
        tm.assertIsInstance(table, DataFrame)