def extract(soup):
    """Return the job listing's fields as a dictionary.

    The listing's HTML is not easy to navigate: everything lives in one
    ``p`` tag, where each field header is wrapped in a ``strong`` tag and
    its value follows as plain text.

    ``soup`` is the soup for that ``p`` tag. The header tags are handed to
    ``extract_from_headers``, whose result is returned unchanged.
    """
    # The first strong tag only carries the job title, so the field headers
    # are the strong siblings that come after it.
    title_tag = soup.find("strong")
    # NOTE(review): if no strong tag is found, ``find`` presumably returns
    # None and the next line raises AttributeError - confirm callers expect it.
    field_headers = title_tag.find_next_siblings("strong")
    return extract_from_headers(field_headers)
def extract_fields(self, soup):
    """Extract fields from the ``b`` tags that follow the ``h3`` in ``soup``.

    Locates the ``h3`` tag via ``soup.find``, collects the ``b`` siblings
    that come after it as the field headers, and returns whatever
    ``extract_from_headers`` produces for them.
    """
    heading = soup.find("h3")
    # Only the b tags that are later siblings of the heading are treated
    # as headers.
    field_headers = heading.find_next_siblings("b")
    return extract_from_headers(field_headers)