def navigate(self, url, dynamic_for_browser=True, waited_el_css=None, need_time_measure=False):
    """Load ``url`` and return a node wrapping the resulting document.

    When ``self.browser`` is set, the browser performs the navigation and
    ``self.el`` becomes either the live driver or the driver's page source
    parsed with ``fromstring``. Otherwise the page is fetched with
    ``requests.get`` and parsed, trying at most five times.

    Args:
        url: Address to load.
        dynamic_for_browser: Browser mode only. When true, keep the live
            driver as ``self.el``; when false, parse ``driver.page_source``.
        waited_el_css: Passed through to ``self.browser.navigate``.
        need_time_measure: When true, call ``time_measure(url, start_time)``
            after the navigation step.

    Returns:
        Whatever ``self.__create_node__(self.el)`` returns.

    Note:
        If every HTTP attempt fails the errors are only printed and
        ``self.el`` is not updated by this call.
    """
    start_time = time.time()
    if self.browser:
        self.browser.navigate(url, waited_el_css)
        driver = self.browser.driver
        self.el = driver if dynamic_for_browser else fromstring(driver.page_source)
    else:
        # Counts the retries still available after the current attempt.
        for attempts_left in range(4, -1, -1):
            try:
                response = requests.get(url, timeout=self.timeout)
                self.el = fromstring(response.text)
            except Exception as e:
                print("Bad navigation attempt", e)
                if attempts_left:
                    # Back off for twice the request timeout before retrying.
                    # Multiplication (not a bit shift) so float timeouts work.
                    time.sleep(self.timeout * 2 if self.timeout else 0)
            else:
                # Page fetched and parsed: do not request it again.
                break
    if need_time_measure:
        time_measure(url, start_time)
    return self.__create_node__(self.el)
import time

from gray.common.data_utils import write_entries, time_measure
from gray.common.node_utils import Node, Provider

# Scrape the General Assembly education catalog: one entry per catalog row,
# handed to write_entries under the name "generalassembly".

# Open the catalog page using the PhantomJS provider.
doc = Node("https://generalassemb.ly/education", Provider.PHANTOMJS)

entries = []

# Every child of the results container except the date-divider rows.
catalog_rows = doc.select_list("#catalog-results > div > *:not(.date-divider)")

for position, row in enumerate(catalog_rows):
    started_at = time.time()

    link = row.select("a").attr("href")
    details = row.select(".item-details")

    # NOTE(review): "date" and "time" both read the same ".date-details"
    # element - confirm whether separate selectors were intended.
    entry = {
        "link": link,
        "title": details.select(".medium.item-title").text(),
        "desc": details.children(1).text(),
        "instructor": details.select(".instructor").text(),
        "series": row.select(".series-info").text(),
        "date": row.select(".date-details").text(),
        "time": row.select(".date-details").text(),
        "topics": row.select_list("li.topic-icon-item").attrs("title"),
        "promo": row.select_list(".cyber-monday-promo").texts(),
    }

    # Report how long this row took to extract.
    # The meaning of the third argument is defined by time_measure - TODO confirm.
    time_measure(f"{position} row", started_at, 3)
    entries.append(entry)

write_entries(entries, "generalassembly")