Esempio n. 1
0
 def navigate(self,
              url,
              dynamic_for_browser=True,
              waited_el_css=None,
              need_time_measure=False):
     """Load ``url`` into ``self.el`` and return a node built from it.

     When ``self.browser`` is set, navigation is delegated to
     ``self.browser.navigate(url, waited_el_css)``. Otherwise the page is
     fetched with ``requests.get`` and parsed with ``fromstring``; that
     fetch is retried up to five times and stops at the first success.

     Args:
         url: Address of the page to load.
         dynamic_for_browser: Browser mode only. If true, ``self.el``
             becomes the browser's driver itself; if false, the driver's
             ``page_source`` is parsed with ``fromstring`` instead.
         waited_el_css: Browser mode only. Forwarded unchanged to
             ``self.browser.navigate`` (presumably a CSS selector to wait
             for -- confirm against the browser wrapper).
         need_time_measure: If true, call ``time_measure(url, start_time)``
             once loading has finished.

     Returns:
         The result of ``self.__create_node__(self.el)``.
     """
     start_time = time.time()
     if self.browser:
         self.browser.navigate(url, waited_el_css)
         driver = self.browser.driver
         self.el = driver if dynamic_for_browser else fromstring(
             driver.page_source)
     else:
         for attempts_left in range(5, 0, -1):
             try:
                 response = requests.get(url, timeout=self.timeout)
                 self.el = fromstring(response.text)
             except Exception as e:
                 # Deliberately broad: both network and parse failures are
                 # treated as a bad attempt and retried.
                 print("Bad navigation attempt", e)
                 # Back off only when another attempt will follow.
                 if attempts_left > 1:
                     # `* 2` equals the former `<< 1` for ints and also
                     # accepts float timeouts.
                     time.sleep(self.timeout * 2)
             else:
                 # Success: stop instead of re-downloading the same page.
                 break
         # NOTE: if every attempt failed, self.el keeps whatever value it
         # had before this call (the failure is only reported via print).
     if need_time_measure:
         time_measure(url, start_time)
     return self.__create_node__(self.el)
Esempio n. 2
0
import time

from gray.common.data_utils import write_entries, time_measure
from gray.common.node_utils import Node, Provider

# Collect one entry per row of the General Assembly education catalog and
# hand the collected entries to write_entries() under the "generalassembly"
# name.
doc = Node("https://generalassemb.ly/education", Provider.PHANTOMJS)
entries = []
rows = doc.select_list("#catalog-results > div > *:not(.date-divider)")
for index, row in enumerate(rows):
    started_at = time.time()

    # Queries are issued in the same order as the resulting keys.
    link = row.select("a").attr("href")
    details = row.select(".item-details")
    record = {
        "link": link,
        "title": details.select(".medium.item-title").text(),
        "desc": details.children(1).text(),
        "instructor": details.select(".instructor").text(),
        "series": row.select(".series-info").text(),
        # NOTE(review): "date" and "time" both read ".date-details";
        # confirm whether "time" was meant to use a different selector.
        "date": row.select(".date-details").text(),
        "time": row.select(".date-details").text(),
        "topics": row.select_list("li.topic-icon-item").attrs("title"),
        "promo": row.select_list(".cyber-monday-promo").texts(),
    }

    # Report how long this row took to extract.
    time_measure("{} row".format(index), started_at, 3)
    entries.append(record)

write_entries(entries, "generalassembly")