def upenn_scraper(name):
    """Look up *name* in the UPenn online directory and return their email.

    A cached result is returned without launching a browser; otherwise the
    search form is filled in via Selenium. Returns None when no mailto
    link appears in the result table.
    """
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        return cache[name]
    except KeyError:
        pass
    first_name, last_name = split_name(name)
    driver = get_driver()
    driver.delete_all_cookies()
    driver.get(DIRECTORIES.get('upenn'))
    driver.implicitly_wait(5)
    # Fill in the last/first name fields of the search form and submit.
    driver.find_element_by_css_selector('input[name="lastName"]').send_keys(last_name)
    driver.find_element_by_xpath('//tr[4]//td//input').send_keys(first_name)
    driver.find_element_by_css_selector('form a.submitButton').click()
    driver.implicitly_wait(5)
    try:
        link = driver.find_element_by_xpath(
            '//tr[contains(@class, "lookupbody")]//a[contains(@href, "mailto")]'
        )
        email = link.text
        print(email)
    except selenium.common.exceptions.NoSuchElementException:
        email = None
    driver.delete_all_cookies()
    driver.quit()
    if email is not None:
        cache[name] = email
    return email
def michigan_scraper(name):
    """Look up *name* in the Michigan online directory and return their email.

    Results are cached under the person's original (unencoded) name.
    Returns None when no matching entry or mailto link is found.
    """
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    # BUG FIX: keep the original name for cache keys; only the URL needs the
    # space -> %20 substitution. Previously `name` itself was mutated, so the
    # cache was written under the encoded name but read under the plain one
    # and therefore never hit.
    query_name = name.replace(" ", "%20")
    query_link = DIRECTORIES.get('michigan').format(query_name)
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    try:
        # Open the first search result, then grab its mailto link.
        driver.find_element_by_xpath(
            '//div[@id="peopleContent"]//table[@class="searchResults"]//tbody//tr[1]//td[1]//a[1]'
        ).click()
        driver.implicitly_wait(3)
        email = driver.find_element_by_xpath(
            '//div[contains(@class, "wrapEmail")]//a[contains(@href, "mailto")]'
        ).text
        print(email)
    except selenium.common.exceptions.NoSuchElementException:
        email = None
    driver.quit()
    if email is not None:
        cache[name] = email
    return email
def delaware_scraper(name):
    """Look up *name* in the Delaware online directory and return their email.

    A cached result short-circuits the browser session. Returns None when
    the results page contains no mailto link.
    """
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    first_name, last_name = split_name(name)
    query_link = DIRECTORIES.get('delaware')
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    driver.find_element_by_id('lastName').send_keys(last_name)
    driver.find_element_by_id('firstName').send_keys(first_name)
    driver.find_element_by_css_selector('form button[type="submit"]').click()
    driver.implicitly_wait(5)
    time.sleep(3)  # give the results page time to render
    try:
        email = driver.find_element_by_xpath(
            '//div[contains(@role, "main")]//a[contains(@href, "mailto")]'
        ).text
        print(email)
    except selenium.common.exceptions.NoSuchElementException:
        email = None
    driver.quit()
    # Normalize an empty string to None, and (fix) only cache real hits --
    # previously None/empty results were cached too, which is inconsistent
    # with the other scrapers and permanently pinned a failed lookup.
    email = email if email else None
    if email is not None:
        cache[name] = email
    return email
def princeton_scraper(name):
    """Search the Princeton directory for *name* and return their email.

    Uses a cached value when available; returns None when the people
    search yields no email address.
    """
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        return cache[name]
    except KeyError:
        pass
    driver = get_driver()
    driver.get(DIRECTORIES.get('princeton'))
    driver.implicitly_wait(5)
    time.sleep(2)
    # Type the query and submit with Enter.
    driver.find_element_by_id('edit-search').send_keys(name + Keys.RETURN)
    time.sleep(3)
    driver.implicitly_wait(3)
    # Narrow the results to the "People" tab before parsing the page.
    driver.find_element_by_id('people-label').click()
    page = fromstring(driver.page_source)
    matches = page.xpath(
        '//div[contains(@class, "people-search-email")]/a/text()')
    driver.quit()
    email = matches[0] if matches else None
    if email is not None:
        cache[name] = email
    return email
def oak_ridge_scraper(name):
    """Look up *name* in the Oak Ridge staff directory and return their email.

    Results are cached under the original (unencoded) name. Returns None
    when no staff profile or mailto link is found.
    """
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    # Keep the original name as the cache key; only the query URL needs the
    # space -> '+' substitution (previously the '+' form leaked into the key,
    # inconsistent with the other scrapers).
    query_link = DIRECTORIES.get('oak ridge').format(name.replace(" ", "+"))
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    try:
        # Open the first result. This click raises NoSuchElementException on
        # an empty results page, so it belongs inside the try (previously an
        # empty result set crashed instead of returning None).
        driver.find_element_by_css_selector(
            'td.views-field-nothing a:nth-child(1)').click()
        driver.implicitly_wait(5)
        email = driver.find_element_by_xpath(
            '//div[contains(@class, "staff-profile-contact-info")]//a[contains(@href, "mailto")]'
        ).text
        print(email)
    except NoSuchElementException:
        email = None
    driver.quit()
    if email is not None:
        cache[name] = email
    return email
def drexel_scraper(name):
    """Look up *name* in the Drexel directory and return their email.

    Results are cached under the original (unencoded) name. Returns None
    when the result table contains no mailto link.
    """
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    # Cache under the plain name; only the URL needs %20-encoded spaces
    # (previously the encoded form leaked into the cache key, inconsistent
    # with the other scrapers).
    query_link = DIRECTORIES.get('drexel').format(name.replace(" ", "%20"))
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    time.sleep(3)  # let the result rows render before parsing the page
    tree = fromstring(driver.page_source)
    emails = tree.xpath(
        '//tr[@class="result-row"]//span[@class="email-address"]//a[contains(@href, "mailto")]/text()'
    )
    print(emails)
    driver.quit()
    email = emails[0] if emails else None
    if email is not None:
        cache[name] = email
    return email
def temple_scraper(name):
    """Search the Temple directory for *name* and return their email.

    Cached results are returned without launching a browser; None is
    returned when no mailto link appears in the result pane.
    """
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        return cache[name]
    except KeyError:
        pass
    first_name, last_name = split_name(name)
    driver = get_driver()
    driver.delete_all_cookies()
    driver.get(DIRECTORIES.get('temple'))
    driver.implicitly_wait(5)
    # Populate the surname/given-name fields and run the search.
    driver.find_element_by_id('templeedusn').send_keys(last_name)
    driver.find_element_by_id('templeedugivenname').send_keys(first_name)
    driver.find_element_by_css_selector('form input.Search').click()
    driver.implicitly_wait(3)
    try:
        email = driver.find_element_by_xpath(
            '//div[contains(@id, "Div_Column_02")]//a[contains(@href, "mailto")]'
        ).text
        print(email)
    except selenium.common.exceptions.NoSuchElementException:
        email = None
    driver.delete_all_cookies()
    driver.quit()
    if email is not None:
        cache[name] = email
    return email
def minnesota_scraper(name):
    """Look up *name* in the Minnesota directory and return their email.

    Results are cached under the original (unencoded) name. Returns None
    when the result table contains no mailto link.
    """
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    # Cache under the plain name; only the URL needs '+'-encoded spaces
    # (previously the encoded form leaked into the cache key, inconsistent
    # with the other scrapers).
    query_link = DIRECTORIES.get('minnesota').format(name.replace(" ", "+"))
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    time.sleep(3)  # let the results render before parsing the page
    tree = fromstring(driver.page_source)
    emails = tree.xpath(
        '//table[contains(@class, "result__single-person")]//a[contains(@href, "mailto")]/text()'
    )
    print(emails)
    driver.quit()
    email = emails[0] if emails else None
    if email is not None:
        cache[name] = email
    return email
def rutgers_scraper(name):
    """Search the Rutgers directory for *name* and return their email.

    Returns the cached address when available, otherwise scrapes the
    results iframe; None when no mailto link is present.
    """
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    query_link = DIRECTORIES.get('rutgers')
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    driver.find_element_by_id('q').send_keys(name + Keys.RETURN)
    # The results render inside an iframe; switch into it before querying.
    wait(driver, 30).until(
        EC.frame_to_be_available_and_switch_to_it(
            driver.find_element_by_tag_name("iframe")))
    time.sleep(3)
    try:
        email = driver.find_element_by_xpath(
            '//div[contains(@id, "content")]//dd//a[contains(@href, "mailto")]'
        ).text
        print(email)
    except selenium.common.exceptions.NoSuchElementException:
        email = None
    driver.quit()
    # BUG FIX: `email` is already a string (or None) here, not a list; the
    # old `email = email[0] if email else None` truncated the address to its
    # first character. Just normalize empty string to None.
    email = email if email else None
    if email is not None:
        cache[name] = email
    return email
def virginia_tech_scraper(name):
    """Search the Virginia Tech directory for *name* and return their email.

    Scans each person card in the results for one whose displayed name
    contains every token of *name*; returns None when no card matches or
    the matching card has no email link.
    """
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    query_link = DIRECTORIES.get('virginia tech')
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    time.sleep(3)
    driver.find_element_by_id('vt_search_box').send_keys(name + Keys.RETURN)
    time.sleep(2)  # allow the results list to populate
    driver.implicitly_wait(5)
    tree = fromstring(driver.page_source)
    persons = tree.xpath(
        '//div[@id="results"]//div[contains(@class, "vt-person")]')
    name_tokens = name.lower().split(' ')
    for person in persons:
        # BUG FIX: use relative XPaths ('.//') so we inspect THIS card; the
        # old absolute '//' queries searched the whole document and always
        # matched the first card, regardless of which person was being checked.
        vt_name = person.xpath('.//a[@class="vt-c-name"]/text()')
        if vt_name and all(tok in vt_name[0].lower() for tok in name_tokens):
            email = person.xpath('.//li[@class="vt-cl-email"]/a/text()')
            print(email)
            print()
            break
    else:
        # No card matched every token of the requested name.
        email = None
    time.sleep(2)
    driver.quit()
    email = email[0] if email else None
    if email is not None:
        cache[name] = email
    return email
def stony_brook_scraper(name):
    """Look up *name* in the Stony Brook directory and return their email.

    Results are cached under the original (unencoded) name. Returns None
    when the results table has no email link.
    """
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    # BUG FIX: keep the original name for the cache key; only the URL needs
    # %20-encoded spaces. Previously `name` itself was mutated, so the cache
    # was written under the encoded name but read under the plain one and
    # therefore never hit.
    query_link = DIRECTORIES.get('stony brook').format(name.replace(" ", "%20"))
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    time.sleep(3)  # let the results table render before parsing the page
    tree = fromstring(driver.page_source)
    emails = tree.xpath('//tr[@class="data"]//a[@class="email"]/text()')
    print(emails)
    driver.quit()
    email = emails[0] if emails else None
    if email is not None:
        cache[name] = email
    return email