Example #1
def upenn_scraper(name):
    print('Retrieving ' + name + "'s email...")
    # Return the cached address if this person has already been looked up.
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    first_name, last_name = split_name(name)
    query_link = DIRECTORIES.get('upenn')
    driver = get_driver()
    driver.delete_all_cookies()
    driver.get(query_link)
    driver.implicitly_wait(5)
    # Fill in the last/first name fields of the directory form and submit it.
    driver.find_element_by_css_selector('input[name="lastName"]').send_keys(
        last_name)
    driver.find_element_by_xpath('//tr[4]//td//input').send_keys(first_name)
    driver.find_element_by_css_selector('form a.submitButton').click()
    driver.implicitly_wait(5)
    try:
        email = driver.find_element_by_xpath(
            '//tr[contains(@class, "lookupbody")]//a[contains(@href, "mailto")]'
        )
        email = email.text
        print(email)
    except selenium.common.exceptions.NoSuchElementException:
        email = None
    driver.delete_all_cookies()
    driver.quit()
    if email is not None:
        cache[name] = email
    return email
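All of the scrapers in this listing lean on module-level helpers (Cache, DIRECTORIES, get_driver, split_name) that are defined elsewhere in the project and never shown here. The sketch below is only an assumption about what a minimal version of that scaffolding might look like; the real Cache could be backed by anything, and the DIRECTORIES entries are placeholders, not the actual query URLs.

# Hypothetical scaffolding that the scrapers above assume -- not the project's actual code.
import json
import os

from selenium import webdriver

# Query-URL templates keyed by school name; the real templates live elsewhere,
# so these entries are placeholders only.
DIRECTORIES = {
    'upenn': 'https://example.edu/upenn-directory',
    'michigan': 'https://example.edu/michigan-directory?query={}',
}


class Cache:
    """Tiny name -> email cache persisted as JSON (illustrative only)."""

    def __init__(self, path='email_cache.json'):
        self.path = path
        self._data = json.load(open(path)) if os.path.exists(path) else {}

    def __getitem__(self, name):
        # Raises KeyError on a miss, which is what the scrapers catch.
        return self._data[name]

    def __setitem__(self, name, email):
        self._data[name] = email
        with open(self.path, 'w') as fh:
            json.dump(self._data, fh)


def split_name(name):
    """'Jane Q. Doe' -> ('Jane', 'Doe'): first and last whitespace-separated tokens."""
    parts = name.split()
    return parts[0], parts[-1]


def get_driver():
    """Return a headless Chrome driver; any Selenium-supported browser would do."""
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    return webdriver.Chrome(options=options)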
Example #2
def michigan_scraper(name):
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    # URL-encode spaces for the query, but keep the original name as the cache key
    # so the lookup at the top of the function can actually hit.
    query_name = name.replace(" ", "%20")
    query_link = DIRECTORIES.get('michigan')
    query_link = query_link.format(query_name)
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    try:
        driver.find_element_by_xpath(
            '//div[@id="peopleContent"]//table[@class="searchResults"]//tbody//tr[1]//td[1]//a[1]'
        ).click()
        driver.implicitly_wait(3)
        email = driver.find_element_by_xpath(
            '//div[contains(@class, "wrapEmail")]//a[contains(@href, "mailto")]'
        )
        email = email.text
        print(email)
    except selenium.common.exceptions.NoSuchElementException:
        email = None
    driver.quit()
    if email is not None:
        cache[name] = email
    return email
Example #3
def delaware_scraper(name):
    print('Retrieving ' + name + "'s email...")
    first_name, last_name = split_name(name)
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    query_link = DIRECTORIES.get('delaware')
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    driver.find_element_by_id('lastName').send_keys(
        last_name)
    driver.find_element_by_id('firstName').send_keys(
        first_name)
    driver.find_element_by_css_selector('form button[type="submit"]').click()
    driver.implicitly_wait(5)
    time.sleep(3)
    try:
        email = driver.find_element_by_xpath('//div[contains(@role, "main")]//a[contains(@href, "mailto")]')
        email = email.text
        print(email)
    except selenium.common.exceptions.NoSuchElementException:
        email = None
    driver.quit()
    email = email if email else None
    # Only cache real addresses; a miss should be retried on the next run.
    if email is not None:
        cache[name] = email
    return email
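Every scraper repeats the same bracket: check the cache, scrape, then store a non-None result. If the Cache behaves like the sketch above, that bracket could be factored into a decorator; the code below is one possible refactor under that assumption, not how the project is actually structured.

import functools


def cached_email(scrape):
    """Wrap a scraper so that cache hits skip the browser entirely (illustrative only)."""
    @functools.wraps(scrape)
    def wrapper(name):
        cache = Cache()
        try:
            return cache[name]
        except KeyError:
            pass
        email = scrape(name)
        if email is not None:
            cache[name] = email
        return email
    return wrapper


# Usage: the decorated scraper would then contain only the Selenium logic.
# @cached_email
# def upenn_scraper(name):
#     ...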
Example #4
def princeton_scraper(name):
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    query_link = DIRECTORIES.get('princeton')
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    time.sleep(2)
    driver.find_element_by_id('edit-search').send_keys(name + Keys.RETURN)
    # pg_loaded = WebDriverWait(driver, 30).until(
    #     EC.presence_of_element_located((By.CLASS_NAME, "results")))
    time.sleep(3)
    driver.implicitly_wait(3)
    driver.find_element_by_id('people-label').click()
    tree = fromstring(driver.page_source)
    email = tree.xpath(
        '//div[contains(@class, "people-search-email")]/a/text()')
    driver.quit()
    email = email[0] if email else None
    if email is not None:
        cache[name] = email
    return email
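The fixed time.sleep calls here (and the commented-out wait above) can be replaced by Selenium's explicit waits, which poll until a condition holds or a timeout expires. A minimal sketch, assuming the results container really is marked with a "results" class as the commented-out code suggests:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


def wait_for_results(driver, timeout=30):
    """Block until the search-results container exists instead of sleeping a fixed time."""
    # The 'results' class name is an assumption carried over from the commented-out code.
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CLASS_NAME, "results")))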
Example #5
def oak_ridge_scraper(name):
    print('Retrieving ' + name + "'s email...")
    query_link = DIRECTORIES.get('oak ridge')
    name = name.replace(" ", "+")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    query_link = query_link.format(name)
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    try:
        # Open the first search result, then read the mailto link from the profile page.
        # Keeping the click inside the try block means a missing result is handled
        # instead of raising out of the function with the driver still open.
        driver.find_element_by_css_selector(
            'td.views-field-nothing a:nth-child(1)').click()
        driver.implicitly_wait(5)
        email = driver.find_element_by_xpath(
            '//div[contains(@class, "staff-profile-contact-info")]//a[contains(@href, "mailto")]'
        )
        email = email.text
        print(email)
    except NoSuchElementException:
        email = None
    driver.quit()
    if email is not None:
        cache[name] = email
    return email
Example #6
def drexel_scraper(name):
    print('Retrieving ' + name + "'s email...")
    query_link = DIRECTORIES.get('drexel')
    name = name.replace(" ", "%20")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    query_link = query_link.format(name)
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    time.sleep(3)
    tree = fromstring(driver.page_source)
    email = tree.xpath(
        '//tr[@class="result-row"]//span[@class="email-address"]//a[contains(@href, "mailto")]/text()'
    )
    print(email)
    driver.quit()
    email = email[0] if email else None
    if email is not None:
        cache[name] = email
    return email
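drexel_scraper, like the princeton, minnesota, virginia tech, and stony brook scrapers, runs XPath over driver.page_source via fromstring, which is presumably lxml's HTML parser (an assumption; the import is not shown in this listing):

# Assumed import for the scrapers that XPath over driver.page_source.
from lxml.html import fromstring

# fromstring returns an element tree whose .xpath() yields matched text nodes as a list.
tree = fromstring('<html><body><a href="mailto:x@example.edu">x@example.edu</a></body></html>')
print(tree.xpath('//a[contains(@href, "mailto")]/text()'))  # ['x@example.edu']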
Example #7
def temple_scraper(name):
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    first_name, last_name = split_name(name)
    query_link = DIRECTORIES.get('temple')
    driver = get_driver()
    driver.delete_all_cookies()
    driver.get(query_link)
    driver.implicitly_wait(5)
    driver.find_element_by_id('templeedusn').send_keys(
        last_name)
    driver.find_element_by_id('templeedugivenname').send_keys(
        first_name)
    driver.find_element_by_css_selector('form input.Search').click()
    driver.implicitly_wait(3)
    try:
        email = driver.find_element_by_xpath('//div[contains(@id, "Div_Column_02")]//a[contains(@href, "mailto")]')
        email = email.text
        print(email)
    except selenium.common.exceptions.NoSuchElementException:
        email = None
    driver.delete_all_cookies()
    driver.quit()
    if email is not None:
        cache[name] = email
    return email
Example #8
def minnesota_scraper(name):
    print('Retrieving ' + name + "'s email...")
    query_link = DIRECTORIES.get('minnesota')
    name = name.replace(" ", "+")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    query_link = query_link.format(name)
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    time.sleep(3)
    tree = fromstring(driver.page_source)
    email = tree.xpath(
        '//table[contains(@class, "result__single-person")]//a[contains(@href, "mailto")]/text()'
    )
    print(email)
    driver.quit()
    email = email[0] if email else None
    if email is not None:
        cache[name] = email
    return email
Example #9
def rutgers_scraper(name):
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    query_link = DIRECTORIES.get('rutgers')
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    driver.find_element_by_id('q').send_keys(name + Keys.RETURN)
    wait(driver, 30).until(
        EC.frame_to_be_available_and_switch_to_it(
            driver.find_element_by_tag_name("iframe")))
    time.sleep(3)
    try:
        email = driver.find_element_by_xpath(
            '//div[contains(@id, "content")]//dd//a[contains(@href, "mailto")]'
        )
        email = email.text
        print(email)
    except selenium.common.exceptions.NoSuchElementException:
        email = None
    driver.quit()
    # email is already a string (or None) here; only guard against an empty match.
    email = email if email else None
    if email is not None:
        cache[name] = email
    return email
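rutgers_scraper uses Keys, wait(...) and EC.frame_to_be_available_and_switch_to_it, which suggests imports along these lines (an assumption; the aliases are defined elsewhere in the module):

# Assumed imports behind rutgers_scraper's Keys/wait/EC names (not shown in this listing).
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as wait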
Example #10
def virginia_tech_scraper(name):
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    query_link = DIRECTORIES.get('virginia tech')
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    time.sleep(3)
    driver.find_element_by_id('vt_search_box').send_keys(name + Keys.RETURN)
    # pg_loaded = WebDriverWait(driver, 30).until(
    #     EC.presence_of_element_located((By.CLASS_NAME, "results")))
    time.sleep(2)
    driver.implicitly_wait(5)
    #driver.find_element_by_id('people-label').click()
    tree = fromstring(driver.page_source)
    persons = tree.xpath(
        '//div[@id="results"]//div[contains(@class, "vt-person")]')
    email = None
    for person in persons:
        # Search relative to each result card ('.//'), not the whole document.
        vt_name = person.xpath('.//a[@class="vt-c-name"]/text()')
        if vt_name and all(
                n in vt_name[0].lower() for n in name.lower().split(' ')):
            email = person.xpath('.//li[@class="vt-cl-email"]/a/text()')
            print(email)
            break
    time.sleep(2)
    driver.quit()
    email = email[0] if email else None
    if email is not None:
        cache[name] = email
    return email
Example #11
def stony_brook_scraper(name):
    print('Retrieving ' + name + "'s email...")
    cache = Cache()
    try:
        email = cache[name]
        return email
    except KeyError:
        pass
    query_link = DIRECTORIES.get('stony brook')
    # URL-encode spaces for the query, but keep the original name as the cache key
    # so the lookup at the top of the function can actually hit.
    query_name = name.replace(" ", "%20")
    query_link = query_link.format(query_name)
    driver = get_driver()
    driver.get(query_link)
    driver.implicitly_wait(5)
    time.sleep(3)
    tree = fromstring(driver.page_source)
    email = tree.xpath('//tr[@class="data"]//a[@class="email"]/text()')
    print(email)
    driver.quit()
    email = email[0] if email else None
    if email is not None:
        cache[name] = email
    return email
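The listing never shows how these functions are called; one plausible caller is a dispatch table keyed the same way as DIRECTORIES. The snippet below is only a guess at such a wrapper, not part of the project.

# Hypothetical dispatcher -- the project may wire this up differently.
SCRAPERS = {
    'upenn': upenn_scraper,
    'michigan': michigan_scraper,
    'delaware': delaware_scraper,
    'princeton': princeton_scraper,
    'oak ridge': oak_ridge_scraper,
    'drexel': drexel_scraper,
    'temple': temple_scraper,
    'minnesota': minnesota_scraper,
    'rutgers': rutgers_scraper,
    'virginia tech': virginia_tech_scraper,
    'stony brook': stony_brook_scraper,
}


def lookup_email(school, name):
    """Run the scraper registered for `school`, or return None if there is none."""
    scraper = SCRAPERS.get(school.lower())
    return scraper(name) if scraper else None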