Example #1
def main():
    # Read the username and password first
    infile = open("user.txt", "r")
    username = infile.readline().strip()  # strip the trailing newline
    password = infile.readline().strip()
    print "username", username, type(username)
    infile.close()
    url = Links.login()
    browser = Browser('firefox')
    browser.visit(url)
    time.sleep(1)
    browser.find_by_id('user_name').fill(username)
    browser.find_by_id('password').fill(password)
    browser.find_by_id('submit_button').click()
    time.sleep(1)
    browser.click_link_by_href(Links.xuankejieguo())
    # time.sleep(8)
    # Drive the scraping from the command line
    print "ready to catch information!"
    cmd = raw_input("main>>>")
    while cmd != "exit":
        strlst = cmd.split(' ')
        cmd0 = strlst[0]
        if cmd0 == "get":
            print "begin catching information"
            pkuget = PkuGet(browser, 3)
            if pkuget.state == 1:
                pkuget.getinfo(strlst)
        cmd = raw_input("main>>>")
    browser.quit()
Example #2
def jpl_mars():
       
        #JPL Mars Space Images - Featured Image
        #Importing packages
        from selenium import webdriver
        import pandas as pd
        from splinter import Browser
        from bs4 import BeautifulSoup
        import requests
        global featured_image_url

        executable_path = {'executable_path': 'C:/webdrivers/chromedriver.exe'}
        browser = Browser('chrome', **executable_path, headless=False)
        url ='https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
        browser.visit(url)

        browser.click_link_by_partial_text('more news')
        for x in range(1, 3):
                html = browser.html
                soup = BeautifulSoup(html, 'html.parser')
                # get the number of pics to be reviewed within this page
                vl_jpl_pics = soup.find_all('li', class_='slide')
                #news_title='33'
                v_count = 0
                v_pag  =0
                vl_href=[]
                for vc_jpl_pics in vl_jpl_pics:
                    link_href = vc_jpl_pics.find('a')
                    link_href= link_href['href']      
                    vl_links_t= vc_jpl_pics.find('div', class_='content_title') 
                    # finding the picture (news_title is assumed to be defined globally elsewhere)
                    if vl_links_t.text.strip() == news_title[0]:
                        print('===found====')
                        print('Title:',v_count," ",vl_links_t.text.strip() )
                        # Click on the picture link reference
                        browser.click_link_by_href(link_href)          
                        html = browser.html
                        soup = BeautifulSoup(html, 'html.parser')
                        xvl_jpl_pics = soup.find_all('div', class_='article_image_container')
                        for xvc_jpl_pics in xvl_jpl_pics:
                            xlink_href = xvc_jpl_pics.find('a')
                            featured_image_url= xlink_href['href']
                            print('Link to Img :',featured_image_url)

                        # stop searching once the featured image has been found
                        break
                    vl_href.append(vl_links_t)
                    v_count +=1
                    v_pag +=1
                    print ('pag',x,"Count",v_count)
                # browser.click_link_by_partial_text('MORE')

        # Close the browser after scraping
        browser.quit()
Example #3
def parse_wordstat(login, password, request_word, number_of_request_pages = 10):
    # open the Wordstat page and log in
    browser = Browser()
    url = "https://wordstat.yandex.ru/"
    browser.visit(url)
    browser.click_link_by_href('https://passport.yandex.ru/passport?mode=auth&msg=&retpath=https%3A%2F%2Fwordstat.yandex.ru%2F')
    browser.find_by_id('b-domik_popup-username').fill(login)
    sleep(random.randint(5, 10)/10)
    browser.find_by_id('b-domik_popup-password').fill(password)
    sleep(random.randint(5, 10)/10)
    button = browser.find_by_css('input[class="b-form-button__input"]')[2]
    button.click()
    sleep(random.randint(5, 10)/10)

    # enter the search query
    word_search_input = browser.find_by_css('input[class="b-form-input__input"]').first
    word_search_input = word_search_input.fill(request_word)
    sleep(random.randint(5, 10)/10)
    span_button = browser.find_by_css('input[class="b-form-button__input"]')[0]
    span_button.click()
    sleep(random.randint(5, 10)/10)
    
    queries = []
    frequency = []
    # walk through the requested number of pages
    for _ in range(number_of_request_pages):
        # grab the table data
        iter_element = browser.find_by_css('td[class*="b-word-statistics__td"]')
        word_flag = True
        for i in range(100):
            if word_flag:
                queries.append(iter_element[i].text)  
            else:
                frequency.append(int(''.join((iter_element[i].text.split()))))
            word_flag = not word_flag

        browser.click_link_by_href('#next_page')
        sleep(random.randint(5, 10)/10)

    result = pd.DataFrame(dict(queries = queries, frequency = frequency))
    result = result.sort_values(by=['frequency'], ascending=False)
    result.to_excel("output.xlsx", columns=['queries', 'frequency'], index=False)
    browser.quit()
def advanced_search(pub_id, search_term,  user_name, password, year=None, sports=False, phantom=False):
    if phantom == True:
        browser = Browser('phantomjs')
    else:
        browser = Browser('firefox')
    login(browser, user_name, password)
    button = browser.find_by_name('doLogin').first.click()
    browser.click_link_by_href('advanced')
    browser.fill('queryTermField', pub_id)
    browser.fill('queryTermField_0', search_term)
    browser.select('fieldsSelect_0', 'all')
    if sports == False:
        browser.select('opsSelect_0', 'NOT')
        browser.fill('queryTermField_1', 'sports')
        browser.select('fieldsSelect_1', 'ti')
    if year != None:
        browser.select('select_multiDateRange', 'ON')
        browser.fill('year2', year)
    search_but = browser.find_by_name('searchToResultPage').first.click()
    return browser
Example #5
def get_login(url):

    browser = Browser("phantomjs", service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
    browser.visit(url)

    browser.find_by_id('username').fill('*****@*****.**') # MORPH
    #browser.find_by_id('username').fill('*****@*****.**')
    #browser.find_by_id('username').fill('*****@*****.**')
    #browser.find_by_id('username').fill('*****@*****.**') # Morph uk
    browser.find_by_id('password').fill('Nrjn1gsa')
    browser.find_by_name('submit').first.click()
    time.sleep(1)
    print browser.url
    browser.click_link_by_href("/business/opportunitySearchForm.html")
    time.sleep(1)
    browser.select('status', "")
    browser.find_by_value("Search").first.click()
    time.sleep(2)
    print browser.url
    return browser
    def start_requests(self):
        
        display = Display(visible=0, size=(1024, 768))
        display.start()
        browser = Browser();
        browser.visit('https://msu.us.company.com')
        browser.fill('ssousername','*****@*****.**')
        browser.fill('password','password')
        browser.click_link_by_href('javascript:doLogin(document.LoginForm);')
        self.cookies = browser.cookies.all()
        browser.quit()
        display.stop()

        file = open("input.bin")

        for line in file:
            line = line.strip('\n')
            self.machine_ids.append(line)
        
        print len(self.machine_ids)

        for i, machine_id in enumerate(self.machine_ids):
            url = "https://msu.us.company.com/index.php?search_hostname=&search_domain=&search_serial_no=" + machine_id + "&search_cost_center=&search_building=&search_room=&search_location=&search_group_name=&search_logical_group=&search_system_usage=&search_contact=&search_sa_contact=&search_security_level=&search_in_service=&search_lastmodtime=&search_lastmodflds=&search_codename=&search_model=&search_notes=&search_admin_notes=&search_motherboard_model=&search_bios_info=&search_cpu_info=&compare_cpu_count=%3D&search_cpu_count=&compare_cpu_speed=%3D&search_cpu_speed=&search_architecture=&compare_memory=%3D&search_memory=&search_disk=&compare_storage_numdevices=%3D&search_storage_numdevices=&compare_storage_capacity=%3D&search_storage_capacity=&search_ether=&search_expansion_slots=&search_sound_cards=&search_video_cards=&search_ethernet_cards=&search_network_comments=&search_usertag1=&search_usertag2=&search_usertag3=&search_usertag4=&search_os_bits=&search_ip_addr=&search_os_version=&search_os_build=&search_system_state=&search_system_state_detail=&search_system_state_timestamp=&search_last_ping=&search_last_ping_att=&search_uptime=&search_console_access=&search_console_type=&search_sp_access=&search_sp_type=&search_console_patch_port=&search_conserver_server=&search_rpc=&search_rpc_type=&search_outlet=&search_outlet_type=&search_reservation_status=Any&search_reservation_type=&search_reserved_by=&compare_reserved_start_time=%3D&search_reserved_start_time_mm=&search_reserved_start_time_dd=&search_reserved_start_time_yy=&compare_reserved_end_time=%3D&search_reserved_end_time_mm=&search_reserved_end_time_dd=&search_reserved_end_time_yy=&search_reserved_comment=&search_reserved_project=&search_gq_pri=&search_gq_max=&search_gq_beg=&search_gq_end=&search_reservation_limit=&output=Standard&AdvancedSearch=1&Search=Search"
            yield Request(url, cookies = self.cookies, headers = self.headers, meta = {'id':machine_id}, callback = self.search_result, dont_filter=True)
def getRoutes(start, end):
    browser = Browser(driver_name="firefox")
    browser.visit('https://www.hopstop.com/search?xfr=cityscape')
    print(browser.url)
    browser.fill('address1', str(start))
    browser.fill('address2', str(end))
    browser.find_by_name('get_dirs').click()
    print(browser.url)
    if browser.is_text_present('Did you mean?'):
        browser.click_link_by_href("#")
        if browser.is_text_present('Did you mean?'):
            browser.click_link_by_href("#")
    browser.click_link_by_href("#")
    links = browser.find_link_by_partial_href("/station?stid")
    results = []
    for link in links:
        results.append(link.value)
    return results
def getRoutes(start, end):
    browser = Browser(driver_name="firefox")
    browser.visit("https://www.hopstop.com/search?xfr=cityscape")
    print(browser.url)
    browser.fill("address1", str(start))
    browser.fill("address2", str(end))
    browser.find_by_name("get_dirs").click()
    print(browser.url)
    if browser.is_text_present("Did you mean?"):
        browser.click_link_by_href("#")
        if browser.is_text_present("Did you mean?"):
            browser.click_link_by_href("#")
    browser.click_link_by_href("#")
    links = browser.find_link_by_partial_href("/station?stid")
    results = []
    for link in links:
        results.append(link.value)
    return results
def get_login(url):

    browser = Browser("phantomjs", service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
    browser.visit(url)

    #browser.find_by_id('username').fill('*****@*****.**') # MORPH
    #browser.find_by_id('username').fill('*****@*****.**')

    #browser.find_by_id('username').fill('*****@*****.**')
    browser.find_by_id('username').fill('*****@*****.**')   # MORPH UK
    browser.find_by_id('password').fill('Nrjn1gsa')
    browser.find_by_name('submit').first.click()
    time.sleep(1)
    print browser.url
    try:
        browser.click_link_by_href("/business/opportunitySearchForm.html")
        time.sleep(1)
        browser.click_link_by_href("opportunityAdvancedSearchForm.html")
        time.sleep(2)
        #browser.find_by_value('All').first.click()
        browser.select('status', "")
        browser.select('area', "9")         # 'area' is 'class name' not just name?
        time.sleep(3)
        print browser.find_by_value('Add All')              #TODO
        print browser.html
        browser.find_by_value('Add All').first.click()
        print 'added all England only' #TODO
        time.sleep(2)
        browser.find_by_value("Search").first.click()
        time.sleep(2)
    except Exception as e:
        print 'error: ', e
        browser.click_link_by_href("/business/logoutHosts.html")
        time.sleep(4)
        browser.quit()
        sys.exit("login failed")
    print browser.url
    return browser
Example #10
		user_email = raw_input("enter user's email address ")
		user_pass = raw_input("enter user's password ")
		browser.visit('http://www.facebook.com')

		browser.fill('email', user_email)
		browser.fill('pass', user_pass)

		# Here is where I made a slight change
		button = browser.find_by_id('loginbutton')
		button.click()

		# I didn't find a page-saving function for Facebook in Splinter, but as an alternative there is the screenshot feature.
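		# A minimal sketch of that alternative, assuming Splinter's screenshot API
		# (the filename prefix here is purely illustrative):
		# screenshot_path = browser.screenshot('facebook_login', suffix='.png')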

		# The site we will navigate into, handling its session
		browser.visit('http://www.baseball-reference.com/my/auth.cgi?return_to=http://www.baseball-reference.com/')
		browser.click_link_by_href('/my/auth.cgi?do=oauth_login&service=facebook&return_to=')
		# print response.read()
		# scrape_new_data(starting_year, ending_year, browser, only_pitching_data)
		if (pitching_data == 1) & (batting_data == 0):
			scrape_new_data(starting_year, ending_year, browser, 1)
		else:
			scrape_new_data(starting_year, ending_year, browser, 0)
	
	if start_time == 1:
		get_start_time_data()

	if team_schedules == 1:
		get_team_schedules(2004, 2015)
		convert_excel_date_format('team_schedules.csv', 0)

	if batting_order == 1:
Example #11
    change_brig(screenshot)
    crop_img("captcha.png")
    img = threshold("captcha.png")
    captcha = tesseract(img)
    #time.sleep(2)
    print captcha

    bro.fill('usuario','J311968199')
    bro.fill('contrasenia','J-311968199a')
    bro.fill('captcha', str(captcha))
    bro.find_by_id('btnLoginSisap').click()

flag = False

while not flag:
    ejecutar() 
    principal_menu = bro.find_by_id("principal-menu")
    
    if principal_menu != []:
        principal_menu.click()
        bro.click_link_by_href("/informacion-general/informacion-seniat")
        bro.click_link_by_href("#inf_accionistas")
        bro.click_link_by_href("/accionistas/gestion") 
        bro.select("id_tipo_relacion_empresa", "526")
        bro.select("id_pais","229")
        bro.fill("correo", "*****@*****.**")
        bro.fill("cantidad_acciones","1234")
        #bro.find_by_id("btnAccionistas").mouse_over()
        flag = True
#ipdb.set_trace()
def scrape_Mars1():
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path, headless=False)
    url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"
    browser.visit(url)
    html = browser.html
    soup = bs(html, 'html.parser')
    News_Title = soup.find('div', class_='content_title')
    News_Title = News_Title.text
    News_Title
    News_Paragraph = soup.find('div', class_='article_teaser_body')
    News_Paragraph = News_Paragraph.text
    News_Paragraph
    #----------------------------------
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path, headless=False)
    url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(url)
    browser.click_link_by_partial_text('FULL IMAGE')
    time.sleep(10)
    browser.click_link_by_partial_text('more info')
    html = browser.html
    soup = bs(html, 'html.parser')
    img = soup.find('figure', class_='lede')
    full_size = img.find("a")["href"]
    browser.click_link_by_href(full_size)
    html = browser.html
    soup = bs(html, 'html.parser')
    featured_image_url = soup.find('img')['src']
    #--------------------------------------------
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path, headless=False)
    url = "https://twitter.com/marswxreport?lang=en"
    response = requests.get(url)
    soup = bs(response.text, 'html.parser')
    results = soup.find('div', class_="js-tweet-text-container")
    mars_weather = results.p.text
    #-----------------------------------------------
    url = "https://space-facts.com/mars/"
    mars_table = pd.read_html(url)
    mars_table
    df = mars_table[0]
    df.columns = ["Measurements", "Results"]
    df.set_index("Measurements", inplace=True)
    mars_html_table = df.to_html()
    df.to_html('table.html')
    #-------------------------------------------
    Url_List = []
    Hemispheres = [
        'Cerberus Hemisphere Enhanced', 'Schiaparelli Hemisphere Enhanced',
        'Syrtis Major Hemisphere Enhanced',
        'Valles Marineris Hemisphere Enhanced'
    ]
    for Hemisphere in Hemispheres:
        executable_path = {'executable_path': 'chromedriver.exe'}
        browser = Browser('chrome', **executable_path, headless=False)
        url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
        browser.visit(url)
        browser.click_link_by_partial_text(Hemisphere)
        html = browser.html
        soup = bs(html, 'html.parser')
        img_url = soup.find(
            'div', class_="downloads").find('ul').find('li').find('a')['href']
        Url_List.append(img_url)
    Hem_List = [[Hemispheres[0], Url_List[0]], [Hemispheres[1], Url_List[1]],
                [Hemispheres[2], Url_List[2]], [Hemispheres[3], Url_List[3]]]
    labels = {0: "Title", 1: "Img_Url"}
    Hem_Dict = [{labels[idx]: val
                 for idx, val in enumerate(item)} for item in Hem_List]
    H1 = Hem_Dict[0]
    H2 = Hem_Dict[1]
    H3 = Hem_Dict[2]
    H4 = Hem_Dict[3]
    scrape = {
        "News_Title": News_Title,
        "News_Paragraph": News_Paragraph,
        "Featured_Image": featured_image_url,
        "Mars_Tweet": mars_weather,
        "Mars_Table": mars_html_table,
        'Cerberus': H1["Title"],
        'Cerberus_Img': H1["Img_Url"],
        'Schiaparelli': H2["Title"],
        'Schiaparelli_Img': H2["Img_Url"],
        'Syrtis': H3["Title"],
        'Syrtis_Img': H3["Img_Url"],
        'Valles': H4["Title"],
        'Valles_Img': H4["Img_Url"],
    }
    return scrape
Example #13
def scrape_iwp(a_startpage=1, a_pagecount=20000):

    # Initialize PyMongo to work with MongoDBs
    conn = 'mongodb://localhost:27017'
    client = pymongo.MongoClient(conn)

    # Define database and collection
    db = client.etl_db

    # Setup the splinter Browser
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path, headless=False)

    # URL of page to be scraped
    # url_iwp = 'https://iwaspoisoned.com'
    # UPDATE: Added the "?page=" to restart scraping on pages not already obtained
    
    url_iwp = 'https://iwaspoisoned.com/?page=' + str(a_startpage)

    # Visit the IWP page
    browser.visit( url_iwp )

    # Extract incidents from multiple pages
    page_target = int(a_pagecount)

    # How long to wait between pages to avoid triggering issues on website
    page_wait = 2

    # Count the number of pages visited
    n_pages = 0

    # Loop until no more pages or until page target is reached
    full_incident_list = []
    for j in range(page_target):
        # Get a page full of incidents from the USA
        i_list = parse_incident_page(browser.html)
        n_pages += 1

        # Add this list of incidents to a running list
        # full_incident_list.extend(i_list)

        # Add this list of incidents to the Mongo database
        
        try:
            # Attempt the insert
            insert_results = db.iwp.insert_many(i_list)
            
            # Print a progress marker
            print(f"Page {n_pages} of {a_pagecount}: {len(insert_results.inserted_ids)} of {len(i_list)} incidents added to DB. Total incidents: {db.iwp.count_documents({})}")

        except TypeError:
            # It's possible the incident list was empty, which could trigger a TypeError.
            # This is the case since it is being filtered for only Illinois, USA incidents
            print(f">> Page {n_pages}: No incidents captured")
        
        # Check to see if a hyperlink with attribute 'rel' = 'next' is present
        soup_thispage = BeautifulSoup(browser.html, 'lxml')
        next_tag = soup_thispage.find('a', {'rel' : 'next'})

        if next_tag:
            # Ok, there is a next page - get the hyperlink
            # print(f"DEBUG: Going to next page (next_tag = '{next_tag}' ")
            try:
                next_page_url = next_tag['href']

                # Wait for a specified number of seconds
                time.sleep(page_wait)

                # Click it!
                browser.click_link_by_href(next_page_url)

                #DEBUG ****************************************
                # if n_pages > 3:
                #    break

            # If KeyError occurs, then this tag has no html link for some reason
            except KeyError:
                break

        else:
            # No more pages - break out of this loop
            break
    
    # Close the Browser
    browser.quit()
            
    # Return the number of pages scraped
    return n_pages


# EXAMPLE:
# Command to Start at Page 1 of iwaspoisoned.com and Scrape 10 Pages,
# only keeping Incidents that occurred in Illinois, USA
#
# In a _separate_ Python file, include the code below:

#*******************************************************************************
# Import ETL Scraper function `scrape_iwp` from the local file `etl_scrape_iwp`
# from etl_scrape_iwp import scrape_iwp
#
# Use the function to scrape pages
# pages_scraped = scrape_iwp(1, 10)
#*******************************************************************************
browser.find_by_xpath('//h1')
browser.find_by_tag('h1')
browser.find_by_name('name')
browser.find_by_text('Hello World!')
browser.find_by_id('firstheader')
browser.find_by_value('query')
# get element
first_found = browser.find_by_name('name').first
last_found = browser.find_by_name('name').last
second_found = browser.find_by_name('name')[1]

# Get value of an element
browser.find_by_css('h1').first.value

# Clicking links; each call clicks the first matching link
browser.click_link_by_href('http://www.the_site.com/my_link')
browser.click_link_by_partial_href('my_link')
browser.click_link_by_text('my link')
browser.click_link_by_partial_text('part of link text')
browser.click_link_by_id('link_id')
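# Note: newer Splinter releases deprecate the click_link_by_* helpers above in favour
# of the links namespace; a rough equivalent, assuming that API is available, is:
# browser.links.find_by_href('http://www.the_site.com/my_link').first.click()
# browser.links.find_by_partial_text('part of link text').first.click()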

# element is visible or invisible
browser.find_by_css('h1').first.visible

# Fill content
browser.find_by_id('productName').fill(
    'splinter - python acceptance testing for web applications')
browser.fill('q', 'splinter - python acceptance testing for web applications')

# Verifying if element has a className
browser.find_by_css('.content').first.has_class('content')
Example #15
#Parse home page to gather links to traverse   
bs = BeautifulSoup(browser.html, 'html.parser')
results = bs.find_all('a',class_="player-name")
aref_list = []
for result in results:
    aref_list.append(result['href'])

#Utilize list of href to scrape player bio and stats pages and store into a list of DataFrames.
bs = BeautifulSoup(browser.html, 'html.parser')
list_df_hitter = []
list_df_hitter_stats = []
for aref in aref_list[:TopN]:
    bio_dict = {}
    time.sleep(1)
    browser.click_link_by_href(aref)
    time.sleep(1)
    bio = BeautifulSoup(browser.html,'html.parser')

    #Calculate player name
    bio_name = bio.find('div','pull-left primary-heading-subheading')
    player = bio_name.text.lstrip().split('\n')[0].rstrip()
    print(player, flush=True)
    bio_results = bio.find_all('span','bio-detail')
    #Get Player Bio information
    college = ''
    for bio_result in bio_results:
        attr = bio_result.text.split(':')[0]
        if (attr == "Age"):
            age = int(bio_result.text.split(':')[1])
        elif (attr == "College"):
Example #16
class ChopeBrowser:
    def __init__(self, headless=False):
        self.chrome = Browser('chrome', headless=headless)

    def time_delay(self, time):
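        # Waiting trick: polling for an element name that will never exist, with
        # wait_time set, simply blocks for roughly `time` seconds before giving up.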
        self.chrome.is_element_present_by_name('!@#$%^&*())(*&^%$#@!',
                                               wait_time=time)

    def login(self, usr, pwd, domain='STUDENT'):
        url = 'https://ntupcb.ntu.edu.sg'
        url += '/fbscbs/Account/SignIn?ReturnUrl=%2ffbscbs'
        self.chrome.visit(url)
        dropdown = self.chrome.find_by_tag('option')

        for option in dropdown:
            if option.text == domain:
                option.click()

        self.chrome.fill('Username', usr)
        self.chrome.fill('Password', pwd + '\n')

    def first_setup(self):
        button = self.chrome.find_by_id('tdFacilityBook')
        button.click()
        self.chrome.click_link_by_href('#8')
        self.chrome.click_link_by_href('#-1')
        self.chrome.click_link_by_href('/fbscbs/Booking/Create?resourceId=69')
        self.chrome.click_link_by_id('book')
        self.chrome.click_link_by_id('changeResource')
        self.chrome.click_link_by_href('#-1')
        self.chrome.click_link_by_id('book')

    def is_registered(event):
        if event.has_class('noShowWhite'):
            return False
        if event.has_class('currentEvent'):
            return False
        return True

    def check_facility(self, evFacilities):
        columnWeek = self.chrome.find_by_css('.wc-event-column')
        evWeek = []
        for columnDay in columnWeek:
            evToday = []
            evList = columnDay.find_by_css('.ui-corner-all')
            for event in evList:
                if not event.has_class('noShowWhite'):
                    if not event.has_class('currentEvent'):
                        event = event.text
                        if not event.find('—') == -1:
                            if event == '':
                                continue
                            evToday.append(event.split('—'))
            evWeek.append(evToday)
        evFacilities.append(evWeek)

    def click_next(self, counter, evFacilities):
        # Works recursively with check_facility.
        # Picks the facility option based on counter.
        dropdown = self.chrome.find_by_id('ResourceId')
        options = dropdown.find_by_tag('option')
        if counter < len(options):
            nextOption = options[counter]
            nextOption.click()
            self.check_facility(counter, evFacilities)
        else:
            return evFacilities

    def scrape_seats(self, usr, pwd):
        self.login(usr, pwd)
        self.first_setup()
        evFacilities = []
        dropdown = self.chrome.find_by_id('ResourceId')
        options = dropdown.find_by_tag('option')
        for opt in options:
            nextOption = opt
            nextOption.click()
            self.time_delay(0.1)
            # while loadingTitle.visible:
            #     pass
            evFacilities.append(opt.text)
            self.check_facility(evFacilities)
        return evFacilities

    def quit(self):
        self.chrome.quit()
Example #17
parser = SafeConfigParser()
parser.read('config.ini')

browser = Browser(parser.get('Config', 'Browser'))
browser.driver.maximize_window()

browser.visit('https://fsweb.no/studentweb/login.jsf?inst=' +  parser.get('Config', 'Institution'))
browser.find_by_text('Norwegian ID number and PIN').first.click()

browser.find_by_id('login-box')
browser.fill('j_idt129:j_idt131:fodselsnummer', parser.get('Config', 'Fodselsnummer'))
browser.fill('j_idt129:j_idt131:pincode',  parser.get('Config', 'Pin'))
browser.find_by_text('Log in').first.click()

browser.click_link_by_href('/studentweb/resultater.jsf')

tags = browser.find_by_tag('tr')

chars = []

for tag in tags:
	if tag.has_class('resultatTop') or tag.has_class('none'):
		inner_tags = tag.find_by_tag('td')
		course_id = inner_tags[1].text.split("\n")[0]
		course_name = inner_tags[1].text.split("\n")[1]
		grade = inner_tags[5].text
		if grade != 'passed':
			chars.append(grade) 
			print "%s\t%-30s\t%s" % (course_id, course_name, grade)
Example #18
executable_path = {"executable_path": "chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)
base_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(base_url)

# html = browser.html
# soup = BeautifulSoup(html, 'html.parser')

# img_urls = []
# pic_dict = {'title': [], 'img_url': [],}

# pictures = soup.find_all('div',{"class":"item"})
# a=pictures[0].find_all("a")
# print(a[0]["href"])

browser.click_link_by_href('/search/map/Mars/Viking/cerberus_enhanced')

# print(pictures)

# for pic in pictures:
#     try:
#         t = pic.get_text()
#         title = t.strip('Enhanced')
#         time.sleep(5)
#         browser.click_link_by_href('/search/map/Mars/Viking/cerberus_enhanced')
#     except:
#         raise
#     finally:
#         browser.quit()
#     browser.click_link_by_partial_href('Enhanced')
#     browser.click_link_by_partial_text(t)
assert browser.url != "http://54.191.193.7:5000/email"
print("=========================================================")

#Test Case 6
print("Running Test Case 6: Classify image and click on link for more information")
browser.visit('http://54.191.193.7:5000/')
print("Visiting browser...")
time.sleep(2)
element = browser.driver.find_element_by_id("imageFile")
pathToImage = os.path.abspath("static/testing/Capture5.JPG")
element.send_keys(pathToImage)
print("Image chosen...")
time.sleep(2)
browser.click_link_by_id('submit')
assert browser.is_text_present('Image Uploaded') == True
browser.click_link_by_href("https://www.hse.ie/eng/health/az/h/hives%20-%20acute/causes-of-urticaria.html")
print("Link clicked...")
time.sleep(2)
browser.windows.current = browser.windows[1]
assert browser.url == "https://www.hse.ie/eng/health/az/h/hives%20-%20acute/causes-of-urticaria.html"
browser.windows[1].close()
browser.windows.current = browser.windows[0]
print("=========================================================")

#Test Case 7
print("Running Test Case 7: Classify image and click on both links for more information")
browser.visit('http://54.191.193.7:5000/')
print("Visiting browser...")
time.sleep(2)
element = browser.driver.find_element_by_id("imageFile")
pathToImage = os.path.abspath("static/testing/Capture5.JPG")
Example #20
parser.read('config.ini')

browser = Browser(parser.get('Config', 'Browser'))
browser.driver.maximize_window()

browser.visit('https://fsweb.no/studentweb/login.jsf?inst=' +
              parser.get('Config', 'Institution'))
browser.find_by_text('Norwegian ID number and PIN').first.click()

browser.find_by_id('login-box')
browser.fill('j_idt129:j_idt131:fodselsnummer',
             parser.get('Config', 'Fodselsnummer'))
browser.fill('j_idt129:j_idt131:pincode', parser.get('Config', 'Pin'))
browser.find_by_text('Log in').first.click()

browser.click_link_by_href('/studentweb/resultater.jsf')

tags = browser.find_by_tag('tr')

chars = []

for tag in tags:
    if tag.has_class('resultatTop') or tag.has_class('none'):
        inner_tags = tag.find_by_tag('td')
        course_id = inner_tags[1].text.split("\n")[0]
        course_name = inner_tags[1].text.split("\n")[1]
        grade = inner_tags[5].text
        if grade != 'passed':
            chars.append(grade)
            print "%s\t%-30s\t%s" % (course_id, course_name, grade)
Example #21
    def looping(self):
        NETID = self.controller.frames["MainPage"].ID_entry.get()
        PASSWD = self.controller.frames["MainPage"].PW_entry.get()
        CLS_LST = self.controller.frames["MainPage"].targets

        URL = "https://schedule.msu.edu"
        URL_PLAN = "https://schedule.msu.edu/Planner.aspx"

        b = Browser('chrome', headless=HEADLESS, **executable_path)
        for course in CLS_LST:
            tar = course.split()
            TERM = "{} {}".format(tar[1], tar[0])
            SUB = tar[2]
            SUB_NUM = tar[3]
            SEC = "{:03}".format(int(tar[4][3:]))

            try:
                # put all the list class in to user account planner
                b.visit(URL)
                # term = b.find_by_text(TERM).value
                term = re.findall(
                    '<option .*?value="(.+)??".*?>{}(-Tentative)?</option>'.
                    format(TERM), b.html)[0][0]
                # b.find_by_id("MainContent_SrearchUC_ddlTerm").select(term)
                # b.find_by_id("MainContent_SrearchUC_ddlSubject").select(SUB)
                # b.find_by_id("MainContent_SrearchUC_txtCourseNumber").fill(SUB_NUM)
                # b.find_by_id("MainContent_SrearchUC_btnSubmit").click()
                b.find_by_id("MainContent_ddlTerm").select(term)
                b.find_by_id("MainContent_ddlSubject").select(SUB)
                b.find_by_id("MainContent_txtCourseNumber").fill(SUB_NUM)
                b.find_by_id("MainContent_btnSubmit").click()
                combo = "{} {} Section {}".format(SUB, SUB_NUM, SEC)
                link = re.findall(
                    '<a href="(.+)?" title="[^"]+add {} to your planner"?>'.
                    format(combo), b.html)[0]

                b.click_link_by_href(link)
                self.checkLogin([b], URL_PLAN, NETID, PASSWD)

                self.status_table[course] = "READY"
                self.ready_table[course] = ["-1", combo]
            except:
                # print("Error:", sys.exc_info()[0])
                self.status_table[course] = "ERROR"

            self.updateStatus(CLS_LST)

        # now go to the planner
        b.visit(URL_PLAN)
        self.checkLogin([b], URL_PLAN, NETID, PASSWD)
        # find the plan idx
        self.updateReady(b.html)
        # print(self.ready_table)

        STATUS_CODE = "MainContent_UCPlan_rptPlanner_tdStatus_"
        ENROLL_CODE = "MainContent_UCPlan_rptPlanner_imgEnroll_"
        CONTINUE_CODE = "MainContent_btnContinue"
        to_delete = None
        # looping arround
        while len(self.ready_table) > 0:
            b.visit(URL_PLAN)
            self.checkLogin([b], URL_PLAN, NETID, PASSWD)
            for course in self.ready_table:
                plan_idx = self.ready_table[course][0]
                combo = self.ready_table[course][1]
                # print(b.find_by_id(STATUS_CODE+plan_idx).text)
                if "Open" in b.find_by_id(STATUS_CODE + plan_idx).text:
                    # section open!! enroll the class
                    b.find_by_id(ENROLL_CODE + plan_idx).click()
                    b.find_by_id(CONTINUE_CODE).click()
                    if b.html.find(
                            "The course has been added to your schedule."
                    ) != -1:
                        # enroll successfully
                        self.status_table[course] = "ENROLLED"
                    else:
                        # FAILED
                        self.status_table[course] = "FAILED"
                    to_delete = course
                    self.updateStatus(CLS_LST)
                    break

            if to_delete != None:
                b.visit(URL_PLAN)
                self.checkLogin([b], URL_PLAN, NETID, PASSWD)
                del self.ready_table[to_delete]
                self.updateReady(b.html)
                to_delete = None
            else:
                time.sleep(1)  # sleep 1 second

        self.updateStatus(CLS_LST, True)

        b.quit()
Example #22
class ChopeBrowser:
    def __init__(self, headless=False):
        self.chrome = Browser('chrome', headless=headless)

    def time_delay(self, time):
        self.chrome.is_element_present_by_name('!@#$%^&*())(*&^%$#@!',
                                               wait_time=time)

    def login(self, usr, pwd, domain='STUDENT'):
        url = 'https://ntupcb.ntu.edu.sg'
        url += '/fbscbs/Account/SignIn?ReturnUrl=%2ffbscbs'
        self.chrome.visit(url)
        dropdown = self.chrome.find_by_tag('option')

        for option in dropdown:
            if option.text == domain:
                option.click()

        self.chrome.fill('Username', usr)
        self.chrome.fill('Password', pwd + '\n')


# PC BOOKING STARTS HERE
# Tries to book the PC of selected type

    def pc_setup(self, usr, pwd, Type):
        self.login(usr, pwd)
        button = self.chrome.find_by_id('tdPcBook')
        button.click()
        time.sleep(2)
        with self.chrome.get_iframe('frmAdminViewControls') as iframe:
            iframe.find_by_id('pnlInsLoc3').click()
        self.type_number(Type)
        data = self.scrape_pc()

        can_book = self.book_pc(data[1], data[2])
        self.chrome.quit()
        return data[0], can_book

    # identify pc type requested
    def type_number(self, Types):
        for i in range(0, 4):
            with self.chrome.get_iframe('frmAdminViewControls') as iframe:
                page = iframe.find_by_id('pnlInsPcGrp' + str(i))
                if page != []:
                    page = page.html
                    page = BeautifulSoup(page, "lxml")
                    page = page.find("span", {
                        "style":
                        "display:inline-block;height:20px;width:80px;"
                    })
                    page = page.get_text()
                    if page == Types:
                        page = iframe.find_by_id('pnlInsPcGrp' +
                                                 str(i)).click()
                        return
        return 0

    # Scrape all PC in the current screen
    def scrape_pc(self):
        with self.chrome.get_iframe('frmSeating') as iframe:
            for i in range(0, 6):

                for j in range(1, 11):
                    btnID = 'grdSeating_tblCol' + str(j) + '_' + str(i)
                    parse = iframe.find_by_id(btnID)
                    if parse == []:
                        return 'no pc', 100, 100
                    if parse != []:
                        color = self.color(parse.html)
                        if (color == '#FFFFFF'):
                            return self.name_pc(parse.html), j, i
        no_pc = 'no pc'
        j = 100
        i = 100
        return no_pc, j, i

    # Identify name of PC
    def name_pc(self, codes):
        soup = BeautifulSoup(codes, "lxml")
        mydivs = soup.findAll("span", {"class": "lblPcName"})
        return mydivs[0].get_text()

    # Check availability of PC, by detecting background color
    def color(self, code):
        soup = BeautifulSoup(code, "lxml")
        tag = soup.findAll('td', {"style": "background-color: #FFFFFF"})
        if tag != []:
            return '#FFFFFF'
        else:
            return 'blabla'

    # Try to book the selected PC
    def book_pc(self, col, row):
        with self.chrome.get_iframe('frmSeating') as iframe:
            if (col != 100) and (row != 100):
                try:
                    time.sleep(1)
                    butt = iframe.find_by_id("grdSeating_divOuterCol" +
                                             str(col) + "_" + str(row))
                    if butt != []:
                        butt.click()
                    time.sleep(1)
                    sub = iframe.find_by_name("btnsumit")
                    sub.click()
                    return "booked"
                except:
                    pyautogui.press('enter')
                    return "cannot book"
        return "cannot book"

    # Initialize booking site until arriving to the booking table
    def first_setup(self):
        button = self.chrome.find_by_id('tdFacilityBook')
        button.click()
        self.chrome.click_link_by_href('#8')
        self.chrome.click_link_by_href('#-1')
        self.chrome.click_link_by_href('/fbscbs/Booking/Create?resourceId=69')
        self.chrome.click_link_by_id('book')
        self.chrome.click_link_by_id('changeResource')
        self.chrome.click_link_by_href('#-1')
        self.chrome.click_link_by_id('book')

    # Eliminates unnecessary booking slots
    def is_registered(event):
        if event.has_class('noShowWhite'):
            return False
        if event.has_class('currentEvent'):
            return False
        return True

    # Adds the weekly booked slots for the selected facility.
    # Each weekly list contains one list of daily bookings per day column,
    # each holding the booked slots as [start, end] time pairs.
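    # e.g. one facility entry might look like (hypothetical times):
    # [[['10:00', '11:00'], ['14:00', '15:30']],  # Monday's booked slots
    #  [],                                        # Tuesday (no bookings)
    #  ...]                                       # one inner list per day column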
    def check_facility(self, evFacilities):
        columnWeek = self.chrome.find_by_css('.wc-event-column')
        evWeek = []
        for columnDay in columnWeek:
            evToday = []
            evList = columnDay.find_by_css('.ui-corner-all')
            for event in evList:
                if not event.has_class('noShowWhite'):
                    if not event.has_class('currentEvent'):
                        event = event.text
                        if not event.find('—') == -1:
                            if event == '':
                                continue
                            evToday.append(event.split('—'))
            evWeek.append(evToday)
        evFacilities.append(evWeek)

    def click_next(self, counter, evFacilities):
        # Recursively check facilities.
        # Choose facility based on counter
        dropdown = self.chrome.find_by_id('ResourceId')
        options = dropdown.find_by_tag('option')
        if counter < len(options):
            nextOption = options[counter]
            nextOption.click()
            self.check_facility(counter, evFacilities)
        else:
            return evFacilities

    # Scrape seats main function
    # OPTIMIZE: by multithreading
    #           and by running multiple browsers at once
    def scrape_seats(self, usr, pwd):
        self.login(usr, pwd)
        self.first_setup()
        evFacilities = []
        dropdown = self.chrome.find_by_id('ResourceId')
        options = dropdown.find_by_tag('option')
        optRange = range(len(options))
        for i in optRange:
            opt = options[i]
            nextOption = opt
            nextOption.click()
            self.time_delay(0.2)
            # while loadingTitle.visible:
            #     pass
            evFacilities.append(opt.text)
            self.check_facility(evFacilities)
        self.quit()
        return evFacilities

    def quit(self):
        self.chrome.quit()
		self.classes.append(each_class)

print "Visiting: https://gymbox.legendonlineservices.co.uk/enterprise/account/Login"
browser.visit("https://gymbox.legendonlineservices.co.uk/enterprise/account/Login")
print "\tSuccess"

# LOGIN #
print "Logging In.."
browser.find_by_id("login_Email").fill(username)
browser.find_by_id("login_Password").fill(password)
browser.find_by_id("login").click()
print "\tSuccess"

# NAVIGATE TO CLASSES #
print "Navigating to: /enterprise/BookingsCentre/MemberTimetable"
browser.click_link_by_href("/enterprise/BookingsCentre/MemberTimetable")
print "\tSuccess"

# CREATE LIST OF OBJECTS CONTAINING DAYS AND CLASSES #
print "Building Classes Timetable.."
list_of_days = []
for each_row in browser.find_by_id("MemberTimetable").find_by_tag("tr"):
	each_class = {}
	if each_row.has_class("dayHeader"):
		day_obj = Day(each_row)
	else:
		day_obj.add_class(each_row)
	if day_obj not in list_of_days:
		list_of_days.append(day_obj)

print "\tSuccess\n"
Example #24
def getRoutes(start,end):
    browser = Browser(
        driver_name="firefox"
)
    browser.visit('https://www.hopstop.com/search?xfr=cityscape')
    print(browser.url)
    browser.fill('address1',str(start))
    browser.fill('address2',str(end))
    browser.find_by_name('get_dirs').click()
    print(browser.url)
    if browser.is_text_present('Did you mean?'):
        print "better at least get here"
        #browser.click_link_by_href("#") 
        for link in browser.find_link_by_href("#"):
            print "Okay"
            if link.visible == True:
                print link.text
                browser.click_link_by_text(link.text)
                break
    browser.click_link_by_href("#")
    links = browser.find_link_by_partial_href("/station?stid")
    results = []
    for link in links:
        results.append(link.value)
    browser.quit()
    return results