Example #1
import mechanicalsoup


def test_submit_set():
    """Complete and submit the pizza form at http://httpbin.org/forms/post """
    browser = mechanicalsoup.Browser()
    page = browser.get("http://httpbin.org/forms/post")
    form = mechanicalsoup.Form(page.soup.form)

    form["custname"] = "Philip J. Fry"

    form["size"] = "medium"
    form["topping"] = ("cheese", "onion")

    form["comments"] = "freezer"

    response = browser.submit(form, page.url)

    # helpfully the form submits to http://httpbin.org/post, which simply
    # echoes the submitted form data back as JSON
    json = response.json()
    data = json["form"]
    assert data["custname"] == "Philip J. Fry"
    assert data["custtel"] == ""  # web browser submits "" for input left blank
    assert data["size"] == "medium"
    assert data["topping"] == ["cheese", "onion"]
    assert data["comments"] == "freezer"
    browser.close()
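
For comparison, the same pizza form can be driven with MechanicalSoup's higher-level StatefulBrowser, which keeps track of the current page and the selected form; a minimal sketch against the same httpbin endpoint:

import mechanicalsoup

browser = mechanicalsoup.StatefulBrowser()
browser.open("http://httpbin.org/forms/post")
browser.select_form("form")  # CSS selector; selects the page's only form
browser["custname"] = "Philip J. Fry"
browser["size"] = "medium"
browser["topping"] = ("cheese", "onion")
browser["comments"] = "freezer"
response = browser.submit_selected()
print(response.json()["form"])
browser.close()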
Example #2
import bs4
import pytest

import mechanicalsoup


def test_construct_form_fail():
    """Form objects must be constructed from form html elements."""
    soup = bs4.BeautifulSoup('<notform>This is not a form</notform>', 'lxml')
    tag = soup.find('notform')
    assert isinstance(tag, bs4.element.Tag)
    with pytest.raises(mechanicalsoup.LinkNotFoundError):
        mechanicalsoup.Form(tag)
Example #3
import mechanicalsoup


# the httpbin fixture is provided by the pytest-httpbin plugin
def test_submit_online(httpbin):
    """Complete and submit the pizza form at http://httpbin.org/forms/post """
    browser = mechanicalsoup.Browser()
    page = browser.get(httpbin + "/forms/post")
    form = mechanicalsoup.Form(page.soup.form)

    input_data = {"custname": "Philip J. Fry"}
    form.input(input_data)

    check_data = {"size": "large", "topping": ["cheese"]}
    form.check(check_data)
    check_data = {"size": "medium", "topping": "onion"}
    form.check(check_data)

    form.textarea({"comments": "warm"})
    form.textarea({"comments": "actually, no, not warm"})
    form.textarea({"comments": "freezer"})

    response = browser.submit(form, page.url)

    # helpfully the form submits to http://httpbin.org/post, which simply
    # echoes the submitted form data back as JSON
    json = response.json()
    data = json["form"]
    assert data["custname"] == "Philip J. Fry"
    assert data["custtel"] == ""  # web browser submits "" for input left blank
    assert data["size"] == "medium"
    assert data["topping"] == ["cheese", "onion"]
    assert data["comments"] == "freezer"
Example #4
import bs4
import pytest

import mechanicalsoup


def test_construct_form_fail():
    """Form objects must be constructed from form html elements."""
    soup = bs4.BeautifulSoup('<notform>This is not a form</notform>', 'lxml')
    tag = soup.find('notform')
    assert isinstance(tag, bs4.element.Tag)
    with pytest.warns(FutureWarning, match="from a 'notform'"):
        mechanicalsoup.Form(tag)
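
Examples #2 and #4 exercise the same misuse against different MechanicalSoup releases: constructing a Form from a non-form tag was first deprecated with a FutureWarning, and later releases turned it into a hard LinkNotFoundError. Constructing from a genuine form element is the supported path; a minimal sketch:

import bs4
import mechanicalsoup

soup = bs4.BeautifulSoup('<form><input name="q"/></form>', 'lxml')
form = mechanicalsoup.Form(soup.form)  # a real <form> tag: no warning, no error
form["q"] = "pizza"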
Example #5
File: vk_auth.py Project: Vantuz/VKTools
def oauth_scheme(args):
    browser = mechanicalsoup.StatefulBrowser()
    login_page = browser.open(oauth_url + urlencode({
        'client_id': args.client_id,
        'scope': args.scope
        }))
    login_form = mechanicalsoup.Form(login_page.soup.select_one('form'))
    login_form.input({"email": args.login, "pass": args.password})
    page2 = browser.submit(login_form, login_page.url)
    if page2.soup.select_one('.service_msg_warning') is None:
        login_form2 = mechanicalsoup.Form(page2.soup.select_one('form'))
        page3 = browser.submit(login_form2, page2.url)

        token = re.search(r"token=([a-zA-Z0-9]+)&", page3.url).group(1)
        print(token)
    else:
        print("Password is not correct or other shit happened, dunno")
Example #6
def get_courses_of_semester(semester):
    soup = state.tucan_br.getcached(state.TUCAN_START_URL)
    soup = state.tucan_br.getcached(TUCAN_URL + soup.select_one('li[title="Lehrveranstaltungssuche"] a')['href'])
    form = ms.Form(soup.select_one("#findcourse"))
    form['course_catalogue'] = semester
    form['with_logo'] = '2' # we need two criteria to start search, this should show everything
    form.choose_submit("submit_search")
    page = state.tucan_br.submit(form, TUCAN_URL + form.form['action'])
    return walk_tucan_list(page.soup)
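
In this snippet, form.form is the underlying bs4 element wrapped by mechanicalsoup.Form, so form.form['action'] reads the form's action attribute; the absolute submit URL is then built by prefixing TUCAN_URL.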
Example #7
    def getJornada(self, ligaID, idJornada, browser):
        pageForm = browser.get_current_page().find("form", {"id": 'FormClasificacion'})
        pageForm['action'] = "/privadas/ver/id/{}/tipo/jornada/jornada/{}".format(ligaID, idJornada)

        jorForm = mechanicalsoup.Form(pageForm)
        jorForm['jornada'] = str(idJornada)

        resJornada = browser.submit(jorForm, browser.get_url())
        bs4Jornada = BeautifulSoup(resJornada.content, "lxml")

        jorResults = ClasifData(label="jornada{}".format(idJornada),
                                source=browser.get_url(),
                                content=bs4Jornada)
        return jorResults
Example #8
import bs4

import mechanicalsoup


def test_choose_submit_twice():
    """Test that calling choose_submit twice fails."""
    text = '''
    <form>
      <input type="submit" name="test1" value="Test1" />
      <input type="submit" name="test2" value="Test2" />
    </form>
    '''
    soup = bs4.BeautifulSoup(text, 'lxml')
    form = mechanicalsoup.Form(soup.form)
    form.choose_submit('test1')
    expected_msg = 'Submit already chosen. Cannot change submit!'
    with pytest.raises(Exception, match=expected_msg):
        form.choose_submit('test2')
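
Note that choose_submit also accepts the button element itself rather than its name attribute; a small self-contained sketch using the same two-button form as above:

import bs4
import mechanicalsoup

html = '''
<form>
  <input type="submit" name="test1" value="Test1" />
  <input type="submit" name="test2" value="Test2" />
</form>
'''
soup = bs4.BeautifulSoup(html, 'lxml')
form = mechanicalsoup.Form(soup.form)
button = soup.find('input', {'name': 'test2'})
form.choose_submit(button)  # equivalent to form.choose_submit('test2')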
Example #9
def log_into_sso(credentials) -> ms.Browser:
    browser = ms.Browser(soup_config={"features": "lxml"})  # or "html.parser"
    page = browser.get(SSO_URL)
    message = page.soup.select("#msg")
    if message and 'class="success"' not in str(message):
        raise Exception(message[0])

    form = ms.Form(page.soup.select('#fm1')[0])
    form["username"] = credentials["username"]
    form["password"] = credentials["password"]
    page = browser.submit(form, page.url)

    message = page.soup.select("#msg")
    if message and 'class="success"' not in str(message):
        raise Exception(message[0])

    return browser
Example #10
import mechanicalsoup


def test_submit_online():
    browser = mechanicalsoup.Browser()
    page = browser.get("https://brickseek.com/walmart-inventory-checker")
    form = mechanicalsoup.Form(page.soup.form)

    input_data = {"zip": "11784", "item_id": "9914706"}
    form.input(input_data)

    response = browser.submit(form, page.url)

    # inspect the response; this assumes the endpoint echoes the form data as JSON
    json = response.json()
    data = json["form"]
    print(data)
Example #11
def download_tucan_vv_search():
    print("\ntucan-vv search")
    soup = tucan_browser.getcached(TUCAN_START_URL)
    soup = tucan_browser.getcached(
        TUCAN_URL +
        soup.select_one('li[title="Lehrveranstaltungssuche"] a')['href'])
    form = ms.Form(soup.select_one("#findcourse"))
    semester_list = [(i.text, i['value'])
                     for i in soup.select('#course_catalogue option')
                     if TUCAN_THIS_SEMESTER_SEARCH_OPTION in i.text]
    print(semester_list[0])
    form['course_catalogue'] = semester_list[0][1]  # newest semester
    # we need two criteria to start the search; this should show everything
    form['with_logo'] = '2'
    form.choose_submit("submit_search")
    page = tucan_browser.submit(form, TUCAN_URL + form.form['action'])
    return walk_tucan_list(page.soup)
Example #12
def getClasif(categ, browser, liga):
    pageForm = browser.get_current_page().find("form", {"id": 'FormClasificacion'})
    pageForm['action'] = "/privadas/ver/id/{}/tipo/{}".format(liga, categ)

    selItem = pageForm.find("option", {'selected': 'selected'})
    jorForm = mechanicalsoup.Form(pageForm)

    if selItem:
        curJornada = selItem['value']
        jorForm['jornada'] = str(curJornada)

    resJornada = browser.submit(jorForm, browser.get_url())
    bs4Jornada = BeautifulSoup(resJornada.content, "lxml")

    jorResults = ClasifData(label=categ,
                            source=browser.get_url(),
                            content=bs4Jornada)
    return jorResults
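
Examples #7 and #12 use the same trick: the action attribute is rewritten on the raw bs4 tag before it is wrapped in mechanicalsoup.Form, so the submit goes to a different endpoint while the form's fields are kept intact.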
Example #13
File: api.py Project: rodneyny/snippets
 def scrape_HGMD_all_mutations(self, hgmd_username, hgmd_password):
     browser = mechanicalsoup.Browser()
     login_page = browser.get(
         "http://portal.biobase-international.com/cgi-bin/portal/login.cgi")
     time.sleep(2)
     login_form = mechanicalsoup.Form(
         login_page.soup.select_one('#login_form'))
     time.sleep(2)
     # login username and user_password required as strings
     login_form.input({"login": hgmd_username, "password": hgmd_password})
     time.sleep(2)
     r = browser.submit(login_form, login_page.url)
     time.sleep(2)
     try:
         soup = self.form_finder(browser, self.gene)
     except Exception:
         print("\nHGMD exception executed")
         print(
             "Check HGMD username and password are correct and try again.\nAlternatively check you are not already logged in to HGMD with a web browser:\nhttps://portal.biobase-international.com/cgi-bin/portal/login.cgi\n"
         )
         sys.exit()
     return soup
Example #14
def log_into_tucan(credentials) -> ms.Browser:
    print("logging in")
    browser, page = anonymous_tucan()
    login_form = ms.Form(page.soup.select('#cn_loginForm')[0])
    login_form['usrname'] = credentials["username"]
    login_form['pass']    = credentials["password"]
    page = browser.submit(login_form, page.url)
    if 'refresh' not in page.headers:
        print(re.sub("\n+", "\n", re.sub("[ \t]+", " ", page.soup.text)))
        print("===============")
        print("This means you probably used the wrong username/password.")
        print("===============")
        sys.exit()

    print("ok")
    redirected_url = "=".join(page.headers['REFRESH'].split('=')[1:])
    page = browser.get(TUCAN_URL + redirected_url)
    page = browser.get(_get_redirection_link(page))

    state.TUCAN_START_URL = page.url
    state.session_key = page.url.split("-")[2] # "N000000000000001," == anonymous

    return browser
Example #15
 def signin(self, user, password):
     page = self.get_page('/')
     form = mechanicalsoup.Form(page.soup.select_one('#cn_loginForm'))
     form.input({'usrname': user, 'pass': password})
     response = self.browser.submit(form, page.url)
     return self.get_page('='.join(response.headers['REFRESH'].split('=')[1:]))
Example #16
from __future__ import print_function
import argparse
import mechanicalsoup
from getpass import getpass
parser = argparse.ArgumentParser(description="Login to GitHub.")
parser.add_argument("username")
args = parser.parse_args()
args.password = getpass("Please enter your GitHub password: ")

browser = mechanicalsoup.Browser()

# request the GitHub login page; the result is a requests.Response object
login_page = browser.get("https://github.com/login")

# similar to assert login_page.ok, but with the full status code in case of
# failure.
login_page.raise_for_status()
# login_page.soup is a BeautifulSoup object
# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#beautifulsoup
# we grab the login form
login_form = mechanicalsoup.Form(login_page.soup.select_one('#login form'))
# specify username and password
login_form.input({"login": args.username, "password": args.password})
# submit form
page2 = browser.submit(login_form, login_page.url)
# verify we are now logged in
messages = page2.soup.find("div", class_="flash-messages")
if messages:
    print(messages.text)
assert page2.soup.select(".logout-form")
print(page2.soup.title.text)
# verify we remain logged in (thanks to cookies) as we browse the rest of
# the site
page3 = browser.get("https://github.com/MechanicalSoup/MechanicalSoup")
assert page3.soup.select(".logout-form")
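
Usage sketch (the filename login.py is a placeholder, not part of the original example): save the script and run python login.py YOUR_USERNAME; getpass then prompts for the password without echoing it to the terminal.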
Example #17
#Setup browser
browser = mechanicalsoup.StatefulBrowser(
    soup_config={'features': 'lxml'},
    user_agent=
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13',
)

#Request Cox login page. the result is a requests.Response object
login_page = browser.get(login_url)

#Similar to assert login_page.ok but with full status code in case of failure.
login_page.raise_for_status()

#Grab the login form. login_page.soup is a BeautifulSoup object
login_form = mechanicalsoup.Form(
    login_page.soup.select_one('form[name="sign-in"]'))

#Specify username and password
login_form.input({'username': cox_user, 'password': cox_pass})

#Submit form
browser.submit(login_form, login_page.url)

#Read the stats URL
stats_page = browser.get(stats_url)

#Grab the script with the stats in it
stats = stats_page.soup.findAll('script',
                                string=re.compile('utag_data'))[0].string

#Split and RSplit on the first { and on the last } which is where the data object is located
jsonValue = '{%s}' % (stats.split('{', 1)[1].rsplit('}', 1)[0], )

#Load into json
data = json.loads(jsonValue)
Example #18
def google_authenticate(username, password):
	ts = datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')

	data_response = {
		'timestamp': ts,
		'username': username,
		'password': password,
		'success': False,
		'change': False,
		'2fa_enabled': False,
		'type': None,
		'code': None,
		'name': None,
		'action': None,
		'headers': [],
		'cookies': [],
	}
		
	try:
		browser = mechanicalsoup.StatefulBrowser(
			soup_config={'features': 'html'},
			raise_on_404=True,
			user_agent='Python-urllib/2.7',
		)

		page = browser.open('https://www.gmail.com')

		user_form = browser.select_form('form')
		user_form.set('Email', username)
		user_response = browser.submit(user_form, page.url)

		pass_form = mechanicalsoup.Form(user_response.soup.form)
		pass_form.set('Passwd', password)
		pass_response = browser.submit(pass_form, page.url)

		raw_headers = pass_response.headers
		soup = pass_response.soup
		raw = soup.text

		sms = soup.find('input', {'id': 'idvPreregisteredPhonePin'})
		sms_old = soup.find('button', {'id': 'idvPreresteredPhoneSms'})
		u2f = soup.find('input', {'id': 'id-challenge'})
		touch = soup.find('input', {'id': 'authzenToken'})
		authenticator = soup.find('input', {'id': 'totpPin'})
		backup = soup.find('input', {'id': 'backupCodePin'})

		if 'Wrong password. Try again.' in raw:
			data_response['success'] = False
		elif 'Loading {}'.format(username) in raw:
			data_response['success'] = True

		if 'you need to change your password' in raw:
			data_response['change'] = True
			data_response['success'] = True

		if sms or sms_old:
			data_response['type'] = 'sms'
			data_response['2fa_enabled'] = True
			data_response['success'] = True

			if sms_old:
				final_form = mechanicalsoup.Form(pass_response.soup.form)
				final_response = browser.submit(final_form, page.url)
				raw_headers = final_response.headers
				raw = final_response.soup.text
				data_response['type'] = 'u2f'

			code = ''
			regexes = [
				r"\d{2}(?=</b>)",
				r"(?<=\u2022)\d{2}(?=G)",
				r"\d{2}(?=G)",
				r"\d{2}(?=\</b>)",
				r"\d{2}(?=S)",
			]
			for regex in regexes:
				matches = re.search(regex, raw, re.UNICODE)
				if matches:
					code = matches.group()
					break
				else:
					code = '••'

			data_response['code'] = code
		elif u2f:
			data_response['type'] = 'u2f'
			data_response['2fa_enabled'] = True
			data_response['success'] = True
		elif touch:
			code = ''
			name = ''
			regex_codes = [
				r"(?<=<b>)\d{1,3}(?=</b>)",
				r"(?<=then tap )\d{1,3}(?= on your phone)"
			]
			for regex_code in regex_codes:
				code_match = re.search(regex_code, raw)
				if code_match:
					code = code_match.group()
					break
				else:
					code = 0

			regex_names = [
				r"(?<=Unlock your ).*(?=Tap)",
				r"(?<=Check your ).*(?=<\/h2>)",
			]
			for regex_name in regex_names:
				name_match = re.search(regex_name, raw)
				if name_match:
					name = name_match.group()
					break
				else:
					name = 'phone'

			data_response['code'] = code
			data_response['name'] = name
			data_response['type'] = 'touchscreen'
			data_response['2fa_enabled'] = True
			data_response['success'] = True
		elif authenticator:
			name = ''
			regexes = [
				r"(?<=Get a verification code from the <strong>).*(?=<\/strong>)",
				r"(?<=Get a verification code from the ).*(?= app)",
			]
			for regex in regexes:
				name_match = re.search(regex, raw, re.UNICODE)
				if name_match:
					name = name_match.group()
					break
				else:
					name = 'authenticator app'

			data_response['name'] = name
			data_response['type'] = 'authenticator'
			data_response['2fa_enabled'] = True
			data_response['success'] = True
		elif backup:
			data_response['type'] = 'backup'
			data_response['2fa_enabled'] = True
			data_response['success'] = True
		else:
			if 'Try again in a few hours' in raw:
				data_response['error'] = 'locked out'
			data_response['action'] = 'redirect'

		cookies = []
		for c in browser.get_cookiejar():
			cookie = {}
			cookie['name'] = c.name
			cookie['value'] = c.value
			cookie['domain'] = c.domain
			cookie['path'] = c.path
			cookie['secure'] = c.secure
			cookie['expires'] = c.expires
			cookies.append(cookie)

		data_response['cookies'] = cookies

		for h in raw_headers:
			header = {}
			header['name'] = h
			header['value'] = raw_headers[h]
			data_response['headers'].append(header)

	except Exception as ex:
		data_response['error'] = str(ex)

	return data_response
Example #19
    def cox_usage_cb(self, kwargs):
        # URL that we authenticate against
        login_url = "https://www.cox.com/resaccount/sign-in.cox"

        # URL that we grab all the data from
        stats_url = "https://www.cox.com/internet/mydatausage.cox"

        # Your cox user account (e.g. [email protected]) and password
        cox_user = self.config['cox']['username']
        cox_pass = self.config['cox']['password']

        # Setup browser
        browser = mechanicalsoup.StatefulBrowser(
            soup_config={'features': 'lxml'},
            user_agent=
            'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13',
        )

        # Disable SSL verification workaround for issue #2
        browser.session.verify = False

        # Open the login URL
        login_page = browser.get(login_url)

        # Similar to assert login_page.ok but with full status code in case of failure.
        login_page.raise_for_status()

        # Find the form named sign-in
        login_form = mechanicalsoup.Form(
            login_page.soup.select_one('form[name="sign-in"]'))

        # Specify username and password
        login_form.input({'username': cox_user, 'password': cox_pass})

        # Submit the form
        browser.submit(login_form, login_page.url)

        # Read the stats URL
        stats_page = browser.get(stats_url)

        # Grab the script with the stats in it
        stats = stats_page.soup.findAll(
            'script', string=re.compile('utag_data'))[0].string

        # Split and RSplit on the first { and on the last } which is where the data object is located
        jsonValue = '{%s}' % (stats.split('{', 1)[1].rsplit('}', 1)[0], )

        # Load into json
        data = json.loads(jsonValue)

        # Post the sensor states to Home Assistant
        usage = int(data.get('dumUsage'))
        limit = int(data.get('dumLimit'))
        days_left = int(data.get('dumDaysLeft'))

        if usage:
            usage_pct = usage / limit * 100
        else:
            usage_pct = 0

        # Raw data
        self.set_state(entity_id='sensor.cox_usage',
                       state=usage,
                       attributes={
                           'friendly_name': 'Cox Usage',
                           'unit_of_measurement': 'GB',
                           'icon': 'mdi:chart-line-variant'
                       })
        self.set_state(entity_id='sensor.cox_limit',
                       state=limit,
                       attributes={
                           'friendly_name': 'Cox Limit',
                           'unit_of_measurement': 'GB',
                           'icon': 'mdi:gauge-full'
                       })
        self.set_state(entity_id='sensor.cox_days_left',
                       state=days_left,
                       attributes={
                           'friendly_name': 'Cox Days Left',
                           'unit_of_measurement': 'Days',
                           'icon': 'mdi:calendar-clock'
                       })
        self.set_state(entity_id='sensor.cox_usage_percent',
                       state=round(usage_pct, 2),
                       attributes={
                           'friendly_name': 'Cox Usage Percent',
                           'unit_of_measurement': '%',
                           'icon': 'mdi:percent'
                       })

        # Calculated/formatted data
        self.set_state(entity_id='sensor.cox_utilization',
                       state='{} / {} GB ({}%)'.format(usage, limit,
                                                       round(usage_pct)),
                       attributes={
                           'friendly_name': 'Cox Utilization',
                           'unit_of_measurement': None,
                           'icon': 'mdi:percent'
                       })

        now = self.datetime()
        days_in_month = monthrange(now.year, now.month)[1]
        days_passed = max(1, days_in_month - days_left)
        average_daily_usage = usage / days_passed
        remaining_data = max(0, limit - usage)

        if days_left == 0:
            remaining_daily_usage = remaining_data
        else:
            if remaining_data != 0:
                remaining_daily_usage = remaining_data / days_left
            else:
                remaining_daily_usage = 0

        self.set_state(entity_id='sensor.cox_average_daily_usage',
                       state=round(average_daily_usage, 2),
                       attributes={
                           'friendly_name': 'Cox Average Daily Usage',
                           'unit_of_measurement': 'GB',
                           'icon': 'mdi:chart-line'
                       })

        self.set_state(entity_id='sensor.cox_remaining_daily_usage',
                       state=round(remaining_daily_usage, 2),
                       attributes={
                           'friendly_name': 'Cox Remaining Daily Usage',
                           'unit_of_measurement': 'GB',
                           'icon': 'mdi:chart-line-stacked'
                       })

    def run(self):
        # run whatever needs to be run
        print("Started : Creating directory for download data")
        # create the directory for downloaded data
        path = "Data/DECLINED_LOAN_DATA"

        try:
            if not os.path.exists(path):
                os.makedirs(path)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise
        print("Finished : Creating directory for download data")

        EMAIL = self.EMAIL
        PASSWORD = self.PASSWORD

        #constants
        LOGIN_URL = 'https://www.lendingclub.com/account/gotoLogin.action'
        POST_LOGIN_URL = 'https://www.lendingclub.com/info/download-data.action'
        cwd = os.getcwd()
        destDir = os.path.join(cwd,"Data/DECLINED_LOAN_DATA")

        browser = mechanicalsoup.Browser() #Browser

        # request lending club login page. the result is a requests.Response object
        # http://docs.python-requests.org/en/latest/user/quickstart/#response-content
        login_page = browser.get(LOGIN_URL)

        # similar to assert login_page.ok but with full status code in case of
        # failure.
        login_page.raise_for_status()

        print("Logging in....")

        # login_page.soup is a BeautifulSoup object
        # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#beautifulsoup
        # we grab the login form
        login_form = mechanicalsoup.Form(login_page.soup.select_one('#login form'))

        # specify username and password
        login_form.input({"login_email": EMAIL, "login_password": PASSWORD})

        # submit form
        page2 = browser.submit(login_form, login_page.url)


        # verify we are now logged in
        # assert will see to it that the selected object exists
        # assert page2.soup.select("ul.signed-in")
        print("Succesfully logged in to ",page2.soup.title.text," [",page2.url,"]")

        # verify we remain logged in (thanks to cookies) as we browse the rest of
        # the site
        page3 = browser.get(POST_LOGIN_URL)

        print("Successfully navigated to ",page3.soup.title.text," [",page3.url,"]")

        print("Started : Downloading declined loan data")

        #scrape
        download_file_string = page3.soup.select("div#rejectedLoanStatsFileNamesJS")[0].text

        download_file_list = download_file_string.split("|")

        initial_path = "https://resources.lendingclub.com/"

        #download
        for sec_filename in download_file_list:
            try:
                if len(sec_filename) > 0:
                    theurl = initial_path + sec_filename
                    filename = mktemp('.zip')
                    name, hdrs = urllib.request.urlretrieve(theurl, filename)
                    thefile = ZipFile(filename)
                    thefile.extractall(destDir)
                    thefile.close()
            except Exception as e:
                print("URL : " + sec_filename + " not found: " + str(e))


        time.sleep(1)
        print("Finished : Downloading declined loan data")