Beispiel #1
0
def login(username, password):
    """Return a browser logged in to Kaggle, reusing a cached session if any.

    A browser previously pickled to ``~/<CONFIG_DIR_NAME>/browser.pickle`` is
    returned when it was stored for the same ``username``/``password`` pair.
    Otherwise the Kaggle login form is fetched, filled in and submitted, and
    the resulting browser is pickled for the next call.

    Args:
        username: Value placed into the form's ``#UserName`` field.
        password: Value placed into the form's ``#Password`` field.

    Returns:
        The cached browser object, or the freshly logged-in ``Browser``.

    Raises:
        SystemExit: With status 1, after printing the error, when submitting
            the form lands back on the login URL.
    """
    config_dir_path = os.path.join(os.path.expanduser('~'), CONFIG_DIR_NAME)
    pickle_path = os.path.join(config_dir_path, 'browser.pickle')

    # SECURITY NOTE(review): pickle.load() can execute arbitrary code if the
    # cache file is tampered with, and the cache stores the password in
    # plaintext. Consider a safer cache format.
    if os.path.isfile(pickle_path):
        try:
            with open(pickle_path, 'rb') as cache_file:
                data = pickle.load(cache_file)
            if data['username'] == username and data['password'] == password:
                return data['browser']
        except Exception:
            # The cache is best-effort: an unreadable or malformed file just
            # means we log in again. Unlike a bare ``except:``, this lets
            # KeyboardInterrupt and SystemExit propagate.
            pass

    login_url = 'https://www.kaggle.com/account/login'
    browser = Browser()

    login_page = browser.get(login_url)
    login_form = login_page.soup.select("#login-account")[0]
    login_form.select("#UserName")[0]['value'] = username
    login_form.select("#Password")[0]['value'] = password
    login_result = browser.submit(login_form, login_page.url)

    # Still being on the login URL after submitting is treated as a failure.
    if login_result.url == login_url:
        errors = login_result.soup.select(
            '#standalone-signin .validation-summary-errors')
        # Fall back to a generic message instead of an IndexError traceback
        # when the page carries no validation summary.
        error = errors[0].get_text() if errors else 'unknown error'
        print('There was an error logging in: ' + error)
        sys.exit(1)

    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(config_dir_path, mode=0o700, exist_ok=True)

    with open(pickle_path, 'wb') as f:
        pickle.dump(
            dict(username=username, password=password, browser=browser), f)

    return browser
Beispiel #2
0
def search(url, keyword, cloudflare_needed=False):
    """Submit ``keyword`` through a search form found on ``url``.

    The page is fetched, the first ``<form>`` whose markup mentions
    "search"/"Search" and that contains a text (or, failing that, search)
    input is filled with ``keyword``, and the form is submitted using a
    submit control whose markup also mentions "search"/"Search".

    Args:
        url: Address of the page that contains the search form.
        keyword: Text to put into the search input.
        cloudflare_needed: When true, the browser is built on top of the
            session returned by ``create_scraper()``.

    Returns:
        The ``text`` of the response to the form submission.

    Raises:
        IndexError: If the page has no search form with a usable input.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    browser = Browser(create_scraper()) if cloudflare_needed else Browser()
    web_page = browser.get(url)

    # Equivalent to the former r'[\s\S]*[Ss]earch[\s\S]*' under re.search(),
    # without the quadratic backtracking on markup that does not match.
    search_text = re.compile(r'[Ss]earch')

    form = None
    search_input = None
    for candidate in web_page.soup.find_all('form'):
        if search_text.search(str(candidate)) is None:
            continue
        field = candidate.find('input', attrs={'type': 'text'})
        if field is None:
            field = candidate.find('input', attrs={'type': 'search'})
        if field is not None:
            form, search_input = candidate, field
            break
    if form is None:
        raise IndexError(
            'no search form with a text/search input found at ' + url)

    search_input['value'] = keyword

    # Name the search-related submit controls 'Search' and blank out the
    # others so that choose_submit('Search') below can pick one.
    for button in form.find_all(attrs={'type': 'submit'}):
        button['name'] = 'Search' if search_text.search(str(button)) else ''

    # urljoin resolves absolute, root-relative and relative actions; a form
    # without an action attribute submits back to the page URL.
    target = urljoin(url, form.attrs.get('action', ''))

    form = Form(form)
    form.choose_submit('Search')
    return browser.submit(form, target).text
Beispiel #3
0
    def _login(self):
        """Log in to Kaggle with ``self.username`` / ``self.password``.

        Fetches the login page, fills the ``#UserName`` and ``#Password``
        fields of the ``#login-account`` form and submits it.

        Returns:
            The ``Browser`` that performed the login.

        Raises:
            Exception: If the submission ends up on the login URL again; the
                message includes the page's validation-summary text.
        """
        login_url = 'https://www.kaggle.com/account/login'
        session = Browser()

        page = session.get(login_url)
        form = page.soup.select("#login-account")[0]
        form.select("#UserName")[0]['value'] = self.username
        form.select("#Password")[0]['value'] = self.password

        outcome = session.submit(form, page.url)

        # Success path first: anything other than the login URL means we are in.
        if outcome.url != login_url:
            return session

        summary = outcome.soup.select(
            '#standalone-signin .validation-summary-errors')
        message = summary[0].get_text()
        raise Exception('There was an error logging in: ' + message)
Beispiel #4
0
    def __get_login_browser(self, username, password):
        """Log in to Kaggle and return the authenticated browser.

        Fetches the login page, fills the ``#UserName`` and ``#Password``
        fields of the ``#login-account`` form and submits it.

        Args:
            username: Value placed into the ``#UserName`` field.
            password: Value placed into the ``#Password`` field.

        Returns:
            The ``Browser`` that performed the login.

        Raises:
            SystemExit: With status 1, after printing the error, when the
                submission lands back on the login URL.
        """
        login_url = 'https://www.kaggle.com/account/login'
        browser = Browser()

        login_page = browser.get(login_url)
        login_form = login_page.soup.select("#login-account")[0]
        login_form.select("#UserName")[0]['value'] = username
        login_form.select("#Password")[0]['value'] = password
        login_result = browser.submit(login_form, login_page.url)

        # Still being on the login URL after submitting is treated as a failure.
        if login_result.url == login_url:
            errors = login_result.soup.select(
                '#standalone-signin .validation-summary-errors')
            # Fall back to a generic message instead of an IndexError
            # traceback when the page carries no validation summary.
            error = errors[0].get_text() if errors else 'unknown error'
            print('There was an error logging in: ' + error)
            sys.exit(1)

        return browser
Beispiel #5
0
 def login(self, username, password=None):
     """Log in to Kaggle and keep the authenticated browser on this instance.

     When ``password`` is None the code appears to prompt for it with
     ``getpass``. After the login form is submitted, the result page is
     checked for ``#standalone-signin .validation-summary-errors`` elements:
     if any exist, the first one's text is printed as the error; otherwise a
     success message is printed and the browser is stored in
     ``self.userSession``.
     """
     if password is None:
         # NOTE(review): the next line looks corrupted -- the text between
         # the getpass prompt and the "#login-account" selector seems to have
         # been swallowed (likely by credential redaction). The statements
         # that define `browser`, `login_page` and `login_form` are not
         # visible here; restore them from the original source.
         password = getpass(prompt="Kaggle Password: "******"#login-account")[0]
     login_form.select("#UserName")[0]['value'] = username
     login_form.select("#Password")[0]['value'] = password
     login_result = browser.submit(login_form, login_page.url)
     # A non-empty validation summary on the result page signals a failed login.
     if len(
             login_result.soup.select(
                 '#standalone-signin .validation-summary-errors')) != 0:
         print(
             "Something went wrong when trying to log you in\nHere is the error from Kaggle\n %s"
             % login_result.soup.select(
                 '#standalone-signin .validation-summary-errors')
             [0].get_text())
     else:
         print("Logged in Successfully")
         # Keep the logged-in browser on the instance.
         self.userSession = browser
from pprint import pprint
from bs4 import BeautifulSoup

# `Browser` is used below but was not imported by this script.
from mechanicalsoup import Browser

# Address of the login page; left blank in this template -- fill in before
# running.
url = ""

br = Browser()
page = br.get(url)
# The login form is the second <form> on the page.
form = page.soup.select("form")[1]

# Assigning to ["value"] goes through the tag's __setitem__ and sets the
# HTML attribute that will be submitted.
# user name
form.find("input", {"name": "DDDDD"})["value"] = ""
# password
form.find("input", {"name": "upass"})["value"] = ""
# it is OK to leave the 0MKKey field empty
form.find("input", {"name": "0MKKey"})["value"] = ""

success_page = br.submit(form, page.url)  # submit the filled-in form

print("--------------------------------------------------------------------")
print('form texts:')
success_info = success_page.soup.find('form', {'name': 'form1'})
pprint(success_info.get_text())

print("--------------------------------------------------------------------")

print('scripts:')
# find_all() returns a list of tags, so get_text() has to be called on each
# element rather than on the result list itself.
scripts = [script.get_text()
           for script in success_page.soup.find_all('script')]
pprint(scripts)
Beispiel #7
0
__Author__ = "noduez"
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/7/12 10:57 AM
# @File    : mech.py  (a programmable way of browsing the Web)
# @Software: PyCharm

from bs4 import BeautifulSoup, SoupStrainer
from mechanicalsoup import Browser

# Browser object used for every request below.
br = Browser()

# home page
# NOTE(review): other call sites in this file fetch a URL with Browser.get()
# and pass a form plus a URL to Browser.submit(); here submit() receives only
# a URL string -- confirm this is intended (br.get(...) may be what is meant).
rsp = br.submit('http://us.pycon.org/2011/home')
# NOTE(review): geturl(), read() and follow_link(text_regex=...) are not used
# by any other Browser call site in this file -- confirm that the Browser
# imported from mechanicalsoup and its responses actually provide them.
print('\n***', rsp.geturl())
print("Confirm home page has 'Log in' link; click it")
page = rsp.read()
# Stop early if the expected link text is not present in the page body.
assert 'Log in' in page, 'Log in not in page'
rsp = br.follow_link(text_regex='Log in')