def download_linke(coords, proxy, port, saveFile, saveMode):

    print proxy, port
    print proxy != ""

    url = "http://www.soda-is.com/eng/services/service_invoke/gui.php?" + "xml_descript=soda_tl.xml&Submit2=Month"

    session = Session()
    session.verify = False

    if proxy != "":
        proxies = {proxy: port}
        session.proxies = proxies

    br = RoboBrowser(session=session, parser="lxml")
    br.open(url)

    linke_form = br.get_forms()[1]

    num = len(coords)
    index = 0

    with open(saveFile, saveMode) as f:
        try:
            for coord in coords:
                inlon, inlat = coord
                linke_form["lat"].value = inlat
                linke_form["lon"].value = inlon

                sf = linke_form.submit_fields.getlist("execute")
                br.submit_form(linke_form, submit=sf[0])

                linke_table = br.find("table", {"cellspacing": "0", "cellpadding": "2"})

                linkes = get_monthly_linke_str(get_linke_values(linke_table))
                s = "%s,%s,%s\n" % (format(inlon, "0.5f"), format(inlat, "0.5f"), linkes)

                if len(s) > 48:
                    f.write(s)
                    print "Done with point %i of %i: (%s, %s)" % (
                        index + 1,
                        num,
                        format(inlon, "0.5f"),
                        format(inlat, "0.5f"),
                    )

                index += 1

                br.back()

            print "DONE!"

        except Exception as e:

            not_dl = list(coords[index:])
            with open(saveFile + "_notdownloaded.txt", "w") as nd:
                for c in not_dl:
                    nd.write("%s,%s\n" % (str(c[0]), str(c[1])))
            print e
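# A hypothetical call for download_linke above; the coordinates, proxy settings and
# output path are placeholders, and get_linke_values / get_monthly_linke_str are
# assumed to be defined elsewhere in the same module.
coords = [(2.35, 48.85), (-0.13, 51.51)]  # (lon, lat) pairs
download_linke(coords, proxy="", port="", saveFile="linke_values.csv", saveMode="w")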
Example #2
def robobrowser_edit():
    """Use robobrowser to increment population"""

    # login
    br = RoboBrowser(history=True, parser='lxml', user_agent='a python robot')
    br.open(login_form.LOGIN_URL)
    form = br.get_form(action='#')
    print('form before {}'.format(form))
    form['email'].value = login_form.LOGIN_EMAIL
    form['password'].value = login_form.LOGIN_PASSWORD
    print('form after {}'.format(form))
    br.submit_form(form)

    # edit country
    br.open(COUNTRY_URL)
    form = br.get_forms()[0]
    print('Population before:', form['population'].value)
    form['population'].value = str(int(form['population'].value) + 1)
    br.submit_form(form)

    # check population increased
    br.open(COUNTRY_URL)
    form = br.get_forms()[0]
    print('Population after:', form['population'].value)

    # some info about the session
    print('User-Agent')
    print(br.session.headers['User-Agent'])
    print('Cookies')
    print(br.session.cookies.items())
Example #3
class vk_session:
	def __init__(self, root_path, proxy="", cookies=""):
		self.is_signed = False
		self.proxy = proxy
		self.root_path = root_path
		session = requests.session()
		if proxy:
			session.proxies.update({'http': 'http://' + proxy, 'ssl': proxy ,'https': 'https://' + proxy})
		headers = {
			"ACCEPT": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
			"ACCEPT_ENCODING": "gzip, deflate, sdch",
			"ACCEPT_LANGUAGE": "ru-RU,ru;",
			"CONNECTION": "keep-alive",
			"REFERER": root_path,
			"UPGRADE_INSECURE_REQUESTS": "1",
			'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
		}
		session.headers = headers
		if cookies:
			session.cookies = cookies
		self.browser = RoboBrowser(session=session, timeout=4, history=False)

	def connect(self):
		
		self.browser.open(self.root_path) 	
		print("connected")

		
	def sign_in(self, username, password, captcha):
		try:
			form = self.browser.get_forms()[0]

			form["email"] = username
			form["pass"] = password
			if captcha:
				form["captcha_key"] = vk_captcha.decode(page=self.browser.parsed, root_path=self.root_path)
			self.browser.submit_form(form)
		except:
			print(username)
			raise

	def create_new_group(self, name, group_type, public_type):
		self.browser.open("https://m.vk.com/groups?act=new")
		form = self.browser.get_forms()[0]
		form["title"] = name
		form["type"] = group_type
		form["public_type"] = public_type
		self.browser.submit_form(form)
		time.sleep(1)

	def enter_captcha(self):
		form = self.browser.get_forms()[0]
		form["captcha_key"] = vk_captcha.decode(page=self.browser.parsed, root_path=self.root_path)
		self.browser.submit_form(form)
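# A minimal usage sketch for the vk_session class above; the root URL and the
# credentials are placeholders.
vk = vk_session("https://m.vk.com/")
vk.connect()
vk.sign_in("user@example.com", "secret", captcha=False)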
Example #4
class BKBrowser(object):
    def __init__(self):
        # Browse url :
        self.result = None
        self.browser = RoboBrowser(parser="html.parser")
        self.browser.session.headers = config.headers
        # Mount with custom SSL Adapter
        self.browser.session.mount('https://', HTTPSAdapter())

    def _connect(self):
        # Get to website
        print("- Connecting to url ...")
        self.browser.open(config.url)

    def _skip_first_page(self):
        button = self.browser.get_forms()[0]
        self.browser.submit_form(button)

    # Let's fill in the proper form !
    def _fill_form(self):
        while not self.browser.find('p', {'class': 'ValCode'}):
            inputs_map = max_radio_map(self.browser)
            f = self.browser.get_forms()[0]
            for i in f.keys():
                if f[i].value == '':
                    answers_list = inputs_map.get(i, ['1'])
                    f[i].value = random.choice(answers_list)
            f.serialize()
            self.browser.submit_form(f)

    def _fill_date_form(self):
        # Fill in Date/Time form and start the Questionnaire
        print("- Filling Forms Randomly ...")
        form = self.browser.get_forms()[0]
        form['JavaScriptEnabled'].value = '1'
        form['SurveyCode'].value = config.ID
        form['InputMonth'].value = config.date[0]
        form['InputDay'].value = config.date[1]
        form['InputHour'].value = config.time[0]
        form['InputMinute'].value = config.time[1]
        form.serialize()
        self.browser.submit_form(form)

    def get_validation_code(self):
        self._connect()
        self._skip_first_page()
        self._fill_date_form()
        self._fill_form()
        self.result = self.browser.find('p', {'class': 'ValCode'}).text
        return self.result

    def return_result(self):
        return self.result
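# Hypothetical driver for the BKBrowser class above; it assumes the config module
# referenced by the methods provides url, headers, ID, date and time.
bk = BKBrowser()
print("Validation code:", bk.get_validation_code())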
Example #6
def new_token():
    random_string = ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(16))

    from robobrowser import RoboBrowser
    browser = RoboBrowser()
    login_url = 'my_url'
    browser.open('https://github.com/login')
    form = browser.get_form()
    # print(form)
    form["login"].value = "thirstycode"
    form["password"].value = ""
    # print(form)
    browser.submit_form(form)
    browser.open('https://github.com/settings/tokens/new')
    form = browser.get_forms()
    # print(form)
    form[3]["oauth_access[description]"].value = random_string
    form[3]["oauth_access[scopes][]"].value = [
        'repo', 'admin:org', 'admin:public_key', 'admin:repo_hook',
        'admin:org_hook', 'gist', 'notifications', 'user', 'delete_repo',
        'write:discussion', 'admin:gpg_key'
    ]
    browser.submit_form(form[3])

    # print(browser.parsed)
    src = str(browser.parsed)

    start = '<code class="token" id="new-oauth-token">'
    end = '</code>'

    result = re.search('%s(.*)%s' % (start, end), src).group(1)
    return result
Example #7
def get_video_url(url):

    br = RoboBrowser(history=True, parser='lxml')
    br.open(url)

    cn = input('Convert to Simplified Chinese? (y/n)')
    if not cn:
        cn = 'y'
    if cn == 'y':
        # shift to simplified chinese
        lang = br.get_forms()[0]
        lang['session_language'].options = ['cn_CN']
        lang['session_language'].value = 'cn_CN'
        br.submit_form(lang)

    # get video title
    vid_title = br.find('div', {'id': 'viewvideo-title'}).text.strip()
    print('the video you want to download is: {0}'.format(vid_title))
    print('-----------------------------------------------------------')

    # get video id
    vid_id = re.findall(
        r'\d{6}',
        br.find('a', {
            'href': '#featureVideo'
        }).attrs['onclick'])[0]

    # get real video link
    vid_real_url = 'http://192.240.120.34//mp43/{}.mp4'.format(vid_id)
    return vid_real_url, re.sub(
        """[\s+\.\!\/_,$%^*(+\"\']+|[+——!,。|?、~@#¥%……&*():]+""", " ",
        vid_title).strip()
Example #8
def ExtractONEPAGE(page):

    final_res = ''
    browser = RoboBrowser(history=True,
                          parser='html.parser',
                          user_agent='Chrome/41.0.2228.0')

    while True:
        print('loop')
        browser.open(
            'http://tools.prowebguru.com/free-online-image-extractor/free_online_image_extractor_tool.php'
        )

        form = browser.get_forms({'class': 'form-horizontal'})

        if len(form) != 0:
            print('broke')
            break

    this_form = form[0]

    this_form["website"] = page

    browser.submit_form(this_form)

    img_links = browser.find_all('img', src=True)

    for line in img_links:
        if '/tbn/' not in line['src'] and '.wp.com' in line['src']:
            final_res = line['src']
            print(final_res)

    if final_res != '':
        with open('HenRUniqueComic.txt', 'a') as f:
            f.write(final_res + '\n')
Example #9
class Scraper:
    def randString(self, n):
        return "l" * n

    def check(self, usn):
        password = usn
        rand = ''
        for i in password:
            rand += i + self.randString(2)
        encoded = base64.b64encode(rand.encode())
        return encoded

    def scrape(self, usn, dob):
        self.browser = RoboBrowser(history=False, parser='html.parser')
        self.browser.open('http://parents.msrit.edu/index.php')
        form = self.browser.get_forms()[0]
        form['username'].value = usn
        form['password'].value = self.check(dob)
        form['passwd'].value = self.check(dob)
        self.browser.submit_form(form)

    def getHTML(self):
        """
        Returns parsed HTML.
        """
        return str(self.browser.parsed)
Example #10
    def getBrowser(contest):
        browser = RoboBrowser(parser="html.parser")
        browser.open('https://beta.atcoder.jp/login')

        form = browser.get_forms()[0]
        form['username'] = Auth.atcoder()['login']
        form['password'] = Auth.atcoder()['password']
        browser.submit_form(form)
        return browser
Example #11
    def getBrowser(contest):
        browser = RoboBrowser(parser="html.parser")
        browser.open(contest)
        soup = browser.parsed

        form = browser.get_forms()[0]
        form['login'] = Auth.yandexcontest()['login']
        form['passwd'] = Auth.yandexcontest()['password']
        browser.submit_form(form)
        return browser
Example #12
def login():
    browser = RoboBrowser(parser="html.parser")
    browser.open("https://www.codechef.com")
    login_form = browser.get_forms()[0]
    if login_form == None:
        print "Some Error Occurred"
        exit(0)
    login_form['name'] = User['username']
    login_form['pass'] = User['password']
    browser.submit_form(login_form)
    #authentication yet to be implemented
    return browser
Example #13
    def getBrowser(contest):
        browser = RoboBrowser(parser="html.parser", user_agent='Mozilla/5.0')
        browser.open(contest)
        soup = browser.parsed

        link = soup.findAll('a', 'link link_access_login')
        browser.open('https://official.contest.yandex.ru%s' % link[0]['href'])

        form = browser.get_forms()[0]
        form['login'] = Auth.opencup()['login']
        form['password'] = Auth.opencup()['password']
        browser.submit_form(form)
        return browser
Example #14
def reload_pyanywhr_app(username=None, password=None):
    if username is None:
        username = raw_input("Username: "******"Password: "******"Referer"] = url_login
    form = browser.get_forms()[0]
    form["username"].value = username
    form["password"].value = password
    browser.submit_form(form)
    assert browser.response.status_code == 200
    browser.open(url_web_app)
    assert browser.response.status_code == 200
    assert browser.url == url_web_app
    form = browser.get_forms(class_="reload_web_app")[0]
    browser.submit_form(form)
    assert browser.response.status_code == 200
    print("service reloaded")
    return browser.response.text
Example #15
    def __init__(self, v_url, v_user, v_password):
        try:
            br = RoboBrowser(parser="html.parser")
            br.open("{}{}".format(v_url.rstrip('/'), "/user/login"))
            form = br.get_forms()[1]
            form['username'].value = v_user
            form['password'].value = v_password
            br.submit_form(form)

        except Exception as error:
            print("Could not create browser: {}".format(error))
            sys.exit(1)
        self.v_url = v_url
        self.br = br
Example #16
class XSSFinder:
    def __init__(self, url):
        self.list_xss = []
        # self.url = url
        self.browser = RoboBrowser(parser=PARSER, history=True)
        self.browser.open(url)
        self.links_finder = LinksFinder(self.browser.url)

    def find(self):
        links = self.links_finder.get_valid_links()
        for link in links:
            self.browser.open(link)
            forms = self.browser.get_forms()
            for form in forms:
                fields = form.fields
                for field in fields:
                    form[field].value = VULNERABILITY_TESTING_STRING
                    self.validate_xss_weakness(form, field)

    def validate_xss_weakness(self, form, field):
        try:
            self.browser.submit_form(form)
            self.add_threat_to_list(field, form.method)
        except InvalidSubmitError:
            pass

    def add_threat_to_list(self, parameter, xss_type):
        threat = XSSFlaw(self.browser.url, parameter, xss_type)
        if threat not in self.list_xss:
            self.list_xss.append(threat)

    def get_xss_flaws(self):
        self.find()
        if len(self.list_xss) == 0:
            return NO_RESULT_FOUND
        else:
            result = EMPTY_STRING
            for xss_threat in self.list_xss:
                result += URL
                result += xss_threat.get_url()
                result += NEW_LINE
                result += PARAMETER
                result += xss_threat.get_parameter()
                result += NEW_LINE
                result += TYPE
                result += xss_threat.get_xss_type()
                result += TWO_NEW_LINES
            return result
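# A minimal usage sketch; the target URL is a placeholder, and PARSER, LinksFinder,
# XSSFlaw and the result-string constants are assumed to come from the same module.
finder = XSSFinder("http://example.com")
print(finder.get_xss_flaws())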
Example #17
class infs_brsr:
    """This browser will have functions useful to someone
    browsing the Infusionsoft front end programmatically.
    """

    def __init__(self, appname, username, password, *args, **kwargs):
        self.loggedin=False
        self.browser=RoboBrowser(history=True)
        self.appname=appname
        self.username=username
        self.password=password
        self.baseurl = 'https://' + self.appname + '.infusionsoft.com'

    def openbase(self):
        self.browser.open(self.baseurl)

    def login(self):
        self.openbase()
        loginform = self.browser.get_form()
        loginform.fields['username'].value = self.username
        loginform.fields['password'].value = self.password
        self.browser.submit_form(loginform)
        # This next step is probably a bad idea.  It needs
        # some form of control
        self.browser.follow_link(self.browser.get_links()[1])
        self.loggedin=True

    def getapikey(self):
        if not self.loggedin:
            self.login()
        self.browser.open(self.baseurl + '/app/miscSetting/itemWrapper?systemId=nav.admin&settingModuleName=Application&settingTabName=Application')
        pageSoup = BeautifulSoup(self.browser.response.content, 'html.parser')
        self.apikey=pageSoup.findAll(id='Application_Encrypted_Key:_data')[0].text
        return self.apikey

    def importContactCSV(self, pathToCSV='/home/jlmarks/importme.csv'):
        if not self.loggedin:
            self.login()
        importURL = "https://" + self.appname + ".infusionsoft.com/Import/jumpToWizard.jsp?update=false&profileClass=com.infusion.crm.db.importer.profiles.ContactProfile"
        self.browser.open(importURL)
        frms = self.browser.get_forms()
        for eachform in frms:
            if 'id' in eachform.fields.keys():
                self.thisimportid=eachform['id'].value
                correctform = eachform
        correctform.fields.pop('Back')
        correctform.fields['importFile'].value=open(pathToCSV, 'rb')
        self.browser.submit_form(correctform)
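# Hypothetical usage of the infs_brsr class above; the app name, credentials and
# CSV path are placeholders.
brsr = infs_brsr("myapp", "user@example.com", "secret")
print(brsr.getapikey())
brsr.importContactCSV("/tmp/contacts.csv")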
Example #18
def arglogin(username, password):
    url = "http://aaaaarg.fail/auth/login"
    session = Session()
    br = RoboBrowser(session=session, history=True, parser="lxml")
    br.open(url)
    #print(br)
    try:
        form = br.get_forms()[1]
    except:
        return None

    form['email'].value = username
    form['password'].value = password
    br.submit_form(form)

    return br
Example #19
def get_email_by_cin(cin):
    url = 'http://www.mca.gov.in/mcafoportal/viewCompanyMasterData.do'
    browser = RoboBrowser()
    browser.session.headers['User-Agent'] = random.choice(user_agents)
    browser.open(url)
    form = browser.get_forms()[-1]
    form['companyID'].value = cin
    browser.submit_form(form)
    table = browser.find('table', attrs={'class': 'result-forms'})
    if not table:
        return None
    email_header = table.find('td', text='Email Id')
    if not email_header:
        return None
    email_row = email_header.findNext('td')
    email = str.strip(email_row.text)
    return email.lower()
Example #20
def newfunc():
    url = 'https://www.screener.in/'
    start = requests.session()
    open = start.get(url)
    #rb =  RoboBrowser(history=True, parser="html.parser")
    #print(open.headers)
    start.headers = open.headers
    rb = RoboBrowser(session=start, history=True, parser="html.parser")
    rb.open(url)
    #ff = rb.get_form(class_='u-full-width')
    #ff = rb.get_form(id=re.compile("top-nav-search"))
    ff = rb.get_forms()[0]
    print(ff)
    # yInputControl = rb.find(class_=re.compile(r'\y-input__control\b'))
    yInputControl = rb.find(placeholder="Company search...")
    #print(yInputControl)
    yInputControl.value = 'PCPL'
Example #22
def _login(user="", pw=""):
    """
    Log in to Rock using the given credentials.
    """

    loginPage = (LOGIN_URL)

    # testing robobrowser
    browser = RoboBrowser(history=True, parser="lxml")
    browser.open(loginPage)
    form = browser.get_forms()[0]

    #TODO: dynamically find user field ID and pw ID
    form[USER_FIELD_ID].value = user
    form[PW_FIELD_ID].value = pw
    submitBtn = LOGIN_BTN_ID
    browser.submit_form(form, submit=form[submitBtn])

    return browser
Example #23
def scrape_this_page(page_name):
    page = requests.get(page_name)
    if page.status_code == requests.codes.ok:
        # If a folder on the desktop does not already exist for this given artist, create one, then set it as the
        # directory to save images to
        pictures = get_image_urls(
            html.fromstring(page.content).xpath('//a[not(ancestor::div[@class="gr-body"])]/@href'))
        downloaded_images = []
        s = requests.session()
        s.headers.update({'Referer': 'http://www.deviantart.com/'})
        USERAGENTS = (
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1',
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
            'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
            'Opera/9.99 (Windows NT 5.1; U; pl) Presto/9.9.9',
            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/ Safari/530.5',
            'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/6.0',
            'Mozilla/5.0 (Windows; U; Windows NT 6.1; pl; rv:1.9.1) Gecko/20090624 Firefox/3.5 (.NET CLR 3.5.30729)'
        )
        browser = RoboBrowser(history=False, session=s, tries=3, user_agent=random.choice(USERAGENTS))
        browser.open(
            'https://www.deviantart.com/users/login?ref=http%3A%2F%2Fwww.deviantart.com%2F&remember_me=1')
        form = browser.get_forms()[1]
        form['username'] = '******'
        form['password'] = '******'
        browser.submit_form(form)
        for picture in pictures:
            # Make sure image has not already been downloaded, and that it is not simply a duplicate url with
            # the comments section open
            if picture not in downloaded_images and '#comments' not in picture:
                browser.open(picture)
                deviation_page = str(browser.parsed)
                if download_img(deviation_page, get_folder(page_name)):
                    print("Grabbed " + picture)
                    downloaded_images.append(picture)
    else:
        print('Bad Url')
    return len(downloaded_images)
Example #24
def GeneOntology(name):
    print("Starting GeneOntology for " + name)
    br = RoboBrowser(parser="html.parser")
    br.open("http://geneontology.org/")

    form = br.get_forms()[1]

    geneinput = form["input"]
    species = form["species"]

    form["species"].value = "IXOSC"

    os.chdir("/home/david/Documents/blast/Blastfiles/outputfiles/Genelists")
    os.listdir(".")
    file = open(name, "r")

    string = ""

    for line in file.readlines():
        #print(line)
        string = string + line

    form["input"] = string
    #print(form)
    br.submit_form(form)

    #print(br.find_all())
    #DebugHtml(str(br.parsed))

    table_link = br.find("a",
                         href=re.compile("/tools/compareToRefListTxt.jsp"))
    br.follow_link(table_link)
    csv_content = br.response.content.decode("utf-8")

    savefile = open("GOoutput/" + name, "w")
    savefile.write(csv_content)
    savefile.close()
    print("finished")
Example #25
def weblogic_checkout(URL_1,URL_2, username, password):
    # Creates a new browser object called browser, enables history, and opens up the weblogic URL in that browser.
    browser = RoboBrowser(history=True)
    browser.open(URL_1)
    # Creates an array of all the forms on the page, which is only one because this should be the sign-on screen.
    # Creates a variable called form and assigns it the first and only form on the page.
    forms = browser.get_forms()
    form = forms[0]
    # The two text-fields of the form are labeled j_username and j_password. I found this by using the chrome inspector on the page.
    # Enters the login info into the appropriate text fields.
    form['j_username'] = username
    form['j_password'] = password
    # Submits our login info.
    browser.submit_form(form)
    # Navigates to table with current server states.
    browser.open(URL_2)
    # Selects the table with the data that we need.
    data = browser.find('table', id="genericTableFormtable")
    # Prints the data with a new line after each.
    for info in data.findAll('td'):
        info = str(info)
        print(info)
        print(' ')
Example #26
def fetch_highscores(first_name):
    # First, a RoboBrowser opens the highscores list.
    br = RoboBrowser()
    br.open('https://secure.runescape.com/m=hiscore_oldschool/hiscorepersonal')

    # To access a player's highscores, we must have RoboBrowser enter their username in the
    # 'search player' form.  There are multiple forms on the page, but the 'search player'
    # form is the first one.
    form = br.get_forms()[0]

    # 'user1' is the name attribute of the form.  We get the username from the first name using
    # the file 'usernames'.  The purpose of a separate file for usernames is so I can present
    # this code but keep my friends and my usernames a secret.
    form.fields['user1'].value = get_username(first_name)

    # Submit the form.  We are now on first_name's highscores page.
    br.submit_form(form)

    # src is a long HTML string which we will look through for the total level.
    src = str(br.parsed)

    # Here I use BeautifulSoup.  It's probably not necessary to switch from RoboBrowser, but
    # it was easier for me to get the code working.
    soup = BeautifulSoup(src, features='html.parser')

    # At the time of writing this code, 'class_ = centerDiv' is the closest location for the
    # total level.
    posts = soup.find_all(class_='centerDiv')
    player_string = posts[0].get_text()

    # 'player_string' has a lot of new line characters, and I only want one piece of the string.
    # So, I turn the string into a list using newline characters as the break points.
    player_list = player_string.splitlines()

    # The total level is at index number 47, so extract it.
    total_level = player_list[47]
    return total_level
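# Hypothetical call; get_username() and the 'usernames' file mentioned in the
# comments are assumed to exist, and 'Alice' must have an entry there.
print(fetch_highscores('Alice'))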
Example #27
def ExtractFirstImg(url):

    final_res = ''

    browser = RoboBrowser(history=True,
                          parser='html.parser',
                          user_agent='Chrome/41.0.2228.0')
    browser.open(url)

    read_button = browser.find('div', {'class': 'read-now'})

    link = read_button.find('a', href=True)

    ComicFirstPage = link['href']

    browser.open(
        'http://tools.prowebguru.com/free-online-image-extractor/free_online_image_extractor_tool.php'
    )

    form = browser.get_forms({'class': 'form-horizontal'})

    this_form = form[0]

    this_form["website"] = ComicFirstPage

    browser.submit_form(this_form)

    img_links = browser.find_all('img', src=True)

    for line in img_links:
        if '/tbn/' not in line['src']:
            final_res = line['src'] + '|' + url

    print(final_res)

    with open('HenR.txt', 'a') as f:
        f.write(final_res + '\n')
Example #28
def get_portal_auth() -> str:
    """
    Attempts login to the Club1909 page and retrieves the cookie FortressPortalAuth

    :return: The value of the .FortressPortalAuth cookie
    """
    browser = RoboBrowser(session, history=True)
    browser.open(LOGIN_FORM_URL)
    login_form = browser.get_forms()[0]
    login_form['email'] = os.environ['club1909_username']
    login_form['password'] = os.environ['club1909_password']

    # TODO: check get_forms returns one value
    # TODO: check login errors / exceptions

    logging.debug(
        f"Attempt to login with {os.environ['club1909_username']} and {os.environ['club1909_password']} "
    )

    browser.submit_form(login_form)
    logging.info(
        f"Found portal Auth code {browser.session.cookies['.FortressPortalAuth']}"
    )
    return browser.session.cookies['.FortressPortalAuth']
Example #29
# Number of pages of followed users (48 users per page)
if following_users_num % 48 != 0:
    pages = (following_users_num // 48) + 1
else:
    pages = following_users_num // 48

# For stripping HTML tags
p = re.compile(r"<[^>]*?>")
# For removing [jump:1]-style markers
jump = re.compile(r"\[jump:.+\]")
# File encoding setting
character_encoding = 'utf_8'

# Log the scraper in to pixiv
pixiv_url = 'https://www.pixiv.net'
browser = RoboBrowser(parser='lxml', history=True)
browser.open('https://accounts.pixiv.net/login')
form = browser.get_forms('form', class_='')[0]
form['pixiv_id'] = client_info["pixiv_id"]
form['password'] = client_info["password"]
browser.submit_form(form)

# Set the URL of the followed-users list page
target_url = 'https://www.pixiv.net/bookmark.php?type=user&rest=show&p='

# Collect the user IDs of all followed users
following_users_id = []
for i in range(1, pages + 1):
    print(target_url + str(i))
    browser.open(target_url + str(i))
    following_users = browser.find(class_='members')
    for user in following_users.find_all("input"):
        following_users_id.append(user.get("value"))
Example #30
def main():
    """This loops through every account in accounts.csv, appending all of their orders into one local HTML file.
    That HTML file uses CSS pulled from amazon.com so it looks exactly the same, and all of the links work except the ones that require login.
    """

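    # Each accounts.csv row is read below as email,password,update; orders are only
    # fetched when the update column is the string "true". A hypothetical row:
    #   user@example.com,hunter2,true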
    if not os.path.isfile("history.html"):
        makeHistoryFile()
    if not os.path.isfile("accounts.csv"):
        makeAccountFile()
        print "accounts.csv file made. Fill in email/passwords and run again."
        return 1

    with open("accounts.csv", "rU") as csvFile:
        reader = csv.reader(csvFile)

        for row in reader:
            email = str(row[0])
            password = str(row[1])
            update = str(row[2])

            if update.lower() == "true":
                # html5lib parser required for broken html on gameSplits
                s = requests.Session()
                s.headers[
                    "User-Agent"
                ] = "Mozilla (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.7 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.7"
                browser = RoboBrowser(history=True, parser="html5lib", session=s)

                browser.open(
                    "https://www.amazon.com/ap/signin?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.com%2F%3Fref_%3Dnav_ya_signin"
                )

                form_signIn = browser.get_forms()[0]
                form_signIn["email"] = email
                form_signIn["password"] = password

                browser.submit_form(form_signIn)

                browser.open(
                    "https://www.amazon.com/gp/css/history/orders/view.html?orderFilter=year-%s&startAtIndex=1000"
                )

                orders = browser.find_all(class_="a-box-group a-spacing-base order")

                with open(r"./history.html", "a+") as historyFile:
                    historyFile.seek(0)
                    storedOrderIds = []
                    tempOrder = ""
                    storeLine = False
                    print "Collected orders from history.html"
                    for line in historyFile:
                        if line == "<!-- Start Order -->\n":
                            storeLine = True
                            continue
                        if line == "<!-- End Order -->\n":
                            storedOrderIds.append(getOrderId(cStringIO.StringIO(tempOrder)))
                            tempOrder = ""
                            storeLine = False
                        if storeLine:
                            tempOrder += line

                    print "Orders stored", len(storedOrderIds)
                    print "Find/Adding new orders for", email
                    for order in orders:
                        orderId = getOrderId(cStringIO.StringIO(order.__str__()))
                        if not orderId in storedOrderIds:
                            print "adding order", orderId
                            historyFile.write("\n<!-- Start Order -->\n")
                            historyFile.write(getAccountHtml(email))
                            historyFile.write(order.__str__())
                            historyFile.write("\n<!-- End Order -->\n")

    print "Done"
Example #31
class Interaction(object):
    def __init__(self, httpc, interactions=None, verify_ssl=True):
        self.httpc = httpc
        self.browser = RoboBrowser()
        self.interactions = interactions
        self.verify_ssl = verify_ssl

    def pick_interaction(self, response, base):
        if self.interactions is None:
            return None

        self.browser._update_state(response)
        _bs = self.browser.parsed
        unic = ""

        for interaction in self.interactions:
            _match = 0
            for attr, val in list(interaction["matches"].items()):
                if attr == "url":
                    if val == base:
                        _match += 1
                elif attr == "title":
                    if _bs is None:
                        break
                    if _bs.title is None:
                        break
                    if val in _bs.title.contents:
                        _match += 1
                    else:
                        _c = _bs.title.contents
                        if isinstance(_c, list) and not isinstance(_c, str):
                            for _line in _c:
                                if val in _line:
                                    _match += 1
                                    continue
                elif attr == "content":
                    if unic and val in unic:
                        _match += 1

            if _match == len(interaction["matches"]):
                return interaction

        raise InteractionNeeded("No interaction matched")

    def pick_form(self, forms, **kwargs):
        """
        Picks which form in a web-page that should be used

        :param forms: A list of robobrowser.Forms instances
        :return: The picked form or None if no form matched the criteria.
        """

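        # Hypothetical criteria, matching how "pick" and "index" are read below:
        #   pick_form(forms, pick={"form": {"id": "login-form"},
        #                          "control": {"id": "mode", "value": "password"}})
        #   pick_form(forms, index=0)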
        _form = None

        if len(forms) == 1:
            _form = forms[0]
        else:
            if "pick" in kwargs:
                _dict = kwargs["pick"]
                for form in forms:
                    if _form:
                        break
                    for key, _ava in list(_dict.items()):
                        if key == "form":
                            _keys = list(form.attrs.keys())
                            for attr, val in list(_ava.items()):
                                if attr in _keys and val == form.attrs[attr]:
                                    _form = form
                        elif key == "control":
                            prop = _ava["id"]
                            _default = _ava["value"]
                            try:
                                orig_val = form[prop]
                                if isinstance(orig_val, str):
                                    if orig_val == _default:
                                        _form = form
                                elif _default in orig_val:
                                    _form = form
                            except KeyError:
                                pass
                            except Exception as err:
                                pass
                        elif key == "method":
                            if form.method == _ava:
                                _form = form
                        else:
                            _form = None

                        if not _form:
                            break
            elif "index" in kwargs:
                _form = forms[int(kwargs["index"])]

        return _form

    def select_form(self, response, **kwargs):
        """
        Pick a form on a web page, possibly enter some information and submit
        the form.

        :param response: The original response (as returned by requests)
        :return: The response from submitting the form
        """
        self.browser._update_state(response)
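        # Hypothetical kwargs, as consumed in this method: "index"/"pick" go to
        # pick_form, "set" fills form fields, "click" names a field left untouched,
        # "tester" is required for the endpoint check, and "requests_args" is
        # forwarded to submit_form, e.g.:
        #   select_form(resp, index=0, set={"login": "user", "password": "pw"},
        #               tester=tester, requests_args={"verify": False})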
        forms = self.browser.get_forms()
        form = self.pick_form(forms, **kwargs)

        if form is None:
            raise Exception("Can't pick a form !!")

        if "set" in kwargs:
            for key, val in list(kwargs["set"].items()):
                if key.startswith("_"):
                    continue
                if "click" in kwargs and kwargs["click"] == key:
                    continue

                try:
                    form[key].value = val
                except (ValueError):
                    pass
                except Exception as err:
                    raise
                    # cntrl = form.find_control(key)
                    # if isinstance(cntrl, ListControl):
                    #     form[key] = [val]
                    # else:
                    #     raise

        if form.action in kwargs["tester"].my_endpoints():
            _res = {}
            for name, cnt in form.fields.items():
                _res[name] = cnt.value
            return _res

        try:
            requests_args = kwargs["requests_args"]
        except KeyError:
            requests_args = {}

        self.browser.submit_form(form, **requests_args)
        return self.browser.state.response

    #noinspection PyUnusedLocal
    def chose(self, orig_response, path, **kwargs):
        """
        Sends an HTTP GET to a URL formed from the present URL and the given
        relative path.

        :param orig_response: The original response
        :param path: The relative path to add to the base URL
        :return: The response from the GET request
        """

        try:
            _trace = kwargs["trace"]
        except KeyError:
            _trace = False

        if not path.startswith("http"):
            try:
                _url = orig_response.url
            except KeyError:
                _url = kwargs["location"]

            part = urlparse(_url)
            url = "%s://%s%s" % (part[0], part[1], path)
        else:
            url = path

        return self.httpc.send(url, "GET", trace=_trace)
        #return resp, ""

    def redirect(self, orig_response, url_regex, **kwargs):
        """
        Simulates a JavaScript redirect by extracting the target of the
        redirection from the page content using the given regex

        :param orig_response: The original response
        :param url_regex: The regex that defines how the target of the redirect
                          can be extracted from the content
        """

        matches = re.findall(url_regex, orig_response.content)
        no_of_matches = len(matches)
        if no_of_matches != 1:
            raise InteractionNeeded("Expected single match but found %d" %
                                    no_of_matches)

        url = matches[0]
        return self.httpc.send(url, "GET")

    def post_form(self, response, **kwargs):
        """
        The same as select_form but with no possibility of changing the content
        of the form.

        :param response: The original response (as returned by requests)
        :return: The response submit_form() returns
        """

        self.browser._update_state(response)
        forms = self.browser.get_forms()
        form = self.pick_form(forms, **kwargs)

        return self.browser.submit_form(form)

    def response(self, response, **kwargs):
        return {"text": response.text}

    #noinspection PyUnusedLocal
    def interaction(self, args):
        _type = args["type"]
        if _type == "form":
            return self.select_form
        elif _type == "link":
            return self.chose
        elif _type == "response":
            return self.response
        elif _type == "redirect":
            return self.redirect
        elif _type == "javascript_redirect":
            return self.redirect
        else:
            return no_func
Example #32
class NetmagisClient(object):

    url = None     # Netmagis's URL
    casurl = None  # CAS's URL
    br = None      # the browser reference
    c = None
    s = None

    def __init__(self, url, casurl):
        self.url = url
        self.casurl = casurl
        self.br = RoboBrowser(history=True, parser='lxml')

    # call the loginURL to authenticate
    def caslogin(self, login, passwd):
        uri = self.casurl+"?service="+self.url+"start"
        self.br.open(uri)
        form = self.br.get_form()
        form['username'].value = login
        form['password'].value = passwd
        self.br.submit_form(form)
        returnvalue = self.br.response.content.decode('utf8')
        if 'Logged as' in returnvalue:
            return 0
        else:
            return 1

    def addvhost(self, data):
        uri = self.url+"add"
        self.br.open(uri)
        f = self.br.get_forms()[2]
        f["name"] = data["name"]
        f["domain"] = data["domain"]
        f["nameref"] = data["nameref"]
        f["domainref"] = data["domainref"]
        self.br.submit_form(f)
        returnaddvhost = self.br.response.content.decode('utf8')
        if 'An error occurred in Netmagis application' in returnaddvhost:
            returnvalue = 1
        else:
            returnvalue = 0
        return returnvalue

    def add(self, data):
        uri = self.url+"add"
        self.br.open(uri)
        f = self.br.get_forms()[0]
        f["name"] = data["name"]
        f["domain"] = data["domain"]
        f["addr"] = data["addr"]
        f["mac"] = data["mac"]
        f["iddhcpprof"] = data["iddhcpprof"]
        f["hinfo"] = data["hinfo"]
        f["comment"] = data["comment"]
        f["respname"] = data["respname"]
        f["respmail"] = data["respmail"]
        self.br.submit_form(f)
        returnadd = self.br.response.content.decode('utf8')
        catchable_errors = ['An error occurred in Netmagis application']
        if any(x in returnadd for x in catchable_errors):
            returnvalue = 1
        else:
            if 'There is already a host named' in returnadd:
                f2 = self.br.get_forms()[0]
                self.br.submit_form(f2)
                self.br.response.content.decode('utf8')
                returnvalue = 0
            else:
                returnvalue = 0
        return returnvalue

    def deletename(self, data):
        uri = self.url+"del"
        self.br.open(uri)
        f = self.br.get_forms()[0]
        f["name"] = data["name"]
        f["domain"] = data["domain"]
        self.br.submit_form(f)
        firstsubmit = self.br.response.content.decode('utf8')
        if 'An error occurred in Netmagis application' in firstsubmit:
            returnvalue = 1
        else:
            f2 = self.br.get_form()
            self.br.submit_form(f2)
            secondsubmit = self.br.response.content.decode('utf8')
            if 'An error occurred in Netmagis application' in secondsubmit:
                returnvalue = 1
            else:
                returnvalue = 0
        return returnvalue

    def deleteip(self, data):
        uri = self.url+"del"
        self.br.open(uri)
        form = self.br.get_forms()[1]
        form["addr"] = data["addr"]
        self.br.submit_form(form)
        firstsubmit = self.br.response.content.decode('utf8')
        if 'An error occurred in Netmagis application' in firstsubmit:
            returnvalue = 1
        else:
            f2 = self.br.get_form()
            self.br.submit_form(f2)
            secondsubmit = self.br.response.content.decode('utf8')
            if 'An error occurred in Netmagis application' in secondsubmit:
                returnvalue = 1
            else:
                returnvalue = 0
        return returnvalue

    def exportcsv(self, data):
        uri = self.url+"net"
        self.br.open(uri)
        form = self.br.get_form(action='net')
        form['plages'].value = data['plage']
        self.br.submit_form(form, submit=form['docsv'])
        print(self.br.response.content.decode('utf8'))

    def looklarge(self, data):
        uri = self.url+"add"
        self.br.open(uri)
        form = self.br.get_forms()[1]
        form['naddr'] = data['naddr']
        form['plage'] = data['plage']
        self.br.submit_form(form, submit=form['dosearch'])
        returnlooklarge = self.br.response.content.decode('utf8')
        if 'Aucun bloc' in returnlooklarge:
            return 0
        f2 = self.br.get_forms()[0]
        returnvalue = f2.fields['addr'].value
        return returnvalue
Example #33
        print('Posted online')
    if r.text=="403":
        print('Unauthorised 403')
    


USERAGENTS ='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:37.0) Gecko/20100101 Firefox/37.0'
session = req_session()
session.headers.update({'Referer': 'https://www.deviantart.com'})

browser = RoboBrowser(history=False, session=session, tries=2, user_agent=USERAGENTS)

print("Attempting to log in to deviantArt...")

browser.open('https://www.deviantart.com/users/login?ref=https%3A%2F%2Fwww.deviantart.com%2F&remember_me=1')
form = browser.get_forms()[1]
form['username'] = USERNAME
form['password'] = PSWD
#print(form)
if browser.find(text=re.compile("Login")):
    print('Compiled login fields form...')

browser.submit_form(form)

if browser.find(text=re.compile("The password you entered was incorrect")):
        print("Wrong password or username. Attempting to download anyway.")
        exit()
elif browser.find(text=re.compile("\"loggedIn\":true")):
        print("Logged in!")
else:
        print("Login unsuccessful. Attempting to download anyway.")
Example #34
class Client:
    def __init__(self, data_path, logger, timeout=None, max_retries=None):
        """
        :param str data_path: Where to store submissions
        :param logging.logger logger: Object to produce logs with
        :param int timeout: Default timeout, in seconds
        :param int max_retries: Number of retries
        """
        self.__data_path = data_path
        self.__logger = logger
        self.__browser = RoboBrowser(parser='html.parser',
                                     timeout=timeout,
                                     tries=max_retries,
                                     multiplier=1,
                                     history=False)

    def login(self, username, password):
        """
        :param str username: ZIMT username
        :param str password: ZIMT password
        """
        main_page = moodleutils.get_main_page(MOODLE_DOMAIN)
        self.__browser.open(main_page)
        if self.__browser.url == main_page:
            return True

        self.__browser.open(CAS_URL)
        login_form = self.__browser.get_forms()[0]
        login_form['username'].value = username
        login_form['password'].value = password
        self.__browser.submit_form(login_form)

        self.__browser.open(main_page)
        self.__browser.parsed.decompose()
        return self.__browser.url == main_page

    def download_new_course_data(self, course_id, allowed_assignments):
        """
        :param int course_id: Id of the course (can be found in the course url)
        :param set allowed_assignments: Ids or/and names of allowed assignments
        """
        main_page = moodleutils.get_course_main_page(MOODLE_DOMAIN, course_id)
        self.__browser.open(main_page)

        course_name = self.__browser.select(
            '.page-header-headings')[0].h1.string[:]
        course_data = Course(course_id, course_name)

        classes = ['section main clearfix', 'section main clearfix current']
        for class_ in classes:
            for section in self.__browser.find_all(class_=class_):
                for assign in section.find_all(
                        class_='activity assign modtype_assign'):
                    assign_id = assign['id'].split('-')[1]
                    assign_name = assign.find(
                        class_='instancename').contents[0]

                    if assign_id not in allowed_assignments and \
                            assign_name not in allowed_assignments:
                        self.__logger.warning('Assignment is not allowed, skip ' \
                                '[id={}, name=`{}\']'.format(
                                assign_id, assign_name))
                        continue

                    assign_data = self.__download_new_assignment_data(
                        assign_id, self.__data_path)
                    course_data.add_assignment(assign_data)
                    self.__logger.info('Got assignment data ' \
                            '[id={}, name=`{}\']'.format(
                            assign_data.id, assign_data.name))
        self.__browser.parsed.decompose()
        return course_data

    def send_feedback(self, course_data):
        """
        :param moodle.objects.Course course_data:
                course data with grades and comments
        """
        for assign_data in course_data.assignments():
            submissions_page = \
                    moodleutils.get_view_submissions_page(
                    MOODLE_DOMAIN, assign_data.id)
            self.__browser.open(submissions_page)

            options_form = None
            for form in self.__browser.get_forms():
                if self.__is_options_form(form):
                    options_form = form
                    break
            if options_form is None:
                self.__logger.error('No options form for assignment, ' \
                        'skip [id={},name=`{}\']'.format(
                        assign_data.id, assign_data.name))
                continue
            if not self.__fill_options_form(options_form):
                self.__logger.error(
                        'Can not fill options form for assignment, skip ' \
                        '[id={},name=`{}\']'.format(
                        assign_data.id, assign_data.name))
                continue
            self.__browser.submit_form(options_form)

            grading_form = None
            for form in self.__browser.get_forms():
                if self.__is_grading_form(form):
                    grading_form = form
                    break
            if grading_form is None:
                self.__logger.error('No grading form for assignment, ' \
                        'skip [id={}, name=`{}\']'.format(
                        assign_data.id, assign_data.name))
                continue

            self.__logger.info('Process assignment submissions '\
                    '[id={}, name=`{}\']'.format(
                    assign_data.id, assign_data.name))

            for subm_data in assign_data.submissions():
                if subm_data.grade is None:
                    continue
                subm = self.__browser.find(
                    class_='user{}'.format(subm_data.user_id))
                if subm is None:
                    continue
                submitted = subm.find(class_='submissionstatussubmitted')
                if submitted is None:
                    continue
                subm_ts = self.__parse_timestamp(
                    subm.find(class_='cell c7').contents[0])
                if subm_data.timestamp != subm_ts:
                    self.__logger.warning(
                            'Outdated submission, skip ' \
                            '[user_id={}, username=`{}\', timestamp={}]'.format(
                            subm_data.user_id, subm_data.username, subm_data.timestamp))
                    continue
                if not self.__fill_grading_form(grading_form, subm_data):
                    self.__logger.error(
                            'Can not fill grading form for submission, skip ' \
                            '[user_id={}, username=`{}\', timestamp={}]'.format(
                            subm_data.user_id, subm_data.username, subm_data.timestamp))
                    continue
                self.__logger.info('Grading form was filled successfully '\
                        '[user_id={}, username=`{}\', timestamp={}]'.format(
                        subm_data.user_id, subm_data.username, subm_data.timestamp))

            self.__browser.submit_form(grading_form)
            self.__logger.info('Grading form was submitted for assignment ' \
                    '[id={}, name=`{}\']'.format(
                    assign_data.id, assign_data.name))
            self.__browser.parsed.decompose()

    def __parse_timestamp(self, date_str, date_locale='de_DE.utf8'):
        if date_str == '-':
            return 0
        cur_locale = locale.getlocale()
        locale.setlocale(locale.LC_ALL, date_locale)  # XXX install locale
        timestamp = datetime.strptime(date_str,
                                      '%A, %d. %B %Y, %H:%M').timestamp()
        locale.setlocale(locale.LC_ALL, cur_locale)
        return timestamp

    def __download_file(self, link, path):
        resp = self.__browser.session.get(link)  # XXX ugly
        if resp.status_code != 200:
            self.__logger.error('Bad response code: {} ' \
                    '[link=`{}\']'.format(resp.status_code, link))
            return False
        try:
            with open(path, 'w') as f:
                f.write(resp.content.decode('utf-8'))
        except:
            with open(path, 'wb') as f:
                f.write(resp.content)
        return True

    def __download_submission(self, subm, path):
        user_id = subm['class'][0][4:]
        username = subm.find(class_='cell c2').a.contents[0]
        timestamp = self.__parse_timestamp(
            subm.find(class_='cell c7').contents[0])

        subm_path = os.path.join(path, 'user_' + user_id)
        subm_data = Submission(user_id, username, timestamp, subm_path)

        utils.remove_dir(subm_path)
        utils.make_dir(subm_path)
        for f in subm.find_all(class_='fileuploadsubmission'):
            name = f.a.contents[0]
            link = f.a['href']
            if not self.__download_file(link, os.path.join(subm_path, name)):
                self.__logger.warning('Can not download file `{}\', ' \
                        'skip submission [user_id={}, username=`{}\', timestamp={}]'.format(name,
                        subm_data.user_id, subm_data.username, subm_data.timestamp))
                return None
            else:
                self.__logger.info('Got file `{}\' ' \
                        '[user_id={}, username=`{}\', timestamp={}]'.format(name,
                        subm_data.user_id, subm_data.username, subm_data.timestamp))
        return subm_data

    def __download_new_assignment_data(self, assign_id, path):
        submissions_page = moodleutils.get_view_submissions_page(
            MOODLE_DOMAIN, assign_id)
        self.__browser.open(submissions_page)
        table = self.__browser.find(class_='flexible generaltable generalbox')

        assign_path = os.path.join(path, 'assignment_' + assign_id)
        assign_name = self.__browser.find(role='main').h2.string[:]
        assign_data = Assignment(assign_id, assign_name)

        for subm in table.tbody.find_all('tr'):
            submitted = subm.find(class_='submissionstatussubmitted')
            if submitted is None:
                continue
            graded = subm.find(class_='submissiongraded')
            user_id = subm['class'][0][4:]
            username = subm.find(class_='cell c2').a.contents[0]
            subm_ts = self.__parse_timestamp(
                subm.find(class_='cell c7').contents[0])
            grade_ts = self.__parse_timestamp(
                subm.find(class_='cell c10').contents[0])
            if graded and subm_ts + 60 < grade_ts:  # XXX +1 minute - to retest in case of delays
                self.__logger.debug('Submission is already graded ' \
                        '[user_id={}, username=`{}\', subm_ts={}, grade_ts={}]'.format(
                        user_id, username, subm_ts, grade_ts))
                continue
            if time.time() < subm_ts + 120:
                self.__logger.info('Submission is too new and will be evaluated next time ' \
                        '[user_id={}, username=`{}\', subm_ts={}, grade_ts={}]'.format(
                        user_id, username, subm_ts, grade_ts))
                continue
            subm_data = self.__download_submission(subm, assign_path)
            if subm_data is not None:
                self.__logger.info('Got submission data ' \
                        '[user_id={}, username=`{}\', timestamp={}]'.format(
                        subm_data.user_id, subm_data.username, subm_data.timestamp))
                assign_data.add_submission(subm_data)
            else:
                self.__logger.warning('Submission data is not downloaded, skip ' \
                        '[user_id={}, username=`{}\', timestamp={}]'.format(
                        user_id, username, subm_ts))
        return assign_data

    def __is_options_form(self, form):
        for field in form.keys():
            if field == 'quickgrading':
                return True
        return False

    def __fill_options_form(self, form):
        try:
            form['filter'] = ''
            form['perpage'] = '-1'
            form['quickgrading'] = ['1']
            return True
        except Exception:
            return False

    def __is_grading_form(self, form):
        for field in form.keys():
            if field.startswith('quickgrade_'):
                return True
        return False

    def __fill_grading_form(self, form, subm):
        try:
            # it is impossible to fill the form with 0
            form['quickgrade_' + subm.user_id] = \
                    (subm.grade if subm.grade > 0 else 1e-20)
            # it is necessary to update form even if the data is the same
            old_comment = form['quickgrade_comments_' + subm.user_id].value
            new_comment = subm.comment
            if new_comment is None:
                new_comment = ''
            if new_comment == old_comment:
                new_comment += ' '
            form['quickgrade_comments_' + subm.user_id] = new_comment
            return True
        except Exception:
            return False
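
For orientation, here is a minimal sketch of how the quickgrading pieces above fit together: enable quickgrading through the options form, then fill one quickgrade_<user_id>/quickgrade_comments_<user_id> pair per submission and submit once. It assumes an already authenticated RoboBrowser session; quickgrade, submissions_page and submissions are placeholder names, not part of the original bot.

# Minimal sketch of the quickgrading flow (assumes an authenticated RoboBrowser;
# `submissions_page` and `submissions` are placeholders, not part of the original).
def quickgrade(browser, submissions_page, submissions):
    # First pass: enable quickgrading and show all rows on one page.
    browser.open(submissions_page)
    for form in browser.get_forms():
        if 'quickgrading' in form.keys():
            form['filter'] = ''
            form['perpage'] = '-1'
            form['quickgrading'] = ['1']
            browser.submit_form(form)
            break

    # Second pass: fill grade/comment fields for each user and submit once.
    browser.open(submissions_page)
    for form in browser.get_forms():
        if any(name.startswith('quickgrade_') for name in form.keys()):
            for subm in submissions:
                form['quickgrade_' + subm.user_id] = str(subm.grade)
                form['quickgrade_comments_' + subm.user_id] = subm.comment or ''
            browser.submit_form(form)
            break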
Example #35
class ScheduleScrapper:
    """
    """
    def __init__(self):
        self.browser = RoboBrowser(history=False, parser='lxml')

    def _get_page_with_schedule(self,
                                group=None,
                                sdate=None,
                                edate=None,
                                teacher=None) -> list:
        """
        This function find schedule for group
        :param group: A group for which search will be done
        :param sdate: Start date to search
        :param edate: End date to search
        :return: List of days with schedule
        """
        self.browser.open(stng.SCHEDULE_URL + stng.GET_SCHEDULE_URL)
        form = self.browser.get_forms()[0]

        if group:
            form['group'].value = group.encode(
                stng.DEFAULT_ENCODING_FOR_REQUEST)

        if teacher:
            form['teacher'].value = teacher.encode(
                stng.DEFAULT_ENCODING_FOR_REQUEST)

        if sdate:
            form['sdate'].value = sdate.encode(
                stng.DEFAULT_ENCODING_FOR_REQUEST)

        if edate:
            form['edate'].value = edate.encode(
                stng.DEFAULT_ENCODING_FOR_REQUEST)

        self.browser.submit_form(form)
        soup = self.browser.parsed
        list_of_couples = soup.find_all('div', class_='col-md-6')[1:]
        return list_of_couples

    def _parse_schedule(self,
                        list_of_couples: list,
                        group=None,
                        teacher=None) -> list:
        """
        This function parse list of html tags to normal text list
        :param list_of_couples: list with html tags
        :return: list with formatting text
        """
        if group:
            appeal = f'для групи {group}'
        else:
            appeal = f'для {teacher}'
        result = list()
        for elem in list_of_couples:
            date = elem.find('h4').text
            # result.append(f'Розклад на {date}\n')
            result_str = f'<strong>Розклад на {date}\n{appeal}</strong>\n'
            for i in elem.find_all('tr'):
                pair = i.find_all('td')
                if pair[2].text != '':
                    pair_info = re.sub(" +", " ", pair[2].text)
                    pair_time = f'\n{pair[0].text} пара ({pair[1].text[:5]} - {pair[1].text[5:]})'
                    result_str += f'<i>{pair_time}</i>\n{pair_info}\n'
                    continue
            result.append(result_str)
        return result

    def _get(self, link: str, query: str) -> list:
        self.browser.open(link + query)
        response = self.browser.response.content.decode(
            stng.DEFAULT_ENCODING_FOR_REQUEST)
        try:
            return json.loads(response)["suggestions"]
        except (ValueError, KeyError):
            return []

    def groups_get(self, group: str) -> list:
        link = (stng.SCHEDULE_URL + stng.GROUP_EXISTS)
        return self._get(link, group)

    def teachers_get(self, teacher: str) -> list:
        link = (stng.SCHEDULE_URL + stng.TEACHER_EXISTS)
        return self._get(link, teacher)

    def get_schedule(self,
                     group=None,
                     sdate=None,
                     edate=None,
                     teacher=None) -> list:
        list_of_couple = self._get_page_with_schedule(group, sdate, edate,
                                                      teacher)
        response = self._parse_schedule(list_of_couple, group, teacher)
        return response
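
A short, hypothetical usage sketch for the scraper above; the group name and dates are invented, and stng.SCHEDULE_URL and the other constants come from the snippet's own settings module.

# Hypothetical usage of ScheduleScrapper (group name and dates are invented;
# stng.SCHEDULE_URL etc. come from the snippet's own settings module).
scrapper = ScheduleScrapper()

# Autocomplete-style check that the group exists before requesting a schedule.
if scrapper.groups_get('KN-21'):
    for day in scrapper.get_schedule(group='KN-21',
                                     sdate='01.09.2023',
                                     edate='07.09.2023'):
        print(day)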
Example #36
File: roburger.py  Project: jfyuen/botking
                                        block=block,
                                        ssl_version=ssl.PROTOCOL_TLSv1)

# Browse url :
browser = RoboBrowser(parser="lxml")
browser.session.headers = config.headers

# Mount with custom SSL Adapter
browser.session.mount('https://', MyAdapter())

# Get to website
print "- Connecting to url ..."
browser.open(config.url)

# Click on first button to go to second page:
button = browser.get_forms()[0]
browser.submit_form(button)

# Fill in Date/Time form and start the Questionnaire
form = browser.get_forms()[0]
form['JavaScriptEnabled'].value = '1'
form['SurveyCode'].value = config.ID
form['InputMonth'].value = config.date[0]
form['InputDay'].value = config.date[1]
form['InputHour'].value = config.time[0]
form['InputMinute'].value = config.time[1]

form.serialize()
browser.submit_form(form)

print "- Filling Forms Randomly ..."
Example #37
class Interaction(object):
    def __init__(self, httpc, interactions=None, verify_ssl=True):
        self.httpc = httpc
        self.browser = RoboBrowser()
        self.interactions = interactions
        self.verify_ssl = verify_ssl

    def pick_interaction(self, response, base):
        if self.interactions is None:
            return None

        self.browser._update_state(response)
        _bs = self.browser.parsed
        unic = ""

        for interaction in self.interactions:
            _match = 0
            for attr, val in list(interaction["matches"].items()):
                if attr == "url":
                    if val == base:
                        _match += 1
                elif attr == "title":
                    if _bs is None:
                        break
                    if _bs.title is None:
                        break
                    if val in _bs.title.contents:
                        _match += 1
                    else:
                        _c = _bs.title.contents
                        if isinstance(_c, list) and not isinstance(_c, str):
                            for _line in _c:
                                if val in _line:
                                    _match += 1
                                    continue
                elif attr == "content":
                    if unic and val in unic:
                        _match += 1

            if _match == len(interaction["matches"]):
                return interaction

        raise InteractionNeeded("No interaction matched")

    def pick_form(self, forms, **kwargs):
        """
        Picks which form in a web-page that should be used

        :param forms: A list of robobrowser.Forms instances
        :return: The picked form or None if no form matched the criteria.
        """

        _form = None

        if len(forms) == 1:
            _form = forms[0]
        else:
            if "pick" in kwargs:
                _dict = kwargs["pick"]
                for form in forms:
                    if _form:
                        break
                    for key, _ava in list(_dict.items()):
                        if key == "form":
                            _keys = list(form.attrs.keys())
                            for attr, val in list(_ava.items()):
                                if attr in _keys and val == form.attrs[attr]:
                                    _form = form
                        elif key == "control":
                            prop = _ava["id"]
                            _default = _ava["value"]
                            try:
                                orig_val = form[prop]
                                if isinstance(orig_val, str):
                                    if orig_val == _default:
                                        _form = form
                                elif _default in orig_val:
                                    _form = form
                            except KeyError:
                                pass
                            except Exception as err:
                                pass
                        elif key == "method":
                            if form.method == _ava:
                                _form = form
                        else:
                            _form = None

                        if not _form:
                            break
            elif "index" in kwargs:
                _form = forms[int(kwargs["index"])]

        return _form

    def select_form(self, response, **kwargs):
        """
        Pick a form on a web page, possibly enter some information and submit
        the form.

        :param orig_response: The original response (as returned by requests)
        :return: The response do_click() returns
        """
        self.browser._update_state(response)
        forms = self.browser.get_forms()
        form = self.pick_form(forms, **kwargs)

        if form is None:
            raise Exception("Can't pick a form !!")

        if "set" in kwargs:
            for key, val in list(kwargs["set"].items()):
                if key.startswith("_"):
                    continue
                if "click" in kwargs and kwargs["click"] == key:
                    continue

                try:
                    form[key].value = val
                except (ValueError):
                    pass
                except Exception as err:
                    raise
                    # cntrl = form.find_control(key)
                    # if isinstance(cntrl, ListControl):
                    #     form[key] = [val]
                    # else:
                    #     raise

        if form.action in kwargs["tester"].my_endpoints():
            _res = {}
            for name, cnt in form.fields.items():
                _res[name] = cnt.value
            return _res

        try:
            requests_args = kwargs["requests_args"]
        except KeyError:
            requests_args = {}

        self.browser.submit_form(form, **requests_args)
        return self.browser.state.response

    # noinspection PyUnusedLocal
    def chose(self, orig_response, path, **kwargs):
        """
        Sends a HTTP GET to a url given by the present url and the given
        relative path.

        :param orig_response: The original response
        :param content: The content of the response
        :param path: The relative path to add to the base URL
        :return: The response do_click() returns
        """

        if not path.startswith("http"):
            try:
                _url = orig_response.url
            except (AttributeError, KeyError):
                _url = kwargs["location"]

            part = urlparse(_url)
            url = "%s://%s%s" % (part[0], part[1], path)
        else:
            url = path

        return self.httpc.send(url, "GET")
        # return resp, ""

    def redirect(self, orig_response, url_regex, **kwargs):
        """
        Simulates a JavaScript redirect by extracting the target of the
        redirection from the page content using the given regex

        :param orig_response: The original response
        :param url_regex: The regex that defines how the target of the redirect
                          can be extracted from the content
        """

        matches = re.findall(url_regex, orig_response.text)
        no_of_matches = len(matches)
        if no_of_matches != 1:
            raise InteractionNeeded("Expected single match but found %d" %
                                    no_of_matches)

        url = matches[0]
        return self.httpc.send(url, "GET")

    def post_form(self, response, **kwargs):
        """
        The same as select_form but with no possibility of changing the content
        of the form.

        :param response: The original response (as returned by requests)
        :return: The response submit_form() returns
        """

        self.browser._update_state(response)
        forms = self.browser.get_forms()
        form = self.pick_form(forms, **kwargs)

        return self.browser.submit_form(form)

    def response(self, response, **kwargs):
        return {"text": response.text}

    # noinspection PyUnusedLocal
    def interaction(self, args):
        _type = args["type"]
        if _type == "form":
            return self.select_form
        elif _type == "link":
            return self.chose
        elif _type == "response":
            return self.response
        elif _type == "redirect":
            return self.redirect
        elif _type == "javascript_redirect":
            return self.redirect
        else:
            return no_func
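
The dictionaries that drive pick_interaction() and pick_form()/select_form() are easiest to see by example. The values below are invented for illustration and are not part of the original code.

# Invented example of the structures the Interaction helpers consume.
# pick_interaction() inspects interaction["matches"]; select_form() is driven
# by keyword arguments such as "pick" and "set".
interactions = [
    {
        "matches": {
            "url": "https://idp.example.org/login",  # compared against `base`
            "title": "Login",                        # looked up in the page <title>
        },
    },
]

# Keyword arguments as select_form() would receive them:
form_kwargs = {
    "pick": {
        "form": {"id": "login-form"},                # match a form by attribute
    },
    "set": {"username": "alice", "password": "secret"},
}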
Example #38
from robobrowser import RoboBrowser;
import BeautifulSoup;
"""
Take HTML file as input
return well indented HTML file using freeformatter.com
"""
input_html = open('rb2.html','r+').read();
browser = RoboBrowser(history=True);
main_url = 'http://www.freeformatter.com/html-formatter.html';
browser.open(main_url);
#print browser.find();
#print browser.response.text;
forms = browser.get_forms();
form_counter = 1;
#for f in forms:
# print f,'\n',++form_counter;
"""
<RoboForm action=, inputstring=, inputurl=, indent=3spaces, forcenewwindow=false> 
"""
form = browser.get_form(action='/html-formatter.html');
#print form;
form['inputstring'] = input_html;
form['forcenewwindow'] = 'false';
browser.submit_form(form);
#print browser;
resp = browser.response.content;
output_file = open('rb3.html','w+');
output_file.write(resp);

Example #39
File: scrap.py  Project: podema/scraping
document='<table border="1" style="border-collapse:collapse">'
Users={
    #Put users here
}


"""
pwd=Users.items()[0][1]
user=Users.items()[0][0]
"""
for user,pwd in Users.iteritems():
    browser=RoboBrowser(history=True)
    browser.open('https://www.aqmetrix.com', verify=False)

    form=browser.get_forms().pop()
    form['nombre'].value=user
    form['passwd'].value=pwd
    browser.submit_form(form, verify=False)
    
    table=BeautifulSoup(browser.session.get('https://www.aqmetrix.com/aqx/diary/infodisp/get_infodisp_incidencias.php').content)
   
    j=0
    for row in table.select('table tbody tr'):
        document=document+'<tr>'        
        for data in row.select('td'):
            if data.select('a') == []:
                document=document+data.prettify()
            else:
                for i in data.select('a'):
                    if 'PNG' in i.attrs['href']:
Example #40
import re
import socket
import random


# ----- Start crawling 
crid = 1

while(True):
    
    username = "******" % crid
    # print username;

    browser = RoboBrowser()
    browser.open('https://click2win.settrade.com/LoginRepOnRole.jsp?txtLogin='******'&txtPassword='******'&txtSecureKey=NONE&txtDefaultPage=%2FSETClick2WIN%2FSelectUserLeague.jsp&txtLoginPage=SETClick2WIN/index.jsp&txtBrokerId=089&txtSystem=ITP&txtRole=INTERNET&tmpUsername=&tmpPassword=')
    form = browser.get_forms()[0]
    browser.submit_form(form)
    form = browser.get_forms()[0]
    browser.submit_form(form)
    body = str(browser.parsed)
    # print type(body)
    # print body

    if ( "openStreaming" in body):
        print "OK -- " + username;
    else:
        print "FA ------------ " + username;
        

    crid+=1;
    # break;
Example #41
def fanduel_salaries_scraper(credentials,
                             bucket_name,
                             obj_path,
                             years=default_years,
                             weeks=default_weeks):
    client = boto3.client('s3')
    browser = RoboBrowser()
    browser.open(login_url)
    login_form = browser.get_forms()[0]

    # Set login credentials
    login_form['ctl00$Body$EmailTextbox'].value = credentials['email']
    login_form['ctl00$Body$PasswordTextbox'].value = credentials['password']
    login_form.serialize()

    # Submit login form
    browser.submit_form(login_form)

    # Open the previously hidden page
    for yearIdx, year in enumerate(years):
        year_dict = years[yearIdx]
        year_key = list(year_dict.keys())[0]
        sn = year_dict[year_key]

        for week in weeks:
            w = week
            ew = week

            # Initialize the data to be written to the file
            formatted_data = ''

            for pos_idx, pos in enumerate(default_pos):
                pos_dict = default_pos[pos_idx]
                pos_key = list(pos_dict.keys())[0]
                p = pos_dict[pos_key]

                salary_data_url = 'https://fantasydata.com/nfl-stats/daily-fantasy-football-salary-and-projection-tool.aspx?fs={}&stype=0&sn={}&scope=0&w={}&ew={}&s=&t=0&p={}&st=FantasyPointsFanDuel&d=1&ls=&live=false&pid=true&minsnaps=4'.format(
                    fs, sn, w, ew, p)

                # Delay before retrieving next set of data
                time.sleep(0.25)

                browser.open(salary_data_url)
                content = browser.find_all('tr')

                for idx, line in enumerate(content):
                    # Only add the header once per year
                    if idx == 0 and week == 0 and p == 2:
                        formatted_data = headers + '\n'
                    elif idx != 0:
                        # Remove the comma from each salary 5,200 >> 5200
                        line_values = line.find_all(text=True)
                        line_values[10] = line_values[10].replace(',', '')

                        parsed_data = ','.join(line_values)
                        stripped_line = parsed_data.strip('\n').strip(',')
                        extra_fields = ',' + year_key
                        next_line = stripped_line + extra_fields + '\n'

                        formatted_data = formatted_data + next_line

                file_path = '{}/{}/{}.csv'.format(obj_path, year_key, week + 1)

            try:
                # Upload object to the S3 bucket
                client.put_object(Bucket=bucket_name,
                                  Body=formatted_data,
                                  Key=file_path)
            except RuntimeError as err:
                print('Failed to write to file: ', err)
                raise err

            print('Success! Uploaded data: {}'.format(file_path))
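
A hypothetical call to the scraper above; the credentials, bucket and key prefix are placeholders, and login_url, default_years and default_weeks are module-level constants not shown in this snippet.

# Hypothetical invocation (credentials, bucket and prefix are placeholders).
fanduel_salaries_scraper(
    credentials={'email': 'user@example.com', 'password': 'secret'},
    bucket_name='my-nfl-data-bucket',
    obj_path='fanduel/salaries',
)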
Example #42
class Session():

	# self.username ='';
	# self.password ='';
	# self.driver = None;

	def __init__(self,username='',password=''):
		self.username = username
		self.password = password
		self.browser = RoboBrowser()
		
	def login(self):
		self.browser.open('https://click2win.settrade.com/LoginRepOnRole.jsp?txtLogin='******'&txtPassword='******'&txtSecureKey=NONE&txtDefaultPage=%2FSETClick2WIN%2FSelectUserLeague.jsp&txtLoginPage=SETClick2WIN/index.jsp&txtBrokerId=089&txtSystem=ITP&txtRole=INTERNET&tmpUsername=&tmpPassword='******'fvSyncTimeURL'])
		servTime = self.browser.select('p')[0].get_text().split("|")
		servTime = int(servTime[1]) #+ config.hourshift*60*60*1000
		servTime = servTime/1000
		self.difftime = time.time() - servTime
		# print self.difftime

	def getStreamingVar(self):
		# ------ Generate flash variables
		self.browser.open(config.url+'/realtime/streaming5/flash/StreamingPage.jsp')		
		src = self.browser.select('html')[0]
		src = src.get_text().encode('utf-8').split("\n")
		for i in src:
			if(i.find('flashVar')== -1 ): continue;
			i = i[i.find('flashVar'):]
			i = i[i.find('{')+1:]
			i = i[0:i.find('}')]
			src = i;
			break;
		flashVar = {}
		print src
		src = src.split(",")
		for line in src:
			line = line.split(":")
			flashVar[urllib.unquote(line[0])] = urllib.unquote(line[1][1:-1])
		
		self.flashVar = flashVar
		self.syncTime()
		return flashVar

	def getInstrumentList(self):
		url = config.url + self.flashVar['fvDataProviderStrURL']
		key = datetime.datetime.fromtimestamp(time.time()+self.difftime).strftime('%d/%m/%Y') + "_" + self.flashVar['fvBrokerId']+"_"+self.flashVar['fvUserref']
		m = hashlib.md5()
		m.update(key)
		hs = m.hexdigest()
		# print "hash("+key +") = " + hs
		params = "boardType=equity&"
		params += "APIVersion="+config.APIVersion+"&"
		params += "subListName=&"
		params += "mainListName=.A&"
		params += "boardSubType=&"
		params += "service=12&"
		params += "q="+hs
		self.browser.open(url +"?"+params);
		text = self.browser.select('p')[0].get_text().split("|")

		if text[0] == 'T':
		    category = text[6].split("^")
		    # print category
		    futures = [];
		    options = [];
		    equity  = [];
		    indexes = [];
		    for cate in category:
		        if cate == 'futuresAndUnderlying':
		            insts = text[7].split("^")
		            for inst in insts:
		                inst = inst.split("~")[0]
		                futures.append(inst);
		                
		        if cate == 'options':
		            insts = text[8].split("^")
		            for inst in insts:
		                inst = inst.split("~")[0]
		                options.append(inst);
		                
		        if cate == 'equity':
		            insts = text[9].split("^")
		            for inst in insts:
		                inst = inst.split("~")[0]
		                equity.append(inst);
		                
		    new_equity =[]
		    
		    reNW = re.compile('^.*-W$')
		    reW = re.compile('^.*-W\d(\d*)$')
		    reDW = re.compile('^(.|..|...|....)\d\d(C|P)\d\d\d\d.$')
		    reF = re.compile('^.*-F$')
		    reP = re.compile('^.*-P$')
		    reQ = re.compile('^.*-Q$')
		      
		    for eq in equity:
		        if(reNW.match(eq) or reW.match(eq) or reDW.match(eq) or reF.match(eq) or reP.match(eq) or reQ.match(eq)):
		            #not simple
		            pass;
		        else:
		            new_equity.append(eq);
		    
		    equity = new_equity;
		    # print len(futures)
		    # print len(options)
		    # print len(equity)
		    # total = len(futures)+ len(options) + len(equity)
		    # print total
		    # equity = equity[1:100]
		    # futures = futures[1:100]
		    # options = []
		    return (equity,futures,options)
		else:
			return None;

	def genKey(self):
		url = config.url + self.flashVar['fvGenerateKeyURL']
		key = datetime.datetime.fromtimestamp(time.time()+self.difftime).strftime('%d/%m/%Y') + "_" + self.flashVar['fvBrokerId']+"_"+self.flashVar['fvUserref']
		m = hashlib.md5()
		m.update(key)
		hs = m.hexdigest()
		# print "hash("+key +") = " + hs
		
		params = "time="+str(int(time.time()+self.difftime))+"&"
		params += "clientType="+self.flashVar['fvRealtimeClientType']+"&"
		params += "txtSETNET3="+self.flashVar['fvSETNET3']+"&"
		params += "APIVersion="+config.APIVersion+"&"
		params += "q="+hs

		self.browser.open(url +"?"+params);
		src = self.browser.select('p')[0].get_text().split("|")
		self.key=src;
		return src

	def genSocket(self):
		key = self.genKey()
		s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
		print key;
		dns = socket.gethostbyname_ex(key[2])
		host = dns[2][0]
		port = int(key[3])
		conn = s.connect((host, port))
		# s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
		ss = self.flashVar['fvUserref']+"|"+self.flashVar['fvBrokerId']+"|"+key[1]+"|" + str(int(time.time()+self.difftime)) + "|"+self.flashVar['fvRealtimeClientType']+"|"+config.APIVersion+"\n"
		s.send(ss)
		# print ss
		return s

	def marketSummarySocket(self):
		s = self.genSocket()
		s.send("REG|5\n")
		# print "REG|5\n"
		return s

	def tickerSocket(self):
		s = self.genSocket()
		s.send("REG|4^N~N^N~N^E~D^E~D\n")
		# print "REG|4^N~N^N~N^E~D^E~D\n"
		return s

	def bidofferSocket(self,insts):
		s = self.genSocket()
		ss = "REG|1^"
		for i in range(len(insts)):
			if(i == len(insts) - 1):
				ss+= insts[i]
			else:
				ss+= insts[i]+"~"

		ss += "^^"
		for i in range(len(insts)):
			if(i == len(insts) - 1):
				ss+= "M"
			else:
				ss+= "M~"
		ss += "\n"
		s.send(ss);
		print ss
		# print self.key
		return s
Example #43
CHALLENGE_API = SITE_URL + 'competitions/ultimate-tic-tac-toe/game/challenge/<bot_name>/'

print("TheAiGames Bot Auto-Challenger")
print("")

username = raw_input("Username: "******"[INFO] CBTeamName is ranked " + str(our_rank))

    random.seed(os.urandom(8))
#!/usr/bin/python

from robobrowser import RoboBrowser

browser = RoboBrowser()
browser.open("https://www.noip.com/members/dns/host.php?host_id=48545720")
browser.get_form(id="clogs")
form = browser.get_form(id="clogs")
form["username"] = "******"
form["password"] = "******"
browser.submit_form(form)
browser.get_forms()
browser.submit_form(browser.get_forms()[0])
Example #45
b = RoboBrowser(parser="lxml")
b.open("http://www.chandrashekar.info")
b.get_links()
b.get_links()
b.forms
dir(b)
b.get_links()
b.get_links()[-3]
l = b.get_links()[-3]
b.follow_link(l)
b.url
b.back()
b.url
b.forward()
b.url
b.get_forms()
b.get_forms()[0]
f = b.get_forms()[0]
f
f["name"] = "smith"
f["subject"] = "dslfj lsdjf lsdjf lksdj flsdjf"
f
b.submit_form(f)
import requests
r = requests.get("http://pypi.python.org/pypi", params={":action" : "search",
                               "term" : term,
                               "submit" : "search"})
r = requests.get("http://pypi.python.org/pypi", params={":action" : "search",
                               "term" : "rest",
                               "submit" : "search"})
r
Example #46
def def_vs_scraper(credentials,
                   bucket_name,
                   obj_path,
                   years=default_years,
                   weeks=default_weeks):
    client = boto3.client('s3')
    browser = RoboBrowser()
    browser.open(login_url)
    login_form = browser.get_forms()[0]

    # Set login credentials
    login_form['ctl00$Body$EmailTextbox'].value = credentials['email']
    login_form['ctl00$Body$PasswordTextbox'].value = credentials['password']
    login_form.serialize()

    # Submit login form
    browser.submit_form(login_form)

    # Open the previously hidden page
    for yearIdx, year in enumerate(years):
        year_dict = years[yearIdx]
        year_key = list(year_dict.keys())[0]
        sn = year_dict[year_key]

        for week in weeks:

            for position_ranking in default_position_rankings:
                w = week
                ew = week
                pts_vs_url = 'https://fantasydata.com/nfl-stats/nfl-fantasy-football-points-allowed-defense-by-position.aspx?fs={}&stype=0&sn={}&scope={}&w={}&ew={}&s=&t=0&p=0&st={}&d=1&ls={}&live=false&pid=true&minsnaps=4'.format(
                    fs, sn, scope, w, ew, position_ranking['url'],
                    position_ranking['url'])

                # Delay before retrieving next set of data
                time.sleep(0.5)

                browser.open(pts_vs_url)
                content = browser.find_all('tr')

                # Initialize the data to be written to the file
                formatted_data = ''

                for idx, line in enumerate(content):
                    # Only add the header once per year
                    if idx == 0 and week == 0:
                        formatted_data = headers + '\n'
                    elif idx != 0:
                        parsed_data = ','.join(line.find_all(text=True))
                        stripped_line = parsed_data.strip('\n').strip(',')
                        year_value = str(list(year.keys())[0])
                        next_line = stripped_line + ',' + year_value + '\n'

                        formatted_data = formatted_data + next_line

                # Make the directory for each year of CSV Data
                file_path = '{}/{}/{}/{}.csv'.format(obj_path, year_key,
                                                     week + 1,
                                                     position_ranking['file'])

                try:
                    # Upload object to the S3 bucket
                    client.put_object(Bucket=bucket_name,
                                      Body=formatted_data,
                                      Key=file_path)
                except RuntimeError as err:
                    print('Failed to write to file: ', err)
                    raise err

                print('Success! Uploaded data: {}'.format(file_path))
Example #47
File: dagr.py  Project: orangepole/dagr
class Dagr:
        """deviantArt gallery ripper class"""

        NAME = basename(__file__)
        __version__="0.60"
        MAX_DEVIATIONS = 1000000 # max deviations

        def __init__(self):
                # Internals
                self.browser = None
                self.errors_count = dict()

                # Configuration
                self.username = ""
                self.password = ""
                self.overwrite = False
                self.reverse = False
                self.testOnly = False
                self.verbose = False

                # Current status
                self.deviant = ""

        def start(self):
                if not self.browser:
                        # Set up fake browser
                        self.set_browser()
                # Always run login
                self.login()

        def set_browser(self):
                USERAGENTS = (
                    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1',
                    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101',
                    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
                    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
                    'Opera/9.99 (Windows NT 5.1; U; pl) Presto/9.9.9',
                    'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/ Safari/530.5',
                    'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/6.0',
                    'Mozilla/5.0 (Windows; U; Windows NT 6.1; pl; rv:1.9.1) Gecko/20090624 Firefox/3.5 (.NET CLR 3.5.30729)'
                    )
                session = req_session()
                session.headers.update({'Referer': 'http://www.deviantart.com/'})

                self.browser = RoboBrowser(history=False, session=session, tries=3, user_agent=random.choice(USERAGENTS))

        def login(self):
                if not (self.username and self.password):
                        return
                print("Attempting to log in to deviantArt...")
                self.browser.open('https://www.deviantart.com/users/login?ref=http%3A%2F%2Fwww.deviantart.com%2F&remember_me=1')
                form = self.browser.get_forms()[1]
                form['username'] = self.username
                form['password'] = self.password
                self.browser.submit_form(form)

                if self.browser.find(text=re.compile("The password you entered was incorrect")):
                        print("Wrong password or username. Attempting to download anyway.")
                elif self.browser.find(text=re.compile("\"loggedIn\":true")):
                        print("Logged in!")
                else:
                        print("Login unsuccessful. Attempting to download anyway.")

        def get(self, url, file_name = None):
                if file_name is not None and (self.overwrite == False) and (path_exists(file_name)):
                        print(file_name + " exists - skipping")
                        return
                #TODO Test robobrowser retries and exceptions
                self.browser.open(url)

                if file_name is None:
                        return str(self.browser.parsed)
                else:
                        # Open our local file for writing
                        local_file = open(file_name, "wb")
                        #Write to our local file
                        local_file.write(self.browser.response.content)
                        local_file.close()

        def find_link(self, link):
                filelink = None
                mature_error = False
                self.browser.open(link)
                # Full image link (via download link)
                img_link = self.browser.get_link(text=re.compile("Download( (Image|File))?"))
                if img_link and img_link.get("href"):
                        self.browser.follow_link(img_link)
                        filelink = self.browser.url
                else:
                        if self.verbose:
                                print("Download link not found, falling back to direct image")
                        # Fallback 1: try meta (filtering blocked meta)
                        filesearch = self.browser.find("meta", {"name":"og:image"})
                        if filesearch:
                                filelink = filesearch['content']
                                if basename(filelink).startswith("noentrythumb-"):
                                        filelink = None
                                        mature_error = True
                        if not filelink:
                                # Fallback 2: try collect_rid, full
                                filesearch = self.browser.find("img", {"collect_rid":True, "class":re.compile(".*full")})
                                if not filesearch:
                                # Fallback 3: try collect_rid, normal
                                        filesearch = self.browser.find("img", {"collect_rid":True, "class":re.compile(".*normal")})
                                if filesearch:
                                        filelink = filesearch['src']

                        if not filelink:
                                if mature_error:
                                        raise DagrException("probably a mature deviation")
                                else:
                                        raise DagrException("all attemps to find a link failed")

                filename = basename(filelink)
                return (filename, filelink)

        def handle_download_error(self, link, e):
                error_string = str(e)
                print("Download error (" + link + ") : " + error_string)
                if error_string in self.errors_count:
                        self.errors_count[error_string] += 1
                else:
                        self.errors_count[error_string] = 1

        def deviant_get(self, mode):
                print("Ripping " + self.deviant + "'s " + mode + "...")
                pat = "http://[a-zA-Z0-9_-]*\.deviantart\.com/art/[a-zA-Z0-9_-]*"
                modeArg = '_'
                if mode.find(':') != -1:
                        mode = mode.split(':',1)
                        modeArg = mode[1]
                        mode = mode[0]

                #DEPTH 1
                pages = []
                for i in range(0,int(Dagr.MAX_DEVIATIONS/24),24):
                        html = ""
                        url = ""

                        if mode == "favs":
                                url = "http://" + self.deviant.lower() + ".deviantart.com/favourites/?catpath=/&offset=" + str(i)
                        elif mode == "collection":
                                url = "http://" + self.deviant.lower() + ".deviantart.com/favourites/" + modeArg + "?offset=" + str(i)
                        elif mode == "scraps":
                                url = "http://" + self.deviant.lower() + ".deviantart.com/gallery/?catpath=scraps&offset=" + str(i)
                        elif mode == "gallery":
                                url = "http://" + self.deviant.lower() + ".deviantart.com/gallery/?catpath=/&offset=" + str(i)
                        elif mode == "album":
                                url = "http://" + self.deviant.lower() + ".deviantart.com/gallery/" + modeArg + "?offset=" + str(i)
                        elif mode == "query":
                                url = "http://" + self.deviant.lower() + ".deviantart.com/gallery/?q=" + modeArg + "&offset=" + str(i)
                        else:
                                continue

                        html = self.get(url)
                        prelim = re.findall(pat, html, re.IGNORECASE|re.DOTALL)

                        c = len(prelim)
                        for match in prelim:
                                if match in pages:
                                        c -= 1
                                else:
                                        pages.append(match)

                        done = re.findall("(This section has no deviations yet!|This collection has no items yet!)", html, re.IGNORECASE|re.S)

                        if len(done) >= 1 or c <= 0:
                                break

                        print(self.deviant + "'s " +  mode + " page " + str(int((i/24)+1)) + " crawled...")

                if not self.reverse:
                        pages.reverse()

                if len(pages) == 0:
                        print(self.deviant + "'s " + mode + " had no deviations.")
                        return 0
                else:
                        try:
                                da_make_dirs(self.deviant + "/" + mode)
                                if (mode == "query") or (mode == "album") or (mode == "collection"):
                                    da_make_dirs(self.deviant + "/" + mode + "/" + modeArg)
                        except Exception as e:
                                print(str(e))
                        print("Total deviations in " + self.deviant + "'s gallery found: " + str(len(pages)))

                ##DEPTH 2
                counter2 = 0
                for link in pages:
                        counter2 += 1
                        if self.verbose:
                                print("Downloading " + str(counter2) + " of " + str(len(pages)) + " ( " + link + " )")
                        filename = ""
                        filelink = ""
                        try:
                                filename,filelink = self.find_link(link)
                        except (KeyboardInterrupt, SystemExit):
                                raise
                        except Exception as e:
                                self.handle_download_error(link, e)
                                continue

                        if self.testOnly == False:
                                if (mode == "query") or (mode=="album") or (mode == "collection"):
                                        self.get(filelink, self.deviant + "/" + mode + "/" + modeArg + "/" + filename)
                                else:
                                        self.get(filelink, self.deviant + "/" + mode + "/" + filename)
                        else:
                                print(filelink)

                print(self.deviant + "'s gallery successfully ripped.")

        def group_get(self, mode):
                if mode == "favs":
                        strmode  = "favby"
                        strmode2 = "favourites"
                        strmode3 = "favs gallery"
                elif mode == "gallery":
                        strmode  = "gallery"
                        strmode2 = "gallery"
                        strmode3 = "gallery"
                else:
                        print("?")
                        sys.exit()
                print("Ripping " + self.deviant + "'s " + strmode2 + "...")

                folders = []

                insideFolder = False
                #are we inside a gallery folder?
                html = self.get('http://' + self.deviant + '.deviantart.com/' + strmode2 + '/')
                if re.search(strmode2 + "/\?set=.+&offset=", html, re.IGNORECASE|re.S):
                        insideFolder = True
                        folders = re.findall(strmode + ":.+ label=\"[^\"]*\"", html, re.IGNORECASE)

                #no repeats
                folders = list(set(folders))

                i = 0
                while not insideFolder:
                        html = self.get('http://' + self.deviant + '.deviantart.com/' + strmode2 + '/?offset=' + str(i))
                        k = re.findall(strmode + ":" + self.deviant + "/\d+\"\ +label=\"[^\"]*\"", html, re.IGNORECASE)
                        if k == []:
                                break
                        flag = False
                        for match in k:
                                if match in folders:
                                        flag = True
                                else:
                                        folders+=k
                        if self.verbose:
                                print("Gallery page " + str(int((i/10) + 1)) + " crawled...")
                        if flag:
                                break
                        i += 10

                #no repeats
                folders = list(set(folders))

                if len(folders) == 0:
                        print(self.deviant + "'s " +  strmode3 + " is empty.")
                        return 0
                else:
                        print("Total folders in " + self.deviant + "'s " + strmode3 + " found: " + str(len(folders)))

                if self.reverse:
                        folders.reverse()

                pat = "http:\\/\\/[a-zA-Z0-9_-]*\.deviantart\.com\\/art\\/[a-zA-Z0-9_-]*"
                pages = []
                for folder in folders:
                        try:
                                folderid = re.search("[0-9]+",folder,re.IGNORECASE).group(0)
                                label = re.search("label=\"([^\"]*)",folder,re.IGNORECASE).group(1)
                        except:
                                continue
                        for i in range(0,int(Dagr.MAX_DEVIATIONS/24),24):
                                html = self.get("http://" + self.deviant.lower() + ".deviantart.com/" + strmode2 + "/?set=" + folderid + "&offset=" + str(i - 24))
                                prelim = re.findall(pat, html, re.IGNORECASE)
                                if not prelim:
                                        break
                                for x in prelim:
                                        p = str(re.sub(r'\\/','/',x))
                                        if p not in pages:
                                                pages.append(p)
                                if self.verbose:
                                        print("Page " + str(int((i/24) + 1)) + " in folder " + label + " crawled...")

                        if not self.reverse:
                                pages.reverse()

                        try:
                                if mode == "favs":
                                        da_make_dirs(self.deviant + "/favs/" + label)
                                elif mode == "gallery":
                                        da_make_dirs(self.deviant + "/" + label)
                        except Exception as err:
                                print(err)
                        counter = 0
                        for link in pages:
                                counter += 1
                                if self.verbose:
                                        print("Downloading " +  str(counter) +  " of " + str(len(pages)) +  " ( " + link + " )")
                                filename = ""
                                filelink = ""
                                try:
                                        filename,filelink = self.find_link(link)
                                except (KeyboardInterrupt, SystemExit):
                                        raise
                                except Exception as e:
                                        self.handle_download_error(link, e)
                                        continue

                                if self.testOnly == False:
                                        if mode == "favs":
                                                self.get(filelink, self.deviant + "/favs/" + label + "/" + filename)
                                        elif mode == "gallery":
                                                self.get(filelink, self.deviant + "/" + label + "/" + filename)
                                else:
                                        print(filelink)

                print(self.deviant + "'s " + strmode3 + " successfully ripped.")

        def print_errors(self):
                if len(self.errors_count):
                        print("Download errors count:")
                        for error, count in self.errors_count.items():
                                print("* " + error + " : " + str(count))