예제 #1
0
    def show(self):
        """Log into Codeforces with self.username/self.password and print
        the account's friend list, one handle per line."""
        browser = RoboBrowser(parser='html.parser')
        browser.open('http://codeforces.com/enter')
        login_form = browser.get_form('enterForm')
        login_form['handleOrEmail'] = self.username
        login_form['password'] = self.password
        browser.submit_form(login_form)

        # The handle shows up in the page captions only when login worked.
        try:
            captions = [tag.getText()[1:].strip()
                        for tag in browser.select('div.caption.titled')]
            if self.username not in captions:
                click.secho('Login Failed.. Wrong password.', fg='red')
                return
        except Exception:
            click.secho('Login Failed.. Maybe wrong id/password.', fg='red')
            return

        browser.open('http://codeforces.com/friends')
        soup = browser.parsed()[0]  # no need of soup
        rows = soup.findAll(
            'div',
            {'class': 'datatable'})[0].findAll('table')[0].findAll('tr')[1:]

        # The second cell of each data row holds the friend's handle.
        for row in rows:
            print(row.findAll('td')[1].getText().strip())
예제 #2
0
def get_det(tid):
    """Look up the roll number/password registered for *tid*, log into the
    student portal and return a formatted string of profile details.

    Returns "Something is wrong" when the expected table cells are missing
    (e.g. the login failed and a different page was served).
    """
    tindex = gid.index(tid)
    rno = rid[tindex]
    pas = pid[tindex]
    print(rno, pas)
    # BUG FIX: the original built two browsers; the second one silently
    # discarded the first's history/parser settings. Build one browser
    # carrying all three options.
    br = RoboBrowser(
        history=True,
        parser="html.parser",
        user_agent=
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
    )
    br.open('http://studentscorner.vardhaman.org')
    form = br.get_form(action="")
    form["rollno"] = rno
    form["wak"] = pas
    br.submit_form(form)
    print(rno)
    br.open("http://studentscorner.vardhaman.org/student_information.php")
    th = br.select("th")  #3
    td = br.select("td")  #8
    print("In details " + rno)
    try:
        # Fixed cell indices matching the layout of student_information.php;
        # brittle by design — IndexError means the page shape changed or the
        # login did not succeed.
        return (str(th[3].text.strip()) + ":" + str(td[8].text.strip()) +
                "\n" + str(th[10].text.strip()) + ":" +
                str(td[17].text.strip()) + "\n" + str(th[29].text.strip()) +
                ":" + (str(td[33].text.strip())) + "\n" +
                str(th[31].text.strip()) + ":" + str(td[35].text.strip())
                )  #details
    except IndexError:
        return ("Something is wrong")
예제 #3
0
    def kuai_dai_li(self):
        '''
        Scrape free proxies from kuaidaili.com ("Kuai proxy").
        :return: None; every proxy found is passed to self._format_ip().
                 (NOTE(review): the original comment said the page data may
                 need a headless browser to fetch — confirm.)
        '''
        url = 'https://www.kuaidaili.com/free/intr/'
        headers = {
            'User-Agent':
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_ ≥14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36",
            'Connection': 'close'
        }

        result = self.request_proxy('GET', url, headers=headers)
        soup = BeautifulSoup(result.text, 'html.parser')
        # BUG FIX: find('tr') returns a single row and the loop then iterated
        # that row's *children*; find_all('tr') yields every table row.
        rows = soup.find('table',
                         class_='layui-table').find('tbody').find_all('tr')
        for row in rows:
            cells = row.find_all('td')
            host = cells[0].get_text().lower()
            port = cells[1].get_text().strip()
            self._format_ip({'http': f'http://{host}:{port}'})

        browser = RoboBrowser()
        browser.open(url)
        time.sleep(3)
        # BUG FIX: a space in a CSS selector means "descendant element"; the
        # three classes of one table must be chained with dots.
        table = browser.select('table.table.table-bordered.table-striped')
        print(table)
예제 #4
0
    def submit(self):
        """Log into Codeforces and submit self.inputfile for self.prob_id,
        then print the verdict via Submit.print_verdict."""
        last_id, b, c, d, e = Submit.get_latest_verdict(self.username)

        browser = RoboBrowser(parser='html.parser')
        browser.open('http://codeforces.com/enter')
        login_form = browser.get_form('enterForm')
        login_form['handleOrEmail'] = self.username
        login_form['password'] = self.password
        browser.submit_form(login_form)

        # Login succeeded iff the handle appears among the page captions.
        try:
            captions = [tag.getText()[1:].strip()
                        for tag in browser.select('div.caption.titled')]
            if self.username not in captions:
                click.secho('Login Failed.. Wrong password.', fg='red')
                return
        except Exception:
            click.secho('Login Failed.. Maybe wrong id/password.', fg='red')
            return

        browser.open('http://codeforces.com/problemset/submit')
        form = browser.get_form(class_='submit-form')
        form['submittedProblemCode'] = self.prob_id
        form['sourceFile'] = self.inputfile
        browser.submit_form(form)

        # A successful submission redirects to the .../status page.
        if not browser.url.endswith('status'):
            click.secho(
                'Failed submission, probably you have submit the same file before',
                fg='red')
            return

        Submit.print_verdict(last_id, self.username, 100)
        click.secho('[{0}] submitted ...'.format(self.inputfile), fg='green')
예제 #5
0
def check_cred(rno, pas):
    """Validate (rno, pas) against the student portal.

    Returns a Flask JSON response: {'site': 'down'} when the portal is
    unreachable, {'valid': 'True', ...name...} on a successful login, or
    {'valid': 'False'} otherwise.
    """
    status = check_url()
    if (status == "down"):
        return jsonify({'site': 'down'})
    else:
        # BUG FIX: the original created two browsers and the second one
        # dropped the first's history/parser settings; build one browser.
        br = RoboBrowser(
            history=True,
            parser="html.parser",
            user_agent=
            'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
        )
        br.open('http://studentscorner.vardhaman.org')
        form = br.get_form(action="")
        form["rollno"] = rno
        form["wak"] = pas
        br.submit_form(form)
        # BUG FIX: str(br.select) stringified the bound method object itself;
        # the roll number can only appear in the parsed page content.
        checkrno = str(br.parsed)
        if (rno in checkrno):
            br.open(
                "http://studentscorner.vardhaman.org/student_information.php")
            td = br.select("td")  #8
            name = str(td[8].text.strip())
            # BUG FIX: 'finalurl' was undefined here and raised NameError.
            print("In check_pas", rno)
            return jsonify({
                'valid': 'True',
                'rollno': rno,
                'pas': pas,
                'name': name
            })
        else:
            return jsonify({'valid': 'False'})
예제 #6
0
def date_wise_activity_diary(rno, pas):
    """Log into the portal and return the attendance percentage shown on the
    student-wise attendance register.

    Returns the percentage string, an apology message when the table is
    truncated (attendance frozen), or None when the label is absent.
    """
    # BUG FIX: the original built two browsers; the second discarded the
    # first's history/parser settings. One browser carries all options.
    br = RoboBrowser(
        history=True,
        parser="html.parser",
        user_agent=
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
    )
    br.open('http://studentscorner.vardhaman.org')
    form = br.get_form(action="")
    form["rollno"] = rno
    form["wak"] = pas
    br.submit_form(form)
    br.open(
        "http://studentscorner.vardhaman.org/Studentwise_AttendanceRegister.php"
    )
    th = br.select("th")  #3
    try:
        # Scan the header cells for the label; the percentage sits in the
        # very next cell. Running past the table raises IndexError.
        for i in range(10, 99):
            if (str(th[i].text.strip()) == "Attendance Percentage"):
                print(str(th[i + 1].text.strip()))
                return (str(th[i + 1].text.strip()))
    except IndexError:
        return (
            "Attendance is Freesed.If attendance is not freesed you can see it in the website send the mail to the developer stating the issue."
        )
예제 #7
0
def captura(url):  # visit a url and capture its information
    """Visit *url*, merge its RDFa into the global graph and queue every
    valid, unseen outgoing link for a later visit."""
    global vetor_links
    global visitados_links
    global grafo

    print('-----' + url + '-------')

    visitados_links.append(url)  # mark the current url as visited
    vetor_links.remove(url)      # drop it from the to-visit queue
    browser = RoboBrowser()

    try:
        browser.open(url_navegavel(url))  # open the url
        print("++++" + url_navegavel(url) + "+++")
        if browser.response.status_code != 200:  # server must answer OK
            return

        g = extrair_rdfa(url)
        grafo = merge_graphs(grafo, g)  # fold the page graph into the global one

        # Walk every anchor tag, fix its href and queue it while we are
        # still under the page budget.
        for link in browser.select('a'):
            if link.has_attr('href') and valida_url(link['href']):
                if corrige_url(link['href'], url) not in vetor_links and corrige_url(link['href'], url) not in visitados_links and (len(visitados_links) + len(vetor_links) < QUANTIDADE_PAGINAS):
                    vetor_links.append(corrige_url(link['href'], url))

    except:
        print("erro")
        e = sys.exc_info()[0]
        traceback.print_exc(file=sys.stdout)
        print(e)
예제 #8
0
def cgpa(rno, pas):
    """Log into the portal and return "name:...\\nCumulative Grade Point
    Average:X.XX" scraped from the credit register page, or an error string.
    """
    # BUG FIX: the original created two browsers; the second one dropped the
    # first's history/parser settings. One browser carries all options.
    br = RoboBrowser(
        history=True,
        parser="html.parser",
        user_agent=
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
    )
    br.open('http://studentscorner.vardhaman.org')
    form = br.get_form(action="")
    form["rollno"] = rno
    form["wak"] = pas
    br.submit_form(form)
    br.open(
        "http://studentscorner.vardhaman.org/src_programs/students_corner/CreditRegister/credit_register.php"
    )
    th = br.select("th")  #3
    td = br.select("td")  #8
    # BUG FIX: str(br.select) stringified the bound method itself, so
    # c.index(t) below always raised an uncaught ValueError; the marker text
    # can only be found in the rendered page.
    c = str(br.parsed)
    t = "Cumulative Grade Point Average"
    try:
        i = c.index(t)  # ValueError when the marker is absent (bad login)
        # The CGPA digits sit at a fixed character offset past the marker —
        # brittle, but matches this page's markup.
        return (str(th[1].text.strip()) + ":" + str(td[7].text.strip()) +
                "\n" + "Cumulative Grade Point Average:" + c[i + 33] +
                c[i + 34] + c[i + 35] + c[i + 36])
    except (IndexError, ValueError):
        return (
            "Something went wrong send the report to [email protected] stating the issue, with your rollno"
        )
예제 #9
0
def scrape(card_no):
    """Google the card's name and persist each search hit as a child Card
    of a freshly created parent Card, then redirect to '/'."""
    serch_card = Card(card_no)

    # -- scraping --
    browser = RoboBrowser(parser='html.parser')
    browser.open('https://www.google.co.jp/')

    search_form = browser.get_form(action='/search')
    search_form['q'] = serch_card.name
    browser.submit_form(search_form,
                        list(search_form.submit_fields.values())[0])

    new_card = Card()
    new_card.name = serch_card.name
    # placeholder image chosen at random for now
    new_card.image = random.choice(
        [os.path.basename(f) for f in glob.glob('./selectImages/*.*')])
    new_card.description = "検索結果:" + serch_card.name
    new_card.create()

    # Each result heading becomes a child card of the search card.
    for anchor in browser.select('h3 > a'):
        child = Card()
        child.name = anchor.text[0:32].encode("utf-8")
        child.image = random.choice(
            [os.path.basename(f) for f in glob.glob('./selectImages/*.*')])
        child.description = anchor.get('href')
        child.parent_no = new_card.no
        child.create()

    redirect('/')
예제 #10
0
def login(handle):
    """Prompt for *handle*'s password and sign into Codeforces.

    Returns the logged-in RoboBrowser, or None on failure.
    """
    password = getpass('[Secured] Password of {}: '.format(handle))

    print('> Signing in...')

    try:
        browser = RoboBrowser(parser='lxml')
        browser.open('http://codeforces.com/enter')
        form = browser.get_form('enterForm')
        form['handleOrEmail'] = handle
        form['password'] = password
        browser.submit_form(form)

        captions = [tag.getText()[1:].strip()
                    for tag in browser.select('div.caption.titled')]

        # Case-insensitive membership test: the handle only shows up in the
        # captions once the session is authenticated.
        if handle.lower() in str(captions).lower():
            print('> Success!')
            return browser
        print('> !!! Login Failed. Please enter valid credentials')
        return None
    except Exception as e:
        print('>', e)
        return None
예제 #11
0
def fetch(url):
    """Crawl every vote page linked from *url* and accumulate the scores.

    Returns a dict mapping title -> (summed score, number of votes seen).
    """
    browser = RoboBrowser(history=True, parser="html.parser")
    browser.open(url)

    vote_links = browser.select('.moderatorenSlider a.beitrag')
    seen_hrefs = set()
    totals = {}

    for link in vote_links:
        href = link["href"]
        # Visit each distinct page only once.
        if href in seen_hrefs:
            continue
        seen_hrefs.add(href)
        print(href)
        browser.follow_link(link)
        try:
            scores = extractVotes(browser)
            print(scores)
            for title, score in scores.items():
                previous = totals.get(title)
                if previous is None:
                    totals[title] = (score, 1)
                else:
                    acc, count = previous
                    totals[title] = (acc + score, count + 1)
        except Exception as e:
            print(e)
        browser.back()
    return totals
예제 #12
0
def pypi_search(term):
    """Search pypi.org for *term* and return up to five result records,
    each a dict with index, href, name and description.

    Raises ValueError when either the front page or the search submission
    does not return an OK response.
    """
    from robobrowser import RoboBrowser
    browser = RoboBrowser(parser="lxml")
    browser.open("https://pypi.org/")
    if not browser.response.ok:
        raise ValueError("Failed at https://pypi.org/")
    search_form = browser.get_form()
    search_form["q"] = term
    browser.submit_form(search_form)
    if not browser.response.ok:
        raise ValueError("Failed at form submit")

    # One record per result snippet, capped at the first five hits.
    return [
        {
            "index": i,
            "href": link["href"],
            "name": link.span.text,
            "description": link.p.text,
        }
        for i, link in enumerate(browser.select("a.package-snippet")[:5])
    ]
예제 #13
0
def login_to_cf(username, password):
    """Create a Codeforces session for *username* using RoboBrowser.

    Returns (True, browser) on success, (False, browser) on failure, and
    records the handle in the module-level sessionUser on success.
    """
    logger(
        'info', 'Trying to login into codeforces for the handle : {0}'.format(
            username))

    try:
        browser = RoboBrowser(parser='html.parser')
        browser.open('http://codeforces.com/enter')

        form = browser.get_form('enterForm')
        form['handleOrEmail'] = username
        form['password'] = password
        browser.submit_form(form)

        # A logged-in page shows the handle in its captions at least once.
        occurrences = str(browser.select('div.caption.titled')).count(username)
        if username == "" or occurrences == 0:
            logger('error', 'Login Failed.. Wrong password.')
            return (False, browser)
    except Exception:
        logger('error', 'Login Failed.. Maybe wrong id/password.')
        return (False, browser)

    global sessionUser
    sessionUser = username
    logger('success', 'Login successful, Welcome {0}!'.format(sessionUser))
    return (True, browser)
예제 #14
0
def soov_fetch(OTSING):
    """Scrape soov.ee classifieds for search term *OTSING*.

    Returns a list of rows shaped
        ['#<counter>', url, name, image_url, ['-', price], 'SOOV']
    covering every "for sale" listing on every result page, or an empty
    list when the search produces no pagination block (i.e. no results).
    """
    import re
    from robobrowser import RoboBrowser
    browser = RoboBrowser()
    browser.open('https://soov-ee.postimees.ee/keyword-' + OTSING +
                 '/listings.html')

    # The last-but-one <li> of the pagination block carries the highest page
    # number; it is parsed out of the raw <a ...html">N</a> markup. Any
    # failure here (typically: no results, hence no pagination) ends the
    # search with an empty list.
    try:
        page_numbers = int(
            str(
                browser.select('.pagination')[0].select('li')
                [len(browser.select('.pagination')[0].select('li')) -
                 2]).split('.html">')[1].split('</a')[0])
    except:
        all_items = []
        return all_items

    all_items = []
    for page_number in range(1, page_numbers + 1):
        browser.open("https://soov-ee.postimees.ee/keyword-" + OTSING + "/" +
                     str(page_number) + "/listings.html")
        items = browser.select('.item-list')
        counter = 0
        for item in items[
                2:]:  # the first two entries sit above the listings (a "save this search" box and a zero-results/foreign-ads disclaimer) but share the same class name
            # "Soovin" marks wanted ads — intentionally ignored (no-op branch).
            if 'Soovin' in str(item.select('span.thin')):
                pass
            # "Müüa" marks for-sale ads — these are collected.
            if 'Müüa' in str(item.select('span.thin')):
                counter += 1
                # The listing name comes from the image's alt attribute.
                NAME = str(item.select('img')).replace('[<img alt="',
                                                       '').split('" class')[0]
                if 'alt="' in NAME:
                    NAME = NAME.split('alt="')[1]
                IMG_URL = str(item.select('img')).replace(
                    '"/>]', '').split('src="')[1].split('"/>')[0]
                # Price text precedes the euro sign; listings without a
                # price fall back to '-'.
                try:
                    PRICE = str(item.select('.item-price')).split(
                        'margin">')[1].split('€<')[0]
                    PRICE = ['-', PRICE]
                except:
                    PRICE = '-'
                    PRICE = ['-', PRICE]
                # Listing URL comes from the first anchor inside the title.
                URL = str(item.select('.add-title')[0].select('a')).split(
                    'ref="')[1].split('">')[0]
                all_items.append(
                    ['#' + str(counter), URL, NAME, IMG_URL, PRICE, 'SOOV'])
    return all_items
예제 #15
0
def main():
    """Command-line Codeforces submitter: parse arguments, read the password
    from judgerConfig, log in, submit the file and poll for the verdict."""
    parser = argparse.ArgumentParser(
        description='Submit codeforces in command line')
    parser.add_argument('user', type=str,
                        help='Your codeforces ID')
    parser.add_argument('prob', type=str,
                        help='Codeforces problem ID (Ex: 33C)')
    parser.add_argument('file', type=str,
                        help='path to the source code')
    args = parser.parse_args()

    user_name = args.user
    last_id, _ = get_submission_data(user_name)

    try:
        passwd = judgerConfig['codeforces'][user_name]
    except Exception:
        print("Configuration Failure.")
        return

    browser = RoboBrowser(parser='lxml')
    browser.open('http://codeforces.com/enter')

    login_form = browser.get_form('enterForm')
    login_form['handleOrEmail'] = user_name
    login_form['password'] = passwd
    browser.submit_form(login_form)

    # Login succeeded iff the handle appears among the page captions.
    try:
        captions = [tag.getText()[1:].strip()
                    for tag in browser.select('div.caption.titled')]
        if user_name not in captions:
            print("Login Failed.. probably because you've typed"
                  "a wrong password.")
            return
    except Exception:
        print("Login Failed.. probably because you've typed"
              "a wrong password.")
        return

    browser.open('http://codeforces.com/problemset/submit')
    submit_form = browser.get_form(class_='submit-form')
    submit_form['submittedProblemCode'] = args.prob
    submit_form['sourceFile'] = args.file
    browser.submit_form(submit_form)

    # A successful submission redirects to the .../status page.
    if not browser.url.endswith('status'):
        print('Your submission has failed, probably '
              'because you have submit the same file before.')
        return

    print('Submitted, wait for result...')
    # Poll every 5s until a fresh, final verdict shows up.
    while True:
        id_, verdict = get_submission_data(user_name)
        if id_ != last_id and verdict != 'TESTING':
            print('Verdict = {}'.format(verdict))
            break
        time.sleep(5)
예제 #16
0
def DownloadPronunciations(words):
    """Log into Forvo and download the Swedish pronunciation mp3 for each
    word in *words* into saveDirectory.

    Returns the list of file paths written; words with no download link or
    that hit an error are reported and skipped.
    """
    print("Aiming to download " + str(words))
    browser = RoboBrowser(history=True, parser="html5lib")
    print ("Connecting to Forvo...")
    browser.open('http://www.forvo.com/login/')
    form = browser.get_form(action=re.compile(r'login'))
    form["login"].value = forvoUsername
    form["password"].value = forvoPassword
    browser.submit_form(form)

    filepaths = []
    for word in words:
        try:
            print ("Trying to download; " + word)
            #The #sv tells it to look for sverige!
            wordUrl = "http://www.forvo.com/word/" + word + "/#sv"
            browser.open(wordUrl)

            # Percent-encode the Swedish letters for the download URL.
            ConvertedDownloadWord = word
            ConvertedDownloadWord = ConvertedDownloadWord.replace("ö", "%C3%B6")
            ConvertedDownloadWord = ConvertedDownloadWord.replace("å", "%C3%A5")
            ConvertedDownloadWord = ConvertedDownloadWord.replace("ä", "%C3%A4")
            searchString = '"/download/mp3/' + ConvertedDownloadWord + '/sv/"'

            downloads = browser.select('a[href*='+searchString+']')

            if downloads:
                try:
                    fullDownloadUrl = downloads[0].attrs["href"]
                    print ('Attempt to download mp3 from ' + fullDownloadUrl)
                    browser.open(fullDownloadUrl)
                    mp3Response = browser.response

                    filepath = saveDirectory + word + ".mp3"
                    # 'with' guarantees the file is closed even if the write
                    # fails (the original leaked the handle on error).
                    with open(filepath, 'wb') as mp3_file:
                        mp3_file.write(mp3Response.content)
                    filepaths.append(filepath)
                except IndexError:
                    print ("Could not load the webpage ", fullDownloadUrl)
                except NameError:
                    print ("I'm not sure what this was. Maybe an incorrectly encoded string")
                except:
                    print("Unexpected error while downloading:", sys.exc_info()[0])
            else:
                print ("Couldn't find a download link :(.")

        except NameError:
            # BUG FIX: the original printed the undefined name 'searchWord',
            # which raised a fresh NameError inside this very handler.
            print ("Couldn't find ", word)
        except :
            print("Unexpected error while searching:", sys.exc_info()[0])
    return filepaths
예제 #17
0
def period_attendance(rno, pas):
    """Return today's period-wise attendance as a JSON string.

    The JSON maps each date cell to "PRESENCE_-_PERIOD_-_TOPIC" and adds a
    "status" key; when nothing could be scraped the result is
    {"status": "None"}. Returns "down" when the portal is unreachable.
    """
    stat = check_url(
        rno, pas, "http://studentscorner.vardhaman.org/student_attendance.php")
    if (stat == "down"):
        return ("down")
    else:
        d = {}
        # BUG FIX: the original created two browsers and the second one
        # discarded the first's history/parser settings; one browser now
        # carries all options.
        br = RoboBrowser(
            history=True,
            parser="html.parser",
            user_agent=
            'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
        )
        br.open('http://studentscorner.vardhaman.org')
        form = br.get_form(action="")
        form["rollno"] = rno
        form["wak"] = pas
        br.submit_form(form)
        br.open("http://studentscorner.vardhaman.org/student_attendance.php")
        td = br.select("td")  #8
        try:
            # Attendance rows are 4 cells wide: date, period, topic, flag.
            for i in range(1, 49, 4):
                present = td[i + 3].text.strip().upper()  # Present flag
                period = td[i + 1].text.strip()  # Period
                topic = td[i + 2].text.strip()  # Topic
                topic = topic[0].upper() + topic[1:].lower()
                # The original's if/else branches were identical; collapsed.
                d[str(td[i].text.strip())] = (
                    present + "_-_" + period + "_-_" + topic)
        except IndexError:
            pass  # fewer rows than expected — keep what we scraped
        if (not d):
            d = {"status": "None"}
            pa = json.dumps(d)
            return (pa)
        else:
            d["status"] = "True"
            pa = json.dumps(d)
            print(type(pa))
            return (pa)
예제 #18
0
def get_stock_id_pages(base_url):
    """Return the 'data-href' URL of every table row found on *base_url*."""
    browser = RoboBrowser(parser='html.parser')
    browser.open(base_url)
    # Each <tbody> row carries its detail-page URL in a data attribute.
    return [row.get('data-href') for row in browser.select('tbody > tr')]
예제 #19
0
def attendance(tid, rno, pas):
    """Send the attendance percentage and per-period attendance for (rno,
    pas) to Telegram chat *tid*. Always returns 1.
    """
    # BUG FIX: the original created two browsers; the second silently
    # dropped the first's history/parser settings. One browser carries all
    # options.
    br = RoboBrowser(
        history=True,
        parser="html.parser",
        user_agent=
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
    )
    br.open('http://studentscorner.vardhaman.org')
    form = br.get_form(action="")
    form["rollno"] = rno
    form["wak"] = pas
    br.submit_form(form)
    br.open("http://studentscorner.vardhaman.org/student_attendance.php")
    th = br.select("th")  #3
    td = br.select("td")  #8
    try:
        # The header cell right after the "Attendance Percentage" label
        # holds the value; running off the table raises IndexError.
        for i in range(40, 60):
            if (str(th[i].text.strip()) == "Attendance Percentage"):
                bot.send_message(tid,
                                 str(th[i].text.strip()) + " : *" +
                                 str(th[i + 1].text.strip()) + "*",
                                 parse_mode='Markdown')  #attend
    except IndexError:
        bot.send_message(
            tid,
            "*Attendance is Freesed*.\nIf attendance is not freesed you can see it in the website send the mail to \n *[email protected]*\nstating the issue.",
            parse_mode='Markdown')
    try:
        # Attendance rows are 4 cells wide: date, period, topic, flag.
        for i in range(9, 37, 4):
            t = td[i + 3].text.strip().upper()
            p = td[i + 1].text.strip()
            d = td[i + 2].text.strip()
            d = d[0].upper() + d[1:].lower()
            if (t == "PRESENT"):
                bot.send_message(tid,
                                 str(td[i].text.strip()) + "   " +
                                 str(td[i + 1].text.strip()) + "   " + d +
                                 "  -  <b>" + t + "</b>",
                                 parse_mode='Html')  #attend)
            else:
                bot.send_message(tid,
                                 str(td[i].text.strip()) + "   " + p + "   " +
                                 d + "  -  ~<i>" + t + "</i>~",
                                 parse_mode='Html')  #attend)
    except IndexError:
        pass  # fewer rows than expected — stop quietly
    return 1
예제 #20
0
def get_list_pages(base_url):
    """Return the absolute URL behind the first link of each pager item on
    *base_url*."""
    browser = RoboBrowser(parser='html.parser')
    browser.open(base_url)
    pages = []
    for item in browser.select('ul.pager > li'):
        first_link = item.select('a')[0]
        # Pager hrefs may be relative; resolve them against the base URL.
        pages.append(urljoin(base_url, first_link.get('href')))
    return pages
예제 #21
0
def make_booking(url, num_people, lead_name, hotel_name, email, country, tel):
    """Fill in and submit the reservation form at *url*.

    Returns 0 on success, or an error code: 1 reservation ended, 2 that
    time is not accepted, 3 outside the reservation period, 4 reservations
    not being accepted now, 5 capacity over, 6 unknown error message.
    """
    logging.info('[%s] requesting %s' %
                 (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), url))
    browser = RoboBrowser(history=False, user_agent=random.choice(user_agents))
    browser.open(url)

    # Map each known (Japanese) error message to its return code, in the
    # same precedence order the site shows them.
    err = browser.select('div.error-message')
    if len(err) > 0:
        message_text = str(err[0])
        known_errors = [
            ('指定日時の予約受け付けは終了しました。', 1),  # reservation ended
            ('指定時間は予約を受け付けておりません', 2),    # time not accepted
            ('予約受付期間外です', 3),                      # outside period
            ('ただ今予約は受け付けておりません', 4),        # not accepting now
        ]
        for needle, code in known_errors:
            if needle in message_text:
                return code
        return 6  # unknown error

    form = browser.get_form()

    # Fill in the booking details.
    form['booking[client][adult]'].value = num_people
    form['booking[client][name]'].value = lead_name
    form['booking[client][company]'].value = hotel_name
    form['booking[client][email]'].value = email
    form['booking[client][email2]'].value = email
    form['booking[client][country]'].value = country
    form['booking[client][tel]'].value = tel
    browser.submit_form(form)

    print(browser.parsed)

    err = browser.select('div.error-message')
    if len(err) > 0 and '定員オーバーのため予約不可です' in str(err[0]):
        return 5  # capacity over

    # Confirmation page: submit it once more to finalize the booking.
    form2 = browser.get_form()
    browser.submit_form(form2)

    return 0  # success!
예제 #22
0
def draw_leaderboard(screen, width, height):
    """Retrieve the Investopedia simulator ranking and draw the top ten
    usernames on screen in two columns of five.

    Args:
        screen: the display object
        width: display width
        height: display height
    """
    clear_leaderboard(screen, width, height)
    email, password = get_credentials()

    # Sign in so the ranking page is accessible.
    base_url = 'http://www.investopedia.com/accounts/login.aspx?'
    browser = RoboBrowser(parser='lxml', history=True)
    browser.open(base_url)
    login_form = browser.get_form(id='account-api-form')
    login_form['email'] = email
    login_form['password'] = password
    browser.submit_form(login_form)

    browser.open('http://www.investopedia.com/simulator/ranking/')
    leaderboard = browser.select('tr')

    x = width / 15
    y = 9 * height / 16

    for rank in range(1, 11):
        # Strip newlines, padding and the rank-number prefix from the row.
        row_text = leaderboard[rank].text.replace('\n', '')
        row_text = row_text.replace('.                ', '')
        row_text = row_text[2:] if rank == 10 else row_text[1:]

        # The username runs up to the first '(' in the row text.
        username = ''
        for char in row_text:
            if char == '(':
                break
            username = username + char

        if len(username) > 30:
            username = username[:30]

        # Render this leaderboard entry.
        info = '{}. {}'.format(rank, username)
        font = pygame.font.SysFont('Century Schoolbook', 40)
        label = font.render(info, 1, pitt_gold)
        screen.blit(label, (x, y))
        pygame.display.update()

        # After the fifth entry, move to the second column.
        if rank == 5:
            x = 11 * width / 21
            y = 9 * height / 16
        else:
            y = y + 50
예제 #23
0
def main():
    """Command-line Codeforces submitter: parse arguments, prompt for the
    password, log in, submit the file and poll for the verdict."""
    parser = argparse.ArgumentParser(
        description='Submit codeforces in command line')
    parser.add_argument('user', type=str,
                        help='Your codeforces ID')
    parser.add_argument('prob', type=str,
                        help='Codeforces problem ID (Ex: 33C)')
    parser.add_argument('file', type=str,
                        help='path to the source code')
    args = parser.parse_args()

    user_name = args.user
    last_id, _ = get_submission_data(user_name)

    passwd = getpass()

    # Explicit parser avoids bs4's guess-the-parser warning.
    browser = RoboBrowser(parser='html.parser')
    browser.open('http://codeforces.com/enter')

    enter_form = browser.get_form('enterForm')
    # BUG FIX: the Codeforces login field is 'handleOrEmail', not 'handle'
    # (consistent with every other login helper in this file).
    enter_form['handleOrEmail'] = user_name
    enter_form['password'] = passwd
    browser.submit_form(enter_form)

    # Login succeeded iff the handle appears among the page captions.
    try:
        checks = list(map(lambda x: x.getText()[1:].strip(),
            browser.select('div.caption.titled')))
        if user_name not in checks:
            print("Login Failed.. probably because you've typed"
                  "a wrong password.")
            return
    except Exception:
        print("Login Failed.. probably because you've typed"
              "a wrong password.")
        return

    browser.open('http://codeforces.com/problemset/submit')
    submit_form = browser.get_form(class_='submit-form')
    submit_form['submittedProblemCode'] = args.prob
    submit_form['sourceFile'] = args.file
    browser.submit_form(submit_form)

    # A successful submission redirects to the .../status page.
    if browser.url[-6:] != 'status':
        print('Your submission has failed, probably '
              'because you have submit the same file before.')
        return

    print('Submitted, wait for result...')
    # Poll every 5s until a fresh, final verdict shows up.
    while True:
        id_, verdict = get_submission_data(user_name)
        if id_ != last_id and verdict != 'TESTING':
            print('Verdict = {}'.format(verdict))
            break
        time.sleep(5)
예제 #24
0
 def get_locality_fips(self, user):
     """Resolve the user's locality FIPS name by address via the
     tax.virginia.gov lookup form; returns it stripped and upper-cased."""
     fips_browser = RoboBrowser(parser='html.parser', user_agent='HelloVote.org', history=True)
     fips_browser.open('http://www.tax.virginia.gov/fips')
     fips_form = fips_browser.get_form(id="build-fips-form")
     fips_form['street1'] = user['address']
     fips_form['city'] = user['city']
     fips_form['zipcode'] = user['zip']
     fips_form['zipcodeext'] = ''
     # The form exposes two 'op' buttons (submit, then reset); pass the
     # first one explicitly so RoboBrowser clicks submit.
     fips_browser.submit_form(fips_form, submit=fips_form['op'])
     # The second <dd> of the horizontal definition list holds the locality.
     return fips_browser.select('dl.dl-horizontal dd')[1].text.strip().upper()
예제 #25
0
파일: idne.py 프로젝트: CroMarmot/idne
def cli(prob_id, filename):
    """Log in to Codeforces, submit *filename* for problem *prob_id*, and
    poll until a final verdict is printed.

    prob_id is contest number + problem letter (e.g. '1200A');
    filename is a source file in the current directory.
    Credentials come from the module-level `config`.
    """
    # Record the latest submission id BEFORE submitting, so the new run is
    # detected when a different id appears in the status list.
    last_id, b, c, d, e = get_latest_verdict(config.username)

    # Browse to Codeforces and authenticate.
    # (Bug fix: the original mixed tabs and spaces in this function, which
    # is a TabError under Python 3; indentation normalized to spaces.)
    browser = RoboBrowser(parser='html.parser')
    browser.open('http://codeforces.com/enter')

    enter_form = browser.get_form('enterForm')
    enter_form['handleOrEmail'] = config.username
    enter_form['password'] = config.password
    browser.submit_form(enter_form)

    # On success the page header lists the logged-in handle.
    try:
        checks = list(map(lambda x: x.getText()[1:].strip(),
                          browser.select('div.caption.titled')))
        if config.username not in checks:
            click.secho('Login Failed.. Wrong password.', fg='red')
            return
    except Exception as e:
        click.secho('Login Failed.. Maybe wrong id/password.', fg='red')
        return

    click.secho('[{0}] login successful! '.format(config.username), fg='green')
    click.secho('Submitting [{1}] for problem [{0}]'.format(prob_id, filename), fg='green')
    browser.open('https://codeforces.com/contest/' + prob_id[:-1] + '/problem/' + prob_id[-1])
    submit_form = browser.get_form(class_='submitForm')
    try:
        submit_form['sourceFile'] = filename
    except Exception as e:
        click.secho('File {0} not found in current directory'.format(filename))
        return
    browser.submit_form(submit_form)

    # A successful submission redirects to the ".../my" submissions page.
    if browser.url[-3:] != '/my':
        click.secho('Failed submission, probably you have submit the same file before', fg='red')
        return

    click.secho('[{0}] submitted ...'.format(filename), fg='green')
    hasStarted = False
    while True:
        id_, verdict_, time_, memory_, passedTestCount_ = get_latest_verdict(config.username)
        # Bug fix: compare against None with `is not None`, not `!=`.
        if id_ != last_id and verdict_ != 'TESTING' and verdict_ is not None:
            if verdict_ == 'OK':
                click.secho('OK - Passed {} tests'.format(passedTestCount_), fg='green')
            else:
                click.secho("{} on test {}".format(verdict_, passedTestCount_ + 1), fg='red')
            click.secho('{} MS | {} KB'.format(time_, memory_), fg=('green' if verdict_ == 'OK' else 'red'))
            break
        elif verdict_ == 'TESTING' and (not hasStarted):
            click.secho("Judgment has begun", fg='green')
            hasStarted = True
        time.sleep(0.5)
예제 #26
0
파일: config.py 프로젝트: ajax711/ma.py
def get_available_countries():
    """
    :return: list of countries as presented in url
             GEONAMES_COUNTRIES.
    """
    browser = RoboBrowser(parser='html.parser')
    browser.open(GEONAMES_COUNTRIES)
    # Country names are the non-empty anchor texts inside the first
    # '.restable ' table, lower-cased.
    table = browser.select('.restable ')[0]
    names = []
    for anchor in table.find_all('a'):
        if anchor.string is not None:
            names.append(anchor.string.lower())
    return names
예제 #27
0
def scores():
    """Flask view: on GET, render the login form; on POST, log in to
    collegeboard.org with the submitted credentials, scrape the user's AP
    exam names and scores, and render them."""

    if request.method == "GET":
        # return login form
        return render_template("scores.html")

    if request.method == "POST":
        # connect to collegeboard.org
        browser = RoboBrowser()
        login_url = 'https://account.collegeboard.org/login/login?appId=287&DURL=https://apscore.collegeboard.org/scores/view-your-scores'
        browser.open(login_url)

        # log in with the credentials posted by the user
        form = browser.get_form(id='loginForm')
        form['person.userName'].value = request.form.get("username")
        form['person.password'].value = request.form.get("password")
        form.serialize()
        browser.submit_form(form)

        # redirect to the AP scores page
        browser.open('https://apscore.collegeboard.org/scores/view-your-scores')

        # Exam names are '<h4>...</h4>' and scores '<em>...</em>'; the
        # slice [4:-5] strips the surrounding tags from the stringified node.
        exams_final = [str(exam)[4:-5] for exam in browser.select(".span5 > h4")]
        scores_final = [str(score)[4:-5] for score in browser.select(".span5 > span > em")]

        # returns scores page
        return render_template("scored.html", exams=exams_final, scores=scores_final)
예제 #28
0
def applyf(rno, pas, out, indat, rea):
    """Log in to the Vardhaman students' corner with roll number *rno* and
    password *pas*, then file an outing-permission form (out time *out*,
    in time *indat*, reason *rea*).

    Returns a human-readable status string.
    """
    # Bug fix: the original built two RoboBrowsers and kept only the second,
    # silently dropping parser="html.parser"; one browser carries both settings.
    br = RoboBrowser(
        parser="html.parser",
        user_agent=
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
    )
    # Sign in.
    br.open('http://studentscorner.vardhaman.org')
    form = br.get_form(action="")
    form["rollno"] = rno
    form["wak"] = pas
    br.submit_form(form)
    # Open and fill the permission form.
    br.open("http://studentscorner.vardhaman.org/students_permission_form.php")
    form = br.get_form(action="insert_permission.php")
    form['out_time'] = out
    form['in_time'] = indat
    form['reason'] = rea
    br.submit_form(form)
    # Bug fix: the original did `tt = str(br.select)`, which stringifies a
    # bound method ("<bound method ...>"), so the success message below could
    # never be found. Inspect the parsed response body instead.
    tt = str(br.parsed)
    br.open("http://studentscorner.vardhaman.org/students_permission_form.php")
    td = br.select("td")
    if ("Permission Form Submitted Successfully" in tt):
        # The student's name follows the "Student Name" cell; its index
        # differs between the two table layouts the site serves.
        if (str(td[8].text.strip()) == "Student Name"):
            return (str(td[9].text.strip()).title() +
                    "'s Permission Form Submitted Successfully")
        elif (str(td[29].text.strip()) == "Student Name"):
            return (str(td[30].text.strip()).title() +
                    "'s Permission Form Submitted Successfully")
        else:
            return (
                "Permission Form Submitted Successfully.\n\nYour name has not found in the permission,please send a mail to \[email protected]\nstating the issue."
            )
    else:
        return (
            "Something went wrong.Apply the form manually and mail to [email protected] stating the issue."
        )
예제 #29
0
		def scrape_td(parsed_url):
			"""Scrape the per-thread write counters from *parsed_url* and
			return the total number of writes across all (up to 33) threads.
			Uses `server_num` from the enclosing scope for error messages.
			"""
			br2 = RoboBrowser(history=True, parser="html.parser")
			try:
				br2.open(parsed_url)  # attempt to open up parsed URL
			except Exception:
				print(f"Unable to open{parsed_url}")
			# Each thread occupies 7 <td> cells; the three counters of thread 1
			# start at cell 28 (28,29,30), thread 2 at 35, ... thread 33 at 252.
			# (Replaces the original 99-entry hard-coded list, which followed
			# exactly this progression.)
			col_list = [base + offset
						for base in range(28, 253, 7)
						for offset in (0, 1, 2)]
			temp_list = []  # counter values collected for this consumer
			for nums in col_list:
				try:
					# parse the cell string so we can turn the value into an int
					strp_val = str(br2.select('td')[nums]).replace("<td>", "").replace("</td>", "").replace(",", "")
					temp_list.append(int(strp_val))
				except Exception:
					print(f"Less than 33 threads for cluster 0{server_num}")
			# Every third value (offset 0) is the write counter.
			writes_ = temp_list[0::3]
			writes_totals = sum(writes_)  # total of writes located in temp list

			return writes_totals
예제 #30
0
		def parse_url(server_num):
			"""Build the URL of the most recent job page for cluster *server_num*
			by scraping the 10th anchor off the cluster's index page.

			NOTE(review): the URL templates were redacted when this code was
			shared; the original placeholder '{server num}' is not a valid
			f-string expression (SyntaxError) — '{server_num}' is the only
			sensible reading and is used here.
			"""
			try:
				if server_num >= 6:
					url = f"<url f string with {server_num} injected>"
					first_url = f"<url f string with {server_num} injected>"
				else:
					url = f"<url f string with {server_num} injected>"
					first_url = f"<url f string with {server_num} injected>"
			except Exception:
				print(f"Something went wrong loading SB for C0{server_num}")
			br = RoboBrowser(history=True, parser="html.parser")
			try:
				br.open(url)  # attempt to open url with RoboBrowser
			except Exception:
				print(f"Unable to open {url}")
			# Server 7's anchor href is one character shorter than the others.
			if server_num == 7:
				end_scrape = str(br.select('a')[9])[9:46]
			else:
				end_scrape = str(br.select('a')[9])[9:48]
			combo_url = first_url + end_scrape  # URL for the most recent job

			return combo_url
예제 #31
0
def scrape_cache(query, total):
    """Walk Bing result pages for *query* (14 results per page, up to
    *total*) and return a list of listing dicts with keys:

    - 'orig_url'  : original URL of the site
    - 'desc'      : quick description of the site
    - 'cache_url' : Bing cached-copy URL, or None when no cache ids exist
    """
    browser = RoboBrowser()

    listings = []
    for i in range(1, total, 14):
        offset = i  # which listing to start at per page. Increment by 14
        browser.open('http://www.bing.com/search?q=%s&first=%d' %
                     (query, offset))

        # grab all search attribute strings
        capt_list = browser.select('.b_caption')
        for capt in capt_list:
            # start a new listing
            listing = {}

            # original url, with markup stripped
            listing['orig_url'] = re.sub('<[^>]*>', '',
                                         str(capt.select('cite')[0]))

            # description
            listing['desc'] = capt.p.string

            # '|' delimited list, containing the ids needed to cache
            id_string = capt.select('div.b_attribution')[0].get('u')
            print(id_string)
            # Bug fix: use identity comparison with None, not `!=`.
            if id_string is not None:
                ids = id_string.split('|')
                listing[
                    'cache_url'] = "http://cc.bingj.com/cache.aspx?q=%s&d=%s&mkt=en-US&setlang=en-US&w=%s" % (
                        query, ids[2], ids[3])
            else:
                listing['cache_url'] = None

            print(listing)
            listings.append(listing)

        print(":: End of dump %d" % i)

        # delay between page grabs
        time.sleep(1)

    # listings is given as an output object
    return (listings)
def getSongLinks(soundcloudURL):
	"""Rip a SoundCloud track via soundflush.com.

	Returns a dict with 'url' (direct download URL) and 'name' (the
	suggested filename).
	"""
	browser = RoboBrowser(history=False)

	# Submit the track URL to SoundFlush's download form.
	browser.open('http://soundflush.com/')
	form = browser.get_form(id='form_download')
	form['track_url'].value = soundcloudURL
	browser.submit_form(form)

	# The save button on the result page carries both pieces of data.
	saveButton = browser.select('a#btn_save')[0]
	return {'url': saveButton['href'], 'name': saveButton['download']}
def scrape_cache(query, total):
	"""Walk Bing result pages for *query* (14 results per page, up to
	*total*) and return a list of listing dicts with keys 'orig_url',
	'desc' and 'cache_url' (Bing cached copy, or None)."""
	browser = RoboBrowser()

	listings = []
	for i in range(1, total, 14):
		offset = i				# which listing to start at per page. Increment by 14
		browser.open('http://www.bing.com/search?q=%s&first=%d' % (query, offset))

		# grab all search attribute strings
		capt_list = browser.select('.b_caption')
		for capt in capt_list:
			# start a new listing
			listing = {}

			# original url, with markup stripped
			listing['orig_url'] = re.sub('<[^>]*>', '', str(capt.select('cite')[0]))

			# description
			listing['desc'] = capt.p.string

			# '|' delimited list, containing the ids needed to cache
			id_string = capt.select('div.b_attribution')[0].get('u')
			print(id_string)
			# Bug fix: use identity comparison with None, not `!=`.
			if id_string is not None:
				ids = id_string.split('|')
				listing['cache_url'] = "http://cc.bingj.com/cache.aspx?q=%s&d=%s&mkt=en-US&setlang=en-US&w=%s" % (query, ids[2], ids[3])
			else:
				listing['cache_url'] = None

			print(listing)
			listings.append(listing)

		print(":: End of dump %d" % i)

		# delay between page grabs
		time.sleep(1)

	# listings is given as an output object
	return(listings)
예제 #34
0
파일: utils.py 프로젝트: VMCTeam/apifest
def getKeyFromMetaData(url):
    """
        @brief Auxiliar function. Returns a list of normalized keywords.
        @param url String.
        @return listof(uStrings), or None when the site is unreachable.

        Uses normalize(value) to fix unicode strings.
        The function selects values in the content property
        of the tags tagged as metadata.
        This content contain the website keywords for classification.
    """
    # Variables:
    crawler     = RoboBrowser(history=True)
    endpoint    = "http://"+url
    endpointSSL = "https://"+url
    connFlag    = True
    metadata    = list()
    temp_list2  = list()
    keywords    = list()

    # Try plain HTTP first, falling back to HTTPS; give up (None) if
    # neither endpoint answers.
    # NOTE(review): verify=False disables TLS certificate checking.
    # Bug fix: bare `except:` also swallowed KeyboardInterrupt/SystemExit;
    # narrowed to Exception.
    try:
        crawler.open(endpoint, verify=False)
    except Exception:
        connFlag = False

    if not connFlag:
        try:
            crawler.open(endpointSSL, verify=False)
        except Exception:
            return None

    metadata = crawler.select("meta")
    # select metadata with keywords:
    for d in metadata:
        if 'name' in d.attrs.keys():
            if d.attrs['name'] == 'keywords':
                temp_list = re.split(', |,', d.attrs['content'])
                temp_list2.extend(temp_list)

    for k in temp_list2:
        keywords.append(normalize(k))

    return keywords
예제 #35
0
  def submit(self, entry, message=None):
    """Log in to the competition site and upload the submission file
    *entry*, with an optional description *message*.

    Credentials and URLs come from instance attributes populated by
    self.read_account().
    """
    browser = RoboBrowser(history=True)
    self.read_account()

    # [login]
    browser.open(self.login_url)
    login_form = browser.get_form(action='/account/login')
    login_form['UserName'].value = self.username
    login_form['Password'].value = self.password
    browser.submit_form(login_form)
    myname = browser.select('#header-account')[0].text
    print("[SUBMIT] login as \"%s\" @ %s" % (myname, datetime.now()))

    # [submit]
    browser.open(self.submit_url)
    submit_form = browser.get_form(action='/competitions/submissions/accept')
    # Bug fix: the original opened the file without ever closing it;
    # `with` keeps it open for the upload and closes it afterwards.
    with open(entry, 'r') as upload:
      submit_form['SubmissionUpload'].value = upload
      if message: submit_form['SubmissionDescription'] = str(message)
      browser.submit_form(submit_form)
    print("[SUBMIT] submitted @ %s" % datetime.now())
예제 #36
0
    def take_action(self, parsed_args):
        """Log in to kddcup2015.com (credentials from CLI args or the
        ~/.kddcup2015-cli/config file) and download every data file linked
        from the submission-data page into the current directory."""
        config_dir = '~/.kddcup2015-cli'
        config_dir = os.path.expanduser(config_dir)

        # NOTE(review): if the config dir is missing AND no CLI args were
        # given, username/password are unbound below — pre-existing behavior.
        if os.path.isdir(config_dir):
            config = ConfigParser.ConfigParser(allow_no_value=True)
            # Bug fix: the open() handle was never closed; use a context manager.
            with open(config_dir + '/config') as config_file:
                config.readfp(config_file)

            if parsed_args.username:
                username = parsed_args.username
            else:
                username = config.get('user', 'username')

            if parsed_args.password:
                password = parsed_args.password
            else:
                password = config.get('user', 'password')

        base = 'https://www.kddcup2015.com'
        login_url = '/'.join([base, 'user-ajaxlogin.html'])
        data_url = '/'.join([base, 'submission-data.html'])

        browser = RoboBrowser()

        # AJAX login endpoint returns a JSON status.
        response = browser.session.post(
            login_url, dict(email=username, pwd=password)).json()

        if response['rs'] == 'error':
            self.app.stdout.write(response['msg'])

        browser.open(data_url)

        src_urls = list(map(lambda x: x['href'], browser.select('tr .blue')))

        for url in src_urls:
            self.app.stdout.write('downloading %s\n' % url)
            request = browser.session.get(url, stream=True)
            # url[59:] strips the fixed URL prefix, leaving the filename.
            with open(url[59:], "wb") as data_files:
                data_files.write(request.content)
def get_webdav_urls(username, password):
    """Log in to CTools and return the WebDAV URLs of the user's course
    sites (personal '~' workspaces are skipped)."""

    # log in
    browser = RoboBrowser(history=True)
    browser.open('http://ctools.umich.edu')
    browser.follow_link(browser.find(id='ctoolsLogin'))

    login_form = browser.get_form()
    login_form['login'].value = username
    login_form['password'].value = password
    browser.submit_form(login_form)

    # navigate to the site-setup tool, then into its iframe
    browser.follow_link(browser.find(
        class_='toolMenuLink ',
        title='For creating, revising, and deleting course and project sites'
    ))
    browser.open(browser.find(class_='portletMainIframe').attrs['src'])

    results = []

    course_links = browser.select('#sitesForm td h4 a[target="_top"]')
    for course_link in course_links:

        if not course_link.attrs:
            continue
        href = course_link.attrs['href']
        # '~' marks a personal workspace, not a course site
        if '~' in href:
            continue

        # Bug fix: use a raw string for the regex; '\/' is an invalid
        # escape that newer Pythons warn about (same pattern either way).
        results.append(
            'https://ctools.umich.edu/dav' +
            findall(r'/[^/]+$', href)[0]
        )

    return results
예제 #38
0
def songPick(url):
    """Pick a random (artist, song) pair from the chart table at *url*.

    Returns a tuple of ("<artist> <song>", artist, song).
    Assumes the chart exposes at least 100 entries (randint(0, 99)).
    """
    browser = RoboBrowser(history=True)
    browser.open(url)
    cells = browser.select('td')
    songName = []
    artistName = []
    column = 0
    searching = True
    while searching:
        # Rows are triples of <td> cells; the 2nd is the artist, the 3rd
        # the song title (the column counter persists across passes).
        for cell in cells:
            text = cell.text
            if column == 1:
                artistName.append(text)
            elif column == 2:
                songName.append(text)
            column = (column + 1) % 3

        x = randint(0, 99)
        search = artistName[x] + " " + songName[x]
        if len(search) > 1:
            searching = False
            return (search, artistName[x], songName[x])
# NOTE(review): this Python 2 fragment is corrupted — credential prompts were
# masked with '******' when the code was shared, fusing several source lines
# together (e.g. the raw_input call and the first print), and the loop header
# that bound `link` is missing. Preserved verbatim; it will not parse as-is.
username = raw_input ("Username: "******"Repository: " + link.select('td.repo')[0].text.encode("utf-8").strip()
	print "User: "******"utf-8").strip()
	print "Title: " + link.select('td.title')[0].select('a.execute')[0].text.encode("utf-8").strip()
	print "Updated " + link.select('td.date')[0].text.encode("utf-8").strip()
	print "\n----------------------"
#obtain links with beautifulSoup
links = browser.find_all('a')
for link in links:
	try:
		#print(link.get('href'))
		# absolutize relative bitbucket links before printing
		if not link['href'].startswith("https"):
			link['href']='https://bitbucket.org'+link['href'].encode("utf-8").strip()
			#link['href']='/odigeoteam/frontend-html5'
		print link['href']
예제 #40
0
class Robot(object):
    """This robot have two functionality, which is to grab matakuliah data
    and to grab KRS of each mahasiswa. This robot also need username and
    password for authorization.

    :param str username: username for login
    :param str password: password for login

    """
    def __init__(self, username, password):
        self.browser = RoboBrowser()
        self.username = username
        self.password = password
        self.matakuliah = []

    def update_matakuliah(self):
        """Fetch the course list, attach each course's lecture schedule,
        then persist everything."""
        self.matakuliah = self._get_matakuliah()
        for obj in self.matakuliah:
            detail = self._get_matakuliah_detail(obj['link_detail'])
            obj['jadwal_kuliah'] = detail['jadwal_kuliah']
            # obj['jadwal_uts'] = detail['jadwal_uts']
            # obj['jadwal_uas'] = detail['jadwal_uas']
        self._persist_matakuliah()

    def _persist_matakuliah(self):
        """Upsert every scraped course into the Kelas table, keyed by
        class name (nama_kelas)."""
        for obj in self.matakuliah:
            try:
                kelas = (Kelas.select()
                         .where(Kelas.nama == obj['nama_kelas']).get())
            except Kelas.DoesNotExist:
                kelas = Kelas()

            kelas.kode_mk = obj['kode_mk']
            kelas.nama = obj['nama_kelas']
            kelas.matakuliah = obj['matakuliah']
            kelas.dosen = obj['dosen']
            kelas.sks = obj['sks']
            kelas.tipe = obj['tipe']
            kelas.jadwal_kuliah = obj['jadwal_kuliah']
            kelas.save()

    def __login(self):
        """Authenticate against akademika.ugm.ac.id."""
        self.browser.open('http://akademika.ugm.ac.id')
        login_form = self.browser.get_form(id='form-login')
        login_form['username'].value = self.username
        login_form['password'].value = self.password
        self.browser.submit_form(login_form)

    def _get_matakuliah(self):
        """Scrape the 'informasi matakuliah' table into a list of dicts."""
        self.__login()

        # go to 'informasi matakuliah' page
        link_matakuliah = self.browser.select('#navigation li a')[3]
        self.browser.follow_link(link_matakuliah)

        marshal = []
        # Bug fix: this referenced a nonexistent global `browser`
        # (NameError at runtime); the instance's browser is intended.
        matakuliah_raw = self.browser.select('.table-common > tr')[1:]
        for raw in matakuliah_raw:
            data = raw.select('td')

            obj = {}
            obj['kode_mk'] = data[1].contents[0]
            obj['matakuliah'] = data[2].contents[0]
            obj['dosen'] = data[3].contents[0]
            obj['link_detail'] = data[4].contents[0]
            obj['nama_kelas'] = data[4].contents[0].get_text()
            obj['tipe'] = data[5].contents[0]
            obj['sks'] = data[6].contents[0]
            marshal.append(obj)

        return marshal

    def _get_matakuliah_detail(self, link):
        """Follow a course's detail link and extract its lecture schedule."""
        self.browser.follow_link(link)
        jadwal_row = self.browser.select('table > tr')

        # for brevity
        obj = {}
        obj['jadwal_kuliah'] = ""
        obj['jadwal_uts'] = ""
        obj['jadwal_uas'] = ""

        # NOTE(review): the assignment below overwrites on every pass, so
        # only the LAST schedule row is kept — confirm whether joining all
        # rows was intended.
        jadwal_kuliah_row = jadwal_row[0].select('table tr')[1:]
        for row in jadwal_kuliah_row:
            contents = [x.contents[0] for x in row.select('td')]
            data_string = "$".join(contents)
            obj['jadwal_kuliah'] = "|".join([data_string])

        # TODO: find a way to get 'tanggal'
        # jadwal_uts_row = jadwal_row[1].select('table tr')[1:]
        # jadwal_uas_row = jadwal_row[2].select('table tr')[1:]

        return obj
예제 #41
0
import re
from robobrowser import RoboBrowser

# Scrape the qq.com front page's "today top" story and print its headline
# and article body.
# Fix: the original used Python-2-only print statements; the parenthesized
# single-argument form below behaves identically on Python 2 and 3.
url = 'http://www.qq.com/'
b = RoboBrowser(history=True)
b.open(url)

# The top story is the first anchor inside the #todaytop element.
today_top = b.find(id='todaytop').a
print(today_top['href'])

b.follow_link(today_top)

# Article page: headline lives under '.hd h1', body in #articleContent.
title = b.select('.hd h1')[0]
print('*****************************')
print(title.text)
print('*****************************')

print(b.find(id='articleContent').text)
예제 #42
0
def getWebsiteOneData():
	"""Scrape every mountain listed on the index page (`working_web`) and
	return a dict mapping mountain name -> details: description, fraction of
	trails open, resort website, location, weekday/weekend ticket prices and
	trail-map image URL. Relies on module-level `working_web` and `prefix`.
	"""
	data_og=[]
	page = urlopen(working_web).read()
	soup = BeautifulSoup(page)
	soup.prettify()
	# each 'rowB' table row holds one mountain: (name, detail href)
	# http://stackoverflow.com/questions/9253684/selecting-specific-tr-tags-with-beautifulsoup
	rows=soup.findAll('tr', {'class': 'rowB'})
	for r in rows:
		tag_a = r.find('a')
		data_og.append([tag_a.text, tag_a['href']])

	mountain_dict={}
	print("scraping: ", str(len(data_og)), " mountains")
	for x in data_og:
		# open this mountain's detail page
		init_detail_url=prefix+x[1]
		proper_prefix_index=init_detail_url.rindex('/')+1

		# scrape resort description
		browser_6 = RoboBrowser(history=True)
		browser_6.open(init_detail_url)
		r_d = browser_6.select('.resort_description')
		soup_7 = BeautifulSoup(str(r_d))
		p_s= soup_7.findAll('p')
		description=""
		for p in p_s:
			description+=p.getText()+"\n"

		snow_report_url=init_detail_url[:proper_prefix_index]+"skireport.html"
		browser_2 = RoboBrowser(history=True)
		browser_2.open(snow_report_url)

		mountain_name=x[0]
		print(mountain_name)

		# scrape resort url
		website_of_resort=browser_2.select('.contact_wrap')
		soup_4 = BeautifulSoup(str(website_of_resort))
		try:
			website_of_resort_link=soup_4.find('a').getText()
		except Exception:
			print("no webpage")
			website_of_resort_link="sorry, no link found, try google search"

		# scrape lift tickets:
		lift_url=init_detail_url[:proper_prefix_index]+"lift-tickets.html"
		browser_4 = RoboBrowser(history=True)
		browser_4.open(lift_url)
		ticket_box=browser_4.select('.resort_ticket_price')
		soup_5 = BeautifulSoup(str(ticket_box))
		data=[]
		# source: http://stackoverflow.com/questions/23377533/python-beautifulsoup-parsing-table
		for tr in soup_5.find_all('tr'):
			cols = tr.find_all('td')
			cols = [ele.text.strip() for ele in cols]
			data.append([ele for ele in cols if ele])
		# price columns: child, junior, adult, senior
		weekday_prices=data[1][1:-1]
		weekend_prices=data[2][1:]

		# scrape trail map picture:
		image_url=init_detail_url[:proper_prefix_index]+"trailmap.html"
		browser_5 = RoboBrowser(history=True)
		browser_5.open(image_url)
		map_html=browser_5.select('.trailMap')
		soup_6=BeautifulSoup(str(map_html))
		try:
			img_tag=soup_6.find("img")
		except Exception:
			print("image not found")
			img_src="NULL"	# bug fix: was assigned to 'img_rc' (typo)
		try:
			img_src=img_tag['src']
		except Exception:
			img_src="NULL"

		# scrape address:
		driving_url=init_detail_url[:proper_prefix_index]+"driving-directions.html"
		browser_3 = RoboBrowser(history=True)
		browser_3.open(driving_url)
		direction_text=browser_3.select('.directions')
		soup_4 = BeautifulSoup(str(direction_text))
		destination_text = soup_4.find("input", {"id": "end"})
		destination_text = destination_text['value']

		# trail info scraper
		runs_open=browser_2.select('.pie_chart_item')
		num_open=-1
		# Bug fix: total_num was undefined (NameError in the percentage
		# computation below) whenever the outer try failed.
		total_num=-1
		try:
			run_info=str(runs_open[0])
			soup_2 = BeautifulSoup(run_info)
			numbers=soup_2.find('p').getText()
			of_index=numbers.index('of')
			num_open=int(numbers[0:of_index])
			try:
				total_num=int(numbers[of_index+2:])
			except Exception:
				print("no trail number info")
				total_num=-1
		except Exception:
			print("no trail info")
			num_open=-1

		if mountain_name not in mountain_dict:
			# NOTE(review): a page reporting "0 of 0" trails would raise
			# ZeroDivisionError here — confirm upstream data never does.
			percent_trails_open=float(num_open)/float(total_num)
			mountain_dict[mountain_name]={'resort_description':description, 'percent_trails_open':percent_trails_open, 'resort_website': website_of_resort_link, 'resort_location':destination_text, 'weekday_prices': weekday_prices, 'weekend_prices': weekend_prices, 'trail_map_url': img_src}

	print(len(mountain_dict))
	return mountain_dict
예제 #43
0
class CF:
    """Codeforces client: log in, submit a solution, and poll its verdict."""

    # endpoints
    URL_HOME = 'http://codeforces.com/'
    URL_LOGIN = URL_HOME + 'enter'
    URL_SUBMIT = URL_HOME + 'problemset/submit'
    URL_STATUS = URL_HOME + 'submissions/'

    # columns of a status-table row, in display order
    INFO = ['RunID', 'Submit Time', 'Author', 'Pro.ID', 'Language', 'Judge Status', 'Time', 'Memory']
    # language name -> Codeforces programTypeId
    LANGUAGE = {
        'G++': '42',
        'C': '42',
        'G++11': '42',
        'G++14': '50',
        'GCC': '10',
        'GCC11': '1',
        'JAVA': '36',
        'PYTHON2': '7',
        'PYTHON3': '31',
    }
    # request headers for the raw status-page fetches
    header = {
        'Accept': 'text / html, application / xhtml + xml, '
                  'application / xml;q = 0.9, image / webp, * / *;q = 0.8',
        'Accept-Language':'zh-CN,zh;q=0.8,en;q=0.6',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Ubuntu Chromium/52.0.2743.116 Chrome/52.0.2743.116 Safari/537.36',
        'Origin': "http://codeforces.com",
        'Host': "codeforces.com",
        'Content-Type': 'application/x-www-form-urlencoded',
        'Connection': 'keep-alive',
    }

    def __init__(self, user_id, password):
        self.user_id = user_id
        self.password = password
        self.browser = RoboBrowser()
        self.run_id = ''   # id of the submission being tracked
        self.pre_id = ''   # newest submission id seen before submitting
        self.res = {}      # last parsed result row, keyed by CF.INFO

    def login(self):
        """Authenticate; returns True on success, False otherwise."""
        try:
            self.browser.open(CF.URL_LOGIN)
        except:
            ots = "Server Error"
            logger.error(ots)
            print(ots)
            return False
        enter_form = self.browser.get_form('enterForm')
        enter_form['handle'] = self.user_id
        enter_form['password'] = self.password

        try:
            self.browser.submit_form(enter_form)
        except:
            ots = "Server Error"
            logger.error(ots)
            print(ots)
            return False

        # On success the page header lists the logged-in handle.
        try:
            checks = list(map(lambda x: x.getText()[1:].strip(),
                              self.browser.select('div.caption.titled')))
            if self.user_id not in checks:
                ots = "Login Failed.. "
                logger.info(ots)
                print(ots)
                return False
        except:
            ots = "Server Error"
            logger.error(ots)
            return False
        ots = 'Login Successful!'
        logger.info(ots)
        return True

    def submit(self, pro_id, language, src):
        """Submit source *src* for problem *pro_id* in *language*
        (a key of CF.LANGUAGE); returns True on success."""
        pro_id = str(pro_id).upper()
        try:
            language = CF.LANGUAGE[str(language).upper()]
        except:
            ots = 'language unrecognizable!'
            logger.error(ots)
            print(ots)
            return False

        try:
            self.browser.open(CF.URL_SUBMIT)
        except:
            ots = "Server Error"
            logger.error(ots)
            print(ots)
            return False

        submit_form = self.browser.get_form(class_='submit-form')
        submit_form['submittedProblemCode'] = pro_id
        submit_form['source'] = src
        submit_form['programTypeId'] = language

        self.browser.submit_form(submit_form)

        # a successful submission redirects to the ".../status" page
        if self.browser.url[-6:] != 'status':
            ots = 'Submit Failed..(probably because you have submit the same file before.)'
            logger.info(ots)
            print(ots)
            return False
        ots = 'Submit Successful'
        # Bug fix: used the bare `logging` module instead of the configured
        # module logger (inconsistent with every other call in this class).
        logger.info(ots)
        print(ots)
        return True

    def init_id(self):
        """Record the newest existing submission id so result() can stop
        scanning once it reaches pre-submit history."""
        if self.pre_id != '':
            return True
        url = CF.URL_STATUS + str(self.user_id)
        try:
            req = urllib.request.Request(url=url, headers=CF.header)
            page = urllib.request.urlopen(req, timeout=5)
        except:
            ots = "Server Error"
            logger.error(ots)
            print(ots)
            return False
        soup = BeautifulSoup(page, 'html5lib')
        tables = soup.find('table', {'class': 'status-frame-datatable'})
        tmp = []
        # first row with all 8 columns is the newest submission
        for row in tables.findAll('tr'):
            cols = row.findAll('td')
            cols = [ele.text.strip() for ele in cols]
            tmp = [ele.replace(u'\xa0', u' ') for ele in cols if ele]
            if len(tmp) == 8:
                break
        self.pre_id = tmp[0]
        return True

    def result(self):
        """Poll the status page once. Returns True when a final verdict (or
        no submission) was found and stored in self.res; False while the
        submission is still running/queued or on server error."""
        url = CF.URL_STATUS + str(self.user_id)
        try:
            page = urllib.request.urlopen(url, timeout=5)
        except:
            ots = "Server Error"
            logger.error(ots)
            print(ots)
            return False
        soup = BeautifulSoup(page, 'html5lib')

        tables = soup.find('table', {'class': 'status-frame-datatable'})
        tmp = []
        find = False
        for row in tables.findAll('tr'):
            cols = row.findAll('td')
            cols = [ele.text.strip() for ele in cols]
            tmp = [ele.replace(u'\xa0', u' ') for ele in cols if ele]
            if len(tmp) == 8:
                if tmp[0] == self.pre_id:
                    break
                if not find:
                    if self.run_id == '' or self.run_id == tmp[0]:
                        find = True
                        self.run_id = tmp[0]
            if find:
                break
        if not find:
            ots = "Can not find submissions!"
            # Bug fix: logging -> logger (consistency with the rest of the class)
            logger.info(ots)
            print(ots)
            return True

        wait = ['Running', 'In queue']
        if tmp[5].find(wait[0]) != -1 or tmp[5].find(wait[1]) != -1:
            # Bug fix: logging -> logger (consistency with the rest of the class)
            logger.info(tmp[5])
            return False
        for i in range(8):
            self.res[CF.INFO[i]] = tmp[i]
            print(CF.INFO[i], ':', tmp[i])
        return True
예제 #44
0
파일: lab1.py 프로젝트: fgeorgy/robobrowser
session.verify = False  # Skip SSL verification
session.proxies = {'http': 'http://localhost:8081/'}  # route traffic through a local intercepting proxy on :8081
browser = RoboBrowser(session=session, parser='lxml')

# Open petitesannonces.ch (earlier comments mentioning "Genius"/songs are
# leftovers from the robobrowser README example this script was adapted from)
# browser = RoboBrowser(history=True)
browser.open('http://www.petitesannonces.ch/')

# Fill the site search form with the query 'jardinage'
form = browser.get_form(action='/recherche/')
form                # <RoboForm q=>  (no-op expression; handy in a REPL)
form['q'].value = 'jardinage'
browser.submit_form(form)

# Print the title of every search result
songs = browser.select('.ele a.title')

for p in songs:
    print(p.text)

# browser.follow_link(songs[0])
# lyrics = browser.select('.lyrics')
# lyrics[0].text      # \nHear the sound of music ...
#
# # Back to results page
# browser.back()
#
# # Look up my favorite song
# song_link = browser.get_link('trains')
# browser.follow_link(song_link)
#
예제 #45
0
파일: tGoole.py 프로젝트: YuiLiou/git-demo
# For every company in com_list, scrape Google News results and write a
# (title, url) CSV per company under ./company/. The previous run's output
# directory is archived with a month/day suffix first.
num = 1000
month = str(date.today()).split('-')[1]
day = str(date.today()).split('-')[2]
rename('company', 'company'+month+day)
if not os.path.exists('company'):
    os.makedirs('company')
for com in com_list:
    com = com.replace(" ","+")
    url = "https://www.google.com/search?q="+com+"&tbm=nws&num="+str(num)+"&filter=0&cr=tw&ie=utf-8&oe=utf-8&hl=zh-TW"
    browser = RoboBrowser(user_agent='a python robot')
    browser.session.headers['User-Agent'] # a python robot
    browser.open(url)
    # Bug fix: the file handle leaked whenever an iteration raised; `with`
    # guarantees it closes, and newline='' is the documented open() mode for
    # csv.writer (avoids blank rows on Windows). Unused news_list removed.
    with open('company\\'+com+'.csv', 'w', newline='') as f:
        w = csv.writer(f)
        for body in browser.select('div#search'):
            for div in body.select('div.g'):
                for a in div.select('h3 a'):
                    # Google wraps hits in a /url?q= redirect; undo the
                    # percent-escapes and strip the tracking suffix.
                    hreff = str(a.attrs.get('href')).replace('%3F','?').replace('%3D','=').replace('%26','&').replace('/url?q=','')
                    href = hreff.split('&sa=U')[0]
                    print (a.text)
                    try:
                        w.writerow ([a.text,href])
                    except Exception:
                        print ('error',a.text)
                        w.writerow (['error',href])
예제 #46
0
# Fragment: submits a DeviantArt login form prepared earlier in the file,
# checks the outcome, then extracts the watch-feed preload JSON from an
# inline <script> and dumps it to source.txt.
# NOTE(review): `browser`, `form` and the `re` import come from code above
# this chunk.
#print(form)
if browser.find(text=re.compile("Login")):
    print('Compiled login fields form...')

browser.submit_form(form)

# Classify the login result by scanning the response text.
if browser.find(text=re.compile("The password you entered was incorrect")):
        print("Wrong password or username. Attempting to download anyway.")
        exit();
elif browser.find(text=re.compile("\"loggedIn\":true")):
        print("Logged in!")
else:
        print("Login unsuccessful. Attempting to download anyway.")
        exit();
browser.open('https://www.deviantart.com/messages/#view=deviantwatch')
page=browser.select('body')

# The watch feed is preloaded as JSON inside one of the page's scripts;
# locate it by its distinctive prefix and slice it out.
script=browser.select('script')
for s in script:
    st=s.text
    json_start= st.find('{"api":"scmc","preload":')
    if json_start!=-1:
        json_end= st.find('}}}}) }',json_start)
        print('Analising the json')
        #print(str(json_start)+" "+str(json_end))
        # +4 keeps the closing braces of the JSON object.
        js=st[json_start:json_end+4]
        out_file = open("source.txt","w")
        out_file.write(js)
        out_file.close()
        #REPLACE ALL THE SINGLE QUOTE
        #REPLACE ALL THE \ WITH \\
예제 #47
0
            # Fragment of a blind data-recovery loop against an online judge:
            # submits a Java probe that throws (-> "Wrong answer" verdict) iff
            # a chosen bit of the hidden judge input is set, then reads the
            # verdict to reconstruct the input one bit at a time.
            # NOTE(review): the enclosing loop over curProgram/curByte/shift
            # starts above this chunk; `br`, `binaryString`, `input` come from
            # that scope.
            #form.new_control('text','code',{'value':''})
            #form.fixup()
            form['localid'].value=str(curProgram)
            form['language'].value='2'
            form['code'].value='import java.util.*;class Main{public static void main(String[]args) throws Exception{Scanner in = new Scanner(System.in);StringBuilder sb = new StringBuilder();while(in.hasNextLine()){sb.append(in.nextLine());}byte b=(byte)sb.charAt('+str(curByte)+');if((b>>'+str(shift)+'&0x01)==0){throw new Exception("Error");}}}'
            br.submit_form(form)
            #f3 = open('f3.html','w')
            #f3.write(str(tp))
            #print(tp)

            # Navigate to the submissions page to read the verdict.
            a=br.find_all('a', href=True, text = re.compile('My Submissions'))
            for link in a:
                #print(a)
                br.follow_link(link)
        
            tr = br.select('.sectiontableentry1')
            stra = str(tr[8])
            #print(stra[295:307])
        
            # "Wrong answer" means the probe threw, i.e. the bit was set.
            if 'Wrong answer' in stra:
                binaryString += '1'
            else:
                binaryString += '0'

                # NOTE(review): this print sits inside the else branch — it
                # was probably meant to run unconditionally; confirm intent.
                print('Submitted',curProgram,curByte, binaryString[::-1])
        if(curByte>1):
            # Bits were collected LSB-last; reverse before decoding, and a
            # zero byte terminates the recovered input.
            intval = int(binaryString[::-1],2)
            if(intval==0):
                break
            input += chr(intval)
        binaryString = ''
예제 #48
0
파일: login.py 프로젝트: yangeren/salome
class Login(object):
    """Drives a headless browser (PhantomJS) through the site's login and
    booking flows; RoboBrowser is used only to discover the dynamically
    generated ids of the login form's <input> elements.

    NOTE(review): Python 2 code — `print` statements, `file()` builtin.
    """
    def __init__(self):
        # Headless browser for the real UI interaction.
        self.dr = webdriver.PhantomJS('phantomjs')
        # self.dr = webdriver.Firefox()
        self.dr.maximize_window()
        # Lightweight scraper used to read element ids off the login page.
        self.source = RoboBrowser(history=True)
        print "xxxx"

    def readdata(self, user):
        # Load the login url and the given user's credentials from ini.yaml;
        # `xuanchuantu` is the promo-image path used by add_jj().
        res = yaml.load(file('ini.yaml'))
        login_url = res['url']['login']
        username = res[user]['username']
        password = res[user]['password']
        xuanchuantu = res['road']['xuanchuantu']
        print username, password
        return login_url, username, password, xuanchuantu

    def login(self, url, username, password):
        # self.dr = webdriver.PhantomJS('phantomjs')
        # # self.dr = webdriver.Firefox()
        # self.dr.maximize_window()
        # self.source = RoboBrowser(history=True)
        self.dr.get(url)
        # Dynamically identify the element ids: assumes the first three
        # <input> tags are username, password and submit button — TODO confirm.
        self.source.open(url)
        ids = self.source.select('input')
        username_id = str(ids[0].attrs['id'])
        password_id = str(ids[1].attrs['id'])
        button_id = str(ids[2].attrs['id'])
        # Fill the form in the driven browser and submit.
        self.dr.find_element_by_id(username_id).click()
        self.dr.find_element_by_id(username_id).send_keys(username)
        self.dr.find_element_by_id(password_id).click()
        self.dr.find_element_by_id(password_id).send_keys(password)
        self.dr.save_screenshot('./login.png')
        self.dr.find_element_by_name(button_id).click()
        time.sleep(2)

    # This method could later be generalised to iterate over form elements.
    def add_jj(self, img):
        print "简介"
        self.dr.find_elements_by_class_name('btn-search')[1].click() # enter the "new booking" page
        self.dr.find_element_by_id('fileToUpload1').send_keys(img)
        self.dr.find_element_by_class_name('btn-confirm2').click()
        self.dr.find_element_by_name('title').click()
        self.dr.find_element_by_name('title').send_keys('whtest1')
        self.dr.find_element_by_name('leader').click()
        self.dr.find_element_by_name('leader').send_keys('whtest1')
        self.dr.find_element_by_name('tel').click()
        self.dr.find_element_by_name('tel').send_keys('13421111111')
        self.dr.find_element_by_name('location').click()
        self.dr.find_element_by_name('location').send_keys(u'朝阳区')
        self.dr.find_elements_by_tag_name('img')[2].click()
        # The confirmation dialog lives in an iframe; switch in, click OK,
        # then switch back to the main document.
        frame = self.dr.find_elements_by_tag_name('iframe')[0].get_attribute('name')
        self.dr.switch_to_frame(frame)
        self.dr.find_element_by_id('ok').click()
        self.dr.switch_to_default_content()
        select = self.dr.find_elements_by_tag_name('select')
        select[0].find_elements_by_tag_name('option')[1].click()
        time.sleep(2)
        select[1].find_elements_by_tag_name('option')[1].click()
        # self.dr.find_elements_by_tag_name('select')[1].find_elements_by_tag_name('option')[1].click()
        self.dr.save_screenshot('./save.png')
        self.dr.find_element_by_link_text(u'保存').click()

    def add_yyxx(self):
        # Fill the booking-info date range and tick the target checkbox.
        self.dr.find_element_by_link_text(u'预约信息').click()
        self.dr.find_element_by_name('s_start').send_keys('2015-01-08')
        self.dr.find_element_by_name('s_end').send_keys('2015-01-28')
        self.dr.find_element_by_xpath('/html/body/div[4]/div/div[2]/div/div[2]/div[1]/div[2]/span[5]/input').click()

    def quit(self):
        time.sleep(5)
        self.dr.quit()
예제 #49
0
import re
from robobrowser import RoboBrowser

url = 'http://itest.info/courses/2'
b = RoboBrowser(history=True)
b.open(url)

class_name = b.select('.headline h2')
print class_name[0].text

class_desc = b.select('.tag-box')
print class_desc[0].text

class_time = b.select('h4')
print class_time[0].text

teacher = b.select('.thumbnail-style h3')
print teacher[0].text

qq = b.find(text=re.compile('QQ'))
print qq

qq_group = b.find(text=re.compile('\+selenium'))
print qq_group

예제 #50
0
파일: robo.py 프로젝트: yangeren/salome
__author__ = 'hanz'
#*--coding=utf-8--*
import re
from robobrowser import RoboBrowser
from BeautifulSoup import BeautifulSoup

dr = RoboBrowser(history=True)
dr.open('http://yuyueweijian.test.zae.zhongsou.com/user/login')

titles = dr.select('input')
for title in titles:
    print title.attrs['id'],title.attrs['name']
예제 #51
0
파일: update.py 프로젝트: yankees714/direct
# Scrape the Bowdoin student directory, one last-name initial at a time.
# NOTE(review): USERNAME, PASSWORD and the `string` import come from earlier
# in the file; the body of the final loop is truncated in this chunk.
# Python 2 script — `print` statements.
browser = RoboBrowser()
letters = [letter for letter in string.ascii_lowercase]

# Login
browser.open('https://www.bowdoin.edu/BowdoinDirectory/rmSignon.jsp')
form = browser.get_forms()[1]
form["uname"] = USERNAME
form["pword"] = PASSWORD
print "Logging in...",
browser.submit_form(form)

# Only do external access
# browser.open("http://www.bowdoin.edu/BowdoinDirectory/lookup.jsp")

# Make sure we actually logged in — the #sch search form only appears
# for authenticated sessions.
if browser.select("#sch"):
    print "done!"
else:
    print "FAILED."
    quit()

for letter in letters:
    # Get all students with last name beginning with letter
    form = browser.get_form(id='sch')
    form["ln"].value = letter
    form["so"].value = "stu"   # restrict search to students
    browser.submit_form(form)

    students = browser.select('.person')

    for student in students:
예제 #52
0
# coding: utf-8
import re
from robobrowser import RoboBrowser

# Open the QQ portal, follow the "today's topic" link, then print the
# article's title and body. (Python 2 script — `print` statements.)
url = "http://www.qq.com/"
b = RoboBrowser(history=True)
b.open(url)

# Grab the "today's topic" link
today_top = b.find(id="todaytop").a
print today_top["href"]

b.follow_link(today_top)

# At this point the browser has navigated to the topic's detail page

# Print the title
title = b.select(".hd h1")[0]
print "*************************************"
print title.text
print "*************************************"

# Print the article body
print b.find(id="articleContent").text
예제 #53
0
#coding: utf-8
import re
from robobrowser import RoboBrowser

# List the text of every link on a course page, then count the
# class="container" divs. (Python 2 script — `print` statements.)
url = 'http://itest.info/courses/2'
b = RoboBrowser(history=True)
b.open(url)

# All the <a> elements on the page
all_links = b.select('a')  
for link in all_links:
  print link.text

# All the divs whose class is "container"
divs = b.select('.container')
print len(divs)




예제 #54
0
import re
from robobrowser import RoboBrowser

# Open a course page and print its headline, the h4 info lines and the full
# visible body text. The triple-quoted block below is disabled search-form
# code kept for reference. (Python 2 script — `print` statements.)
b = RoboBrowser(history=True)
b.open('http://itest.info/courses/2')

'''
form = b.get_form(action='/s')
print form

form['wd'].value = 'selenium'

b.submit_form(form)

'''
# Course headline
title = b.select('.headline h2')
print title[0].text

# Schedule/info headings
infos = b.select('h4')

for info in infos:
  print info.text

# Entire visible page text
body = b.select('body')
print body[0].text
예제 #55
0
class techMTimeSheetCls():
	"""Automates weekly timesheet submission on the TechMahindra PACE HR
	(PeopleSoft) portal via RoboBrowser.

	Relies on the owning `parent` object for settings (credentials, proxy)
	and utility helpers.
	"""

	def __init__(self,parent):
		# Pull settings and helpers off the owning application object.
		self.parent=parent
		self.settings=self.parent.settings
		self.tools=self.parent.customTools
		self.mainUrl='https://pacehr.techmahindra.com/psp/PACEHR/EMPLOYEE/HRMS/c/ROLE_EMPLOYEE.TL_MSS_EE_SRCH_PRD.GBL?FolderPath=PORTAL_ROOT_OBJECT.CO_EMPLOYEE_SELF_SERVICE.HC_TIME_REPORTING.HC_RECORD_TIME.HC_TL_SS_JOB_SRCH_EE_GBL&IsFolder=false&IgnoreParamTempl=FolderPath%2cIsFolder'
		self.timeSheetUrl='https://pacehr.techmahindra.com/psc/PACEHR/EMPLOYEE/HRMS/c/ROLE_EMPLOYEE.TL_MSS_EE_SRCH_PRD.GBL'
		self.sslVerification=True		
		self.loginId=self.settings.techmID
		self.loginPass= self.settings.techmPass
		print("techMTimeSheetCls is ready!")

	def useProxy(self):
		# Route traffic through the configured proxy and drop SSL
		# verification (the intercepting proxy re-signs TLS).
		os.environ["HTTP_PROXY"] = os.environ["HTTPS_PROXY"] = self.settings.nabProxy
		self.sslVerification=False		
		
	def submitTimeSheet(self):
		# NOTE(review): isInternetAvailable is referenced, not called — if it
		# is a method this check is always truthy; confirm upstream.
		if(not self.tools.isInternetAvailable):
			print("No internet available, Cannot submitTimeSheet")
			return		 
		self.browser = RoboBrowser(history=True, user_agent='Mozilla/5.0')
		print("----Loading techm site----")
		self.browser.open(self.mainUrl, verify=self.sslVerification)
		if(self.browser.response.ok):
			print("----Techm site loaded!, Checking for login form----")
			self.form = self.browser.get_form(id='login')
			if(self.browser.response.ok):
				print("----Login form loaded!, Filling login details----")
				self.form['userid']=self.loginId
				self.form['pwd']=self.loginPass
				self.browser.submit_form(self.form)
				if(self.browser.response.ok):		
					print("----Login successful!, Loading timesheet url----")			
					self.browser.open(self.timeSheetUrl, verify=self.sslVerification, method='post')
					if(self.browser.response.ok):		
						print("----Timesheet url loaded, Checking timesheet form----")			
						self.form2 = self.browser.get_form(id='TL_MSS_EE_SRCH_PRD')
						if(self.browser.response.ok):		
							print("----Timesheet form loaded, Filling timesheet data----")			
							# PeopleSoft hidden-field protocol: ICAction names
							# the button to "press" server-side — presumably
							# the Save action; the remaining magic values are
							# project/task codes and 8h per weekday.
							# TODO confirm codes against the live form.
							self.form2['ICAction']='TL_SAVE_PB'
							self.form2['ICStateNum']='1'
							self.form2['TL_TR_WEEK_WRK_USER_FIELD_1$0']='PA'
							self.form2['USER_FIELD_3$0']='000000000000004'
							self.form2['PROJECT$0']='C01000000019679'
							self.form2['QTY_DAY1$0']='8.00'
							self.form2['QTY_DAY2$0']='8.00'
							self.form2['QTY_DAY3$0']='8.00'
							self.form2['QTY_DAY4$0']='8.00'
							self.form2['QTY_DAY5$0']='8.00'
							# The save button must not be submitted as a field
							# itself — ICAction already carries it.
							self.form2.fields.pop('TL_SAVE_PB')
							self.browser.submit_form(self.form2)
							if(self.browser.response.ok):		
								print("----Timesheet submitted!----")			
								# Portal echoes a status message in this span.
								expectedMsg = self.browser.select('span.PSEDITBOX_DISPONLY')
								if(expectedMsg):
									print(str(expectedMsg[0].text))
								else:
									print("----Error! Unable to update timesheet-----")
							else:
								print("----Error! Unable to update timesheet-----")
						else:
							print("----Error! Unable to update timesheet-----")						   
					else:
						print("----Error! Unable to update timesheet-----")					
				else:
					print("----Error! Unable to update timesheet-----")				
			else:
				print("----Error! Unable to update timesheet-----")			
		else:
			print("----Error! Unable to update timesheet-----")   
예제 #56
0
import re
from robobrowser import RoboBrowser
from datetime import datetime, timedelta

browser = RoboBrowser(history=True)
browser.open("http://destinylfg.net")

addMyGroup = browser.select('#new')

form = browser.get_form(class_='form-horizontal panel-body')

form['region'].value  = 'northamerica'
form['platform'].value  = 'ps4'
form['gamertag'].value  = 'YOUR USERNAME'
form['level'].value  = 31
form['event'].value  = 'strikes-nightfall-weekly'
form['notes'].value  ='#lfm 30+ add USERNAME will not reply on here.'

startTime = datetime.now()
endTime = datetime.now()

browser.submit_form(form)
print 'post'

while(1):
  if(endTime - startTime > timedelta(seconds=30)):
    print 'post'
    startTime = datetime.now()
    endTime = datetime.now()
    browser.submit_form(form)
    wait = False
예제 #57
0
def get_source(app, version, username, password,
               base_unpack_dir=None, clean=True, keep=True, archive_type=None):
    """Download and unpack the source archive for an Atlassian product.

    Logs in to the MyAtlassian portal, scrapes the source-download table for
    *app*, downloads the archive matching *version*, and extracts it under
    *base_unpack_dir* (default: ./versions), returning the unpacked path.

    clean=True deletes any previously downloaded archive first;
    keep=False deletes the archive after extraction.
    Raises IOError on login failure and AtlassianSourceDownloadError when the
    version cannot be found or the archive layout is unexpected.
    """
    browser = RoboBrowser()
    # log in to the MyAtlassian portal
    browser.open(LOGIN_PAGE)
    login_form = browser.get_form(id='form-login')
    login_form['username'].value = username
    login_form['password'].value = password
    browser.submit_form(login_form)
    if browser.response.status_code != 200:
        raise IOError("Login failed to Atlassian ID service.")
    # get the list of versions for the application we're wanting to download source for
    browser.open("%s/%s" % (SOURCE_DOWNLOAD_BASE, app))
    versions = browser.select('table#source-download-table tr.smallish')
    row_number = 0
    version_download_map = {}
    archive_type = select_archive_type(app, archive_type)
    archive_extension = get_archive_extension(archive_type)
    # Build {version string: download path}. Rows that fail to parse or that
    # are for a different archive type are skipped via the local exception.
    for version_row in versions:
        row_number += 1
        try:
            columns = version_row.find_all('td')
            version_field = columns[0]
            if len(version_field) != 1:
                raise AtlassianSourceDownloadError("Version field not found.")
            version_text = version_field.text.strip()
            if len(version_text) == 0:
                raise AtlassianSourceDownloadError("Version field contained no text.")
            version_name_match = VERSION_EXTRACT_REGEX.match(version_text)
            if version_name_match is None:
                raise AtlassianSourceDownloadError("Couldn't match version number in field.")
            version_archive_type = version_name_match.group('type').lower()
            if version_archive_type != archive_extension:
                raise AtlassianSourceDownloadError("Archive type didn't match required type.")
            download_link_field = columns[-1].find('a')
            if len(download_link_field) != 1:
                raise AtlassianSourceDownloadError("Download link field not found or has multiple.")
            download_path = download_link_field.get('href').strip()
            if len(download_path) == 0:
                raise AtlassianSourceDownloadError("Download link URL contained no value.")
            version_download_map[version_name_match.group('version')] = download_path
        except AtlassianSourceDownloadError as ex:
            # print("Skipped row number %d. Reason: %s" % (row_number, ex))
            pass
    # blow up if the version requested isn't in the list
    if version not in version_download_map:
        raise AtlassianSourceDownloadError("Unable to find version '%s' on Atlassian source site."
                                           % version)
    # set the default unpack dir if it wasn't provided and create it if it doesn't exist
    if base_unpack_dir is None:
        base_unpack_dir = '%s/versions' % os.getcwd()
    else:
        base_unpack_dir = base_unpack_dir.rstrip(os.path.sep)
    os.makedirs(base_unpack_dir, exist_ok=True)
    # find the specified version in the list and download it
    version_dir_path = '%s/%s/%s' % (base_unpack_dir, app, version)
    source_archive_name = '%s/%s_%s.%s' % (base_unpack_dir, app, version, archive_extension)
    if clean and os.path.isfile(source_archive_name):
        os.unlink(source_archive_name)
    if not os.path.isfile(source_archive_name):
        source_download_url = MY_ATLASSIAN + version_download_map[version]
        browser.open(source_download_url)
        with open(source_archive_name, 'wb') as source_archive_file:
            source_archive_file.write(browser.response.content)
    with get_archive_object(archive_type, source_archive_name) as src:
        os.makedirs(version_dir_path, exist_ok=True)
        # The archive must contain exactly one top-level directory.
        top_dirs = list(set(d.split(os.path.sep)[0] for d in src.namelist()))
        if len(top_dirs) != 1:
            raise AtlassianSourceDownloadError("Couldn't unpack archive - unexpected contents.")
        # extract the archive to a temporary location that we can move from later if all goes well
        top_level_dir_name = top_dirs[0]
        archive_extraction_dir = mkdtemp()
        try:
            # unpack the archive to the temporary folder
            src.extractall(archive_extraction_dir)
            # remove the target directory if it already exists
            if os.path.isdir(version_dir_path):
                shutil.rmtree(version_dir_path)
            shutil.move(os.path.join(archive_extraction_dir, top_level_dir_name), version_dir_path)
        finally:
            shutil.rmtree(archive_extraction_dir)
    if not keep:
        os.unlink(source_archive_name)
    # now that we've got the source, return the path it lives at
    return version_dir_path
def getAcademicCalendarEvents(url):
    """Scrape an academic-calendar page and group its events by semester.

    Returns a dict keyed by "<Season>_<Year>" (e.g. "Spring_2015"), each
    value a list of (event_name, start_date, end_date) tuples with dates
    formatted Google-Calendar style as YYYY-MM-DD (all-day events; the end
    date is exclusive, so multi-day events get one extra day added).
    """
    #Initialize ParseDateTime
    cal = parsedatetime.Calendar()
    
    # Initialize robobrowser
    browser = RoboBrowser(history=False)
    browser.open(url)

    event_tags = browser.select("#content-main tr")

    # Dictionary of semesters
    # in the form: {Spring_2015: [<events>], Fall_2014: [<events>]}
    semesters = {}

    # cycle through the event rows, adding them to the event array
    year = None
    season_year = "" # i.e. Spring_2015, to be used as a key for semesters Dict
    for event in event_tags:

            ##### Scraping the web-pages #####

        # Headers in the Schedules are within <th></th>
        if len(event.find_all("th")) > 0:
            # grab the first word of the header (Usually the Season)
            match = re.search(r'(\w+).*(\d\d\d\d)', event.find("th").text)
            season = match.group(1)
            year = match.group(2)
            
            season_year = season + "_" + year
            # Create a new entry in the semester dict
            semesters[season_year] = []
            
        else:
            # Event data is stored in 3 <td> tags:
            #   The first tag is the name of the event
            #   The second is the days of the week of the event (useless)
            #   The third is the Month and date(s) of the event
            event_info = event.find_all("td")
            event_name = event_info[0].text
            messy_event_date = event_info[2].text

                ##### Extracting event info #####
           
            # These dates are pretty messy, so get the important stuff with regex
            match = re.search(r'(\w+)\s(\d+)-*(\d*)(\w*)\s*(\d*)', messy_event_date)
            
            start_date = match.group(1) + " " + match.group(2) # i.e. December 1
            start_date_and_year = match.group(1) + " " + match.group(2) + " " + str(year) # i.e. December 1 2014
            source_date = match.group(1) + " " + str(year) # i.e. December 2014
            
            # Determine End Date based on single or multi-day event
            if len(match.group(3)) > 0:
                # Multi-Day Event
                # i.e. December 13-15
                end_date = match.group(1) + " " + match.group(3)
            elif len(match.group(4)) > 0:
                # i.e. December 13 - January 12
                end_date = match.group(4) + " " + match.group(5)
            else:
                # Single Day Event
                end_date = start_date = start_date_and_year

            # FIX: compare string values with ==, not object identity with
            # `is` — the original only worked because the single-day branch
            # aliases both names to the same object.
            if start_date == end_date:
                # Single Day Event
                # Note that this is repetitive, consider refactoring into above if-else statements
                event_start_date = event_end_date = cal.parseDateText(start_date_and_year)
            else:
                date_range_string = start_date + "-" + end_date
                date_range_pair = cal.evalRanges(date_range_string, cal.parseDateText(source_date))
                event_start_date = date_range_pair[0]
                event_end_date = date_range_pair[1]

            # Grab Year, Month, Day from dates
            event_start_date = [event_start_date[0], event_start_date[1], event_start_date[2]]
            event_end_date = [event_end_date[0], event_end_date[1], event_end_date[2]]
                            
            ##### Convert dates to datetime.date objects #####
        
            # This allows easy addition of days with timedelta (to make sure it works across months)

            start_date = datetime.date(event_start_date[0], event_start_date[1], event_start_date[2])
            end_date = datetime.date(event_end_date[0], event_end_date[1], event_end_date[2]) 

            # Fix off-by-one with multi-day event creation by adding a day to multi-day events
            # i.e. Google Calendar API interprets the end of an event to be the beginning of the
            #      provided end date (12:00a.m.). We want that end date to be included, so add 1 day
            
            if not start_date == end_date:
                end_date += datetime.timedelta(days=1)

            # Format date in Google Calendar API style: YYYY-MM-DD
            # (since these are all all-day events)
            
            start_date = str(start_date.year) + "-" + str(start_date.month).zfill(2) + "-" + str(start_date.day).zfill(2)
            end_date = str(end_date.year) + "-" + str(end_date.month).zfill(2) + "-" + str(end_date.day).zfill(2)

            # Add event to the appropriate semester

            semesters[season_year].append((event_name, start_date, end_date))
    
    return semesters
#coding: utf-8
import re
from robobrowser import RoboBrowser

# Log in to testerhome.com and print the success flash message.
# (Python 2 script — `print` statements.) Replace the placeholder
# credentials before running.
url = 'http://testerhome.com/account/sign_in/'
b = RoboBrowser(history=True)
b.open(url)

# Grab the login form
login_form = b.get_form(action='/account/sign_in')
print login_form

# Fill in the username and password
login_form['user[login]'].value = 'your account'
login_form['user[password]'].value = 'your password'

# Submit the form
b.submit_form(login_form)

# Print the login-success flash message
print b.select('.alert.alert-success')[0].text




예제 #60
0
파일: hallon.py 프로젝트: molobrakos/hallon
from os import path
import sys
import yaml
import json

# Credentials live next to the script in .hallon-credentials.yaml
with open(path.join(path.dirname(sys.argv[0]),
                    ".hallon-credentials.yaml")) as f:
    CREDENTIALS = yaml.safe_load(f)

# Log in to the Hallon self-service portal and scrape the data-usage figures.
# NOTE(review): RoboBrowser is imported earlier in the file (not visible here).
URL = "https://www.hallon.se/mina-sidor"
br = RoboBrowser(parser="lxml")
br.open(URL)
form = br.get_form(action="/logga-in")
form["UserName"].value = CREDENTIALS["username"]
form["Password"].value = CREDENTIALS["password"]
br.submit_form(form)

# Normalise Swedish comma decimals to dots, then split into tokens.
# Assumes the text reads like "<remaining> av <total> GB" — TODO confirm.
usage = br.select("p.usage")[0].text.replace(",", ".").split()

remaining = round(float(usage[0]), 2)
total = int(usage[2])
used = round(float(total-remaining), 2)
used_pct = round(used*100/total, 1)
days_remaining = int(br.select("p.usage-daysleft")[0].text.split()[0])

# Emit a machine-readable summary on stdout.
print(json.dumps({"total": total,
                  "remaining": remaining,
                  "used": used,
                  "used_pct": used_pct,
                  "days_remaining": days_remaining}))