Example #1
    def submit(self):
        last_id, b, c, d, e = Submit.get_latest_verdict(self.username)

        browser = RoboBrowser(parser = 'html.parser')
        browser.open('http://codeforces.com/enter')
        # todo check if it takes time
        enter_form = browser.get_form('enterForm')
        enter_form['handleOrEmail'] = self.username
        enter_form['password'] = self.password
        browser.submit_form(enter_form)

        try:
            checks = list(map(lambda x: x.getText()[1:].strip(),
                browser.select('div.caption.titled')))
            if self.username not in checks:
                Colour.print('Login Failed.. Wrong password.', Colour.RED)
                return
        except Exception as e:
            Colour.print('Login Failed.. Maybe wrong id/password.', Colour.RED)
            return

        # todo check if it takes time
        browser.open('http://codeforces.com/contest/'+self.c_name+'/submit')
        submit_form = browser.get_form(class_ = 'submit-form')
        submit_form['submittedProblemIndex'].value = self.p_name
        submit_form['sourceFile'] = self.inputfile

        browser.submit_form(submit_form)
        print(browser.url)
        # if browser.url[-6:] != 'status': # it was used when submitting from problemset
        if 'my' not in browser.url:
            Colour.print('Failed submission, probably you have submitted the same file before', Colour.RED)
            return

        Submit.print_verdict(last_id,self.username,100)
        Colour.print('[{0}] submitted ...'.format(self.inputfile), Colour.GREEN)
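The `Submit.get_latest_verdict` helper used above is not part of this snippet. A minimal standalone sketch, assuming it queries the public Codeforces API (`user.status`) and returns the id, verdict, passed-test count, time, and memory of the newest submission:

import requests

def get_latest_verdict(user):
    # Hypothetical sketch: fetch the user's most recent submission via the public Codeforces API
    r = requests.get('https://codeforces.com/api/user.status',
                     params={'handle': user, 'from': 1, 'count': 1})
    sub = r.json()['result'][0]
    # id, verdict, passed tests, time (ms), memory (bytes) -- matching the 5-tuple unpacked above
    return (sub['id'], sub.get('verdict', 'TESTING'), sub['passedTestCount'],
            sub['timeConsumedMillis'], sub['memoryConsumedBytes'])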
Example #2
    def __init__(self, artist, song, p_proxy):
        self.artist = self.remove_unwanted_chars(artist)
        self.song = self.remove_unwanted_chars(song)

        session = None

        #setting proxy
        if p_proxy is not None:
            arr = p_proxy.split(',')
            for ent in arr:
                m = re.match(r'(.+)=([\d\.\:]+)', ent)
                if m:
                    site = m.group(1)
                    proxy = m.group(2)
                    if site == self.site:
                        logging.info(self.log_msg("use proxy:" + proxy))
                        session = Session()
                        session.proxies = {'http': proxy, 'https': proxy}
                        break

        self.browser = RoboBrowser(parser="html.parser",
                                   session=session,
                                   user_agent='Mozilla Firefox',
                                   tries=5)
Example #3
def zdz_post010(uid, unam, upas, chdr, ctxt, uhost='http://ziwang.com/'):
    brow = RoboBrowser(history=True, cache=True)
    uexit = uhost + 'member.php?action=logout'
    brow.open(uexit)
    zt.wait(1)
    #
    ulog = uhost + 'forum.php'  # ,'58'  # "灌水乐园" (off-topic board)
    brow.open(ulog)
    zt.wait(2)
    #print('ulog,',ulog)
    xact = "member.php?mod=logging&action=login&loginsubmit=yes&infloat=yes&lssubmit=yes"
    xlog = brow.get_form(action=xact)
    #print('xlog',xlog)
    if xlog is None:
        return False
    print('@xlog,', unam, upas)
    #
    xlog['username'].value = unam
    xlog['password'].value = upas
    brow.submit_form(xlog)
    #
    #       http://ziwang.com/forum.php?mod=post&action=newthread&fid=67
    upost0 = 'http://ziwang.com/forum.php?mod=post&action=newthread&fid='
    upost = upost0 + uid
    #print('@xpost, ',uid,upost);
    x = brow.open(upost)
    zt.wait(1)
    xact_post = 'forum.php?mod=post&action=newthread&fid=' + uid + '&extra=&topicsubmit=yes'
    xpost = brow.get_form(action=xact_post)
    #print('@xpost, ',xpost)
    #
    xpost['subject'].value, xpost['message'].value = chdr, ctxt
    brow.submit_form(xpost)
    #print('@xpost, ',upost);
    #
    #re_brow,upost,chk_post
    return True
Example #4
def get_fb_token(login, password):
    fb_auth_url = 'https://www.facebook.com/v2.6/dialog/oauth?redirect_uri=fb464891386855067%3A%2F%2Fauthorize%2F&display=touch&state=%7B%22challenge%22%3A%22IUUkEUqIGud332lfu%252BMJhxL4Wlc%253D%22%2C%220_auth_logger_id%22%3A%2230F06532-A1B9-4B10-BB28-B29956C71AB1%22%2C%22com.facebook.sdk_client_state%22%3Atrue%2C%223_method%22%3A%22sfvc_auth%22%7D&scope=user_birthday%2Cuser_photos%2Cuser_education_history%2Cemail%2Cuser_relationship_details%2Cuser_friends%2Cuser_work_history%2Cuser_likes&response_type=token%2Csigned_request&default_audience=friends&return_scopes=true&auth_type=rerequest&client_id=464891386855067&ret=login&sdk=ios&logger_id=30F06532-A1B9-4B10-BB28-B29956C71AB1&ext=1470840777&hash=AeZqkIcf-NEW6vBd'
    s = RoboBrowser(parser="lxml")
    s.open(fb_auth_url)
    f = s.get_form()
    f["pass"] = password
    f["email"] = login
    s.submit_form(f)
    f = s.get_form()
    try:
        import re
        s.submit_form(f, submit=f.submit_fields['__CONFIRM__'])
        access_token = re.search(r"access_token=([\w\d]+)",
                                 s.response.content.decode()).groups()[0]
        return access_token
    except Exception as ex:
        print(
            "access token could not be retrieved. Check your username and password."
        )
        print("Official error: %s" % ex)
        return {
            "error":
            "access token could not be retrieved. Check your username and password."
        }
Example #5
File: spyder.py Project: abhidya/SixDOS
def connections(handle):
    session = requests.Session()
    browser = RoboBrowser(session=session, user_agent=random.choice(HEADERS_LIST), parser="lxml")
    client = MongoClient("mongodb://*****:*****@35.185.118.72:27017/")
    db = client['sixdos']
    # ttweets = total_tweets(handle)

    if db.data.find({'_id': handle}).count() == 0:
        print(handle)
        updatestats.update_last(handle)
        min_position, links = get_tweets(handle)

        with tqdm(total=10000) as pbar:
            while True:
                min_position1, links1 = get_tweets(handle, min_position)
                links = links + links1
                pbar.update(len(links1))
                if min_position1 is None:
                    break
                min_position = min_position1

        people_list = []

        for link in tqdm(links):
            if handle in link:
                people_list = people_list + get_people(link, handle)

        print("Handle: ", handle, "Length: ", str(len(people_list)), people_list)
        t = datetime.datetime.now()
        # t = datetime.datetime(year, month, day)
        s = t.strftime('%Y-%m-%d %H:%M:%S.%f')

        result = {"_id": handle, "Length": str(len(people_list)), "Connections": str(people_list), "date": s[:-3]}
        update = db.data.update({'_id': handle}, {"$set": result}, upsert=True)
        people_list = []
        return update
Example #6
    def authenticate_user(self, user_email, password):

        # Set the users input values and read the home page
        webpage = RoboBrowser()
        goodreads_page = 'http://www.goodreads.com'  # best practice: page can be changed whenever needed
        webpage.open(goodreads_page)

        # load and submit the login form using the get_form function
        login_form = webpage.get_form(id='sign_in')
        login_form['user[email]'].value = user_email
        login_form['user[password]'].value = password

        webpage.submit_form(login_form)

        # read the web page again and check for certain tags only visible after login to verify the user
        # another method could be hitting the database for verification using dynamic query building
        home_page = str(webpage.parsed)  # 'parsed' is a property returning the BeautifulSoup document

        if "Currently Reading" in home_page:
            print("User Authenticated")
            return True
        else:
            print("Invalid user creditentials")
            return False
Example #7
def get_hitran_molecules():
    """
    Accesses http://hitran.org/lbl/# and reads its table
    Returns: tuple: table (list of lists), header (list of strings)
    """

    data, header = [], []

    browser = RoboBrowser(history=True, parser="lxml")
    browser.open("http://hitran.org/lbl/#")

    table = browser.find("table")

    hh = table.find_all("th")
    for h in hh:
        # Skips cells whose class starts with "meta" (they are not of interest)
        cls = h.get("class")
        if isinstance(cls, list) and cls[0].startswith("meta"):
            continue
        header.append(h.text)

    rr = table.find_all("tr")
    for r in rr:
        dd = r.find_all("td")
        if len(dd) == 0:
            continue
        row = []
        data.append(row)
        for d in dd:
            # Skips cells whose class starts with "meta" (they are not of interest)
            cls = d.get("class")
            if isinstance(cls, list) and cls[0].startswith("meta"):
                continue
            row.append(d.text)

    return data, header
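A short usage sketch for the function above, printing the header and the first few rows:

if __name__ == "__main__":
    data, header = get_hitran_molecules()
    print(header)
    for row in data[:5]:  # first few molecules only
        print(row)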
Example #8
def set_login(handle=None):
    if handle is None:
        handle = input("Handle: ")
    password = getpass.getpass("Password: ")
    browser = RoboBrowser(parser="lxml")  # the constructor call was partially masked in the source
    browser.open("http://codeforces.com/enter")
    enter_form = browser.get_form("enterForm")
    enter_form["handleOrEmail"] = handle
    enter_form["password"] = password
    browser.submit_form(enter_form)

    checks = list(
        map(lambda x: x.getText()[1:].strip(),
            browser.select("div.caption.titled")))
    if handle not in checks:
        print("Login Failed.")
        return
    else:
        secret_loc = os.path.join(os.path.dirname(__file__), "secret")
        secretfile = open(secret_loc, "w")
        secretfile.write(encode(handle) + " " + encode(password))
        secretfile.close()
        print("Successfully logged in as " + handle)
Example #9
    def pushedbutton(self, b):
        account = self.lineEdit.text()
        pasw = self.lineEdit_3.text()
        #use robobrowser module to manipulate web page
        browser = RoboBrowser(history=True)
        browser.open('http://web1.cmu.edu.tw/stdinfo/login.asp')
        form1 = browser.get_form(id='form1')
        form1['f_id'].value = account
        form1['f_pwd'].value = pasw
        browser.submit_form(form1)
        if browser.state.url == "http://web1.cmu.edu.tw/stdinfo/loginerr.asp":
            self.lineEdit_2.setText('帳號密碼錯了?')  # "Wrong account or password?"
        else:
            self.lineEdit_2.setText('成功登入,填寫中....')  # "Logged in, filling out the surveys..."
            link_one = browser.get_link(text='教師教學意見調查')  # link text: "teaching evaluation survey"
            browser.follow_link(link_one)
            fill_links = []
            for l in browser.get_links(text='填寫'):  # link text: "fill in"
                fill_links.append(l)
            fill_links.pop(0)
            for li in fill_links:
                browser.follow_link(li)
                form2 = browser.get_form(id='thisform')
                form2['CH_1'].value = '3'
                form2['CH_2'].value = '3'
                form2['CH_3'].value = '3'
                form2['CH_4'].value = '3'
                form2['CH_5'].value = '3'
                form2['CH_6'].value = '3'
                form2['CH_7'].value = '3'
                form2['CH_8'].value = '3'
                form2['CH_9'].value = '3'
                form2['CH_10'].value = '3'

                browser.submit_form(form2)
            self.lineEdit_2.setText('Done!')
Example #10
def get_portal_auth() -> str:
    """
    Attempts login to the Club1909 page and retrieves the cookie FortressPortalAuth

    :return:
    """
    browser = RoboBrowser(session, history=True)
    browser.open(LOGIN_FORM_URL)
    login_form = browser.get_forms()[0]
    login_form['email'] = os.environ['club1909_username']
    login_form['password'] = os.environ['club1909_password']

    # TODO: check get_forms returns one value
    # TODO: check login errors / exceptions

    logging.debug(
        f"Attempt to login with {os.environ['club1909_username']} and {os.environ['club1909_password']} "
    )

    browser.submit_form(login_form)
    logging.info(
        f"Found portal Auth code {browser.session.cookies['.FortressPortalAuth']}"
    )
    return browser.session.cookies['.FortressPortalAuth']
Example #11
    def show(self):
        browser = RoboBrowser(parser='html.parser')
        browser.open('http://codeforces.com/enter')
        enter_form = browser.get_form('enterForm')
        enter_form['handleOrEmail'] = self.username
        enter_form['password'] = self.password
        browser.submit_form(enter_form)

        try:
            checks = list(
                map(lambda x: x.getText()[1:].strip(),
                    browser.select('div.caption.titled')))
            if self.username not in checks:
                click.secho('Login Failed.. Wrong password.', fg='red')
                return
        except Exception as e:
            click.secho('Login Failed.. Maybe wrong id/password.', fg='red')
            return

        browser.open('http://codeforces.com/contest/' + self.c_name +
                     '/standings/friends/true')
        soup = browser.parsed
        ftable = soup.findAll('table',
                              {'class': 'standings'})[0].findAll('tr')[1:-1]
        tableh = soup.findAll(
            'table', {'class': 'standings'})[0].findAll('tr')[0].findAll('th')

        table_data = [[x.getText().strip() for x in tableh]]
        for friend in ftable:
            row = [x.getText().strip() for x in friend.findAll('td')]
            table_data += [row]

        tt = texttable.Texttable()
        tt.add_rows(table_data)
        tt.set_cols_valign(["b"] * len(tableh))
        print(tt.draw())
Example #12
    def submit(self):
        # get latest submission id, so when submitting should have not equal id
        last_id, b, c, d, e = Submit.get_latest_verdict(self.username)

        browser = RoboBrowser(parser='html.parser')
        browser.open('http://codeforces.com/enter')
        enter_form = browser.get_form('enterForm')
        enter_form['handleOrEmail'] = self.username
        enter_form['password'] = self.password
        browser.submit_form(enter_form)

        try:
            checks = list(
                map(lambda x: x.getText()[1:].strip(),
                    browser.select('div.caption.titled')))
            if self.username not in checks:
                click.secho('Login Failed.. Wrong password.', fg='red')
                return
        except Exception as e:
            click.secho('Login Failed.. Maybe wrong id/password.', fg='red')
            return

        browser.open('http://codeforces.com/problemset/submit')
        submit_form = browser.get_form(class_='submit-form')
        submit_form['submittedProblemCode'] = self.prob_id
        submit_form['sourceFile'] = self.inputfile

        browser.submit_form(submit_form)
        if browser.url[-6:] != 'status':
            click.secho(
                'Failed submission, probably you have submitted the same file before',
                fg='red')
            return

        Submit.print_verdict(last_id, self.username, 100)
        click.secho('[{0}] submitted ...'.format(self.inputfile), fg='green')
Example #13
def rslt(user, password):
    url = 'http://erp.iitbbs.ac.in'
    browser = RoboBrowser(history=False, parser='html.parser')
    response = browser.open(url)
    form = browser.get_form(action='login.php')
    form['email'].value = user
    form['password'].value = password
    browser.submit_form(form)

    if (browser.url != 'http://erp.iitbbs.ac.in/home.php'):
        return False

    attendance_link = 'http://erp.iitbbs.ac.in/Result/results.php'
    browser.open(attendance_link)

    soup = BeautifulSoup(browser.response.text, 'html.parser')
    content1 = soup.find('div', attrs={'class': 'inner2'})
    table0 = content1.find('table', attrs={'class': 'marks_list'})
    table1 = table0.find_all('table')
    heading = table1[0].find_all('tr')

    result = dict()

    lst = []
    lst1 = []
    for i in range(len(table1)):
        num = 0
        for row in table1[i].find_all('tr'):

            td = row.find_all('td')
            for j in td:
                lst.append(j.text.strip())
            result['table' + str(num)] = lst
            lst = []
            num = num + 1
    return result
Example #14
    def __init__(self):

        self.HEADERS_LIST = [
            'Mozilla/5.0 (Windows; U; Windows NT 6.1; x64; fr; rv:1.9.2.13) Gecko/20101203 Firebird/3.6.13',
            'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
            'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201',
            'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
            'Mozilla/5.0 (Windows NT 5.2; RW; rv:7.0a1) Gecko/20091211 SeaMonkey/9.23a1pre'
        ]
        self.session = requests.Session()
        self.browser = RoboBrowser(session=self.session, user_agent=random.choice(self.HEADERS_LIST))
        self.handle = ''
        self.id_url = "https://twitter.com/intent/user?user_id="
        self.prof_url = "https://twitter.com/"
        self.TWITTER_AUTH = tweepy.OAuthHandler(
            "",
            ""
        )
        self.TWITTER_AUTH.set_access_token(
            "",
            ""
        )
        self.api = tweepy.API(self.TWITTER_AUTH, parser=tweepy.parsers.JSONParser(), wait_on_rate_limit=True,
                              wait_on_rate_limit_notify=True, compression=True)
Example #15
def scrap(url):

    browser = RoboBrowser(user_agent='i am tool')
    browser.open(url)
    a = browser.find(class_='captcha')  # machine learning would be great for class prediction
    fullsrc = url[:-1] + a['src']
    request.urlretrieve(fullsrc, "captcha.jpg")
    ## tesseract will go here

    ## right here
    form = browser.get_form(action=re.compile(r'.'))

    # Fill it out
    form['name'].value = 'namaaaeee'

    form['password'].value = '*****@*****.**'
    form['password2'].value = 'teambeaver'
    form['captcha_1'].value = '1234'

    # Submit the form
    browser.submit_form(form)

    print(browser.response)
Example #16
def desktop(keyword,sitename,device,useragent):
    parser = 'html.parser'
 
    browser = RoboBrowser(history=False,
                          user_agent=useragent,
                          parser=parser)
     
    browser.open('https://www.google.com/search?num=100&q=' + keyword)
     
    # links = browser.find_all("div", {"class": "KJDcUb"})

    #desktop div where URLs are
    links = browser.find_all("div", {"class": "g"})
     
    counter = 0

    print('The user Agent you used was ----> ' + useragent)

    d=[]
    for i in links:
        counter = counter + 1
        if sitename in str(i):
            url = i.find_all('a', href=True)
            position = "%d" % (counter)
            rank = "%s" % (url[0]['href'])
            now = datetime.date.today().strftime("%d-%m-%Y")
            d.append(keyword)
            d.append(position)
            d.append(rank)
            d.append(device)
            d.append(now)
            print(keyword, position, rank, device, now)
    
    csv_export(d,keyword,device)
Example #17
    def getData(self):

        # temp
        sqlString = "DELETE FROM coin_ktoon WHERE date='%s'" % (
            self.todayString)
        self.dbconn.cur.execute(sqlString, )
        # temp

        browser = RoboBrowser(
            history=True,
            user_agent=
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/33.0.1750.152 Chrome/33.0.1750.152 Safari/537.36'
        )

        # ktoon login ---------------------------------------------------------------- start
        auth_url = 'https://www.myktoon.com/web/loginprc.kt'
        data = {'email': 'email value', 'passwd': 'password value'}
        browser.open(auth_url, method='post', data=data)
        # # ktoon login ---------------------------------------------------------------- end

        url_content = 'https://www.myktoon.com/web/payment/payment.kt'
        browser.open(url_content)
        self.coinTopElements = browser.find_all('a', {"name": "payamountkey"})

        self.product_title_list, self.product_price_list = [], []

        for idx, cel in enumerate(self.coinTopElements):

            for pt in cel.find_all('span'):
                if '베리' in pt.get_text():  # '베리' ("berry") is the site's coin unit
                    print(pt.get_text())
                    self.product_title_list.append(pt.get_text())

            for pp in cel.find_all('span', class_='won'):
                print(pp.get_text())
                self.product_price_list.append(pp.get_text())
Example #18
File: cli.py Project: TexLui/sammobile-cli
def main():
    parser = parse_args()
    args = parser.parse_args()
    if not args.link or not args.output:
        parser.print_help()
        exit()

    if os.path.isfile("config.json"):
        config_file = "config.json"
    else:
        config_file = ".example.config.json"

    # Read config file
    with open(config_file, 'r') as f:
        config = json.load(f)

    # browser instance
    browser = RoboBrowser(history=True, parser="html.parser")
    firmware_file_path = args.output[0]
    firmware_file_url = args.link[0]
    if os.path.isfile(config['cookiefile']):
        with open(config['cookiefile'], 'r') as cookie_file:
            # set session cookies
            add_dict_to_cookiejar(browser.session.cookies, json.load(cookie_file))
            download_firmware(browser, firmware_file_url, firmware_file_path)
    else:
        # Browse to sammobile login page
        browser.open("https://www.sammobile.com/login/")
        form = browser.get_form(id="loginform-custom")
        form["log"] = config['username']
        form["pwd"] = config['password']

        browser.submit_form(form)
        if browser.url == "https://www.sammobile.com":
            _save_session_cookie(browser.session.cookies, config['cookiefile'])
            download_firmware(browser, firmware_file_url, firmware_file_path)
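The `_save_session_cookie` helper is not shown in this example. A minimal sketch, assuming it dumps the cookie jar as a JSON dict so that the `add_dict_to_cookiejar` branch above can restore it on later runs:

import json
from requests.utils import dict_from_cookiejar

def _save_session_cookie(cookies, cookie_file_path):
    # Hypothetical helper: persist session cookies as a plain dict in JSON
    with open(cookie_file_path, 'w') as f:
        json.dump(dict_from_cookiejar(cookies), f)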
Example #19
warnings.filterwarnings('ignore')
filename = 'Q12005'

filepath = './zipfile'
#creating the folder for zipfiles
pathlib.Path(filepath).mkdir(parents=True, exist_ok=True)

#account parameters
username = '******'
password = '******'

# username = input('Please enter your username: ')
# password = input('Please enter your password: ')

br = RoboBrowser()  # the browser construction was masked in the source; default arguments assumed
br.open('https://freddiemac.embs.com/FLoan/secure/login.php?pagename=download')

#Getting form from browser
form = br.get_form()
form['username'] = username
form['password'] = password
br.submit_form(form)

filename = input('Please enter filename: ')

#accept the form
form1 = br.get_form()
form1['accept'] = 'Yes'
br.submit_form(form1)
Example #20
def open_page(url):
    """Opens the goodreads homepage for login"""
    br = RoboBrowser(history=True, parser="html.parser")
    br.open(url)
    return br
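A possible usage of `open_page`, reusing the Goodreads login pattern from example #6 (the form id and field names are assumptions carried over from that example):

br = open_page("https://www.goodreads.com")
login_form = br.get_form(id="sign_in")  # form id assumed, as in example #6
login_form["user[email]"].value = "user@example.com"
login_form["user[password]"].value = "not-a-real-password"
br.submit_form(login_form)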
Example #21
def scrape_bio_and_albums(keywords):
    """
    scrapes artists' data on https://www.lyrics.com/, and stores text (bio and albums)
    from an HTML page source of each artist's page in file (text&csv files).
    :param keywords: list of keywords that should represent artists' names (list)
    :return:
    """
    if keywords and isinstance(keywords, list):

        # builds new object of RoboBrowser with given params
        browser = RoboBrowser(
            parser='html.parser',
            user_agent=
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
            history=True,
            timeout=10)
        # Open a URL (using 'RoboBrowser' library).
        browser.open(BASE_URL)

        for keyword in keywords:
            if keyword and len(keyword) > 1:

                # get browser url (should be 'old' after searching a term - if browser goes to new url)
                old_url = browser.url

                # trying to search keyword on 'lyrics.com' (using RoboBrowser's methods to handle forms)
                # find the search form by its ID, set the 'st' query field to the keyword, and submit it
                form = browser.get_form(id='search-frm')
                form['st'].value = keyword
                browser.submit_form(form)

                # check if the url is changed (after searching a keyword)
                if old_url != browser.url:

                    # select required <a> tags, using CSS Selectors (see on BeautifulSoup's documentation)
                    a_tags = browser.select(
                        'body p[class~=serp-flat-list] a[href^="artist/"]')

                    if a_tags:
                        # browser.follow_link(a_tags[0])

                        # builds base url with href - to open required url using 'open()' method,
                        # and avoid including the "/lyrics/" part in url, when using 'follow_link()' method
                        first_artist_url = a_tags[0]['href'].replace(
                            "artist", BASE_URL + "artist")

                        # Open URL (should get url of the first suggested artist's page in results)
                        browser.open(first_artist_url)

                        # parse response content (bs4 obj), using HTML parser specified by the browser
                        soup = browser.parsed

                        if soup:
                            artist_bio_tag = soup.find(
                                class_='artist-bio')  # find tag by class
                            if artist_bio_tag:
                                # save parsed text (artist bio) from page source to a text file
                                save_source(keyword + " - bio",
                                            artist_bio_tag.get_text(),
                                            dir_path=os.path.join(
                                                ARTISTS_PATH, keyword))
                                # parse albums&songs from html tables, and save the data to a csv file
                                albums_to_csv(soup,
                                              keyword + " - albums",
                                              dir_path=os.path.join(
                                                  ARTISTS_PATH, keyword))

                        browser.back()  # Go back in browser history.
                    browser.back()  # Go back in browser history.
Example #22
import re
from robobrowser import RoboBrowser

# Browse to Rap Genius
browser = RoboBrowser(history=True)
browser.open('http://rapgenius.com/')

# Search for Queen
form = browser.get_form(action='/search')
form  # <RoboForm q=>
form['q'].value = 'queen'
browser.submit_form(form)

# Look up the first song
songs = browser.select('.song_name')
browser.follow_link(songs[0])
lyrics = browser.select('.lyrics')
lyrics[0].text  # \n[Intro]\nIs this the real life...

# Back to results page
browser.back()

# Look up my favorite song
browser.follow_link('death on two legs')

# Can also search HTML using regex patterns
lyrics = browser.find(class_=re.compile(r'\blyrics\b'))
lyrics.text
Example #23
#!/usr/bin/env python3
import os
import re
from collections import defaultdict
from datetime import datetime
from robobrowser import RoboBrowser
from ccf.config import LoadSettings
import pandas as pd

browser = RoboBrowser(history=True, timeout=6000, parser="lxml")

config = LoadSettings()["KSADS"]
download_dir = config["download_dir"]


def main():
    login()
    download_all()
    generate_snapshot_from_raw_excel_files()


def login():
    browser.open("https://ksads.net/Login.aspx")
    form = browser.get_form("form1")
    form["txtUsername"].value = config["user"]
    form["txtPassword"].value = config["password"]
    browser.submit_form(form)

    if browser.response.url == "https://ksads.net/Login.aspx":
        raise Exception("Incorrect credentials provided")
Example #24
session = requests.Session()
# note: both entries use the 'http' key, so only the second proxy is actually kept
session.proxies = {'http': '91.214.70.99:3128',
                    'http': '159.203.118.239:8080',}

while votes <= 4094:
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    text = str(soup.findAll('tr')[58])
    for i in range(len(text)):
        if text[i].isdigit() and count > 0:
            ini += str(text[i])
        if text[i].isdigit() and text[i+1].isspace() and count == 0:
            count += 1
        elif text[i].isdigit() and text[i+1].isspace() and count > 0:
            break
    votes = int(ini)
    print(votes)
    ini = ''
    count = 0
    br = RoboBrowser(history=True, session=session)
    br.open(url)
    form = br.get_form()
    form['id'].value = 733
    br.submit_form(form)
    tmp += 1
    if tmp == 30:
        time.sleep(1)
        tmp = 0
print("Finish! {} votes".format(votes+1))
Example #25
File: pewnet.py Project: sahwar/happypanda
class CommenHen:
    "Contains common methods"
    LOCK = threading.Lock()
    TIME_RAND = app_constants.GLOBAL_EHEN_TIME
    QUEUE = []
    COOKIES = {}
    LAST_USED = time.time()
    HEADERS = {
        'user-agent':
        "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0"
    }
    _QUEUE_LIMIT = 25
    _browser = RoboBrowser(user_agent=HEADERS['user-agent'],
                           parser='html.parser')

    def begin_lock(self):
        log_d('locked')
        self.LOCK.acquire()
        t1 = time.time()
        while int(time.time() - self.LAST_USED) < self.TIME_RAND:
            t = random.randint(3, self.TIME_RAND)
            time.sleep(t)
        t2 = time.time() - t1
        log_d("Slept for {}".format(t2))

    def end_lock(self):
        log_d('unlocked')
        self.LAST_USED = time.time()
        self.LOCK.release()

    def add_to_queue(self, url='', proc=False, parse=True):
        """Add url the the queue, when the queue has reached _QUEUE_LIMIT entries will auto process
		:proc -> proccess queue
		:parse -> return parsed metadata
		"""
        if url:
            self.QUEUE.append(url)
            log_i("Status on queue: {}/{}".format(len(self.QUEUE),
                                                  self._QUEUE_LIMIT))
        try:
            if proc:
                if parse:
                    return self.parse_metadata(*self.process_queue())
                return self.process_queue()
            if len(self.QUEUE) >= self._QUEUE_LIMIT:
                if parse:
                    return self.parse_metadata(*self.process_queue())
                return self.process_queue()
            else:
                return 1
        except TypeError:
            return None

    def process_queue(self):
        """
		Process the queue if entries exists, deletes entries.
		Note: Will only process _QUEUE_LIMIT entries (first come first out) while
			additional entries will get deleted.
		"""
        log_i("Processing queue...")
        if len(self.QUEUE) < 1:
            return None

        try:
            if len(self.QUEUE) >= self._QUEUE_LIMIT:
                api_data, galleryid_dict = self.get_metadata(
                    self.QUEUE[:self._QUEUE_LIMIT])
            else:
                api_data, galleryid_dict = self.get_metadata(self.QUEUE)
        except TypeError:
            return None
        finally:
            log_i("Flushing queue...")
            self.QUEUE.clear()
        return api_data, galleryid_dict

    @classmethod
    def login(cls, user, password):
        pass

    @classmethod
    def check_login(cls, cookies):
        pass

    def check_cookie(self, cookie):
        cookies = self.COOKIES.keys()
        present = []
        for c in cookie:
            if c in cookies:
                present.append(True)
            else:
                present.append(False)
        if not all(present):
            log_i("Updating cookies...")
            try:
                self.COOKIES.update(cookie)
            except requests.cookies.CookieConflictError:
                pass

    def handle_error(self, response):
        pass

    @classmethod
    def parse_metadata(cls, metadata_json, dict_metadata):
        """
		:metadata_json <- raw data provided by site
		:dict_metadata <- a dict with gallery id's as keys and url as value

		returns a dict with url as key and gallery metadata as value
		"""
        pass

    def get_metadata(self, list_of_urls, cookies=None):
        """
		Fetches the metadata from the provided list of urls
		returns raw api data and a dict with gallery id as key and url as value
		"""
        pass

    @classmethod
    def apply_metadata(cls, gallery, data, append=True):
        """
		Applies fetched metadata to gallery
		"""
        pass

    def search(self, search_string, **kwargs):
        """
		Searches for the provided string or list of hashes,
		returns a dict with search_string:[list of title & url tuples] of hits found or emtpy dict if no hits are found.
		"""
        pass
Example #26
import os
from robobrowser import RoboBrowser
from string import capwords

url = "http://www.guitarcats.com/realbook-jazz-standards/A"

# Open up HTML
browser = RoboBrowser(parser="html.parser", history=True)
browser.open(url)

if browser.response.status_code != 200:
    print("Server responded with code " + str(browser.response.status_code) +
          " for " + url)
    print("Exiting...")
    quit()

links = browser.find_all("a", {"class": "realbook_letter_link"})

for link in links:

    # Open A-Z categories 1 by 1
    browser.open(link["href"])

    # Check proper response
    if browser.response.status_code != 200:
        print("Server responded with code " +
              str(browser.response.status_code) + " for " + link["href"])
        continue

    # Get current category and find all its songs
    letter = link["href"][-1]
Example #27
def remove_prefix(text, prefix):
    if text.startswith(prefix):
        return text[len(prefix):]
    return text


# Fix from: https://github.com/jmcarp/robobrowser/issues/93#issuecomment-650640171
import werkzeug

werkzeug.cached_property = werkzeug.utils.cached_property

# Start scraping
from robobrowser import RoboBrowser
import re
import urllib.parse as urlparse  # Python 3 replacement for the Python 2 'urlparse' module

courseModulesUrl = 'https://myuni.adelaide.edu.au/courses/' + args.course + '/modules'
browser = RoboBrowser(history=True, parser='html.parser')
browser.open(courseModulesUrl)
# Handle login page
form = browser.get_form(id='fm1')
form["username"] = args.username
form["password"] = args.password
browser.session.headers['Referer'] = args.course
browser.submit_form(form)
# Get course name (no special characters)
courseTitle = browser.find("title").text
courseTitle = remove_prefix(courseTitle, 'Course Modules: ')
courseTitle = "".join([x if x.isalnum() else "_" for x in courseTitle])
print('Course Url: ' + courseModulesUrl)
print('Course Title: ' + courseTitle)
print('Finding file links of type: ' + args.downloadOnly)
# Make output dir
Example #28
from urllib import request
import csv
import sys

"""
Script for the unipa (University of Palermo) site, Sicily region

first parameter:    averages/extremes (1 for hourly, 2 for daily, 3 for monthly)
second parameter:   time span
third parameter:    start
fourth parameter:   end

"""

url = "http://meteo.astropa.unipa.it/public/"
br = RoboBrowser(parser="html.parser")

def query (avgtype, timespan, day, month, year, dayend, monthend, yearend) :

    br.open(url)
    form = br.get_form()  # gets the form from the web page
    form['avgtype'] = avgtype  # averages
    form['timespan'] = timespan  # time span
    form['day'] = day
    form['month'] = month  # start month
    form['year'] = year
    form['dayend'] = dayend
    form['monthend'] = monthend  # end month
    form['yearend'] = yearend
    br.submit_form(form)  # performs the POST
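A hedged usage example for `query` (the field values below are placeholders; the accepted values depend on the options exposed by the form on the page):

if __name__ == '__main__':
    # e.g. daily data from 1 January to 31 January 2020 (values assumed, not verified against the site)
    query(avgtype='2', timespan='2', day='1', month='1', year='2020',
          dayend='31', monthend='1', yearend='2020')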
Example #29
def def_vs_scraper(credentials,
                   bucket_name,
                   obj_path,
                   years=default_years,
                   weeks=default_weeks):
    client = boto3.client('s3')
    browser = RoboBrowser()
    browser.open(login_url)
    login_form = browser.get_forms()[0]

    # Set login credentials
    login_form['ctl00$Body$EmailTextbox'].value = credentials['email']
    login_form['ctl00$Body$PasswordTextbox'].value = credentials['password']
    login_form.serialize()

    # Submit login form
    browser.submit_form(login_form)

    # Open the previously hidden page
    for yearIdx, year in enumerate(years):
        year_dict = years[yearIdx]
        year_key = list(year_dict.keys())[0]
        sn = year_dict[year_key]

        for week in weeks:

            for position_ranking in default_position_rankings:
                w = week
                ew = week
                pts_vs_url = 'https://fantasydata.com/nfl-stats/nfl-fantasy-football-points-allowed-defense-by-position.aspx?fs={}&stype=0&sn={}&scope={}&w={}&ew={}&s=&t=0&p=0&st={}&d=1&ls={}&live=false&pid=true&minsnaps=4'.format(
                    fs, sn, scope, w, ew, position_ranking['url'],
                    position_ranking['url'])

                # Delay before retrieving next set of data
                time.sleep(0.5)

                browser.open(pts_vs_url)
                content = browser.find_all('tr')

                # Initialize the data to be written to the file
                formatted_data = ''

                for idx, line in enumerate(content):
                    # Only add the header once per year
                    if idx == 0 and week == 0:
                        formatted_data = headers + '\n'
                    elif idx != 0:
                        parsed_data = ','.join(line.find_all(text=True))
                        stripped_line = parsed_data.strip('\n').strip(',')
                        year_value = str(list(year.keys())[0])
                        next_line = stripped_line + ',' + year_value + '\n'

                        formatted_data = formatted_data + next_line

                # Make the directory for each year of CSV Data
                file_path = '{}/{}/{}/{}.csv'.format(obj_path, year_key,
                                                     week + 1,
                                                     position_ranking['file'])

                try:
                    # Upload object to the S3 bucket
                    client.put_object(Bucket=bucket_name,
                                      Body=formatted_data,
                                      Key=file_path)
                except RuntimeError as err:
                    print('Failed to write to file: ', err)
                    raise err

                print('Success! Uploaded data: {}'.format(file_path))
Example #30
from robobrowser import RoboBrowser

import login
import parser

if __name__ == '__main__':
    shared_browser = RoboBrowser(parser='html.parser', timeout=10)
    login_gui = login.Login(shared_browser)
    user_data, is_logged_in = login_gui.user_data, login_gui.is_logged_in
    del login_gui  # free memory reserved by the login gui
    if is_logged_in:
        parser_gui = parser.Parser(shared_browser, user_data)