Example #1
    def redirect(self):
        try:
            if self.agent == True:
                br = Browser()

                UserAgent = "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0"
                br.set_handle_robots(False)
                # identify as Firefox instead of the default mechanize user agent
                br.addheaders = [("User-agent", UserAgent)]

                remote_url = br.open(self.target).geturl()

            else:
                remote_url = u.urlopen(self.target).geturl()

            return (remote_url)
        except Exception as e:
            print(e)
Example #2
def card_dates(tipo, numero):
    card = {}  # result dict (assumed local; the snippet uses `card` without defining it)
    cod = [tipo]
    logging.info('Buscando datos de la tarjeta CRTM '+tipo+' '+numero)
    browser = Browser()
    response = browser.open('http://tarjetatransportepublico.crtm.es/CRTM-ABONOS/consultaSaldo.aspx')
    if response.code == 200: # Check that the page responded
        browser.select_form("aspnetForm")         # Select the form
        browser["ctl00$cntPh$txtNumTTP"] = numero # Fill in the card details
        browser["ctl00$cntPh$dpdCodigoTTP"] = cod
        page = browser.submit(name="ctl00$cntPh$btnConsultar")  # "Press" the Continue button

        soup = BeautifulSoup(page.read())
        card_no = soup.findAll('span', attrs={"id" : 'ctl00_cntPh_lblNumeroTarjeta'}) # Find the full card number
        results = soup.findAll('div', attrs={"id" : 'ctl00_cntPh_tableResultados'}) # Find the card validity data
        spans = results[0].findAll('span') # Split out the individual fields

        if card_no: # Check whether the submitted card returned a valid result
            card["card_no"] = card_no[0].renderContents()
            card["subscription_age"] = spans[1].text
            card["load_date"] = dparser.parse(spans[2].text, fuzzy=True, dayfirst=True)
            card["valid_date"] = dparser.parse(spans[3].text, fuzzy=True, dayfirst=True)
            card["first_use"] = dparser.parse(spans[4].text, fuzzy=True, dayfirst=True)
            card["renovation_date"] = dparser.parse(spans[5].text, fuzzy=True, dayfirst=True)
        else:
            card["card_no"] = '0'
            card["subscription_age"] = 'Nope'
            card["load_date"] = datetime.datetime(1991, 1, 1)
            card["valid_date"] = datetime.datetime(1991, 1, 1)
            card["first_use"] = datetime.datetime(1991, 1, 1)
            card["renovation_date"] = datetime.datetime(1991, 1, 1)

        return card
    else:
        logging.info("La página del CRTM no está disponible")

        card["card_no"] = '0'
        card["subscription_age"] = 'Nope'
        card["load_date"] = datetime.datetime(1991, 1, 1)
        card["valid_date"] = datetime.datetime(1991, 1, 1)
        card["first_use"] = datetime.datetime(1991, 1, 1)
        card["renovation_date"] = datetime.datetime(1991, 1, 1)

        return card
Example #3
def scrape(url):

    global data, data2, data3
    mech = Browser()
    page = mech.open(url)
    html = page.read()

    soup = BeautifulSoup(html)
    table = soup.find('table',
                      width='100%',
                      cellspacing='0',
                      cellpadding='2',
                      border='0')
    data2 = []
    rowIndex = 0
    for row in table.findAll('tr')[1:]:
        data = str(row.getText(separator=' '))
        data = data.strip()
        data = data.replace(' ', '')
        data = data.replace('\n', '')
        data = data.replace('-', '0')
        data = data.replace(',', '')
        data = data.replace('\'', '')
        data = shlex.split(data)
        rowIndex += 1
        for i in data[:]:
            if i.isdigit():
                i = int(i)
                data2.append(i)
            elif '(' in i and ')' in i:  # values in parentheses are negative
                i = i.replace('(', '')
                i = i.replace(')', '')
                i = int(i)
                i = -i
                data2.append(i)
            else:
                data.remove(i)
    data3 = split_list(data2, 3)
    if url == str(income):
        create_income_vars()
    elif url == str(balance):
        create_balance_vars()
    else:
        create_cash_vars()
Example #4
def confirm_master_runing(url):
    max_retries = 1000  # maximum number of times to retry
    interval = 3  # number of seconds to wait between retries
    br = Browser()
    br.set_handle_robots(False)
    tried = 0
    connected = False
    count = 1  # count forms found in url

    while not connected:
        try:
            response = br.open(url)
            connected = True  # if line above fails, this is never executed
        except:
            print "connection could not be establish"
            time.sleep(interval)
            tried += 1
        if tried > max_retries:
            exit()
Example #5
def update(acct, slp, if_update=True):
    br = Browser()
    br.set_handle_robots(False)
    user, pwd, dockey = acct
    #login(br, 'https://www.dice.com/dashboard/logout', user,pwd)

    login(br, 'https://www.dice.com/dashboard/login', user, pwd)
    (sfom, sto) = slp
    print 'Processing', dockey, slp
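    # hide the Dice profile, optionally update the resume, then make it searchable again, with randomized pauses between steps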
    #nots="http://www.dice.com/profman/servlet/ProfMan?op=1011&MENU_PROFILES=cb7bb7bace4884843e70679a3d15525e&MENU_DEACITIVATE=Make%20Not%20Searchable&makeNotSearchable";
    menud = 'Make%20Not%20Searchable'
    browse(
        br,
        "http://www.dice.com/profman/servlet/ProfMan?op=1011&MENU_PROFILES=%s&MENU_DEACITIVATE=%s&makeNotSearchable"
        % (dockey, menud))
    sl = random.randrange(10, 15)
    print "Hide break: %d sec." % sl
    time.sleep(sl)
    #browse(br,"https://secure.dice.com/regman/profile.html?dockey=fe637379adaa31c7020ffe731f90cc36")
    #time.sleep(random.randrange(10, 50))
    if if_update:
        update_resume(dockey)
        sl = random.randrange(30, 60)
        print "Update break: %d sec." % sl
        time.sleep(sl)
        browse(
            br,
            "https://secure.dice.com/regman/profile.html?dockey=%s" % dockey)
        time.sleep(random.randrange(5, 15))
    menus = 'Make%20Searchable'
    unhide_url = "http://www.dice.com/profman/servlet/ProfMan?op=1011&MENU_PROFILES=%s&MENU_STATUS_CHANGE=%s" % (
        dockey, menus)
    #print unhide_url
    #e(0)
    browse(br, unhide_url)
    sl = random.randrange(sfom, sto)
    print "Unhide break: %d sec (%s min)." % (sl, sl / 60)
    time.sleep(sl)
    #browse(br,"https://secure.dice.com/regman/profile.html?dockey=%s" % dockey)
    #browse(br,"https://secure.dice.com/regman/profile.html?dockey=%s" % dockey)
    #browse(br,"https://www.dice.com/dashboard#/profiles/active" )
    time.sleep(random.randrange(50, 150))
    browse(br, 'https://www.dice.com/dashboard/logout')
Example #6
    def test_lost_your_password_for_internal_accounts(self):
        """websession - sending lost password for internal admin account"""

        try_with_account = CFG_SITE_ADMIN_EMAIL

        # click on "send lost password" for CFG_SITE_ADMIN_EMAIL internal account
        browser = Browser()
        browser.open(CFG_SITE_SECURE_URL + "/youraccount/lost")
        browser.select_form(nr=0)
        browser['p_email'] = try_with_account
        try:
            browser.submit()
        except Exception, e:
            # Restore the admin password (send_email set it to random number)
            run_sql("UPDATE user SET password=AES_ENCRYPT(email, '')"
                    "WHERE id=1")
            self.fail(
                "Obtained %s: probably the email server is not installed "
                "correctly." % e)
Example #7
def amazon(email):
    brows = Browser()
    brows.set_handle_robots(False)
    brows._factory.is_html = True
    brows.set_cookiejar(cookielib.LWPCookieJar())
    brows.addheaders = [('User-agent',random.choice(ua["browsers"]["chrome"]))]
    brows.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(),max_time=1)
    url = "https://www.amazon.com/ap/signin?openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.com%2F%3F_encoding%3DUTF8%26ref_%3Dnav_ya_signin&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.assoc_handle=usflex&openid.mode=checkid_setup&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&"
    brows.open(url, timeout=10)
    brows.select_form(nr=0)
    brows.form['email'] = email

    brows.method = "POST"
    submit = brows.submit()
    soup = BeautifulSoup(submit.read().decode("utf-8"),"lxml")
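    # Amazon only prompts for a password when the e-mail belongs to an existing account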
    if soup.find("div", {"id": "auth-password-missing-alert"}):
        return({"rateLimit":False,"exists":True,"emailrecovery":None,"phoneNumber":None,"others":None})
    else:
        return({"rateLimit":False,"exists":False,"emailrecovery":None,"phoneNumber":None,"others":None})
Example #8
    def force_build(self, username="******", comments=None):
        def predicate(form):
            try:
                return form.find_control("username")
            except Exception:
                return False

        if not self._browser:
            self._browser = Browser()
            self._browser.set_handle_robots(
                False)  # The builder pages are excluded by robots.txt

        # ignore false positives for missing Browser methods - pylint: disable=E1102
        self._browser.open(self.url())
        self._browser.select_form(predicate=predicate)
        self._browser["username"] = username
        if comments:
            self._browser["comments"] = comments
        return self._browser.submit()
Example #9
 def test_clone_browser(self):
     from mechanize import Browser
     br = Browser()
     br.set_handle_refresh(True, max_time=237, honor_time=True)
     br.set_handle_robots(False)
     cbr = copy.copy(br)
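     # each handler is cloned per browser, while the cookie jar is shared between original and copy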
     for h, ch in zip(br.handlers, cbr.handlers):
         self.assertIsNot(h, ch)
         self.assertIs(ch.parent, cbr)
         self.assertIs(h.__class__, ch.__class__)
     self.assertEqual(set(br._ua_handlers), set(cbr._ua_handlers))
     self.assertIs(br._ua_handlers['_cookies'].cookiejar,
                   cbr._ua_handlers['_cookies'].cookiejar)
     self.assertIsNot(br.addheaders, cbr.addheaders)
     self.assertEqual(br.addheaders, cbr.addheaders)
     self.assertIs(br.finalize_request_headers,
                   cbr.finalize_request_headers)
     h = cbr._ua_handlers['_refresh']
     self.assertEqual((h.honor_time, h.max_time), (True, 237))
Example #10
def getGamesAndOddsFin(url):
    br = Browser()
    #site demands a user-agent that isn't a robot
    br.addheaders = [('User-agent', 'Firefox')]
    #retrieve veikkaus vakio mobile home page and browse to game percent page
    #and store the page
    br.open(url)
    for link in br.links():
        siteMatch = re.compile(VEIKKAUS_LINK_TEXT).search(link.text)
        if siteMatch:
            resp = br.follow_link(link)
            result = resp.get_data()
            break
    #html page as a beautifulsoup object
    page = BeautifulSoup(result)
    #find games names and odds
    gameData = page.findAll('td')
    #full names
    nameList = []
    #concatenated names used to select the correct names from betfair data
    refNamesFin = []
    #full name + odds
    oddsList = []
    #parse game names (first 13 games) into list
    #4 elements per row and 13 rows => 52
    for idx in range(ELEMENT_NUM):
        if idx % 4 == 0:
            gameName = gameData[idx].string.strip().encode(ENCODING)
            #full team names
            nameList.append(gameName)
            teamNames = gameName.split(' - ')
            gameString = createTeamName(teamNames[0]) + ' - ' + createTeamName(
                teamNames[1])
            #concatenated team names
            refNamesFin.append(gameString)
        else:
            gameOdds = gameData[idx].string.strip().encode(ENCODING)
            #calculate odds from game percents and limit decimal places
            oddsList.append('{0:.2f}'.format(100 / float(gameOdds)))
    #create odds tuples, 3 odds per game (1,x,2)
    gameOdds = zip(*[iter(oddsList)] * 3)
    #return the full game names, their odds tuples and the concatenated reference names
    return (nameList, gameOdds, refNamesFin)
Example #11
def get_data(movie):

    try:
        br = Browser()
        br.open("http://www.imdb.com/find?s=tt&q=" + movie)
        link = list(br.links(url_regex=re.compile(r"/title/tt*")))[0]

    except:
        print "Not Found!"
        exit(3)

    res = br.follow_link(link)
    soup = BeautifulSoup(res.read())
    title_year = soup.find('span', id='titleYear')
    year_str = str(title_year)
    year = re.search('.*([0-9]{4}).*', year_str).group(1)
    title = soup.find('title').contents[0]
    rate = soup.find('span', itemprop='ratingValue')
    rating = str(rate.contents[0])

    actors = []
    actors_soup = soup.findAll('span', itemprop='actors')
    for i in actors_soup:
        i_str = str(i)
        j = i_str.rpartition('itemprop="name"')[-1]
        actors.append(re.search('\>(.*?)\<', j).group(1))

    directors = []
    director_soup = soup.findAll('span', itemprop='director')
    for i in director_soup:
        i_str = str(i)
        j = i_str.rpartition('itemprop="name"')[-1]
        directors.append(re.search('\>(.*?)\<', j).group(1))

    votes = soup.find('span', itemprop='ratingCount').contents[0]
    response = []
    response.append({"Movie : ": title})
    response.append({"Rating: ": rating})
    response.append({"Votes ": votes})
    response.append({"Release Year : ": year})
    response.append({"Director : ": directors})
    response.append({"Actors : ": actors})
    return json.dumps(response)
Example #12
 def __init__(self, num, keyword):
     self.num = num
     self.keyword = keyword
     self.br = Browser(factory=mechanize.RobustFactory())
     self.br.set_handle_robots(False)
     self.br.addheaders = [
         ('User-Agent', userAgent),
         ('Accept',
          'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
     ]
     self.cj = mechanize.LWPCookieJar()
     self.br.set_cookiejar(self.cj)
     self.br._factory.is_html = True
     self.br.set_handle_refresh(False)
     self.idletime = 0
     threading.Thread.__init__(self)
     self.url = ""
     self.depth = 0
     self.output = ""
Example #13
def upload(count):
    br = Browser()
    br.set_handle_robots(False)
    br.open('http://zincpharmer.csb.pitt.edu/pharmville/')
    form = list(br.forms())[0]
    br.form = form

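    # fill in the upload form: receptor, results file, user id and job name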
    form['receptor'] = ['traf2']
    form.add_file(open(outputBase + 'minimized_results.sdf'), 'text/plain',
                  'upload.sdf')
    form['userid'] = 'yifengt'
    form['name'] = 'Test'
    response = br.submit()

    print str(count) + '.sdf'
    analysis = process()
    analysis.feed(response.read())
    analysis.close()
    br.close()
Example #14
def movie(request, uId):
    if 'q' in request.GET:
        movie = request.GET['q']
        movie_search = '+'.join(movie.split())
        base_url = 'http://www.imdb.com/find?q='
        url = base_url + movie_search + '&s=all'
        title_search = re.compile('/title/tt\d+')
        br = Browser()
        br.open(url)
        link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
        res = br.follow_link(link)
        soup = BeautifulSoup(res.read())
        print soup
        info = {}
        movie_title = getunicode(soup.find('title'))
        info['title'] = movie_title
        strng = ""
        rate = soup.find('span', itemprop='ratingValue')
        rating = getunicode(rate)
        info['rating'] = rating
        img = soup.find('img', {'itemprop': 'image'})['src']
        image = getunicode(img)
        #image = image.split('.jpg')[0]
        info['img'] = "https://d202m5krfqbpi5.cloudfront.net/books/1370898422l/18054175.jpg"
        des = soup.find('meta', {'name': 'description'})['content']
        descp = getunicode(des)
        info['description'] = descp
        genre = []
        infobar = soup.find('div', {'class': 'infobar'})
        r = infobar.find('', {'title': True})['title']
        genrelist = infobar.findAll('a', {'href': True})
        for i in range(len(genrelist) - 1):
            genre.append(getunicode(genrelist[i]))
            info['genre'] = genre
        release_date = getunicode(genrelist[-1])
        info['date'] = release_date

        return render_to_response('moviedetails.html', {'Movie': info},
                                  context_instance=RequestContext(request))
    else:
        return HttpResponseRedirect('/%s/?e=1' % uId)  #set url to /userid
Example #15
def getQuote(movie_search, quote_prefix):

    base_url = 'http://www.imdb.com/find?q='
    url = base_url + movie_search + '&s=all'

    title_search = re.compile('/title/tt\d+')

    br = Browser()
    br.open(url)

    link = br.find_link(
        url_regex=re.compile(r'/title/tt(.*)/?ref_=fn_al_tt_1'))
    res = br.follow_link(link)

    soup = BeautifulSoup(res.read(), 'html.parser')

    qtlink = br.find_link(url='trivia?tab=qt&ref_=tt_trv_qu')

    qtres = br.follow_link(qtlink)

    qtsoup = BeautifulSoup(qtres.read(), 'html.parser')

    searchFor = quote_prefix

    all_chars = []

    quote_entry = {}
    char_match = qtsoup.find_all("span", class_=re.compile("character"))
    for c in char_match:
        if c.string not in all_chars:
            all_chars.append(c.string)

    tag = qtsoup.p
    tag.a.decompose()

    ch_match = []
    for tag in qtsoup.find_all(re.compile('p')):
        if tag.name == 'p':
            ch_match = tag.find_all(text=re.compile(":(.*)"))
            for allqt in ch_match:
                if searchFor in allqt:
                    print allqt
Example #16
def add_to_cart():
    """
    """

    data = request.json
    resp = Response(u'%s')

    uid = data["uid"]
    pwd = data["pwd"]
    items = data["items"]

    params = {"items": items}
    params = urllib.quote(json.dumps(params)).replace('%27', '%22').replace(
        '%20', '')

    #print params

    br = Browser()
    br.addheaders = [('User-agent', 'Firefox')]
    br.set_handle_robots(False)
    br.open('https://www.freshdirect.com/login/login.jsp')
    br.select_form(name="fd_login")
    br['userid'] = uid
    br['password'] = pwd
    br.submit()

    #items = data[ "items" ]

    br.addheaders = [('Content-Type', 'application/x-www-form-urlencoded'),
                     ('User-agent', 'Firefox')]

    # encoding the dict is producing single quote (%27) when it should do double (%22)
    #br.open( "https://www.freshdirect.com/api/addtocart", "data=%7B%22items%22%3A%5B%7B%22salesUnit%22%3A%22EA%22%2C%22quantity%22%3A%223%22%2C%22skuCode%22%3A%22FRU0069115%22%2C%22pageType%22%3A%22BROWSE%22%7D%5D%7D")
    br.open("https://www.freshdirect.com/api/addtocart", "data=" + params)

    soup = BS(br.response().read(), "lxml")
    print soup

    #resp = Response(u'%s' % json_output)
    resp.headers['Content-Type'] = 'application/json; charset=utf-8'

    return resp
Example #17
def get_ratings(movies_of_my_genre):
    for movie in movies_of_my_genre:
        try:
            print "Checking IMDb rating of :   " + movie.movie_name.replace(
                '\t', '')
            movie_search = '+'.join(movie.movie_name.split())
            movie_url = base_url + movie_search + '&s=all'
            br = Browser()
            br.open(movie_url)
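            # follow the first /title/tt... link in the search results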
            link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
            res = br.follow_link(link)
            soup = BeautifulSoup(res.read(), "lxml")
            movie_title = soup.find('title').contents[0]
            rate = soup.find('span', itemprop='ratingValue')
            if rate is not None:
                movie.movie_rating = float(rate.contents[0])
            else:
                movie.movie_rating = 0
        except:
            movie.movie_rating = 0
Example #18
def token(timestamp, username, t_id, t_short, submission_num, base_url):
    """Execute the request for playing a token on a submission.

    timestamp (int): seconds from the start.
    username (string): username issuing the submission.
    t_id (string): id of the task.
    t_short (string): short name of the task.
    submission_num (string): id of the submission to play the token on.
    base_url (string): http address of CWS.

    """
    print("\n%s - Playing token for %s on task %s" %
          (to_time(timestamp), username, t_short),
          end='')
    browser = Browser()
    browser.set_handle_robots(False)
    LoginRequest(browser, username, "", base_url=base_url).execute()
    TokenRequest(browser, (int(t_id), t_short),
                 submission_num=submission_num,
                 base_url=base_url).execute()
Example #19
def create_browsing_context():
    if not os.path.exists('corpas.focloir.ie.credentials'):
        # untried -- the published snippet masks these prompts; plain input() is assumed here
        username = input('corpas.focloir.ie username:')
        password = input('corpas.focloir.ie password:')
        with open('corpas.focloir.ie.credentials', 'w') as f:
            f.write(username + '\n' + password)
    with open('corpas.focloir.ie.credentials', 'r') as f:
        username, password = f.read().split('\n')
    br = Browser()
    br.set_handle_robots(False)
    #br.open('http://corpas.focloir.ie/')
    br.open('http://focloir.sketchengine.co.uk/')
    br.add_password(
        'http://focloir.sketchengine.co.uk/auth/run.cgi/simple_search?home=1',
        username, password)
    for link in br.links():
        if link.text.lower().replace(' ', '') == 'login':
            br.follow_link(link)
    return br
Example #20
def search(arg):
    assert '/' not in arg  # because we use it in a filename
    cache = rc['authority_cache']
    filename = cache + '/' + arg
    if os.path.exists(filename):
        return [eval(i) for i in open(filename)]
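    # not cached yet: run the search through a fresh browser and cache the parsed results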
    br = Browser()
    br.set_handle_robots(False)
    br.open(start)
    br.select_form(name="querybox")
    br['Search_Arg'] = arg.encode('utf-8')
    br['Search_Code'] = ['NHED_']
    res = br.submit()
    found = list(read_serp(res))
    br.close()
    out = open(filename, 'w')
    for i in found:
        print >> out, i
    out.close()
    return found
Example #21
 def browser(self):
     '''setup mechanize browser instance'''
     if not self.br:
         br = Browser()
         cj = cookielib.LWPCookieJar()
         br.set_cookiejar(cj)
         #set user agents to maintain aws happiness
         br.addheaders = [('User-agent', self.user_agent),
                          ('Referer', self.referer)]
         br.set_handle_equiv(True)
         br.set_handle_redirect(True)
         br.set_handle_referer(True)
         br.set_handle_robots(False)
         #br.set_handle_gzip(True) #experimental & probably unnecessary
         #proxy support
         #br.set_proxies({"http": "user:[email protected]:3128"})
         #br.set_proxies({"http": "myproxy.example.com:3128"})
         #br.add_proxy_password("user", "password")
         self.br = br
     return self.br
Example #22
def getCurrentCoverageDirectory(baseURL):
    mech = Browser()
    mech.open(baseURL)

    currentLink = None

    for link in mech.links():
        # Find the first directory link that is not the parent
        if (link.url.endswith("/") and not link.url.startswith("/")):
            currentLink = link
            break

    if currentLink is None:
        mech.close()
        raise Exception("Unable to find current coverage directory")

    linkURL = currentLink.base_url + currentLink.url
    mech.close()

    return linkURL
Example #23
def main():

    br = Browser()

    names = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

    for i in range(100):
        br.set_handle_robots(False)
        br.open("http://www.ultimatelovecalc.com/love/1352035")
        br.select_form(name="formMain")
        a1 = random.sample(names, 5)
        a2 = random.sample(names, 5)
        a3 = random.sample(names, 5)
        br["fname"] = "".join(a1)
        br["cname1"] = "".join(a2)
        br["cname2"] = "".join(a3)
        br["cname3"] = "".join(a1)
        sub = br.submit()
        donech = sub.read()
        print i
Example #24
def ncbiUrlBuilder(snp):
    """ncbiUrlBuilder builds the url used by NCBI to query a particular SNP. 
    an example url for snp rs96066708 is below:
    http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=9606708
    and will give the information page for that SNP. It builds the URL based on
    the base URL from the ncbi site and concatenates it with some simple string
    building tools. 
    The URL is passed to mechanize/Browser to open the site, and the opened page
    is returned to the program.
    Arguments:
        snp -- the snp id. 
    """
    #url = "http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=9606708"
    print 'attempting to query', snp
    snpNumber = snp[2:(len(snp))]
    url = "http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?" + "rs=" + snpNumber
    print 'trying url: ' + url 
    mech = Browser()
    page = mech.open(url)
    return page, snp
Example #25
def submit(timestamp, username, t_id, t_short, files, base_url):
    """Execute the request for a submission.

    timestamp (int): seconds from the start.
    username (string): username issuing the submission.
    t_id (string): id of the task.
    t_short (string): short name of the task.
    files ([string]): list of filenames of submitted files.
    base_url (string): http address of CWS.

    """
    print("\n%s - Submitting for %s on task %s" %
          (to_time(timestamp), username, t_short),
          end='')
    browser = Browser()
    browser.set_handle_robots(False)
    LoginRequest(browser, username, "", base_url=base_url).execute()
    SubmitRequest(browser, (int(t_id), t_short),
                  filename=files[0],
                  base_url=base_url).execute()
Example #26
    def __init__(self, url, filename, sample_time, n_attempts=2):
        self.filename = filename
        self.sample_time = sample_time
        self.n_attempts = n_attempts
        sleep_time = 5
        self.url = url
        self.br = Browser()

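        # sample indefinitely, retrying a failed page load up to n_attempts times per cycle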
        while (True):
            attempt = 0
            for attempt in range(n_attempts):
                try:
                    self._load_page()
                    self.write_games_odds()
                    break
                except:
                    print('Erro!')
                    time.sleep(sleep_time)

            time.sleep(self.sample_time - attempt * sleep_time)
Example #27
def yapistir():
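    # pastes the contents of `dosya` to paste.ubuntu.com as `coder` and collects the result links in `k_link` (module-level names)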
    br = Browser()
    br.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0'
    )]
    br.set_handle_robots(False)

    br.open("http://paste.ubuntu.com")
    br.select_form("pasteform")

    br['poster'] = coder
    br.find_control(name="syntax").value = ["python"]

    dosya_ac = open(dosya)
    kodlar = dosya_ac.read()
    br['content'] = kodlar
    br.submit()
    for link in br.links():
        k_link.append(link.url)
Example #28
def read_all_result_page_links_for(mainurl):
    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
    )]
    i = 0
    global_list = []

    br.open(mainurl)
    nice_links = [l for l in br.links() if 'company' in l.url]

    #global_list.extend(nice_links)

    #record = {}
    for link in nice_links:

        i = i + 1
        read_detail_page(link.url, i)
Example #29
def fetch():
    result_no = 0                 # Number the output files
    br = Browser()                # Create a browser
    br.open(LOGIN_URL)            # Open the login page
    br.select_form(name="login")  # Find the login form
    br['username'] = USERNAME     # Set the form values
    br['password'] = PASSWORD
    resp = br.submit()            # Submit the form

    # Automatic redirect sometimes fails, follow manually when needed
    if 'Redirecting' in br.title():
        resp = br.follow_link(text_regex='click here')

    # Loop through the searches, keeping fixed query parameters
    for actor in VARIABLE_QUERY:
        # I like to watch what's happening in the console
        print >> sys.stderr, '***', actor
        # Lets do the actual query now
        br.open(SEARCH_URL + FIXED_QUERY + actor)
        # The query actually gives us links to the content pages we like,
        # but there are some other links on the page that we ignore
        nice_links = [l for l in br.links()
                      if 'good_path' in l.url
                      and 'credential' in l.url]
        if not nice_links:        # Maybe the relevant results are empty
            break
        for link in nice_links:
            try:
                response = br.follow_link(link)
                # More console reporting on title of followed link page
                print >> sys.stderr, br.title()
                # Increment output filenames, open and write the file
                result_no += 1
                out = open('result_%04d' % result_no, 'w')
                print >> out, response.read()
                out.close()
            # Nothing ever goes perfectly, ignore if we do not get page
            except mechanize._response.httperror_seek_wrapper:
                print >> sys.stderr, "Response error (probably 404)"
            # Let's not hammer the site too much between fetches
            time.sleep(1)
Example #30
    def downloadAll(self,
                    section: str,
                    url: str,
                    createdirs: bool,
                    overwrite: int = 1,
                    pattern: str = "",
                    saveto: str = "",
                    httpUsername: str = None,
                    httpPassword: str = None):
        br = Browser()
        self.setupBrowser(br, url, httpUsername, httpPassword)

        br.open(url)
        for link in br.links(url_regex=pattern):
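            # absolute links are downloaded as-is; root-relative and relative
            # links are first resolved against the page's base URL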
            if link.url.startswith("http://") or link.url.startswith(
                    "https://"):
                self.download(section,
                              link.url,
                              createdirs,
                              overwrite,
                              saveto=saveto,
                              httpUsername=httpUsername,
                              httpPassword=httpPassword)
            elif link.url.startswith("/"):
                self.download(section,
                              link.base_url[:link.base_url.find("/", 8)] +
                              link.url,
                              createdirs,
                              overwrite,
                              saveto=saveto,
                              httpUsername=httpUsername,
                              httpPassword=httpPassword)
            else:
                self.download(section,
                              link.base_url[:link.base_url.rfind("/") + 1] +
                              link.url,
                              createdirs,
                              overwrite,
                              saveto=saveto,
                              httpUsername=httpUsername,
                              httpPassword=httpPassword)