Example #1
    def getMovieLink(self, movietitle):

        br = Browser()
        br.set_handle_robots(False)
        br.open(base_url)
        br.select_form(nr=0)
        br.form['q'] = movietitle
        br.submit()
        #    print br.response().read()

        pageData = br.response().read()
        br.close()

        soup = BeautifulSoup(pageData, 'lxml')
        allFindSections = soup.find_all('div', class_='findSection')
        #    print allFindSections[0]
        firstSectionHeader = allFindSections[0].findAll(
            'h3', class_="findSectionHeader")
        #    print firstSectionHeader[0].contents[1]

        #        if (re.match('Title', firstSectionHeader[0].contents[1])):
        #            print "title found:"
        #            print firstSectionHeader[0].contents[1]

        findList = allFindSections[0].findAll('tr', class_='findResult')
        #    print findList[0]
        firstResultLinks = findList[0].findAll('a')
        firstResultLink = firstResultLinks[1].get('href')

        return firstResultLink
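This method assumes module-level imports and a base_url naming the IMDb host; a minimal sketch of that surrounding context (the base_url value is a guess, not taken from the original module):

from mechanize import Browser
from bs4 import BeautifulSoup

# Assumed: getMovieData() below appends hrefs like /title/tt0133093/ to this.
base_url = "http://www.imdb.com"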
Example #2
def setup_tentative(username, passw):
    browser = Browser()
    browser.addheaders = [('User-Agent', sorteed())]
    browser.set_handle_robots(False)
    logged_in = False
    while not logged_in:
        try:
            browser.open('https://facebook.com/')
            browser.select_form(nr=0)
            logged_in = True
        except Exception:
            system('/etc/init.d/tor restart > /dev/null')
            sleep(20)
    browser.form['email'] = username
    browser.form['pass'] = passw
    response = browser.submit()
    link = response.geturl()
    browser.close()
    link = link.split('/')
    ok = ''

    for l in link:
        if b'Find Friends' in response.read():
            ok = False
    if (ok == False):
        system('/etc/init.d/tor stop')
        system('clear')
        print(gr + 'Senha ' + red + passw + gr + ' detectada para ' + red +
              username)
        exit(0)
Example #3
def get_title(url):

    # first, the blunt approach:
    # no need to connect, and it cannot run into a 403 or robots.txt
    for ext in ("jpg", "jpeg", "gif", "png", "svg", ":large"):
        if url.lower().endswith(ext):
            cache.put(url, (url, True))
            break

    try:
        (title, got_image) = cache.get(url)

    except TypeError:
        # got None, key not in cache - open and get the title
        br = Browser()
        try:
            br.open(url)

            # now the clever way, since Facebook mangled its image links
            if br.response().info()["Content-type"] in [
                    "image/png", "image/jpeg", "image/gif", "image/svg+xml"
            ]:
                cache.put(url, (url, True))
            else:
                cache.put(url, (br.title(), False))
        except Exception as exception:
            print(f"Problem, Sir - {exception} - with {url}")
            cache.put(url, (url, False))
        br.close()

    # try again
    (title, got_image) = cache.get(url)

    return (got_image, title)
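get_title() leans on a module-level cache whose get() returns None on a miss, which is what triggers the TypeError branch when the tuple unpacking fails. A minimal dict-backed stand-in for that contract, offered purely as an assumption:

class SimpleCache(object):
    # Hypothetical stand-in for the cache object get_title() expects.
    def __init__(self):
        self._data = {}

    def get(self, key):
        return self._data.get(key)  # None on a miss -> TypeError when unpacked

    def put(self, key, value):
        self._data[key] = value

cache = SimpleCache()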
def getFileCoverage(baseURL, filename):
    """For a given filename, fetch coverage from an online LCOV source."""
    covURL = baseURL + filename + ".gcov.html"

    mech = Browser()
    try:
        urlObj = mech.open(covURL)
    except Exception:
        return ("N/A", "N/A", "N/A", "N/A")

    parsedUrlObj = lxml.html.parse(urlObj)

    branchCoveragePercent = "N/A"
    branchCoverageMissing = "N/A"
    lineCoveragePercent = "N/A"
    lineCoverageMissing = "N/A"

    try:
        # Xpath to the coverage, see below
        lineCoveragePercent = float(
            parsedUrlObj.xpath("/html/body/table[1]/tr[3]/td/table/tr[2]/td[7]"
                               )[0].text.replace(" %", ""))
        # ------------------------------------------------------------------------------------^
        #   2 - line coverage
        #   3 - function coverage
        #   4 - branch coverage
        # ------------------------------------------------------------------------------------------^
        #   5 - # hit
        #   6 - # total
        #   7 - % hit

        lineCoverageHit = int(
            parsedUrlObj.xpath(
                "/html/body/table[1]/tr[3]/td/table/tr[2]/td[5]")[0].text)
        lineCoverageTotal = int(
            parsedUrlObj.xpath(
                "/html/body/table[1]/tr[3]/td/table/tr[2]/td[6]")[0].text)
        lineCoverageMissing = lineCoverageTotal - lineCoverageHit
    except (IndexError, ValueError):
        pass

    try:
        branchCoveragePercent = float(
            parsedUrlObj.xpath("/html/body/table[1]/tr[3]/td/table/tr[4]/td[7]"
                               )[0].text.replace(" %", ""))
        branchCoverageHit = int(
            parsedUrlObj.xpath(
                "/html/body/table[1]/tr[3]/td/table/tr[4]/td[5]")[0].text)
        branchCoverageTotal = int(
            parsedUrlObj.xpath(
                "/html/body/table[1]/tr[3]/td/table/tr[4]/td[6]")[0].text)
        branchCoverageMissing = branchCoverageTotal - branchCoverageHit
    except (IndexError, ValueError):
        pass

    mech.close()

    return (lineCoveragePercent, lineCoverageMissing, branchCoveragePercent,
            branchCoverageMissing)
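A sketch of how this helper might be called; the base URL is made up for illustration:

baseURL = "http://example.org/coverage/lcov/current/"
linePct, lineMiss, branchPct, branchMiss = getFileCoverage(baseURL, "src/main.c")
print("line: %s%% (%s missing), branch: %s%% (%s missing)"
      % (linePct, lineMiss, branchPct, branchMiss))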
    def _login(self, username, password):
        br = Browser()
        br.open("http://netlogin.kuleuven.be/")
        br.select_form(name="wayf")
        br.submit()
        br.select_form(name="netlogin")
        # the login form's field names are generated, so address them by position
        br[br.form._pairs()[2][0]] = username
        br[br.form._pairs()[3][0]] = password
        result = br.submit()
        lines = result.readlines()
        br.close()

        return lines
Example #6
    def getMovieData(self, link):
        url = base_url + link
        br = Browser()
        br.set_handle_robots(False)

        # some movies do not contain a rating, so initialise with no values
        ratingValue = "none"
        bestRating = "none"

        title = ""
        year = ""

        pageData = br.open(url)

        soup = BeautifulSoup(pageData, 'lxml')

        ratingValueElement = soup.find_all('span', itemprop='ratingValue')

        if len(ratingValueElement) > 0:
            ratingValue = ratingValueElement[0].string

        bestRatingElement = soup.find_all('span', itemprop='bestRating')

        if len(bestRatingElement) > 0:
            bestRating = bestRatingElement[0].string

        titleHeadingElement = soup.find_all('h1', itemprop='name')
        title = titleHeadingElement[0].contents[0]

        yearElement = soup.find_all('span', id='titleYear')
        yearElementLink = yearElement[0].find_all('a')
        year = yearElementLink[0].contents[0]

        genreElement = soup.find_all('span',
                                     class_='itemprop',
                                     itemprop='genre')

        genres = []
        for ge in genreElement:
            genres.append(ge.contents[0])

        results = {
            'title': title,
            'year': year,
            'ratingValue': ratingValue,
            'bestRating': bestRating,
            'genres': genres
        }
        br.close()
        return results
Example #7
class Liker(threading.Thread):
    def __init__(self, id, accounts, pageid):
        self.id = id
        self.accounts = accounts
        self.pageid = pageid
        self.running = False

        threading.Thread.__init__(self)

    def createbrowser(self):
        self.br = Browser()
        self.br.set_handle_gzip(True)
        self.br.set_handle_robots(False)
        self.br.addheaders = [(
            'User-agent',
            'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
        )]

    def like(self, user, passw, pageid):
        try:
            self.createbrowser()
            self.br.open('http://m.facebook.com/login.php', timeout=10)
            self.br.select_form(nr=0)
            self.br.form['email'] = user
            self.br.form['pass'] = passw
            self.br.submit()
            page = self.br.response().read()
            if ('Your password was incorrect.' in page
                    or "We didn't recognize your email address." in page
                    or 'Sorry, your account  is temporarily unavailable.' in page):
                Publisher().sendMessage(
                    "update", "Could not login with {0}".format(user))
                return
            Publisher().sendMessage("update",
                                    "Logged in with {0}".format(user))
            self.br.open('http://m.facebook.com/' + pageid, timeout=10)
            for yc in self.br.links(text="Unlike"):
                Publisher().sendMessage("update",
                                        "Already liked with {0}".format(user))
                return
            for xc in self.br.links(text="Like"):
                self.br.follow_link(xc)
                break
            Publisher().sendMessage("update", "Liked with {0}".format(user))
            self.br.close()
        except Exception, e:
            Publisher().sendMessage("update",
                                    "{0} with {1}".format(str(e), str(user)))
            self.like(user, passw, pageid)
Example #8
def fetch_page_after_auth(username, password, next_url):
    print username, password
    logout_url = 'https://secure.ninjacourses.com/account/logout/'
    login_url = 'https://secure.ninjacourses.com/account/login/?next=%s' % next_url
    br = Browser(file_wrapper=ResponseWrapper)
    br.set_handle_robots(False)
    br.open(logout_url)
    br.open(login_url)
    br.select_form()
    br['username'], br['password'] = username, password
    result_page = br.submit().read()
    br.close()
    if 'correct username' in result_page:
        raise ValueError
    return result_page
Example #11
def upload(count):
    br = Browser()
    br.set_handle_robots(False)
    br.open('http://zincpharmer.csb.pitt.edu/pharmville/')
    form = list(br.forms())[0]
    br.form = form

    form['receptor'] = ['traf2']
    form.add_file(open(outputBase + 'minimized_results.sdf'), 'text/plain',
                  'upload.sdf')
    form['userid'] = 'yifengt'
    form['name'] = 'Test'
    response = br.submit()

    print str(count) + '.sdf'
    analysis = process()
    analysis.feed(response.read())
    analysis.close()
    br.close()
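The process class fed with the response is not shown in the snippet; a minimal HTMLParser-based stand-in that matches the feed()/close() calls above, offered only as an assumption about its shape:

try:
    from HTMLParser import HTMLParser   # Python 2, matching the snippet's print syntax
except ImportError:
    from html.parser import HTMLParser  # Python 3

class process(HTMLParser):
    # Hypothetical: just echo the text nodes of the server's response.
    def handle_data(self, data):
        data = data.strip()
        if data:
            print(data)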
def getCurrentCoverageDirectory(baseURL):
    mech = Browser()
    mech.open(baseURL)

    currentLink = None

    for link in mech.links():
        # Find the first directory link that is not the parent
        if link.url.endswith("/") and not link.url.startswith("/"):
            currentLink = link
            break

    if currentLink is None:
        mech.close()
        raise RuntimeError("Unable to find current coverage directory")

    linkURL = currentLink.base_url + currentLink.url
    mech.close()

    return linkURL
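A hypothetical caller handling the missing-directory case raised above (the URL is made up):

try:
    current = getCurrentCoverageDirectory("http://example.org/coverage/")
except RuntimeError as e:
    current = None
    print("no coverage directory found: %s" % e)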
Example #14
def search(arg):
    assert '/' not in arg  # because we use it in a filename
    cache = rc['authority_cache']
    filename = cache + '/' + arg
    if os.path.exists(filename):
        return [eval(i) for i in open(filename)]
    br = Browser()
    br.set_handle_robots(False)
    br.open(start)
    br.select_form(name="querybox")
    br['Search_Arg'] = arg.encode('utf-8')
    br['Search_Code'] = ['NHED_']
    res = br.submit()
    found = list(read_serp(res))
    br.close()
    out = open(filename, 'w')
    for i in found:
        print >> out, i
    out.close()
    return found
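The cache file stores one repr() per result and reads it back with eval(); ast.literal_eval() is a safer drop-in for that round-trip, assuming the results are plain literals (tuples, strings, numbers) — a sketch:

import ast

def read_cached(filename):
    # Same line-per-entry format as above, without executing arbitrary code.
    with open(filename) as f:
        return [ast.literal_eval(line) for line in f]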
def store():
    while True:
        link = raw_input("Enter link\n")
        link = link.strip()
        if (link == 'X') or (link == 'x'):
            break
        print "Collecting song info..."
        if link.find("www.youtube.com") == -1:
            print Fore.GREEN, "Not a valid YouTube link.", Fore.RESET
            continue
        br = Browser()
        global arr
        try:
            br.set_handle_robots(False)
            respo = br.open(link)

            soup = BeautifulSoup(respo, "html.parser")
            name = soup.find("title")
            name = name.text
        except Exception as e:
            print "Not working. Trying method 2..."
            name = method2(link)
            if name == -1:
                continue
        finally:
            br.close()

        try:
            fil = open("MusicLinks.txt", 'rb')
            arr = pickle.load(fil)
            fil.close()
        except Exception as e:
            print "Creating new file to store links."
            arr = {}
        finally:
            if link in arr:
                print Fore.YELLOW, "Song link already present.", Fore.RESET
            else:
                arr[link] = name
                print Fore.YELLOW, "Song added successfully", Fore.RESET
        pushIntoFile()
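store() finishes with pushIntoFile(), which is not shown; a minimal pickle-based stand-in consistent with the load in the try block above (an assumption, not the original helper):

import pickle

def pushIntoFile():
    # Persist the global {link: name} dict the same way store() reads it back.
    f = open("MusicLinks.txt", 'wb')
    pickle.dump(arr, f)
    f.close()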
Example #17
    def _mapgen_speed_fired(self):
        test_dir(join(self.dirname, 'gps_vis'))
        br = Browser()
        # Ignore robots.txt
        br.set_handle_robots(False)

        # The site expects a browser-like user agent
        br.addheaders = [('User-agent', 'Firefox')]

        resp1 = br.open("http://www.gpsvisualizer.com/map_input")

        # Select the map-input form
        br.select_form(name='main')

        br.form['width'] = '870'
        br.form['height'] = '600'
        br.set_value(['google_openstreetmap'], name='bg_map')
        br.set_value(['speed'], name='trk_colorize')
        br.form['legend_steps'] = '10'
        br.add_file(open(self.filename_converted), "text/plain", self.filename_converted, name='uploaded_file_1')
         
        # Submit the form and follow the generated download link
        resp2 = br.submit()

        resp = None
        for link in br.links():
            siteMatch = re.compile('download/').search(link.url)
            if siteMatch:
                resp = br.follow_link(link)
                break

        # Save the downloaded map page
        content = resp.get_data()

        ofile = open(join(self.dirname, 'gps_vis', 'map_speed.html'),'w')
        ofile.write(content)
        ofile.close()
        br.close()

        print 'map generated (speed color)'
Example #18
    def _profilegen_fired(self):
        test_dir(join(self.dirname, 'gps_vis'))
        br = Browser()
        # Ignore robots.txt
        br.set_handle_robots(False)

        # The site expects a browser-like user agent
        br.addheaders = [('User-agent', 'Firefox')]

        # Open the profile-input page, saving the response
        resp1 = br.open("http://www.gpsvisualizer.com/profile_input")

        # Select the profile-input form
        br.select_form(name='main')

        br.form['width'] = '870'
        br.form['height'] = '250'
        br.form['legend_steps'] = '10'
        br.add_file(open(self.filename_converted), "text/plain", self.filename_converted, name='uploaded_file_1')
         
        # Submit the form and follow the generated download link
        resp2 = br.submit()

        resp = None
        for link in br.links():
            siteMatch = re.compile('download/').search(link.url)
            if siteMatch:
                resp = br.follow_link(link)
                break

        # Save the downloaded profile image
        content = resp.get_data()

        ofile = open(join(self.dirname, 'gps_vis', 'profile.png'),'wb')
        ofile.write(content)
        ofile.close()
        br.close()

        print 'profile generated'
Example #19
def hax0r():
    user = g.user
    if user.username == 'hax0r':
        if request.args.get('add'):
            browser = Browser()
            url = "http://productivepoop.com/users/new"
            browser.open(url)
            browser.select_form(nr=0)
            browser['user[username]'] = 'johnsmith'
            browser['user[name]'] = 'johnsmith'
            browser['user[email]'] = '*****@*****.**'
            browser['user[password]'] = 'password'
            browser['user[password_confirmation]'] = 'password'
            browser.submit()
            browser.close()
            return jsonify({'cool': True})
        if request.args.get('remove'):
            browser = Browser()
            url = "http://productivepoop.com/users/"
            browser.open(url)
            browser.form = list(browser.forms())[-1] 
            browser.submit()
            browser.close()
            return jsonify({'cool': True})
        if request.args.get('addalot'):
            for i in range(1000000):
                browser = Browser()
                url = "http://productivepoop.com/users/new"
                browser.open(url)
                browser.select_form(nr=0)
                browser['user[username]'] = 'johnsmithy' + str(i)
                browser['user[name]'] = 'johnsmithy' + str(i)
                browser['user[email]'] = 'johnsmith'+str(i)+'@johnsmith.com'
                browser['user[password]'] = 'password'
                browser['user[password_confirmation]'] = 'password'
                browser.submit()
                browser.close()
        if request.args.get('removealot'):
            for i in range(100):
                browser = Browser()
                url = "http://productivepoop.com/users/"
                browser.open(url)
                browser.form = list(browser.forms())[-1] 
                browser.submit()
                browser.close()
                print 'hello '+str(i)
            return jsonify({'cool': True})
        return render_template('hax0r.html', user=user)
    abort(404)
Example #20
    def _send_fired(self):
        br = Browser()

        # Ignore robots.txt
        br.set_handle_robots(False)
        # The site expects a browser-like user agent
        br.addheaders = [('User-agent', 'Firefox')]

        # Open the T-Mobile CZ login portal, saving the response
        resp = br.open("https://www.t-mobile.cz/.gang/login-url/portal?nexturl=https%3A%2F%2Fwww.t-mobile.cz%2Fweb%2Fcz%2Fosobni")

        br.select_form(nr=2)

        br.form['username'] = '******'
        br.form['password'] = self.password

        resp = br.submit()
        # print resp.get_data()

        resp = br.open("https://sms.client.tmo.cz/closed.jsp")
        br.select_form(nr=1)

        # print br.form
        # help(br.form)

        br.form['recipients'] = self.phone_number  # '736639077'#'737451193' #'605348558'
        br.form['text'] = self.message

        br.form.find_control("confirmation").get("1").selected = self.confirmation

        resp = br.submit()

        # logout
        resp = br.follow_link(url_regex='logout')

        br.close()

        information(None, 'SMS sent!')
Example #21
def get_balance(username, password):

    balance = 0.0
    browser = Browser()

    try:
        browser.open(URL_PATH)
    except Exception:
        return balance

    browser.select_form(nr=0)
    browser.form[USER_FIELD] = username
    browser.form[PASS_FIELD] = password
    browser.submit()

    response = browser.response()
    html = response.read()
    browser.close()

    balance_string = html_filter( html )
    result = get_float( balance_string )

    return result
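Neither html_filter() nor get_float() appears in the snippet; a plausible get_float(), assuming the filtered string carries a single decimal number:

import re

def get_float(text):
    # Hypothetical: pull the first decimal number out of the balance string.
    m = re.search(r'-?\d+(?:[.,]\d+)?', text or '')
    return float(m.group(0).replace(',', '.')) if m else 0.0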
Example #22
def get_balance(username, password):

    balance = 0.0
    browser = Browser()

    try:
        browser.open("http://sipnet.ru")
    except Exception:
        return balance

    browser.select_form(nr=0)
    browser.form['Name'] = username
    browser.form['Password'] = password
    browser.submit()

    response = browser.response()
    html = response.read()
    browser.close()

    balance_string = html_filter( html )
    result = get_float( balance_string )

    return result
Example #23
def activate_emails(emails, email, password):
	import imaplib
	m = imaplib.IMAP4_SSL("imap.gmail.com")
	m.login(email + "@gmail.com", password)
	uuids = []
	for email in emails:
		m.select("INBOX")
		replace_string = '(TO "'+string.replace(email,'%2B','+')+'" SUBJECT "Tradeshift Password Reset")'
		result,data = m.uid('search',None,replace_string)
		htmlbullshit = m.uid('fetch',data[0],'(RFC822)')[1][0][1]
		match = re.compile('(?<=href=").*?(?=")')
		activate = match.findall(htmlbullshit)[1]
		r = requests.get(activate)
		browser = Browser()
		browser.set_handle_robots(False)
		browser.open(r.url)
		match = re.compile('(?<=user=).*?(?=&)')
		uuid = match.findall(r.url)[0]
		uuids.append(uuid)
		browser.select_form(nr=0)
		browser["password"]=password
		browser.submit()
		browser.close()
	return uuids
Example #24
def toggleStatus(list_com):
    status = "On.png" if list_com[1] == "/open" else "Off.png"
    login = list_com[2]
    passwd = list_com[3]

    url = 'http://teresinahc.org/wiki/index.php?title=Especial:Autenticar-se&returnto=P%C3%A1gina+principal'

    br = Browser()

    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Firefox')]
    br.open(url)

    if 'Autenticar-se' in br.response().read():
        br.select_form('userlogin')
        br.form['wpName'] = login
        br.form['wpPassword'] = passwd
        br.submit()
        pag = br.response().read()
        if '"wgUserName":null' not in pag:
            br.open('http://teresinahc.org/wiki/index.php?title=Status&action=edit')
            if 'value="Salvar página"' in br.response().read():
                br.select_form('editform')
                br.form['wpTextbox1'] = '<center>[[Arquivo:'+status+']]</center>'
                br.submit(name='wpSave')
                br.close()
                if status == 'On.png':
                    return 'no momento o Teresina Hacker Clube encontra-se ABERTO!'
                else:
                    return 'no momento o Teresina Hacker Clube encontra-se FECHADO!'
            else:
                br.close()
                return 'Voc\xc3\xaa n\xc3\xa3o tem permiss\xc3\xa3o para alterar p\xc3\xa1ginas da Wiki do Teresina Hacker Clube'
        else:
            br.close()
            output  = re.compile('<div class="errorbox">(.*?)</div>', re.DOTALL |  re.IGNORECASE).findall(pag)
            return "</code>"+output[0].replace("<br />", "").replace("\t", "")+"<code>"
    else:
        br.close()
        return 'Desculpe, por algum motivo n\xc3\xa3o foi poss\xc3\xadvel acessar a Wiki do Teresina Hacker Clube.'
Example #25
            elif "txt" in a["href"]:
                ext = "txt"
            elif "srt" in a["href"]:
                ext = "srt"
            elif "download.mp4" in a["href"]:
                ext = "mp4"
            else:
                continue
            filename = (
                str(num + 1)
                + "."
                + str(j + 1)
                + "-"
                + re.sub(r"--*", "-", re.sub(r"[^A-Za-z0-9.]", "-", ltitle.lower()))
                + "."
                + ext
            )
            if ext in dwdlist:
                print ext + ": " + filename
                download(url, title, filename)
            # else:
            # print ext+": Skipping: Not in download list"
            count += 1
        print
    os.chdir(cd)
    print
br.close()
print "Completed downloading " + course + "."
pth = os.path.join(pcd, course + ".html")
os.remove(pth)
Example #26
class check_keys():
    def __init__(self):
        # initialization
        #variables
        self.log_file = ''
        self.keys = []
        self.url = 'http://forum.rsload.net/cat-kryaki-seriyniki-varez/topic-4820-page-%d.html'
        self.login = '******'
        self.passwd = 'ghbphfr1'
        self.form_nomber = 0
        self.login_name = 'name'
        self.paswd_name = 'password'
        self.submit_nomber = 0
        self.curPage = 84
        self.html_source = ''

        self.headers = [(
                        'User-agent',
                        'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
                        )]

        self.xpath_data = {
                            "max_page" : ".//*[@id='board_index']/div[1]/div/div[2]/ol/li[1]/a/text()",
                            'keys' : './/blockquote/p/span[@class="texthide"]/text()',
                            "is_login" : ".//*[@id='user_info']/fieldset/dl/dt[1]/a/b/span/text()"
                            }

        self.br = Browser()
        self.cj = cookielib.LWPCookieJar()
        self.br.set_cookiejar(self.cj)
        self.br.addheaders = self.headers

    def __del__(self):
        # class teardown
        self.br.close()
        return

    def max_page(self):
        tree= etree.HTML(self.html_source)
        result = tree.xpath(self.xpath_data["max_page"])
        maxPage = result[0][-2] + result[0][-1]
        return maxPage

    def rw_file(self, data=None):
        if data:
            f = open(self.log_file, 'a')
            for index in data:
                f.write(index + '\n')
        else:
            f = open(self.log_file, 'r')
            data = [line.strip() for line in f]
        f.close()
        return data

    def get_all_keys(self):
        oldKeys = self.rw_file()
        tree = etree.HTML(self.html_source)
        keysList = tree.xpath(self.xpath_data["keys"])
        newKeys = []

        buf = ' '.join(keysList)
        buf = re.sub(r'\s+', ',', buf)
        keysList = buf.split(',')

        for key in keysList:
            if not key in oldKeys and key.startswith("CHZ"):
                newKeys.append(key)

        if len(newKeys) > 0:
            self.keys = list(newKeys)
            self.rw_file(newKeys)
            return True
        else:
            return False

    def is_login(self):
        tree= etree.HTML(self.html_source)
        result = tree.xpath(self.xpath_data["is_login"])
        if len(result) == 1:
            return self.login == result[0]
        else:
            return False

    def update(self):
        self.html_source = self.br.open(self.url % (self.curPage)).read()
        maxPage = int(self.max_page())
        if self.curPage != maxPage:
            self.html_source = self.br.open(self.url % (maxPage)).read()
            self.curPage = maxPage
        return self.is_login()


    def login_url(self):
        self.br.open(self.url % (self.curPage))
        self.br.select_form(nr = self.form_nomber)
        self.br[self.login_name] = self.login
        self.br[self.paswd_name] = self.passwd

        self.html_source = self.br.submit(nr = self.submit_nomber).read()
        return self.is_login()
Example #27
def get_filelist(start_url):

    if not start_url.endswith('/'):
        start_url += '/'
    start_url_len = len(start_url)

    dirCollection = {}

    directories_todo = [start_url]

    # walk directories non-recursively
    # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/435875
    while len(directories_todo) > 0:
        print '+', len(dirCollection.keys())
        print '-', len(directories_todo)
        #if len(directories_todo) == 175:
        #    return dirCollection, 'foo'


        directory = directories_todo.pop()
        #if '/drpmsync/' in directory or '/repositories' in directory:
        #    print '>>>>>> skipping', directory
        #    continue
        print '>'
        print directory

        name = directory[start_url_len:].rstrip('/') or '.'
        dirCollection[name] = mb.core.Directory(name)

        br = Browser()
        br.open(directory)

        # found files
        for i in br.links(url_regex = _match_file):
            #if i.url.startswith('./'):
            #    i.url = i.url[2:]
            #print 'appending file', i.url
            dirCollection[name].files.append(i.url)

        found_dirs = []
        for link in br.links(url_regex = _match_dir):
            if link.url.startswith('./'):
                link.url = link.url[2:]
            found_dirs.append(link.base_url + link.url)

        print 'found_dirs:', found_dirs
        #found_files = [ link.base_url + link.url for link in br.links(url_regex = _match_file) ]
        #print 'found_files:', found_files
        br.close()
        
        # found directories
        for found_dir in found_dirs:
            br = Browser()
            br.open(found_dir)

            name = found_dir[start_url_len:].rstrip('/')
            print 'name:', name
            dirCollection[name] = mb.core.Directory(name)

            for i in br.links(url_regex = _match_file):
                dirCollection[name].files.append(i.url)

            for i in br.links(url_regex = _match_dir):
                if i.url.startswith('./'):
                    i.url = i.url[2:]
                print 'neues todo:', i.base_url + i.url
                directories_todo.append(i.base_url + i.url)

            br.close()


    del br

    return dirCollection, 'foo'
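br.links(url_regex=...) accepts a string or compiled pattern; the module-level _match_dir and _match_file used above are not shown, so here is one plausible pair for an Apache-style directory index (an assumption, not the original definitions):

import re

_match_dir = re.compile(r'/$')      # hrefs ending in a slash are directories
_match_file = re.compile(r'[^/]$')  # anything else is treated as a file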
Example #28
class RequestQuery:

    def __init__(self,config):
        self.br=Browser()

        self.config = config
        
        # Initialise connections
        self.phedex = PhEDEx({"endpoint":"https://cmsweb.cern.ch/phedex/datasvc/json/prod/"}, "json")
        self.dbsPhys01 = DbsApi(url = dbs_base_url+"phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url = dbs_base_url+"phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url = dbs_base_url+"phys03/DBSReader/")
        
    def __del__(self):
        self.br.close()

    def getScramArchByCMSSW(self):
        """
        Get from the list of available CMSSW releases
        return a dictionary of ScramArchitecture by CMSSW
        """
        
        # Set a temporary connection to the server and get the response from cmstags
        url = 'https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML'
        br = Browser()
        br.set_handle_robots(False)
        response=br.open(url)
        soup = BeautifulSoup(response.read(), 'lxml')
        
        # Dictionary form
        # {'CMSSW_X_X_X':[slc5_amd64_gcc472], ... }
        archByCmssw={}
        
        # Fill the dictionary
        for arch in soup.find_all('architecture'): 
            for cmssw in arch.find_all('project'): 
                # CMSSW release
                cmsswLabel = cmssw.get('label').encode('ascii', 'ignore')
                if cmsswLabel not in archByCmssw:
                    archByCmssw[cmsswLabel]=[]
                # ScramArch related to this CMSSW release
                archName = arch.get('name').encode('ascii', 'ignore')
                archByCmssw[cmsswLabel].append(archName)
        
        return archByCmssw
      
    def getDatasetOriginSites(self, dbs_url, data):
        """
        Get the origin sites for each block of the dataset.
        Return a list block origin sites.
        """
        
        sites=[]
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listBlocks(detail=True,dataset=data)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listBlocks(detail=True,dataset=data)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listBlocks(detail=True,dataset=data)
        
        seList = []
        for block in response:
            if block['origin_site_name'] not in seList:
                seList.append(block['origin_site_name'])
        
        siteNames = []
        for node in self.nodeMappings['phedex']['node']:
            if node['se'] in seList:
                siteNames.append(node['name']) 
        
        return siteNames, seList
    
    def phEDExNodetocmsName(self, nodeList):
        """
        Convert PhEDEx node name list to cms names list 
        """
        names = []
        for node in nodeList:
            name = node.replace('_MSS',
                                '').replace('_Disk',
                                    '').replace('_Buffer',
                                        '').replace('_Export', '')
            if name not in names:
                names.append(name)
        return names
    
    def setGlobalTagFromOrigin(self, dbs_url,input_dataset):
        """
        Get the global tag of the dataset from the source dbs url. If it is not set, then set global tag to 'UNKNOWN'
        """
        
        globalTag = ""
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listOutputConfigs(dataset=input_dataset)
        
        globalTag = response[0]['global_tag']
        # GlobalTag cannot be empty
        if globalTag == '':
            globalTag = 'UNKNOWN'
            
        return globalTag
    
    def isDataAtUrl(self, dbs_url,input_dataset):
        """
        Returns True if the dataset is at the dbs url, if not returns False
        """
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listDatasets(dataset=input_dataset)
        # This means that the dataset is not at the url
        if not response:
            return False
        else:
            return True
         
    def getLabelByValueDict(self, control):
        """
        From control items, create a dictionary by values
        """   
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[value] = label
                
        return d
    
    def getValueByLabelDict(self, control):
        """
        From control items, create a dictionary by labels
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[label] = value

        return d
    
    def createRequestJSON(self, ticket, input_dataset, dbs_url, cmssw_release, group_name, version = 1):
        """
        Creates a JSON file 'Ticket_#TICKET.json' with the needed
        information for creating a request on ReqMgr.
        Input:
            - ticket: the ticket #, for instance 110773 on https://ggus.eu/?mode=ticket_info&ticket_id=110773
            - input_dataset
            - dbs_url: only the instance name, for example "phys01" for
             https://cmsweb.cern.ch/dbs/prod/phys01/DBSReader
            - cmssw_release
            - group_name: the physics group name
            - version: the dataset version, 1 by default.
        It returns a dictionary that contains the request information.
        """

        scramArchByCMSSW = self.getScramArchByCMSSW()
        self.nodeMappings = self.phedex.getNodeMap()
        task = ticket
        print "Processing ticket: %s" % task
        
        #splitting input dataset       
        input_primary_dataset = input_dataset.split('/')[1].replace(' ','')
        input_processed_dataset = input_dataset.split('/')[2].replace(' ','')
        data_tier = input_dataset.split('/')[3].replace(' ','')
                
        # Transform input value to a valid DBS url
        #dbs_url = "https://cmsweb.cern.ch/dbs/prod/"+dbs_url+"/DBSReader"
        dbs_url = dbs_base_url+dbs_url+"/DBSReader"
        release_id = cmssw_release
                
        # check if deprecated release was used
        release = cmssw_release
        # check if release has not ScramArch match
        if release not in scramArchByCMSSW:
            raise Exception("Error on ticket %s due to ScramArch mismatch" % task)
        else:
            scram_arch = scramArchByCMSSW[release][-1]

        # check if dataset is not at dbs url
        try:
            data_at_url = self.isDataAtUrl(dbs_url,input_dataset)
        except:
            raise Exception('Error on ticket %s, dataset %s not available at %s' %(task, input_dataset,dbs_url))

        if not data_at_url:
            raise Exception('Error on ticket %s, dataset %s not available at %s' %(task, input_dataset,dbs_url))
                    
        ## Get Physics Group
        group_squad = 'cms-storeresults-'+group_name.replace("-","_").lower()

        ## Get Dataset Version
        dataset_version = str(version)

        # Set default Acquisition Era for StoreResults
        acquisitionEra = "StoreResults"

        ## Construction of the new dataset name (ProcessingString)
        ## remove leading hypernews or physics group name and StoreResults+Version
        if input_processed_dataset.find(group_name)==0:
            new_dataset = input_processed_dataset.replace(group_name,"",1)
        else:
            stripped_dataset = input_processed_dataset.split("-")[1:]
            new_dataset = '_'.join(stripped_dataset)
                        
        # Get dataset site info:
        phedex_map, se_names = self.getDatasetOriginSites(dbs_url,input_dataset)
        sites = self.phEDExNodetocmsName(phedex_map)
        
        infoDict = {}
        # Build store results json
        # First add all the defaults values
        infoDict["RequestType"] = "StoreResults"
        infoDict["UnmergedLFNBase"] = "/store/unmerged" 
        infoDict["MergedLFNBase"] = "/store/results/" + group_name.replace("-","_").lower()
        infoDict["MinMergeSize"] = 1500000000
        infoDict["MaxMergeSize"] = 5000000000
        infoDict["MaxMergeEvents"] = 100000
        infoDict["TimePerEvent"] = 40
        infoDict["SizePerEvent"] = 512.0
        infoDict["Memory"] = 2394
        infoDict["CmsPath"] = "/uscmst1/prod/sw/cms"                                        
        infoDict["Group"] = "DATAOPS"
        infoDict["DbsUrl"] = dbs_url
        
        # Add all the information pulled from Savannah
        infoDict["AcquisitionEra"] = acquisitionEra
        infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(dbs_url, input_dataset)
        infoDict["DataTier"] = data_tier
        infoDict["InputDataset"] = input_dataset
        infoDict["ProcessingString"] = new_dataset
        infoDict["CMSSWVersion"] = release
        infoDict["ScramArch"] = scram_arch
        infoDict["ProcessingVersion"] = dataset_version                    
        infoDict["SiteWhitelist"] = list(sites)
        
        # Create report for Migration2Global
        report = {}
         
        #Fill json file, if status is done
        self.writeJSONFile(task, infoDict)
        report["json"] = 'y'
        report["task"] = int(task)
        report["InputDataset"] = input_dataset
        report["ProcessingString"] = new_dataset
        report["localUrl"] = dbs_url
        report["sites"] = list(sites)
        report["se_names"] = list(se_names)

        return report

    def writeJSONFile(self, task, infoDict):
        """
        This writes a JSON file at ComponentDir
        """
        ##check if file already exists
        filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json'
        if not os.access(filename, os.F_OK):
            jsonfile = open(filename, 'w')
            request = {'createRequest': infoDict}  ## CHECK THIS BEFORE FINISHING
            jsonfile.write(json.dumps(request, sort_keys=True, indent=4))
            jsonfile.close()

        return

    def removeJSONFile(self,task):
        """
        This removes the JSON file at ComponentDir if it was created
        """
        filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json'

        if os.access(filename,os.F_OK):
            os.remove(filename)
        return

    def printReport(self, report):
        """
        Print out a report
        """
        print "%20s %5s %10s %50s %50s" %( 'Ticket','json','local DBS','Sites','se_names') 
        print "%20s %5s %10s %50s %50s" %( '-'*20,'-'*5,'-'*10,'-'*50,'-'*50 )
        
        json = report["json"]
        ticket = report["task"]
        #status = report["ticketStatus"]
        localUrl = report["localUrl"].split('/')[5]
        site = ', '.join(report["sites"])
        se_names = ', '.join(report["se_names"])
        print "%20s %5s %10s %50s %50s" %(ticket,json,localUrl,site,se_names)  
Example #30
    def Authorize(self):
        '''
        Authorize the application with Twitter.
        '''
        auth = tweepy.OAuthHandler(self.CON_KEY, self.CON_SEC)

        try:
            auth_url = auth.get_authorization_url()
        except tweepy.error.TweepError:
            raise NetworkError('Unable to access network')

        ui_print (colored('Authorizing Twitter Account...', 'yellow'))
        username = ui_prompt("Username : ")
        password = ui_prompt("Password : ", mask = True)

        ''' Initialize mechanize browser instance '''
        br = Browser()
        cj = cookielib.LWPCookieJar()
        br.set_cookiejar(cj)
        br.set_handle_robots(False)
        br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

        ''' Opens browser and authenticate account '''
        try:
            br.open(auth_url)
        except URLError:
            raise NetworkError('Unable to access network')

        br.form = list(br.forms())[0]
        br.form['session[username_or_email]'] = username
        br.form['session[password]'] = password

        try:
            response = br.submit()
        except URLError:
            br.close()
            raise NetworkError('Unable to access network')

        content = response.get_data()

        soup = BeautifulSoup(content)
        code = soup.find('code')

        if code:
            pin = code.text
            br.close()
        else:
            br.form = list(br.forms())[1]
            try:
                response = br.submit()
            except URLError:
                br.close()
                raise NetworkError('Unable to access network')

            content = response.get_data()
            br.close()
            soup = BeautifulSoup(content)
            code = soup.find('code')
            if code:
                pin = code.text
            else:
                raise AuthorizationError('Authorization Failed')

        try:
            auth.get_access_token(pin)
        except tweepy.error.TweepError:
            raise AuthorizationError('Authorization Failed')
class AppointmentSearch():

    def __init__(self, params):
        """

        :param first_name:
        :param last_name:
        :param email:
        :param repeat_email:
        :param passnummer:
        :param lower_date: format 'dd.mm.yyyy'
        :param upper_date:
        """

        self.app_link = params.get('app_link')
        self.first_name = params.get('first_name')
        self.last_name = params.get('last_name')
        self.email = params.get('email')
        self.repeat_email = params.get('repeat_email')
        assert self.email == self.repeat_email

        self.passnummer = params.get('passnummer')
        self.lower_date = date_to_number(params.get('lower_date'))
        self.upper_date = date_to_number(params.get('upper_date'))
        self.img_path = params.get('img_path')
        self.txt_file = params.get('txt_file')
        self.br = Browser()
        self.br.set_handle_robots(False)
        self.br.addheaders = [("User-agent", "Firefox")]
        self.wrong_cap_dir = params.get('wrong_cap_dir')

        self.cap_fail_msg = params.get('cap_fail_msg')
        self.no_app_msg = params.get('no_app_msg')
        self.other_month_msg = params.get('other_month_msg')
        self.app_available_msg = params.get('app_available_msg')
        self.odd_path = params.get('odd_path')
        self.now = None
        # NOTE: this selenium driver replaces the mechanize Browser created above
        self.br = webdriver.Firefox(executable_path="/home/azhar/Downloads/geckodriver")

        self.dcap = dict(DesiredCapabilities.PHANTOMJS)
        self.dcap["phantomjs.page.settings.userAgent"] = (
            "Firefox"
        )

        self.odd = open(self.odd_path,'wb')

        if not os.path.exists(self.txt_file):
            fp = open(self.txt_file,'wb')
            fp.write(' ')
            fp.close()


    def captcha_solver(self,content):
        my_file = content
        index = my_file.find("url(\'data:image")
        b = my_file[index:].split(')')
        a = b[0]
        imgdata = base64.b64decode(a[27:-1])

        with open(self.img_path+'.jpg', 'wb') as fp:
            fp.write(imgdata)
        with open(self.img_path+'2.jpg', 'wb') as fp:
            fp.write(imgdata)
        img = imread(self.img_path+'.jpg')
        imsave(self.img_path + '.png', img)
        imsave(self.img_path + '2.png', img)
        while not(os.path.exists(self.txt_file)):
            pass
        time.sleep(0.5)
        with open(self.txt_file,'rb') as fp:
            cap = fp.readline()[:-1]
        print cap
        os.remove(self.img_path+'.png')
        # os.remove(self.img_path + '.jpg')
        os.remove(self.img_path + '2.png')
        plt.subplot(1,2,1)
        plt.imshow(img)
        return cap


    def wrong_captcha(self):
        img  = imread(self.img_path+'2.jpg')
        files = glob(self.wrong_cap_dir+'*.jpg')
        d = len(files)+1
        imsave(self.wrong_cap_dir+str(d)+'.jpg',img)

    def date_select(self):
        response = None
        result = False

        ## select day for appointment
        for link in self.br.find_elements_by_link_text("Appointments are available"):
            d = date_to_number(link.get_attribute('href'))
            if self.lower_date <= d <= self.upper_date:
                link.click()
                break
        time.sleep(0.8)
        response = removeNonAscii(self.br.page_source)
        if "Please select an appointment" in response:
            response2 = None

            ## select appointment time
            for link in self.br.find_elements_by_link_text("Book this appointment"):
                link.click()
                break

            time.sleep(0.8)
            response2 = removeNonAscii(self.br.page_source)
            ## fill appointment form
            if "appointment_newAppointmentForm" in response2:
                print time.time() - self.now
                ##solve captcha
                cap = self.captcha_solver(self.br.page_source)
                a = self.br.find_element_by_name("captchaText")
                a.send_keys(cap)
                print time.time() - self.now
                a = self.br.find_element_by_name("lastname")
                a.send_keys(self.last_name)
                a = self.br.find_element_by_name("firstname")
                a.send_keys(self.first_name)
                a = self.br.find_element_by_name("email")
                a.send_keys(self.email)
                a = self.br.find_element_by_name("emailrepeat")
                a.send_keys(self.repeat_email)
                a = self.br.find_element_by_name("fields[0].content")
                a.send_keys(self.repeat_email)
                print time.time() - self.now
                # self.br['passnummer'] = 'ALAN71954'
                time.sleep(15)

    def search_bot(self):
        while 1:
            time.sleep(0.5)
            # try:
            self.now = time.time()
            # self.br = webdriver.PhantomJS(desired_capabilities=self.dcap)
            # self.br = webdriver.Firefox(executable_path="/home/azhar/Downloads/geckodriver")
            self.br.get(self.app_link)
            print time.time()-self.now
            # cap = self.captcha_solver(contents)
            # c = contents.get_data()

            cap = self.captcha_solver(self.br.page_source)
            print time.time()-self.now
            a = self.br.find_element_by_name("captchaText")

            # print cap2
            # time.sleep(1)
            a.send_keys(cap)
            # time.sleep(0.5)
            # contents1 = removeNonAscii(self.br.page_source)
            a.send_keys(Keys.RETURN)
            # contents1 = removeNonAscii(self.br.page_source)
            print time.time() - self.now
            im2 = imread(self.img_path+'.jpg')
            os.remove(self.img_path+'.jpg')
            time.sleep(0.8)
            print time.time() - self.now
            contents = removeNonAscii(self.br.page_source)
            # while contents == contents1:
            #     time.sleep(0.1)
            #     contents = removeNonAscii(self.br.page_source)

            if self.cap_fail_msg in contents:
                plt.subplot(1, 2, 2)
                # plt.imshow(im2)
                # plt.show()
                self.wrong_captcha()

            elif self.no_app_msg in contents:
                continue

            elif self.other_month_msg in contents:
                continue

            elif self.app_available_msg in contents:
                print "found date"
                self.date_select()

            else:
                cap = self.captcha_solver(self.br.page_source)
                self.odd.write(contents)
                self.odd.write('\n\n\n\n')
            self.br.close()
            # except:
            #     continue
Example #32
class Presencia(object):
    """
    """
    def __str__(self):
        return "%s.%s" % (self.__module__, self.__class__.__name__)
    
    def __init__(self, request, username,password,context=''):
        """
        When the class is instantiated, it logs in using the supplied username and password.
        If browser_login is non-empty, it holds the cookies saved from the last logged-in session.
        We reload those cookies into a fresh Browser, which saves the few seconds a login would consume.
        """
        self.context=context
        self.request=request
        registry = self.request.registry
        self.epitool=registry.getUtility(IEPIUtility)

        self.username = username
        self.password = password
        self.browser_login, elk = self.epitool.recoverBrowserSession(self.request, self.username,'presencia')
        if self.browser_login:
          self.br=Browser()
          self.br.set_handle_robots(False)
          cj = LWPCookieJar()
          self.br.set_cookiejar(cj)
          for co in self.browser_login:
              ck = Cookie(version=co['version'], name=co['name'], value=co['value'], port=co['port'], port_specified=co['port_specified'], domain=co['domain'], domain_specified=co['domain_specified'], domain_initial_dot=co['domain_initial_dot'], path=co['path'], path_specified=co['path_specified'], secure=co['secure'], expires=co['expires'], discard=co['discard'], comment=co['comment'], comment_url=co['comment_url'], rest=co['rest'])
              cj.set_cookie(ck)
          print "Logging-in into presencia via browser"
        else:
          self.br = Browser()
          self.br.set_handle_equiv(False)
          self.login(message=u"Logging-in into presencia via regular login")
          return

    def log(self, message):
        """
        """
        logger = logging.getLogger('RUNNING')
        logger.info('%s - %s' % (self.username,message))

    def getBrowserSession(self):
        """
        Retorna la sessio actual del browser per a poderla guardar desde la utility
        """

        cookies = []
        for key in self.br._ua_handlers['_cookies'].cookiejar._cookies.keys():
           domain = self.br._ua_handlers['_cookies'].cookiejar._cookies[key]
           for key2 in domain.keys():
               cookie = domain[key2]
               for key3 in cookie:
                   co = cookie[key3]
                   cookies.append(dict(version=co.version, name=co.name, value=co.value, port=co.port, port_specified=co.port_specified, domain=co.domain, domain_specified=co.domain_specified, domain_initial_dot=co.domain_initial_dot, path=co.path, path_specified=co.path_specified, secure=co.secure, expires=co.expires, discard=co.discard, comment=co.comment, comment_url=co.comment_url, rest=co._rest))
        return (cookies,None)

    def closeBrowser(self):
        """
        """
        self.br.close()

    def saveSessionData(self):
        """
        """
        self.epitool.saveBrowserSession(self.request, self.username,self.getBrowserSession(),'presencia')
        return

    def login(self,message = "Logging in to presencia via regular login"):
        """
        Logs in to presencia using the traditional web login
        """
        self.log(u"Presencia Login %s" % message)
        self.br.open(LOGIN_URL)
        self.br.select_form(nr=0)
        self.br['Username']=self.username
        self.br['Password']=self.password
        response = self.br.submit()
        response_html = response.read()
        response.close()
        self.saveSessionData()

    def checkBrowserExpired(self,html):
        """
        Comprova que el browser nou que hem generat en base a cookies guardades, continua actiu
        Per ferho, comprovem si l'html de la pagina que acavem de obrir conte el text de canvi de contrasenya
        Retorna cert si el browser esta caducat
        """
        return html.find("Introduïu nom d'usuari i contrasenya")>0

    @reloginIfCrashed
    def Marcar(self):
        """
        Canvia l'estat del marcatge actual
        """
        self.log("Marcar")
        persones = self.br.open(FITCHA_URL)
        persones_html = persones.read()
        if self.checkBrowserExpired(persones_html):
            return 'EXPIRED'
        persones.close()
        # getUtility(IRAMCache).invalidate('getMarcatges')
        region_invalidate('epi.presencia.getMarcatges', 'long_term', 'getMarcatges', 'epi.presencia.Presencia', self.username)
        # getUtility(IRAMCache).invalidate('getPresencia')
        region_invalidate('epi.presencia.getPermisos', 'default_term', 'getPermisos', 'epi.presencia.Presencia', self.username)
        print "The clocking state has been changed"
        return True

    ##@reloginIfCrashedAndCache
    @cache_region('long_term', 'getMarcatgesHistoric')
    def getMarcatgesHistoric(self, username, year):
        """
        Recupera la pàgina de marcatges de presència de l'històric anual, on hi ha tot el que no surt a la pagina principal
        La pàgina no té cap mena de id's ni classes, el parsejat es una mica dur...
        """
        self.log("getMarcatges Historic %s sense cachejar" % year)
        return self.getMarcatgesBase(MARCATGES_HISTORIC_URL % year,year=int(year))

    ##@reloginIfCrashedAndCache
    @cache_region('long_term', 'getMarcatges')
    def getMarcatges(self, username):
        """
        Recupera la pàgina de marcatges de presència, on hi han els dos ultims mesos de marcatges.
        La pàgina no té cap mena de id's ni classes, el parsejat es una mica dur...
        """
        self.log("getMarcatges sense cachejar")
        return self.getMarcatgesBase(MARCATGES_URL)

    def getDiscountHoursForDay(self,dia,hores_dia):
        day = '%s-%s-%s' % (dia)
        years = {}
        years['2010']= {
                       '01-01-2010':'F',
                       '04-01-2010':'I',
                       '05-01-2010':'I',
                       '06-01-2010':'F',
                       '07-01-2010':'I',
                       '08-01-2010':'I',
                       #===============
                       '29-03-2010':'I',
                       '30-03-2010':'I',
                       '31-03-2010':'I',
                       #===============
                       '01-04-2010':'I',
                       '02-04-2010':'F',
                       '05-04-2010':'F',
                       #===============
                       '01-05-2010':'F',
                       '24-05-2010':'F',
                       #===============
                       '24-06-2010':'F',
                       #===============
                       '11-09-2010':'F',
                       '24-09-2010':'F',
                       #===============
                       '12-10-2010':'F',
                       #===============
                       '01-11-2010':'F',
                       #===============
                       '06-12-2010':'F',
                       '08-12-2010':'F',
                       '25-12-2010':'F',

                       }
                       
        years['2011']= {
                       '01-01-2011':'F',
                       '03-01-2011':'I',                       
                       '04-01-2011':'I',
                       '05-01-2011':'I',
                       '06-01-2011':'F',
                       '07-01-2011':'I',
                       #===============
                       '07-03-2011':'F',
                       #===============
                       '18-04-2011':'I',
                       '19-04-2011':'I',
                       '20-04-2011':'I',
                       '21-04-2011':'I',                                                                     
                       '22-04-2011':'F',
                       '25-04-2011':'F',
                       #===============
                       '13-06-2011':'F',                       
                       '24-06-2011':'F',
                       #===============
                       '15-08-2011':'F',                       
                       #===============
                       '24-09-2011':'F',
                       #===============
                       '12-10-2011':'F',
                       #===============
                       '01-11-2011':'F',
                       #===============
                       '06-12-2011':'F',
                       '08-12-2011':'F',
                       '26-12-2011':'F',

                       }                       
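        # 'F' = full holiday (the whole day's hours are discounted);
        # 'I' = intensive schedule (a 7-hour day counts as 6)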
        if dia[2] in years.keys():
          if day in years[dia[2]]:
            if years[dia[2]][day]=='F':
                hores = hores_dia
            else:
              if hores_dia==7:
                hores = 6
              else:
                hores = hores_dia
            return ({'F':'Festa','I':'Intensiva'}[years[dia[2]][day]],hores)
          else:
            return None
        else:
          return None

    @reloginIfCrashed
    def getMarcatgesBase(self,URL,**kwargs):
        """
        """
        self.now = DateTime().latestTime()
        
        current_year = self.now.year()
        historic_query_year = kwargs.get('year',None)        
        is_historic_query = historic_query_year!=None

        if is_historic_query:
            days_of_query_year = current_year==historic_query_year and deepcopy(self.getPermisos(self.username)) or deepcopy(self.getPermisosHistoric(self.username, historic_query_year))
            days_of_query_past_year = deepcopy(self.getPermisosHistoric(self.username, historic_query_year-1))
            dies = days_of_query_year
            dies.update(days_of_query_past_year)
        else:
            dies = deepcopy(self.getPermisos(self.username))
            if self.now.month()<3:
                days_of_past_year = deepcopy(self.getPermisosHistoric(self.username, current_year-1))
                dies.update(days_of_past_year)
            
            
        marcatges = self.br.open(URL)
        marcatges_html = marcatges.read()
        if self.checkBrowserExpired(marcatges_html):
            return 'EXPIRED'

        marcatges.close()
        soup = BeautifulSoup(marcatges_html,fromEncoding='iso-8859-1')

        try:
            table = soup.findAll('table')[2]           # The third table in the html is where what we are looking for lives
        except:
            #If we got to this point there is some problem with the user's presence page,
            #and there are neither clockings nor the expected html structure. Return the list of days as-is
            return dies

        # There are lots of tables nested inside other tables, but we know the rows we care
        # about are rows with no further nested tables inside that do have a td, so we look
        # for them and parse the **day** tr to extract the clockings

        meves = False
        for dia in table.findChildren(recursive=False):
           # To handle the case where one person can see the clockings of several people,
           # we stop at the rows containing 'collapse.gif' to investigate
           collapse = 'collapse.gif' in str(dia.find('img'))
           if collapse:
              dlow = dia.__str__().lower()
              lusername = self.username.replace('.',' ')
              # look for the username in the row; if it is there, from now on we store clockings
              if lusername in dlow:
                  meves=True
              # If we were already storing clockings, we stop storing them only when we find
              # the colspan="15", which means we have gone past all of this user's clockings
              # and reached the next person. Without this condition we would store no clockings at all,
              # since the tr right after the username also has collapse.gif and we would wrongly set meves to False.
              elif meves==True and 'colspan="15"' in dia.__str__().lower():
                  meves=False
           if not dia.findAll('table') and dia.td and meves:
               data,marcatge_dict = self.parseDia(dia)

               # Only continue if parseDia actually returned something
               if data!=None:
                   ## If we have nothing under the date [data] inside dies, store
                   ## marcatge_dict as the variable's initial value

                   if data not in dies.keys():
                       dies[data]=marcatge_dict

                   ## If we already have the day, it means there is a permit on that day, so we add whatever clocking data comes in

                   else:
                       dies[data]['total']=dies[data]['total']+marcatge_dict['total']
                       dies[data]['marcatgeobert']=marcatge_dict.get('marcatgeobert','0')
                        ## Concatenate the clocking lists, in case permits come before clockings.
                        ## Permits carry no clockings, so most of the time we concatenate empty lists,
                        ## but this avoids the case where, for example, a telework permit exists before the day it is taken
                       dies[data]['marcatges']=dies[data]['marcatges']+marcatge_dict['marcatges']
                       dies[data]['link_marcatge']=marcatge_dict.get('link_marcatge','')

        return dies

    def parseDia(self,dia):
        """
        Parseja un tr que conte un marcatge retornant-los en forma de dicionari
        """
        parsed = {}

        #Take the tds (first-level children of the tr)
        children = dia.findChildren(recursive=False)

        #Select the td's that contain clockings with something (<a><font></font></a>) inside
        marcatges_web = [a for a in children[7:15] if a.font]

        # Only continue if we have clockings: if there are none,
        # it means this is a permit, and permits are already extracted from the
        # permits section before we start parsing the days
        if marcatges_web:

            #Take the clocking's link for possible later edits
            data_marcatge = children[3].a.string
            parsed['link_marcatge'] = 'https://liszt.upc.es'+children[3].a['href']
            parsed['link_marcatge'] = parsed['link_marcatge'].replace('OpenDocument','EditDocument')+'&AutoFramed'


            #Take the precomputed total for the day, which serves in every case
            #except open clockings, where it is 0, and store it in minutes
            total_dia = children[4].font.string
            parsed['total']=total_dia==None and 0 or HMaMinuts(total_dia,sep='.')

            parsed['marcatges']=[]
            parsed['permisos']=[]

            novamarca = []
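            # pair up consecutive in/out stamps into (start, end) tuples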
            for marca in marcatges_web:
                if novamarca == []:
                    novamarca.append(DateTime('%s %s' % (data_marcatge,marca.font.string)))
                else:
                    novamarca.append(DateTime('%s %s' % (data_marcatge,marca.font.string)))
                    parsed['marcatges'].append(tuple(novamarca))
                    novamarca = []
            #Add the current day's clocking if it is not closed yet
            if len(novamarca)==1:
                novamarca_latest = novamarca[0].latestTime()
                # If it is the current day, use None so the partial clocking counts up to the current time.
                # On any other day, add the day's latest time as the last clocking.
                # Either way, mark it as an open clocking
                ultimamarca = self.now!=novamarca_latest and novamarca_latest or None
                parsed['marcatgeobert']= '1'
                novamarca.append(ultimamarca)
                parsed['marcatges'].append(tuple(novamarca))

            mm,dd,aaaa = data_marcatge.split('/')
            return (dd,mm,aaaa),parsed
        else:
            return (None,None)

    ##@reloginIfCrashedAndCache
    # NOTE!! Check whether this is OK!
    @cache_region('short_term', 'getPresencia')
    def getPresencia(self):
        self.log("getPresencia sense cachejar")
        return self.getPresenciaBase()
        
    @reloginIfCrashed    
    def getPresenciaBase(self):
        """
        Recupera la pàgina de persones de presència, on hi han els telèfons de cadascú i si esta o no presents
        La pàgina no té cap mena de id's ni classes, el parsejat es una mica dur...
        """
        self.log("getPresencia")
        personesbr = self.br.open(PRESENCIA_URL)
        persones_html = personesbr.read()
        if self.checkBrowserExpired(persones_html):
            return 'EXPIRED'
        personesbr.close()
        soup = BeautifulSoup(persones_html,fromEncoding='iso-8859-1')

        persones = {}
        try:
            table = soup.findAll('table')[2]           # The third table in the html is where what we are looking for lives
        except:
            #If we got to this point there is some problem with the user's presence page,
            #and there are neither clockings nor the expected html structure. Return the empty dict

            return persones

        # There are lots of tables nested inside other tables, but we know the rows we care
        # about are rows with no further nested tables inside that do have a td, so we look
        # for them and parse each **person** tr to extract their data

        for fila in table.findChildren(recursive=False):
           # To record the team each person belongs to,
           # we stop at the rows containing 'collapse.gif' to grab the team name
           collapse = 'collapse.gif' in str(fila.find('img'))
           if collapse:
               team = fila.td.b.string
           # The rest will be people: parse them and add them to the list
           else:
             if not fila.findAll('table') and fila.td:
               persona_dict = self.parsePersona(fila)
               persona_dict['equip']=team
               nompersona = persona_dict['nom']
               if nompersona in persones.keys():
                   persones[nompersona]['online'] = persones[nompersona]['online'] or persona_dict['online']
                   persones[nompersona]['equip'] = '%s, %s' % (persones[nompersona]['equip'],team)
               else:
                   persones[nompersona]=persona_dict

        self.saveSessionData()
        return [persones[a] for a in persones.keys()]


    def parsePersona(self,dia):
        """
        Parseja un tr que conte una persona amb els seus telefons retornant-los en forma de dicionari
        """
        parsed = {}

        #Take the tds (first-level children of the tr)
        children = dia.findChildren(recursive=False)
        #collect the person's data
        nom = children[2].a.string
        online = 'vwicn160.gif' in children[1].__str__()
        telefon_intern = children[3].string
        telefon_mobil = children[4].string
        telefon_public = children[5].string
        # keep only the part before the dot (the original discarded this result)
        telefon_public = telefon_public and telefon_public.split('.')[0] or telefon_public

        return dict(nom=nom,online=online,intern=telefon_intern,mobil=telefon_mobil,public=telefon_public)

    #@reloginIfCrashedAndCache
    @cache_region('long_term', 'getPermisosHistoric')
    def getPermisosHistoric(self, username, year):
        """
        """
        self.log("getPermisos Historic %s sense cachejar" % year)
        return self.getPermisosBase(PERMISOS_HISTORIC_URL % year)

    #@reloginIfCrashedAndCache
    @cache_region('default_term', 'getPermisos')
    def getPermisos(self, username):
        """
        """
        self.log("getPermisos sense cachejar")
        return self.getPermisosBase(PERMISOS_URL)
    
    @reloginIfCrashed
    def getPermisosBase(self,url,fname='getPermisos'):
        """
        """
        self.now = DateTime().latestTime()
        dies_permisos = []

        permisos = self.br.open(url)
        permisos_html = permisos.read()
        if self.checkBrowserExpired(permisos_html):
            return 'EXPIRED'

        permisos.close()
        soup = BeautifulSoup(permisos_html,fromEncoding='iso-8859-1')

        try:
            tables = soup.findAll('table')         
            table ='(hh:mm)' in tables[2].__str__() and tables[2] or tables[1]
        except:
            #The browser session has expired, so redo the login
            self.login(message="Re-logging in to presencia via regular login")
            permisos = self.br.open(PERMISOS_URL)
            permisos_html = permisos.read()
            permisos.close()
            soup = BeautifulSoup(permisos_html,fromEncoding='iso-8859-1')
            try:
              table = soup.findAll('table')[2]
            except:
              #If we got to this point there is some problem with the user's presence page,
              #and there are neither permits nor the expected html structure. Return an empty dict of days
              return {}

        # There are lots of tables nested inside other tables, but we know the rows we care
        # about are rows with no further nested tables inside that do have a td, so we look
        # for them and parse the **day** tr to extract the permits

        meves = False
        for dia in table.findChildren(recursive=False):
           # To handle the case where one person can see the permits of several people,
           # we stop at the rows containing 'collapse.gif' to investigate
           collapse = 'collapse.gif' in str(dia.find('img'))
           if collapse:
              dlow = dia.__str__().lower().decode('utf-8')
              lusername = self.username.replace('.',' ')

              # look for the username in the row; if it is there, from now on we store permits
              if lusername in dlow:
                  meves=True
              # If we were already storing permits, we stop storing them only when we find
              # the colspan="5", which means we have gone past all of this user's permits
              # and reached the next person. Without this condition we would store no permits at all,
              # since the tr right after the username also has collapse.gif and we would wrongly set meves to False.
              elif meves==True and 'colspan="5"' in dia.__str__().lower():
                  meves=False
           if not dia.findAll('table') and dia.td and meves:
               td = dia.findAll('td')[1]
               attrmap = td._getAttrMap()
               if 'colspan' in attrmap.keys():
                   if attrmap['colspan']=='4':
                        # if both conditions hold, this tr marks the permit's reason
                       motiu = td.b.font.string.encode('utf-8')
               elif 'colspan' not in str(dia):
                    # if there is no colspan anywhere in the tr, this is a permit entry of the last seen type
                   permis_data = self.parsePermis(dia)
                   dies_permisos = dies_permisos+self.generarDiesPermisos(permis_data,motiu)
        # XXXXXXX TODO We should check here that the list has no duplicates, since the dict would keep only the last permit
        return dict(dies_permisos)

    def generarDiesPermisos(self,permis,motiu):
        """
        Donada la definicio d'un permis, si esta aprovat, genera els seus dies tal com si ens els haguessim trobat en el
        parseig de getMarcatgesBase. Els marcatges seran sempre [] llista buida, doncs els permisos no tenen un marcatge associat
        """
        motiu = motiu.decode('utf-8')
        days = []
        if permis['approved']:
            from_date = TTToDateTime(permis['from_date'])
            to_date = TTToDateTime(permis['to_date'])

            grow_days = True
            counter = 0

            while grow_days:
                current = addDays(from_date,counter)

                # Only add the day if it is not a weekend.
                # Also skip holidays, since a holiday does not count as vacation...

                dhfd = self.getDiscountHoursForDay(DateTimeToTT(current),7)
                es_festa = dhfd!=None and dhfd[0] or False
                if current.dow() not in [0,6] and es_festa!='Festa':
                    motiu_image = 'permis.jpg'
                    if motiu in MOTIUS.keys():
                        motiu_image = MOTIUS[motiu]['imatge']

                    permisdict = dict(compta_hores=MOTIUS[motiu]['compta_hores'],
                                      image=motiu_image,
                                      title=motiu,
                                      minutes=permis['minutes'])

                    diadict = dict(link_marcatge='',
                                   marcatges=[],
                                   total=0,
                                   permisos=[permisdict,])

                    daytuple = (DateTimeToTT(current),diadict)
                    days.append(daytuple)

                # Termination condition, then advance the counter
                if current==to_date:
                    grow_days=False
                counter = counter +1

        return days

    def parsePermis(self,dia):
        """
        Parseja un tr que conte un permis retornant-los en forma de dicionari
        """
        parsed = {}
        tds = dia.findAll('td')

        approved = 'vwicn083.gif' in str(tds[2].img)
        fd = tds[3].font.a.string.split('/')
        td = tds[4].font.string.split('/')
        
        #Take td 6 or 7 depending on which contains the ':', since the history page has one extra column
        minutestd = ':' in tds[6].__str__() and tds[6] or tds[7]        
        # We must replace '12:' with '00:' because in the html received through
        # mechanize, a 00 is taken for an hour and gets transformed into 12 ...
        minutes = HMaMinuts(minutestd.string.split()[0].replace('12:','00:'))
        return dict(approved = approved,
                    from_date = (fd[1],fd[0],fd[2]),
                    to_date = (td[1],td[0],td[2]),
                    minutes = minutes)

    def executa(self, search_url):

        self.SEARCH_URL = search_url

        list = [u'Favorecido:', u'Valor:', u'Observação do Documento:']

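        # route every socket through the SOCKS proxy (Tor), so a retry can
        # leave from a different exit IP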
        socket.socket = socks.socksocket
        socket.create_connection = create_connection
        br = Browser()
        print search_url
        print "ID = " + str(Consulta.ID)
        gravalog(self, search_url + " cont = " + str(Consulta.ID) + "\n")
        LRequest = urllib2.Request(search_url, " ")
        LResponse = br.open(LRequest)
        page = bs_parse(LResponse.read())

        #could be moved outside!!!!

        soup = bs_parse(LResponse.get_data())
        img_captcha = soup.find('img', alt='captcha')
        if img_captcha != None:
            #if a captcha is found, the system switches the IP address
            try:
                print "CAPTCHA!!!"
                gravalog(self, "CAPTCHA\n")
            finally:
                Consulta.ID = newID(self, Consulta.controller)
                br.close()
                socket.socket = socks.socksocket
                socket.create_connection = self.create_connection
                br = Browser()
                print search_url + " cont = " + str(Consulta.ID)
                gravalog(self,
                         search_url + " cont = " + str(Consulta.ID) + "\n")
                LRequest = urllib2.Request(search_url, " ")
                LResponse = br.open(LRequest)
                page = bs_parse(LResponse.read())
        entra = 0

        #walk the HTML page, looking up the Favorecido via the hypertext link
        for table in page.findAll("table"):
            for row2 in table.findAll('tr'):
                #             print row2
                for col in row2.findAll('td'):
                    for href in col.findAll('a'):
                        print href
                        gravalog(self,
                                 str(href).encode('utf-8', 'ignore') + '\n')
                        #resp = br.follow_link(text_regex=href.string)
                        #html = resp.read()
                        #print html
                    if col.string != None:
                        m = re.search('a href', col.string)
                        if m != None:
                            print 'Link!!!'
                            gravalog(self, 'Link!!!\n')
                            print col.string
                            gravalog(
                                self,
                                str(col.string).encode('utf-8', 'ignore') +
                                '\n')
                        # flag the row if any IT-related keyword appears
                        for keyword in ('INFORMATICA', 'TECNOLOGIA DA INFORMACAO',
                                        'TELECOMUNICACOES', 'TELECOMUNICACAO',
                                        'NETWORKS', 'NETWORK', 'REDE', 'REDES'):
                            if re.search(keyword, col.string) != None:
                                entra = 1
                        if entra == 1:
                            logarqui = logging.getLogger("logarqui")
                            logarqui.debug("Inside f!")
                            try:
                                print 'BINGO!'
                                gravalog(self, 'BINGO!\n')
                                print href.string
                                gravalog(
                                    self,
                                    str(href.string).encode('utf-8', 'ignore')
                                    + '\n')
                                LResponse = br.follow_link(
                                    text_regex=href.string)
                                html = LResponse.read()
                                print html
                                gravalog(self, html + '\n')
                                page = bs_parse(html)
                                cont = 3
                                for table in page.findAll("table"):
                                    for row2 in table.findAll('tr'):
                                        #             print row2
                                        favorecido = 0
                                        valor = 0
                                        observacao = 0
                                        for col in row2.findAll('td'):
                                            # each flag set by a previous cell's label makes us
                                            # capture this cell's cleaned-up text
                                            for flag in (favorecido, valor, observacao):
                                                if flag == 1:
                                                    texto = str(col.string).decode(
                                                        'utf8').encode(
                                                            'utf8',
                                                            'ignore').replace(
                                                                "'", "").replace(
                                                                    ";", "").replace(
                                                                        "--", "")
                                                    print texto
                                                    gravalog(self, texto + '\n')
                                                    list.append(texto)
                                            if observacao == 1:
                                                print list
                                            if col.string != None:
                                                m = re.search(
                                                    u'Favorecido:', col.string)
                                                if m != None:
                                                    print u'Favorecido:'
                                                    gravalog(
                                                        self, u'Favorecido:')
                                                    favorecido = 1
                                                m = re.search(
                                                    u'Valor:', col.string)
                                                if m != None:
                                                    print u'Valor:'
                                                    gravalog(self, u'Valor:')
                                                    valor = 1
                                                m = re.search(
                                                    u'Observação do Documento:',
                                                    col.string)
                                                if m != None:
                                                    print u'Observação do Documento:'
                                                    gravalog(
                                                        self,
                                                        'Observação do Documento:'
                                                    )
                                                    observacao = 1

                                entra = 0
                                br.back()
                            except Exception, ex:
                                logarqui.exception(
                                    "\nProblem writing the logs! \n" +
                                    search_url)

                            logarqui.debug("Finishing f!")
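# The except/finally block above swaps in a fresh SOCKS-routed socket and a new
# Browser whenever a captcha page appears. A compact standalone sketch of that
# rotate-and-retry pattern follows; rotate_identity() and MAX_TRIES are
# hypothetical stand-ins for the Tor/newID plumbing used above.
from mechanize import Browser

MAX_TRIES = 3

def rotate_identity():
    # stand-in: the real code steps Tor to a new circuit and re-monkeypatches
    # socket.socket with socks.socksocket
    pass

def fetch_without_captcha(url):
    for _ in range(MAX_TRIES):
        br = Browser()
        br.set_handle_robots(False)
        html = br.open(url).read()
        br.close()
        if 'captcha' not in html.lower():
            return html
        rotate_identity()  # captcha detected: switch exit IP and retry
    raise RuntimeError('still hitting captchas after %d tries' % MAX_TRIES)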
class RequestQuery:

    def __init__(self,config):
        self.br=Browser()

        self.config = config
        
        # Initialise connections
        self.mySiteDB = SiteDBJSON()
        self.phedex = PhEDEx({"endpoint":"https://cmsweb.cern.ch/phedex/datasvc/json/prod/"}, "json")
        self.dbsPhys01 = DbsApi(url = dbs_base_url+"phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url = dbs_base_url+"phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url = dbs_base_url+"phys03/DBSReader/")
        
    def __del__(self):
        self.br.close()

    def login2Savannah(self):
        """
        login2Savannah log into savannah with the given parameters in the config (username and password) 
        User must have admin priviledges for store results requests
        """
        login_page='https://savannah.cern.ch/account/login.php?uri=%2F'
        savannah_page='https://savannah.cern.ch/task/?group=cms-storeresults'
        
        self.br.open(login_page)

        ## 'Search' form is form 0
        ## login form is form 1
        self.br.select_form(nr=1)

        username = self.config["SavannahUser"]
    
        self.br['form_loginname']=username
        self.br['form_pw']=self.config["SavannahPasswd"]
        
        self.br.submit()
        
        response = self.br.open(savannah_page)
        
        # Check to see if login was successful
        if not re.search('Logged in as ' + username, response.read()):
            print('login unsuccessful, please check your username and password')
            return False
        else:
            return True
    
    def selectQueryForm(self,**kargs):       
        """
        selectQueryForm create the browser view to get all the store result tickets from savannah
        """
        if self.isLoggedIn:
            self.br.select_form(name="bug_form")

            ## Use right query form labelled Test
            control = self.br.find_control("report_id",type="select")

            for item in control.items:
                if item.attrs['label'] == "Test":
                    control.value = [item.attrs['value']]
                    
            ##select number of entries displayed per page
            control = self.br.find_control("chunksz",type="text")
            control.value = "150"

            ##check additional searching parameter
            for arg in kargs:
                if arg == "approval_status":
                    control = self.br.find_control("resolution_id",type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]

                elif arg == "task_status":
                    control = self.br.find_control("status_id",type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]
                            
                elif arg == "team":
                    control = self.br.find_control("custom_sb5",type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]

            response = self.br.submit()
            response.read()

        return

    def getScramArchByCMSSW(self):
        """
        Get from the list of available CMSSW releases
        return a dictionary of ScramArchitecture by CMSSW
        """
        
        # Set up a temporary connection to the server and get the response from cmstags
        url = 'https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML'
        br = Browser()
        br.set_handle_robots(False)
        response=br.open(url)
        soup = BeautifulSoup(response.read())
        
        # Dictionary form
        # {'CMSSW_X_X_X':[slc5_amd64_gcc472], ... }
        archByCmssw={}
        
        # Fill the dictionary
        for arch in soup.find_all('architecture'): 
            for cmssw in arch.find_all('project'): 
                # CMSSW release
                cmsswLabel = cmssw.get('label').encode('ascii', 'ignore')
                if cmsswLabel not in archByCmssw:
                    archByCmssw[cmsswLabel]=[]
                # ScramArch related to this CMSSW release
                archName = arch.get('name').encode('ascii', 'ignore')
                archByCmssw[cmsswLabel].append(archName)
        
        return archByCmssw
      
    def createValueDicts(self):       
        """
        Init dictionaries by value/label:
        - Releases by Value
        - Physics group by value
        - DBS url by value
        - DBS url by label
        - Status of savannah request by value 
        - Status of savannah ticket by value (Open/Closed/Any)
        """      
        if self.isLoggedIn:
            self.br.select_form(name="bug_form")
            
            control = self.br.find_control("custom_sb2",type="select")
            self.ReleaseByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("custom_sb3",type="select")
            self.GroupByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("custom_sb4",type="select")
            self.DBSByValueDict = self.getLabelByValueDict(control)
            self.DBSByLabelDict = self.getValueByLabelDict(control)

            control = self.br.find_control("resolution_id",type="select")
            self.StatusByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("status_id",type="select")
            self.TicketStatusByLabelDict = self.getValueByLabelDict(control)

        return
    
    def getDatasetOriginSites(self, dbs_url, data):
        """
        Get the origin sites for each block of the dataset.
        Return a list block origin sites.
        """
        
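        # the DBS instance name (phys01/02/03) is the sixth path segment of the url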
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listBlocks(detail=True,dataset=data)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listBlocks(detail=True,dataset=data)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listBlocks(detail=True,dataset=data)

        pnnList = set()
        for block in response:
            pnnList.add(block['origin_site_name'])
        psnList = self.mySiteDB.PNNstoPSNs(pnnList)
        
        return psnList, list(pnnList)

    def phEDExNodetocmsName(self, nodeList):
        """
        Convert PhEDEx node name list to cms names list 
        """
        names = []
        for node in nodeList:
            name = node.replace('_MSS',
                                '').replace('_Disk',
                                    '').replace('_Buffer',
                                        '').replace('_Export', '')
            if name not in names:
                names.append(name)
        return names
    
    def setGlobalTagFromOrigin(self, dbs_url,input_dataset):
        """
        Get the global tag of the dataset from the source dbs url. If it is not set, then set global tag to 'UNKNOWN'
        """
        
        globalTag = ""
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listOutputConfigs(dataset=input_dataset)
        
        globalTag = response[0]['global_tag']
        # GlobalTag cannot be empty
        if globalTag == '':
            globalTag = 'UNKNOWN'
            
        return globalTag
    
    def isDataAtUrl(self, dbs_url,input_dataset):
        """
        Returns True if the dataset is at the dbs url, if not returns False
        """
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listDatasets(dataset=input_dataset)
        # This means that the dataset is not at the url
        if not response:
            return False
        else:
            return True
         
    def getLabelByValueDict(self, control):
        """
        From control items, create a dictionary by values
        """   
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[value] = label
                
        return d
    
    def getValueByLabelDict(self, control):
        """
        From control items, create a dictionary by labels
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[label] = value

        return d
    
    def getRequests(self,**kargs):
        """
        getRequests Actually goes through all the savannah requests and create json files if the 
        ticket is not Closed and the status of the item is Done.
        It also reports back the summary of the requests in savannah
        """
        requests = []
        
        # Open Browser and login into Savannah
        self.br=Browser()
        self.isLoggedIn = self.login2Savannah()
        
        if self.isLoggedIn:
            if not kargs:
                self.selectQueryForm(approval_status='1',task_status='0')
            else:
                self.selectQueryForm(**kargs)
            self.createValueDicts()
        
            self.br.select_form(name="bug_form")
            response = self.br.submit()

            html_output = response.read()
            
            scramArchByCMSSW = self.getScramArchByCMSSW()
            self.nodeMappings = self.phedex.getNodeMap()
            
            for link in self.br.links(text_regex="#[0-9]+"):
                response = self.br.follow_link(link)
                
                try:
                    ## Get Information
                    self.br.select_form(name="item_form")

                    ## remove leading &nbsp and # from task
                    task = link.text.replace('#','').decode('utf-8').strip()
                    print("Processing ticket: %s" % task)
                    
                    ## Get input dataset name
                    control = self.br.find_control("custom_tf1",type="text")
                    input_dataset = control.value
                    input_primary_dataset = input_dataset.split('/')[1].replace(' ','')
                    input_processed_dataset = input_dataset.split('/')[2].replace(' ','')
                    data_tier = input_dataset.split('/')[3].replace(' ','')
                    
                    ## Get DBS URL by Drop Down
                    control = self.br.find_control("custom_sb4",type="select")
                    dbs_url = self.DBSByValueDict[control.value[0]]

                    ## Get DBS URL by text field (for old entries)
                    if dbs_url=='None':
                        control = self.br.find_control("custom_tf4",type="text")
                        dbs_url = control.value.replace(' ','')
                    else: # Transform input value to a valid DBS url
                        #dbs_url = "https://cmsweb.cern.ch/dbs/prod/"+dbs_url+"/DBSReader"
                        dbs_url = dbs_base_url+dbs_url+"/DBSReader"
                        
                    ## Get Release
                    control = self.br.find_control("custom_sb2",type="select")
                    release_id = control.value
                    
                    ## Get current request status
                    control = self.br.find_control("status_id",type="select")
                    request_status_id = control.value
                    RequestStatusByValueDict = self.getLabelByValueDict(control)
                    
                    # close the request if deprecated release was used
                    try:
                        release = self.ReleaseByValueDict[release_id[0]]
                    except:
                        if len(self.ReleaseByValueDict)>0 and RequestStatusByValueDict[request_status_id[0]] != "Closed":
                            msg = "Your request is not valid anymore, since the given CMSSW release is deprecated. If your request should be still processed, please reopen the request and update the CMSSW release to a more recent *working* release.\n"
                            msg+= "\n"
                            msg+= "Thanks,\n"
                            msg+= "Your StoreResults team"
                            self.closeRequest(task,msg)
                            self.br.back()
                            print("I tried to Close ticket %s due to CMSSW not valid" % task)
                            continue
                    
                    # close the request if release has not ScramArch match
                    if release not in scramArchByCMSSW:
                        if len(self.ReleaseByValueDict)>0 and RequestStatusByValueDict[request_status_id[0]] != "Closed":
                            msg = "Your request is not valid, there is no ScramArch match for the given CMSSW release.\n"
                            msg+= "If your request should be still processed, please reopen the request and update the CMSSW release according to: https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML \n"
                            msg+= "\n"
                            msg+= "Thanks,\n"
                            msg+= "Your StoreResults team"
                            self.closeRequest(task,msg)
                            self.br.back()
                            print("I tried to Close ticket %s due to ScramArch mismatch" % task)
                            continue
                    else: 
                        index=len(scramArchByCMSSW[release])
                        scram_arch = scramArchByCMSSW[release][index-1]

                    # close the request if dataset is not at dbs url
                    try:
                        data_at_url = self.isDataAtUrl(dbs_url,input_dataset)
                    except:
                        print('I got an error trying to look for dataset %s at %s, please look at this ticket: %s' %(input_dataset,dbs_url,task))
                        continue
                    if not data_at_url:
                        msg = "Your request is not valid, I could not find the given dataset at %s\n" % dbs_url
                        msg+= "If your request should be still processed, please reopen the request and change DBS url properly \n"
                        msg+= "\n"
                        msg+= "Thanks,\n"
                        msg+= "Your StoreResults team"
                        self.closeRequest(task,msg)
                        self.br.back()
                        print("I tried to Close ticket %s, dataset is not at DBS url" % task)
                        continue
                        
                    # Avoid not approved Tickets
                    #if not RequestStatusByValueDict[request_status_id[0]] == "Done":
                    #    continue

                    ## Get Physics Group
                    control = self.br.find_control("custom_sb3",type="select")
                    group_id = control.value[0]
                    group_squad = 'cms-storeresults-'+self.GroupByValueDict[group_id].replace("-","_").lower()

                    ## Get Dataset Version
                    control = self.br.find_control("custom_tf3",type="text")
                    dataset_version = control.value.replace(' ','')
                    if dataset_version == "": dataset_version = '1'
                                        
                    ## Get current status
                    control = self.br.find_control("resolution_id",type="select")
                    status_id = control.value

                    ## Get assigned to
                    control = self.br.find_control("assigned_to",type="select")
                    AssignedToByValueDict = self.getLabelByValueDict(control)
                    assignedTo_id = control.value

                    ##Assign task to the physics group squad
                    if AssignedToByValueDict[assignedTo_id[0]]!=group_squad:
                        assignedTo_id = [self.getValueByLabelDict(control)[group_squad]]
                        control.value = assignedTo_id
                        self.br.submit()

                    # Set default Acquisition Era for StoreResults
                    acquisitionEra = "StoreResults"

                    ## Construction of the new dataset name (ProcessingString)
                    ## remove leading hypernews or physics group name and StoreResults+Version
                    if input_processed_dataset.find(self.GroupByValueDict[group_id])==0:
                        new_dataset = input_processed_dataset.replace(self.GroupByValueDict[group_id],"",1)
                    else:
                        stripped_dataset = input_processed_dataset.split("-")[1:]
                        new_dataset = '_'.join(stripped_dataset)
                    
                except Exception as ex:
                    self.br.back()
                    print("There is a problem with this ticket %s, please have a look to the error:" % task)
                    print(str(ex))
                    print(traceback.format_exc())
                    continue
                
                self.br.back()
                
                # Get dataset site info:
                psnList, pnnList = self.getDatasetOriginSites(dbs_url,input_dataset)
                
                infoDict = {}
                # Build store results json
                # First add all the defaults values
                infoDict["RequestType"] = "StoreResults"
                infoDict["UnmergedLFNBase"] = "/store/unmerged" 
                infoDict["MergedLFNBase"] = "/store/results/" + self.GroupByValueDict[group_id].replace("-","_").lower()
                infoDict["MinMergeSize"] = 1500000000
                infoDict["MaxMergeSize"] = 5000000000
                infoDict["MaxMergeEvents"] = 100000
                infoDict["TimePerEvent"] = 40
                infoDict["SizePerEvent"] = 512.0
                infoDict["Memory"] = 2394
                infoDict["CmsPath"] = "/uscmst1/prod/sw/cms"                                        
                infoDict["Group"] = "DATAOPS"
                infoDict["DbsUrl"] = dbs_url
                
                # Add all the information pulled from Savannah
                infoDict["AcquisitionEra"] = acquisitionEra
                infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(dbs_url,input_dataset)
                infoDict["DataTier"] = data_tier
                infoDict["InputDataset"] = input_dataset
                infoDict["ProcessingString"] = new_dataset
                infoDict["CMSSWVersion"] = release
                infoDict["ScramArch"] = scram_arch
                infoDict["ProcessingVersion"] = dataset_version                    
                infoDict["SiteWhitelist"] = psnList
                
                # Create report for Migration2Global
                report = {}
                 
                #Fill json file, if status is done
                if self.StatusByValueDict[status_id[0]]=='Done' and RequestStatusByValueDict[request_status_id[0]] != "Closed":
                    self.writeJSONFile(task, infoDict)
                    report["json"] = 'y'
                else:
                    report["json"] = 'n'
                    
                report["task"] = int(task)
                report["InputDataset"] = input_dataset
                report["ProcessingString"] = new_dataset
                report["ticketStatus"] = self.StatusByValueDict[status_id[0]]
                report["assignedTo"] = AssignedToByValueDict[assignedTo_id[0]]
                report["localUrl"] = dbs_url
                report["sites"] = psnList
                report["pnns"] = pnnList

                # if the request is closed, change the item status to report to Closed
                if report["ticketStatus"] == "Done" and RequestStatusByValueDict[request_status_id[0]] == "Closed":
                    report["ticketStatus"] = "Closed"

                requests.append(report)
                    
            # Print out report
            self.printReport(requests)
        # Close connections
        self.br.close()
        
        return requests

    def closeRequest(self,task,msg):
        """
        This close a specific savannag ticket
        Insert a message in the ticket
        """
        if self.isLoggedIn:
            #self.createValueDicts()
            
            response = self.br.open('https://savannah.cern.ch/task/?'+str(task))

            html = response.read()

            self.br.select_form(name="item_form")

            control = self.br.find_control("status_id",type="select")
            control.value = [self.TicketStatusByLabelDict["Closed"]]

            #Put reason to the comment field
            control = self.br.find_control("comment",type="textarea")
            control.value = msg
                        
            #DBS Drop Down is a mandatory field, if set to None (for old requests), it is not possible to close the request
            self.setDBSDropDown()
                        
            self.br.submit()

            #remove JSON ticket
            self.removeJSONFile(task)
            
            self.br.back()
        return

    def setDBSDropDown(self):
        ## Get DBS URL by Drop Down
        control = self.br.find_control("custom_sb4",type="select")
        dbs_url = self.DBSByValueDict[control.value[0]]

        ## Get DBS URL by text field (for old entries)
        if dbs_url=='None':
            tmp = self.br.find_control("custom_tf4",type="text")
            dbs_url = tmp.value.replace(' ','')

            if dbs_url.find("phys01")!=-1:
                control.value = [self.DBSByLabelDict["phys01"]]
            elif dbs_url.find("phys02")!=-1:
                control.value = [self.DBSByLabelDict["phys02"]]
            elif dbs_url.find("phys03")!=-1:
                control.value = [self.DBSByLabelDict["phys03"]]
            else:
                msg = 'DBS URL of the old request is neither phys01, phys02 nor phys03. Please, check!'
                print(msg)
                raise RuntimeError(msg)

        return

    def writeJSONFile(self, task, infoDict):
        """
        This writes a JSON file at ComponentDir
        """
        ##check if file already exists
        filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json'
        if not os.access(filename,os.F_OK):
            jsonfile = open(filename,'w')
            request = {'createRequest':infoDict} ## CHECK THIS BEFORE FINISHING
            jsonfile.write(json.dumps(request,sort_keys=True, indent=4))
            jsonfile.close()

        return

    def removeJSONFile(self,task):
        """
        This removes the JSON file at ComponentDir if it was created
        """
        filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json'

        if os.access(filename,os.F_OK):
            os.remove(filename)

        return

    def printReport(self, requests):
        """
        Print out a report
        """
        print("%20s %10s %5s %35s %10s %50s %50s" %( 'Savannah Ticket','Status','json','Assigned to','local DBS','Sites','pnns')) 
        print("%20s %10s %5s %35s %10s %50s %50s" %( '-'*20,'-'*10,'-'*5,'-'*35,'-'*10,'-'*50,'-'*50 ))
        
        for report in requests:
            
            json = report["json"]
            ticket = report["task"]
            status = report["ticketStatus"]
            assigned = report["assignedTo"]
            localUrl = report["localUrl"].split('/')[5]
            site = ', '.join(report["sites"])
            pnns = ', '.join(report["pnns"])
            print("%20s %10s %5s %35s %10s %50s %50s" %(ticket,status,json,assigned,localUrl,site,pnns))  
class Session(object):
    def __init__(self):
        """Constructor

        Args:
            None

        Attributes:
            browser (`mechanize._mechanize.Browser`): browser object in session
        """

        self.browser = Browser()

        # set error and debug handlers for the browser

        # cookie jar
        self.browser.set_cookiejar(cookielib.LWPCookieJar())

        # browser options
        self.browser.set_handle_equiv(True)
        self.browser.set_handle_gzip(True)
        self.browser.set_handle_redirect(True)
        self.browser.set_handle_referer(True)
        self.browser.set_handle_robots(False)

        # follows refresh 0 but doesn't hang on refresh > 0
        self.browser.set_handle_refresh(_http.HTTPRefreshProcessor(),
                                        max_time=1)

        # User-Agent
        self.browser.addheaders = [("User-agent", HEADER)]

    def close(self):
        """Destructor for Session. Closes current browser session

        Args:
            None

        Returns:
            None
        """
        self.browser.close()

    def case_id_form(self, case):
        """Grabs the form in the case searching page, and inputs the
        case number to return the response.

        Args:
            case (`str`): case ID to be scraped

        Returns:
            response (`str`): HTML response
        """

        # iterate through the forms to find the correct one
        for form in self.browser.forms():
            if form.attrs["name"] == "inquiryFormByCaseNum":
                self.browser.form = form
                break

        # submit case ID and return the response
        self.browser.form["caseId"] = case
        self.browser.submit()
        response = self.browser.response().read()

        self.browser.back()

        return response if any(
            case_type in response.upper()
            for case_type in ("FORECLOSURE",
                              "FORECLOSURE RIGHTS OF REDEMPTION")) else ''

    def disclaimer_form(self):
        """Navigates to the URL to proceed to the case searching page

        Args:
            None

        Returns:
            None
        """

        # visit the site
        self.browser.open(URL)

        # select the only form on the page
        self.browser.select_form(nr=0)

        # select the checkbox
        self.browser.form["disclaimer"] = ['Y']

        # submit the form
        self.browser.submit()

    @staticmethod
    def server_running():
        """Checks the status of the Casesearch servers

        Args:
            None

        Returns:
            `True` if server is up, `False` otherwise
        """
        return urlopen(URL).getcode() == 200
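# A minimal usage sketch of the Session class above; the case ID is
# hypothetical and URL/HEADER are assumed to be defined elsewhere in
# this module:
if Session.server_running():
    session = Session()
    session.disclaimer_form()                   # accept the disclaimer first
    html = session.case_id_form("24O19000123")  # hypothetical case ID
    if html:
        print("foreclosure case found")
    session.close()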
Exemple #36
0
class LconnectScraper(ClassDataScraper):
    LCONNECT_URL = 'http://leopardweb.wit.edu/'
    USERAGENT = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) ' \
                + 'Gecko/20100122 firefox/3.6.1'

    def __init__(self):
        # Create a cookie jar and a browser
        self._cookieJar = LWPCookieJar()
        self._browser = Browser()
        self._browser.set_cookiejar(self._cookieJar)

        # Set Browser options
        self._browser.set_handle_equiv(True)
        self._browser.set_handle_gzip(True)
        self._browser.set_handle_redirect(True)
        self._browser.set_handle_referer(True)
        self._browser.set_handle_robots(False)
        self._browser.set_handle_refresh(_http.HTTPRefreshProcessor(),
                                         max_time=1)
        self._browser.addheaders = [('User-agent', LconnectScraper.USERAGENT)]

        # Debugging
        self._browser.set_debug_http(True)
        self._browser.set_debug_redirects(True)
        self._browser.set_debug_responses(True)

    def getName(self):
        return "Lconnect Scraper"

    def connect(self):
        """
        Attempts to connect to the data source
        """
        try:
            # Try to open a connection. 8 Second timeout
            self._browser.open(LconnectScraper.LCONNECT_URL, timeout=8)
            return True
        except URLError:
            return False

    def disconnect(self):
        """
        Disconnects from the data source
        """

        self._browser.close()

    def requiresAuthentication(self):
        """
        Returns whether or not the scraper requires authentication information
        """

        return True

    def authenticate(self, username, password):
        """
        Attempts to authenticate the scraper using username and password
        """

        # If we're on the sign in page, try to sign in
        if self._browser.title() == 'Sign In':
            for form in self._browser.forms():
                if form.name is None:
                    self._browser.form = list(self._browser.forms())[0]
                    self._browser['username'] = username
                    self._browser['password'] = password

                    self._browser.submit()

        # If the browser's title is 'Main Menu', we either logged in
        # successfully or were already logged in
        return self._browser.title() == 'Main Menu'

    def getClassData(self):
        """
        Returns a list of ClassData objects
        """

        return []
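# A minimal usage sketch of LconnectScraper, with placeholder credentials:
scraper = LconnectScraper()
if scraper.connect():
    if scraper.requiresAuthentication() and \
            scraper.authenticate('wit_username', 'wit_password'):
        print(scraper.getClassData())
    scraper.disconnect()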
Exemple #37
0
class RequestQuery:

    def __init__(self,config):
        self.br=Browser()

        self.config = config

        self.isLoggedIn = self.login2Savannah()

    def __del__(self):
        self.br.close()

    def closeRequest(self,task,msg):
        if self.isLoggedIn:
            self.createValueDicts()
            
            response = self.br.open('https://savannah.cern.ch/task/?'+str(task))

            html = response.read()

            self.br.select_form(name="item_form")

            control = self.br.find_control("status_id",type="select")
            control.value = [self.TicketStatusByLabelDict["Closed"]]

            #Put reason to the comment field
            control = self.br.find_control("comment",type="textarea")
            control.value = msg
                        
            #The DBS drop-down is a mandatory field; if it is set to None (for old requests), the request cannot be closed
            self.setDBSDropDown()
                        
            self.br.submit()

            #remove JSON ticket
            self.removeJSONFile(task)
            
        return
                
    def createValueDicts(self):       
        if self.isLoggedIn:
            self.br.select_form(name="bug_form")
            
            control = self.br.find_control("custom_sb2",type="select")
            self.ReleaseByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("custom_sb3",type="select")
            self.GroupByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("custom_sb4",type="select")
            self.DBSByValueDict = self.getLabelByValueDict(control)
            self.DBSByLabelDict = self.getValueByLabelDict(control)

            control = self.br.find_control("resolution_id",type="select")
            self.StatusByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("status_id",type="select")
            self.TicketStatusByLabelDict = self.getValueByLabelDict(control)

        return

    def setDBSDropDown(self):
        ## Get DBS URL by Drop Down
        control = self.br.find_control("custom_sb4",type="select")
        dbs_url = self.DBSByValueDict[control.value[0]]

        ## Get DBS URL by text field (for old entries)
        if dbs_url=='None':
            tmp = self.br.find_control("custom_tf4",type="text")
            dbs_url = tmp.value.replace(' ','')

            if dbs_url.find("analysis_02")!=-1:
                control.value = [self.DBSByLabelDict["cms_dbs_ph_analysis_02"]]
            elif dbs_url.find("analysis_01")!=-1:
                control.value = [self.DBSByLabelDict["cms_dbs_ph_analysis_01"]]
            elif dbs_url.find("local_09")!=-1:
                control.value = [self.DBSByLabelDict["cms_dbs_ph_prod_local_09"]]
            else:
                msg = 'DBS URL of the old request is neither analysis_01, analysis_02 nor local_09. Please, check!'
                logging.error(msg)
                raise RuntimeError(msg)

        return
                
    def getLabelByValueDict(self, control):
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[value] = label
                
        return d

    def getRequests(self,**kargs):
        requests = []
        
        if self.isLoggedIn:
            self.selectQueryForm(**kargs)
            self.createValueDicts()
        
            self.br.select_form(name="bug_form")
            response = self.br.submit()

            html_output = response.read()

            for link in self.br.links(text_regex="#[0-9]+"):
                    response = self.br.follow_link(link)
    
                    ## Get Information
                    self.br.select_form(name="item_form")

                    ## Get input dataset name
                    control = self.br.find_control("custom_tf1",type="text")
                    input_dataset = control.value.split('/')
                    input_primary_dataset = input_dataset[1].replace(' ','')
                    input_processed_dataset = input_dataset[2].replace(' ','')
                    
                    ## Get DBS URL by Drop Down
                    control = self.br.find_control("custom_sb4",type="select")
                    dbs_url = self.DBSByValueDict[control.value[0]]

                    ## Get DBS URL by text field (for old entries)
                    if dbs_url=='None':
                        control = self.br.find_control("custom_tf4",type="text")
                        dbs_url = control.value.replace(' ','')
                    else: # Transform input value to a valid DBS url
                        dbs_url = "http://cmsdbsprod.cern.ch/"+dbs_url+"/servlet/DBSServlet"
                        
                    ## Get Release
                    control = self.br.find_control("custom_sb2",type="select")
                    release_id = control.value

                    ## Get Physics Group
                    control = self.br.find_control("custom_sb3",type="select")
                    group_id = control.value[0]
                    group_squad = 'cms-storeresults-'+self.GroupByValueDict[group_id].replace("-","_").lower()

                    ## Get Dataset Version
                    control = self.br.find_control("custom_tf3",type="text")
                    dataset_version = control.value.replace(' ','')
                                        
                    ## Get current status
                    control = self.br.find_control("resolution_id",type="select")
                    status_id = control.value
                
                    ## Get current request status
                    control = self.br.find_control("status_id",type="select")
                    request_status_id = control.value
                    RequestStatusByValueDict = self.getLabelByValueDict(control)

                    ## Get assigned to
                    control = self.br.find_control("assigned_to",type="select")
                    AssignedToByValueDict = self.getLabelByValueDict(control)
                    assignedTo_id = control.value

                    ##Assign task to the physics group squad
                    if AssignedToByValueDict[assignedTo_id[0]]!=group_squad:
                        control.value = [self.getValueByLabelDict(control)[group_squad]]
                        self.br.submit()

                    ## Construction of the new dataset name
                    ## remove leading hypernews or physics group name and StoreResults+Version

                    if len(dataset_version)>0:
                        dataset_prefix = "StoreResults-"+dataset_version
                    else:
                        dataset_prefix = "StoreResults"
                    
                    if input_processed_dataset.find(self.GroupByValueDict[group_id])==0:
                        new_dataset = input_processed_dataset.replace(self.GroupByValueDict[group_id],dataset_prefix,1)
                    else:
                        stripped_dataset = input_processed_dataset.split("-")[1:]
                        new_dataset = dataset_prefix+'-'+'-'.join(stripped_dataset)
                
                    self.br.back()

                    ## remove leading &nbsp and # from task
                    task = link.text.replace('#','').decode('utf-8').strip()

                    infoDict = {}
                
                    infoDict["primaryDataset"] = input_primary_dataset
                    infoDict["processedDataset"] = input_processed_dataset
                    infoDict["outputDataset"] = new_dataset
                    infoDict["physicsGroup"] = self.GroupByValueDict[group_id]
                    infoDict["inputDBSURL"] = dbs_url

                    # close the request if deprecated release was used
                    try:
                        infoDict["cmsswRelease"] = self.ReleaseByValueDict[release_id[0]]
                    except:
                        if len(self.ReleaseByValueDict)>0 and RequestStatusByValueDict[request_status_id[0]] != "Closed":
                            msg = "Your request is not valid anymore, since the given CMSSW release is deprecated. If your request should be still processed, please reopen the request and update the CMSSW release to a more recent *working* release.\n"
                            msg+= "\n"
                            msg+= "Thanks,\n"
                            msg+= "Your StoreResults team"
                            self.closeRequest(task,msg)
            
                    
                    #Fill json file, if status is done
                    if self.StatusByValueDict[status_id[0]]=='Done' and RequestStatusByValueDict[request_status_id[0]] != "Closed":
                        self.writeJSONFile(task, infoDict)

                    infoDict["task"] = int(task)
                    infoDict["ticketStatus"] = self.StatusByValueDict[status_id[0]]
                    infoDict["assignedTo"] = AssignedToByValueDict[assignedTo_id[0]]

                    if infoDict["ticketStatus"] == "Done" and RequestStatusByValueDict[request_status_id[0]] == "Closed":
                        infoDict["ticketStatus"] = "Closed"

                    requests.append(infoDict)
                    
        return requests

    def getValueByLabelDict(self, control):
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[label] = value

        return d

    def login2Savannah(self):
        login_page='https://savannah.cern.ch/account/login.php?uri=%2F'
        savannah_page='https://savannah.cern.ch/task/?group=cms-storeresults'
        
        self.br.open(login_page)

        ## 'Search' form is form 0
        ## login form is form 1
        self.br.select_form(nr=1)

        username = self.config["SavannahUser"]
    
        self.br['form_loginname']=username
        self.br['form_pw']=self.config["SavannahPasswd"]
        
        self.br.submit()
        
        response = self.br.open(savannah_page)
        
        # Check to see if login was successful
        if not re.search('Logged in as ' + username, response.read()):
            logging.error('login unsuccessful, please check your username and password')
            return False
        else:
            return True

    def selectQueryForm(self,**kargs):       
        if self.isLoggedIn:
            self.br.select_form(name="bug_form")

            ## Use right query form labelled Test
            control = self.br.find_control("report_id",type="select")

            for item in control.items:
                if item.attrs['label'] == "Test":
                    control.value = [item.attrs['value']]
                    
            ##select number of entries displayed per page
            control = self.br.find_control("chunksz",type="text")
            control.value = "150"

            ##check additional searching parameter
            for arg in kargs:
                if arg == "approval_status":
                    control = self.br.find_control("resolution_id",type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]

                elif arg == "task_status":
                    control = self.br.find_control("status_id",type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]
                            
                elif arg == "team":
                    control = self.br.find_control("custom_sb5",type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]

            response = self.br.submit()
            response.read()

        return

    def removeJSONFile(self,task):
        filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json'

        if os.access(filename,os.F_OK):
            os.remove(filename)

        return
            
    def writeJSONFile(self, task, infoDict):
        ##check if file already exists
        filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json'
        if not os.access(filename,os.F_OK):
            jsonfile = open(filename,'w')
            jsonfile.write(json.dumps(infoDict,sort_keys=True, indent=4))
            jsonfile.close()

        return
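# A hedged usage sketch of the RequestQuery class above; the config keys
# are the ones the class actually reads, the values are placeholders, and
# the keyword arguments are those understood by selectQueryForm():
config = {"SavannahUser": "someuser",
          "SavannahPasswd": "somepass",
          "ComponentDir": "/tmp/StoreResults"}
query = RequestQuery(config)
tickets = query.getRequests(approval_status="Done", task_status="Open")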
Exemple #38
0
try:
    conn = psycopg2.connect(conn_string)
except:
    print("problem connecting to the database")

# first request to the transparency portal
try:
    LRequest = urllib2.Request(SEARCH_URL, " ")
    LResponse = br.open(LRequest)
    page = bs_parse(LResponse.read())
    print(SEARCH_URL)
    print(page)
    #f.write(page)
except:
    print("problem performing the first web query")

br.close()

# store an array of parent agencies and log the process to the screen
print("################### Agencies ###################")
a = []
b = []

# cursor for navigating the database
cursor = conn.cursor()

# query parent agencies to link them to the agencies
cursor.execute("Select codigo from orgao_superior")

rows = cursor.fetchall()

for row in rows:
Exemple #39
0
    def _utrack_gen_fired(self):
        test_dir(join(self.dirname,'img'))
        br = Browser()

        # Ignore robots.txt
        br.set_handle_robots( False )
        # The site expects a browser-like user-agent
        br.addheaders = [('User-agent', 'Firefox')]
         
        # Retrieve the uTrack upload page, saving the response
        resp1 = br.open( "http://utrack.crempa.net/index_cz.php" ) #http://utrack.crempa.net/index_en.php


        forms = ParseResponse(resp1, backwards_compat=False)
        form = forms[0]

        form.add_file(open(self.filename_converted), "text/plain", self.filename_converted)
        form.find_control(name='map_elevation').value = ['1']

        resp3=form.click()
        resp2 = urlopen(resp3).read()

        resp4 = br.open(resp3)

        if self.download:
            resp = None
            weblinks = list(br.links())
            imgi = 1
            for link in weblinks:
                siteMatch = re.compile( 'show_graph' ).search( link.url )
                if siteMatch:
                    imgfile = open(join(self.dirname,'img','%i.png') % imgi, 'wb')
                    resp = urlopen(link.absolute_url).read()
                    imgfile.write(resp)
                    imgfile.close()
                    imgi += 1
                    
            for link in weblinks:
                siteMatch = re.compile( 'report.php' ).search( link.url )
                if siteMatch:
                    pdffile = open(join(self.dirname,'img','report.pdf'), 'wb')
                    resp = urlopen(link.absolute_url).read()
                    pdffile.write(resp)
                    pdffile.close()

        # Print the site
        content = resp4.get_data()

        pattern = re.compile('src="show_graph.*?"')
        lst = pattern.findall(content)
        for ni, name in enumerate(lst):
            content = content.replace(name, 'src="img/%i.png"' % (ni+1))

        pattern = re.compile('href="show_graph.*?"')
        lst = pattern.findall(content)
        for ni, name in enumerate(lst):
            content = content.replace(name, 'href="img/%i.png"' % (ni+1))

        pattern = re.compile('Date of track: </td>\s+<td> (.*?) </td>')
        lst = pattern.findall(content)
        if len(lst) >= 2:
            if lst[0] != lst[-1]:
                track_date = lst[0] + '-' + lst[-1]
            else:
                track_date = lst[0]
        else:
            track_date = lst[0] if lst else ''
        
        pattern = re.compile('href="report.php.*?"')
        lst = pattern.findall(content)
        content = content.replace(lst[0], 'href="img/report.pdf"')
        content = content.replace('<a href="#"', '<a href="#report_0"', 1)
        #content = content.replace('href="../style/style.css"', 'href="../style/style.css"')
        content = content.replace('src="../img/pdf.gif"', 'src="../style/pdf.gif"')
        content = content.replace('src="../img/logo.gif"', 'src="../style/logo.gif"')
        content = content.replace('src="../img/elegend.png"', 'src="../style/elegend.png"')
        #content = content.replace(r' >hide report</a>', r' >show report</a>')
        content = re.sub(r' >skr.*?t report</a>', r' >zobrazit report</a>', content)
        content = content.replace('if(divs[i].id==\'report_0\')', 'if(divs[i].id==\'report\')')
        content = content.replace(UTRACK_API_KEY, API_KEY)

        title = '<h1 style="margin-bottom:10px">%s <span style="font-size:small">%s</span></h1>\n' % (self.title,track_date)

        download_src = '''
        <a href="%s">Download source gpx file</a>
        ''' % os.path.basename(self.filename_converted)

        iframe_map_track = '''
        <iframe src="gps_vis/map_track.html" width="870" height="600" marginwidth="0" marginheight="0" scrolling="no" frameborder="0" style="width: 870px; height: 600px; margin-top:10px; margin-left: 10px; margin-bottom: 10px; position: relative; overflow: hidden; font-family: arial,sans-serif; line-height: normal; padding: 0pt;">
          <a href="gps_vis/map_track.html">Click here for the map</a>
        </iframe>\n
        '''

        iframe_map_speed = '''
        <iframe src="gps_vis/map_speed.html" width="870" height="600" marginwidth="0" marginheight="0" scrolling="no" frameborder="0" style="width: 870px; height: 600px; margin-left: 10px; margin-bottom: 10px; position: relative; overflow: hidden; font-family: arial,sans-serif; line-height: normal; padding: 0pt;">
          <a href="gps_vis/map_speed.html">Click here for the map</a>
        </iframe>\n
        '''

        profile_img = '''
        <img src="gps_vis/profile.png" alt="profile" border="0" height="250" width="870" style="width: 870px; height: 250px; margin-left: 10px; margin-bottom: 10px; position: relative; overflow: hidden; font-family: arial,sans-serif; line-height: normal; padding: 0pt;">
        '''

        content = content.replace('<div id="page">', '<div id="page">\n' + title + iframe_map_track + profile_img)
        ofile = open(join(self.dirname,'track_report.html'),'w')
        ofile.write(content)
        ofile.close()
        br.close()

        print('utrack generated')
Exemple #40
0
class RequestQuery:

    def __init__(self,config):
        self.br=Browser()

        self.config = config
        
        # Initialise connections
        self.mySiteDB = SiteDBJSON()
        self.dbsPhys01 = DbsApi(url = dbs_base_url+"phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url = dbs_base_url+"phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url = dbs_base_url+"phys03/DBSReader/")
        
    def __del__(self):
        self.br.close()

    def getScramArchByCMSSW(self):
        """
        Get from the list of available CMSSW releases
        return a dictionary of ScramArchitecture by CMSSW
        """
        
        # Set temporary conection to the server and get the response from cmstags
        url = 'https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML'
        br = Browser()
        br.set_handle_robots(False)
        response=br.open(url)
        soup = BeautifulSoup(response.read())
        
        # Dictionary form
        # {'CMSSW_X_X_X':[slc5_amd64_gcc472], ... }
        archByCmssw={}
        
        # Fill the dictionary
        for arch in soup.find_all('architecture'): 
            for cmssw in arch.find_all('project'): 
                # CMSSW release
                cmsswLabel = cmssw.get('label').encode('ascii', 'ignore')
                if cmsswLabel not in archByCmssw:
                    archByCmssw[cmsswLabel]=[]
                # ScramArch related to this CMSSW release
                archName = arch.get('name').encode('ascii', 'ignore')
                archByCmssw[cmsswLabel].append(archName)
        
        return archByCmssw
      
    def getDatasetOriginSites(self, dbs_url, data):
        """
        Get the origin sites for each block of the dataset.
        Return a list block origin sites.
        """
        
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listBlocks(detail=True,dataset=data)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listBlocks(detail=True,dataset=data)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listBlocks(detail=True,dataset=data)
        
        pnnList = set()
        for block in response:
            pnnList.add(block['origin_site_name'])
        psnList = self.mySiteDB.PNNstoPSNs(pnnList)
        
        return psnList, list(pnnList)
    
    def setGlobalTagFromOrigin(self, dbs_url,input_dataset):
        """
        Get the global tag of the dataset from the source dbs url. If it is not set, then set global tag to 'UNKNOWN'
        """
        
        globalTag = ""
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listOutputConfigs(dataset=input_dataset)
        
        globalTag = response[0]['global_tag']
        # GlobalTag cannot be empty
        if globalTag == '':
            globalTag = 'UNKNOWN'
            
        return globalTag
    
    def isDataAtUrl(self, dbs_url,input_dataset):
        """
        Returns True if the dataset is at the dbs url, if not returns False
        """
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listDatasets(dataset=input_dataset)
        # This means that the dataset is not at the url
        if not response:
            return False
        else:
            return True
         
    def getLabelByValueDict(self, control):
        """
        From control items, create a dictionary by values
        """   
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[value] = label
                
        return d
    
    def getValueByLabelDict(self, control):
        """
        From control items, create a dictionary by labels
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[label] = value

        return d
    
    def createRequestJSON(self, ticket, input_dataset, dbs_url, cmssw_release, group_name, version = 1):
        """
        Creates a JSON file 'Ticket_#TICKET.json' with the needed
        information for creating a request on ReqMgr.
        Input:
            - ticket: the ticket #, for instance 110773 on https://ggus.eu/?mode=ticket_info&ticket_id=110773
            - input_dataset
            - dbs_url: only the instance name, for example "phys01" for
             https://cmsweb.cern.ch/dbs/prod/phys01/DBSReader
            - cmssw_release
            - group_name: the physics group name
            - version: the dataset version, 1 by default.
        It returns a dictionary that contains the request information.
        """

        scramArchByCMSSW = self.getScramArchByCMSSW()
        # self.nodeMappings = self.phedex.getNodeMap()  # note: self.phedex is never initialized in this class, so this call is disabled
        task = ticket
        print("Processing ticket: %s" % task)
        
        #splitting input dataset       
        input_primary_dataset = input_dataset.split('/')[1].replace(' ','')
        input_processed_dataset = input_dataset.split('/')[2].replace(' ','')
        data_tier = input_dataset.split('/')[3].replace(' ','')
                
        # Transform input value to a valid DBS url
        #dbs_url = "https://cmsweb.cern.ch/dbs/prod/"+dbs_url+"/DBSReader"
        dbs_url = dbs_base_url+dbs_url+"/DBSReader"
        release_id = cmssw_release
                
        # check if deprecated release was used
        release = cmssw_release
        # check if release has not ScramArch match
        if release not in scramArchByCMSSW:
            raise Exception("Error on ticket %s due to ScramArch mismatch" % task)
        else:
            scram_arch = scramArchByCMSSW[release][-1]

        # check if dataset is not at dbs url
        try:
            data_at_url = self.isDataAtUrl(dbs_url,input_dataset)
        except:
            raise Exception('Error on ticket %s, dataset %s not available at %s' %(task, input_dataset,dbs_url))

        if not data_at_url:
            raise Exception('Error on ticket %s, dataset %s not available at %s' %(task, input_dataset,dbs_url))
                    
        ## Get Physics Group
        group_squad = 'cms-storeresults-'+group_name.replace("-","_").lower()

        ## Get Dataset Version
        dataset_version = str(version)

        # Set default Acquisition Era for StoreResults
        acquisitionEra = "StoreResults"

        ## Construction of the new dataset name (ProcessingString)
        ## remove leading hypernews or physics group name and StoreResults+Version
        if input_processed_dataset.find(group_name)==0:
            new_dataset = input_processed_dataset.replace(group_name,"",1)
        else:
            stripped_dataset = input_processed_dataset.split("-")[1:]
            new_dataset = '_'.join(stripped_dataset)
                        
        # Get dataset site info:
        psnList, pnnList = self.getDatasetOriginSites(dbs_url,input_dataset)

        infoDict = {}
        # Build store results json
        # First add all the defaults values
        infoDict["RequestType"] = "StoreResults"
        infoDict["UnmergedLFNBase"] = "/store/unmerged" 
        infoDict["MergedLFNBase"] = "/store/results/" + group_name.replace("-","_").lower()
        infoDict["MinMergeSize"] = 1500000000
        infoDict["MaxMergeSize"] = 5000000000
        infoDict["MaxMergeEvents"] = 100000
        infoDict["TimePerEvent"] = 40
        infoDict["SizePerEvent"] = 512.0
        infoDict["Memory"] = 2394
        infoDict["CmsPath"] = "/uscmst1/prod/sw/cms"                                        
        infoDict["Group"] = "DATAOPS"
        infoDict["DbsUrl"] = dbs_url
        
        # Add all the information pulled from Savannah
        infoDict["AcquisitionEra"] = acquisitionEra
        infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(dbs_url, input_dataset)
        infoDict["DataTier"] = data_tier
        infoDict["InputDataset"] = input_dataset
        infoDict["ProcessingString"] = new_dataset
        infoDict["CMSSWVersion"] = release
        infoDict["ScramArch"] = scram_arch
        infoDict["ProcessingVersion"] = dataset_version                    
        infoDict["SiteWhitelist"] = psnList
        
        # Create report for Migration2Global
        report = {}
         
        #Fill json file, if status is done
        self.writeJSONFile(task, infoDict)
        report["json"] = 'y'
        report["task"] = int(task)
        report["InputDataset"] = input_dataset
        report["ProcessingString"] = new_dataset
        report["localUrl"] = dbs_url
        report["sites"] = psnList
        report["pnns"] = pnnList

        return report

    def writeJSONFile(self, task, infoDict):
        """
        This writes a JSON file at ComponentDir
        """
        ##check if file already exists
        filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json'
        if not os.access(filename,os.F_OK):
            jsonfile = open(filename,'w')
            request = {'createRequest':infoDict} ## CHECK THIS BEFORE FINISHING
            jsonfile.write(json.dumps(request,sort_keys=True, indent=4))
            jsonfile.close()

        return

    def removeJSONFile(self,task):
        """
        This removes the JSON file at ComponentDir if it was created
        """
        filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json'

        if os.access(filename,os.F_OK):
            os.remove(filename)
        return

    def printReport(self, report):
        """
        Print out a report
        """
        print("%20s %5s %10s %50s %50s" %( 'Ticket','json','local DBS','Sites','pnns')) 
        print("%20s %5s %10s %50s %50s" %( '-'*20,'-'*5,'-'*10,'-'*50,'-'*50 ))
        
        json = report["json"]
        ticket = report["task"]
        #status = report["ticketStatus"]
        localUrl = report["localUrl"].split('/')[5]
        site = ', '.join(report["sites"])
        pnns = ', '.join(report["pnns"])
        print("%20s %5s %10s %50s %50s" %(ticket,json,localUrl,site,pnns))  
Exemple #41
0
import re

URL_PATH = 'https://ss.zadarma.com/'
USER_FIELD = 'email'
PASS_FIELD = 'password'

out_balance = 0.0
browser = Browser()

browser.open( URL_PATH )

browser.select_form( nr=0 )
browser.form[USER_FIELD]        = in_username
browser.form[PASS_FIELD]        = in_password
browser.submit()

browser.open( URL_PATH )

response = browser.response()
html = response.read()
browser.close()

f1 = re.search( r'>\$(.*)</a></span>', html )
if f1 is not None:
    balance_string = f1.groups()[0]
    print(balance_string)
    out_balance = float( balance_string )


print( out_balance )
def trilegal(outputFileName,
             longitude = 0, latitude = 0,
             coordinateType = "galactic",
             fieldArea = 1,
             passband = 4, magnitudeLimit = 26, magnitudeResolution = 0.1,
             IMFtype = 3,
             includeBinaries = True, binaryFraction = 0.3, lowerBinaryMassRatio = 0.7, upperBinaryMassRatio = 1.0,
             extinctionType = 2, extinctionValue = 0.0378, extinctionSigma = 0.0,
             useThinDisc = False,
             useThickDisc = False,
             useBulge = True):
    
    """
    Query the web interface of the TRILEGAL population synthesis code.
    
    The TRILEGAL web form is automatically filled and submitted. The computations run remotely
    on Girardi's server. As soon as they are finished, the script retrieves the data file.
    
    Example:
    
    >>> trilegal("output.txt", longitude=3, latitude=14, coordinateType="galactic", fieldArea=1, magnitudeLimit=7, useThinDisc=True)
    
    @param outputFileName: name of file wherein trilegal output will be saved
    @type outputFileName: string
    @param longitude: galactic longitude (degrees) or right ascension (hours)
    @type longitude: integer
    @param latitude:  galactic latitude (degrees) or declination (degrees)
    @type latitude: integer
    @param coordinateType: either "galactic", or "equatorial" 
    @type coordinateType: string
    @param fieldArea: total field area in square degrees (max. 10 deg^2)
    @type fieldArea: float
    @param passband: U,B,V,R,I,J,H,K = 1,2,3,4,5,6,7,8 for magnitude limit
    @type passband: integer
    @param magnitudeLimit: magnitude limit in specified passband
    @type magnitudeLimit: float
    @param magnitudeResolution: Distance modulus resolution of Galaxy components (mag)
    @type magnitudeResolution: float
    @param IMFtype: type of Initial Mass Function of single stars 
                    1 = Salpeter with cutoff at 0.01, Msun, 
                    2 = Chabrier exponential, 
                    3 = Chabrier lognormal, 
                    4 = Kroupa corrected for binaries, 
                    5 = Kroupa not corrected for binaries
    @type IMFtype: integer
    @param includeBinaries: include binaries in the population (True or False)
    @type includeBinaries: boolean
    @param binaryFraction: fraction of binaries 
    @type binaryFraction: float
    @param lowerBinaryMassRatio: lower limit of binary mass fraction
    @type lowerBinaryMassRatio: float
    @param upperBinaryMassRatio: upper limit of binary mass fraction
    @type upperBinaryMassRatio: float
    @param extinctionType: Type of extinction
                           0: no dust extinction
                           1: local calibration
                           2: calibration at infinity
    @type extinctionType: integer
    @param extinctionValue: for a local calibration this is dAv/dr in mag/pc
                            for the calibration at infinity this is Av at infinity in mag.
    @type extinctionValue: float
    @param extinctionSigma: 1-sigma extinction dispersion / total extinction (max. 0.3)
    @type extinctionSigma: float
    @param useThinDisc: if True use squared hyperbolic secant along z, if False don't include
    @type useThinDisc: boolean
    @param useThickDisc: if True use squared hyperbolic secant along z, if False don't include
    @type useThickDisc: boolean
    @param useBulge: if True use triaxial bulge, if False don't include
    @type useBulge: boolean
    @return: None. A file is retrieved.
    """
    
    # The latest Trilegal web version
    
    trilegalURL = "http://stev.oapd.inaf.it/cgi-bin/trilegal"
    
    # Get the web form
    
    timestamp = strftime("%a, %d %b %Y %H:%M:%S", gmtime())
    print("{0}: Opening TRILEGAL web interface".format(timestamp))
    
    myBrowser = Browser()
    try:
        myBrowser.open(trilegalURL)
    except:
        timestamp = strftime("%a, %d %b %Y %H:%M:%S", gmtime())
        print("{0}: Unable to open the TRILEGAL website".format(timestamp))
        return
        
    myBrowser.select_form(nr=0)    # there is only one form...
    
    # Fill in the form. To know how the different fields in the form are
    # named, we used
    # >>> request = mechanize.Request(trilegalURL)
    # >>> response = mechanize.urlopen(request)
    # >>> forms = mechanize.ParseResponse(response, backwards_compat=False)
    # >>> print forms[0]
    
    timestamp = strftime("%a, %d %b %Y %H:%M:%S", gmtime())
    print("{0}: Filling TRILEGAL web form".format(timestamp))
    
    if coordinateType == "galactic":
        myBrowser["gal_coord"] = ["1"]
        myBrowser["gc_l"]      = str(longitude)
        myBrowser["gc_b"]      = str(latitude)
    else:
        myBrowser["gal_coord"] = ["2"]
        myBrowser["eq_alpha"]  = str(longitude)
        myBrowser["eq_delta"]  = str(latitude)
    
    myBrowser["field"]        = str(fieldArea)
    myBrowser["icm_lim"]      = str(passband) 
    myBrowser["mag_lim"]      = str(magnitudeLimit)
    myBrowser["mag_res"]      = str(magnitudeResolution)
    myBrowser["binary_kind"]  = [str(int(includeBinaries))]
    myBrowser["binary_frac"]  = str(binaryFraction)
    myBrowser["binary_mrinf"] = str(lowerBinaryMassRatio)
    myBrowser["binary_mrsup"] = str(upperBinaryMassRatio)

    myBrowser["extinction_kind"] = [str(extinctionType)]
    if extinctionType == 1:
        myBrowser["extinction_rho_sun"] = str(extinctionValue)
    if extinctionType == 2:
        myBrowser["extinction_infty"] = str(extinctionValue)
        myBrowser["extinction_sigma"] = str(extinctionSigma)

    if useThinDisc:
        myBrowser["thindisk_kind"] = ["3"]
    else:
        myBrowser["thindisk_kind"] = ["0"]
        
    if useThickDisc:
        myBrowser["thickdisk_kind"] = ["3"]
    else:
        myBrowser["thickdisk_kind"] = ["0"]
        
    if useBulge:
        myBrowser["bulge_kind"] = ["2"]
    else:
        myBrowser["bulge_kind"] = ["0"]
     
    # Submit the completed form
    
    timestamp = strftime("%a, %d %b %Y %H:%M:%S", gmtime())
    print("{0}: Submitting completed TRILEGAL web form".format(timestamp))
    
    nextWebPage = myBrowser.submit()
    
    # Trilegal is now computing the result. Click on the special "Refresh" 
    # button until the webpage says that the computations are finished.
    
    timestamp = strftime("%a, %d %b %Y %H:%M:%S", gmtime())
    print ("{0}: Waiting until TRILEGAL computations are finished".format(timestamp))
    
    myBrowser.select_form(nr=0)                   # one form on the "be patient" web page 
    message = "Your job was finished"
    while (message not in nextWebPage.read()):
        nextWebPage = urlopen(myBrowser.click())  # click on the Refresh button
        myBrowser.select_form(nr=0)               # select form again, so that we can make a click again
        sleep(5)                                  # to not overload the website with refresh requests
        
    # Get the url of the outputfile, and retrieve it. This can take a while.
    
    timestamp = strftime("%a, %d %b %Y %H:%M:%S", gmtime())
    print("{0}: Retrieving TRILEGAL output file".format(timestamp))
    
    outputLink = next(myBrowser.links(url_regex="lgirardi/tmp/output"))
    urlretrieve(outputLink.absolute_url, outputFileName)
    myBrowser.close()
    
    # Save the parameters in an info file
    
    parameterInfo = """
    coordinateType {0}
    longitude {1}
    latitude {2}
    fieldArea {3}
    passband {4}
    magnitudeLimit {5}
    magnitudeResolution {6}
    IMFtype {7}
    includeBinaries {8}
    binaryFraction {9}
    lowerBinaryMassRatio {10}
    upperBinaryMassRatio {11}
    extinctionType {12}
    extinctionValue {13}
    extinctionSigma {14}
    """.format(coordinateType,
               longitude,
               latitude,
               fieldArea,
               passband, 
               magnitudeLimit, 
               magnitudeResolution,
               IMFtype,
               includeBinaries, 
               binaryFraction, 
               lowerBinaryMassRatio, 
               upperBinaryMassRatio,
               extinctionType,
               extinctionValue,
               extinctionSigma)
                  
    infoFileName = "info_" + outputFileName
    with open(infoFileName, 'w') as infoFile:
        infoFile.write(parameterInfo)
class CoreEmulator(Emulator):

    def __init__(self, username, password):
        super(CoreEmulator, self).__init__(username, password)
        self.setup_emulator()

    def setup_emulator(self):
        self.browser = Browser()
        self.browser.set_handle_robots(False)
        self.browser.addheaders = moodle.USER_AGENT
        self.cookiejar = CookieJar()
        self.browser.set_cookiejar(self.cookiejar)

    def session_expired(self):
        return self.browser.geturl().endswith(moodle.LOGIN_LOCATION)

    @throws_moodlefuse_error(exception.LoginException)
    def login(self):
        self.open_login_page(self.browser.open)
        self.browser.select_form(
            predicate=lambda form: form.attrs.get('id') == attributes.LOGIN
        )
        self.browser.form.set_value(self.username, name='username')
        self.browser.form.set_value(self.password, name='password')
        resp = self.browser.submit()

        if resp.geturl().endswith(moodle.LOGIN_LOCATION):
            raise Exception

    @throws_moodlefuse_error(resource_errors.UnableToDownloadResource)
    def download(self, destination, source):
        source = str(source)
        if not source.startswith('http://') and not source.startswith('file://'):
            source = config['TEST_DATA'] + '/' + source

        self.browser.retrieve(source, destination)

    def open_link(self, url):
        response = self.browser.open(url)
        return BeautifulSoup(response.read())

    def check_form_checkbox(self, checkboxname):
        self.browser.find_control(checkboxname).items[0].selected = True

    def uncheck_form_checkbox(self, checkboxname):
        self.browser.find_control(checkboxname).items[0].selected = False

    def add_form_content(self, inputname, content):
        self.browser.form.set_value(content, name=inputname)

    def close_form(self):
        self.browser.submit()

    def set_form_to_first_form(self):
        self.browser.select_form(nr=0)

    def set_form_to_form_with_control_value(self, value):
        for form in self.browser.forms():
            for control in form.controls:
                if control.value == value:
                    self.browser.form = form

    @throws_moodlefuse_error(exception.UnableToToggleEditing)
    def turn_course_editing_on(self):
        self.set_form_to_form_with_control_value(moodle.EDIT_ON_MOODLE_BUTTON_TEXT)
        response = self.browser.submit()
        return BeautifulSoup(response.read())

    def _setup_assignments_for_parsing(self, submission_filter):
        self.set_form_to_form_with_control_value('Save and update table')
        self.browser.form["filter"] = [submission_filter]
        self.browser.form["perpage"] = ["100"]
        self.uncheck_form_checkbox('quickgrading')
        response = self.browser.submit()
        return BeautifulSoup(response.read())

    def filter_assignment_submissions(self):
        return self._setup_assignments_for_parsing("submitted")

    def unfilter_assignment_submissions(self):
        return self._setup_assignments_for_parsing("")

    @throws_moodlefuse_error(exception.UnableToToggleEditing)
    def turn_course_editing_off(self):
        self.set_form_to_form_with_control_value(moodle.EDIT_OFF_MOODLE_BUTTON_TEXT)
        response = self.browser.submit()
        return BeautifulSoup(response.read())

    @throws_moodlefuse_error(course_errors.InvalidMoodleIndex)
    def get_courses(self):
        return self.open_link(config['MOODLE_INDEX_ADDRESS'])

    @throws_moodlefuse_error(course_errors.UnableToObtainCategoryList)
    def get_course_categories(self, url):
        return self.open_link(url)

    @throws_moodlefuse_error(resource_errors.UnableToObtainResourceList)
    def get_course_resource_names(self, url):
        return self.open_link(url)

    def close(self):
        self.browser.close()
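# A minimal usage sketch of CoreEmulator, with placeholder credentials;
# assumes the Emulator base class and the moodle/config modules used by
# this example are available and configured:
emulator = CoreEmulator('moodle_user', 'moodle_pass')
emulator.login()
courses = emulator.get_courses()
emulator.close()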
Exemple #44
0
class Operacions(object):

    def __str__(self):
        return "%s.%s" % (self.__module__, self.__class__.__name__)

    def __init__(self, request, username, password, eid='', tid=''):
        """
        """
        self.request = request
        registry = request.registry
        self.epitool=registry.getUtility(IEPIUtility)

        self.initialized = True
        self.username = username
        self.password = password
        self.equipID=eid
        self.tecnicID=tid
        self.browser_login,self.externalLoginKey=self.epitool.recoverBrowserSession(request, self.username,'operacions')
        if self.browser_login:
          # If we have previous cookies, create a new browser and load the saved cookies into it
          self.br=Browser()
          self.br.set_handle_robots(False)
          cj = LWPCookieJar()
          self.br.set_cookiejar(cj)
          for co in self.browser_login:
              ck = Cookie(version=co['version'], name=co['name'], value=co['value'], port=co['port'], port_specified=co['port_specified'], domain=co['domain'], domain_specified=co['domain_specified'], domain_initial_dot=co['domain_initial_dot'], path=co['path'], path_specified=co['path_specified'], secure=co['secure'], expires=co['expires'], discard=co['discard'], comment=co['comment'], comment_url=co['comment_url'], rest=co['rest'])
              cj.set_cookie(ck)

          print "Logging-in into operacions via browser"
        else:
          #self.br = Browser()
          try:
              self.login()
          except:
              self.initialized=False
        return

    def log(self, message):
        """
        """
        logger = logging.getLogger('RUNNING')
        logger.info('%s - %s' % (self.username,message))

    def getBrowserSession(self):
        cookies = []
        for key in self.br._ua_handlers['_cookies'].cookiejar._cookies.keys():
           domain = self.br._ua_handlers['_cookies'].cookiejar._cookies[key]
           for key2 in domain.keys():
               cookie = domain[key2]
               for key3 in cookie:
                   co = cookie[key3]
                   cookies.append(dict(version=co.version, name=co.name, value=co.value, port=co.port, port_specified=co.port_specified, domain=co.domain, domain_specified=co.domain_specified, domain_initial_dot=co.domain_initial_dot, path=co.path, path_specified=co.path_specified, secure=co.secure, expires=co.expires, discard=co.discard, comment=co.comment, comment_url=co.comment_url, rest=co._rest))

        return (cookies,self.externalLoginKey)

    def reloadExternalLoginKey(self):
        """
        """
        self.log("reloadExternalLoginKey")
        mainpage = self.getOperacionsMainPage()
        self.loadExternalLoginKey(mainpage.read())

    def getOperacionsMainPage(self):
        """
        """
        mainpage = self.br.open(LOGIN_URL)
        return mainpage


    def loadExternalLoginKey(self,html):
        """
        """
        key = html.split('externalLoginKey')[-1][1:15]
        self.externalLoginKey = key.startswith('EL') and key or ''

    def closeBrowser(self):
        """
        """
        self.br.close()

    def saveSessionData(self,):
        """
        """
        self.epitool.saveBrowserSession(self.request, self.username,self.getBrowserSession(),'operacions')
        return
    
    def login(self, message = "Logging in to operacions via regular login"):
        """
        Logs in to operacions using the traditional web login
        """
        self.log('Operacions Login %s' % message)
        self.br=Browser()
        self.br.set_handle_equiv(False)
        mainpage = self.getOperacionsMainPage()
        self.br.select_form(nr=0)
        self.br['username']=self.username
        self.br['password']=self.password
        login_response = self.br.submit()
        html = login_response.read()
        self.loadExternalLoginKey(html)
        self.saveSessionData()

    def checkBrowserExpired(self,html):
        """
        Checks whether the new browser built from saved cookies is still active.
        To do so, we check whether the HTML of the page we just opened contains the password-change text.
        Returns True if the browser session has expired.
        """
        return html.find("http://www.upcnet.es/CanviContrasenyaUPC") != -1

    def obtenirCodisUsuari(self):
        """
        """
        self.log("obtenirCodisUsuari")
        imputacions = self.br.open('https://maul.upc.es:8444/imputacions/control/main?idEmpresa=1123&externalLoginKey=%s' % (self.externalLoginKey))
        ihtml = imputacions.read()
        fixedhtml = ihtml.replace('</form\n','</form>\n')
        soup = BeautifulSoup(fixedhtml.replace('value""','value=""'),fromEncoding='utf-8')
        equipId = dict(soup.find('input',type='hidden', id='equipId').attrs)['value']
        tecnicId = dict(soup.find('input',type='hidden', id='tecnicId').attrs)['value']

        return (equipId,tecnicId)

    @reloginIfCrashed
    def obtenirComentariImputacio(self,iid):
        """
        """
        self.log("obtenirComentariImputacio")
        detallimputacio = self.br.open('https://maul.upc.es:8444/imputacions/control/imputacioDetall?timeEntryId=%s&externalLoginKey=%s' % (iid,self.externalLoginKey))
        ihtml = detallimputacio.read()
        if self.checkBrowserExpired(ihtml):
            return 'EXPIRED'
        fixedhtml = ihtml.replace('</form\n','</form>\n')
        soup = BeautifulSoup(fixedhtml.replace('value""','value=""'),fromEncoding='utf-8')
        comentari = soup.find('table').findAll('td')[5].span.string
        return comentari and comentari.encode('utf-8') or ''


    def arreglarCometes(self,params):
        """
        """
        newparam=[]
        newparams = []
        for param in params:
            cometes = len(param.split('"'))
            if cometes>=3:
                newparams.append(param)
            if cometes==2:
                if newparam==[]:
                    newparam.append(param)
                else:
                    newparams.append(','.join(newparam))
            if cometes==1:
                newparam.append(param)

        return newparams

    def fixMalformed(self,html,sep,start,end):
        noucaixadiv = []
        actual = ''
        escriure = True
        for string in html.split(sep):
            actual = string

            if escriure:
               noucaixadiv.append(actual)
            else:
               acomulat = acomulat+string

            if start in string:
                escriure=False
                acomulat = ''
            elif end in string and not escriure:
               escriure=True
               noucaixadiv.append(acomulat.replace(' %s' % (end),'%s %s' % (sep,end)))

        fixedHTML = sep.join(noucaixadiv)

        return fixedHTML

    #@cache(smartCacheKey)
    #@reloginIfCrashedAndCache
    @cache_region('default_term', 'obtenirPortalTecnic')
    def obtenirPortalTecnic(self, username):
        self.log("obtenirPortalTecnic sense cachejar")
        return self.obtenirPortalTecnicBase(username)
        
    @reloginIfCrashed
    def obtenirPortalTecnicBase(self, username):
        """
        """
        self.reloadExternalLoginKey()
        self.log("obtenirPortalTecnic")
        base_url = 'https://maul.upc.es:8444/portal/control/portalTecnicConsulta?'
        #self.reloadExternalLoginKey()
        parts =  ['tipusCerca=',
                  'personaAssignada=%s' % (self.tecnicID),
                  'partyIdAss=%s' % (self.tecnicID),
                  'undefined=%s' % (self.tecnicID),
                  'statusId=ESTAT_OBERT_PENDENT',
                  'sensePaginacio=on',
                  'cercant=on',
                  'externalLoginKey=%s' % (self.externalLoginKey),
                 ]
        url = base_url+'&'.join(parts)
        html = self.br.open(url).read()
        if self.checkBrowserExpired(html):
            return 'EXPIRED'
        html = html.replace('")">',')">')
        soup = BeautifulSoup(html,fromEncoding='UTF-8')

        seccions = [div for div in soup.findAll('div') if '"caixa"' in str(div)[1:30]]
        self.saveSessionData()
        return dict(ordres = seccions[0],
                    tiquets = seccions[1],
                    problemes = seccions[2],
                    canvis = seccions[3],
#                    percepcions = seccions[4]
                    )


    def obtenirOrdres(self,fname='obtenirOrdres'):
        """
        """
        soup = self.obtenirPortalTecnic(self.username)['ordres']
        ordres = []
        if soup:
          for tr in soup.findAll('tr'):
            if tr.td:
               tds = tr.findAll('td')
               ordre = {}
               href = tds[4].span.a._getAttrMap()['href']
               params = dict([tuple(a.split('=')) for a in href.replace('&amp;','&').replace('"','').split('?')[1].split('&')])
               ordre['orderId']= params['orderId']
               ordre['orderItemSeqId']=params['orderItemSeqId']
               title = tds[4].span.a.string
               ordre['title']='%s - %s' % (ordre['orderId'],title)
               if ordre not in ordres:
                   ordres.append(ordre)

        ordres = sorted(ordres,key=lambda ordre: ordre['orderId'])
        ordres.reverse()
        return ordres
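
    # Aside: the hand-rolled href parsing above can also be done with the
    # stdlib. A minimal sketch, assuming Python 2's urlparse module (not used
    # in the original code):
    #
    #   from urlparse import urlparse, parse_qs
    #   qs = parse_qs(urlparse(href.replace('&amp;', '&')).query)
    #   orderId, orderItemSeqId = qs['orderId'][0], qs['orderItemSeqId'][0]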

    def processarTaula(self,soup):
        """
        """
        items = []

        if soup:
          for tr in soup.findAll('tr'):
            if tr.td:
               tds = tr.findAll('td')
               item = {}
               item['requirementId'] = tds[3].a.span.string
               try:
                 item['title']='%s - %s' % (item['requirementId'],tds[4].a.span.string)
               except:
                 #import ipdb;ipdb.set_trace()
                 pass
               items.append(item)
        return items


    def obtenirTiquetsAssignats(self):
        """
        """
        soup = self.obtenirPortalTecnic(self.username)['tiquets']
        return self.processarTaula(soup)

    def obtenirProblemesAssignats(self):
        """
        """
        soup = self.obtenirPortalTecnic(self.username)['problemes']
        return self.processarTaula(soup)

    def obtenirCanvisAssignats(self):
        """
        """
        soup = self.obtenirPortalTecnic(self.username)['canvis']
        return self.processarTaula(soup)


    def obtenirTiquetsEquip(self):
        """
        """
        self.log("obtenirTiquetsEquip")
        self.reloadExternalLoginKey()
        base_url = 'https://maul.upc.es:8444/tiquets/control/tiquetsEquipConsulta?'
        parts =  ['tipusCerca=simple',
            'VIEW_INDEX=1',
            'VIEW_SIZE=30',
            'statusId=ESTAT_OBERT_PENDENT',
            'nomesTancats=on',
            'personaAssignada=%s' % (self.tecnicID),
            'undefined=%s' % (self.tecnicID),
            'sensePaginacio=on',
            'cercant=on',
            'externalLoginKey=%s' % (self.externalLoginKey)
            ]
        base_url = 'https://maul.upc.es:8444/tiquets/control//tiquetsAssignatsConsulta?'
        parts =  ['statusId=ESTAT_OBERT_PENDENT',
            'sensePaginacio=on',
            'externalLoginKey=%s' % (self.externalLoginKey)
            ]
        url = base_url+'&'.join(parts)
        cerca_tiquets = self.br.open(url)
        thtml = cerca_tiquets.read()
        soup = BeautifulSoup(thtml.replace('value""','value=""').replace('\n','').replace('\t',''),fromEncoding='utf-8')
        inicidiv = thtml.find('div class="caixa"')-1
        fidiv = thtml.find('div class="endcolumns"')
        caixadiv = thtml[inicidiv:fidiv]
        soup = BeautifulSoup(caixadiv,fromEncoding='utf-8')
        tiquets = []
        self.saveSessionData()
        return tiquets


    def getUrlConsultaImputacions(self,di,df):
        """
        """
        # Hem tret el equipId dels parametres, aixi surten les imputacions de tots els equips
        #params = dict(equipId=self.equipID,
        params = dict(equipId='',        
                      tecnicId=self.tecnicID,
                      dataInicial=di,
                      dataFinal=df,
                      sensePaginacio='on',
                      cercant='on',
                      tipusCerca='simple',
                      externalLoginKey=self.externalLoginKey)

        param_string = '&'.join(['%s=%s' % (key,params[key]) for key in params])
        url = 'https://maul.upc.es:8444/imputacions/control/imputacionsConsulta?%s' % param_string
        return url
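
    # Aside: the manual '&'.join above performs no URL escaping. A sketch of
    # the same construction using the stdlib (Python 2's urllib.urlencode, not
    # used in the original code):
    #
    #   from urllib import urlencode
    #   url = ('https://maul.upc.es:8444/imputacions/control/'
    #          'imputacionsConsulta?%s' % urlencode(params))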

    def getImputacionsRaw(self,html_raw):
        """
        """
        html= html_raw.replace('value""','value=""').replace('\n','').replace('\t','')
        # This e-mail address in the middle of the HTML makes BeautifulSoup blow up, so we hide it
        # for now, since it is just an isolated case from one ticket. If it happens again, a regex
        # should be found to filter text with this format before parsing.
        fixedhtml = html.replace('<*****@*****.**>','')
        fixedhtml = fixedhtml.replace('form<!--','form><!--')
        soup = BeautifulSoup(fixedhtml,fromEncoding='utf-8')
        imputacions_raw = [a for a in soup.findAll('tr') if str(a).find('class="previsio"')>0][:-1]
        return imputacions_raw

    #@cache(smartCacheKey)
    #@reloginIfCrashedAndCache
    @cache_region('default_term', 'obtenirImputacions')
    def obtenirImputacions(self, username, di, df):
        self.log("obtenirImputacions sense cachejar")
        return self.obtenirImputacionsBase(username, di, df)
        
    @reloginIfCrashed
    def obtenirImputacionsBase(self, username, di, df):
        """
        """
        #import ipdb; ipdb.set_trace()
        self.reloadExternalLoginKey()
        self.log("obtenirImputacions entre %s i %s" % (di,df))
        result = self.br.open(self.getUrlConsultaImputacions(di,df))
        html = result.read()
        if self.checkBrowserExpired(html):
            return 'EXPIRED'
        imputacions_raw = self.getImputacionsRaw(html)


        ## If we get no results, we try reloading the externalLoginKey, since it changes for some
        ## strange reason even though the session and the cookies are still valid.
        ## If it still returns nothing after that, the user really has no imputations in the date range.
        if imputacions_raw == []:
            try:
                #self.reloadExternalLoginKey()
                result = self.br.open(self.getUrlConsultaImputacions(di,df))
                imputacions_raw = self.getImputacionsRaw(result.read())
            except:
                pass

        imputacions = []
        for imputacio in imputacions_raw:
            parts = imputacio.findAll('td')
            date = parts[0].span.string
            dd,mm,aaaa = date.split('-')
            iid = parts[0].a.attrs[0][1].split('timeEntryId=')[1].split('"')[0]
            amount = parts[3].span.string
            imp_type = parts[5].acronym.string.__str__().lstrip()
            try:
              referencia = parts[6].span.span.string
            except:
              referencia = parts[6].span.a.string
            if referencia is None:
                referencia = ' - (No reference)'

            tdict = dict(type=imp_type, date = (dd,mm,aaaa), iid = iid, amount = amount, referencia = referencia)
            imputacions.append(tdict)
        imputacions.reverse()
        # Store the days we queried in the utility so that the cache invalidators can be called correctly later
        self.epitool.saveObtenirImputacionsDays(self.request, username, di, df)
        self.saveSessionData()
        return imputacions

    @reloginIfCrashed
    def imputarOrdre(self,data,hores,minuts,orderId,orderItemSeqId,fname='imputarOrdre'):
        """
        """
        self.log("imputarOrdre")
        self.reloadExternalLoginKey()

        parts = ['dataImputacio=%s' % (data),
                 'horesImputades=%s' % (hores),
                 'minutsImputats=%s' % (minuts),
                 'orderId=%s' % (orderId),
                 'orderItemSeqId=%s' % (orderItemSeqId),
                 'cas=ORDRE',
                 'partyId=%s' % (self.tecnicID),
                 'externalLoginKey=%s' % (self.externalLoginKey),
                ]

        url = 'https://maul.upc.es:8444/imputacions/control/imputacioAltaGraella?' + '&'.join(parts)

        response = self.br.open(url)
        html = response.read()
        if self.checkBrowserExpired(html):
            return 'EXPIRED'

        exitcode = eval(html)
        exitcode['hores']=hores
        exitcode['minuts']=minuts.rjust(2,'0')
        # Invalidate the cache
        # getUtility(IRAMCache).invalidate('obtenirImputacions')
        day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username)
        region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2)
        self.saveSessionData()
        return exitcode
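
    # Aside: eval() on the raw response executes whatever the server sends back.
    # If, as the code suggests, the endpoint replies with a Python dict literal,
    # ast.literal_eval would parse it without running code. A hedged sketch, not
    # the project's actual behaviour:
    #
    #   import ast
    #   exitcode = ast.literal_eval(html)  # ValueError on anything but a literal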

    @reloginIfCrashed
    def imputarActivitat(self,data,hores,minuts,proces,activitatId,fname='imputarActivitat'):
        """
        """
        self.log("imputarActivitat")
        #self.reloadExternalLoginKey()

        parts = ['dataImputacio=%s' % (data),
                 'horesImputades=%s' % (hores),
                 'minutsImputats=%s' % (minuts),
                 'proces=%s' % (proces),
                 'activitatId=%s' % (activitatId),
                 'cas=ACTIVITAT',
                 'partyId=%s' % (self.tecnicID),
                 'externalLoginKey=%s' % (self.externalLoginKey),
                ]

        url = 'https://maul.upc.es:8444/imputacions/control/imputacioAltaGraella?' + '&'.join(parts)
        response = self.br.open(url)
        html = response.read()
        if self.checkBrowserExpired(html):
            return 'EXPIRED'
        exitcode = eval(html)
        exitcode['hores']=hores
        exitcode['minuts']=minuts.rjust(2,'0')
        # Invalidate the cache
        # getUtility(IRAMCache).invalidate('obtenirImputacions')
        day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username)
        region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2)
        return exitcode

    def getCodiImputacio(self,data,minuts,ref,tipus):
        """
        Look up an imputation filtering by imputation type, minutes logged, date and reference.
        Among all the results we keep the one with the highest iid, which will be the most
        recently entered one (a distilled max()-based sketch follows this method).
        """
        self.log("getCodiImputacio")
        # Invalidate the cache
        # getUtility(IRAMCache).invalidate('obtenirImputacions')
        day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username)
        region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2)
        
        imputacions = self.obtenirImputacions(self.username, data, data)

        tt = tuple(data.split('-'))
        imputacio = None
        newest=True
        for imp in imputacions:
            if imputacio!=None:
                newest = imp['iid']>imputacio['iid']

            if imp['date']==tt and ref in imp['referencia'] and HMaMinuts(imp['amount'])==minuts and newest and imp['type']==tipus:
                imputacio = imp

        if imputacio:
            return imputacio['iid']
        else:
            return ''
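
    # The newest-match scan above distils to a single max() over the matching
    # imputations. A behavioural sketch (string comparison of iids, exactly as
    # in the loop above):
    #
    #   matches = [i for i in imputacions
    #              if i['date'] == tt and ref in i['referencia']
    #              and HMaMinuts(i['amount']) == minuts and i['type'] == tipus]
    #   return max(matches, key=lambda i: i['iid'])['iid'] if matches else ''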

    @reloginIfCrashed
    def imputarTiquet(self,data,hores,minuts,tiquetId,fname='imputarTiquet'):
        """
        Log time against a ticket using the operations manager form
        """
        self.log("imputarTiquet")
        today = '-'.join(DateTimeToTT(DateTime()))

        self.reloadExternalLoginKey()

        parts = ['requirementId=%s' % (tiquetId),
                 'externalLoginKey=%s' % (self.externalLoginKey),
                ]

        url = 'https://maul.upc.es:8444/tiquets/control/tiquetDetallAssignacioHistoria?' + '&'.join(parts)
        self.br.open(url)
        try:
            self.br.select_form(name='afegirImputacio')
        except FormNotFoundError:
            return dict(hores='',
                    minuts='',
                    confirm='error',
                    code='Could not log time against a closed ticket.')
        except:
            return 'EXPIRED'
        self.br.form.action='https://maul.upc.es:8444/tiquets/control/imputarTempsTasca'
        self.br.form.find_control('minutsImputats').readonly=False
        minutsImputats = int(hores)*60 + int(minuts)
        self.br['minutsImputats']=str(minutsImputats)
        self.br['horesImputadesHelper']=hores
        self.br['minutsImputatsHelper']=minuts
        addtiquet_response = self.br.submit()
        #html = addtiquet_response.read()

        # Look up the code of the imputation we just created
        iid = self.getCodiImputacio(today,minutsImputats,tiquetId,'TI')
        code = iid.encode('utf-8')
        if data!=today:
            code = self.canviarImputacio(data,hores,minuts,iid)
        # Invalidate the cache
        # getUtility(IRAMCache).invalidate('obtenirImputacions')
        day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username)
        region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2)
        self.saveSessionData()
        return dict(hores=hores,
                    minuts=minuts.rjust(2,'0'),
                    confirm=iid=='' and 'error' or 'ok',
                    code=code)

    @reloginIfCrashed
    def imputarProblema(self,data,hores,minuts,tiquetId,fname='imputarProblema'):
        """
        Log time against a problem using the operations manager form
        """
        self.log("imputarProblema")
        today = '-'.join(DateTimeToTT(DateTime()))

        self.reloadExternalLoginKey()

        parts = ['requirementId=%s' % (tiquetId),
                 'externalLoginKey=%s' % (self.externalLoginKey),
                ]

        url = 'https://maul.upc.es:8444/problemes/control/problemaDetallImputacions?' + '&'.join(parts)

        self.br.open(url)
        self.br.select_form(name='afegirImputacio')
        self.br.form.action='https://maul.upc.es:8444/problemes/control/imputarTemps'
        self.br.form.find_control('minutsImputats').readonly=False
        minutsImputats = int(hores)*60 + int(minuts)
        self.br['minutsImputats']=str(minutsImputats)
        self.br['horesImputadesHelper']=hores
        self.br['minutsImputatsHelper']=minuts
        addtiquet_response = self.br.submit()
        #html = addtiquet_response.read()

        # Look up the code of the imputation we just created
        iid = self.getCodiImputacio(today,minutsImputats,tiquetId,'PB')
        code=iid.encode('utf-8')
        if data!=today:
            code = self.canviarImputacio(data,hores,minuts,iid)
        # Invalidate the cache
        # getUtility(IRAMCache).invalidate('obtenirImputacions')
        day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username)
        region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2)
        self.saveSessionData()
        return dict(hores=hores,
                    minuts=minuts.rjust(2,'0'),
                    confirm=iid=='' and 'error' or 'ok',
                    code=code)

    @reloginIfCrashed
    def canviarImputacio(self,novadata,hores,minuts,iid,fname='canviarImputacio'):
        """
        """
        self.log("canviarImputacio")
        if iid!='':
            code = iid
            self.reloadExternalLoginKey()
            parts = ['timeEntryId=%s' % (iid),
                     'dataImputacio=%s' % (novadata),
                     'horesImputades=%s' % (hores),
                     'minutsImputats=%s' % (minuts),
                     'externalLoginKey=%s' % (self.externalLoginKey),
                    ]
            url = 'https://maul.upc.es:8444/imputacions/control/editarImputacio?' + '&'.join(parts)
            response = self.br.open(url)
            html = response.read()
            if self.checkBrowserExpired(html):
                return 'EXPIRED'
            # Invalidate the cache
            # getUtility(IRAMCache).invalidate('obtenirImputacions')
            day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username)
            region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2)
            self.saveSessionData()
        else:
            code = "No sha pogut imputar al dia %s. Refresca lepi i mou la imputacio manualment arrossegant-la al dia %s" % (novadata,novadata)
            code.decode('utf-8')
        return code.encode('utf-8')

    @reloginIfCrashed
    def modificarImputacio(self,hores,minuts,iid,comentari='',fname='modificarImputacio'):
        """
        """
        self.log("modificarImputacio")
        self.reloadExternalLoginKey()
        parts = ['timeEntryId=%s' % (iid),
                 'horesImputades=%s' % (hores),
                 'minutsImputats=%s' % (minuts),
                 'externalLoginKey=%s' % (self.externalLoginKey),
                ]
        if comentari:
            parts.append('editComentari=%s' % quote(comentari))
        url = 'https://maul.upc.es:8444/imputacions/control/imputacioEdicioGraella?' + '&'.join(parts)
        response = self.br.open(url)
        html = response.read()
        if self.checkBrowserExpired(html):
            return 'EXPIRED'
        exitcode = eval(html)
        exitcode['hores']=str(int(hores))
        exitcode['minuts']=minuts.rjust(2,'0')
        # Invalidate the cache
        # getUtility(IRAMCache).invalidate('obtenirImputacions')
        day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username)
        region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2)
        self.saveSessionData()
        return exitcode

    @reloginIfCrashed
    def esborrarImputacio(self,iid,fname='esborrarImputacio'):
        """
        """
        self.log("esborrarImputacio")
        self.reloadExternalLoginKey()
        parts = ['timeEntryId=%s' % (iid),
                 'externalLoginKey=%s' % (self.externalLoginKey),
                ]
        url = 'https://maul.upc.es:8444/imputacions/control/imputacioEsborrarGraella?' + '&'.join(parts)
        response = self.br.open(url)
        html = response.read()
        if self.checkBrowserExpired(html):
            return 'EXPIRED'
        exitcode = eval(html)
        # Invalidate the cache
        # getUtility(IRAMCache).invalidate('obtenirImputacions')
        day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username)
        region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2)
        self.saveSessionData()
        return exitcode

import warnings

import numpy as np
from mechanize import Browser

# Note: geturl(url, dest), used at the end of this function, is assumed to be
# a project helper that downloads a URL to a local file; it is not defined in
# this snippet.


def fetch_isochrones(isoc_kind='parsec_CAF09_v1.2S',
                     photsys_version='yang',
                     photsys_file='tab_mag_odfnew/tab_mag_ubvrijhk.dat',
                     kind_cspecmag='aringer09',
                     dust_sourceM='nodustM',
                     dust_sourceC='nodustC',
                     extinction_av='0.0',
                     imf_file='tab_imf/imf_chabrier_lognormal.dat',
                     sequence_type= 'single_isochrone',
                     isoc_age=False,
                     isoc_z =False,
                     isoc_z0=False,
                     isoc_z1=False,
                     isoc_dz=False,
                     isoc_lage0=False,
                     isoc_lage1=False,
                     isoc_dlage=False,
                     path='',
                     filename='Isochrone_teste.dat'):

    # sequence_type must be one of 'single_isochrone' (0), 'constant_metallicity' (1) or 'constant_age' (2)
    if sequence_type == 'single_isochrone' or sequence_type == 0: sequence_type = 0
    elif sequence_type == 'constant_metallicity' or sequence_type == 1 : sequence_type = 1
    elif sequence_type == 'constant_age' or sequence_type == 2: sequence_type = 2
    else: raise ValueError("Argument sequence_type must be in ('single_isochrone', 'constant_metallicity', "
                           "'constant_age')")

    warnings.simplefilter('always', UserWarning)

    #Handling bad values given for different sequence types
    if sequence_type == 0:
        if not isoc_age: raise ValueError("For sequence_type == 'single_isochrone', argument isoc_age must be provided")
        if not isoc_z: raise ValueError("For sequence_type == 'single_isochrone', argument isoc_z must be provided")
        if any((isoc_z0, isoc_z1, isoc_dz, isoc_lage0, isoc_lage1, isoc_dlage)):
            warnings.warn("For sequence_type == 'single_isochrone', arguments isoc_z0, isoc_z1, isoc_dz, isoc_lage0, isoc_lage1 and isoc_dlage are not used")

    elif sequence_type == 1:
        if not isoc_z: raise ValueError("For sequence_type == 'constant_metallicity', argument isoc_z must be provided")
        if not isoc_lage0: raise ValueError("For sequence_type == 'constant_metallicity', argument isoc_lage0 must be provided")
        if not isoc_lage1: raise ValueError("For sequence_type == 'constant_metallicity', argument isoc_lage1 must be provided")
        if not isoc_dlage: raise ValueError("For sequence_type == 'constant_metallicity', argument isoc_dlage must be provided")
        if any((isoc_age, isoc_z0, isoc_z1, isoc_dz)):
            warnings.warn("For sequence_type == 'constant_metallicity', arguments isoc_age, isoc_z0, isoc_z1, and isoc_dz are not used")

    elif sequence_type == 2:
        if not isoc_age: raise ValueError("For sequence_type == 'constant_age', argument isoc_age must be provided")
        if not isoc_z0: raise ValueError("For sequence_type == 'constant_age', argument isoc_z0 must be provided")
        if not isoc_z1: raise ValueError("For sequence_type == 'constant_age', argument isoc_z1 must be provided")
        if not isoc_dz: raise ValueError("For sequence_type == 'constant_age', argument isoc_dz must be provided")
        if any((isoc_z, isoc_lage0, isoc_lage1, isoc_dlage)):
            warnings.warn("For sequence_type == 'constant_age', arguments isoc_z, isoc_lage0, isoc_lage1, and isoc_dlage are not used")

    #Error raised when too many isochrones are requested
    if sequence_type == 1:
        N_isoc = len(np.arange(isoc_lage0, isoc_lage1, isoc_dlage))
        if N_isoc > 400:
            raise ValueError("you requested too many isochrones ({0}), maximum allowed is 400.\nTry to increase isoc_dlage or lower the difference between isoc_lage0 and isoc_lage1".fotmat(N_isoc))
    elif sequence_type == 2:
        N_isoc = len(np.arange(isoc_z0, isoc_z1, isoc_dz))
        if N_isoc > 400:
            raise ValueError("you requested too many isochrones ({0}), maximum allowed is 400.\nTry to increase isoc_dz or lower the difference between isoc_z0 and isoc_z1".format(N_isoc))

    #print 'Opening browser'
    br = Browser()
    br.open('http://stev.oapd.inaf.it/cgi-bin/cmd')
    br.select_form(nr = 0)

    #print 'Filling form'
    br.form['isoc_kind'] = [isoc_kind]
    br.form['photsys_version'] = [photsys_version]
    br.form['photsys_file'] = [photsys_file]
    br.form['kind_cspecmag'] = [kind_cspecmag]
    br.form['dust_sourceM'] = [dust_sourceM]
    br.form['dust_sourceC'] = [dust_sourceC]
    br.form['extinction_av'] = extinction_av
    br.form['imf_file'] = [imf_file]

    br.find_control("isoc_val").items[sequence_type].selected = True

    if sequence_type == 0:
        br.form['isoc_age'] = str(isoc_age)  # Isochrone age
        br.form['isoc_zeta'] = str(isoc_z)  # Isochrone metallicity

    elif sequence_type == 1:
        br.form['isoc_zeta0'] = str(isoc_z)  # Isochrone metallicity
        br.form['isoc_lage0'] = str(isoc_lage0)  # Isochrone log initial age
        br.form['isoc_lage1'] = str(isoc_lage1) # Isochrone log final age
        br.form['isoc_dlage'] = str(isoc_dlage)  # Isochrone log age step

    elif sequence_type == 2:
        br.form['isoc_age0'] = str(isoc_age)  # Isochrone age
        br.form['isoc_z0'] = str(isoc_z0)  # Isochrone initial metallicity
        br.form['isoc_z1'] = str(isoc_z1)  # Isochrone final metallicity
        br.form['isoc_dz'] = str(isoc_dz)  # Isochrone metallicity step

    #print('Submitting form')
    br.submit()

    #print('Downloading data')
    download_link = list(br.links())[0].absolute_url
    geturl(download_link, path+'/'+filename)
    br.close()
    print('File ' + path+'/'+filename + ' created')
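
# A minimal usage sketch for fetch_isochrones (hypothetical values; the CMD web
# form must be reachable and the geturl helper must be defined):
#
#   fetch_isochrones(sequence_type='single_isochrone',
#                    isoc_age=1e9,   # 1 Gyr, a hypothetical choice
#                    isoc_z=0.019,   # roughly solar metallicity
#                    path='.', filename='isochrone_example.dat')
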
    def __init__(self, d_inic, m_inic, ano, search_url, controller, nao_cria):
        arquivo = '//home//raul//Documents//unb_python//data//data' + str(
            d_inic) + "-" + str(m_inic) + "-" + str(ano) + '.txt'
        if nao_cria == 1:
            self.f = open(arquivo, 'a')
        else:
            self.f = open(arquivo, 'w')

        Consulta.controller = controller

        self.SEARCH_URL = search_url

        socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9050)

        # open the connection through the Tor network
        try:
            socket.socket = socks.socksocket
            socket.create_connection = create_connection
        except:
            print "problem opening a socket on the Tor network"

        br = Browser()

        # get ready to start the queries
        print "################### Advanced Query - Portal Transparencia ###################"
        gravalog(
            self,
            "\n\n\n################### Advanced Query - Portal Transparencia ###################\n\n"
        )
        print "################### version " + self.ver + " ###################"
        gravalog(
            self, "\n################### version " + self.ver +
            " ###################\n\n")

        try:
            LRequest = urllib2.Request(self.SEARCH_URL, " ")
            LResponse = br.open(LRequest)
            page = bs_parse(LResponse.read())
            print self.SEARCH_URL
            print page
            #f.write(page)
        except:
            print "problem performing the first web query"

        gravalog(self, (page.text).encode('utf-8', 'ignore'))

        br.close()

        #Consulta.ID = newID(self, Consulta.controller)
        Consulta.ID = 0  # placeholder id; the newID call above is disabled

        # Logger object for capturing errors.
        x = logging.getLogger("logarqui")
        x.setLevel(logging.DEBUG)

        # capture the logs and write them to a file.
        h1 = logging.FileHandler(
            "//home//raul//Documents//unb_python//data//log//erros" +
            str(d_inic) + "-" + str(m_inic) + "-" + str(ano) + '.log')
        f = logging.Formatter(
            "%(levelname)s %(asctime)s %(funcName)s %(lineno)d %(message)s")
        h1.setFormatter(f)
        h1.setLevel(logging.DEBUG)
        x.addHandler(h1)
Exemple #47
0
from mechanize import Browser
import config

browser = Browser()
browser.set_handle_robots(False)
browser.addheaders = [(
    'User-agent',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
)]
browser.open("https://sodexo4you.be/nl")

browser.select_form(id="account-login")

browser.form['name'] = config.email
browser.form['pass'] = config.password

response = browser.submit()

browser.retrieve(
    'https://sodexo4you.be/nl/mijn-sodexo-card-saldo?description=All&type=LUNCH&export=1',
    'export.csv')
browser.close()