def getMovieLink(self, movietitle):
    """Search the site at base_url for a title and return the link of the first result."""
    br = Browser()
    br.set_handle_robots(False)
    br.open(base_url)
    br.select_form(nr=0)
    br.form['q'] = movietitle
    br.submit()
    pageData = br.response().read()
    br.close()

    soup = BeautifulSoup(pageData, 'lxml')
    # The first "findSection" holds the title results; its rows are the hits.
    allFindSections = soup.find_all('div', class_='findSection')
    findList = allFindSections[0].findAll('tr', class_='findResult')
    firstResultLinks = findList[0].findAll('a')
    # The second <a> in the result row carries the href to the title page.
    firstResultLink = firstResultLinks[1].get('href')
    return firstResultLink
def setup_tentative(username, passw): browser = Browser() browser.addheaders = [('User-Agent', sorteed())] browser.set_handle_robots(False) bool = False while bool == False: try: browser.open('https://facebook.com/') browser.select_form(nr=0) bool = True except: system('/etc/init.d/tor restart > /dev/null') sleep(20) browser.form['email'] = username browser.form['pass'] = passw response = browser.submit() link = response.geturl() browser.close() link = link.split('/') ok = '' for l in link: if b'Find Friends' in response.read(): ok = False if (ok == False): system('/etc/init.d/tor stop') system('clear') print(gr + 'Senha ' + red + passw + gr + ' detectada para ' + red + username) exit(0)
def get_title(url):
    # First, the blunt approach: if the URL already looks like an image,
    # cache it as one without connecting, so we never hit a 403 or robots.txt.
    for ext in ("jpg", "jpeg", "gif", "png", "svg", ":large"):
        if url.lower().endswith(ext):
            cache.put(url, (url, True))
            break
    try:
        (title, got_image) = cache.get(url)
    except TypeError:
        # Got None, key not in cache - open the URL and get the title.
        br = Browser()
        try:
            br.open(url)
            # Now the clever way, since Facebook mangled its image links:
            # inspect the Content-Type instead of the URL.
            if br.response().info()["Content-type"] in [
                    "image/png", "image/jpeg", "image/gif", "image/svg+xml"
            ]:
                cache.put(url, (url, True))
            else:
                cache.put(url, (br.title(), False))
        except Exception as exception:
            print(f"Problem, Sir - {exception} - with {url}")
            cache.put(url, (url, False))
        br.close()
        # Try again, now that the URL is cached.
        (title, got_image) = cache.get(url)
    return (got_image, title)
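# Hypothetical stand-in for the external `cache` object used by get_title()
# above -- a minimal sketch assuming it only needs get()/put(); this is not
# the original implementation.
class DictCache(object):
    def __init__(self):
        self._store = {}

    def get(self, key):
        # Returns None on a miss, which get_title() turns into a live fetch.
        return self._store.get(key)

    def put(self, key, value):
        self._store[key] = value

# Example: cache = DictCache(); got_image, title = get_title("https://example.com/")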
def getFileCoverage(baseURL, filename):
    """For a given filename, fetch coverage from an online LCOV source."""
    covURL = baseURL + filename + ".gcov.html"
    mech = Browser()
    try:
        urlObj = mech.open(covURL)
    except:
        return ("N/A", "N/A", "N/A", "N/A")
    parsedUrlObj = lxml.html.parse(urlObj)

    branchCoveragePercent = "N/A"
    branchCoverageMissing = "N/A"
    lineCoveragePercent = "N/A"
    lineCoverageMissing = "N/A"

    # XPath into the LCOV summary table:
    #   tr[2] - line coverage, tr[3] - function coverage, tr[4] - branch coverage
    #   td[5] - # hit, td[6] - # total, td[7] - % hit
    try:
        lineCoveragePercent = float(
            parsedUrlObj.xpath("/html/body/table[1]/tr[3]/td/table/tr[2]/td[7]"
                               )[0].text.replace(" %", ""))
        lineCoverageHit = int(
            parsedUrlObj.xpath(
                "/html/body/table[1]/tr[3]/td/table/tr[2]/td[5]")[0].text)
        lineCoverageTotal = int(
            parsedUrlObj.xpath(
                "/html/body/table[1]/tr[3]/td/table/tr[2]/td[6]")[0].text)
        lineCoverageMissing = lineCoverageTotal - lineCoverageHit
    except ValueError:
        pass

    try:
        branchCoveragePercent = float(
            parsedUrlObj.xpath("/html/body/table[1]/tr[3]/td/table/tr[4]/td[7]"
                               )[0].text.replace(" %", ""))
        branchCoverageHit = int(
            parsedUrlObj.xpath(
                "/html/body/table[1]/tr[3]/td/table/tr[4]/td[5]")[0].text)
        branchCoverageTotal = int(
            parsedUrlObj.xpath(
                "/html/body/table[1]/tr[3]/td/table/tr[4]/td[6]")[0].text)
        branchCoverageMissing = branchCoverageTotal - branchCoverageHit
    except ValueError:
        pass

    mech.close()
    return (lineCoveragePercent, lineCoverageMissing,
            branchCoveragePercent, branchCoverageMissing)
def _login(self, username, password):
    br = Browser()
    br.open("http://netlogin.kuleuven.be/")
    br.select_form(name="wayf")
    br.submit()
    br.select_form(name="netlogin")
    # The login form uses generated field names, so address the username and
    # password controls positionally through the form's name/value pairs.
    br[br.form._pairs()[2][0]] = username
    br[br.form._pairs()[3][0]] = password
    result = br.submit()
    lines = result.readlines()
    br.close()
    return lines
def getMovieData(self, link):
    url = base_url + link
    br = Browser()
    br.set_handle_robots(False)

    # Some movies do not contain a rating, so initialise with no values.
    ratingValue = "none"
    bestRating = "none"
    title = ""
    year = ""

    pageData = br.open(url)
    soup = BeautifulSoup(pageData, 'lxml')

    ratingValueElement = soup.find_all('span', itemprop='ratingValue')
    if len(ratingValueElement) > 0:
        ratingValue = ratingValueElement[0].string

    bestRatingElement = soup.find_all('span', itemprop='bestRating')
    if len(bestRatingElement) > 0:
        bestRating = bestRatingElement[0].string

    titleHeadingElement = soup.find_all('h1', itemprop='name')
    title = titleHeadingElement[0].contents[0]

    yearElement = soup.find_all('span', id='titleYear')
    yearElementLink = yearElement[0].find_all('a')
    year = yearElementLink[0].contents[0]

    genreElement = soup.find_all('span', class_='itemprop', itemprop='genre')
    genres = [ge.contents[0] for ge in genreElement]

    results = {
        'title': title,
        'year': year,
        'ratingValue': ratingValue,
        'bestRating': bestRating,
        'genres': genres
    }
    br.close()
    return results
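# A minimal sketch of how the two scraper methods above could be chained.
# MovieScraper is a hypothetical name for the class they belong to, and
# base_url is assumed to point at the site's search page.
scraper = MovieScraper()
link = scraper.getMovieLink("The Matrix")
data = scraper.getMovieData(link)
print(data['title'], data['year'], data['ratingValue'], data['genres'])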
class Liker(threading.Thread): def __init__(self, id, accounts, pageid): self.id = id self.accounts = accounts self.pageid = pageid self.running = False threading.Thread.__init__(self) def createbrowser(self): self.br = Browser() self.br.set_handle_gzip(True) self.br.set_handle_robots(False) self.br.addheaders = [( 'User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1' )] def like(self, user, passw, pageid): try: self.createbrowser() self.br.open('http://m.facebook.com/login.php', timeout=10) self.br.select_form(nr=0) self.br.form['email'] = user self.br.form['pass'] = passw self.br.submit() if 'Your password was incorrect.' in self.br.response().read( ) or "We didn't recognize your email address." in self.br.response( ).read( ) or 'Sorry, your account is temporarily unavailable.' in self.br.response( ).read(): Publisher().sendMessage( "update", "Could not login with {0}".format(user)) return Publisher().sendMessage("update", "Logged in with {0}".format(user)) self.br.open('http://m.facebook.com/' + pageid, timeout=10) for yc in self.br.links(text="Unlike"): Publisher().sendMessage("update", "Already liked with {0}".format(user)) return for xc in self.br.links(text="Like"): self.br.follow_link(xc) break Publisher().sendMessage("update", "Liked with {0}".format(user)) self.br.close() except Exception, e: Publisher().sendMessage("update", "{0} with {1}".format(str(e), str(a))) self.like(user, passw, pageid)
def fetch_page_after_auth(username, password, next_url):
    print username, password
    logout_url = 'https://secure.ninjacourses.com/account/logout/'
    login_url = 'https://secure.ninjacourses.com/account/login/?next=%s' % next_url
    br = Browser(file_wrapper=ResponseWrapper)
    br.set_handle_robots(False)
    br.open(logout_url)
    br.open(login_url)
    br.select_form()
    br['username'], br['password'] = username, password
    result_page = br.submit().read()
    br.close()
    if 'correct username' in result_page:
        raise ValueError
    return result_page
def upload(count):
    br = Browser()
    br.set_handle_robots(False)
    br.open('http://zincpharmer.csb.pitt.edu/pharmville/')
    form = list(br.forms())[0]
    br.form = form
    form['receptor'] = ['traf2']
    form.add_file(open(outputBase + 'minimized_results.sdf'), 'text/plain',
                  'upload.sdf')
    form['userid'] = 'yifengt'
    form['name'] = 'Test'
    response = br.submit()
    print str(count) + '.sdf'
    analysis = process()
    analysis.feed(response.read())
    analysis.close()
    br.close()
def getCurrentCoverageDirectory(baseURL):
    mech = Browser()
    mech.open(baseURL)
    currentLink = None
    for link in mech.links():
        # Find the first directory link that is not the parent
        if link.url.endswith("/") and not link.url.startswith("/"):
            currentLink = link
            break
    if currentLink is None:
        mech.close()
        raise Exception("Unable to find current coverage directory")
    linkURL = currentLink.base_url + currentLink.url
    mech.close()
    return linkURL
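# A minimal sketch tying the two coverage helpers together; the base URL below
# is a placeholder and the file path is only an example of the expected input.
if __name__ == "__main__":
    covDir = getCurrentCoverageDirectory("http://example.org/lcov/")
    linePct, lineMiss, branchPct, branchMiss = getFileCoverage(covDir, "src/main.c")
    print("lines: %s%% hit (%s missing), branches: %s%% hit (%s missing)"
          % (linePct, lineMiss, branchPct, branchMiss))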
def search(arg):
    assert '/' not in arg  # because we use it in a filename
    cache = rc['authority_cache']
    filename = cache + '/' + arg
    if os.path.exists(filename):
        return [eval(i) for i in open(filename)]
    br = Browser()
    br.set_handle_robots(False)
    br.open(start)
    br.select_form(name="querybox")
    br['Search_Arg'] = arg.encode('utf-8')
    br['Search_Code'] = ['NHED_']
    res = br.submit()
    found = list(read_serp(res))
    br.close()
    out = open(filename, 'w')
    for i in found:
        print >> out, i
    out.close()
    return found
def store():
    while True:
        link = raw_input("Enter link\n")
        link = link.strip()
        if (link == 'X') or (link == 'x'):
            break
        print "Collecting song info..."
        if link.find("www.youtube.com") == -1:
            print Fore.GREEN, "Not a valid YouTube link.", Fore.RESET
            continue
        br = Browser()
        global arr
        try:
            br.set_handle_robots(False)
            respo = br.open(link)
            soup = BeautifulSoup(respo, "html.parser")
            name = soup.find("title")
            name = name.text
        except Exception as e:
            print "Not working. Trying method 2..."
            name = method2(link)
            if name == -1:
                continue
        finally:
            br.close()
        try:
            fil = open("MusicLinks.txt", 'rb')
            arr = pickle.load(fil)
            fil.close()
        except Exception as e:
            print "Creating new file to store links."
        finally:
            if link in arr:
                print Fore.YELLOW, "Song link already present.", Fore.RESET
            else:
                arr[link] = name
                print Fore.YELLOW, "Song added successfully", Fore.RESET
                pushIntoFile()
def _mapgen_speed_fired(self):
    test_dir(join(self.dirname, 'gps_vis'))
    br = Browser()
    # Ignore robots.txt
    br.set_handle_robots(False)
    # The site demands a user-agent that isn't a robot
    br.addheaders = [('User-agent', 'Firefox')]
    resp1 = br.open("http://www.gpsvisualizer.com/map_input")
    # Fill in the map-input form
    br.select_form(name='main')
    br.form['width'] = '870'
    br.form['height'] = '600'
    br.set_value(['google_openstreetmap'], name='bg_map')
    br.set_value(['speed'], name='trk_colorize')
    br.form['legend_steps'] = '10'
    br.add_file(open(self.filename_converted), "text/plain",
                self.filename_converted, name='uploaded_file_1')
    # Submit and follow the download link to the generated map
    resp2 = br.submit()
    resp = None
    for link in br.links():
        siteMatch = re.compile('download/').search(link.url)
        if siteMatch:
            resp = br.follow_link(link)
            break
    # Save the generated map
    content = resp.get_data()
    ofile = open(join(self.dirname, 'gps_vis', 'map_speed.html'), 'w')
    ofile.write(content)
    ofile.close()
    br.close()
    print 'map generated (speed color)'
def _profilegen_fired(self):
    test_dir(join(self.dirname, 'gps_vis'))
    br = Browser()
    # Ignore robots.txt
    br.set_handle_robots(False)
    # The site demands a user-agent that isn't a robot
    br.addheaders = [('User-agent', 'Firefox')]
    # Retrieve the profile-input page, saving the response
    resp1 = br.open("http://www.gpsvisualizer.com/profile_input")
    # Fill in the profile-input form
    br.select_form(name='main')
    br.form['width'] = '870'
    br.form['height'] = '250'
    br.form['legend_steps'] = '10'
    br.add_file(open(self.filename_converted), "text/plain",
                self.filename_converted, name='uploaded_file_1')
    # Submit and follow the download link to the generated profile image
    resp2 = br.submit()
    resp = None
    for link in br.links():
        siteMatch = re.compile('download/').search(link.url)
        if siteMatch:
            resp = br.follow_link(link)
            break
    # Save the generated profile
    content = resp.get_data()
    ofile = open(join(self.dirname, 'gps_vis', 'profile.png'), 'wb')
    ofile.write(content)
    ofile.close()
    br.close()
    print 'profile generated'
def hax0r(): user = g.user if user.username == 'hax0r': if request.args.get('add'): browser = Browser() url = "http://productivepoop.com/users/new" browser.open(url) browser.select_form(nr=0) browser['user[username]'] = 'johnsmith' browser['user[name]'] = 'johnsmith' browser['user[email]'] = '*****@*****.**' browser['user[password]'] = 'password' browser['user[password_confirmation]'] = 'password' browser.submit() browser.close() return jsonify({'cool': True}) if request.args.get('remove'): browser = Browser() url = "http://productivepoop.com/users/" browser.open(url) browser.form = list(browser.forms())[-1] browser.submit() browser.close() return jsonify({'cool': True}) if request.args.get('addalot'): for i in range(1000000): browser = Browser() url = "http://productivepoop.com/users/new" browser.open(url) browser.select_form(nr=0) browser['user[username]'] = 'johnsmithy' + str(i) browser['user[name]'] = 'johnsmithy' + str(i) browser['user[email]'] = 'johnsmith'+str(i)+'@johnsmith.com' browser['user[password]'] = 'password' browser['user[password_confirmation]'] = 'password' browser.submit() browser.close() if request.args.get('removealot'): for i in range(100): browser = Browser() url = "http://productivepoop.com/users/" browser.open(url) browser.form = list(browser.forms())[-1] browser.submit() browser.close() print 'hello '+str(i) return jsonify({'cool': True}) return render_template('hax0r.html', user=user) abort(404)
def _send_fired(self):
    br = Browser()
    # Ignore robots.txt
    br.set_handle_robots(False)
    # The site demands a user-agent that isn't a robot
    br.addheaders = [('User-agent', 'Firefox')]
    # Log in to the T-Mobile portal
    resp = br.open("https://www.t-mobile.cz/.gang/login-url/portal?nexturl=https%3A%2F%2Fwww.t-mobile.cz%2Fweb%2Fcz%2Fosobni")
    br.select_form(nr=2)
    br.form['username'] = '******'
    br.form['password'] = self.password
    resp = br.submit()
    # Open the SMS gateway and fill in the message form
    resp = br.open("https://sms.client.tmo.cz/closed.jsp")
    br.select_form(nr=1)
    br.form['recipients'] = self.phone_number
    br.form['text'] = self.message
    br.form.find_control("confirmation").get("1").selected = self.confirmation
    resp = br.submit()
    # Log out
    resp = br.follow_link(url_regex='logout')
    br.close()
    information(None, 'SMS sent!')
def get_balance(username, password):
    balance = 0.0
    browser = Browser()
    try:
        browser.open(URL_PATH)
    except:
        return balance
    browser.select_form(nr=0)
    browser.form[USER_FIELD] = username
    browser.form[PASS_FIELD] = password
    browser.submit()
    response = browser.response()
    html = response.read()
    browser.close()
    balance_string = html_filter(html)
    result = get_float(balance_string)
    return result
def get_balance(username, password):
    balance = 0.0
    browser = Browser()
    try:
        browser.open("http://sipnet.ru")
    except:
        return balance
    browser.select_form(nr=0)
    browser.form['Name'] = username
    browser.form['Password'] = password
    browser.submit()
    response = browser.response()
    html = response.read()
    browser.close()
    balance_string = html_filter(html)
    result = get_float(balance_string)
    return result
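# A minimal usage sketch for get_balance(); html_filter() and get_float() are
# helpers defined elsewhere in this module, and the credentials below are
# placeholders.
if __name__ == "__main__":
    balance = get_balance("my_login", "my_password")
    print("Current balance: %.2f" % balance)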
def activate_emails(emails,email,password): import imaplib m = imaplib.IMAP4_SSL("imap.gmail.com") m.login(email+"@gmail.com",password) uuids = [] for email in emails: m.select("INBOX") replace_string = '(TO "'+string.replace(email,'%2B','+')+'" SUBJECT "Tradeshift Password Reset")' result,data = m.uid('search',None,replace_string) htmlbullshit = m.uid('fetch',data[0],'(RFC822)')[1][0][1] match = re.compile('(?<=href=").*?(?=")') activate = match.findall(htmlbullshit)[1] r = requests.get(activate) browser = Browser() browser.set_handle_robots(False) browser.open(r.url) match = re.compile('(?<=user=).*?(?=&)') uuid = match.findall(r.url)[0] uuids.append(uuid) browser.select_form(nr=0) browser["password"]=password browser.submit() browser.close() return uuids
def toggleStatus(list_com): status = "On.png" if list_com[1] == "/open" else "Off.png" login = list_com[2] passwd = list_com[3] url = 'http://teresinahc.org/wiki/index.php?title=Especial:Autenticar-se&returnto=P%C3%A1gina+principal' br = Browser() br.set_handle_robots(False) br.addheaders = [('User-agent', 'Firefox')] br.open(url) if 'Autenticar-se' in br.response().read(): br.select_form('userlogin') br.form['wpName'] = login br.form['wpPassword'] = passwd br.submit() pag = br.response().read() if '"wgUserName":null' not in pag: br.open('http://teresinahc.org/wiki/index.php?title=Status&action=edit') if 'value="Salvar página"' in br.response().read(): br.select_form('editform') br.form['wpTextbox1'] = '<center>[[Arquivo:'+status+']]</center>' br.submit(name='wpSave') br.close() if status == 'On.png': return 'no momento o Teresina Hacker Clube encontra-se ABERTO!' else: return 'no momento o Teresina Hacker Clube encontra-se FECHADO!' else: br.close() return 'Voc\xc3\xaa n\xc3\xa3o tem permiss\xc3\xa3o para alterar p\xc3\xa1ginas da Wiki do Teresina Hacker Clube' else: br.close() output = re.compile('<div class="errorbox">(.*?)</div>', re.DOTALL | re.IGNORECASE).findall(pag) return "</code>"+output[0].replace("<br />", "").replace("\t", "")+"<code>" else: br.close() return 'Desculpe, por algum motivo n\xc3\xa3o foi poss\xc3\xadvel acessar a Wiki do Teresina Hacker Clube.'
elif "txt" in a["href"]: ext = "txt" elif "srt" in a["href"]: ext = "srt" elif "download.mp4" in a["href"]: ext = "mp4" else: continue filename = ( str(num + 1) + "." + str(j + 1) + "-" + re.sub(r"--*", "-", re.sub(r"[^A-Za-z0-9.]", "-", ltitle.lower())) + "." + ext ) if ext in dwdlist: print ext + ": " + filename download(url, title, filename) # else: # print ext+": Skipping: Not in download list" count += 1 print os.chdir(cd) print br.close() print "Completed downloading " + course + "." pth = os.path.join(pcd, course + ".html") os.remove(pth)
class check_keys(): def __init__(self): #инициализация #variables self.log_file = '' self.keys = [] self.url = 'http://forum.rsload.net/cat-kryaki-seriyniki-varez/topic-4820-page-%d.html' self.login = '******' self.passwd = 'ghbphfr1' self.form_nomber = 0 self.login_name = 'name' self.paswd_name = 'password' self.submit_nomber = 0 self.curPage = 84 self.html_source = '' self.headers = [( 'User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1' )] self.xpath_data = { "max_page" : ".//*[@id='board_index']/div[1]/div/div[2]/ol/li[1]/a/text()", 'keys' : './/blockquote/p/span[@class="texthide"]/text()', "is_login" : ".//*[@id='user_info']/fieldset/dl/dt[1]/a/b/span/text()" } self.br = Browser() self.cj = cookielib.LWPCookieJar() self.br.set_cookiejar(self.cj) self.br.addheaders = self.headers def __del__(self): #уничтожение класса self.br.close() return def max_page(self): tree= etree.HTML(self.html_source) result = tree.xpath(self.xpath_data["max_page"]) maxPage = result[0][-2] + result[0][-1] return maxPage def rw_file(self, data = []): if len(data) > 0: f = open(self.log_file, 'a') for index in data: f.write(index + '\n') else: f = open(self.log_file, 'r') data = [line.strip() for line in f] f.close() return data def get_all_keys(self): oldKeys = self.rw_file() tree = etree.HTML(self.html_source) keysList = tree.xpath(self.xpath_data["keys"]) newKeys = [] buf = ' '.join(keysList) buf = re.sub(r'\s+', ',', buf) keysList = buf.split(',') for key in keysList: if not key in oldKeys and key.startswith("CHZ"): newKeys.append(key) if len(newKeys) > 0: self.keys = list(newKeys) self.rw_file(newKeys) return True else: return False def is_login(self): tree= etree.HTML(self.html_source) result = tree.xpath(self.xpath_data["is_login"]) if len(result) == 1: return self.login == result[0] else: return False def update(self): self.html_source = self.br.open(self.url % (self.curPage)).read() maxPage = int(self.max_page()) if self.curPage != maxPage: self.html_source = self.br.open(self.url % (maxPage)).read() self.curPage = maxPage return self.is_login() def login_url(self): self.br.open(self.url % (self.curPage)) self.br.select_form(nr = self.form_nomber) self.br[self.login_name] = self.login self.br[self.paswd_name] = self.passwd self.html_source = self.br.submit(nr = self.submit_nomber).read() return self.is_login()
def get_filelist(start_url): if not start_url.endswith('/'): start_url += '/' start_url_len = len(start_url) dirCollection = {} directories_todo = [start_url] # walk directories non-recursively # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/435875 while len(directories_todo) > 0: print '+', len(dirCollection.keys()) print '-', len(directories_todo) #if len(directories_todo) == 175: # return dirCollection, 'foo' directory = directories_todo.pop() #if '/drpmsync/' in directory or '/repositories' in directory: # print '>>>>>> skipping', directory # continue print '>' print directory name = directory[start_url_len:].rstrip('/') or '.' dirCollection[name] = mb.core.Directory(name) br = Browser() br.open(directory) # found files for i in br.links(url_regex = _match_file): #if i.url.startswith('./'): # i.url = i.url[2:] #print 'appending file', i.url dirCollection[name].files.append(i.url) found_dirs = [ link.base_url + link.url.lstrip('./') for link in br.links(url_regex = _match_dir) ] found_dirs = [] for link in br.links(url_regex = _match_dir): if link.url.startswith('./'): link.url = link.url[2:] found_dirs.append(link.base_url + link.url) print 'found_dirs:', found_dirs #found_files = [ link.base_url + link.url for link in br.links(url_regex = _match_file) ] #print 'found_files:', found_files br.close() # found directories for found_dir in found_dirs: br = Browser() br.open(found_dir) name = found_dir[start_url_len:].rstrip('/') print 'name:', name dirCollection[name] = mb.core.Directory(name) for i in br.links(url_regex = _match_file): dirCollection[name].files.append(i.url) for i in br.links(url_regex = _match_dir): if i.url.startswith('./'): i.url = i.url[2:] print 'neues todo:', i.base_url + i.url directories_todo.append(i.base_url + i.url) br.close() del br return dirCollection, 'foo'
class RequestQuery: def __init__(self,config): self.br=Browser() self.config = config # Initialise connections self.phedex = PhEDEx({"endpoint":"https://cmsweb.cern.ch/phedex/datasvc/json/prod/"}, "json") self.dbsPhys01 = DbsApi(url = dbs_base_url+"phys01/DBSReader/") self.dbsPhys02 = DbsApi(url = dbs_base_url+"phys02/DBSReader/") self.dbsPhys03 = DbsApi(url = dbs_base_url+"phys03/DBSReader/") def __del__(self): self.br.close() def getScramArchByCMSSW(self): """ Get from the list of available CMSSW releases return a dictionary of ScramArchitecture by CMSSW """ # Set temporary conection to the server and get the response from cmstags url = 'https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML' br = Browser() br.set_handle_robots(False) response=br.open(url) soup = BeautifulSoup(response.read()) # Dictionary form # {'CMSSW_X_X_X':[slc5_amd64_gcc472], ... } archByCmssw={} # Fill the dictionary for arch in soup.find_all('architecture'): for cmssw in arch.find_all('project'): # CMSSW release cmsswLabel = cmssw.get('label').encode('ascii', 'ignore') if cmsswLabel not in archByCmssw: archByCmssw[cmsswLabel]=[] # ScramArch related to this CMSSW release archName = arch.get('name').encode('ascii', 'ignore') archByCmssw[cmsswLabel].append(archName) return archByCmssw def getDatasetOriginSites(self, dbs_url, data): """ Get the origin sites for each block of the dataset. Return a list block origin sites. """ sites=[] local_dbs = dbs_url.split('/')[5] if local_dbs == 'phys01': response = self.dbsPhys01.listBlocks(detail=True,dataset=data) elif local_dbs == 'phys02': response = self.dbsPhys02.listBlocks(detail=True,dataset=data) elif local_dbs == 'phys03': response = self.dbsPhys03.listBlocks(detail=True,dataset=data) seList = [] for block in response: if block['origin_site_name'] not in seList: seList.append(block['origin_site_name']) siteNames = [] for node in self.nodeMappings['phedex']['node']: if node['se'] in seList: siteNames.append(node['name']) return siteNames, seList def phEDExNodetocmsName(self, nodeList): """ Convert PhEDEx node name list to cms names list """ names = [] for node in nodeList: name = node.replace('_MSS', '').replace('_Disk', '').replace('_Buffer', '').replace('_Export', '') if name not in names: names.append(name) return names def setGlobalTagFromOrigin(self, dbs_url,input_dataset): """ Get the global tag of the dataset from the source dbs url. 
If it is not set, then set global tag to 'UNKNOWN' """ globalTag = "" local_dbs = dbs_url.split('/')[5] if local_dbs == 'phys01': response = self.dbsPhys01.listOutputConfigs(dataset=input_dataset) elif local_dbs == 'phys02': response = self.dbsPhys02.listOutputConfigs(dataset=input_dataset) elif local_dbs == 'phys03': response = self.dbsPhys03.listOutputConfigs(dataset=input_dataset) globalTag = response[0]['global_tag'] # GlobalTag cannot be empty if globalTag == '': globalTag = 'UNKNOWN' return globalTag def isDataAtUrl(self, dbs_url,input_dataset): """ Returns True if the dataset is at the dbs url, if not returns False """ local_dbs = dbs_url.split('/')[5] if local_dbs == 'phys01': response = self.dbsPhys01.listDatasets(dataset=input_dataset) elif local_dbs == 'phys02': response = self.dbsPhys02.listDatasets(dataset=input_dataset) elif local_dbs == 'phys03': response = self.dbsPhys03.listDatasets(dataset=input_dataset) # This means that the dataset is not at the url if not response: return False else: return True def getLabelByValueDict(self, control): """ From control items, create a dictionary by values """ d = {} for item in control.items: value = item.attrs['value'] label = item.attrs['label'] d[value] = label return d def getValueByLabelDict(self, control): """ From control items, create a dictionary by labels """ d = {} for item in control.items: value = item.attrs['value'] label = item.attrs['label'] d[label] = value return d def createRequestJSON(self, ticket, input_dataset, dbs_url, cmssw_release, group_name, version = 1): """ Creates a JSON file 'Ticket_#TICKET.json' with the needed information for creating a requeston ReqMgr. Input: - ticket: the ticket #, for instance 110773 on https://ggus.eu/?mode=ticket_info&ticket_id=110773 - input_dataset - dbs_url: only the instance name, For example: "phys01" for https://cmsweb.cern.ch/dbs/prod/phys01/DBSReader - cmssw_release - group_name: the physics group name - version: the dataset version, 1 by default. It returns a dictionary that contains the request information. 
""" scramArchByCMSSW = self.getScramArchByCMSSW() self.nodeMappings = self.phedex.getNodeMap() task = ticket print "Processing ticket: %s" % task #splitting input dataset input_primary_dataset = input_dataset.split('/')[1].replace(' ','') input_processed_dataset = input_dataset.split('/')[2].replace(' ','') data_tier = input_dataset.split('/')[3].replace(' ','') # Transform input value to a valid DBS url #dbs_url = "https://cmsweb.cern.ch/dbs/prod/"+dbs_url+"/DBSReader" dbs_url = dbs_base_url+dbs_url+"/DBSReader" release_id = cmssw_release # check if deprecated release was used release = cmssw_release # check if release has not ScramArch match if release not in scramArchByCMSSW: raise Exception("Error on ticket %s due to ScramArch mismatch" % task) else: scram_arch = scramArchByCMSSW[release][-1] # check if dataset is not at dbs url try: data_at_url = self.isDataAtUrl(dbs_url,input_dataset) except: raise Exception('Error on ticket %s, dataset %s not available at %s' %(task, input_dataset,dbs_url)) if not data_at_url: raise Exception('Error on ticket %s, dataset %s not available at %s' %(task, input_dataset,dbs_url)) ## Get Physics Group group_squad = 'cms-storeresults-'+group_name.replace("-","_").lower() ## Get Dataset Version dataset_version = str(version) # Set default Adquisition Era for StoreResults acquisitionEra = "StoreResults" ## Construction of the new dataset name (ProcessingString) ## remove leading hypernews or physics group name and StoreResults+Version if input_processed_dataset.find(group_name)==0: new_dataset = input_processed_dataset.replace(group_name,"",1) else: stripped_dataset = input_processed_dataset.split("-")[1:] new_dataset = '_'.join(stripped_dataset) # Get dataset site info: phedex_map, se_names = self.getDatasetOriginSites(dbs_url,input_dataset) sites = self.phEDExNodetocmsName(phedex_map) infoDict = {} # Build store results json # First add all the defaults values infoDict["RequestType"] = "StoreResults" infoDict["UnmergedLFNBase"] = "/store/unmerged" infoDict["MergedLFNBase"] = "/store/results/" + group_name.replace("-","_").lower() infoDict["MinMergeSize"] = 1500000000 infoDict["MaxMergeSize"] = 5000000000 infoDict["MaxMergeEvents"] = 100000 infoDict["TimePerEvent"] = 40 infoDict["SizePerEvent"] = 512.0 infoDict["Memory"] = 2394 infoDict["CmsPath"] = "/uscmst1/prod/sw/cms" infoDict["Group"] = "DATAOPS" infoDict["DbsUrl"] = dbs_url # Add all the information pulled from Savannah infoDict["AcquisitionEra"] = acquisitionEra infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(dbs_url, input_dataset) infoDict["DataTier"] = data_tier infoDict["InputDataset"] = input_dataset infoDict["ProcessingString"] = new_dataset infoDict["CMSSWVersion"] = release infoDict["ScramArch"] = scram_arch infoDict["ProcessingVersion"] = dataset_version infoDict["SiteWhitelist"] = list(sites) # Create report for Migration2Global report = {} #Fill json file, if status is done self.writeJSONFile(task, infoDict) report["json"] = 'y' report["task"] = int(task) report["InputDataset"] = input_dataset report["ProcessingString"] = new_dataset report["localUrl"] = dbs_url report["sites"] = list(sites) report["se_names"] = list(se_names) return report def writeJSONFile(self, task, infoDict): """ This writes a JSON file at ComponentDir """ ##check if file already exists filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json' if not os.access(filename,os.F_OK): jsonfile = open(filename,'w') request = {'createRequest':infoDict} ## CHECK THIS BEFORE FINISHING 
jsonfile.write(json.dumps(request,sort_keys=True, indent=4)) jsonfile.close return def removeJSONFile(self,task): """ This removes the JSON file at ComponentDir if it was created """ filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json' if os.access(filename,os.F_OK): os.remove(filename) return def printReport(self, report): """ Print out a report """ print "%20s %5s %10s %50s %50s" %( 'Ticket','json','local DBS','Sites','se_names') print "%20s %5s %10s %50s %50s" %( '-'*20,'-'*5,'-'*10,'-'*50,'-'*50 ) json = report["json"] ticket = report["task"] #status = report["ticketStatus"] localUrl = report["localUrl"].split('/')[5] site = ', '.join(report["sites"]) se_names = ', '.join(report["se_names"]) print "%20s %5s %10s %50s %50s" %(ticket,json,localUrl,site,se_names)
def Authorize(self): ''' Authorize the application with Twitter. ''' auth = tweepy.OAuthHandler(self.CON_KEY, self.CON_SEC) try: auth_url = auth.get_authorization_url() except tweepy.error.TweepError: raise NetworkError('Unable to access network') ui_print (colored('Authorizing Twitter Account...', 'yellow')) username = ui_prompt("Username : "******"Password : ", mask = True) ''' Initialize mechanize browser instance ''' br = Browser() cj = cookielib.LWPCookieJar() br.set_cookiejar(cj) br.set_handle_robots(False) br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')] ''' Opens browser and authenticate account ''' try: br.open(auth_url) except URLError: raise NetworkError('Unable to access network') br.form = list(br.forms())[0] br.form['session[username_or_email]'] = username br.form['session[password]'] = password try: response = br.submit() except URLError: br.close() raise NetworkError('Unable to access network') content = response.get_data() soup = BeautifulSoup(content) code = soup.find('code') if code: pin = code.text br.close() else: br.form = list(br.forms())[1] try: response = br.submit() except URLError: br.close() raise NetworkError('Unable to access network') content = response.get_data() br.close() soup = BeautifulSoup(content) code = soup.find('code') if code: pin = code.text else: raise AuthorizationError('Authorization Failed') try: auth.get_access_token(pin) except tweepy.error.TweepError, e: raise AuthorizationError('Authorization Failed')
class AppointmentSearch(): def __init__(self, params): """ :param first_name: :param last_name: :param email: :param repeat_email: :param passnummer: :param lower_date: format 'dd.mm.yyyy' :param upper_date: """ self.app_link = params.get('app_link') self.first_name = params.get('first_name') self.last_name = params.get('last_name') self.email = params.get('email') self.repeat_email = params.get('repeat_email') assert self.email == self.repeat_email self.passnummer = params.get('passnummer') self.lower_date = date_to_number(params.get('lower_date')) self.upper_date = date_to_number(params.get('upper_date')) self.img_path = params.get('img_path') self.txt_file = params.get('txt_file') self.br = Browser() self.br.set_handle_robots(False) self.br.addheaders = [("User-agent", "Firefox")] self.wrong_cap_dir = params.get('wrong_cap_dir') self.cap_fail_msg = params.get('cap_fail_msg') self.no_app_msg = params.get('no_app_msg') self.other_month_msg = params.get('other_month_msg') self.app_available_msg = params.get('app_available_msg') self.odd_path = params.get('odd_path') self.now = None self.br = webdriver.Firefox(executable_path="/home/azhar/Downloads/geckodriver") self.dcap = dict(DesiredCapabilities.PHANTOMJS) self.dcap["phantomjs.page.settings.userAgent"] = ( "Firefox" ) self.odd = open(self.odd_path,'wb') if not os.path.exists(self.txt_file): fp = open(self.txt_file,'wb') fp.write(' ') fp.close() def captcha_solver(self,content): my_file = content index = my_file.find("url(\'data:image") b = my_file[index:].split(')') a = b[0] imgdata = base64.b64decode(a[27:-1]) with open(self.img_path+'.jpg', 'wb') as fp: fp.write(imgdata) with open(self.img_path+'2.jpg', 'wb') as fp: fp.write(imgdata) img = imread(self.img_path+'.jpg') imsave(self.img_path + '.png', img) imsave(self.img_path + '2.png', img) while not(os.path.exists(self.txt_file)): pass time.sleep(0.5) with open(self.txt_file,'rb') as fp: cap = fp.readline()[:-1] print cap os.remove(self.img_path+'.png') # os.remove(self.img_path + '.jpg') os.remove(self.img_path + '2.png') plt.subplot(1,2,1) plt.imshow(img) return cap def wrong_captcha(self): img = imread(self.img_path+'2.jpg') files = glob(self.wrong_cap_dir+'*.jpg') d = len(files)+1 imsave(self.wrong_cap_dir+str(d)+'.jpg',img) def date_select(self): response = None result = False ## select day for appointment for link in self.br.find_elements_by_link_text("Appointments are available"): d = date_to_number(link.get_attribute('href')) if self.lower_date <= d <= self.upper_date: link.click() break time.sleep(0.8) response = removeNonAscii(self.br.page_source) if "Please select an appointment" in response: response2 = None ## select appointment time for link in self.br.find_elements_by_link_text("Book this appointment"): link.click() break time.sleep(0.8) response2 = removeNonAscii(self.br.page_source) ## fill appointment form if "appointment_newAppointmentForm" in response2: print time.time() - self.now ##solve captcha cap = self.captcha_solver(self.br.page_source) a = self.br.find_element_by_name("captchaText") a.send_keys(cap) print time.time() - self.now a = self.br.find_element_by_name("lastname") a.send_keys(self.last_name) a = self.br.find_element_by_name("firstname") a.send_keys(self.first_name) a = self.br.find_element_by_name("email") a.send_keys(self.email) a = self.br.find_element_by_name("emailrepeat") a.send_keys(self.repeat_email) a = self.br.find_element_by_name("fields[0].content") a.send_keys(self.repeat_email) print time.time() - self.now # self.br['passnummer'] = 
'ALAN71954' time.sleep(15) def search_bot(self): while 1: time.sleep(0.5) # try: self.now = time.time() # self.br = webdriver.PhantomJS(desired_capabilities=self.dcap) # self.br = webdriver.Firefox(executable_path="/home/azhar/Downloads/geckodriver") self.br.get(self.app_link) print time.time()-self.now # cap = self.captcha_solver(contents) # c = contents.get_data() cap = self.captcha_solver(self.br.page_source) print time.time()-self.now a = self.br.find_element_by_name("captchaText") # print cap2 # time.sleep(1) a.send_keys(cap) # time.sleep(0.5) # contents1 = removeNonAscii(self.br.page_source) a.send_keys(Keys.RETURN) # contents1 = removeNonAscii(self.br.page_source) print time.time() - self.now im2 = imread(self.img_path+'.jpg') os.remove(self.img_path+'.jpg') time.sleep(0.8) print time.time() - self.now contents = removeNonAscii(self.br.page_source) # while contents == contents1: # time.sleep(0.1) # contents = removeNonAscii(self.br.page_source) if self.cap_fail_msg in contents: plt.subplot(1, 2, 2) # plt.imshow(im2) # plt.show() self.wrong_captcha() elif self.no_app_msg in contents: continue elif self.other_month_msg in contents: continue elif self.app_available_msg in contents: print "found date" self.date_select() else: cap = self.captcha_solver(self.br.page_source) self.odd.write(contents) self.odd.write('\n\n\n\n') self.br.close() # except: # continue
class Presencia(object): """ """ def __str__(self): return "%s.%s" % (self.__module__, self.__class__.__name__) def __init__(self, request, username,password,context=''): """ Al instanciar la classe, es loguejarà utilitzant el nom d'usuari i password proporcionats, Si browser_login conté algo són les cookies guardades de la última sessio loguejada que s'ha fet. Recarregarem les cookies en un Browser nou, i aixi estalviarà uns segons que consumiria del login. """ self.context=context self.request=request registry = self.request.registry self.epitool=registry.getUtility(IEPIUtility) self.username = username self.password = password self.browser_login, elk = self.epitool.recoverBrowserSession(self.request, self.username,'presencia') if self.browser_login: self.br=Browser() self.br.set_handle_robots(False) cj = LWPCookieJar() self.br.set_cookiejar(cj) for co in self.browser_login: ck = Cookie(version=co['version'], name=co['name'], value=co['value'], port=co['port'], port_specified=co['port_specified'], domain=co['domain'], domain_specified=co['domain_specified'], domain_initial_dot=co['domain_initial_dot'], path=co['path'], path_specified=co['path_specified'], secure=co['secure'], expires=co['expires'], discard=co['discard'], comment=co['comment'], comment_url=co['comment_url'], rest=co['rest']) cj.set_cookie(ck) print "Logging-in into presencia via browser" else: self.br = Browser() self.br.set_handle_equiv(False) self.login(message=u"Logging-in into presencia via regular login") return def log(self, message): """ """ logger = logging.getLogger('RUNNING') logger.info('%s - %s' % (self.username,message)) def getBrowserSession(self): """ Retorna la sessio actual del browser per a poderla guardar desde la utility """ cookies = [] for key in self.br._ua_handlers['_cookies'].cookiejar._cookies.keys(): domain = self.br._ua_handlers['_cookies'].cookiejar._cookies[key] for key2 in domain.keys(): cookie = domain[key2] for key3 in cookie: co = cookie[key3] cookies.append(dict(version=co.version, name=co.name, value=co.value, port=co.port, port_specified=co.port_specified, domain=co.domain, domain_specified=co.domain_specified, domain_initial_dot=co.domain_initial_dot, path=co.path, path_specified=co.path_specified, secure=co.secure, expires=co.expires, discard=co.discard, comment=co.comment, comment_url=co.comment_url, rest=co._rest)) return (cookies,None) def closeBrowser(self): """ """ self.br.close() def saveSessionData(self): """ """ self.epitool.saveBrowserSession(self.request, self.username,self.getBrowserSession(),'presencia') return def login(self,message = "Logging-in into presencia via regular login"): """ Es logueja a presència amb el login tradicional web """ self.log(u"Presencia Login %s" % message) self.br.open(LOGIN_URL) self.br.select_form(nr=0) self.br['Username']=self.username self.br['Password']=self.password response = self.br.submit() response_html = response.read() response.close() self.saveSessionData() def checkBrowserExpired(self,html): """ Comprova que el browser nou que hem generat en base a cookies guardades, continua actiu Per ferho, comprovem si l'html de la pagina que acavem de obrir conte el text de canvi de contrasenya Retorna cert si el browser esta caducat """ return html.find("Introduïu nom d'usuari i contrasenya")>0 @reloginIfCrashed def Marcar(self): """ Canvia l'estat del marcatge actual """ self.log("Marcar") persones = self.br.open(FITCHA_URL) persones_html = persones.read() if self.checkBrowserExpired(persones_html): return 'EXPIRED' persones.close() # 
getUtility(IRAMCache).invalidate('getMarcatges') region_invalidate('epi.presencia.getMarcatges', 'long_term', 'getMarcatges', 'epi.presencia.Presencia', self.username) # getUtility(IRAMCache).invalidate('getPresencia') region_invalidate('epi.presencia.getPermisos', 'default_term', 'getPermisos', 'epi.presencia.Presencia', self.username) return True print "S'ha canviat l'estat de marcatge" ##@reloginIfCrashedAndCache @cache_region('long_term', 'getMarcatgesHistoric') def getMarcatgesHistoric(self, username, year): """ Recupera la pàgina de marcatges de presència de l'històric anual, on hi ha tot el que no surt a la pagina principal La pàgina no té cap mena de id's ni classes, el parsejat es una mica dur... """ self.log("getMarcatges Historic %s sense cachejar" % year) return self.getMarcatgesBase(MARCATGES_HISTORIC_URL % year,year=int(year)) ##@reloginIfCrashedAndCache @cache_region('long_term', 'getMarcatges') def getMarcatges(self, username): """ Recupera la pàgina de marcatges de presència, on hi han els dos ultims mesos de marcatges. La pàgina no té cap mena de id's ni classes, el parsejat es una mica dur... """ self.log("getMarcatges sense cachejar") return self.getMarcatgesBase(MARCATGES_URL) def getDiscountHoursForDay(self,dia,hores_dia): day = '%s-%s-%s' % (dia) years={'2010':[],'2011':[]} years['2010']= { '01-01-2010':'F', '04-01-2010':'I', '05-01-2010':'I', '06-01-2010':'F', '07-01-2010':'I', '08-01-2010':'I', #=============== '29-03-2010':'I', '30-03-2010':'I', '31-03-2010':'I', #=============== '01-04-2010':'I', '02-04-2010':'F', '05-04-2010':'F', #=============== '01-05-2010':'F', '24-05-2010':'F', #=============== '24-06-2010':'F', #=============== '11-09-2010':'F', '24-09-2010':'F', #=============== '12-10-2010':'F', #=============== '01-11-2010':'F', #=============== '06-12-2010':'F', '08-12-2010':'F', '25-12-2010':'F', } years['2011']= { '01-01-2011':'F', '03-01-2011':'I', '04-01-2011':'I', '05-01-2011':'I', '06-01-2011':'F', '07-01-2011':'I', #=============== '07-03-2011':'F', #=============== '18-04-2011':'I', '19-04-2011':'I', '20-04-2011':'I', '21-04-2011':'I', '22-04-2011':'F', '25-04-2011':'F', #=============== '13-06-2011':'F', '24-06-2011':'F', #=============== '15-08-2011':'F', #=============== '24-09-2011':'F', #=============== '12-10-2011':'F', #=============== '01-11-2011':'F', #=============== '06-12-2011':'F', '08-12-2011':'F', '26-12-2011':'F', } if dia[2] in years.keys(): if day in years[dia[2]]: if years[dia[2]][day]=='F': hores = hores_dia else: if hores_dia==7: hores = 6 else: hores = hores_dia return ({'F':'Festa','I':'Intensiva'}[years[dia[2]][day]],hores) else: return None else: return None @reloginIfCrashed def getMarcatgesBase(self,URL,**kwargs): """ """ self.now = DateTime().latestTime() current_year = self.now.year() historic_query_year = kwargs.get('year',None) is_historic_query = historic_query_year!=None if is_historic_query: days_of_query_year = current_year==historic_query_year and deepcopy(self.getPermisos(self.username)) or deepcopy(self.getPermisosHistoric(self.username, historic_query_year)) days_of_query_past_year = deepcopy(self.getPermisosHistoric(self.username, historic_query_year-1)) dies = days_of_query_year dies.update(days_of_query_past_year) else: dies = deepcopy(self.getPermisos(self.username)) if self.now.month()<3: days_of_past_year = deepcopy(self.getPermisosHistoric(self.username, current_year-1)) dies.update(days_of_past_year) marcatges = self.br.open(URL) marcatges_html = marcatges.read() if 
self.checkBrowserExpired(marcatges_html): return 'EXPIRED' marcatges.close() soup = BeautifulSoup(marcatges_html,fromEncoding='iso-8859-1') try: table = soup.findAll('table')[2] # La tercera taula de l'html és on hi ha el que busquem except: #Si hem arribat a aquest punt vol dir que hi ha algun problema amb la pagina de presència de l'usuari #i no hi han ni marcatges ni l'estructura html que s'espera. retornem una llista de dies buida return dies # Hi han moltes taules aniuades dins d'altres taules, però les files que ens interessen sabem que # que són files que no tenen mes taules aniuades a tins i que tenen td, per tant, les busquem i parsejem # el tr **dia** per exteure els marcatges meves = False for dia in table.findChildren(recursive=False): # Per tenir en compte el cas de que una persona vegi els marcatges de varies, # Ens parem a les files on hi han 'collapse.gif' per investigar collapse = 'collapse.gif' in str(dia.find('img')) if collapse: dlow = dia.__str__().lower() lusername = self.username.replace('.',' ') # busquem el nom d'usuari a la fila, i si hi és a partir d'ara guardarem marcatges if lusername in dlow: meves=True # Si ja estavem guardant marcatges, deixarem de guardarlos només si trobem # el colspan="15", que vol dir que hem passat tots els marcatges de l'usuari # i hem arrivat al seguent. Sense aquesta condicio no guardariem cap marcatge, # ja que el seguen tr despres del nom d'usuari tambe te collapse.gif i posariem meves a False per error. elif meves==True and 'colspan="15"' in dia.__str__().lower(): meves=False if not dia.findAll('table') and dia.td and meves: data,marcatge_dict = self.parseDia(dia) # Només continuarem si el parseDia ha retornat alguna cosa if data!=None: ## Si no tenim res amb la data [data] a dintre de dies, guardem el ## marcatge_dict com a inicialització de la variable if data not in dies.keys(): dies[data]=marcatge_dict ## Si ja tenim el dia, vol dir que tenim un permis en aquell dia per tant afegirem les dades de marcatges que vinguin else: dies[data]['total']=dies[data]['total']+marcatge_dict['total'] dies[data]['marcatgeobert']=marcatge_dict.get('marcatgeobert','0') ## Concatenem les llistes de marcatges, per si hi han permisos avans que marcatges. 
## Els permisos no tenen marcatges, per tant, la majoria de cops concatenarem llistes buides ## però així evitem el cas de que , per exemple, es tiguin el permis de teletreball avans del dia de ferlo dies[data]['marcatges']=dies[data]['marcatges']+marcatge_dict['marcatges'] dies[data]['link_marcatge']=marcatge_dict.get('link_marcatge','') return dies def parseDia(self,dia): """ Parseja un tr que conte un marcatge retornant-los en forma de dicionari """ parsed = {} #Agafem els tds (fills de primer nivell del tr) children = dia.findChildren(recursive=False) #Seleccionem els td's que contenen marcatges amb alguna cosa (<a><font></font></a>) a dins marcatges_web = [a for a in children[7:15] if a.font] # només si tenim marcatges continuarem, ja que si no hi han marcatges # vol dir que és un permís i ja els extraiem de la seccio de permisos # avans de començar a parsejar els dies if marcatges_web: #Agafem el link del marcatge per possibles modificacions data_marcatge = children[3].a.string parsed['link_marcatge'] = 'https://liszt.upc.es'+children[3].a['href'] parsed['link_marcatge'] = parsed['link_marcatge'].replace('OpenDocument','EditDocument')+'&AutoFramed' #Agafem la suma ja feta del total del dia, que ens servira en tots els casos #Menys en els marcatges oberts, que és 0, i la guardem en minuts total_dia = children[4].font.string parsed['total']=total_dia==None and 0 or HMaMinuts(total_dia,sep='.') parsed['marcatges']=[] parsed['permisos']=[] novamarca = [] for marca in marcatges_web: if novamarca == []: novamarca.append(DateTime('%s %s' % (data_marcatge,marca.font.string))) else: novamarca.append(DateTime('%s %s' % (data_marcatge,marca.font.string))) parsed['marcatges'].append(tuple(novamarca)) novamarca = [] #Afegim els marcatge del dia actual si encara no el tenim tancat if len(novamarca)==1: novamarca_latest = novamarca[0].latestTime() # Si és el dia actual, posem None per tal que es compti fins a l'hora actual el marcatge parcial. # Qualsevol altre dia afegim la data ultima del dia com a ultim marcatge. # En tot cas marquem que és un marcatge obert ultimamarca = self.now!=novamarca_latest and novamarca_latest or None parsed['marcatgeobert']= '1' novamarca.append(ultimamarca) parsed['marcatges'].append(tuple(novamarca)) mm,dd,aaaa = data_marcatge.split('/') return (dd,mm,aaaa),parsed else: return (None,None) ##@reloginIfCrashedAndCache # OJO!! Revisar si ok! @cache_region('short_term', 'getPresencia') def getPresencia(self): self.log("getPresencia sense cachejar") return self.getPresenciaBase() @reloginIfCrashed def getPresenciaBase(self): """ Recupera la pàgina de persones de presència, on hi han els telèfons de cadascú i si esta o no presents La pàgina no té cap mena de id's ni classes, el parsejat es una mica dur... """ self.log("getPresencia") personesbr = self.br.open(PRESENCIA_URL) persones_html = personesbr.read() if self.checkBrowserExpired(persones_html): return 'EXPIRED' personesbr.close() soup = BeautifulSoup(persones_html,fromEncoding='iso-8859-1') persones = {} try: table = soup.findAll('table')[2] # La tercera taula de l'html és on hi ha el que busquem except: #Si hem arribat a aquest punt vol dir que hi ha algun problema amb la pagina de presència de l'usuari #i no hi han ni marcatges ni l'estructura html que s'espera. 
retornem una llista de dies buida return persones # Hi han moltes taules aniuades dins d'altres taules, però les files que ens interessen sabem que # que són files que no tenen mes taules aniuades a tins i que tenen td, per tant, les busquem i parsejem # el tr **dia** per exteure els marcatges for fila in table.findChildren(recursive=False): # Per poder escriure les dades de a quin equip pertany # Ens parem a les files on hi han 'collapse.gif' per agafar el nom de l'equip collapse = 'collapse.gif' in str(fila.find('img')) if collapse: team = fila.td.b.string # La resta seràn pesones, les parsejem i les incorporem a la llista else: if not fila.findAll('table') and fila.td: persona_dict = self.parsePersona(fila) persona_dict['equip']=team nompersona = persona_dict['nom'] if nompersona in persones.keys(): persones[nompersona]['online'] = persones[nompersona]['online'] or persona_dict['online'] persones[nompersona]['equip'] = '%s, %s' % (persones[nompersona]['equip'],team) else: persones[nompersona]=persona_dict self.saveSessionData() return [persones[a] for a in persones.keys()] def parsePersona(self,dia): """ Parseja un tr que conte una persona amb els seus telefons retornant-los en forma de dicionari """ parsed = {} #Agafem els tds (fills de primer nivell del tr) children = dia.findChildren(recursive=False) #recollim les dades de la persona nom = children[2].a.string online = 'vwicn160.gif' in children[1].__str__() telefon_intern = children[3].string telefon_mobil = children[4].string telefon_public = children[5].string telefon_public and telefon_public.split('.')[0] or telefon_public return dict(nom=nom,online=online,intern=telefon_intern,mobil=telefon_mobil,public=telefon_public) #@reloginIfCrashedAndCache @cache_region('long_term', 'getPermisosHistoric') def getPermisosHistoric(self, username, year): """ """ self.log("getPermisos Historic %s sense cachejar" % year) return self.getPermisosBase(PERMISOS_HISTORIC_URL % year) #@reloginIfCrashedAndCache @cache_region('default_term', 'getPermisos') def getPermisos(self, username): """ """ self.log("getPermisos sense cachejar") return self.getPermisosBase(PERMISOS_URL) @reloginIfCrashed def getPermisosBase(self,url,fname='getPermisos'): """ """ self.now = DateTime().latestTime() dies_permisos = [] permisos = self.br.open(url) permisos_html = permisos.read() if self.checkBrowserExpired(permisos_html): return 'EXPIRED' permisos.close() soup = BeautifulSoup(permisos_html,fromEncoding='iso-8859-1') try: tables = soup.findAll('table') table ='(hh:mm)' in tables[2].__str__() and tables[2] or tables[1] except: #La sessio del browser ha caducat, per tant refarem el login self.login(message="Re-Logging-in into presencia via regular login") permisos = self.br.open(PERMISOS_URL) permisos_html = permisos.read() permisos.close() soup = BeautifulSoup(permisos_html,fromEncoding='iso-8859-1') try: table = soup.findAll('table')[2] except: #Si hem arribat a aquest punt vol dir que hi ha algun problema amb la pagina de presència de l'usuari #i no hi han ni permisos ni l'estructura html que s'espera. 
retornem una llista de dies buida return {} # Hi han moltes taules aniuades dins d'altres taules, però les files que ens interessen sabem que # que són files que no tenen mes taules aniuades a tins i que tenen td, per tant, les busquem i parsejem # el tr **dia** per exteure els permisos meves = False for dia in table.findChildren(recursive=False): # Per tenir en compte el cas de que una persona vegi els permisos de varies, # Ens parem a les files on hi han 'collapse.gif' per investigar collapse = 'collapse.gif' in str(dia.find('img')) if collapse: dlow = dia.__str__().lower().decode('utf-8') lusername = self.username.replace('.',' ') # busquem el nom d'usuari a la fila, i si hi és a partir d'ara guardarem permisos if lusername in dlow: meves=True # Si ja estavem guardant permisos, deixarem de guardarlos només si trobem # el colspan="15", que vol dir que hem passat tots els permisos de l'usuari # i hem arrivat al seguent. Sense aquesta condicio no guardariem cap marcatge, # ja que el seguen tr despres del nom d'usuari tambe te collapse.gif i posariem meves a False per error. elif meves==True and 'colspan="5"' in dia.__str__().lower(): meves=False if not dia.findAll('table') and dia.td and meves: td = dia.findAll('td')[1] attrmap = td._getAttrMap() if 'colspan' in attrmap.keys(): if attrmap['colspan']=='4': # si es compleixen les dues condicions, estem en un tr que marca el motiu del permis motiu = td.b.font.string.encode('utf-8') elif 'colspan' not in str(dia): # si no hi ha colspan, en tot el tr estem en una entrada de permis segons lultim tipus permis_data = self.parsePermis(dia) dies_permisos = dies_permisos+self.generarDiesPermisos(permis_data,motiu) # XXXXXXX TODO Aqui hauriem de repassar que no hi hagin duplicats a la llista, doncs el diccionari quedaria nomes amb l'ultim permis return dict(dies_permisos) def generarDiesPermisos(self,permis,motiu): """ Donada la definicio d'un permis, si esta aprovat, genera els seus dies tal com si ens els haguessim trobat en el parseig de getMarcatgesBase. Els marcatges seran sempre [] llista buida, doncs els permisos no tenen un marcatge associat """ motiu = motiu.decode('utf-8') days = [] if permis['approved']: from_date = TTToDateTime(permis['from_date']) to_date = TTToDateTime(permis['to_date']) grow_days = True counter = 0 while grow_days: current = addDays(from_date,counter) # Només afegirem el dia si no és cap de setmana # Tampoc l'afegirem si és un dia de festa, ja que si es festa no son vacances... 
dhfd = self.getDiscountHoursForDay(DateTimeToTT(current),7) es_festa = dhfd!=None and dhfd[0] or False if current.dow() not in [0,6] and es_festa!='Festa': motiu_image = 'permis.jpg' if motiu in MOTIUS.keys(): motiu_image = MOTIUS[motiu]['imatge'] permisdict = dict(compta_hores=MOTIUS[motiu]['compta_hores'], image=motiu_image, title=motiu, minutes=permis['minutes']) diadict = dict(link_marcatge='', marcatges=[], total=0, permisos=[permisdict,]) daytuple = (DateTimeToTT(current),diadict) days.append(daytuple) # Condició de final i incrementar l'iterador if current==to_date: grow_days=False counter = counter +1 return days def parsePermis(self,dia): """ Parseja un tr que conte un permis retornant-los en forma de dicionari """ parsed = {} tds = dia.findAll('td') approved = 'vwicn083.gif' in str(tds[2].img) fd = tds[3].font.a.string.split('/') td = tds[4].font.string.split('/') #Agafarem el td 6 o el 7 segons tingui els ':' , ja que l'historic te una columna de més minutestd = ':' in tds[6].__str__() and tds[6] or tds[7] # S'ha de fer un replace 12: per 00: ja que l'html que revem a través del # mechanize si hi ha 00 es pensa que es una hora i ens ho transforma en 12 ... minutes = HMaMinuts(minutestd.string.split()[0].replace('12:','00:')) return dict(approved = approved, from_date = (fd[1],fd[0],fd[2]), to_date = (td[1],td[0],td[2]), minutes = minutes)
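# Editor's sketch: parsePermis delegates the 'HH:MM' -> minutes conversion to the
# project's HMaMinuts helper, whose implementation is not shown here. A minimal
# stand-in, assuming that is all the helper does (name and body are hypothetical):
def hm_to_minutes(hhmm):
    # Convert an 'HH:MM' string into an integer number of minutes.
    hours, minutes = hhmm.split(':')
    return int(hours) * 60 + int(minutes)

# hm_to_minutes('07:30') -> 450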
def executa(self, search_url): self.SEARCH_URL = search_url list = None list = [u'Favorecido:', u'Valor:', u'Observação do Documento:'] socket.socket = socks.socksocket socket.create_connection = create_connection br = Browser() print search_url print "ID = " + str(Consulta.ID) gravalog(self, search_url + " cont = " + str(Consulta.ID) + "\n") LRequest = urllib2.Request(search_url, " ") LResponse = br.open(LRequest) page = bs_parse(LResponse.read()) #pode ir para fora!!!! soup = bs_parse(LResponse.get_data()) img_captcha = soup.find('img', alt='captcha') if img_captcha != None: #caso encontre um captcha, o sistema troca o endereço IP try: print "CAPTCHA!!!" gravalog(self, "CAPTCHA\n") finally: Consulta.ID = newID(self, Consulta.controller) br.close() socket.socket = socks.socksocket socket.create_connection = self.create_connection br = Browser() print search_url + " cont = " + str(Consulta.ID) gravalog(self, search_url + " cont = " + str(Consulta.ID) + "\n") LRequest = urllib2.Request(search_url, " ") LResponse = br.open(LRequest) page = bs_parse(LResponse.read()) entra = 0 #navega na página HTML consultando o Favorecido no link do hypertexto for table in page.findAll("table"): for row2 in table.findAll('tr'): # print row2 for col in row2.findAll('td'): for href in col.findAll('a'): print href gravalog(self, str(href).encode('utf-8', 'ignore') + '\n') #resp = br.follow_link(text_regex=href.string) #html = resp.read() #print html if col.string != None: m = re.search('a href', col.string) if m != None: print 'Link!!!' gravalog(self, 'Link!!!\n') print col.string gravalog( self, str(col.string).encode('utf-8', 'ignore') + '\n') m = re.search('INFORMATICA', col.string) if m != None: entra = 1 m = re.search('TECNOLOGIA DA INFORMACAO', col.string) if m != None: entra = 1 m = re.search('TELECOMUNICACOES', col.string) if m != None: entra = 1 m = re.search('TELECOMUNICACAO', col.string) if m != None: entra = 1 m = re.search('NETWORKS', col.string) if m != None: entra = 1 m = re.search('NETWORK', col.string) if m != None: entra = 1 m = re.search('REDE', col.string) if m != None: entra = 1 m = re.search('REDES', col.string) if m != None: entra = 1 if entra == 1: logarqui = logging.getLogger("logarqui") logarqui.debug("Inside f!") try: print 'BINGO!' 
gravalog(self, 'BINGO!\n') print href.string gravalog( self, str(href.string).encode('utf-8', 'ignore') + '\n') LResponse = br.follow_link( text_regex=href.string) html = LResponse.read() print html gravalog(self, html + '\n') page = bs_parse(html) cont = 3 for table in page.findAll("table"): for row2 in table.findAll('tr'): # print row2 favorecido = 0 valor = 0 observacao = 0 for col in row2.findAll('td'): if favorecido == 1: texto = str(col.string).decode( 'utf8').encode( 'utf8', 'ignore').replace( "'", "").replace( ";", "").replace( "--", "") print texto gravalog(self, texto + '\n') list.append(texto) if valor == 1: texto = str(col.string).decode( 'utf8').encode( 'utf8', 'ignore').replace( "'", "").replace( ";", "").replace( "--", "") print texto gravalog(self, texto + '\n') list.append(texto) if observacao == 1: texto = str(col.string).decode( 'utf8').encode( 'utf8', 'ignore').replace( "'", "").replace( ";", "").replace( "--", "") print texto gravalog(self, texto + '\n') list.append(texto) print list if col.string != None: m = re.search( u'Favorecido:', col.string) if m != None: print u'Favorecido:' gravalog( self, u'Favorecido:') favorecido = 1 m = re.search( u'Valor:', col.string) if m != None: print u'Valor:' gravalog(self, u'Valor:') valor = 1 m = re.search( u'Observação do Documento:', col.string) if m != None: print u'Observação do Documento:' gravalog( self, 'Observação do Documento:' ) observacao = 1 entra = 0 br.back() except Exception, ex: logarqui.exception logarqui.error logarqui.exception( "\nProvlema na gravação de logs! \n" + search_url) logarqui.debug("Finishing f!")
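# Editor's sketch (not part of the original script): the chain of re.search keyword
# checks above amounts to a single membership test. The keyword list is copied from
# the code; the helper name is hypothetical.
IT_KEYWORDS = [u'INFORMATICA', u'TECNOLOGIA DA INFORMACAO', u'TELECOMUNICACOES',
               u'TELECOMUNICACAO', u'NETWORKS', u'NETWORK', u'REDE', u'REDES']

def mentions_it_supplier(text):
    """Return True if the table-cell text contains any of the IT-related keywords."""
    return text is not None and any(keyword in text for keyword in IT_KEYWORDS)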
class RequestQuery: def __init__(self,config): self.br=Browser() self.config = config # Initialise connections self.mySiteDB = SiteDBJSON() self.phedex = PhEDEx({"endpoint":"https://cmsweb.cern.ch/phedex/datasvc/json/prod/"}, "json") self.dbsPhys01 = DbsApi(url = dbs_base_url+"phys01/DBSReader/") self.dbsPhys02 = DbsApi(url = dbs_base_url+"phys02/DBSReader/") self.dbsPhys03 = DbsApi(url = dbs_base_url+"phys03/DBSReader/") def __del__(self): self.br.close() def login2Savannah(self): """ login2Savannah log into savannah with the given parameters in the config (username and password) User must have admin priviledges for store results requests """ login_page='https://savannah.cern.ch/account/login.php?uri=%2F' savannah_page='https://savannah.cern.ch/task/?group=cms-storeresults' self.br.open(login_page) ## 'Search' form is form 0 ## login form is form 1 self.br.select_form(nr=1) username = self.config["SavannahUser"] self.br['form_loginname']=username self.br['form_pw']=self.config["SavannahPasswd"] self.br.submit() response = self.br.open(savannah_page) # Check to see if login was successful if not re.search('Logged in as ' + username, response.read()): print('login unsuccessful, please check your username and password') return False else: return True def selectQueryForm(self,**kargs): """ selectQueryForm create the browser view to get all the store result tickets from savannah """ if self.isLoggedIn: self.br.select_form(name="bug_form") ## Use right query form labelled Test control = self.br.find_control("report_id",type="select") for item in control.items: if item.attrs['label'] == "Test": control.value = [item.attrs['value']] ##select number of entries displayed per page control = self.br.find_control("chunksz",type="text") control.value = "150" ##check additional searching parameter for arg in kargs: if arg == "approval_status": control = self.br.find_control("resolution_id",type="select") for item in control.items: if item.attrs['label'] == kargs[arg].strip(): control.value = [item.attrs['value']] elif arg == "task_status": control = self.br.find_control("status_id",type="select") for item in control.items: if item.attrs['label'] == kargs[arg].strip(): control.value = [item.attrs['value']] elif arg == "team": control = self.br.find_control("custom_sb5",type="select") for item in control.items: if item.attrs['label'] == kargs[arg].strip(): control.value = [item.attrs['value']] response = self.br.submit() response.read() return def getScramArchByCMSSW(self): """ Get from the list of available CMSSW releases return a dictionary of ScramArchitecture by CMSSW """ # Set temporary conection to the server and get the response from cmstags url = 'https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML' br = Browser() br.set_handle_robots(False) response=br.open(url) soup = BeautifulSoup(response.read()) # Dictionary form # {'CMSSW_X_X_X':[slc5_amd64_gcc472], ... 
} archByCmssw={} # Fill the dictionary for arch in soup.find_all('architecture'): for cmssw in arch.find_all('project'): # CMSSW release cmsswLabel = cmssw.get('label').encode('ascii', 'ignore') if cmsswLabel not in archByCmssw: archByCmssw[cmsswLabel]=[] # ScramArch related to this CMSSW release archName = arch.get('name').encode('ascii', 'ignore') archByCmssw[cmsswLabel].append(archName) return archByCmssw def createValueDicts(self): """ Init dictionaries by value/label: - Releases by Value - Physics group by value - DBS url by value - DBS rul by label - Status of savannah request by value - Status of savannah ticket by value (Open/Closed/Any) """ if self.isLoggedIn: self.br.select_form(name="bug_form") control = self.br.find_control("custom_sb2",type="select") self.ReleaseByValueDict = self.getLabelByValueDict(control) control = self.br.find_control("custom_sb3",type="select") self.GroupByValueDict = self.getLabelByValueDict(control) control = self.br.find_control("custom_sb4",type="select") self.DBSByValueDict = self.getLabelByValueDict(control) self.DBSByLabelDict = self.getValueByLabelDict(control) control = self.br.find_control("resolution_id",type="select") self.StatusByValueDict = self.getLabelByValueDict(control) control = self.br.find_control("status_id",type="select") self.TicketStatusByLabelDict = self.getValueByLabelDict(control) return def getDatasetOriginSites(self, dbs_url, data): """ Get the origin sites for each block of the dataset. Return a list block origin sites. """ local_dbs = dbs_url.split('/')[5] if local_dbs == 'phys01': response = self.dbsPhys01.listBlocks(detail=True,dataset=data) elif local_dbs == 'phys02': response = self.dbsPhys02.listBlocks(detail=True,dataset=data) elif local_dbs == 'phys03': response = self.dbsPhys03.listBlocks(detail=True,dataset=data) pnnList = set() for block in response: pnnList.add(block['origin_site_name']) psnList = self.mySiteDB.PNNstoPSNs(pnnList) return psnList, list(pnnList) def phEDExNodetocmsName(self, nodeList): """ Convert PhEDEx node name list to cms names list """ names = [] for node in nodeList: name = node.replace('_MSS', '').replace('_Disk', '').replace('_Buffer', '').replace('_Export', '') if name not in names: names.append(name) return names def setGlobalTagFromOrigin(self, dbs_url,input_dataset): """ Get the global tag of the dataset from the source dbs url. 
If it is not set, then set global tag to 'UNKNOWN' """ globalTag = "" local_dbs = dbs_url.split('/')[5] if local_dbs == 'phys01': response = self.dbsPhys01.listOutputConfigs(dataset=input_dataset) elif local_dbs == 'phys02': response = self.dbsPhys02.listOutputConfigs(dataset=input_dataset) elif local_dbs == 'phys03': response = self.dbsPhys03.listOutputConfigs(dataset=input_dataset) globalTag = response[0]['global_tag'] # GlobalTag cannot be empty if globalTag == '': globalTag = 'UNKNOWN' return globalTag def isDataAtUrl(self, dbs_url,input_dataset): """ Returns True if the dataset is at the dbs url, if not returns False """ local_dbs = dbs_url.split('/')[5] if local_dbs == 'phys01': response = self.dbsPhys01.listDatasets(dataset=input_dataset) elif local_dbs == 'phys02': response = self.dbsPhys02.listDatasets(dataset=input_dataset) elif local_dbs == 'phys03': response = self.dbsPhys03.listDatasets(dataset=input_dataset) # This means that the dataset is not at the url if not response: return False else: return True def getLabelByValueDict(self, control): """ From control items, create a dictionary by values """ d = {} for item in control.items: value = item.attrs['value'] label = item.attrs['label'] d[value] = label return d def getValueByLabelDict(self, control): """ From control items, create a dictionary by labels """ d = {} for item in control.items: value = item.attrs['value'] label = item.attrs['label'] d[label] = value return d def getRequests(self,**kargs): """ getRequests Actually goes through all the savannah requests and create json files if the ticket is not Closed and the status of the item is Done. It also reports back the summary of the requests in savannah """ requests = [] # Open Browser and login into Savannah self.br=Browser() self.isLoggedIn = self.login2Savannah() if self.isLoggedIn: if not kargs: self.selectQueryForm(approval_status='1',task_status='0') else: self.selectQueryForm(**kargs) self.createValueDicts() self.br.select_form(name="bug_form") response = self.br.submit() html_ouput = response.read() scramArchByCMSSW = self.getScramArchByCMSSW() self.nodeMappings = self.phedex.getNodeMap() for link in self.br.links(text_regex="#[0-9]+"): response = self.br.follow_link(link) try: ## Get Information self.br.select_form(name="item_form") ## remove leading   and # from task task = link.text.replace('#','').decode('utf-8').strip() print("Processing ticket: %s" % task) ## Get input dataset name control = self.br.find_control("custom_tf1",type="text") input_dataset = control.value input_primary_dataset = input_dataset.split('/')[1].replace(' ','') input_processed_dataset = input_dataset.split('/')[2].replace(' ','') data_tier = input_dataset.split('/')[3].replace(' ','') ## Get DBS URL by Drop Down control = self.br.find_control("custom_sb4",type="select") dbs_url = self.DBSByValueDict[control.value[0]] ## Get DBS URL by text field (for old entries) if dbs_url=='None': control = self.br.find_control("custom_tf4",type="text") dbs_url = control.value.replace(' ','') else: # Transform input value to a valid DBS url #dbs_url = "https://cmsweb.cern.ch/dbs/prod/"+dbs_url+"/DBSReader" dbs_url = dbs_base_url+dbs_url+"/DBSReader" ## Get Release control = self.br.find_control("custom_sb2",type="select") release_id = control.value ## Get current request status control = self.br.find_control("status_id",type="select") request_status_id = control.value RequestStatusByValueDict = self.getLabelByValueDict(control) # close the request if deprecated release was used try: release = 
self.ReleaseByValueDict[release_id[0]] except: if len(self.ReleaseByValueDict)>0 and RequestStatusByValueDict[request_status_id[0]] != "Closed": msg = "Your request is not valid anymore, since the given CMSSW release is deprecated. If your request should be still processed, please reopen the request and update the CMSSW release to a more recent *working* release.\n" msg+= "\n" msg+= "Thanks,\n" msg+= "Your StoreResults team" self.closeRequest(task,msg) self.br.back() print("I tried to Close ticket %s due to CMSSW not valid" % task) continue # close the request if release has not ScramArch match if release not in scramArchByCMSSW: if len(self.ReleaseByValueDict)>0 and RequestStatusByValueDict[request_status_id[0]] != "Closed": msg = "Your request is not valid, there is no ScramArch match for the given CMSSW release.\n" msg+= "If your request should be still processed, please reopen the request and update the CMSSW release according to: https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML \n" msg+= "\n" msg+= "Thanks,\n" msg+= "Your StoreResults team" self.closeRequest(task,msg) self.br.back() print("I tried to Close ticket %s due to ScramArch mismatch" % task) continue else: index=len(scramArchByCMSSW[release]) scram_arch = scramArchByCMSSW[release][index-1] # close the request if dataset is not at dbs url try: data_at_url = self.isDataAtUrl(dbs_url,input_dataset) except: print('I got an error trying to look for dataset %s at %s, please look at this ticket: %s' %(input_dataset,dbs_url,task)) continue if not data_at_url: msg = "Your request is not valid, I could not find the given dataset at %s\n" % dbs_url msg+= "If your request should be still processed, please reopen the request and change DBS url properly \n" msg+= "\n" msg+= "Thanks,\n" msg+= "Your StoreResults team" self.closeRequest(task,msg) self.br.back() print("I tried to Close ticket %s, dataset is not at DBS url" % task) continue # Avoid not approved Tickets #if not RequestStatusByValueDict[request_status_id[0]] == "Done": # continue ## Get Physics Group control = self.br.find_control("custom_sb3",type="select") group_id = control.value[0] group_squad = 'cms-storeresults-'+self.GroupByValueDict[group_id].replace("-","_").lower() ## Get Dataset Version control = self.br.find_control("custom_tf3",type="text") dataset_version = control.value.replace(' ','') if dataset_version == "": dataset_version = '1' ## Get current status control = self.br.find_control("resolution_id",type="select") status_id = control.value ## Get assigned to control = self.br.find_control("assigned_to",type="select") AssignedToByValueDict = self.getLabelByValueDict(control) assignedTo_id = control.value ##Assign task to the physics group squad if AssignedToByValueDict[assignedTo_id[0]]!=group_squad: assignedTo_id = [self.getValueByLabelDict(control)[group_squad]] control.value = assignedTo_id self.br.submit() # Set default Adquisition Era for StoreResults acquisitionEra = "StoreResults" ## Construction of the new dataset name (ProcessingString) ## remove leading hypernews or physics group name and StoreResults+Version if input_processed_dataset.find(self.GroupByValueDict[group_id])==0: new_dataset = input_processed_dataset.replace(self.GroupByValueDict[group_id],"",1) else: stripped_dataset = input_processed_dataset.split("-")[1:] new_dataset = '_'.join(stripped_dataset) except Exception as ex: self.br.back() print("There is a problem with this ticket %s, please have a look to the error:" % task) print(str(ex)) print(traceback.format_exc()) continue self.br.back() # 
Get dataset site info: psnList, pnnList = self.getDatasetOriginSites(dbs_url,input_dataset) infoDict = {} # Build store results json # First add all the defaults values infoDict["RequestType"] = "StoreResults" infoDict["UnmergedLFNBase"] = "/store/unmerged" infoDict["MergedLFNBase"] = "/store/results/" + self.GroupByValueDict[group_id].replace("-","_").lower() infoDict["MinMergeSize"] = 1500000000 infoDict["MaxMergeSize"] = 5000000000 infoDict["MaxMergeEvents"] = 100000 infoDict["TimePerEvent"] = 40 infoDict["SizePerEvent"] = 512.0 infoDict["Memory"] = 2394 infoDict["CmsPath"] = "/uscmst1/prod/sw/cms" infoDict["Group"] = "DATAOPS" infoDict["DbsUrl"] = dbs_url # Add all the information pulled from Savannah infoDict["AcquisitionEra"] = acquisitionEra infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(dbs_url,input_dataset) infoDict["DataTier"] = data_tier infoDict["InputDataset"] = input_dataset infoDict["ProcessingString"] = new_dataset infoDict["CMSSWVersion"] = release infoDict["ScramArch"] = scram_arch infoDict["ProcessingVersion"] = dataset_version infoDict["SiteWhitelist"] = psnList # Create report for Migration2Global report = {} #Fill json file, if status is done if self.StatusByValueDict[status_id[0]]=='Done' and RequestStatusByValueDict[request_status_id[0]] != "Closed": self.writeJSONFile(task, infoDict) report["json"] = 'y' else: report["json"] = 'n' report["task"] = int(task) report["InputDataset"] = input_dataset report["ProcessingString"] = new_dataset report["ticketStatus"] = self.StatusByValueDict[status_id[0]] report["assignedTo"] = AssignedToByValueDict[assignedTo_id[0]] report["localUrl"] = dbs_url report["sites"] = psnList report["pnns"] = pnnList # if the request is closed, change the item status to report to Closed if report["ticketStatus"] == "Done" and RequestStatusByValueDict[request_status_id[0]] == "Closed": report["ticketStatus"] = "Closed" requests.append(report) # Print out report self.printReport(requests) # Close connections self.br.close() return requests def closeRequest(self,task,msg): """ This close a specific savannag ticket Insert a message in the ticket """ if self.isLoggedIn: #self.createValueDicts() response = self.br.open('https://savannah.cern.ch/task/?'+str(task)) html = response.read() self.br.select_form(name="item_form") control = self.br.find_control("status_id",type="select") control.value = [self.TicketStatusByLabelDict["Closed"]] #Put reason to the comment field control = self.br.find_control("comment",type="textarea") control.value = msg #DBS Drop Down is a mandatory field, if set to None (for old requests), it is not possible to close the request self.setDBSDropDown() self.br.submit() #remove JSON ticket self.removeJSONFile(task) self.br.back() return def setDBSDropDown(self): ## Get DBS URL by Drop Down control = self.br.find_control("custom_sb4",type="select") dbs_url = self.DBSByValueDict[control.value[0]] ## Get DBS URL by text field (for old entries) if dbs_url=='None': tmp = self.br.find_control("custom_tf4",type="text") dbs_url = tmp.value.replace(' ','') if dbs_url.find("phys01")!=-1: control.value = [self.DBSByLabelDict["phys01"]] elif dbs_url.find("phys02")!=-1: control.value = [self.DBSByLabelDict["phys02"]] elif dbs_url.find("phys03")!=-1: control.value = [self.DBSByLabelDict["phys03"]] else: msg = 'DBS URL of the old request is neither phys01, phys02 nor phys03. Please, check!' 
print(msg) raise RuntimeError(msg) return def writeJSONFile(self, task, infoDict): """ This writes a JSON file at ComponentDir """ ##check if file already exists filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json' if not os.access(filename,os.F_OK): jsonfile = open(filename,'w') request = {'createRequest':infoDict} ## CHECK THIS BEFORE FINISHING jsonfile.write(json.dumps(request,sort_keys=True, indent=4)) jsonfile.close return def removeJSONFile(self,task): """ This removes the JSON file at ComponentDir if it was created """ filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json' if os.access(filename,os.F_OK): os.remove(filename) return def printReport(self, requests): """ Print out a report """ print("%20s %10s %5s %35s %10s %50s %50s" %( 'Savannah Ticket','Status','json','Assigned to','local DBS','Sites','pnns')) print("%20s %10s %5s %35s %10s %50s %50s" %( '-'*20,'-'*10,'-'*5,'-'*35,'-'*10,'-'*50,'-'*50 )) for report in requests: json = report["json"] ticket = report["task"] status = report["ticketStatus"] assigned = report["assignedTo"] localUrl = report["localUrl"].split('/')[5] site = ', '.join(report["sites"]) pnns = ', '.join(report["pnns"]) print("%20s %10s %5s %35s %10s %50s %50s" %(ticket,status,json,assigned,localUrl,site,pnns))
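# Minimal usage sketch for the RequestQuery class above. The config keys are the ones
# the class reads (SavannahUser, SavannahPasswd, ComponentDir); the values and the
# label strings passed to getRequests() are illustrative assumptions.
config = {
    "SavannahUser": "someuser",
    "SavannahPasswd": "somepassword",
    "ComponentDir": "/tmp/storeresults",
}
rq = RequestQuery(config)
# Query the savannah tracker, write Ticket_<id>.json files for tickets whose item
# status is Done, and print the summary report.
reports = rq.getRequests(approval_status="Done", task_status="Open")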
class Session(object): def __init__(self): """Constructor Args: None Attributes: browser (`mechanize._mechanize.Browser`): browser object in session """ self.browser = Browser() # set error and debug handlers for the browser # cookie jar self.browser.set_cookiejar(cookielib.LWPCookieJar()) # browser options self.browser.set_handle_equiv(True) self.browser.set_handle_gzip(True) self.browser.set_handle_redirect(True) self.browser.set_handle_referer(True) self.browser.set_handle_robots(False) # follows refresh 0 but doesn't hang on refresh > 0 self.browser.set_handle_refresh(_http.HTTPRefreshProcessor(), max_time=1) # user-Agent self.browser.addheaders = [("User-agent", HEADER)] def close(self): """Destructor for Session. Closes current browser session Args: None Returns: None """ self.browser.close() def case_id_form(self, case): """Grabs the form in the case searching page, and inputs the case number to return the response. Args: case (`str`): case ID to be scraped Returns: response (`str`): HTML response """ # iterate through the forms to find the correct one for form in self.browser.forms(): if form.attrs["name"] == "inquiryFormByCaseNum": self.browser.form = form break # submit case ID and return the response self.browser.form["caseId"] = case self.browser.submit() response = self.browser.response().read() self.browser.back() return response if any( case_type in response.upper() for case_type in ("FORECLOSURE", "FORECLOSURE RIGHTS OF REDEMPTION")) else '' def disclaimer_form(self): """Navigates to the URL to proceed to the case searching page Args: None Returns: None """ # visit the site self.browser.open(URL) # select the only form on the page self.browser.select_form(nr=0) # select the checkbox self.browser.form["disclaimer"] = ['Y'] # submit the form self.browser.submit() @staticmethod def server_running(): """Checks the status of the Casesearch servers Args: None Returns: `True` if server is up, `False` otherwise """ return urlopen(URL).getcode() == 200
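# Minimal usage sketch for the Session class above; the case number is a hypothetical
# placeholder.
if Session.server_running():
    session = Session()
    session.disclaimer_form()                   # accept the disclaimer and reach the search form
    html = session.case_id_form("24O17001234")  # non-empty only for foreclosure case types
    session.close()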
class LconnectScraper(ClassDataScraper): LCONNECT_URL = 'http://leopardweb.wit.edu/' USERAGENT = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) ' \ + 'Gecko/20100122 firefox/3.6.1' def __init__(self): # Create a cookie jar and a browser self._cookieJar = LWPCookieJar() self._browser = Browser() self._browser.set_cookiejar(self._cookieJar) # Set Browser options self._browser.set_handle_equiv(True) self._browser.set_handle_gzip(True) self._browser.set_handle_redirect(True) self._browser.set_handle_referer(True) self._browser.set_handle_robots(False) self._browser.set_handle_refresh(_http.HTTPRefreshProcessor(), max_time=1) self._browser.addheaders = [('User-agent', LconnectScraper.USERAGENT)] # Debugging self._browser.set_debug_http(True) self._browser.set_debug_redirects(True) self._browser.set_debug_responses(True) def getName(self): return "Lconnect Scraper" def connect(self): """ Attempts to connect to the data source """ try: # Try to open a connection. 8 Second timeout self._browser.open(LconnectScraper.LCONNECT_URL, timeout=8) return True except URLError: return False def disconnect(self): """ Disconnects from the data source """ self._browser.close() def requiresAuthentication(self): """ Returns whether or not the scraper requires authentication information """ return True def authenticate(self, username, password): """ Attempts to authenticate the scraper using username and password """ # If we're on the sign in page, try to sign in if self._browser.title() == 'Sign In': for form in self._browser.forms(): if form.name is None: self._browser.form = list(self._browser.forms())[0] self._browser['username'] = username self._browser['password'] = password self._browser.submit() # If the browser's title is 'Main Menu', # we've either successfully logged in, or we were already if self._browser.title() == 'Main Menu': return True else: return False def getClassData(self): """ Returns a list of ClassData objects """ return []
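# Minimal usage sketch for LconnectScraper; the credentials are placeholders.
scraper = LconnectScraper()
if scraper.connect():
    if scraper.requiresAuthentication():
        authenticated = scraper.authenticate("w12345678", "secret")
    class_data = scraper.getClassData()  # currently always returns []
    scraper.disconnect()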
class RequestQuery: def __init__(self,config): self.br=Browser() self.config = config self.isLoggedIn = self.login2Savannah() def __del__(self): self.br.close() def closeRequest(self,task,msg): if self.isLoggedIn: self.createValueDicts() response = self.br.open('https://savannah.cern.ch/task/?'+str(task)) html = response.read() self.br.select_form(name="item_form") control = self.br.find_control("status_id",type="select") control.value = [self.TicketStatusByLabelDict["Closed"]] #Put reason to the comment field control = self.br.find_control("comment",type="textarea") control.value = msg #DBS Drop Down is a mandatory field, if set to None (for old requests), it is not possible to close the request self.setDBSDropDown() self.br.submit() #remove JSON ticket self.removeJSONFile(task) return def createValueDicts(self): if self.isLoggedIn: self.br.select_form(name="bug_form") control = self.br.find_control("custom_sb2",type="select") self.ReleaseByValueDict = self.getLabelByValueDict(control) control = self.br.find_control("custom_sb3",type="select") self.GroupByValueDict = self.getLabelByValueDict(control) control = self.br.find_control("custom_sb4",type="select") self.DBSByValueDict = self.getLabelByValueDict(control) self.DBSByLabelDict = self.getValueByLabelDict(control) control = self.br.find_control("resolution_id",type="select") self.StatusByValueDict = self.getLabelByValueDict(control) control = self.br.find_control("status_id",type="select") self.TicketStatusByLabelDict = self.getValueByLabelDict(control) return def setDBSDropDown(self): ## Get DBS URL by Drop Down control = self.br.find_control("custom_sb4",type="select") dbs_url = self.DBSByValueDict[control.value[0]] ## Get DBS URL by text field (for old entries) if dbs_url=='None': tmp = self.br.find_control("custom_tf4",type="text") dbs_url = tmp.value.replace(' ','') if dbs_url.find("analysis_02")!=-1: control.value = [self.DBSByLabelDict["cms_dbs_ph_analysis_02"]] elif dbs_url.find("analysis_01")!=-1: control.value = [self.DBSByLabelDict["cms_dbs_ph_analysis_01"]] elif dbs_url.find("local_09")!=-1: control.value = [self.DBSByLabelDict["cms_dbs_ph_prod_local_09"]] else: msg = 'DBS URL of the old request is neither analysis_01, analysis_02 nor local_09. Please, check!' 
logging.error(msg) raise RuntimeError, msg return def getLabelByValueDict(self, control): d = {} for item in control.items: value = item.attrs['value'] label = item.attrs['label'] d[value] = label return d def getRequests(self,**kargs): requests = [] if self.isLoggedIn: self.selectQueryForm(**kargs) self.createValueDicts() self.br.select_form(name="bug_form") response = self.br.submit() html_ouput = response.read() for link in self.br.links(text_regex="#[0-9]+"): response = self.br.follow_link(link) ## Get Information self.br.select_form(name="item_form") ## Get input dataset name control = self.br.find_control("custom_tf1",type="text") input_dataset = control.value.split('/') input_primary_dataset = input_dataset[1].replace(' ','') input_processed_dataset = input_dataset[2].replace(' ','') ## Get DBS URL by Drop Down control = self.br.find_control("custom_sb4",type="select") dbs_url = self.DBSByValueDict[control.value[0]] ## Get DBS URL by text field (for old entries) if dbs_url=='None': control = self.br.find_control("custom_tf4",type="text") dbs_url = control.value.replace(' ','') else: # Transform input value to a valid DBS url dbs_url = "http://cmsdbsprod.cern.ch/"+dbs_url+"/servlet/DBSServlet" ## Get Release control = self.br.find_control("custom_sb2",type="select") release_id = control.value ## Get Physics Group control = self.br.find_control("custom_sb3",type="select") group_id = control.value[0] group_squad = 'cms-storeresults-'+self.GroupByValueDict[group_id].replace("-","_").lower() ## Get Dataset Version control = self.br.find_control("custom_tf3",type="text") dataset_version = control.value.replace(' ','') ## Get current status control = self.br.find_control("resolution_id",type="select") status_id = control.value ## Get current request status control = self.br.find_control("status_id",type="select") request_status_id = control.value RequestStatusByValueDict = self.getLabelByValueDict(control) ## Get assigned to control = self.br.find_control("assigned_to",type="select") AssignedToByValueDict = self.getLabelByValueDict(control) assignedTo_id = control.value ##Assign task to the physics group squad if AssignedToByValueDict[assignedTo_id[0]]!=group_squad: control.value = [self.getValueByLabelDict(control)[group_squad]] self.br.submit() ## Construction of the new dataset name ## remove leading hypernews or physics group name and StoreResults+Version if len(dataset_version)>0: dataset_prefix = "StoreResults-"+dataset_version else: dataset_prefix = "StoreResults" if input_processed_dataset.find(self.GroupByValueDict[group_id])==0: new_dataset = input_processed_dataset.replace(self.GroupByValueDict[group_id],dataset_prefix,1) else: stripped_dataset = input_processed_dataset.split("-")[1:] new_dataset = dataset_prefix+'-'+'-'.join(stripped_dataset) self.br.back() ## remove leading   and # from task task = link.text.replace('#','').decode('utf-8').strip() infoDict = {} infoDict["primaryDataset"] = input_primary_dataset infoDict["processedDataset"] = input_processed_dataset infoDict["outputDataset"] = new_dataset infoDict["physicsGroup"] = self.GroupByValueDict[group_id] infoDict["inputDBSURL"] = dbs_url # close the request if deprecated release was used try: infoDict["cmsswRelease"] = self.ReleaseByValueDict[release_id[0]] except: if len(self.ReleaseByValueDict)>0 and RequestStatusByValueDict[request_status_id[0]] != "Closed": msg = "Your request is not valid anymore, since the given CMSSW release is deprecated. 
If your request should be still processed, please reopen the request and update the CMSSW release to a more recent *working* release.\n" msg+= "\n" msg+= "Thanks,\n" msg+= "Your StoreResults team" self.closeRequest(task,msg) #Fill json file, if status is done if self.StatusByValueDict[status_id[0]]=='Done' and RequestStatusByValueDict[request_status_id[0]] != "Closed": self.writeJSONFile(task, infoDict) infoDict["task"] = int(task) infoDict["ticketStatus"] = self.StatusByValueDict[status_id[0]] infoDict["assignedTo"] = AssignedToByValueDict[assignedTo_id[0]] if infoDict["ticketStatus"] == "Done" and RequestStatusByValueDict[request_status_id[0]] == "Closed": infoDict["ticketStatus"] = "Closed" requests.append(infoDict) return requests def getValueByLabelDict(self, control): d = {} for item in control.items: value = item.attrs['value'] label = item.attrs['label'] d[label] = value return d def login2Savannah(self): login_page='https://savannah.cern.ch/account/login.php?uri=%2F' savannah_page='https://savannah.cern.ch/task/?group=cms-storeresults' self.br.open(login_page) ## 'Search' form is form 0 ## login form is form 1 self.br.select_form(nr=1) username = self.config["SavannahUser"] self.br['form_loginname']=username self.br['form_pw']=self.config["SavannahPasswd"] self.br.submit() response = self.br.open(savannah_page) # Check to see if login was successful if not re.search('Logged in as ' + username, response.read()): logging.error('login unsuccessful, please check your username and password') return False else: return True def selectQueryForm(self,**kargs): if self.isLoggedIn: self.br.select_form(name="bug_form") ## Use right query form labelled Test control = self.br.find_control("report_id",type="select") for item in control.items: if item.attrs['label'] == "Test": control.value = [item.attrs['value']] ##select number of entries displayed per page control = self.br.find_control("chunksz",type="text") control.value = "150" ##check additional searching parameter for arg in kargs: if arg == "approval_status": control = self.br.find_control("resolution_id",type="select") for item in control.items: if item.attrs['label'] == kargs[arg].strip(): control.value = [item.attrs['value']] elif arg == "task_status": control = self.br.find_control("status_id",type="select") for item in control.items: if item.attrs['label'] == kargs[arg].strip(): control.value = [item.attrs['value']] elif arg == "team": control = self.br.find_control("custom_sb5",type="select") for item in control.items: if item.attrs['label'] == kargs[arg].strip(): control.value = [item.attrs['value']] response = self.br.submit() response.read() return def removeJSONFile(self,task): filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json' if os.access(filename,os.F_OK): os.remove(filename) return def writeJSONFile(self, task, infoDict): ##check if file already exists filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json' if not os.access(filename,os.F_OK): jsonfile = open(filename,'w') jsonfile.write(json.dumps(infoDict,sort_keys=True, indent=4)) jsonfile.close return
conn = psycopg2.connect(conn_string) except: print "problema ao conectar no banco de dados" #first request to the transparency portal (Portal da Transparência) try: LRequest = urllib2.Request(SEARCH_URL, " ") LResponse = br.open(LRequest) page = bs_parse(LResponse.read()) print SEARCH_URL print page #f.write(page) except: print "problema ao realizar primeira consulta na web" br.close() #store the array of top-level agencies and log the process to the screen print "################### Orgaos ###################" a = [] b = [] #cursor for navigating the database cursor = conn.cursor() #query the top-level agencies so they can be linked to their agencies cursor.execute("Select codigo from orgao_superior") rows = cursor.fetchall() for row in rows:
def _utrack_gen_fired(self): test_dir(join(self.dirname,'img')) br = Browser() # Ignore robots.txt br.set_handle_robots( False ) # Google demands a user-agent that isn't a robot br.addheaders = [('User-agent', 'Firefox')] # Retrieve the Google home page, saving the response resp1 = br.open( "http://utrack.crempa.net/index_cz.php" ) #http://utrack.crempa.net/index_en.php forms = ParseResponse(resp1, backwards_compat=False) form = forms[0] form.add_file(open(self.filename_converted), "text/plain", self.filename_converted) form.find_control(name='map_elevation').value = ['1'] resp3=form.click() resp2 = urlopen(resp3).read() resp4 = br.open(resp3) if self.download: resp = None weblinks = list(br.links()) imgi = 1 for link in weblinks: siteMatch = re.compile( 'show_graph' ).search( link.url ) if siteMatch: imgfile = open(join(self.dirname,'img','%i.png') % imgi, 'wb') resp = urlopen(link.absolute_url).read() imgfile.write(resp) imgfile.close() imgi += 1 for link in weblinks: siteMatch = re.compile( 'report.php' ).search( link.url ) if siteMatch: pdffile = open(join(self.dirname,'img','report.pdf'), 'wb') resp = urlopen(link.absolute_url).read() pdffile.write(resp) pdffile.close() # Print the site content = resp4.get_data() pattern = re.compile('src="show_graph.*?"') lst = pattern.findall(content) for ni, name in enumerate(lst): content = content.replace(name, 'src="img/%i.png"' % (ni+1)) pattern = re.compile('href="show_graph.*?"') lst = pattern.findall(content) for ni, name in enumerate(lst): content = content.replace(name, 'href="img/%i.png"' % (ni+1)) pattern = re.compile('Date of track: </td>\s+<td> (.*?) </td>') lst = pattern.findall(content) if len(lst) >= 2: if lst[0] != lst[-1]: track_date = lst[0] + '-' + lst[-1] else: track_date = lst[0] else: track_date = lst pattern = re.compile('href="report.php.*?"') lst = pattern.findall(content) content = content.replace(lst[0], 'href="img/report.pdf"') content = content.replace('<a href="#"', '<a href="#report_0"', 1) #content = content.replace('href="../style/style.css"', 'href="../style/style.css"') content = content.replace('src="../img/pdf.gif"', 'src="../style/pdf.gif"') content = content.replace('src="../img/logo.gif"', 'src="../style/logo.gif"') content = content.replace('src="../img/elegend.png"', 'src="../style/elegend.png"') #content = content.replace(r' >hide report</a>', r' >show report</a>') content = re.sub(r' >skr.*?t report</a>', r' >zobrazit report</a>', content) content = content.replace('if(divs[i].id==\'report_0\')', 'if(divs[i].id==\'report\')') content = content.replace(UTRACK_API_KEY, API_KEY) title = '<h1 style="margin-bottom:10px">%s <span style="font-size:small">%s</span></h1>\n' % (self.title,track_date) download_src = ''' <a href="%s">Download source gpx file</a> ''' % os.path.basename(self.filename_converted) iframe_map_track = ''' <iframe src="gps_vis/map_track.html" width="870" height="600" marginwidth="0" marginheight="0" scrolling="no" frameborder="0" style="width: 870px; height: 600px; margin-top:10px; margin-left: 10px; margin-bottom: 10px; position: relative; overflow: hidden; font-family: arial,sans-serif; line-height: normal; padding: 0pt;"> <a href="gps_vis/map_track.html">Click here for the map</a> </iframe>\n ''' iframe_map_speed = ''' <iframe src="gps_vis/map_speed.html" width="870" height="600" marginwidth="0" marginheight="0" scrolling="no" frameborder="0" style="width: 870px; height: 600px; margin-left: 10px; margin-bottom: 10px; position: relative; overflow: hidden; font-family: arial,sans-serif; 
line-height: normal; padding: 0pt;"> <a href="gps_vis/map_speed.html">Click here for the map</a> </iframe>\n ''' profile_img = ''' <img src="gps_vis/profile.png" alt="profile" border="0" height="250" width="870" style="width: 870px; height: 250px; margin-left: 10px; margin-bottom: 10px; position: relative; overflow: hidden; font-family: arial,sans-serif; line-height: normal; padding: 0pt;"> ''' content = content.replace('<div id="page">', '<div id="page">\n' + title + iframe_map_track + profile_img) ofile = open(join(self.dirname,'track_report.html'),'w') ofile.write(content) ofile.close() br.close() print 'utrack generated'
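# Editor's sketch (not part of the original handler): the repeated pattern-and-replace
# blocks above boil down to rewriting every show_graph reference so it points at the
# locally saved img/<n>.png files, in the order they were downloaded. The function
# name is hypothetical; `re` is already imported by the surrounding module.
def localize_graph_links(content, attr):
    pattern = re.compile(attr + '="show_graph.*?"')
    for i, found in enumerate(pattern.findall(content), start=1):
        content = content.replace(found, '%s="img/%i.png"' % (attr, i))
    return content

# content = localize_graph_links(content, 'src')
# content = localize_graph_links(content, 'href')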
class RequestQuery: def __init__(self,config): self.br=Browser() self.config = config # Initialise connections self.mySiteDB = SiteDBJSON() self.dbsPhys01 = DbsApi(url = dbs_base_url+"phys01/DBSReader/") self.dbsPhys02 = DbsApi(url = dbs_base_url+"phys02/DBSReader/") self.dbsPhys03 = DbsApi(url = dbs_base_url+"phys03/DBSReader/") def __del__(self): self.br.close() def getScramArchByCMSSW(self): """ Get from the list of available CMSSW releases return a dictionary of ScramArchitecture by CMSSW """ # Set temporary conection to the server and get the response from cmstags url = 'https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML' br = Browser() br.set_handle_robots(False) response=br.open(url) soup = BeautifulSoup(response.read()) # Dictionary form # {'CMSSW_X_X_X':[slc5_amd64_gcc472], ... } archByCmssw={} # Fill the dictionary for arch in soup.find_all('architecture'): for cmssw in arch.find_all('project'): # CMSSW release cmsswLabel = cmssw.get('label').encode('ascii', 'ignore') if cmsswLabel not in archByCmssw: archByCmssw[cmsswLabel]=[] # ScramArch related to this CMSSW release archName = arch.get('name').encode('ascii', 'ignore') archByCmssw[cmsswLabel].append(archName) return archByCmssw def getDatasetOriginSites(self, dbs_url, data): """ Get the origin sites for each block of the dataset. Return a list block origin sites. """ local_dbs = dbs_url.split('/')[5] if local_dbs == 'phys01': response = self.dbsPhys01.listBlocks(detail=True,dataset=data) elif local_dbs == 'phys02': response = self.dbsPhys02.listBlocks(detail=True,dataset=data) elif local_dbs == 'phys03': response = self.dbsPhys03.listBlocks(detail=True,dataset=data) pnnList = set() for block in response: pnnList.add(block['origin_site_name']) psnList = self.mySiteDB.PNNstoPSNs(pnnList) return psnList, list(pnnList) def setGlobalTagFromOrigin(self, dbs_url,input_dataset): """ Get the global tag of the dataset from the source dbs url. 
If it is not set, then set global tag to 'UNKNOWN' """ globalTag = "" local_dbs = dbs_url.split('/')[5] if local_dbs == 'phys01': response = self.dbsPhys01.listOutputConfigs(dataset=input_dataset) elif local_dbs == 'phys02': response = self.dbsPhys02.listOutputConfigs(dataset=input_dataset) elif local_dbs == 'phys03': response = self.dbsPhys03.listOutputConfigs(dataset=input_dataset) globalTag = response[0]['global_tag'] # GlobalTag cannot be empty if globalTag == '': globalTag = 'UNKNOWN' return globalTag def isDataAtUrl(self, dbs_url,input_dataset): """ Returns True if the dataset is at the dbs url, if not returns False """ local_dbs = dbs_url.split('/')[5] if local_dbs == 'phys01': response = self.dbsPhys01.listDatasets(dataset=input_dataset) elif local_dbs == 'phys02': response = self.dbsPhys02.listDatasets(dataset=input_dataset) elif local_dbs == 'phys03': response = self.dbsPhys03.listDatasets(dataset=input_dataset) # This means that the dataset is not at the url if not response: return False else: return True def getLabelByValueDict(self, control): """ From control items, create a dictionary by values """ d = {} for item in control.items: value = item.attrs['value'] label = item.attrs['label'] d[value] = label return d def getValueByLabelDict(self, control): """ From control items, create a dictionary by labels """ d = {} for item in control.items: value = item.attrs['value'] label = item.attrs['label'] d[label] = value return d def createRequestJSON(self, ticket, input_dataset, dbs_url, cmssw_release, group_name, version = 1): """ Creates a JSON file 'Ticket_#TICKET.json' with the needed information for creating a requeston ReqMgr. Input: - ticket: the ticket #, for instance 110773 on https://ggus.eu/?mode=ticket_info&ticket_id=110773 - input_dataset - dbs_url: only the instance name, For example: "phys01" for https://cmsweb.cern.ch/dbs/prod/phys01/DBSReader - cmssw_release - group_name: the physics group name - version: the dataset version, 1 by default. It returns a dictionary that contains the request information. 
""" scramArchByCMSSW = self.getScramArchByCMSSW() self.nodeMappings = self.phedex.getNodeMap() task = ticket print("Processing ticket: %s" % task) #splitting input dataset input_primary_dataset = input_dataset.split('/')[1].replace(' ','') input_processed_dataset = input_dataset.split('/')[2].replace(' ','') data_tier = input_dataset.split('/')[3].replace(' ','') # Transform input value to a valid DBS url #dbs_url = "https://cmsweb.cern.ch/dbs/prod/"+dbs_url+"/DBSReader" dbs_url = dbs_base_url+dbs_url+"/DBSReader" release_id = cmssw_release # check if deprecated release was used release = cmssw_release # check if release has not ScramArch match if release not in scramArchByCMSSW: raise Exception("Error on ticket %s due to ScramArch mismatch" % task) else: scram_arch = scramArchByCMSSW[release][-1] # check if dataset is not at dbs url try: data_at_url = self.isDataAtUrl(dbs_url,input_dataset) except: raise Exception('Error on ticket %s, dataset %s not available at %s' %(task, input_dataset,dbs_url)) if not data_at_url: raise Exception('Error on ticket %s, dataset %s not available at %s' %(task, input_dataset,dbs_url)) ## Get Physics Group group_squad = 'cms-storeresults-'+group_name.replace("-","_").lower() ## Get Dataset Version dataset_version = str(version) # Set default Adquisition Era for StoreResults acquisitionEra = "StoreResults" ## Construction of the new dataset name (ProcessingString) ## remove leading hypernews or physics group name and StoreResults+Version if input_processed_dataset.find(group_name)==0: new_dataset = input_processed_dataset.replace(group_name,"",1) else: stripped_dataset = input_processed_dataset.split("-")[1:] new_dataset = '_'.join(stripped_dataset) # Get dataset site info: psnList, pnnList = self.getDatasetOriginSites(dbs_url,input_dataset) infoDict = {} # Build store results json # First add all the defaults values infoDict["RequestType"] = "StoreResults" infoDict["UnmergedLFNBase"] = "/store/unmerged" infoDict["MergedLFNBase"] = "/store/results/" + group_name.replace("-","_").lower() infoDict["MinMergeSize"] = 1500000000 infoDict["MaxMergeSize"] = 5000000000 infoDict["MaxMergeEvents"] = 100000 infoDict["TimePerEvent"] = 40 infoDict["SizePerEvent"] = 512.0 infoDict["Memory"] = 2394 infoDict["CmsPath"] = "/uscmst1/prod/sw/cms" infoDict["Group"] = "DATAOPS" infoDict["DbsUrl"] = dbs_url # Add all the information pulled from Savannah infoDict["AcquisitionEra"] = acquisitionEra infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(dbs_url, input_dataset) infoDict["DataTier"] = data_tier infoDict["InputDataset"] = input_dataset infoDict["ProcessingString"] = new_dataset infoDict["CMSSWVersion"] = release infoDict["ScramArch"] = scram_arch infoDict["ProcessingVersion"] = dataset_version infoDict["SiteWhitelist"] = psnList # Create report for Migration2Global report = {} #Fill json file, if status is done self.writeJSONFile(task, infoDict) report["json"] = 'y' report["task"] = int(task) report["InputDataset"] = input_dataset report["ProcessingString"] = new_dataset report["localUrl"] = dbs_url report["sites"] = psnList report["pnns"] = pnnList return report def writeJSONFile(self, task, infoDict): """ This writes a JSON file at ComponentDir """ ##check if file already exists filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json' if not os.access(filename,os.F_OK): jsonfile = open(filename,'w') request = {'createRequest':infoDict} ## CHECK THIS BEFORE FINISHING jsonfile.write(json.dumps(request,sort_keys=True, indent=4)) jsonfile.close return def 
removeJSONFile(self,task): """ This removes the JSON file at ComponentDir if it was created """ filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json' if os.access(filename,os.F_OK): os.remove(filename) return def printReport(self, report): """ Print out a report """ print("%20s %5s %10s %50s %50s" %( 'Ticket','json','local DBS','Sites','pnns')) print("%20s %5s %10s %50s %50s" %( '-'*20,'-'*5,'-'*10,'-'*50,'-'*50 )) json = report["json"] ticket = report["task"] #status = report["ticketStatus"] localUrl = report["localUrl"].split('/')[5] site = ', '.join(report["sites"]) pnns = ', '.join(report["pnns"]) print("%20s %5s %10s %50s %50s" %(ticket,json,localUrl,site,pnns))
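# Minimal usage sketch for the ticket-driven RequestQuery variant above. The ticket
# number, dataset, release and group are illustrative only, and the sketch assumes a
# PhEDEx handle has been attached to the instance (createRequestJSON calls
# self.phedex.getNodeMap(), which this variant's __init__ does not set up).
config = {"ComponentDir": "/tmp/storeresults"}
rq = RequestQuery(config)
report = rq.createRequestJSON(
    ticket=110773,
    input_dataset="/SomePrimary/SomeGroup-SomeProcessed-v1/USER",
    dbs_url="phys03",
    cmssw_release="CMSSW_5_3_11",
    group_name="SUSY",
    version=1,
)
rq.printReport(report)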
import re

URL_PATH = 'https://ss.zadarma.com/'
USER_FIELD = 'email'
PASS_FIELD = 'password'

out_balance = 0.0

# Log in to the Zadarma self-service portal through the first form on the page.
browser = Browser()
browser.open(URL_PATH)
browser.select_form(nr=0)
browser.form[USER_FIELD] = in_username
browser.form[PASS_FIELD] = in_password
browser.submit()

# Reload the portal page with the authenticated session and scrape the balance.
browser.open(URL_PATH)
response = browser.response()
html = response.read()
browser.close()

f1 = re.search(r'>\$(.*)</a></span>', html)
if f1 is not None:
    balance_string = f1.groups()[0]
    print(balance_string)
    out_balance = float(balance_string)

print(out_balance)
def trilegal(outputFileName, longitude = 0, latitude = 0, coordinateType = "galactic", fieldArea = 1, passband = 4, magnitudeLimit = 26, magnitudeResolution = 0.1, IMFtype = 3, includeBinaries = True, binaryFraction = 0.3, lowerBinaryMassRatio = 0.7, upperBinaryMassRatio = 1.0, extinctionType = 2, extinctionValue = 0.0378, extinctionSigma = 0.0, useThinDisc = False, useThickDisc = False, useBulge = True): """ Query the web interface of the TRILEGAL population synthesis code. The TRILEGAL webform is automatically filled and submitted. The computations are done locally on Girardi's computer. As soon as they are finished, the script retrieves the data file. Example: >>> trilegal("output.txt", longitude=3, latitude=14, coordinateType="galactic", fieldArea=1, magnitudeLimit=7, useThinDisc=True) @param outputFileName: name of file wherein trilegal output will be saved @type outputFileName: string @param longitude: galactic longitude (degrees) or right ascension (hours) @type longitude: integer @param latitude: galactic latitude (degrees) or declination (degrees) @type latitude: integer @param coordinateType: either "galactic", or "equatorial" @type coordinateType: string @param fieldArea: total field area in square degrees (max. 10 deg^2) @type fieldArea: float @param passband: U,B,V,R,I,J,H,K = 1,2,3,4,5,6,7,8 for magnitude limit @type passband: integer @param magnitudeLimit: magnitude limit in specified passband @type magnitudeLimit: float @param magnitudeResolution: Distance modulus resolution of Galaxy components (mag) @type magnitudeResolution: float @param IMFtype: type of Initial Mass Function of single stars 1 = Salpeter with cutoff at 0.01, Msun, 2 = Chabrier exponential, 3 = Chabrier lognormal, 4 = Kroupa corrected for binaries, 5 = Kroupa not corrected for binaries @type IMFtype: integer @param includeBinaries: include binaries in the population (True or False) @type includeBinaries: boolean @param binaryFraction: fraction of binaries @type binaryFraction: float @param lowerBinaryMassRatio: lower limit of binary mass fraction @type lowerBinaryMassRatio: float @param upperBinaryMassRatio: upper limit of binary mass fraction @type upperBinaryMassRatio: float @param extinctionType: Type of extinction 0: no dust extinction 1: local calibration 2: calibration at infinity @type extinctionType: integer @param extinctionValue: for a local calibration this is dAv/dr in mag/pc for the calibration at infinity this is Av at infinity in mag. @type extinctionValue: float @param extinctionSigma: 1-sigma extinction dispersion / total extinction (max. 0.3) @type extinctionSigma: float @param useThinDisk: if True use squared hyperbolic secant along z, if False don't include @type useThinDisk: boolean @param useThickDisk: if True use squared hyperbolic secant along z, if False don't include @type useThickDisk: boolean @param useBulge: if True use triaxal bulge, if False don't include @type useBulge: boolean @return None. A file is retrieved """ # The latest Trilegal web version trilegalURL = "http://stev.oapd.inaf.it/cgi-bin/trilegal" # Get the web form timestamp = strftime("%a, %d %b %Y %H:%M:%S", gmtime()) print("{0}: Opening TRILEGAL web interface".format(timestamp)) myBrowser = Browser() try: myBrowser.open(trilegalURL) except: timestamp = strftime("%a, %d %b %Y %H:%M:%S", gmtime()) print("{0}: Unable to open the TRILEGAL website".format(timestamp)) return myBrowser.select_form(nr=0) # there is only one form... # Fill in the form. 
To know how the different fields in the form are # named, we used # >>> request = mechanize.Request(trilegalURL) # >>> response = mechanize.urlopen(request) # >>> forms = mechanize.ParseResponse(response, backwards_compat=False) # >>> print forms[0] timestamp = strftime("%a, %d %b %Y %H:%M:%S", gmtime()) print("{0}: Filling TRILEGAL web form".format(timestamp)) if coordinateType == "galactic": myBrowser["gal_coord"] = ["1"] myBrowser["gc_l"] = str(longitude) myBrowser["gc_b"] = str(latitude) else: myBrowser["gal_coord"] = ["2"] myBrowser["eq_alpha"] = str(longitude) myBrowser["eq_delta"] = str(latitude) myBrowser["field"] = str(fieldArea) myBrowser["icm_lim"] = str(passband) myBrowser["mag_lim"] = str(magnitudeLimit) myBrowser["mag_res"] = str(magnitudeResolution) myBrowser["binary_kind"] = [str(int(includeBinaries))] myBrowser["binary_frac"] = str(binaryFraction) myBrowser["binary_mrinf"] = str(lowerBinaryMassRatio) myBrowser["binary_mrsup"] = str(upperBinaryMassRatio) myBrowser["extinction_kind"] = [str(extinctionType)] if extinctionType == 1: myBrowser["extinction_rho_sun"] = str(extinctionValue) if extinctionType == 2: myBrowser["extinction_infty"] = str(extinctionValue) myBrowser["extinction_sigma"] = str(extinctionSigma) if useThinDisc: myBrowser["thindisk_kind"] = ["3"] else: myBrowser["thindisk_kind"] = ["0"] if useThickDisc: myBrowser["thickdisk_kind"] = ["3"] else: myBrowser["thickdisk_kind"] = ["0"] if useBulge: myBrowser["bulge_kind"] = ["2"] else: myBrowser["bulge_kind"] = ["0"] # Submit the completed form timestamp = strftime("%a, %d %b %Y %H:%M:%S", gmtime()) print("{0}: Submitting completed TRILEGAL web form".format(timestamp)) nextWebPage = myBrowser.submit() # Trilegal is now computing the result. Click on the special "Refresh" # button until the webpage says that the computations are finished. timestamp = strftime("%a, %d %b %Y %H:%M:%S", gmtime()) print ("{0}: Waiting until TRILEGAL computations are finished".format(timestamp)) myBrowser.select_form(nr=0) # one form on the "be patient" web page message = "Your job was finished" while (message not in nextWebPage.read()): nextWebPage = urlopen(myBrowser.click()) # click on the Refresh button myBrowser.select_form(nr=0) # select form again, so that we can make a click again sleep(5) # to not overload the website with refresh requests # Get the url of the outputfile, and retrieve it. This can take a while. timestamp = strftime("%a, %d %b %Y %H:%M:%S", gmtime()) print("{0}: Retrieving TRILEGAL output file".format(timestamp)) outputLink = myBrowser.links(url_regex="lgirardi/tmp/output").next() urlretrieve(outputLink.absolute_url, outputFileName) myBrowser.close() # Save the parameters in an info file parameterInfo = """ coordinateType {0} longitude {1} latitude {2} fieldArea {3} passband {4} magnitudeLimit {5} magnitudeResolution {6} IMFtype {7} includeBinaries {8} binaryFraction {9} lowerBinaryMassRatio {10} upperBinaryMassRatio {11} extinctionType {12} extinctionValue {13} extinctionSigma {14} """.format(coordinateType, longitude, latitude, fieldArea, passband, magnitudeLimit, magnitudeResolution, IMFtype, includeBinaries, binaryFraction, lowerBinaryMassRatio, upperBinaryMassRatio, extinctionType, extinctionValue, extinctionSigma) infoFileName = "info_" + outputFileName with open(infoFileName, 'w') as infoFile: infoFile.write(parameterInfo)
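# Example call, mirroring the docstring example above; the pointing, field area and
# magnitude limit are illustrative values.
trilegal("output.txt", longitude=3, latitude=14, coordinateType="galactic",
         fieldArea=1, magnitudeLimit=7, useThinDisc=True)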
class CoreEmulator(Emulator): def __init__(self, username, password): super(CoreEmulator, self).__init__(username, password) self.setup_emulator() def setup_emulator(self): self.browser = Browser() self.browser.set_handle_robots(False) self.browser.addheaders = moodle.USER_AGENT self.cookiejar = CookieJar() self.browser.set_cookiejar(self.cookiejar) def session_expired(self): return self.browser.geturl().endswith(moodle.LOGIN_LOCATION) @throws_moodlefuse_error(exception.LoginException) def login(self): self.open_login_page(self.browser.open) self.browser.select_form( predicate=lambda form: form.attrs.get('id') == attributes.LOGIN ) self.browser.form.set_value(self.username, name='username') self.browser.form.set_value(self.password, name='password') resp = self.browser.submit() if resp.geturl().endswith(moodle.LOGIN_LOCATION): raise Exception @throws_moodlefuse_error(resource_errors.UnableToDownloadResource) def download(self, destination, source): source = str(source) if not source.startswith('http://') and not source.startswith('file://'): source = config['TEST_DATA'] + '/' + source self.browser.retrieve(source, destination) def open_link(self, url): response = self.browser.open(url) return BeautifulSoup(response.read()) def check_form_checkbox(self, checkboxname): self.browser.find_control(checkboxname).items[0].selected = True def uncheck_form_checkbox(self, checkboxname): self.browser.find_control(checkboxname).items[0].selected = False def add_form_content(self, inputname, content): self.browser.form.set_value(content, name=inputname) def close_form(self): self.browser.submit() def set_form_to_first_form(self): self.browser.select_form(nr=0) def set_form_to_form_with_control_value(self, value): for form in self.browser.forms(): for control in form.controls: if control.value == value: self.browser.form = form @throws_moodlefuse_error(exception.UnableToToggleEditing) def turn_course_editing_on(self): self.set_form_to_form_with_control_value(moodle.EDIT_ON_MOODLE_BUTTON_TEXT) response = self.browser.submit() return BeautifulSoup(response.read()) def _setup_assignments_for_parsing(self, submission_filter): self.set_form_to_form_with_control_value('Save and update table') self.browser.form["filter"] = [submission_filter] self.browser.form["perpage"] = ["100"] self.uncheck_form_checkbox('quickgrading') response = self.browser.submit() return BeautifulSoup(response.read()) def filter_assignment_submissions(self): return self._setup_assignments_for_parsing("submitted") def unfilter_assignment_submissions(self): return self._setup_assignments_for_parsing("") @throws_moodlefuse_error(exception.UnableToToggleEditing) def turn_course_editing_off(self): self.set_form_to_form_with_control_value(moodle.EDIT_OFF_MOODLE_BUTTON_TEXT) response = self.browser.submit() return BeautifulSoup(response.read()) @throws_moodlefuse_error(course_errors.InvalidMoodleIndex) def get_courses(self): return self.open_link(config['MOODLE_INDEX_ADDRESS']) @throws_moodlefuse_error(course_errors.UnableToObtainCategoryList) def get_course_categories(self, url): return self.open_link(url) @throws_moodlefuse_error(resource_errors.UnableToObtainResourceList) def get_course_resource_names(self, url): return self.open_link(url) def close(self): self.browser.close()
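# Minimal usage sketch for CoreEmulator; the credentials and the download URL are
# placeholders, and the config/moodle constants come from the surrounding moodlefuse
# code base.
emulator = CoreEmulator("username", "password")
emulator.login()
courses = emulator.get_courses()  # BeautifulSoup of the Moodle index page
emulator.download("/tmp/submission.pdf",
                  "http://moodle.example.edu/pluginfile.php/123/file.pdf")
emulator.close()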
class Operacions(object): def __str__(self): return "%s.%s" % (self.__module__, self.__class__.__name__) def __init__(self, request, username, password, eid='', tid=''): """ """ self.request = request registry = request.registry self.epitool=registry.getUtility(IEPIUtility) self.initialized = True self.username = username self.password = password self.equipID=eid self.tecnicID=tid self.browser_login,self.externalLoginKey=self.epitool.recoverBrowserSession(request, self.username,'operacions') if self.browser_login: #Si tenim cookies anteriors, creem un browser nou i li passem les cookies guardades self.br=Browser() self.br.set_handle_robots(False) cj = LWPCookieJar() self.br.set_cookiejar(cj) for co in self.browser_login: ck = Cookie(version=co['version'], name=co['name'], value=co['value'], port=co['port'], port_specified=co['port_specified'], domain=co['domain'], domain_specified=co['domain_specified'], domain_initial_dot=co['domain_initial_dot'], path=co['path'], path_specified=co['path_specified'], secure=co['secure'], expires=co['expires'], discard=co['discard'], comment=co['comment'], comment_url=co['comment_url'], rest=co['rest']) cj.set_cookie(ck) print "Logging-in into operacions via browser" else: #self.br = Browser() try: self.login() except: self.initialized=False return def log(self, message): """ """ logger = logging.getLogger('RUNNING') logger.info('%s - %s' % (self.username,message)) def getBrowserSession(self): cookies = [] for key in self.br._ua_handlers['_cookies'].cookiejar._cookies.keys(): domain = self.br._ua_handlers['_cookies'].cookiejar._cookies[key] for key2 in domain.keys(): cookie = domain[key2] for key3 in cookie: co = cookie[key3] cookies.append(dict(version=co.version, name=co.name, value=co.value, port=co.port, port_specified=co.port_specified, domain=co.domain, domain_specified=co.domain_specified, domain_initial_dot=co.domain_initial_dot, path=co.path, path_specified=co.path_specified, secure=co.secure, expires=co.expires, discard=co.discard, comment=co.comment, comment_url=co.comment_url, rest=co._rest)) return (cookies,self.externalLoginKey) def reloadExternalLoginKey(self): """ """ self.log("reloadExternalLoginKey") mainpage = self.getOperacionsMainPage() self.loadExternalLoginKey(mainpage.read()) def getOperacionsMainPage(self): """ """ mainpage = self.br.open(LOGIN_URL) return mainpage def loadExternalLoginKey(self,html): """ """ key = html.split('externalLoginKey')[-1][1:15] self.externalLoginKey = key.startswith('EL') and key or '' def closeBrowser(self): """ """ self.br.close() def saveSessionData(self,): """ """ self.epitool.saveBrowserSession(self.request, self.username,self.getBrowserSession(),'operacions') return def login(self, message = "Logging-in into operacions via regular login"): """ Es logueja a operacions amb el login tradicional web """ self.log('Operacions Login %s' % message) self.br=Browser() self.br.set_handle_equiv(False) mainpage = self.getOperacionsMainPage() self.br.select_form(nr=0) self.br['username']=self.username self.br['password']=self.password login_response = self.br.submit() html = login_response.read() self.loadExternalLoginKey(html) self.saveSessionData() def checkBrowserExpired(self,html): """ Comprova que el browser nou que hem generat en base a cookies guardades, continua actiu Per ferho, comprovem si l'html de la pagina que acavem de obrir conte el text de canvi de contrasenya Retorna cert si el browser esta caducat """ return html.find("http://www.upcnet.es/CanviContrasenyaUPC")>0 def obtenirCodisUsuari(self): 
""" """ self.log("obtenirCodisUsuari") imputacions = self.br.open('https://maul.upc.es:8444/imputacions/control/main?idEmpresa=1123&externalLoginKey=%s' % (self.externalLoginKey)) ihtml = imputacions.read() fixedhtml = ihtml.replace('</form\n','</form>\n') soup = BeautifulSoup(fixedhtml.replace('value""','value=""'),fromEncoding='utf-8') equipId = dict(soup.find('input',type='hidden', id='equipId').attrs)['value'] tecnicId = dict(soup.find('input',type='hidden', id='tecnicId').attrs)['value'] return (equipId,tecnicId) @reloginIfCrashed def obtenirComentariImputacio(self,iid): """ """ self.log("obtenirComentariImputacio") detallimputacio = self.br.open('https://maul.upc.es:8444/imputacions/control/imputacioDetall?timeEntryId=%s&externalLoginKey=%s' % (iid,self.externalLoginKey)) ihtml = detallimputacio.read() if self.checkBrowserExpired(ihtml): return 'EXPIRED' fixedhtml = ihtml.replace('</form\n','</form>\n') soup = BeautifulSoup(fixedhtml.replace('value""','value=""'),fromEncoding='utf-8') comentari = soup.find('table').findAll('td')[5].span.string return comentari and comentari.encode('utf-8') or '' def arreglarCometes(self,params): """ """ newparam=[] newparams = [] for param in params: cometes = len(param.split('"')) if cometes>=3: newparams.append(param) if cometes==2: if newparam==[]: newparam.append(param) else: newparams.append(','.join(newparam)) if cometes==1: newparam.append(param) return newparams def fixMalformed(self,html,sep,start,end): noucaixadiv = [] actual = '' escriure = True for string in html.split(sep): actual = string if escriure: noucaixadiv.append(actual) else: acomulat = acomulat+string if start in string: escriure=False acomulat = '' elif end in string and not escriure: escriure=True noucaixadiv.append(acomulat.replace(' %s' % (end),'%s %s' % (sep,end))) fixedHTML = sep.join(noucaixadiv) return fixedHTML #@cache(smartCacheKey) #@reloginIfCrashedAndCache @cache_region('default_term', 'obtenirPortalTecnic') def obtenirPortalTecnic(self, username): self.log("obtenirPortalTecnic sense cachejar") return self.obtenirPortalTecnicBase(username) @reloginIfCrashed def obtenirPortalTecnicBase(self, username): """ """ self.reloadExternalLoginKey() self.log("obtenirPortalTecnic") base_url = 'https://maul.upc.es:8444/portal/control/portalTecnicConsulta?' 
#self.reloadExternalLoginKey() parts = ['tipusCerca=', 'personaAssignada=%s' % (self.tecnicID), 'partyIdAss=%s' % (self.tecnicID), 'undefined=%s' % (self.tecnicID), 'statusId=ESTAT_OBERT_PENDENT', 'sensePaginacio=on', 'cercant=on', 'externalLoginKey=%s' % (self.externalLoginKey), ] url = base_url+'&'.join(parts) html = self.br.open(url).read() if self.checkBrowserExpired(html): return 'EXPIRED' html = html.replace('")">',')">') soup = BeautifulSoup(html,fromEncoding='UTF-8') seccions = [div for div in soup.findAll('div') if '"caixa"' in str(div)[1:30]] self.saveSessionData() return dict(ordres = seccions[0], tiquets = seccions[1], problemes = seccions[2], canvis = seccions[3], # percepcions = seccions[4] ) def obtenirOrdres(self,fname='obtenirOrdres'): """ """ soup = self.obtenirPortalTecnic(self.username)['ordres'] ordres = [] if soup: for tr in soup.findAll('tr'): if tr.td: tds = tr.findAll('td') ordre = {} href = tds[4].span.a._getAttrMap()['href'] params = dict([tuple(a.split('=')) for a in href.replace('&','&').replace('"','').split('?')[1].split('&')]) ordre['orderId']= params['orderId'] ordre['orderItemSeqId']=params['orderItemSeqId'] title = tds[4].span.a.string ordre['title']='%s - %s' % (ordre['orderId'],title) if ordre not in ordres: ordres.append(ordre) ordres = sorted(ordres,key=lambda ordre: ordre['orderId']) ordres.reverse() return ordres def processarTaula(self,soup): """ """ items = [] if soup: for tr in soup.findAll('tr'): if tr.td: tds = tr.findAll('td') item = {} item['requirementId'] = tds[3].a.span.string try: item['title']='%s - %s' % (item['requirementId'],tds[4].a.span.string) except: #import ipdb;ipdb.set_trace() pass items.append(item) return items def obtenirTiquetsAssignats(self): """ """ soup = self.obtenirPortalTecnic(self.username)['tiquets'] return self.processarTaula(soup) def obtenirProblemesAssignats(self): """ """ soup = self.obtenirPortalTecnic(self.username)['problemes'] return self.processarTaula(soup) def obtenirCanvisAssignats(self): """ """ soup = self.obtenirPortalTecnic(self.username)['canvis'] return self.processarTaula(soup) def obtenirTiquetsEquip(self): """ """ self.log("obtenirTiquetsEquip") self.reloadExternalLoginKey() base_url = 'https://maul.upc.es:8444/tiquets/control/tiquetsEquipConsulta?' parts = ['tipusCerca=simple', 'VIEW_INDEX=1', 'VIEW_SIZE=30', 'statusId=ESTAT_OBERT_PENDENT', 'nomesTancats=on', 'personaAssignada=%s' % (self.tecnicID), 'undefined=%s' % (self.tecnicID), 'sensePaginacio=on', 'cercant=on', 'externalLoginKey=%s' % (self.externalLoginKey) ] base_url = 'https://maul.upc.es:8444/tiquets/control//tiquetsAssignatsConsulta?' 
parts = ['statusId=ESTAT_OBERT_PENDENT', 'sensePaginacio=on', 'externalLoginKey=%s' % (self.externalLoginKey) ] url = base_url+'&'.join(parts) cerca_tiquets = self.br.open(url) thtml = cerca_tiquets.read() soup = BeautifulSoup(thtml.replace('value""','value=""').replace('\n','').replace('\t',''),fromEncoding='utf-8') inicidiv = thtml.find('div class="caixa"')-1 fidiv = thtml.find('div class="endcolumns"') caixadiv = thtml[inicidiv:fidiv] soup = BeautifulSoup(caixadiv,fromEncoding='utf-8') tiquets = [] self.saveSessionData() return tiquets def getUrlConsultaImputacions(self,di,df): """ """ # Hem tret el equipId dels parametres, aixi surten les imputacions de tots els equips #params = dict(equipId=self.equipID, params = dict(equipId='', tecnicId=self.tecnicID, dataInicial=di, dataFinal=df, sensePaginacio='on', cercant='on', tipusCerca='simple', externalLoginKey=self.externalLoginKey) param_string = '&'.join(['%s=%s' % (key,params[key]) for key in params]) url = 'https://maul.upc.es:8444/imputacions/control/imputacionsConsulta?%s' % param_string return url def getImputacionsRaw(self,html_raw): """ """ html= html_raw.replace('value""','value=""').replace('\n','').replace('\t','') # Aquesta adreça de correu al mig de l'html ens fa petar el beautifulsoup, la amaguem de moment, ja # que només és un cas aïllat d'un tiquet, si torna a passar, s'hauria de buscar una regex per filtrar # text amb aquest format avans de parsejar fixedhtml = html.replace('<*****@*****.**>','') fixedhtml = fixedhtml.replace('form<!--','form><!--') soup = BeautifulSoup(fixedhtml,fromEncoding='utf-8') imputacions_raw = [a for a in soup.findAll('tr') if str(a).find('class="previsio"')>0][:-1] return imputacions_raw #@cache(smartCacheKey) #@reloginIfCrashedAndCache @cache_region('default_term', 'obtenirImputacions') def obtenirImputacions(self, username, di, df): self.log("obtenirImputacions sense cachejar") return self.obtenirImputacionsBase(username, di, df) @reloginIfCrashed def obtenirImputacionsBase(self, username, di, df): """ """ #import ipdb; ipdb.set_trace() self.reloadExternalLoginKey() self.log("obtenirImputacions entre %s i %s" % (di,df)) if di==None and df==None: pass result = self.br.open(self.getUrlConsultaImputacions(di,df)) html = result.read() if self.checkBrowserExpired(html): return 'EXPIRED' imputacions_raw = self.getImputacionsRaw(html) ## Si no obtenim cap resultat, provarem de recarregar el externalLoginKey, ja que canvia per alguna ## extranya raó, tot i que la sessió i les cookies encara són vàlides ## Si després d'això no retorna cap resultat, s'enten que realment no te imputacions dins el rang de dates if imputacions_raw == []: try: #self.reloadExternalLoginKey() result = self.br.open(self.getUrlConsultaImputacions(di,df)) imputacions_raw = self.getImputacionsRaw(result.read()) except: pass imputacions = [] for imputacio in imputacions_raw: parts = imputacio.findAll('td') date = parts[0].span.string dd,mm,aaaa = date.split('-') iid = parts[0].a.attrs[0][1].split('timeEntryId=')[1].split('"')[0] amount = parts[3].span.string imp_type = parts[5].acronym.string.__str__().lstrip() try: referencia = parts[6].span.span.string except: referencia = parts[6].span.a.string if referencia==None: referencia = ' - (Sense referència)' tdict = dict(type=imp_type, date = (dd,mm,aaaa), iid = iid, amount = amount, referencia = referencia) imputacions.append(tdict) imputacions.reverse() # Guardem els dies que hem consultat a la utility per despres poder cridar correctament als invalidadors de cache 
self.epitool.saveObtenirImputacionsDays(self.request, username, di, df) self.saveSessionData() return imputacions @reloginIfCrashed def imputarOrdre(self,data,hores,minuts,orderId,orderItemSeqId,fname='imputarOrdre'): """ """ self.log("imputarOrdre") self.reloadExternalLoginKey() parts = ['dataImputacio=%s' % (data), 'horesImputades=%s' % (hores), 'minutsImputats=%s' % (minuts), 'orderId=%s' % (orderId), 'orderItemSeqId=%s' % (orderItemSeqId), 'cas=ORDRE', 'partyId=%s' % (self.tecnicID), 'externalLoginKey=%s' % (self.externalLoginKey), ] url = 'https://maul.upc.es:8444/imputacions/control/imputacioAltaGraella?' + '&'.join(parts) response = self.br.open(url) html = response.read() if self.checkBrowserExpired(html): return 'EXPIRED' exitcode = eval(html) exitcode = eval(html) exitcode['hores']=hores exitcode['minuts']=minuts.rjust(2,'0') # Invalidem la cache # getUtility(IRAMCache).invalidate('obtenirImputacions') day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username) region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2) self.saveSessionData() return exitcode @reloginIfCrashed def imputarActivitat(self,data,hores,minuts,proces,activitatId,fname='imputarActivitat'): """ """ self.log("imputarActivitat") #self.reloadExternalLoginKey() parts = ['dataImputacio=%s' % (data), 'horesImputades=%s' % (hores), 'minutsImputats=%s' % (minuts), 'proces=%s' % (proces), 'activitatId=%s' % (activitatId), 'cas=ACTIVITAT', 'partyId=%s' % (self.tecnicID), 'externalLoginKey=%s' % (self.externalLoginKey), ] url = 'https://maul.upc.es:8444/imputacions/control/imputacioAltaGraella?' + '&'.join(parts) response = self.br.open(url) html = response.read() if self.checkBrowserExpired(html): return 'EXPIRED' exitcode = eval(html) exitcode['hores']=hores exitcode['minuts']=minuts.rjust(2,'0') # Invalidem la cache # getUtility(IRAMCache).invalidate('obtenirImputacions') day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username) region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2) return exitcode def getCodiImputacio(self,data,minuts,ref,tipus): """ Busquem una imputació filtrant per tipus d'imputació, minuts imputats, data i referència. D'entre tots els resultats, ens quedem amb la que tingui el iid més alt, que serà la última imputada. 
""" self.log("getCodiImputacio") # Invalidem la cache # getUtility(IRAMCache).invalidate('obtenirImputacions') day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username) region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2) imputacions = self.obtenirImputacions(self.username, data, data) tt = tuple(data.split('-')) imputacio = None newest=True for imp in imputacions: if imputacio!=None: newest = imp['iid']>imputacio['iid'] if imp['date']==tt and ref in imp['referencia'] and HMaMinuts(imp['amount'])==minuts and newest and imp['type']==tipus: imputacio = imp if imputacio: return imputacio['iid'] else: return '' @reloginIfCrashed def imputarTiquet(self,data,hores,minuts,tiquetId,fname='imputarTiquet'): """ Imputa a un tiquet utilitzant el formulari del gestor d'operacions """ self.log("imputarTiquet") today = '-'.join(DateTimeToTT(DateTime())) self.reloadExternalLoginKey() parts = ['requirementId=%s' % (tiquetId), 'externalLoginKey=%s' % (self.externalLoginKey), ] url = 'https://maul.upc.es:8444/tiquets/control/tiquetDetallAssignacioHistoria?' + '&'.join(parts) self.br.open(url) try: self.br.select_form(name='afegirImputacio') except FormNotFoundError: return dict(hores='', minuts='', confirm='error', code='No s''ha pogut imputar en un tiquet tancat.') except: return 'EXPIRED' self.br.form.action='https://maul.upc.es:8444/tiquets/control/imputarTempsTasca' self.br.form.find_control('minutsImputats').readonly=False minutsImputats = int(hores)*60 + int(minuts) self.br['minutsImputats']=str(minutsImputats) self.br['horesImputadesHelper']=hores self.br['minutsImputatsHelper']=minuts addtiquet_response = self.br.submit() #html = addtiquet_response.read() #Consultem el codi de la imputació iid = self.getCodiImputacio(today,minutsImputats,tiquetId,'TI') code = iid.encode('utf-8') if data!=today: code = self.canviarImputacio(data,hores,minuts,iid) # Invalidem la cache # getUtility(IRAMCache).invalidate('obtenirImputacions') day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username) region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2) self.saveSessionData() return dict(hores=hores, minuts=minuts.rjust(2,'0'), confirm=iid=='' and 'error' or 'ok', code=code) @reloginIfCrashed def imputarProblema(self,data,hores,minuts,tiquetId,fname='imputarProblema'): """ Imputa a un problema utilitzant el formulari del gestor d'operacions """ self.log("imputarProblema") today = '-'.join(DateTimeToTT(DateTime())) self.reloadExternalLoginKey() parts = ['requirementId=%s' % (tiquetId), 'externalLoginKey=%s' % (self.externalLoginKey), ] url = 'https://maul.upc.es:8444/problemes/control/problemaDetallImputacions?' 
+ '&'.join(parts) self.br.open(url) self.br.select_form(name='afegirImputacio') self.br.form.action='https://maul.upc.es:8444/problemes/control/imputarTemps' self.br.form.find_control('minutsImputats').readonly=False minutsImputats = int(hores)*60 + int(minuts) self.br['minutsImputats']=str(minutsImputats) self.br['horesImputadesHelper']=hores self.br['minutsImputatsHelper']=minuts addtiquet_response = self.br.submit() #html = addtiquet_response.read() #Consultem el codi de la imputació iid = self.getCodiImputacio(today,minutsImputats,tiquetId,'PB') code=iid.encode('utf-8') if data!=today: code = self.canviarImputacio(data,hores,minuts,iid) # Invalidem la cache # getUtility(IRAMCache).invalidate('obtenirImputacions') day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username) region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2) self.saveSessionData() return dict(hores=hores, minuts=minuts.rjust(2,'0'), confirm=iid=='' and 'error' or 'ok', code=code) @reloginIfCrashed def canviarImputacio(self,novadata,hores,minuts,iid,fname='canviarImputacio'): """ """ self.log("canviarImputacio") if iid!='': code = iid self.reloadExternalLoginKey() parts = ['timeEntryId=%s' % (iid), 'dataImputacio=%s' % (novadata), 'horesImputades=%s' % (hores), 'minutsImputats=%s' % (minuts), 'externalLoginKey=%s' % (self.externalLoginKey), ] url = 'https://maul.upc.es:8444/imputacions/control/editarImputacio?' + '&'.join(parts) response = self.br.open(url) html = response.read() if self.checkBrowserExpired(html): return 'EXPIRED' # Invalidem la cache # getUtility(IRAMCache).invalidate('obtenirImputacions') day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username) region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2) self.saveSessionData() else: code = "No sha pogut imputar al dia %s. Refresca lepi i mou la imputacio manualment arrossegant-la al dia %s" % (novadata,novadata) code.decode('utf-8') return code.encode('utf-8') @reloginIfCrashed def modificarImputacio(self,hores,minuts,iid,comentari='',fname='modificarImputacio'): """ """ self.log("modificarImputacio") self.reloadExternalLoginKey() parts = ['timeEntryId=%s' % (iid), 'horesImputades=%s' % (hores), 'minutsImputats=%s' % (minuts), 'externalLoginKey=%s' % (self.externalLoginKey), ] if comentari: parts.append('editComentari=%s' % quote(comentari)) url = 'https://maul.upc.es:8444/imputacions/control/imputacioEdicioGraella?' + '&'.join(parts) response = self.br.open(url) html = response.read() if self.checkBrowserExpired(html): return 'EXPIRED' exitcode = eval(html) exitcode['hores']=str(int(hores)) exitcode['minuts']=minuts.rjust(2,'0') # Invalidem la cache # getUtility(IRAMCache).invalidate('obtenirImputacions') day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username) region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions', 'epi.operacions.Operacions', self.username, day1, day2) self.saveSessionData() return exitcode @reloginIfCrashed def esborrarImputacio(self,iid,fname='esborrarImputacio'): """ """ self.log("esborrarImputacio") self.reloadExternalLoginKey() parts = ['timeEntryId=%s' % (iid), 'externalLoginKey=%s' % (self.externalLoginKey), ] url = 'https://maul.upc.es:8444/imputacions/control/imputacioEsborrarGraella?' 
        url += '&'.join(parts)
        response = self.br.open(url)
        html = response.read()
        if self.checkBrowserExpired(html):
            return 'EXPIRED'
        exitcode = eval(html)
        # Invalidate the cache
        # getUtility(IRAMCache).invalidate('obtenirImputacions')
        day1, day2 = self.epitool.getObtenirImputacionsDays(self.request, self.username)
        region_invalidate('epi.operacions.obtenirImputacions', 'default_term', 'obtenirImputacions',
                          'epi.operacions.Operacions', self.username, day1, day2)
        self.saveSessionData()
        return exitcode
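The session reuse in Operacions.__init__ and getBrowserSession() amounts to flattening the mechanize cookie jar into plain dicts and rebuilding Cookie objects from them later. Below is a minimal sketch of the restore half of that round-trip, not part of the original class; it assumes the same cookielib imports the class relies on, and the helper name restore_browser is made up here.

# Illustrative sketch: rebuild a previously authenticated Browser from saved cookie dicts.
from cookielib import Cookie, LWPCookieJar
from mechanize import Browser

def restore_browser(saved_cookies):
    """saved_cookies is the list of dicts produced by getBrowserSession() above."""
    br = Browser()
    br.set_handle_robots(False)
    jar = LWPCookieJar()
    br.set_cookiejar(jar)
    for co in saved_cookies:
        jar.set_cookie(Cookie(version=co['version'], name=co['name'], value=co['value'],
                              port=co['port'], port_specified=co['port_specified'],
                              domain=co['domain'], domain_specified=co['domain_specified'],
                              domain_initial_dot=co['domain_initial_dot'],
                              path=co['path'], path_specified=co['path_specified'],
                              secure=co['secure'], expires=co['expires'], discard=co['discard'],
                              comment=co['comment'], comment_url=co['comment_url'],
                              rest=co['rest']))
    return br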
def fetch_isochrones(isoc_kind='parsec_CAF09_v1.2S', photsys_version='yang',
                     photsys_file='tab_mag_odfnew/tab_mag_ubvrijhk.dat',
                     kind_cspecmag='aringer09', dust_sourceM='nodustM',
                     dust_sourceC='nodustC', extinction_av='0.0',
                     imf_file='tab_imf/imf_chabrier_lognormal.dat',
                     sequence_type='single_isochrone',
                     isoc_age=False, isoc_z=False,
                     isoc_z0=False, isoc_z1=False, isoc_dz=False,
                     isoc_lage0=False, isoc_lage1=False, isoc_dlage=False,
                     path='', filename='Isochrone_teste.dat'):

    # sequence_type must be one of 'single_isochrone', 'constant_metallicity',
    # 'constant_age' (or the equivalent integers 0, 1, 2)
    if sequence_type == 'single_isochrone' or sequence_type == 0:
        sequence_type = 0
    elif sequence_type == 'constant_metallicity' or sequence_type == 1:
        sequence_type = 1
    elif sequence_type == 'constant_age' or sequence_type == 2:
        sequence_type = 2
    else:
        raise ValueError("Argument sequence_type must be in ('single_isochrone', "
                         "'constant_metallicity', 'constant_age')")

    warnings.simplefilter('always', UserWarning)

    # Handling bad values given for the different sequence types
    if sequence_type == 0:
        if not isoc_age:
            raise ValueError("For sequence_type == 'single_isochrone', argument isoc_age must be provided")
        if not isoc_z:
            raise ValueError("For sequence_type == 'single_isochrone', argument isoc_z must be provided")
        if any((isoc_z0, isoc_z1, isoc_dz, isoc_lage0, isoc_lage1, isoc_dlage)):
            warnings.warn("For sequence_type == 'single_isochrone', arguments isoc_z0, isoc_z1, isoc_dz, "
                          "isoc_lage0, isoc_lage1 and isoc_dlage are not used")
    elif sequence_type == 1:
        if not isoc_z:
            raise ValueError("For sequence_type == 'constant_metallicity', argument isoc_z must be provided")
        if not isoc_lage0:
            raise ValueError("For sequence_type == 'constant_metallicity', argument isoc_lage0 must be provided")
        if not isoc_lage1:
            raise ValueError("For sequence_type == 'constant_metallicity', argument isoc_lage1 must be provided")
        if not isoc_dlage:
            raise ValueError("For sequence_type == 'constant_metallicity', argument isoc_dlage must be provided")
        if any((isoc_age, isoc_z0, isoc_z1, isoc_dz)):
            warnings.warn("For sequence_type == 'constant_metallicity', arguments isoc_age, isoc_z0, "
                          "isoc_z1, and isoc_dz are not used")
    elif sequence_type == 2:
        if not isoc_age:
            raise ValueError("For sequence_type == 'constant_age', argument isoc_age must be provided")
        if not isoc_z0:
            raise ValueError("For sequence_type == 'constant_age', argument isoc_z0 must be provided")
        if not isoc_z1:
            raise ValueError("For sequence_type == 'constant_age', argument isoc_z1 must be provided")
        if not isoc_dz:
            raise ValueError("For sequence_type == 'constant_age', argument isoc_dz must be provided")
        if any((isoc_z, isoc_lage0, isoc_lage1, isoc_dlage)):
            warnings.warn("For sequence_type == 'constant_age', arguments isoc_z, isoc_lage0, "
                          "isoc_lage1, and isoc_dlage are not used")

    # Error raised when too many isochrones are requested
    if sequence_type == 1:
        N_isoc = len(np.arange(isoc_lage0, isoc_lage1, isoc_dlage))
        if N_isoc > 400:
            raise ValueError("you requested too many isochrones ({0}), maximum allowed is 400.\n"
                             "Try to increase isoc_dlage or lower the difference between "
                             "isoc_lage0 and isoc_lage1".format(N_isoc))
    elif sequence_type == 2:
        N_isoc = len(np.arange(isoc_z0, isoc_z1, isoc_dz))
        if N_isoc > 400:
            raise ValueError("you requested too many isochrones ({0}), maximum allowed is 400.\n"
                             "Try to increase isoc_dz or lower the difference between "
                             "isoc_z0 and isoc_z1".format(N_isoc))

    # print 'Opening browser'
    br = Browser()
    br.open('http://stev.oapd.inaf.it/cgi-bin/cmd')
    br.select_form(nr=0)
    # print 'Filling form'
    br.form['isoc_kind'] = [isoc_kind]
    br.form['photsys_version'] = [photsys_version]
    br.form['photsys_file'] = [photsys_file]
    br.form['kind_cspecmag'] = [kind_cspecmag]
    br.form['dust_sourceM'] = [dust_sourceM]
    br.form['dust_sourceC'] = [dust_sourceC]
    br.form['extinction_av'] = extinction_av
    br.form['imf_file'] = [imf_file]
    br.find_control("isoc_val").items[sequence_type].selected = True

    if sequence_type == 0:
        br.form['isoc_age'] = str(isoc_age)      # isochrone age
        br.form['isoc_zeta'] = str(isoc_z)       # isochrone metallicity
    elif sequence_type == 1:
        br.form['isoc_zeta0'] = str(isoc_z)      # isochrone metallicity
        br.form['isoc_lage0'] = str(isoc_lage0)  # isochrone log initial age
        br.form['isoc_lage1'] = str(isoc_lage1)  # isochrone log final age
        br.form['isoc_dlage'] = str(isoc_dlage)  # isochrone log age step
    elif sequence_type == 2:
        br.form['isoc_age0'] = str(isoc_age)     # isochrone age
        br.form['isoc_z0'] = str(isoc_z0)        # isochrone initial metallicity
        br.form['isoc_z1'] = str(isoc_z1)        # isochrone final metallicity
        br.form['isoc_dz'] = str(isoc_dz)        # isochrone metallicity step

    # print('Submitting form')
    br.submit()

    # print('Downloading data')
    download_link = list(br.links())[0].absolute_url
    geturl(download_link, path + '/' + filename)
    br.close()
    print('File ' + path + '/' + filename + ' created')
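A hypothetical call to fetch_isochrones, not part of the original module: the age, metallicity and file name are example values only, and the age is assumed to be in years as expected by the CMD web form.

# Illustrative only: fetch a single PARSEC isochrone into the current directory.
fetch_isochrones(sequence_type='single_isochrone',
                 isoc_age=1.0e9,          # example age, assumed to be in years
                 isoc_z=0.0152,           # example metallicity Z
                 path='.',
                 filename='parsec_1gyr_z0.0152.dat')  # arbitrary output name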
def __init__(self, d_inic, m_inic, ano, search_url, controller, nao_cria):
    arquivo = '//home//raul//Documents//unb_python//data//data' + str(
        d_inic) + "-" + str(m_inic) + "-" + str(ano) + '.txt'
    if nao_cria == 1:
        self.f = open(arquivo, 'a')
    else:
        self.f = open(arquivo, 'w')
    Consulta.controller = controller
    self.SEARCH_URL = search_url

    # open a connector on the Tor network
    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9050)
    try:
        socket.socket = socks.socksocket
        socket.create_connection = create_connection
    except:
        print "problema ao abrir socket na rede tor"

    # prepare to start the queries
    br = Browser()
    print "################### Consulta Avancada Portal Transparencia ###################"
    gravalog(
        self,
        "\n\n\n################### Consulta Avancada Portal Transparencia ###################\n\n"
    )
    print "################### versao" + self.ver + " ###################"
    gravalog(
        self,
        "\n################### versao " + self.ver + " ###################\n\n")
    try:
        LRequest = urllib2.Request(SEARCH_URL, " ")
        LResponse = br.open(LRequest)
        page = bs_parse(LResponse.read())
        print SEARCH_URL
        print page
        # f.write(page)
    except:
        print "problema ao realizar primeira consulta na web"
    gravalog(self, (page.text).encode('utf-8', 'ignore'))
    br.close()

    # Consulta.ID = newID(self, Consulta.controller)
    Consulta.ID = 000000000

    # Logger object for capturing logs.
    x = logging.getLogger("logarqui")
    x.setLevel(logging.DEBUG)

    # Capture logs and write them to a file.
    h1 = logging.FileHandler(
        "//home//raul//Documents//unb_python//data//log//erros" +
        str(d_inic) + "-" + str(m_inic) + "-" + str(ano) + '.log')
    f = logging.Formatter(
        "%(levelname)s %(asctime)s %(funcName)s %(lineno)d %(message)s")
    h1.setFormatter(f)
    h1.setLevel(logging.DEBUG)
    x.addHandler(h1)
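A minimal sketch of the Tor wiring done at the top of this constructor, isolated so it can be tried on its own. It is not part of the original class; the check.torproject.org URL is only an example target, and the original also swaps in a custom socket.create_connection helper that is not shown here.

# Illustrative only: route all mechanize/urllib2 traffic through the local Tor SOCKS proxy.
import socket
import socks
from mechanize import Browser

socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9050)  # Tor's default SOCKS port
socket.socket = socks.socksocket        # monkey-patch: every new socket goes through Tor

br = Browser()
br.set_handle_robots(False)
response = br.open("https://check.torproject.org/")  # example target only
print response.read()[:200]             # the page reports whether the request came via Tor
br.close()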
import re
from mechanize import Browser

import config

browser = Browser()
browser.set_handle_robots(False)
browser.addheaders = [(
    'User-agent',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
)]

browser.open("https://sodexo4you.be/nl")
browser.select_form(id="account-login")
browser.form['name'] = config.email
browser.form['pass'] = config.password
response = browser.submit()

browser.retrieve(
    'https://sodexo4you.be/nl/mijn-sodexo-card-saldo?description=All&type=LUNCH&export=1',
    'export.csv')
browser.close()
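A hypothetical follow-up, not in the original script, that reads the exported file back in; the delimiter and column layout of the Sodexo export are assumptions and should be checked against the actual export.csv.

# Illustrative only: print the rows of the downloaded export.
import csv

with open('export.csv') as fh:
    for row in csv.reader(fh, delimiter=';'):   # assumed delimiter
        print(row)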