def getDags(options):
    """Page through all TEZ_DAG_ID entities from the ATS timeline server.

    Repeatedly queries the timeline REST API, sliding the window end back
    to the start time of the oldest DAG received, until a page comes back
    with fewer than pageSize entries (ATS's default page size).

    Args:
        options: parsed CLI options; must provide .ats (base URL), .start
            and .end (times accepted by toMs) and .secure (Kerberos flag).

    Returns:
        list of DAG entity dicts as returned by ATS (possibly empty).
    """
    # ATS returns at most 100 entities per request by default; a short page
    # therefore means we have drained the window.
    pageSize = 100
    parameters = {}
    dags = []
    logger = logging.getLogger()
    start = toMs(options.start)
    end = toMs(options.end)
    retrieved = pageSize  # prime the loop
    parameters["windowStart"] = start
    parameters["windowEnd"] = end
    while retrieved == pageSize:
        # Build the query string, skipping parameters that are unset.
        query = "&".join([
            x + "=" + str(parameters[x]) for x in parameters.keys()
            if parameters[x] is not None
        ])
        atsUrl = "%s/ws/v1/timeline/TEZ_DAG_ID?%s" % (options.ats, query)
        logger.debug("Request url %s" % atsUrl)
        if options.secure:
            opener = url.build_opener(u2k.HTTPKerberosAuthHandler())
        else:
            opener = url.build_opener()
        opened = opener.open(atsUrl)
        response = opened.read()
        objects = json.loads(response)
        newDags = objects["entities"]
        retrieved = len(newDags)
        if retrieved == 0:
            return dags
        # Next page: entities starting at or before the oldest DAG seen.
        lastTime = newDags[-1]["starttime"]
        parameters["windowEnd"] = lastTime
        dags.extend(newDags)
    return dags
def getAccessPriv(cf, user, groupList, path):
    """Check whether user (or any group in groupList) has write access to an HDFS path.

    First inspects the POSIX owner/group/permission bits reported by
    WebHDFS GETFILESTATUS; when those do not grant write, falls back to
    the extended ACL entries from GETACLSTATUS.

    Args:
        cf: config dict providing HTTPFS_PROTO, HTTPFS_HOST and HTTPFS_PORT.
        user: user name to test against the path owner.
        groupList: list of group names the user belongs to.
        path: absolute HDFS path.

    Returns:
        True when write access is granted, else False.
    """
    isOK = False
    getReplUrl = cf['HTTPFS_PROTO'] + "://" + cf['HTTPFS_HOST'] + ":" + cf[
        'HTTPFS_PORT'] + "/webhdfs/v1" + path + "?op=GETFILESTATUS"
    LOG.debug("Getting file status with: " + getReplUrl)
    opener = urllib2.build_opener()
    opener.add_handler(ul2k.HTTPKerberosAuthHandler())
    resp = opener.open(getReplUrl)
    fsData = json.load(resp)
    output_json = json.dumps(fsData)
    LOG.debug("HTTPFS OUTPUT: " + output_json)
    pOwner = fsData['FileStatus']['owner']
    pGroup = fsData['FileStatus']['group']
    perms = fsData['FileStatus']['permission']
    # Octal digits that carry the write bit: 7 (rwx), 3 (-wx), 2 (-w-).
    writeDigits = ('7', '3', '2')
    # BUG FIX: the original referenced an undefined name `group` (NameError)
    # instead of the groupList parameter, and tested the owner digit
    # perms[0] for the group as well; the group's digit is perms[1].
    # (Assumes the 3-digit "rwx"-style permission string WebHDFS returns.)
    if (pOwner == user and perms[0] in writeDigits) or (
            pGroup in groupList and perms[1] in writeDigits):
        isOK = True
    else:
        # BUG FIX: the original used "/webhdfs/V1" (capital V), which is
        # not a valid WebHDFS path prefix.
        getReplUrl = cf['HTTPFS_PROTO'] + "://" + cf['HTTPFS_HOST'] + ":" + cf[
            'HTTPFS_PORT'] + "/webhdfs/v1" + path + "?op=GETACLSTATUS"
        LOG.debug("Getting ACLS with: " + getReplUrl)
        opener = urllib2.build_opener()
        opener.add_handler(ul2k.HTTPKerberosAuthHandler())
        resp = opener.open(getReplUrl)
        aclData = json.load(resp)
        output_json = json.dumps(aclData)
        LOG.debug("HTTPFS ACL OUTPUT: " + output_json)
        # ACL entries look like "group:devs:rwx"; accept any entry naming
        # one of the user's groups whose privilege string contains 'w'.
        entryList = aclData['AclStatus']['entries']
        for grp in groupList:
            matcher = re.compile('.*:' + re.escape(grp) + ':')
            for entry in filter(matcher.match, entryList):
                (pcoll, name, priv) = entry.split(':')
                if 'w' in priv:
                    isOK = True
    return isOK
def get_rmstats(kerb, period_secs, rm_host):
    """Pull running/recently-finished YARN apps from the RM REST API into RMStats.

    Fetches /ws/v1/cluster/apps, keeps applications that are in progress or
    finished within the look-back window, converts their millisecond epoch
    timestamps to datetimes, and bulk-inserts the rows.

    Args:
        kerb: when True, attach a Kerberos (SPNEGO) handler to the opener.
        period_secs: look-back window in seconds for finished applications.
        rm_host: ResourceManager host name.
    """
    app.logger.debug("In get_rmstats")
    timenow = unix_timestamp_secs()
    app.logger.debug("TIMENOW is " + str(timenow))
    app.logger.debug("Checking URL http://" + rm_host + ":" +
                     app.config['YARN_PORT'] + "/ws/v1/cluster/apps")
    opener = urllib2.build_opener()
    if kerb == True:
        opener.add_handler(ul2k.HTTPKerberosAuthHandler())
    jobdict = json.load(opener.open("http://" + rm_host + ":" +
                                    app.config['YARN_PORT'] +
                                    "/ws/v1/cluster/apps"))
    if jobdict['apps']:
        alljobs = jobdict['apps']['app']
        batch_offset = timedelta(seconds=period_secs)
        periodBegin_dt = datetime.now() - batch_offset
        # BUG FIX: strftime('%s') returns a *string*; in Python 2 the
        # comparison int > str is always False, so no finished job was ever
        # kept, and datetime.fromtimestamp(str) below would raise TypeError.
        # Convert the epoch seconds to int once, up front.
        periodBegin_secs = int(periodBegin_dt.strftime('%s'))
        # Keep apps still in flight (0 < progress < 100) or finished inside
        # the window (finishedTime is in epoch milliseconds).
        runningOrFinished = filter(
            lambda x: (x['progress'] < 100.0 and x['progress'] > 0) or
            int(x['finishedTime'] / 1000) > periodBegin_secs, alljobs)
        app.logger.debug("In get_rmstats 6 " + json.dumps(runningOrFinished))
        if len(runningOrFinished):
            for x in runningOrFinished:
                x.update({"periodBegin_dt": datetime.fromtimestamp(periodBegin_secs)})
                x.update({"host": rm_host})
                x.update({"startedTime_dt": datetime.fromtimestamp(x['startedTime'] / 1000)})
                x.update({"finishedTime_dt": datetime.fromtimestamp(x['finishedTime'] / 1000)})
                # Drop the raw epoch columns now that the *_dt versions exist.
                x.pop('startedTime', None)
                x.pop('finishedTime', None)
                app.logger.debug("job " + x['id'] + " periodBegin " +
                                 str(x['periodBegin_dt']) + " " +
                                 " finishedTime " + str(x['finishedTime_dt']))
            with app.app_context():
                db.engine.execute(RMStats.__table__.insert(), runningOrFinished)
    else:
        app.logger.debug("No jobs found")
def getDatabaseLocation(cf, database):
    """Look up a Hive database via the WebHCat DDL REST API.

    Args:
        cf: config dict providing WEBHCAT_PROTO, WEBHCAT_HOST and
            WEBHCAT_PORT.
        database: Hive database name.

    Returns:
        None when the database cannot be retrieved (HTTP error).
        NOTE(review): as shown, the success path also falls off the end and
        returns None without reading `resp` — presumably the function
        continues beyond this excerpt (it is named "getDatabaseLocation"
        but never extracts a location here); confirm against the full file.
    """
    #getReplUrl = cf['WEBHCAT_PROTO']+"://"+cf['WEBHCAT_HOST'] + ":" + cf['WEBHCAT_PORT'] + "/templeton/v1/ddl/database/" + database + "/?user.name=ec2-user"
    getReplUrl = cf['WEBHCAT_PROTO'] + "://" + cf['WEBHCAT_HOST'] + ":" + cf[
        'WEBHCAT_PORT'] + "/templeton/v1/ddl/database/" + database + "/"
    LOG.debug("Polling WebHCat URL: " + getReplUrl)
    try:
        # Kerberos-authenticated GET; HTTPError (e.g. 404 for a missing
        # database) is treated as "skip this database".
        opener = urllib2.build_opener()
        opener.add_handler(ul2k.HTTPKerberosAuthHandler())
        resp = opener.open(getReplUrl)
    except urllib2.HTTPError, e:
        print >> sys.stderr, '\n\tCould not retrieve location for database \'' + database + '\' Skipping...'
        return None
def make_request(pqsUrl, request): print "Sending : " pp.pprint(request) request_json = json.dumps(request) if secure: opener = url.build_opener(u2k.HTTPKerberosAuthHandler()) else: opener = url.build_opener() opened = opener.open(pqsUrl, request_json) response_json = opened.read() response = json.loads(response_json) print "Response : " pp.pprint(response) return response
def getExtraHiveInfo(options, id): atsUrl = "%s/ws/v1/timeline/HIVE_QUERY_ID/%s" % (options.ats, id) logger = logging.getLogger() logger.debug("Settings Hive url %s" % atsUrl) try: if options.secure: opener = url.build_opener(u2k.HTTPKerberosAuthHandler()) else: opener = url.build_opener() opened = opener.open(atsUrl) response = opened.read() objects = json.loads(response) return objects except: return None
def create_http_opener(proxy=None):
    """Creates http opener with spnego handler.

    proxy may be None/empty (disable proxying), the string 'ENV' (honour
    the environment's proxy settings), or an explicit proxy mapping.
    """
    # debuglevel=1 makes urllib2 dump the HTTPS traffic for troubleshooting.
    https_support = urllib2.HTTPSHandler(debuglevel=1)
    proxy = proxy or {}
    if proxy == 'ENV':
        # No argument: ProxyHandler reads http_proxy/https_proxy from env.
        proxy_support = urllib2.ProxyHandler()
    else:
        # An explicit mapping ({} disables proxying entirely).
        proxy_support = urllib2.ProxyHandler(proxy)
    krb_support = urllib2_kerberos.HTTPKerberosAuthHandler(mutual=False)
    return urllib2.build_opener(https_support, proxy_support, krb_support)
def getDagSettings(options, applicationId):
    """Fetch the TEZ_APPLICATION configuration for a YARN application from ATS.

    Args:
        options: parsed CLI options providing .ats (base URL) and .secure.
        applicationId: YARN application id, e.g. "application_1234_0001".

    Returns:
        the application's config dict, or {} when the entity has no config.
    """
    # NOTE: the original called applicationId.replace("application", "tez")
    # and discarded the result (str.replace returns a new string and mutates
    # nothing).  The URL below already prepends "tez_", so that call was
    # dead code and has been removed.
    logger = logging.getLogger()
    atsUrl = "%s/ws/v1/timeline/TEZ_APPLICATION/tez_%s" % (options.ats,
                                                           applicationId)
    logger.debug("Settings url %s" % atsUrl)
    if options.secure:
        opener = url.build_opener(u2k.HTTPKerberosAuthHandler())
    else:
        opener = url.build_opener()
    opened = opener.open(atsUrl)
    response = opened.read()
    objects = json.loads(response)
    if "config" not in objects['otherinfo']:
        return {}
    return objects['otherinfo']['config']
def timeline(server, on=None, author=None):
    """Render the Trac timeline RSS feed as plain text for the vim plugin.

    Args:
        server: dict describing the Trac server; must contain 'scheme',
            'server' and 'auth_type' keys (used to build the feed URL and
            to decide on Kerberos authentication).
        on: a timeline category filter ('wiki', 'ticket', 'changeset'), or
            — when it is none of those — treated as an author shortcut and
            copied into *author* if that is unset.
        author: restrict the feed to entries by this author.

    Returns:
        a unicode string of feed lines (lines containing ':>>' are the
        plugin's clickable markers), or None when feedparser is missing.
    """
    try:
        import feedparser
        from time import strftime
    except ImportError:
        u_vim.command('echoerr "Please install feedparser.py!"')
        return
    parse_kwargs = {}
    if server['auth_type'] == Trac.KERBEROS_AUTH:
        try:
            # urllib2_kerberos may not be importable; the NameError then
            # raised here means we silently fall back to unauthenticated HTTP.
            kerberos_handler = urllib2_kerberos.HTTPKerberosAuthHandler()
            parse_kwargs['handlers'] = [kerberos_handler]
        except NameError:
            pass
    query = 'max={0}&format=rss'.format(u_vim.eval('tracTimelineMax'))
    if on in ('wiki', 'ticket', 'changeset'):
        query = '{0}=on&{1}'.format(on, query)
    elif not author:
        # Any other value of *on* is interpreted as an author name.
        author = on
    if author:
        query = u'authors={0}&{1}'.format(author, query)
    feed = u'{scheme}://{server}/timeline?{q}'.format(q=query, **server)
    d = feedparser.parse(feed, **parse_kwargs)
    str_feed = ['Hit <enter> on a line containing :>>', '']
    for item in d['items']:
        str_feed.append(strftime(u'%Y-%m-%d %H:%M:%S', item.updated_parsed))
        # Emit a clickable "<Kind>:>> <id>" marker for known categories.
        if 'ticket' in item.category:
            m = re.match(r'^Ticket #(\d+)', item.title)
            if m:
                str_feed.append('Ticket:>> {0}'.format(m.group(1)))
        if 'wiki' in item.category:
            str_feed.append('Wiki:>> {0}'.format(item.title.split(' ', 1)[0]))
        if 'changeset' in item.category:
            m = re.match(r'^Changeset .*\[(\w+)\]:', item.title)
            if m:
                str_feed.append('Changeset:>> {0}'.format(m.group(1)))
        str_feed.append(item.title)
        if item.get('author'):
            str_feed.append(u'Author: {0}'.format(item.author))
        str_feed.append(u'Link: {0}'.format(item.link))
        str_feed.append('')
    return u'\n'.join(str_feed)
def testWebDAVFileUrlLib(self): #_ignore = kerberos.GSS_C_DELEG_FLAG #from kerberos import GSS_C_DELEG_FLAG,GSS_C_MUTUAL_FLAG,GSS_C_SEQUENCE_FLAG #_ignore, ctx = kerberos.authGSSClientInit('krbtgt/[email protected]', gssflags=GSS_C_DELEG_FLAG|GSS_C_MUTUAL_FLAG|GSS_C_SEQUENCE_FLAG) _ignore, ctx = kerberos.authGSSClientInit('*****@*****.**') _ignore = kerberos.authGSSClientStep(ctx, '') tgt = kerberos.authGSSClientResponse(ctx) opener = urllib2.build_opener() opener.add_handler(urllib2_kerberos.HTTPKerberosAuthHandler()) resp = opener.open(theurl) print resp return req = urllib2.Request(theurl) try: handle = urllib2.urlopen(req) except IOError, e: pass
def getJobXML(self, job):
    """Download a job's config.xml, retrying on connection errors.

    After an initial URLError the request is retried up to six times, one
    second apart, writing progress dots to stdout between attempts.

    Returns the raw XML body on HTTP 200, otherwise None.
    """
    target = (properties.BASE_URL + properties.CONFIG_CONTEXT + job +
              properties.CONFIX_XML)
    fetcher = urllib2.build_opener()
    fetcher.add_handler(urllib2_kerberos.HTTPKerberosAuthHandler())
    result = None
    try:
        result = fetcher.open(target)
    except urllib2.URLError:
        for _ in range(6):
            sys.stdout.write("...")
            sys.stdout.flush()
            time.sleep(1)
            try:
                result = fetcher.open(target)
                if result.code == 200:
                    break
            except urllib2.URLError:
                sys.stdout.write("...")
    if result is not None and result.code == 200:
        return result.read()
    return None
def getJobs(self):
    """Populate self.jobs with the names of all jobs listed by the JSON API."""
    fetcher = urllib2.build_opener()
    fetcher.add_handler(urllib2_kerberos.HTTPKerberosAuthHandler())
    listing = json.load(fetcher.open(properties.URL + properties.API_URL))
    self.jobs = [entry['name'] for entry in listing['jobs']]
def __init__(self, scheme):
    """Kerberos-aware XML-RPC transport bound to the given URL scheme."""
    xmlrpclib.Transport.__init__(self)
    self.scheme = scheme
    auth_handler = urllib2_kerberos.HTTPKerberosAuthHandler()
    self._opener = urllib2.build_opener()
    self._opener.add_handler(auth_handler)
def secure_connect(self, uri):
    """GET *uri* through a Kerberos-authenticated opener and return the parsed JSON body."""
    fetcher = urllib2.build_opener()
    fetcher.add_handler(urllib2_kerberos.HTTPKerberosAuthHandler())
    body = fetcher.open(uri).read()
    return json.loads(body)
elif option in ('-k','--key'): nKey=val else: print >>sys.stderr, '\n\tUnknown Flag:', option usage() return cf['RET_BADOPTS'] LOG.info("Checking URL http://"+yarn_host+":" + yarn_port +"/ws/v1/cluster/apps") #handlers.append(ul2k.HTTPKerberosAuthHandler()) #opener = urllib2.build_opener(*handlers) #urllib2.install_opener(opener) opener = urllib2.build_opener() opener.add_handler(ul2k.HTTPKerberosAuthHandler()) yarnjson = json.load(opener.open("http://"+yarn_host+":" + yarn_port +"/ws/v1/cluster/apps")) #yarnjson = json.load(urllib2.urlopen("http://"+yarn_host+":" + yarn_port +"/ws/v1/cluster/apps")) output_json = json.dumps(yarnjson) # LOG.debug( "HTTPFS ACL OUTPUT: " + output_json) # print output_json # print '-----' alljobs= map(extract,yarnjson['apps']['app']) # print alljobs timenow = unix_timestamp_secs() periodBegin = timenow - (period *60)*1000
number_of_jobs_done += 1 progress = number_of_jobs_done / (float(number_of_jobs) / 100.) sys.stdout.write("Progress: %d %%, Processing job %s ..." % (progress, job.tag)) sys.stdout.flush() project = job.find("project") if project == None: # it can be also a matrix job, lets try it project = job.find("matrix-project") if project == None: # it can be also a build flow job, lets try it project = job.find("com.cloudbees.plugins.flow.BuildFlow") project = ET.tostring(project, encoding=properties.ENCODING) if regex_job_name.match(job.tag): opener = urllib2.build_opener() opener.add_handler(urllib2_kerberos.HTTPKerberosAuthHandler()) req = urllib2.Request( properties.BASE_URL + properties.CONFIG_CONTEXT + job.tag + properties.CONFIX_XML, data=project, headers={ "Content-type": "text/xml; charset=" + properties.ENCODING }) response = None try: response = opener.open(req) except urllib2.URLError: for trial in range(0, 6): sys.stdout.write("...") sys.stdout.flush() time.sleep(1)
def get_impala_stats(kerb, period_secs, impalad):
    """Collect recently finished Impala queries from an impalad web UI into Impala_Stats.

    Reads the daemon's queries JSON page, merges in-flight and completed
    entries, keeps QUERY statements that reached FINISHED/EXCEPTION inside
    the look-back window, normalises timestamps/durations and bulk-inserts
    the rows.

    Args:
        kerb: when True, attach a Kerberos (SPNEGO) handler to the opener.
        period_secs: look-back window in seconds.
        impalad: impala daemon host name.
    """
    batch_offset = timedelta(seconds=period_secs)
    periodBegin_dt = datetime.now() - batch_offset
    opener = urllib2.build_opener()
    if kerb == True:
        opener.add_handler(ul2k.HTTPKerberosAuthHandler())
    impala_dict = json.load(opener.open("http://" + impalad + ":" +
                                        app.config['IMPALA_PORT'] +
                                        app.config['IMPALA_URI']))
    entries = list()
    if impala_dict['num_in_flight_queries'] > 0:
        app.logger.debug("num_in_flight_queries is " +
                         str(impala_dict['num_in_flight_queries']))
        entries += impala_dict['in_flight_queries']
        app.logger.debug("num_in_flight_queries is DONE ")
        app.logger.debug("entries list is len: " + str(len(entries)))
    if impala_dict['completed_log_size'] > 0:
        app.logger.debug("completed_queries ")
        entries += impala_dict['completed_queries']
        app.logger.debug("entries list is NOW len: " + str(len(entries)))
    for x in entries:
        # Timestamps carry nanosecond precision; trim the last 3 digits so
        # strptime's %f (microseconds) can parse them.
        start_time_dt = datetime.strptime(x['start_time'][:-3],
                                          "%Y-%m-%d %H:%M:%S.%f")
        duration_ms = conv_duration_to_millis(x['duration'])
        waiting_time_ms = conv_duration_to_millis(x['waiting_time'])
        # Derive an end time as start + (duration - waiting); usable even
        # for entries still listed as in-flight.
        batch_offset = timedelta(milliseconds=(duration_ms - waiting_time_ms))
        derived_end_time_dt = start_time_dt + batch_offset
        x.update({'derived_end_time_dt': derived_end_time_dt})
        if (x['state'] == 'FINISHED' or x['state'] == 'EXCEPTION') and \
                derived_end_time_dt > periodBegin_dt and \
                x['stmt_type'] == 'QUERY':
            app.logger.debug(" INFLIGHT4 BATCH TIME: " + str(periodBegin_dt) +
                             " END TIME: " + str(derived_end_time_dt) +
                             " STMT " + x['stmt'][:32])
    keepers = filter(lambda x: (x['derived_end_time_dt'] > periodBegin_dt and
                                x['stmt_type'] == 'QUERY' and
                                (x['state'] == 'FINISHED' or
                                 x['state'] == 'EXCEPTION')), entries)
    if len(keepers):
        for x in keepers:
            # trim off last 3 000s from timestamp string
            start_time_dt = datetime.strptime(x['start_time'][:-3],
                                              "%Y-%m-%d %H:%M:%S.%f")
            # BUG FIX: end_time_dt previously re-parsed x['start_time'], so
            # every inserted row's end time equalled its start time; parse
            # x['end_time'] (the key deleted below) instead.
            end_time_dt = datetime.strptime(x['end_time'][:-3],
                                            "%Y-%m-%d %H:%M:%S.%f")
            x.update({'duration_ms': conv_duration_to_millis(x['duration'])})
            x.update({'waiting_time_ms': conv_duration_to_millis(x['waiting_time'])})
            x.update({'start_time_dt': start_time_dt})
            x.update({'end_time_dt': end_time_dt})
            x.update({"periodBegin_dt": periodBegin_dt})
            x.update({"host": impalad})
            # Drop the raw string columns now that parsed versions exist.
            del x['derived_end_time_dt']
            del x['start_time']
            del x['end_time']
            del x['duration']
            del x['waiting_time']
            app.logger.debug("inserting query " + x['stmt'][:32])
        app.logger.debug("about to insert")
        with app.app_context():
            db.engine.execute(Impala_Stats.__table__.insert(), keepers)
        app.logger.debug("back from insert")
    else:
        app.logger.debug("No queries found")