def __setDomain(self, domain):
    tmp = domain
    if UrlUtils.containsHTTP(domain):
        tmp = tmp.replace('http://', '').replace('https://', '')
    if UrlUtils.containsWWW(tmp):
        tmp = tmp.replace('www.', '')
    self._domain = tmp.split('/')[0]
def download_file(url, path, user=None, pw=None):
    '''Download file for input.'''
    from utils.UrlUtils import UrlUtils
    uu = UrlUtils()
    return uu.download(url, path, user, pw) == 0
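# Hedged usage sketch (not from the original source): download a single file,
# assuming UrlUtils.download returns 0 on success. The URL and local filename
# below are hypothetical.
def _example_download_file():
    ok = download_file('https://repo.example.com/products/p1.tar.gz', 'p1.tar.gz')
    if not ok:
        raise RuntimeError('download failed')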
def setImages(self, html, url):
    self.setHTML(html)
    imgList = self.getHTML().findAll('img')
    images = []
    for img in imgList:
        linkToImg = img.get('src')
        if not UrlUtils.containsHTTP(linkToImg):
            linkToImg = UrlUtils.assertSiteWithFile(url, linkToImg)
        images.append(linkToImg.strip())
    self.__allImages(images)
def setFiles(self, html, url):
    self.setHTML(html)
    internalList = []
    for files in self.getHTML().findAll('a', href=True):
        linkToFile = files['href']
        if UrlUtils.externalLink(url, linkToFile) and UrlUtils.containsHTTP(linkToFile):
            self._externals.append(linkToFile)
        else:
            if not UrlUtils.containsHTTP(linkToFile):
                linkToFile = UrlUtils.assertSiteWithFile(url, linkToFile)
            if ExtensionsFile.hasExtension(linkToFile) and not UrlUtils.externalLink(url, linkToFile):
                internalList.append(linkToFile)
    self.filterFiles(internalList)
def searchMasterFrames(self, peg):
    uu = UrlUtils()
    beamID = self._frame.beamID
    direction = self._frame.direction
    lat_min = min(peg.latStart, peg.latEnd)
    lat_max = max(peg.latStart, peg.latEnd)
    params = {
        'platform': self._frame.spacecraftName,
        'trackNumber': self._frame.trackNumber,
        'dataset_type': self._frame.dataset_type,
        'beamID': beamID,
        'system_version': uu.version,
        'latitudeIndexMin': int(math.floor(lat_min / FrameInfoExtractor._latitudeResolution)),
        'latitudeIndexMax': int(math.floor(lat_max / FrameInfoExtractor._latitudeResolution)),
        'direction': direction
    }
    # get the list of metadata close to the reference frame
    metList = postQuery(buildQuery(params, ['cross-boundaries']))
    if metList[0]:
        metList = self.refineFromTime(metList[0], self._frame.sensingStart,
                                      self._maxTimeStitch)
    return metList
def create_mask(self, sizes):
    minlat = 1000
    minlon = 1000
    maxlat = -1000
    maxlon = -1000
    for i in sizes:
        for j in i:
            # delta for lat is negative while for lon it is positive, so
            # one applies to the min and the other to the max
            if j['lat']['val'] + j['lat']['size'] * j['lat']['delta'] < minlat:
                minlat = j['lat']['val'] + j['lat']['size'] * j['lat']['delta']
            if j['lat']['val'] > maxlat:
                maxlat = j['lat']['val']
            if j['lon']['val'] < minlon:
                minlon = j['lon']['val']
            if j['lon']['val'] + j['lon']['size'] * j['lon']['delta'] > maxlon:
                maxlon = j['lon']['val'] + j['lon']['size'] * j['lon']['delta']
    if not self._debug:
        tf = tempfile.NamedTemporaryFile()
        tf.close()
        oname = tf.name
    else:
        oname = 'wbdmask.wbd'
    if not os.path.exists(oname):
        self.create_wbd_template()
    bbox = ''.join(str([int(np.floor(minlat)), int(np.ceil(maxlat)),
                        int(np.floor(minlon)), int(np.ceil(maxlon))]).split())
    uu = UrlUtils()
    command = 'wbdStitcher.py wbdStitcher.xml wbdstitcher.wbdstitcher.bbox=' + bbox \
              + ' wbdstitcher.wbdstitcher.outputfile=' + oname \
              + ' wbdstitcher.wbdstitcher.url=' + uu.wbd_url
    if os.system(command) != 0:
        print("Error creating water mask")
        raise Exception("Error creating water mask")
    self._wmask = get_image(oname + '.xml')
def curlProductMeta(prod_url, verbose=False, remove=True):
    """
    curlProductMeta(prod_url, verbose=False)

    Arguments:
      prod_url: product url

    Keyword Arguments:
      verbose: verbose output (default=False)
      remove: delete the downloaded .met.json after loading it (default=True)

    Returns: metadata dict from the product's .met.json
    """
    if prod_url.endswith('/'):
        prod_url = prod_url[:-1]
    prod_json = url2pid(prod_url) + '.met.json'
    try:
        uu = UrlUtils()
        silentoutput = ' ' if verbose else ' --silent '
        userstr = uu.dav_u + ':' + uu.dav_p
        command = 'curl' + silentoutput + '-k -f -u' + userstr + ' -O ' \
                  + pathjoin(prod_url, prod_json)
        os.system(command)
    except Exception:
        return {}
    if not pathexists(prod_json):
        return {}
    meta = loadjson(prod_json)
    if remove:
        os.remove(prod_json)
    return meta
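# Hedged usage sketch (not from the original source): fetch a product's
# .met.json and read one field. The product URL is hypothetical, and
# 'dataset_type' is only an assumed example key.
def _example_curl_product_meta():
    meta = curlProductMeta('https://repo.example.com/products/PROD-001')
    if meta:
        print(meta.get('dataset_type'))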
def getMetadata(track=None, frame=None, beam=None, passdir=None, platform=None):
    """Download metadata json from the product repo for the product matching the given parameters."""
    uu = UrlUtils()
    params = {
        "dataset": "interferogram",
        "trackNumber": str(track),
        "direction": passdir,
        "latitudeIndexMin": frame[0],
        "latitudeIndexMax": frame[1],
        "beamID": beam,
        'system_version': uu.version
    }
    # get GRQ request
    '''
    r = requests.get(url, params=params, verify=False)
    r.raise_for_status()
    res_json = r.json()
    if res_json['count'] == 0:
        raise ValueError("Found no interferogram product for Track %d, Frame %d." % (track, frame))
    '''
    query = buildQuery(params, 'within')
    metList, status = postQuery(query)
    return metList
def fetch(outdir, dry_run):
    # get endpoint configurations
    uu = UrlUtils()
    es_url = uu.rest_url

    # get active calibration ids
    active_ids = get_active_ids(es_url)
    print(active_ids)

    # get urls for active calibration files
    cal_urls = [get_cal_url(i, es_url) for i in active_ids]
    print(cal_urls)
    if len(cal_urls) == 0:
        print('Failed to find calibration auxiliary files')

    if dry_run:
        print('\n'.join(cal_urls))
    else:
        if not os.path.isdir(outdir):
            os.makedirs(outdir)
        for cal_url in cal_urls:
            try:
                cal_file = download_file(cal_url, outdir)
            except Exception:
                print('Failed to download URL: ', cal_url)
                raise
            try:
                cal_dir = untar_file(cal_file, outdir)
            except Exception:
                print('Failed to untar: ', cal_file)
                raise
            os.unlink(cal_file)
def metaQuery(self, queryKey, peg, date, delta):
    uu = UrlUtils()
    beamID = self._frame.beamID
    direction = self._frame.direction
    lat_min = min(peg.latStart, peg.latEnd)
    lat_max = max(peg.latStart, peg.latEnd)
    params = {
        queryKey: getattr(self._frame, self._frame._mappingLoad[queryKey]),
        #'orbitNumber': orbit,
        'beamID': beamID,
        'system_version': uu.version,
        'latitudeIndexMin': int(math.floor(lat_min / FrameInfoExtractor._latitudeResolution)),
        'latitudeIndexMax': int(math.floor(lat_max / FrameInfoExtractor._latitudeResolution)),
        'direction': direction
    }
    metList = postQuery(buildQuery(params, ['cross-boundaries']))
    if metList:
        metList = [self.refineFromTime(metList[0], date, delta), metList[1]]
        metList = [self.refineByPlatform(metList[0]), metList[1]]
    return metList
def getIp(self, url):
    # strip an optional scheme and any trailing path before resolving
    if UrlUtils.containsHTTP(url):
        url = url.replace('http://', '').replace('https://', '')
        url = sub('/.*', '', url)
    return gethostbyname(url).strip()
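# Hedged usage sketch (not from the original source): getIp strips an optional
# http(s):// prefix and any path before resolving the host. 'checker' stands in
# for an instance of the enclosing class, and the URL is hypothetical.
def _example_get_ip(checker):
    print(checker.getIp('https://example.com/some/page'))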
def __setIp(self, url):
    try:
        if UrlUtils.containsHTTP(url):
            url = url.replace('http://', '').replace('https://', '')
            url = sub('/.*', '', url)
        self.__listOfIps(gethostbyname(url))
    except Exception:
        # host name resolution failed; skip this url
        pass
def createStitcherXml(outfile):
    """Write stitcher.xml."""
    base_dir = os.path.dirname(__file__)
    tmpl_file = os.path.join(base_dir, 'stitcher.xml.tmpl')
    with open(tmpl_file) as f:
        tmpl = f.read()
    uu = UrlUtils()
    with open(outfile, 'w') as f:
        f.write(tmpl.format(dem_url=uu.dem_url))
def getData(self):
    uu = UrlUtils()
    for pr in self._productList:
        command = 'curl -k -f -u' + uu.dav_u + ':' + uu.dav_p + ' -O ' \
                  + os.path.join(self._url, pr)
        print(command)
        if os.system(command) != 0:
            # retry under the "merged" subdirectory
            command = 'curl -k -f -u' + uu.dav_u + ':' + uu.dav_p + ' -O ' \
                      + os.path.join(self._url, "merged", pr)
            print(command)
            if os.system(command) != 0:
                print("Failed to find: {0}".format(pr))
def getData(args):
    uu = UrlUtils()
    meta = {'tags': args.tags, 'tag_operator': args.operator}
    ret, status = postQuery(buildQuery(meta))
    try:
        os.mkdir(args.dir)
    except Exception:
        print("directory", args.dir, "already present")
    os.chdir(args.dir)
    for i in ret:
        url = i['url']
        odir = os.getcwd()
        ndir = url.split('/')[-1]
        try:
            os.mkdir(ndir)
        except Exception:
            pass
        os.chdir(ndir)
        for pr in args.products:
            if pr.endswith('.xml'):
                # also fetch the companion data file without the .xml extension
                command = 'curl -k -f -u' + uu.dav_u + ':' + uu.dav_p + ' -O ' \
                          + os.path.join(url, pr.replace('.xml', ''))
                os.system(command)
            command = 'curl -k -f -u' + uu.dav_u + ':' + uu.dav_p + ' -O ' \
                      + os.path.join(url, pr)
            os.system(command)
        response = None
        try:
            # create a password manager and add the username and password
            password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, urllib.parse.urlparse(url).netloc,
                                      uu.dav_u, uu.dav_p)
            handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
            # create "opener" (OpenerDirector instance)
            opener = urllib.request.build_opener(handler)
            # use the opener to fetch the URL
            response = opener.open(url).read().decode('utf-8')
        except Exception as e:
            print(e)
        if response:
            parser = MyHTMLParser(args.re, args.nre)
            parser.feed(response)
            print(parser.results)
            for i in parser.results:
                command = 'curl -k -f -u' + uu.dav_u + ':' + uu.dav_p + ' -O ' \
                          + os.path.join(url, i)
                os.system(command)
        os.chdir(odir)
def __init__(self, logger: Logger, build_name: str, build_number: str,
             urlutils=UrlUtils(), xml=XmlUtils(), auth=None):
    self.buildname = build_name
    self.buildnumber = build_number
    self.auth = auth
    self.logger = logger
    self.urlutils = urlutils
    self.xml = xml
def createInputList(self, metaList, master, slave):
    urlsList = []
    imgList = []
    for frames in metaList:
        imgL = []
        urls = []
        for frame in frames:
            url = frame.url
            urls.append(url)
            response = None
            # get the aria-dav url
            from utils.UrlUtils import UrlUtils
            uu = UrlUtils()
            try:
                # create a password manager and add the username and password
                password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
                password_mgr.add_password(None, uu.dav_url, uu.dav_u, uu.dav_p)
                handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
                # create "opener" (OpenerDirector instance)
                context = ssl._create_unverified_context()
                opener = urllib.request.build_opener(
                    handler, urllib.request.HTTPSHandler(context=context))
                # use the opener to fetch the URL
                response = opener.open(url).read().decode('utf-8')
            except Exception as e:
                print(e)
            if response is None:
                # fall back to requests with basic auth
                try:
                    r = requests.get(url, auth=(uu.dav_u, uu.dav_p), verify=False)
                    r.raise_for_status()
                    response = r.text
                except Exception as e:
                    print(e)
            if response:
                pattern = 'EL.*?tar.gz'
                found = re.findall(pattern, response)
                if found:
                    imgL.append(found[0])
                else:
                    print('Expected to have found an EL*.tar.gz file')
                    raise Exception('Expected to have found an EL*.tar.gz file')
        imgList.append(imgL)
        urlsList.append([urls, imgL])
    retList = [[imgList[0], master], [imgList[1], slave]]
    return retList, urlsList
def get_data_from_url(url):
    uu = UrlUtils()
    command = 'curl -k -f -u' + uu.dav_u + ':' + uu.dav_p + ' -O ' + url
    ntrials = 4
    failed = True
    for i in range(ntrials):
        p = sp.Popen(command, shell=True)
        try:
            # allow progressively more time on each retry
            p.wait(60 + i * 20)
            failed = False
            break
        except Exception as e:
            print(e)
            p.kill()
    return failed
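# Hedged usage sketch (not from the original source): note the inverted return
# value, True means the download FAILED after all retries. The URL is hypothetical.
def _example_get_data_from_url():
    if get_data_from_url('https://repo.example.com/products/p1.tar.gz'):
        raise RuntimeError('all download attempts failed or timed out')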
def addStitcher(self, fp):
    from utils.UrlUtils import UrlUtils
    uu = UrlUtils()
    with open(os.path.join(os.path.dirname(__file__),
                           'demstitchertemplate.xml')) as fp1:
        extraLines = fp1.readlines()
    for el in extraLines:
        if el.count('httpsvalue'):
            fp.write(el.replace('httpsvalue', str(uu.dem_url)))
        elif el.count('usernamevalue'):
            fp.write(el.replace('usernamevalue', str(uu.dem_u)))
        elif el.count('passwordvalue'):
            fp.write(el.replace('passwordvalue', str(uu.dem_p)))
        else:
            fp.write(el)
    fp.write('\n')
def __init__(self, urlutils=UrlUtils(), xml=XmlUtils(), fileutils=FileUtils()):
    JmakeModule.__init__(self)
    self.command = 'investigate'
    self.description = 'Investigates JBAC failures and prepares to reproduce them locally. This command will ' \
                       'produce a file that will contain information on what tests failed and what should be done ' \
                       'to replay what happened locally. By default, each failed test will be accompanied by the ' \
                       '5 tests that preceded it on the particular run, in an attempt to replicate the way the ' \
                       'instance might have been corrupted by previous tests (which is random on a regular rerun ' \
                       'due to how hallelujah works).'
    self.urlutils = urlutils
    self.xml = xml
    self.fileutils = fileutils
    self.replays_dir = self.fileutils.existing_dir(os.sep.join(['target', 'replays']))
def add_metadata(product_dir, metadata_file):
    """Add metadata to json file."""
    with open(metadata_file) as f:
        metadata = json.load(f)

    # get datasets config
    uu = UrlUtils()
    dsets_file = uu.datasets_cfg
    r = Recognizer(dsets_file, product_dir, product_dir, 'v0.1')

    # add dataset type and level
    metadata.setdefault('dataset_type', r.getType())
    metadata.setdefault('dataset_level', r.getLevel())

    # overwrite metadata json file
    with open(metadata_file, 'w') as f:
        json.dump(metadata, f, indent=2, sort_keys=True)
def __init__(self, fs: FileUtils = FileUtils(), url: UrlUtils = UrlUtils(),
             tar_utils=TarUtils(), bamboo_utils: BambooUtils = BambooUtils()):
    super().__init__()
    self.command = 'eh-metrics-investigate'
    self.prevent_post_diagnostics = True
    self.description = wrap_text(
        'Finds the reason for failing engineering health metrics. Should work '
        'automatically, comparing the latest result of ./jmake eh-metrics with the '
        'latest tagged CI build. You should run this command immediately after a failing '
        './jmake eh-metrics, and with no parameters. The parameters available to this '
        'command are mostly for debug or tricky usages.')
    self.fs = fs
    self.url = url
    self.tar_utils = tar_utils
    self.bamboo_utils = bamboo_utils
def __init__(self, frame=None, project=None):
    self._maxTimeStitch = td(seconds=60)
    uu = UrlUtils()
    self._frame = frame
    self.rest_url = uu.rest_url
    self.requester = Http()
    self._referenceFrame = ""
    self._pegList = []
    self._pegFilename = ""
    # when searching for multiple passes, stop as soon as one orbit covers
    # the peg region; useful for trigger mode
    self._breakAfterFirst = False
    self._sensorType = None
    self._searchUpTo = 200
    self._deltaSearch = td(days=1)
    # 0 searches (+-)self._searchOrbitUpTo from the input frame orbit,
    # 1 searches +self._searchOrbitUpTo, and -1 searches -self._searchOrbitUpTo
    self._searchDirection = 0
    self._datesToSearch = []
    # a single frame needs to be treated differently: it's hard to create an
    # exact peg region containing exactly one frame unless we already know all
    # the refbbox. In this case just create a peg region based on the refbbox
    # of the input frame so it's always satisfied.
    if project is not None and project.endswith('sf'):
        extremes = frame.getExtremes(frame.refbbox)
        delta = (extremes[1] - extremes[0]) / 5.
        if frame.direction == 'asc':
            latS = extremes[0] + delta
            latE = extremes[1] - delta
        else:
            latS = extremes[1] - delta
            latE = extremes[0] + delta
        self._pegList.append(
            PegInfoFactory.createPegInfo(0, frame.trackNumber, frame.direction,
                                         latS, latE, (latS + latE) / 2.,
                                         (extremes[2] + extremes[3]) / 2., 0))
    else:
        # if a filename is provided in the constructor, initialize the pegList as well
        if frame.spacecraftName is not None and project is not None:
            self._pegFilename = self.getPegFile(frame.spacecraftName, project)
            self.initPegList()
    self._sensorType = frame.spacecraftName
    self._project = project
def getMetadata(id, output_file):
    """Download metadata json from product repo for product with ID passed in."""
    # get conf settings
    uu = UrlUtils()

    # build the query
    query = {
        "fields": ["urls"],
        "query": {
            "ids": {
                "values": [id]
            }
        },
        "filter": {
            "term": {
                "system_version": uu.version
            }
        }
    }

    # query GRQ
    r = requests.post("%s/%s/_search" % (uu.rest_url, uu.grq_index_prefix),
                      data=json.dumps(query))
    r.raise_for_status()
    res_json = r.json()
    if res_json['hits']['total'] == 0:
        raise RuntimeError("Found no product with id %s." % id)
    res = res_json['hits']['hits'][0]
    urls = res['fields']['urls']
    if not isinstance(urls, list) or len(urls) == 0:
        raise RuntimeError("Found no urls for product with id %s." % id)
    prod_url = urls[0]

    # get product metadata json
    product = os.path.basename(prod_url)
    met_url = os.path.join(prod_url, '%s.met.json' % product)
    r = requests.get(met_url, auth=(uu.dav_u, uu.dav_p), verify=False)
    r.raise_for_status()
    met_json = r.json()
    with open(output_file, 'w') as f:
        json.dump(met_json, f, indent=2, sort_keys=True)
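# Hedged usage sketch (not from the original source): dump a product's metadata
# to a local json file. The product id below is hypothetical.
def _example_get_metadata_by_id():
    getMetadata('interferogram_TN120_example-id', 'product.met.json')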
def searchAndAddLinksFromMain(self, html, url):
    urls = []
    for link in html.findAll('a', href=True):
        page = link['href']
        try:
            if page[0] != '#' and url not in page and not UrlUtils.containsHTTP(page):
                urls.append('http://' + url + '/' + page)
            else:
                if isinstance(page, str):
                    if self.pageOrExternal(page, url):
                        urls.append(page)
                elif isinstance(page, list):
                    for string in page:
                        if self.pageOrExternal(string, url):
                            urls.append(string)
        except Exception:
            continue
    # deduplicate before returning
    return list(set(urls))
def masterExists(self, fm):
    uu = UrlUtils()
    extremes = fm.getExtremes(fm.bbox)
    latMin = extremes[0]
    latMax = extremes[1]
    latDelta = (latMax - latMin) / 3.
    latitudeResolution = .1
    params = {
        'sensor': fm.platform,
        'trackNumber': fm.trackNumber,
        'latitudeIndexMin': int(math.floor((latMin - latDelta) / latitudeResolution)),
        'latitudeIndexMax': int(math.ceil((latMax + latDelta) / latitudeResolution)),
        'dataset_type': fm.dataset_type,
        'system_version': uu.version,
        'direction': fm.direction,
        'lookDirection': fm.lookDirection,
        'reference': True,
    }
    if fm.beamID:
        params['beamID'] = fm.beamID
    exists = False
    metList, status = postQuery(buildQuery(params, ['within']))
    if status:
        metObj = createMetaObjects(metList)
        if len(metObj) > 1:
            print("WARNING FrameInfoExtractor: Expecting only one frame to be reference")
        if metObj:
            exists = True
    return exists
def __init__(self, url_utils: UrlUtils = None, fs: FileUtils = FileUtils()):
    self.url_utils = url_utils if url_utils is not None else UrlUtils(fs)
    zone_dev = 'jirastudio-dev'
    zone_dog = 'jirastudio-dog'
    zone_prod = 'jirastudio-prd'
    zone_prodv = 'jirastudio-prd-virtual'
    self.manifesto_zones = [zone_dev, zone_dog, zone_prod, zone_prodv]
    self.manifesto_zone_aliases = {
        'default': zone_dev,
        'dev': zone_dev,
        'dog': zone_dog,
        'prod': zone_prod,
        'prodv': zone_prodv,
        'preprod': zone_prodv
    }
    self.response_cache = {}
def retrieveInputFile(self, url, fileIn):
    import urllib.request, urllib.error, urllib.parse
    import os
    import time
    import shutil
    from utils.UrlUtils import UrlUtils
    uu = UrlUtils()
    t0 = time.time()
    command = 'curl -k -u ' + uu.dav_u + ':' + uu.dav_p + ' -O ' + os.path.join(url, fileIn)
    os.system(command)
    tmpDir = 'tmp'
    # not needed, but when debugging the directory might already exist
    try:
        shutil.rmtree(tmpDir)
    except Exception:
        pass
    os.mkdir(tmpDir)
    shutil.move(fileIn, tmpDir)
    os.chdir(tmpDir)
    os.system('tar -xzvf ' + fileIn)
    allF = os.listdir('./')
    ret = ''
    for name in allF:
        if name.endswith('.h5'):
            try:
                shutil.move(name, '../')
            except Exception:
                # normally happens when debugging and the file is already present
                pass
            os.chdir('../')
            ret = name
            try:
                shutil.rmtree(tmpDir)
            except Exception:
                pass
            break
    print(time.time() - t0)
    return ret
def get_build_name(self, logger, args, branchDiscovery, urlutils=UrlUtils(), xml=XmlUtils()):
    if branchDiscovery.branch == 'master':
        return self.__fulljobname(args.type)
    else:
        # translate the branch into the bamboo branch name:
        bamboo_branch_name = branchDiscovery.branch.replace('/', '-')
        logger.info('Trying to find bamboo branch "%s"...' % bamboo_branch_name)
        auth = JbacAuthentication.get()
        url = '%s/rest/api/latest/plan/%s?expand=branches&max-results=10000' % (
            Jbac.url, CIInvestigate.planname[args.type])
        logger.info('Querying JBAC: ' + url)
        # use the injected urlutils/xml parameters rather than instance attributes
        text = urlutils.read(url, auth.login, auth.password)
        try:
            root = xml.parse_string(text)
            for branchElement in root.findall('.//branches/branch'):
                branch_key = branchElement.attrib['key']
                branch_name = branchElement.attrib['shortName']
                if branch_name == bamboo_branch_name:
                    logger.debug('Bamboo branch plan key is: "%s".' % branch_key)
                    return '-'.join([branch_key, CIInvestigate.jobname[args.type]])
        except ParseError:
            logger.debug('\n' + text)
            logger.error('Could not parse JBAC reply.')
        logger.warn('Could not find the Bamboo branch for branch: "%s". Will inspect master instead.'
                    % bamboo_branch_name)
        return self.__fulljobname(args.type)
def searchSlaveFrames(self, peg):
    uu = UrlUtils()
    beamID = self._frame.beamID
    direction = self._frame.direction
    lat_min = min(peg.latStart, peg.latEnd)
    lat_max = max(peg.latStart, peg.latEnd)
    params = {
        'sensor': self._frame.platform,
        'trackNumber': self._frame.trackNumber,
        'dataset_type': self._frame.dataset_type,
        'beamID': beamID,
        'system_version': uu.version,
        'latitudeIndexMin': int(math.floor(lat_min / FrameInfoExtractor._latitudeResolution)),
        'latitudeIndexMax': int(math.floor(lat_max / FrameInfoExtractor._latitudeResolution)),
        'direction': direction
    }
    # get the list of metadata close to the reference frame
    metList = postQuery(buildQuery(params, ['cross-boundaries']))
    if metList:
        metList = self.refineByPlatform(metList[0])
        newMet = []
        for met in metList:
            newMet.append(self.groupByTime(met))
        return newMet